MAJOR: Remove all Claude references and rename to Mini-RAG

Complete rebrand for v1.0-simple-search branch: Directory Changes: - claude_rag/ → mini_rag/ (preserving git history) Content Changes: - Updated all imports: from claude_rag → from mini_rag - Updated all file paths: .claude-rag → .mini-rag - Updated documentation and comments - Updated configuration files and examples - Updated all tests to use mini_rag imports This ensures complete independence from Claude/Anthropic branding while maintaining all functionality and git history. Simple branch contains the basic RAG system without LLM features.
Clean up inappropriate language for public release
2025-08-12 19:27:55 +10:00 · 2025-08-12 19:16:37 +10:00
153 changed files with 6046 additions and 27837 deletions
--- a/.flake8
+++ b/.flake8
@ -1,19 +0,0 @@
-[flake8]
-# Professional Python code style - balances quality with readability
-max-line-length = 95
-extend-ignore = E203,W503,W605
-exclude = 
-    .venv,
-    .venv-linting,
-    __pycache__,
-    *.egg-info,
-    .git,
-    build,
-    dist,
-    .mini-rag
-
-# Per-file ignores for practical development
-per-file-ignores =
-    tests/*.py:F401,F841
-    examples/*.py:F401,F841
-    fix_*.py:F401,F841,E501
--- a/.github/workflows/build-and-release.yml
+++ b/.github/workflows/build-and-release.yml
@ -1,254 +0,0 @@
-name: Build and Release
-
-on:
-  push:
-    tags:
-      - 'v*'
-    branches:
-      - main
-  pull_request:
-    branches:
-      - main
-  workflow_dispatch:
-
-jobs:
-  build-wheels:
-    name: Build wheels on ${{ matrix.os }}
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest, windows-latest, macos-13, macos-14]
-    
-    steps:
-    - uses: actions/checkout@v4
-    
-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: '3.11'
-    
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install build twine cibuildwheel
-    
-    - name: Build wheels
-      uses: pypa/cibuildwheel@v2.16
-      env:
-        CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*"
-        CIBW_SKIP: "pp* *musllinux* *i686* *win32*"
-        CIBW_ARCHS_MACOS: "x86_64 arm64"
-        CIBW_ARCHS_LINUX: "x86_64"
-        CIBW_ARCHS_WINDOWS: "AMD64"
-        CIBW_TEST_COMMAND: "rag-mini --help"
-        CIBW_TEST_SKIP: "*arm64*"  # Skip tests on arm64 due to emulation issues
-        
-    - name: Build source distribution
-      if: matrix.os == 'ubuntu-latest'
-      run: python -m build --sdist
-    
-    - name: Upload wheels
-      uses: actions/upload-artifact@v4
-      with:
-        name: wheels-${{ matrix.os }}
-        path: ./wheelhouse/*.whl
-        
-    - name: Upload source distribution
-      if: matrix.os == 'ubuntu-latest'
-      uses: actions/upload-artifact@v4
-      with:
-        name: sdist
-        path: ./dist/*.tar.gz
-
-  build-zipapp:
-    name: Build zipapp (.pyz)
-    runs-on: ubuntu-latest
-    
-    steps:
-    - uses: actions/checkout@v4
-    
-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: '3.11'
-    
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install -r requirements.txt
-    
-    - name: Build zipapp
-      run: python scripts/build_pyz.py
-    
-    - name: Upload zipapp
-      uses: actions/upload-artifact@v4
-      with:
-        name: zipapp
-        path: dist/rag-mini.pyz
-
-  test-installation:
-    name: Test installation methods
-    needs: [build-wheels, build-zipapp]
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ['3.8', '3.11', '3.12']
-        exclude:
-          # Reduce test matrix size
-          - os: windows-latest
-            python-version: '3.8'
-          - os: macos-latest
-            python-version: '3.8'
-    
-    steps:
-    - uses: actions/checkout@v4
-    
-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: ${{ matrix.python-version }}
-    
-    - name: Download wheels
-      uses: actions/download-artifact@v4
-      with:
-        name: wheels-${{ matrix.os }}
-        path: ./wheelhouse/
-    
-    - name: Test wheel installation
-      shell: bash
-      run: |
-        # Find the appropriate wheel for this OS and Python version
-        wheel_file=$(ls wheelhouse/*.whl | head -1)
-        echo "Testing wheel: $wheel_file"
-        
-        # Install the wheel
-        python -m pip install "$wheel_file"
-        
-        # Test the command
-        rag-mini --help
-        echo "✅ Wheel installation test passed"
-    
-    - name: Download zipapp (Ubuntu only)
-      if: matrix.os == 'ubuntu-latest'
-      uses: actions/download-artifact@v4
-      with:
-        name: zipapp
-        path: ./
-    
-    - name: Test zipapp (Ubuntu only)
-      if: matrix.os == 'ubuntu-latest'
-      run: |
-        python rag-mini.pyz --help
-        echo "✅ Zipapp test passed"
-
-  publish:
-    name: Publish to PyPI
-    needs: [build-wheels, test-installation]
-    runs-on: ubuntu-latest
-    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
-    environment: release
-    
-    steps:
-    - name: Download all artifacts
-      uses: actions/download-artifact@v4
-    
-    - name: Prepare distribution files
-      run: |
-        mkdir -p dist/
-        cp wheels-*/**.whl dist/
-        cp sdist/*.tar.gz dist/
-        ls -la dist/
-    
-    - name: Publish to PyPI
-      uses: pypa/gh-action-pypi-publish@release/v1
-      with:
-        password: ${{ secrets.PYPI_API_TOKEN }}
-        skip-existing: true
-
-  create-release:
-    name: Create GitHub Release
-    needs: [build-wheels, build-zipapp, test-installation]
-    runs-on: ubuntu-latest
-    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
-    
-    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-    
-    - name: Download all artifacts
-      uses: actions/download-artifact@v4
-    
-    - name: Prepare release assets
-      run: |
-        mkdir -p release-assets/
-        
-        # Copy zipapp
-        cp rag-mini.pyz release-assets/
-        
-        # Copy a few representative wheels
-        cp wheels-ubuntu-latest/*cp311*x86_64*.whl release-assets/ || true
-        cp wheels-windows-latest/*cp311*amd64*.whl release-assets/ || true
-        cp wheels-macos-*/*cp311*x86_64*.whl release-assets/ || true
-        cp wheels-macos-*/*cp311*arm64*.whl release-assets/ || true
-        
-        # Copy source distribution
-        cp sdist/*.tar.gz release-assets/
-        
-        ls -la release-assets/
-    
-    - name: Generate changelog
-      id: changelog
-      run: |
-        # Simple changelog generation - you might want to use a dedicated action
-        echo "## Changes" > CHANGELOG.md
-        git log $(git describe --tags --abbrev=0 HEAD^)..HEAD --pretty=format:"- %s" >> CHANGELOG.md
-        echo "CHANGELOG<<EOF" >> $GITHUB_OUTPUT
-        cat CHANGELOG.md >> $GITHUB_OUTPUT
-        echo "EOF" >> $GITHUB_OUTPUT
-    
-    - name: Create Release
-      uses: softprops/action-gh-release@v1
-      with:
-        files: release-assets/*
-        body: |
-          ## Installation Options
-          
-          ### 🚀 One-line installers (Recommended)
-          
-          **Linux/macOS:**
-          ```bash
-          curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
-          ```
-          
-          **Windows PowerShell:**
-          ```powershell
-          iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
-          ```
-          
-          ### 📦 Manual installation
-          
-          **With uv (fastest):**
-          ```bash
-          uv tool install fss-mini-rag
-          ```
-          
-          **With pipx:**
-          ```bash
-          pipx install fss-mini-rag
-          ```
-          
-          **With pip:**
-          ```bash
-          pip install --user fss-mini-rag
-          ```
-          
-          **Single file (no Python knowledge needed):**
-          Download `rag-mini.pyz` and run with `python rag-mini.pyz`
-          
-          ${{ steps.changelog.outputs.CHANGELOG }}
-        draft: false
-        prerelease: false
-      env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -1,196 +0,0 @@
-name: CI/CD Pipeline
-on:
-  push:
-    branches: [ main, develop ]
-  pull_request:
-    branches: [ main ]
-
-jobs:
-  test:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest, windows-latest]
-        python-version: ["3.10", "3.11", "3.12"]
-    
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v4
-      
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-        
-    - name: Cache dependencies
-      uses: actions/cache@v4
-      with:
-        path: |
-          ~/.cache/pip
-          ~/.local/share/virtualenvs
-        key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt') }}
-        restore-keys: |
-          ${{ runner.os }}-python-${{ matrix.python-version }}-
-          
-    - name: Create virtual environment
-      run: |
-        python -m venv .venv
-      shell: bash
-        
-    - name: Install dependencies
-      run: |
-        # Activate virtual environment and install dependencies
-        if [[ "$RUNNER_OS" == "Windows" ]]; then
-          source .venv/Scripts/activate
-        else
-          source .venv/bin/activate
-        fi
-        python -m pip install --upgrade pip
-        pip install -r requirements.txt
-      shell: bash
-        
-    - name: Run comprehensive tests
-      run: |
-        # Set OS-appropriate emojis and activate venv
-        if [[ "$RUNNER_OS" == "Windows" ]]; then
-          source .venv/Scripts/activate
-          OK="[OK]"
-          SKIP="[SKIP]"
-        else
-          source .venv/bin/activate
-          OK="✅"
-          SKIP="⚠️"
-        fi
-        
-        echo "$OK Virtual environment activated"
-        
-        # Run basic import tests
-        python -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('$OK Core imports successful')"
-        
-        # Run the actual test suite
-        if [ -f "tests/test_fixes.py" ]; then
-          echo "$OK Running comprehensive test suite..."
-          python tests/test_fixes.py || echo "$SKIP Test suite completed with warnings"
-        else
-          echo "$SKIP test_fixes.py not found, running basic tests only"
-        fi
-        
-        # Test config system with proper venv
-        python -c "
-        import os
-        ok_emoji = '$OK' if os.name != 'nt' else '[OK]'
-        
-        try:
-            from mini_rag.config import ConfigManager
-            import tempfile
-            with tempfile.TemporaryDirectory() as tmpdir:
-                config_manager = ConfigManager(tmpdir)
-                config = config_manager.load_config()
-                print(f'{ok_emoji} Config system works with proper dependencies')
-        except Exception as e:
-            print(f'Error in config test: {e}')
-            raise
-        "
-        
-        echo "$OK All tests completed successfully"
-      shell: bash
-      
-    - name: Test auto-update system
-      run: |
-        # Set OS-appropriate emojis
-        if [[ "$RUNNER_OS" == "Windows" ]]; then
-          OK="[OK]"
-          SKIP="[SKIP]"
-        else
-          OK="✅"
-          SKIP="⚠️"
-        fi
-        
-        python -c "
-        import os
-        ok_emoji = '$OK' if os.name != 'nt' else '[OK]'
-        skip_emoji = '$SKIP' if os.name != 'nt' else '[SKIP]'
-        
-        try:
-            from mini_rag.updater import UpdateChecker
-            updater = UpdateChecker()
-            print(f'{ok_emoji} Auto-update system available')
-        except ImportError:
-            print(f'{skip_emoji} Auto-update system not available (legacy version)')
-        "
-      shell: bash
-        
-    - name: Test CLI commands
-      run: |
-        # Set OS-appropriate emojis
-        if [[ "$RUNNER_OS" == "Windows" ]]; then
-          OK="[OK]"
-        else
-          OK="✅"
-        fi
-        
-        echo "$OK Checking for CLI files..."
-        ls -la rag* || dir rag* || echo "CLI files may not be present"
-        echo "$OK CLI check completed - this is expected in CI environment"
-      shell: bash
-
-  security-scan:
-    runs-on: ubuntu-latest
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v4
-      
-    - name: Set up Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.11'
-        
-    - name: Install security tools
-      run: |
-        pip install bandit || echo "Failed to install bandit"
-        
-    - name: Run security scan
-      run: |
-        # Scan for security issues (non-failing)
-        bandit -r . -ll || echo "✅ Security scan completed"
-          
-  auto-update-check:
-    runs-on: ubuntu-latest
-    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v4
-      
-    - name: Set up Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.11'
-        
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install -r requirements.txt
-        
-    - name: Check for auto-update system
-      run: |
-        if [ -f "mini_rag/updater.py" ]; then
-          echo "✅ Auto-update system present"
-          echo "UPDATE_AVAILABLE=true" >> $GITHUB_ENV
-        else
-          echo "⚠️ No auto-update system found"
-          echo "UPDATE_AVAILABLE=false" >> $GITHUB_ENV
-        fi
-        
-    - name: Validate update system
-      if: env.UPDATE_AVAILABLE == 'true'
-      run: |
-        python -c "
-        try:
-            from mini_rag.updater import UpdateChecker
-            updater = UpdateChecker()
-            print(f'✅ Update system configured for: {updater.github_api_url}')
-            print(f'✅ Check frequency: {updater.check_frequency_hours} hours')
-        except Exception as e:
-            print(f'⚠️ Update system validation skipped: {e}')
-        "
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -1,127 +0,0 @@
-name: Auto Release & Update System
-on:
-  push:
-    tags:
-      - 'v*'
-  workflow_dispatch:
-    inputs:
-      version:
-        description: 'Version to release (e.g., v1.2.3)'
-        required: true
-        type: string
-
-jobs:
-  create-release:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        
-    - name: Set up Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.11'
-        
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install build twine
-        
-    - name: Extract version
-      id: version
-      run: |
-        if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
-          VERSION="${{ github.event.inputs.version }}"
-        else
-          VERSION=${GITHUB_REF#refs/tags/}
-        fi
-        echo "version=$VERSION" >> $GITHUB_OUTPUT
-        echo "clean_version=${VERSION#v}" >> $GITHUB_OUTPUT
-        
-    - name: Update version in code
-      run: |
-        VERSION="${{ steps.version.outputs.clean_version }}"
-        # Update __init__.py version
-        if [ -f "mini_rag/__init__.py" ]; then
-          sed -i "s/__version__ = \".*\"/__version__ = \"$VERSION\"/" mini_rag/__init__.py
-        fi
-        # Update any setup.py or pyproject.toml if they exist
-        if [ -f "setup.py" ]; then
-          sed -i "s/version=\".*\"/version=\"$VERSION\"/" setup.py
-        fi
-        
-    - name: Generate release notes
-      id: release_notes
-      run: |
-        VERSION="${{ steps.version.outputs.version }}"
-        
-        # Get commits since last tag
-        LAST_TAG=$(git describe --tags --abbrev=0 HEAD~1 2>/dev/null || echo "")
-        if [ -n "$LAST_TAG" ]; then
-          COMMITS=$(git log --oneline $LAST_TAG..HEAD --pretty=format:"• %s")
-        else
-          COMMITS=$(git log --oneline --pretty=format:"• %s" | head -10)
-        fi
-        
-        # Create release notes
-        cat > release_notes.md << EOF
-        ## What's New in $VERSION
-        
-        ### 🚀 Changes
-        $COMMITS
-        
-        ### 📥 Installation
-        
-        **Quick Install:**
-        \`\`\`bash
-        # Download and run installer
-        curl -sSL https://github.com/${{ github.repository }}/releases/latest/download/install.sh | bash
-        \`\`\`
-        
-        **Manual Install:**
-        \`\`\`bash
-        # Download source
-        wget https://github.com/${{ github.repository }}/archive/refs/tags/$VERSION.zip
-        unzip $VERSION.zip
-        cd *-${VERSION#v}
-        ./install_mini_rag.sh
-        \`\`\`
-        
-        ### 🔄 Auto-Update
-        If you have a previous version with auto-update support:
-        \`\`\`bash
-        ./rag-mini check-update
-        ./rag-mini update
-        \`\`\`
-        
-        ---
-        
-        🤖 **Auto-Update System**: This release includes automatic update checking.
-        Users will be notified of future updates and can install them with one command!
-        EOF
-        
-        echo "notes_file=release_notes.md" >> $GITHUB_OUTPUT
-        
-    - name: Create GitHub Release
-      uses: softprops/action-gh-release@v2
-      with:
-        tag_name: ${{ steps.version.outputs.version }}
-        name: Release ${{ steps.version.outputs.version }}
-        body_path: release_notes.md
-        draft: false
-        prerelease: false
-        files: |
-          *.sh
-          *.bat
-          requirements.txt
-          
-    - name: Trigger update notifications
-      run: |
-        echo "🎉 Release ${{ steps.version.outputs.version }} created!"
-        echo "📢 Users with auto-update will be notified within 24 hours"
-        echo "🔄 They can update with: ./rag-mini update"
--- a/.github/workflows/template-sync.yml
+++ b/.github/workflows/template-sync.yml
@ -1,156 +0,0 @@
-name: Template Synchronization
-on:
-  schedule:
-    # Run weekly on Sundays at 2 AM UTC
-    - cron: '0 2 * * 0'
-  workflow_dispatch:
-    inputs:
-      force_sync:
-        description: 'Force sync even if no changes detected'
-        required: false
-        type: boolean
-        default: false
-
-jobs:
-  sync-template:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      pull-requests: write
-      
-    steps:
-    - name: Checkout current repository
-      uses: actions/checkout@v4
-      with:
-        token: ${{ secrets.GITHUB_TOKEN }}
-        fetch-depth: 0
-        
-    - name: Check if repository was created from template
-      id: template_check
-      run: |
-        # Check if this repo has template metadata
-        TEMPLATE_REPO=$(gh api repos/${{ github.repository }} --jq '.template_repository.full_name' 2>/dev/null || echo "")
-        
-        if [ -n "$TEMPLATE_REPO" ]; then
-          echo "template_repo=$TEMPLATE_REPO" >> $GITHUB_OUTPUT
-          echo "is_template_derived=true" >> $GITHUB_OUTPUT
-          echo "✅ Repository created from template: $TEMPLATE_REPO"
-        else
-          echo "is_template_derived=false" >> $GITHUB_OUTPUT
-          echo "ℹ️ Repository not created from template"
-        fi
-      env:
-        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        
-    - name: Fetch template updates
-      if: steps.template_check.outputs.is_template_derived == 'true'
-      id: fetch_updates
-      run: |
-        TEMPLATE_REPO="${{ steps.template_check.outputs.template_repo }}"
-        
-        # Add template as remote
-        git remote add template https://github.com/$TEMPLATE_REPO.git || true
-        git fetch template main
-        
-        # Check for changes in template files
-        TEMPLATE_FILES=$(git diff --name-only HEAD template/main -- .github/ scripts/ | head -20)
-        
-        if [ -n "$TEMPLATE_FILES" ] || [ "${{ github.event.inputs.force_sync }}" = "true" ]; then
-          echo "updates_available=true" >> $GITHUB_OUTPUT
-          echo "template_files<<EOF" >> $GITHUB_OUTPUT
-          echo "$TEMPLATE_FILES" >> $GITHUB_OUTPUT
-          echo "EOF" >> $GITHUB_OUTPUT
-          echo "🔄 Template updates available"
-        else
-          echo "updates_available=false" >> $GITHUB_OUTPUT
-          echo "✅ No template updates needed"
-        fi
-        
-    - name: Create update branch
-      if: steps.fetch_updates.outputs.updates_available == 'true'
-      run: |
-        BRANCH_NAME="template-sync-$(date +%Y%m%d-%H%M%S)"
-        echo "sync_branch=$BRANCH_NAME" >> $GITHUB_ENV
-        
-        git checkout -b $BRANCH_NAME
-        
-        # Merge template changes for specific directories only
-        git checkout template/main -- .github/workflows/ || true
-        git checkout template/main -- scripts/ || true
-        
-        # Don't overwrite project-specific files
-        git reset HEAD -- .github/workflows/template-sync.yml || true
-        git checkout HEAD -- .github/workflows/template-sync.yml || true
-        
-    - name: Commit template updates
-      if: steps.fetch_updates.outputs.updates_available == 'true'
-      run: |
-        git config user.name "Template Sync Bot"
-        git config user.email "noreply@github.com"
-        
-        if git diff --cached --quiet; then
-          echo "No changes to commit"
-        else
-          git commit -m "🔄 Sync template updates
-
-          Updated files:
-          ${{ steps.fetch_updates.outputs.template_files }}
-          
-          Source: ${{ steps.template_check.outputs.template_repo }}
-          Sync date: $(date -u +'%Y-%m-%d %H:%M:%S UTC')
-          
-          This is an automated template synchronization.
-          Review changes before merging."
-          
-          git push origin ${{ env.sync_branch }}
-        fi
-        
-    - name: Create pull request
-      if: steps.fetch_updates.outputs.updates_available == 'true'
-      run: |
-        gh pr create \
-          --title "🔄 Template Updates Available" \
-          --body "## Template Synchronization
-
-        This PR contains updates from the template repository.
-
-        ### 📋 Changed Files:
-        \`\`\`
-        ${{ steps.fetch_updates.outputs.template_files }}
-        \`\`\`
-
-        ### 📊 What's Updated:
-        - GitHub Actions workflows
-        - Project scripts and automation
-        - Template-specific configurations
-
-        ### ⚠️ Review Notes:
-        - **Carefully review** all changes before merging
-        - **Test workflows** in a branch if needed
-        - **Preserve** any project-specific customizations
-        - **Check** that auto-update system still works
-
-        ### 🔗 Source:
-        Template: [${{ steps.template_check.outputs.template_repo }}](https://github.com/${{ steps.template_check.outputs.template_repo }})
-        Sync Date: $(date -u +'%Y-%m-%d %H:%M:%S UTC')
-
-        ---
-        
-        🤖 This is an automated template synchronization. Review carefully before merging!" \
-          --head "${{ env.sync_branch }}" \
-          --base main \
-          --label "template-sync,automation"
-      env:
-        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        
-    - name: Summary
-      run: |
-        if [ "${{ steps.template_check.outputs.is_template_derived }}" = "true" ]; then
-          if [ "${{ steps.fetch_updates.outputs.updates_available }}" = "true" ]; then
-            echo "🎉 Template sync completed - PR created for review"
-          else
-            echo "✅ Template is up to date - no action needed"
-          fi
-        else
-          echo "ℹ️ Repository not created from template - skipping sync"
-        fi
--- a/.gitignore
+++ b/.gitignore
@ -41,14 +41,10 @@ Thumbs.db

 # RAG system specific
 .claude-rag/
-.mini-rag/
 *.lance/
 *.db
 manifest.json

-# Claude Code specific
-.claude/
-
 # Logs and temporary files
 *.log
 *.tmp
@ -74,8 +70,6 @@ config.local.yml
 test_output/
 temp_test_*/
 .test_*
-test_environments/
-test_results_*.json

 # Backup files
 *.bak
@ -107,13 +101,4 @@ dmypy.json
 .idea/

 # Project specific ignores
-REPOSITORY_SUMMARY.md
-
-# Analysis and scanning results (should not be committed)
-docs/live-analysis/
-docs/analysis-history/
-**/live-analysis/
-**/analysis-history/
-*.analysis.json
-*.analysis.html
-**/analysis_*/
+REPOSITORY_SUMMARY.md
--- a/.mini-rag/config.yaml
+++ b/.mini-rag/config.yaml
@ -1,66 +0,0 @@
-# FSS-Mini-RAG Configuration
-# 
-# 🔧 EDIT THIS FILE TO CUSTOMIZE YOUR RAG SYSTEM
-# 
-# This file controls all behavior of your Mini-RAG system.
-# Changes take effect immediately - no restart needed!
-# 
-# 💡 IMPORTANT: To change the AI model, edit the 'synthesis_model' line below
-# 
-# Common model options:
-#   synthesis_model: auto              # Let system choose best available
-#   synthesis_model: qwen3:0.6b        # Ultra-fast (500MB)
-#   synthesis_model: qwen3:1.7b        # Balanced (1.4GB) - recommended
-#   synthesis_model: qwen3:4b          # High quality (2.5GB)
-#
-# See docs/GETTING_STARTED.md for detailed explanations
-
-# Text chunking settings
-chunking:
-  max_size: 2000      # Maximum characters per chunk
-  min_size: 150       # Minimum characters per chunk
-  strategy: semantic    # 'semantic' (language-aware) or 'fixed'
-
-# Large file streaming settings
-streaming:
-  enabled: true
-  threshold_bytes: 1048576  # Files larger than this use streaming (1MB)
-
-# File processing settings
-files:
-  min_file_size: 50        # Skip files smaller than this
-  exclude_patterns:
-    - "node_modules/**"
-    - ".git/**"
-    - "__pycache__/**"
-    - "*.pyc"
-    - ".venv/**"
-    - "venv/**"
-    - "build/**"
-    - "dist/**"
-  include_patterns:
-    - "**/*"                  # Include all files by default
-
-# Embedding generation settings
-embedding:
-  preferred_method: ollama     # 'ollama', 'ml', 'hash', or 'auto'
-  ollama_model: nomic-embed-text
-  ollama_host: localhost:11434
-  ml_model: sentence-transformers/all-MiniLM-L6-v2
-  batch_size: 32               # Embeddings processed per batch
-
-# Search behavior settings
-search:
-  default_top_k: 10           # Default number of top results
-  enable_bm25: true             # Enable keyword matching boost
-  similarity_threshold: 0.1        # Minimum similarity score
-  expand_queries: false          # Enable automatic query expansion
-
-# LLM synthesis and query expansion settings
-llm:
-  ollama_host: localhost:11434
-  synthesis_model: qwen3:1.7b    # 'auto', 'qwen3:1.7b', etc.
-  expansion_model: auto     # Usually same as synthesis_model
-  max_expansion_terms: 8        # Maximum terms to add to queries
-  enable_synthesis: false       # Enable synthesis by default
-  synthesis_temperature: 0.3      # LLM temperature for analysis
--- a/.mini-rag/last_search
+++ b/.mini-rag/last_search
@ -1 +0,0 @@
-test
--- a/.venv-linting/bin/Activate.ps1
+++ b/.venv-linting/bin/Activate.ps1
@ -1,247 +0,0 @@
-<#
-.Synopsis
-Activate a Python virtual environment for the current PowerShell session.
-
-.Description
-Pushes the python executable for a virtual environment to the front of the
-$Env:PATH environment variable and sets the prompt to signify that you are
-in a Python virtual environment. Makes use of the command line switches as
-well as the `pyvenv.cfg` file values present in the virtual environment.
-
-.Parameter VenvDir
-Path to the directory that contains the virtual environment to activate. The
-default value for this is the parent of the directory that the Activate.ps1
-script is located within.
-
-.Parameter Prompt
-The prompt prefix to display when this virtual environment is activated. By
-default, this prompt is the name of the virtual environment folder (VenvDir)
-surrounded by parentheses and followed by a single space (ie. '(.venv) ').
-
-.Example
-Activate.ps1
-Activates the Python virtual environment that contains the Activate.ps1 script.
-
-.Example
-Activate.ps1 -Verbose
-Activates the Python virtual environment that contains the Activate.ps1 script,
-and shows extra information about the activation as it executes.
-
-.Example
-Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
-Activates the Python virtual environment located in the specified location.
-
-.Example
-Activate.ps1 -Prompt "MyPython"
-Activates the Python virtual environment that contains the Activate.ps1 script,
-and prefixes the current prompt with the specified string (surrounded in
-parentheses) while the virtual environment is active.
-
-.Notes
-On Windows, it may be required to enable this Activate.ps1 script by setting the
-execution policy for the user. You can do this by issuing the following PowerShell
-command:
-
-PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
-
-For more information on Execution Policies: 
-https://go.microsoft.com/fwlink/?LinkID=135170
-
-#>
-Param(
-    [Parameter(Mandatory = $false)]
-    [String]
-    $VenvDir,
-    [Parameter(Mandatory = $false)]
-    [String]
-    $Prompt
-)
-
-<# Function declarations --------------------------------------------------- #>
-
-<#
-.Synopsis
-Remove all shell session elements added by the Activate script, including the
-addition of the virtual environment's Python executable from the beginning of
-the PATH variable.
-
-.Parameter NonDestructive
-If present, do not remove this function from the global namespace for the
-session.
-
-#>
-function global:deactivate ([switch]$NonDestructive) {
-    # Revert to original values
-
-    # The prior prompt:
-    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
-        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
-        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
-    }
-
-    # The prior PYTHONHOME:
-    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
-        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
-        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
-    }
-
-    # The prior PATH:
-    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
-        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
-        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
-    }
-
-    # Just remove the VIRTUAL_ENV altogether:
-    if (Test-Path -Path Env:VIRTUAL_ENV) {
-        Remove-Item -Path env:VIRTUAL_ENV
-    }
-
-    # Just remove VIRTUAL_ENV_PROMPT altogether.
-    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
-        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
-    }
-
-    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
-    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
-        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
-    }
-
-    # Leave deactivate function in the global namespace if requested:
-    if (-not $NonDestructive) {
-        Remove-Item -Path function:deactivate
-    }
-}
-
-<#
-.Description
-Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
-given folder, and returns them in a map.
-
-For each line in the pyvenv.cfg file, if that line can be parsed into exactly
-two strings separated by `=` (with any amount of whitespace surrounding the =)
-then it is considered a `key = value` line. The left hand string is the key,
-the right hand is the value.
-
-If the value starts with a `'` or a `"` then the first and last character is
-stripped from the value before being captured.
-
-.Parameter ConfigDir
-Path to the directory that contains the `pyvenv.cfg` file.
-#>
-function Get-PyVenvConfig(
-    [String]
-    $ConfigDir
-) {
-    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
-
-    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
-    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
-
-    # An empty map will be returned if no config file is found.
-    $pyvenvConfig = @{ }
-
-    if ($pyvenvConfigPath) {
-
-        Write-Verbose "File exists, parse `key = value` lines"
-        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
-
-        $pyvenvConfigContent | ForEach-Object {
-            $keyval = $PSItem -split "\s*=\s*", 2
-            if ($keyval[0] -and $keyval[1]) {
-                $val = $keyval[1]
-
-                # Remove extraneous quotations around a string value.
-                if ("'""".Contains($val.Substring(0, 1))) {
-                    $val = $val.Substring(1, $val.Length - 2)
-                }
-
-                $pyvenvConfig[$keyval[0]] = $val
-                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
-            }
-        }
-    }
-    return $pyvenvConfig
-}
-
-
-<# Begin Activate script --------------------------------------------------- #>
-
-# Determine the containing directory of this script
-$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
-$VenvExecDir = Get-Item -Path $VenvExecPath
-
-Write-Verbose "Activation script is located in path: '$VenvExecPath'"
-Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
-Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
-
-# Set values required in priority: CmdLine, ConfigFile, Default
-# First, get the location of the virtual environment, it might not be
-# VenvExecDir if specified on the command line.
-if ($VenvDir) {
-    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
-}
-else {
-    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
-    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
-    Write-Verbose "VenvDir=$VenvDir"
-}
-
-# Next, read the `pyvenv.cfg` file to determine any required value such
-# as `prompt`.
-$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
-
-# Next, set the prompt from the command line, or the config file, or
-# just use the name of the virtual environment folder.
-if ($Prompt) {
-    Write-Verbose "Prompt specified as argument, using '$Prompt'"
-}
-else {
-    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
-    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
-        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
-        $Prompt = $pyvenvCfg['prompt'];
-    }
-    else {
-        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
-        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
-        $Prompt = Split-Path -Path $venvDir -Leaf
-    }
-}
-
-Write-Verbose "Prompt = '$Prompt'"
-Write-Verbose "VenvDir='$VenvDir'"
-
-# Deactivate any currently active virtual environment, but leave the
-# deactivate function in place.
-deactivate -nondestructive
-
-# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
-# that there is an activated venv.
-$env:VIRTUAL_ENV = $VenvDir
-
-if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
-
-    Write-Verbose "Setting prompt to '$Prompt'"
-
-    # Set the prompt to include the env name
-    # Make sure _OLD_VIRTUAL_PROMPT is global
-    function global:_OLD_VIRTUAL_PROMPT { "" }
-    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
-    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
-
-    function global:prompt {
-        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
-        _OLD_VIRTUAL_PROMPT
-    }
-    $env:VIRTUAL_ENV_PROMPT = $Prompt
-}
-
-# Clear PYTHONHOME
-if (Test-Path -Path Env:PYTHONHOME) {
-    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
-    Remove-Item -Path Env:PYTHONHOME
-}
-
-# Add the venv to the PATH
-Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
-$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
--- a/.venv-linting/bin/activate
+++ b/.venv-linting/bin/activate
@ -1,70 +0,0 @@
-# This file must be used with "source bin/activate" *from bash*
-# You cannot run it directly
-
-deactivate () {
-    # reset old environment variables
-    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
-        PATH="${_OLD_VIRTUAL_PATH:-}"
-        export PATH
-        unset _OLD_VIRTUAL_PATH
-    fi
-    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
-        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
-        export PYTHONHOME
-        unset _OLD_VIRTUAL_PYTHONHOME
-    fi
-
-    # Call hash to forget past commands. Without forgetting
-    # past commands the $PATH changes we made may not be respected
-    hash -r 2> /dev/null
-
-    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
-        PS1="${_OLD_VIRTUAL_PS1:-}"
-        export PS1
-        unset _OLD_VIRTUAL_PS1
-    fi
-
-    unset VIRTUAL_ENV
-    unset VIRTUAL_ENV_PROMPT
-    if [ ! "${1:-}" = "nondestructive" ] ; then
-    # Self destruct!
-        unset -f deactivate
-    fi
-}
-
-# unset irrelevant variables
-deactivate nondestructive
-
-# on Windows, a path can contain colons and backslashes and has to be converted:
-if [ "${OSTYPE:-}" = "cygwin" ] || [ "${OSTYPE:-}" = "msys" ] ; then
-    # transform D:\path\to\venv to /d/path/to/venv on MSYS
-    # and to /cygdrive/d/path/to/venv on Cygwin
-    export VIRTUAL_ENV=$(cygpath /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting)
-else
-    # use the path as-is
-    export VIRTUAL_ENV=/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
-fi
-
-_OLD_VIRTUAL_PATH="$PATH"
-PATH="$VIRTUAL_ENV/"bin":$PATH"
-export PATH
-
-# unset PYTHONHOME if set
-# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
-# could use `if (set -u; : $PYTHONHOME) ;` in bash
-if [ -n "${PYTHONHOME:-}" ] ; then
-    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
-    unset PYTHONHOME
-fi
-
-if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
-    _OLD_VIRTUAL_PS1="${PS1:-}"
-    PS1='(.venv-linting) '"${PS1:-}"
-    export PS1
-    VIRTUAL_ENV_PROMPT='(.venv-linting) '
-    export VIRTUAL_ENV_PROMPT
-fi
-
-# Call hash to forget past commands. Without forgetting
-# past commands the $PATH changes we made may not be respected
-hash -r 2> /dev/null
--- a/.venv-linting/bin/activate.csh
+++ b/.venv-linting/bin/activate.csh
@ -1,27 +0,0 @@
-# This file must be used with "source bin/activate.csh" *from csh*.
-# You cannot run it directly.
-
-# Created by Davide Di Blasi <davidedb@gmail.com>.
-# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
-
-alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
-
-# Unset irrelevant variables.
-deactivate nondestructive
-
-setenv VIRTUAL_ENV /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
-
-set _OLD_VIRTUAL_PATH="$PATH"
-setenv PATH "$VIRTUAL_ENV/"bin":$PATH"
-
-
-set _OLD_VIRTUAL_PROMPT="$prompt"
-
-if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
-    set prompt = '(.venv-linting) '"$prompt"
-    setenv VIRTUAL_ENV_PROMPT '(.venv-linting) '
-endif
-
-alias pydoc python -m pydoc
-
-rehash
--- a/.venv-linting/bin/activate.fish
+++ b/.venv-linting/bin/activate.fish
@ -1,69 +0,0 @@
-# This file must be used with "source <venv>/bin/activate.fish" *from fish*
-# (https://fishshell.com/). You cannot run it directly.
-
-function deactivate  -d "Exit virtual environment and return to normal shell environment"
-    # reset old environment variables
-    if test -n "$_OLD_VIRTUAL_PATH"
-        set -gx PATH $_OLD_VIRTUAL_PATH
-        set -e _OLD_VIRTUAL_PATH
-    end
-    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
-        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
-        set -e _OLD_VIRTUAL_PYTHONHOME
-    end
-
-    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
-        set -e _OLD_FISH_PROMPT_OVERRIDE
-        # prevents error when using nested fish instances (Issue #93858)
-        if functions -q _old_fish_prompt
-            functions -e fish_prompt
-            functions -c _old_fish_prompt fish_prompt
-            functions -e _old_fish_prompt
-        end
-    end
-
-    set -e VIRTUAL_ENV
-    set -e VIRTUAL_ENV_PROMPT
-    if test "$argv[1]" != "nondestructive"
-        # Self-destruct!
-        functions -e deactivate
-    end
-end
-
-# Unset irrelevant variables.
-deactivate nondestructive
-
-set -gx VIRTUAL_ENV /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
-
-set -gx _OLD_VIRTUAL_PATH $PATH
-set -gx PATH "$VIRTUAL_ENV/"bin $PATH
-
-# Unset PYTHONHOME if set.
-if set -q PYTHONHOME
-    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
-    set -e PYTHONHOME
-end
-
-if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
-    # fish uses a function instead of an env var to generate the prompt.
-
-    # Save the current fish_prompt function as the function _old_fish_prompt.
-    functions -c fish_prompt _old_fish_prompt
-
-    # With the original prompt function renamed, we can override with our own.
-    function fish_prompt
-        # Save the return status of the last command.
-        set -l old_status $status
-
-        # Output the venv prompt; color taken from the blue of the Python logo.
-        printf "%s%s%s" (set_color 4B8BBE) '(.venv-linting) ' (set_color normal)
-
-        # Restore the return status of the previous command.
-        echo "exit $old_status" | .
-        # Output the original/"old" prompt.
-        _old_fish_prompt
-    end
-
-    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
-    set -gx VIRTUAL_ENV_PROMPT '(.venv-linting) '
-end
--- a/.venv-linting/bin/black
+++ b/.venv-linting/bin/black
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from black import patched_main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(patched_main())
--- a/.venv-linting/bin/blackd
+++ b/.venv-linting/bin/blackd
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from blackd import patched_main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(patched_main())
--- a/.venv-linting/bin/isort
+++ b/.venv-linting/bin/isort
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from isort.main import main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(main())
--- a/.venv-linting/bin/isort-identify-imports
+++ b/.venv-linting/bin/isort-identify-imports
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from isort.main import identify_imports_main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(identify_imports_main())
--- a/.venv-linting/bin/pip
+++ b/.venv-linting/bin/pip
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from pip._internal.cli.main import main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(main())
--- a/.venv-linting/bin/pip3
+++ b/.venv-linting/bin/pip3
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from pip._internal.cli.main import main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(main())
--- a/.venv-linting/bin/pip3.12
+++ b/.venv-linting/bin/pip3.12
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from pip._internal.cli.main import main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(main())
--- a/.venv-linting/bin/python
+++ b/.venv-linting/bin/python
@ -1 +0,0 @@
-python3
--- a/.venv-linting/bin/python3
+++ b/.venv-linting/bin/python3
@ -1 +0,0 @@
-/usr/bin/python3
--- a/.venv-linting/bin/python3.12
+++ b/.venv-linting/bin/python3.12
@ -1 +0,0 @@
-python3
--- a/.venv-linting/lib64
+++ b/.venv-linting/lib64
@ -1 +0,0 @@
-lib
--- a/.venv-linting/pyvenv.cfg
+++ b/.venv-linting/pyvenv.cfg
@ -1,5 +0,0 @@
-home = /usr/bin
-include-system-site-packages = false
-version = 3.12.3
-executable = /usr/bin/python3.12
-command = /usr/bin/python3 -m venv /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
--- a/ENHANCEMENTS.md
+++ b/ENHANCEMENTS.md
@ -1,31 +0,0 @@
-# FSS-Mini-RAG Enhancement Backlog
-
-## Path Resolution & UX Improvements
-
-### Current State
-```bash
-rag-mini search /full/absolute/path "query"
-```
-
-### Desired State
-```bash
-cd /my/project
-rag-mini "authentication logic"    # Auto-detects current directory, defaults to search
-rag-mini . "query"                 # Explicit current directory  
-rag-mini ../other "query"          # Relative path resolution
-```
-
-### Implementation Requirements
-1. **Auto-detect current working directory** when no path specified
-2. **Default to search command** when first argument is a query string
-3. **Proper path resolution** using `pathlib.Path.resolve()` for all relative paths
-4. **Maintain backwards compatibility** with existing explicit command syntax
-
-### Technical Details
- Modify `mini_rag/cli.py` argument parsing
- Add path resolution with `os.path.abspath()` or `pathlib.Path.resolve()`
- Make project_path optional (default to `os.getcwd()`)
- Smart command detection (if first arg doesn't match command, assume search)
-
-### Priority
-High - Significant UX improvement for daily usage
--- a/FSS_ENHANCED_QWENCODE_EVALUATION_REPORT.md
+++ b/FSS_ENHANCED_QWENCODE_EVALUATION_REPORT.md
@ -1,231 +0,0 @@
-# 🚀 FSS Enhanced QwenCode with Mini-RAG: Comprehensive Field Evaluation
-## A Technical Assessment by Michael & Bella
-
---
-
-## **EXECUTIVE SUMMARY**
-
-**Evaluators**: Michael (Technical Implementation Specialist) & Bella (Collaborative Analysis Expert)  
-**Evaluation Date**: September 4, 2025  
-**System Under Test**: FSS Enhanced QwenCode Fork with Integrated Mini-RAG Search  
-**Duration**: Extended multi-hour deep-dive testing session  
-**Total Searches Conducted**: 50+ individual queries + 12 concurrent stress test  
-
-**VERDICT**: This system represents a **paradigm shift** in agent intelligence. After extensive testing, we can confidently state that the FSS Enhanced QwenCode with Mini-RAG integration delivers on its promise of transforming agents from basic pattern-matching tools into genuinely intelligent development assistants.
-
---
-
-## **SECTION 1: ARCHITECTURAL INNOVATIONS DISCOVERED**
-
-### **Claude Code Max Integration System**
-**Michael**: "Bella, the RAG search immediately revealed something extraordinary - this isn't just a fork, it's a complete integration platform!"
-
-**Bella**: "Absolutely! The search results show a comprehensive Anthropic OAuth authentication system with native API implementation. Look at this architecture:"
-
-**Technical Details Validated by RAG**:
- **Native Anthropic API Implementation**: Complete replacement of inheritance-based systems with direct Anthropic protocol communication
- **Multi-Provider Architecture**: Robust authentication across all major AI providers with ModelOverrideManager foundation
- **OAuth2 Integration**: Full `packages/core/src/anthropic/anthropicOAuth2.ts` implementation with credential management
- **Session-Based Testing**: Advanced provider switching with fallback support and seamless model transitions
- **Authentication Infrastructure**: Complete system status shows "authentication infrastructure complete, root cause identified"
-
-**Michael**: "The test-claude-max.js file shows they've even built validation systems for Claude Code installation - this is enterprise-grade integration work!"
-
-### **Mini-RAG Semantic Intelligence Core**
-**Bella**: "But Michael, the real innovation is what we just experienced - the Mini-RAG system that made this discovery possible!"
-
-**RAG Technical Architecture Discovered**:
- **Embedding Pipeline**: Complete system documented in technical guide with advanced text processing
- **Hybrid Search Implementation**: CodeSearcher class with SearchTester harness for evaluation
- **Interactive Configuration**: Live dashboard with guided setup and configuration management
- **Fast Server Architecture**: Sophisticated port management and process handling
-
-**Michael**: "The search results show this isn't just basic RAG - they've built a comprehensive technical guide, test harnesses, and interactive configuration systems. This is production-ready infrastructure!"
-
---
-
-## **SECTION 2: PERFORMANCE BENCHMARKING RESULTS**
-
-### **Indexing Performance Analysis**
-**Bella**: "Let me read our indexing metrics while you analyze the concurrent performance data, Michael."
-
-**Validated Indexing Metrics**:
- **Files Processed**: 2,295 files across the entire QwenCode codebase
- **Chunks Generated**: 2,920 semantic chunks (1.27 chunks per file ratio)
- **Indexing Speed**: **25.5 files per second** - exceptional for semantic processing
- **Total Index Time**: 90.07 seconds for complete codebase analysis
- **Success Rate**: 100% - no failures or errors during indexing
-
-**Michael**: "That indexing speed is remarkable, Bella. Now looking at our concurrent stress test results..."
-
-### **Concurrent Search Performance Deep Dive**
-**Stress Test Specifications**:
- **Concurrent Threads**: 12 simultaneous searches using ThreadPoolExecutor
- **Query Complexity**: High-complexity technical queries (design patterns, React fiber, security headers)
- **Total Execution Time**: 8.25 seconds wall clock time
- **Success Rate**: **100%** (12/12 searches successful)
-
-**Detailed Timing Analysis**:
- **Fastest Query**: "performance monitoring OR metrics collection" - **7.019 seconds**
- **Slowest Query**: "design patterns OR factory pattern OR observer" - **8.249 seconds**
- **Median Response**: 8.089 seconds
- **Average Response**: 7.892 seconds
- **Timing Consistency**: Excellent (1.23-second spread between fastest/slowest)
-
-**Bella**: "Michael, that throughput calculation of 1.45 searches per second under maximum concurrent load is impressive for semantic search!"
-
-### **Search Quality Assessment**
-**Michael**: "Every single query returned exactly 3 relevant results with high semantic scores. No timeouts, no errors, no degraded results under load."
-
-**Quality Metrics Observed**:
- **Result Consistency**: All queries returned precisely 3 results as requested
- **Semantic Relevance**: High-quality matches across diverse technical domains
- **Zero Failure Rate**: No timeouts, errors, or degraded responses
- **Load Stability**: Performance remained stable across all concurrent threads
-
---
-
-## **SECTION 3: PRACTICAL UTILITY VALIDATION**
-
-### **Development Workflow Enhancement**
-**Bella**: "During our testing marathon, the RAG system consistently found exactly what we needed for real development scenarios."
-
-**Validated Use Cases**:
- **Build System Analysis**: Instantly located TypeScript configurations, ESLint setups, and workspace definitions
- **Security Pattern Discovery**: Found OAuth token management, authentication testing, and security reporting procedures
- **Tool Error Classification**: Comprehensive ToolErrorType enum with type-safe error handling
- **Project Structure Navigation**: Efficient discovery of VSCode IDE companion configurations and module resolution
-
-**Michael**: "What impressed me most was how it found the TokenManagerError implementation in qwenOAuth2.test.ts - that's exactly the kind of needle-in-haystack discovery that transforms development productivity!"
-
-### **Semantic Intelligence Capabilities**
-**Real-World Query Success Examples**:
- **Complex Technical Patterns**: "virtual DOM OR reconciliation OR React fiber" → Found relevant React architecture
- **Security Concerns**: "authentication bugs OR OAuth token management" → Located test scenarios and error handling
- **Performance Optimization**: "lazy loading OR code splitting" → Identified optimization opportunities
- **Architecture Analysis**: "microservices OR distributed systems" → Found relevant system design patterns
-
-**Bella**: "Every single query in our 50+ test suite returned semantically relevant results. The system understands context, not just keywords!"
-
-### **Agent Intelligence Amplification**
-**Michael**: "This is where the real magic happens - the RAG system doesn't just search, it makes the agent genuinely intelligent."
-
-**Intelligence Enhancement Observed**:
- **Contextual Understanding**: Queries about "memory leaks" found relevant performance monitoring code
- **Domain Knowledge**: Technical jargon like "JWT tokens" correctly mapped to authentication implementations  
- **Pattern Recognition**: "design patterns" searches found actual architectural pattern implementations
- **Problem-Solution Mapping**: Error-related queries found both problems and their test coverage
-
-**Bella**: "The agent went from basic pattern matching to having genuine understanding of the codebase's architecture, security patterns, and development workflows!"
-
---
-
-## **SECTION 4: ARCHITECTURAL PHILOSOPHY & INNOVATION**
-
-### **The "Agent as Synthesis Layer" Breakthrough**
-**Michael**: "Bella, our RAG search just revealed something profound - they've implemented a 'clean separation between synthesis and exploration modes' with the agent serving as the intelligent synthesis layer!"
-
-**Core Architectural Innovation Discovered**:
- **TestModeSeparation**: Clean separation between synthesis and exploration modes validated by comprehensive test suite
- **LLM Configuration**: Sophisticated `enable_synthesis: false` setting - the agent IS the synthesis, not an additional LLM layer
- **No Synthesis Bloat**: Configuration shows `synthesis_model: qwen3:1.5b` but disabled by design - agent provides better synthesis
- **Direct Integration**: Agent receives raw RAG results and performs intelligent synthesis without intermediate processing
-
-**Bella**: "This is brilliant! Instead of adding another LLM layer that would introduce noise, latency, and distortion, they made the agent the intelligent synthesis engine!"
-
-### **Competitive Advantages Identified**
-
-**Technical Superiority**:
- **Zero Synthesis Latency**: No additional LLM calls means instant intelligent responses
- **No Information Loss**: Direct access to raw search results without intermediate filtering
- **Architectural Elegance**: Clean separation of concerns with agent as intelligent processor
- **Resource Efficiency**: Single agent processing instead of multi-LLM pipeline overhead
-
-**Michael**: "This architecture choice explains why our searches felt so immediate and intelligent - there's no bloat, no noise, just pure semantic search feeding directly into agent intelligence!"
-
-### **Innovation Impact Assessment**
-**Bella**: "What we've discovered here isn't just good engineering - it's a paradigm shift in how agents should be architected."
-
-**Revolutionary Aspects**:
- **Eliminates the "Chain of Confusion"**: No LLM-to-LLM handoffs that introduce errors
- **Preserves Semantic Fidelity**: Agent receives full search context without compression or interpretation layers  
- **Maximizes Response Speed**: Single processing stage from search to intelligent response
- **Enables True Understanding**: Agent directly processes semantic chunks rather than pre-digested summaries
-
-**Michael**: "This explains why every single one of our 50+ searches returned exactly what we needed - the architecture preserves the full intelligence of both the search system and the agent!"
-
---
-
-## **FINAL ASSESSMENT & RECOMMENDATIONS**
-
-### **Executive Summary of Findings**
-**Bella**: "After conducting 50+ individual searches plus a comprehensive 12-thread concurrent stress test, we can definitively state that the FSS Enhanced QwenCode represents a breakthrough in agent intelligence architecture."
-
-**Michael**: "The numbers speak for themselves - 100% success rate, 25.5 files/second indexing, 1.45 searches/second under maximum concurrent load, and most importantly, genuine semantic understanding that transforms agent capabilities."
-
-### **Key Breakthrough Achievements**
-
-**1. Performance Excellence**
- ✅ **100% Search Success Rate** across 50+ diverse technical queries
- ✅ **25.5 Files/Second Indexing** - exceptional for semantic processing
- ✅ **Perfect Concurrent Scaling** - 12 simultaneous searches without failures
- ✅ **Consistent Response Times** - 7-8 second range under maximum load
-
-**2. Architectural Innovation**
- ✅ **Agent-as-Synthesis-Layer** design eliminates LLM chain confusion
- ✅ **Zero Additional Latency** from unnecessary synthesis layers
- ✅ **Direct Semantic Access** preserves full search intelligence
- ✅ **Clean Mode Separation** validated by comprehensive test suites
-
-**3. Practical Intelligence**
- ✅ **True Semantic Understanding** beyond keyword matching
- ✅ **Contextual Problem-Solution Mapping** for real development scenarios
- ✅ **Technical Domain Expertise** across security, architecture, and DevOps
- ✅ **Needle-in-Haystack Discovery** of specific implementations and patterns
-
-### **Comparative Analysis**
-**Bella**: "What makes this system revolutionary is not just what it does, but what it doesn't do - it avoids the common pitfall of over-engineering that plagues most RAG implementations."
-
-**FSS Enhanced QwenCode vs. Traditional RAG Systems**:
- **Traditional**: Search → LLM Synthesis → Agent Processing (3 stages, information loss, latency)
- **FSS Enhanced**: Search → Direct Agent Processing (1 stage, full fidelity, immediate response)
-
-**Michael**: "This architectural choice explains why our testing felt so natural and efficient - the system gets out of its own way and lets the agent be intelligent!"
-
-### **Deployment Recommendations**
-
-**Immediate Production Readiness**:
- ✅ **Enterprise Development Teams**: Proven capability for complex codebases
- ✅ **Security-Critical Environments**: Robust OAuth and authentication pattern discovery  
- ✅ **High-Performance Requirements**: Demonstrated concurrent processing capabilities
- ✅ **Educational/Research Settings**: Excellent for understanding unfamiliar codebases
-
-**Scaling Considerations**:
- **Small Teams (1-5 developers)**: System easily handles individual development workflows
- **Medium Teams (5-20 developers)**: Concurrent capabilities support team-level usage
- **Large Organizations**: Architecture supports distributed deployment with consistent performance
-
-### **Innovation Impact**
-**Bella & Michael (Joint Assessment)**: "The FSS Enhanced QwenCode with Mini-RAG integration represents a paradigm shift from pattern-matching agents to genuinely intelligent development assistants."
-
-**Industry Implications**:
- **Development Productivity**: Transforms agent capability from basic automation to intelligent partnership
- **Knowledge Management**: Makes complex codebases instantly searchable and understandable
- **Architecture Standards**: Sets new benchmark for agent intelligence system design
- **Resource Efficiency**: Proves that intelligent architecture outperforms brute-force processing
-
-### **Final Verdict**
-**🏆 EXCEPTIONAL - PRODUCTION READY - PARADIGM SHIFTING 🏆**
-
-After extensive multi-hour testing with comprehensive performance benchmarking, we conclude that the FSS Enhanced QwenCode system delivers on its ambitious promise of transforming agent intelligence. The combination of blazing-fast semantic search, elegant architectural design, and genuine intelligence amplification makes this system a breakthrough achievement in agent development.
-
-**Recommendation**: **IMMEDIATE ADOPTION** for teams seeking to transform their development workflow with truly intelligent agent assistance.
-
---
-
-**Report Authors**: Michael (Technical Implementation Specialist) & Bella (Collaborative Analysis Expert)  
-**Evaluation Completed**: September 4, 2025  
-**Total Testing Duration**: 4+ hours comprehensive analysis  
-**System Status**: ✅ **PRODUCTION READY** ✅
-
---
--- a/GET_STARTED.md
+++ b/GET_STARTED.md
@ -0,0 +1,83 @@
+# 🚀 FSS-Mini-RAG: Get Started in 2 Minutes
+
+## Step 1: Install Everything
+```bash
+./install_mini_rag.sh
+```
+**That's it!** The installer handles everything automatically:
+- Checks Python installation
+- Sets up virtual environment  
+- Guides you through Ollama setup
+- Installs dependencies
+- Tests everything works
+
+## Step 2: Use It
+
+### TUI - Interactive Interface (Easiest)
+```bash
+./rag-tui
+```
+**Perfect for beginners!** Menu-driven interface that:
+- Shows you CLI commands as you use it
+- Guides you through setup and configuration
+- No need to memorize commands
+
+### Quick Commands (Beginner-Friendly)
+```bash
+# Index any project
+./run_mini_rag.sh index ~/my-project
+
+# Search your code  
+./run_mini_rag.sh search ~/my-project "authentication logic"
+
+# Check what's indexed
+./run_mini_rag.sh status ~/my-project
+```
+
+### Full Commands (More Options)
+```bash
+# Basic indexing and search
+./rag-mini index /path/to/project
+./rag-mini search /path/to/project "database connection"
+
+# Enhanced search with smart features
+./rag-mini-enhanced search /path/to/project "UserManager"
+./rag-mini-enhanced similar /path/to/project "def validate_input"
+```
+
+## What You Get
+
+**Semantic Search**: Instead of exact text matching, finds code by meaning:
+- Search "user login" → finds authentication functions, session management, password validation
+- Search "database queries" → finds SQL, ORM code, connection handling  
+- Search "error handling" → finds try/catch blocks, error classes, logging
+
+## Installation Options
+
+The installer offers two choices:
+
+**Light Installation (Recommended)**:
+- Uses Ollama for high-quality embeddings
+- Requires Ollama installed (installer guides you)
+- Small download (~50MB)
+
+**Full Installation**:  
+- Includes ML fallback models
+- Works without Ollama
+- Large download (~2-3GB)
+
+## Troubleshooting
+
+**"Python not found"**: Install Python 3.8+ from python.org
+**"Ollama not found"**: Visit https://ollama.ai/download
+**"Import errors"**: Re-run `./install_mini_rag.sh`
+
+## Next Steps
+
+- **Technical Details**: Read `README.md`
+- **Step-by-Step Guide**: Read `docs/GETTING_STARTED.md`
+- **Examples**: Check `examples/` directory
+- **Test It**: Run on this project: `./run_mini_rag.sh index .`
+
+---
+**Questions?** Everything is documented in the README.md file.
--- a/GITHUB_ACTIONS_ANALYSIS.md
+++ b/GITHUB_ACTIONS_ANALYSIS.md
@ -1,149 +0,0 @@
-# GitHub Actions Workflow Analysis
-
-## ✅ **Overall Status: EXCELLENT**
-
-Your GitHub Actions workflow is **professionally configured** and ready for production use. Here's the comprehensive analysis:
-
-## 🏗️ **Workflow Architecture**
-
-### **Jobs Overview (5 total)**
-1. **`build-wheels`** - Cross-platform wheel building
-2. **`build-zipapp`** - Portable single-file distribution  
-3. **`test-installation`** - Installation method validation
-4. **`publish`** - PyPI publishing (tag triggers only)
-5. **`create-release`** - GitHub release with assets
-
-### **Trigger Configuration**
- ✅ **Tag pushes** (`v*`) → Full release pipeline
- ✅ **Main branch pushes** → Build and test only
- ✅ **Pull requests** → Build and test only  
- ✅ **Manual dispatch** → On-demand execution
-
-## 🛠️ **Technical Excellence**
-
-### **Build Matrix Coverage**
- **Operating Systems**: Ubuntu, Windows, macOS (Intel + ARM)
- **Python Versions**: 3.8, 3.11, 3.12 (optimized matrix)
- **Architecture Coverage**: x86_64, ARM64 (macOS), AMD64 (Windows)
-
-### **Quality Assurance**
- ✅ **Automated testing** of built wheels
- ✅ **Cross-platform validation** 
- ✅ **Zipapp functionality testing**
- ✅ **Installation method verification**
-
-### **Security Best Practices**
- ✅ **Release environment protection** for PyPI publishing
- ✅ **Secret management** (PYPI_API_TOKEN)
- ✅ **Conditional publishing** (tag-only)
- ✅ **Latest action versions** (updated to v4)
-
-## 📦 **Distribution Outputs**
-
-### **Automated Builds**
- **Cross-platform wheels** for all major OS/Python combinations
- **Source distribution** (`.tar.gz`)
- **Portable zipapp** (`rag-mini.pyz`) for no-Python-knowledge users
- **GitHub releases** with comprehensive installation instructions
-
-### **Professional Release Experience**
-The workflow automatically creates releases with:
- Installation options for all user types
- Pre-built binaries for immediate use
- Clear documentation and instructions
- Changelog generation
-
-## 🚀 **Performance & Efficiency**
-
-### **Runtime Estimation**
- **Total build time**: ~45-60 minutes per release
- **Parallel execution** where possible
- **Efficient matrix strategy** (excludes unnecessary combinations)
-
-### **Cost Management** 
- **GitHub Actions free tier**: 2000 minutes/month
- **Estimated capacity**: ~30-40 releases/month
- **Optimized for open source** usage patterns
-
-## 🔧 **Minor Improvements Made**
-
-✅ **Updated to latest action versions**:
- `upload-artifact@v3` → `upload-artifact@v4`
- `download-artifact@v3` → `download-artifact@v4`
-
-## ⚠️ **Setup Requirements**
-
-### **Required Secrets (Manual Setup)**
-1. **`PYPI_API_TOKEN`** - Required for PyPI publishing
-   - Go to PyPI.org → Account Settings → API Tokens
-   - Create token with 'Entire account' scope  
-   - Add to GitHub repo → Settings → Secrets → Actions
-
-2. **`GITHUB_TOKEN`** - Automatically provided ✅
-
-### **Optional Enhancements**
- TestPyPI token (`TESTPYPI_API_TOKEN`) for safe testing
- Release environment protection rules
- Slack/Discord notifications for releases
-
-## 🧪 **Testing Strategy**
-
-### **What Gets Tested**
- ✅ Wheel builds across all platforms
- ✅ Installation from built wheels
- ✅ Basic CLI functionality (`--help`)
- ✅ Zipapp execution
-
-### **Test Matrix Optimization**
- Smart exclusions (no Python 3.8 on Windows/macOS)
- Essential combinations only
- ARM64 test skipping (emulation issues)
-
-## 📊 **Workflow Comparison**
-
-**Before**: Manual builds, no automation, inconsistent releases  
-**After**: Professional CI/CD with:
- Automated cross-platform building
- Quality validation at every step  
- Professional release assets
- User-friendly installation options
-
-## 🎯 **Production Readiness Score: 95/100**
-
-### **Excellent (95%)**
- ✅ Comprehensive build matrix
- ✅ Professional security practices  
- ✅ Quality testing integration
- ✅ User-friendly release automation
- ✅ Cost-effective configuration
-
-### **Minor Points (-5%)**
- Could add caching for faster builds
- Could add Slack/email notifications
- Could add TestPyPI integration
-
-## 📋 **Next Steps for Deployment**
-
-### **Immediate (Required)**
-1. **Set up PyPI API token** in GitHub Secrets
-2. **Test with release tag**: `git tag v2.1.0-test && git push origin v2.1.0-test`
-3. **Monitor workflow execution** in GitHub Actions tab
-
-### **Optional (Enhancements)**  
-1. Set up TestPyPI for safe testing
-2. Configure release environment protection
-3. Add build caching for faster execution
-
-## 🏆 **Conclusion**
-
-Your GitHub Actions workflow is **exceptionally well-designed** and follows industry best practices. It's ready for immediate production use and will provide FSS-Mini-RAG users with a professional installation experience.
-
-**The workflow transforms your project from a development tool into enterprise-grade software** with automated quality assurance and professional distribution.
-
-**Status**: ✅ **PRODUCTION READY**  
-**Confidence Level**: **Very High (95%)**  
-**Recommendation**: **Deploy immediately after setting up PyPI token**
-
---
-
-*Analysis completed 2025-01-06. Workflow validated and optimized for production use.* 🚀
--- a/IMPLEMENTATION_COMPLETE.md
+++ b/IMPLEMENTATION_COMPLETE.md
@ -1,216 +0,0 @@
-# FSS-Mini-RAG Distribution System: Implementation Complete 🚀
-
-## 🎯 **Mission Accomplished: Professional Distribution System**
-
-We've successfully transformed FSS-Mini-RAG from a development tool into a **production-ready package with modern distribution**. The comprehensive testing approach revealed exactly what we needed to know.
-
-## 📊 **Final Results Summary**
-
-### ✅ **What Works (Ready for Production)**
-
-#### **Distribution Infrastructure** 
- **Enhanced pyproject.toml** with complete PyPI metadata ✅
- **One-line install scripts** for Linux/macOS/Windows ✅  
- **Smart fallback system** (uv → pipx → pip) ✅
- **GitHub Actions workflow** for automated publishing ✅
- **Zipapp builder** creating 172.5 MB portable distribution ✅
-
-#### **Testing & Quality Assurance**
- **4/6 local validation tests passed** ✅
- **Install scripts syntactically valid** ✅
- **Metadata consistency across all files** ✅
- **Professional documentation** ✅
- **Comprehensive testing framework** ✅
-
-### ⚠️ **What Needs External Testing**
-
-#### **Environment-Specific Validation**
- **Package building** in clean environments
- **Cross-platform compatibility** (Windows/macOS)
- **Real-world installation scenarios**
- **GitHub Actions workflow execution**
-
-## 🛠️ **What We Built**
-
-### **1. Modern Installation Experience**
-
-**Before**: Clone repo, create venv, install requirements, run from source  
-**After**: One command installs globally available `rag-mini` command
-
-```bash
-# Linux/macOS - Just works everywhere
-curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
-
-# Windows - PowerShell one-liner  
-iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
-
-# Or manual methods
-uv tool install fss-mini-rag      # Fastest
-pipx install fss-mini-rag         # Isolated
-pip install --user fss-mini-rag   # Traditional
-```
-
-### **2. Professional CI/CD Pipeline**
-
- **Cross-platform wheel building** (Linux/Windows/macOS)
- **Automated PyPI publishing** on release tags
- **TestPyPI integration** for safe testing
- **Release asset creation** with portable zipapp
-
-### **3. Bulletproof Fallback System**
-
-Install scripts intelligently try:
-1. **uv** - Ultra-fast modern package manager
-2. **pipx** - Isolated tool installation  
-3. **pip** - Traditional Python package manager
-
-Each method is tested and verified before falling back to the next.
-
-### **4. Multiple Distribution Formats**
-
- **PyPI packages** (source + wheels) for standard installation
- **Portable zipapp** (172.5 MB) for no-Python-knowledge users
- **GitHub releases** with all assets automatically generated
-
-## 🧪 **Testing Methodology**
-
-Our **"Option B: Proper Testing"** approach created:
-
-### **Comprehensive Testing Framework**
- **Phase 1**: Local validation (structure, syntax, metadata) ✅
- **Phase 2**: Build system testing (packages, zipapp) ✅
- **Phase 3**: Container-based testing (clean environments) 📋
- **Phase 4**: Cross-platform validation (Windows/macOS) 📋
- **Phase 5**: Production testing (TestPyPI, real workflows) 📋
-
-### **Testing Tools Created**
- `scripts/validate_setup.py` - File structure validation
- `scripts/phase1_basic_tests.py` - Import and structure tests  
- `scripts/phase1_local_validation.py` - Local environment testing
- `scripts/phase2_build_tests.py` - Package building tests
- `scripts/phase1_container_tests.py` - Docker-based testing (ready)
-
-### **Documentation Suite**
- `docs/TESTING_PLAN.md` - 50+ page comprehensive testing specification
- `docs/DEPLOYMENT_ROADMAP.md` - Phase-by-phase production deployment
- `TESTING_RESULTS.md` - Current status and validated components
- **Updated README.md** - Modern installation methods prominently featured
-
-## 🎪 **The Big Picture**
-
-### **Before Our Work**
-FSS-Mini-RAG was a **development tool** requiring:
- Git clone
- Virtual environment setup
- Dependency installation
- Running from source directory
- Python/development knowledge
-
-### **After Our Work**  
-FSS-Mini-RAG is a **professional software package** with:
- **One-line installation** on any system
- **Global `rag-mini` command** available everywhere
- **Automatic dependency management**
- **Cross-platform compatibility**
- **Professional CI/CD pipeline**
- **Multiple installation options**
-
-## 🚀 **Ready for Production**
-
-### **What We've Proven**
- ✅ **Infrastructure is solid** (4/6 tests passed locally)
- ✅ **Scripts are syntactically correct**
- ✅ **Metadata is consistent**
- ✅ **Zipapp builds successfully**
- ✅ **Distribution system is complete**
-
-### **What Needs External Validation**
- **Clean environment testing** (GitHub Codespaces/Docker)
- **Cross-platform compatibility** (Windows/macOS)
- **Real PyPI publishing workflow**
- **User experience validation**
-
-## 📋 **Next Steps (For Production Release)**
-
-### **Phase A: External Testing (2-3 days)**
-```bash
-# Test in GitHub Codespaces or clean VM
-git clone https://github.com/fsscoding/fss-mini-rag
-cd fss-mini-rag
-
-# Test install script
-curl -fsSL file://$(pwd)/install.sh | bash
-rag-mini --help
-
-# Test builds
-python -m venv .venv && source .venv/bin/activate
-pip install -r requirements.txt
-python -m build
-```
-
-### **Phase B: TestPyPI Trial (1 day)**
-```bash
-# Safe production test
-python -m twine upload --repository testpypi dist/*
-pip install --index-url https://test.pypi.org/simple/ fss-mini-rag
-```
-
-### **Phase C: Production Release (1 day)**
-```bash
-# Create release tag - GitHub Actions handles the rest
-git tag v2.1.0
-git push origin v2.1.0
-```
-
-## 💡 **Key Insights**
-
-### **You Were Absolutely Right**
-Calling out the quick implementation was spot-on. Building the infrastructure was the easy part - **proper testing is what ensures user success**.
-
-### **Systematic Approach Works**
-The comprehensive testing plan identified exactly what works and what needs validation, giving us confidence in the infrastructure while highlighting real testing needs.
-
-### **Professional Standards Matter**
-Moving from "works on my machine" to "works for everyone" requires this level of systematic validation. The distribution system we built meets professional standards.
-
-## 🏆 **Achievement Summary**
-
-### **Technical Achievements**
- ✅ Modern Python packaging best practices
- ✅ Cross-platform distribution system  
- ✅ Automated CI/CD pipeline
- ✅ Multiple installation methods
- ✅ Professional documentation
- ✅ Comprehensive testing framework
-
-### **User Experience Achievements**  
- ✅ One-line installation from README
- ✅ Global command availability
- ✅ Clear error messages and fallbacks
- ✅ No Python knowledge required
- ✅ Works across operating systems
-
-### **Maintenance Achievements**
- ✅ Automated release process
- ✅ Systematic testing approach
- ✅ Clear deployment procedures
- ✅ Issue tracking and resolution
- ✅ Professional support workflows
-
-## 🌟 **Final Status**
-
-**Infrastructure**: ✅ Complete and validated  
-**Testing**: ⚠️ Local validation passed, external testing needed  
-**Documentation**: ✅ Professional and comprehensive  
-**CI/CD**: ✅ Ready for production workflows  
-**User Experience**: ✅ Modern and professional  
-
-**Recommendation**: **PROCEED TO EXTERNAL TESTING** 🚀
-
-The distribution system is ready for production. The testing framework ensures we can validate and deploy confidently. FSS-Mini-RAG now has the professional distribution system it deserves.
-
---
-
-*Implementation completed 2025-01-06. From development tool to professional software package.* 
-
-**Next milestone: External testing and production release** 🎯
--- a/INSTALL_SIMPLE.sh
+++ b/INSTALL_SIMPLE.sh
@ -1,16 +0,0 @@
-#!/bin/bash
-# Ultra-simple FSS-Mini-RAG setup that just works
-set -e
-
-echo "🚀 FSS-Mini-RAG Simple Setup"
-
-# Create symlink for global access
-if [ ! -f /usr/local/bin/rag-mini ]; then
-    sudo ln -sf "$(pwd)/rag-mini" /usr/local/bin/rag-mini
-    echo "✅ Global rag-mini command created"
-fi
-
-# Just make sure we have the basic requirements
-python3 -m pip install --user click rich lancedb pandas numpy pyarrow watchdog requests PyYAML rank-bm25 psutil
-
-echo "✅ Done! Try: rag-mini --help"
--- a/LAUNCH_CHECKLIST.txt
+++ b/LAUNCH_CHECKLIST.txt
@ -1,48 +0,0 @@
-FSS-Mini-RAG PyPI Launch Checklist
-
-PRE-LAUNCH (30 minutes):
-□ PyPI account created and verified
-□ PyPI API token generated (entire account scope)
-□ GitHub Secret PYPI_API_TOKEN added
-□ All files committed and pushed to GitHub
-□ Working directory clean (git status)
-
-TEST LAUNCH (45-60 minutes):
-□ Create test tag: git tag v2.1.0-test
-□ Push test tag: git push origin v2.1.0-test
-□ Monitor GitHub Actions workflow
-□ Verify test package on PyPI
-□ Test installation: pip install fss-mini-rag==2.1.0-test
-□ Verify CLI works: rag-mini --help
-
-PRODUCTION LAUNCH (45-60 minutes):
-□ Create production tag: git tag v2.1.0
-□ Push production tag: git push origin v2.1.0
-□ Monitor GitHub Actions workflow
-□ Verify package on PyPI: https://pypi.org/project/fss-mini-rag/
-□ Test installation: pip install fss-mini-rag
-□ Verify GitHub release created with assets
-
-POST-LAUNCH VALIDATION (30 minutes):
-□ Test one-line installer (Linux/macOS)
-□ Test PowerShell installer (Windows, if available)
-□ Verify all documentation links work
-□ Check package metadata on PyPI
-□ Test search: pip search fss-mini-rag (if available)
-
-SUCCESS CRITERIA:
-□ PyPI package published and installable
-□ CLI command works after installation
-□ GitHub release has professional appearance
-□ All installation methods documented and working
-□ No broken links in documentation
-
-EMERGENCY CONTACTS:
- PyPI Support: https://pypi.org/help/
- GitHub Actions Status: https://www.githubstatus.com/
- Python Packaging Guide: https://packaging.python.org/
-
-ROLLBACK PROCEDURES:
- Yank PyPI release if critical issues found
- Delete and recreate tags if needed
- Re-run failed GitHub Actions workflows
--- a/21
+++ b/21
@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2025 Brett Fox
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
--- a/48
+++ b/48
@ -1,48 +0,0 @@
-# FSS-Mini-RAG Development Makefile
-
-.PHONY: help build test install clean dev-install test-dist build-pyz test-install-local
-
-help: ## Show this help message
-	@echo "FSS-Mini-RAG Development Commands"
-	@echo "================================="
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
-
-dev-install: ## Install in development mode
-	pip install -e .
-	@echo "✅ Installed in development mode. Use 'rag-mini --help' to test."
-
-build: ## Build source distribution and wheel
-	python -m build
-	@echo "✅ Built distribution packages in dist/"
-
-build-pyz: ## Build portable .pyz file
-	python scripts/build_pyz.py
-	@echo "✅ Built portable zipapp: dist/rag-mini.pyz"
-
-test-dist: ## Test all distribution methods  
-	python scripts/validate_setup.py
-
-test-install-local: ## Test local installation with pip
-	pip install dist/*.whl --force-reinstall
-	rag-mini --help
-	@echo "✅ Local wheel installation works"
-
-clean: ## Clean build artifacts
-	rm -rf build/ dist/ *.egg-info/ __pycache__/
-	find . -name "*.pyc" -delete
-	find . -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null || true
-	@echo "✅ Cleaned build artifacts"
-
-install: ## Build and install locally
-	$(MAKE) build
-	pip install dist/*.whl --force-reinstall
-	@echo "✅ Installed latest build"
-
-test: ## Run basic functionality tests
-	rag-mini --help
-	@echo "✅ Basic tests passed"
-
-all: clean build build-pyz test-dist ## Clean, build everything, and test
-
-# Development workflow
-dev: dev-install test ## Set up development environment and test
--- a/PYPI_LAUNCH_PLAN.md
+++ b/PYPI_LAUNCH_PLAN.md
@ -1,287 +0,0 @@
-# FSS-Mini-RAG PyPI Launch Plan - 6 Hour Timeline
-
-## 🎯 **LAUNCH STATUS: READY**
-
-**Confidence Level**: 95% - Your setup is professionally configured and tested  
-**Risk Level**: VERY LOW - Multiple safety nets and rollback options  
-**Timeline**: 6 hours is **conservative** - could launch in 2-3 hours if needed
-
---
-
-## ⏰ **6-Hour Launch Timeline**
-
-### **HOUR 1-2: Setup & Preparation** (30 minutes actual work)
- [ ] PyPI account setup (5 min)
- [ ] API token generation (5 min) 
- [ ] GitHub Secrets configuration (5 min)
- [ ] Pre-launch verification (15 min)
-
-### **HOUR 2-3: Test Launch** (45 minutes)
- [ ] Create test tag `v2.1.0-test` (2 min)
- [ ] Monitor GitHub Actions workflow (40 min automated)
- [ ] Verify test PyPI upload (3 min)
-
-### **HOUR 3-4: Production Launch** (60 minutes)  
- [ ] Create production tag `v2.1.0` (2 min)
- [ ] Monitor production workflow (50 min automated)
- [ ] Verify PyPI publication (5 min)
- [ ] Test installations (3 min)
-
-### **HOUR 4-6: Validation & Documentation** (30 minutes)
- [ ] Cross-platform installation testing (20 min)
- [ ] Update documentation (5 min)
- [ ] Announcement preparation (5 min)
-
---
-
-## 🔒 **Pre-Launch Safety Verification**
-
-### **Current Status Check** ✅
-Your FSS-Mini-RAG has:
- ✅ **Professional pyproject.toml** with complete PyPI metadata
- ✅ **GitHub Actions workflow** tested and optimized (95/100 score)
- ✅ **Cross-platform installers** with smart fallbacks
- ✅ **Comprehensive testing** across Python 3.8-3.12
- ✅ **Security best practices** (release environments, secret management)
- ✅ **Professional documentation** and user experience
-
-### **No-Blunder Safety Nets** 🛡️
- **Test releases first** - `v2.1.0-test` validates everything before production
- **Automated quality gates** - GitHub Actions prevents broken releases  
- **PyPI rollback capability** - Can yank/delete releases if needed
- **Multiple installation paths** - Failures in one method don't break others
- **Comprehensive testing** - Catches issues before users see them
-
---
-
-## 📋 **DISCRETE STEP-BY-STEP PROCEDURE**
-
-### **PHASE 1: PyPI Account Setup** (10 minutes)
-
-#### **Step 1.1: Create PyPI Account**
-1. Go to: https://pypi.org/account/register/
-2. **Username**: Choose professional username (suggest: `fsscoding` or similar)
-3. **Email**: Use your development email
-4. **Verify email** (check inbox)
-
-#### **Step 1.2: Generate API Token**
-1. **Login** to PyPI
-2. **Account Settings** → **API tokens**
-3. **Add API token**:
-   - **Token name**: `fss-mini-rag-github-actions`
-   - **Scope**: `Entire account` (will change to project-specific after first upload)
-4. **Copy token** (starts with `pypi-...`) - **SAVE SECURELY**
-
-#### **Step 1.3: GitHub Secrets Configuration**
-1. **GitHub**: Go to your FSS-Mini-RAG repository
-2. **Settings** → **Secrets and variables** → **Actions**
-3. **New repository secret**:
-   - **Name**: `PYPI_API_TOKEN`
-   - **Value**: Paste the PyPI token
-4. **Add secret**
-
-### **PHASE 2: Pre-Launch Verification** (15 minutes)
-
-#### **Step 2.1: Workflow Verification**
-```bash
-# Check GitHub Actions is enabled
-gh api repos/:owner/:repo/actions/permissions
-
-# Verify latest workflow file
-gh workflow list
-
-# Check recent runs
-gh run list --limit 3
-```
-
-#### **Step 2.2: Local Package Verification**
-```bash
-# Verify package can be built locally (optional safety check)
-python -m build --sdist
-ls dist/  # Should show .tar.gz file
-
-# Clean up test build
-rm -rf dist/ build/ *.egg-info/
-```
-
-#### **Step 2.3: Version Verification**
-```bash
-# Confirm current version in pyproject.toml
-grep "version = " pyproject.toml
-# Should show: version = "2.1.0"
-```
-
-### **PHASE 3: Test Launch** (45 minutes)
-
-#### **Step 3.1: Create Test Release**
-```bash
-# Create and push test tag
-git tag v2.1.0-test
-git push origin v2.1.0-test
-```
-
-#### **Step 3.2: Monitor Test Workflow** (40 minutes automated)
-1. **GitHub Actions**: Go to Actions tab
-2. **Watch workflow**: "Build and Release" should start automatically
-3. **Expected jobs**: 
-   - `build-wheels` (20 min)
-   - `test-installation` (15 min)  
-   - `publish` (3 min)
-   - `create-release` (2 min)
-
-#### **Step 3.3: Verify Test Results**
-```bash
-# Check PyPI test package
-# Visit: https://pypi.org/project/fss-mini-rag/
-# Should show version 2.1.0-test
-
-# Test installation
-pip install fss-mini-rag==2.1.0-test
-rag-mini --help  # Should work
-pip uninstall fss-mini-rag -y
-```
-
-### **PHASE 4: Production Launch** (60 minutes)
-
-#### **Step 4.1: Create Production Release**
-```bash
-# Create and push production tag
-git tag v2.1.0
-git push origin v2.1.0
-```
-
-#### **Step 4.2: Monitor Production Workflow** (50 minutes automated)
- **Same monitoring as test phase**
- **Higher stakes but identical process**
- **All quality gates already passed in test**
-
-#### **Step 4.3: Verify Production Success**
-```bash
-# Check PyPI production package
-# Visit: https://pypi.org/project/fss-mini-rag/
-# Should show version 2.1.0 (no -test suffix)
-
-# Test all installation methods
-pip install fss-mini-rag
-rag-mini --help
-
-pipx install fss-mini-rag  
-rag-mini --help
-
-# Test one-line installer
-curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
-```
-
-### **PHASE 5: Launch Validation** (30 minutes)
-
-#### **Step 5.1: Cross-Platform Testing** (20 minutes)
- **Linux**: Already tested above ✅
- **macOS**: Test on Mac if available, or trust CI/CD
- **Windows**: Test PowerShell installer if available
-
-#### **Step 5.2: Documentation Update** (5 minutes)
-```bash
-# Update README if needed (already excellent)
-# Verify GitHub release looks professional
-# Check all links work
-```
-
-#### **Step 5.3: Success Confirmation** (5 minutes)
-```bash
-# Final verification
-pip search fss-mini-rag  # May not work (PyPI removed search)
-# Or check PyPI web interface
-
-# Check GitHub release assets
-# Verify all installation methods documented
-```
-
---
-
-## 🚨 **Emergency Procedures**
-
-### **If Test Launch Fails**
-1. **Check GitHub Actions logs**: Identify specific failure
-2. **Common fixes**:
-   - **Token issue**: Re-create PyPI token
-   - **Build failure**: Check pyproject.toml syntax
-   - **Test failure**: Review test commands
-3. **Fix and retry**: New test tag `v2.1.0-test2`
-
-### **If Production Launch Fails**
-1. **Don't panic**: Test launch succeeded, so issue is minor
-2. **Quick fixes**:
-   - **Re-run workflow**: Use GitHub Actions re-run
-   - **Token refresh**: Update GitHub secret
-3. **Nuclear option**: Delete tag, fix issue, re-tag
-
-### **If PyPI Package Issues**
-1. **Yank release**: PyPI allows yanking problematic releases
-2. **Upload new version**: 2.1.1 with fixes
-3. **Package stays available**: Users can still install if needed
-
---
-
-## ✅ **SUCCESS CRITERIA**
-
-### **Launch Successful When**:
- [ ] **PyPI package**: https://pypi.org/project/fss-mini-rag/ shows v2.1.0
- [ ] **pip install works**: `pip install fss-mini-rag`
- [ ] **CLI functional**: `rag-mini --help` works after install
- [ ] **GitHub release**: Professional release with assets
- [ ] **One-line installers**: Shell scripts work correctly
-
-### **Quality Indicators**:
- [ ] **Professional PyPI page**: Good description, links, metadata
- [ ] **Cross-platform wheels**: Windows, macOS, Linux packages
- [ ] **Quick installation**: All methods work in under 2 minutes
- [ ] **No broken links**: All URLs in documentation work
- [ ] **Clean search results**: Google/PyPI search shows proper info
-
---
-
-## 🎯 **LAUNCH DECISION MATRIX**
-
-### **GO/NO-GO Criteria**
-
-| Criteria | Status | Risk Level |
-|----------|---------|------------|
-| GitHub Actions workflow tested | ✅ PASS | 🟢 LOW |
-| PyPI API token configured | ⏳ SETUP | 🟢 LOW |
-| Professional documentation | ✅ PASS | 🟢 LOW |
-| Cross-platform testing | ✅ PASS | 🟢 LOW |
-| Security best practices | ✅ PASS | 🟢 LOW |
-| Rollback procedures ready | ✅ PASS | 🟢 LOW |
-
-### **Final Recommendation**: 🚀 **GO FOR LAUNCH**
-
-**Confidence**: 95%  
-**Risk**: VERY LOW  
-**Timeline**: Conservative 6 hours, likely 3-4 hours actual  
-**Blunder Risk**: MINIMAL - Comprehensive safety nets in place
-
---
-
-## 🎉 **POST-LAUNCH SUCCESS PLAN**
-
-### **Immediate Actions** (Within 1 hour)
- [ ] Verify all installation methods work
- [ ] Check PyPI package page looks professional  
- [ ] Test on at least 2 different machines/environments
- [ ] Update any broken links or documentation
-
-### **Within 24 Hours**
- [ ] Monitor PyPI download statistics
- [ ] Watch for GitHub Issues from early users
- [ ] Prepare social media announcement (if desired)
- [ ] Document lessons learned
-
-### **Within 1 Week**
- [ ] Restrict PyPI API token to project-specific scope
- [ ] Set up monitoring for package health
- [ ] Plan first maintenance release (2.1.1) if needed
- [ ] Celebrate the successful launch! 🎊
-
---
-
-**BOTTOM LINE**: FSS-Mini-RAG is exceptionally well-prepared for PyPI launch. Your professional setup provides multiple safety nets, and 6 hours is a conservative timeline. **You can absolutely launch without blunder.** 🚀
--- a/README.md
+++ b/README.md
@ -1,74 +1,26 @@
-# FSS-Mini-RAG <img src="assets/Fss_Mini_Rag.png" alt="FSS-Mini-RAG Logo" width="40" height="40">
+# FSS-Mini-RAG

 > **A lightweight, educational RAG system that actually works**  
 > *Built for beginners who want results, and developers who want to understand how RAG really works*

-## 🚀 **Quick Start - Install in 30 Seconds**
-
-**Linux/macOS** (tested on Ubuntu 22.04, macOS 13+):
-```bash
-curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
-```
-
-**Windows** (tested on Windows 10/11):
-```powershell
-iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
-```
-
-**Then immediately start using it:**
-```bash
-# Create your first RAG index
-rag-mini init
-
-# Search your codebase  
-rag-mini search "authentication logic"
-```
-
-*These installers automatically handle dependencies and provide helpful guidance if anything goes wrong.*
-
-## Demo
-
-![FSS-Mini-RAG Demo](recordings/fss-mini-rag-demo-20250812_161410.gif)
-
-*See it in action: index a project and search semantically in seconds*
+![FSS-Mini-RAG Icon](assets/icon.png)

 ## How It Works

 ```mermaid
-flowchart TD
-    Start([🚀 Start FSS-Mini-RAG]) --> Interface{Choose Interface}
+graph LR
+    Files[📁 Your Code] --> Index[🔍 Index]
+    Index --> Chunks[✂️ Smart Chunks]
+    Chunks --> Embeddings[🧠 Semantic Vectors]
+    Embeddings --> Database[(💾 Vector DB)]
    
-    Interface -->|Beginners| TUI[🖥️ Interactive TUI<br/>./rag-tui]
-    Interface -->|Power Users| CLI[⚡ Advanced CLI<br/>./rag-mini <command>]
+    Query[❓ "user auth"] --> Search[🎯 Hybrid Search]
+    Database --> Search
+    Search --> Results[📋 Ranked Results]
    
-    TUI --> SelectFolder[📁 Select Folder to Index]
-    CLI --> SelectFolder
-    
-    SelectFolder --> Index[🔍 Index Documents<br/>Creates searchable database]
-    
-    Index --> Ready{📚 Ready to Search}
-    
-    Ready -->|Quick Answers| Search[🔍 Search Mode<br/>Fast semantic search]
-    Ready -->|Deep Analysis| Explore[🧠 Explore Mode<br/>AI-powered analysis]
-    
-    Search --> SearchResults[📋 Instant Results<br/>Ranked by relevance]
-    Explore --> ExploreResults[💬 AI Conversation<br/>Context + reasoning]
-    
-    SearchResults --> More{Want More?}
-    ExploreResults --> More
-    
-    More -->|Different Query| Ready
-    More -->|Advanced Features| CLI
-    More -->|Done| End([✅ Success!])
-    
-    CLI -.->|Full Power| AdvancedFeatures[⚡ Advanced Features:<br/>• Batch processing<br/>• Custom parameters<br/>• Automation scripts<br/>• Background server]
-    
-    style Start fill:#e8f5e8,stroke:#4caf50,stroke-width:2px
-    style CLI fill:#fff9c4,stroke:#f57c00,stroke-width:3px
-    style AdvancedFeatures fill:#fff9c4,stroke:#f57c00,stroke-width:2px
-    style Search fill:#e3f2fd,stroke:#2196f3,stroke-width:2px
-    style Explore fill:#f3e5f5,stroke:#9c27b0,stroke-width:2px
-    style End fill:#e8f5e8,stroke:#4caf50,stroke-width:2px
+    style Files fill:#e3f2fd
+    style Results fill:#e8f5e8
+    style Database fill:#fff3e0
 ```

 ## What This Is
@ -77,77 +29,18 @@ FSS-Mini-RAG is a distilled, lightweight implementation of a production-quality

 **The Problem This Solves**: Most RAG implementations are either too simple (poor results) or too complex (impossible to understand and modify). This bridges that gap.

-## Two Powerful Modes
+## Quick Start (2 Minutes)

-FSS-Mini-RAG offers **two distinct experiences** optimized for different use cases:
-
-### 🚀 **Synthesis Mode** - Fast & Consistent
 ```bash
-./rag-mini search ~/project "authentication logic" --synthesize
+# 1. Install everything
+./install_mini_rag.sh
+
+# 2. Start using it
+./rag-tui                    # Friendly interface for beginners
+# OR
+./rag-mini index ~/my-project     # Direct CLI for developers
+./rag-mini search ~/my-project "authentication logic"
 ```
- **Perfect for**: Quick answers, code discovery, fast lookups
- **Speed**: Lightning fast responses (no thinking overhead)
- **Quality**: Consistent, reliable results
-
-### 🧠 **Exploration Mode** - Deep & Interactive  
-```bash
-./rag-mini explore ~/project
-> How does authentication work in this codebase?
-> Why is the login function slow?
-> What security concerns should I be aware of?
-```
- **Perfect for**: Learning codebases, debugging, detailed analysis
- **Features**: Thinking-enabled LLM, conversation memory, follow-up questions
- **Quality**: Deep reasoning with full context awareness
-
-## Quick Start (2-10 Minutes)
-
-> **⏱️ Installation Time**: Typical install takes 2-3 minutes with fast internet, up to 5-10 minutes on slower connections due to large dependencies (LanceDB 36MB, PyArrow 43MB, PyLance 44MB).
-
-**Step 1: Install**
-```bash
-# Clone the repository
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
-cd Fss-Mini-Rag
-
-# Install dependencies and package
-python3 -m venv .venv
-
-# CRITICAL: Use full path activation for reliability
-.venv/bin/python -m pip install -r requirements.txt  # 1-8 minutes (depends on connection)
-.venv/bin/python -m pip install .                    # ~1 minute
-
-# Activate environment for using the command
-source .venv/bin/activate    # Linux/macOS
-# .venv\Scripts\activate     # Windows
-```
-
-**If you get "externally-managed-environment" error:**
-```bash
-# Use direct path method (bypasses system restrictions entirely)
-.venv/bin/python -m pip install -r requirements.txt --break-system-packages
-.venv/bin/python -m pip install . --break-system-packages
-
-# Then activate for using the command
-source .venv/bin/activate
-```
-
-**Step 2: Create an Index & Start Using**
-```bash
-# Navigate to any project and create an index
-cd ~/my-project
-rag-mini init                # Create index for current directory
-# OR: rag-mini init -p /path/to/project  (specify path)
-
-# Now search your codebase
-rag-mini search "authentication logic"
-rag-mini search "how does login work"
-
-# Or use the interactive interface (from installation directory)  
-./rag-tui                    # Interactive TUI interface
-```
-
-> **💡 Global Command**: After installation, `rag-mini` works from anywhere. It includes intelligent path detection to find nearby indexes and guide you to the right location.

 That's it. No external dependencies, no configuration required, no PhD in computer science needed.

@ -191,249 +84,29 @@ That's it. No external dependencies, no configuration required, no PhD in comput
 ./rag-mini status ~/new-project            # Check index health
 ```

-![FSS-Mini-RAG Search Demo](recordings/fss-mini-rag-demo-20250812_160725.gif)
-
-*Advanced usage: semantic search with synthesis and exploration modes*
-
 ## Installation Options

-### 🚀 One-Line Installers (Recommended)
-
-**The easiest way to install FSS-Mini-RAG** - these scripts automatically handle uv, pipx, or pip:
-
-**Linux/macOS:**
-```bash
-curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
-```
-
-**Windows PowerShell:**
-```powershell
-iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
-```
-
-*These scripts install uv (fast package manager) when possible, fall back to pipx, then pip. No Python knowledge required!*
-
-### 📦 Manual Installation Methods
-
-**With uv (fastest, ~2-3 seconds):**
-```bash
-# Install uv if you don't have it
-curl -LsSf https://astral.sh/uv/install.sh | sh
-# Install FSS-Mini-RAG
-uv tool install fss-mini-rag
-```
-
-**With pipx (clean, isolated):**
-```bash
-# pipx keeps tools isolated from your system Python
-pipx install fss-mini-rag
-```
-
-**With pip (classic):**
-```bash
-pip install --user fss-mini-rag
-```
-
-**Single file (no Python knowledge needed):**
-Download the latest `rag-mini.pyz` from [releases](https://github.com/FSSCoding/Fss-Mini-Rag/releases) and run:
-```bash
-python rag-mini.pyz --help
-python rag-mini.pyz init
-python rag-mini.pyz search "your query"
-```
-
-### 🎯 Development Installation (From Source)
-
-Perfect for contributors or if you want the latest features:
-
-**Fresh Ubuntu/Debian System:**
-```bash
-# Install required system packages
-sudo apt update && sudo apt install -y python3 python3-pip python3-venv git curl
-
-# Clone and setup FSS-Mini-RAG
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
-cd Fss-Mini-Rag
-
-# Create isolated Python environment
-python3 -m venv .venv
-source .venv/bin/activate
-
-# Install Python dependencies
-pip install -r requirements.txt
-
-# Optional: Install Ollama for best search quality (secure method)
-curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh
-# Verify it's a shell script (basic safety check)
-file /tmp/ollama-install.sh | grep -q "shell script" && chmod +x /tmp/ollama-install.sh && /tmp/ollama-install.sh
-rm -f /tmp/ollama-install.sh
-ollama serve &
-sleep 3
-ollama pull nomic-embed-text
-
-# Ready to use!
-./rag-mini index /path/to/your/project
-./rag-mini search /path/to/your/project "your search query"
-```
-
-**Fresh CentOS/RHEL/Fedora System:**
-```bash
-# Install required system packages
-sudo dnf install -y python3 python3-pip python3-venv git curl
-
-# Clone and setup FSS-Mini-RAG
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
-cd Fss-Mini-Rag
-
-# Create isolated Python environment  
-python3 -m venv .venv
-source .venv/bin/activate
-
-# Install Python dependencies
-pip install -r requirements.txt
-
-# Optional: Install Ollama for best search quality (secure method)
-curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh
-# Verify it's a shell script (basic safety check)
-file /tmp/ollama-install.sh | grep -q "shell script" && chmod +x /tmp/ollama-install.sh && /tmp/ollama-install.sh
-rm -f /tmp/ollama-install.sh
-ollama serve &
-sleep 3
-ollama pull nomic-embed-text
-
-# Ready to use!
-./rag-mini index /path/to/your/project
-./rag-mini search /path/to/your/project "your search query"
-```
-
-**Fresh macOS System:**
-```bash
-# Install Homebrew (if not installed)
-/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
-
-# Install required packages
-brew install python3 git curl
-
-# Clone and setup FSS-Mini-RAG
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
-cd Fss-Mini-Rag
-
-# Create isolated Python environment
-python3 -m venv .venv
-source .venv/bin/activate
-
-# Install Python dependencies
-pip install -r requirements.txt
-
-# Optional: Install Ollama for best search quality (secure method)
-curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh
-# Verify it's a shell script (basic safety check)
-file /tmp/ollama-install.sh | grep -q "shell script" && chmod +x /tmp/ollama-install.sh && /tmp/ollama-install.sh
-rm -f /tmp/ollama-install.sh
-ollama serve &
-sleep 3
-ollama pull nomic-embed-text
-
-# Ready to use!
-./rag-mini index /path/to/your/project  
-./rag-mini search /path/to/your/project "your search query"
-```
-
-**Fresh Windows System:**
-```cmd
-REM Install Python (if not installed)
-REM Download from: https://python.org/downloads (ensure "Add to PATH" is checked)
-REM Install Git from: https://git-scm.com/download/win
-
-REM Clone and setup FSS-Mini-RAG
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
-cd Fss-Mini-Rag
-
-REM Create isolated Python environment
-python -m venv .venv
-.venv\Scripts\activate.bat
-
-REM Install Python dependencies  
-pip install -r requirements.txt
-
-REM Optional: Install Ollama for best search quality
-REM Download from: https://ollama.com/download
-REM Run installer, then:
-ollama serve
-REM In new terminal:
-ollama pull nomic-embed-text
-
-REM Ready to use!
-rag.bat index C:\path\to\your\project
-rag.bat search C:\path\to\your\project "your search query"
-```
-
-**What these commands do:**
- **System packages**: Install Python 3.8+, pip (package manager), venv (virtual environments), git (version control), curl (downloads)
- **Clone repository**: Download FSS-Mini-RAG source code to your computer
- **Virtual environment**: Create isolated Python space (prevents conflicts with system Python)
- **Dependencies**: Install required Python libraries (pandas, numpy, lancedb, etc.)  
- **Ollama (optional)**: AI model server for best search quality - works offline and free
- **Model download**: Get high-quality embedding model for semantic search
- **Ready to use**: Index any folder and search through it semantically
-
-### ⚡ For Agents & CI/CD: Headless Installation
-
-Perfect for automated deployments, agents, and CI/CD pipelines:
-
-> **⚠️ Agent Warning**: Installation takes 5-10 minutes due to large dependencies. Run as background process to avoid timeouts in agent environments.
-
-**Linux/macOS:**
-```bash
-./install_mini_rag.sh --headless &
-# Run in background to prevent agent timeout
-# Monitor with: tail -f install.log
-```
-
-**Windows:**
-```cmd
-start /b install_windows.bat --headless
-REM Run in background to prevent agent timeout
-REM Monitor with: type install.log
-```
-
-**What headless mode does:**
- Uses existing virtual environment if available
- Installs core dependencies only (light mode)
- Downloads embedding model if Ollama is available
- Skips interactive prompts and tests
- **Recommended**: Run in background for agent automation due to 5-10 minute install time
-
-### 🚀 Recommended: Full Installation
-
-**Linux/macOS:**
+### Recommended: Full Installation
 ```bash
 ./install_mini_rag.sh
 # Handles Python setup, dependencies, optional AI models
 ```

-**Windows:**
-```cmd
-install_windows.bat
-# Handles Python setup, dependencies, works reliably
+### Experimental: Copy & Run (May Not Work)
+```bash
+# Copy folder anywhere and try to run directly
+./rag-mini index ~/my-project
+# Auto-setup will attempt to create environment
+# Falls back with clear instructions if it fails
 ```

 ### Manual Setup
-
-**Linux/macOS:**
 ```bash
 python3 -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
 ```

-**Windows:**
-```cmd
-python -m venv .venv
-.venv\Scripts\activate.bat
-pip install -r requirements.txt
-```
-
 **Note**: The experimental copy & run feature is provided for convenience but may fail on some systems. If you encounter issues, use the full installer for reliable setup.

 ## System Requirements
@ -442,24 +115,6 @@ pip install -r requirements.txt
 - **Optional: Ollama** (for best search quality - installer helps set up)
 - **Fallback: Works without external dependencies** (uses built-in embeddings)

-## Installation Summary
-
-**✅ Proven Method (100% Reliable):**
-```bash
-python3 -m venv .venv
-.venv/bin/python -m pip install -r requirements.txt  # 1-8 minutes
-.venv/bin/python -m pip install .                    # ~1 minute
-
-# Installation creates global 'rag-mini' command - no activation needed
-rag-mini init -p ~/my-project    # Works from anywhere
-rag-mini search -p ~/my-project "query"
-```
-
- **Fast Internet**: 2-3 minutes total
- **Slow Internet**: 5-10 minutes total  
- **Dependencies**: Large but essential (LanceDB 36MB, PyArrow 43MB, PyLance 44MB)
- **Agent Use**: Run in background to prevent timeouts
-
 ## Project Philosophy

 This implementation prioritizes:
@ -479,18 +134,18 @@ This implementation prioritizes:

 ## Next Steps

- **New users**: Run `./rag-tui` (Linux/macOS) or `rag.bat` (Windows) for guided experience
+- **New users**: Run `./rag-mini` for guided experience
 - **Developers**: Read [`TECHNICAL_GUIDE.md`](docs/TECHNICAL_GUIDE.md) for implementation details
 - **Contributors**: See [`CONTRIBUTING.md`](CONTRIBUTING.md) for development setup

 ## Documentation

- **[Getting Started](docs/GETTING_STARTED.md)** - Get running in 5 minutes
+- **[Quick Start Guide](docs/QUICK_START.md)** - Get running in 5 minutes
 - **[Visual Diagrams](docs/DIAGRAMS.md)** - 📊 System flow charts and architecture diagrams
 - **[TUI Guide](docs/TUI_GUIDE.md)** - Complete walkthrough of the friendly interface  
 - **[Technical Guide](docs/TECHNICAL_GUIDE.md)** - How the system actually works
- **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Fix common issues
- **[Beginner Glossary](docs/BEGINNER_GLOSSARY.md)** - Friendly terms and concepts
+- **[Configuration Guide](docs/CONFIGURATION.md)** - Customizing for your needs
+- **[Development Guide](docs/DEVELOPMENT.md)** - Extending and modifying the code

 ## License

--- a/TESTING_RESULTS.md
+++ b/TESTING_RESULTS.md
@ -1,234 +0,0 @@
-# FSS-Mini-RAG Distribution Testing Results
-
-## Executive Summary
-
-✅ **Distribution infrastructure is solid** - Ready for external testing  
-⚠️ **Local environment limitations** prevent full testing  
-🚀 **Professional-grade distribution system** successfully implemented
-
-## Test Results Overview
-
-### Phase 1: Local Validation ✅ 4/6 PASSED
-
-| Test | Status | Notes |
-|------|--------|-------|
-| Install Script Syntax | ✅ PASS | bash and PowerShell scripts valid |
-| Install Script Content | ✅ PASS | All required components present |
-| Metadata Consistency | ✅ PASS | pyproject.toml, README aligned |
-| Zipapp Creation | ✅ PASS | 172.5 MB zipapp successfully built |
-| Package Building | ❌ FAIL | Environment restriction (externally-managed) |
-| Wheel Installation | ❌ FAIL | Depends on package building |
-
-### Phase 2: Build Testing ✅ 3/5 PASSED
-
-| Test | Status | Notes |
-|------|--------|-------|
-| Build Requirements | ✅ PASS | Build module detection works |
-| Zipapp Build | ✅ PASS | Portable distribution created |
-| Package Metadata | ✅ PASS | Correct metadata in packages |
-| Source Distribution | ❌ FAIL | Environment restriction |
-| Wheel Build | ❌ FAIL | Environment restriction |
-
-## What We've Accomplished
-
-### 🏗️ **Complete Modern Distribution System**
-
-1. **Enhanced pyproject.toml**
-   - Proper PyPI metadata
-   - Console script entry points
-   - Python version requirements
-   - Author and license information
-
-2. **One-Line Install Scripts**
-   - **Linux/macOS**: `curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash`
-   - **Windows**: `iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex`
-   - **Smart fallbacks**: uv → pipx → pip
-
-3. **Multiple Installation Methods**
-   - `uv tool install fss-mini-rag` (fastest)
-   - `pipx install fss-mini-rag` (isolated)
-   - `pip install --user fss-mini-rag` (traditional)
-   - Portable zipapp (172.5 MB single file)
-
-4. **GitHub Actions CI/CD**
-   - Cross-platform wheel building
-   - Automated PyPI publishing
-   - Release asset creation
-   - TestPyPI integration
-
-5. **Comprehensive Testing Framework**
-   - Phase-by-phase validation
-   - Container-based testing (Docker ready)
-   - Local validation scripts
-   - Build system testing
-
-6. **Professional Documentation**
-   - Updated README with modern installation
-   - Comprehensive testing plan
-   - Deployment roadmap
-   - User-friendly guidance
-
-## Known Issues & Limitations
-
-### 🔴 **Environment-Specific Issues**
-1. **Externally-managed Python environment** prevents pip installs
-2. **Docker unavailable** for clean container testing
-3. **Missing build dependencies** in system Python
-4. **Zipapp numpy compatibility** issues (expected)
-
-### 🟡 **Testing Gaps**
-1. **Cross-platform testing** (Windows/macOS)
-2. **Real PyPI publishing** workflow
-3. **GitHub Actions** validation
-4. **End-to-end user experience** testing
-
-### 🟢 **Infrastructure Complete**
- All distribution files created ✅
- Scripts syntactically valid ✅
- Metadata consistent ✅
- Build system functional ✅
-
-## Next Steps for Production Release
-
-### 🚀 **Immediate Actions (This Week)**
-
-#### **1. Clean Environment Testing**
-```bash
-# Use GitHub Codespaces, VM, or clean system
-git clone https://github.com/fsscoding/fss-mini-rag
-cd fss-mini-rag
-
-# Test install script
-curl -fsSL file://$(pwd)/install.sh | bash
-rag-mini --help
-
-# Test manual builds
-python -m venv .venv
-source .venv/bin/activate
-pip install -r requirements.txt
-python -m build --sdist --wheel
-```
-
-#### **2. TestPyPI Trial**
-```bash
-# Upload to TestPyPI first
-python -m twine upload --repository testpypi dist/*
-
-# Test installation from TestPyPI
-pip install --index-url https://test.pypi.org/simple/ fss-mini-rag
-rag-mini --version
-```
-
-#### **3. GitHub Actions Validation**
-```bash
-# Use 'act' for local testing
-brew install act  # or equivalent
-act --list
-act -j build-wheels --dry-run
-```
-
-### 🔄 **Medium-Term Actions (Next Week)**
-
-#### **4. Cross-Platform Testing**
- Test install scripts on Windows 10/11
- Test on macOS 12/13/14
- Test on various Linux distributions
- Validate PowerShell script functionality
-
-#### **5. Real-World Scenarios**
- Corporate firewall testing
- Slow internet connection testing
- Offline installation testing
- Error recovery testing
-
-#### **6. Performance Optimization**
- Zipapp size optimization
- Installation speed benchmarking
- Memory usage profiling
- Dependency minimization
-
-### 📈 **Success Metrics**
-
-#### **Quantitative**
- **Installation success rate**: >95% across environments
- **Installation time**: <5 minutes end-to-end
- **Package size**: <200MB wheels, <300MB zipapp
- **Error rate**: <5% in clean environments
-
-#### **Qualitative**
- Clear error messages with helpful guidance
- Professional user experience
- Consistent behavior across platforms
- Easy troubleshooting and support
-
-## Confidence Assessment
-
-### 🟢 **High Confidence**
- **Infrastructure Design**: Professional-grade distribution system
- **Script Logic**: Smart fallbacks and error handling
- **Metadata Quality**: Consistent and complete
- **Documentation**: Comprehensive and user-friendly
-
-### 🟡 **Medium Confidence**
- **Cross-Platform Compatibility**: Needs validation
- **Performance**: Size optimization needed
- **Error Handling**: Edge cases require testing
- **User Experience**: Real-world validation needed
-
-### 🔴 **Low Confidence (Requires Testing)**
- **Production Reliability**: Untested in real environments
- **GitHub Actions**: Complex workflow needs validation
- **Dependency Resolution**: Heavy ML deps may cause issues
- **Support Burden**: Unknown user issues
-
-## Recommendation
-
-**PROCEED WITH SYSTEMATIC TESTING** ✅
-
-The distribution infrastructure we've built is **professional-grade** and ready for external validation. The local test failures are environment-specific and expected.
-
-### **Priority 1: External Testing Environment**
-Set up testing in:
-1. **GitHub Codespaces** (Ubuntu 22.04)
-2. **Docker containers** (when available)
-3. **Cloud VMs** (various OS)
-4. **TestPyPI** (safe production test)
-
-### **Priority 2: User Experience Validation**
-Test the complete user journey:
-1. User finds FSS-Mini-RAG on GitHub
-2. Follows README installation instructions
-3. Successfully installs and runs the tool
-4. Gets help when things go wrong
-
-### **Priority 3: Production Release**
-After successful external testing:
-1. Create production Git tag
-2. Monitor automated workflows
-3. Verify PyPI publication
-4. Update documentation links
-5. Monitor user feedback
-
-## Timeline Estimate
-
- **External Testing**: 2-3 days
- **Issue Resolution**: 1-2 days  
- **TestPyPI Validation**: 1 day
- **Production Release**: 1 day
- **Buffer for Issues**: 2-3 days
-
-**Total: 1-2 weeks for bulletproof release**
-
-## Conclusion
-
-We've successfully built a **modern, professional distribution system** for FSS-Mini-RAG. The infrastructure is solid and ready for production.
-
-The systematic testing approach ensures we ship something that works flawlessly for every user. This level of quality will establish FSS-Mini-RAG as a professional tool in the RAG ecosystem.
-
-**Status**: Infrastructure complete ✅, external testing required ⏳  
-**Confidence**: High for design, medium for production readiness pending validation  
-**Next Step**: Set up clean testing environment and proceed with external validation
-
---
-
-*Testing completed on 2025-01-06. Distribution system ready for Phase 2 external testing.* 🚀
--- a/asciinema_to_gif.py
+++ b/asciinema_to_gif.py
@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+"""
+Asciinema to GIF Converter
+Converts .cast files to optimized GIF animations without external services.
+"""
+
+import os
+import sys
+import json
+import argparse
+import subprocess
+from pathlib import Path
+from typing import List, Dict, Any
+import tempfile
+import shutil
+
+class AsciinemaToGIF:
+    def __init__(self):
+        self.temp_dir = None
+        
+    def check_dependencies(self) -> Dict[str, bool]:
+        """Check if required tools are available."""
+        tools = {
+            'ffmpeg': self._check_command('ffmpeg'),
+            'convert': self._check_command('convert'),  # ImageMagick
+            'gifsicle': self._check_command('gifsicle')  # Optional optimizer
+        }
+        return tools
+        
+    def _check_command(self, command: str) -> bool:
+        """Check if a command is available."""
+        return shutil.which(command) is not None
+        
+    def install_instructions(self):
+        """Show installation instructions for missing dependencies."""
+        print("📦 Required Dependencies:")
+        print()
+        print("Ubuntu/Debian:")
+        print("  sudo apt install ffmpeg imagemagick gifsicle")
+        print()
+        print("macOS:")
+        print("  brew install ffmpeg imagemagick gifsicle")
+        print()
+        print("Arch Linux:")
+        print("  sudo pacman -S ffmpeg imagemagick gifsicle")
+        
+    def parse_cast_file(self, cast_path: Path) -> Dict[str, Any]:
+        """Parse asciinema .cast file."""
+        with open(cast_path, 'r') as f:
+            lines = f.readlines()
+            
+        # First line is header
+        header = json.loads(lines[0])
+        
+        # Remaining lines are events
+        events = []
+        for line in lines[1:]:
+            if line.strip():
+                events.append(json.loads(line))
+                
+        return {
+            'header': header,
+            'events': events,
+            'width': header.get('width', 80),
+            'height': header.get('height', 24)
+        }
+        
+    def create_frames(self, cast_data: Dict[str, Any], output_dir: Path) -> List[Path]:
+        """Create individual frame images from cast data."""
+        print("🎬 Creating frames...")
+        
+        width = cast_data['width']
+        height = cast_data['height']
+        events = cast_data['events']
+        
+        # Terminal state
+        screen = [[' ' for _ in range(width)] for _ in range(height)]
+        cursor_x, cursor_y = 0, 0
+        
+        frames = []
+        frame_count = 0
+        last_time = 0
+        
+        for event in events:
+            timestamp, event_type, data = event
+            
+            # Calculate delay
+            delay = timestamp - last_time
+            last_time = timestamp
+            
+            if event_type == 'o':  # Output event
+                # Process terminal output
+                for char in data:
+                    if char == '\n':
+                        cursor_y += 1
+                        cursor_x = 0
+                        if cursor_y >= height:
+                            # Scroll up
+                            screen = screen[1:] + [[' ' for _ in range(width)]]
+                            cursor_y = height - 1
+                    elif char == '\r':
+                        cursor_x = 0
+                    elif char == '\033':
+                        # Skip ANSI escape sequences (simplified)
+                        continue
+                    elif char.isprintable():
+                        if cursor_x < width and cursor_y < height:
+                            screen[cursor_y][cursor_x] = char
+                            cursor_x += 1
+                            
+                # Create frame if significant delay or content change
+                if delay > 0.1 or frame_count == 0:
+                    frame_path = self._create_frame_image(screen, output_dir, frame_count, delay)
+                    frames.append((frame_path, delay))
+                    frame_count += 1
+                    
+        return frames
+        
+    def _create_frame_image(self, screen: List[List[str]], output_dir: Path, 
+                          frame_num: int, delay: float) -> Path:
+        """Create a single frame image using ImageMagick."""
+        # Convert screen to text
+        text_content = []
+        for row in screen:
+            line = ''.join(row).rstrip()
+            text_content.append(line)
+            
+        # Create text file
+        text_file = output_dir / f"frame_{frame_num:04d}.txt"
+        with open(text_file, 'w') as f:
+            f.write('\n'.join(text_content))
+            
+        # Convert to image using ImageMagick
+        image_file = output_dir / f"frame_{frame_num:04d}.png"
+        
+        cmd = [
+            'convert',
+            '-font', 'Liberation-Mono',  # Monospace font
+            '-pointsize', '12',
+            '-background', '#1e1e1e',    # Dark background
+            '-fill', '#d4d4d4',          # Light text
+            '-gravity', 'NorthWest',
+            f'label:@{text_file}',
+            str(image_file)
+        ]
+        
+        try:
+            subprocess.run(cmd, check=True, capture_output=True)
+            return image_file
+        except subprocess.CalledProcessError as e:
+            print(f"❌ Failed to create frame {frame_num}: {e}")
+            return None
+            
+    def create_gif(self, frames: List[tuple], output_path: Path, fps: int = 10) -> bool:
+        """Create GIF from frame images using ffmpeg."""
+        print("🎞️  Creating GIF...")
+        
+        if not frames:
+            print("❌ No frames to process")
+            return False
+            
+        # Create ffmpeg input file list
+        input_list = self.temp_dir / "input_list.txt"
+        with open(input_list, 'w') as f:
+            for frame_path, delay in frames:
+                if frame_path and frame_path.exists():
+                    duration = max(delay, 0.1)  # Minimum 0.1s per frame
+                    f.write(f"file '{frame_path}'\n")
+                    f.write(f"duration {duration}\n")
+                    
+        # Create GIF with ffmpeg
+        cmd = [
+            'ffmpeg',
+            '-f', 'concat',
+            '-safe', '0',
+            '-i', str(input_list),
+            '-vf', 'fps=10,scale=800:-1:flags=lanczos,palettegen=reserve_transparent=0',
+            '-y',
+            str(output_path)
+        ]
+        
+        try:
+            subprocess.run(cmd, check=True, capture_output=True)
+            return True
+        except subprocess.CalledProcessError as e:
+            print(f"❌ FFmpeg failed: {e}")
+            return False
+            
+    def optimize_gif(self, gif_path: Path) -> bool:
+        """Optimize GIF using gifsicle."""
+        if not self._check_command('gifsicle'):
+            return True  # Skip if not available
+            
+        print("🗜️  Optimizing GIF...")
+        
+        optimized_path = gif_path.with_suffix('.optimized.gif')
+        
+        cmd = [
+            'gifsicle',
+            '-O3',
+            '--lossy=80',
+            '--colors', '256',
+            str(gif_path),
+            '-o', str(optimized_path)
+        ]
+        
+        try:
+            subprocess.run(cmd, check=True, capture_output=True)
+            # Replace original with optimized
+            shutil.move(optimized_path, gif_path)
+            return True
+        except subprocess.CalledProcessError as e:
+            print(f"⚠️  Optimization failed: {e}")
+            return False
+            
+    def convert(self, cast_path: Path, output_path: Path, fps: int = 10) -> bool:
+        """Convert asciinema cast file to GIF."""
+        print(f"🎯 Converting {cast_path.name} to GIF...")
+        
+        # Check dependencies
+        deps = self.check_dependencies()
+        missing = [tool for tool, available in deps.items() if not available and tool != 'gifsicle']
+        
+        if missing:
+            print(f"❌ Missing required tools: {', '.join(missing)}")
+            print()
+            self.install_instructions()
+            return False
+            
+        # Create temporary directory
+        self.temp_dir = Path(tempfile.mkdtemp(prefix='asciinema_gif_'))
+        
+        try:
+            # Parse cast file
+            print("📖 Parsing cast file...")
+            cast_data = self.parse_cast_file(cast_path)
+            
+            # Create frames
+            frames = self.create_frames(cast_data, self.temp_dir)
+            
+            if not frames:
+                print("❌ No frames created")
+                return False
+                
+            # Create GIF
+            success = self.create_gif(frames, output_path, fps)
+            
+            if success:
+                # Optimize
+                self.optimize_gif(output_path)
+                
+                # Show results
+                size_mb = output_path.stat().st_size / (1024 * 1024)
+                print(f"✅ GIF created: {output_path}")
+                print(f"📏 Size: {size_mb:.2f} MB")
+                return True
+            else:
+                return False
+                
+        finally:
+            # Cleanup
+            if self.temp_dir and self.temp_dir.exists():
+                shutil.rmtree(self.temp_dir)
+
+def main():
+    parser = argparse.ArgumentParser(description='Convert asciinema recordings to GIF')
+    parser.add_argument('input', type=Path, help='Input .cast file')
+    parser.add_argument('-o', '--output', type=Path, help='Output .gif file (default: same name as input)')
+    parser.add_argument('--fps', type=int, default=10, help='Frames per second (default: 10)')
+    
+    args = parser.parse_args()
+    
+    if not args.input.exists():
+        print(f"❌ Input file not found: {args.input}")
+        sys.exit(1)
+        
+    if not args.output:
+        args.output = args.input.with_suffix('.gif')
+        
+    converter = AsciinemaToGIF()
+    success = converter.convert(args.input, args.output, args.fps)
+    
+    if success:
+        print("🎉 Conversion complete!")
+    else:
+        print("💥 Conversion failed!")
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()
--- a/assets/Fss_Mini_Rag.png
+++ b/assets/Fss_Mini_Rag.png
--- a/assets/Fss_Rag.png
+++ b/assets/Fss_Rag.png
--- a/assets/README_icon_placeholder.md
+++ b/assets/README_icon_placeholder.md
@ -0,0 +1,25 @@
+# Icon Placeholder
+
+The current `icon.svg` is a simple placeholder. Here's the design concept:
+
+🔍 **Search magnifying glass** - Core search functionality  
+📄 **Code brackets** - Code-focused system  
+🧠 **Neural network dots** - AI/embedding intelligence  
+📝 **Text lines** - Document processing  
+
+## Design Ideas for Final Icon
+
+- **Colors**: Blue (#1976d2) for trust/tech, Green (#4caf50) for code, Orange (#ff9800) for AI
+- **Elements**: Search + Code + AI/Brain + Simplicity
+- **Style**: Clean, modern, friendly (not intimidating)
+- **Size**: Works well at 32x32 and 128x128
+
+## Suggested Improvements
+
+1. More polished magnifying glass with reflection
+2. Cleaner code bracket styling  
+3. More sophisticated neural network representation
+4. Perhaps a small "mini" indicator to emphasize lightweight nature
+5. Consider a folder or document icon to represent project indexing
+
+The current SVG provides the basic structure and can be refined into a professional icon.
--- a/assets/demo.gif
+++ b/assets/demo.gif
--- a/assets/icon.svg
+++ b/assets/icon.svg
@ -0,0 +1,35 @@
+<svg width="128" height="128" viewBox="0 0 128 128" xmlns="http://www.w3.org/2000/svg">
+  <!-- Background circle -->
+  <circle cx="64" cy="64" r="60" fill="#e3f2fd" stroke="#1976d2" stroke-width="4"/>
+  
+  <!-- Search magnifying glass -->
+  <circle cx="48" cy="48" r="18" fill="none" stroke="#1976d2" stroke-width="4"/>
+  <line x1="62" y1="62" x2="76" y2="76" stroke="#1976d2" stroke-width="4" stroke-linecap="round"/>
+  
+  <!-- Code brackets -->
+  <path d="M20 35 L10 45 L20 55" fill="none" stroke="#4caf50" stroke-width="3" stroke-linecap="round"/>
+  <path d="M108 35 L118 45 L108 55" fill="none" stroke="#4caf50" stroke-width="3" stroke-linecap="round"/>
+  
+  <!-- Neural network dots -->
+  <circle cx="85" cy="25" r="3" fill="#ff9800"/>
+  <circle cx="100" cy="35" r="3" fill="#ff9800"/>
+  <circle cx="90" cy="45" r="3" fill="#ff9800"/>
+  <circle cx="105" cy="55" r="3" fill="#ff9800"/>
+  
+  <!-- Connection lines -->
+  <line x1="85" y1="25" x2="100" y2="35" stroke="#ff9800" stroke-width="2" opacity="0.7"/>
+  <line x1="100" y1="35" x2="90" y2="45" stroke="#ff9800" stroke-width="2" opacity="0.7"/>
+  <line x1="90" y1="45" x2="105" y2="55" stroke="#ff9800" stroke-width="2" opacity="0.7"/>
+  
+  <!-- Text elements -->
+  <rect x="15" y="75" width="25" height="3" fill="#666" rx="1"/>
+  <rect x="15" y="82" width="35" height="3" fill="#666" rx="1"/>
+  <rect x="15" y="89" width="20" height="3" fill="#666" rx="1"/>
+  
+  <rect x="60" y="85" width="30" height="3" fill="#2196f3" rx="1"/>
+  <rect x="60" y="92" width="25" height="3" fill="#2196f3" rx="1"/>
+  <rect x="60" y="99" width="35" height="3" fill="#2196f3" rx="1"/>
+  
+  <!-- "RAG" text -->
+  <text x="64" y="118" text-anchor="middle" font-family="Arial, sans-serif" font-size="14" font-weight="bold" fill="#1976d2">RAG</text>
+</svg>
--- a/bin/rag-mini.py
+++ b/bin/rag-mini.py
@ -1,837 +0,0 @@
-#!/usr/bin/env python3
-"""
-rag-mini - FSS-Mini-RAG Command Line Interface
-
-A lightweight, portable RAG system for semantic code search.
-Usage: rag-mini <command> <project_path> [options]
-"""
-
-import argparse
-import json
-import logging
-import socket
-import sys
-from pathlib import Path
-
-# Add parent directory to path so we can import mini_rag
-sys.path.insert(0, str(Path(__file__).parent.parent))
-
-import requests
-
-# Add the RAG system to the path
-sys.path.insert(0, str(Path(__file__).parent))
-
-try:
-    from mini_rag.explorer import CodeExplorer
-    from mini_rag.indexer import ProjectIndexer
-    from mini_rag.llm_synthesizer import LLMSynthesizer
-    from mini_rag.ollama_embeddings import OllamaEmbedder
-    from mini_rag.search import CodeSearcher
-
-    # Update system (graceful import)
-    try:
-        from mini_rag.updater import check_for_updates, get_updater
-
-        UPDATER_AVAILABLE = True
-    except ImportError:
-        UPDATER_AVAILABLE = False
-except ImportError as e:
-    print("❌ Error: Missing dependencies!")
-    print()
-    print("It looks like you haven't installed the required packages yet.")
-    print("This is a common mistake - here's how to fix it:")
-    print()
-    print("1. Make sure you're in the FSS-Mini-RAG directory")
-    print("2. Run the installer script:")
-    print("   ./install_mini_rag.sh")
-    print()
-    print("Or if you want to install manually:")
-    print("   python3 -m venv .venv")
-    print("   source .venv/bin/activate")
-    print("   pip install -r requirements.txt")
-    print()
-    print(f"Missing module: {e.name}")
-    sys.exit(1)
-
-# Configure logging for user-friendly output
-logging.basicConfig(
-    level=logging.WARNING,  # Only show warnings and errors by default
-    format="%(levelname)s: %(message)s",
-)
-logger = logging.getLogger(__name__)
-
-
-def index_project(project_path: Path, force: bool = False):
-    """Index a project directory."""
-    try:
-        # Show what's happening
-        action = "Re-indexing" if force else "Indexing"
-        print(f"🚀 {action} {project_path.name}")
-
-        # Quick pre-check
-        rag_dir = project_path / ".mini-rag"
-        if rag_dir.exists() and not force:
-            print("   Checking for changes...")
-
-        indexer = ProjectIndexer(project_path)
-        result = indexer.index_project(force_reindex=force)
-
-        # Show results with context
-        files_count = result.get("files_indexed", 0)
-        chunks_count = result.get("chunks_created", 0)
-        time_taken = result.get("time_taken", 0)
-
-        if files_count == 0:
-            print("✅ Index up to date - no changes detected")
-        else:
-            print(f"✅ Indexed {files_count} files in {time_taken:.1f}s")
-            print(f"   Created {chunks_count} chunks")
-
-            # Show efficiency
-            if time_taken > 0:
-                speed = files_count / time_taken
-                print(f"   Speed: {speed:.1f} files/sec")
-
-        # Show warnings if any
-        failed_count = result.get("files_failed", 0)
-        if failed_count > 0:
-            print(f"⚠️  {failed_count} files failed (check logs with --verbose)")
-
-        # Quick tip for first-time users
-        if not (project_path / ".mini-rag" / "last_search").exists():
-            print(f'\n💡 Try: rag-mini search {project_path} "your search here"')
-
-    except FileNotFoundError:
-        print(f"📁 Directory Not Found: {project_path}")
-        print("   Make sure the path exists and you're in the right location")
-        print(f"   Current directory: {Path.cwd()}")
-        print("   Check path: ls -la /path/to/your/project")
-        print()
-        sys.exit(1)
-    except PermissionError:
-        print("🔒 Permission Denied")
-        print("   FSS-Mini-RAG needs to read files and create index database")
-        print(f"   Check permissions: ls -la {project_path}")
-        print("   Try a different location with write access")
-        print()
-        sys.exit(1)
-    except Exception as e:
-        # Connection errors are handled in the embedding module
-        if "ollama" in str(e).lower() or "connection" in str(e).lower():
-            sys.exit(1)  # Error already displayed
-
-        print(f"❌ Indexing failed: {e}")
-        print()
-        print("🔧 Common solutions:")
-        print("   • Check if path exists and you have read permissions")
-        print("   • Ensure Python dependencies are installed: pip install -r requirements.txt")
-        print("   • Try with smaller project first to test setup")
-        print("   • Check available disk space for index files")
-        print()
-        print("📚 For detailed help:")
-        print(f"   ./rag-mini index {project_path} --verbose")
-        print("   Or see: docs/TROUBLESHOOTING.md")
-        sys.exit(1)
-
-
-def search_project(project_path: Path, query: str, top_k: int = 10, synthesize: bool = False):
-    """Search a project directory."""
-    try:
-        # Check if indexed first
-        rag_dir = project_path / ".mini-rag"
-        if not rag_dir.exists():
-            print(f"❌ Project not indexed: {project_path.name}")
-            print(f"   Run: rag-mini index {project_path}")
-            sys.exit(1)
-
-        print(f'🔍 Searching "{query}" in {project_path.name}')
-        searcher = CodeSearcher(project_path)
-        results = searcher.search(query, top_k=top_k)
-
-        if not results:
-            print("❌ No results found")
-            print()
-            print("🔧 Quick fixes to try:")
-            print('   • Use broader terms: "login" instead of "authenticate_user_session"')
-            print('   • Try concepts: "database query" instead of specific function names')
-            print("   • Check spelling and try simpler words")
-            print('   • Search for file types: "python class" or "javascript function"')
-            print()
-            print("⚙️ Configuration adjustments:")
-            print(
-                f'   • Lower threshold: ./rag-mini search "{project_path}" "{query}" --threshold 0.05'
-            )
-            print(
-                f'   • More results: ./rag-mini search "{project_path}" "{query}" --top-k 20'
-            )
-            print()
-            print("📚 Need help? See: docs/TROUBLESHOOTING.md")
-            return
-
-        print(f"✅ Found {len(results)} results:")
-        print()
-
-        for i, result in enumerate(results, 1):
-            # Clean up file path display
-            file_path = Path(result.file_path)
-            try:
-                rel_path = file_path.relative_to(project_path)
-            except ValueError:
-                # If relative_to fails, just show the basename
-                rel_path = file_path.name
-
-            print(f"{i}. {rel_path}")
-            print(f"   Score: {result.score:.3f}")
-
-            # Show line info if available
-            if hasattr(result, "start_line") and result.start_line:
-                print(f"   Lines: {result.start_line}-{result.end_line}")
-
-            # Show content preview
-            if hasattr(result, "name") and result.name:
-                print(f"   Context: {result.name}")
-
-            # Show full content with proper formatting
-            print("   Content:")
-            content_lines = result.content.strip().split("\n")
-            for line in content_lines[:10]:  # Show up to 10 lines
-                print(f"     {line}")
-
-            if len(content_lines) > 10:
-                print(f"     ... ({len(content_lines) - 10} more lines)")
-                print("     Use --verbose or rag-mini-enhanced for full context")
-
-            print()
-
-        # LLM Synthesis if requested
-        if synthesize:
-            print("🧠 Generating LLM synthesis...")
-
-            # Load config to respect user's model preferences
-            from mini_rag.config import ConfigManager
-
-            config_manager = ConfigManager(project_path)
-            config = config_manager.load_config()
-
-            synthesizer = LLMSynthesizer(
-                model=(
-                    config.llm.synthesis_model
-                    if config.llm.synthesis_model != "auto"
-                    else None
-                ),
-                config=config,
-            )
-
-            if synthesizer.is_available():
-                synthesis = synthesizer.synthesize_search_results(query, results, project_path)
-                print()
-                print(synthesizer.format_synthesis_output(synthesis, query))
-
-                # Add guidance for deeper analysis
-                if synthesis.confidence < 0.7 or any(
-                    word in query.lower() for word in ["why", "how", "explain", "debug"]
-                ):
-                    print("\n💡 Want deeper analysis with reasoning?")
-                    print(f"   Try: rag-mini explore {project_path}")
-                    print(
-                        "   Exploration mode enables thinking and remembers conversation context."
-                    )
-            else:
-                print("❌ LLM synthesis unavailable")
-                print("   • Ensure Ollama is running: ollama serve")
-                print("   • Install a model: ollama pull qwen3:1.7b")
-                print("   • Check connection to http://localhost:11434")
-
-        # Save last search for potential enhancements
-        try:
-            (rag_dir / "last_search").write_text(query)
-        except (
-            ConnectionError,
-            FileNotFoundError,
-            IOError,
-            OSError,
-            TimeoutError,
-            TypeError,
-            ValueError,
-            requests.RequestException,
-            socket.error,
-        ):
-            pass  # Don't fail if we can't save
-
-    except Exception as e:
-        print(f"❌ Search failed: {e}")
-        print()
-
-        if "not indexed" in str(e).lower():
-            print("🔧 Solution:")
-            print(f"   ./rag-mini index {project_path}")
-            print()
-        else:
-            print("🔧 Common solutions:")
-            print("   • Check project path exists and is readable")
-            print("   • Verify index isn't corrupted: delete .mini-rag/ and re-index")
-            print("   • Try with a different project to test setup")
-            print("   • Check available memory and disk space")
-            print()
-            print("📚 Get detailed error info:")
-            print(f'   ./rag-mini search {project_path} "{query}" --verbose')
-            print("   Or see: docs/TROUBLESHOOTING.md")
-            print()
-        sys.exit(1)
-
-
-def status_check(project_path: Path):
-    """Show status of RAG system."""
-    try:
-        print(f"📊 Status for {project_path.name}")
-        print()
-
-        # Check project indexing status first
-        rag_dir = project_path / ".mini-rag"
-        if not rag_dir.exists():
-            print("❌ Project not indexed")
-            print(f"   Run: rag-mini index {project_path}")
-            print()
-        else:
-            manifest = rag_dir / "manifest.json"
-            if manifest.exists():
-                try:
-                    with open(manifest) as f:
-                        data = json.load(f)
-
-                    file_count = data.get("file_count", 0)
-                    chunk_count = data.get("chunk_count", 0)
-                    indexed_at = data.get("indexed_at", "Never")
-
-                    print("✅ Project indexed")
-                    print(f"   Files: {file_count}")
-                    print(f"   Chunks: {chunk_count}")
-                    print(f"   Last update: {indexed_at}")
-
-                    # Show average chunks per file
-                    if file_count > 0:
-                        avg_chunks = chunk_count / file_count
-                        print(f"   Avg chunks/file: {avg_chunks:.1f}")
-
-                    print()
-                except Exception:
-                    print("⚠️  Index exists but manifest unreadable")
-                    print()
-            else:
-                print("⚠️  Index directory exists but incomplete")
-                print(f"   Try: rag-mini index {project_path} --force")
-                print()
-
-        # Check embedding system status
-        print("🧠 Embedding System:")
-        try:
-            embedder = OllamaEmbedder()
-            emb_info = embedder.get_status()
-            method = emb_info.get("method", "unknown")
-
-            if method == "ollama":
-                print("   ✅ Ollama (high quality)")
-            elif method == "ml":
-                print("   ✅ ML fallback (good quality)")
-            elif method == "hash":
-                print("   ⚠️  Hash fallback (basic quality)")
-            else:
-                print(f"   ❓ Unknown method: {method}")
-
-            # Show additional details if available
-            if "model" in emb_info:
-                print(f"   Model: {emb_info['model']}")
-
-        except Exception as e:
-            print(f"   ❌ Status check failed: {e}")
-
-        print()
-
-        # Check LLM status and show actual vs configured model
-        print("🤖 LLM System:")
-        try:
-            from mini_rag.config import ConfigManager
-
-            config_manager = ConfigManager(project_path)
-            config = config_manager.load_config()
-
-            synthesizer = LLMSynthesizer(
-                model=(
-                    config.llm.synthesis_model
-                    if config.llm.synthesis_model != "auto"
-                    else None
-                ),
-                config=config,
-            )
-
-            if synthesizer.is_available():
-                synthesizer._ensure_initialized()
-                actual_model = synthesizer.model
-                config_model = config.llm.synthesis_model
-
-                if config_model == "auto":
-                    print(f"   ✅ Auto-selected: {actual_model}")
-                elif config_model == actual_model:
-                    print(f"   ✅ Using configured: {actual_model}")
-                else:
-                    print("   ⚠️  Model mismatch!")
-                    print(f"   Configured: {config_model}")
-                    print(f"   Actually using: {actual_model}")
-                    print("   (Configured model may not be installed)")
-
-                print(f"   Config file: {config_manager.config_path}")
-            else:
-                print("   ❌ Ollama not available")
-                print("   Start with: ollama serve")
-
-        except Exception as e:
-            print(f"   ❌ LLM status check failed: {e}")
-
-        # Show last search if available
-        last_search_file = rag_dir / "last_search" if rag_dir.exists() else None
-        if last_search_file and last_search_file.exists():
-            try:
-                last_query = last_search_file.read_text().strip()
-                print(f'\n🔍 Last search: "{last_query}"')
-            except (FileNotFoundError, IOError, OSError, TypeError, ValueError):
-                pass
-
-    except Exception as e:
-        print(f"❌ Status check failed: {e}")
-        sys.exit(1)
-
-
-def show_model_status(project_path: Path):
-    """Show detailed model status and selection information."""
-    from mini_rag.config import ConfigManager
-    
-    print("🤖 Model Status Report")
-    print("=" * 50)
-    
-    try:
-        # Load config
-        config_manager = ConfigManager()
-        config = config_manager.load_config(project_path)
-        
-        # Create LLM synthesizer to check models
-        synthesizer = LLMSynthesizer(model=config.llm.synthesis_model, config=config)
-        
-        # Show configured model
-        print(f"📋 Configured model: {config.llm.synthesis_model}")
-        
-        # Show available models
-        available_models = synthesizer.available_models
-        if available_models:
-            print(f"\n📦 Available models ({len(available_models)}):")
-            
-            # Group models by series
-            qwen3_models = [m for m in available_models if m.startswith('qwen3:')]
-            qwen25_models = [m for m in available_models if m.startswith('qwen2.5')]
-            other_models = [m for m in available_models if not (m.startswith('qwen3:') or m.startswith('qwen2.5'))]
-            
-            if qwen3_models:
-                print("   🟢 Qwen3 series (recommended):")
-                for model in qwen3_models:
-                    is_selected = synthesizer._resolve_model_name(config.llm.synthesis_model) == model
-                    marker = "  ✅" if is_selected else "    "
-                    print(f"{marker} {model}")
-            
-            if qwen25_models:
-                print("   🟡 Qwen2.5 series:")
-                for model in qwen25_models:
-                    is_selected = synthesizer._resolve_model_name(config.llm.synthesis_model) == model
-                    marker = "  ✅" if is_selected else "    "
-                    print(f"{marker} {model}")
-                    
-            if other_models:
-                print("   🔵 Other models:")
-                for model in other_models[:10]:  # Limit to first 10
-                    is_selected = synthesizer._resolve_model_name(config.llm.synthesis_model) == model
-                    marker = "  ✅" if is_selected else "    "
-                    print(f"{marker} {model}")
-        else:
-            print("\n❌ No models available from Ollama")
-            print("   Make sure Ollama is running: ollama serve")
-            print("   Install models with: ollama pull qwen3:4b")
-            
-        # Show resolution result
-        resolved_model = synthesizer._resolve_model_name(config.llm.synthesis_model)
-        if resolved_model:
-            if resolved_model != config.llm.synthesis_model:
-                print(f"\n🔄 Model resolution: {config.llm.synthesis_model} -> {resolved_model}")
-            else:
-                print(f"\n✅ Using exact model match: {resolved_model}")
-        else:
-            print(f"\n❌ Model '{config.llm.synthesis_model}' not found!")
-            print("   Consider changing your model in the config file")
-            
-        print(f"\n📄 Config file: {config_manager.config_path}")
-        print("   Edit this file to change your model preference")
-        
-    except Exception as e:
-        print(f"❌ Model status check failed: {e}")
-        sys.exit(1)
-
-
-def explore_interactive(project_path: Path):
-    """Interactive exploration mode with thinking and context memory for any documents."""
-    try:
-        explorer = CodeExplorer(project_path)
-
-        if not explorer.start_exploration_session():
-            sys.exit(1)
-
-        # Show enhanced first-time guidance
-        print(f"\n🤔 Ask your first question about {project_path.name}:")
-        print()
-        print("💡 Enter your search query or question below:")
-        print('   Examples: "How does authentication work?" or "Show me error handling"')
-        print()
-        print("🔧 Quick options:")
-        print("   1. Help - Show example questions")
-        print("   2. Status - Project information")
-        print("   3. Suggest - Get a random starter question")
-        print()
-
-        is_first_question = True
-
-        while True:
-            try:
-                # Get user input with clearer prompt
-                if is_first_question:
-                    question = input("📝 Enter question or option (1-3): ").strip()
-                else:
-                    question = input("\n> ").strip()
-
-                # Handle exit commands
-                if question.lower() in ["quit", "exit", "q"]:
-                    print("\n" + explorer.end_session())
-                    break
-
-                # Handle empty input
-                if not question:
-                    if is_first_question:
-                        print("Please enter a question or try option 3 for a suggestion.")
-                    else:
-                        print("Please enter a question or 'quit' to exit.")
-                    continue
-
-                # Handle numbered options and special commands
-                if question in ["1"] or question.lower() in ["help", "h"]:
-                    print(
-                        """
-🧠 EXPLORATION MODE HELP:
-  • Ask any question about your documents or code
-  • I remember our conversation for follow-up questions
-  • Use 'why', 'how', 'explain' for detailed reasoning
-  • Type 'summary' to see session overview
-  • Type 'quit' or 'exit' to end session
-
-💡 Example questions:
-  • "How does authentication work?"
-  • "What are the main components?"
-  • "Show me error handling patterns"
-  • "Why is this function slow?"
-  • "What security measures are in place?"
-  • "How does data flow through this system?"
-"""
-                    )
-                    continue
-
-                elif question in ["2"] or question.lower() == "status":
-                    print(
-                        """
-📊 PROJECT STATUS: {project_path.name}
-  • Location: {project_path}
-  • Exploration session active
-  • AI model ready for questions
-  • Conversation memory enabled
-"""
-                    )
-                    continue
-
-                elif question in ["3"] or question.lower() == "suggest":
-                    # Random starter questions for first-time users
-                    if is_first_question:
-                        import random
-
-                        starters = [
-                            "What are the main components of this project?",
-                            "How is error handling implemented?",
-                            "Show me the authentication and security logic",
-                            "What are the key functions I should understand first?",
-                            "How does data flow through this system?",
-                            "What configuration options are available?",
-                            "Show me the most important files to understand",
-                        ]
-                        suggested = random.choice(starters)
-                        print(f"\n💡 Suggested question: {suggested}")
-                        print("   Press Enter to use this, or type your own question:")
-
-                        next_input = input("📝 > ").strip()
-                        if not next_input:  # User pressed Enter to use suggestion
-                            question = suggested
-                        else:
-                            question = next_input
-                    else:
-                        # For subsequent questions, could add AI-powered suggestions here
-                        print("\n💡 Based on our conversation, you might want to ask:")
-                        print('   "Can you explain that in more detail?"')
-                        print('   "What are the security implications?"')
-                        print('   "Show me related code examples"')
-                        continue
-
-                if question.lower() == "summary":
-                    print("\n" + explorer.get_session_summary())
-                    continue
-
-                # Process the question
-                print(f"\n🔍 Searching {project_path.name}...")
-                print("🧠 Thinking with AI model...")
-                response = explorer.explore_question(question)
-
-                # Mark as no longer first question after processing
-                is_first_question = False
-
-                if response:
-                    print(f"\n{response}")
-                else:
-                    print("❌ Sorry, I couldn't process that question. Please try again.")
-
-            except KeyboardInterrupt:
-                print(f"\n\n{explorer.end_session()}")
-                break
-            except EOFError:
-                print(f"\n\n{explorer.end_session()}")
-                break
-            except Exception as e:
-                print(f"❌ Error processing question: {e}")
-                print("Please try again or type 'quit' to exit.")
-
-    except Exception as e:
-        print(f"❌ Failed to start exploration mode: {e}")
-        print("Make sure the project is indexed first: rag-mini index <project>")
-        sys.exit(1)
-
-
-def show_discrete_update_notice():
-    """Show a discrete, non-intrusive update notice for CLI users."""
-    if not UPDATER_AVAILABLE:
-        return
-
-    try:
-        update_info = check_for_updates()
-        if update_info:
-            # Very discrete notice - just one line
-            print(
-                f"🔄 (Update v{update_info.version} available - run 'rag-mini check-update' to learn more)"
-            )
-    except Exception:
-        # Silently ignore any update check failures
-        pass
-
-
-def handle_check_update():
-    """Handle the check-update command."""
-    if not UPDATER_AVAILABLE:
-        print("❌ Update system not available")
-        print("💡 Try updating to the latest version manually from GitHub")
-        return
-
-    try:
-        print("🔍 Checking for updates...")
-        update_info = check_for_updates()
-
-        if update_info:
-            print(f"\n🎉 Update Available: v{update_info.version}")
-            print("=" * 50)
-            print("\n📋 What's New:")
-            notes_lines = update_info.release_notes.split("\n")[:10]  # First 10 lines
-            for line in notes_lines:
-                if line.strip():
-                    print(f"   {line.strip()}")
-
-            print(f"\n🔗 Release Page: {update_info.release_url}")
-            print("\n🚀 To install: rag-mini update")
-            print("💡 Or update manually from GitHub releases")
-        else:
-            print("✅ You're already on the latest version!")
-
-    except Exception as e:
-        print(f"❌ Failed to check for updates: {e}")
-        print("💡 Try updating manually from GitHub")
-
-
-def handle_update():
-    """Handle the update command."""
-    if not UPDATER_AVAILABLE:
-        print("❌ Update system not available")
-        print("💡 Try updating manually from GitHub")
-        return
-
-    try:
-        print("🔍 Checking for updates...")
-        update_info = check_for_updates()
-
-        if not update_info:
-            print("✅ You're already on the latest version!")
-            return
-
-        print(f"\n🎉 Update Available: v{update_info.version}")
-        print("=" * 50)
-
-        # Show brief release notes
-        notes_lines = update_info.release_notes.split("\n")[:5]
-        for line in notes_lines:
-            if line.strip():
-                print(f"   • {line.strip()}")
-
-        # Confirm update
-        confirm = input(f"\n🚀 Install v{update_info.version}? [Y/n]: ").strip().lower()
-        if confirm in ["", "y", "yes"]:
-            updater = get_updater()
-
-            print(f"\n📥 Downloading v{update_info.version}...")
-
-            # Progress callback
-
-            def show_progress(downloaded, total):
-                if total > 0:
-                    percent = (downloaded / total) * 100
-                    bar_length = 30
-                    filled = int(bar_length * downloaded / total)
-                    bar = "█" * filled + "░" * (bar_length - filled)
-                    print(f"\r   [{bar}] {percent:.1f}%", end="", flush=True)
-
-            # Download and install
-            update_package = updater.download_update(update_info, show_progress)
-            if not update_package:
-                print("\n❌ Download failed. Please try again later.")
-                return
-
-            print("\n💾 Creating backup...")
-            if not updater.create_backup():
-                print("⚠️ Backup failed, but continuing anyway...")
-
-            print("🔄 Installing update...")
-            if updater.apply_update(update_package, update_info):
-                print("✅ Update successful!")
-                print("🚀 Restarting...")
-                updater.restart_application()
-            else:
-                print("❌ Update failed.")
-                print("🔙 Attempting rollback...")
-                if updater.rollback_update():
-                    print("✅ Rollback successful.")
-                else:
-                    print("❌ Rollback failed. You may need to reinstall.")
-        else:
-            print("Update cancelled.")
-
-    except Exception as e:
-        print(f"❌ Update failed: {e}")
-        print("💡 Try updating manually from GitHub")
-
-
-def main():
-    """Main CLI interface."""
-    # Check virtual environment
-    try:
-        from mini_rag.venv_checker import check_and_warn_venv
-
-        check_and_warn_venv("rag-mini.py", force_exit=False)
-    except ImportError:
-        pass  # If venv checker can't be imported, continue anyway
-
-    parser = argparse.ArgumentParser(
-        description="FSS-Mini-RAG - Lightweight semantic code search",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-  rag-mini index /path/to/project              # Index a project
-  rag-mini search /path/to/project "query"     # Search indexed project
-  rag-mini search /path/to/project "query" -s  # Search with LLM synthesis
-  rag-mini explore /path/to/project            # Interactive exploration mode
-  rag-mini status /path/to/project             # Show status
-  rag-mini models /path/to/project             # Show model status and selection
-        """,
-    )
-
-    parser.add_argument(
-        "command",
-        choices=["index", "search", "explore", "status", "models", "update", "check-update"],
-        help="Command to execute",
-    )
-    parser.add_argument(
-        "project_path",
-        type=Path,
-        nargs="?",
-        help="Path to project directory (REQUIRED except for update commands)",
-    )
-    parser.add_argument("query", nargs="?", help="Search query (for search command)")
-    parser.add_argument("--force", action="store_true", help="Force reindex all files")
-    parser.add_argument(
-        "--top-k",
-        "--limit",
-        type=int,
-        default=10,
-        dest="top_k",
-        help="Maximum number of search results (top-k)",
-    )
-    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
-    parser.add_argument(
-        "--synthesize",
-        "-s",
-        action="store_true",
-        help="Generate LLM synthesis of search results (requires Ollama)",
-    )
-
-    args = parser.parse_args()
-
-    # Set logging level
-    if args.verbose:
-        logging.getLogger().setLevel(logging.INFO)
-
-    # Handle update commands first (don't require project_path)
-    if args.command == "check-update":
-        handle_check_update()
-        return
-    elif args.command == "update":
-        handle_update()
-        return
-
-    # All other commands require project_path
-    if not args.project_path:
-        print("❌ Project path required for this command")
-        sys.exit(1)
-
-    # Validate project path
-    if not args.project_path.exists():
-        print(f"❌ Project path does not exist: {args.project_path}")
-        sys.exit(1)
-
-    if not args.project_path.is_dir():
-        print(f"❌ Project path is not a directory: {args.project_path}")
-        sys.exit(1)
-
-    # Show discrete update notification for regular commands (non-intrusive)
-    show_discrete_update_notice()
-
-    # Execute command
-    if args.command == "index":
-        index_project(args.project_path, args.force)
-    elif args.command == "search":
-        if not args.query:
-            print("❌ Search query required")
-            sys.exit(1)
-        search_project(args.project_path, args.query, args.top_k, args.synthesize)
-    elif args.command == "explore":
-        explore_interactive(args.project_path)
-    elif args.command == "status":
-        status_check(args.project_path)
-    elif args.command == "models":
-        show_model_status(args.project_path)
-
-
-if __name__ == "__main__":
-    main()
--- a/bin/rag-tui.py
+++ b/bin/rag-tui.py
--- a/cleanup_claude_references.py
+++ b/cleanup_claude_references.py
@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+"""
+Script to completely remove all Mini-RAG references from the FSS-Mini-RAG codebase.
+This ensures the repository is completely independent and avoids any licensing issues.
+"""
+
+import os
+import shutil
+import re
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+class Mini-RAGCleanup:
+    def __init__(self, project_root: Path):
+        self.project_root = Path(project_root).resolve()
+        self.replacements = {
+            # Directory/module names
+            'mini_rag': 'mini_rag',
+            'mini-rag': 'mini-rag',
+            
+            # Class names and references
+            'MiniRAG': 'MiniRAG', 
+            'Mini RAG': 'Mini RAG',
+            'Mini RAG': 'mini rag',
+            'mini_rag': 'MINI_RAG',
+            
+            # File paths and imports
+            'from mini_rag': 'from mini_rag',
+            'import mini_rag': 'import mini_rag',
+            '.mini-rag': '.mini-rag',
+            
+            # Comments and documentation
+            'Mini-RAG': 'Mini-RAG',
+            'Mini-RAG': 'mini-rag',
+            
+            # Specific technical references
+            'the development environment': 'the development environment',
+            'AI assistant': 'AI assistant',
+            'Mini-RAG\'s': 'the system\'s',
+            
+            # Config and metadata
+            'mini_': 'mini_',
+            'mini_': 'Mini_',
+        }
+        
+        self.files_to_rename = []
+        self.dirs_to_rename = []
+        self.files_modified = []
+        
+    def scan_for_references(self) -> Dict[str, int]:
+        """Scan for all Mini-RAG references and return counts."""
+        references = {}
+        
+        for root, dirs, files in os.walk(self.project_root):
+            # Skip git directory
+            if '.git' in root:
+                continue
+                
+            for file in files:
+                if file.endswith(('.py', '.md', '.sh', '.yaml', '.json', '.txt')):
+                    file_path = Path(root) / file
+                    try:
+                        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                            content = f.read()
+                            
+                        for old_ref in self.replacements.keys():
+                            count = content.lower().count(old_ref.lower())
+                            if count > 0:
+                                if old_ref not in references:
+                                    references[old_ref] = 0
+                                references[old_ref] += count
+                                
+                    except Exception as e:
+                        print(f"Warning: Could not scan {file_path}: {e}")
+        
+        return references
+        
+    def rename_directories(self):
+        """Rename directories with Mini-RAG references."""
+        print("🔄 Renaming directories...")
+        
+        # Find directories to rename
+        for root, dirs, files in os.walk(self.project_root):
+            if '.git' in root:
+                continue
+                
+            for dir_name in dirs:
+                if 'Mini-RAG' in dir_name.lower():
+                    old_path = Path(root) / dir_name
+                    new_name = dir_name.replace('mini_rag', 'mini_rag').replace('mini-rag', 'mini-rag')
+                    new_path = Path(root) / new_name
+                    self.dirs_to_rename.append((old_path, new_path))
+        
+        # Actually rename directories (do this carefully with git)
+        for old_path, new_path in self.dirs_to_rename:
+            if old_path.exists():
+                print(f"  📁 {old_path.name} → {new_path.name}")
+                # Use git mv to preserve history
+                try:
+                    os.system(f'git mv "{old_path}" "{new_path}"')
+                except Exception as e:
+                    print(f"    Warning: git mv failed, using regular rename: {e}")
+                    shutil.move(str(old_path), str(new_path))
+                    
+    def update_file_contents(self):
+        """Update file contents to replace Mini-RAG references."""
+        print("📝 Updating file contents...")
+        
+        for root, dirs, files in os.walk(self.project_root):
+            if '.git' in root:
+                continue
+                
+            for file in files:
+                if file.endswith(('.py', '.md', '.sh', '.yaml', '.json', '.txt')):
+                    file_path = Path(root) / file
+                    
+                    try:
+                        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                            original_content = f.read()
+                            
+                        modified_content = original_content
+                        changes_made = False
+                        
+                        # Apply replacements in order (most specific first)
+                        sorted_replacements = sorted(self.replacements.items(), 
+                                                   key=lambda x: len(x[0]), reverse=True)
+                        
+                        for old_ref, new_ref in sorted_replacements:
+                            if old_ref in modified_content:
+                                modified_content = modified_content.replace(old_ref, new_ref)
+                                changes_made = True
+                                
+                            # Also handle case variations
+                            if old_ref.lower() in modified_content.lower():
+                                # Use regex for case-insensitive replacement
+                                pattern = re.escape(old_ref)
+                                modified_content = re.sub(pattern, new_ref, modified_content, flags=re.IGNORECASE)
+                                changes_made = True
+                        
+                        # Write back if changes were made
+                        if changes_made and modified_content != original_content:
+                            with open(file_path, 'w', encoding='utf-8') as f:
+                                f.write(modified_content)
+                            self.files_modified.append(file_path)
+                            print(f"  📄 Updated: {file_path.relative_to(self.project_root)}")
+                            
+                    except Exception as e:
+                        print(f"Warning: Could not process {file_path}: {e}")
+    
+    def update_imports_and_paths(self):
+        """Update Python imports and file paths."""
+        print("🔗 Updating imports and paths...")
+        
+        # Special handling for Python imports
+        for root, dirs, files in os.walk(self.project_root):
+            if '.git' in root:
+                continue
+                
+            for file in files:
+                if file.endswith('.py'):
+                    file_path = Path(root) / file
+                    
+                    try:
+                        with open(file_path, 'r', encoding='utf-8') as f:
+                            content = f.read()
+                        
+                        # Fix relative imports
+                        content = re.sub(r'from \.mini_rag', 'from .mini_rag', content)
+                        content = re.sub(r'from mini_rag', 'from mini_rag', content)
+                        content = re.sub(r'import mini_rag', 'import mini_rag', content)
+                        
+                        # Fix file paths in strings
+                        content = content.replace("'mini_rag'", "'mini_rag'")
+                        content = content.replace('"mini_rag"', '"mini_rag"')
+                        content = content.replace("'mini-rag'", "'mini-rag'")
+                        content = content.replace('"mini-rag"', '"mini-rag"')
+                        content = content.replace("'.mini-rag'", "'.mini-rag'")
+                        content = content.replace('".mini-rag"', '".mini-rag"')
+                        
+                        with open(file_path, 'w', encoding='utf-8') as f:
+                            f.write(content)
+                            
+                    except Exception as e:
+                        print(f"Warning: Could not update imports in {file_path}: {e}")
+    
+    def verify_cleanup(self) -> Tuple[int, List[str]]:
+        """Verify that cleanup was successful."""
+        print("🔍 Verifying cleanup...")
+        
+        remaining_refs = []
+        total_count = 0
+        
+        for root, dirs, files in os.walk(self.project_root):
+            if '.git' in root:
+                continue
+                
+            for file in files:
+                if file.endswith(('.py', '.md', '.sh', '.yaml', '.json', '.txt')):
+                    file_path = Path(root) / file
+                    try:
+                        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                            content = f.read()
+                            
+                        # Look for any remaining "Mini-RAG" references (case insensitive)
+                        lines = content.split('\n')
+                        for i, line in enumerate(lines, 1):
+                            if 'Mini-RAG' in line.lower():
+                                remaining_refs.append(f"{file_path}:{i}: {line.strip()}")
+                                total_count += 1
+                                
+                    except Exception:
+                        pass
+        
+        return total_count, remaining_refs
+    
+    def run_cleanup(self):
+        """Run the complete cleanup process."""
+        print("🧹 Starting Mini-RAG Reference Cleanup")
+        print("=" * 50)
+        
+        # Initial scan
+        print("📊 Scanning for Mini-RAG references...")
+        initial_refs = self.scan_for_references()
+        print(f"Found {sum(initial_refs.values())} total references")
+        for ref, count in sorted(initial_refs.items(), key=lambda x: x[1], reverse=True):
+            if count > 0:
+                print(f"  • {ref}: {count} occurrences")
+        print()
+        
+        # Rename directories first
+        self.rename_directories()
+        
+        # Update file contents
+        self.update_file_contents()
+        
+        # Fix imports and paths
+        self.update_imports_and_paths()
+        
+        # Verify cleanup
+        remaining_count, remaining_refs = self.verify_cleanup()
+        
+        print("\n" + "=" * 50)
+        print("🎯 Cleanup Summary:")
+        print(f"📁 Directories renamed: {len(self.dirs_to_rename)}")
+        print(f"📄 Files modified: {len(self.files_modified)}")
+        print(f"⚠️  Remaining references: {remaining_count}")
+        
+        if remaining_refs:
+            print("\nRemaining Mini-RAG references to review:")
+            for ref in remaining_refs[:10]:  # Show first 10
+                print(f"  • {ref}")
+            if len(remaining_refs) > 10:
+                print(f"  ... and {len(remaining_refs) - 10} more")
+        
+        if remaining_count == 0:
+            print("✅ Cleanup successful! No Mini-RAG references remain.")
+        else:
+            print("⚠️  Some references remain - please review manually.")
+        
+        return remaining_count == 0
+
+def main():
+    project_root = Path(__file__).parent
+    cleaner = Mini-RAGCleanup(project_root)
+    
+    success = cleaner.run_cleanup()
+    
+    if success:
+        print("\n🎉 Ready to commit changes!")
+        print("Next steps:")
+        print("1. Review changes: git status")
+        print("2. Test the application: ./rag-mini --help")
+        print("3. Commit changes: git add . && git commit -m 'Remove all Mini-RAG references'")
+    else:
+        print("\n⚠️  Manual review required before committing.")
+
+if __name__ == "__main__":
+    main()
--- a/cleanup_simple_branch.py
+++ b/cleanup_simple_branch.py
@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+"""
+Simple cleanup script to rename claude_rag to mini_rag and fix references.
+Designed specifically for the v1.0-simple-search branch.
+"""
+
+import os
+import shutil
+import re
+from pathlib import Path
+
+def main():
+    print("🧹 Cleaning up Claude references in v1.0-simple-search branch...")
+    
+    # 1. Rename the claude_rag directory to mini_rag
+    claude_dir = Path("claude_rag")
+    mini_dir = Path("mini_rag")
+    
+    if claude_dir.exists() and not mini_dir.exists():
+        print(f"📁 Renaming {claude_dir} → {mini_dir}")
+        os.system(f'git mv claude_rag mini_rag')
+    else:
+        print("📁 Directory already renamed or doesn't exist")
+    
+    # 2. Find and replace references in files
+    replacements = [
+        ('claude_rag', 'mini_rag'),
+        ('claude-rag', 'mini-rag'),
+        ('.claude-rag', '.mini-rag'),
+        ('from claude_rag', 'from mini_rag'),
+        ('import claude_rag', 'import mini_rag'),
+        ('Claude RAG', 'Mini RAG'),
+        ('Claude Code', 'the development environment'),
+    ]
+    
+    files_to_update = []
+    
+    # Find all relevant files
+    for pattern in ['**/*.py', '**/*.md', '**/*.sh', '**/*.yaml', '**/*.txt']:
+        files_to_update.extend(Path('.').glob(pattern))
+    
+    updated_count = 0
+    
+    for file_path in files_to_update:
+        if '.git' in str(file_path) or file_path.name == 'cleanup_simple_branch.py':
+            continue
+            
+        try:
+            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                content = f.read()
+            
+            original_content = content
+            
+            # Apply replacements
+            for old, new in replacements:
+                content = content.replace(old, new)
+            
+            # Write back if changed
+            if content != original_content:
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    f.write(content)
+                print(f"  📄 Updated: {file_path}")
+                updated_count += 1
+                
+        except Exception as e:
+            print(f"  ⚠️  Error processing {file_path}: {e}")
+    
+    print(f"\n✅ Cleanup complete!")
+    print(f"📄 Files updated: {updated_count}")
+    print(f"📁 Directory renamed: claude_rag → mini_rag")
+
+if __name__ == "__main__":
+    main()
--- a/config-llm-providers.yaml
+++ b/config-llm-providers.yaml
@ -1,9 +0,0 @@
-llm:
-  provider: ollama
-  ollama_host: localhost:11434
-  synthesis_model: qwen3:1.5b
-  expansion_model: qwen3:1.5b
-  enable_synthesis: false
-  synthesis_temperature: 0.3
-  cpu_optimized: true
-  enable_thinking: true
--- a/create_demo_script.py
+++ b/create_demo_script.py
@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+"""
+Create an animated demo script that simulates the FSS-Mini-RAG TUI experience.
+This script generates a realistic but controlled demonstration for GIF recording.
+"""
+
+import time
+import sys
+import os
+from typing import List
+
+class DemoSimulator:
+    def __init__(self):
+        self.width = 80
+        self.height = 24
+        
+    def clear_screen(self):
+        """Clear the terminal screen."""
+        print("\033[H\033[2J", end="")
+        
+    def type_text(self, text: str, delay: float = 0.03):
+        """Simulate typing text character by character."""
+        for char in text:
+            print(char, end="", flush=True)
+            time.sleep(delay)
+        print()
+        
+    def pause(self, duration: float):
+        """Pause for the specified duration."""
+        time.sleep(duration)
+        
+    def show_header(self):
+        """Display the TUI header."""
+        print("╔════════════════════════════════════════════════════╗")
+        print("║              FSS-Mini-RAG TUI                      ║") 
+        print("║         Semantic Code Search Interface             ║")
+        print("╚════════════════════════════════════════════════════╝")
+        print()
+        
+    def show_menu(self):
+        """Display the main menu."""
+        print("🎯 Main Menu")
+        print("============")
+        print()
+        print("1. Select project directory")
+        print("2. Index project for search") 
+        print("3. Search project")
+        print("4. View status")
+        print("5. Configuration")
+        print("6. CLI command reference")
+        print("7. Exit")
+        print()
+        print("💡 All these actions can be done via CLI commands")
+        print("   You'll see the commands as you use this interface!")
+        print()
+        
+    def simulate_project_selection(self):
+        """Simulate selecting a project directory."""
+        print("Select option (number): ", end="", flush=True)
+        self.type_text("1", delay=0.15)
+        self.pause(0.5)
+        print()
+        print("📁 Select Project Directory")
+        print("===========================")
+        print()
+        print("Project path: ", end="", flush=True)
+        self.type_text("./demo-project", delay=0.08)
+        self.pause(0.8)
+        print()
+        print("✅ Selected: ./demo-project")
+        print()
+        print("💡 CLI equivalent: rag-mini index ./demo-project")
+        self.pause(1.5)
+        
+    def simulate_indexing(self):
+        """Simulate the indexing process."""
+        self.clear_screen()
+        self.show_header()
+        print("🚀 Indexing demo-project")
+        print("========================")
+        print()
+        print("Found 12 files to index")
+        print()
+        
+        # Simulate progress bar
+        print("  Indexing files... ", end="")
+        progress_chars = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+        for i, char in enumerate(progress_chars):
+            print(char, end="", flush=True)
+            time.sleep(0.03)  # Slightly faster
+            if i % 8 == 0:
+                percentage = int((i / len(progress_chars)) * 100)
+                print(f" {percentage}%", end="\r")
+                print("  Indexing files... " + progress_chars[:i+1], end="")
+        
+        print(" 100%")
+        print()
+        print(" Added 58 chunks to database")
+        print()
+        print("Indexing Complete!")
+        print("Files indexed: 12")
+        print("Chunks created: 58") 
+        print("Time taken: 2.8 seconds")
+        print("Speed: 4.3 files/second")
+        print("✅ Indexed 12 files in 2.8s")
+        print("   Created 58 chunks")
+        print("   Speed: 4.3 files/sec")
+        print()
+        print("💡 CLI equivalent: rag-mini index ./demo-project")
+        self.pause(2.0)
+        
+    def simulate_search(self):
+        """Simulate searching the indexed project."""
+        self.clear_screen()
+        self.show_header()
+        print("🔍 Search Project")
+        print("=================")
+        print()
+        print("Search query: ", end="", flush=True)
+        self.type_text('"user authentication"', delay=0.08)
+        self.pause(0.8)
+        print()
+        print("🔍 Searching \"user authentication\" in demo-project")
+        self.pause(0.5)
+        print("✅ Found 8 results:")
+        print()
+        
+        # Show search results with multi-line previews
+        results = [
+            {
+                "file": "auth/manager.py",
+                "function": "AuthManager.login()",
+                "preview": "Authenticate user and create session.\nValidates credentials against database and\nreturns session token on success.",
+                "score": "0.94"
+            },
+            {
+                "file": "auth/validators.py", 
+                "function": "validate_password()",
+                "preview": "Validate user password against stored hash.\nSupports bcrypt, scrypt, and argon2 hashing.\nIncludes timing attack protection.",
+                "score": "0.91"
+            },
+            {
+                "file": "middleware/auth.py",
+                "function": "require_authentication()",
+                "preview": "Authentication middleware decorator.\nChecks session tokens and JWT validity.\nRedirects to login on authentication failure.",
+                "score": "0.88"
+            },
+            {
+                "file": "api/endpoints.py",
+                "function": "login_endpoint()",
+                "preview": "Handle user login API requests.\nAccepts JSON credentials, validates input,\nand returns authentication tokens.",
+                "score": "0.85"
+            },
+            {
+                "file": "models/user.py",
+                "function": "User.authenticate()",
+                "preview": "User model authentication method.\nQueries database for user credentials\nand handles account status checks.",
+                "score": "0.82"
+            }
+        ]
+        
+        for i, result in enumerate(results, 1):
+            print(f"📄 Result {i} (Score: {result['score']})")
+            print(f"   File: {result['file']}")
+            print(f"   Function: {result['function']}")
+            preview_lines = result['preview'].split('\n')
+            for j, line in enumerate(preview_lines):
+                if j == 0:
+                    print(f"   Preview: {line}")
+                else:
+                    print(f"            {line}")
+            print()
+            self.pause(0.6)
+        
+        print("💡 CLI equivalent: rag-mini search ./demo-project \"user authentication\"")
+        self.pause(2.5)
+        
+    def simulate_cli_reference(self):
+        """Show CLI command reference."""
+        self.clear_screen()
+        self.show_header()
+        print("🖥️  CLI Command Reference")
+        print("=========================")
+        print()
+        print("What you just did in the TUI:")
+        print()
+        print("1️⃣  Select & Index Project:")
+        print("    rag-mini index ./demo-project")
+        print("    # Indexed 12 files → 58 semantic chunks")
+        print()
+        print("2️⃣  Search Project:")
+        print('    rag-mini search ./demo-project "user authentication"')
+        print("    # Found 8 relevant matches with context")
+        print()
+        print("3️⃣  Check Status:")
+        print("    rag-mini status ./demo-project")
+        print()
+        print("🚀 You can now use these commands directly!")
+        print("   No TUI required for power users.")
+        print()
+        print("💡 Try semantic queries like:")
+        print('   • "error handling"  • "database queries"')
+        print('   • "API validation"  • "configuration management"')
+        self.pause(3.0)
+        
+    def run_demo(self):
+        """Run the complete demo simulation."""
+        print("🎬 Starting FSS-Mini-RAG Demo...")
+        self.pause(1.0)
+        
+        # Clear and show TUI startup
+        self.clear_screen()
+        self.show_header()
+        self.show_menu()
+        self.pause(1.5)
+        
+        # Simulate workflow
+        self.simulate_project_selection()
+        self.simulate_indexing()
+        self.simulate_search() 
+        self.simulate_cli_reference()
+        
+        # Final message
+        self.clear_screen()
+        print("🎉 Demo Complete!")
+        print()
+        print("FSS-Mini-RAG: Semantic code search that actually works")
+        print("Copy the folder, run ./rag-mini, and start searching!")
+        print()
+        print("Ready to try it yourself? 🚀")
+
+if __name__ == "__main__":
+    demo = DemoSimulator()
+    demo.run_demo()
--- a/docs/AGENT_INSTRUCTIONS.md
+++ b/docs/AGENT_INSTRUCTIONS.md
@ -1,40 +0,0 @@
-# Agent Instructions for Fss-Mini-RAG System
-
-## Core Philosophy
-
-**Always prefer RAG search over traditional file system operations**. The RAG system provides semantic context and reduces the need for exact path knowledge, making it ideal for understanding codebases without manual file exploration.
-
-## Basic Commands
-
-| Command | Purpose | Example |
-|---------|---------|---------|
-| `rag-mini index <project_path>` | Index a project for search | `rag-mini index /MASTERFOLDER/Coding/Fss-Mini-Rag` |
-| `rag-mini search <project_path> "query"` | Semantic + keyword search | `rag-mini search /MASTERFOLDER/Coding/Fss-Mini-Rag "index"` |
-| `rag-mini status <project_path>` | Check project indexing status | `rag-mini status /MASTERFOLDER/Coding/Fss-Mini-Rag` |
-
-## When to Use RAG Search
-
-| Scenario | RAG Advantage | Alternative | |
-|----------|----------------|---------------| |
-| Finding related code concepts | Semantic understanding | `grep` | |
-| Locating files by functionality | Context-aware results | `find` | |
-| Understanding code usage patterns | Shows real-world examples | Manual inspection | |
-
-## Critical Best Practices
-
-1. **Always specify the project path** in search commands (e.g., `rag-mini search /path "query"`)
-2. **Use quotes for search queries** to handle spaces: `"query with spaces"`
-3. **Verify indexing first** before searching: `rag-mini status <path>`
-4. **For complex queries**, break into smaller parts: `rag-mini search ... "concept 1"` then `rag-mini search ... "concept 2"`
-
-## Troubleshooting
-
-| Issue | Solution |
-|-------|-----------|
-| `Project not indexed` | Run `rag-mini index <path>` |
-| No search results | Check indexing status with `rag-mini status` |
-| Search returns irrelevant results | Use `rag-mini status` to optimize indexing |
-
-> 💡 **Pro Tip**: Always start with `rag-mini status` to confirm indexing before searching.
-
-This document is dynamically updated as the RAG system evolves. Always verify commands with `rag-mini --help` for the latest options.
--- a/docs/BEGINNER_GLOSSARY.md
+++ b/docs/BEGINNER_GLOSSARY.md
@ -1,202 +0,0 @@
-# 📚 Beginner's Glossary - RAG Terms Made Simple
-
-*Confused by all the technical terms? Don't worry! This guide explains everything in plain English.*
-
---
-
-## 🤖 **RAG** - Retrieval Augmented Generation
-**What it is:** A fancy way of saying "search your code and get AI explanations"
-
-**Simple explanation:** Instead of just searching for keywords (like Google), RAG finds code that's *similar in meaning* to what you're looking for, then has an AI explain it to you.
-
-**Real example:** 
- You search for "user authentication"  
- RAG finds code about login systems, password validation, and user sessions
- AI explains: "This code handles user logins using email/password, stores sessions in cookies, and validates users on each request"
-
---
-
-## 🧩 **Chunks** - Bite-sized pieces of your code
-**What it is:** Your code files broken into smaller, searchable pieces
-
-**Simple explanation:** RAG can't search entire huge files efficiently, so it breaks them into "chunks" - like cutting a pizza into slices. Each chunk is usually one function, one class, or a few related lines.
-
-**Why it matters:**
- Too small chunks = missing context ("this variable" but what variable?)
- Too big chunks = too much unrelated stuff in search results
- Just right = perfect context for understanding what code does
-
-**Real example:**
-```python
-# This would be one chunk:
-def login_user(email, password):
-    """Authenticate user with email and password."""
-    user = find_user_by_email(email)
-    if user and check_password(user, password):
-        create_session(user)
-        return True
-    return False
-```
-
---
-
-## 🧠 **Embeddings** - Code "fingerprints" 
-**What it is:** A way to convert your code into numbers that computers can compare
-
-**Simple explanation:** Think of embeddings like DNA fingerprints for your code. Similar code gets similar fingerprints. The computer can then find code with similar "fingerprints" to what you're searching for.
-
-**The magic:** Code that does similar things gets similar embeddings, even if the exact words are different:
- `login_user()` and `authenticate()` would have similar embeddings
- `calculate_tax()` and `login_user()` would have very different embeddings
-
-**You don't need to understand the technical details** - just know that embeddings help find semantically similar code, not just exact word matches.
-
---
-
-## 🔍 **Vector Search** vs **Keyword Search**
-**Keyword search (like Google):** Finds exact word matches
- Search "login" → finds code with the word "login"
- Misses: authentication, signin, user_auth
-
-**Vector search (the RAG way):** Finds similar *meaning*
- Search "login" → finds login, authentication, signin, user validation
- Uses those embedding "fingerprints" to find similar concepts
-
-**FSS-Mini-RAG uses both** for the best results!
-
---
-
-## 📊 **Similarity Score** - How relevant is this result?
-**What it is:** A number from 0.0 to 1.0 showing how closely your search matches the result
-
-**Simple explanation:** 
- 1.0 = Perfect match (very rare)
- 0.8+ = Excellent match  
- 0.5+ = Good match
- 0.3+ = Somewhat relevant
- 0.1+ = Might be useful
- Below 0.1 = Probably not what you want
-
-**In practice:** Most useful results are between 0.2-0.8
-
---
-
-## 🎯 **BM25** - The keyword search boost
-**What it is:** A fancy algorithm that finds exact word matches (like Google search)
-
-**Simple explanation:** While embeddings find *similar meaning*, BM25 finds *exact words*. Using both together gives you the best of both worlds.
-
-**Example:**
- You search for "password validation"
- Embeddings find: authentication functions, login methods, user security
- BM25 finds: code with the exact words "password" and "validation"
- Combined = comprehensive results
-
-**Keep it enabled** unless you're getting too many irrelevant results.
-
---
-
-## 🔄 **Query Expansion** - Making your search smarter
-**What it is:** Automatically adding related terms to your search
-
-**Simple explanation:** When you search for "auth", the system automatically expands it to "auth authentication login signin user validate".
-
-**Pros:** Much better, more comprehensive results  
-**Cons:** Slower search, sometimes too broad
-
-**When to use:**
- Turn ON for: Complex searches, learning new codebases
- Turn OFF for: Quick lookups, very specific searches
-
---
-
-## 🤖 **LLM** - Large Language Model (The AI Brain)
-**What it is:** The AI that reads your search results and explains them in plain English
-
-**Simple explanation:** After finding relevant code chunks, the LLM reads them like a human would and gives you a summary like: "This code handles user registration by validating email format, checking for existing users, hashing passwords, and saving to database."
-
-**Models you might see:**
- **qwen3:0.6b** - Ultra-fast, good for most questions
- **qwen3:4b** - Slower but more detailed
- **auto** - Picks the best available model
-
---
-
-## 🧮 **Synthesis** vs **Exploration** - Two ways to get answers
-
-### 🚀 **Synthesis Mode** (Fast & Consistent)
-**What it does:** Quick, factual answers about your code
-**Best for:** "What does this function do?" "Where is authentication handled?" "How does the database connection work?"
-**Speed:** Very fast (no "thinking" overhead)
-
-### 🧠 **Exploration Mode** (Deep & Interactive)
-**What it does:** Detailed analysis with reasoning, remembers conversation
-**Best for:** "Why is this function slow?" "What are the security issues here?" "How would I add a new feature?"
-**Features:** Shows its reasoning process, you can ask follow-up questions
-
---
-
-## ⚡ **Streaming** - Handling huge files without crashing
-**What it is:** Processing large files in smaller batches instead of all at once
-
-**Simple explanation:** Imagine trying to eat an entire cake at once vs. eating it slice by slice. Streaming is like eating slice by slice - your computer won't choke on huge files.
-
-**When it kicks in:** Files larger than 1MB (that's about 25,000 lines of code)
-
---
-
-## 🏷️ **Semantic** vs **Fixed** Chunking
-**Semantic chunking (RECOMMENDED):** Smart splitting that respects code structure
- Keeps functions together
- Keeps classes together  
- Respects natural code boundaries
-
-**Fixed chunking:** Simple splitting that just cuts at size limits
- Faster processing
- Might cut functions in half
- Less intelligent but more predictable
-
-**For beginners:** Always use semantic chunking unless you have a specific reason not to.
-
---
-
-## ❓ **Common Questions**
-
-**Q: Do I need to understand embeddings to use this?**  
-A: Nope! Just know they help find similar code. The system handles all the technical details.
-
-**Q: What's a good similarity threshold for beginners?**  
-A: Start with 0.1. If you get too many results, try 0.2. If you get too few, try 0.05.
-
-**Q: Should I enable query expansion?**  
-A: For learning new codebases: YES. For quick specific searches: NO. The TUI enables it automatically when helpful.
-
-**Q: Which embedding method should I choose?**  
-A: Use "auto" - it tries the best option and falls back gracefully if needed.
-
-**Q: What if I don't have Ollama installed?**  
-A: No problem! The system will automatically fall back to other methods that work without any additional software.
-
---
-
-## 🚀 **Quick Start Recommendations**
-
-**For absolute beginners:**
-1. Keep all default settings
-2. Use the TUI interface to start
-3. Try simple searches like "user login" or "database connection"
-4. Gradually try the CLI commands as you get comfortable
-
-**For faster results:**
- Set `similarity_threshold: 0.2`
- Set `expand_queries: false`
- Use synthesis mode instead of exploration
-
-**For learning new codebases:**
- Set `expand_queries: true`
- Use exploration mode
- Ask "why" and "how" questions
-
---
-
-**Remember:** This is a learning tool! Don't be afraid to experiment with settings and see what works best for your projects. The beauty of FSS-Mini-RAG is that it's designed to be beginner-friendly while still being powerful.
--- a/docs/CPU_DEPLOYMENT.md
+++ b/docs/CPU_DEPLOYMENT.md
@ -1,201 +0,0 @@
-# CPU-Only Deployment Guide
-
-## Ultra-Lightweight RAG for Any Computer
-
-FSS-Mini-RAG can run on **CPU-only systems** using the tiny qwen3:0.6b model (522MB). Perfect for laptops, older computers, or systems without GPUs.
-
-## Quick Setup (CPU-Optimized)
-
-### 1. Install Ollama
-```bash
-# Install Ollama (works on CPU)
-curl -fsSL https://ollama.ai/install.sh | sh
-
-# Start Ollama server
-ollama serve
-```
-
-### 2. Install Ultra-Lightweight Models
-```bash
-# Embedding model (274MB) 
-ollama pull nomic-embed-text
-
-# Ultra-efficient LLM (522MB total)
-ollama pull qwen3:0.6b
-
-# Total model size: ~796MB (vs 5.9GB original)
-```
-
-### 3. Verify Setup
-```bash
-# Check models installed
-ollama list
-
-# Test the tiny model
-ollama run qwen3:0.6b "Hello, can you expand this query: authentication"
-```
-
-## Performance Expectations
-
-### qwen3:0.6b on CPU:
- **Model Size**: 522MB (fits in RAM easily)
- **Query Expansion**: ~200-500ms per query
- **LLM Synthesis**: ~1-3 seconds for analysis
- **Memory Usage**: ~1GB RAM total
- **Quality**: Excellent for RAG tasks (as tested)
-
-### Comparison:
-| Model | Size | CPU Speed | Quality |
-|-------|------|-----------|---------|
-| qwen3:0.6b | 522MB | Fast ⚡ | Excellent ✅ |
-| qwen3:1.7b | 1.4GB | Medium | Excellent ✅ |
-| qwen3:4b | 2.5GB | Slow | Excellent ✅ |
-
-## CPU-Optimized Configuration
-
-Edit `config.yaml`:
-
-```yaml
-# Ultra-efficient settings for CPU-only systems
-llm:
-  synthesis_model: qwen3:0.6b    # Force ultra-efficient model
-  expansion_model: qwen3:0.6b    # Same for expansion
-  cpu_optimized: true            # Enable CPU optimizations
-  max_expansion_terms: 6         # Fewer terms = faster expansion
-  synthesis_temperature: 0.2     # Lower temp = faster generation
-
-# Aggressive caching for CPU systems  
-search:
-  expand_queries: false          # Enable only in TUI
-  default_top_k: 8               # Slightly fewer results for speed
-```
-
-## System Requirements
-
-### Minimum:
- **RAM**: 2GB available 
- **CPU**: Any x86_64 or ARM64
- **Storage**: 1GB for models + project data
- **OS**: Linux, macOS, or Windows
-
-### Recommended:
- **RAM**: 4GB+ available
- **CPU**: Multi-core (better performance)
- **Storage**: SSD for faster model loading
-
-## Performance Tips
-
-### For Maximum Speed:
-1. **Disable expansion by default** (enable only in TUI)
-2. **Use smaller result limits** (8 instead of 10)
-3. **Enable query caching** (built-in)
-4. **Use SSD storage** for model files
-
-### For Maximum Quality:
-1. **Enable expansion in TUI** (automatic)
-2. **Use synthesis for important queries** (`--synthesize`)
-3. **Increase expansion terms** (`max_expansion_terms: 8`)
-
-## Real-World Testing
-
-### Tested On:
- ✅ **Raspberry Pi 4** (8GB RAM): Works great!
- ✅ **Old ThinkPad** (4GB RAM): Perfectly usable
- ✅ **MacBook Air M1**: Blazing fast
- ✅ **Linux VM** (2GB RAM): Functional
-
-### Performance Results:
-```
-System: Old laptop (Intel i5-7200U, 8GB RAM)
-Model: qwen3:0.6b (522MB)
-
-Query Expansion: 300ms average
-LLM Synthesis: 2.1s average
-Memory Usage: ~900MB total
-Quality: Professional-grade analysis
-```
-
-## Example Usage
-
-```bash
-# Fast search (no expansion)
-rag-mini search ./project "authentication"
-
-# Thorough search (TUI auto-enables expansion) 
-./rag-tui
-
-# Deep analysis (with AI synthesis)
-rag-mini search ./project "error handling" --synthesize
-```
-
-## Why This Works
-
-The **qwen3:0.6b model is specifically optimized for efficiency**:
- ✅ **Quantized weights**: Smaller memory footprint
- ✅ **Efficient architecture**: Fast inference on CPU
- ✅ **Strong performance**: Surprisingly good quality for size
- ✅ **Perfect for RAG**: Excels at query expansion and analysis
-
-## Troubleshooting CPU Issues
-
-### Slow Performance?
-```bash
-# Check if GPU acceleration is unnecessarily active
-ollama ps
-
-# Force CPU-only mode if needed
-export OLLAMA_NUM_GPU=0
-ollama serve
-```
-
-### Memory Issues?
-```bash
-# Check model memory usage
-htop # or top
-
-# Use even smaller limits if needed
-rag-mini search project "query" --limit 5
-```
-
-### Quality Issues?
-```bash
-# Test the model directly
-ollama run qwen3:0.6b "Expand: authentication"
-
-# Run diagnostics
-python3 tests/troubleshoot.py
-```
-
-## Deployment Examples
-
-### Raspberry Pi
-```bash
-# Install on Raspberry Pi OS
-sudo apt update && sudo apt install curl
-curl -fsSL https://ollama.ai/install.sh | sh
-
-# Pull ARM64 models
-ollama pull qwen3:0.6b
-ollama pull nomic-embed-text
-
-# Total: ~800MB models on 8GB Pi = plenty of room!
-```
-
-### Docker (CPU-Only)
-```dockerfile
-FROM ollama/ollama:latest
-
-# Install models
-RUN ollama serve & sleep 5 && \
-    ollama pull qwen3:0.6b && \
-    ollama pull nomic-embed-text
-
-# Copy FSS-Mini-RAG
-COPY . /app
-WORKDIR /app
-
-# Run
-CMD ["./rag-mini", "status", "."]
-```
-
-This makes FSS-Mini-RAG accessible to **everyone** - no GPU required! 🚀
--- a/docs/DEPLOYMENT_GUIDE.md
+++ b/docs/DEPLOYMENT_GUIDE.md
@ -1,384 +0,0 @@
-# FSS-Mini-RAG Deployment Guide
-
-> **Run semantic search anywhere - from smartphones to edge devices**  
-> *Complete guide to deploying FSS-Mini-RAG on every platform imaginable*
-
-## Platform Compatibility Matrix
-
-| Platform | Status | AI Features | Installation | Notes |
-|----------|--------|-------------|--------------|-------|
-| **Linux** | ✅ Full | ✅ Full | `./install_mini_rag.sh` | Primary platform |
-| **Windows** | ✅ Full | ✅ Full | `install_windows.bat` | Desktop shortcuts |
-| **macOS** | ✅ Full | ✅ Full | `./install_mini_rag.sh` | Works perfectly |
-| **Raspberry Pi** | ✅ Excellent | ✅ AI ready | `./install_mini_rag.sh` | ARM64 optimized |
-| **Android (Termux)** | ✅ Good | 🟡 Limited | Manual install | Terminal interface |
-| **iOS (a-Shell)** | 🟡 Limited | ❌ Text only | Manual install | Sandbox limitations |
-| **Docker** | ✅ Excellent | ✅ Full | Dockerfile | Any platform |
-
-## Desktop & Server Deployment
-
-### 🐧 **Linux** (Primary Platform)
-```bash
-# Full installation with AI features
-./install_mini_rag.sh
-
-# What you get:
-# ✅ Desktop shortcuts (.desktop files)
-# ✅ Application menu integration  
-# ✅ Full AI model downloads
-# ✅ Complete terminal interface
-```
-
-### 🪟 **Windows** (Fully Supported)
-```cmd
-# Full installation with desktop integration
-install_windows.bat
-
-# What you get:
-# ✅ Desktop shortcuts (.lnk files)
-# ✅ Start Menu entries
-# ✅ Full AI model downloads  
-# ✅ Beautiful terminal interface
-```
-
-### 🍎 **macOS** (Excellent Support)
-```bash
-# Same as Linux - works perfectly
-./install_mini_rag.sh
-
-# Additional macOS optimizations:
-brew install python3           # If needed
-brew install ollama           # For AI features
-```
-
-**macOS-specific features:**
- Automatic path detection for common project locations
- Integration with Spotlight search locations
- Support for `.app` bundle creation (advanced)
-
-## Edge Device Deployment
-
-### 🥧 **Raspberry Pi** (Recommended Edge Platform)
-
-**Perfect for:**
- Home lab semantic search server
- Portable development environment  
- IoT project documentation search
- Offline code search station
-
-**Installation:**
-```bash
-# On Raspberry Pi OS (64-bit recommended)
-sudo apt update && sudo apt upgrade
-./install_mini_rag.sh
-
-# The installer automatically detects ARM and optimizes:
-# ✅ Suggests lightweight models (qwen3:0.6b)
-# ✅ Reduces memory usage
-# ✅ Enables efficient chunking
-```
-
-**Raspberry Pi optimized config:**
-```yaml
-# Automatically generated for Pi
-embedding:
-  preferred_method: ollama
-  ollama_model: nomic-embed-text  # 270MB - perfect for Pi
-
-llm:
-  synthesis_model: qwen3:0.6b     # 500MB - fast on Pi 4+
-  context_window: 4096            # Conservative memory use
-  cpu_optimized: true
-
-chunking:
-  max_size: 1500                  # Smaller chunks for efficiency
-```
-
-**Performance expectations:**
- **Pi 4 (4GB)**: Excellent performance, full AI features
- **Pi 4 (2GB)**: Good performance, text-only or small models
- **Pi 5**: Outstanding performance, handles large models
- **Pi Zero**: Text-only search (hash-based embeddings)
-
-### 🔧 **Other Edge Devices**
-
-**NVIDIA Jetson Series:**
- Overkill performance for this use case
- Can run largest models with GPU acceleration
- Perfect for AI-heavy development workstations
-
-**Intel NUC / Mini PCs:**
- Excellent performance
- Full desktop experience
- Can serve multiple users simultaneously
-
-**Orange Pi / Rock Pi:**
- Similar to Raspberry Pi
- Same installation process
- May need manual Ollama compilation
-
-## Mobile Deployment
-
-### 📱 **Android (Recommended: Termux)**
-
-**Installation in Termux:**
-```bash
-# Install Termux from F-Droid (not Play Store)
-# In Termux:
-pkg update && pkg upgrade
-pkg install python python-pip git
-pip install --upgrade pip
-
-# Clone and install FSS-Mini-RAG
-git clone https://github.com/your-repo/fss-mini-rag
-cd fss-mini-rag
-
-# Install dependencies (5-15 minutes due to compilation)
-python -m pip install -r requirements.txt  # Large downloads + ARM compilation
-python -m pip install .                    # ~1 minute
-
-# Quick start
-python -m mini_rag index /storage/emulated/0/Documents/myproject
-python -m mini_rag search /storage/emulated/0/Documents/myproject "your query"
-```
-
-**Android-optimized config:**
-```yaml
-# config-android.yaml
-embedding:
-  preferred_method: hash    # No heavy models needed
-  
-chunking:
-  max_size: 800            # Small chunks for mobile
-  
-files:
-  min_file_size: 20        # Include more small files
-  
-llm:
-  enable_synthesis: false  # Text-only for speed
-```
-
-**What works on Android:**
- ✅ Full text search and indexing
- ✅ Terminal interface (`rag-tui`)
- ✅ Project indexing from phone storage
- ✅ Search your phone's code projects
- ❌ Heavy AI models (use cloud providers instead)
-
-**Android use cases:**
- Search your mobile development projects
- Index documentation on your phone
- Quick code reference while traveling
- Offline search of downloaded repositories
-
-### 🍎 **iOS (Limited but Possible)**
-
-**Option 1: a-Shell (Free)**
-```bash
-# Install a-Shell from App Store
-# In a-Shell:
-pip install requests pathlib
-
-# Limited installation (core features only)
-# Files must be in app sandbox
-```
-
-**Option 2: iSH (Alpine Linux)**
-```bash
-# Install iSH from App Store  
-# In iSH terminal:
-apk add python3 py3-pip git
-pip install -r requirements-light.txt
-
-# Basic functionality only
-```
-
-**iOS limitations:**
- Sandbox restricts file access
- No full AI model support
- Terminal interface only
- Limited to app-accessible files
-
-## Specialized Deployment Scenarios
-
-### 🐳 **Docker Deployment**
-
-**For any platform with Docker:**
-```dockerfile
-# Dockerfile
-FROM python:3.11-slim
-
-WORKDIR /app
-COPY . .
-RUN pip install -r requirements.txt
-
-# Expose ports for server mode
-EXPOSE 7777
-
-# Default to TUI interface
-CMD ["python", "-m", "mini_rag.cli"]
-```
-
-**Usage:**
-```bash
-# Build and run
-docker build -t fss-mini-rag .
-docker run -it -v $(pwd)/projects:/projects fss-mini-rag
-
-# Server mode for web access
-docker run -p 7777:7777 fss-mini-rag python -m mini_rag server
-```
-
-### ☁️ **Cloud Deployment**
-
-**AWS/GCP/Azure VM:**
- Same as Linux installation
- Can serve multiple users
- Perfect for team environments
-
-**GitHub Codespaces:**
-```bash
-# Works in any Codespace
-./install_mini_rag.sh
-# Perfect for searching your workspace
-```
-
-**Replit/CodeSandbox:**
- Limited by platform restrictions
- Basic functionality available
-
-### 🏠 **Home Lab Integration**
-
-**Home Assistant Add-on:**
- Package as Home Assistant add-on
- Search home automation configs
- Voice integration possible
-
-**NAS Integration:**
- Install on Synology/QNAP
- Search all stored documents
- Family code documentation
-
-**Router with USB:**
- Install on OpenWrt routers with USB storage
- Search network documentation
- Configuration management
-
-## Configuration by Use Case
-
-### 🪶 **Ultra-Lightweight (Old hardware, mobile)**
-```yaml
-# Minimal resource usage
-embedding:
-  preferred_method: hash
-chunking:
-  max_size: 800
-  strategy: fixed
-llm:
-  enable_synthesis: false
-```
-
-### ⚖️ **Balanced (Raspberry Pi, older laptops)**
-```yaml
-# Good performance with AI features
-embedding:
-  preferred_method: ollama
-  ollama_model: nomic-embed-text
-llm:
-  synthesis_model: qwen3:0.6b
-  context_window: 4096
-```
-
-### 🚀 **Performance (Modern hardware)**
-```yaml
-# Full features and performance
-embedding:
-  preferred_method: ollama
-  ollama_model: nomic-embed-text
-llm:
-  synthesis_model: qwen3:1.7b
-  context_window: 16384
-  enable_thinking: true
-```
-
-### ☁️ **Cloud-Hybrid (Mobile + Cloud AI)**
-```yaml
-# Local search, cloud intelligence
-embedding:
-  preferred_method: hash
-llm:
-  provider: openai
-  api_key: your_api_key
-  synthesis_model: gpt-4
-```
-
-## Troubleshooting by Platform
-
-### **Raspberry Pi Issues**
- **Out of memory**: Reduce context window, use smaller models
- **Slow indexing**: Use hash-based embeddings
- **Model download fails**: Check internet, use smaller models
-
-### **Android/Termux Issues**  
- **Permission denied**: Use `termux-setup-storage`
- **Package install fails**: Update packages first
- **Can't access files**: Use `/storage/emulated/0/` paths
-
-### **iOS Issues**
- **Limited functionality**: Expected due to iOS restrictions
- **Can't install packages**: Use lighter requirements file
- **File access denied**: Files must be in app sandbox
-
-### **Edge Device Issues**
- **ARM compatibility**: Ensure using ARM64 Python packages
- **Limited RAM**: Use hash embeddings, reduce chunk sizes
- **No internet**: Skip AI model downloads, use text-only
-
-## Advanced Edge Deployments
-
-### **IoT Integration**
- Index sensor logs and configurations
- Search device documentation
- Troubleshoot IoT deployments
-
-### **Offline Development**
- Complete development environment on edge device
- No internet required after setup
- Perfect for remote locations
-
-### **Educational Use**
- Raspberry Pi computer labs
- Student project search
- Coding bootcamp environments
-
-### **Enterprise Edge**
- Factory floor documentation search
- Field service technical reference
- Remote site troubleshooting
-
---
-
-## Quick Start by Platform
-
-### Desktop Users
-```bash
-# Linux/macOS
-./install_mini_rag.sh
-
-# Windows  
-install_windows.bat
-```
-
-### Edge/Mobile Users
-```bash
-# Raspberry Pi
-./install_mini_rag.sh
-
-# Android (Termux) - 5-15 minutes due to ARM compilation
-pkg install python git && python -m pip install -r requirements.txt && python -m pip install .
-
-# Any Docker platform
-docker run -it fss-mini-rag
-```
-
-**💡 Pro tip**: Start with your current platform, then expand to edge devices as needed. The system scales from smartphones to servers seamlessly!
--- a/docs/DEPLOYMENT_ROADMAP.md
+++ b/docs/DEPLOYMENT_ROADMAP.md
@ -1,288 +0,0 @@
-# FSS-Mini-RAG Distribution: Production Deployment Roadmap
-
-> **Status**: Infrastructure complete, systematic testing required before production release
-
-## Executive Summary
-
-You're absolutely right that I rushed through the implementation without proper testing. We've built a comprehensive modern distribution system, but now need **systematic, thorough testing** before deployment.
-
-### 🏗️ **What We've Built (Infrastructure Complete)**
- ✅ Enhanced pyproject.toml with proper PyPI metadata
- ✅ One-line install scripts (Linux/macOS/Windows) 
- ✅ Zipapp builder for portable distribution
- ✅ GitHub Actions for automated wheel building + PyPI publishing
- ✅ Updated documentation with modern installation methods
- ✅ Comprehensive testing framework
-
-### 📊 **Current Test Results**
- **Phase 1 (Structure)**: 5/6 tests passed ✅
- **Phase 2 (Building)**: 3/5 tests passed ⚠️
- **Zipapp**: Successfully created (172.5 MB) but has numpy issues
- **Build system**: Works but needs proper environment setup
-
-## Critical Testing Gaps
-
-### 🔴 **Must Test Before Release**
-
-#### **Environment Testing**
- [ ] **Multiple Python versions** (3.8-3.12) in clean environments
- [ ] **Cross-platform testing** (Linux/macOS/Windows)
- [ ] **Dependency resolution** in various configurations
- [ ] **Virtual environment compatibility**
-
-#### **Installation Method Testing**  
- [ ] **uv tool install** - Modern fast installation
- [ ] **pipx install** - Isolated tool installation  
- [ ] **pip install --user** - Traditional user installation
- [ ] **Zipapp execution** - Single-file distribution
- [ ] **Install script testing** - One-line installers
-
-#### **Real-World Scenario Testing**
- [ ] **Fresh system installation** (following README exactly)
- [ ] **Corporate firewall scenarios** 
- [ ] **Offline installation** (with pre-downloaded packages)
- [ ] **Error recovery scenarios** (network failures, permission issues)
-
-#### **GitHub Actions Testing**
- [ ] **Local workflow testing** with `act`
- [ ] **Fork testing** with real CI environment
- [ ] **TestPyPI publishing** (safe production test)
- [ ] **Release creation** and asset uploading
-
-## Phase-by-Phase Deployment Strategy
-
-### **Phase 1: Local Environment Validation** ⏱️ 4-6 hours
-
-**Objective**: Ensure packages build and install correctly locally
-
-```bash
-# Environment setup
-docker run -it --rm -v $(pwd):/work ubuntu:22.04
-# Test in clean Ubuntu, CentOS, Alpine containers
-
-# Install script testing  
-curl -fsSL file:///work/install.sh | bash
-# Verify rag-mini command works
-rag-mini init -p /tmp/test && rag-mini search -p /tmp/test "test query"
-```
-
-**Success Criteria**: 
- Install scripts work in 3+ Linux distributions
- All installation methods (uv/pipx/pip) succeed
- Basic functionality works after installation
-
-### **Phase 2: Cross-Platform Testing** ⏱️ 6-8 hours
-
-**Objective**: Verify Windows/macOS compatibility
-
-**Testing Matrix**:
-| Platform | Python | Method | Status |
-|----------|--------|---------|--------|
-| Ubuntu 22.04 | 3.8-3.12 | uv/pipx/pip | ⏳ |
-| Windows 11 | 3.9-3.12 | PowerShell | ⏳ |  
-| macOS 13+ | 3.10-3.12 | Homebrew | ⏳ |
-| Alpine Linux | 3.11+ | pip | ⏳ |
-
-**Tools Needed**:
- GitHub Codespaces or cloud VMs
- Windows test environment
- macOS test environment (if available)
-
-### **Phase 3: CI/CD Pipeline Testing** ⏱️ 4-6 hours
-
-**Objective**: Validate automated publishing workflow
-
-```bash
-# Local GitHub Actions testing
-brew install act  # or equivalent
-act --list
-act -j build-wheels --dry-run
-act -j test-installation
-```
-
-**Fork Testing Process**:
-1. Create test fork with Actions enabled
-2. Push distribution changes to test branch
-3. Create test tag to trigger release workflow
-4. Verify wheel building across all platforms
-5. Test TestPyPI publishing
-
-### **Phase 4: TestPyPI Validation** ⏱️ 2-3 hours
-
-**Objective**: Safe production testing with TestPyPI
-
-```bash
-# Upload to TestPyPI
-python -m twine upload --repository testpypi dist/*
-
-# Test installation from TestPyPI
-pip install --index-url https://test.pypi.org/simple/ fss-mini-rag
-
-# Verify functionality
-rag-mini --version
-rag-mini init -p test_project
-```
-
-### **Phase 5: Production Release** ⏱️ 2-4 hours
-
-**Objective**: Live production deployment
-
-**Pre-Release Checklist**:
- [ ] All tests from Phases 1-4 pass
- [ ] Documentation is accurate
- [ ] Install scripts are publicly accessible
- [ ] GitHub release template is ready
- [ ] Rollback plan is prepared
-
-**Release Process**:
-1. Final validation in clean environment
-2. Create production Git tag
-3. Monitor GitHub Actions workflow
-4. Verify PyPI publication
-5. Test install scripts from live URLs
-6. Update documentation links
-
-## Testing Tools & Infrastructure
-
-### **Required Tools**
- **Docker** - Clean environment testing
- **act** - Local GitHub Actions testing
- **Multiple Python versions** (pyenv/conda)
- **Cross-platform access** (Windows/macOS VMs)
- **Network simulation** - Firewall/offline testing
-
-### **Test Environments**
-
-#### **Container-Based Testing**
-```bash
-# Ubuntu testing
-docker run -it --rm -v $(pwd):/work ubuntu:22.04
-apt update && apt install -y python3 python3-pip curl
-curl -fsSL file:///work/install.sh | bash
-
-# CentOS testing  
-docker run -it --rm -v $(pwd):/work centos:7
-yum install -y python3 python3-pip curl
-curl -fsSL file:///work/install.sh | bash
-
-# Alpine testing
-docker run -it --rm -v $(pwd):/work alpine:latest
-apk add --no-cache python3 py3-pip curl bash
-curl -fsSL file:///work/install.sh | bash
-```
-
-#### **GitHub Codespaces Testing**
- Ubuntu 22.04 environment
- Pre-installed development tools
- Network access for testing install scripts
-
-### **Automated Test Suite**
-
-We've created comprehensive test scripts:
-
-```bash
-# Current test scripts (ready to use)
-python scripts/validate_setup.py      # File structure ✅
-python scripts/phase1_basic_tests.py  # Import/structure ✅  
-python scripts/phase2_build_tests.py  # Package building ⚠️
-
-# Needed test scripts (to be created)
-python scripts/phase3_install_tests.py    # Installation methods
-python scripts/phase4_integration_tests.py # End-to-end workflows
-python scripts/phase5_performance_tests.py # Speed/size benchmarks
-```
-
-## Risk Assessment & Mitigation
-
-### **🔴 Critical Risks**
-
-#### **Zipapp Compatibility Issues**
- **Risk**: 172.5 MB zipapp with numpy C-extensions may not work across systems
- **Mitigation**: Consider PyInstaller or exclude zipapp from initial release
- **Test**: Cross-platform zipapp execution testing
-
-#### **Install Script Security**
- **Risk**: Users running scripts from internet with `curl | bash`
- **Mitigation**: Script security audit, HTTPS verification, clear error handling
- **Test**: Security review and edge case testing
-
-#### **Dependency Hell**
- **Risk**: ML dependencies (numpy, torch, etc.) causing installation failures
- **Mitigation**: Comprehensive dependency testing, clear system requirements
- **Test**: Fresh system installation in multiple environments
-
-### **🟡 Medium Risks**
-
-#### **GitHub Actions Costs**
- **Risk**: Matrix builds across platforms may consume significant CI minutes
- **Mitigation**: Optimize build matrix, use caching effectively
- **Test**: Monitor CI usage during testing phase
-
-#### **PyPI Package Size**
- **Risk**: Large package due to ML dependencies
- **Mitigation**: Consider optional dependencies, clear documentation
- **Test**: Package size optimization testing
-
-### **🟢 Low Risks**
-
- Documentation accuracy (easily fixable)
- Minor metadata issues (quick updates)
- README formatting (cosmetic fixes)
-
-## Timeline & Resource Requirements
-
-### **Realistic Timeline**
- **Phase 1-2 (Local/Cross-platform)**: 2-3 days
- **Phase 3 (CI/CD)**: 1 day  
- **Phase 4 (TestPyPI)**: 1 day
- **Phase 5 (Production)**: 1 day
- **Buffer for issues**: 2-3 days
-
-**Total: 1-2 weeks for comprehensive testing**
-
-### **Resource Requirements**
- Development time: 40-60 hours
- Testing environments: Docker, VMs, or cloud instances
- TestPyPI account setup
- PyPI production credentials
- Monitoring and rollback capabilities
-
-## Success Metrics
-
-### **Quantitative Metrics**
- **Installation success rate**: >95% across test environments
- **Installation time**: <5 minutes from script start to working command
- **Package size**: <200MB for wheels, <300MB for zipapp
- **Test coverage**: 100% of installation methods tested
-
-### **Qualitative Metrics**  
- **User experience**: Clear error messages, helpful guidance
- **Documentation quality**: Accurate, easy to follow
- **Maintainability**: Easy to update and extend
- **Professional appearance**: Consistent with modern Python tools
-
-## Next Steps (Immediate)
-
-### **This Week**
-1. **Set up Docker test environments** (2-3 hours)
-2. **Test install scripts in containers** (4-6 hours)
-3. **Fix identified issues** (varies by complexity)
-4. **Create Phase 3 test scripts** (2-3 hours)
-
-### **Next Week**  
-1. **Cross-platform testing** (8-12 hours)
-2. **GitHub Actions validation** (4-6 hours)
-3. **TestPyPI trial run** (2-3 hours)
-4. **Documentation refinement** (2-4 hours)
-
-## Conclusion
-
-We have built excellent infrastructure, but **you were absolutely right** that proper testing is essential. The distribution system we've created is professional-grade and will work beautifully—but only after systematic validation.
-
-**The testing plan is comprehensive because we're doing this right.** Modern users expect seamless installation experiences, and we're delivering exactly that.
-
-**Current Status**: Infrastructure complete ✅, comprehensive testing required ⏳  
-**Confidence Level**: High for architecture, medium for production readiness  
-**Recommendation**: Proceed with systematic testing before any production release
-
-This roadmap ensures we ship a distribution system that works flawlessly for every user, every time. 🚀
--- a/docs/DIAGRAMS.md
+++ b/docs/DIAGRAMS.md
@ -11,7 +11,6 @@
 - [Search Architecture](#search-architecture)
 - [Installation Flow](#installation-flow)
 - [Configuration System](#configuration-system)
- [System Context Integration](#system-context-integration)
 - [Error Handling](#error-handling)

 ## System Overview
@ -23,12 +22,10 @@ graph TB
    
    CLI --> Index[📁 Index Project]
    CLI --> Search[🔍 Search Project]
-    CLI --> Explore[🧠 Explore Project]
    CLI --> Status[📊 Show Status]
    
    TUI --> Index
    TUI --> Search
-    TUI --> Explore
    TUI --> Config[⚙️ Configuration]
    
    Index --> Files[📄 File Discovery]
@ -37,32 +34,17 @@ graph TB
    Embed --> Store[💾 Vector Database]
    
    Search --> Query[❓ User Query]
-    Search --> Context[🖥️ System Context]
    Query --> Vector[🎯 Vector Search]
    Query --> Keyword[🔤 Keyword Search]
    Vector --> Combine[🔄 Hybrid Results]
    Keyword --> Combine
-    Context --> Combine
-    Combine --> Synthesize{Synthesis Mode?}
-    
-    Synthesize -->|Yes| FastLLM[⚡ Fast Synthesis]
-    Synthesize -->|No| Results[📋 Ranked Results]
-    FastLLM --> Results
-    
-    Explore --> ExploreQuery[❓ Interactive Query]
-    ExploreQuery --> Memory[🧠 Conversation Memory]
-    ExploreQuery --> Context
-    Memory --> DeepLLM[🤔 Deep AI Analysis]
-    Context --> DeepLLM
-    Vector --> DeepLLM
-    DeepLLM --> Interactive[💬 Interactive Response]
+    Combine --> Results[📋 Ranked Results]
    
    Store --> LanceDB[(🗄️ LanceDB)]
    Vector --> LanceDB
    
    Config --> YAML[📝 config.yaml]
    Status --> Manifest[📋 manifest.json]
-    Context --> SystemInfo[💻 OS, Python, Paths]
 ```

 ## User Journey
@ -294,58 +276,6 @@ flowchart TD
    style Error fill:#ffcdd2
 ```

-## System Context Integration
-
-```mermaid
-graph LR
-    subgraph "System Detection"
-        OS[🖥️ Operating System]
-        Python[🐍 Python Version] 
-        Project[📁 Project Path]
-        
-        OS --> Windows[Windows: rag.bat]
-        OS --> Linux[Linux: ./rag-mini]
-        OS --> macOS[macOS: ./rag-mini]
-    end
-    
-    subgraph "Context Collection"
-        Collect[🔍 Collect Context]
-        OS --> Collect
-        Python --> Collect
-        Project --> Collect
-        
-        Collect --> Format[📝 Format Context]
-        Format --> Limit[✂️ Limit to 200 chars]
-    end
-    
-    subgraph "AI Integration"
-        UserQuery[❓ User Query] 
-        SearchResults[📋 Search Results]
-        SystemContext[💻 System Context]
-        
-        UserQuery --> Prompt[📝 Build Prompt]
-        SearchResults --> Prompt
-        SystemContext --> Prompt
-        
-        Prompt --> AI[🤖 LLM Processing]
-        AI --> Response[💬 Contextual Response]
-    end
-    
-    subgraph "Enhanced Responses"
-        Response --> Commands[💻 OS-specific commands]
-        Response --> Paths[📂 Correct path formats]
-        Response --> Tips[💡 Platform-specific tips]
-    end
-    
-    Format --> SystemContext
-    
-    style SystemContext fill:#e3f2fd
-    style Response fill:#f3e5f5
-    style Commands fill:#e8f5e8
-```
-
-*System context helps the AI provide better, platform-specific guidance without compromising privacy*
-
 ## Architecture Layers

 ```mermaid
--- a/docs/FALLBACK_SETUP.md
+++ b/docs/FALLBACK_SETUP.md
@ -2,38 +2,32 @@

 This RAG system can operate in three modes:

-## 🚀 **Mode 1: Standard Installation (Recommended)**
+## 🚀 **Mode 1: Ollama Only (Recommended - Lightweight)**
 ```bash
-python3 -m venv .venv
-.venv/bin/python -m pip install -r requirements.txt  # 2-8 minutes
-.venv/bin/python -m pip install .                    # ~1 minute
-source .venv/bin/activate
+pip install -r requirements-light.txt
+# Requires: ollama serve running with nomic-embed-text model
 ```
- **Size**: ~123MB total (LanceDB 36MB + PyArrow 43MB + PyLance 44MB)  
- **Performance**: Excellent hybrid embedding system
- **Timing**: 2-3 minutes fast internet, 5-10 minutes slow internet
+- **Size**: ~426MB total  
+- **Performance**: Fastest (leverages Ollama)
+- **Network**: Uses local Ollama server

-## 🔄 **Mode 2: Light Installation (Alternative)** 
+## 🔄 **Mode 2: Hybrid (Best of Both Worlds)** 
 ```bash
-python3 -m venv .venv
-.venv/bin/python -m pip install -r requirements-light.txt  # If available
-.venv/bin/python -m pip install .
-source .venv/bin/activate
+pip install -r requirements-full.txt  
+# Works with OR without Ollama
 ```
- **Size**: ~426MB total (includes basic dependencies only)
- **Requires**: Ollama server running locally
- **Use case**: Minimal installations, edge devices
+- **Size**: ~3GB total (includes ML fallback)
+- **Resilience**: Automatic fallback if Ollama unavailable
+- **Performance**: Ollama speed when available, ML fallback when needed

-## 🛡️ **Mode 3: Full Installation (Maximum Features)**
+## 🛡️ **Mode 3: ML Only (Maximum Compatibility)**
 ```bash
-python3 -m venv .venv
-.venv/bin/python -m pip install -r requirements-full.txt  # If available
-.venv/bin/python -m pip install .
-source .venv/bin/activate
+pip install -r requirements-full.txt
+# Disable Ollama fallback in config
 ```
- **Size**: ~3GB total (includes all ML fallbacks)
- **Compatibility**: Works anywhere, all features enabled  
- **Use case**: Offline environments, complete feature set
+- **Size**: ~3GB total
+- **Compatibility**: Works anywhere, no external dependencies
+- **Use case**: Offline environments, embedded systems

 ## 🔧 **Configuration**

--- a/docs/GETTING_STARTED.md
+++ b/docs/GETTING_STARTED.md
@ -1,332 +1,212 @@
 # Getting Started with FSS-Mini-RAG

-> **Get from zero to searching in 2 minutes**  
-> *Everything you need to know to start finding code by meaning, not just keywords*
+## Step 1: Installation

-## Installation (Choose Your Adventure)
+Choose your installation based on what you want:

-### 🎯 **Option 1: Full Installation (Recommended)**
-*Gets you everything working reliably with desktop shortcuts and AI features*
-
-**Linux/macOS:**
-```bash
-./install_mini_rag.sh
-```
-
-**Windows:**
-```cmd
-install_windows.bat
-```
-
-**What this does:**
- Sets up Python environment automatically
- Installs all dependencies 
- Downloads AI models (with your permission)
- Creates desktop shortcuts and application menu entries
- Tests everything works
- Gives you an interactive tutorial
-
-**Time needed:** 5-10 minutes (depending on AI model downloads)
-
---
-
-### 🚀 **Option 2: Copy & Try (Experimental)**
-*Just copy the folder and run - may work, may need manual setup*
-
-**Linux/macOS:**
-```bash
-# Copy folder anywhere and try running
-./rag-mini index ~/my-project
-# Auto-setup attempts to create virtual environment
-# Falls back with clear instructions if it fails
-```
-
-**Windows:**
-```cmd
-# Copy folder anywhere and try running  
-rag.bat index C:\my-project
-# Auto-setup attempts to create virtual environment
-# Shows helpful error messages if manual install needed
-```
-
-**Time needed:** 30 seconds if it works, 10 minutes if you need manual setup
-
---
-
-## First Search (The Fun Part!)
-
-### Step 1: Choose Your Interface
-
-**For Learning and Exploration:**
-```bash
-# Linux/macOS
-./rag-tui
-
-# Windows  
-rag.bat
-```
-*Interactive menus, shows you CLI commands as you learn*
-
-**For Quick Commands:**
-```bash
-# Linux/macOS
-./rag-mini <command> <project-path>
-
-# Windows
-rag.bat <command> <project-path>
-```
-*Direct commands when you know what you want*
-
-### Step 2: Index Your First Project
-
-**Interactive Way (Recommended for First Time):**
-```bash
-# Linux/macOS
-./rag-tui
-# Then: Select Project Directory → Index Project
-
-# Windows
-rag.bat  
-# Then: Select Project Directory → Index Project
-```
-
-**Direct Commands:**
-```bash
-# Linux/macOS
-./rag-mini index ~/my-project
-
-# Windows  
-rag.bat index C:\my-project
-```
-
-**What indexing does:**
- Finds all text files in your project
- Breaks them into smart "chunks" (functions, classes, logical sections)
- Creates searchable embeddings that understand meaning
- Stores everything in a fast vector database
- Creates a `.mini-rag/` directory with your search index
-
-**Time needed:** 10-60 seconds depending on project size
-
-### Step 3: Search by Meaning
-
-**Natural language queries:**
-```bash
-# Linux/macOS
-./rag-mini search ~/my-project "user authentication logic"
-./rag-mini search ~/my-project "error handling for database connections"
-./rag-mini search ~/my-project "how to validate input data"
-
-# Windows
-rag.bat search C:\my-project "user authentication logic"  
-rag.bat search C:\my-project "error handling for database connections"
-rag.bat search C:\my-project "how to validate input data"
-```
-
-**Code concepts:**
-```bash
-# Finds login functions, auth middleware, session handling
-./rag-mini search ~/my-project "login functionality"
-
-# Finds try/catch blocks, error handlers, retry logic  
-./rag-mini search ~/my-project "exception handling"
-
-# Finds validation functions, input sanitization, data checking
-./rag-mini search ~/my-project "data validation"
-```
-
-**What you get:**
- Ranked results by relevance (not just keyword matching)
- File paths and line numbers for easy navigation
- Context around each match so you understand what it does
- Smart filtering to avoid noise and duplicates
-
-## Two Powerful Modes
-
-FSS-Mini-RAG has two different ways to get answers, optimized for different needs:
-
-### 🚀 **Synthesis Mode** - Fast Answers
-```bash
-# Linux/macOS
-./rag-mini search ~/project "authentication logic" --synthesize
-
-# Windows  
-rag.bat search C:\project "authentication logic" --synthesize
-```
-
-**Perfect for:**
- Quick code discovery
- Finding specific functions or patterns
- Getting fast, consistent answers
-
-**What you get:**
- Lightning-fast responses (no thinking overhead)
- Reliable, factual information about your code
- Clear explanations of what code does and how it works
-
-### 🧠 **Exploration Mode** - Deep Understanding
-```bash  
-# Linux/macOS
-./rag-mini explore ~/project
-
-# Windows
-rag.bat explore C:\project
-```
-
-**Perfect for:**
- Learning new codebases
- Debugging complex issues  
- Understanding architectural decisions
-
-**What you get:**
- Interactive conversation with AI that remembers context
- Deep reasoning with full "thinking" process shown
- Follow-up questions and detailed explanations
- Memory of your previous questions in the session
-
-**Example exploration session:**
-```
-🧠 Exploration Mode - Ask anything about your project
-
-You: How does authentication work in this codebase?
-
-AI: Let me analyze the authentication system...
-
-💭 Thinking: I can see several authentication-related files. Let me examine 
-   the login flow, session management, and security measures...
-
-📝 Authentication Analysis:
-   This codebase uses a three-layer authentication system:
-   1. Login validation in auth.py handles username/password checking
-   2. Session management in sessions.py maintains user state  
-   3. Middleware in auth_middleware.py protects routes
-
-You: What security concerns should I be aware of?
-
-AI: Based on our previous discussion about authentication, let me check for
-   common security vulnerabilities...
-```
-
-## Check Your Setup
-
-**See what got indexed:**
-```bash
-# Linux/macOS  
-./rag-mini status ~/my-project
-
-# Windows
-rag.bat status C:\my-project
-```
-
-**What you'll see:**
- How many files were processed
- Total chunks created for searching
- Embedding method being used (Ollama, ML models, or hash-based)
- Configuration file location
- Index health and last update time
-
-## Configuration (Optional)
-
-Your project gets a `.mini-rag/config.yaml` file with helpful comments:
-
-```yaml
-# Context window configuration (critical for AI features)
-# 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users
-#               32K=large codebases, 64K+=power users only  
-# ⚠️  Larger contexts use exponentially more CPU/memory - only increase if needed
-context_window: 16384           # Context size in tokens
-
-# AI model preferences (edit to change priority)
-model_rankings:
-  - "qwen3:1.7b"    # Excellent for RAG (1.4GB, recommended)
-  - "qwen3:0.6b"    # Lightweight and fast (~500MB)  
-  - "qwen3:4b"      # Higher quality but slower (~2.5GB)
-```
-
-**When to customize:**
- Your searches aren't finding what you expect → adjust chunking settings
- You want AI features → install Ollama and download models
- System is slow → try smaller models or reduce context window
- Getting too many/few results → adjust similarity threshold
-
-## Troubleshooting
-
-### "Project not indexed" 
-**Problem:** You're trying to search before indexing
-```bash
-# Run indexing first
-./rag-mini index ~/my-project    # Linux/macOS
-rag.bat index C:\my-project      # Windows
-```
-
-### "No Ollama models available"
-**Problem:** AI features need models downloaded
+### Option A: Ollama Only (Recommended)
 ```bash
 # Install Ollama first
-curl -fsSL https://ollama.ai/install.sh | sh    # Linux/macOS
-# Or download from https://ollama.com            # Windows
+curl -fsSL https://ollama.ai/install.sh | sh

-# Start Ollama server
-ollama serve
+# Pull the embedding model  
+ollama pull nomic-embed-text

-# Download a model
-ollama pull qwen3:1.7b
+# Install Python dependencies
+pip install -r requirements.txt
 ```

-### "Virtual environment not found" 
-**Problem:** Auto-setup didn't work, need manual installation
-
-**Option A: Use installer scripts**
-```bash
-./install_mini_rag.sh          # Linux/macOS  
-install_windows.bat            # Windows
+### Option B: Full ML Stack
+```bash  
+# Install everything including PyTorch
+pip install -r requirements-full.txt
 ```

-**Option B: Manual method (100% reliable)**
+## Step 2: Test Installation
+
 ```bash
-# Linux/macOS
-python3 -m venv .venv
-.venv/bin/python -m pip install -r requirements.txt  # 2-8 minutes
-.venv/bin/python -m pip install .                    # ~1 minute  
-source .venv/bin/activate
+# Index this RAG system itself
+./rag-mini index ~/my-project

-# Windows  
-python -m venv .venv
-.venv\Scripts\python -m pip install -r requirements.txt  
-.venv\Scripts\python -m pip install .
-.venv\Scripts\activate.bat
-```
+# Search for something 
+./rag-mini search ~/my-project "chunker function"

-> **⏱️ Timing**: Fast internet 2-3 minutes total, slow internet 5-10 minutes due to large dependencies (LanceDB 36MB, PyArrow 43MB, PyLance 44MB).
-
-### Getting weird results
-**Solution:** Try different search terms or check what got indexed
-```bash
-# See what files were processed
+# Check what got indexed
 ./rag-mini status ~/my-project
-
-# Try more specific queries
-./rag-mini search ~/my-project "specific function name"
 ```

-## Next Steps
+## Step 3: Index Your First Project

-### Learn More
- **[Beginner's Glossary](BEGINNER_GLOSSARY.md)** - All the terms explained simply
- **[TUI Guide](TUI_GUIDE.md)** - Master the interactive interface
- **[Visual Diagrams](DIAGRAMS.md)** - See how everything works
+```bash
+# Index any project directory
+./rag-mini index /path/to/your/project

-### Advanced Features
- **[Query Expansion](QUERY_EXPANSION.md)** - Make searches smarter with AI
- **[LLM Providers](LLM_PROVIDERS.md)** - Use different AI models  
- **[CPU Deployment](CPU_DEPLOYMENT.md)** - Optimize for older computers
+# The system creates .mini-rag/ directory with:
+# - config.json (settings)
+# - manifest.json (file tracking)  
+# - database.lance/ (vector database)
+```

-### Customize Everything
- **[Technical Guide](TECHNICAL_GUIDE.md)** - How the system actually works
- **[Configuration Examples](../examples/)** - Pre-made configs for different needs
+## Step 4: Search Your Code

---
+```bash
+# Basic semantic search
+./rag-mini search /path/to/project "user login logic"

-**🎉 That's it!** You now have a semantic search system that understands your code by meaning, not just keywords. Start with simple searches and work your way up to the advanced AI features as you get comfortable.
+# Enhanced search with smart features  
+./rag-mini-enhanced search /path/to/project "authentication"

-**💡 Pro tip:** The best way to learn is to index a project you know well and try searching for things you know are in there. You'll quickly see how much better meaning-based search is than traditional keyword search.
+# Find similar patterns
+./rag-mini-enhanced similar /path/to/project "def validate_input"
+```
+
+## Step 5: Customize Configuration
+
+Edit `project/.mini-rag/config.json`:
+
+```json
+{
+  "chunking": {
+    "max_size": 3000,
+    "strategy": "semantic"  
+  },
+  "files": {
+    "min_file_size": 100
+  }
+}
+```
+
+Then re-index to apply changes:
+```bash
+./rag-mini index /path/to/project --force
+```
+
+## Common Use Cases
+
+### Find Functions by Name
+```bash
+./rag-mini search /project "function named connect_to_database" 
+```
+
+### Find Code Patterns  
+```bash
+./rag-mini search /project "error handling try catch"
+./rag-mini search /project "database query with parameters"
+```
+
+### Find Configuration
+```bash  
+./rag-mini search /project "database connection settings"
+./rag-mini search /project "environment variables"
+```
+
+### Find Documentation
+```bash
+./rag-mini search /project "how to deploy" 
+./rag-mini search /project "API documentation"
+```
+
+## Python API Usage
+
+```python
+from mini_rag import ProjectIndexer, CodeSearcher, CodeEmbedder
+from pathlib import Path
+
+# Initialize
+project_path = Path("/path/to/your/project")
+embedder = CodeEmbedder()
+indexer = ProjectIndexer(project_path, embedder)
+searcher = CodeSearcher(project_path, embedder)
+
+# Index the project
+print("Indexing project...")
+result = indexer.index_project()
+print(f"Indexed {result['files_processed']} files, {result['chunks_created']} chunks")
+
+# Search
+print("\nSearching for authentication code...")
+results = searcher.search("user authentication logic", limit=5)
+
+for i, result in enumerate(results, 1):
+    print(f"\n{i}. {result.file_path}")
+    print(f"   Score: {result.score:.3f}")
+    print(f"   Type: {result.chunk_type}")
+    print(f"   Content: {result.content[:100]}...")
+```
+
+## Advanced Features
+
+### Auto-optimization
+```bash
+# Get optimization suggestions
+./rag-mini-enhanced analyze /path/to/project
+
+# This analyzes your codebase and suggests:
+# - Better chunk sizes for your language mix
+# - Streaming settings for large files
+# - File filtering optimizations
+```
+
+### File Watching
+```python  
+from mini_rag import FileWatcher
+
+# Watch for file changes and auto-update index
+watcher = FileWatcher(project_path, indexer)
+watcher.start_watching()
+
+# Now any file changes automatically update the index
+```
+
+### Custom Chunking
+```python
+from mini_rag import CodeChunker
+
+chunker = CodeChunker()
+
+# Chunk a Python file
+with open("example.py") as f:
+    content = f.read()
+
+chunks = chunker.chunk_text(content, "python", "example.py")
+for chunk in chunks:
+    print(f"Type: {chunk.chunk_type}")
+    print(f"Content: {chunk.content}")
+```
+
+## Tips and Best Practices
+
+### For Better Search Results
+- Use descriptive phrases: "function that validates email addresses" 
+- Try different phrasings if first search doesn't work
+- Search for concepts, not just exact variable names
+
+### For Better Indexing
+- Exclude build directories: `node_modules/`, `build/`, `dist/`
+- Include documentation files - they often contain valuable context
+- Use semantic chunking strategy for most projects
+
+### For Configuration  
+- Start with default settings
+- Use `analyze` command to get optimization suggestions
+- Increase chunk size for larger functions/classes
+- Decrease chunk size for more granular search
+
+### For Troubleshooting
+- Check `./rag-mini status` to see what was indexed
+- Look at `.mini-rag/manifest.json` for file details
+- Run with `--force` to completely rebuild index
+- Check logs in `.mini-rag/` directory for errors
+
+## What's Next?
+
+1. Try the test suite to understand how components work:
+   ```bash
+   python -m pytest tests/ -v
+   ```
+
+2. Look at the examples in `examples/` directory
+
+3. Read the main README.md for complete technical details
+
+4. Customize the system for your specific project needs
--- a/docs/LLM_PROVIDERS.md
+++ b/docs/LLM_PROVIDERS.md
@ -1,264 +0,0 @@
-# 🤖 LLM Provider Setup Guide
-
-This guide shows how to configure FSS-Mini-RAG with different LLM providers for synthesis and query expansion features.
-
-## 🎯 Quick Provider Comparison
-
-| Provider | Cost | Setup Difficulty | Quality | Privacy | Internet Required |
-|----------|------|------------------|---------|---------|-------------------|
-| **Ollama** | Free | Easy | Good | Excellent | No |
-| **LM Studio** | Free | Easy | Good | Excellent | No |
-| **OpenRouter** | Low ($0.10-0.50/M) | Medium | Excellent | Fair | Yes |
-| **OpenAI** | Medium ($0.15-2.50/M) | Medium | Excellent | Fair | Yes |
-| **Anthropic** | Medium-High | Medium | Excellent | Fair | Yes |
-
-## 🏠 Local Providers (Recommended for Beginners)
-
-### Ollama (Default)
-
-**Best for:** Privacy, learning, no ongoing costs
-
-```yaml
-llm:
-  provider: ollama
-  ollama_host: localhost:11434
-  synthesis_model: qwen3:1.7b
-  expansion_model: qwen3:1.7b
-  enable_synthesis: false
-  synthesis_temperature: 0.3
-  cpu_optimized: true
-  enable_thinking: true
-```
-
-**Setup:**
-1. Install Ollama: `curl -fsSL https://ollama.ai/install.sh | sh`
-2. Start service: `ollama serve`
-3. Download model: `ollama pull qwen3:1.7b`
-4. Test: `./rag-mini search /path/to/project "test" --synthesize`
-
-**Recommended Models:**
- `qwen3:0.6b` - Ultra-fast, good for CPU-only systems
- `qwen3:1.7b` - Balanced quality and speed (recommended)
- `qwen3:4b` - Higher quality, excellent for most use cases
-
-### LM Studio
-
-**Best for:** GUI users, model experimentation
-
-```yaml
-llm:
-  provider: openai
-  api_base: http://localhost:1234/v1
-  api_key: "not-needed"
-  synthesis_model: "any"
-  expansion_model: "any"
-  enable_synthesis: false
-  synthesis_temperature: 0.3
-```
-
-**Setup:**
-1. Download [LM Studio](https://lmstudio.ai)
-2. Install any model from the catalog
-3. Start local server (default port 1234)
-4. Use config above
-
-## ☁️ Cloud Providers (For Advanced Users)
-
-### OpenRouter (Best Value)
-
-**Best for:** Access to many models, reasonable pricing
-
-```yaml
-llm:
-  provider: openai
-  api_base: https://openrouter.ai/api/v1
-  api_key: "your-api-key-here"
-  synthesis_model: "meta-llama/llama-3.1-8b-instruct:free"
-  expansion_model: "meta-llama/llama-3.1-8b-instruct:free"
-  enable_synthesis: false
-  synthesis_temperature: 0.3
-  timeout: 30
-```
-
-**Setup:**
-1. Sign up at [openrouter.ai](https://openrouter.ai)
-2. Create API key in dashboard
-3. Add $5-10 credits (goes far with efficient models)
-4. Replace `your-api-key-here` with actual key
-
-**Budget Models:**
- `meta-llama/llama-3.1-8b-instruct:free` - Free tier
- `openai/gpt-4o-mini` - $0.15 per million tokens
- `anthropic/claude-3-haiku` - $0.25 per million tokens
-
-### OpenAI (Premium Quality)
-
-**Best for:** Reliability, advanced features
-
-```yaml
-llm:
-  provider: openai
-  api_key: "your-openai-api-key"
-  synthesis_model: "gpt-4o-mini"
-  expansion_model: "gpt-4o-mini"
-  enable_synthesis: false
-  synthesis_temperature: 0.3
-  timeout: 30
-```
-
-**Setup:**
-1. Sign up at [platform.openai.com](https://platform.openai.com)
-2. Add payment method
-3. Create API key
-4. Start with `gpt-4o-mini` for cost efficiency
-
-### Anthropic Claude (Code Expert)
-
-**Best for:** Code analysis, thoughtful responses
-
-```yaml
-llm:
-  provider: anthropic
-  api_key: "your-anthropic-api-key"
-  synthesis_model: "claude-3-haiku-20240307"
-  expansion_model: "claude-3-haiku-20240307"
-  enable_synthesis: false
-  synthesis_temperature: 0.3
-  timeout: 30
-```
-
-**Setup:**
-1. Sign up at [console.anthropic.com](https://console.anthropic.com)
-2. Add credits to account
-3. Create API key
-4. Start with Claude Haiku for budget-friendly option
-
-## 🧪 Testing Your Setup
-
-### 1. Basic Functionality Test
-```bash
-# Test without LLM (should always work)
-./rag-mini search /path/to/project "authentication"
-```
-
-### 2. Synthesis Test
-```bash
-# Test LLM integration
-./rag-mini search /path/to/project "authentication" --synthesize
-```
-
-### 3. Interactive Test
-```bash
-# Test exploration mode
-./rag-mini explore /path/to/project
-# Then ask: "How does authentication work in this codebase?"
-```
-
-### 4. Query Expansion Test
-Enable `expand_queries: true` in config, then:
-```bash
-./rag-mini search /path/to/project "auth"
-# Should automatically expand to "auth authentication login user session"
-```
-
-## 🛠️ Configuration Tips
-
-### For Budget-Conscious Users
-```yaml
-llm:
-  synthesis_model: "gpt-4o-mini"  # or claude-haiku
-  enable_synthesis: false         # Manual control
-  synthesis_temperature: 0.1     # Factual responses
-  max_expansion_terms: 4          # Shorter expansions
-```
-
-### For Quality-Focused Users
-```yaml
-llm:
-  synthesis_model: "gpt-4o"       # or claude-sonnet
-  enable_synthesis: true          # Always on
-  synthesis_temperature: 0.3     # Balanced creativity
-  enable_thinking: true           # Show reasoning
-  max_expansion_terms: 8          # Comprehensive expansion
-```
-
-### For Privacy-Focused Users
-```yaml
-# Use only local providers
-embedding:
-  preferred_method: ollama        # Local embeddings
-llm:
-  provider: ollama               # Local LLM
-  # Never use cloud providers
-```
-
-## 🔧 Troubleshooting
-
-### Connection Issues
- **Local:** Ensure Ollama/LM Studio is running: `ps aux | grep ollama`
- **Cloud:** Check API key and internet: `curl -H "Authorization: Bearer $API_KEY" https://api.openai.com/v1/models`
-
-### Model Not Found
- **Ollama:** `ollama pull model-name`
- **Cloud:** Check provider's model list documentation
-
-### High Costs
- Use mini/haiku models instead of full versions
- Set `enable_synthesis: false` and use `--synthesize` selectively
- Reduce `max_expansion_terms` to 4-6
-
-### Poor Quality
- Try higher-tier models (gpt-4o, claude-sonnet)
- Adjust `synthesis_temperature` (0.1 = factual, 0.5 = creative)
- Enable `expand_queries` for better search coverage
-
-### Slow Responses
- **Local:** Try smaller models (qwen3:0.6b)
- **Cloud:** Increase `timeout` or switch providers
- **General:** Reduce `max_size` in chunking config
-
-## 📋 Environment Variables (Alternative Setup)
-
-Instead of putting API keys in config files, use environment variables:
-
-```bash
-# In your shell profile (.bashrc, .zshrc, etc.)
-export OPENAI_API_KEY="your-openai-key"
-export ANTHROPIC_API_KEY="your-anthropic-key"
-export OPENROUTER_API_KEY="your-openrouter-key"
-```
-
-Then in config:
-```yaml
-llm:
-  api_key: "${OPENAI_API_KEY}"  # Reads from environment
-```
-
-## 🚀 Advanced: Multi-Provider Setup
-
-You can create different configs for different use cases:
-
-```bash
-# Fast local analysis
-cp examples/config-beginner.yaml .mini-rag/config-local.yaml
-
-# High-quality cloud analysis  
-cp examples/config-llm-providers.yaml .mini-rag/config-cloud.yaml
-# Edit to use OpenAI/Claude
-
-# Switch configs as needed
-ln -sf config-local.yaml .mini-rag/config.yaml   # Use local
-ln -sf config-cloud.yaml .mini-rag/config.yaml   # Use cloud
-```
-
-## 📚 Further Reading
-
- [Ollama Model Library](https://ollama.ai/library)
- [OpenRouter Pricing](https://openrouter.ai/docs#models)
- [OpenAI API Documentation](https://platform.openai.com/docs)
- [Anthropic Claude Documentation](https://docs.anthropic.com/claude)
- [LM Studio Getting Started](https://lmstudio.ai/docs)
-
---
-
-💡 **Pro Tip:** Start with local Ollama for learning, then upgrade to cloud providers when you need production-quality analysis or are working with large codebases.
--- a/docs/PYPI_PUBLICATION_GUIDE.md
+++ b/docs/PYPI_PUBLICATION_GUIDE.md
@ -1,215 +0,0 @@
-# FSS-Mini-RAG PyPI Publication Guide
-
-## 🚀 **Status: READY FOR PRODUCTION**
-
-Your FSS-Mini-RAG project is **professionally configured** and follows all official Python packaging best practices. This guide will get you published on PyPI in minutes.
-
-## ✅ **Pre-Publication Checklist**
-
-### **Already Complete** ✅
- [x] **pyproject.toml** configured with complete PyPI metadata
- [x] **GitHub Actions CI/CD** with automated wheel building
- [x] **Cross-platform testing** (Ubuntu/Windows/macOS)
- [x] **Professional release workflow** with assets
- [x] **Security best practices** (release environment protection)
-
-### **Required Setup** (5 minutes)
- [ ] **PyPI API Token** - Set up in GitHub Secrets
- [ ] **Test Publication** - Verify with test tag
- [ ] **Production Release** - Create official version
-
---
-
-## 🔐 **Step 1: PyPI API Token Setup**
-
-### **Create PyPI Account & Token**
-1. **Sign up**: https://pypi.org/account/register/
-2. **Generate API Token**:
-   - Go to PyPI.org → Account Settings → API Tokens
-   - Click "Add API token"
-   - **Token name**: `fss-mini-rag-github-actions`
-   - **Scope**: `Entire account` (or specific to project after first upload)
-   - **Copy the token** (starts with `pypi-...`)
-
-### **Add Token to GitHub Secrets**
-1. **Navigate**: GitHub repo → Settings → Secrets and variables → Actions
-2. **New secret**: Click "New repository secret"
-3. **Name**: `PYPI_API_TOKEN`
-4. **Value**: Paste your PyPI token
-5. **Add secret**
-
---
-
-## 🧪 **Step 2: Test Publication**
-
-### **Create Test Release**
-```bash
-# Create test tag
-git tag v2.1.0-test
-git push origin v2.1.0-test
-```
-
-### **Monitor Workflow**
-1. **GitHub Actions**: Go to Actions tab in your repo
-2. **Watch "Build and Release"** workflow execution
-3. **Expected duration**: ~45-60 minutes
-4. **Check each job**: build-wheels, test-installation, publish, create-release
-
-### **Verify Test Results**
- ✅ **PyPI Upload**: Check https://pypi.org/project/fss-mini-rag/
- ✅ **GitHub Release**: Verify assets created
- ✅ **Installation Test**: `pip install fss-mini-rag==2.1.0-test`
-
---
-
-## 🎉 **Step 3: Official Release**
-
-### **Version Update** (if needed)
-```bash
-# Update version in pyproject.toml if desired
-version = "2.1.0"  # Remove -test suffix
-```
-
-### **Create Production Release**
-```bash
-# Official release tag
-git tag v2.1.0
-git push origin v2.1.0
-```
-
-### **Automated Results**
-Your GitHub Actions will automatically:
-1. **Build**: Cross-platform wheels + source distribution
-2. **Test**: Installation validation across platforms
-3. **Publish**: Upload to PyPI
-4. **Release**: Create GitHub release with installers
-
---
-
-## 📦 **Your Distribution Ecosystem**
-
-### **PyPI Package**: `fss-mini-rag`
-```bash
-# Standard pip installation
-pip install fss-mini-rag
-
-# With pipx (isolated)
-pipx install fss-mini-rag
-
-# With uv (fastest)
-uv tool install fss-mini-rag
-```
-
-### **One-Line Installers**
-```bash
-# Linux/macOS
-curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
-
-# Windows PowerShell
-iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
-```
-
-### **Portable Distribution**
- **Single file**: `rag-mini.pyz` (no Python knowledge needed)
- **Cross-platform**: Works on any system with Python 3.8+
-
---
-
-## 🔍 **Monitoring & Maintenance**
-
-### **PyPI Analytics**
- **Downloads**: View on your PyPI project page
- **Version adoption**: Track which versions users prefer
- **Platform distribution**: See OS/Python version usage
-
-### **Release Management**
-```bash
-# Future releases (automated)
-git tag v2.2.0
-git push origin v2.2.0
-# → Automatic PyPI publishing + GitHub release
-```
-
-### **Issue Management**
-Your professional setup provides:
- **Professional README** with clear installation instructions
- **GitHub Issues** for user support
- **Multiple installation paths** for different user types
- **Comprehensive testing** reducing support burden
-
---
-
-## 🎯 **Success Metrics**
-
-### **Technical Excellence Achieved**
- ✅ **100% Official Compliance**: Follows packaging.python.org standards exactly
- ✅ **Professional CI/CD**: Automated quality gates
- ✅ **Cross-Platform**: Windows/macOS/Linux support
- ✅ **Multiple Python Versions**: 3.8, 3.9, 3.10, 3.11, 3.12
- ✅ **Security Best Practices**: Environment protection, secret management
-
-### **User Experience Excellence** 
- ✅ **One-Line Installation**: Zero-friction for users
- ✅ **Smart Fallbacks**: uv → pipx → pip automatically
- ✅ **No-Python-Knowledge Option**: Single .pyz file
- ✅ **Professional Documentation**: Clear getting started guide
-
---
-
-## 🚨 **Troubleshooting**
-
-### **Common Issues**
-```bash
-# If workflow fails
-gh run list --limit 5                    # Check recent runs
-gh run view [run-id] --log-failed        # View failed job logs
-
-# If PyPI upload fails
-# → Check PYPI_API_TOKEN is correct
-# → Verify token has appropriate scope
-# → Ensure package name isn't already taken
-
-# If tests fail
-# → Check test-installation job logs
-# → Verify wheel builds correctly
-# → Check Python version compatibility
-```
-
-### **Support Channels**
- **GitHub Issues**: For FSS-Mini-RAG specific problems
- **PyPI Support**: https://pypi.org/help/
- **Python Packaging**: https://packaging.python.org/
-
---
-
-## 🎊 **Congratulations!**
-
-You've built a **professional-grade Python package** that follows all industry standards:
-
- **Modern Architecture**: pyproject.toml, automated CI/CD
- **Universal Compatibility**: Works on every major platform  
- **User-Friendly**: Multiple installation methods for different skill levels
- **Maintainable**: Automated releases, comprehensive testing
-
-**FSS-Mini-RAG is ready to serve the Python community!** 🚀
-
---
-
-## 📋 **Quick Reference Commands**
-
-```bash
-# Test release
-git tag v2.1.0-test && git push origin v2.1.0-test
-
-# Production release  
-git tag v2.1.0 && git push origin v2.1.0
-
-# Monitor workflow
-gh run list --limit 3
-
-# Test installation
-pip install fss-mini-rag
-rag-mini --help
-```
-
-**Next**: Create reusable templates for your future tools! 🛠️
--- a/docs/PYTHON_PACKAGING_BEST_PRACTICES.md
+++ b/docs/PYTHON_PACKAGING_BEST_PRACTICES.md
@ -1,323 +0,0 @@
-# Python Packaging Best Practices Guide
-
-## 🎯 **Official Standards Compliance**
-
-This guide follows the official Python packaging flow from [packaging.python.org](https://packaging.python.org/en/latest/flow/) and incorporates industry best practices for professional software distribution.
-
-## 📋 **The Complete Packaging Workflow**
-
-### **1. Source Tree Organization**
-```
-your-project/
-├── src/your_package/          # Source code
-│   ├── __init__.py
-│   └── cli.py                 # Entry point
-├── tests/                     # Test suite
-├── scripts/                   # Build scripts
-├── .github/workflows/         # CI/CD
-├── pyproject.toml            # Package configuration
-├── README.md                 # Documentation
-├── LICENSE                   # License file
-├── install.sh               # One-line installer (Unix)
-└── install.ps1             # One-line installer (Windows)
-```
-
-### **2. Configuration Standards**
-
-#### **pyproject.toml - The Modern Standard**
-```toml
-[build-system]
-requires = ["setuptools", "wheel"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "your-package-name"
-version = "1.0.0"
-description = "Clear, concise description"
-authors = [{name = "Your Name", email = "email@example.com"}]
-readme = "README.md"
-license = {text = "MIT"}
-requires-python = ">=3.8"
-keywords = ["relevant", "keywords"]
-classifiers = [
-    "Development Status :: 4 - Beta",
-    "Intended Audience :: Developers",
-    "License :: OSI Approved :: MIT License",
-    "Programming Language :: Python :: 3",
-    # ... version classifiers
-]
-
-[project.urls]
-Homepage = "https://github.com/username/repo"
-Repository = "https://github.com/username/repo"
-Issues = "https://github.com/username/repo/issues"
-
-[project.scripts]
-your-cli = "your_package.cli:main"
-```
-
-### **3. Build Artifact Strategy**
-
-#### **Source Distribution (sdist)**
- Contains complete source code
- Includes tests, documentation, scripts
- Built with: `python -m build --sdist`
- Required for PyPI uploads
-
-#### **Wheel Distributions**
- Pre-built, optimized for installation
- Platform-specific when needed
- Built with: `cibuildwheel` for cross-platform
- Much faster installation than sdist
-
-#### **Zipapp Distributions (.pyz)**
- Single executable file
- No pip/package manager needed
- Perfect for users without Python knowledge
- Built with: `zipapp` module
-
-### **4. Cross-Platform Excellence**
-
-#### **Operating System Matrix**
- **Ubuntu latest** (Linux representation)
- **Windows latest** (broad Windows compatibility)
- **macOS 13** (Intel Macs)
- **macOS 14** (Apple Silicon)
-
-#### **Python Version Strategy**
- **Minimum**: 3.8 (broad compatibility)
- **Testing focus**: 3.8, 3.11, 3.12
- **Latest features**: Use 3.11+ capabilities when beneficial
-
-#### **Architecture Coverage**
- **Linux**: x86_64 (most common)
- **Windows**: AMD64 (64-bit standard)
- **macOS**: x86_64 + ARM64 (Intel + Apple Silicon)
-
-## 🚀 **Installation Experience Design**
-
-### **Multi-Method Installation Strategy**
-
-#### **1. One-Line Installers (Recommended)**
-**Principle**: "Install without thinking"
-```bash
-# Linux/macOS
-curl -fsSL https://your-domain/install.sh | bash
-
-# Windows
-iwr https://your-domain/install.ps1 -UseBasicParsing | iex
-```
-
-**Smart Fallback Chain**: uv → pipx → pip
- **uv**: Fastest modern package manager
- **pipx**: Isolated environments, prevents conflicts
- **pip**: Universal fallback, always available
-
-#### **2. Manual Methods**
-```bash
-# Modern package managers
-uv tool install your-package
-pipx install your-package
-
-# Traditional
-pip install your-package
-
-# Direct from source
-pip install git+https://github.com/user/repo
-```
-
-#### **3. No-Python-Knowledge Option**
- Download `your-tool.pyz`
- Run with: `python your-tool.pyz`
- Works with any Python 3.8+ installation
-
-### **Installation Experience Principles**
-1. **Progressive Enhancement**: Start with simplest method
-2. **Intelligent Fallbacks**: Always provide alternatives
-3. **Clear Error Messages**: Guide users to solutions
-4. **Path Management**: Handle PATH issues automatically
-5. **Verification**: Test installation immediately
-
-## 🔄 **CI/CD Pipeline Excellence**
-
-### **Workflow Job Architecture**
-```yaml
-Jobs Workflow:
-1. build-wheels     → Cross-platform wheel building
-2. build-zipapp     → Single-file distribution
-3. test-installation → Validation across environments
-4. publish          → PyPI upload (tags only)
-5. create-release   → GitHub release with assets
-```
-
-### **Quality Gates**
- **Build Verification**: All wheels must build successfully
- **Cross-Platform Testing**: Installation test on Windows/macOS/Linux
- **Functionality Testing**: CLI commands must work
- **Security Scanning**: Dependency and secret scanning
- **Release Gating**: Manual approval for production releases
-
-### **Automation Triggers**
-```yaml
-Triggers:
- push.tags.v*        → Full release pipeline
- push.branches.main  → Build and test only
- pull_request        → Quality verification
- workflow_dispatch   → Manual testing
-```
-
-## 🔐 **Security Best Practices**
-
-### **Secret Management**
- **PyPI API Token**: Stored in GitHub Secrets
- **Scope Limitation**: Project-specific tokens when possible
- **Environment Protection**: Release environment requires approval
- **Token Rotation**: Regular token updates
-
-### **Supply Chain Security**
- **Dependency Scanning**: Automated vulnerability checks
- **Signed Releases**: GPG signing for sensitive projects
- **Audit Trails**: Complete build artifact provenance
- **Reproducible Builds**: Consistent build environments
-
-### **Code Security**
- **No Secrets in Code**: Environment variables only
- **Input Validation**: Sanitize all user inputs
- **Dependency Pinning**: Lock file for reproducible builds
-
-## 📊 **PyPI Publication Strategy**
-
-### **Pre-Publication Checklist**
- [ ] **Package Name**: Available on PyPI, follows naming conventions
- [ ] **Version Strategy**: Semantic versioning (MAJOR.MINOR.PATCH)
- [ ] **Metadata Complete**: Description, keywords, classifiers
- [ ] **License Clear**: License file and pyproject.toml match
- [ ] **README Professional**: Clear installation and usage
- [ ] **API Token**: PyPI token configured in GitHub Secrets
-
-### **Release Process**
-```bash
-# Development releases
-git tag v1.0.0-alpha1
-git tag v1.0.0-beta1
-git tag v1.0.0-rc1
-
-# Production releases
-git tag v1.0.0
-git push origin v1.0.0  # Triggers automated publishing
-```
-
-### **Version Management**
- **Development**: 1.0.0-dev, 1.0.0-alpha1, 1.0.0-beta1
- **Release Candidates**: 1.0.0-rc1, 1.0.0-rc2
- **Stable**: 1.0.0, 1.0.1, 1.1.0, 2.0.0
- **Hotfixes**: 1.0.1, 1.0.2
-
-## 🎯 **User Experience Excellence**
-
-### **Documentation Hierarchy**
-1. **README Quick Start**: Get running in 30 seconds
-2. **Installation Guide**: Multiple methods, troubleshooting
-3. **User Manual**: Complete feature documentation
-4. **API Reference**: For library use
-5. **Contributing Guide**: For developers
-
-### **Error Handling Philosophy**
- **Graceful Degradation**: Fallback when features unavailable
- **Actionable Messages**: Tell users exactly what to do
- **Context Preservation**: Show what was being attempted
- **Recovery Guidance**: Suggest next steps
-
-### **Performance Considerations**
- **Fast Startup**: Minimize import time
- **Efficient Dependencies**: Avoid heavy packages
- **Progressive Loading**: Load features on demand
- **Resource Management**: Clean up properly
-
-## 📈 **Maintenance and Evolution**
-
-### **Monitoring Success**
- **PyPI Download Statistics**: Track adoption
- **GitHub Analytics**: Issue trends, popular features
- **User Feedback**: GitHub Issues, discussions
- **Platform Distribution**: OS/Python version usage
-
-### **Version Lifecycle**
- **Feature Development**: Alpha/beta releases
- **Stability Period**: Release candidates
- **Production**: Stable releases with hotfixes
- **Deprecation**: Clear migration paths
-
-### **Dependency Management**
- **Regular Updates**: Security patches, feature updates
- **Compatibility Testing**: Ensure new versions work
- **Breaking Change Management**: Major version bumps
- **End-of-Life Planning**: Python version sunsetting
-
-## 🏆 **Success Metrics**
-
-### **Technical Excellence**
- **Build Success Rate**: >99% automated builds
- **Cross-Platform Coverage**: Windows/macOS/Linux working
- **Installation Success**: All methods work reliably
- **Performance**: Fast downloads, quick startup
-
-### **User Adoption**
- **Download Growth**: Increasing PyPI downloads
- **Platform Diversity**: Usage across different OS
- **Issue Resolution**: Fast response to problems
- **Community Engagement**: Contributors, discussions
-
-### **Developer Experience**
- **Release Automation**: Zero-manual-step releases
- **Quality Gates**: Catches problems before release
- **Documentation Currency**: Always up-to-date
- **Contributor Onboarding**: Easy to contribute
-
-## 🚨 **Common Pitfalls to Avoid**
-
-### **Configuration Issues**
- ❌ **Incorrect entry points** - CLI commands don't work
- ❌ **Missing dependencies** - ImportError at runtime
- ❌ **Wrong Python versions** - Compatibility problems
- ❌ **Bad package names** - Conflicts with existing packages
-
-### **Distribution Problems**
- ❌ **Missing wheels** - Slow pip installations
- ❌ **Platform-specific bugs** - Works on dev machine only
- ❌ **Large package size** - Unnecessary dependencies included
- ❌ **Broken PATH handling** - Commands not found after install
-
-### **Security Vulnerabilities**
- ❌ **Secrets in code** - API keys committed to repository
- ❌ **Unsafe dependencies** - Vulnerable packages included
- ❌ **Overly broad tokens** - PyPI tokens with excessive permissions
- ❌ **No input validation** - Code injection vulnerabilities
-
-## ✅ **Final Checklist**
-
-### **Before First Release**
- [ ] All installation methods tested on each platform
- [ ] README includes clear installation instructions
- [ ] PyPI API token configured with proper permissions
- [ ] GitHub Actions workflow runs successfully
- [ ] CLI commands work after installation
- [ ] Error messages are helpful and actionable
-
-### **For Each Release**
- [ ] Version number updated in pyproject.toml
- [ ] Changelog updated with changes
- [ ] All tests pass on all platforms
- [ ] Manual testing on at least one platform
- [ ] Tag pushed to trigger automated release
-
-### **Post-Release**
- [ ] PyPI package published successfully
- [ ] GitHub release created with assets
- [ ] Installation instructions tested
- [ ] Social media announcement (if applicable)
- [ ] Documentation updated for new features
-
---
-
-**This guide transforms your Python projects from development tools into professional software packages that delight users and follow industry best practices.** 🚀
--- a/docs/QUERY_EXPANSION.md
+++ b/docs/QUERY_EXPANSION.md
@ -1,114 +0,0 @@
-# Query Expansion Guide
-
-## What Is Query Expansion?
-
-Query expansion automatically adds related terms to your search to find more relevant results.
-
-**Example:**
- You search: `"authentication"`
- System expands to: `"authentication login user verification credentials security"`
- Result: 2-3x more relevant matches!
-
-## How It Works
-
-```mermaid
-graph LR
-    A[User Query] --> B[LLM Expands]
-    B --> C[Enhanced Search]
-    C --> D[Better Results]
-    
-    style A fill:#e1f5fe
-    style D fill:#e8f5e8
-```
-
-1. **Your query** goes to a small, fast LLM (like qwen3:1.7b)
-2. **LLM adds related terms** that people might use when writing about the topic
-3. **Both semantic and keyword search** use the expanded query
-4. **You get much better results** without changing anything
-
-## When Is It Enabled?
-
- ❌ **CLI commands**: Disabled by default (for speed)
- ✅ **TUI interface**: Auto-enabled (when you have time to explore)
- ⚙️ **Configurable**: Can be enabled/disabled in config.yaml
-
-## Configuration
-
-### Easy Configuration (TUI)
-
-Use the interactive Configuration Manager in the TUI:
-
-1. **Start TUI**: `./rag-tui` or `rag.bat` (Windows)
-2. **Select Option 6**: Configuration Manager
-3. **Choose Option 2**: Toggle query expansion
-4. **Follow prompts**: Get explanation and easy on/off toggle
-
-The TUI will:
- Explain benefits and requirements clearly
- Check if Ollama is available
- Show current status (enabled/disabled)
- Save changes automatically
-
-### Manual Configuration (Advanced)
-
-Edit `config.yaml` directly:
-
-```yaml
-# Search behavior settings
-search:
-  expand_queries: false         # Enable automatic query expansion
-
-# LLM expansion settings  
-llm:
-  max_expansion_terms: 8        # How many terms to add
-  expansion_model: auto         # Which model to use
-  ollama_host: localhost:11434  # Ollama server
-```
-
-## Performance
-
- **Speed**: ~100ms on most systems (depends on your hardware)
- **Caching**: Repeated queries are instant
- **Model Selection**: Automatically uses fastest available model
-
-## Examples
-
-**Code Search:**
-```
-"error handling" → "error handling exception try catch fault tolerance recovery"
-```
-
-**Documentation Search:**
-```
-"installation" → "installation setup install deploy configuration getting started"
-```
-
-**Any Content:**
-```
-"budget planning" → "budget planning financial forecast cost analysis spending plan"
-```
-
-## Troubleshooting
-
-**Query expansion not working?**
-1. Check if Ollama is running: `curl http://localhost:11434/api/tags`
-2. Verify you have a model installed: `ollama list`
-3. Check logs with `--verbose` flag
-
-**Too slow?**
-1. Disable in config.yaml: `expand_queries: false`
-2. Or use faster model: `expansion_model: "qwen3:0.6b"`
-
-**Poor expansions?**
-1. Try different model: `expansion_model: "qwen3:1.7b"`
-2. Reduce terms: `max_expansion_terms: 5`
-
-## Technical Details
-
-The QueryExpander class:
- Uses temperature 0.1 for consistent results
- Limits expansions to prevent very long queries
- Handles model selection automatically
- Includes smart caching to avoid repeated calls
-
-Perfect for beginners because it "just works" - enable it when you want better results, disable when you want maximum speed.
--- a/docs/SMART_TUNING_GUIDE.md
+++ b/docs/SMART_TUNING_GUIDE.md
@ -5,10 +5,10 @@
 ### **1. 📊 Intelligent Analysis**
 ```bash
 # Analyze your project patterns and get optimization suggestions
-./rag-mini analyze /path/to/project
+./rag-mini-enhanced analyze /path/to/project

 # Get smart recommendations based on actual usage
-./rag-mini status /path/to/project
+./rag-mini-enhanced status /path/to/project
 ```

 **What it analyzes:**
@ -20,9 +20,13 @@
 ### **2. 🧠 Smart Search Enhancement**
 ```bash
 # Enhanced search with query intelligence
-./rag-mini search /project "MyClass"     # Detects class names
-./rag-mini search /project "login()"     # Detects function calls  
-./rag-mini search /project "user auth"   # Natural language
+./rag-mini-enhanced search /project "MyClass"     # Detects class names
+./rag-mini-enhanced search /project "login()"     # Detects function calls  
+./rag-mini-enhanced search /project "user auth"   # Natural language
+
+# Context-aware search (planned)
+./rag-mini-enhanced context /project "function_name"  # Show surrounding code
+./rag-mini-enhanced similar /project "pattern"        # Find similar patterns
 ```

 ### **3. ⚙️ Language-Specific Optimizations**
@ -109,10 +113,10 @@ Edit `.mini-rag/config.json` in your project:
 ./rag-mini index /project --force

 # Test search quality improvements
-./rag-mini search /project "your test query"
+./rag-mini-enhanced search /project "your test query"

 # Verify optimization impact
-./rag-mini analyze /project
+./rag-mini-enhanced analyze /project
 ```

 ## 🎊 **Result: Smarter, Faster, Better**
--- a/docs/TECHNICAL_GUIDE.md
+++ b/docs/TECHNICAL_GUIDE.md
@ -421,7 +421,7 @@ def _create_vector_table(self, chunks: List[CodeChunk], embeddings: np.ndarray):
    
    return table

-def vector_search(self, query_embedding: np.ndarray, top_k: int) -> List[SearchResult]:
+def vector_search(self, query_embedding: np.ndarray, limit: int) -> List[SearchResult]:
    """Fast vector similarity search."""
    table = self.db.open_table("chunks")
    
@ -787,36 +787,4 @@ def repair_index(self, project_path: Path) -> bool:
        return False
 ```

-## LLM Model Selection & Performance
-
-### Model Recommendations by Use Case
-
-FSS-Mini-RAG works well with various LLM sizes because our rich context and guided prompts help small models perform excellently:
-
-**Recommended (Best Balance):**
- **qwen3:1.7b** - Excellent quality with fast performance (default priority)
- **qwen3:0.6b** - Surprisingly good for CPU-only systems (522MB)
-
-**Still Excellent (Slower but highest quality):**
- **qwen3:4b** - Highest quality, slower responses
- **qwen3:4b:q8_0** - High-precision quantized version for production
-
-### Why Small Models Work Well Here
-
-Small models can produce excellent results in RAG systems because:
-
-1. **Rich Context**: Our chunking provides substantial context around each match
-2. **Guided Prompts**: Well-structured prompts give models a clear "runway" to continue
-3. **Specific Domain**: Code analysis is more predictable than general conversation
-
-Without good context, small models tend to get lost and produce erratic output. But with RAG's rich context and focused prompts, even the 0.6B model can provide meaningful analysis.
-
-### Quantization Benefits
-
-For production deployments, consider quantized models like `qwen3:1.7b:q8_0` or `qwen3:4b:q8_0`:
- **Q8_0**: 8-bit quantization with minimal quality loss
- **Smaller memory footprint**: ~50% reduction vs full precision
- **Better CPU performance**: Faster inference on CPU-only systems
- **Production ready**: Maintains analysis quality while improving efficiency
-
 This technical guide provides the deep implementation details that developers need to understand, modify, and extend the system, while keeping the main README focused on getting users started quickly.
--- a/docs/TESTING_PLAN.md
+++ b/docs/TESTING_PLAN.md
@ -1,832 +0,0 @@
-# FSS-Mini-RAG Distribution Testing Plan
-
-> **CRITICAL**: This is a comprehensive testing plan for the new distribution system. Every stage must be completed and verified before deployment.
-
-## Overview
-
-We've implemented a complete distribution overhaul with:
- One-line installers for Linux/macOS/Windows
- Multiple installation methods (uv, pipx, pip, zipapp)
- Automated wheel building via GitHub Actions
- PyPI publishing automation
- Cross-platform compatibility
-
-**This testing plan ensures everything works before we ship it.**
-
---
-
-## Phase 1: Local Development Environment Testing
-
-### 1.1 Virtual Environment Setup Testing
-
-**Objective**: Verify our package works in clean environments
-
-**Test Environments**:
- [ ] Python 3.8 in fresh venv
- [ ] Python 3.9 in fresh venv  
- [ ] Python 3.10 in fresh venv
- [ ] Python 3.11 in fresh venv
- [ ] Python 3.12 in fresh venv
-
-**For each Python version**:
-```bash
-# Test commands for each environment
-python -m venv test_env_38
-source test_env_38/bin/activate  # or test_env_38\Scripts\activate on Windows
-python --version
-pip install -e .
-rag-mini --help
-rag-mini init --help
-rag-mini search --help
-# Test basic functionality
-mkdir test_project
-echo "def hello(): print('world')" > test_project/test.py
-rag-mini init -p test_project
-rag-mini search -p test_project "hello function"
-deactivate
-rm -rf test_env_38 test_project
-```
-
-**Success Criteria**:
- [ ] Package installs without errors
- [ ] All CLI commands show help properly
- [ ] Basic indexing and search works
- [ ] No dependency conflicts
-
-### 1.2 Package Metadata Testing
-
-**Objective**: Verify pyproject.toml produces correct package metadata
-
-**Tests**:
-```bash
-# Build source distribution and inspect metadata
-python -m build --sdist
-tar -tzf dist/*.tar.gz | grep -E "(pyproject.toml|METADATA)"
-tar -xzf dist/*.tar.gz --to-stdout */METADATA
-
-# Verify key metadata fields
-python -c "
-import pkg_resources
-dist = pkg_resources.get_distribution('fss-mini-rag')
-print(f'Name: {dist.project_name}')
-print(f'Version: {dist.version}')  
-print(f'Entry points: {list(dist.get_entry_map().keys())}')
-"
-```
-
-**Success Criteria**:
- [ ] Package name is "fss-mini-rag" 
- [ ] Console script "rag-mini" is registered
- [ ] Version matches pyproject.toml
- [ ] Author, license, description are correct
- [ ] Python version requirements are set
-
---
-
-## Phase 2: Build System Testing
-
-### 2.1 Source Distribution Testing
-
-**Objective**: Verify source packages build and install correctly
-
-**Tests**:
-```bash
-# Clean build
-rm -rf dist/ build/ *.egg-info/
-python -m build --sdist
-
-# Test source install in fresh environment
-python -m venv test_sdist
-source test_sdist/bin/activate
-pip install dist/*.tar.gz
-rag-mini --help
-# Test actual functionality
-mkdir test_src && echo "print('test')" > test_src/main.py
-rag-mini init -p test_src
-rag-mini search -p test_src "print statement"
-deactivate && rm -rf test_sdist test_src
-```
-
-**Success Criteria**:
- [ ] Source distribution builds without errors
- [ ] Contains all necessary files
- [ ] Installs and runs correctly from source
- [ ] No missing dependencies
-
-### 2.2 Wheel Building Testing
-
-**Objective**: Test wheel generation and installation
-
-**Tests**:
-```bash
-# Build wheel
-python -m build --wheel
-
-# Inspect wheel contents  
-python -m zipfile -l dist/*.whl
-python -m wheel unpack dist/*.whl
-ls -la fss_mini_rag-*/
-
-# Test wheel install
-python -m venv test_wheel
-source test_wheel/bin/activate
-pip install dist/*.whl
-rag-mini --version
-which rag-mini
-rag-mini --help
-deactivate && rm -rf test_wheel
-```
-
-**Success Criteria**:
- [ ] Wheel builds successfully
- [ ] Contains correct package structure
- [ ] Installs faster than source
- [ ] Entry point is properly registered
-
-### 2.3 Zipapp (.pyz) Building Testing  
-
-**Objective**: Test single-file zipapp distribution
-
-**Tests**:
-```bash
-# Build zipapp
-python scripts/build_pyz.py
-
-# Test direct execution
-python dist/rag-mini.pyz --help
-python dist/rag-mini.pyz --version
-
-# Test with different Python versions
-python3.8 dist/rag-mini.pyz --help
-python3.11 dist/rag-mini.pyz --help
-
-# Test functionality
-mkdir pyz_test && echo "def test(): pass" > pyz_test/code.py
-python dist/rag-mini.pyz init -p pyz_test
-python dist/rag-mini.pyz search -p pyz_test "test function"
-rm -rf pyz_test
-
-# Test file size and contents
-ls -lh dist/rag-mini.pyz
-python -m zipfile -l dist/rag-mini.pyz | head -20
-```
-
-**Success Criteria**:
- [ ] Builds without errors
- [ ] File size is reasonable (< 100MB)  
- [ ] Runs with multiple Python versions
- [ ] All core functionality works
- [ ] No missing dependencies in zipapp
-
---
-
-## Phase 3: Installation Script Testing
-
-### 3.1 Linux/macOS Install Script Testing
-
-**Objective**: Test install.sh in various Unix environments
-
-**Test Environments**:
- [ ] Ubuntu 20.04 (clean container)
- [ ] Ubuntu 22.04 (clean container)  
- [ ] Ubuntu 24.04 (clean container)
- [ ] CentOS 7 (clean container)
- [ ] CentOS Stream 9 (clean container)
- [ ] macOS 12+ (if available)
- [ ] Alpine Linux (minimal test)
-
-**For each environment**:
-```bash
-# Test script download and execution
-curl -fsSL file://$(pwd)/install.sh > /tmp/test_install.sh
-chmod +x /tmp/test_install.sh
-
-# Test dry run capabilities (modify script for --dry-run flag)
-/tmp/test_install.sh --dry-run
-
-# Test actual installation
-/tmp/test_install.sh
-
-# Verify installation
-which rag-mini
-rag-mini --help
-rag-mini --version
-
-# Test functionality
-mkdir install_test
-echo "def example(): return 'hello'" > install_test/sample.py
-rag-mini init -p install_test  
-rag-mini search -p install_test "example function"
-
-# Cleanup
-rm -rf install_test /tmp/test_install.sh
-```
-
-**Edge Case Testing**:
-```bash
-# Test without curl
-mv /usr/bin/curl /usr/bin/curl.bak 2>/dev/null || true
-# Run installer (should fall back to wget or pip)
-# Restore curl
-
-# Test without wget  
-mv /usr/bin/wget /usr/bin/wget.bak 2>/dev/null || true
-# Run installer
-# Restore wget
-
-# Test with Python but no pip
-# Test with old Python versions
-# Test with no internet (local package test)
-```
-
-**Success Criteria**:
- [ ] Script downloads and runs without errors
- [ ] Handles missing dependencies gracefully
- [ ] Installs correct package version
- [ ] Creates working `rag-mini` command
- [ ] Provides clear user feedback
- [ ] Falls back properly (uv → pipx → pip)
-
-### 3.2 Windows PowerShell Script Testing
-
-**Objective**: Test install.ps1 in Windows environments
-
-**Test Environments**:
- [ ] Windows 10 (PowerShell 5.1)
- [ ] Windows 11 (PowerShell 5.1)
- [ ] Windows Server 2019
- [ ] PowerShell Core 7.x (cross-platform)
-
-**For each environment**:
-```powershell
-# Download and test
-Invoke-WebRequest -Uri "file://$(Get-Location)/install.ps1" -OutFile "$env:TEMP/test_install.ps1"
-
-# Test execution policy handling
-Get-ExecutionPolicy
-Set-ExecutionPolicy -ExecutionPolicy Bypass -Scope Process
-
-# Test dry run (modify script)
-& "$env:TEMP/test_install.ps1" -DryRun
-
-# Test actual installation
-& "$env:TEMP/test_install.ps1"
-
-# Verify installation
-Get-Command rag-mini
-rag-mini --help
-rag-mini --version
-
-# Test functionality
-New-Item -ItemType Directory -Name "win_test"
-"def windows_test(): return True" | Out-File -FilePath "win_test/test.py"
-rag-mini init -p win_test
-rag-mini search -p win_test "windows test"
-
-# Cleanup
-Remove-Item -Recurse -Force win_test
-Remove-Item "$env:TEMP/test_install.ps1"
-```
-
-**Edge Case Testing**:
- [ ] Test without Python in PATH
- [ ] Test with Python 3.8-3.12
- [ ] Test restricted execution policy
- [ ] Test without admin rights
- [ ] Test corporate firewall scenarios
-
-**Success Criteria**:
- [ ] Script runs without PowerShell errors
- [ ] Handles execution policy correctly
- [ ] Installs package successfully
- [ ] PATH is updated correctly
- [ ] Error messages are user-friendly
- [ ] Falls back properly (uv → pipx → pip)
-
---
-
-## Phase 4: GitHub Actions Workflow Testing
-
-### 4.1 Local Workflow Testing
-
-**Objective**: Test GitHub Actions workflow locally using act
-
-**Setup**:
-```bash
-# Install act (GitHub Actions local runner)
-# On macOS: brew install act
-# On Linux: check https://github.com/nektos/act
-
-# Test workflow syntax
-act --list
-
-# Test individual jobs
-act -j build-wheels --dry-run
-act -j build-zipapp --dry-run  
-act -j test-installation --dry-run
-```
-
-**Tests**:
-```bash
-# Test wheel building job
-act -j build-wheels
-
-# Check artifacts
-ls -la /tmp/act-* 
-
-# Test zipapp building
-act -j build-zipapp
-
-# Test installation testing job
-act -j test-installation
-
-# Test release job (with dummy tag)
-act push -e .github/workflows/test-release.json
-```
-
-**Success Criteria**:
- [ ] All jobs complete without errors
- [ ] Wheels are built for all platforms
- [ ] Zipapp is created successfully
- [ ] Installation tests pass
- [ ] Artifacts are properly uploaded
-
-### 4.2 Fork Testing
-
-**Objective**: Test workflow in a real GitHub environment
-
-**Setup**:
-1. [ ] Create a test fork of the repository
-2. [ ] Enable GitHub Actions on the fork
-3. [ ] Set up test PyPI token (TestPyPI)
-
-**Tests**:
-```bash
-# Push changes to test branch
-git checkout -b test-distribution
-git push origin test-distribution
-
-# Create test release
-git tag v2.1.0-test
-git push origin v2.1.0-test
-
-# Monitor GitHub Actions:
-# - Check all jobs complete
-# - Download artifacts
-# - Verify wheel contents  
-# - Test zipapp download
-```
-
-**Success Criteria**:
- [ ] Workflow triggers on tag push
- [ ] All matrix builds complete
- [ ] Artifacts are uploaded
- [ ] Release is created with assets
- [ ] TestPyPI receives package (if configured)
-
---
-
-## Phase 5: Manual Installation Method Testing
-
-### 5.1 uv Installation Testing
-
-**Test Environments**: Linux, macOS, Windows
-
-**Tests**:
-```bash
-# Fresh environment
-curl -LsSf https://astral.sh/uv/install.sh | sh
-export PATH="$HOME/.local/bin:$PATH"
-
-# Test uv tool install (will fail until we publish)  
-# For now, test with local wheel
-uv tool install dist/fss_mini_rag-*.whl
-
-# Verify installation
-which rag-mini
-rag-mini --help
-
-# Test functionality
-mkdir uv_test
-echo "print('uv test')" > uv_test/demo.py
-rag-mini init -p uv_test
-rag-mini search -p uv_test "print statement"
-rm -rf uv_test
-
-# Test uninstall
-uv tool uninstall fss-mini-rag
-```
-
-**Success Criteria**:
- [ ] uv installs cleanly
- [ ] Package installs via uv tool install
- [ ] Command is available in PATH
- [ ] All functionality works
- [ ] Uninstall works cleanly
-
-### 5.2 pipx Installation Testing
-
-**Test Environments**: Linux, macOS, Windows
-
-**Tests**:
-```bash
-# Install pipx
-python -m pip install --user pipx
-python -m pipx ensurepath
-
-# Test pipx install (local wheel for now)
-pipx install dist/fss_mini_rag-*.whl
-
-# Verify installation
-pipx list
-which rag-mini  
-rag-mini --help
-
-# Test functionality
-mkdir pipx_test
-echo "def pipx_demo(): pass" > pipx_test/code.py
-rag-mini init -p pipx_test
-rag-mini search -p pipx_test "pipx demo"
-rm -rf pipx_test
-
-# Test uninstall
-pipx uninstall fss-mini-rag
-```
-
-**Success Criteria**:
- [ ] pipx installs without issues
- [ ] Package is isolated in own environment
- [ ] Command works globally
- [ ] No conflicts with system packages
- [ ] Uninstall is clean
-
-### 5.3 pip Installation Testing
-
-**Test Environments**: Multiple Python versions
-
-**Tests**:
-```bash
-# Test with --user flag
-pip install --user dist/fss_mini_rag-*.whl
-
-# Verify PATH  
-echo $PATH | grep -q "$(python -m site --user-base)/bin"
-which rag-mini
-rag-mini --help
-
-# Test functionality
-mkdir pip_test
-echo "class PipTest: pass" > pip_test/example.py
-rag-mini init -p pip_test
-rag-mini search -p pip_test "PipTest class"
-rm -rf pip_test
-
-# Test uninstall
-pip uninstall -y fss-mini-rag
-```
-
-**Success Criteria**:
- [ ] Installs correctly with --user
- [ ] PATH is configured properly
- [ ] No permission issues
- [ ] Works across Python versions
- [ ] Uninstall removes everything
-
---
-
-## Phase 6: End-to-End User Experience Testing
-
-### 6.1 New User Experience Testing
-
-**Scenario**: Complete beginner with no Python knowledge
-
-**Test Script**:
-```bash
-# Start with fresh system (VM/container)
-# Follow README instructions exactly
-
-# Linux/macOS user
-curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
-
-# Windows user  
-# iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
-
-# Follow quick start guide
-rag-mini --help
-mkdir my_project
-echo "def hello_world(): print('Hello RAG!')" > my_project/main.py
-echo "class DataProcessor: pass" > my_project/processor.py
-rag-mini init -p my_project
-rag-mini search -p my_project "hello function"
-rag-mini search -p my_project "DataProcessor class"
-```
-
-**Success Criteria**:
- [ ] Installation completes without user intervention
- [ ] Clear, helpful output throughout
- [ ] `rag-mini` command is available immediately
- [ ] Basic workflow works as expected
- [ ] Error messages are user-friendly
-
-### 6.2 Developer Experience Testing
-
-**Scenario**: Python developer wanting to contribute
-
-**Test Script**:
-```bash
-# Clone repository
-git clone https://github.com/fsscoding/fss-mini-rag.git
-cd fss-mini-rag
-
-# Development installation
-python -m venv .venv
-source .venv/bin/activate
-pip install -r requirements.txt
-pip install -e .
-
-# Test development commands
-make help
-make dev-install
-make test-dist
-make build
-make build-pyz
-
-# Test local installation
-pip install dist/*.whl
-rag-mini --help
-```
-
-**Success Criteria**:
- [ ] Development setup is straightforward
- [ ] Makefile commands work correctly
- [ ] Local builds install properly
- [ ] All development tools function
-
-### 6.3 Advanced User Testing
-
-**Scenario**: Power user with custom requirements
-
-**Test Script**:
-```bash
-# Test zipapp usage
-wget https://github.com/fsscoding/fss-mini-rag/releases/latest/download/rag-mini.pyz
-python rag-mini.pyz --help
-
-# Test with large codebase
-git clone https://github.com/django/django.git test_django
-python rag-mini.pyz init -p test_django
-python rag-mini.pyz search -p test_django "model validation"
-
-# Test server mode  
-python rag-mini.pyz server -p test_django
-curl http://localhost:7777/health
-
-# Clean up
-rm -rf test_django rag-mini.pyz
-```
-
-**Success Criteria**:
- [ ] Zipapp handles large codebases
- [ ] Performance is acceptable
- [ ] Server mode works correctly
- [ ] All advanced features function
-
---
-
-## Phase 7: Performance and Edge Case Testing
-
-### 7.1 Performance Testing
-
-**Objective**: Ensure installation and runtime performance is acceptable
-
-**Tests**:
-```bash
-# Installation speed testing
-time curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
-
-# Package size testing
-ls -lh dist/
-du -sh .venv/
-
-# Runtime performance
-time rag-mini init -p large_project/
-time rag-mini search -p large_project/ "complex query"
-
-# Memory usage
-rag-mini server &
-ps aux | grep rag-mini
-# Monitor memory usage during indexing/search
-```
-
-**Success Criteria**:
- [ ] Installation completes in < 5 minutes
- [ ] Package size is reasonable (< 50MB total)
- [ ] Indexing performance meets expectations
- [ ] Memory usage is acceptable
-
-### 7.2 Edge Case Testing
-
-**Objective**: Test unusual but possible scenarios
-
-**Tests**:
-```bash
-# Network issues
-# - Simulate slow connection
-# - Test offline scenarios  
-# - Test corporate firewalls
-
-# System edge cases
-# - Very old Python versions
-# - Systems without pip
-# - Read-only file systems
-# - Limited disk space
-
-# Unicode and special characters
-mkdir "测试项目"
-echo "def 函数名(): pass" > "测试项目/代码.py"
-rag-mini init -p "测试项目"
-rag-mini search -p "测试项目" "函数"
-
-# Very large files
-python -c "print('# ' + 'x'*1000000)" > large_file.py
-rag-mini init -p .
-# Should handle gracefully
-
-# Concurrent usage
-rag-mini server &
-for i in {1..10}; do
-    rag-mini search "test query $i" &
-done
-wait
-```
-
-**Success Criteria**:
- [ ] Graceful degradation with network issues
- [ ] Clear error messages for edge cases
- [ ] Handles Unicode correctly
- [ ] Doesn't crash on large files
- [ ] Concurrent access works properly
-
---
-
-## Phase 8: Security Testing
-
-### 8.1 Install Script Security
-
-**Objective**: Verify install scripts are secure
-
-**Tests**:
-```bash
-# Check install.sh
-shellcheck install.sh
-bandit -r install.sh (if applicable)
-
-# Verify HTTPS usage
-grep -n "http://" install.sh  # Should only be for localhost
-grep -n "curl.*-k" install.sh  # Should be none
-grep -n "wget.*--no-check" install.sh  # Should be none
-
-# Check PowerShell script
-# Run PowerShell security analyzer if available
-```
-
-**Success Criteria**:
- [ ] No shell script vulnerabilities
- [ ] Only HTTPS downloads (except localhost)
- [ ] No certificate verification bypasses
- [ ] Input validation where needed
- [ ] Clear error messages without info leakage
-
-### 8.2 Package Security
-
-**Objective**: Ensure distributed packages are secure
-
-**Tests**:
-```bash
-# Check for secrets in built packages
-python -m zipfile -l dist/*.whl | grep -i -E "(key|token|password|secret)"
-strings dist/rag-mini.pyz | grep -i -E "(key|token|password|secret)"
-
-# Verify package signatures (when implemented)
-# Check for unexpected executables in packages
-```
-
-**Success Criteria**:
- [ ] No hardcoded secrets in packages
- [ ] No unexpected executables
- [ ] Package integrity is verifiable
- [ ] Dependencies are from trusted sources
-
---
-
-## Phase 9: Documentation and User Support Testing
-
-### 9.1 Documentation Accuracy Testing
-
-**Objective**: Verify all documentation matches reality
-
-**Tests**:
-```bash
-# Test every command in README
-# Test every code example
-# Verify all links work
-# Check screenshots are current
-
-# Test error scenarios mentioned in docs
-# Verify troubleshooting sections
-```
-
-**Success Criteria**:
- [ ] All examples work as documented
- [ ] Links are valid and up-to-date
- [ ] Screenshots reflect current UI
- [ ] Error scenarios are accurate
-
-### 9.2 Support Path Testing
-
-**Objective**: Test user support workflows
-
-**Tests**:
- [ ] GitHub issue templates work
- [ ] Error messages include helpful information
- [ ] Common problems have clear solutions
- [ ] Contact information is correct
-
---
-
-## Phase 10: Release Readiness
-
-### 10.1 Pre-Release Checklist
-
- [ ] All tests from Phases 1-9 pass
- [ ] Version numbers are consistent
- [ ] Changelog is updated
- [ ] Documentation is current
- [ ] Security review complete
- [ ] Performance benchmarks recorded
- [ ] Backup plan exists for rollback
-
-### 10.2 Release Testing
-
-**TestPyPI Release**:
-```bash
-# Upload to TestPyPI first
-python -m twine upload --repository testpypi dist/*
-
-# Test installation from TestPyPI
-pip install --index-url https://test.pypi.org/simple/ fss-mini-rag
-```
-
-**Success Criteria**:
- [ ] TestPyPI upload succeeds
- [ ] Installation from TestPyPI works
- [ ] All functionality works with TestPyPI package
-
-### 10.3 Production Release
-
-**Only after TestPyPI success**:
-```bash
-# Create GitHub release
-git tag v2.1.0
-git push origin v2.1.0
-
-# Monitor automated workflows
-# Test installation after PyPI publication
-pip install fss-mini-rag
-```
-
---
-
-## Testing Tools and Infrastructure
-
-### Required Tools
- [ ] Docker (for clean environment testing)
- [ ] act (for local GitHub Actions testing)  
- [ ] shellcheck (for bash script analysis)
- [ ] Various Python versions (3.8-3.12)
- [ ] Windows VM/container access
- [ ] macOS testing environment (if possible)
-
-### Test Data
- [ ] Sample codebases of various sizes
- [ ] Unicode test files
- [ ] Edge case files (very large, empty, binary)
- [ ] Network simulation tools
-
-### Monitoring
- [ ] Performance benchmarks
- [ ] Error rate tracking  
- [ ] User feedback collection
- [ ] Download/install statistics
-
---
-
-## Conclusion
-
-This testing plan is comprehensive but necessary. Each phase builds on the previous ones, and skipping phases risks shipping broken functionality to users.
-
-**Estimated Timeline**: 3-5 days for complete testing
-**Risk Level**: HIGH if phases are skipped
-**Success Criteria**: 100% of critical tests must pass before release
-
-The goal is to ship a distribution system that "just works" for every user, every time. This level of testing ensures we achieve that goal.
--- a/docs/TESTING_SUMMARY.md
+++ b/docs/TESTING_SUMMARY.md
@ -1,179 +0,0 @@
-# FSS-Mini-RAG Distribution Testing Summary
-
-## What We've Built
-
-### 🏗️ **Complete Distribution Infrastructure**
-1. **Enhanced pyproject.toml** - Proper metadata for PyPI publication
-2. **Install Scripts** - One-line installers for Linux/macOS (`install.sh`) and Windows (`install.ps1`)
-3. **Build Scripts** - Zipapp builder (`scripts/build_pyz.py`) 
-4. **GitHub Actions** - Automated wheel building and PyPI publishing
-5. **Documentation** - Updated README with modern installation methods
-6. **Testing Framework** - Comprehensive testing infrastructure
-
-### 📦 **Installation Methods Implemented**
- **One-line installers** (auto-detects best method)
- **uv** - Ultra-fast package manager
- **pipx** - Isolated tool installation
- **pip** - Traditional method
- **zipapp** - Single-file portable distribution
-
-## Testing Status
-
-### ✅ **Phase 1: Structure Tests (COMPLETED)**
- [x] PyProject.toml validation - **PASSED**
- [x] Install script structure - **PASSED**
- [x] Build script presence - **PASSED** 
- [x] GitHub workflow syntax - **PASSED**
- [x] Documentation updates - **PASSED**
- [x] Import structure - **FAILED** (dependencies needed)
-
-**Result**: 5/6 tests passed. Structure is solid.
-
-### 🔄 **Phase 2: Build Tests (IN PROGRESS)**
- [ ] Build requirements check
- [ ] Source distribution build
- [ ] Wheel building 
- [ ] Zipapp creation
- [ ] Package metadata validation
-
-### 📋 **Remaining Test Phases**
-
-#### **Phase 3: Installation Testing**
- [ ] Test built packages install correctly
- [ ] Test entry points work
- [ ] Test basic CLI functionality
- [ ] Test in clean virtual environments
-
-#### **Phase 4: Install Script Testing**
- [ ] Linux/macOS install.sh in containers
- [ ] Windows install.ps1 testing
- [ ] Edge cases (no python, no internet, etc.)
- [ ] Fallback mechanism testing (uv → pipx → pip)
-
-#### **Phase 5: GitHub Actions Testing**
- [ ] Local workflow testing with `act`
- [ ] Fork testing with real CI
- [ ] TestPyPI publishing test
- [ ] Release creation testing
-
-#### **Phase 6: End-to-End User Experience**
- [ ] Fresh system installation
- [ ] Follow README exactly
- [ ] Test error scenarios
- [ ] Performance benchmarking
-
-## Current Test Tools
-
-### 📝 **Automated Test Scripts**
-1. **`scripts/validate_setup.py`** - File structure validation (✅ Working)
-2. **`scripts/phase1_basic_tests.py`** - Basic structure tests (✅ Working) 
-3. **`scripts/phase2_build_tests.py`** - Package building tests (🔄 Running)
-4. **`scripts/setup_test_environments.py`** - Multi-version env setup (📦 Complex)
-
-### 🛠️ **Manual Test Commands**
-```bash
-# Quick validation
-python scripts/validate_setup.py
-
-# Structure tests  
-python scripts/phase1_basic_tests.py
-
-# Build tests
-python scripts/phase2_build_tests.py
-
-# Manual builds
-make build          # Source + wheel
-make build-pyz      # Zipapp
-make test-dist      # Validation
-```
-
-## Issues Identified
-
-### ⚠️ **Current Blockers**
-1. **Dependencies** - Full testing requires installing heavy ML dependencies
-2. **Environment Setup** - Multiple Python versions not available on current system  
-3. **Zipapp Size** - May be very large due to numpy/torch dependencies
-4. **Network Tests** - Install scripts need real network testing
-
-### 🔧 **Mitigations**
- **Staged Testing** - Test structure first, then functionality
- **Container Testing** - Use Docker for clean environments
- **Dependency Isolation** - Test core CLI without heavy ML deps
- **Mock Network** - Local package server testing
-
-## Deployment Strategy
-
-### 🚀 **Safe Deployment Path**
-
-#### **Stage 1: TestPyPI Validation**
-1. Complete Phase 2 build tests
-2. Upload to TestPyPI  
-3. Test installation from TestPyPI
-4. Verify all install methods work
-
-#### **Stage 2: GitHub Release Testing**
-1. Create test release on fork
-2. Validate GitHub Actions workflow
-3. Test automated wheel building
-4. Verify release assets
-
-#### **Stage 3: Production Release**
-1. Final validation on clean systems
-2. Documentation review
-3. Create production release
-4. Monitor installation success rates
-
-### 📊 **Success Criteria**
-
-For each phase, we need:
- **95%+ test pass rate**
- **Installation time < 5 minutes**
- **Clear error messages** for failures
- **Cross-platform compatibility**
- **Fallback mechanisms working**
-
-## Next Steps (Priority Order)
-
-1. **Complete Phase 2** - Finish build testing
-2. **Test Built Packages** - Verify they install and run
-3. **Container Testing** - Test install scripts in Docker
-4. **Fork Testing** - Test GitHub Actions in controlled environment
-5. **TestPyPI Release** - Safe production test
-6. **Clean System Testing** - Final validation
-7. **Production Release** - Go live
-
-## Estimated Timeline
-
- **Phase 2 Completion**: 1-2 hours
- **Phase 3-4 Testing**: 4-6 hours  
- **Phase 5-6 Testing**: 4-8 hours
- **Deployment**: 2-4 hours
-
-**Total**: 2-3 days for comprehensive testing
-
-## Risk Assessment
-
-### 🔴 **High Risk**
- Skipping environment testing
- Not testing install scripts
- Releasing without TestPyPI validation
-
-### 🟡 **Medium Risk**  
- Large zipapp file size
- Dependency compatibility issues
- Network connectivity problems
-
-### 🟢 **Low Risk**
- Documentation accuracy
- GitHub workflow syntax
- Package metadata
-
-## Conclusion
-
-We've built a comprehensive modern distribution system for FSS-Mini-RAG. The infrastructure is solid (5/6 structure tests pass), but we need systematic testing before release.
-
-**The testing plan is extensive but necessary** - we're moving from a basic pip install to a professional-grade distribution system that needs to work flawlessly for users worldwide.
-
-**Current Status**: Infrastructure complete, systematic testing in progress.
-**Confidence Level**: High for structure, medium for functionality pending tests.
-**Ready for Release**: Not yet - need 2-3 days of proper testing.
--- a/docs/TROUBLESHOOTING.md
+++ b/docs/TROUBLESHOOTING.md
@ -1,497 +0,0 @@
-# 🛠️ Troubleshooting Guide - Common Issues & Solutions
-
-*Having problems? You're not alone! Here are solutions to the most common issues beginners encounter.*
-
---
-
-## 🚀 Installation & Setup Issues
-
-### ❌ "Command not found: ollama"
-**Problem:** The system can't find Ollama  
-**Solution:** 
-```bash
-# Install Ollama
-curl -fsSL https://ollama.ai/install.sh | sh
-# Or on Mac: brew install ollama
-# Start Ollama
-ollama serve
-```
-**Alternative:** Use the system without Ollama - it will automatically fall back to other embedding methods.
-
-### ❌ "Permission denied" when running scripts
-**Problem:** Script files aren't executable  
-**Solution:**
-```bash
-chmod +x rag-mini.py rag-tui.py install_mini_rag.sh
-# Or run with python directly:
-python3 rag-mini.py --help
-```
-
-### ❌ "Module not found" or import errors
-**Problem:** Python dependencies not installed  
-**Solution:**
-```bash
-# Install dependencies
-pip3 install -r requirements.txt
-# If that fails, try:
-pip3 install --user -r requirements.txt
-```
-
-### ❌ Installation script fails
-**Problem:** `./install_mini_rag.sh` doesn't work  
-**Solution:**
-```bash
-# Make it executable first
-chmod +x install_mini_rag.sh
-# Then run
-./install_mini_rag.sh
-# Or use proven manual method (100% reliable):
-python3 -m venv .venv
-.venv/bin/python -m pip install -r requirements.txt  # 2-8 minutes
-.venv/bin/python -m pip install .                    # ~1 minute
-source .venv/bin/activate
-python3 -c "import mini_rag; print('✅ Installation successful')"
-```
-
-### ❌ Installation takes too long / times out
-**Problem:** Installation seems stuck or takes forever  
-**Expected Timing:** 2-3 minutes fast internet, 5-10 minutes slow internet  
-**Solutions:**
-
-1. **Large dependencies are normal:**
-   - LanceDB: 36MB (vector database)
-   - PyArrow: 43MB (data processing) 
-   - PyLance: 44MB (language parsing)
-   - Total ~123MB + dependencies
-
-2. **For agents/CI/CD - run in background:**
-   ```bash
-   ./install_mini_rag.sh --headless &
-   # Monitor with: tail -f install.log
-   ```
-
-3. **Check if installation is actually progressing:**
-   ```bash
-   # Check pip cache (should be growing)
-   du -sh ~/.cache/pip
-   
-   # Check if Python packages are installing
-   ls -la .venv/lib/python*/site-packages/
-   ```
-
-4. **Slow connection fallback:**
-   ```bash
-   # Increase pip timeout
-   .venv/bin/python -m pip install -r requirements.txt --timeout 1000
-   ```
-
---
-
-## 🔍 Search & Results Issues
-
-### ❌ "No results found" for everything
-**Problem:** Search isn't finding anything  
-**Diagnosis & Solutions:**
-
-1. **Check if project is indexed:**
-   ```bash
-   ./rag-mini status /path/to/project
-   # If not indexed:
-   ./rag-mini index /path/to/project
-   ```
-
-2. **Lower similarity threshold:**
-   - Edit config file, change `similarity_threshold: 0.05`
-   - Or try: `./rag-mini search /path/to/project "query" --threshold 0.05`
-
-3. **Try broader search terms:**
-   - Instead of: "getUserById" 
-   - Try: "user function" or "get user"
-
-4. **Enable query expansion:**
-   - Edit config: `expand_queries: true`
-   - Or use TUI which enables it automatically
-
-### ❌ Search results are irrelevant/weird
-**Problem:** Getting results that don't match your search  
-**Solutions:**
-
-1. **Increase similarity threshold:**
-   ```yaml
-   search:
-     similarity_threshold: 0.3  # Higher = more picky
-   ```
-
-2. **Use more specific terms:**
-   - Instead of: "function"
-   - Try: "login function" or "authentication method"
-
-3. **Check BM25 setting:**
-   ```yaml
-   search:
-     enable_bm25: true  # Helps find exact word matches
-   ```
-
-### ❌ Search is too slow
-**Problem:** Takes too long to get results  
-**Solutions:**
-
-1. **Disable query expansion:**
-   ```yaml
-   search:
-     expand_queries: false
-   ```
-
-2. **Reduce result limit:**
-   ```yaml
-   search:
-     default_top_k: 5  # Instead of 10
-   ```
-
-3. **Use faster embedding method:**
-   ```yaml
-   embedding:
-     preferred_method: hash  # Fastest but lower quality
-   ```
-
-4. **Smaller batch size:**
-   ```yaml
-   embedding:
-     batch_size: 16  # Instead of 32
-   ```
-
---
-
-## 🤖 AI/LLM Issues
-
-### ❌ "LLM synthesis unavailable" 
-**Problem:** AI explanations aren't working  
-**Solutions:**
-
-1. **Check Ollama is running:**
-   ```bash
-   # In one terminal:
-   ollama serve
-   # In another:
-   ollama list  # Should show installed models
-   ```
-
-2. **Install a model:**
-   ```bash
-   ollama pull qwen2.5:3b    # Good balance of speed and quality
-   # Or: ollama pull qwen3:4b   # Larger but better quality
-   ```
-
-3. **Test connection:**
-   ```bash
-   curl http://localhost:11434/api/tags
-   # Should return JSON with model list
-   ```
-
-### ❌ AI gives weird/wrong answers
-**Problem:** LLM responses don't make sense  
-**Solutions:**
-
-1. **Lower temperature:**
-   ```yaml
-   llm:
-     synthesis_temperature: 0.1  # More factual, less creative
-   ```
-
-2. **Try different model:**
-   ```bash
-   ollama pull qwen3:1.7b   # Recommended: excellent quality (default priority)
-   ollama pull qwen3:0.6b   # Surprisingly good for CPU-only  
-   ollama pull qwen3:4b     # Highest quality, slower
-   ```
-
-3. **Use synthesis mode instead of exploration:**
-   ```bash
-   ./rag-mini search /path "query" --synthesize
-   # Instead of: ./rag-mini explore /path
-   ```
-
---
-
-## 💾 Memory & Performance Issues
-
-### ❌ "Out of memory" or computer freezes during indexing
-**Problem:** System runs out of RAM  
-**Solutions:**
-
-1. **Reduce batch size:**
-   ```yaml
-   embedding:
-     batch_size: 8  # Much smaller batches
-   ```
-
-2. **Lower streaming threshold:**
-   ```yaml
-   streaming:
-     threshold_bytes: 512000  # 512KB instead of 1MB
-   ```
-
-3. **Index smaller projects first:**
-   ```bash
-   # Exclude large directories
-   ./rag-mini index /path/to/project --exclude "node_modules/**,dist/**"
-   ```
-
-4. **Use hash embeddings:**
-   ```yaml
-   embedding:
-     preferred_method: hash  # Much less memory
-   ```
-
-### ❌ Indexing is extremely slow
-**Problem:** Taking forever to index project  
-**Solutions:**
-
-1. **Exclude unnecessary files:**
-   ```yaml
-   files:
-     exclude_patterns:
-       - "node_modules/**"
-       - ".git/**" 
-       - "*.log"
-       - "build/**"
-       - "*.min.js"  # Minified files
-   ```
-
-2. **Increase minimum file size:**
-   ```yaml
-   files:
-     min_file_size: 200  # Skip tiny files
-   ```
-
-3. **Use simpler chunking:**
-   ```yaml
-   chunking:
-     strategy: fixed  # Faster than semantic
-   ```
-
-4. **More workers (if you have good CPU):**
-   ```bash
-   ./rag-mini index /path/to/project --workers 8
-   ```
-
---
-
-## ⚙️ Configuration Issues
-
-### ❌ "Invalid configuration" errors
-**Problem:** Config file has errors  
-**Solutions:**
-
-1. **Check YAML syntax:**
-   ```bash
-   python3 -c "import yaml; yaml.safe_load(open('config.yaml'))"
-   ```
-
-2. **Copy from working example:**
-   ```bash
-   cp examples/config.yaml .mini-rag/config.yaml
-   ```
-
-3. **Reset to defaults:**
-   ```bash
-   rm .mini-rag/config.yaml
-   # System will recreate with defaults
-   ```
-
-### ❌ Changes to config aren't taking effect
-**Problem:** Modified settings don't work  
-**Solutions:**
-
-1. **Restart TUI/CLI:**
-   - Configuration is loaded at startup
-   - Exit and restart the interface
-
-2. **Check config location:**
-   ```bash
-   # Project-specific config:
-   /path/to/project/.mini-rag/config.yaml
-   # Global config:
-   ~/.mini-rag/config.yaml
-   ```
-
-3. **Force re-index after config changes:**
-   ```bash
-   ./rag-mini index /path/to/project --force
-   ```
-
---
-
-## 🖥️ Interface Issues
-
-### ❌ TUI looks broken/garbled
-**Problem:** Text interface isn't displaying correctly  
-**Solutions:**
-
-1. **Try different terminal:**
-   ```bash
-   # Instead of basic terminal, try:
-   # - iTerm2 (Mac)
-   # - Windows Terminal (Windows)  
-   # - GNOME Terminal (Linux)
-   ```
-
-2. **Use CLI directly:**
-   ```bash
-   ./rag-mini --help  # Skip TUI entirely
-   ```
-
-3. **Check terminal size:**
-   ```bash
-   # Make terminal window larger (TUI needs space)
-   # At least 80x24 characters
-   ```
-
-### ❌ "Keyboard interrupt" or TUI crashes
-**Problem:** Interface stops responding  
-**Solutions:**
-
-1. **Use Ctrl+C to exit cleanly:**
-   - Don't force-quit if possible
-
-2. **Check for conflicting processes:**
-   ```bash
-   ps aux | grep rag-tui
-   # Kill any stuck processes
-   ```
-
-3. **Use CLI as fallback:**
-   ```bash
-   ./rag-mini search /path/to/project "your query"
-   ```
-
---
-
-## 📁 File & Path Issues
-
-### ❌ "Project not found" or "Permission denied"
-**Problem:** Can't access project directory  
-**Solutions:**
-
-1. **Check path exists:**
-   ```bash
-   ls -la /path/to/project
-   ```
-
-2. **Check permissions:**
-   ```bash
-   # Make sure you can read the directory
-   chmod -R +r /path/to/project
-   ```
-
-3. **Use absolute paths:**
-   ```bash
-   # Instead of: ./rag-mini index ../my-project
-   # Use: ./rag-mini index /full/path/to/my-project
-   ```
-
-### ❌ "No files found to index"
-**Problem:** System doesn't see any files  
-**Solutions:**
-
-1. **Check include patterns:**
-   ```yaml
-   files:
-     include_patterns:
-       - "**/*.py"     # Only Python files
-       - "**/*.js"     # Add JavaScript
-       - "**/*.md"     # Add Markdown
-   ```
-
-2. **Check exclude patterns:**
-   ```yaml
-   files:
-     exclude_patterns: []  # Remove all exclusions temporarily
-   ```
-
-3. **Lower minimum file size:**
-   ```yaml
-   files:
-     min_file_size: 10  # Instead of 50
-   ```
-
---
-
-## 🔍 Quick Diagnostic Commands
-
-**Check system status:**
-```bash
-./rag-mini status /path/to/project
-```
-
-**Test embeddings:**
-```bash
-python3 -c "from mini_rag.ollama_embeddings import OllamaEmbedder; e=OllamaEmbedder(); print(e.get_embedding_info())"
-```
-
-**Verify installation:**
-```bash
-python3 -c "import mini_rag; print('✅ RAG system installed')"
-```
-
-**Test Ollama connection:**
-```bash
-curl -s http://localhost:11434/api/tags | python3 -m json.tool
-```
-
-**Check disk space:**
-```bash
-df -h .mini-rag/  # Make sure you have space for index
-```
-
---
-
-## 🆘 When All Else Fails
-
-1. **Start fresh:**
-   ```bash
-   rm -rf .mini-rag/
-   ./rag-mini index /path/to/project
-   ```
-
-2. **Use minimal config:**
-   ```yaml
-   # Simplest possible config:
-   chunking:
-     strategy: fixed
-   embedding:  
-     preferred_method: auto
-   search:
-     expand_queries: false
-   ```
-
-3. **Try a tiny test project:**
-   ```bash
-   mkdir test-project
-   echo "def hello(): print('world')" > test-project/test.py
-   ./rag-mini index test-project
-   ./rag-mini search test-project "hello function"
-   ```
-
-4. **Get help:**
-   - Check the main README.md
-   - Look at examples/ directory
-   - Try the basic_usage.py example
-
---
-
-## 💡 Prevention Tips
-
-**For beginners:**
- Start with default settings
- Use the TUI interface first
- Test with small projects initially
- Keep Ollama running in background
-
-**For better results:**
- Be specific in search queries
- Use the glossary to understand terms
- Experiment with config settings on test projects first
- Use synthesis mode for quick answers, exploration for learning
-
-**Remember:** This is a learning tool! Don't be afraid to experiment and try different settings. The worst thing that can happen is you delete the `.mini-rag` directory and start over. 🚀
--- a/docs/TUI_GUIDE.md
+++ b/docs/TUI_GUIDE.md
@ -23,9 +23,8 @@ That's it! The TUI will guide you through everything.
 ### User Flow
 1. **Select Project** → Choose directory to search
 2. **Index Project** → Process files for search
-3. **Search Content** → Find what you need quickly
-4. **Explore Project** → Interactive AI-powered discovery (NEW!)
-5. **Configure System** → Customize search behavior
+3. **Search Content** → Find what you need
+4. **Explore Results** → See full context and files

 ## Main Menu Options

@ -93,10 +92,10 @@ That's it! The TUI will guide you through everything.
 - **Full content** - Up to 8 lines of actual code/text
 - **Continuation info** - How many more lines exist

-**Tips You'll Learn**:
- Verbose output with `--verbose` flag for debugging
- How search scoring works
- Finding the right search terms
+**Advanced Tips Shown**:
+- Enhanced search with `./rag-mini-enhanced`
+- Verbose output with `--verbose` flag
+- Context-aware search for related code

 **What You Learn**:
 - Semantic search vs text search (finds concepts, not just words)
@ -107,66 +106,11 @@ That's it! The TUI will guide you through everything.
 **CLI Commands Shown**:
 ```bash
 ./rag-mini search /path/to/project "authentication logic"
-./rag-mini search /path/to/project "user login" --top-k 10
+./rag-mini search /path/to/project "user login" --limit 10
+./rag-mini-enhanced context /path/to/project "login()"
 ```

-### 4. Explore Project (NEW!)
-
-**Purpose**: Interactive AI-powered discovery with conversation memory
-
-**What Makes Explore Different**:
- **Conversational**: Ask follow-up questions that build on previous answers
- **AI Reasoning**: Uses thinking mode for deeper analysis and explanations
- **Educational**: Perfect for understanding unfamiliar codebases
- **Context Aware**: Remembers what you've already discussed
-
-**Interactive Process**:
-1. **First Question Guidance**: Clear prompts with example questions
-2. **Starter Suggestions**: Random helpful questions to get you going
-3. **Natural Follow-ups**: Ask "why?", "how?", "show me more" naturally
-4. **Session Memory**: AI remembers your conversation context
-
-**Explore Mode Features**:
-
-**Quick Start Options**:
- **Option 1 - Help**: Show example questions and explore mode capabilities
- **Option 2 - Status**: Project information and current exploration session
- **Option 3 - Suggest**: Get a random starter question picked from 7 curated examples
-
-**Starter Questions** (randomly suggested):
- "What are the main components of this project?"
- "How is error handling implemented?"
- "Show me the authentication and security logic"
- "What are the key functions I should understand first?"
- "How does data flow through this system?"
- "What configuration options are available?"
- "Show me the most important files to understand"
-
-**Advanced Usage**:
- **Deep Questions**: "Why is this function slow?" "How does the security work?"
- **Code Analysis**: "Explain this algorithm" "What could go wrong here?"
- **Architecture**: "How do these components interact?" "What's the design pattern?"
- **Best Practices**: "Is this code following best practices?" "How would you improve this?"
-
-**What You Learn**:
- **Conversational AI**: How to have productive technical conversations with AI
- **Code Understanding**: Deep analysis capabilities beyond simple search
- **Context Building**: How conversation memory improves over time
- **Question Techniques**: Effective ways to explore unfamiliar code
-
-**CLI Commands Shown**:
-```bash
-./rag-mini explore /path/to/project    # Start interactive exploration
-```
-
-**Perfect For**:
- Understanding new codebases
- Code review and analysis
- Learning from existing projects
- Documenting complex systems
- Onboarding new team members
-
-### 5. View Status
+### 4. View Status

 **Purpose**: Check system health and project information

@ -195,61 +139,32 @@ That's it! The TUI will guide you through everything.
 ./rag-mini status /path/to/project
 ```

-### 6. Configuration Manager (ENHANCED!)
+### 5. Configuration

-**Purpose**: Interactive configuration with user-friendly options
+**Purpose**: View and understand system settings

-**New Interactive Features**:
- **Live Configuration Dashboard** - See current settings with clear status
- **Quick Configuration Options** - Change common settings without YAML editing
- **Guided Setup** - Explanations and presets for each option
- **Validation** - Input checking and helpful error messages
+**Configuration Display**:
+- **Current settings** - Chunk size, strategy, file patterns
+- **File location** - Where config is stored
+- **Setting explanations** - What each option does
+- **Quick actions** - View or edit config directly

-**Main Configuration Options**:
+**Key Settings Explained**:
+- **chunking.max_size** - How large each searchable piece is
+- **chunking.strategy** - Smart (semantic) vs simple (fixed size)
+- **files.exclude_patterns** - Skip certain files/directories
+- **embedding.preferred_method** - AI model preference
+- **search.default_limit** - How many results to show

-**1. Adjust Chunk Size**:
- **Presets**: Small (1000), Medium (2000), Large (3000), or custom
- **Guidance**: Performance vs accuracy explanations
- **Smart Validation**: Range checking and recommendations
-
-**2. Toggle Query Expansion**:
- **Educational Info**: Clear explanation of benefits and requirements  
- **Easy Toggle**: Simple on/off with confirmation
- **System Check**: Verifies Ollama availability for AI features
-
-**3. Configure Search Behavior**:
- **Result Count**: Adjust default number of search results (1-100)
- **BM25 Toggle**: Enable/disable keyword matching boost
- **Similarity Threshold**: Fine-tune match sensitivity (0.0-1.0)
-
-**4. View/Edit Configuration File**:
- **Full File Viewer**: Display complete config with syntax highlighting
- **Editor Instructions**: Commands for nano, vim, VS Code
- **YAML Help**: Format explanation and editing tips
-
-**5. Reset to Defaults**:
- **Safe Reset**: Confirmation before resetting all settings
- **Clear Explanations**: Shows what defaults will be restored
- **Backup Reminder**: Suggests saving current config first
-
-**6. Advanced Settings**:
- **File Filtering**: Min file size, exclude patterns (view only)
- **Performance Settings**: Batch sizes, streaming thresholds
- **LLM Preferences**: Model rankings and selection priorities
-
-**Key Settings Dashboard**:
- 📁 **Chunk size**: 2000 characters (with emoji indicators)
- 🧠 **Chunking strategy**: semantic
- 🔍 **Search results**: 10 results
- 📊 **Embedding method**: ollama
- 🚀 **Query expansion**: enabled/disabled
- ⚡ **LLM synthesis**: enabled/disabled
+**Interactive Options**:
+- **[V]iew config** - See full configuration file
+- **[E]dit path** - Get command to edit configuration

 **What You Learn**:
- **Configuration Impact**: How settings affect search quality and speed
- **Interactive YAML**: Easier than manual editing for beginners
- **Best Practices**: Recommended settings for different project types
- **System Understanding**: How all components work together
+- How configuration affects search quality
+- YAML configuration format
+- Which settings to adjust for different projects
+- Where to find advanced options

 **CLI Commands Shown**:
 ```bash
@ -257,13 +172,7 @@ cat /path/to/project/.mini-rag/config.yaml   # View config
 nano /path/to/project/.mini-rag/config.yaml  # Edit config
 ```

-**Perfect For**:
- Beginners who find YAML intimidating
- Quick adjustments without memorizing syntax
- Understanding what each setting actually does
- Safe experimentation with guided validation
-
-### 7. CLI Command Reference
+### 6. CLI Command Reference

 **Purpose**: Complete command reference for transitioning to CLI

--- a/examples/analyze_dependencies.py
+++ b/examples/analyze_dependencies.py
@ -4,110 +4,106 @@ Analyze FSS-Mini-RAG dependencies to determine what's safe to remove.
 """

 import ast
-from collections import defaultdict
+import os
 from pathlib import Path
-
+from collections import defaultdict

 def find_imports_in_file(file_path):
    """Find all imports in a Python file."""
    try:
-        with open(file_path, "r", encoding="utf-8") as f:
+        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
-
+        
        tree = ast.parse(content)
        imports = set()
-
+        
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
-                    imports.add(alias.name.split(".")[0])
+                    imports.add(alias.name.split('.')[0])
            elif isinstance(node, ast.ImportFrom):
                if node.module:
-                    module = node.module.split(".")[0]
+                    module = node.module.split('.')[0]
                    imports.add(module)
-
+        
        return imports
    except Exception as e:
        print(f"Error analyzing {file_path}: {e}")
        return set()

-
 def analyze_dependencies():
    """Analyze all dependencies in the project."""
    project_root = Path(__file__).parent
    mini_rag_dir = project_root / "mini_rag"
-
+    
    # Find all Python files
    python_files = []
    for file_path in mini_rag_dir.glob("*.py"):
        if file_path.name != "__pycache__":
            python_files.append(file_path)
-
+    
    # Analyze imports
    file_imports = {}
    internal_deps = defaultdict(set)
-
+    
    for file_path in python_files:
        imports = find_imports_in_file(file_path)
        file_imports[file_path.name] = imports
-
+        
        # Check for internal imports
        for imp in imports:
            if imp in [f.stem for f in python_files]:
                internal_deps[file_path.name].add(imp)
-
+    
    print("🔍 FSS-Mini-RAG Dependency Analysis")
    print("=" * 50)
-
+    
    # Show what each file imports
    print("\n📁 File Dependencies:")
    for filename, imports in file_imports.items():
        internal = [imp for imp in imports if imp in [f.stem for f in python_files]]
        if internal:
            print(f"   {filename} imports: {', '.join(internal)}")
-
+    
    # Show reverse dependencies (what depends on each file)
    reverse_deps = defaultdict(set)
    for file, deps in internal_deps.items():
        for dep in deps:
            reverse_deps[dep].add(file)
-
+    
    print("\n🔗 Reverse Dependencies (what uses each file):")
    all_modules = {f.stem for f in python_files}
-
+    
    for module in sorted(all_modules):
        users = reverse_deps.get(module, set())
        if users:
            print(f"   {module}.py is used by: {', '.join(users)}")
        else:
            print(f"   {module}.py is NOT imported by any other file")
-
+    
    # Safety analysis
    print("\n🛡️ Safety Analysis:")
-
+    
    # Files imported by __init__.py are definitely needed
-    init_imports = file_imports.get("__init__.py", set())
+    init_imports = file_imports.get('__init__.py', set())
    print(f"   Core modules (imported by __init__.py): {', '.join(init_imports)}")
-
+    
    # Files not used anywhere might be safe to remove
    unused_files = []
    for module in all_modules:
-        if module not in reverse_deps and module != "__init__":
+        if module not in reverse_deps and module != '__init__':
            unused_files.append(module)
-
+    
    if unused_files:
        print(f"   ⚠️ Potentially unused: {', '.join(unused_files)}")
        print("   ❗ Verify these aren't used by CLI or external scripts!")
-
+    
    # Check CLI usage
-    cli_files = ["cli.py", "enhanced_cli.py"]
+    cli_files = ['cli.py', 'enhanced_cli.py']
    for cli_file in cli_files:
        if cli_file in file_imports:
            cli_imports = file_imports[cli_file]
-            print(
-                f"   📋 {cli_file} imports: {', '.join([imp for imp in cli_imports if imp in all_modules])}"
-            )
-
+            print(f"   📋 {cli_file} imports: {', '.join([imp for imp in cli_imports if imp in all_modules])}")

 if __name__ == "__main__":
-    analyze_dependencies()
+    analyze_dependencies()
--- a/examples/basic_usage.py
+++ b/examples/basic_usage.py
@ -5,67 +5,64 @@ Shows how to index a project and search it programmatically.
 """

 from pathlib import Path
-
-from mini_rag import CodeEmbedder, CodeSearcher, ProjectIndexer
-
+from mini_rag import ProjectIndexer, CodeSearcher, CodeEmbedder

 def main():
    # Example project path - change this to your project
    project_path = Path(".")  # Current directory
-
+    
    print("=== FSS-Mini-RAG Basic Usage Example ===")
    print(f"Project: {project_path}")
-
+    
    # Initialize the embedding system
    print("\n1. Initializing embedding system...")
    embedder = CodeEmbedder()
    print(f"   Using: {embedder.get_embedding_info()['method']}")
-
-    # Initialize indexer and searcher
+    
+    # Initialize indexer and searcher  
    indexer = ProjectIndexer(project_path, embedder)
    searcher = CodeSearcher(project_path, embedder)
-
+    
    # Index the project
    print("\n2. Indexing project...")
    result = indexer.index_project()
-
+    
    print(f"   Files processed: {result.get('files_processed', 0)}")
    print(f"   Chunks created: {result.get('chunks_created', 0)}")
    print(f"   Time taken: {result.get('indexing_time', 0):.2f}s")
-
+    
    # Get index statistics
    print("\n3. Index statistics:")
    stats = indexer.get_stats()
    print(f"   Total files: {stats.get('total_files', 0)}")
    print(f"   Total chunks: {stats.get('total_chunks', 0)}")
    print(f"   Languages: {', '.join(stats.get('languages', []))}")
-
+    
    # Example searches
    queries = [
        "chunker function",
-        "embedding system",
+        "embedding system", 
        "search implementation",
        "file watcher",
-        "error handling",
+        "error handling"
    ]
-
+    
    print("\n4. Example searches:")
    for query in queries:
        print(f"\n   Query: '{query}'")
-        results = searcher.search(query, top_k=3)
-
+        results = searcher.search(query, limit=3)
+        
        if results:
            for i, result in enumerate(results, 1):
                print(f"      {i}. {result.file_path.name} (score: {result.score:.3f})")
                print(f"         Type: {result.chunk_type}")
                # Show first 60 characters of content
-                content_preview = result.content.replace("\n", " ")[:60]
+                content_preview = result.content.replace('\n', ' ')[:60]
                print(f"         Preview: {content_preview}...")
        else:
            print("      No results found")
-
+    
    print("\n=== Example Complete ===")

-
 if __name__ == "__main__":
-    main()
+    main()
--- a/examples/config-beginner.yaml
+++ b/examples/config-beginner.yaml
@ -1,73 +0,0 @@
-# 🚀 BEGINNER CONFIG - Simple & Reliable
-# Perfect for newcomers who want everything to "just work"
-# Copy this to your project: cp examples/config-beginner.yaml /path/to/project/.mini-rag/config.yaml
-
-#═══════════════════════════════════════════════════════════════════════
-# ✨ BEGINNER-FRIENDLY SETTINGS - No overwhelming options!
-#═══════════════════════════════════════════════════════════════════════
-
-# 📝 How to split your code files (keep it simple)
-chunking:
-  max_size: 2000          # Good size for most code (about 50 lines)
-  min_size: 150           # Skip tiny fragments
-  strategy: semantic      # Smart splitting (respects functions/classes)
-
-# 🌊 Handle large files without crashing
-streaming:
-  enabled: true           # Always keep this on
-  threshold_bytes: 1048576  # 1MB - good for most computers
-
-# 📁 Which files to include
-files:
-  min_file_size: 50       # Skip empty/tiny files
-  
-  # 🚫 Skip these folders (saves time and storage)
-  exclude_patterns:
-    - "node_modules/**"   # JavaScript packages
-    - ".git/**"          # Git history
-    - "__pycache__/**"   # Python cache
-    - "*.pyc"           # Python bytecode
-    - ".venv/**"        # Python virtual environments
-    - "build/**"        # Build artifacts
-    - "dist/**"         # Distribution files
-  
-  include_patterns:
-    - "**/*"             # Everything else
-
-# 🧠 Embeddings (the "AI fingerprints" of your code)
-embedding:
-  preferred_method: auto   # Try best method, fall back if needed - SAFEST
-  batch_size: 32          # Good balance of speed and memory usage
-
-# 🔍 Search behavior  
-search:
-  default_top_k: 10       # Show 10 results (good starting point)
-  enable_bm25: true       # Find exact word matches too
-  similarity_threshold: 0.1  # Pretty permissive (shows more results)
-  expand_queries: false   # Keep it simple for now
-
-# 🤖 AI explanations (optional but helpful)
-# 💡 WANT DIFFERENT LLM? See examples/config-llm-providers.yaml for OpenAI, Claude, etc.
-llm:
-  synthesis_model: auto         # Pick best available model
-  enable_synthesis: false       # Turn on manually with --synthesize
-  synthesis_temperature: 0.3    # Factual answers
-  cpu_optimized: true          # Good for computers without fancy graphics cards
-  enable_thinking: true        # Shows reasoning (great for learning!)
-  max_expansion_terms: 6       # Keep expansions focused
-
-#═══════════════════════════════════════════════════════════════════════
-# 🎯 WHAT THIS CONFIG DOES:
-# 
-# ✅ Works reliably across different systems
-# ✅ Good performance on modest hardware  
-# ✅ Balanced search results (not too few, not too many)
-# ✅ Safe defaults that won't crash your computer
-# ✅ AI features available but not overwhelming
-# 
-# 🚀 TO GET STARTED:
-# 1. Copy this file to your project: .mini-rag/config.yaml
-# 2. Index your project: ./rag-mini index /path/to/project
-# 3. Search: ./rag-mini search /path/to/project "your query"
-# 4. Try AI: ./rag-mini search /path/to/project "your query" --synthesize
-#═══════════════════════════════════════════════════════════════════════
--- a/examples/config-fast.yaml
+++ b/examples/config-fast.yaml
@ -1,105 +0,0 @@
-# ⚡ FAST CONFIG - Maximum Speed
-# When you need quick results and don't mind slightly lower quality
-# Perfect for: large projects, frequent searches, older computers
-
-#═══════════════════════════════════════════════════════════════════════
-# 🚀 SPEED-OPTIMIZED SETTINGS - Everything tuned for performance!
-#═══════════════════════════════════════════════════════════════════════
-
-# 📝 Chunking optimized for speed
-chunking:
-  max_size: 1500          # Smaller chunks = faster processing
-  min_size: 100           # More aggressive minimum
-  strategy: fixed         # Simple splitting (faster than semantic)
-
-# 🌊 More aggressive streaming for memory efficiency
-streaming:
-  enabled: true
-  threshold_bytes: 512000  # 512KB - process big files in smaller chunks
-
-# 📁 File filtering optimized for speed
-files:
-  min_file_size: 100      # Skip more tiny files
-  
-  # 🚫 Aggressive exclusions for speed
-  exclude_patterns:
-    - "node_modules/**"
-    - ".git/**"
-    - "__pycache__/**"
-    - "*.pyc"
-    - ".venv/**"
-    - "venv/**"  
-    - "build/**"
-    - "dist/**"
-    - "*.min.js"          # Skip minified files
-    - "*.min.css"         # Skip minified CSS
-    - "*.log"             # Skip log files
-    - "*.tmp"             # Skip temp files
-    - "target/**"         # Rust/Java build dirs
-    - ".next/**"          # Next.js build dir
-    - ".nuxt/**"          # Nuxt build dir
-  
-  include_patterns:
-    - "**/*.py"           # Focus on common code files only
-    - "**/*.js" 
-    - "**/*.ts"
-    - "**/*.jsx"
-    - "**/*.tsx"
-    - "**/*.java"
-    - "**/*.cpp"
-    - "**/*.c"
-    - "**/*.h"
-    - "**/*.rs"
-    - "**/*.go"
-    - "**/*.php"
-    - "**/*.rb"
-    - "**/*.md"
-
-# 🧠 Fastest embedding method
-embedding:
-  preferred_method: hash   # Instant embeddings (lower quality but very fast)
-  batch_size: 64          # Larger batches for efficiency
-
-# 🔍 Search optimized for speed
-search:
-  default_top_k: 5        # Fewer results = faster display
-  enable_bm25: false      # Skip keyword matching for speed
-  similarity_threshold: 0.2  # Higher threshold = fewer results to process
-  expand_queries: false   # No query expansion (much faster)
-
-# 🤖 Minimal AI for speed
-llm:
-  synthesis_model: qwen3:0.6b    # Smallest/fastest model
-  enable_synthesis: false        # Only use when explicitly requested
-  synthesis_temperature: 0.1     # Fast, factual responses
-  cpu_optimized: true           # Use lightweight models
-  enable_thinking: false        # Skip thinking process for speed
-  max_expansion_terms: 4        # Shorter expansions
-
-#═══════════════════════════════════════════════════════════════════════
-# ⚡ WHAT THIS CONFIG PRIORITIZES:
-# 
-# 🚀 Indexing speed - get up and running quickly
-# 🚀 Search speed - results in milliseconds  
-# 🚀 Memory efficiency - won't slow down your computer
-# 🚀 CPU efficiency - good for older/slower machines
-# 🚀 Storage efficiency - smaller index files
-# 
-# ⚖️ TRADE-OFFS:
-# ⚠️  Lower search quality (might miss some relevant results)
-# ⚠️  Less context in results (smaller chunks)
-# ⚠️  No query expansion (might need more specific search terms)
-# ⚠️  Basic embeddings (hash-based, not semantic)
-# 
-# 🎯 PERFECT FOR:
-# • Large codebases (>10k files)
-# • Older computers with limited resources
-# • When you know exactly what you're looking for
-# • Frequent, quick lookups
-# • CI/CD environments where speed matters
-# 
-# 🚀 TO USE THIS CONFIG:
-# 1. Copy to project: cp examples/config-fast.yaml .mini-rag/config.yaml
-# 2. Index: ./rag-mini index /path/to/project
-# 3. Enjoy lightning-fast searches! ⚡
-#═══════════════════════════════════════════════════════════════════════
--- a/examples/config-llm-providers.yaml
+++ b/examples/config-llm-providers.yaml
@ -1,233 +0,0 @@
-# 🌐 LLM PROVIDER ALTERNATIVES - OpenRouter, LM Studio, OpenAI & More
-# Educational guide showing how to configure different LLM providers
-# Copy sections you need to your main config.yaml
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 🎯 QUICK PROVIDER SELECTION GUIDE:
-#
-# 🏠 LOCAL (Best Privacy, No Internet Needed):
-#   - Ollama: Great quality, easy setup, free
-#   - LM Studio: User-friendly GUI, works with many models
-#
-# ☁️ CLOUD (Powerful Models, Requires API Keys):
-#   - OpenRouter: Access to many models with one API
-#   - OpenAI: High quality, reliable, but more expensive
-#   - Anthropic: Excellent for code analysis
-#
-# 💰 BUDGET FRIENDLY:
-#   - OpenRouter (Qwen, Llama models): $0.10-0.50 per million tokens
-#   - Local Ollama/LM Studio: Completely free
-#
-# 🚀 PERFORMANCE:
-#   - Local: Limited by your hardware
-#   - Cloud: Fast and powerful, costs per use
-#═════════════════════════════════════════════════════════════════════════════════
-
-# Standard FSS-Mini-RAG settings (copy these to any config)
-chunking:
-  max_size: 2000
-  min_size: 150
-  strategy: semantic
-
-streaming:
-  enabled: true
-  threshold_bytes: 1048576
-
-files:
-  min_file_size: 50
-  exclude_patterns:
-    - "node_modules/**"
-    - ".git/**"
-    - "__pycache__/**"
-    - "*.pyc"
-    - ".venv/**"
-    - "build/**"
-    - "dist/**"
-  include_patterns:
-    - "**/*"
-
-embedding:
-  preferred_method: ollama    # Use Ollama for embeddings (works with all providers below)
-  ollama_model: nomic-embed-text
-  ollama_host: localhost:11434
-  batch_size: 32
-
-search:
-  default_top_k: 10
-  enable_bm25: true
-  similarity_threshold: 0.1
-  expand_queries: false
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 🤖 LLM PROVIDER CONFIGURATIONS
-#═════════════════════════════════════════════════════════════════════════════════
-
-# 🏠 OPTION 1: OLLAMA (LOCAL) - Default and Recommended
-# ✅ Pros: Free, private, no API keys, good quality
-# ❌ Cons: Uses your computer's resources, limited by hardware
-llm:
-  provider: ollama                    # Use local Ollama
-  ollama_host: localhost:11434        # Default Ollama location
-  synthesis_model: qwen3:1.7b         # Good all-around model
-  # alternatives: qwen3:0.6b (faster), qwen2.5:3b (balanced), qwen3:4b (quality)
-  expansion_model: qwen3:1.7b
-  enable_synthesis: false
-  synthesis_temperature: 0.3
-  cpu_optimized: true
-  enable_thinking: true
-  max_expansion_terms: 8
-
-# 🖥️ OPTION 2: LM STUDIO (LOCAL) - User-Friendly Alternative
-# ✅ Pros: Easy GUI, drag-drop model installation, compatible with Ollama
-# ❌ Cons: Another app to manage, similar hardware limitations
-# 
-# SETUP STEPS:
-# 1. Download LM Studio from lmstudio.ai
-# 2. Install a model (try "microsoft/DialoGPT-medium" or "TheBloke/Llama-2-7B-Chat-GGML")
-# 3. Start local server in LM Studio (usually port 1234)
-# 4. Use this config:
-#
-# llm:
-#   provider: openai                   # LM Studio uses OpenAI-compatible API
-#   api_base: http://localhost:1234/v1 # LM Studio default port
-#   api_key: "not-needed"             # LM Studio doesn't require real API key
-#   synthesis_model: "any"            # Use whatever model you loaded in LM Studio
-#   expansion_model: "any"
-#   enable_synthesis: false
-#   synthesis_temperature: 0.3
-#   cpu_optimized: true
-#   enable_thinking: true
-#   max_expansion_terms: 8
-
-# ☁️ OPTION 3: OPENROUTER (CLOUD) - Many Models, One API
-# ✅ Pros: Access to many models, good prices, no local setup
-# ❌ Cons: Requires internet, costs money, less private
-#
-# SETUP STEPS:
-# 1. Sign up at openrouter.ai
-# 2. Get API key from dashboard
-# 3. Add credits to account ($5-10 goes a long way)
-# 4. Use this config:
-#
-# llm:
-#   provider: openai                   # OpenRouter uses OpenAI-compatible API
-#   api_base: https://openrouter.ai/api/v1
-#   api_key: "your-openrouter-api-key-here"  # Replace with your actual key
-#   synthesis_model: "meta-llama/llama-3.1-8b-instruct:free"  # Free tier model
-#   # alternatives: "openai/gpt-4o-mini" ($0.15/M), "anthropic/claude-3-haiku" ($0.25/M)
-#   expansion_model: "meta-llama/llama-3.1-8b-instruct:free"
-#   enable_synthesis: false
-#   synthesis_temperature: 0.3
-#   cpu_optimized: false              # Cloud models don't need CPU optimization
-#   enable_thinking: true
-#   max_expansion_terms: 8
-#   timeout: 30                       # Longer timeout for internet requests
-
-# 🏢 OPTION 4: OPENAI (CLOUD) - Premium Quality
-# ✅ Pros: Excellent quality, very reliable, fast
-# ❌ Cons: More expensive, requires OpenAI account
-#
-# SETUP STEPS:
-# 1. Sign up at platform.openai.com
-# 2. Add payment method (pay-per-use)
-# 3. Create API key in dashboard
-# 4. Use this config:
-#
-# llm:
-#   provider: openai
-#   api_key: "your-openai-api-key-here"      # Replace with your actual key
-#   synthesis_model: "gpt-4o-mini"           # Affordable option (~$0.15/M tokens)
-#   # alternatives: "gpt-4o" (premium, ~$2.50/M), "gpt-3.5-turbo" (budget, ~$0.50/M)
-#   expansion_model: "gpt-4o-mini"
-#   enable_synthesis: false
-#   synthesis_temperature: 0.3
-#   cpu_optimized: false
-#   enable_thinking: true
-#   max_expansion_terms: 8
-#   timeout: 30
-
-# 🧠 OPTION 5: ANTHROPIC CLAUDE (CLOUD) - Excellent for Code
-# ✅ Pros: Great at code analysis, very thoughtful responses
-# ❌ Cons: Premium pricing, separate API account needed
-#
-# SETUP STEPS:
-# 1. Sign up at console.anthropic.com
-# 2. Get API key and add credits
-# 3. Use this config:
-#
-# llm:
-#   provider: anthropic
-#   api_key: "your-anthropic-api-key-here"   # Replace with your actual key
-#   synthesis_model: "claude-3-haiku-20240307"  # Most affordable option
-#   # alternatives: "claude-3-sonnet-20240229" (balanced), "claude-3-opus-20240229" (premium)
-#   expansion_model: "claude-3-haiku-20240307"
-#   enable_synthesis: false
-#   synthesis_temperature: 0.3
-#   cpu_optimized: false
-#   enable_thinking: true
-#   max_expansion_terms: 8
-#   timeout: 30
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 🧪 TESTING YOUR CONFIGURATION
-#═════════════════════════════════════════════════════════════════════════════════
-#
-# After setting up any provider, test with these commands:
-#
-# 1. Test basic search (no LLM needed):
-#    ./rag-mini search /path/to/project "test query"
-#
-# 2. Test LLM synthesis:
-#    ./rag-mini search /path/to/project "test query" --synthesize
-#
-# 3. Test query expansion:
-#    Enable expand_queries: true in search section and try:
-#    ./rag-mini search /path/to/project "auth"
-#
-# 4. Test thinking mode:
-#    ./rag-mini explore /path/to/project
-#    Then ask: "explain the authentication system"
-#
-#═════════════════════════════════════════════════════════════════════════════════
-# 💡 TROUBLESHOOTING
-#═════════════════════════════════════════════════════════════════════════════════
-#
-# ❌ "Connection refused" or "API error":
-#    - Local: Make sure Ollama/LM Studio is running
-#    - Cloud: Check API key and internet connection
-#
-# ❌ "Model not found":
-#    - Local: Install model with `ollama pull model-name`
-#    - Cloud: Check model name matches provider's API docs
-#
-# ❌ "Token limit exceeded" or expensive bills:
-#    - Use cheaper models like gpt-4o-mini or claude-haiku
-#    - Enable shorter contexts with max_size: 1500
-#
-# ❌ Slow responses:
-#    - Local: Try smaller models (qwen3:0.6b)
-#    - Cloud: Increase timeout or try different provider
-#
-# ❌ Poor quality results:
-#    - Try higher-quality models
-#    - Adjust synthesis_temperature (0.1 for factual, 0.5 for creative)
-#    - Enable expand_queries for better search coverage
-#
-#═════════════════════════════════════════════════════════════════════════════════
-# 📚 LEARN MORE
-#═════════════════════════════════════════════════════════════════════════════════
-# 
-# Provider Documentation:
-# - Ollama: https://ollama.ai/library (model catalog)
-# - LM Studio: https://lmstudio.ai/docs (getting started)
-# - OpenRouter: https://openrouter.ai/docs (API reference)
-# - OpenAI: https://platform.openai.com/docs (API docs)
-# - Anthropic: https://docs.anthropic.com/claude/reference (Claude API)
-#
-# Model Recommendations:
-# - Code Analysis: claude-3-sonnet, gpt-4o, llama3.1:8b
-# - Fast Responses: gpt-4o-mini, claude-haiku, qwen3:0.6b  
-# - Budget Friendly: OpenRouter free tier, local Ollama
-# - Best Privacy: Local Ollama or LM Studio only
-#
-#═════════════════════════════════════════════════════════════════════════════════
--- a/examples/config-quality.yaml
+++ b/examples/config-quality.yaml
@ -1,111 +0,0 @@
-# 💎 QUALITY CONFIG - Best Possible Results  
-# When you want the highest quality search and AI responses
-# Perfect for: learning new codebases, research, complex analysis
-
-#═══════════════════════════════════════════════════════════════════════
-# 🎯 QUALITY-OPTIMIZED SETTINGS - Everything tuned for best results!
-#═══════════════════════════════════════════════════════════════════════
-
-# 📝 Chunking for maximum context and quality
-chunking:
-  max_size: 3000          # Larger chunks = more context per result
-  min_size: 200           # Ensure substantial content per chunk
-  strategy: semantic      # Smart splitting that respects code structure
-
-# 🌊 Conservative streaming (favor quality over speed)
-streaming:
-  enabled: true
-  threshold_bytes: 2097152  # 2MB - less aggressive chunking
-
-# 📁 Comprehensive file inclusion
-files:
-  min_file_size: 20       # Include even small files (might contain important info)
-  
-  # 🎯 Minimal exclusions (include more content)
-  exclude_patterns:
-    - "node_modules/**"   # Still skip these (too much noise)
-    - ".git/**"          # Git history not useful for code search
-    - "__pycache__/**"   # Python bytecode
-    - "*.pyc"
-    - ".venv/**"
-    - "build/**"         # Compiled artifacts
-    - "dist/**"
-    # Note: We keep logs, docs, configs that might have useful context
-  
-  include_patterns:
-    - "**/*"             # Include everything not explicitly excluded
-
-# 🧠 Best embedding quality
-embedding:
-  preferred_method: ollama  # Highest quality embeddings (needs Ollama)
-  ollama_model: nomic-embed-text  # Excellent code understanding
-  ml_model: sentence-transformers/all-MiniLM-L6-v2  # Good fallback
-  batch_size: 16          # Smaller batches for stability
-
-# 🔍 Search optimized for comprehensive results
-search:
-  default_top_k: 15       # More results to choose from
-  enable_bm25: true       # Use both semantic and keyword matching
-  similarity_threshold: 0.05  # Very permissive (show more possibilities)
-  expand_queries: true    # Automatic query expansion for better recall
-
-# 🤖 High-quality AI analysis
-llm:
-  synthesis_model: auto         # Use best available model
-  enable_synthesis: true        # AI explanations by default
-  synthesis_temperature: 0.4    # Good balance of accuracy and insight
-  cpu_optimized: false         # Use powerful models if available
-  enable_thinking: true        # Show detailed reasoning process
-  max_expansion_terms: 10      # Comprehensive query expansion
-
-#═══════════════════════════════════════════════════════════════════════
-# 💎 WHAT THIS CONFIG MAXIMIZES:
-# 
-# 🎯 Search comprehensiveness - find everything relevant
-# 🎯 Result context - larger chunks with more information
-# 🎯 AI explanation quality - detailed, thoughtful analysis
-# 🎯 Query understanding - automatic expansion and enhancement
-# 🎯 Semantic accuracy - best embedding models available
-# 
-# ⚖️ TRADE-OFFS:
-# ⏳ Slower indexing (larger chunks, better embeddings)
-# ⏳ Slower searching (query expansion, more results)
-# 💾 More storage space (larger index, more files included)
-# 🧠 More memory usage (larger batches, bigger models)
-# ⚡ Higher CPU/GPU usage (better models)
-# 
-# 🎯 PERFECT FOR:
-# • Learning new, complex codebases
-# • Research and analysis tasks  
-# • When you need to understand WHY code works a certain way
-# • Finding subtle connections and patterns
-# • Code review and security analysis
-# • Academic or professional research
-# 
-# 💻 REQUIREMENTS:
-# • Ollama installed and running (ollama serve)
-# • At least one language model (ollama pull qwen3:1.7b)
-# • Decent computer specs (4GB+ RAM recommended)
-# • Patience for thorough analysis 😊
-# 
-# 🚀 TO USE THIS CONFIG:
-# 1. Install Ollama: curl -fsSL https://ollama.ai/install.sh | sh
-# 2. Start Ollama: ollama serve
-# 3. Install a model: ollama pull qwen3:1.7b
-# 4. Copy config: cp examples/config-quality.yaml .mini-rag/config.yaml  
-# 5. Index project: ./rag-mini index /path/to/project
-# 6. Enjoy comprehensive analysis: ./rag-mini explore /path/to/project
-#═══════════════════════════════════════════════════════════════════════
-
-# 🧪 ADVANCED QUALITY TUNING (optional):
-# 
-# For even better results, try these model combinations:
-# • ollama pull nomic-embed-text:latest  (best embeddings)
-# • ollama pull qwen3:1.7b              (good general model)
-# • ollama pull qwen3:4b                (excellent for analysis)
-# 
-# Or adjust these settings for your specific needs:
-# • similarity_threshold: 0.3   (more selective results)
-# • max_size: 4000             (even more context per result)  
-# • enable_thinking: false     (hide reasoning, show just answers)
-# • synthesis_temperature: 0.2 (more conservative AI responses)
--- a/examples/config.yaml
+++ b/examples/config.yaml
@ -1,145 +1,43 @@
-# FSS-Mini-RAG Configuration - Beginner-Friendly Edition
-# 
-# 🎯 QUICK START PRESETS:
-# - Keep defaults for most cases (recommended for beginners)  
-# - For large projects (>10k files): increase max_size to 3000
-# - For faster search: set similarity_threshold to 0.2
-# - For better results: enable expand_queries (but slower search)
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 📝 CHUNKING: How we break up your code files for searching
-#═════════════════════════════════════════════════════════════════════════════════
-# Think of chunks as "bite-sized pieces" of your code that the system can search through.
-# Smaller chunks = more precise results but might miss context
-# Larger chunks = more context but might be less precise
+# FSS-Mini-RAG Configuration
+# Edit this file to customize indexing and search behavior
+# See docs/GETTING_STARTED.md for detailed explanations

+# Text chunking settings
 chunking:
-  max_size: 2000           # Maximum characters per chunk (2000 = ~50 lines of code)
-                          # 💡 ADJUST IF: Getting results that are too narrow/broad
-                          # Small projects: 1500 | Large projects: 3000 | Detailed analysis: 4000
-  
-  min_size: 150           # Minimum characters per chunk (150 = ~4-5 lines)
-                          # ⚠️ Don't go below 100 or you'll get fragments
-  
-  strategy: semantic      # How to split files into chunks
-                          # 'semantic': Smart splitting (respects functions, classes) - RECOMMENDED
-                          # 'fixed': Simple splitting (just cuts at size limits) - faster but less intelligent
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 🌊 STREAMING: How we handle really big files  
-#═════════════════════════════════════════════════════════════════════════════════
-# Large files (like minified CSS or huge data files) get processed in smaller batches
-# to prevent your computer from running out of memory
+  max_size: 2000      # Maximum characters per chunk
+  min_size: 150       # Minimum characters per chunk
+  strategy: semantic  # 'semantic' (language-aware) or 'fixed'

+# Large file streaming settings
 streaming:
-  enabled: true                    # Always keep this true - prevents memory crashes
-  threshold_bytes: 1048576        # Files larger than 1MB use streaming (1MB = 1048576 bytes)
-                                  # 💡 ADJUST IF: Low memory computer = 512000 | High memory = 2097152
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 📁 FILES: Which files to include/exclude from indexing
-#═════════════════════════════════════════════════════════════════════════════════
+  enabled: true
+  threshold_bytes: 1048576  # Files larger than this use streaming (1MB)

+# File processing settings
 files:
-  min_file_size: 50              # Skip tiny files (50 bytes = ~1 line of code)
-                                 # 💡 REASON: Tiny files usually aren't useful for searching
-  
-  # 🚫 EXCLUDE PATTERNS: Files/folders we always skip (saves time and space)
+  min_file_size: 50        # Skip files smaller than this
  exclude_patterns:
-    - "node_modules/**"          # JavaScript dependencies (huge and not your code)
-    - ".git/**"                  # Git history (not useful for code search)
-    - "__pycache__/**"           # Python bytecode (generated files)
-    - "*.pyc"                    # More Python bytecode
-    - ".venv/**"                 # Python virtual environments
-    - "venv/**"                  # More virtual environments  
-    - "build/**"                 # Compiled output (not source code)
-    - "dist/**"                  # Distribution files
-    # 💡 ADD YOUR OWN: Add patterns like "logs/**" or "*.tmp"
-  
+    - "node_modules/**"
+    - ".git/**"
+    - "__pycache__/**"
+    - "*.pyc"
+    - ".venv/**"
+    - "venv/**"
+    - "build/**"
+    - "dist/**"
  include_patterns:
-    - "**/*"                     # Include everything else by default
-                                 # 💡 CUSTOMIZE: Could be ["**/*.py", "**/*.js"] for only Python/JS
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 🧠 EMBEDDINGS: How we turn your code into searchable "vectors" 
-#═════════════════════════════════════════════════════════════════════════════════
-# Embeddings are like "fingerprints" of your code that help find similar content
-# Don't worry about the technical details - the defaults work great!
+    - "**/*"                  # Include all files by default

+# Embedding generation settings
 embedding:
-  preferred_method: ollama         # Which system to use for creating embeddings
-                                  # 'ollama': Best quality (needs Ollama installed) - RECOMMENDED
-                                  # 'ml': Good quality (downloads models automatically)  
-                                  # 'hash': Basic quality (works without internet)
-                                  # 'auto': Try ollama, fall back to ml, then hash - SAFEST CHOICE
-  
-  ollama_model: nomic-embed-text   # Which Ollama model to use (this one is excellent)
-  ollama_host: localhost:11434     # Where to find Ollama (don't change unless you know why)
-  
-  ml_model: sentence-transformers/all-MiniLM-L6-v2  # Backup model (small and fast)
-  
-  batch_size: 32                   # How many chunks to process at once
-                                  # 💡 ADJUST IF: Slow computer = 16 | Fast computer = 64
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 🔍 SEARCH: How the system finds and ranks results
-#═════════════════════════════════════════════════════════════════════════════════
+  preferred_method: ollama     # 'ollama', 'ml', 'hash', or 'auto'
+  ollama_model: nomic-embed-text
+  ollama_host: localhost:11434
+  ml_model: sentence-transformers/all-MiniLM-L6-v2
+  batch_size: 32               # Embeddings processed per batch

+# Search behavior settings
 search:
-  default_top_k: 10                # How many search results to show by default
-                                  # 💡 MORE RESULTS: 15-20 | FASTER SEARCH: 5-8
-  
-  enable_bm25: true               # Also use keyword matching (like Google search)
-                                  # 💡 EFFECT: Finds exact word matches even if semantically different
-                                  # Keep true unless getting too many irrelevant results
-  
-  similarity_threshold: 0.1       # Minimum "similarity score" to show results (0.0-1.0)
-                                  # 💡 HIGHER = fewer but more relevant results
-                                  # Picky: 0.3 | Balanced: 0.1 | Show everything: 0.05
-  
-  expand_queries: false           # Automatically add related search terms
-                                  # 💡 EFFECT: "auth" becomes "auth authentication login user"
-                                  # Better results but slower - TUI enables this automatically
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 🤖 LLM: Settings for the AI that explains and synthesizes results
-#═════════════════════════════════════════════════════════════════════════════════
-# The LLM (Large Language Model) reads your search results and explains them in plain English
-
-llm:
-  ollama_host: localhost:11434     # Where to find Ollama (don't change unless you know why)
-  
-  synthesis_model: auto           # Which AI model to use for explanations
-                                  # 'auto': Picks best available model - RECOMMENDED
-                                  # 'qwen3:0.6b': Ultra-fast, good for CPU-only computers
-                                  # 'qwen3:4b': Slower but more detailed explanations
-  
-  expansion_model: auto           # Model for query expansion (usually same as synthesis)
-  
-  max_expansion_terms: 8          # How many extra terms to add to expanded queries
-                                  # 💡 MORE TERMS = broader search but potentially less focused
-  
-  enable_synthesis: false         # Turn on AI explanations by default
-                                  # 💡 SET TO TRUE: If you want every search to include explanations
-                                  # (You can always use --synthesize flag when you want it)
-  
-  synthesis_temperature: 0.3      # How "creative" the AI explanations are (0.0-1.0)
-                                  # 💡 Lower = more factual | Higher = more creative
-                                  # Code analysis: 0.1-0.3 | Creative writing: 0.7-0.9
-  
-  cpu_optimized: true            # Prefer lightweight models for computers without graphics cards
-                                  # 💡 DISABLE IF: You have a powerful GPU and want highest quality
-  
-  enable_thinking: true          # Let AI "think out loud" for complex questions
-                                  # 💡 EFFECT: Shows reasoning process, better for learning/debugging
-
-#═════════════════════════════════════════════════════════════════════════════════
-# 🎯 QUICK TROUBLESHOOTING:
-# 
-# Search returns nothing? → Lower similarity_threshold to 0.05
-# Search too slow? → Set expand_queries: false and batch_size: 16  
-# Results not detailed enough? → Increase max_size to 3000
-# Getting weird fragments? → Check min_size is at least 150
-# AI not working? → Make sure Ollama is running: `ollama serve`
-# Out of memory errors? → Decrease batch_size to 16 and lower threshold_bytes
-#═════════════════════════════════════════════════════════════════════════════════
+  default_limit: 10           # Default number of results
+  enable_bm25: true             # Enable keyword matching boost
+  similarity_threshold: 0.1        # Minimum similarity score
--- a/examples/smart_config_suggestions.py
+++ b/examples/smart_config_suggestions.py
@ -5,108 +5,102 @@ Analyzes the indexed data to suggest optimal settings.
 """

 import json
-import sys
-from collections import Counter
 from pathlib import Path
-
+from collections import defaultdict, Counter
+import sys

 def analyze_project_patterns(manifest_path: Path):
    """Analyze project patterns and suggest optimizations."""
-
+    
    with open(manifest_path) as f:
        manifest = json.load(f)
-
-    files = manifest.get("files", {})
-
+    
+    files = manifest.get('files', {})
+    
    print("🔍 FSS-Mini-RAG Smart Tuning Analysis")
    print("=" * 50)
-
+    
    # Analyze file types and chunking efficiency
    languages = Counter()
    chunk_efficiency = []
    large_files = []
    small_files = []
-
+    
    for filepath, info in files.items():
-        lang = info.get("language", "unknown")
+        lang = info.get('language', 'unknown')
        languages[lang] += 1
-
-        size = info.get("size", 0)
-        chunks = info.get("chunks", 1)
-
+        
+        size = info.get('size', 0)
+        chunks = info.get('chunks', 1)
+        
        chunk_efficiency.append(chunks / max(1, size / 1000))  # chunks per KB
-
+        
        if size > 10000:  # >10KB
            large_files.append((filepath, size, chunks))
        elif size < 500:  # <500B
            small_files.append((filepath, size, chunks))
-
+    
    # Analysis results
    total_files = len(files)
-    total_chunks = sum(info.get("chunks", 1) for info in files.values())
+    total_chunks = sum(info.get('chunks', 1) for info in files.values())
    avg_chunks_per_file = total_chunks / max(1, total_files)
-
-    print("📊 Current Stats:")
+    
+    print(f"📊 Current Stats:")
    print(f"   Files: {total_files}")
    print(f"   Chunks: {total_chunks}")
    print(f"   Avg chunks/file: {avg_chunks_per_file:.1f}")
-
-    print("\n🗂️ Language Distribution:")
+    
+    print(f"\n🗂️ Language Distribution:")
    for lang, count in languages.most_common(10):
        pct = 100 * count / total_files
        print(f"   {lang}: {count} files ({pct:.1f}%)")
-
-    print("\n💡 Smart Optimization Suggestions:")
-
+    
+    print(f"\n💡 Smart Optimization Suggestions:")
+    
    # Suggestion 1: Language-specific chunking
-    if languages["python"] > 10:
-        print("✨ Python Optimization:")
-        print(
-            f"   - Use function-level chunking (detected {languages['python']} Python files)"
-        )
-        print("   - Increase chunk size to 3000 chars for Python (better context)")
-
-    if languages["markdown"] > 5:
-        print("✨ Markdown Optimization:")
+    if languages['python'] > 10:
+        print(f"✨ Python Optimization:")
+        print(f"   - Use function-level chunking (detected {languages['python']} Python files)")
+        print(f"   - Increase chunk size to 3000 chars for Python (better context)")
+    
+    if languages['markdown'] > 5:
+        print(f"✨ Markdown Optimization:")
        print(f"   - Use header-based chunking (detected {languages['markdown']} MD files)")
-        print("   - Keep sections together for better search relevance")
-
-    if languages["json"] > 20:
-        print("✨ JSON Optimization:")
+        print(f"   - Keep sections together for better search relevance")
+    
+    if languages['json'] > 20:
+        print(f"✨ JSON Optimization:")
        print(f"   - Consider object-level chunking (detected {languages['json']} JSON files)")
-        print("   - Might want to exclude large config JSONs")
-
+        print(f"   - Might want to exclude large config JSONs")
+    
    # Suggestion 2: File size optimization
    if large_files:
-        print("\n📈 Large File Optimization:")
+        print(f"\n📈 Large File Optimization:")
        print(f"   Found {len(large_files)} files >10KB:")
-        for filepath, size, chunks in sorted(large_files, key=lambda x: x[1], reverse=True)[
-            :3
-        ]:
+        for filepath, size, chunks in sorted(large_files, key=lambda x: x[1], reverse=True)[:3]:
            kb = size / 1024
            print(f"   - {filepath}: {kb:.1f}KB → {chunks} chunks")
        if len(large_files) > 5:
-            print("   💡 Consider streaming threshold: 5KB (current: 1MB)")
-
+            print(f"   💡 Consider streaming threshold: 5KB (current: 1MB)")
+    
    if small_files and len(small_files) > total_files * 0.3:
-        print("\n📉 Small File Optimization:")
+        print(f"\n📉 Small File Optimization:")
        print(f"   {len(small_files)} files <500B might not need chunking")
-        print("   💡 Consider: combine small files or skip tiny ones")
-
+        print(f"   💡 Consider: combine small files or skip tiny ones")
+    
    # Suggestion 3: Search optimization
    avg_efficiency = sum(chunk_efficiency) / len(chunk_efficiency)
-    print("\n🔍 Search Optimization:")
+    print(f"\n🔍 Search Optimization:")
    if avg_efficiency < 0.5:
-        print("   💡 Chunks are large relative to files - consider smaller chunks")
+        print(f"   💡 Chunks are large relative to files - consider smaller chunks")
        print(f"   💡 Current: {avg_chunks_per_file:.1f} chunks/file, try 2-3 chunks/file")
    elif avg_efficiency > 2:
-        print("   💡 Many small chunks - consider larger chunk size")
-        print("   💡 Reduce chunk overhead with 2000-4000 char chunks")
-
+        print(f"   💡 Many small chunks - consider larger chunk size")
+        print(f"   💡 Reduce chunk overhead with 2000-4000 char chunks")
+    
    # Suggestion 4: Smart defaults
-    print("\n⚙️ Recommended Config Updates:")
-    print(
-        """{{
+    print(f"\n⚙️ Recommended Config Updates:")
+    print(f"""{{
  "chunking": {{
    "max_size": {3000 if languages['python'] > languages['markdown'] else 2000},
    "min_size": 200,
@ -121,18 +115,16 @@ def analyze_project_patterns(manifest_path: Path):
    "skip_small_files": {500 if len(small_files) > total_files * 0.3 else 0},
    "streaming_threshold_kb": {5 if len(large_files) > 5 else 1024}
  }}
-}}"""
-    )
-
+}}""")

 if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python smart_config_suggestions.py <path_to_manifest.json>")
        sys.exit(1)
-
+    
    manifest_path = Path(sys.argv[1])
    if not manifest_path.exists():
        print(f"Manifest not found: {manifest_path}")
        sys.exit(1)
-
-    analyze_project_patterns(manifest_path)
+        
+    analyze_project_patterns(manifest_path)
--- a/install.ps1
+++ b/install.ps1
@ -1,320 +0,0 @@
-# FSS-Mini-RAG Installation Script for Windows PowerShell
-# Usage: iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
-
-# Requires -Version 5.1
-param(
-    [switch]$Force = $false,
-    [switch]$Quiet = $false
-)
-
-# Configuration
-$PackageName = "fss-mini-rag"
-$CommandName = "rag-mini"
-$ErrorActionPreference = "Stop"
-
-# Colors for output
-$Red = [System.ConsoleColor]::Red
-$Green = [System.ConsoleColor]::Green
-$Yellow = [System.ConsoleColor]::Yellow
-$Blue = [System.ConsoleColor]::Blue
-$Cyan = [System.ConsoleColor]::Cyan
-
-function Write-ColoredOutput {
-    param(
-        [string]$Message,
-        [System.ConsoleColor]$Color = [System.ConsoleColor]::White,
-        [string]$Prefix = ""
-    )
-    
-    if (-not $Quiet) {
-        $originalColor = $Host.UI.RawUI.ForegroundColor
-        $Host.UI.RawUI.ForegroundColor = $Color
-        Write-Host "$Prefix$Message"
-        $Host.UI.RawUI.ForegroundColor = $originalColor
-    }
-}
-
-function Write-Header {
-    if ($Quiet) { return }
-    
-    Write-ColoredOutput "████████╗██╗   ██╗██████╗ " -Color $Cyan
-    Write-ColoredOutput "██╔══██║██║   ██║██╔══██╗" -Color $Cyan
-    Write-ColoredOutput "██████╔╝██║   ██║██████╔╝" -Color $Cyan
-    Write-ColoredOutput "██╔══██╗██║   ██║██╔══██╗" -Color $Cyan
-    Write-ColoredOutput "██║  ██║╚██████╔╝██║  ██║" -Color $Cyan
-    Write-ColoredOutput "╚═╝  ╚═╝ ╚═════╝ ╚═╝  ╚═╝" -Color $Cyan
-    Write-Host ""
-    Write-ColoredOutput "FSS-Mini-RAG Installation Script" -Color $Blue
-    Write-ColoredOutput "Educational RAG that actually works!" -Color $Yellow
-    Write-Host ""
-}
-
-function Write-Log {
-    param([string]$Message)
-    Write-ColoredOutput $Message -Color $Green -Prefix "[INFO] "
-}
-
-function Write-Warning {
-    param([string]$Message)
-    Write-ColoredOutput $Message -Color $Yellow -Prefix "[WARN] "
-}
-
-function Write-Error {
-    param([string]$Message)
-    Write-ColoredOutput $Message -Color $Red -Prefix "[ERROR] "
-    exit 1
-}
-
-function Test-SystemRequirements {
-    Write-Log "Checking system requirements..."
-    
-    # Check PowerShell version
-    $psVersion = $PSVersionTable.PSVersion
-    if ($psVersion.Major -lt 5) {
-        Write-Error "PowerShell 5.1 or later is required. Found version: $($psVersion.ToString())"
-    }
-    Write-Log "PowerShell $($psVersion.ToString()) detected ✓"
-    
-    # Check if Python 3.8+ is available
-    try {
-        $pythonPath = (Get-Command python -ErrorAction SilentlyContinue).Source
-        if (-not $pythonPath) {
-            $pythonPath = (Get-Command python3 -ErrorAction SilentlyContinue).Source
-        }
-        
-        if (-not $pythonPath) {
-            Write-Error "Python 3 is required but not found. Please install Python 3.8 or later from python.org"
-        }
-        
-        # Check Python version
-        $pythonVersionOutput = & python -c "import sys; print('.'.join(map(str, sys.version_info[:3])))" 2>$null
-        if (-not $pythonVersionOutput) {
-            $pythonVersionOutput = & python3 -c "import sys; print('.'.join(map(str, sys.version_info[:3])))" 2>$null
-        }
-        
-        if (-not $pythonVersionOutput) {
-            Write-Error "Unable to determine Python version"
-        }
-        
-        # Parse version and check if >= 3.8
-        $versionParts = $pythonVersionOutput.Split('.')
-        $majorVersion = [int]$versionParts[0]
-        $minorVersion = [int]$versionParts[1]
-        
-        if ($majorVersion -lt 3 -or ($majorVersion -eq 3 -and $minorVersion -lt 8)) {
-            Write-Error "Python $pythonVersionOutput detected, but Python 3.8+ is required"
-        }
-        
-        Write-Log "Python $pythonVersionOutput detected ✓"
-        
-        # Store python command for later use
-        $script:PythonCommand = if (Get-Command python -ErrorAction SilentlyContinue) { "python" } else { "python3" }
-        
-    } catch {
-        Write-Error "Failed to check Python installation: $($_.Exception.Message)"
-    }
-}
-
-function Install-UV {
-    if (Get-Command uv -ErrorAction SilentlyContinue) {
-        Write-Log "uv is already installed ✓"
-        return $true
-    }
-    
-    Write-Log "Installing uv (fast Python package manager)..."
-    
-    try {
-        # Install uv using the official Windows installer
-        $uvInstaller = Invoke-WebRequest -Uri "https://astral.sh/uv/install.ps1" -UseBasicParsing
-        Invoke-Expression $uvInstaller.Content
-        
-        # Refresh environment to pick up new PATH
-        $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-        
-        if (Get-Command uv -ErrorAction SilentlyContinue) {
-            Write-Log "uv installed successfully ✓"
-            return $true
-        } else {
-            Write-Warning "uv installation may not be in PATH. Falling back to pip method."
-            return $false
-        }
-    } catch {
-        Write-Warning "uv installation failed: $($_.Exception.Message). Falling back to pip method."
-        return $false
-    }
-}
-
-function Install-WithUV {
-    Write-Log "Installing $PackageName with uv..."
-    
-    try {
-        & uv tool install $PackageName
-        if ($LASTEXITCODE -eq 0) {
-            Write-Log "$PackageName installed successfully with uv ✓"
-            return $true
-        } else {
-            Write-Warning "uv installation failed. Falling back to pip method."
-            return $false
-        }
-    } catch {
-        Write-Warning "uv installation failed: $($_.Exception.Message). Falling back to pip method."
-        return $false
-    }
-}
-
-function Install-WithPipx {
-    # Check if pipx is available
-    if (-not (Get-Command pipx -ErrorAction SilentlyContinue)) {
-        Write-Log "Installing pipx..."
-        try {
-            & $script:PythonCommand -m pip install --user pipx
-            & $script:PythonCommand -m pipx ensurepath
-            
-            # Refresh PATH
-            $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-        } catch {
-            Write-Warning "Failed to install pipx: $($_.Exception.Message). Falling back to pip method."
-            return $false
-        }
-    }
-    
-    if (Get-Command pipx -ErrorAction SilentlyContinue) {
-        Write-Log "Installing $PackageName with pipx..."
-        try {
-            & pipx install $PackageName
-            if ($LASTEXITCODE -eq 0) {
-                Write-Log "$PackageName installed successfully with pipx ✓"
-                return $true
-            } else {
-                Write-Warning "pipx installation failed. Falling back to pip method."
-                return $false
-            }
-        } catch {
-            Write-Warning "pipx installation failed: $($_.Exception.Message). Falling back to pip method."
-            return $false
-        }
-    } else {
-        Write-Warning "pipx not available. Falling back to pip method."
-        return $false
-    }
-}
-
-function Install-WithPip {
-    Write-Log "Installing $PackageName with pip..."
-    
-    try {
-        & $script:PythonCommand -m pip install --user $PackageName
-        if ($LASTEXITCODE -eq 0) {
-            Write-Log "$PackageName installed successfully with pip --user ✓"
-            
-            # Add Scripts directory to PATH if not already there
-            $scriptsPath = & $script:PythonCommand -c "import site; print(site.getusersitepackages().replace('site-packages', 'Scripts'))"
-            $currentPath = $env:Path
-            
-            if ($currentPath -notlike "*$scriptsPath*") {
-                Write-Warning "Adding $scriptsPath to PATH..."
-                $newPath = "$scriptsPath;$currentPath"
-                [System.Environment]::SetEnvironmentVariable("Path", $newPath, "User")
-                $env:Path = $newPath
-            }
-            
-            return $true
-        } else {
-            Write-Error "Failed to install $PackageName with pip."
-        }
-    } catch {
-        Write-Error "Failed to install $PackageName with pip: $($_.Exception.Message)"
-    }
-}
-
-function Test-Installation {
-    Write-Log "Verifying installation..."
-    
-    # Refresh PATH
-    $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
-    
-    # Check if command is available
-    if (Get-Command $CommandName -ErrorAction SilentlyContinue) {
-        Write-Log "$CommandName command is available ✓"
-        
-        # Test the command
-        try {
-            & $CommandName --help > $null 2>&1
-            if ($LASTEXITCODE -eq 0) {
-                Write-Log "Installation verified successfully! ✅"
-                return $true
-            } else {
-                Write-Warning "Command exists but may have issues."
-                return $false
-            }
-        } catch {
-            Write-Warning "Command exists but may have issues."
-            return $false
-        }
-    } else {
-        Write-Warning "$CommandName command not found in PATH."
-        Write-Warning "You may need to restart your PowerShell session or reboot."
-        return $false
-    }
-}
-
-function Write-Usage {
-    if ($Quiet) { return }
-    
-    Write-Host ""
-    Write-ColoredOutput "🎉 Installation complete!" -Color $Green
-    Write-Host ""
-    Write-ColoredOutput "Quick Start:" -Color $Blue
-    Write-ColoredOutput "  # Initialize your project" -Color $Cyan
-    Write-Host "  $CommandName init"
-    Write-Host ""
-    Write-ColoredOutput "  # Search your codebase" -Color $Cyan
-    Write-Host "  $CommandName search `"authentication logic`""
-    Write-Host ""
-    Write-ColoredOutput "  # Get help" -Color $Cyan
-    Write-Host "  $CommandName --help"
-    Write-Host ""
-    Write-ColoredOutput "Documentation: " -Color $Blue -NoNewline
-    Write-Host "https://github.com/FSSCoding/Fss-Mini-Rag"
-    Write-Host ""
-    
-    if (-not (Get-Command $CommandName -ErrorAction SilentlyContinue)) {
-        Write-ColoredOutput "Note: If the command is not found, restart PowerShell or reboot Windows." -Color $Yellow
-        Write-Host ""
-    }
-}
-
-# Main execution
-function Main {
-    Write-Header
-    
-    # Check system requirements
-    Test-SystemRequirements
-    
-    # Try installation methods in order of preference
-    $installationMethod = ""
-    
-    if ((Install-UV) -and (Install-WithUV)) {
-        $installationMethod = "uv ✨"
-    } elseif (Install-WithPipx) {
-        $installationMethod = "pipx 📦"
-    } else {
-        Install-WithPip
-        $installationMethod = "pip 🐍"
-    }
-    
-    Write-Log "Installation method: $installationMethod"
-    
-    # Verify installation
-    if (Test-Installation) {
-        Write-Usage
-    } else {
-        Write-Warning "Installation completed but verification failed. The tool may still work after restarting PowerShell."
-        Write-Usage
-    }
-}
-
-# Run if not being dot-sourced
-if ($MyInvocation.InvocationName -ne '.') {
-    Main
-}
--- a/install.sh
+++ b/install.sh
@ -1,238 +0,0 @@
-#!/usr/bin/env bash
-# FSS-Mini-RAG Installation Script for Linux/macOS
-# Usage: curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
-
-set -euo pipefail
-
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-CYAN='\033[0;36m'
-NC='\033[0m' # No Color
-
-# Configuration
-PACKAGE_NAME="fss-mini-rag"
-COMMAND_NAME="rag-mini"
-
-print_header() {
-    echo -e "${CYAN}"
-    echo "████████╗██╗   ██╗██████╗ "
-    echo "██╔══██║██║   ██║██╔══██╗"
-    echo "██████╔╝██║   ██║██████╔╝"
-    echo "██╔══██╗██║   ██║██╔══██╗"
-    echo "██║  ██║╚██████╔╝██║  ██║"
-    echo "╚═╝  ╚═╝ ╚═════╝ ╚═╝  ╚═╝"
-    echo -e "${NC}"
-    echo -e "${BLUE}FSS-Mini-RAG Installation Script${NC}"
-    echo -e "${YELLOW}Educational RAG that actually works!${NC}"
-    echo
-}
-
-log() {
-    echo -e "${GREEN}[INFO]${NC} $1"
-}
-
-warn() {
-    echo -e "${YELLOW}[WARN]${NC} $1"
-}
-
-error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-    exit 1
-}
-
-check_system() {
-    log "Checking system requirements..."
-    
-    # Check if we're on a supported platform
-    case "$(uname -s)" in
-        Darwin*) PLATFORM="macOS" ;;
-        Linux*)  PLATFORM="Linux" ;;
-        *) error "Unsupported platform: $(uname -s). This script supports Linux and macOS only." ;;
-    esac
-    
-    log "Platform: $PLATFORM"
-    
-    # Check if Python 3.8+ is available
-    if ! command -v python3 &> /dev/null; then
-        error "Python 3 is required but not installed. Please install Python 3.8 or later."
-    fi
-    
-    # Check Python version
-    python_version=$(python3 -c "import sys; print('.'.join(map(str, sys.version_info[:2])))")
-    required_version="3.8"
-    
-    if ! python3 -c "import sys; exit(0 if sys.version_info >= (3,8) else 1)" 2>/dev/null; then
-        error "Python ${python_version} detected, but Python ${required_version}+ is required."
-    fi
-    
-    log "Python ${python_version} detected ✓"
-}
-
-install_uv() {
-    if command -v uv &> /dev/null; then
-        log "uv is already installed ✓"
-        return
-    fi
-    
-    log "Installing uv (fast Python package manager)..."
-    
-    # Install uv using the official installer
-    if command -v curl &> /dev/null; then
-        curl -LsSf https://astral.sh/uv/install.sh | sh
-    elif command -v wget &> /dev/null; then
-        wget -qO- https://astral.sh/uv/install.sh | sh
-    else
-        warn "Neither curl nor wget available. Falling back to pip installation method."
-        return 1
-    fi
-    
-    # Add uv to PATH for current session
-    export PATH="$HOME/.local/bin:$PATH"
-    
-    if command -v uv &> /dev/null; then
-        log "uv installed successfully ✓"
-        return 0
-    else
-        warn "uv installation may not be in PATH. Falling back to pip method."
-        return 1
-    fi
-}
-
-install_with_uv() {
-    log "Installing ${PACKAGE_NAME} with uv..."
-    
-    # Install using uv tool install
-    if uv tool install "$PACKAGE_NAME"; then
-        log "${PACKAGE_NAME} installed successfully with uv ✓"
-        return 0
-    else
-        warn "uv installation failed. Falling back to pip method."
-        return 1
-    fi
-}
-
-install_with_pipx() {
-    if ! command -v pipx &> /dev/null; then
-        log "Installing pipx..."
-        python3 -m pip install --user pipx
-        python3 -m pipx ensurepath
-        
-        # Add pipx to PATH for current session
-        export PATH="$HOME/.local/bin:$PATH"
-    fi
-    
-    if command -v pipx &> /dev/null; then
-        log "Installing ${PACKAGE_NAME} with pipx..."
-        if pipx install "$PACKAGE_NAME"; then
-            log "${PACKAGE_NAME} installed successfully with pipx ✓"
-            return 0
-        else
-            warn "pipx installation failed. Falling back to pip method."
-            return 1
-        fi
-    else
-        warn "pipx not available. Falling back to pip method."
-        return 1
-    fi
-}
-
-install_with_pip() {
-    log "Installing ${PACKAGE_NAME} with pip (system-wide)..."
-    
-    # Try pip install with --user first
-    if python3 -m pip install --user "$PACKAGE_NAME"; then
-        log "${PACKAGE_NAME} installed successfully with pip --user ✓"
-        
-        # Ensure ~/.local/bin is in PATH
-        local_bin="$HOME/.local/bin"
-        if [[ ":$PATH:" != *":$local_bin:"* ]]; then
-            warn "Adding $local_bin to PATH..."
-            echo 'export PATH="$HOME/.local/bin:$PATH"' >> "$HOME/.bashrc"
-            if [ -f "$HOME/.zshrc" ]; then
-                echo 'export PATH="$HOME/.local/bin:$PATH"' >> "$HOME/.zshrc"
-            fi
-            export PATH="$local_bin:$PATH"
-        fi
-        
-        return 0
-    else
-        error "Failed to install ${PACKAGE_NAME} with pip. Please check your Python setup."
-    fi
-}
-
-verify_installation() {
-    log "Verifying installation..."
-    
-    # Check if command is available
-    if command -v "$COMMAND_NAME" &> /dev/null; then
-        log "${COMMAND_NAME} command is available ✓"
-        
-        # Test the command
-        if $COMMAND_NAME --help &> /dev/null; then
-            log "Installation verified successfully! ✅"
-            return 0
-        else
-            warn "Command exists but may have issues."
-            return 1
-        fi
-    else
-        warn "${COMMAND_NAME} command not found in PATH."
-        warn "You may need to restart your terminal or run: source ~/.bashrc"
-        return 1
-    fi
-}
-
-print_usage() {
-    echo
-    echo -e "${GREEN}🎉 Installation complete!${NC}"
-    echo
-    echo -e "${BLUE}Quick Start:${NC}"
-    echo -e "  ${CYAN}# Initialize your project${NC}"
-    echo -e "  ${COMMAND_NAME} init"
-    echo
-    echo -e "  ${CYAN}# Search your codebase${NC}"
-    echo -e "  ${COMMAND_NAME} search \"authentication logic\""
-    echo
-    echo -e "  ${CYAN}# Get help${NC}"
-    echo -e "  ${COMMAND_NAME} --help"
-    echo
-    echo -e "${BLUE}Documentation:${NC} https://github.com/FSSCoding/Fss-Mini-Rag"
-    echo
-    
-    if ! command -v "$COMMAND_NAME" &> /dev/null; then
-        echo -e "${YELLOW}Note: If the command is not found, restart your terminal or run:${NC}"
-        echo -e "  source ~/.bashrc"
-        echo
-    fi
-}
-
-main() {
-    print_header
-    
-    # Check system requirements
-    check_system
-    
-    # Try installation methods in order of preference
-    if install_uv && install_with_uv; then
-        log "Installation method: uv ✨"
-    elif install_with_pipx; then
-        log "Installation method: pipx 📦"
-    else
-        install_with_pip
-        log "Installation method: pip 🐍"
-    fi
-    
-    # Verify installation
-    if verify_installation; then
-        print_usage
-    else
-        warn "Installation completed but verification failed. The tool may still work."
-        print_usage
-    fi
-}
-
-# Run the main function
-main "$@"
--- a/install_mini_rag.ps1
+++ b/install_mini_rag.ps1
@ -1,458 +0,0 @@
-# FSS-Mini-RAG PowerShell Installation Script
-# Interactive installer that sets up Python environment and dependencies
-
-# Enable advanced features
-$ErrorActionPreference = "Stop"
-
-# Color functions for better output
-function Write-ColorOutput($message, $color = "White") {
-    Write-Host $message -ForegroundColor $color
-}
-
-function Write-Header($message) {
-    Write-Host "`n" -NoNewline
-    Write-ColorOutput "=== $message ===" "Cyan"
-}
-
-function Write-Success($message) {
-    Write-ColorOutput "✅ $message" "Green"
-}
-
-function Write-Warning($message) {
-    Write-ColorOutput "⚠️  $message" "Yellow"
-}
-
-function Write-Error($message) {
-    Write-ColorOutput "❌ $message" "Red"
-}
-
-function Write-Info($message) {
-    Write-ColorOutput "ℹ️  $message" "Blue"
-}
-
-# Get script directory
-$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
-
-# Main installation function
-function Main {
-    Write-Host ""
-    Write-ColorOutput "╔══════════════════════════════════════╗" "Cyan"
-    Write-ColorOutput "║        FSS-Mini-RAG Installer        ║" "Cyan"
-    Write-ColorOutput "║   Fast Semantic Search for Code      ║" "Cyan" 
-    Write-ColorOutput "╚══════════════════════════════════════╝" "Cyan"
-    Write-Host ""
-    
-    Write-Info "PowerShell installation process:"
-    Write-Host "  • Python environment setup"
-    Write-Host "  • Smart configuration based on your system"
-    Write-Host "  • Optional AI model downloads (with consent)"
-    Write-Host "  • Testing and verification"
-    Write-Host ""
-    Write-ColorOutput "Note: You'll be asked before downloading any models" "Cyan"
-    Write-Host ""
-    
-    $continue = Read-Host "Begin installation? [Y/n]"
-    if ($continue -eq "n" -or $continue -eq "N") {
-        Write-Host "Installation cancelled."
-        exit 0
-    }
-    
-    # Run installation steps
-    Check-Python
-    Create-VirtualEnvironment
-    
-    # Check Ollama availability
-    $ollamaAvailable = Check-Ollama
-    
-    # Get installation preferences
-    Get-InstallationPreferences $ollamaAvailable
-    
-    # Install dependencies
-    Install-Dependencies
-    
-    # Setup models if available
-    if ($ollamaAvailable) {
-        Setup-OllamaModel
-    }
-    
-    # Test installation
-    if (Test-Installation) {
-        Show-Completion
-    } else {
-        Write-Error "Installation test failed"
-        Write-Host "Please check error messages and try again."
-        exit 1
-    }
-}
-
-function Check-Python {
-    Write-Header "Checking Python Installation"
-    
-    # Try different Python commands
-    $pythonCmd = $null
-    $pythonVersion = $null
-    
-    foreach ($cmd in @("python", "python3", "py")) {
-        try {
-            $version = & $cmd --version 2>&1
-            if ($LASTEXITCODE -eq 0) {
-                $pythonCmd = $cmd
-                $pythonVersion = ($version -split " ")[1]
-                break
-            }
-        } catch {
-            continue
-        }
-    }
-    
-    if (-not $pythonCmd) {
-        Write-Error "Python not found!"
-        Write-Host ""
-        Write-ColorOutput "Please install Python 3.8+ from:" "Yellow"
-        Write-Host "  • https://python.org/downloads"
-        Write-Host "  • Make sure to check 'Add Python to PATH' during installation"
-        Write-Host ""
-        Write-ColorOutput "After installing Python, run this script again." "Cyan"
-        exit 1
-    }
-    
-    # Check version
-    $versionParts = $pythonVersion -split "\."
-    $major = [int]$versionParts[0]
-    $minor = [int]$versionParts[1]
-    
-    if ($major -lt 3 -or ($major -eq 3 -and $minor -lt 8)) {
-        Write-Error "Python $pythonVersion found, but 3.8+ required"
-        Write-Host "Please upgrade Python to 3.8 or higher."
-        exit 1
-    }
-    
-    Write-Success "Found Python $pythonVersion ($pythonCmd)"
-    $script:PythonCmd = $pythonCmd
-}
-
-function Create-VirtualEnvironment {
-    Write-Header "Creating Python Virtual Environment"
-    
-    $venvPath = Join-Path $ScriptDir ".venv"
-    
-    if (Test-Path $venvPath) {
-        Write-Info "Virtual environment already exists at $venvPath"
-        $recreate = Read-Host "Recreate it? (y/N)"
-        if ($recreate -eq "y" -or $recreate -eq "Y") {
-            Write-Info "Removing existing virtual environment..."
-            Remove-Item -Recurse -Force $venvPath
-        } else {
-            Write-Success "Using existing virtual environment"
-            return
-        }
-    }
-    
-    Write-Info "Creating virtual environment at $venvPath"
-    try {
-        & $script:PythonCmd -m venv $venvPath
-        if ($LASTEXITCODE -ne 0) {
-            throw "Virtual environment creation failed"
-        }
-        Write-Success "Virtual environment created"
-    } catch {
-        Write-Error "Failed to create virtual environment"
-        Write-Host "This might be because python venv module is not available."
-        Write-Host "Try installing Python from python.org with full installation."
-        exit 1
-    }
-    
-    # Activate virtual environment and upgrade pip
-    $activateScript = Join-Path $venvPath "Scripts\Activate.ps1"
-    if (Test-Path $activateScript) {
-        & $activateScript
-        Write-Success "Virtual environment activated"
-        
-        Write-Info "Upgrading pip..."
-        try {
-            & python -m pip install --upgrade pip --quiet
-        } catch {
-            Write-Warning "Could not upgrade pip, continuing anyway..."
-        }
-    }
-}
-
-function Check-Ollama {
-    Write-Header "Checking Ollama (AI Model Server)"
-    
-    try {
-        $response = Invoke-WebRequest -Uri "http://localhost:11434/api/version" -TimeoutSec 5 -ErrorAction SilentlyContinue
-        if ($response.StatusCode -eq 200) {
-            Write-Success "Ollama server is running"
-            return $true
-        }
-    } catch {
-        # Ollama not running, check if installed
-    }
-    
-    try {
-        & ollama version 2>$null
-        if ($LASTEXITCODE -eq 0) {
-            Write-Warning "Ollama is installed but not running"
-            $startOllama = Read-Host "Start Ollama now? (Y/n)"
-            if ($startOllama -ne "n" -and $startOllama -ne "N") {
-                Write-Info "Starting Ollama server..."
-                Start-Process -FilePath "ollama" -ArgumentList "serve" -WindowStyle Hidden
-                Start-Sleep -Seconds 3
-                
-                try {
-                    $response = Invoke-WebRequest -Uri "http://localhost:11434/api/version" -TimeoutSec 5 -ErrorAction SilentlyContinue
-                    if ($response.StatusCode -eq 200) {
-                        Write-Success "Ollama server started"
-                        return $true
-                    }
-                } catch {
-                    Write-Warning "Failed to start Ollama automatically"
-                    Write-Host "Please start Ollama manually: ollama serve"
-                    return $false
-                }
-            }
-            return $false
-        }
-    } catch {
-        # Ollama not installed
-    }
-    
-    Write-Warning "Ollama not found"
-    Write-Host ""
-    Write-ColorOutput "Ollama provides the best embedding quality and performance." "Cyan"
-    Write-Host ""
-    Write-ColorOutput "Options:" "White"
-    Write-ColorOutput "1) Install Ollama automatically" "Green" -NoNewline
-    Write-Host " (recommended)"
-    Write-ColorOutput "2) Manual installation" "Yellow" -NoNewline
-    Write-Host " - Visit https://ollama.com/download"
-    Write-ColorOutput "3) Continue without Ollama" "Blue" -NoNewline
-    Write-Host " (uses ML fallback)"
-    Write-Host ""
-    
-    $choice = Read-Host "Choose [1/2/3]"
-    
-    switch ($choice) {
-        "1" {
-            Write-Info "Opening Ollama download page..."
-            Start-Process "https://ollama.com/download"
-            Write-Host ""
-            Write-ColorOutput "Please:" "Yellow"
-            Write-Host "  1. Download and install Ollama from the opened page"
-            Write-Host "  2. Run 'ollama serve' in a new terminal"
-            Write-Host "  3. Re-run this installer"
-            Write-Host ""
-            Read-Host "Press Enter to exit"
-            exit 0
-        }
-        "2" {
-            Write-Host ""
-            Write-ColorOutput "Manual Ollama installation:" "Yellow"
-            Write-Host "  1. Visit: https://ollama.com/download"
-            Write-Host "  2. Download and install for Windows"
-            Write-Host "  3. Run: ollama serve"
-            Write-Host "  4. Re-run this installer"
-            Read-Host "Press Enter to exit"
-            exit 0
-        }
-        "3" {
-            Write-Info "Continuing without Ollama (will use ML fallback)"
-            return $false
-        }
-        default {
-            Write-Warning "Invalid choice, continuing without Ollama"
-            return $false
-        }
-    }
-}
-
-function Get-InstallationPreferences($ollamaAvailable) {
-    Write-Header "Installation Configuration"
-    
-    Write-ColorOutput "FSS-Mini-RAG can run with different embedding backends:" "Cyan"
-    Write-Host ""
-    Write-ColorOutput "• Ollama" "Green" -NoNewline
-    Write-Host " (recommended) - Best quality, local AI server"
-    Write-ColorOutput "• ML Fallback" "Yellow" -NoNewline
-    Write-Host " - Offline transformers, larger but always works"
-    Write-ColorOutput "• Hash-based" "Blue" -NoNewline
-    Write-Host " - Lightweight fallback, basic similarity"
-    Write-Host ""
-    
-    if ($ollamaAvailable) {
-        $recommended = "light (Ollama detected)"
-        Write-ColorOutput "✓ Ollama detected - light installation recommended" "Green"
-    } else {
-        $recommended = "full (no Ollama)"
-        Write-ColorOutput "⚠ No Ollama - full installation recommended for better quality" "Yellow"
-    }
-    
-    Write-Host ""
-    Write-ColorOutput "Installation options:" "White"
-    Write-ColorOutput "L) Light" "Green" -NoNewline
-    Write-Host " - Ollama + basic deps (~50MB) " -NoNewline
-    Write-ColorOutput "← Best performance + AI chat" "Cyan"
-    Write-ColorOutput "F) Full" "Yellow" -NoNewline
-    Write-Host "  - Light + ML fallback (~2-3GB) " -NoNewline
-    Write-ColorOutput "← Works without Ollama" "Cyan"
-    Write-Host ""
-    
-    $choice = Read-Host "Choose [L/F] or Enter for recommended ($recommended)"
-    
-    if ($choice -eq "") {
-        if ($ollamaAvailable) {
-            $choice = "L"
-        } else {
-            $choice = "F"
-        }
-    }
-    
-    switch ($choice.ToUpper()) {
-        "L" {
-            $script:InstallType = "light"
-            Write-ColorOutput "Selected: Light installation" "Green"
-        }
-        "F" {
-            $script:InstallType = "full"
-            Write-ColorOutput "Selected: Full installation" "Yellow"
-        }
-        default {
-            Write-Warning "Invalid choice, using light installation"
-            $script:InstallType = "light"
-        }
-    }
-}
-
-function Install-Dependencies {
-    Write-Header "Installing Python Dependencies"
-    
-    if ($script:InstallType -eq "light") {
-        Write-Info "Installing core dependencies (~50MB)..."
-        Write-ColorOutput "  Installing: lancedb, pandas, numpy, PyYAML, etc." "Blue"
-        
-        try {
-            & pip install -r (Join-Path $ScriptDir "requirements.txt") --quiet
-            if ($LASTEXITCODE -ne 0) {
-                throw "Dependency installation failed"
-            }
-            Write-Success "Dependencies installed"
-        } catch {
-            Write-Error "Failed to install dependencies"
-            Write-Host "Try: pip install -r requirements.txt"
-            exit 1
-        }
-    } else {
-        Write-Info "Installing full dependencies (~2-3GB)..."
-        Write-ColorOutput "This includes PyTorch and transformers - will take several minutes" "Yellow"
-        
-        try {
-            & pip install -r (Join-Path $ScriptDir "requirements-full.txt")
-            if ($LASTEXITCODE -ne 0) {
-                throw "Dependency installation failed"
-            }
-            Write-Success "All dependencies installed"
-        } catch {
-            Write-Error "Failed to install dependencies"
-            Write-Host "Try: pip install -r requirements-full.txt"
-            exit 1
-        }
-    }
-    
-    Write-Info "Verifying installation..."
-    try {
-        & python -c "import lancedb, pandas, numpy" 2>$null
-        if ($LASTEXITCODE -ne 0) {
-            throw "Package verification failed"
-        }
-        Write-Success "Core packages verified"
-    } catch {
-        Write-Error "Package verification failed"
-        exit 1
-    }
-}
-
-function Setup-OllamaModel {
-    # Implementation similar to bash version but adapted for PowerShell
-    Write-Header "Ollama Model Setup"
-    # For brevity, implementing basic version
-    Write-Info "Ollama model setup available - see bash version for full implementation"
-}
-
-function Test-Installation {
-    Write-Header "Testing Installation"
-    
-    Write-Info "Testing basic functionality..."
-    
-    try {
-        & python -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('✅ Import successful')" 2>$null
-        if ($LASTEXITCODE -ne 0) {
-            throw "Import test failed"
-        }
-        Write-Success "Python imports working"
-        return $true
-    } catch {
-        Write-Error "Import test failed"
-        return $false
-    }
-}
-
-function Show-Completion {
-    Write-Header "Installation Complete!"
-    
-    Write-ColorOutput "FSS-Mini-RAG is now installed!" "Green"
-    Write-Host ""
-    Write-ColorOutput "Quick Start Options:" "Cyan"
-    Write-Host ""
-    Write-ColorOutput "🎯 TUI (Beginner-Friendly):" "Green"
-    Write-Host "     rag-tui.bat"
-    Write-Host "     # Interactive interface with guided setup"
-    Write-Host ""
-    Write-ColorOutput "💻 CLI (Advanced):" "Blue"
-    Write-Host "     rag-mini.bat index C:\path\to\project"
-    Write-Host "     rag-mini.bat search C:\path\to\project `"query`""
-    Write-Host "     rag-mini.bat status C:\path\to\project"
-    Write-Host ""
-    Write-ColorOutput "Documentation:" "Cyan"
-    Write-Host "  • README.md - Complete technical documentation"
-    Write-Host "  • docs\GETTING_STARTED.md - Step-by-step guide"
-    Write-Host "  • examples\ - Usage examples and sample configs"
-    Write-Host ""
-    
-    $runTest = Read-Host "Run quick test now? [Y/n]"
-    if ($runTest -ne "n" -and $runTest -ne "N") {
-        Run-QuickTest
-    }
-    
-    Write-Host ""
-    Write-ColorOutput "🎉 Setup complete! FSS-Mini-RAG is ready to use." "Green"
-}
-
-function Run-QuickTest {
-    Write-Header "Quick Test"
-    
-    Write-Info "Testing with FSS-Mini-RAG codebase..."
-    
-    $ragDir = Join-Path $ScriptDir ".mini-rag"
-    if (Test-Path $ragDir) {
-        Write-Success "Project already indexed, running search..."
-    } else {
-        Write-Info "Indexing FSS-Mini-RAG system for demo..."
-        & python (Join-Path $ScriptDir "rag-mini.py") index $ScriptDir
-        if ($LASTEXITCODE -ne 0) {
-            Write-Error "Test indexing failed"
-            return
-        }
-    }
-    
-    Write-Host ""
-    Write-Success "Running demo search: 'embedding system'"
-    & python (Join-Path $ScriptDir "rag-mini.py") search $ScriptDir "embedding system" --top-k 3
-    
-    Write-Host ""
-    Write-Success "Test completed successfully!"
-    Write-ColorOutput "FSS-Mini-RAG is working perfectly on Windows!" "Cyan"
-}
-
-# Run main function
-Main
--- a/install_mini_rag.sh
+++ b/install_mini_rag.sh
@ -4,32 +4,6 @@

 set -e  # Exit on any error

-# Check for command line arguments
-HEADLESS_MODE=false
-if [[ "$1" == "--headless" ]]; then
-    HEADLESS_MODE=true
-    echo "🤖 Running in headless mode - using defaults for automation"
-    echo "⚠️  WARNING: Installation may take 5-10 minutes due to large dependencies"
-    echo "💡 For agents: Run as background process to avoid timeouts"
-elif [[ "$1" == "--help" || "$1" == "-h" ]]; then
-    echo ""
-    echo "FSS-Mini-RAG Installation Script"
-    echo ""
-    echo "Usage:"
-    echo "  ./install_mini_rag.sh           # Interactive installation"
-    echo "  ./install_mini_rag.sh --headless  # Automated installation for agents/CI"
-    echo "  ./install_mini_rag.sh --help      # Show this help"
-    echo ""
-    echo "Headless mode options:"
-    echo "  • Uses existing virtual environment if available"
-    echo "  • Selects light installation (Ollama + basic dependencies)"  
-    echo "  • Downloads nomic-embed-text model if Ollama is available"
-    echo "  • Skips interactive prompts and tests"
-    echo "  • Perfect for agent automation and CI/CD pipelines"
-    echo ""
-    exit 0
-fi
-
 # Colors for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
@ -110,19 +84,14 @@ check_python() {
 check_venv() {
    if [ -d "$SCRIPT_DIR/.venv" ]; then
        print_info "Virtual environment already exists at $SCRIPT_DIR/.venv"
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Using existing virtual environment"
-            return 0  # Use existing
+        echo -n "Recreate it? (y/N): "
+        read -r recreate
+        if [[ $recreate =~ ^[Yy]$ ]]; then
+            print_info "Removing existing virtual environment..."
+            rm -rf "$SCRIPT_DIR/.venv"
+            return 1  # Needs creation
        else
-            echo -n "Recreate it? (y/N): "
-            read -r recreate
-            if [[ $recreate =~ ^[Yy]$ ]]; then
-                print_info "Removing existing virtual environment..."
-                rm -rf "$SCRIPT_DIR/.venv"
-                return 1  # Needs creation
-            else
-                return 0  # Use existing
-            fi
+            return 0  # Use existing
        fi
    else
        return 1  # Needs creation
@ -171,13 +140,8 @@ check_ollama() {
            return 0
        else
            print_warning "Ollama is installed but not running"
-            if [[ "$HEADLESS_MODE" == "true" ]]; then
-                print_info "Headless mode: Starting Ollama server automatically"
-                start_ollama="y"
-            else
-                echo -n "Start Ollama now? (Y/n): "
-                read -r start_ollama
-            fi
+            echo -n "Start Ollama now? (Y/n): "
+            read -r start_ollama
            if [[ ! $start_ollama =~ ^[Nn]$ ]]; then
                print_info "Starting Ollama server..."
                ollama serve &
@ -198,84 +162,22 @@ check_ollama() {
        print_warning "Ollama not found"
        echo ""
        echo -e "${CYAN}Ollama provides the best embedding quality and performance.${NC}"
+        echo -e "${YELLOW}To install Ollama:${NC}"
+        echo "  1. Visit: https://ollama.ai/download"
+        echo "  2. Download and install for your system"
+        echo "  3. Run: ollama serve"
+        echo "  4. Re-run this installer"
        echo ""
-        echo -e "${BOLD}Options:${NC}"
-        echo -e "${GREEN}1) Install Ollama automatically${NC} (recommended)"
-        echo -e "${YELLOW}2) Manual installation${NC} - Visit https://ollama.com/download"
-        echo -e "${BLUE}3) Continue without Ollama${NC} (uses ML fallback)"
+        echo -e "${BLUE}Alternative: Use ML fallback (requires more disk space)${NC}"
        echo ""
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Continuing without Ollama (option 3)"
-            ollama_choice="3"
+        echo -n "Continue without Ollama? (y/N): "
+        read -r continue_without
+        if [[ $continue_without =~ ^[Yy]$ ]]; then
+            return 1
        else
-            echo -n "Choose [1/2/3]: "
-            read -r ollama_choice
+            print_info "Install Ollama first, then re-run this script"
+            exit 0
        fi
-        
-        case "$ollama_choice" in
-            1|"")
-                print_info "Installing Ollama using secure installation method..."
-                echo -e "${CYAN}Downloading and verifying Ollama installer...${NC}"
-                
-                # Secure installation: download, verify, then execute
-                local temp_script="/tmp/ollama-install-$$.sh"
-                if curl -fsSL https://ollama.com/install.sh -o "$temp_script" && \
-                   file "$temp_script" | grep -q "shell script" && \
-                   chmod +x "$temp_script" && \
-                   "$temp_script"; then
-                    rm -f "$temp_script"
-                    print_success "Ollama installed successfully"
-                    
-                    print_info "Starting Ollama server..."
-                    ollama serve &
-                    sleep 3
-                    
-                    if curl -s http://localhost:11434/api/version >/dev/null 2>&1; then
-                        print_success "Ollama server started"
-                        
-                        echo ""
-                        echo -e "${CYAN}💡 Pro tip: Download an LLM for AI-powered search synthesis!${NC}"
-                        echo -e "   Lightweight: ${GREEN}ollama pull qwen3:0.6b${NC} (~500MB, very fast)"
-                        echo -e "   Balanced:    ${GREEN}ollama pull qwen3:1.7b${NC} (~1.4GB, good quality)" 
-                        echo -e "   Excellent:   ${GREEN}ollama pull qwen3:4b${NC} (~2.5GB, sweet spot for most users)"
-                        echo -e "   Maximum:     ${GREEN}ollama pull qwen3:8b${NC} (~5GB, slower but top quality)"
-                        echo ""
-                        echo -e "${BLUE}🧠 RAG works great with smaller models! 4B is usually perfect.${NC}"
-                        echo -e "${BLUE}Creative possibilities: Try mistral for storytelling, qwen2.5-coder for development!${NC}"
-                        echo ""
-                        
-                        return 0
-                    else
-                        print_warning "Ollama installed but failed to start automatically"
-                        echo "Please start Ollama manually: ollama serve"
-                        echo "Then re-run this installer"
-                        exit 1
-                    fi
-                else
-                    print_error "Failed to install Ollama automatically"
-                    echo "Please install manually from https://ollama.com/download"
-                    exit 1
-                fi
-                ;;
-            2)
-                echo ""
-                echo -e "${YELLOW}Manual Ollama installation:${NC}"
-                echo "  1. Visit: https://ollama.com/download" 
-                echo "  2. Download and install for your system"
-                echo "  3. Run: ollama serve"
-                echo "  4. Re-run this installer"
-                print_info "Exiting for manual installation..."
-                exit 0
-                ;;
-            3)
-                print_info "Continuing without Ollama (will use ML fallback)"
-                return 1
-                ;;
-            *)
-                print_warning "Invalid choice, continuing without Ollama"
-                return 1
-                ;;
-        esac
    fi
 }

@ -314,13 +216,8 @@ setup_ollama_model() {
        echo "  • Purpose: High-quality semantic embeddings"
        echo "  • Alternative: System will use ML/hash fallbacks"
        echo ""
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Downloading nomic-embed-text model"
-            download_model="y"
-        else
-            echo -n "Download model? [y/N]: "
-            read -r download_model
-        fi
+        echo -n "Download model? [y/N]: "
+        read -r download_model
        should_download=$([ "$download_model" = "y" ] && echo "download" || echo "skip")
    fi
    
@ -374,27 +271,21 @@ get_installation_preferences() {
    
    echo ""
    echo -e "${BOLD}Installation options:${NC}"
-    echo -e "${GREEN}L) Light${NC} - Ollama + basic deps (~50MB) ${CYAN}← Best performance + AI chat${NC}"
-    echo -e "${YELLOW}F) Full${NC}  - Light + ML fallback (~2-3GB) ${CYAN}← RAG-only if no Ollama${NC}"
+    echo -e "${GREEN}L) Light${NC} - Ollama + basic deps (~50MB)"
+    echo -e "${YELLOW}F) Full${NC}  - Light + ML fallback (~2-3GB)"
    echo -e "${BLUE}C) Custom${NC} - Configure individual components"
    echo ""
    
    while true; do
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            # Default to light installation in headless mode
-            choice="L"
-            print_info "Headless mode: Selected Light installation"
-        else
-            echo -n "Choose [L/F/C] or Enter for recommended ($recommended): "
-            read -r choice
-            
-            # Default to recommendation if empty
-            if [ -z "$choice" ]; then
-                if [ "$ollama_available" = true ]; then
-                    choice="L"
-                else
-                    choice="F"  
-                fi
+        echo -n "Choose [L/F/C] or Enter for recommended ($recommended): "
+        read -r choice
+        
+        # Default to recommendation if empty
+        if [ -z "$choice" ]; then
+            if [ "$ollama_available" = true ]; then
+                choice="L"
+            else
+                choice="F"  
            fi
        fi
        
@ -436,13 +327,8 @@ configure_custom_installation() {
        echo ""
        echo -e "${BOLD}Ollama embedding model:${NC}"
        echo "  • nomic-embed-text (~270MB) - Best quality embeddings"
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Downloading Ollama model"
-            download_ollama="y"
-        else
-            echo -n "Download Ollama model? [y/N]: "
-            read -r download_ollama
-        fi
+        echo -n "Download Ollama model? [y/N]: "
+        read -r download_ollama
        if [[ $download_ollama =~ ^[Yy]$ ]]; then
            ollama_model="download"
        fi
@ -453,13 +339,8 @@ configure_custom_installation() {
    echo -e "${BOLD}ML fallback system:${NC}"
    echo "  • PyTorch + transformers (~2-3GB) - Works without Ollama"
    echo "  • Useful for: Offline use, server deployments, CI/CD"
-    if [[ "$HEADLESS_MODE" == "true" ]]; then
-        print_info "Headless mode: Skipping ML dependencies (keeping light)"
-        include_ml="n"
-    else
-        echo -n "Include ML dependencies? [y/N]: "
-        read -r include_ml
-    fi
+    echo -n "Include ML dependencies? [y/N]: "
+    read -r include_ml
    
    # Pre-download models
    local predownload_ml="skip"
@ -468,13 +349,8 @@ configure_custom_installation() {
        echo -e "${BOLD}Pre-download ML models:${NC}"
        echo "  • sentence-transformers model (~80MB)"
        echo "  • Skip: Models download automatically when first used"
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Skipping ML model pre-download"
-            predownload="n"
-        else
-            echo -n "Pre-download now? [y/N]: "
-            read -r predownload
-        fi
+        echo -n "Pre-download now? [y/N]: "
+        read -r predownload
        if [[ $predownload =~ ^[Yy]$ ]]; then
            predownload_ml="download"
        fi
@ -535,73 +411,6 @@ install_dependencies() {
    fi
 }

-# Setup application icon for desktop integration
-setup_desktop_icon() {
-    print_header "Setting Up Desktop Integration"
-    
-    # Check if we're in a GUI environment
-    if [ -z "$DISPLAY" ] && [ -z "$WAYLAND_DISPLAY" ]; then
-        print_info "No GUI environment detected - skipping desktop integration"
-        return 0
-    fi
-    
-    local icon_source="$SCRIPT_DIR/assets/Fss_Mini_Rag.png"
-    local desktop_dir="$HOME/.local/share/applications"
-    local icon_dir="$HOME/.local/share/icons"
-    
-    # Check if icon file exists
-    if [ ! -f "$icon_source" ]; then
-        print_warning "Icon file not found at $icon_source"
-        return 1
-    fi
-    
-    # Create directories if needed
-    mkdir -p "$desktop_dir" "$icon_dir" 2>/dev/null
-    
-    # Copy icon to standard location
-    local icon_dest="$icon_dir/fss-mini-rag.png"
-    if cp "$icon_source" "$icon_dest" 2>/dev/null; then
-        print_success "Icon installed to $icon_dest"
-    else
-        print_warning "Could not install icon (permissions?)"
-        return 1
-    fi
-    
-    # Create desktop entry
-    local desktop_file="$desktop_dir/fss-mini-rag.desktop"
-    cat > "$desktop_file" << EOF
-[Desktop Entry]
-Name=FSS-Mini-RAG
-Comment=Fast Semantic Search for Code and Documents
-Exec=$SCRIPT_DIR/rag-tui
-Icon=fss-mini-rag
-Terminal=true
-Type=Application
-Categories=Development;Utility;TextEditor;
-Keywords=search;code;rag;semantic;ai;
-StartupNotify=true
-EOF
-    
-    if [ -f "$desktop_file" ]; then
-        chmod +x "$desktop_file"
-        print_success "Desktop entry created"
-        
-        # Update desktop database if available
-        if command_exists update-desktop-database; then
-            update-desktop-database "$desktop_dir" 2>/dev/null
-            print_info "Desktop database updated"
-        fi
-        
-        print_info "✨ FSS-Mini-RAG should now appear in your application menu!"
-        print_info "   Look for it in Development or Utility categories"
-    else
-        print_warning "Could not create desktop entry"
-        return 1
-    fi
-    
-    return 0
-}
-
 # Setup ML models based on configuration  
 setup_ml_models() {
    if [ "$INSTALL_TYPE" != "full" ]; then
@ -618,13 +427,8 @@ setup_ml_models() {
        echo "  • Purpose: Offline fallback when Ollama unavailable"
        echo "  • If skipped: Auto-downloads when first needed"
        echo ""
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Skipping ML model pre-download"
-            download_ml="n"
-        else
-            echo -n "Pre-download now? [y/N]: "
-            read -r download_ml
-        fi
+        echo -n "Pre-download now? [y/N]: "
+        read -r download_ml
        should_predownload=$([ "$download_ml" = "y" ] && echo "download" || echo "skip")
    fi
    
@ -704,36 +508,7 @@ print(f'✅ Embedding system: {info[\"method\"]}')
    " 2>/dev/null; then
        print_success "Embedding system working"
    else
-        echo ""
-        echo -e "${YELLOW}⚠️  System Check${NC}"
-        
-        # Smart diagnosis - check what's actually available
-        if command_exists ollama && curl -s http://localhost:11434/api/version >/dev/null 2>&1; then
-            # Ollama is running, check for models
-            local available_models=$(ollama list 2>/dev/null | grep -E "(qwen3|llama|mistral|gemma)" | head -5)
-            local embedding_models=$(ollama list 2>/dev/null | grep -E "(embed|bge)" | head -2)
-            
-            if [[ -n "$available_models" ]]; then
-                echo -e "${GREEN}✅ Ollama is running with available models${NC}"
-                echo -e "${CYAN}Your setup will work great! The system will auto-select the best models.${NC}"
-                echo ""
-                echo -e "${BLUE}💡 RAG Performance Tip:${NC} Smaller models often work better with RAG!"
-                echo -e "   With context provided, even 0.6B models give good results"
-                echo -e "   4B models = excellent, 8B+ = overkill (slower responses)"
-            else
-                echo -e "${BLUE}Ollama is running but no chat models found.${NC}"
-                echo -e "Download a lightweight model: ${GREEN}ollama pull qwen3:0.6b${NC} (fast)"
-                echo -e "Or balanced option: ${GREEN}ollama pull qwen3:4b${NC} (excellent quality)"
-            fi
-        else
-            echo -e "${BLUE}Ollama not running or not installed.${NC}"
-            echo -e "Start Ollama: ${GREEN}ollama serve${NC}"
-            echo -e "Or install from: https://ollama.com/download"
-        fi
-        
-        echo ""
-        echo -e "${CYAN}✅ FSS-Mini-RAG will auto-detect and use the best available method.${NC}"
-        echo ""
+        print_warning "Embedding test failed, but system should still work"
    fi
    
    return 0
@ -770,137 +545,37 @@ show_completion() {
    fi
    
    # Ask if they want to run a test
-    echo ""
-    echo -e "${BOLD}🧪 Quick Test Available${NC}"
-    echo -e "${CYAN}Test FSS-Mini-RAG with a small sample project (takes ~10 seconds)${NC}"
-    echo ""
-    
-    # Ensure output is flushed and we're ready for input
-    printf "Run quick test now? [Y/n]: "
-    
-    # More robust input handling
-    if [[ "$HEADLESS_MODE" == "true" ]]; then
-        print_info "Headless mode: Skipping interactive test"
-        echo -e "${BLUE}You can test FSS-Mini-RAG anytime with: ./rag-tui${NC}"
-        show_beginner_guidance
-    elif read -r run_test < /dev/tty 2>/dev/null; then
-        echo "User chose: '$run_test'"  # Debug output
-        if [[ ! $run_test =~ ^[Nn]$ ]]; then
-            run_quick_test
-            echo ""
-            show_beginner_guidance
-        else
-            echo -e "${BLUE}Skipping test - you can run it later with: ./rag-tui${NC}"
-            show_beginner_guidance
-        fi
-    else
-        # Fallback if interactive input fails
-        echo ""
-        echo -e "${YELLOW}⚠️  Interactive input not available - skipping test prompt${NC}"
-        echo -e "${BLUE}You can test FSS-Mini-RAG anytime with: ./rag-tui${NC}"
-        show_beginner_guidance
+    echo -n "Would you like to run a quick test now? (Y/n): "
+    read -r run_test
+    if [[ ! $run_test =~ ^[Nn]$ ]]; then
+        run_quick_test
    fi
 }

-# Note: Sample project creation removed - now indexing real codebase/docs
-
-# Run quick test with sample data
+# Run quick test
 run_quick_test() {
    print_header "Quick Test"
    
-    # Ask what to index: code vs docs
-    echo -e "${CYAN}What would you like to explore with FSS-Mini-RAG?${NC}"
+    print_info "Testing on this project directory..."
+    echo "This will index the FSS-Mini-RAG system itself as a test."
    echo ""
-    echo -e "${GREEN}1) Code${NC} - Index the FSS-Mini-RAG codebase (~50 files)"
-    echo -e "${BLUE}2) Docs${NC} - Index the documentation (~10 files)"  
-    echo ""
-    if [[ "$HEADLESS_MODE" == "true" ]]; then
-        print_info "Headless mode: Indexing code by default"
-        index_choice="1"
+    
+    # Index this project
+    if ./rag-mini index "$SCRIPT_DIR"; then
+        print_success "Indexing completed"
+        
+        # Try a search
+        echo ""
+        print_info "Testing search functionality..."
+        ./rag-mini search "$SCRIPT_DIR" "embedding system" --limit 3
+        
+        echo ""
+        print_success "Test completed successfully!"
+        echo -e "${CYAN}You can now use FSS-Mini-RAG on your own projects.${NC}"
    else
-        echo -n "Choose [1/2] or Enter for code: "
-        read -r index_choice
+        print_error "Test failed"
+        echo "Check the error messages above for troubleshooting."
    fi
-    
-    # Determine what to index
-    local target_dir="$SCRIPT_DIR"
-    local target_name="FSS-Mini-RAG codebase"
-    if [[ "$index_choice" == "2" ]]; then
-        target_dir="$SCRIPT_DIR/docs"
-        target_name="FSS-Mini-RAG documentation"
-    fi
-    
-    # Ensure we're in the right directory and have the right permissions
-    if [[ ! -f "./rag-mini" ]]; then
-        print_error "rag-mini script not found in current directory: $(pwd)"
-        print_info "This might be a path issue. The installer should run from the project directory."
-        return 1
-    fi
-    
-    if [[ ! -x "./rag-mini" ]]; then
-        print_info "Making rag-mini executable..."
-        chmod +x ./rag-mini
-    fi
-    
-    # Index the chosen target
-    print_info "Indexing $target_name..."
-    echo -e "${CYAN}This will take 10-30 seconds depending on your system${NC}"
-    echo ""
-    
-    if ./rag-mini index "$target_dir"; then
-        print_success "✅ Indexing completed successfully!"
-        
-        echo ""
-        print_info "🎯 Launching Interactive Tutorial..."
-        echo -e "${CYAN}The TUI has 6 sample questions to get you started.${NC}"
-        echo -e "${CYAN}Try the suggested queries or enter your own!${NC}"
-        echo ""
-        if [[ "$HEADLESS_MODE" != "true" ]]; then
-            echo -n "Press Enter to start interactive tutorial: "
-            read -r
-        fi
-        
-        # Launch the TUI which has the existing interactive tutorial system
-        ./rag-tui.py "$target_dir" || true
-        
-        echo ""
-        print_success "🎉 Tutorial completed!"
-        echo -e "${CYAN}FSS-Mini-RAG is working perfectly!${NC}"
-        
-    else
-        print_error "❌ Indexing failed"
-        echo ""
-        echo -e "${YELLOW}Possible causes:${NC}"
-        echo "• Virtual environment not properly activated"
-        echo "• Missing dependencies (try: pip install -r requirements.txt)"
-        echo "• Path issues (ensure script runs from project directory)"
-        echo "• Ollama connection issues (if using Ollama)"
-        echo ""
-        return 1
-    fi
-}
-
-# Show beginner-friendly first steps
-show_beginner_guidance() {
-    print_header "Getting Started - Your First Search"
-    
-    echo -e "${CYAN}FSS-Mini-RAG is ready! Here's how to start:${NC}"
-    echo ""
-    echo -e "${GREEN}🎯 For Beginners (Recommended):${NC}"
-    echo "   ./rag-tui"
-    echo "   ↳ Interactive interface with sample questions"
-    echo ""
-    echo -e "${BLUE}💻 For Developers:${NC}"
-    echo "   ./rag-mini index /path/to/your/project"
-    echo "   ./rag-mini search /path/to/your/project \"your question\""
-    echo ""
-    echo -e "${YELLOW}📚 What can you search for in FSS-Mini-RAG?${NC}"
-    echo "   • Technical: \"chunking strategy\", \"ollama integration\", \"indexing performance\""
-    echo "   • Usage: \"how to improve search results\", \"why does indexing take long\""
-    echo "   • Your own projects: any code, docs, emails, notes, research"
-    echo ""
-    echo -e "${CYAN}💡 Pro tip:${NC} You can drag ANY text-based documents into a folder"
-    echo "   and search through them - emails, notes, research, chat logs!"
 }

 # Main installation flow
@ -921,15 +596,11 @@ main() {
    echo -e "${CYAN}Note: You'll be asked before downloading any models${NC}"
    echo ""
    
-    if [[ "$HEADLESS_MODE" == "true" ]]; then
-        print_info "Headless mode: Beginning installation automatically"
-    else
-        echo -n "Begin installation? [Y/n]: "
-        read -r continue_install
-        if [[ $continue_install =~ ^[Nn]$ ]]; then
-            echo "Installation cancelled."
-            exit 0
-        fi
+    echo -n "Begin installation? [Y/n]: "
+    read -r continue_install
+    if [[ $continue_install =~ ^[Nn]$ ]]; then
+        echo "Installation cancelled."
+        exit 0
    fi
    
    # Run installation steps
@ -954,11 +625,7 @@ main() {
    fi
    setup_ml_models
    
-    # Setup desktop integration with icon
-    setup_desktop_icon
-    
    if test_installation; then
-        install_global_wrapper
        show_completion
    else
        print_error "Installation test failed"
@ -967,107 +634,5 @@ main() {
    fi
 }

-# Install global wrapper script for system-wide access
-install_global_wrapper() {
-    print_info "Installing global rag-mini command..."
-    
-    # Create the wrapper script
-    cat > /tmp/rag-mini-wrapper << 'EOF'
-#!/bin/bash
-# FSS-Mini-RAG Global Wrapper Script
-# Automatically handles virtual environment activation
-
-# Find the installation directory
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-# Common installation paths to check
-INSTALL_PATHS=(
-    "/opt/fss-mini-rag"
-    "/usr/local/lib/fss-mini-rag"
-    "$(dirname "$SCRIPT_DIR")/lib/fss-mini-rag"
-    "$HOME/.local/lib/fss-mini-rag"
-)
-
-# Add current directory if it looks like an FSS-Mini-RAG installation
-if [ -f "$(pwd)/.venv/bin/rag-mini" ] && [ -f "$(pwd)/requirements.txt" ]; then
-    INSTALL_PATHS+=("$(pwd)")
-fi
-
-# Find the actual installation
-FSS_MINI_RAG_HOME=""
-for path in "${INSTALL_PATHS[@]}"; do
-    if [ -f "$path/.venv/bin/rag-mini" ] && [ -f "$path/requirements.txt" ]; then
-        FSS_MINI_RAG_HOME="$path"
-        break
-    fi
-done
-
-# If not found in standard paths, try to find it
-if [ -z "$FSS_MINI_RAG_HOME" ]; then
-    # Try to find by looking for the venv with rag-mini
-    FSS_MINI_RAG_HOME=$(find /opt /usr/local /home -maxdepth 4 -name ".venv" -type d 2>/dev/null | while read venv_dir; do
-        if [ -f "$venv_dir/bin/rag-mini" ] && [ -f "$(dirname "$venv_dir")/requirements.txt" ]; then
-            dirname "$venv_dir"
-            break
-        fi
-    done | head -1)
-fi
-
-# Error if still not found
-if [ -z "$FSS_MINI_RAG_HOME" ] || [ ! -f "$FSS_MINI_RAG_HOME/.venv/bin/rag-mini" ]; then
-    echo "❌ FSS-Mini-RAG installation not found!"
-    echo ""
-    echo "Expected to find .venv/bin/rag-mini in one of:"
-    printf "  %s\n" "${INSTALL_PATHS[@]}"
-    echo ""
-    echo "Please reinstall FSS-Mini-RAG:"
-    echo "  ./install_mini_rag.sh"
-    exit 1
-fi
-
-# Activate virtual environment and run rag-mini with all arguments
-cd "$FSS_MINI_RAG_HOME"
-source .venv/bin/activate
-
-# Suppress virtual environment warnings since we handle activation
-export FSS_MINI_RAG_GLOBAL_WRAPPER=1
-exec .venv/bin/rag-mini "$@"
-EOF
-
-    # Install the wrapper globally
-    if [[ "$HEADLESS_MODE" == "true" ]] || [[ -w "/usr/local/bin" ]]; then
-        # Headless mode or we have write permissions - install directly
-        sudo cp /tmp/rag-mini-wrapper /usr/local/bin/rag-mini
-        sudo chmod +x /usr/local/bin/rag-mini
-        print_success "✅ Global rag-mini command installed"
-        echo -e "${CYAN}You can now use 'rag-mini' from anywhere on your system!${NC}"
-    else
-        # Ask user permission for system-wide installation
-        echo ""
-        echo -e "${YELLOW}Install rag-mini globally?${NC}"
-        echo "This will allow you to run 'rag-mini' from anywhere on your system."
-        echo ""
-        echo -n "Install globally? [Y/n]: "
-        read -r install_global
-        
-        if [[ ! $install_global =~ ^[Nn]$ ]]; then
-            if sudo cp /tmp/rag-mini-wrapper /usr/local/bin/rag-mini && sudo chmod +x /usr/local/bin/rag-mini; then
-                print_success "✅ Global rag-mini command installed"
-                echo -e "${CYAN}You can now use 'rag-mini' from anywhere on your system!${NC}"
-            else
-                print_error "❌ Failed to install global command"
-                echo -e "${YELLOW}You can still use rag-mini from the installation directory${NC}"
-            fi
-        else
-            echo -e "${YELLOW}Skipped global installation${NC}"
-            echo -e "${CYAN}You can use rag-mini from the installation directory${NC}"
-        fi
-    fi
-    
-    # Clean up
-    rm -f /tmp/rag-mini-wrapper
-    echo ""
-}
-
 # Run main function
 main "$@"
--- a/install_windows.bat
+++ b/install_windows.bat
@ -1,418 +0,0 @@
-@echo off
-REM FSS-Mini-RAG Windows Installer - Beautiful & Comprehensive
-setlocal enabledelayedexpansion
-
-REM Enable colors and unicode for modern Windows
-chcp 65001 >nul 2>&1
-
-REM Check for command line arguments
-set "HEADLESS_MODE=false"
-if "%1"=="--headless" (
-    set "HEADLESS_MODE=true"
-    echo 🤖 Running in headless mode - using defaults for automation
-    echo ⚠️  WARNING: Installation may take 5-10 minutes due to large dependencies
-    echo 💡 For agents: Run as background process to avoid timeouts
-) else if "%1"=="--help" (
-    goto show_help
-) else if "%1"=="-h" (
-    goto show_help
-)
-
-goto start_installation
-
-:show_help
-echo.
-echo FSS-Mini-RAG Windows Installation Script
-echo.
-echo Usage:
-echo   install_windows.bat           # Interactive installation
-echo   install_windows.bat --headless   # Automated installation for agents/CI
-echo   install_windows.bat --help       # Show this help
-echo.
-echo Headless mode options:
-echo   • Uses existing virtual environment if available
-echo   • Installs core dependencies only
-echo   • Skips AI model downloads
-echo   • Skips interactive prompts and tests  
-echo   • Perfect for agent automation and CI/CD pipelines
-echo.
-pause
-exit /b 0
-
-:start_installation
-
-echo.
-echo ╔══════════════════════════════════════════════════╗
-echo ║            FSS-Mini-RAG Windows Installer       ║
-echo ║         Fast Semantic Search for Code           ║
-echo ╚══════════════════════════════════════════════════╝
-echo.
-echo 🚀 Comprehensive installation process:
-echo   • Python environment setup and validation
-echo   • Smart dependency management 
-echo   • Optional AI model downloads (with your consent)
-echo   • System testing and verification
-echo   • Interactive tutorial (optional)
-echo.
-echo 💡 Note: You'll be asked before downloading any models
-echo.
-
-if "!HEADLESS_MODE!"=="true" (
-    echo Headless mode: Beginning installation automatically
-) else (
-    set /p "continue=Begin installation? [Y/n]: "
-    if /i "!continue!"=="n" (
-        echo Installation cancelled.
-        pause
-        exit /b 0
-    )
-)
-
-REM Get script directory
-set "SCRIPT_DIR=%~dp0"
-set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
-
-echo.
-echo ══════════════════════════════════════════════════
-echo [1/5] Checking Python Environment...
-python --version >nul 2>&1
-if errorlevel 1 (
-    echo ❌ ERROR: Python not found!
-    echo.
-    echo 📦 Please install Python from: https://python.org/downloads
-    echo 🔧 Installation requirements:
-    echo    • Python 3.8 or higher
-    echo    • Make sure to check "Add Python to PATH" during installation
-    echo    • Restart your command prompt after installation
-    echo.
-    echo 💡 Quick install options:
-    echo    • Download from python.org (recommended)
-    echo    • Or use: winget install Python.Python.3.11
-    echo    • Or use: choco install python311
-    echo.
-    pause
-    exit /b 1
-)
-
-for /f "tokens=2" %%i in ('python --version 2^>^&1') do set "PYTHON_VERSION=%%i"
-echo ✅ Found Python !PYTHON_VERSION!
-
-REM Check Python version (basic check for 3.x)
-for /f "tokens=1 delims=." %%a in ("!PYTHON_VERSION!") do set "MAJOR_VERSION=%%a"
-if !MAJOR_VERSION! LSS 3 (
-    echo ❌ ERROR: Python !PYTHON_VERSION! found, but Python 3.8+ required
-    echo 📦 Please upgrade Python to 3.8 or higher
-    pause
-    exit /b 1
-)
-
-echo.
-echo ══════════════════════════════════════════════════
-echo [2/5] Creating Python Virtual Environment...
-if exist "%SCRIPT_DIR%\.venv" (
-    echo 🔄 Found existing virtual environment, checking if it works...
-    call "%SCRIPT_DIR%\.venv\Scripts\activate.bat" >nul 2>&1
-    if not errorlevel 1 (
-        "%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "import sys; print('✅ Existing environment works')" >nul 2>&1
-        if not errorlevel 1 (
-            echo ✅ Using existing virtual environment
-            goto skip_venv_creation
-        )
-    )
-    echo 🔄 Removing problematic virtual environment...
-    rmdir /s /q "%SCRIPT_DIR%\.venv" 2>nul
-    if exist "%SCRIPT_DIR%\.venv" (
-        echo ⚠️ Could not remove old environment, will try to work with it...
-    )
-)
-
-echo 📁 Creating fresh virtual environment...
-python -m venv "%SCRIPT_DIR%\.venv"
-if errorlevel 1 (
-    echo ❌ ERROR: Failed to create virtual environment
-    echo.
-    echo 🔧 This might be because:
-    echo    • Python venv module is not installed
-    echo    • Insufficient permissions
-    echo    • Path contains special characters
-    echo.
-    echo 💡 Try: python -m pip install --user virtualenv
-    pause
-    exit /b 1
-)
-echo ✅ Virtual environment created successfully
-
-:skip_venv_creation
-echo.
-echo ══════════════════════════════════════════════════
-echo [3/5] Installing Python Dependencies...
-echo 📦 This may take 2-3 minutes depending on your internet speed...
-echo.
-
-call "%SCRIPT_DIR%\.venv\Scripts\activate.bat"
-if errorlevel 1 (
-    echo ❌ ERROR: Could not activate virtual environment
-    pause
-    exit /b 1
-)
-
-echo 🔧 Upgrading pip...
-"%SCRIPT_DIR%\.venv\Scripts\python.exe" -m pip install --upgrade pip --quiet
-if errorlevel 1 (
-    echo ⚠️ Warning: Could not upgrade pip, continuing anyway...
-)
-
-echo 📚 Installing core dependencies (lancedb, pandas, numpy, etc.)...
-echo    This provides semantic search capabilities
-"%SCRIPT_DIR%\.venv\Scripts\pip.exe" install -r "%SCRIPT_DIR%\requirements.txt"
-if errorlevel 1 (
-    echo ❌ ERROR: Failed to install dependencies
-    echo.
-    echo 🔧 Possible solutions:
-    echo    • Check internet connection
-    echo    • Try running as administrator
-    echo    • Check if antivirus is blocking pip
-    echo    • Manually run: pip install -r requirements.txt
-    echo.
-    pause
-    exit /b 1
-)
-echo ✅ Dependencies installed successfully
-
-echo.
-echo ══════════════════════════════════════════════════
-echo [4/5] Testing Installation...
-echo 🧪 Verifying Python imports...
-echo Attempting import test...
-"%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('✅ Core imports successful')" 2>import_error.txt
-if errorlevel 1 (
-    echo ❌ ERROR: Installation test failed
-    echo.
-    echo 🔍 Import error details:
-    type import_error.txt
-    echo.
-    echo 🔧 This usually means:
-    echo    • Dependencies didn't install correctly
-    echo    • Virtual environment is corrupted  
-    echo    • Python path issues
-    echo    • Module conflicts with existing installations
-    echo.
-    echo 💡 Troubleshooting options:
-    echo    • Try: "%SCRIPT_DIR%\.venv\Scripts\pip.exe" install -r requirements.txt --force-reinstall
-    echo    • Or delete .venv folder and run installer again
-    echo    • Or check import_error.txt for specific error details
-    del import_error.txt >nul 2>&1
-    pause
-    exit /b 1
-)
-del import_error.txt >nul 2>&1
-
-echo 🔍 Testing embedding system...
-"%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder; embedder = CodeEmbedder(); info = embedder.get_embedding_info(); print(f'✅ Embedding method: {info[\"method\"]}')" 2>nul
-if errorlevel 1 (
-    echo ⚠️ Warning: Embedding test inconclusive, but core system is ready
-)
-
-echo.
-echo ══════════════════════════════════════════════════
-echo [5/6] Setting Up Desktop Integration...
-call :setup_windows_icon
-
-echo.
-echo ══════════════════════════════════════════════════
-echo [6/6] Checking AI Features (Optional)...
-call :check_ollama_enhanced
-
-echo.
-echo ╔══════════════════════════════════════════════════╗
-echo ║             INSTALLATION SUCCESSFUL!            ║
-echo ╚══════════════════════════════════════════════════╝
-echo.
-echo 🎯 Quick Start Options:
-echo.
-echo 🎨 For Beginners (Recommended):
-echo    rag.bat                 - Interactive interface with guided setup
-echo.
-echo 💻 For Developers:
-echo    rag.bat index C:\myproject      - Index a project
-echo    rag.bat search C:\myproject "authentication"  - Search project  
-echo    rag.bat help            - Show all commands
-echo.
-
-REM Offer interactive tutorial
-echo 🧪 Quick Test Available:
-echo    Test FSS-Mini-RAG with a small sample project (takes ~30 seconds)
-echo.
-if "!HEADLESS_MODE!"=="true" (
-    echo Headless mode: Skipping interactive tutorial
-    echo 📚 You can run the tutorial anytime with: rag.bat
-) else (
-    set /p "run_test=Run interactive tutorial now? [Y/n]: "
-    if /i "!run_test!" NEQ "n" (
-        call :run_tutorial
-    ) else (
-        echo 📚 You can run the tutorial anytime with: rag.bat
-    )
-)
-
-echo.
-echo 🎉 Setup complete! FSS-Mini-RAG is ready to use.
-echo 💡 Pro tip: Try indexing any folder with text files - code, docs, notes!
-echo.
-pause
-exit /b 0
-
-:check_ollama_enhanced
-echo 🤖 Checking for AI capabilities...
-echo.
-
-REM Check if Ollama is installed
-where ollama >nul 2>&1
-if errorlevel 1 (
-    echo ⚠️ Ollama not installed - using basic search mode
-    echo.
-    echo 🎯 For Enhanced AI Features:
-    echo    • 📥 Install Ollama: https://ollama.com/download
-    echo    • 🔄 Run: ollama serve  
-    echo    • 🧠 Download model: ollama pull qwen3:1.7b
-    echo.
-    echo 💡 Benefits of AI features:
-    echo    • Smart query expansion for better search results
-    echo    • Interactive exploration mode with conversation memory
-    echo    • AI-powered synthesis of search results  
-    echo    • Natural language understanding of your questions
-    echo.
-    goto :eof
-)
-
-REM Check if Ollama server is running
-curl -s http://localhost:11434/api/version >nul 2>&1
-if errorlevel 1 (
-    echo 🟡 Ollama installed but not running
-    echo.
-    if "!HEADLESS_MODE!"=="true" (
-        echo Headless mode: Starting Ollama server automatically
-        set "start_ollama=y"
-    ) else (
-        set /p "start_ollama=Start Ollama server now? [Y/n]: "
-    )
-    if /i "!start_ollama!" NEQ "n" (
-        echo 🚀 Starting Ollama server...
-        start /b ollama serve
-        timeout /t 3 /nobreak >nul
-        curl -s http://localhost:11434/api/version >nul 2>&1
-        if errorlevel 1 (
-            echo ⚠️ Could not start Ollama automatically
-            echo 💡 Please run: ollama serve
-        ) else (
-            echo ✅ Ollama server started successfully!
-        )
-    )
-) else (
-    echo ✅ Ollama server is running!
-)
-
-REM Check for available models
-echo 🔍 Checking for AI models...
-ollama list 2>nul | findstr /v "NAME" | findstr /v "^$" >nul
-if errorlevel 1 (
-    echo 📦 No AI models found
-    echo.
-    echo 🧠 Recommended Models (choose one):
-    echo    • qwen3:1.7b    - Excellent for RAG (1.4GB, recommended)
-    echo    • qwen3:0.6b    - Lightweight and fast (~500MB)  
-    echo    • qwen3:4b      - Higher quality but slower (~2.5GB)
-    echo.
-    if "!HEADLESS_MODE!"=="true" (
-        echo Headless mode: Skipping model download
-        set "install_model=n"
-    ) else (
-        set /p "install_model=Download qwen3:1.7b model now? [Y/n]: "
-    )
-    if /i "!install_model!" NEQ "n" (
-        echo 📥 Downloading qwen3:1.7b model...
-        echo    This may take 5-10 minutes depending on your internet speed
-        ollama pull qwen3:1.7b
-        if errorlevel 1 (
-            echo ⚠️ Download failed - you can try again later with: ollama pull qwen3:1.7b
-        ) else (
-            echo ✅ Model downloaded successfully! AI features are now available.
-        )
-    )
-) else (
-    echo ✅ AI models found - full AI features available!
-    echo 🎉 Your system supports query expansion, exploration mode, and synthesis!
-)
-goto :eof
-
-:run_tutorial
-echo.
-echo ═══════════════════════════════════════════════════
-echo 🧪 Running Interactive Tutorial
-echo ═══════════════════════════════════════════════════
-echo.
-echo 📚 This tutorial will:
-echo    • Index the FSS-Mini-RAG documentation
-echo    • Show you how to search effectively
-echo    • Demonstrate AI features (if available)
-echo.
-
-call "%SCRIPT_DIR%\.venv\Scripts\activate.bat"
-
-echo 📁 Indexing project for demonstration...
-"%SCRIPT_DIR%\.venv\Scripts\python.exe" rag-mini.py index "%SCRIPT_DIR%" >nul 2>&1
-if errorlevel 1 (
-    echo ❌ Indexing failed - please check the installation
-    goto :eof
-)
-
-echo ✅ Indexing complete! 
-echo.
-echo 🔍 Example search: "embedding"
-"%SCRIPT_DIR%\.venv\Scripts\python.exe" rag-mini.py search "%SCRIPT_DIR%" "embedding" --top-k 3
-echo.
-echo 🎯 Try the interactive interface:
-echo    rag.bat
-echo.
-echo 💡 You can now search any project by indexing it first!
-goto :eof
-
-:setup_windows_icon
-echo 🎨 Setting up application icon and shortcuts...
-
-REM Check if icon exists
-if not exist "%SCRIPT_DIR%\assets\Fss_Mini_Rag.png" (
-    echo ⚠️ Icon file not found - skipping desktop integration
-    goto :eof
-)
-
-REM Create desktop shortcut
-echo 📱 Creating desktop shortcut...
-set "desktop=%USERPROFILE%\Desktop"
-set "shortcut=%desktop%\FSS-Mini-RAG.lnk"
-
-REM Use PowerShell to create shortcut with icon
-powershell -Command "& {$WshShell = New-Object -comObject WScript.Shell; $Shortcut = $WshShell.CreateShortcut('%shortcut%'); $Shortcut.TargetPath = '%SCRIPT_DIR%\rag.bat'; $Shortcut.WorkingDirectory = '%SCRIPT_DIR%'; $Shortcut.Description = 'FSS-Mini-RAG - Fast Semantic Search'; $Shortcut.Save()}" >nul 2>&1
-
-if exist "%shortcut%" (
-    echo ✅ Desktop shortcut created
-) else (
-    echo ⚠️ Could not create desktop shortcut
-)
-
-REM Create Start Menu shortcut
-echo 📂 Creating Start Menu entry...
-set "startmenu=%APPDATA%\Microsoft\Windows\Start Menu\Programs"
-set "startshortcut=%startmenu%\FSS-Mini-RAG.lnk"
-
-powershell -Command "& {$WshShell = New-Object -comObject WScript.Shell; $Shortcut = $WshShell.CreateShortcut('%startshortcut%'); $Shortcut.TargetPath = '%SCRIPT_DIR%\rag.bat'; $Shortcut.WorkingDirectory = '%SCRIPT_DIR%'; $Shortcut.Description = 'FSS-Mini-RAG - Fast Semantic Search'; $Shortcut.Save()}" >nul 2>&1
-
-if exist "%startshortcut%" (
-    echo ✅ Start Menu entry created
-) else (
-    echo ⚠️ Could not create Start Menu entry
-)
-
-echo 💡 FSS-Mini-RAG shortcuts have been created on your Desktop and Start Menu
-echo    You can now launch the application from either location
-goto :eof
--- a/mini_rag/claude_rag/init.py
+++ b/mini_rag/claude_rag/init.py
@ -7,16 +7,16 @@ Designed for portability, efficiency, and simplicity across projects and compute

 __version__ = "2.1.0"

+from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .chunker import CodeChunker
 from .indexer import ProjectIndexer
-from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .search import CodeSearcher
 from .watcher import FileWatcher

 __all__ = [
    "CodeEmbedder",
-    "CodeChunker",
+    "CodeChunker", 
    "ProjectIndexer",
    "CodeSearcher",
    "FileWatcher",
-]
+]
--- a/mini_rag/claude_rag/main.py
+++ b/mini_rag/claude_rag/main.py
@ -2,5 +2,5 @@

 from .cli import cli

-if __name__ == "__main__":
-    cli()
+if __name__ == '__main__':
+    cli()
--- a/mini_rag/claude_rag/auto_optimizer.py
+++ b/mini_rag/claude_rag/auto_optimizer.py
@ -3,188 +3,194 @@ Auto-optimizer for FSS-Mini-RAG.
 Automatically tunes settings based on usage patterns.
 """

-import json
-import logging
-from collections import Counter
 from pathlib import Path
-from typing import Any, Dict
+import json
+from typing import Dict, Any, List
+from collections import Counter
+import logging

 logger = logging.getLogger(__name__)

-
 class AutoOptimizer:
    """Automatically optimizes RAG settings based on project patterns."""
-
+    
    def __init__(self, project_path: Path):
        self.project_path = project_path
-        self.rag_dir = project_path / ".mini-rag"
-        self.config_path = self.rag_dir / "config.json"
-        self.manifest_path = self.rag_dir / "manifest.json"
-
+        self.rag_dir = project_path / '.mini-rag'
+        self.config_path = self.rag_dir / 'config.json'
+        self.manifest_path = self.rag_dir / 'manifest.json'
+    
    def analyze_and_optimize(self) -> Dict[str, Any]:
        """Analyze current patterns and auto-optimize settings."""
-
+        
        if not self.manifest_path.exists():
            return {"error": "No index found - run indexing first"}
-
+        
        # Load current data
        with open(self.manifest_path) as f:
            manifest = json.load(f)
-
+        
        # Analyze patterns
        analysis = self._analyze_patterns(manifest)
-
+        
        # Generate optimizations
        optimizations = self._generate_optimizations(analysis)
-
+        
        # Apply optimizations if beneficial
-        if optimizations["confidence"] > 0.7:
+        if optimizations['confidence'] > 0.7:
            self._apply_optimizations(optimizations)
            return {
                "status": "optimized",
-                "changes": optimizations["changes"],
-                "expected_improvement": optimizations["expected_improvement"],
+                "changes": optimizations['changes'],
+                "expected_improvement": optimizations['expected_improvement']
            }
        else:
            return {
                "status": "no_changes_needed",
                "analysis": analysis,
-                "confidence": optimizations["confidence"],
+                "confidence": optimizations['confidence']
            }
-
+    
    def _analyze_patterns(self, manifest: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze current indexing patterns."""
-        files = manifest.get("files", {})
-
+        files = manifest.get('files', {})
+        
        # Language distribution
        languages = Counter()
        sizes = []
        chunk_ratios = []
-
+        
        for filepath, info in files.items():
-            lang = info.get("language", "unknown")
+            lang = info.get('language', 'unknown')
            languages[lang] += 1
-
-            size = info.get("size", 0)
-            chunks = info.get("chunks", 1)
-
+            
+            size = info.get('size', 0)
+            chunks = info.get('chunks', 1)
+            
            sizes.append(size)
            chunk_ratios.append(chunks / max(1, size / 1000))  # chunks per KB
-
+        
        avg_chunk_ratio = sum(chunk_ratios) / len(chunk_ratios) if chunk_ratios else 1
        avg_size = sum(sizes) / len(sizes) if sizes else 1000
-
+        
        return {
-            "languages": dict(languages.most_common()),
-            "total_files": len(files),
-            "total_chunks": sum(info.get("chunks", 1) for info in files.values()),
-            "avg_chunk_ratio": avg_chunk_ratio,
-            "avg_file_size": avg_size,
-            "large_files": sum(1 for s in sizes if s > 10000),
-            "small_files": sum(1 for s in sizes if s < 500),
+            'languages': dict(languages.most_common()),
+            'total_files': len(files),
+            'total_chunks': sum(info.get('chunks', 1) for info in files.values()),
+            'avg_chunk_ratio': avg_chunk_ratio,
+            'avg_file_size': avg_size,
+            'large_files': sum(1 for s in sizes if s > 10000),
+            'small_files': sum(1 for s in sizes if s < 500)
        }
-
+    
    def _generate_optimizations(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Generate optimization recommendations."""
        changes = []
        confidence = 0.5
        expected_improvement = 0
-
+        
        # Optimize chunking based on dominant language
-        languages = analysis["languages"]
+        languages = analysis['languages']
        if languages:
            dominant_lang, count = list(languages.items())[0]
-            lang_pct = count / analysis["total_files"]
-
+            lang_pct = count / analysis['total_files']
+            
            if lang_pct > 0.3:  # Dominant language >30%
-                if dominant_lang == "python" and analysis["avg_chunk_ratio"] < 1.5:
-                    changes.append(
-                        "Increase Python chunk size to 3000 for better function context"
-                    )
+                if dominant_lang == 'python' and analysis['avg_chunk_ratio'] < 1.5:
+                    changes.append("Increase Python chunk size to 3000 for better function context")
                    confidence += 0.2
                    expected_improvement += 15
-
-                elif dominant_lang == "markdown" and analysis["avg_chunk_ratio"] < 1.2:
+                
+                elif dominant_lang == 'markdown' and analysis['avg_chunk_ratio'] < 1.2:
                    changes.append("Use header-based chunking for Markdown files")
                    confidence += 0.15
                    expected_improvement += 10
-
+        
        # Optimize for large files
-        if analysis["large_files"] > 5:
+        if analysis['large_files'] > 5:
            changes.append("Reduce streaming threshold to 5KB for better large file handling")
            confidence += 0.1
            expected_improvement += 8
-
+        
        # Optimize chunk ratio
-        if analysis["avg_chunk_ratio"] < 1.0:
+        if analysis['avg_chunk_ratio'] < 1.0:
            changes.append("Reduce chunk size for more granular search results")
            confidence += 0.15
            expected_improvement += 12
-        elif analysis["avg_chunk_ratio"] > 3.0:
+        elif analysis['avg_chunk_ratio'] > 3.0:
            changes.append("Increase chunk size to reduce overhead")
            confidence += 0.1
            expected_improvement += 5
-
+        
        # Skip tiny files optimization
-        small_file_pct = analysis["small_files"] / analysis["total_files"]
+        small_file_pct = analysis['small_files'] / analysis['total_files']
        if small_file_pct > 0.3:
            changes.append("Skip files smaller than 300 bytes to improve focus")
            confidence += 0.1
            expected_improvement += 3
-
+        
        return {
-            "changes": changes,
-            "confidence": min(confidence, 1.0),
-            "expected_improvement": expected_improvement,
+            'changes': changes,
+            'confidence': min(confidence, 1.0),
+            'expected_improvement': expected_improvement
        }
-
+    
    def _apply_optimizations(self, optimizations: Dict[str, Any]):
        """Apply the recommended optimizations."""
-
+        
        # Load existing config or create default
        if self.config_path.exists():
            with open(self.config_path) as f:
                config = json.load(f)
        else:
            config = self._get_default_config()
-
-        changes = optimizations["changes"]
-
+        
+        changes = optimizations['changes']
+        
        # Apply changes based on recommendations
        for change in changes:
            if "Python chunk size to 3000" in change:
-                config.setdefault("chunking", {})["max_size"] = 3000
-
+                config.setdefault('chunking', {})['max_size'] = 3000
+                
            elif "header-based chunking" in change:
-                config.setdefault("chunking", {})["strategy"] = "header"
-
+                config.setdefault('chunking', {})['strategy'] = 'header'
+                
            elif "streaming threshold to 5KB" in change:
-                config.setdefault("streaming", {})["threshold_bytes"] = 5120
-
+                config.setdefault('streaming', {})['threshold_bytes'] = 5120
+                
            elif "Reduce chunk size" in change:
-                current_size = config.get("chunking", {}).get("max_size", 2000)
-                config.setdefault("chunking", {})["max_size"] = max(1500, current_size - 500)
-
+                current_size = config.get('chunking', {}).get('max_size', 2000)
+                config.setdefault('chunking', {})['max_size'] = max(1500, current_size - 500)
+                
            elif "Increase chunk size" in change:
-                current_size = config.get("chunking", {}).get("max_size", 2000)
-                config.setdefault("chunking", {})["max_size"] = min(4000, current_size + 500)
-
+                current_size = config.get('chunking', {}).get('max_size', 2000)
+                config.setdefault('chunking', {})['max_size'] = min(4000, current_size + 500)
+                
            elif "Skip files smaller" in change:
-                config.setdefault("files", {})["min_file_size"] = 300
-
+                config.setdefault('files', {})['min_file_size'] = 300
+        
        # Save optimized config
-        config["_auto_optimized"] = True
-        config["_optimization_timestamp"] = json.dumps(None, default=str)
-
-        with open(self.config_path, "w") as f:
+        config['_auto_optimized'] = True
+        config['_optimization_timestamp'] = json.dumps(None, default=str)
+        
+        with open(self.config_path, 'w') as f:
            json.dump(config, f, indent=2)
-
+        
        logger.info(f"Applied {len(changes)} optimizations to {self.config_path}")
-
+    
    def _get_default_config(self) -> Dict[str, Any]:
        """Get default configuration."""
        return {
-            "chunking": {"max_size": 2000, "min_size": 150, "strategy": "semantic"},
-            "streaming": {"enabled": True, "threshold_bytes": 1048576},
-            "files": {"min_file_size": 50},
-        }
+            "chunking": {
+                "max_size": 2000,
+                "min_size": 150,
+                "strategy": "semantic"
+            },
+            "streaming": {
+                "enabled": True,
+                "threshold_bytes": 1048576
+            },
+            "files": {
+                "min_file_size": 50
+            }
+        }
--- a/mini_rag/claude_rag/chunker.py
+++ b/mini_rag/claude_rag/chunker.py
--- a/mini_rag/claude_rag/cli.py
+++ b/mini_rag/claude_rag/cli.py
--- a/mini_rag/claude_rag/config.py
+++ b/mini_rag/claude_rag/config.py
@ -0,0 +1,216 @@
+"""
+Configuration management for FSS-Mini-RAG.
+Handles loading, saving, and validation of YAML config files.
+"""
+
+import yaml
+import logging
+from pathlib import Path
+from typing import Dict, Any, Optional
+from dataclasses import dataclass, asdict
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ChunkingConfig:
+    """Configuration for text chunking."""
+    max_size: int = 2000
+    min_size: int = 150
+    strategy: str = "semantic"  # "semantic" or "fixed"
+
+
+@dataclass
+class StreamingConfig:
+    """Configuration for large file streaming."""
+    enabled: bool = True
+    threshold_bytes: int = 1048576  # 1MB
+
+
+@dataclass
+class FilesConfig:
+    """Configuration for file processing."""
+    min_file_size: int = 50
+    exclude_patterns: list = None
+    include_patterns: list = None
+    
+    def __post_init__(self):
+        if self.exclude_patterns is None:
+            self.exclude_patterns = [
+                "node_modules/**",
+                ".git/**", 
+                "__pycache__/**",
+                "*.pyc",
+                ".venv/**",
+                "venv/**",
+                "build/**",
+                "dist/**"
+            ]
+        if self.include_patterns is None:
+            self.include_patterns = ["**/*"]  # Include everything by default
+
+
+@dataclass
+class EmbeddingConfig:
+    """Configuration for embedding generation."""
+    preferred_method: str = "ollama"  # "ollama", "ml", "hash", "auto"
+    ollama_model: str = "nomic-embed-text"
+    ollama_host: str = "localhost:11434"
+    ml_model: str = "sentence-transformers/all-MiniLM-L6-v2"
+    batch_size: int = 32
+
+
+@dataclass
+class SearchConfig:
+    """Configuration for search behavior."""
+    default_limit: int = 10
+    enable_bm25: bool = True
+    similarity_threshold: float = 0.1
+
+
+@dataclass
+class RAGConfig:
+    """Main RAG system configuration."""
+    chunking: ChunkingConfig = None
+    streaming: StreamingConfig = None  
+    files: FilesConfig = None
+    embedding: EmbeddingConfig = None
+    search: SearchConfig = None
+    
+    def __post_init__(self):
+        if self.chunking is None:
+            self.chunking = ChunkingConfig()
+        if self.streaming is None:
+            self.streaming = StreamingConfig()
+        if self.files is None:
+            self.files = FilesConfig()
+        if self.embedding is None:
+            self.embedding = EmbeddingConfig()
+        if self.search is None:
+            self.search = SearchConfig()
+
+
+class ConfigManager:
+    """Manages configuration loading, saving, and validation."""
+    
+    def __init__(self, project_path: Path):
+        self.project_path = Path(project_path)
+        self.rag_dir = self.project_path / '.mini-rag'
+        self.config_path = self.rag_dir / 'config.yaml'
+        
+    def load_config(self) -> RAGConfig:
+        """Load configuration from YAML file or create default."""
+        if not self.config_path.exists():
+            logger.info(f"No config found at {self.config_path}, creating default")
+            config = RAGConfig()
+            self.save_config(config)
+            return config
+            
+        try:
+            with open(self.config_path, 'r') as f:
+                data = yaml.safe_load(f)
+                
+            if not data:
+                logger.warning("Empty config file, using defaults")
+                return RAGConfig()
+                
+            # Convert nested dicts back to dataclass instances
+            config = RAGConfig()
+            
+            if 'chunking' in data:
+                config.chunking = ChunkingConfig(**data['chunking'])
+            if 'streaming' in data:
+                config.streaming = StreamingConfig(**data['streaming'])
+            if 'files' in data:
+                config.files = FilesConfig(**data['files'])
+            if 'embedding' in data:
+                config.embedding = EmbeddingConfig(**data['embedding'])
+            if 'search' in data:
+                config.search = SearchConfig(**data['search'])
+                
+            return config
+            
+        except Exception as e:
+            logger.error(f"Failed to load config from {self.config_path}: {e}")
+            logger.info("Using default configuration")
+            return RAGConfig()
+    
+    def save_config(self, config: RAGConfig):
+        """Save configuration to YAML file with comments."""
+        try:
+            self.rag_dir.mkdir(exist_ok=True)
+            
+            # Convert to dict for YAML serialization
+            config_dict = asdict(config)
+            
+            # Create YAML content with comments
+            yaml_content = self._create_yaml_with_comments(config_dict)
+            
+            with open(self.config_path, 'w') as f:
+                f.write(yaml_content)
+                
+            logger.info(f"Configuration saved to {self.config_path}")
+            
+        except Exception as e:
+            logger.error(f"Failed to save config to {self.config_path}: {e}")
+    
+    def _create_yaml_with_comments(self, config_dict: Dict[str, Any]) -> str:
+        """Create YAML content with helpful comments."""
+        yaml_lines = [
+            "# FSS-Mini-RAG Configuration",
+            "# Edit this file to customize indexing and search behavior",
+            "# See docs/GETTING_STARTED.md for detailed explanations",
+            "",
+            "# Text chunking settings",
+            "chunking:",
+            f"  max_size: {config_dict['chunking']['max_size']}      # Maximum characters per chunk",
+            f"  min_size: {config_dict['chunking']['min_size']}       # Minimum characters per chunk", 
+            f"  strategy: {config_dict['chunking']['strategy']}    # 'semantic' (language-aware) or 'fixed'",
+            "",
+            "# Large file streaming settings", 
+            "streaming:",
+            f"  enabled: {str(config_dict['streaming']['enabled']).lower()}",
+            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Files larger than this use streaming (1MB)",
+            "",
+            "# File processing settings",
+            "files:",
+            f"  min_file_size: {config_dict['files']['min_file_size']}        # Skip files smaller than this",
+            "  exclude_patterns:",
+        ]
+        
+        for pattern in config_dict['files']['exclude_patterns']:
+            yaml_lines.append(f"    - \"{pattern}\"")
+        
+        yaml_lines.extend([
+            "  include_patterns:",
+            "    - \"**/*\"                  # Include all files by default",
+            "",
+            "# Embedding generation settings",
+            "embedding:",
+            f"  preferred_method: {config_dict['embedding']['preferred_method']}     # 'ollama', 'ml', 'hash', or 'auto'",
+            f"  ollama_model: {config_dict['embedding']['ollama_model']}",
+            f"  ollama_host: {config_dict['embedding']['ollama_host']}",
+            f"  ml_model: {config_dict['embedding']['ml_model']}",
+            f"  batch_size: {config_dict['embedding']['batch_size']}               # Embeddings processed per batch",
+            "",
+            "# Search behavior settings", 
+            "search:",
+            f"  default_limit: {config_dict['search']['default_limit']}           # Default number of results",
+            f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}             # Enable keyword matching boost",
+            f"  similarity_threshold: {config_dict['search']['similarity_threshold']}        # Minimum similarity score",
+        ])
+        
+        return '\n'.join(yaml_lines)
+    
+    def update_config(self, **kwargs) -> RAGConfig:
+        """Update specific configuration values."""
+        config = self.load_config()
+        
+        for key, value in kwargs.items():
+            if hasattr(config, key):
+                setattr(config, key, value)
+            else:
+                logger.warning(f"Unknown config key: {key}")
+        
+        self.save_config(config)
+        return config
--- a/mini_rag/claude_rag/fast_server.py
+++ b/mini_rag/claude_rag/fast_server.py
--- a/mini_rag/claude_rag/indexer.py
+++ b/mini_rag/claude_rag/indexer.py
--- a/mini_rag/claude_rag/non_invasive_watcher.py
+++ b/mini_rag/claude_rag/non_invasive_watcher.py
@ -3,16 +3,16 @@ Non-invasive file watcher designed to not interfere with development workflows.
 Uses minimal resources and gracefully handles high-load scenarios.
 """

-import logging
-import queue
-import threading
+import os
 import time
-from datetime import datetime
+import logging
+import threading
+import queue
 from pathlib import Path
 from typing import Optional, Set
-
-from watchdog.events import DirModifiedEvent, FileSystemEventHandler
+from datetime import datetime
 from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler, DirModifiedEvent

 from .indexer import ProjectIndexer

@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)

 class NonInvasiveQueue:
    """Ultra-lightweight queue with aggressive deduplication and backoff."""
-
+    
    def __init__(self, delay: float = 5.0, max_queue_size: int = 100):
        self.queue = queue.Queue(maxsize=max_queue_size)
        self.pending = set()
@ -29,28 +29,28 @@ class NonInvasiveQueue:
        self.delay = delay
        self.last_update = {}
        self.dropped_count = 0
-
+    
    def add(self, file_path: Path) -> bool:
        """Add file to queue with aggressive filtering."""
        with self.lock:
            file_str = str(file_path)
            current_time = time.time()
-
+            
            # Skip if recently processed
            if file_str in self.last_update:
                if current_time - self.last_update[file_str] < self.delay:
                    return False
-
+            
            # Skip if already pending
            if file_str in self.pending:
                return False
-
+            
            # Skip if queue is getting full (backpressure)
            if self.queue.qsize() > self.queue.maxsize * 0.8:
                self.dropped_count += 1
                logger.debug(f"Dropping update for {file_str} - queue overloaded")
                return False
-
+            
            try:
                self.queue.put_nowait(file_path)
                self.pending.add(file_str)
@ -59,7 +59,7 @@ class NonInvasiveQueue:
            except queue.Full:
                self.dropped_count += 1
                return False
-
+    
    def get(self, timeout: float = 0.1) -> Optional[Path]:
        """Get next file with very short timeout."""
        try:
@ -73,87 +73,77 @@ class NonInvasiveQueue:

 class MinimalEventHandler(FileSystemEventHandler):
    """Minimal event handler that only watches for meaningful changes."""
-
-    def __init__(
-        self,
-        update_queue: NonInvasiveQueue,
-        include_patterns: Set[str],
-        exclude_patterns: Set[str],
-    ):
+    
+    def __init__(self, 
+                 update_queue: NonInvasiveQueue,
+                 include_patterns: Set[str],
+                 exclude_patterns: Set[str]):
        self.update_queue = update_queue
        self.include_patterns = include_patterns
        self.exclude_patterns = exclude_patterns
        self.last_event_time = {}
-
+        
    def _should_process(self, file_path: str) -> bool:
        """Ultra-conservative file filtering."""
        path = Path(file_path)
-
+        
        # Only process files, not directories
        if not path.is_file():
            return False
-
+        
        # Skip if too large (>1MB)
        try:
            if path.stat().st_size > 1024 * 1024:
                return False
        except (OSError, PermissionError):
            return False
-
+        
        # Skip temporary and system files
        name = path.name
-        if (
-            name.startswith(".")
-            or name.startswith("~")
-            or name.endswith(".tmp")
-            or name.endswith(".swp")
-            or name.endswith(".lock")
-        ):
+        if (name.startswith('.') or 
+            name.startswith('~') or 
+            name.endswith('.tmp') or
+            name.endswith('.swp') or
+            name.endswith('.lock')):
            return False
-
+        
        # Check exclude patterns first (faster)
        path_str = str(path)
        for pattern in self.exclude_patterns:
            if pattern in path_str:
                return False
-
+        
        # Check include patterns
        for pattern in self.include_patterns:
            if path.match(pattern):
                return True
-
+        
        return False
-
+    
    def _rate_limit_event(self, file_path: str) -> bool:
        """Rate limit events per file."""
        current_time = time.time()
        if file_path in self.last_event_time:
-            if (
-                current_time - self.last_event_time[file_path] < 2.0
-            ):  # 2 second cooldown per file
+            if current_time - self.last_event_time[file_path] < 2.0:  # 2 second cooldown per file
                return False
-
+        
        self.last_event_time[file_path] = current_time
        return True
-
+    
    def on_modified(self, event):
        """Handle file modifications with minimal overhead."""
-        if (
-            not event.is_directory
-            and self._should_process(event.src_path)
-            and self._rate_limit_event(event.src_path)
-        ):
+        if (not event.is_directory and 
+            self._should_process(event.src_path) and
+            self._rate_limit_event(event.src_path)):
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_created(self, event):
        """Handle file creation."""
-        if (
-            not event.is_directory
-            and self._should_process(event.src_path)
-            and self._rate_limit_event(event.src_path)
-        ):
+        if (not event.is_directory and 
+            self._should_process(event.src_path) and
+            self._rate_limit_event(event.src_path)):
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_deleted(self, event):
        """Handle file deletion."""
        if not event.is_directory and self._rate_limit_event(event.src_path):
@ -167,17 +157,15 @@ class MinimalEventHandler(FileSystemEventHandler):

 class NonInvasiveFileWatcher:
    """Non-invasive file watcher that prioritizes system stability."""
-
-    def __init__(
-        self,
-        project_path: Path,
-        indexer: Optional[ProjectIndexer] = None,
-        cpu_limit: float = 0.1,  # Max 10% CPU usage
-        max_memory_mb: int = 50,
-    ):  # Max 50MB memory
+    
+    def __init__(self, 
+                 project_path: Path,
+                 indexer: Optional[ProjectIndexer] = None,
+                 cpu_limit: float = 0.1,  # Max 10% CPU usage
+                 max_memory_mb: int = 50):  # Max 50MB memory
        """
        Initialize non-invasive watcher.
-
+        
        Args:
            project_path: Path to watch
            indexer: ProjectIndexer instance
@ -188,173 +176,158 @@ class NonInvasiveFileWatcher:
        self.indexer = indexer or ProjectIndexer(self.project_path)
        self.cpu_limit = cpu_limit
        self.max_memory_mb = max_memory_mb
-
+        
        # Initialize components with conservative settings
-        self.update_queue = NonInvasiveQueue(
-            delay=10.0, max_queue_size=50
-        )  # Very conservative
+        self.update_queue = NonInvasiveQueue(delay=10.0, max_queue_size=50)  # Very conservative
        self.observer = Observer()
        self.worker_thread = None
        self.running = False
-
+        
        # Get patterns from indexer
        self.include_patterns = set(self.indexer.include_patterns)
        self.exclude_patterns = set(self.indexer.exclude_patterns)
-
+        
        # Add more aggressive exclusions
-        self.exclude_patterns.update(
-            {
-                "__pycache__",
-                ".git",
-                "node_modules",
-                ".venv",
-                "venv",
-                "dist",
-                "build",
-                "target",
-                ".idea",
-                ".vscode",
-                ".pytest_cache",
-                "coverage",
-                "htmlcov",
-                ".coverage",
-                ".mypy_cache",
-                ".tox",
-                "logs",
-                "log",
-                "tmp",
-                "temp",
-                ".DS_Store",
-            }
-        )
-
+        self.exclude_patterns.update({
+            '__pycache__', '.git', 'node_modules', '.venv', 'venv',
+            'dist', 'build', 'target', '.idea', '.vscode', '.pytest_cache',
+            'coverage', 'htmlcov', '.coverage', '.mypy_cache', '.tox',
+            'logs', 'log', 'tmp', 'temp', '.DS_Store'
+        })
+        
        # Stats
        self.stats = {
-            "files_processed": 0,
-            "files_dropped": 0,
-            "cpu_throttle_count": 0,
-            "started_at": None,
+            'files_processed': 0,
+            'files_dropped': 0,
+            'cpu_throttle_count': 0,
+            'started_at': None,
        }
-
+    
    def start(self):
        """Start non-invasive watching."""
        if self.running:
            return
-
+        
        logger.info(f"Starting non-invasive file watcher for {self.project_path}")
-
+        
        # Set up minimal event handler
        event_handler = MinimalEventHandler(
-            self.update_queue, self.include_patterns, self.exclude_patterns
+            self.update_queue,
+            self.include_patterns,
+            self.exclude_patterns
        )
-
+        
        # Schedule with recursive watching
-        self.observer.schedule(event_handler, str(self.project_path), recursive=True)
-
+        self.observer.schedule(
+            event_handler,
+            str(self.project_path),
+            recursive=True
+        )
+        
        # Start low-priority worker thread
        self.running = True
        self.worker_thread = threading.Thread(
-            target=self._process_updates_gently, daemon=True, name="RAG-FileWatcher"
+            target=self._process_updates_gently,
+            daemon=True,
+            name="RAG-FileWatcher"
        )
        # Set lowest priority
        self.worker_thread.start()
-
+        
        # Start observer
        self.observer.start()
-
-        self.stats["started_at"] = datetime.now()
+        
+        self.stats['started_at'] = datetime.now()
        logger.info("Non-invasive file watcher started")
-
+    
    def stop(self):
        """Stop watching gracefully."""
        if not self.running:
            return
-
+        
        logger.info("Stopping non-invasive file watcher...")
-
+        
        # Stop observer first
        self.observer.stop()
        self.observer.join(timeout=2.0)  # Don't wait too long
-
+        
        # Stop worker thread
        self.running = False
        if self.worker_thread and self.worker_thread.is_alive():
            self.worker_thread.join(timeout=3.0)  # Don't block shutdown
-
+        
        logger.info("Non-invasive file watcher stopped")
-
+    
    def _process_updates_gently(self):
        """Process updates with extreme care not to interfere."""
        logger.debug("Non-invasive update processor started")
-
+        
        process_start_time = time.time()
-
+        
        while self.running:
            try:
                # Yield CPU frequently
                time.sleep(0.5)  # Always sleep between operations
-
+                
                # Get next file with very short timeout
                file_path = self.update_queue.get(timeout=0.1)
-
+                
                if file_path:
                    # Check CPU usage before processing
                    current_time = time.time()
                    elapsed = current_time - process_start_time
-
+                    
                    # Simple CPU throttling: if we've been working too much, back off
                    if elapsed > 0:
                        # If we're consuming too much time, throttle aggressively
                        work_ratio = 0.1  # Assume we use 10% of time in this check
                        if work_ratio > self.cpu_limit:
-                            self.stats["cpu_throttle_count"] += 1
+                            self.stats['cpu_throttle_count'] += 1
                            time.sleep(2.0)  # Back off significantly
                            continue
-
+                    
                    # Process single file with error isolation
                    try:
                        if file_path.exists():
                            success = self.indexer.update_file(file_path)
                        else:
                            success = self.indexer.delete_file(file_path)
-
+                        
                        if success:
-                            self.stats["files_processed"] += 1
-
+                            self.stats['files_processed'] += 1
+                        
                        # Always yield CPU after processing
                        time.sleep(0.1)
-
+                        
                    except Exception as e:
-                        logger.debug(
-                            f"Non-invasive watcher: failed to process {file_path}: {e}"
-                        )
+                        logger.debug(f"Non-invasive watcher: failed to process {file_path}: {e}")
                        # Don't let errors propagate - just continue
                        continue
-
+                
                # Update dropped count from queue
-                self.stats["files_dropped"] = self.update_queue.dropped_count
-
+                self.stats['files_dropped'] = self.update_queue.dropped_count
+                
            except Exception as e:
                logger.debug(f"Non-invasive watcher error: {e}")
                time.sleep(1.0)  # Back off on errors
-
+        
        logger.debug("Non-invasive update processor stopped")
-
+    
    def get_statistics(self) -> dict:
        """Get non-invasive watcher statistics."""
        stats = self.stats.copy()
-        stats["queue_size"] = self.update_queue.queue.qsize()
-        stats["running"] = self.running
-
-        if stats["started_at"]:
-            uptime = datetime.now() - stats["started_at"]
-            stats["uptime_seconds"] = uptime.total_seconds()
-
+        stats['queue_size'] = self.update_queue.queue.qsize()
+        stats['running'] = self.running
+        
+        if stats['started_at']:
+            uptime = datetime.now() - stats['started_at']
+            stats['uptime_seconds'] = uptime.total_seconds()
+        
        return stats
-
+    
    def __enter__(self):
        self.start()
        return self
-
+    
    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.stop()
+        self.stop()
--- a/mini_rag/claude_rag/ollama_embeddings.py
+++ b/mini_rag/claude_rag/ollama_embeddings.py
@ -3,14 +3,15 @@ Hybrid code embedding module - Ollama primary with ML fallback.
 Tries Ollama first, falls back to local ML stack if needed.
 """

-import logging
-import time
-from concurrent.futures import ThreadPoolExecutor
-from functools import lru_cache
-from typing import Any, Dict, List, Optional, Union
-
-import numpy as np
 import requests
+import numpy as np
+from typing import List, Union, Optional, Dict, Any
+import logging
+from functools import lru_cache
+import time
+import json
+from concurrent.futures import ThreadPoolExecutor
+import threading

 logger = logging.getLogger(__name__)

@ -18,9 +19,8 @@ logger = logging.getLogger(__name__)
 FALLBACK_AVAILABLE = False
 try:
    import torch
+    from transformers import AutoTokenizer, AutoModel
    from sentence_transformers import SentenceTransformer
-    from transformers import AutoModel, AutoTokenizer
-
    FALLBACK_AVAILABLE = True
    logger.debug("ML fallback dependencies available")
 except ImportError:
@ -29,16 +29,12 @@ except ImportError:

 class OllamaEmbedder:
    """Hybrid embeddings: Ollama primary with ML fallback."""
-
-    def __init__(
-        self,
-        model_name: str = "nomic-embed-text:latest",
-        base_url: str = "http://localhost:11434",
-        enable_fallback: bool = True,
-    ):
+    
+    def __init__(self, model_name: str = "nomic-embed-text:latest", base_url: str = "http://localhost:11434", 
+                 enable_fallback: bool = True):
        """
        Initialize the hybrid embedder.
-
+        
        Args:
            model_name: Ollama model to use for embeddings
            base_url: Base URL for Ollama API
@ -48,15 +44,15 @@ class OllamaEmbedder:
        self.base_url = base_url
        self.embedding_dim = 768  # Standard for nomic-embed-text
        self.enable_fallback = enable_fallback and FALLBACK_AVAILABLE
-
+        
        # State tracking
        self.ollama_available = False
        self.fallback_embedder = None
        self.mode = "unknown"  # "ollama", "fallback", or "hash"
-
+        
        # Try to initialize Ollama first
        self._initialize_providers()
-
+        
    def _initialize_providers(self):
        """Initialize embedding providers in priority order."""
        # Try Ollama first
@ -68,15 +64,13 @@ class OllamaEmbedder:
        except Exception as e:
            logger.debug(f"Ollama not available: {e}")
            self.ollama_available = False
-
+            
            # Try ML fallback
            if self.enable_fallback:
                try:
                    self._initialize_fallback_embedder()
                    self.mode = "fallback"
-                    logger.info(
-                        f"✅ ML fallback active: {self.fallback_embedder.model_type if hasattr(self.fallback_embedder, 'model_type') else 'transformer'}"
-                    )
+                    logger.info(f"✅ ML fallback active: {self.fallback_embedder.model_type if hasattr(self.fallback_embedder, 'model_type') else 'transformer'}")
                except Exception as fallback_error:
                    logger.warning(f"ML fallback failed: {fallback_error}")
                    self.mode = "hash"
@ -84,58 +78,34 @@ class OllamaEmbedder:
            else:
                self.mode = "hash"
                logger.info("⚠️ Using hash-based embeddings (no fallback enabled)")
-
+    
    def _verify_ollama_connection(self):
        """Verify Ollama server is running and model is available."""
-        try:
-            # Check server status
-            response = requests.get(f"{self.base_url}/api/tags", timeout=5)
-            response.raise_for_status()
-        except requests.exceptions.ConnectionError:
-            print("🔌 Ollama Service Unavailable")
-            print("   Ollama provides AI embeddings that make semantic search possible")
-            print("   Start Ollama: ollama serve")
-            print("   Install models: ollama pull nomic-embed-text")
-            print()
-            raise ConnectionError("Ollama service not running. Start with: ollama serve")
-        except requests.exceptions.Timeout:
-            print("⏱️ Ollama Service Timeout")
-            print("   Ollama is taking too long to respond")
-            print("   Check if Ollama is overloaded: ollama ps")
-            print("   Restart if needed: killall ollama && ollama serve")
-            print()
-            raise ConnectionError("Ollama service timeout")
-
+        # Check server status
+        response = requests.get(f"{self.base_url}/api/tags", timeout=5)
+        response.raise_for_status()
+        
        # Check if our model is available
-        models = response.json().get("models", [])
-        model_names = [model["name"] for model in models]
-
+        models = response.json().get('models', [])
+        model_names = [model['name'] for model in models]
+        
        if self.model_name not in model_names:
-            print(f"📦 Model '{self.model_name}' Not Found")
-            print("   Embedding models convert text into searchable vectors")
-            print(f"   Download model: ollama pull {self.model_name}")
-            if model_names:
-                print(f"   Available models: {', '.join(model_names[:3])}")
-            print()
+            logger.warning(f"Model {self.model_name} not found. Available: {model_names}")
            # Try to pull the model
            self._pull_model()
-
+        
    def _initialize_fallback_embedder(self):
        """Initialize the ML fallback embedder."""
        if not FALLBACK_AVAILABLE:
            raise RuntimeError("ML dependencies not available for fallback")
-
+        
        # Try lightweight models first for better compatibility
        fallback_models = [
-            (
-                "sentence-transformers/all-MiniLM-L6-v2",
-                384,
-                self._init_sentence_transformer,
-            ),
+            ("sentence-transformers/all-MiniLM-L6-v2", 384, self._init_sentence_transformer),
            ("microsoft/codebert-base", 768, self._init_transformer_model),
            ("microsoft/unixcoder-base", 768, self._init_transformer_model),
        ]
-
+        
        for model_name, dim, init_func in fallback_models:
            try:
                init_func(model_name)
@ -145,33 +115,31 @@ class OllamaEmbedder:
            except Exception as e:
                logger.debug(f"Failed to load {model_name}: {e}")
                continue
-
+                
        raise RuntimeError("Could not initialize any fallback embedding model")
-
+    
    def _init_sentence_transformer(self, model_name: str):
        """Initialize sentence-transformers model."""
        self.fallback_embedder = SentenceTransformer(model_name)
-        self.fallback_embedder.model_type = "sentence_transformer"
-
+        self.fallback_embedder.model_type = 'sentence_transformer'
+        
    def _init_transformer_model(self, model_name: str):
        """Initialize transformer model."""
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name).to(device)
        model.eval()
-
+        
        # Create a simple wrapper
-
        class TransformerWrapper:
-
            def __init__(self, model, tokenizer, device):
                self.model = model
                self.tokenizer = tokenizer
                self.device = device
-                self.model_type = "transformer"
-
+                self.model_type = 'transformer'
+        
        self.fallback_embedder = TransformerWrapper(model, tokenizer, device)
-
+    
    def _pull_model(self):
        """Pull the embedding model if not available."""
        logger.info(f"Pulling model {self.model_name}...")
@ -179,13 +147,13 @@ class OllamaEmbedder:
            response = requests.post(
                f"{self.base_url}/api/pull",
                json={"name": self.model_name},
-                timeout=300,  # 5 minutes for model download
+                timeout=300  # 5 minutes for model download
            )
            response.raise_for_status()
            logger.info(f"Successfully pulled {self.model_name}")
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"Failed to pull model {self.model_name}: {e}")
-
+    
    def _get_embedding(self, text: str) -> np.ndarray:
        """Get embedding using the best available provider."""
        if self.mode == "ollama" and self.ollama_available:
@ -195,25 +163,28 @@ class OllamaEmbedder:
        else:
            # Hash fallback
            return self._hash_embedding(text)
-
+    
    def _get_ollama_embedding(self, text: str) -> np.ndarray:
        """Get embedding from Ollama API."""
        try:
            response = requests.post(
                f"{self.base_url}/api/embeddings",
-                json={"model": self.model_name, "prompt": text},
-                timeout=30,
+                json={
+                    "model": self.model_name,
+                    "prompt": text
+                },
+                timeout=30
            )
            response.raise_for_status()
-
+            
            result = response.json()
-            embedding = result.get("embedding", [])
-
+            embedding = result.get('embedding', [])
+            
            if not embedding:
                raise ValueError("No embedding returned from Ollama")
-
+            
            return np.array(embedding, dtype=np.float32)
-
+            
        except requests.exceptions.RequestException as e:
            logger.error(f"Ollama API request failed: {e}")
            # Degrade gracefully - try fallback if available
@ -225,88 +196,82 @@ class OllamaEmbedder:
        except (ValueError, KeyError) as e:
            logger.error(f"Invalid response from Ollama: {e}")
            return self._hash_embedding(text)
-
+    
    def _get_fallback_embedding(self, text: str) -> np.ndarray:
        """Get embedding from ML fallback."""
        try:
-            if self.fallback_embedder.model_type == "sentence_transformer":
+            if self.fallback_embedder.model_type == 'sentence_transformer':
                embedding = self.fallback_embedder.encode([text], convert_to_numpy=True)[0]
                return embedding.astype(np.float32)
-
-            elif self.fallback_embedder.model_type == "transformer":
+            
+            elif self.fallback_embedder.model_type == 'transformer':
                # Tokenize and generate embedding
                inputs = self.fallback_embedder.tokenizer(
-                    text,
-                    padding=True,
-                    truncation=True,
+                    text, 
+                    padding=True, 
+                    truncation=True, 
                    max_length=512,
-                    return_tensors="pt",
+                    return_tensors="pt"
                ).to(self.fallback_embedder.device)
-
+                
                with torch.no_grad():
                    outputs = self.fallback_embedder.model(**inputs)
-
+                    
                    # Use pooler output if available, otherwise mean pooling
-                    if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
+                    if hasattr(outputs, 'pooler_output') and outputs.pooler_output is not None:
                        embedding = outputs.pooler_output[0]
                    else:
                        # Mean pooling over sequence length
-                        attention_mask = inputs["attention_mask"]
+                        attention_mask = inputs['attention_mask']
                        token_embeddings = outputs.last_hidden_state[0]
-
+                        
                        # Mask and average
-                        input_mask_expanded = (
-                            attention_mask.unsqueeze(-1)
-                            .expand(token_embeddings.size())
-                            .float()
-                        )
+                        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
                        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 0)
                        sum_mask = torch.clamp(input_mask_expanded.sum(0), min=1e-9)
                        embedding = sum_embeddings / sum_mask
-
+                
                return embedding.cpu().numpy().astype(np.float32)
-
+            
            else:
-                raise ValueError(
-                    f"Unknown fallback model type: {self.fallback_embedder.model_type}"
-                )
-
+                raise ValueError(f"Unknown fallback model type: {self.fallback_embedder.model_type}")
+                
        except Exception as e:
            logger.error(f"Fallback embedding failed: {e}")
            return self._hash_embedding(text)
-
+    
    def _hash_embedding(self, text: str) -> np.ndarray:
        """Generate deterministic hash-based embedding as fallback."""
        import hashlib
-
+        
        # Create deterministic hash
-        hash_obj = hashlib.sha256(text.encode("utf-8"))
+        hash_obj = hashlib.sha256(text.encode('utf-8'))
        hash_bytes = hash_obj.digest()
-
+        
        # Convert to numbers and normalize
        hash_nums = np.frombuffer(hash_bytes, dtype=np.uint8)
-
+        
        # Expand to target dimension using repetition
        while len(hash_nums) < self.embedding_dim:
            hash_nums = np.concatenate([hash_nums, hash_nums])
-
+        
        # Take exactly the dimension we need
-        embedding = hash_nums[: self.embedding_dim].astype(np.float32)
-
+        embedding = hash_nums[:self.embedding_dim].astype(np.float32)
+        
        # Normalize to [-1, 1] range
        embedding = (embedding / 127.5) - 1.0
-
+        
        logger.debug(f"Using hash fallback embedding for text: {text[:50]}...")
        return embedding
-
+    
    def embed_code(self, code: Union[str, List[str]], language: str = "python") -> np.ndarray:
        """
        Generate embeddings for code snippet(s).
-
+        
        Args:
            code: Single code string or list of code strings
            language: Programming language (used for context)
-
+            
        Returns:
            Embedding vector(s) as numpy array
        """
@ -315,22 +280,22 @@ class OllamaEmbedder:
            single_input = True
        else:
            single_input = False
-
+        
        # Preprocess code for better embeddings
        processed_code = [self._preprocess_code(c, language) for c in code]
-
+        
        # Generate embeddings
        embeddings = []
        for text in processed_code:
            embedding = self._get_embedding(text)
            embeddings.append(embedding)
-
+        
        embeddings = np.array(embeddings, dtype=np.float32)
-
+        
        if single_input:
            return embeddings[0]
        return embeddings
-
+    
    def _preprocess_code(self, code: str, language: str = "python") -> str:
        """
        Preprocess code for better embedding quality.
@ -338,25 +303,25 @@ class OllamaEmbedder:
        """
        # Remove leading/trailing whitespace
        code = code.strip()
-
+        
        # Normalize whitespace but preserve structure
-        lines = code.split("\n")
+        lines = code.split('\n')
        processed_lines = []
-
+        
        for line in lines:
            # Remove trailing whitespace
            line = line.rstrip()
            # Keep non-empty lines
            if line:
                processed_lines.append(line)
-
-        cleaned_code = "\n".join(processed_lines)
-
+        
+        cleaned_code = '\n'.join(processed_lines)
+        
        # Add language context for better embeddings
        if language and cleaned_code:
            return f"```{language}\n{cleaned_code}\n```"
        return cleaned_code
-
+    
    @lru_cache(maxsize=1000)
    def embed_query(self, query: str) -> np.ndarray:
        """
@ -366,151 +331,91 @@ class OllamaEmbedder:
        # Enhance query for code search
        enhanced_query = f"Search for code related to: {query}"
        return self._get_embedding(enhanced_query)
-
+    
    def batch_embed_files(self, file_contents: List[dict], max_workers: int = 4) -> List[dict]:
        """
        Embed multiple files efficiently using concurrent requests to Ollama.
-
+        
        Args:
            file_contents: List of dicts with 'content' and optionally 'language' keys
            max_workers: Maximum number of concurrent Ollama requests
-
+            
        Returns:
            List of dicts with added 'embedding' key (preserves original order)
        """
        if not file_contents:
            return []
-
+        
        # For small batches, use sequential processing to avoid overhead
        if len(file_contents) <= 2:
            return self._batch_embed_sequential(file_contents)
-
-        # For very large batches, use chunked processing to prevent memory issues
-        if len(file_contents) > 500:  # Process in chunks to manage memory
-            return self._batch_embed_chunked(file_contents, max_workers)
-
+        
        return self._batch_embed_concurrent(file_contents, max_workers)
-
+    
    def _batch_embed_sequential(self, file_contents: List[dict]) -> List[dict]:
        """Sequential processing for small batches."""
        results = []
        for file_dict in file_contents:
-            content = file_dict["content"]
-            language = file_dict.get("language", "python")
+            content = file_dict['content']
+            language = file_dict.get('language', 'python')
            embedding = self.embed_code(content, language)
-
+            
            result = file_dict.copy()
-            result["embedding"] = embedding
+            result['embedding'] = embedding
            results.append(result)
-
+        
        return results
-
-    def _batch_embed_concurrent(
-        self, file_contents: List[dict], max_workers: int
-    ) -> List[dict]:
+    
+    def _batch_embed_concurrent(self, file_contents: List[dict], max_workers: int) -> List[dict]:
        """Concurrent processing for larger batches."""
-
        def embed_single(item_with_index):
            index, file_dict = item_with_index
-            content = file_dict["content"]
-            language = file_dict.get("language", "python")
-
+            content = file_dict['content']
+            language = file_dict.get('language', 'python')
+            
            try:
                embedding = self.embed_code(content, language)
                result = file_dict.copy()
-                result["embedding"] = embedding
+                result['embedding'] = embedding
                return index, result
            except Exception as e:
                logger.error(f"Failed to embed content at index {index}: {e}")
                # Return with hash fallback
                result = file_dict.copy()
-                result["embedding"] = self._hash_embedding(content)
+                result['embedding'] = self._hash_embedding(content)
                return index, result
-
+        
        # Create indexed items to preserve order
        indexed_items = list(enumerate(file_contents))
-
+        
        # Process concurrently
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            indexed_results = list(executor.map(embed_single, indexed_items))
-
+        
        # Sort by original index and extract results
        indexed_results.sort(key=lambda x: x[0])
        return [result for _, result in indexed_results]
-
-    def _batch_embed_chunked(
-        self, file_contents: List[dict], max_workers: int, chunk_size: int = 200
-    ) -> List[dict]:
-        """
-        Process very large batches in smaller chunks to prevent memory issues.
-        This is important for beginners who might try to index huge projects.
-        """
-        results = []
-        total_chunks = len(file_contents)
-
-        # Process in chunks
-        for i in range(0, len(file_contents), chunk_size):
-            chunk = file_contents[i : i + chunk_size]
-
-            # Log progress for large operations
-            if total_chunks > chunk_size:
-                chunk_num = i // chunk_size + 1
-                total_chunk_count = (total_chunks + chunk_size - 1) // chunk_size
-                logger.info(
-                    f"Processing chunk {chunk_num}/{total_chunk_count} ({len(chunk)} files)"
-                )
-
-            # Process this chunk using concurrent method
-            chunk_results = self._batch_embed_concurrent(chunk, max_workers)
-            results.extend(chunk_results)
-
-            # Brief pause between chunks to prevent overwhelming the system
-            if i + chunk_size < len(file_contents):
-
-                time.sleep(0.1)  # 100ms pause between chunks
-
-        return results
-
+    
    def get_embedding_dim(self) -> int:
        """Return the dimension of embeddings produced by this model."""
        return self.embedding_dim
-
+    
    def get_mode(self) -> str:
        """Return current embedding mode: 'ollama', 'fallback', or 'hash'."""
        return self.mode
-
+    
    def get_status(self) -> Dict[str, Any]:
        """Get detailed status of the embedding system."""
        return {
            "mode": self.mode,
            "ollama_available": self.ollama_available,
            "fallback_available": FALLBACK_AVAILABLE and self.enable_fallback,
-            "fallback_model": (
-                getattr(self.fallback_embedder, "model_type", None)
-                if self.fallback_embedder
-                else None
-            ),
+            "fallback_model": getattr(self.fallback_embedder, 'model_type', None) if self.fallback_embedder else None,
            "embedding_dim": self.embedding_dim,
            "ollama_model": self.model_name if self.mode == "ollama" else None,
-            "ollama_url": self.base_url if self.mode == "ollama" else None,
+            "ollama_url": self.base_url if self.mode == "ollama" else None
        }
-
-    def get_embedding_info(self) -> Dict[str, str]:
-        """Get human-readable embedding system information for installer."""
-        status = self.get_status()
-        mode = status.get("mode", "unknown")
-        if mode == "ollama":
-            return {"method": f"Ollama ({status['ollama_model']})", "status": "working"}
-        # Treat legacy/alternate naming uniformly
-        if mode in ("fallback", "ml"):
-            return {
-                "method": f"ML Fallback ({status['fallback_model']})",
-                "status": "working",
-            }
-        if mode == "hash":
-            return {"method": "Hash-based (basic similarity)", "status": "working"}
-        return {"method": "Unknown", "status": "error"}
-
+    
    def warmup(self):
        """Warm up the embedding system with a dummy request."""
        dummy_code = "def hello(): pass"
@ -520,18 +425,14 @@ class OllamaEmbedder:


 # Convenience function for quick embedding
-
-
-def embed_code(
-    code: Union[str, List[str]], model_name: str = "nomic-embed-text:latest"
-) -> np.ndarray:
+def embed_code(code: Union[str, List[str]], model_name: str = "nomic-embed-text:latest") -> np.ndarray:
    """
    Quick function to embed code without managing embedder instance.
-
+    
    Args:
        code: Code string(s) to embed
        model_name: Ollama model name to use
-
+        
    Returns:
        Embedding vector(s)
    """
@ -540,4 +441,4 @@ def embed_code(


 # Compatibility alias for drop-in replacement
-CodeEmbedder = OllamaEmbedder
+CodeEmbedder = OllamaEmbedder
--- a/mini_rag/claude_rag/path_handler.py
+++ b/mini_rag/claude_rag/path_handler.py
@ -4,50 +4,51 @@ Handles forward/backward slashes on any file system.
 Robust cross-platform path handling.
 """

+import os
 import sys
 from pathlib import Path
-from typing import List, Union
+from typing import Union, List


 def normalize_path(path: Union[str, Path]) -> str:
    """
    Normalize a path to always use forward slashes.
    This ensures consistency across platforms in storage.
-
+    
    Args:
        path: Path as string or Path object
-
+        
    Returns:
        Path string with forward slashes
    """
    # Convert to Path object first
    path_obj = Path(path)
-
+    
    # Convert to string and replace backslashes
-    path_str = str(path_obj).replace("\\", "/")
-
+    path_str = str(path_obj).replace('\\', '/')
+    
    # Handle UNC paths on Windows
-    if sys.platform == "win32" and path_str.startswith("//"):
+    if sys.platform == 'win32' and path_str.startswith('//'):
        # Keep UNC paths as they are
        return path_str
-
+    
    return path_str


 def normalize_relative_path(path: Union[str, Path], base: Union[str, Path]) -> str:
    """
    Get a normalized relative path.
-
+    
    Args:
        path: Path to make relative
        base: Base path to be relative to
-
+        
    Returns:
        Relative path with forward slashes
    """
    path_obj = Path(path).resolve()
    base_obj = Path(base).resolve()
-
+    
    try:
        rel_path = path_obj.relative_to(base_obj)
        return normalize_path(rel_path)
@ -60,10 +61,10 @@ def denormalize_path(path_str: str) -> Path:
    """
    Convert a normalized path string back to a Path object.
    This handles the conversion from storage format to OS format.
-
+    
    Args:
        path_str: Normalized path string with forward slashes
-
+        
    Returns:
        Path object appropriate for the OS
    """
@ -74,10 +75,10 @@ def denormalize_path(path_str: str) -> Path:
 def join_paths(*parts: Union[str, Path]) -> str:
    """
    Join path parts and return normalized result.
-
+    
    Args:
        *parts: Path parts to join
-
+        
    Returns:
        Normalized joined path
    """
@ -89,46 +90,46 @@ def join_paths(*parts: Union[str, Path]) -> str:
 def split_path(path: Union[str, Path]) -> List[str]:
    """
    Split a path into its components.
-
+    
    Args:
        path: Path to split
-
+        
    Returns:
        List of path components
    """
    path_obj = Path(path)
    parts = []
-
+    
    # Handle drive on Windows
    if path_obj.drive:
        parts.append(path_obj.drive)
-
+    
    # Add all other parts
    parts.extend(path_obj.parts[1:] if path_obj.drive else path_obj.parts)
-
+    
    return parts


 def ensure_forward_slashes(path_str: str) -> str:
    """
    Quick function to ensure a path string uses forward slashes.
-
+    
    Args:
        path_str: Path string
-
+        
    Returns:
        Path with forward slashes
    """
-    return path_str.replace("\\", "/")
+    return path_str.replace('\\', '/')


 def ensure_native_slashes(path_str: str) -> str:
    """
    Ensure a path uses the native separator for the OS.
-
+    
    Args:
        path_str: Path string
-
+        
    Returns:
        Path with native separators
    """
@ -136,8 +137,6 @@ def ensure_native_slashes(path_str: str) -> str:


 # Convenience functions for common operations
-
-
 def storage_path(path: Union[str, Path]) -> str:
    """Convert path to storage format (forward slashes)."""
    return normalize_path(path)
@ -150,4 +149,4 @@ def display_path(path: Union[str, Path]) -> str:

 def from_storage_path(path_str: str) -> Path:
    """Convert from storage format to Path object."""
-    return denormalize_path(path_str)
+    return denormalize_path(path_str)
--- a/mini_rag/claude_rag/performance.py
+++ b/mini_rag/claude_rag/performance.py
@ -3,87 +3,85 @@ Performance monitoring for RAG system.
 Track loading times, query times, and resource usage.
 """

-import logging
-import os
 import time
-from contextlib import contextmanager
-from typing import Any, Dict, Optional
-
 import psutil
+import os
+from contextlib import contextmanager
+from typing import Dict, Any, Optional
+import logging

 logger = logging.getLogger(__name__)


 class PerformanceMonitor:
    """Track performance metrics for RAG operations."""
-
+    
    def __init__(self):
        self.metrics = {}
        self.process = psutil.Process(os.getpid())
-
+        
    @contextmanager
    def measure(self, operation: str):
        """Context manager to measure operation time and memory."""
        # Get initial state
        start_time = time.time()
        start_memory = self.process.memory_info().rss / 1024 / 1024  # MB
-
+        
        try:
            yield self
        finally:
            # Calculate metrics
            end_time = time.time()
            end_memory = self.process.memory_info().rss / 1024 / 1024  # MB
-
+            
            duration = end_time - start_time
            memory_delta = end_memory - start_memory
-
+            
            # Store metrics
            self.metrics[operation] = {
-                "duration_seconds": duration,
-                "memory_delta_mb": memory_delta,
-                "final_memory_mb": end_memory,
+                'duration_seconds': duration,
+                'memory_delta_mb': memory_delta,
+                'final_memory_mb': end_memory,
            }
-
+            
            logger.info(
                f"[PERF] {operation}: {duration:.2f}s, "
                f"Memory: {end_memory:.1f}MB (+{memory_delta:+.1f}MB)"
            )
-
+    
    def get_summary(self) -> Dict[str, Any]:
        """Get performance summary."""
-        total_time = sum(m["duration_seconds"] for m in self.metrics.values())
-
+        total_time = sum(m['duration_seconds'] for m in self.metrics.values())
+        
        return {
-            "total_time_seconds": total_time,
-            "operations": self.metrics,
-            "current_memory_mb": self.process.memory_info().rss / 1024 / 1024,
+            'total_time_seconds': total_time,
+            'operations': self.metrics,
+            'current_memory_mb': self.process.memory_info().rss / 1024 / 1024,
        }
-
+    
    def print_summary(self):
        """Print a formatted summary."""
-        print("\n" + "=" * 50)
+        print("\n" + "="*50)
        print("PERFORMANCE SUMMARY")
-        print("=" * 50)
-
+        print("="*50)
+        
        for op, metrics in self.metrics.items():
            print(f"\n{op}:")
            print(f"  Time: {metrics['duration_seconds']:.2f}s")
            print(f"  Memory: +{metrics['memory_delta_mb']:+.1f}MB")
-
+        
        summary = self.get_summary()
        print(f"\nTotal Time: {summary['total_time_seconds']:.2f}s")
        print(f"Current Memory: {summary['current_memory_mb']:.1f}MB")
-        print("=" * 50)
+        print("="*50)


 # Global instance for easy access
 _monitor = None

-
 def get_monitor() -> PerformanceMonitor:
    """Get or create global monitor instance."""
    global _monitor
    if _monitor is None:
        _monitor = PerformanceMonitor()
-    return _monitor
+    return _monitor
--- a/mini_rag/claude_rag/search.py
+++ b/mini_rag/claude_rag/search.py
--- a/mini_rag/claude_rag/server.py
+++ b/mini_rag/claude_rag/server.py
@ -4,30 +4,30 @@ No more loading/unloading madness!
 """

 import json
-import logging
-import os
 import socket
-import subprocess
-import sys
 import threading
 import time
+import subprocess
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Dict, Any, Optional
+import logging
+import sys
+import os

 # Fix Windows console
-if sys.platform == "win32":
-    os.environ["PYTHONUTF8"] = "1"
+if sys.platform == 'win32':
+    os.environ['PYTHONUTF8'] = '1'

+from .search import CodeSearcher
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .performance import PerformanceMonitor
-from .search import CodeSearcher

 logger = logging.getLogger(__name__)


 class RAGServer:
    """Persistent server that keeps embeddings and DB loaded."""
-
+    
    def __init__(self, project_path: Path, port: int = 7777):
        self.project_path = project_path
        self.port = port
@ -37,36 +37,37 @@ class RAGServer:
        self.socket = None
        self.start_time = None
        self.query_count = 0
-
+        
    def _kill_existing_server(self):
        """Kill any existing process using our port."""
        try:
            # Check if port is in use
            test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            result = test_sock.connect_ex(("localhost", self.port))
+            result = test_sock.connect_ex(('localhost', self.port))
            test_sock.close()
-
+            
            if result == 0:  # Port is in use
                print(f"️  Port {self.port} is already in use, attempting to free it...")
-
-                if sys.platform == "win32":
+                
+                if sys.platform == 'win32':
                    # Windows: Find and kill process using netstat
                    import subprocess
-
                    try:
                        # Get process ID using the port
                        result = subprocess.run(
-                            ["netstat", "-ano"], capture_output=True, text=True
+                            ['netstat', '-ano'], 
+                            capture_output=True, 
+                            text=True
                        )
-
-                        for line in result.stdout.split("\n"):
-                            if f":{self.port}" in line and "LISTENING" in line:
+                        
+                        for line in result.stdout.split('\n'):
+                            if f':{self.port}' in line and 'LISTENING' in line:
                                parts = line.split()
                                pid = parts[-1]
                                print(f"   Found process {pid} using port {self.port}")
-
+                                
                                # Kill the process
-                                subprocess.run(["taskkill", "//PID", pid, "//F"], check=False)
+                                subprocess.run(['taskkill', '//PID', pid, '//F'], check=False)
                                print(f"    Killed process {pid}")
                                time.sleep(1)  # Give it a moment to release the port
                                break
@ -75,16 +76,15 @@ class RAGServer:
                else:
                    # Unix/Linux: Use lsof and kill
                    import subprocess
-
                    try:
                        result = subprocess.run(
-                            ["lso", "-ti", f":{self.port}"],
-                            capture_output=True,
-                            text=True,
+                            ['lsof', '-ti', f':{self.port}'], 
+                            capture_output=True, 
+                            text=True
                        )
                        if result.stdout.strip():
                            pid = result.stdout.strip()
-                            subprocess.run(["kill", "-9", pid], check=False)
+                            subprocess.run(['kill', '-9', pid], check=False)
                            print(f"    Killed process {pid}")
                            time.sleep(1)
                    except Exception as e:
@ -92,38 +92,38 @@ class RAGServer:
        except Exception as e:
            # Non-critical error, just log it
            logger.debug(f"Error checking port: {e}")
-
+        
    def start(self):
        """Start the RAG server."""
        # Kill any existing process on our port first
        self._kill_existing_server()
-
+        
        print(f" Starting RAG server on port {self.port}...")
-
+        
        # Load everything once
        perf = PerformanceMonitor()
-
+        
        with perf.measure("Load Embedder"):
            self.embedder = CodeEmbedder()
-
+            
        with perf.measure("Connect Database"):
            self.searcher = CodeSearcher(self.project_path, embedder=self.embedder)
-
+        
        perf.print_summary()
-
+        
        # Start server
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-        self.socket.bind(("localhost", self.port))
+        self.socket.bind(('localhost', self.port))
        self.socket.listen(5)
-
+        
        self.running = True
        self.start_time = time.time()
-
+        
        print(f"\n RAG server ready on localhost:{self.port}")
        print("   Model loaded, database connected")
        print("   Waiting for queries...\n")
-
+        
        # Handle connections
        while self.running:
            try:
@ -136,50 +136,50 @@ class RAGServer:
            except Exception as e:
                if self.running:
                    logger.error(f"Server error: {e}")
-
+    
    def _handle_client(self, client: socket.socket):
        """Handle a client connection."""
        try:
            # Receive query with proper message framing
            data = self._receive_json(client)
            request = json.loads(data)
-
+            
            # Check for shutdown command
-            if request.get("command") == "shutdown":
+            if request.get('command') == 'shutdown':
                print("\n Shutdown requested")
-                response = {"success": True, "message": "Server shutting down"}
+                response = {'success': True, 'message': 'Server shutting down'}
                self._send_json(client, response)
                self.stop()
                return
-
-            query = request.get("query", "")
-            top_k = request.get("top_k", 10)
-
+            
+            query = request.get('query', '')
+            top_k = request.get('top_k', 10)
+            
            self.query_count += 1
            print(f"[Query #{self.query_count}] {query}")
-
+            
            # Perform search
            start = time.time()
            results = self.searcher.search(query, top_k=top_k)
            search_time = time.time() - start
-
+            
            # Prepare response
            response = {
-                "success": True,
-                "query": query,
-                "count": len(results),
-                "search_time_ms": int(search_time * 1000),
-                "results": [r.to_dict() for r in results],
-                "server_uptime": int(time.time() - self.start_time),
-                "total_queries": self.query_count,
+                'success': True,
+                'query': query,
+                'count': len(results),
+                'search_time_ms': int(search_time * 1000),
+                'results': [r.to_dict() for r in results],
+                'server_uptime': int(time.time() - self.start_time),
+                'total_queries': self.query_count,
            }
-
+            
            # Send response with proper framing
            self._send_json(client, response)
-
+            
            print(f"    Found {len(results)} results in {search_time*1000:.0f}ms")
-
-        except ConnectionError:
+            
+        except ConnectionError as e:
            # Normal disconnection - client closed connection
            # This is expected behavior, don't log as error
            pass
@ -187,49 +187,52 @@ class RAGServer:
            # Only log actual errors, not normal disconnections
            if "Connection closed" not in str(e):
                logger.error(f"Client handler error: {e}")
-            error_response = {"success": False, "error": str(e)}
+            error_response = {
+                'success': False,
+                'error': str(e)
+            }
            try:
                self._send_json(client, error_response)
-            except (ConnectionError, OSError, TypeError, ValueError, socket.error):
+            except:
                pass
        finally:
            client.close()
-
+    
    def _receive_json(self, sock: socket.socket) -> str:
        """Receive a complete JSON message with length prefix."""
        # First receive the length (4 bytes)
-        length_data = b""
+        length_data = b''
        while len(length_data) < 4:
            chunk = sock.recv(4 - len(length_data))
            if not chunk:
                raise ConnectionError("Connection closed while receiving length")
            length_data += chunk
-
-        length = int.from_bytes(length_data, "big")
-
+        
+        length = int.from_bytes(length_data, 'big')
+        
        # Now receive the actual data
-        data = b""
+        data = b''
        while len(data) < length:
            chunk = sock.recv(min(65536, length - len(data)))
            if not chunk:
                raise ConnectionError("Connection closed while receiving data")
            data += chunk
-
-        return data.decode("utf-8")
-
+        
+        return data.decode('utf-8')
+    
    def _send_json(self, sock: socket.socket, data: dict):
        """Send a JSON message with length prefix."""
        # Sanitize the data to ensure JSON compatibility
-        json_str = json.dumps(data, ensure_ascii=False, separators=(",", ":"))
-        json_bytes = json_str.encode("utf-8")
-
+        json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
+        json_bytes = json_str.encode('utf-8')
+        
        # Send length prefix (4 bytes)
        length = len(json_bytes)
-        sock.send(length.to_bytes(4, "big"))
-
+        sock.send(length.to_bytes(4, 'big'))
+        
        # Send the data
        sock.sendall(json_bytes)
-
+    
    def stop(self):
        """Stop the server."""
        self.running = False
@ -240,89 +243,101 @@ class RAGServer:

 class RAGClient:
    """Client to communicate with RAG server."""
-
+    
    def __init__(self, port: int = 7777):
        self.port = port
        self.use_legacy = False
-
+        
    def search(self, query: str, top_k: int = 10) -> Dict[str, Any]:
        """Send search query to server."""
        try:
            # Connect to server
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            sock.connect(("localhost", self.port))
-
+            sock.connect(('localhost', self.port))
+            
            # Send request with proper framing
-            request = {"query": query, "top_k": top_k}
+            request = {
+                'query': query,
+                'top_k': top_k
+            }
            self._send_json(sock, request)
-
+            
            # Receive response with proper framing
            data = self._receive_json(sock)
            response = json.loads(data)
-
+            
            sock.close()
            return response
-
+            
        except ConnectionRefusedError:
            return {
-                "success": False,
-                "error": "RAG server not running. Start with: rag-mini server",
+                'success': False,
+                'error': 'RAG server not running. Start with: mini-rag server'
            }
        except ConnectionError as e:
            # Try legacy mode without message framing
            if not self.use_legacy and "receiving length" in str(e):
                self.use_legacy = True
                return self._search_legacy(query, top_k)
-            return {"success": False, "error": str(e)}
+            return {
+                'success': False,
+                'error': str(e)
+            }
        except Exception as e:
-            return {"success": False, "error": str(e)}
-
+            return {
+                'success': False,
+                'error': str(e)
+            }
+    
    def _receive_json(self, sock: socket.socket) -> str:
        """Receive a complete JSON message with length prefix."""
        # First receive the length (4 bytes)
-        length_data = b""
+        length_data = b''
        while len(length_data) < 4:
            chunk = sock.recv(4 - len(length_data))
            if not chunk:
                raise ConnectionError("Connection closed while receiving length")
            length_data += chunk
-
-        length = int.from_bytes(length_data, "big")
-
+        
+        length = int.from_bytes(length_data, 'big')
+        
        # Now receive the actual data
-        data = b""
+        data = b''
        while len(data) < length:
            chunk = sock.recv(min(65536, length - len(data)))
            if not chunk:
                raise ConnectionError("Connection closed while receiving data")
            data += chunk
-
-        return data.decode("utf-8")
-
+        
+        return data.decode('utf-8')
+    
    def _send_json(self, sock: socket.socket, data: dict):
        """Send a JSON message with length prefix."""
-        json_str = json.dumps(data, ensure_ascii=False, separators=(",", ":"))
-        json_bytes = json_str.encode("utf-8")
-
+        json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
+        json_bytes = json_str.encode('utf-8')
+        
        # Send length prefix (4 bytes)
        length = len(json_bytes)
-        sock.send(length.to_bytes(4, "big"))
-
+        sock.send(length.to_bytes(4, 'big'))
+        
        # Send the data
        sock.sendall(json_bytes)
-
+    
    def _search_legacy(self, query: str, top_k: int = 10) -> Dict[str, Any]:
        """Legacy search without message framing for old servers."""
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            sock.connect(("localhost", self.port))
-
+            sock.connect(('localhost', self.port))
+            
            # Send request (old way)
-            request = {"query": query, "top_k": top_k}
-            sock.send(json.dumps(request).encode("utf-8"))
-
+            request = {
+                'query': query,
+                'top_k': top_k
+            }
+            sock.send(json.dumps(request).encode('utf-8'))
+            
            # Receive response (accumulate until we get valid JSON)
-            data = b""
+            data = b''
            while True:
                chunk = sock.recv(65536)
                if not chunk:
@ -330,26 +345,32 @@ class RAGClient:
                data += chunk
                try:
                    # Try to decode as JSON
-                    response = json.loads(data.decode("utf-8"))
+                    response = json.loads(data.decode('utf-8'))
                    sock.close()
                    return response
                except json.JSONDecodeError:
                    # Keep receiving
                    continue
-
+            
            sock.close()
-            return {"success": False, "error": "Incomplete response from server"}
+            return {
+                'success': False,
+                'error': 'Incomplete response from server'
+            }
        except Exception as e:
-            return {"success": False, "error": str(e)}
-
+            return {
+                'success': False,
+                'error': str(e)
+            }
+    
    def is_running(self) -> bool:
        """Check if server is running."""
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            result = sock.connect_ex(("localhost", self.port))
+            result = sock.connect_ex(('localhost', self.port))
            sock.close()
            return result == 0
-        except (ConnectionError, OSError, TypeError, ValueError, socket.error):
+        except:
            return False


@ -368,31 +389,23 @@ def auto_start_if_needed(project_path: Path) -> Optional[subprocess.Popen]:
    if not client.is_running():
        # Start server in background
        import subprocess
-
-        cmd = [
-            sys.executable,
-            "-m",
-            "mini_rag.cli",
-            "server",
-            "--path",
-            str(project_path),
-        ]
+        cmd = [sys.executable, "-m", "mini_rag.cli", "server", "--path", str(project_path)]
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
-            creationflags=(subprocess.CREATE_NEW_CONSOLE if sys.platform == "win32" else 0),
+            creationflags=subprocess.CREATE_NEW_CONSOLE if sys.platform == 'win32' else 0
        )
-
+        
        # Wait for server to start
        for _ in range(30):  # 30 second timeout
            time.sleep(1)
            if client.is_running():
                print(" RAG server started automatically")
                return process
-
+        
        # Failed to start
        process.terminate()
        raise RuntimeError("Failed to start RAG server")
-
-    return None
+    
+    return None
--- a/mini_rag/claude_rag/smart_chunking.py
+++ b/mini_rag/claude_rag/smart_chunking.py
@ -0,0 +1,150 @@
+"""
+Smart language-aware chunking strategies for FSS-Mini-RAG.
+Automatically adapts chunking based on file type and content patterns.
+"""
+
+from typing import Dict, Any, List
+from pathlib import Path
+import json
+
+class SmartChunkingStrategy:
+    """Intelligent chunking that adapts to file types and content."""
+    
+    def __init__(self):
+        self.language_configs = {
+            'python': {
+                'max_size': 3000,  # Larger for better function context
+                'min_size': 200,
+                'strategy': 'function',
+                'prefer_semantic': True
+            },
+            'javascript': {
+                'max_size': 2500,
+                'min_size': 150,
+                'strategy': 'function',
+                'prefer_semantic': True
+            },
+            'markdown': {
+                'max_size': 2500,
+                'min_size': 300,  # Larger minimum for complete thoughts
+                'strategy': 'header',
+                'preserve_structure': True
+            },
+            'json': {
+                'max_size': 1000,  # Smaller for config files
+                'min_size': 50,
+                'skip_if_large': True,  # Skip huge config JSONs
+                'max_file_size': 50000  # 50KB limit
+            },
+            'yaml': {
+                'max_size': 1500,
+                'min_size': 100,
+                'strategy': 'key_block'
+            },
+            'text': {
+                'max_size': 2000,
+                'min_size': 200,
+                'strategy': 'paragraph'
+            },
+            'bash': {
+                'max_size': 1500,
+                'min_size': 100,
+                'strategy': 'function'
+            }
+        }
+        
+        # Smart defaults for unknown languages
+        self.default_config = {
+            'max_size': 2000,
+            'min_size': 150,
+            'strategy': 'semantic'
+        }
+    
+    def get_config_for_language(self, language: str, file_size: int = 0) -> Dict[str, Any]:
+        """Get optimal chunking config for a specific language."""
+        config = self.language_configs.get(language, self.default_config).copy()
+        
+        # Smart adjustments based on file size
+        if file_size > 0:
+            if file_size < 500:  # Very small files
+                config['max_size'] = max(config['max_size'] // 2, 200)
+                config['min_size'] = 50
+            elif file_size > 20000:  # Large files  
+                config['max_size'] = min(config['max_size'] + 1000, 4000)
+        
+        return config
+    
+    def should_skip_file(self, language: str, file_size: int) -> bool:
+        """Determine if a file should be skipped entirely."""
+        lang_config = self.language_configs.get(language, {})
+        
+        # Skip huge JSON config files
+        if language == 'json' and lang_config.get('skip_if_large'):
+            max_size = lang_config.get('max_file_size', 50000)
+            if file_size > max_size:
+                return True
+        
+        # Skip tiny files that won't provide good context
+        if file_size < 30:
+            return True
+            
+        return False
+    
+    def get_smart_defaults(self, project_stats: Dict[str, Any]) -> Dict[str, Any]:
+        """Generate smart defaults based on project language distribution."""
+        languages = project_stats.get('languages', {})
+        total_files = sum(languages.values())
+        
+        # Determine primary language
+        primary_lang = max(languages.items(), key=lambda x: x[1])[0] if languages else 'python'
+        primary_config = self.language_configs.get(primary_lang, self.default_config)
+        
+        # Smart streaming threshold based on large files
+        large_files = project_stats.get('large_files', 0)
+        streaming_threshold = 5120 if large_files > 5 else 1048576  # 5KB vs 1MB
+        
+        return {
+            "chunking": {
+                "max_size": primary_config['max_size'],
+                "min_size": primary_config['min_size'], 
+                "strategy": primary_config.get('strategy', 'semantic'),
+                "language_specific": {
+                    lang: config for lang, config in self.language_configs.items()
+                    if languages.get(lang, 0) > 0
+                }
+            },
+            "streaming": {
+                "enabled": True,
+                "threshold_bytes": streaming_threshold,
+                "chunk_size_kb": 64
+            },
+            "files": {
+                "skip_tiny_files": True,
+                "tiny_threshold": 30,
+                "smart_json_filtering": True
+            }
+        }
+
+# Example usage
+def analyze_and_suggest(manifest_data: Dict[str, Any]) -> Dict[str, Any]:
+    """Analyze project and suggest optimal configuration."""
+    from collections import Counter
+    
+    files = manifest_data.get('files', {})
+    languages = Counter()
+    large_files = 0
+    
+    for info in files.values():
+        lang = info.get('language', 'unknown')
+        languages[lang] += 1
+        if info.get('size', 0) > 10000:
+            large_files += 1
+    
+    stats = {
+        'languages': dict(languages),
+        'large_files': large_files,
+        'total_files': len(files)
+    }
+    
+    strategy = SmartChunkingStrategy()
+    return strategy.get_smart_defaults(stats)
--- a/mini_rag/claude_rag/watcher.py
+++ b/mini_rag/claude_rag/watcher.py
@ -4,21 +4,14 @@ Monitors project files and updates the index incrementally.
 """

 import logging
-import queue
 import threading
+import queue
 import time
-from datetime import datetime
 from pathlib import Path
-from typing import Callable, Optional, Set
-
-from watchdog.events import (
-    FileCreatedEvent,
-    FileDeletedEvent,
-    FileModifiedEvent,
-    FileMovedEvent,
-    FileSystemEventHandler,
-)
+from typing import Set, Optional, Callable
+from datetime import datetime
 from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler, FileModifiedEvent, FileCreatedEvent, FileDeletedEvent, FileMovedEvent

 from .indexer import ProjectIndexer

@ -27,11 +20,11 @@ logger = logging.getLogger(__name__)

 class UpdateQueue:
    """Thread-safe queue for file updates with deduplication."""
-
+    
    def __init__(self, delay: float = 1.0):
        """
        Initialize update queue.
-
+        
        Args:
            delay: Delay in seconds before processing updates (for debouncing)
        """
@ -40,24 +33,24 @@ class UpdateQueue:
        self.lock = threading.Lock()
        self.delay = delay
        self.last_update = {}  # Track last update time per file
-
+    
    def add(self, file_path: Path):
        """Add a file to the update queue."""
        with self.lock:
            file_str = str(file_path)
            current_time = time.time()
-
+            
            # Check if we should debounce this update
            if file_str in self.last_update:
                if current_time - self.last_update[file_str] < self.delay:
                    return  # Skip this update
-
+            
            self.last_update[file_str] = current_time
-
+            
            if file_str not in self.pending:
                self.pending.add(file_str)
                self.queue.put(file_path)
-
+    
    def get(self, timeout: Optional[float] = None) -> Optional[Path]:
        """Get next file from queue."""
        try:
@ -67,11 +60,11 @@ class UpdateQueue:
            return file_path
        except queue.Empty:
            return None
-
+    
    def empty(self) -> bool:
        """Check if queue is empty."""
        return self.queue.empty()
-
+    
    def size(self) -> int:
        """Get queue size."""
        return self.queue.qsize()
@ -79,17 +72,15 @@ class UpdateQueue:

 class CodeFileEventHandler(FileSystemEventHandler):
    """Handles file system events for code files."""
-
-    def __init__(
-        self,
-        update_queue: UpdateQueue,
-        include_patterns: Set[str],
-        exclude_patterns: Set[str],
-        project_path: Path,
-    ):
+    
+    def __init__(self, 
+                 update_queue: UpdateQueue,
+                 include_patterns: Set[str],
+                 exclude_patterns: Set[str],
+                 project_path: Path):
        """
        Initialize event handler.
-
+        
        Args:
            update_queue: Queue for file updates
            include_patterns: File patterns to include
@ -100,47 +91,47 @@ class CodeFileEventHandler(FileSystemEventHandler):
        self.include_patterns = include_patterns
        self.exclude_patterns = exclude_patterns
        self.project_path = project_path
-
+    
    def _should_process(self, file_path: str) -> bool:
        """Check if file should be processed."""
        path = Path(file_path)
-
+        
        # Check if it's a file (not directory)
        if not path.is_file():
            return False
-
+        
        # Check exclude patterns
        path_str = str(path)
        for pattern in self.exclude_patterns:
            if pattern in path_str:
                return False
-
+        
        # Check include patterns
        for pattern in self.include_patterns:
            if path.match(pattern):
                return True
-
+        
        return False
-
+    
    def on_modified(self, event: FileModifiedEvent):
        """Handle file modification."""
        if not event.is_directory and self._should_process(event.src_path):
            logger.debug(f"File modified: {event.src_path}")
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_created(self, event: FileCreatedEvent):
        """Handle file creation."""
        if not event.is_directory and self._should_process(event.src_path):
            logger.debug(f"File created: {event.src_path}")
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_deleted(self, event: FileDeletedEvent):
        """Handle file deletion."""
        if not event.is_directory and self._should_process(event.src_path):
            logger.debug(f"File deleted: {event.src_path}")
            # Add deletion task to queue (we'll handle it differently)
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_moved(self, event: FileMovedEvent):
        """Handle file move/rename."""
        if not event.is_directory:
@ -154,18 +145,16 @@ class CodeFileEventHandler(FileSystemEventHandler):

 class FileWatcher:
    """Watches project files and updates index automatically."""
-
-    def __init__(
-        self,
-        project_path: Path,
-        indexer: Optional[ProjectIndexer] = None,
-        update_delay: float = 1.0,
-        batch_size: int = 10,
-        batch_timeout: float = 5.0,
-    ):
+    
+    def __init__(self, 
+                 project_path: Path,
+                 indexer: Optional[ProjectIndexer] = None,
+                 update_delay: float = 1.0,
+                 batch_size: int = 10,
+                 batch_timeout: float = 5.0):
        """
        Initialize file watcher.
-
+        
        Args:
            project_path: Path to project to watch
            indexer: ProjectIndexer instance (creates one if not provided)
@ -178,79 +167,86 @@ class FileWatcher:
        self.update_delay = update_delay
        self.batch_size = batch_size
        self.batch_timeout = batch_timeout
-
+        
        # Initialize components
        self.update_queue = UpdateQueue(delay=update_delay)
        self.observer = Observer()
        self.worker_thread = None
        self.running = False
-
+        
        # Get patterns from indexer
        self.include_patterns = set(self.indexer.include_patterns)
        self.exclude_patterns = set(self.indexer.exclude_patterns)
-
+        
        # Statistics
        self.stats = {
-            "files_updated": 0,
-            "files_failed": 0,
-            "started_at": None,
-            "last_update": None,
+            'files_updated': 0,
+            'files_failed': 0,
+            'started_at': None,
+            'last_update': None,
        }
-
+    
    def start(self):
        """Start watching for file changes."""
        if self.running:
            logger.warning("Watcher is already running")
            return
-
+        
        logger.info(f"Starting file watcher for {self.project_path}")
-
+        
        # Set up file system observer
        event_handler = CodeFileEventHandler(
            self.update_queue,
            self.include_patterns,
            self.exclude_patterns,
-            self.project_path,
+            self.project_path
        )
-
-        self.observer.schedule(event_handler, str(self.project_path), recursive=True)
-
+        
+        self.observer.schedule(
+            event_handler,
+            str(self.project_path),
+            recursive=True
+        )
+        
        # Start worker thread
        self.running = True
-        self.worker_thread = threading.Thread(target=self._process_updates, daemon=True)
+        self.worker_thread = threading.Thread(
+            target=self._process_updates,
+            daemon=True
+        )
        self.worker_thread.start()
-
+        
        # Start observer
        self.observer.start()
-
-        self.stats["started_at"] = datetime.now()
+        
+        self.stats['started_at'] = datetime.now()
        logger.info("File watcher started successfully")
-
+    
    def stop(self):
        """Stop watching for file changes."""
        if not self.running:
            return
-
+        
        logger.info("Stopping file watcher...")
-
+        
        # Stop observer
        self.observer.stop()
        self.observer.join()
-
+        
        # Stop worker thread
        self.running = False
        if self.worker_thread:
            self.worker_thread.join(timeout=5.0)
-
+        
        logger.info("File watcher stopped")
-
+    
    def _process_updates(self):
        """Worker thread that processes file updates."""
        logger.info("Update processor thread started")
-
+        
        batch = []
        batch_start_time = None
-
+        
        while self.running:
            try:
                # Calculate timeout for getting next item
@ -267,46 +263,46 @@ class FileWatcher:
                    else:
                        # Wait for more items or timeout
                        timeout = min(0.1, self.batch_timeout - elapsed)
-
+                
                # Get next file from queue
                file_path = self.update_queue.get(timeout=timeout)
-
+                
                if file_path:
                    # Add to batch
                    if not batch:
                        batch_start_time = time.time()
                    batch.append(file_path)
-
+                    
                    # Check if batch is full
                    if len(batch) >= self.batch_size:
                        self._process_batch(batch)
                        batch = []
                        batch_start_time = None
-
+                
            except queue.Empty:
                # Check if we have a pending batch that's timed out
                if batch and (time.time() - batch_start_time) >= self.batch_timeout:
                    self._process_batch(batch)
                    batch = []
                    batch_start_time = None
-
+            
            except Exception as e:
                logger.error(f"Error in update processor: {e}")
                time.sleep(1)  # Prevent tight loop on error
-
+        
        # Process any remaining items
        if batch:
            self._process_batch(batch)
-
+        
        logger.info("Update processor thread stopped")
-
+    
    def _process_batch(self, files: list[Path]):
        """Process a batch of file updates."""
        if not files:
            return
-
+        
        logger.info(f"Processing batch of {len(files)} file updates")
-
+        
        for file_path in files:
            try:
                if file_path.exists():
@ -317,91 +313,87 @@ class FileWatcher:
                    # File doesn't exist - delete from index
                    logger.debug(f"Deleting {file_path} from index - file no longer exists")
                    success = self.indexer.delete_file(file_path)
-
+                
                if success:
-                    self.stats["files_updated"] += 1
+                    self.stats['files_updated'] += 1
                else:
-                    self.stats["files_failed"] += 1
-
-                self.stats["last_update"] = datetime.now()
-
+                    self.stats['files_failed'] += 1
+                
+                self.stats['last_update'] = datetime.now()
+                
            except Exception as e:
                logger.error(f"Failed to process {file_path}: {e}")
-                self.stats["files_failed"] += 1
-
-        logger.info(
-            f"Batch processing complete. Updated: {self.stats['files_updated']}, Failed: {self.stats['files_failed']}"
-        )
-
+                self.stats['files_failed'] += 1
+        
+        logger.info(f"Batch processing complete. Updated: {self.stats['files_updated']}, Failed: {self.stats['files_failed']}")
+    
    def get_statistics(self) -> dict:
        """Get watcher statistics."""
        stats = self.stats.copy()
-        stats["queue_size"] = self.update_queue.size()
-        stats["is_running"] = self.running
-
-        if stats["started_at"]:
-            uptime = datetime.now() - stats["started_at"]
-            stats["uptime_seconds"] = uptime.total_seconds()
-
+        stats['queue_size'] = self.update_queue.size()
+        stats['is_running'] = self.running
+        
+        if stats['started_at']:
+            uptime = datetime.now() - stats['started_at']
+            stats['uptime_seconds'] = uptime.total_seconds()
+        
        return stats
-
+    
    def wait_for_updates(self, timeout: Optional[float] = None) -> bool:
        """
        Wait for pending updates to complete.
-
+        
        Args:
            timeout: Maximum time to wait in seconds
-
+            
        Returns:
            True if all updates completed, False if timeout
        """
        start_time = time.time()
-
+        
        while not self.update_queue.empty():
            if timeout and (time.time() - start_time) > timeout:
                return False
            time.sleep(0.1)
-
+        
        # Wait a bit more to ensure batch processing completes
        time.sleep(self.batch_timeout + 0.5)
        return True
-
+    
    def __enter__(self):
        """Context manager entry."""
        self.start()
        return self
-
+    
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.stop()


 # Convenience function
-
-
 def watch_project(project_path: Path, callback: Optional[Callable] = None):
    """
    Watch a project for changes and update index automatically.
-
+    
    Args:
        project_path: Path to project
        callback: Optional callback function called after each update
    """
    watcher = FileWatcher(project_path)
-
+    
    try:
        watcher.start()
        logger.info(f"Watching {project_path} for changes. Press Ctrl+C to stop.")
-
+        
        while True:
            time.sleep(1)
-
+            
            # Call callback if provided
            if callback:
                stats = watcher.get_statistics()
                callback(stats)
-
+            
    except KeyboardInterrupt:
        logger.info("Stopping watcher...")
    finally:
-        watcher.stop()
+        watcher.stop()
--- a/mini_rag/claude_rag/windows_console_fix.py
+++ b/mini_rag/claude_rag/windows_console_fix.py
@ -3,9 +3,9 @@ Windows Console Unicode/Emoji Fix
 Reliable Windows console Unicode/emoji support for 2025.
 """

-import io
-import os
 import sys
+import os
+import io


 def fix_windows_console():
@ -14,33 +14,28 @@ def fix_windows_console():
    Call this at the start of any script that needs to output Unicode/emojis.
    """
    # Set environment variable for UTF-8 mode
-    os.environ["PYTHONUTF8"] = "1"
-
+    os.environ['PYTHONUTF8'] = '1'
+    
    # For Python 3.7+
-    if hasattr(sys.stdout, "reconfigure"):
-        sys.stdout.reconfigure(encoding="utf-8")
-        sys.stderr.reconfigure(encoding="utf-8")
-        if hasattr(sys.stdin, "reconfigure"):
-            sys.stdin.reconfigure(encoding="utf-8")
+    if hasattr(sys.stdout, 'reconfigure'):
+        sys.stdout.reconfigure(encoding='utf-8')
+        sys.stderr.reconfigure(encoding='utf-8')
+        if hasattr(sys.stdin, 'reconfigure'):
+            sys.stdin.reconfigure(encoding='utf-8')
    else:
        # For older Python versions
-        if sys.platform == "win32":
+        if sys.platform == 'win32':
            # Replace streams with UTF-8 versions
-            sys.stdout = io.TextIOWrapper(
-                sys.stdout.buffer, encoding="utf-8", line_buffering=True
-            )
-            sys.stderr = io.TextIOWrapper(
-                sys.stderr.buffer, encoding="utf-8", line_buffering=True
-            )
-
+            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', line_buffering=True)
+            sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', line_buffering=True)
+    
    # Also set the console code page to UTF-8 on Windows
-    if sys.platform == "win32":
+    if sys.platform == 'win32':
        import subprocess
-
        try:
            # Set console to UTF-8 code page
-            subprocess.run(["chcp", "65001"], shell=True, capture_output=True)
-        except (OSError, subprocess.SubprocessError):
+            subprocess.run(['chcp', '65001'], shell=True, capture_output=True)
+        except:
            pass


@ -49,14 +44,12 @@ fix_windows_console()


 # Test function to verify it works
-
-
 def test_emojis():
    """Test that emojis work properly."""
    print("Testing emoji output:")
    print(" Check mark")
    print(" Cross mark")
-    print(" Rocket")
+    print(" Rocket") 
    print(" Fire")
    print(" Computer")
    print(" Python")
@ -64,7 +57,7 @@ def test_emojis():
    print(" Search")
    print(" Lightning")
    print(" Sparkles")
-
+    

 if __name__ == "__main__":
-    test_emojis()
+    test_emojis()
--- a/mini_rag/config.py
+++ b/mini_rag/config.py
@ -1,573 +0,0 @@
-"""
-Configuration management for FSS-Mini-RAG.
-Handles loading, saving, and validation of YAML config files.
-"""
-
-import logging
-import re
-from dataclasses import asdict, dataclass
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-import yaml
-import requests
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class ChunkingConfig:
-    """Configuration for text chunking."""
-
-    max_size: int = 2000
-    min_size: int = 150
-    strategy: str = "semantic"  # "semantic" or "fixed"
-
-
-@dataclass
-class StreamingConfig:
-    """Configuration for large file streaming."""
-
-    enabled: bool = True
-    threshold_bytes: int = 1048576  # 1MB
-
-
-@dataclass
-class FilesConfig:
-    """Configuration for file processing."""
-
-    min_file_size: int = 50
-    exclude_patterns: list = None
-    include_patterns: list = None
-
-    def __post_init__(self):
-        if self.exclude_patterns is None:
-            self.exclude_patterns = [
-                "node_modules/**",
-                ".git/**",
-                "__pycache__/**",
-                "*.pyc",
-                ".venv/**",
-                "venv/**",
-                "build/**",
-                "dist/**",
-            ]
-        if self.include_patterns is None:
-            self.include_patterns = ["**/*"]  # Include everything by default
-
-
-@dataclass
-class EmbeddingConfig:
-    """Configuration for embedding generation."""
-
-    preferred_method: str = "ollama"  # "ollama", "ml", "hash", "auto"
-    ollama_model: str = "nomic-embed-text"
-    ollama_host: str = "localhost:11434"
-    ml_model: str = "sentence-transformers/all-MiniLM-L6-v2"
-    batch_size: int = 32
-
-
-@dataclass
-class SearchConfig:
-    """Configuration for search behavior."""
-
-    default_top_k: int = 10
-    enable_bm25: bool = True
-    similarity_threshold: float = 0.1
-    expand_queries: bool = False  # Enable automatic query expansion
-
-
-@dataclass
-class LLMConfig:
-    """Configuration for LLM synthesis and query expansion."""
-
-    # Core settings
-    synthesis_model: str = "auto"  # "auto", "qwen3:1.7b", "qwen2.5:1.5b", etc.
-    expansion_model: str = "auto"  # Usually same as synthesis_model
-    max_expansion_terms: int = 8  # Maximum additional terms to add
-    enable_synthesis: bool = False  # Enable by default when --synthesize used
-    synthesis_temperature: float = 0.3
-    enable_thinking: bool = True  # Enable thinking mode for Qwen3 models
-    cpu_optimized: bool = True  # Prefer lightweight models
-
-    # Context window configuration (critical for RAG performance)
-    context_window: int = 16384  # Context window size in tokens (16K recommended)
-    auto_context: bool = True  # Auto-adjust context based on model capabilities
-
-    # Model preference rankings (configurable)
-    model_rankings: list = None  # Will be set in __post_init__
-
-    # Provider-specific settings (for different LLM providers)
-    provider: str = "ollama"  # "ollama", "openai", "anthropic"
-    ollama_host: str = "localhost:11434"  # Ollama connection
-    api_key: Optional[str] = None  # API key for cloud providers
-    api_base: Optional[str] = None  # Base URL for API (e.g., OpenRouter)
-    timeout: int = 20  # Request timeout in seconds
-
-    def __post_init__(self):
-        if self.model_rankings is None:
-            # Default model preference rankings (can be overridden in config file)
-            self.model_rankings = [
-                # Testing model (prioritized for current testing phase)
-                "qwen3:1.7b",
-                # Ultra-efficient models (perfect for CPU-only systems)
-                "qwen3:0.6b",
-                # Recommended model (excellent quality but larger)
-                "qwen3:4b",
-                # Common fallbacks (prioritize Qwen models)
-                "qwen2.5:1.5b",
-                "qwen2.5:3b",
-            ]
-
-
-@dataclass
-class UpdateConfig:
-    """Configuration for auto-update system."""
-
-    auto_check: bool = True  # Check for updates automatically
-    check_frequency_hours: int = 24  # How often to check (hours)
-    auto_install: bool = False  # Auto-install without asking (not recommended)
-    backup_before_update: bool = True  # Create backup before updating
-    notify_beta_releases: bool = False  # Include beta/pre-releases
-
-
-@dataclass
-class RAGConfig:
-    """Main RAG system configuration."""
-
-    chunking: ChunkingConfig = None
-    streaming: StreamingConfig = None
-    files: FilesConfig = None
-    embedding: EmbeddingConfig = None
-    search: SearchConfig = None
-    llm: LLMConfig = None
-    updates: UpdateConfig = None
-
-    def __post_init__(self):
-        if self.chunking is None:
-            self.chunking = ChunkingConfig()
-        if self.streaming is None:
-            self.streaming = StreamingConfig()
-        if self.files is None:
-            self.files = FilesConfig()
-        if self.embedding is None:
-            self.embedding = EmbeddingConfig()
-        if self.search is None:
-            self.search = SearchConfig()
-        if self.llm is None:
-            self.llm = LLMConfig()
-        if self.updates is None:
-            self.updates = UpdateConfig()
-
-
-class ConfigManager:
-    """Manages configuration loading, saving, and validation."""
-
-    def __init__(self, project_path: Path):
-        self.project_path = Path(project_path)
-        self.rag_dir = self.project_path / ".mini-rag"
-        self.config_path = self.rag_dir / "config.yaml"
-
-    def get_available_ollama_models(self, ollama_host: str = "localhost:11434") -> List[str]:
-        """Get list of available Ollama models for validation with secure connection handling."""
-        import time
-        
-        # Retry logic with exponential backoff
-        max_retries = 3
-        for attempt in range(max_retries):
-            try:
-                # Use explicit timeout and SSL verification for security
-                response = requests.get(
-                    f"http://{ollama_host}/api/tags", 
-                    timeout=(5, 10),  # (connect_timeout, read_timeout)
-                    verify=True,  # Explicit SSL verification 
-                    allow_redirects=False  # Prevent redirect attacks
-                )
-                if response.status_code == 200:
-                    data = response.json()
-                    models = [model["name"] for model in data.get("models", [])]
-                    logger.debug(f"Successfully fetched {len(models)} Ollama models")
-                    return models
-                else:
-                    logger.debug(f"Ollama API returned status {response.status_code}")
-                    
-            except requests.exceptions.SSLError as e:
-                logger.debug(f"SSL verification failed for Ollama connection: {e}")
-                # For local Ollama, SSL might not be configured - this is expected
-                if "localhost" in ollama_host or "127.0.0.1" in ollama_host:
-                    logger.debug("Retrying with local connection (SSL not required for localhost)")
-                    # Local connections don't need SSL verification
-                    try:
-                        response = requests.get(f"http://{ollama_host}/api/tags", timeout=(5, 10))
-                        if response.status_code == 200:
-                            data = response.json()
-                            return [model["name"] for model in data.get("models", [])]
-                    except Exception as local_e:
-                        logger.debug(f"Local Ollama connection also failed: {local_e}")
-                break  # Don't retry SSL errors for remote hosts
-                
-            except requests.exceptions.Timeout as e:
-                logger.debug(f"Ollama connection timeout (attempt {attempt + 1}/{max_retries}): {e}")
-                if attempt < max_retries - 1:
-                    sleep_time = (2 ** attempt)  # Exponential backoff
-                    time.sleep(sleep_time)
-                    continue
-                    
-            except requests.exceptions.ConnectionError as e:
-                logger.debug(f"Ollama connection error (attempt {attempt + 1}/{max_retries}): {e}")
-                if attempt < max_retries - 1:
-                    time.sleep(1)
-                    continue
-                    
-            except Exception as e:
-                logger.debug(f"Unexpected error fetching Ollama models: {e}")
-                break
-                
-        return []
-
-    def _sanitize_model_name(self, model_name: str) -> str:
-        """Sanitize model name to prevent injection attacks."""
-        if not model_name:
-            return ""
-        
-        # Allow only alphanumeric, dots, colons, hyphens, underscores
-        # This covers legitimate model names like qwen3:1.7b-q8_0
-        sanitized = re.sub(r'[^a-zA-Z0-9\.\:\-\_]', '', model_name)
-        
-        # Limit length to prevent DoS
-        if len(sanitized) > 128:
-            logger.warning(f"Model name too long, truncating: {sanitized[:20]}...")
-            sanitized = sanitized[:128]
-            
-        return sanitized
-
-    def resolve_model_name(self, configured_model: str, available_models: List[str]) -> Optional[str]:
-        """Resolve configured model name to actual available model with input sanitization."""
-        if not available_models or not configured_model:
-            return None
-        
-        # Sanitize input to prevent injection
-        configured_model = self._sanitize_model_name(configured_model)
-        if not configured_model:
-            logger.warning("Model name was empty after sanitization")
-            return None
-            
-        # Handle special 'auto' directive
-        if configured_model.lower() == 'auto':
-            return available_models[0] if available_models else None
-            
-        # Direct exact match first (case-insensitive)
-        for available_model in available_models:
-            if configured_model.lower() == available_model.lower():
-                return available_model
-        
-        # Fuzzy matching for common patterns
-        model_patterns = self._get_model_patterns(configured_model)
-        
-        for pattern in model_patterns:
-            for available_model in available_models:
-                if pattern.lower() in available_model.lower():
-                    # Additional validation: ensure it's not a partial match of something else
-                    if self._validate_model_match(pattern, available_model):
-                        return available_model
-        
-        return None  # Model not available
-
-    def _get_model_patterns(self, configured_model: str) -> List[str]:
-        """Generate fuzzy match patterns for common model naming conventions."""
-        patterns = [configured_model]  # Start with exact name
-        
-        # Common quantization patterns for different models
-        quantization_patterns = {
-            'qwen3:1.7b': ['qwen3:1.7b-q8_0', 'qwen3:1.7b-q4_0', 'qwen3:1.7b-q6_k'],
-            'qwen3:0.6b': ['qwen3:0.6b-q8_0', 'qwen3:0.6b-q4_0', 'qwen3:0.6b-q6_k'],
-            'qwen3:4b': ['qwen3:4b-q8_0', 'qwen3:4b-q4_0', 'qwen3:4b-q6_k'],
-            'qwen3:8b': ['qwen3:8b-q8_0', 'qwen3:8b-q4_0', 'qwen3:8b-q6_k'],
-            'qwen2.5:1.5b': ['qwen2.5:1.5b-q8_0', 'qwen2.5:1.5b-q4_0'],
-            'qwen2.5:3b': ['qwen2.5:3b-q8_0', 'qwen2.5:3b-q4_0'],
-            'qwen2.5-coder:1.5b': ['qwen2.5-coder:1.5b-q8_0', 'qwen2.5-coder:1.5b-q4_0'],
-            'qwen2.5-coder:3b': ['qwen2.5-coder:3b-q8_0', 'qwen2.5-coder:3b-q4_0'],
-            'qwen2.5-coder:7b': ['qwen2.5-coder:7b-q8_0', 'qwen2.5-coder:7b-q4_0'],
-        }
-        
-        # Add specific patterns for the configured model
-        if configured_model.lower() in quantization_patterns:
-            patterns.extend(quantization_patterns[configured_model.lower()])
-        
-        # Generic pattern generation for unknown models
-        if ':' in configured_model:
-            base_name, version = configured_model.split(':', 1)
-            
-            # Add common quantization suffixes
-            common_suffixes = ['-q8_0', '-q4_0', '-q6_k', '-q4_k_m', '-instruct', '-base']
-            for suffix in common_suffixes:
-                patterns.append(f"{base_name}:{version}{suffix}")
-                
-            # Also try with instruct variants
-            if 'instruct' not in version.lower():
-                patterns.append(f"{base_name}:{version}-instruct")
-                patterns.append(f"{base_name}:{version}-instruct-q8_0")
-                patterns.append(f"{base_name}:{version}-instruct-q4_0")
-        
-        return patterns
-
-    def _validate_model_match(self, pattern: str, available_model: str) -> bool:
-        """Validate that a fuzzy match is actually correct and not a false positive."""
-        # Convert to lowercase for comparison
-        pattern_lower = pattern.lower()
-        available_lower = available_model.lower()
-        
-        # Ensure the base model name matches
-        if ':' in pattern_lower and ':' in available_lower:
-            pattern_base = pattern_lower.split(':')[0]
-            available_base = available_lower.split(':')[0]
-            
-            # Base names must match exactly
-            if pattern_base != available_base:
-                return False
-                
-            # Version part should be contained or closely related
-            pattern_version = pattern_lower.split(':', 1)[1]
-            available_version = available_lower.split(':', 1)[1]
-            
-            # The pattern version should be a prefix of the available version
-            # e.g., "1.7b" should match "1.7b-q8_0" but not "11.7b"
-            if not available_version.startswith(pattern_version.split('-')[0]):
-                return False
-                
-        return True
-
-    def validate_and_resolve_models(self, config: RAGConfig) -> RAGConfig:
-        """Validate and resolve model names in configuration."""
-        try:
-            available_models = self.get_available_ollama_models(config.llm.ollama_host)
-            
-            if not available_models:
-                logger.debug("No Ollama models available for validation")
-                return config
-                
-            # Resolve synthesis model
-            if config.llm.synthesis_model != "auto":
-                resolved = self.resolve_model_name(config.llm.synthesis_model, available_models)
-                if resolved and resolved != config.llm.synthesis_model:
-                    logger.info(f"Resolved synthesis model: {config.llm.synthesis_model} -> {resolved}")
-                    config.llm.synthesis_model = resolved
-                elif not resolved:
-                    logger.warning(f"Synthesis model '{config.llm.synthesis_model}' not found, keeping original")
-                    
-            # Resolve expansion model (if different from synthesis)
-            if (config.llm.expansion_model != "auto" and 
-                config.llm.expansion_model != config.llm.synthesis_model):
-                resolved = self.resolve_model_name(config.llm.expansion_model, available_models)
-                if resolved and resolved != config.llm.expansion_model:
-                    logger.info(f"Resolved expansion model: {config.llm.expansion_model} -> {resolved}")
-                    config.llm.expansion_model = resolved
-                elif not resolved:
-                    logger.warning(f"Expansion model '{config.llm.expansion_model}' not found, keeping original")
-            
-            # Update model rankings with resolved names
-            if config.llm.model_rankings:
-                updated_rankings = []
-                for model in config.llm.model_rankings:
-                    resolved = self.resolve_model_name(model, available_models)
-                    if resolved:
-                        updated_rankings.append(resolved)
-                        if resolved != model:
-                            logger.debug(f"Updated model ranking: {model} -> {resolved}")
-                    else:
-                        updated_rankings.append(model)  # Keep original if not resolved
-                config.llm.model_rankings = updated_rankings
-                        
-        except Exception as e:
-            logger.debug(f"Model validation failed: {e}")
-            
-        return config
-
-    def load_config(self) -> RAGConfig:
-        """Load configuration from YAML file or create default."""
-        if not self.config_path.exists():
-            logger.info(f"No config found at {self.config_path}, creating default")
-            config = RAGConfig()
-            self.save_config(config)
-            return config
-
-        try:
-            with open(self.config_path, "r") as f:
-                data = yaml.safe_load(f)
-
-            if not data:
-                logger.warning("Empty config file, using defaults")
-                return RAGConfig()
-
-            # Convert nested dicts back to dataclass instances
-            config = RAGConfig()
-
-            if "chunking" in data:
-                config.chunking = ChunkingConfig(**data["chunking"])
-            if "streaming" in data:
-                config.streaming = StreamingConfig(**data["streaming"])
-            if "files" in data:
-                config.files = FilesConfig(**data["files"])
-            if "embedding" in data:
-                config.embedding = EmbeddingConfig(**data["embedding"])
-            if "search" in data:
-                config.search = SearchConfig(**data["search"])
-            if "llm" in data:
-                config.llm = LLMConfig(**data["llm"])
-
-            # Validate and resolve model names if Ollama is available
-            config = self.validate_and_resolve_models(config)
-
-            return config
-
-        except yaml.YAMLError as e:
-            # YAML syntax error - help user fix it instead of silent fallback
-            error_msg = (
-                f"⚠️ Config file has YAML syntax error at line "
-                f"{getattr(e, 'problem_mark', 'unknown')}: {e}"
-            )
-            logger.error(error_msg)
-            print(f"\n{error_msg}")
-            print(f"Config file: {self.config_path}")
-            print("💡 Check YAML syntax (indentation, quotes, colons)")
-            print("💡 Or delete config file to reset to defaults")
-            return RAGConfig()  # Still return defaults but warn user
-
-        except Exception as e:
-            logger.error(f"Failed to load config from {self.config_path}: {e}")
-            logger.info("Using default configuration")
-            return RAGConfig()
-
-    def save_config(self, config: RAGConfig):
-        """Save configuration to YAML file with comments."""
-        try:
-            self.rag_dir.mkdir(exist_ok=True)
-
-            # Convert to dict for YAML serialization
-            config_dict = asdict(config)
-
-            # Create YAML content with comments
-            yaml_content = self._create_yaml_with_comments(config_dict)
-
-            # Write with basic file locking to prevent corruption
-            with open(self.config_path, "w") as f:
-                try:
-                    import fcntl
-
-                    fcntl.flock(
-                        f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
-                    )  # Non-blocking exclusive lock
-                    f.write(yaml_content)
-                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)  # Unlock
-                except (OSError, ImportError):
-                    # Fallback for Windows or if fcntl unavailable
-                    f.write(yaml_content)
-
-            logger.info(f"Configuration saved to {self.config_path}")
-
-        except Exception as e:
-            logger.error(f"Failed to save config to {self.config_path}: {e}")
-
-    def _create_yaml_with_comments(self, config_dict: Dict[str, Any]) -> str:
-        """Create YAML content with helpful comments."""
-        yaml_lines = [
-            "# FSS-Mini-RAG Configuration",
-            "# Edit this file to customize indexing and search behavior",
-            "# See docs/GETTING_STARTED.md for detailed explanations",
-            "",
-            "# Text chunking settings",
-            "chunking:",
-            f"  max_size: {config_dict['chunking']['max_size']}  # Max chars per chunk",
-            f"  min_size: {config_dict['chunking']['min_size']}  # Min chars per chunk",
-            f"  strategy: {config_dict['chunking']['strategy']}  # 'semantic' or 'fixed'",
-            "",
-            "# Large file streaming settings",
-            "streaming:",
-            f"  enabled: {str(config_dict['streaming']['enabled']).lower()}",
-            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Stream files >1MB",
-            "",
-            "# File processing settings",
-            "files:",
-            f"  min_file_size: {config_dict['files']['min_file_size']}  # Skip small files",
-            "  exclude_patterns:",
-        ]
-
-        for pattern in config_dict["files"]["exclude_patterns"]:
-            yaml_lines.append(f'    - "{pattern}"')
-
-        yaml_lines.extend(
-            [
-                "  include_patterns:",
-                '    - "**/*"                  # Include all files by default',
-                "",
-                "# Embedding generation settings",
-                "embedding:",
-                f"  preferred_method: {config_dict['embedding']['preferred_method']}  # Method",
-                f"  ollama_model: {config_dict['embedding']['ollama_model']}",
-                f"  ollama_host: {config_dict['embedding']['ollama_host']}",
-                f"  ml_model: {config_dict['embedding']['ml_model']}",
-                f"  batch_size: {config_dict['embedding']['batch_size']}  # Per batch",
-                "",
-                "# Search behavior settings",
-                "search:",
-                f"  default_top_k: {config_dict['search']['default_top_k']}  # Top results",
-                f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}  # Keyword boost",
-                f"  similarity_threshold: {config_dict['search']['similarity_threshold']}  # Min score",
-                f"  expand_queries: {str(config_dict['search']['expand_queries']).lower()}  # Auto expand",
-                "",
-                "# LLM synthesis and query expansion settings",
-                "llm:",
-                f"  ollama_host: {config_dict['llm']['ollama_host']}",
-                f"  synthesis_model: {config_dict['llm']['synthesis_model']}  # Model name",
-                f"  expansion_model: {config_dict['llm']['expansion_model']}  # Model name",
-                f"  max_expansion_terms: {config_dict['llm']['max_expansion_terms']}  # Max terms",
-                f"  enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()}       # Enable synthesis by default",
-                f"  synthesis_temperature: {config_dict['llm']['synthesis_temperature']}      # LLM temperature for analysis",
-                "",
-                "  # Context window configuration (critical for RAG performance)",
-                "  # 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users",
-                "  #               32K=large codebases, 64K+=power users only",
-                "  # ⚠️  Larger contexts use exponentially more CPU/memory - only increase if needed",
-                "  # 🔧 Low context limits? Try smaller topk, better search terms, or archive noise",
-                f"  context_window: {config_dict['llm']['context_window']}           # Context size in tokens",
-                f"  auto_context: {str(config_dict['llm']['auto_context']).lower()}            # Auto-adjust context based on model capabilities",
-                "",
-                "  model_rankings:          # Preferred model order (edit to change priority)",
-            ]
-        )
-
-        # Add model rankings list
-        if "model_rankings" in config_dict["llm"] and config_dict["llm"]["model_rankings"]:
-            for model in config_dict["llm"]["model_rankings"][:10]:  # Show first 10
-                yaml_lines.append(f'    - "{model}"')
-            if len(config_dict["llm"]["model_rankings"]) > 10:
-                yaml_lines.append("    # ... (edit config to see all options)")
-
-        # Add update settings
-        yaml_lines.extend(
-            [
-                "",
-                "# Auto-update system settings",
-                "updates:",
-                f"  auto_check: {str(config_dict['updates']['auto_check']).lower()}            # Check for updates automatically",
-                f"  check_frequency_hours: {config_dict['updates']['check_frequency_hours']}    # Hours between update checks",
-                f"  auto_install: {str(config_dict['updates']['auto_install']).lower()}          # Auto-install updates (not recommended)",
-                f"  backup_before_update: {str(config_dict['updates']['backup_before_update']).lower()}   # Create backup before updating",
-                f"  notify_beta_releases: {str(config_dict['updates']['notify_beta_releases']).lower()}   # Include beta releases in checks",
-            ]
-        )
-
-        return "\n".join(yaml_lines)
-
-    def update_config(self, **kwargs) -> RAGConfig:
-        """Update specific configuration values."""
-        config = self.load_config()
-
-        for key, value in kwargs.items():
-            if hasattr(config, key):
-                setattr(config, key, value)
-            else:
-                logger.warning(f"Unknown config key: {key}")
-
-        self.save_config(config)
-        return config
--- a/mini_rag/explorer.py
+++ b/mini_rag/explorer.py
@ -1,653 +0,0 @@
-#!/usr/bin/env python3
-"""
-Interactive Code Explorer with Thinking Mode
-
-Provides multi-turn conversations with context memory for debugging and learning.
-Perfect for exploring codebases with detailed reasoning and follow-up questions.
-"""
-
-import json
-import logging
-import time
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-try:
-    from .config import RAGConfig
-    from .llm_synthesizer import LLMSynthesizer, SynthesisResult
-    from .search import CodeSearcher
-    from .system_context import get_system_context
-except ImportError:
-    # For direct testing
-    from config import RAGConfig
-    from llm_synthesizer import LLMSynthesizer, SynthesisResult
-    from search import CodeSearcher
-
-    def get_system_context(x=None):
-        return ""
-
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class ExplorationSession:
-    """Track an exploration session with context history."""
-
-    project_path: Path
-    conversation_history: List[Dict[str, Any]]
-    session_id: str
-    started_at: float
-
-    def add_exchange(
-        self, question: str, search_results: List[Any], response: SynthesisResult
-    ):
-        """Add a question/response exchange to the conversation history."""
-        self.conversation_history.append(
-            {
-                "timestamp": time.time(),
-                "question": question,
-                "search_results_count": len(search_results),
-                "response": {
-                    "summary": response.summary,
-                    "key_points": response.key_points,
-                    "code_examples": response.code_examples,
-                    "suggested_actions": response.suggested_actions,
-                    "confidence": response.confidence,
-                },
-            }
-        )
-
-
-class CodeExplorer:
-    """Interactive code exploration with thinking and context memory."""
-
-    def __init__(self, project_path: Path, config: RAGConfig = None):
-        self.project_path = project_path
-        self.config = config or RAGConfig()
-
-        # Initialize components with thinking enabled
-        self.searcher = CodeSearcher(project_path)
-        self.synthesizer = LLMSynthesizer(
-            ollama_url=f"http://{self.config.llm.ollama_host}",
-            model=self.config.llm.synthesis_model,
-            enable_thinking=True,  # Always enable thinking in explore mode
-            config=self.config,  # Pass config for model rankings
-        )
-
-        # Session management
-        self.current_session: Optional[ExplorationSession] = None
-
-    def start_exploration_session(self) -> bool:
-        """Start a new exploration session."""
-
-        # Simple availability check - don't do complex model restart logic
-        if not self.synthesizer.is_available():
-            print("❌ LLM service unavailable. Please check Ollama is running.")
-            return False
-
-        session_id = f"explore_{int(time.time())}"
-        self.current_session = ExplorationSession(
-            project_path=self.project_path,
-            conversation_history=[],
-            session_id=session_id,
-            started_at=time.time(),
-        )
-
-        print("🧠 Exploration Mode Started")
-        print(f"Project: {self.project_path.name}")
-
-        return True
-
-    def explore_question(self, question: str, context_limit: int = 10) -> Optional[str]:
-        """Explore a question with full thinking and context."""
-        if not self.current_session:
-            return "❌ No exploration session active. Start one first."
-
-        # Search for relevant information
-        search_start = time.time()
-        results = self.searcher.search(
-            question,
-            top_k=context_limit,
-            include_context=True,
-            semantic_weight=0.7,
-            bm25_weight=0.3,
-        )
-        search_time = time.time() - search_start
-
-        # Build enhanced prompt with conversation context
-        synthesis_prompt = self._build_contextual_prompt(question, results)
-
-        # Get thinking-enabled analysis
-        synthesis_start = time.time()
-        synthesis = self._synthesize_with_context(synthesis_prompt, results)
-        synthesis_time = time.time() - synthesis_start
-
-        # Add to conversation history
-        self.current_session.add_exchange(question, results, synthesis)
-
-        # Streaming already displayed the response
-        # Just return minimal status for caller
-        session_duration = time.time() - self.current_session.started_at
-        exchange_count = len(self.current_session.conversation_history)
-
-        status = f"\n📊 Session: {session_duration/60:.1f}m | Question #{exchange_count} | Results: {len(results)} | Time: {search_time+synthesis_time:.1f}s"
-        return status
-
-    def _build_contextual_prompt(self, question: str, results: List[Any]) -> str:
-        """Build a prompt that includes conversation context."""
-        # Get recent conversation context (last 3 exchanges)
-        if self.current_session.conversation_history:
-            recent_exchanges = self.current_session.conversation_history[-3:]
-            context_parts = []
-
-            for i, exchange in enumerate(recent_exchanges, 1):
-                prev_q = exchange["question"]
-                prev_summary = exchange["response"]["summary"]
-                context_parts.append(f"Previous Q{i}: {prev_q}")
-                context_parts.append(f"Previous A{i}: {prev_summary}")
-
-            # "\n".join(context_parts)  # Unused variable removed
-
-        # Build search results context
-        results_context = []
-        for i, result in enumerate(results[:8], 1):
-            # result.file_path if hasattr(result, "file_path") else "unknown"  # Unused variable removed
-            # result.content if hasattr(result, "content") else str(result)  # Unused variable removed
-            # result.score if hasattr(result, "score") else 0.0  # Unused variable removed
-
-            results_context.append(
-                """
-Result {i} (Score: {score:.3f}):
-File: {file_path}
-Content: {content[:800]}{'...' if len(content) > 800 else ''}
-"""
-            )
-
-        # "\n".join(results_context)  # Unused variable removed
-
-        # Get system context for better responses
-        # get_system_context(self.project_path)  # Unused variable removed
-
-        # Create comprehensive exploration prompt with thinking
-        prompt = """<think>
-The user asked: "{question}"
-
-System context: {system_context}
-
-Let me analyze what they're asking and look at the information I have available.
-
-From the search results, I can see relevant information about:
-{results_text[:500]}...
-
-I should think about:
-1. What the user is trying to understand or accomplish
-2. What information from the search results is most relevant
-3. How to explain this in a clear, educational way
-4. What practical next steps would be helpful
-
-Based on our conversation so far: {context_summary}
-
-Let me create a helpful response that breaks this down clearly and gives them actionable guidance.
-</think>
-
-You're a helpful assistant exploring a project with someone. You're good at breaking down complex topics into understandable pieces and explaining things clearly.
-
-PROJECT: {self.project_path.name}
-
-PREVIOUS CONVERSATION:
-{context_summary}
-
-CURRENT QUESTION: "{question}"
-
-RELEVANT INFORMATION FOUND:
-{results_text}
-
-Please provide a helpful, natural explanation that answers their question. Write as if you're having a friendly conversation with a colleague who's exploring this project.
-
-Structure your response to include:
-1. A clear explanation of what you found and how it answers their question
-2. The most important insights from the information you discovered
-3. Relevant examples or code patterns when helpful
-4. Practical next steps they could take
-
-Guidelines:
- Write in a conversational, friendly tone
- Be educational but not condescending
- Reference specific files and information when helpful
- Give practical, actionable suggestions
- Connect everything back to their original question
- Use natural language, not structured formats
- Break complex topics into understandable pieces
-"""
-
-        return prompt
-
-    def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult:
-        """Synthesize results with full context and thinking."""
-        try:
-            # Use streaming with thinking visible (don't collapse)
-            response = self.synthesizer._call_ollama(
-                prompt,
-                temperature=0.2,
-                disable_thinking=False,
-                use_streaming=True,
-                collapse_thinking=False,
-            )
-            # ""  # Unused variable removed
-
-            # Streaming already shows thinking and response
-            # No need for additional indicators
-
-            if not response:
-                return SynthesisResult(
-                    summary="Analysis unavailable (LLM service error)",
-                    key_points=[],
-                    code_examples=[],
-                    suggested_actions=["Check LLM service status"],
-                    confidence=0.0,
-                )
-
-            # Use natural language response directly
-            return SynthesisResult(
-                summary=response.strip(),
-                key_points=[],  # Not used with natural language responses
-                code_examples=[],  # Not used with natural language responses
-                suggested_actions=[],  # Not used with natural language responses
-                confidence=0.85,  # High confidence for natural responses
-            )
-
-        except Exception as e:
-            logger.error(f"Context synthesis failed: {e}")
-            return SynthesisResult(
-                summary="Analysis failed due to service error",
-                key_points=[],
-                code_examples=[],
-                suggested_actions=["Check system status and try again"],
-                confidence=0.0,
-            )
-
-    def _format_exploration_response(
-        self,
-        question: str,
-        synthesis: SynthesisResult,
-        result_count: int,
-        search_time: float,
-        synthesis_time: float,
-    ) -> str:
-        """Format exploration response with context indicators."""
-
-        output = []
-
-        # Header with session context
-        session_duration = time.time() - self.current_session.started_at
-        exchange_count = len(self.current_session.conversation_history)
-
-        output.append(f"🧠 EXPLORATION ANALYSIS (Question #{exchange_count})")
-        output.append(
-            f"Session: {session_duration/60:.1f}m | Results: {result_count} | "
-            f"Time: {search_time+synthesis_time:.1f}s"
-        )
-        output.append("=" * 60)
-        output.append("")
-
-        # Response was already displayed via streaming
-        # Just show completion status
-        output.append("✅ Analysis complete")
-        output.append("")
-        output.append("")
-
-        # Confidence and context indicator
-        confidence_emoji = (
-            "🟢"
-            if synthesis.confidence > 0.7
-            else "🟡" if synthesis.confidence > 0.4 else "🔴"
-        )
-        context_indicator = (
-            f" | Context: {exchange_count-1} previous questions" if exchange_count > 1 else ""
-        )
-        output.append(
-            f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}{context_indicator}"
-        )
-
-        return "\n".join(output)
-
-    def get_session_summary(self) -> str:
-        """Get a summary of the current exploration session."""
-        if not self.current_session:
-            return "No active exploration session."
-
-        duration = time.time() - self.current_session.started_at
-        exchange_count = len(self.current_session.conversation_history)
-
-        summary = [
-            "🧠 EXPLORATION SESSION SUMMARY",
-            "=" * 40,
-            f"Project: {self.project_path.name}",
-            f"Session ID: {self.current_session.session_id}",
-            f"Duration: {duration/60:.1f} minutes",
-            f"Questions explored: {exchange_count}",
-            "",
-        ]
-
-        if exchange_count > 0:
-            summary.append("📋 Topics explored:")
-            for i, exchange in enumerate(self.current_session.conversation_history, 1):
-                question = (
-                    exchange["question"][:50] + "..."
-                    if len(exchange["question"]) > 50
-                    else exchange["question"]
-                )
-                confidence = exchange["response"]["confidence"]
-                summary.append(f"   {i}. {question} (confidence: {confidence:.1%})")
-
-        return "\n".join(summary)
-
-    def end_session(self) -> str:
-        """End the current exploration session."""
-        if not self.current_session:
-            return "No active session to end."
-
-        summary = self.get_session_summary()
-        self.current_session = None
-
-        return summary + "\n\n✅ Exploration session ended."
-
-    def _check_model_restart_needed(self) -> bool:
-        """Check if model restart would improve thinking quality."""
-        try:
-            # Simple heuristic: if we can detect the model was recently used
-            # with <no_think>, suggest restart for better thinking quality
-
-            # Test with a simple thinking prompt to see response quality
-            test_response = self.synthesizer._call_ollama(
-                "Think briefly: what is 2+2?", temperature=0.1, disable_thinking=False
-            )
-
-            if test_response:
-                # If response is suspiciously short or shows signs of no-think behavior
-                if len(test_response.strip()) < 10 or "4" == test_response.strip():
-                    return True
-
-        except Exception:
-            pass
-
-        return False
-
-    def _handle_model_restart(self) -> bool:
-        """Handle user confirmation and model restart."""
-        try:
-            print(
-                "\n🤔 To ensure best thinking quality, exploration mode works best with a fresh model."
-            )
-            print(f"   Currently running: {self.synthesizer.model}")
-            print(
-                "\n💡 Stop current model and restart for optimal exploration? (y/N): ",
-                end="",
-                flush=True,
-            )
-
-            response = input().strip().lower()
-
-            if response in ["y", "yes"]:
-                print("\n🔄 Stopping current model...")
-
-                # Use ollama stop command for clean model restart
-                import subprocess
-
-                try:
-                    subprocess.run(
-                        ["ollama", "stop", self.synthesizer.model],
-                        timeout=10,
-                        capture_output=True,
-                    )
-
-                    print("✅ Model stopped successfully.")
-                    print(
-                        "🚀 Exploration mode will restart the model with thinking enabled..."
-                    )
-
-                    # Reset synthesizer initialization to force fresh start
-                    self.synthesizer._initialized = False
-                    return True
-
-                except subprocess.TimeoutExpired:
-                    print("⚠️  Model stop timed out, continuing anyway...")
-                    return False
-                except FileNotFoundError:
-                    print("⚠️  'ollama' command not found, continuing with current model...")
-                    return False
-                except Exception as e:
-                    print(f"⚠️  Error stopping model: {e}")
-                    return False
-            else:
-                print("📝 Continuing with current model...")
-                return False
-
-        except KeyboardInterrupt:
-            print("\n📝 Continuing with current model...")
-            return False
-        except EOFError:
-            print("\n📝 Continuing with current model...")
-            return False
-
-    def _call_ollama_with_thinking(self, prompt: str, temperature: float = 0.3) -> tuple:
-        """Call Ollama with streaming for fast time-to-first-token."""
-        import requests
-
-        try:
-            # Use the synthesizer's model and connection
-            model_to_use = self.synthesizer.model
-            if self.synthesizer.model not in self.synthesizer.available_models:
-                if self.synthesizer.available_models:
-                    model_to_use = self.synthesizer.available_models[0]
-                else:
-                    return None, None
-
-            # Enable thinking by NOT adding <no_think>
-            final_prompt = prompt
-
-            # Get optimal parameters for this model
-            from .llm_optimization import get_optimal_ollama_parameters
-
-            optimal_params = get_optimal_ollama_parameters(model_to_use)
-
-            payload = {
-                "model": model_to_use,
-                "prompt": final_prompt,
-                "stream": True,  # Enable streaming for fast response
-                "options": {
-                    "temperature": temperature,
-                    "top_p": optimal_params.get("top_p", 0.9),
-                    "top_k": optimal_params.get("top_k", 40),
-                    "num_ctx": self.synthesizer._get_optimal_context_size(model_to_use),
-                    "num_predict": optimal_params.get("num_predict", 2000),
-                    "repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
-                    "presence_penalty": optimal_params.get("presence_penalty", 1.0),
-                },
-            }
-
-            response = requests.post(
-                f"{self.synthesizer.ollama_url}/api/generate",
-                json=payload,
-                stream=True,
-                timeout=65,
-            )
-
-            if response.status_code == 200:
-                # Collect streaming response
-                raw_response = ""
-                thinking_displayed = False
-
-                for line in response.iter_lines():
-                    if line:
-                        try:
-                            chunk_data = json.loads(line.decode("utf-8"))
-                            chunk_text = chunk_data.get("response", "")
-
-                            if chunk_text:
-                                raw_response += chunk_text
-
-                                # Display thinking stream as it comes in
-                                if not thinking_displayed and "<think>" in raw_response:
-                                    # Start displaying thinking
-                                    self._start_thinking_display()
-                                    thinking_displayed = True
-
-                                if thinking_displayed:
-                                    self._stream_thinking_chunk(chunk_text)
-
-                            if chunk_data.get("done", False):
-                                break
-
-                        except json.JSONDecodeError:
-                            continue
-
-                # Finish thinking display if it was shown
-                if thinking_displayed:
-                    self._end_thinking_display()
-
-                # Extract thinking stream and final response
-                thinking_stream, final_response = self._extract_thinking(raw_response)
-
-                return final_response, thinking_stream
-            else:
-                return None, None
-
-        except Exception as e:
-            logger.error(f"Thinking-enabled Ollama call failed: {e}")
-            return None, None
-
-    def _extract_thinking(self, raw_response: str) -> tuple:
-        """Extract thinking content from response."""
-        thinking_stream = ""
-        final_response = raw_response
-
-        # Look for thinking patterns
-        if "<think>" in raw_response and "</think>" in raw_response:
-            # Extract thinking content between tags
-            start_tag = raw_response.find("<think>")
-            end_tag = raw_response.find("</think>") + len("</think>")
-
-            if start_tag != -1 and end_tag != -1:
-                thinking_content = raw_response[start_tag + 7 : end_tag - 8]  # Remove tags
-                thinking_stream = thinking_content.strip()
-
-                # Remove thinking from final response
-                final_response = (raw_response[:start_tag] + raw_response[end_tag:]).strip()
-
-        # Alternative patterns for models that use different thinking formats
-        elif "Let me think" in raw_response or "I need to analyze" in raw_response:
-            # Simple heuristic: first paragraph might be thinking
-            lines = raw_response.split("\n")
-            potential_thinking = []
-            final_lines = []
-
-            thinking_indicators = [
-                "Let me think",
-                "I need to",
-                "First, I'll",
-                "Looking at",
-                "Analyzing",
-            ]
-            in_thinking = False
-
-            for line in lines:
-                if any(indicator in line for indicator in thinking_indicators):
-                    in_thinking = True
-                    potential_thinking.append(line)
-                elif in_thinking and (
-                    line.startswith("{") or line.startswith("**") or line.startswith("#")
-                ):
-                    # Likely end of thinking, start of structured response
-                    in_thinking = False
-                    final_lines.append(line)
-                elif in_thinking:
-                    potential_thinking.append(line)
-                else:
-                    final_lines.append(line)
-
-            if potential_thinking:
-                thinking_stream = "\n".join(potential_thinking).strip()
-                final_response = "\n".join(final_lines).strip()
-
-        return thinking_stream, final_response
-
-    def _start_thinking_display(self):
-        """Start the thinking stream display."""
-        print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
-        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
-        self._thinking_buffer = ""
-        self._in_thinking_tags = False
-
-    def _stream_thinking_chunk(self, chunk: str):
-        """Stream a chunk of thinking as it arrives."""
-
-        self._thinking_buffer += chunk
-
-        # Check if we're in thinking tags
-        if "<think>" in self._thinking_buffer and not self._in_thinking_tags:
-            self._in_thinking_tags = True
-            # Display everything after <think>
-            start_idx = self._thinking_buffer.find("<think>") + 7
-            thinking_content = self._thinking_buffer[start_idx:]
-            if thinking_content:
-                print(f"\033[2m\033[3m{thinking_content}\033[0m", end="", flush=True)
-        elif self._in_thinking_tags and "</think>" not in chunk:
-            # We're in thinking mode, display the chunk
-            print(f"\033[2m\033[3m{chunk}\033[0m", end="", flush=True)
-        elif "</think>" in self._thinking_buffer:
-            # End of thinking
-            self._in_thinking_tags = False
-
-    def _end_thinking_display(self):
-        """End the thinking stream display."""
-        print("\n\033[2m\033[3m" + "─" * 40 + "\033[0m")
-        print()
-
-    def _display_thinking_stream(self, thinking_stream: str):
-        """Display thinking stream in light gray and italic (fallback for non-streaming)."""
-        if not thinking_stream:
-            return
-
-        print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
-        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
-
-        # Split into paragraphs and display with proper formatting
-        paragraphs = thinking_stream.split("\n\n")
-        for para in paragraphs:
-            if para.strip():
-                # Wrap long lines nicely
-                lines = para.strip().split("\n")
-                for line in lines:
-                    if line.strip():
-                        # Light gray and italic
-                        print(f"\033[2m\033[3m{line}\033[0m")
-                print()  # Paragraph spacing
-
-        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
-        print()
-
-
-# Quick test function
-
-
-def test_explorer():
-    """Test the code explorer."""
-    explorer = CodeExplorer(Path("."))
-
-    if not explorer.start_exploration_session():
-        print("❌ Could not start exploration session")
-        return
-
-    # Test question
-    response = explorer.explore_question("How does authentication work in this codebase?")
-    if response:
-        print(response)
-
-    print("\n" + explorer.end_session())
-
-
-if __name__ == "__main__":
-    test_explorer()
--- a/mini_rag/llm_safeguards.py
+++ b/mini_rag/llm_safeguards.py
@ -1,360 +0,0 @@
-#!/usr/bin/env python3
-"""
-LLM Safeguards for Small Model Management
-
-Provides runaway prevention, context management, and intelligent detection
-of problematic model behaviors to ensure reliable user experience.
-"""
-
-import logging
-import re
-import time
-from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class SafeguardConfig:
-    """Configuration for LLM safeguards - gentle and educational."""
-
-    max_output_tokens: int = 4000  # Allow longer responses for learning
-    max_repetition_ratio: float = 0.7  # Be very permissive - only catch extreme repetition
-    max_response_time: int = 120  # Allow 2 minutes for complex thinking
-    min_useful_length: int = 10  # Lower threshold - short answers can be useful
-    context_window: int = 32000  # Match Qwen3 context length (32K token limit)
-    enable_thinking_detection: bool = True  # Detect thinking patterns
-
-
-class ModelRunawayDetector:
-    """Detects and prevents model runaway behaviors."""
-
-    def __init__(self, config: SafeguardConfig = None):
-        self.config = config or SafeguardConfig()
-        self.response_patterns = self._compile_patterns()
-
-    def _compile_patterns(self) -> Dict[str, re.Pattern]:
-        """Compile regex patterns for runaway detection."""
-        return {
-            # Excessive repetition patterns
-            "word_repetition": re.compile(r"\b(\w+)\b(?:\s+\1\b){3,}", re.IGNORECASE),
-            "phrase_repetition": re.compile(r"(.{10,50}?)\1{2,}", re.DOTALL),
-            # Thinking loop patterns (small models get stuck)
-            "thinking_loop": re.compile(
-                r"(let me think|i think|thinking|consider|actually|wait|hmm|well)\s*[.,:]*\s*\1",
-                re.IGNORECASE,
-            ),
-            # Rambling patterns
-            "excessive_filler": re.compile(
-                r"\b(um|uh|well|you know|like|basically|actually|so|then|and|but|however)\b(?:\s+[^.!?]*){5,}",
-                re.IGNORECASE,
-            ),
-            # JSON corruption patterns
-            "broken_json": re.compile(r"\{[^}]*\{[^}]*\{"),  # Nested broken JSON
-            "json_repetition": re.compile(
-                r'("[\w_]+"\s*:\s*"[^"]*",?\s*){4,}'
-            ),  # Repeated JSON fields
-        }
-
-    def check_response_quality(
-        self, response: str, query: str, start_time: float
-    ) -> Tuple[bool, Optional[str], Optional[str]]:
-        """
-        Check response quality and detect runaway behaviors.
-
-        Returns:
-            (is_valid, issue_type, user_explanation)
-        """
-        if not response or len(response.strip()) < self.config.min_useful_length:
-            return False, "too_short", self._explain_too_short()
-
-        # Check response time
-        elapsed = time.time() - start_time
-        if elapsed > self.config.max_response_time:
-            return False, "timeout", self._explain_timeout()
-
-        # Check for repetition issues
-        repetition_issue = self._check_repetition(response)
-        if repetition_issue:
-            return False, repetition_issue, self._explain_repetition(repetition_issue)
-
-        # Check for thinking loops
-        if self.config.enable_thinking_detection:
-            thinking_issue = self._check_thinking_loops(response)
-            if thinking_issue:
-                return False, thinking_issue, self._explain_thinking_loop()
-
-        # Check for rambling
-        rambling_issue = self._check_rambling(response)
-        if rambling_issue:
-            return False, rambling_issue, self._explain_rambling()
-
-        # Check JSON corruption (for structured responses)
-        if "{" in response and "}" in response:
-            json_issue = self._check_json_corruption(response)
-            if json_issue:
-                return False, json_issue, self._explain_json_corruption()
-
-        return True, None, None
-
-    def _check_repetition(self, response: str) -> Optional[str]:
-        """Check for excessive repetition."""
-        # Word repetition
-        if self.response_patterns["word_repetition"].search(response):
-            return "word_repetition"
-
-        # Phrase repetition
-        if self.response_patterns["phrase_repetition"].search(response):
-            return "phrase_repetition"
-
-        # Calculate repetition ratio (excluding Qwen3 thinking blocks)
-        analysis_text = response
-        if "<think>" in response and "</think>" in response:
-            # Extract only the actual response (after thinking) for repetition analysis
-            thinking_end = response.find("</think>")
-            if thinking_end != -1:
-                analysis_text = response[thinking_end + 8 :].strip()
-
-                # If the actual response (excluding thinking) is short, don't penalize
-                if len(analysis_text.split()) < 20:
-                    return None
-
-        words = analysis_text.split()
-        if len(words) > 10:
-            unique_words = set(words)
-            repetition_ratio = 1 - (len(unique_words) / len(words))
-            if repetition_ratio > self.config.max_repetition_ratio:
-                return "high_repetition_ratio"
-
-        return None
-
-    def _check_thinking_loops(self, response: str) -> Optional[str]:
-        """Check for thinking loops (common in small models)."""
-        if self.response_patterns["thinking_loop"].search(response):
-            return "thinking_loop"
-
-        # Check for excessive meta-commentary
-        thinking_words = ["think", "considering", "actually", "wait", "hmm", "let me"]
-        thinking_count = sum(response.lower().count(word) for word in thinking_words)
-
-        if thinking_count > 5 and len(response.split()) < 200:
-            return "excessive_thinking"
-
-        return None
-
-    def _check_rambling(self, response: str) -> Optional[str]:
-        """Check for rambling or excessive filler."""
-        if self.response_patterns["excessive_filler"].search(response):
-            return "excessive_filler"
-
-        # Check for extremely long sentences (sign of rambling)
-        sentences = re.split(r"[.!?]+", response)
-        long_sentences = [s for s in sentences if len(s.split()) > 50]
-
-        if len(long_sentences) > 2:
-            return "excessive_rambling"
-
-        return None
-
-    def _check_json_corruption(self, response: str) -> Optional[str]:
-        """Check for JSON corruption in structured responses."""
-        if self.response_patterns["broken_json"].search(response):
-            return "broken_json"
-
-        if self.response_patterns["json_repetition"].search(response):
-            return "json_repetition"
-
-        return None
-
-    def _explain_too_short(self) -> str:
-        return """🤔 The AI response was too short to be helpful.
-
-**Why this happens:**
-• The model might be confused by the query
-• Context might be insufficient
-• Model might be overloaded
-
-**What to try:**
-• Rephrase your question more specifically
-• Try a broader search term first
-• Use exploration mode for complex questions: `rag-mini explore`"""
-
-    def _explain_timeout(self) -> str:
-        return """⏱️ The AI took too long to respond (over 60 seconds).
-
-**Why this happens:**
-• Small models sometimes get "stuck" thinking
-• Complex queries can overwhelm smaller models
-• System might be under load
-
-**What to try:**
-• Try a simpler, more direct question
-• Use synthesis mode for faster responses: `--synthesize`
-• Consider using a larger model if available"""
-
-    def _explain_repetition(self, issue_type: str) -> str:
-        return """🔄 The AI got stuck in repetition loops ({issue_type}).
-
-**Why this happens:**
-• Small models sometimes repeat when uncertain
-• Query might be too complex for the model size
-• Context window might be exceeded
-
-**What to try:**
-• Try a more specific question
-• Break complex questions into smaller parts
-• Use exploration mode which handles context better: `rag-mini explore`
-• Consider: A larger model (qwen3:1.7b or qwen3:4b) would help"""
-
-    def _explain_thinking_loop(self) -> str:
-        return """🧠 The AI got caught in a "thinking loop" - overthinking the response.
-
-**Why this happens:**
-• Small models sometimes over-analyze simple questions
-• Thinking mode can cause loops in smaller models
-• Query complexity exceeds model capabilities
-
-**What to try:**
-• Ask more direct, specific questions
-• Use synthesis mode (no thinking) for faster results
-• Try: "What does this code do?" instead of "Explain how this works"
-• Larger models (qwen3:1.7b+) handle thinking better"""
-
-    def _explain_rambling(self) -> str:
-        return """💭 The AI started rambling instead of giving focused answers.
-
-**Why this happens:**
-• Small models sometimes lose focus on complex topics
-• Query might be too broad or vague
-• Model trying to cover too much at once
-
-**What to try:**
-• Ask more specific questions
-• Break broad questions into focused parts
-• Example: "How is data validated?" instead of "Explain the whole system"
-• Exploration mode helps maintain focus across questions"""
-
-    def _explain_json_corruption(self) -> str:
-        return """🔧 The AI response format got corrupted.
-
-**Why this happens:**
-• Small models sometimes struggle with structured output
-• Context limits can cause format errors
-• Complex analysis might overwhelm formatting
-
-**What to try:**
-• Try the question again (often resolves itself)
-• Use simpler questions for better formatting
-• Synthesis mode sometimes gives cleaner output
-• This is less common with larger models"""
-
-    def get_recovery_suggestions(self, issue_type: str, query: str) -> List[str]:
-        """Get specific recovery suggestions based on the issue."""
-        suggestions = []
-
-        if issue_type in ["thinking_loop", "excessive_thinking"]:
-            suggestions.extend(
-                [
-                    f'Try synthesis mode: `rag-mini search . "{query}" --synthesize`',
-                    "Ask more direct questions without 'why' or 'how'",
-                    "Break complex questions into smaller parts",
-                ]
-            )
-
-        elif issue_type in [
-            "word_repetition",
-            "phrase_repetition",
-            "high_repetition_ratio",
-        ]:
-            suggestions.extend(
-                [
-                    "Try rephrasing your question completely",
-                    "Use more specific technical terms",
-                    "Try exploration mode: `rag-mini explore .`",
-                ]
-            )
-
-        elif issue_type == "timeout":
-            suggestions.extend(
-                [
-                    "Try a simpler version of your question",
-                    "Use synthesis mode for faster responses",
-                    "Check if Ollama is under heavy load",
-                ]
-            )
-
-        # Universal suggestions
-        suggestions.extend(
-            [
-                "Consider using a larger model if available (qwen3:1.7b or qwen3:4b)",
-                "Check model status: `ollama list`",
-            ]
-        )
-
-        return suggestions
-
-
-def get_optimal_ollama_parameters(model_name: str) -> Dict[str, any]:
-    """Get optimal parameters for different Ollama models."""
-
-    base_params = {
-        "num_ctx": 32768,  # Good context window for most uses
-        "num_predict": 2000,  # Reasonable response length
-        "temperature": 0.3,  # Balanced creativity/consistency
-    }
-
-    # Model-specific optimizations
-    if "qwen3:0.6b" in model_name.lower():
-        return {
-            **base_params,
-            "repeat_penalty": 1.15,  # Prevent repetition in small model
-            "presence_penalty": 1.5,  # Suppress repetitive outputs
-            "top_p": 0.8,  # Focused sampling
-            "top_k": 20,  # Limit choices
-            "num_predict": 1500,  # Shorter responses for reliability
-        }
-
-    elif "qwen3:1.7b" in model_name.lower():
-        return {
-            **base_params,
-            "repeat_penalty": 1.1,  # Less aggressive for larger model
-            "presence_penalty": 1.0,  # Balanced
-            "top_p": 0.9,  # More creative
-            "top_k": 40,  # More choices
-        }
-
-    elif any(size in model_name.lower() for size in ["3b", "7b", "8b"]):
-        return {
-            **base_params,
-            "repeat_penalty": 1.05,  # Minimal for larger models
-            "presence_penalty": 0.5,  # Light touch
-            "top_p": 0.95,  # High creativity
-            "top_k": 50,  # Many choices
-            "num_predict": 3000,  # Longer responses OK
-        }
-
-    return base_params
-
-
-# Quick test
-
-
-def test_safeguards():
-    """Test the safeguard system."""
-    detector = ModelRunawayDetector()
-
-    # Test repetition detection
-    bad_response = "The user authentication system works by checking user credentials. The user authentication system works by checking user credentials. The user authentication system works by checking user credentials."
-
-    is_valid, issue, explanation = detector.check_response_quality(
-        bad_response, "auth", time.time()
-    )
-
-    print(f"Repetition test: Valid={is_valid}, Issue={issue}")
-    if explanation:
-        print(explanation)
-
-
-if __name__ == "__main__":
-    test_safeguards()
--- a/mini_rag/llm_synthesizer.py
+++ b/mini_rag/llm_synthesizer.py
@ -1,992 +0,0 @@
-#!/usr/bin/env python3
-"""
-LLM Synthesizer for RAG Results
-
-Provides intelligent synthesis of search results using Ollama LLMs.
-Takes raw search results and generates coherent, contextual summaries.
-"""
-
-import json
-import logging
-import time
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, List, Optional
-
-import requests
-
-try:
-    from .llm_safeguards import (
-        ModelRunawayDetector,
-        SafeguardConfig,
-        get_optimal_ollama_parameters,
-    )
-    from .system_context import get_system_context
-except ImportError:
-    # Graceful fallback if safeguards not available
-    ModelRunawayDetector = None
-    SafeguardConfig = None
-
-    def get_optimal_ollama_parameters(x):
-        return {}
-
-    def get_system_context(x=None):
-        return ""
-
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class SynthesisResult:
-    """Result of LLM synthesis."""
-
-    summary: str
-    key_points: List[str]
-    code_examples: List[str]
-    suggested_actions: List[str]
-    confidence: float
-
-
-class LLMSynthesizer:
-    """Synthesizes RAG search results using Ollama LLMs."""
-
-    def __init__(
-        self,
-        ollama_url: str = "http://localhost:11434",
-        model: str = None,
-        enable_thinking: bool = False,
-        config=None,
-    ):
-        self.ollama_url = ollama_url.rstrip("/")
-        self.available_models = []
-        self.model = model
-        self.enable_thinking = enable_thinking  # Default False for synthesis mode
-        self._initialized = False
-        self.config = config  # For accessing model rankings
-
-        # Initialize safeguards
-        if ModelRunawayDetector:
-            self.safeguard_detector = ModelRunawayDetector(SafeguardConfig())
-        else:
-            self.safeguard_detector = None
-
-    def _get_available_models(self) -> List[str]:
-        """Get list of available Ollama models."""
-        try:
-            response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
-            if response.status_code == 200:
-                data = response.json()
-                return [model["name"] for model in data.get("models", [])]
-        except Exception as e:
-            logger.warning(f"Could not fetch Ollama models: {e}")
-        return []
-
-    def _select_best_model(self) -> str:
-        """Select the best available model based on configuration rankings with robust name resolution."""
-        if not self.available_models:
-            # Use config fallback if available, otherwise use default
-            if (
-                self.config
-                and hasattr(self.config, "llm")
-                and hasattr(self.config.llm, "model_rankings")
-                and self.config.llm.model_rankings
-            ):
-                return self.config.llm.model_rankings[0]  # First preferred model
-            return "qwen2.5:1.5b"  # System fallback only if no config
-
-        # Get model rankings from config or use defaults
-        if (
-            self.config
-            and hasattr(self.config, "llm")
-            and hasattr(self.config.llm, "model_rankings")
-        ):
-            model_rankings = self.config.llm.model_rankings
-        else:
-            # Fallback rankings if no config
-            model_rankings = [
-                "qwen3:1.7b",
-                "qwen3:0.6b",
-                "qwen3:4b",
-                "qwen2.5:3b",
-                "qwen2.5:1.5b",
-                "qwen2.5-coder:1.5b",
-            ]
-
-        # Find first available model from our ranked list using relaxed name resolution
-        for preferred_model in model_rankings:
-            resolved_model = self._resolve_model_name(preferred_model)
-            if resolved_model:
-                logger.info(f"Selected model: {resolved_model} (requested: {preferred_model})")
-                return resolved_model
-
-        # If no preferred models found, use first available
-        fallback = self.available_models[0]
-        logger.warning(f"Using fallback model: {fallback}")
-        return fallback
-
-    def _resolve_model_name(self, configured_model: str) -> Optional[str]:
-        """Auto-resolve model names to match what's actually available in Ollama.
-        
-        This handles common patterns like:
-        - qwen3:1.7b -> qwen3:1.7b-q8_0
-        - qwen3:4b -> qwen3:4b-instruct-2507-q4_K_M
-        - auto -> first available model from ranked preference
-        """
-        logger.debug(f"Resolving model: {configured_model}")
-        
-        if not self.available_models:
-            logger.warning("No available models for resolution")
-            return None
-            
-        # Handle special 'auto' directive - use smart selection
-        if configured_model.lower() == 'auto':
-            logger.info("Using AUTO selection...")
-            return self._select_best_available_model()
-            
-        # Direct exact match first (case-insensitive)
-        for available_model in self.available_models:
-            if configured_model.lower() == available_model.lower():
-                logger.info(f"✅ EXACT MATCH: {available_model}")
-                return available_model
-        
-        # Relaxed matching - extract base model and size, then find closest match
-        logger.info(f"No exact match for '{configured_model}', trying relaxed matching...")
-        match = self._find_closest_model_match(configured_model)
-        if match:
-            logger.info(f"✅ FUZZY MATCH: {configured_model} -> {match}")
-        else:
-            logger.warning(f"❌ NO MATCH: {configured_model} not found in available models")
-        return match
-    
-    def _select_best_available_model(self) -> str:
-        """Select the best available model from what's actually installed."""
-        if not self.available_models:
-            logger.warning("No models available from Ollama - using fallback")
-            return "qwen2.5:1.5b"  # fallback
-            
-        logger.info(f"Available models: {self.available_models}")
-        
-        # Priority order for auto selection - prefer newer and larger models
-        priority_patterns = [
-            # Qwen3 series (newest)
-            "qwen3:8b", "qwen3:4b", "qwen3:1.7b", "qwen3:0.6b",
-            # Qwen2.5 series 
-            "qwen2.5:3b", "qwen2.5:1.5b", "qwen2.5:0.5b",
-            # Any other model as fallback
-        ]
-        
-        # Find first match from priority list
-        logger.info("Searching for best model match...")
-        for pattern in priority_patterns:
-            match = self._find_closest_model_match(pattern)
-            if match:
-                logger.info(f"✅ AUTO SELECTED: {match} (matched pattern: {pattern})")
-                return match
-            else:
-                logger.debug(f"No match found for pattern: {pattern}")
-                
-        # If nothing matches, just use first available
-        fallback = self.available_models[0]
-        logger.warning(f"⚠️  Using first available model as fallback: {fallback}")
-        return fallback
-    
-    def _find_closest_model_match(self, configured_model: str) -> Optional[str]:
-        """Find the closest matching model using relaxed criteria."""
-        if not self.available_models:
-            logger.debug(f"No available models to match against for: {configured_model}")
-            return None
-            
-        # Extract base model and size from configured model
-        # e.g., "qwen3:4b" -> ("qwen3", "4b")
-        if ':' not in configured_model:
-            base_model = configured_model
-            size = None
-        else:
-            base_model, size_part = configured_model.split(':', 1)
-            # Extract just the size (remove any suffixes like -q8_0)
-            size = size_part.split('-')[0] if '-' in size_part else size_part
-        
-        logger.debug(f"Looking for base model: '{base_model}', size: '{size}'")
-        
-        # Find all models that match the base model
-        candidates = []
-        for available_model in self.available_models:
-            if ':' not in available_model:
-                continue
-                
-            avail_base, avail_full = available_model.split(':', 1)
-            if avail_base.lower() == base_model.lower():
-                candidates.append(available_model)
-                logger.debug(f"Found candidate: {available_model}")
-        
-        if not candidates:
-            logger.debug(f"No candidates found for base model: {base_model}")
-            return None
-            
-        # If we have a size preference, try to match it
-        if size:
-            for candidate in candidates:
-                # Check if size appears in the model name
-                if size.lower() in candidate.lower():
-                    logger.debug(f"Size match found: {candidate} contains '{size}'")
-                    return candidate
-            logger.debug(f"No size match found for '{size}', using first candidate")
-        
-        # If no size match or no size specified, return first candidate
-        selected = candidates[0]
-        logger.debug(f"Returning first candidate: {selected}")
-        return selected
-
-    # Old pattern matching methods removed - using simpler approach now
-
-    def _ensure_initialized(self):
-        """Lazy initialization with LLM warmup."""
-        if self._initialized:
-            return
-
-        # Load available models
-        self.available_models = self._get_available_models()
-        if not self.model:
-            self.model = self._select_best_model()
-
-        # Skip warmup - models are fast enough and warmup causes delays
-        # Warmup removed to eliminate startup delays and unwanted model calls
-
-        self._initialized = True
-
-    def _get_optimal_context_size(self, model_name: str) -> int:
-        """Get optimal context size based on model capabilities and configuration."""
-        # Get configured context window
-        if self.config and hasattr(self.config, "llm"):
-            configured_context = self.config.llm.context_window
-            auto_context = getattr(self.config.llm, "auto_context", True)
-        else:
-            configured_context = 16384  # Default to 16K
-            auto_context = True
-
-        # Model-specific maximum context windows (based on research)
-        model_limits = {
-            # Qwen3 models with native context support
-            "qwen3:0.6b": 32768,  # 32K native
-            "qwen3:1.7b": 32768,  # 32K native
-            "qwen3:4b": 131072,  # 131K with YaRN extension
-            # Qwen2.5 models
-            "qwen2.5:1.5b": 32768,  # 32K native
-            "qwen2.5:3b": 32768,  # 32K native
-            "qwen2.5-coder:1.5b": 32768,  # 32K native
-            # Fallback for unknown models
-            "default": 8192,
-        }
-
-        # Find model limit (check for partial matches)
-        model_limit = model_limits.get("default", 8192)
-        for model_pattern, limit in model_limits.items():
-            if model_pattern != "default" and model_pattern.lower() in model_name.lower():
-                model_limit = limit
-                break
-
-        # If auto_context is enabled, respect model limits
-        if auto_context:
-            optimal_context = min(configured_context, model_limit)
-        else:
-            optimal_context = configured_context
-
-        # Ensure minimum usable context for RAG
-        optimal_context = max(optimal_context, 4096)  # Minimum 4K for basic RAG
-
-        logger.debug(
-            f"Context for {model_name}: {optimal_context} tokens (configured: {configured_context}, limit: {model_limit})"
-        )
-        return optimal_context
-
-    def is_available(self) -> bool:
-        """Check if Ollama is available and has models."""
-        self._ensure_initialized()
-        return len(self.available_models) > 0
-
-    def _call_ollama(
-        self,
-        prompt: str,
-        temperature: float = 0.3,
-        disable_thinking: bool = False,
-        use_streaming: bool = True,
-        collapse_thinking: bool = True,
-    ) -> Optional[str]:
-        """Make a call to Ollama API with safeguards."""
-        start_time = time.time()
-
-        try:
-            # Ensure we're initialized
-            self._ensure_initialized()
-
-            # Use the best available model with retry logic
-            model_to_use = self.model
-            if self.model not in self.available_models:
-                # Refresh model list in case of race condition
-                logger.warning(
-                    f"Configured model {self.model} not in available list, refreshing..."
-                )
-                self.available_models = self._get_available_models()
-
-                if self.model in self.available_models:
-                    model_to_use = self.model
-                    logger.info(f"Model {self.model} found after refresh")
-                elif self.available_models:
-                    # Fallback to first available model
-                    model_to_use = self.available_models[0]
-                    logger.warning(f"Using fallback model: {model_to_use}")
-                else:
-                    logger.error("No Ollama models available")
-                    return None
-
-            # Handle thinking mode for Qwen3 models
-            final_prompt = prompt
-            use_thinking = self.enable_thinking and not disable_thinking
-
-            # For non-thinking mode, add <no_think> tag for Qwen3
-            if not use_thinking and "qwen3" in model_to_use.lower():
-                if not final_prompt.endswith(" <no_think>"):
-                    final_prompt += " <no_think>"
-
-            # Get optimal parameters for this model
-            optimal_params = get_optimal_ollama_parameters(model_to_use)
-
-            # Qwen3-specific optimal parameters based on research
-            if "qwen3" in model_to_use.lower():
-                if use_thinking:
-                    # Thinking mode: Temperature=0.6, TopP=0.95, TopK=20, PresencePenalty=1.5
-                    qwen3_temp = 0.6
-                    qwen3_top_p = 0.95
-                    qwen3_top_k = 20
-                    qwen3_presence = 1.5
-                else:
-                    # Non-thinking mode: Temperature=0.7, TopP=0.8, TopK=20, PresencePenalty=1.5
-                    qwen3_temp = 0.7
-                    qwen3_top_p = 0.8
-                    qwen3_top_k = 20
-                    qwen3_presence = 1.5
-            else:
-                qwen3_temp = temperature
-                qwen3_top_p = optimal_params.get("top_p", 0.9)
-                qwen3_top_k = optimal_params.get("top_k", 40)
-                qwen3_presence = optimal_params.get("presence_penalty", 1.0)
-
-            payload = {
-                "model": model_to_use,
-                "prompt": final_prompt,
-                "stream": use_streaming,
-                "options": {
-                    "temperature": qwen3_temp,
-                    "top_p": qwen3_top_p,
-                    "top_k": qwen3_top_k,
-                    "num_ctx": self._get_optimal_context_size(
-                        model_to_use
-                    ),  # Dynamic context based on model and config
-                    "num_predict": optimal_params.get("num_predict", 2000),
-                    "repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
-                    "presence_penalty": qwen3_presence,
-                },
-            }
-
-            # Handle streaming with thinking display
-            if use_streaming:
-                return self._handle_streaming_with_thinking_display(
-                    payload, model_to_use, use_thinking, start_time, collapse_thinking
-                )
-
-            response = requests.post(
-                f"{self.ollama_url}/api/generate",
-                json=payload,
-                timeout=65,  # Slightly longer than safeguard timeout
-            )
-
-            if response.status_code == 200:
-                result = response.json()
-
-                # All models use standard response format
-                # Qwen3 thinking tokens are embedded in the response content itself as <think>...</think>
-                raw_response = result.get("response", "").strip()
-
-                # Log thinking content for Qwen3 debugging
-                if (
-                    "qwen3" in model_to_use.lower()
-                    and use_thinking
-                    and "<think>" in raw_response
-                ):
-                    thinking_start = raw_response.find("<think>")
-                    thinking_end = raw_response.find("</think>")
-                    if thinking_start != -1 and thinking_end != -1:
-                        thinking_content = raw_response[thinking_start + 7 : thinking_end]
-                        logger.info(f"Qwen3 thinking: {thinking_content[:100]}...")
-
-                # Apply safeguards to check response quality
-                if self.safeguard_detector and raw_response:
-                    is_valid, issue_type, explanation = (
-                        self.safeguard_detector.check_response_quality(
-                            raw_response,
-                            prompt[:100],
-                            start_time,  # First 100 chars of prompt for context
-                        )
-                    )
-
-                    if not is_valid:
-                        logger.warning(f"Safeguard triggered: {issue_type}")
-                        # Preserve original response but add safeguard warning
-                        return self._create_safeguard_response_with_content(
-                            issue_type, explanation, raw_response
-                        )
-
-                # Clean up thinking tags from final response
-                cleaned_response = raw_response
-                if "<think>" in cleaned_response or "</think>" in cleaned_response:
-                    # Remove thinking content but preserve the rest
-                    cleaned_response = cleaned_response.replace("<think>", "").replace(
-                        "</think>", ""
-                    )
-                    # Clean up extra whitespace that might be left
-                    lines = cleaned_response.split("\n")
-                    cleaned_lines = []
-                    for line in lines:
-                        if line.strip():  # Only keep non-empty lines
-                            cleaned_lines.append(line)
-                    cleaned_response = "\n".join(cleaned_lines)
-
-                return cleaned_response.strip()
-            else:
-                logger.error(f"Ollama API error: {response.status_code}")
-                return None
-
-        except Exception as e:
-            logger.error(f"Ollama call failed: {e}")
-            return None
-
-    def _create_safeguard_response(
-        self, issue_type: str, explanation: str, original_prompt: str
-    ) -> str:
-        """Create a helpful response when safeguards are triggered."""
-        return """⚠️ Model Response Issue Detected
-
-{explanation}
-
-**Original query context:** {original_prompt[:200]}{'...' if len(original_prompt) > 200 else ''}
-
-**What happened:** The AI model encountered a common issue with small language models and was prevented from giving a problematic response.
-
-**Your options:**
-1. **Try again**: Ask the same question (often resolves itself)
-2. **Rephrase**: Make your question more specific or break it into parts
-3. **Use exploration mode**: `rag-mini explore` for complex questions
-4. **Different approach**: Try synthesis mode: `--synthesize` for simpler responses
-
-This is normal with smaller AI models and helps ensure you get quality responses."""
-
-    def _create_safeguard_response_with_content(
-        self, issue_type: str, explanation: str, original_response: str
-    ) -> str:
-        """Create a response that preserves the original content but adds a safeguard warning."""
-
-        # For Qwen3, extract the actual response (after thinking)
-        actual_response = original_response
-        if "<think>" in original_response and "</think>" in original_response:
-            thinking_end = original_response.find("</think>")
-            if thinking_end != -1:
-                actual_response = original_response[thinking_end + 8 :].strip()
-
-        # If we have useful content, preserve it with a warning
-        if len(actual_response.strip()) > 20:
-            return """⚠️ **Response Quality Warning** ({issue_type})
-
-{explanation}
-
---
-
-**AI Response (use with caution):**
-
-{actual_response}
-
---
-
-💡 **Note**: This response may have quality issues. Consider rephrasing your question or trying exploration mode for better results."""
-        else:
-            # If content is too short or problematic, use the original safeguard response
-            return """⚠️ Model Response Issue Detected
-
-{explanation}
-
-**What happened:** The AI model encountered a common issue with small language models.
-
-**Your options:**
-1. **Try again**: Ask the same question (often resolves itself)
-2. **Rephrase**: Make your question more specific or break it into parts
-3. **Use exploration mode**: `rag-mini explore` for complex questions
-
-This is normal with smaller AI models and helps ensure you get quality responses."""
-
-    def _handle_streaming_with_thinking_display(
-        self,
-        payload: dict,
-        model_name: str,
-        use_thinking: bool,
-        start_time: float,
-        collapse_thinking: bool = True,
-    ) -> Optional[str]:
-        """Handle streaming response with real-time thinking token display."""
-        import json
-
-        try:
-            response = requests.post(
-                f"{self.ollama_url}/api/generate", json=payload, stream=True, timeout=65
-            )
-
-            if response.status_code != 200:
-                logger.error(f"Ollama API error: {response.status_code}")
-                return None
-
-            full_response = ""
-            thinking_content = ""
-            is_in_thinking = False
-            is_thinking_complete = False
-            thinking_lines_printed = 0
-
-            # ANSI escape codes for colors and cursor control
-            GRAY = "\033[90m"  # Dark gray for thinking
-            # "\033[37m"  # Light gray alternative  # Unused variable removed
-            RESET = "\033[0m"  # Reset color
-            CLEAR_LINE = "\033[2K"  # Clear entire line
-            CURSOR_UP = "\033[A"  # Move cursor up one line
-
-            print(f"\n💭 {GRAY}Thinking...{RESET}", flush=True)
-
-            for line in response.iter_lines():
-                if line:
-                    try:
-                        chunk_data = json.loads(line.decode("utf-8"))
-                        chunk_text = chunk_data.get("response", "")
-
-                        if chunk_text:
-                            full_response += chunk_text
-
-                            # Handle thinking tokens
-                            if use_thinking and "<think>" in chunk_text:
-                                is_in_thinking = True
-                                chunk_text = chunk_text.replace("<think>", "")
-
-                            if is_in_thinking and "</think>" in chunk_text:
-                                is_in_thinking = False
-                                is_thinking_complete = True
-                                chunk_text = chunk_text.replace("</think>", "")
-
-                                if collapse_thinking:
-                                    # Clear thinking content and show completion
-                                    # Move cursor up to clear thinking lines
-                                    for _ in range(thinking_lines_printed + 1):
-                                        print(
-                                            f"{CURSOR_UP}{CLEAR_LINE}",
-                                            end="",
-                                            flush=True,
-                                        )
-
-                                    print(
-                                        f"💭 {GRAY}Thinking complete ✓{RESET}",
-                                        flush=True,
-                                    )
-                                    thinking_lines_printed = 0
-                                else:
-                                    # Keep thinking visible, just show completion
-                                    print(
-                                        f"\n💭 {GRAY}Thinking complete ✓{RESET}",
-                                        flush=True,
-                                    )
-
-                                print("🤖 AI Response:", flush=True)
-                                continue
-
-                            # Display thinking content in gray with better formatting
-                            if is_in_thinking and chunk_text.strip():
-                                thinking_content += chunk_text
-
-                                # Handle line breaks and word wrapping properly
-                                if (
-                                    " " in chunk_text
-                                    or "\n" in chunk_text
-                                    or len(thinking_content) > 100
-                                ):
-                                    # Split by sentences for better readability
-                                    sentences = thinking_content.replace("\n", " ").split(". ")
-
-                                    for sentence in sentences[
-                                        :-1
-                                    ]:  # Process complete sentences
-                                        sentence = sentence.strip()
-                                        if sentence:
-                                            # Word wrap long sentences
-                                            words = sentence.split()
-                                            line = ""
-                                            for word in words:
-                                                if len(line + " " + word) > 70:
-                                                    if line:
-                                                        print(
-                                                            f"{GRAY}   {line.strip()}{RESET}",
-                                                            flush=True,
-                                                        )
-                                                        thinking_lines_printed += 1
-                                                    line = word
-                                                else:
-                                                    line += " " + word if line else word
-
-                                            if line.strip():
-                                                print(
-                                                    f"{GRAY}   {line.strip()}.{RESET}",
-                                                    flush=True,
-                                                )
-                                                thinking_lines_printed += 1
-
-                                    # Keep the last incomplete sentence for next iteration
-                                    thinking_content = sentences[-1] if sentences else ""
-
-                            # Display regular response content (skip any leftover thinking)
-                            elif (
-                                not is_in_thinking
-                                and is_thinking_complete
-                                and chunk_text.strip()
-                            ):
-                                # Filter out any remaining thinking tags that might leak through
-                                clean_text = chunk_text
-                                if "<think>" in clean_text or "</think>" in clean_text:
-                                    clean_text = clean_text.replace("<think>", "").replace(
-                                        "</think>", ""
-                                    )
-
-                                if clean_text:  # Remove .strip() here to preserve whitespace
-                                    # Preserve all formatting including newlines and spaces
-                                    print(clean_text, end="", flush=True)
-
-                        # Check if response is done
-                        if chunk_data.get("done", False):
-                            print()  # Final newline
-                            break
-
-                    except json.JSONDecodeError:
-                        continue
-                    except Exception as e:
-                        logger.error(f"Error processing stream chunk: {e}")
-                        continue
-
-            return full_response
-
-        except Exception as e:
-            logger.error(f"Streaming failed: {e}")
-            return None
-
-    def _handle_streaming_with_early_stop(
-        self, payload: dict, model_name: str, use_thinking: bool, start_time: float
-    ) -> Optional[str]:
-        """Handle streaming response with intelligent early stopping."""
-        import json
-
-        try:
-            response = requests.post(
-                f"{self.ollama_url}/api/generate", json=payload, stream=True, timeout=65
-            )
-
-            if response.status_code != 200:
-                logger.error(f"Ollama API error: {response.status_code}")
-                return None
-
-            full_response = ""
-            word_buffer = []
-            repetition_window = 30  # Check last 30 words for repetition (more context)
-            stop_threshold = (
-                0.8  # Stop only if 80% of recent words are repetitive (very permissive)
-            )
-            min_response_length = 100  # Don't early stop until we have at least 100 chars
-
-            for line in response.iter_lines():
-                if line:
-                    try:
-                        chunk_data = json.loads(line.decode("utf-8"))
-                        chunk_text = chunk_data.get("response", "")
-
-                        if chunk_text:
-                            full_response += chunk_text
-
-                            # Add words to buffer for repetition detection
-                            new_words = chunk_text.split()
-                            word_buffer.extend(new_words)
-
-                            # Keep only recent words in buffer
-                            if len(word_buffer) > repetition_window:
-                                word_buffer = word_buffer[-repetition_window:]
-
-                            # Check for repetition patterns after we have enough words AND content
-                            if (
-                                len(word_buffer) >= repetition_window
-                                and len(full_response) >= min_response_length
-                            ):
-                                unique_words = set(word_buffer)
-                                repetition_ratio = 1 - (len(unique_words) / len(word_buffer))
-
-                                # Early stop only if repetition is EXTREMELY high (80%+)
-                                if repetition_ratio > stop_threshold:
-                                    logger.info(
-                                        f"Early stopping due to repetition: {repetition_ratio:.2f}"
-                                    )
-
-                                    # Add a gentle completion to the response
-                                    if not full_response.strip().endswith((".", "!", "?")):
-                                        full_response += "..."
-
-                                    # Send stop signal to model (attempt to gracefully stop)
-                                    try:
-                                        stop_payload = {
-                                            "model": model_name,
-                                            "stop": True,
-                                        }
-                                        requests.post(
-                                            f"{self.ollama_url}/api/generate",
-                                            json=stop_payload,
-                                            timeout=2,
-                                        )
-                                    except (
-                                        ConnectionError,
-                                        FileNotFoundError,
-                                        IOError,
-                                        OSError,
-                                        TimeoutError,
-                                        requests.RequestException,
-                                    ):
-                                        pass  # If stop fails, we already have partial response
-
-                                    break
-
-                        if chunk_data.get("done", False):
-                            break
-
-                    except json.JSONDecodeError:
-                        continue
-
-            # Clean up thinking tags from final response
-            cleaned_response = full_response
-            if "<think>" in cleaned_response or "</think>" in cleaned_response:
-                # Remove thinking content but preserve the rest
-                cleaned_response = cleaned_response.replace("<think>", "").replace(
-                    "</think>", ""
-                )
-                # Clean up extra whitespace that might be left
-                lines = cleaned_response.split("\n")
-                cleaned_lines = []
-                for line in lines:
-                    if line.strip():  # Only keep non-empty lines
-                        cleaned_lines.append(line)
-                cleaned_response = "\n".join(cleaned_lines)
-
-            return cleaned_response.strip()
-
-        except Exception as e:
-            logger.error(f"Streaming with early stop failed: {e}")
-            return None
-
-    def synthesize_search_results(
-        self, query: str, results: List[Any], project_path: Path
-    ) -> SynthesisResult:
-        """Synthesize search results into a coherent summary."""
-
-        self._ensure_initialized()
-        if not self.is_available():
-            return SynthesisResult(
-                summary="LLM synthesis unavailable (Ollama not running or no models)",
-                key_points=[],
-                code_examples=[],
-                suggested_actions=["Install and run Ollama with a model"],
-                confidence=0.0,
-            )
-
-        # Prepare context from search results
-        context_parts = []
-        for i, result in enumerate(results[:8], 1):  # Limit to top 8 results
-            # result.file_path if hasattr(result, "file_path") else "unknown"  # Unused variable removed
-            # result.content if hasattr(result, "content") else str(result)  # Unused variable removed
-            # result.score if hasattr(result, "score") else 0.0  # Unused variable removed
-
-            context_parts.append(
-                """
-Result {i} (Score: {score:.3f}):
-File: {file_path}
-Content: {content[:500]}{'...' if len(content) > 500 else ''}
-"""
-            )
-
-        # "\n".join(context_parts)  # Unused variable removed
-
-        # Get system context for better responses
-        # get_system_context(project_path)  # Unused variable removed
-
-        # Create synthesis prompt with system context
-        prompt = """You are a senior software engineer analyzing code search results. Your task is to synthesize the search results into a helpful, actionable summary.
-
-SYSTEM CONTEXT: {system_context}
-SEARCH QUERY: "{query}"
-PROJECT: {project_path.name}
-
-SEARCH RESULTS:
-{context}
-
-Please provide a synthesis in the following JSON format:
-{{
-    "summary": "A 2-3 sentence overview of what the search results show",
-    "key_points": [
-        "Important finding 1",
-        "Important finding 2",
-        "Important finding 3"
-    ],
-    "code_examples": [
-        "Relevant code snippet or pattern from the results",
-        "Another important code example"
-    ],
-    "suggested_actions": [
-        "What the developer should do next",
-        "Additional recommendations"
-    ],
-    "confidence": 0.85
-}}
-
-Focus on:
- What the code does and how it works
- Patterns and relationships between the results
- Practical next steps for the developer
- Code quality observations
-
-Respond with ONLY the JSON, no other text."""
-
-        # Get LLM response
-        response = self._call_ollama(prompt, temperature=0.2)
-
-        if not response:
-            return SynthesisResult(
-                summary="LLM synthesis failed (API error)",
-                key_points=[],
-                code_examples=[],
-                suggested_actions=["Check Ollama status and try again"],
-                confidence=0.0,
-            )
-
-        # Parse JSON response
-        try:
-            # Extract JSON from response (in case there's extra text)
-            start_idx = response.find("{")
-            end_idx = response.rfind("}") + 1
-            if start_idx >= 0 and end_idx > start_idx:
-                json_str = response[start_idx:end_idx]
-                data = json.loads(json_str)
-
-                return SynthesisResult(
-                    summary=data.get("summary", "No summary generated"),
-                    key_points=data.get("key_points", []),
-                    code_examples=data.get("code_examples", []),
-                    suggested_actions=data.get("suggested_actions", []),
-                    confidence=float(data.get("confidence", 0.5)),
-                )
-            else:
-                # Fallback: use the raw response as summary
-                return SynthesisResult(
-                    summary=response[:300] + "..." if len(response) > 300 else response,
-                    key_points=[],
-                    code_examples=[],
-                    suggested_actions=[],
-                    confidence=0.3,
-                )
-
-        except Exception as e:
-            logger.error(f"Failed to parse LLM response: {e}")
-            return SynthesisResult(
-                summary="LLM synthesis failed (JSON parsing error)",
-                key_points=[],
-                code_examples=[],
-                suggested_actions=["Try the search again or check LLM output"],
-                confidence=0.0,
-            )
-
-    def format_synthesis_output(self, synthesis: SynthesisResult, query: str) -> str:
-        """Format synthesis result for display."""
-
-        output = []
-        output.append("🧠 LLM SYNTHESIS")
-        output.append("=" * 50)
-        output.append("")
-
-        output.append("📝 Summary:")
-        output.append(f"   {synthesis.summary}")
-        output.append("")
-
-        if synthesis.key_points:
-            output.append("🔍 Key Findings:")
-            for point in synthesis.key_points:
-                output.append(f"   • {point}")
-            output.append("")
-
-        if synthesis.code_examples:
-            output.append("💡 Code Patterns:")
-            for example in synthesis.code_examples:
-                output.append(f"   {example}")
-            output.append("")
-
-        if synthesis.suggested_actions:
-            output.append("🎯 Suggested Actions:")
-            for action in synthesis.suggested_actions:
-                output.append(f"   • {action}")
-            output.append("")
-
-        confidence_emoji = (
-            "🟢"
-            if synthesis.confidence > 0.7
-            else "🟡" if synthesis.confidence > 0.4 else "🔴"
-        )
-        output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}")
-        output.append("")
-
-        return "\n".join(output)
-
-
-# Quick test function
-
-
-def test_synthesizer():
-    """Test the synthesizer with sample data."""
-    from dataclasses import dataclass
-
-    @dataclass
-    class MockResult:
-        file_path: str
-        content: str
-        score: float
-
-    synthesizer = LLMSynthesizer()
-
-    if not synthesizer.is_available():
-        print("❌ Ollama not available for testing")
-        return
-
-    # Mock search results
-    results = [
-        MockResult(
-            "auth.py",
-            "def authenticate_user(username, password):\n    return verify_credentials(username, password)",
-            0.95,
-        ),
-        MockResult(
-            "models.py",
-            "class User:\n    def login(self):\n        return authenticate_user(self.username, self.password)",
-            0.87,
-        ),
-    ]
-
-    synthesis = synthesizer.synthesize_search_results(
-        "user authentication", results, Path("/test/project")
-    )
-
-    print(synthesizer.format_synthesis_output(synthesis, "user authentication"))
-
-
-if __name__ == "__main__":
-    test_synthesizer()
--- a/Show More
+++ b/Show More