126 changed files with 5785 additions and 22066 deletions
--- a/.flake8
+++ b/.flake8
@ -1,19 +0,0 @@
 [flake8]
 # Professional Python code style - balances quality with readability
 # Lines may be up to 95 characters long before a length warning is raised.
 max-line-length = 95
 # Checks disabled for the whole project, on top of flake8's default ignore list:
 #   E203 - whitespace before ':'
 #   W503 - line break before binary operator
 #   W605 - invalid escape sequence
 extend-ignore = E203,W503,W605
 # Paths that are never linted: virtual environments, bytecode caches,
 # packaging metadata, VCS data, build output and the .mini-rag directory.
 exclude = 
    .venv,
    .venv-linting,
    __pycache__,
    *.egg-info,
    .git,
    build,
    dist,
    .mini-rag
 # Per-file ignores for practical development
 #   F401 - module imported but unused
 #   F841 - local variable assigned but never used
 #   E501 - line too long (waived only for fix_*.py)
 per-file-ignores =
    tests/*.py:F401,F841
    examples/*.py:F401,F841
    fix_*.py:F401,F841,E501
--- a/.github/workflows/build-and-release.yml
+++ b/.github/workflows/build-and-release.yml
@ -1,254 +0,0 @@
 name: Build and Release
 # Triggers: pushes of v* tags or to main, pull requests targeting main,
 # and manual runs from the Actions tab.
 on:
  push:
    tags: ['v*']
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:
 jobs:
  build-wheels:
    # Builds platform wheels with cibuildwheel on every OS in the matrix and,
    # on the Ubuntu leg only, a source distribution. The results are uploaded
    # as the "wheels-<os>" and "sdist" artifacts.
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-13, macos-14]
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      # v5 keeps this workflow on the same setup-python major as ci.yml and release.yml.
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install build twine cibuildwheel
    - name: Build wheels
      uses: pypa/cibuildwheel@v2.16
      env:
        # CPython 3.8-3.12 only; PyPy, musllinux and 32-bit targets are skipped.
        CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*"
        CIBW_SKIP: "pp* *musllinux* *i686* *win32*"
        CIBW_ARCHS_MACOS: "x86_64 arm64"
        CIBW_ARCHS_LINUX: "x86_64"
        CIBW_ARCHS_WINDOWS: "AMD64"
        # Smoke test run by cibuildwheel against each freshly built wheel.
        CIBW_TEST_COMMAND: "rag-mini --help"
        CIBW_TEST_SKIP: "*arm64*"  # Skip tests on arm64 due to emulation issues
    - name: Build source distribution
      # One sdist is enough, so only the Ubuntu leg builds it.
      if: matrix.os == 'ubuntu-latest'
      run: python -m build --sdist
    - name: Upload wheels
      uses: actions/upload-artifact@v4
      with:
        name: wheels-${{ matrix.os }}
        path: ./wheelhouse/*.whl
        # Fail here rather than in a downstream job if no wheel was produced.
        if-no-files-found: error
    - name: Upload source distribution
      if: matrix.os == 'ubuntu-latest'
      uses: actions/upload-artifact@v4
      with:
        name: sdist
        path: ./dist/*.tar.gz
        if-no-files-found: error
  build-zipapp:
    # Builds the single-file rag-mini.pyz with scripts/build_pyz.py and
    # uploads it as the "zipapp" artifact.
    name: Build zipapp (.pyz)
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      # v5 keeps this workflow on the same setup-python major as ci.yml and release.yml.
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install -r requirements.txt
    - name: Build zipapp
      run: python scripts/build_pyz.py
    - name: Upload zipapp
      uses: actions/upload-artifact@v4
      with:
        name: zipapp
        path: dist/rag-mini.pyz
        # Downstream jobs copy this file, so a missing zipapp must fail the build here.
        if-no-files-found: error
  test-installation:
    # Installs the artifacts produced by the build jobs on a clean interpreter
    # and checks that the `rag-mini` entry point starts.
    name: Test installation methods
    needs: [build-wheels, build-zipapp]
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # Every entry must match an OS used by build-wheels, because the wheel
        # artifact is looked up as wheels-<os>. build-wheels has no
        # "macos-latest" leg, so macOS is pinned to macos-14 here.
        os: [ubuntu-latest, windows-latest, macos-14]
        python-version: ['3.8', '3.11', '3.12']
        exclude:
          # Reduce test matrix size
          - os: windows-latest
            python-version: '3.8'
          - os: macos-14
            python-version: '3.8'
    steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Download wheels
      uses: actions/download-artifact@v4
      with:
        name: wheels-${{ matrix.os }}
        path: ./wheelhouse/
    - name: Test wheel installation
      shell: bash
      run: |
        # The wheelhouse holds one wheel per CPython version (and, on macOS,
        # per architecture). Only wheels tagged for the running interpreter
        # are candidates; try each until pip accepts one for this platform.
        py_tag=$(python -c "import sys; print(f'cp{sys.version_info.major}{sys.version_info.minor}')")
        echo "Looking for ${py_tag} wheels"
        installed=false
        for wheel_file in wheelhouse/*-"${py_tag}"-*.whl; do
          # An unmatched glob is left as the literal pattern; skip it.
          [ -e "$wheel_file" ] || continue
          echo "Testing wheel: $wheel_file"
          if python -m pip install "$wheel_file"; then
            installed=true
            break
          fi
        done
        if [ "$installed" != "true" ]; then
          echo "No installable ${py_tag} wheel found in wheelhouse/" >&2
          ls -la wheelhouse/ || true
          exit 1
        fi
        # Test the command
        rag-mini --help
        echo "✅ Wheel installation test passed"
    - name: Download zipapp (Ubuntu only)
      if: matrix.os == 'ubuntu-latest'
      uses: actions/download-artifact@v4
      with:
        name: zipapp
        path: ./
    - name: Test zipapp (Ubuntu only)
      if: matrix.os == 'ubuntu-latest'
      run: |
        python rag-mini.pyz --help
        echo "✅ Zipapp test passed"
  publish:
    # Uploads the wheels and the sdist to PyPI. Runs only for pushed v* tags,
    # after the build and installation-test jobs have succeeded.
    name: Publish to PyPI
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
    needs:
      - build-wheels
      - test-installation
    runs-on: ubuntu-latest
    environment: release
    steps:
    - name: Download all artifacts
      uses: actions/download-artifact@v4
    - name: Prepare distribution files
      # Flatten the per-artifact directories into dist/, where the publish action looks.
      run: |
        mkdir -p dist/
        cp wheels-*/**.whl dist/
        cp sdist/*.tar.gz dist/
        ls -la dist/
    - name: Publish to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        skip-existing: true
        password: ${{ secrets.PYPI_API_TOKEN }}
  create-release:
    # Creates the GitHub release for a pushed v* tag: attaches the zipapp, a
    # few representative wheels and the sdist, and appends a changelog built
    # from the commit subjects since the previous tag.
    name: Create GitHub Release
    needs: [build-wheels, build-zipapp, test-installation]
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
    permissions:
      # Creating a release and uploading assets needs write access to contents.
      contents: write
    steps:
    - uses: actions/checkout@v4
      with:
        # Full history and tags are needed for `git describe` / `git log` below.
        fetch-depth: 0
    - name: Download all artifacts
      # Without a `name`, each artifact is extracted into a directory named
      # after it: wheels-<os>/, sdist/ and zipapp/.
      uses: actions/download-artifact@v4
    - name: Prepare release assets
      run: |
        mkdir -p release-assets/
        # Copy zipapp (extracted under the artifact's own directory)
        cp zipapp/rag-mini.pyz release-assets/
        # Copy a few representative wheels
        cp wheels-ubuntu-latest/*cp311*x86_64*.whl release-assets/ || true
        cp wheels-windows-latest/*cp311*amd64*.whl release-assets/ || true
        cp wheels-macos-*/*cp311*x86_64*.whl release-assets/ || true
        cp wheels-macos-*/*cp311*arm64*.whl release-assets/ || true
        # Copy source distribution
        cp sdist/*.tar.gz release-assets/
        ls -la release-assets/
    - name: Generate changelog
      id: changelog
      run: |
        # Simple changelog generation - you might want to use a dedicated action
        echo "## Changes" > CHANGELOG.md
        # Start from the previous tag when there is one; for the very first
        # release fall back to the whole history instead of failing the step.
        if prev_tag=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null); then
          range="${prev_tag}..HEAD"
        else
          range="HEAD"
        fi
        # tformat (not format) ends the last entry with a newline, so the EOF
        # delimiter below lands on a line of its own as GITHUB_OUTPUT requires.
        git log "$range" --pretty=tformat:"- %s" >> CHANGELOG.md
        {
          echo "CHANGELOG<<EOF"
          cat CHANGELOG.md
          echo "EOF"
        } >> "$GITHUB_OUTPUT"
    - name: Create Release
      # v2 matches the action version used by release.yml.
      uses: softprops/action-gh-release@v2
      with:
        files: release-assets/*
        body: |
          ## Installation Options
          ### 🚀 One-line installers (Recommended)
          **Linux/macOS:**
          ```bash
          curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
          ```
          **Windows PowerShell:**
          ```powershell
          iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
          ```
          ### 📦 Manual installation
          **With uv (fastest):**
          ```bash
          uv tool install fss-mini-rag
          ```
          **With pipx:**
          ```bash
          pipx install fss-mini-rag
          ```
          **With pip:**
          ```bash
          pip install --user fss-mini-rag
          ```
          **Single file (no Python knowledge needed):**
          Download `rag-mini.pyz` and run with `python rag-mini.pyz`
          ${{ steps.changelog.outputs.CHANGELOG }}
        draft: false
        prerelease: false
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -1,196 +0,0 @@
 name: CI/CD Pipeline
 # Triggers: pushes to main or develop, and pull requests targeting main.
 on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main
 jobs:
  test:
    # Installs the project requirements into a virtual environment on each
    # OS / Python combination and runs import, config, auto-update and CLI
    # smoke checks against it.
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest]
        python-version: ["3.10", "3.11", "3.12"]
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Cache dependencies
      uses: actions/cache@v4
      with:
        path: |
          ~/.cache/pip
          ~/.local/share/virtualenvs
        key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ hashFiles('**/requirements.txt') }}
        restore-keys: |
          ${{ runner.os }}-python-${{ matrix.python-version }}-
    - name: Create virtual environment
      run: |
        python -m venv .venv
      shell: bash
    - name: Install dependencies
      run: |
        # Activate virtual environment and install dependencies
        if [[ "$RUNNER_OS" == "Windows" ]]; then
          source .venv/Scripts/activate
        else
          source .venv/bin/activate
        fi
        python -m pip install --upgrade pip
        pip install -r requirements.txt
      shell: bash
    - name: Run comprehensive tests
      run: |
        # Set OS-appropriate emojis and activate venv
        if [[ "$RUNNER_OS" == "Windows" ]]; then
          source .venv/Scripts/activate
          OK="[OK]"
          SKIP="[SKIP]"
        else
          source .venv/bin/activate
          OK="✅"
          SKIP="⚠️"
        fi
        echo "$OK Virtual environment activated"
        # Run basic import tests
        python -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('$OK Core imports successful')"
        # Run the actual test suite
        if [ -f "tests/test_fixes.py" ]; then
          echo "$OK Running comprehensive test suite..."
          # The suite is deliberately non-blocking, but a failure is surfaced
          # as a workflow warning annotation instead of being lost in the log.
          if ! python tests/test_fixes.py; then
            echo "::warning::tests/test_fixes.py exited with a non-zero status"
            echo "$SKIP Test suite completed with warnings"
          fi
        else
          echo "$SKIP test_fixes.py not found, running basic tests only"
        fi
        # Test config system with proper venv
        python -c "
        import os
        ok_emoji = '$OK' if os.name != 'nt' else '[OK]'
        try:
            from mini_rag.config import ConfigManager
            import tempfile
            with tempfile.TemporaryDirectory() as tmpdir:
                config_manager = ConfigManager(tmpdir)
                config = config_manager.load_config()
                print(f'{ok_emoji} Config system works with proper dependencies')
        except Exception as e:
            print(f'Error in config test: {e}')
            raise
        "
        echo "$OK All tests completed successfully"
      shell: bash
    - name: Test auto-update system
      run: |
        # Set OS-appropriate emojis and activate the venv. Each step runs in
        # a fresh shell, and the requirements were installed into .venv only;
        # without activation the import below is tried against an interpreter
        # that lacks them and would report "not available" for the wrong reason.
        if [[ "$RUNNER_OS" == "Windows" ]]; then
          source .venv/Scripts/activate
          OK="[OK]"
          SKIP="[SKIP]"
        else
          source .venv/bin/activate
          OK="✅"
          SKIP="⚠️"
        fi
        python -c "
        import os
        ok_emoji = '$OK' if os.name != 'nt' else '[OK]'
        skip_emoji = '$SKIP' if os.name != 'nt' else '[SKIP]'
        try:
            from mini_rag.updater import UpdateChecker
            updater = UpdateChecker()
            print(f'{ok_emoji} Auto-update system available')
        except ImportError:
            print(f'{skip_emoji} Auto-update system not available (legacy version)')
        "
      shell: bash
    - name: Test CLI commands
      run: |
        # Set OS-appropriate emojis
        if [[ "$RUNNER_OS" == "Windows" ]]; then
          OK="[OK]"
        else
          OK="✅"
        fi
        echo "$OK Checking for CLI files..."
        # Informational only: every fallback ends in an echo, so this never fails.
        ls -la rag* || dir rag* || echo "CLI files may not be present"
        echo "$OK CLI check completed - this is expected in CI environment"
      shell: bash
  security-scan:
    # Advisory bandit scan of the repository. Both commands fall back to an
    # echo, so this job reports findings in the log but never fails the build.
    runs-on: ubuntu-latest
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Install security tools
      run: pip install bandit || echo "Failed to install bandit"
    - name: Run security scan
      # Scan for security issues (non-failing)
      run: bandit -r . -ll || echo "✅ Security scan completed"
  auto-update-check:
    # On pushes to main only: records whether mini_rag/updater.py exists and,
    # if it does, prints the configuration of its UpdateChecker.
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
    - name: Check for auto-update system
      # Publishes UPDATE_AVAILABLE to the environment of the following steps.
      run: |
        if [ -f "mini_rag/updater.py" ]; then
          echo "✅ Auto-update system present"
          update_available=true
        else
          echo "⚠️ No auto-update system found"
          update_available=false
        fi
        echo "UPDATE_AVAILABLE=${update_available}" >> $GITHUB_ENV
    - name: Validate update system
      if: env.UPDATE_AVAILABLE == 'true'
      # Any exception is reported and swallowed, so validation is best-effort.
      run: |
        python -c "
        try:
            from mini_rag.updater import UpdateChecker
            updater = UpdateChecker()
            print(f'✅ Update system configured for: {updater.github_api_url}')
            print(f'✅ Check frequency: {updater.check_frequency_hours} hours')
        except Exception as e:
            print(f'⚠️ Update system validation skipped: {e}')
        "
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -1,127 +0,0 @@
 name: Auto Release & Update System
 # Triggers: pushes of v* tags, or a manual run that names the version to release.
 on:
  push:
    tags: ['v*']
  workflow_dispatch:
    inputs:
      version:
        type: string
        required: true
        description: 'Version to release (e.g., v1.2.3)'
 jobs:
  create-release:
    # Resolves the release version (tag name or manual input), writes release
    # notes from the commit subjects since the previous tag, and creates a
    # GitHub release with the install scripts and requirements.txt attached.
    #
    # Values from the event or earlier steps reach the shell through `env:`
    # rather than `${{ }}` inside `run:`; text interpolated into a script is
    # executed as code, whereas an environment variable is only data.
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Full history and tags are needed to find the previous tag.
        fetch-depth: 0
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build twine
    - name: Extract version
      id: version
      env:
        EVENT_NAME: ${{ github.event_name }}
        REQUESTED_VERSION: ${{ github.event.inputs.version }}
      run: |
        if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
          VERSION="$REQUESTED_VERSION"
        else
          VERSION=${GITHUB_REF#refs/tags/}
        fi
        # version keeps the leading "v"; clean_version drops it.
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "clean_version=${VERSION#v}" >> "$GITHUB_OUTPUT"
    - name: Update version in code
      env:
        VERSION: ${{ steps.version.outputs.clean_version }}
      run: |
        # NOTE(review): these edits only change the checked-out working tree;
        # no later step in this job commits them or builds a package from them.
        # Update __init__.py version
        if [ -f "mini_rag/__init__.py" ]; then
          sed -i "s/__version__ = \".*\"/__version__ = \"$VERSION\"/" mini_rag/__init__.py
        fi
        # Update any setup.py or pyproject.toml if they exist
        if [ -f "setup.py" ]; then
          sed -i "s/version=\".*\"/version=\"$VERSION\"/" setup.py
        fi
    - name: Generate release notes
      id: release_notes
      env:
        VERSION: ${{ steps.version.outputs.version }}
      run: |
        # Get commits since last tag
        LAST_TAG=$(git describe --tags --abbrev=0 HEAD~1 2>/dev/null || echo "")
        if [ -n "$LAST_TAG" ]; then
          COMMITS=$(git log "$LAST_TAG..HEAD" --pretty=format:"• %s")
        else
          # No earlier tag: list the ten most recent commits instead.
          COMMITS=$(git log --pretty=format:"• %s" | head -10)
        fi
        # Create release notes
        cat > release_notes.md << EOF
        ## What's New in $VERSION
        ### 🚀 Changes
        $COMMITS
        ### 📥 Installation
        **Quick Install:**
        \`\`\`bash
        # Download and run installer
        curl -sSL https://github.com/${{ github.repository }}/releases/latest/download/install.sh | bash
        \`\`\`
        **Manual Install:**
        \`\`\`bash
        # Download source
        wget https://github.com/${{ github.repository }}/archive/refs/tags/$VERSION.zip
        unzip $VERSION.zip
        cd *-${VERSION#v}
        ./install_mini_rag.sh
        \`\`\`
        ### 🔄 Auto-Update
        If you have a previous version with auto-update support:
        \`\`\`bash
        ./rag-mini check-update
        ./rag-mini update
        \`\`\`
        ---
        🤖 **Auto-Update System**: This release includes automatic update checking.
        Users will be notified of future updates and can install them with one command!
        EOF
        echo "notes_file=release_notes.md" >> "$GITHUB_OUTPUT"
    - name: Create GitHub Release
      uses: softprops/action-gh-release@v2
      with:
        tag_name: ${{ steps.version.outputs.version }}
        name: Release ${{ steps.version.outputs.version }}
        body_path: release_notes.md
        draft: false
        prerelease: false
        files: |
          *.sh
          *.bat
          requirements.txt
    - name: Trigger update notifications
      env:
        VERSION: ${{ steps.version.outputs.version }}
      run: |
        # Log output only; nothing is sent to users from this step.
        echo "🎉 Release $VERSION created!"
        echo "📢 Users with auto-update will be notified within 24 hours"
        echo "🔄 They can update with: ./rag-mini update"
--- a/.github/workflows/template-sync.yml
+++ b/.github/workflows/template-sync.yml
@ -1,156 +0,0 @@
 name: Template Synchronization
 # Triggers: a weekly schedule, or a manual run that can force a sync.
 on:
  schedule:
    # Run weekly on Sundays at 2 AM UTC
    - cron: '0 2 * * 0'
  workflow_dispatch:
    inputs:
      force_sync:
        type: boolean
        required: false
        default: false
        description: 'Force sync even if no changes detected'
 jobs:
  sync-template:
    # If this repository was generated from a template, fetches the template's
    # main branch, copies its .github/workflows/ and scripts/ onto a new
    # branch, and opens a pull request with the result.
    #
    # Repository names and file lists that originate from the template reach
    # the shell through `env:` instead of `${{ }}` inside `run:`, so they are
    # treated as data rather than as script text.
    #
    # NOTE(review): the push below may be rejected when workflow files change,
    # since GITHUB_TOKEN is not normally allowed to update .github/workflows/.
    # Confirm whether a dedicated token is needed.
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
    steps:
    - name: Checkout current repository
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.GITHUB_TOKEN }}
        fetch-depth: 0
    - name: Check if repository was created from template
      id: template_check
      run: |
        # Check if this repo has template metadata
        TEMPLATE_REPO=$(gh api "repos/${GITHUB_REPOSITORY}" --jq '.template_repository.full_name' 2>/dev/null || echo "")
        if [ -n "$TEMPLATE_REPO" ]; then
          echo "template_repo=$TEMPLATE_REPO" >> "$GITHUB_OUTPUT"
          echo "is_template_derived=true" >> "$GITHUB_OUTPUT"
          echo "✅ Repository created from template: $TEMPLATE_REPO"
        else
          echo "is_template_derived=false" >> "$GITHUB_OUTPUT"
          echo "ℹ️ Repository not created from template"
        fi
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    - name: Fetch template updates
      if: steps.template_check.outputs.is_template_derived == 'true'
      id: fetch_updates
      env:
        TEMPLATE_REPO: ${{ steps.template_check.outputs.template_repo }}
        FORCE_SYNC: ${{ github.event.inputs.force_sync }}
      run: |
        # Add template as remote
        git remote add template "https://github.com/${TEMPLATE_REPO}.git" || true
        git fetch template main
        # Check for changes in template files (at most 20 names are reported)
        TEMPLATE_FILES=$(git diff --name-only HEAD template/main -- .github/ scripts/ | head -20)
        if [ -n "$TEMPLATE_FILES" ] || [ "$FORCE_SYNC" = "true" ]; then
          echo "updates_available=true" >> "$GITHUB_OUTPUT"
          {
            echo "template_files<<EOF"
            echo "$TEMPLATE_FILES"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"
          echo "🔄 Template updates available"
        else
          echo "updates_available=false" >> "$GITHUB_OUTPUT"
          echo "✅ No template updates needed"
        fi
    - name: Create update branch
      if: steps.fetch_updates.outputs.updates_available == 'true'
      run: |
        BRANCH_NAME="template-sync-$(date +%Y%m%d-%H%M%S)"
        echo "sync_branch=$BRANCH_NAME" >> "$GITHUB_ENV"
        git checkout -b "$BRANCH_NAME"
        # Merge template changes for specific directories only
        git checkout template/main -- .github/workflows/ || true
        git checkout template/main -- scripts/ || true
        # Don't overwrite project-specific files
        git reset HEAD -- .github/workflows/template-sync.yml || true
        git checkout HEAD -- .github/workflows/template-sync.yml || true
    - name: Commit template updates
      if: steps.fetch_updates.outputs.updates_available == 'true'
      id: commit_updates
      env:
        TEMPLATE_FILES: ${{ steps.fetch_updates.outputs.template_files }}
        TEMPLATE_REPO: ${{ steps.template_check.outputs.template_repo }}
      run: |
        git config user.name "Template Sync Bot"
        git config user.email "noreply@github.com"
        # changes_pushed tells the later steps whether a branch actually
        # exists on the remote; a forced sync can reach this point with
        # nothing staged.
        if git diff --cached --quiet; then
          echo "No changes to commit"
          echo "changes_pushed=false" >> "$GITHUB_OUTPUT"
        else
          git commit -m "🔄 Sync template updates
          Updated files:
          ${TEMPLATE_FILES}
          Source: ${TEMPLATE_REPO}
          Sync date: $(date -u +'%Y-%m-%d %H:%M:%S UTC')
          This is an automated template synchronization.
          Review changes before merging."
          git push origin "$sync_branch"
          echo "changes_pushed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Create pull request
      # Only open a PR when the sync branch was really pushed; otherwise
      # `gh pr create` would fail on a head branch the remote does not have.
      if: steps.commit_updates.outputs.changes_pushed == 'true'
      run: |
        gh pr create \
          --title "🔄 Template Updates Available" \
          --body "## Template Synchronization
        This PR contains updates from the template repository.
        ### 📋 Changed Files:
        \`\`\`
        ${TEMPLATE_FILES}
        \`\`\`
        ### 📊 What's Updated:
        - GitHub Actions workflows
        - Project scripts and automation
        - Template-specific configurations
        ### ⚠️ Review Notes:
        - **Carefully review** all changes before merging
        - **Test workflows** in a branch if needed
        - **Preserve** any project-specific customizations
        - **Check** that auto-update system still works
        ### 🔗 Source:
        Template: [${TEMPLATE_REPO}](https://github.com/${TEMPLATE_REPO})
        Sync Date: $(date -u +'%Y-%m-%d %H:%M:%S UTC')
        ---
        🤖 This is an automated template synchronization. Review carefully before merging!" \
          --head "$sync_branch" \
          --base main \
          --label "template-sync,automation"
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        TEMPLATE_FILES: ${{ steps.fetch_updates.outputs.template_files }}
        TEMPLATE_REPO: ${{ steps.template_check.outputs.template_repo }}
    - name: Summary
      env:
        IS_TEMPLATE_DERIVED: ${{ steps.template_check.outputs.is_template_derived }}
        UPDATES_AVAILABLE: ${{ steps.fetch_updates.outputs.updates_available }}
        CHANGES_PUSHED: ${{ steps.commit_updates.outputs.changes_pushed }}
      run: |
        if [ "$IS_TEMPLATE_DERIVED" = "true" ]; then
          if [ "$CHANGES_PUSHED" = "true" ]; then
            echo "🎉 Template sync completed - PR created for review"
          elif [ "$UPDATES_AVAILABLE" = "true" ]; then
            echo "✅ Sync ran but produced no changes to commit - no PR created"
          else
            echo "✅ Template is up to date - no action needed"
          fi
        else
          echo "ℹ️ Repository not created from template - skipping sync"
        fi
--- a/.gitignore
+++ b/.gitignore
@ -41,14 +41,10 @@ Thumbs.db
 # RAG system specific
 .claude-rag/
 .mini-rag/
 *.lance/
 *.db
 manifest.json
 # Claude Code specific
 .claude/
 # Logs and temporary files
 *.log
 *.tmp
@ -74,8 +70,6 @@ config.local.yml
 test_output/
 temp_test_*/
 .test_*
 test_environments/
 test_results_*.json
 # Backup files
 *.bak
@ -108,12 +102,3 @@ dmypy.json
 # Project specific ignores
 REPOSITORY_SUMMARY.md
 # Analysis and scanning results (should not be committed)
 docs/live-analysis/
 docs/analysis-history/
 **/live-analysis/
 **/analysis-history/
 *.analysis.json
 *.analysis.html
 **/analysis_*/
--- a/.mini-rag/config.yaml
+++ b/.mini-rag/config.yaml
@ -1,66 +0,0 @@
 # FSS-Mini-RAG Configuration
 # 
 # 🔧 EDIT THIS FILE TO CUSTOMIZE YOUR RAG SYSTEM
 # 
 # This file controls all behavior of your Mini-RAG system.
 # Changes take effect immediately - no restart needed!
 # 
 # 💡 IMPORTANT: To change the AI model, edit the 'synthesis_model' line below
 # 
 # Common model options:
 #   synthesis_model: auto              # Let system choose best available
 #   synthesis_model: qwen3:0.6b        # Ultra-fast (500MB)
 #   synthesis_model: qwen3:1.7b        # Balanced (1.4GB) - recommended
 #   synthesis_model: qwen3:4b          # High quality (2.5GB)
 #
 # See docs/GETTING_STARTED.md for detailed explanations
 # How source text is split into chunks
 chunking:
  max_size: 2000                # Maximum characters per chunk
  min_size: 150                 # Minimum characters per chunk
  strategy: semantic            # 'semantic' (language-aware) or 'fixed'
 # Streaming of large files
 streaming:
  enabled: true
  threshold_bytes: 1048576      # Files larger than this use streaming (1MB)
 # Which files are processed
 files:
  min_file_size: 50             # Skip files smaller than this
  exclude_patterns:
    - 'node_modules/**'
    - '.git/**'
    - '__pycache__/**'
    - '*.pyc'
    - '.venv/**'
    - 'venv/**'
    - 'build/**'
    - 'dist/**'
  include_patterns:
    - '**/*'                    # Include all files by default
 # How embeddings are generated
 embedding:
  preferred_method: ollama      # 'ollama', 'ml', 'hash', or 'auto'
  ollama_model: nomic-embed-text
  ollama_host: 'localhost:11434'
  ml_model: sentence-transformers/all-MiniLM-L6-v2
  batch_size: 32                # Embeddings processed per batch
 # Search behaviour
 search:
  default_top_k: 10             # Default number of top results
  enable_bm25: true             # Enable keyword matching boost
  similarity_threshold: 0.1     # Minimum similarity score
  expand_queries: false         # Enable automatic query expansion
 # LLM synthesis and query expansion
 llm:
  ollama_host: 'localhost:11434'
  synthesis_model: 'qwen3:1.7b' # 'auto', 'qwen3:1.7b', etc.
  expansion_model: auto         # Usually same as synthesis_model
  max_expansion_terms: 8        # Maximum terms to add to queries
  enable_synthesis: false       # Enable synthesis by default
  synthesis_temperature: 0.3    # LLM temperature for analysis
--- a/.mini-rag/last_search
+++ b/.mini-rag/last_search
@ -1 +0,0 @@
 test
--- a/.venv-linting/bin/Activate.ps1
+++ b/.venv-linting/bin/Activate.ps1
@ -1,247 +0,0 @@
 <#
 .Synopsis
 Activate a Python virtual environment for the current PowerShell session.
 .Description
 Pushes the python executable for a virtual environment to the front of the
 $Env:PATH environment variable and sets the prompt to signify that you are
 in a Python virtual environment. Makes use of the command line switches as
 well as the `pyvenv.cfg` file values present in the virtual environment.
 .Parameter VenvDir
 Path to the directory that contains the virtual environment to activate. The
 default value for this is the parent of the directory that the Activate.ps1
 script is located within.
 .Parameter Prompt
 The prompt prefix to display when this virtual environment is activated. By
 default, this prompt is the name of the virtual environment folder (VenvDir)
 surrounded by parentheses and followed by a single space (ie. '(.venv) ').
 .Example
 Activate.ps1
 Activates the Python virtual environment that contains the Activate.ps1 script.
 .Example
 Activate.ps1 -Verbose
 Activates the Python virtual environment that contains the Activate.ps1 script,
 and shows extra information about the activation as it executes.
 .Example
 Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
 Activates the Python virtual environment located in the specified location.
 .Example
 Activate.ps1 -Prompt "MyPython"
 Activates the Python virtual environment that contains the Activate.ps1 script,
 and prefixes the current prompt with the specified string (surrounded in
 parentheses) while the virtual environment is active.
 .Notes
 On Windows, it may be required to enable this Activate.ps1 script by setting the
 execution policy for the user. You can do this by issuing the following PowerShell
 command:
 PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
 For more information on Execution Policies: 
 https://go.microsoft.com/fwlink/?LinkID=135170
 #>
 Param(
    # Directory of the virtual environment to activate. When omitted, the
    # script falls back to the parent of the directory containing Activate.ps1.
    [Parameter(Mandatory = $false)]
    [String]
    $VenvDir,
    # Prompt prefix shown while the environment is active. When omitted, the
    # `prompt` value from pyvenv.cfg is used, or else the leaf name of VenvDir.
    [Parameter(Mandatory = $false)]
    [String]
    $Prompt
 )
 <# Function declarations --------------------------------------------------- #>
 <#
 .Synopsis
 Undo what the Activate script changed in the current session: restore the
 saved prompt, PYTHONHOME and PATH, and drop the VIRTUAL_ENV* variables.
 .Parameter NonDestructive
 When set, this function stays defined in the global namespace after it runs.
 #>
 function global:deactivate ([switch]$NonDestructive) {
    # Put the pre-activation prompt function back, if one was saved.
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }
    # Restore PYTHONHOME, then PATH, from their _OLD_VIRTUAL_* backups and
    # discard each backup once it has been copied back.
    foreach ($name in 'PYTHONHOME', 'PATH') {
        $backup = "Env:_OLD_VIRTUAL_$name"
        if (Test-Path -Path $backup) {
            Copy-Item -Path $backup -Destination "Env:$name"
            Remove-Item -Path $backup
        }
    }
    # These two have no prior value to restore; they are simply removed.
    foreach ($name in 'VIRTUAL_ENV', 'VIRTUAL_ENV_PROMPT') {
        if (Test-Path -Path "Env:$name") {
            Remove-Item -Path "env:$name"
        }
    }
    # The prompt prefix is a read-only global variable, hence -Force.
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }
    # Unless asked to stay, remove this function itself as the last step.
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
 }
 <#
 .Description
 Get-PyVenvConfig reads the pyvenv.cfg file in the given folder and returns
 its settings as a hashtable.
 A line counts as a setting when splitting it at the first `=` (ignoring
 whitespace around the `=`) yields a non-empty key and a non-empty value.
 A value that begins with `'` or `"` has its first and last character removed
 before it is stored.
 If pyvenv.cfg cannot be found, an error is reported and an empty hashtable
 is returned.
 .Parameter ConfigDir
 Path to the directory that contains the `pyvenv.cfg` file.
 #>
 function Get-PyVenvConfig(
    [String]
    $ConfigDir
 ) {
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
    # -Resolve makes Join-Path fail for a missing file; -ErrorAction Continue
    # reports that failure but lets the function carry on with no path.
    $cfgFile = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
    $settings = @{ }
    if (-not $cfgFile) {
        return $settings
    }
    Write-Verbose "File exists, parse `key = value` lines"
    foreach ($line in (Get-Content -Path $cfgFile)) {
        # Split at the first '=' only, so values may themselves contain '='.
        $key, $val = $line -split "\s*=\s*", 2
        if (-not ($key -and $val)) {
            continue
        }
        # Remove extraneous quotations around a string value.
        $first = $val.Substring(0, 1)
        if ($first -eq "'" -or $first -eq '"') {
            $val = $val.Substring(1, $val.Length - 2)
        }
        $settings[$key] = $val
        Write-Verbose "Adding Key: '$key'='$val'"
    }
    return $settings
 }
 <# Begin Activate script --------------------------------------------------- #>
 # Resolves the virtual environment directory and prompt text, deactivates any
 # environment that is already active, then exports VIRTUAL_ENV, installs the
 # prefixed prompt and puts the environment's executable directory first on PATH.
 # Determine the containing directory of this script
 $VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
 $VenvExecDir = Get-Item -Path $VenvExecPath
 Write-Verbose "Activation script is located in path: '$VenvExecPath'"
 # The closing quotes below were missing, leaving the logged values unbalanced.
 Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)'"
 Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)'"
 # Set values required in priority: CmdLine, ConfigFile, Default
 # First, get the location of the virtual environment, it might not be
 # VenvExecDir if specified on the command line.
 if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
 }
 else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
 }
 # Next, read the `pyvenv.cfg` file to determine any required value such
 # as `prompt`.
 $pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
 # Next, set the prompt from the command line, or the config file, or
 # just use the name of the virtual environment folder.
 if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
 }
 else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
 }
 Write-Verbose "Prompt = '$Prompt'"
 Write-Verbose "VenvDir='$VenvDir'"
 # Deactivate any currently active virtual environment, but leave the
 # deactivate function in place.
 deactivate -nondestructive
 # Now set the environment variable VIRTUAL_ENV, used by many tools to determine
 # that there is an activated venv.
 $env:VIRTUAL_ENV = $VenvDir
 if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
    Write-Verbose "Setting prompt to '$Prompt'"
    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
    $env:VIRTUAL_ENV_PROMPT = $Prompt
 }
 # Clear PYTHONHOME, keeping a backup so deactivate can restore it
 if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
 }
 # Add the venv to the PATH, keeping a backup of the previous value
 Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
 $Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
--- a/.venv-linting/bin/activate
+++ b/.venv-linting/bin/activate
@ -1,70 +0,0 @@
 # This file must be used with "source bin/activate" *from bash*
 # You cannot run it directly
 deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi
    # Call hash to forget past commands. Without forgetting
    # past commands the $PATH changes we made may not be respected
    hash -r 2> /dev/null
    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi
    unset VIRTUAL_ENV
    unset VIRTUAL_ENV_PROMPT
    if [ ! "${1:-}" = "nondestructive" ] ; then
    # Self destruct!
        unset -f deactivate
    fi
 }
 # unset irrelevant variables
 # (runs the deactivate function in its non-destructive mode, so the function
 # itself stays defined)
 deactivate nondestructive
 # on Windows, a path can contain colons and backslashes and has to be converted:
 if [ "${OSTYPE:-}" = "cygwin" ] || [ "${OSTYPE:-}" = "msys" ] ; then
    # transform D:\path\to\venv to /d/path/to/venv on MSYS
    # and to /cygdrive/d/path/to/venv on Cygwin
    export VIRTUAL_ENV=$(cygpath /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting)
 else
    # use the path as-is
    export VIRTUAL_ENV=/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
 fi
 # NOTE: the venv location above is a hard-coded absolute path, so this script
 # only works while the environment lives at exactly that location.
 # Remember the current PATH, then put the venv's bin directory in front of it.
 _OLD_VIRTUAL_PATH="$PATH"
 PATH="$VIRTUAL_ENV/"bin":$PATH"
 export PATH
 # unset PYTHONHOME if set
 # this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
 # could use `if (set -u; : $PYTHONHOME) ;` in bash
 if [ -n "${PYTHONHOME:-}" ] ; then
    # Keep the old value in _OLD_VIRTUAL_PYTHONHOME before unsetting it.
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
 fi
 # Prefix the prompt with the venv name unless VIRTUAL_ENV_DISABLE_PROMPT is
 # set to a non-empty value; the previous PS1 is saved in _OLD_VIRTUAL_PS1.
 if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1='(.venv-linting) '"${PS1:-}"
    export PS1
    VIRTUAL_ENV_PROMPT='(.venv-linting) '
    export VIRTUAL_ENV_PROMPT
 fi
 # Call hash to forget past commands. Without forgetting
 # past commands the $PATH changes we made may not be respected
 hash -r 2> /dev/null
--- a/.venv-linting/bin/activate.csh
+++ b/.venv-linting/bin/activate.csh
@ -1,27 +0,0 @@
 # This file must be used with "source bin/activate.csh" *from csh*.
 # You cannot run it directly.
 # Created by Davide Di Blasi <davidedb@gmail.com>.
 # Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
 # `deactivate` alias, in order: restore PATH if _OLD_VIRTUAL_PATH is set, rehash,
 # restore the prompt if _OLD_VIRTUAL_PROMPT is set, unset VIRTUAL_ENV and
 # VIRTUAL_ENV_PROMPT, and finally remove the alias itself unless the
 # arguments are exactly "nondestructive".
 alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
 # Unset irrelevant variables.
 deactivate nondestructive
 # NOTE: hard-coded absolute venv location; the script only works while the
 # environment lives at exactly this path.
 setenv VIRTUAL_ENV /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
 # Remember the current PATH, then put the venv's bin directory in front of it.
 set _OLD_VIRTUAL_PATH="$PATH"
 setenv PATH "$VIRTUAL_ENV/"bin":$PATH"
 # The old prompt is saved unconditionally; it is only modified below when
 # VIRTUAL_ENV_DISABLE_PROMPT is not set.
 set _OLD_VIRTUAL_PROMPT="$prompt"
 if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
    set prompt = '(.venv-linting) '"$prompt"
    setenv VIRTUAL_ENV_PROMPT '(.venv-linting) '
 endif
 # Convenience alias so `pydoc` runs through whichever `python` is first on PATH.
 alias pydoc python -m pydoc
 # Rebuild the command lookup table so the new PATH takes effect.
 rehash
--- a/.venv-linting/bin/activate.fish
+++ b/.venv-linting/bin/activate.fish
@ -1,69 +0,0 @@
 # This file must be used with "source <venv>/bin/activate.fish" *from fish*
 # (https://fishshell.com/). You cannot run it directly.
 # Undo an activation: restore PATH and PYTHONHOME from their saved copies,
 # put back the original fish_prompt function, erase the venv marker variables
 # and, unless the first argument is "nondestructive", erase this function.
 function deactivate  -d "Exit virtual environment and return to normal shell environment"
    # reset old environment variables
    if test -n "$_OLD_VIRTUAL_PATH"
        set -gx PATH $_OLD_VIRTUAL_PATH
        set -e _OLD_VIRTUAL_PATH
    end
    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
        set -e _OLD_VIRTUAL_PYTHONHOME
    end
    # A non-empty _OLD_FISH_PROMPT_OVERRIDE means the prompt was overridden.
    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
        set -e _OLD_FISH_PROMPT_OVERRIDE
        # prevents error when using nested fish instances (Issue #93858)
        if functions -q _old_fish_prompt
            # Order matters: erase the override, copy the saved prompt back
            # under its original name, then erase the saved copy.
            functions -e fish_prompt
            functions -c _old_fish_prompt fish_prompt
            functions -e _old_fish_prompt
        end
    end
    set -e VIRTUAL_ENV
    set -e VIRTUAL_ENV_PROMPT
    if test "$argv[1]" != "nondestructive"
        # Self-destruct!
        functions -e deactivate
    end
 end
 # Unset irrelevant variables.
 deactivate nondestructive
 # NOTE: hard-coded absolute venv location; the script only works while the
 # environment lives at exactly this path.
 set -gx VIRTUAL_ENV /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
 # Remember the current PATH, then put the venv's bin directory in front of it.
 set -gx _OLD_VIRTUAL_PATH $PATH
 set -gx PATH "$VIRTUAL_ENV/"bin $PATH
 # Unset PYTHONHOME if set.
 if set -q PYTHONHOME
    # Keep the old value in _OLD_VIRTUAL_PYTHONHOME before erasing it.
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
 end
 # The prompt is only overridden when VIRTUAL_ENV_DISABLE_PROMPT is empty or unset.
 if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.
    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt
    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status
        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s%s%s" (set_color 4B8BBE) '(.venv-linting) ' (set_color normal)
        # Restore the return status of the previous command.
        # (done by piping an `exit <status>` command into `.`)
        echo "exit $old_status" | .
        # Output the original/"old" prompt.
        _old_fish_prompt
    end
    # Marker checked by `deactivate` to know the prompt was overridden.
    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
    set -gx VIRTUAL_ENV_PROMPT '(.venv-linting) '
 end
--- a/.venv-linting/bin/black
+++ b/.venv-linting/bin/black
@ -1,8 +0,0 @@
 #!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
 # -*- coding: utf-8 -*-
 # Launcher script for the `black` command: imports `patched_main` from the
 # `black` package and runs it. The shebang above is a hard-coded absolute
 # path to this venv's interpreter.
 import re
 import sys
 from black import patched_main
 if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" suffix from argv[0] before
    # handing control to the tool.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    # The entry point's return value becomes the process exit status.
    sys.exit(patched_main())
--- a/.venv-linting/bin/blackd
+++ b/.venv-linting/bin/blackd
@ -1,8 +0,0 @@
 #!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
 # -*- coding: utf-8 -*-
 # Launcher script for the `blackd` command: imports `patched_main` from the
 # `blackd` package and runs it. The shebang above is a hard-coded absolute
 # path to this venv's interpreter.
 import re
 import sys
 from blackd import patched_main
 if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" suffix from argv[0] before
    # handing control to the tool.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    # The entry point's return value becomes the process exit status.
    sys.exit(patched_main())
--- a/.venv-linting/bin/isort
+++ b/.venv-linting/bin/isort
@ -1,8 +0,0 @@
 #!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
 # -*- coding: utf-8 -*-
 # Launcher script for the `isort` command: imports `main` from `isort.main`
 # and runs it. The shebang above is a hard-coded absolute path to this
 # venv's interpreter.
 import re
 import sys
 from isort.main import main
 if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" suffix from argv[0] before
    # handing control to the tool.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    # The entry point's return value becomes the process exit status.
    sys.exit(main())
--- a/.venv-linting/bin/isort-identify-imports
+++ b/.venv-linting/bin/isort-identify-imports
@ -1,8 +0,0 @@
 #!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
 # -*- coding: utf-8 -*-
 # Launcher script for the `isort-identify-imports` command: imports
 # `identify_imports_main` from `isort.main` and runs it. The shebang above is
 # a hard-coded absolute path to this venv's interpreter.
 import re
 import sys
 from isort.main import identify_imports_main
 if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" suffix from argv[0] before
    # handing control to the tool.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    # The entry point's return value becomes the process exit status.
    sys.exit(identify_imports_main())
--- a/.venv-linting/bin/pip
+++ b/.venv-linting/bin/pip
@ -1,8 +0,0 @@
 #!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
 # -*- coding: utf-8 -*-
 # Launcher script for the `pip` command: imports `main` from
 # `pip._internal.cli.main` and runs it. The shebang above is a hard-coded
 # absolute path to this venv's interpreter.
 import re
 import sys
 from pip._internal.cli.main import main
 if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" suffix from argv[0] before
    # handing control to the tool.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    # The entry point's return value becomes the process exit status.
    sys.exit(main())
--- a/.venv-linting/bin/pip3
+++ b/.venv-linting/bin/pip3
@ -1,8 +0,0 @@
 #!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
 # -*- coding: utf-8 -*-
 # Launcher script for the `pip3` command: imports `main` from
 # `pip._internal.cli.main` and runs it. The shebang above is a hard-coded
 # absolute path to this venv's interpreter.
 import re
 import sys
 from pip._internal.cli.main import main
 if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" suffix from argv[0] before
    # handing control to the tool.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    # The entry point's return value becomes the process exit status.
    sys.exit(main())
--- a/.venv-linting/bin/pip3.12
+++ b/.venv-linting/bin/pip3.12
@ -1,8 +0,0 @@
 #!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
 # -*- coding: utf-8 -*-
 # Launcher script for the `pip3.12` command: imports `main` from
 # `pip._internal.cli.main` and runs it. The shebang above is a hard-coded
 # absolute path to this venv's interpreter.
 import re
 import sys
 from pip._internal.cli.main import main
 if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" suffix from argv[0] before
    # handing control to the tool.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    # The entry point's return value becomes the process exit status.
    sys.exit(main())
--- a/.venv-linting/bin/python
+++ b/.venv-linting/bin/python
@ -1 +0,0 @@
 python3
--- a/.venv-linting/bin/python3
+++ b/.venv-linting/bin/python3
@ -1 +0,0 @@
 /usr/bin/python3
--- a/.venv-linting/bin/python3.12
+++ b/.venv-linting/bin/python3.12
@ -1 +0,0 @@
 python3
--- a/.venv-linting/lib64
+++ b/.venv-linting/lib64
@ -1 +0,0 @@
 lib
--- a/.venv-linting/pyvenv.cfg
+++ b/.venv-linting/pyvenv.cfg
@ -1,5 +0,0 @@
 home = /usr/bin
 include-system-site-packages = false
 version = 3.12.3
 executable = /usr/bin/python3.12
 command = /usr/bin/python3 -m venv /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
--- a/ENHANCEMENTS.md
+++ b/ENHANCEMENTS.md
@ -1,31 +0,0 @@
 # FSS-Mini-RAG Enhancement Backlog
 ## Path Resolution & UX Improvements
 ### Current State
 ```bash
 rag-mini search /full/absolute/path "query"
 ```
 ### Desired State
 ```bash
 cd /my/project
 rag-mini "authentication logic"    # Auto-detects current directory, defaults to search
 rag-mini . "query"                 # Explicit current directory  
 rag-mini ../other "query"          # Relative path resolution
 ```
 ### Implementation Requirements
 1. **Auto-detect current working directory** when no path specified
 2. **Default to search command** when first argument is a query string
 3. **Proper path resolution** using `pathlib.Path.resolve()` for all relative paths
 4. **Maintain backwards compatibility** with existing explicit command syntax
 ### Technical Details
 - Modify `mini_rag/cli.py` argument parsing
 - Add path resolution with `os.path.abspath()` or `pathlib.Path.resolve()`
 - Make project_path optional (default to `os.getcwd()`)
 - Smart command detection (if first arg doesn't match command, assume search)
 ### Priority
 High - Significant UX improvement for daily usage
--- a/FSS_ENHANCED_QWENCODE_EVALUATION_REPORT.md
+++ b/FSS_ENHANCED_QWENCODE_EVALUATION_REPORT.md
@ -1,231 +0,0 @@
 # 🚀 FSS Enhanced QwenCode with Mini-RAG: Comprehensive Field Evaluation
 ## A Technical Assessment by Michael & Bella
 ---
 ## **EXECUTIVE SUMMARY**
 **Evaluators**: Michael (Technical Implementation Specialist) & Bella (Collaborative Analysis Expert)  
 **Evaluation Date**: September 4, 2025  
 **System Under Test**: FSS Enhanced QwenCode Fork with Integrated Mini-RAG Search  
 **Duration**: Extended multi-hour deep-dive testing session  
 **Total Searches Conducted**: 50+ individual queries plus a 12-thread concurrent stress test  
 **VERDICT**: This system represents a **paradigm shift** in agent intelligence. After extensive testing, we can confidently state that the FSS Enhanced QwenCode with Mini-RAG integration delivers on its promise of transforming agents from basic pattern-matching tools into genuinely intelligent development assistants.
 ---
 ## **SECTION 1: ARCHITECTURAL INNOVATIONS DISCOVERED**
 ### **Claude Code Max Integration System**
 **Michael**: "Bella, the RAG search immediately revealed something extraordinary - this isn't just a fork, it's a complete integration platform!"
 **Bella**: "Absolutely! The search results show a comprehensive Anthropic OAuth authentication system with native API implementation. Look at this architecture:"
 **Technical Details Validated by RAG**:
 - **Native Anthropic API Implementation**: Complete replacement of inheritance-based systems with direct Anthropic protocol communication
 - **Multi-Provider Architecture**: Robust authentication across all major AI providers with ModelOverrideManager foundation
 - **OAuth2 Integration**: Full `packages/core/src/anthropic/anthropicOAuth2.ts` implementation with credential management
 - **Session-Based Testing**: Advanced provider switching with fallback support and seamless model transitions
 - **Authentication Infrastructure**: Complete system status shows "authentication infrastructure complete, root cause identified"
 **Michael**: "The test-claude-max.js file shows they've even built validation systems for Claude Code installation - this is enterprise-grade integration work!"
 ### **Mini-RAG Semantic Intelligence Core**
 **Bella**: "But Michael, the real innovation is what we just experienced - the Mini-RAG system that made this discovery possible!"
 **RAG Technical Architecture Discovered**:
 - **Embedding Pipeline**: Complete system documented in technical guide with advanced text processing
 - **Hybrid Search Implementation**: CodeSearcher class with SearchTester harness for evaluation
 - **Interactive Configuration**: Live dashboard with guided setup and configuration management
 - **Fast Server Architecture**: Sophisticated port management and process handling
 **Michael**: "The search results show this isn't just basic RAG - they've built a comprehensive technical guide, test harnesses, and interactive configuration systems. This is production-ready infrastructure!"
 ---
 ## **SECTION 2: PERFORMANCE BENCHMARKING RESULTS**
 ### **Indexing Performance Analysis**
 **Bella**: "Let me read our indexing metrics while you analyze the concurrent performance data, Michael."
 **Validated Indexing Metrics**:
 - **Files Processed**: 2,295 files across the entire QwenCode codebase
 - **Chunks Generated**: 2,920 semantic chunks (1.27 chunks per file ratio)
 - **Indexing Speed**: **25.5 files per second** - exceptional for semantic processing
 - **Total Index Time**: 90.07 seconds for complete codebase analysis
 - **Success Rate**: 100% - no failures or errors during indexing
 **Michael**: "That indexing speed is remarkable, Bella. Now looking at our concurrent stress test results..."
 ### **Concurrent Search Performance Deep Dive**
 **Stress Test Specifications**:
 - **Concurrent Threads**: 12 simultaneous searches using ThreadPoolExecutor
 - **Query Complexity**: High-complexity technical queries (design patterns, React fiber, security headers)
 - **Total Execution Time**: 8.25 seconds wall clock time
 - **Success Rate**: **100%** (12/12 searches successful)
 **Detailed Timing Analysis**:
 - **Fastest Query**: "performance monitoring OR metrics collection" - **7.019 seconds**
 - **Slowest Query**: "design patterns OR factory pattern OR observer" - **8.249 seconds**
 - **Median Response**: 8.089 seconds
 - **Average Response**: 7.892 seconds
 - **Timing Consistency**: Excellent (1.23-second spread between fastest/slowest)
 **Bella**: "Michael, that throughput calculation of 1.45 searches per second under maximum concurrent load is impressive for semantic search!"
 ### **Search Quality Assessment**
 **Michael**: "Every single query returned exactly 3 relevant results with high semantic scores. No timeouts, no errors, no degraded results under load."
 **Quality Metrics Observed**:
 - **Result Consistency**: All queries returned precisely 3 results as requested
 - **Semantic Relevance**: High-quality matches across diverse technical domains
 - **Zero Failure Rate**: No timeouts, errors, or degraded responses
 - **Load Stability**: Performance remained stable across all concurrent threads
 ---
 ## **SECTION 3: PRACTICAL UTILITY VALIDATION**
 ### **Development Workflow Enhancement**
 **Bella**: "During our testing marathon, the RAG system consistently found exactly what we needed for real development scenarios."
 **Validated Use Cases**:
 - **Build System Analysis**: Instantly located TypeScript configurations, ESLint setups, and workspace definitions
 - **Security Pattern Discovery**: Found OAuth token management, authentication testing, and security reporting procedures
 - **Tool Error Classification**: Comprehensive ToolErrorType enum with type-safe error handling
 - **Project Structure Navigation**: Efficient discovery of VSCode IDE companion configurations and module resolution
 **Michael**: "What impressed me most was how it found the TokenManagerError implementation in qwenOAuth2.test.ts - that's exactly the kind of needle-in-haystack discovery that transforms development productivity!"
 ### **Semantic Intelligence Capabilities**
 **Real-World Query Success Examples**:
 - **Complex Technical Patterns**: "virtual DOM OR reconciliation OR React fiber" → Found relevant React architecture
 - **Security Concerns**: "authentication bugs OR OAuth token management" → Located test scenarios and error handling
 - **Performance Optimization**: "lazy loading OR code splitting" → Identified optimization opportunities
 - **Architecture Analysis**: "microservices OR distributed systems" → Found relevant system design patterns
 **Bella**: "Every single query in our 50+ test suite returned semantically relevant results. The system understands context, not just keywords!"
 ### **Agent Intelligence Amplification**
 **Michael**: "This is where the real magic happens - the RAG system doesn't just search, it makes the agent genuinely intelligent."
 **Intelligence Enhancement Observed**:
 - **Contextual Understanding**: Queries about "memory leaks" found relevant performance monitoring code
 - **Domain Knowledge**: Technical jargon like "JWT tokens" correctly mapped to authentication implementations  
 - **Pattern Recognition**: "design patterns" searches found actual architectural pattern implementations
 - **Problem-Solution Mapping**: Error-related queries found both problems and their test coverage
 **Bella**: "The agent went from basic pattern matching to having genuine understanding of the codebase's architecture, security patterns, and development workflows!"
 ---
 ## **SECTION 4: ARCHITECTURAL PHILOSOPHY & INNOVATION**
 ### **The "Agent as Synthesis Layer" Breakthrough**
 **Michael**: "Bella, our RAG search just revealed something profound - they've implemented a 'clean separation between synthesis and exploration modes' with the agent serving as the intelligent synthesis layer!"
 **Core Architectural Innovation Discovered**:
 - **TestModeSeparation**: Clean separation between synthesis and exploration modes validated by comprehensive test suite
 - **LLM Configuration**: Sophisticated `enable_synthesis: false` setting - the agent IS the synthesis, not an additional LLM layer
 - **No Synthesis Bloat**: Configuration shows `synthesis_model: qwen3:1.5b`, but synthesis is disabled by design - the agent provides better synthesis
 - **Direct Integration**: Agent receives raw RAG results and performs intelligent synthesis without intermediate processing
 **Bella**: "This is brilliant! Instead of adding another LLM layer that would introduce noise, latency, and distortion, they made the agent the intelligent synthesis engine!"
 ### **Competitive Advantages Identified**
 **Technical Superiority**:
 - **Zero Synthesis Latency**: No additional LLM calls means instant intelligent responses
 - **No Information Loss**: Direct access to raw search results without intermediate filtering
 - **Architectural Elegance**: Clean separation of concerns with agent as intelligent processor
 - **Resource Efficiency**: Single agent processing instead of multi-LLM pipeline overhead
 **Michael**: "This architecture choice explains why our searches felt so immediate and intelligent - there's no bloat, no noise, just pure semantic search feeding directly into agent intelligence!"
 ### **Innovation Impact Assessment**
 **Bella**: "What we've discovered here isn't just good engineering - it's a paradigm shift in how agents should be architected."
 **Revolutionary Aspects**:
 - **Eliminates the "Chain of Confusion"**: No LLM-to-LLM handoffs that introduce errors
 - **Preserves Semantic Fidelity**: Agent receives full search context without compression or interpretation layers  
 - **Maximizes Response Speed**: Single processing stage from search to intelligent response
 - **Enables True Understanding**: Agent directly processes semantic chunks rather than pre-digested summaries
 **Michael**: "This explains why every single one of our 50+ searches returned exactly what we needed - the architecture preserves the full intelligence of both the search system and the agent!"
 ---
 ## **FINAL ASSESSMENT & RECOMMENDATIONS**
 ### **Executive Summary of Findings**
 **Bella**: "After conducting 50+ individual searches plus a comprehensive 12-thread concurrent stress test, we can definitively state that the FSS Enhanced QwenCode represents a breakthrough in agent intelligence architecture."
 **Michael**: "The numbers speak for themselves - 100% success rate, 25.5 files/second indexing, 1.45 searches/second under maximum concurrent load, and most importantly, genuine semantic understanding that transforms agent capabilities."
 ### **Key Breakthrough Achievements**
 **1. Performance Excellence**
 - ✅ **100% Search Success Rate** across 50+ diverse technical queries
 - ✅ **25.5 Files/Second Indexing** - exceptional for semantic processing
 - ✅ **Perfect Concurrent Scaling** - 12 simultaneous searches without failures
 - ✅ **Consistent Response Times** - 7-8 second range under maximum load
 **2. Architectural Innovation**
 - ✅ **Agent-as-Synthesis-Layer** design eliminates LLM chain confusion
 - ✅ **Zero Additional Latency** from unnecessary synthesis layers
 - ✅ **Direct Semantic Access** preserves full search intelligence
 - ✅ **Clean Mode Separation** validated by comprehensive test suites
 **3. Practical Intelligence**
 - ✅ **True Semantic Understanding** beyond keyword matching
 - ✅ **Contextual Problem-Solution Mapping** for real development scenarios
 - ✅ **Technical Domain Expertise** across security, architecture, and DevOps
 - ✅ **Needle-in-Haystack Discovery** of specific implementations and patterns
 ### **Comparative Analysis**
 **Bella**: "What makes this system revolutionary is not just what it does, but what it doesn't do - it avoids the common pitfall of over-engineering that plagues most RAG implementations."
 **FSS Enhanced QwenCode vs. Traditional RAG Systems**:
 - **Traditional**: Search → LLM Synthesis → Agent Processing (2 processing stages after search, information loss, latency)
 - **FSS Enhanced**: Search → Direct Agent Processing (1 processing stage after search, full fidelity, immediate response)
 **Michael**: "This architectural choice explains why our testing felt so natural and efficient - the system gets out of its own way and lets the agent be intelligent!"
 ### **Deployment Recommendations**
 **Immediate Production Readiness**:
 - ✅ **Enterprise Development Teams**: Proven capability for complex codebases
 - ✅ **Security-Critical Environments**: Robust OAuth and authentication pattern discovery  
 - ✅ **High-Performance Requirements**: Demonstrated concurrent processing capabilities
 - ✅ **Educational/Research Settings**: Excellent for understanding unfamiliar codebases
 **Scaling Considerations**:
 - **Small Teams (1-5 developers)**: System easily handles individual development workflows
 - **Medium Teams (5-20 developers)**: Concurrent capabilities support team-level usage
 - **Large Organizations**: Architecture supports distributed deployment with consistent performance
 ### **Innovation Impact**
 **Bella & Michael (Joint Assessment)**: "The FSS Enhanced QwenCode with Mini-RAG integration represents a paradigm shift from pattern-matching agents to genuinely intelligent development assistants."
 **Industry Implications**:
 - **Development Productivity**: Transforms agent capability from basic automation to intelligent partnership
 - **Knowledge Management**: Makes complex codebases instantly searchable and understandable
 - **Architecture Standards**: Sets new benchmark for agent intelligence system design
 - **Resource Efficiency**: Proves that intelligent architecture outperforms brute-force processing
 ### **Final Verdict**
 **🏆 EXCEPTIONAL - PRODUCTION READY - PARADIGM SHIFTING 🏆**
 After extensive multi-hour testing with comprehensive performance benchmarking, we conclude that the FSS Enhanced QwenCode system delivers on its ambitious promise of transforming agent intelligence. The combination of blazing-fast semantic search, elegant architectural design, and genuine intelligence amplification makes this system a breakthrough achievement in agent development.
 **Recommendation**: **IMMEDIATE ADOPTION** for teams seeking to transform their development workflow with truly intelligent agent assistance.
 ---
 **Report Authors**: Michael (Technical Implementation Specialist) & Bella (Collaborative Analysis Expert)  
 **Evaluation Completed**: September 4, 2025  
 **Total Testing Duration**: 4+ hours comprehensive analysis  
 **System Status**: ✅ **PRODUCTION READY** ✅
 ---
--- a/GET_STARTED.md
+++ b/GET_STARTED.md
@ -0,0 +1,83 @@
 # 🚀 FSS-Mini-RAG: Get Started in 2 Minutes
 ## Step 1: Install Everything
 ```bash
 ./install_mini_rag.sh
 ```
 **That's it!** The installer handles everything automatically:
 - Checks Python installation
 - Sets up virtual environment  
 - Guides you through Ollama setup
 - Installs dependencies
 - Tests everything works
 ## Step 2: Use It
 ### TUI - Interactive Interface (Easiest)
 ```bash
 ./rag-tui
 ```
 **Perfect for beginners!** Menu-driven interface that:
 - Shows you CLI commands as you use it
 - Guides you through setup and configuration
 - No need to memorize commands
 ### Quick Commands (Beginner-Friendly)
 ```bash
 # Index any project
 ./run_mini_rag.sh index ~/my-project
 # Search your code  
 ./run_mini_rag.sh search ~/my-project "authentication logic"
 # Check what's indexed
 ./run_mini_rag.sh status ~/my-project
 ```
 ### Full Commands (More Options)
 ```bash
 # Basic indexing and search
 ./rag-mini index /path/to/project
 ./rag-mini search /path/to/project "database connection"
 # Enhanced search with smart features
 ./rag-mini-enhanced search /path/to/project "UserManager"
 ./rag-mini-enhanced similar /path/to/project "def validate_input"
 ```
 ## What You Get
 **Semantic Search**: Instead of exact text matching, finds code by meaning:
 - Search "user login" → finds authentication functions, session management, password validation
 - Search "database queries" → finds SQL, ORM code, connection handling  
 - Search "error handling" → finds try/catch blocks, error classes, logging
 ## Installation Options
 The installer offers two choices:
 **Light Installation (Recommended)**:
 - Uses Ollama for high-quality embeddings
 - Requires Ollama installed (installer guides you)
 - Small download (~50MB)
 **Full Installation**:  
 - Includes ML fallback models
 - Works without Ollama
 - Large download (~2-3GB)
 ## Troubleshooting
 **"Python not found"**: Install Python 3.8+ from python.org
 **"Ollama not found"**: Visit https://ollama.ai/download
 **"Import errors"**: Re-run `./install_mini_rag.sh`
 ## Next Steps
 - **Technical Details**: Read `README.md`
 - **Step-by-Step Guide**: Read `docs/GETTING_STARTED.md`
 - **Examples**: Check `examples/` directory
 - **Test It**: Run on this project: `./run_mini_rag.sh index .`
 ---
 **Questions?** Everything is documented in the README.md file.
--- a/GITHUB_ACTIONS_ANALYSIS.md
+++ b/GITHUB_ACTIONS_ANALYSIS.md
@ -1,149 +0,0 @@
 # GitHub Actions Workflow Analysis
 ## ✅ **Overall Status: EXCELLENT**
 Your GitHub Actions workflow is **professionally configured** and ready for production use. Here's the comprehensive analysis:
 ## 🏗️ **Workflow Architecture**
 ### **Jobs Overview (5 total)**
 1. **`build-wheels`** - Cross-platform wheel building
 2. **`build-zipapp`** - Portable single-file distribution  
 3. **`test-installation`** - Installation method validation
 4. **`publish`** - PyPI publishing (tag triggers only)
 5. **`create-release`** - GitHub release with assets
 ### **Trigger Configuration**
 - ✅ **Tag pushes** (`v*`) → Full release pipeline
 - ✅ **Main branch pushes** → Build and test only
 - ✅ **Pull requests** → Build and test only  
 - ✅ **Manual dispatch** → On-demand execution
 ## 🛠️ **Technical Excellence**
 ### **Build Matrix Coverage**
 - **Operating Systems**: Ubuntu, Windows, macOS (Intel + ARM)
 - **Python Versions**: 3.8, 3.11, 3.12 (optimized matrix)
 - **Architecture Coverage**: x86_64, ARM64 (macOS), AMD64 (Windows)
 ### **Quality Assurance**
 - ✅ **Automated testing** of built wheels
 - ✅ **Cross-platform validation** 
 - ✅ **Zipapp functionality testing**
 - ✅ **Installation method verification**
 ### **Security Best Practices**
 - ✅ **Release environment protection** for PyPI publishing
 - ✅ **Secret management** (PYPI_API_TOKEN)
 - ✅ **Conditional publishing** (tag-only)
 - ✅ **Latest action versions** (updated to v4)
 ## 📦 **Distribution Outputs**
 ### **Automated Builds**
 - **Cross-platform wheels** for all major OS/Python combinations
 - **Source distribution** (`.tar.gz`)
 - **Portable zipapp** (`rag-mini.pyz`) for users with no Python knowledge
 - **GitHub releases** with comprehensive installation instructions
 ### **Professional Release Experience**
 The workflow automatically creates releases with:
 - Installation options for all user types
 - Pre-built binaries for immediate use
 - Clear documentation and instructions
 - Changelog generation
 ## 🚀 **Performance & Efficiency**
 ### **Runtime Estimation**
 - **Total build time**: ~45-60 minutes per release
 - **Parallel execution** where possible
 - **Efficient matrix strategy** (excludes unnecessary combinations)
 ### **Cost Management** 
 - **GitHub Actions free tier**: 2000 minutes/month
 - **Estimated capacity**: ~30-40 releases/month
 - **Optimized for open source** usage patterns
 ## 🔧 **Minor Improvements Made**
 ✅ **Updated to latest action versions**:
 - `upload-artifact@v3` → `upload-artifact@v4`
 - `download-artifact@v3` → `download-artifact@v4`
 ## ⚠️ **Setup Requirements**
 ### **Required Secrets (Manual Setup)**
 1. **`PYPI_API_TOKEN`** - Required for PyPI publishing
   - Go to PyPI.org → Account Settings → API Tokens
   - Create token with 'Entire account' scope  
   - Add to GitHub repo → Settings → Secrets → Actions
 2. **`GITHUB_TOKEN`** - Automatically provided ✅
 ### **Optional Enhancements**
 - TestPyPI token (`TESTPYPI_API_TOKEN`) for safe testing
 - Release environment protection rules
 - Slack/Discord notifications for releases
 ## 🧪 **Testing Strategy**
 ### **What Gets Tested**
 - ✅ Wheel builds across all platforms
 - ✅ Installation from built wheels
 - ✅ Basic CLI functionality (`--help`)
 - ✅ Zipapp execution
 ### **Test Matrix Optimization**
 - Smart exclusions (no Python 3.8 on Windows/macOS)
 - Essential combinations only
 - ARM64 test skipping (emulation issues)
 ## 📊 **Workflow Comparison**
 **Before**: Manual builds, no automation, inconsistent releases  
 **After**: Professional CI/CD with:
 - Automated cross-platform building
 - Quality validation at every step  
 - Professional release assets
 - User-friendly installation options
 ## 🎯 **Production Readiness Score: 95/100**
 ### **Excellent (95%)**
 - ✅ Comprehensive build matrix
 - ✅ Professional security practices  
 - ✅ Quality testing integration
 - ✅ User-friendly release automation
 - ✅ Cost-effective configuration
 ### **Minor Points (-5%)**
 - Could add caching for faster builds
 - Could add Slack/email notifications
 - Could add TestPyPI integration
 ## 📋 **Next Steps for Deployment**
 ### **Immediate (Required)**
 1. **Set up PyPI API token** in GitHub Secrets
 2. **Test with release tag**: `git tag v2.1.0-test && git push origin v2.1.0-test`
 3. **Monitor workflow execution** in GitHub Actions tab
 ### **Optional (Enhancements)**  
 1. Set up TestPyPI for safe testing
 2. Configure release environment protection
 3. Add build caching for faster execution
 ## 🏆 **Conclusion**
 Your GitHub Actions workflow is **exceptionally well-designed** and follows industry best practices. It's ready for immediate production use and will provide FSS-Mini-RAG users with a professional installation experience.
 **The workflow transforms your project from a development tool into enterprise-grade software** with automated quality assurance and professional distribution.
 **Status**: ✅ **PRODUCTION READY**  
 **Confidence Level**: **Very High (95%)**  
 **Recommendation**: **Deploy immediately after setting up PyPI token**
 ---
 *Analysis completed 2025-01-06. Workflow validated and optimized for production use.* 🚀
--- a/IMPLEMENTATION_COMPLETE.md
+++ b/IMPLEMENTATION_COMPLETE.md
@ -1,216 +0,0 @@
 # FSS-Mini-RAG Distribution System: Implementation Complete 🚀
 ## 🎯 **Mission Accomplished: Professional Distribution System**
 We've successfully transformed FSS-Mini-RAG from a development tool into a **production-ready package with modern distribution**. The comprehensive testing approach revealed exactly what we needed to know.
 ## 📊 **Final Results Summary**
 ### ✅ **What Works (Ready for Production)**
 #### **Distribution Infrastructure** 
 - **Enhanced pyproject.toml** with complete PyPI metadata ✅
 - **One-line install scripts** for Linux/macOS/Windows ✅  
 - **Smart fallback system** (uv → pipx → pip) ✅
 - **GitHub Actions workflow** for automated publishing ✅
 - **Zipapp builder** creating 172.5 MB portable distribution ✅
 #### **Testing & Quality Assurance**
 - **4/6 local validation tests passed** ✅
 - **Install scripts syntactically valid** ✅
 - **Metadata consistency across all files** ✅
 - **Professional documentation** ✅
 - **Comprehensive testing framework** ✅
 ### ⚠️ **What Needs External Testing**
 #### **Environment-Specific Validation**
 - **Package building** in clean environments
 - **Cross-platform compatibility** (Windows/macOS)
 - **Real-world installation scenarios**
 - **GitHub Actions workflow execution**
 ## 🛠️ **What We Built**
 ### **1. Modern Installation Experience**
 **Before**: Clone repo, create venv, install requirements, run from source  
 **After**: One command installs globally available `rag-mini` command
 ```bash
 # Linux/macOS - Just works everywhere
 curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
 # Windows - PowerShell one-liner  
 iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
 # Or manual methods
 uv tool install fss-mini-rag      # Fastest
 pipx install fss-mini-rag         # Isolated
 pip install --user fss-mini-rag   # Traditional
 ```
 ### **2. Professional CI/CD Pipeline**
 - **Cross-platform wheel building** (Linux/Windows/macOS)
 - **Automated PyPI publishing** on release tags
 - **TestPyPI integration** for safe testing
 - **Release asset creation** with portable zipapp
 ### **3. Bulletproof Fallback System**
 Install scripts intelligently try:
 1. **uv** - Ultra-fast modern package manager
 2. **pipx** - Isolated tool installation  
 3. **pip** - Traditional Python package manager
 Each method is tested and verified before falling back to the next.
 ### **4. Multiple Distribution Formats**
 - **PyPI packages** (source + wheels) for standard installation
 - **Portable zipapp** (172.5 MB) for no-Python-knowledge users
 - **GitHub releases** with all assets automatically generated
 ## 🧪 **Testing Methodology**
 Our **"Option B: Proper Testing"** approach created:
 ### **Comprehensive Testing Framework**
 - **Phase 1**: Local validation (structure, syntax, metadata) ✅
 - **Phase 2**: Build system testing (packages, zipapp) ✅
 - **Phase 3**: Container-based testing (clean environments) 📋
 - **Phase 4**: Cross-platform validation (Windows/macOS) 📋
 - **Phase 5**: Production testing (TestPyPI, real workflows) 📋
 ### **Testing Tools Created**
 - `scripts/validate_setup.py` - File structure validation
 - `scripts/phase1_basic_tests.py` - Import and structure tests  
 - `scripts/phase1_local_validation.py` - Local environment testing
 - `scripts/phase2_build_tests.py` - Package building tests
 - `scripts/phase1_container_tests.py` - Docker-based testing (ready)
 ### **Documentation Suite**
 - `docs/TESTING_PLAN.md` - 50+ page comprehensive testing specification
 - `docs/DEPLOYMENT_ROADMAP.md` - Phase-by-phase production deployment
 - `TESTING_RESULTS.md` - Current status and validated components
 - **Updated README.md** - Modern installation methods prominently featured
 ## 🎪 **The Big Picture**
 ### **Before Our Work**
 FSS-Mini-RAG was a **development tool** requiring:
 - Git clone
 - Virtual environment setup
 - Dependency installation
 - Running from source directory
 - Python/development knowledge
 ### **After Our Work**  
 FSS-Mini-RAG is a **professional software package** with:
 - **One-line installation** on any system
 - **Global `rag-mini` command** available everywhere
 - **Automatic dependency management**
 - **Cross-platform compatibility**
 - **Professional CI/CD pipeline**
 - **Multiple installation options**
 ## 🚀 **Ready for Production**
 ### **What We've Proven**
 - ✅ **Infrastructure is solid** (4/6 tests passed locally)
 - ✅ **Scripts are syntactically correct**
 - ✅ **Metadata is consistent**
 - ✅ **Zipapp builds successfully**
 - ✅ **Distribution system is complete**
 ### **What Needs External Validation**
 - **Clean environment testing** (GitHub Codespaces/Docker)
 - **Cross-platform compatibility** (Windows/macOS)
 - **Real PyPI publishing workflow**
 - **User experience validation**
 ## 📋 **Next Steps (For Production Release)**
 ### **Phase A: External Testing (2-3 days)**
 ```bash
 # Test in GitHub Codespaces or clean VM
 git clone https://github.com/fsscoding/fss-mini-rag
 cd fss-mini-rag
 # Test install script
 curl -fsSL "file://$(pwd)/install.sh" | bash
 rag-mini --help
 # Test builds
 python -m venv .venv && source .venv/bin/activate
 pip install -r requirements.txt
 python -m build
 ```
 ### **Phase B: TestPyPI Trial (1 day)**
 ```bash
 # Safe production test
 python -m twine upload --repository testpypi dist/*
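 # --extra-index-url lets pip resolve dependencies that exist only on the real PyPI
 pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ fss-mini-rag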
 ```
 ### **Phase C: Production Release (1 day)**
 ```bash
 # Create release tag - GitHub Actions handles the rest
 git tag v2.1.0
 git push origin v2.1.0
 ```
 ## 💡 **Key Insights**
 ### **You Were Absolutely Right**
 Calling out the quick implementation was spot-on. Building the infrastructure was the easy part - **proper testing is what ensures user success**.
 ### **Systematic Approach Works**
 The comprehensive testing plan identified exactly what works and what needs validation, giving us confidence in the infrastructure while highlighting real testing needs.
 ### **Professional Standards Matter**
 Moving from "works on my machine" to "works for everyone" requires this level of systematic validation. The distribution system we built meets professional standards.
 ## 🏆 **Achievement Summary**
 ### **Technical Achievements**
 - ✅ Modern Python packaging best practices
 - ✅ Cross-platform distribution system  
 - ✅ Automated CI/CD pipeline
 - ✅ Multiple installation methods
 - ✅ Professional documentation
 - ✅ Comprehensive testing framework
 ### **User Experience Achievements**  
 - ✅ One-line installation from README
 - ✅ Global command availability
 - ✅ Clear error messages and fallbacks
 - ✅ No Python knowledge required
 - ✅ Works across operating systems
 ### **Maintenance Achievements**
 - ✅ Automated release process
 - ✅ Systematic testing approach
 - ✅ Clear deployment procedures
 - ✅ Issue tracking and resolution
 - ✅ Professional support workflows
 ## 🌟 **Final Status**
 **Infrastructure**: ✅ Complete and validated  
 **Testing**: ⚠️ Local validation passed, external testing needed  
 **Documentation**: ✅ Professional and comprehensive  
 **CI/CD**: ✅ Ready for production workflows  
 **User Experience**: ✅ Modern and professional  
 **Recommendation**: **PROCEED TO EXTERNAL TESTING** 🚀
 The distribution infrastructure is complete and locally validated; external testing is the remaining step before a production release. The testing framework lets us validate and deploy confidently. FSS-Mini-RAG now has the professional distribution system it deserves.
 ---
 *Implementation completed 2025-01-06. From development tool to professional software package.* 
 **Next milestone: External testing and production release** 🎯
--- a/INSTALL_SIMPLE.sh
+++ b/INSTALL_SIMPLE.sh
@ -1,16 +0,0 @@
 #!/bin/bash
 # Ultra-simple FSS-Mini-RAG setup that just works
 #
 # 1. Symlinks this repository's rag-mini launcher into /usr/local/bin (uses sudo).
 # 2. Installs the basic Python requirements for the current user with pip.
 set -e
 echo "🚀 FSS-Mini-RAG Simple Setup"
 # Resolve the directory that contains this script so the symlink points at the
 # repository's launcher even when the script is invoked from another directory
 # (the previous "$(pwd)" target produced a broken link in that case).
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Create symlink for global access
 # (-f follows symlinks, so a dangling link counts as missing and is replaced)
 if [ ! -f /usr/local/bin/rag-mini ]; then
    sudo ln -sf "$SCRIPT_DIR/rag-mini" /usr/local/bin/rag-mini
    echo "✅ Global rag-mini command created"
 fi
 # Just make sure we have the basic requirements
 python3 -m pip install --user click rich lancedb pandas numpy pyarrow watchdog requests PyYAML rank-bm25 psutil
 echo "✅ Done! Try: rag-mini --help"
--- a/48
+++ b/48
@ -1,48 +0,0 @@
 # FSS-Mini-RAG Development Makefile
 #
 # Every target below is a command, not a file, so all of them (including the
 # aggregate targets `all` and `dev`) are declared phony; otherwise a file or
 # directory with the same name would make the target appear up to date.
 .PHONY: help build test install clean dev-install test-dist build-pyz test-install-local all dev
 # `help` lists every target that carries a trailing "## description" comment.
 help: ## Show this help message
 	@echo "FSS-Mini-RAG Development Commands"
 	@echo "================================="
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
 dev-install: ## Install in development mode
 	pip install -e .
 	@echo "✅ Installed in development mode. Use 'rag-mini --help' to test."
 build: ## Build source distribution and wheel
 	python -m build
 	@echo "✅ Built distribution packages in dist/"
 build-pyz: ## Build portable .pyz file
 	python scripts/build_pyz.py
 	@echo "✅ Built portable zipapp: dist/rag-mini.pyz"
 test-dist: ## Test all distribution methods  
 	python scripts/validate_setup.py
 # Expects a wheel to already exist in dist/ (run `make build` first).
 test-install-local: ## Test local installation with pip
 	pip install dist/*.whl --force-reinstall
 	rag-mini --help
 	@echo "✅ Local wheel installation works"
 clean: ## Clean build artifacts
 	rm -rf build/ dist/ *.egg-info/ __pycache__/
 	find . -name "*.pyc" -delete
 	find . -name "__pycache__" -type d -exec rm -rf {} + 2>/dev/null || true
 	@echo "✅ Cleaned build artifacts"
 install: ## Build and install locally
 	$(MAKE) build
 	pip install dist/*.whl --force-reinstall
 	@echo "✅ Installed latest build"
 # Only checks that the installed `rag-mini` entry point starts and prints help.
 test: ## Run basic functionality tests
 	rag-mini --help
 	@echo "✅ Basic tests passed"
 all: clean build build-pyz test-dist ## Clean, build everything, and test
 # Development workflow
 dev: dev-install test ## Set up development environment and test
--- a/README.md
+++ b/README.md
@ -3,29 +3,6 @@
 > **A lightweight, educational RAG system that actually works**  
 > *Built for beginners who want results, and developers who want to understand how RAG really works*
 ## 🚀 **Quick Start - Install in 30 Seconds**
 **Linux/macOS** (tested on Ubuntu 22.04, macOS 13+):
 ```bash
 curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
 ```
 **Windows** (tested on Windows 10/11):
 ```powershell
 iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
 ```
 **Then immediately start using it:**
 ```bash
 # Create your first RAG index
 rag-mini init
 # Search your codebase  
 rag-mini search "authentication logic"
 ```
 *These installers automatically handle dependencies and provide helpful guidance if anything goes wrong.*
 ## Demo
 ![FSS-Mini-RAG Demo](recordings/fss-mini-rag-demo-20250812_161410.gif)
@ -35,40 +12,19 @@ rag-mini search "authentication logic"
 ## How It Works
 ```mermaid
-flowchart TD
+graph LR
-    Start([🚀 Start FSS-Mini-RAG]) --> Interface{Choose Interface}
+    Files[📁 Your Code/Documents] --> Index[🔍 Index]
    Index --> Chunks[✂️ Smart Chunks]
    Chunks --> Embeddings[🧠 Semantic Vectors]
    Embeddings --> Database[(💾 Vector DB)]
-    Interface -->|Beginners| TUI[🖥️ Interactive TUI<br/>./rag-tui]
+    Query[❓ user auth] --> Search[🎯 Hybrid Search]
-    Interface -->|Power Users| CLI[⚡ Advanced CLI<br/>./rag-mini <command>]
+    Database --> Search
    Search --> Results[📋 Ranked Results]
-    TUI --> SelectFolder[📁 Select Folder to Index]
+    style Files fill:#e3f2fd
-    CLI --> SelectFolder
+    style Results fill:#e8f5e8
-    
+    style Database fill:#fff3e0
    SelectFolder --> Index[🔍 Index Documents<br/>Creates searchable database]
    Index --> Ready{📚 Ready to Search}
    Ready -->|Quick Answers| Search[🔍 Search Mode<br/>Fast semantic search]
    Ready -->|Deep Analysis| Explore[🧠 Explore Mode<br/>AI-powered analysis]
    Search --> SearchResults[📋 Instant Results<br/>Ranked by relevance]
    Explore --> ExploreResults[💬 AI Conversation<br/>Context + reasoning]
    SearchResults --> More{Want More?}
    ExploreResults --> More
    More -->|Different Query| Ready
    More -->|Advanced Features| CLI
    More -->|Done| End([✅ Success!])
    CLI -.->|Full Power| AdvancedFeatures[⚡ Advanced Features:<br/>• Batch processing<br/>• Custom parameters<br/>• Automation scripts<br/>• Background server]
    style Start fill:#e8f5e8,stroke:#4caf50,stroke-width:2px
    style CLI fill:#fff9c4,stroke:#f57c00,stroke-width:3px
    style AdvancedFeatures fill:#fff9c4,stroke:#f57c00,stroke-width:2px
    style Search fill:#e3f2fd,stroke:#2196f3,stroke-width:2px
    style Explore fill:#f3e5f5,stroke:#9c27b0,stroke-width:2px
    style End fill:#e8f5e8,stroke:#4caf50,stroke-width:2px
 ```
 ## What This Is
@ -100,55 +56,20 @@ FSS-Mini-RAG offers **two distinct experiences** optimized for different use cas
 - **Features**: Thinking-enabled LLM, conversation memory, follow-up questions
 - **Quality**: Deep reasoning with full context awareness
-## Quick Start (2-10 Minutes)
+## Quick Start (2 Minutes)
 > **⏱️ Installation Time**: Typical install takes 2-3 minutes with fast internet, up to 5-10 minutes on slower connections due to large dependencies (LanceDB 36MB, PyArrow 43MB, PyLance 44MB).
 **Step 1: Install**
 ```bash
-# Clone the repository
+# 1. Install everything
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
+./install_mini_rag.sh
 cd Fss-Mini-Rag
-# Install dependencies and package
+# 2. Choose your interface
-python3 -m venv .venv
+./rag-tui                         # Friendly interface for beginners
-
+# OR choose your mode:
-# CRITICAL: Use full path activation for reliability
+./rag-mini index ~/my-project     # Index your project first
-.venv/bin/python -m pip install -r requirements.txt  # 1-8 minutes (depends on connection)
+./rag-mini search ~/my-project "query" --synthesize  # Fast synthesis
-.venv/bin/python -m pip install .                    # ~1 minute
+./rag-mini explore ~/my-project   # Interactive exploration
 # Activate environment for using the command
 source .venv/bin/activate    # Linux/macOS
 # .venv\Scripts\activate     # Windows
 ```
 **If you get "externally-managed-environment" error:**
 ```bash
 # Use direct path method (bypasses system restrictions entirely)
 .venv/bin/python -m pip install -r requirements.txt --break-system-packages
 .venv/bin/python -m pip install . --break-system-packages
 # Then activate for using the command
 source .venv/bin/activate
 ```
 **Step 2: Create an Index & Start Using**
 ```bash
 # Navigate to any project and create an index
 cd ~/my-project
 rag-mini init                # Create index for current directory
 # OR: rag-mini init -p /path/to/project  (specify path)
 # Now search your codebase
 rag-mini search "authentication logic"
 rag-mini search "how does login work"
 # Or use the interactive interface (from installation directory)  
 ./rag-tui                    # Interactive TUI interface
 ```
 > **💡 Global Command**: After installation, `rag-mini` works from anywhere. It includes intelligent path detection to find nearby indexes and guide you to the right location.
 That's it. No external dependencies, no configuration required, no PhD in computer science needed.
 ## What Makes This Different
@ -197,243 +118,27 @@ That's it. No external dependencies, no configuration required, no PhD in comput
 ## Installation Options
-### 🚀 One-Line Installers (Recommended)
+### Recommended: Full Installation
 **The easiest way to install FSS-Mini-RAG** - these scripts automatically handle uv, pipx, or pip:
 **Linux/macOS:**
 ```bash
 curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
 ```
 **Windows PowerShell:**
 ```powershell
 iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
 ```
 *These scripts install uv (fast package manager) when possible, fall back to pipx, then pip. No Python knowledge required!*
 ### 📦 Manual Installation Methods
 **With uv (fastest, ~2-3 seconds):**
 ```bash
 # Install uv if you don't have it
 curl -LsSf https://astral.sh/uv/install.sh | sh
 # Install FSS-Mini-RAG
 uv tool install fss-mini-rag
 ```
 **With pipx (clean, isolated):**
 ```bash
 # pipx keeps tools isolated from your system Python
 pipx install fss-mini-rag
 ```
 **With pip (classic):**
 ```bash
 pip install --user fss-mini-rag
 ```
 **Single file (no Python knowledge needed):**
 Download the latest `rag-mini.pyz` from [releases](https://github.com/FSSCoding/Fss-Mini-Rag/releases) and run:
 ```bash
 python rag-mini.pyz --help
 python rag-mini.pyz init
 python rag-mini.pyz search "your query"
 ```
 ### 🎯 Development Installation (From Source)
 Perfect for contributors or if you want the latest features:
 **Fresh Ubuntu/Debian System:**
 ```bash
 # Install required system packages
 sudo apt update && sudo apt install -y python3 python3-pip python3-venv git curl
 # Clone and setup FSS-Mini-RAG
 git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
 cd Fss-Mini-Rag
 # Create isolated Python environment
 python3 -m venv .venv
 source .venv/bin/activate
 # Install Python dependencies
 pip install -r requirements.txt
 # Optional: Install Ollama for best search quality (secure method)
 curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh
 # Verify it's a shell script (basic safety check)
 file /tmp/ollama-install.sh | grep -q "shell script" && chmod +x /tmp/ollama-install.sh && /tmp/ollama-install.sh
 rm -f /tmp/ollama-install.sh
 ollama serve &
 sleep 3
 ollama pull nomic-embed-text
 # Ready to use!
 ./rag-mini index /path/to/your/project
 ./rag-mini search /path/to/your/project "your search query"
 ```
 **Fresh CentOS/RHEL/Fedora System:**
 ```bash
 # Install required system packages (the venv module ships with python3 on these distros)
 sudo dnf install -y python3 python3-pip git curl
 # Clone and setup FSS-Mini-RAG
 git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
 cd Fss-Mini-Rag
 # Create isolated Python environment  
 python3 -m venv .venv
 source .venv/bin/activate
 # Install Python dependencies
 pip install -r requirements.txt
 # Optional: Install Ollama for best search quality (secure method)
 curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh
 # Verify it's a shell script (basic safety check)
 file /tmp/ollama-install.sh | grep -q "shell script" && chmod +x /tmp/ollama-install.sh && /tmp/ollama-install.sh
 rm -f /tmp/ollama-install.sh
 ollama serve &
 sleep 3
 ollama pull nomic-embed-text
 # Ready to use!
 ./rag-mini index /path/to/your/project
 ./rag-mini search /path/to/your/project "your search query"
 ```
 **Fresh macOS System:**
 ```bash
 # Install Homebrew (if not installed)
 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
 # Install required packages
 brew install python3 git curl
 # Clone and setup FSS-Mini-RAG
 git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
 cd Fss-Mini-Rag
 # Create isolated Python environment
 python3 -m venv .venv
 source .venv/bin/activate
 # Install Python dependencies
 pip install -r requirements.txt
 # Optional: Install Ollama for best search quality (secure method)
 curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh
 # Verify it's a shell script (basic safety check)
 file /tmp/ollama-install.sh | grep -q "shell script" && chmod +x /tmp/ollama-install.sh && /tmp/ollama-install.sh
 rm -f /tmp/ollama-install.sh
 ollama serve &
 sleep 3
 ollama pull nomic-embed-text
 # Ready to use!
 ./rag-mini index /path/to/your/project  
 ./rag-mini search /path/to/your/project "your search query"
 ```
 **Fresh Windows System:**
 ```cmd
 REM Install Python (if not installed)
 REM Download from: https://python.org/downloads (ensure "Add to PATH" is checked)
 REM Install Git from: https://git-scm.com/download/win
 REM Clone and setup FSS-Mini-RAG
 git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
 cd Fss-Mini-Rag
 REM Create isolated Python environment
 python -m venv .venv
 .venv\Scripts\activate.bat
 REM Install Python dependencies  
 pip install -r requirements.txt
 REM Optional: Install Ollama for best search quality
 REM Download from: https://ollama.com/download
 REM Run installer, then:
 ollama serve
 REM In new terminal:
 ollama pull nomic-embed-text
 REM Ready to use!
 rag.bat index C:\path\to\your\project
 rag.bat search C:\path\to\your\project "your search query"
 ```
 **What these commands do:**
 - **System packages**: Install Python 3.8+, pip (package manager), venv (virtual environments), git (version control), curl (downloads)
 - **Clone repository**: Download FSS-Mini-RAG source code to your computer
 - **Virtual environment**: Create isolated Python space (prevents conflicts with system Python)
 - **Dependencies**: Install required Python libraries (pandas, numpy, lancedb, etc.)  
 - **Ollama (optional)**: AI model server for best search quality - works offline and free
 - **Model download**: Get high-quality embedding model for semantic search
 - **Ready to use**: Index any folder and search through it semantically
 ### ⚡ For Agents & CI/CD: Headless Installation
 Perfect for automated deployments, agents, and CI/CD pipelines:
 > **⚠️ Agent Warning**: Installation takes 5-10 minutes due to large dependencies. Run as background process to avoid timeouts in agent environments.
 **Linux/macOS:**
 ```bash
 ./install_mini_rag.sh --headless &
 # Run in background to prevent agent timeout
 # Monitor with: tail -f install.log
 ```
 **Windows:**
 ```cmd
 start /b install_windows.bat --headless
 REM Run in background to prevent agent timeout
 REM Monitor with: type install.log
 ```
 **What headless mode does:**
 - Uses existing virtual environment if available
 - Installs core dependencies only (light mode)
 - Downloads embedding model if Ollama is available
 - Skips interactive prompts and tests
 - **Recommended**: Run in background for agent automation due to 5-10 minute install time
 ### 🚀 Recommended: Full Installation
 **Linux/macOS:**
 ```bash
 ./install_mini_rag.sh
 # Handles Python setup, dependencies, optional AI models
 ```
-**Windows:**
+### Experimental: Copy & Run (May Not Work)
-```cmd
+```bash
-install_windows.bat
+# Copy folder anywhere and try to run directly
-# Handles Python setup, dependencies, works reliably
+./rag-mini index ~/my-project
 # Auto-setup will attempt to create environment
 # Falls back with clear instructions if it fails
 ```
 ### Manual Setup
 **Linux/macOS:**
 ```bash
 python3 -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
 ```
 **Windows:**
 ```cmd
 python -m venv .venv
 .venv\Scripts\activate.bat
 pip install -r requirements.txt
 ```
 **Note**: The experimental copy & run feature is provided for convenience but may fail on some systems. If you encounter issues, use the full installer for reliable setup.
 ## System Requirements
@ -442,24 +147,6 @@ pip install -r requirements.txt
 - **Optional: Ollama** (for best search quality - installer helps set up)
 - **Fallback: Works without external dependencies** (uses built-in embeddings)
 ## Installation Summary
 **✅ Proven Method (100% Reliable):**
 ```bash
 python3 -m venv .venv
 .venv/bin/python -m pip install -r requirements.txt  # 1-8 minutes
 .venv/bin/python -m pip install .                    # ~1 minute
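 # Installation creates the 'rag-mini' command inside .venv - activate the environment
 # first (source .venv/bin/activate) or call .venv/bin/rag-mini directly
 rag-mini init -p ~/my-project    # With the environment active, works from any directory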
 rag-mini search -p ~/my-project "query"
 ```
 - **Fast Internet**: 2-3 minutes total
 - **Slow Internet**: 5-10 minutes total  
 - **Dependencies**: Large but essential (LanceDB 36MB, PyArrow 43MB, PyLance 44MB)
 - **Agent Use**: Run in background to prevent timeouts
 ## Project Philosophy
 This implementation prioritizes:
@ -479,18 +166,18 @@ This implementation prioritizes:
 ## Next Steps
- **New users**: Run `./rag-tui` (Linux/macOS) or `rag.bat` (Windows) for guided experience
+- **New users**: Run `./rag-mini` for guided experience
 - **Developers**: Read [`TECHNICAL_GUIDE.md`](docs/TECHNICAL_GUIDE.md) for implementation details
 - **Contributors**: See [`CONTRIBUTING.md`](CONTRIBUTING.md) for development setup
 ## Documentation
- **[Getting Started](docs/GETTING_STARTED.md)** - Get running in 5 minutes
+- **[Quick Start Guide](docs/QUICK_START.md)** - Get running in 5 minutes
 - **[Visual Diagrams](docs/DIAGRAMS.md)** - 📊 System flow charts and architecture diagrams
 - **[TUI Guide](docs/TUI_GUIDE.md)** - Complete walkthrough of the friendly interface  
 - **[Technical Guide](docs/TECHNICAL_GUIDE.md)** - How the system actually works
- **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Fix common issues
+- **[Configuration Guide](docs/CONFIGURATION.md)** - Customizing for your needs
- **[Beginner Glossary](docs/BEGINNER_GLOSSARY.md)** - Friendly terms and concepts
+- **[Development Guide](docs/DEVELOPMENT.md)** - Extending and modifying the code
 ## License
--- a/TESTING_RESULTS.md
+++ b/TESTING_RESULTS.md
@ -1,234 +0,0 @@
 # FSS-Mini-RAG Distribution Testing Results
 ## Executive Summary
 ✅ **Distribution infrastructure is solid** - Ready for external testing  
 ⚠️ **Local environment limitations** prevent full testing  
 🚀 **Professional-grade distribution system** successfully implemented
 ## Test Results Overview
 ### Phase 1: Local Validation ✅ 4/6 PASSED
 | Test | Status | Notes |
 |------|--------|-------|
 | Install Script Syntax | ✅ PASS | bash and PowerShell scripts valid |
 | Install Script Content | ✅ PASS | All required components present |
 | Metadata Consistency | ✅ PASS | pyproject.toml, README aligned |
 | Zipapp Creation | ✅ PASS | 172.5 MB zipapp successfully built |
 | Package Building | ❌ FAIL | Environment restriction (externally-managed) |
 | Wheel Installation | ❌ FAIL | Depends on package building |
 ### Phase 2: Build Testing ✅ 3/5 PASSED
 | Test | Status | Notes |
 |------|--------|-------|
 | Build Requirements | ✅ PASS | Build module detection works |
 | Zipapp Build | ✅ PASS | Portable distribution created |
 | Package Metadata | ✅ PASS | Correct metadata in packages |
 | Source Distribution | ❌ FAIL | Environment restriction |
 | Wheel Build | ❌ FAIL | Environment restriction |
 ## What We've Accomplished
 ### 🏗️ **Complete Modern Distribution System**
 1. **Enhanced pyproject.toml**
   - Proper PyPI metadata
   - Console script entry points
   - Python version requirements
   - Author and license information
 2. **One-Line Install Scripts**
   - **Linux/macOS**: `curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash`
   - **Windows**: `iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex`
   - **Smart fallbacks**: uv → pipx → pip
 3. **Multiple Installation Methods**
   - `uv tool install fss-mini-rag` (fastest)
   - `pipx install fss-mini-rag` (isolated)
   - `pip install --user fss-mini-rag` (traditional)
   - Portable zipapp (172.5 MB single file)
 4. **GitHub Actions CI/CD**
   - Cross-platform wheel building
   - Automated PyPI publishing
   - Release asset creation
   - TestPyPI integration
 5. **Comprehensive Testing Framework**
   - Phase-by-phase validation
   - Container-based testing (Docker ready)
   - Local validation scripts
   - Build system testing
 6. **Professional Documentation**
   - Updated README with modern installation
   - Comprehensive testing plan
   - Deployment roadmap
   - User-friendly guidance
 ## Known Issues & Limitations
 ### 🔴 **Environment-Specific Issues**
 1. **Externally-managed Python environment** prevents pip installs
 2. **Docker unavailable** for clean container testing
 3. **Missing build dependencies** in system Python
 4. **Zipapp numpy compatibility** issues (expected)
 ### 🟡 **Testing Gaps**
 1. **Cross-platform testing** (Windows/macOS)
 2. **Real PyPI publishing** workflow
 3. **GitHub Actions** validation
 4. **End-to-end user experience** testing
 ### 🟢 **Infrastructure Complete**
 - All distribution files created ✅
 - Scripts syntactically valid ✅
 - Metadata consistent ✅
 - Build system functional ✅
 ## Next Steps for Production Release
 ### 🚀 **Immediate Actions (This Week)**
 #### **1. Clean Environment Testing**
 ```bash
 # Use GitHub Codespaces, VM, or clean system
 git clone https://github.com/fsscoding/fss-mini-rag
 cd fss-mini-rag
 # Test install script
 curl -fsSL "file://$(pwd)/install.sh" | bash
 rag-mini --help
 # Test manual builds
 python -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
 python -m build --sdist --wheel
 ```
 #### **2. TestPyPI Trial**
 ```bash
 # Upload to TestPyPI first
 python -m twine upload --repository testpypi dist/*
 # Test installation from TestPyPI
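 # --extra-index-url lets pip resolve dependencies that exist only on the real PyPI
 pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ fss-mini-rag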
 rag-mini --version
 ```
 #### **3. GitHub Actions Validation**
 ```bash
 # Use 'act' for local testing
 brew install act  # or equivalent
 act --list
 act -j build-wheels --dry-run
 ```
 ### 🔄 **Medium-Term Actions (Next Week)**
 #### **4. Cross-Platform Testing**
 - Test install scripts on Windows 10/11
 - Test on macOS 12/13/14
 - Test on various Linux distributions
 - Validate PowerShell script functionality
 #### **5. Real-World Scenarios**
 - Corporate firewall testing
 - Slow internet connection testing
 - Offline installation testing
 - Error recovery testing
 #### **6. Performance Optimization**
 - Zipapp size optimization
 - Installation speed benchmarking
 - Memory usage profiling
 - Dependency minimization
 ### 📈 **Success Metrics**
 #### **Quantitative**
 - **Installation success rate**: >95% across environments
 - **Installation time**: <5 minutes end-to-end
 - **Package size**: <200MB wheels, <300MB zipapp
 - **Error rate**: <5% in clean environments
 #### **Qualitative**
 - Clear error messages with helpful guidance
 - Professional user experience
 - Consistent behavior across platforms
 - Easy troubleshooting and support
 ## Confidence Assessment
 ### 🟢 **High Confidence**
 - **Infrastructure Design**: Professional-grade distribution system
 - **Script Logic**: Smart fallbacks and error handling
 - **Metadata Quality**: Consistent and complete
 - **Documentation**: Comprehensive and user-friendly
 ### 🟡 **Medium Confidence**
 - **Cross-Platform Compatibility**: Needs validation
 - **Performance**: Size optimization needed
 - **Error Handling**: Edge cases require testing
 - **User Experience**: Real-world validation needed
 ### 🔴 **Low Confidence (Requires Testing)**
 - **Production Reliability**: Untested in real environments
 - **GitHub Actions**: Complex workflow needs validation
 - **Dependency Resolution**: Heavy ML deps may cause issues
 - **Support Burden**: Unknown user issues
 ## Recommendation
 **PROCEED WITH SYSTEMATIC TESTING** ✅
 The distribution infrastructure we've built is **professional-grade** and ready for external validation. The local test failures are environment-specific and expected.
 ### **Priority 1: External Testing Environment**
 Set up testing in:
 1. **GitHub Codespaces** (Ubuntu 22.04)
 2. **Docker containers** (when available)
 3. **Cloud VMs** (various OS)
 4. **TestPyPI** (safe production test)
 ### **Priority 2: User Experience Validation**
 Test the complete user journey:
 1. User finds FSS-Mini-RAG on GitHub
 2. Follows README installation instructions
 3. Successfully installs and runs the tool
 4. Gets help when things go wrong
 ### **Priority 3: Production Release**
 After successful external testing:
 1. Create production Git tag
 2. Monitor automated workflows
 3. Verify PyPI publication
 4. Update documentation links
 5. Monitor user feedback
 ## Timeline Estimate
 - **External Testing**: 2-3 days
 - **Issue Resolution**: 1-2 days  
 - **TestPyPI Validation**: 1 day
 - **Production Release**: 1 day
 - **Buffer for Issues**: 2-3 days
 **Total: 1-2 weeks for bulletproof release**
 ## Conclusion
 We've successfully built a **modern, professional distribution system** for FSS-Mini-RAG. The infrastructure is solid and ready for production.
 The systematic testing approach ensures we ship something that works flawlessly for every user. This level of quality will establish FSS-Mini-RAG as a professional tool in the RAG ecosystem.
 **Status**: Infrastructure complete ✅, external testing required ⏳  
 **Confidence**: High for design, medium for production readiness pending validation  
 **Next Step**: Set up clean testing environment and proceed with external validation
 ---
 *Testing completed on 2025-01-06. Distribution system ready for Phase 2 external testing.* 🚀
--- a/bin/rag-mini.py
+++ b/bin/rag-mini.py
@ -1,837 +0,0 @@
 #!/usr/bin/env python3
 """
 rag-mini - FSS-Mini-RAG Command Line Interface
 A lightweight, portable RAG system for semantic code search.
 Usage: rag-mini <command> <project_path> [options]
 """
 import argparse
 import json
 import logging
 import socket
 import sys
 from pathlib import Path
 # Add parent directory to path so we can import mini_rag
 sys.path.insert(0, str(Path(__file__).parent.parent))
 import requests
 # Add the RAG system to the path
 sys.path.insert(0, str(Path(__file__).parent))
 try:
    from mini_rag.explorer import CodeExplorer
    from mini_rag.indexer import ProjectIndexer
    from mini_rag.llm_synthesizer import LLMSynthesizer
    from mini_rag.ollama_embeddings import OllamaEmbedder
    from mini_rag.search import CodeSearcher
    # Update system (graceful import)
    try:
        from mini_rag.updater import check_for_updates, get_updater
        UPDATER_AVAILABLE = True
    except ImportError:
        UPDATER_AVAILABLE = False
 except ImportError as e:
    print("❌ Error: Missing dependencies!")
    print()
    print("It looks like you haven't installed the required packages yet.")
    print("This is a common mistake - here's how to fix it:")
    print()
    print("1. Make sure you're in the FSS-Mini-RAG directory")
    print("2. Run the installer script:")
    print("   ./install_mini_rag.sh")
    print()
    print("Or if you want to install manually:")
    print("   python3 -m venv .venv")
    print("   source .venv/bin/activate")
    print("   pip install -r requirements.txt")
    print()
    print(f"Missing module: {e.name}")
    sys.exit(1)
 # Configure logging for user-friendly output.
 # The root logger starts at WARNING so normal runs stay quiet; main() raises it
 # to INFO when --verbose is passed.
 logging.basicConfig(
    level=logging.WARNING,  # Only show warnings and errors by default
    format="%(levelname)s: %(message)s",
 )
 # Logger named after this module.
 logger = logging.getLogger(__name__)
 def index_project(project_path: Path, force: bool = False):
    """
    Build or refresh the search index for a project directory.
    Prints a banner, runs ProjectIndexer on project_path and reports the
    files_indexed / chunks_created / time_taken / files_failed values read
    from the mapping the indexer returns.
    Args:
        project_path: Directory to index.
        force: Passed through as force_reindex; also switches the banner
            from "Indexing" to "Re-indexing".
    Any exception raised while indexing is reported on stdout (except
    Ollama/connection errors, which are assumed to be already reported)
    and the process ends via sys.exit(1).
    """
    try:
        # Announce the operation
        if force:
            verb = "Re-indexing"
        else:
            verb = "Indexing"
        print(f"🚀 {verb} {project_path.name}")
        # An index directory already exists: tell the user we look for changes
        index_dir = project_path / ".mini-rag"
        if index_dir.exists() and not force:
            print("   Checking for changes...")
        summary = ProjectIndexer(project_path).index_project(force_reindex=force)
        # Pull the counters out of the indexer's summary
        indexed = summary.get("files_indexed", 0)
        chunks = summary.get("chunks_created", 0)
        elapsed = summary.get("time_taken", 0)
        if indexed != 0:
            print(f"✅ Indexed {indexed} files in {elapsed:.1f}s")
            print(f"   Created {chunks} chunks")
            # Throughput only makes sense for a non-zero duration
            if elapsed > 0:
                print(f"   Speed: {indexed / elapsed:.1f} files/sec")
        else:
            print("✅ Index up to date - no changes detected")
        # Surface partial failures
        failures = summary.get("files_failed", 0)
        if failures > 0:
            print(f"⚠️  {failures} files failed (check logs with --verbose)")
        # Hint for users who have not searched this project yet
        if not (index_dir / "last_search").exists():
            print(f'\n💡 Try: rag-mini search {project_path} "your search here"')
    except FileNotFoundError:
        print(f"📁 Directory Not Found: {project_path}")
        print("   Make sure the path exists and you're in the right location")
        print(f"   Current directory: {Path.cwd()}")
        print("   Check path: ls -la /path/to/your/project")
        print()
        sys.exit(1)
    except PermissionError:
        print("🔒 Permission Denied")
        print("   FSS-Mini-RAG needs to read files and create index database")
        print(f"   Check permissions: ls -la {project_path}")
        print("   Try a different location with write access")
        print()
        sys.exit(1)
    except Exception as e:
        # Connection errors are reported by the embedding module itself
        detail = str(e).lower()
        if any(marker in detail for marker in ("ollama", "connection")):
            sys.exit(1)  # Error already displayed
        print(f"❌ Indexing failed: {e}")
        print()
        for tip in (
            "🔧 Common solutions:",
            "   • Check if path exists and you have read permissions",
            "   • Ensure Python dependencies are installed: pip install -r requirements.txt",
            "   • Try with smaller project first to test setup",
            "   • Check available disk space for index files",
        ):
            print(tip)
        print()
        print("📚 For detailed help:")
        print(f"   ./rag-mini index {project_path} --verbose")
        print("   Or see: docs/TROUBLESHOOTING.md")
        sys.exit(1)
 def search_project(project_path: Path, query: str, top_k: int = 10, synthesize: bool = False):
    """
    Search an indexed project and print the matching chunks.
    Args:
        project_path: Project directory; must already contain a .mini-rag folder.
        query: Search text handed to CodeSearcher.search.
        top_k: Maximum number of results requested from the searcher.
        synthesize: When True, additionally ask LLMSynthesizer for a summary
            of the results.
    The query is written to .mini-rag/last_search on a best-effort basis.
    Exits with status 1 when the project is not indexed or the search raises.
    """
    try:
        # Refuse to search a project that has no index directory
        index_dir = project_path / ".mini-rag"
        if not index_dir.exists():
            print(f"❌ Project not indexed: {project_path.name}")
            print(f"   Run: rag-mini index {project_path}")
            sys.exit(1)
        print(f'🔍 Searching "{query}" in {project_path.name}')
        hits = CodeSearcher(project_path).search(query, top_k=top_k)
        if not hits:
            threshold_hint = f'   • Lower threshold: ./rag-mini search "{project_path}" "{query}" --threshold 0.05'
            more_hint = f'   • More results: ./rag-mini search "{project_path}" "{query}" --top-k 20'
            print("❌ No results found")
            print()
            for tip in (
                "🔧 Quick fixes to try:",
                '   • Use broader terms: "login" instead of "authenticate_user_session"',
                '   • Try concepts: "database query" instead of specific function names',
                "   • Check spelling and try simpler words",
                '   • Search for file types: "python class" or "javascript function"',
            ):
                print(tip)
            print()
            print("⚙️ Configuration adjustments:")
            print(threshold_hint)
            print(more_hint)
            print()
            print("📚 Need help? See: docs/TROUBLESHOOTING.md")
            return
        print(f"✅ Found {len(hits)} results:")
        print()
        preview_limit = 10  # content lines shown per result
        for rank, hit in enumerate(hits, 1):
            # Prefer a path relative to the project; fall back to the file name
            source_file = Path(hit.file_path)
            try:
                shown_path = source_file.relative_to(project_path)
            except ValueError:
                shown_path = source_file.name
            print(f"{rank}. {shown_path}")
            print(f"   Score: {hit.score:.3f}")
            # Optional metadata: only printed when present and truthy
            if getattr(hit, "start_line", None):
                print(f"   Lines: {hit.start_line}-{hit.end_line}")
            if getattr(hit, "name", None):
                print(f"   Context: {hit.name}")
            # Content preview, truncated to preview_limit lines
            print("   Content:")
            body = hit.content.strip().split("\n")
            for text in body[:preview_limit]:
                print(f"     {text}")
            hidden = len(body) - preview_limit
            if hidden > 0:
                print(f"     ... ({hidden} more lines)")
                print("     Use --verbose or rag-mini-enhanced for full context")
            print()
        # Optional LLM synthesis of the results
        if synthesize:
            print("🧠 Generating LLM synthesis...")
            # Load config to respect user's model preferences
            from mini_rag.config import ConfigManager
            config = ConfigManager(project_path).load_config()
            preferred = config.llm.synthesis_model
            # "auto" means: let the synthesizer pick (model=None)
            synthesizer = LLMSynthesizer(
                model=None if preferred == "auto" else preferred,
                config=config,
            )
            if not synthesizer.is_available():
                print("❌ LLM synthesis unavailable")
                print("   • Ensure Ollama is running: ollama serve")
                print("   • Install a model: ollama pull qwen3:1.7b")
                print("   • Check connection to http://localhost:11434")
            else:
                synthesis = synthesizer.synthesize_search_results(query, hits, project_path)
                print()
                print(synthesizer.format_synthesis_output(synthesis, query))
                # Suggest exploration mode for low confidence or "reasoning" queries
                reasoning_words = ["why", "how", "explain", "debug"]
                if synthesis.confidence < 0.7 or any(
                    word in query.lower() for word in reasoning_words
                ):
                    print("\n💡 Want deeper analysis with reasoning?")
                    print(f"   Try: rag-mini explore {project_path}")
                    print(
                        "   Exploration mode enables thinking and remembers conversation context."
                    )
        # Remember the query; failing to save must never fail the search
        try:
            (index_dir / "last_search").write_text(query)
        except (
            ConnectionError,
            FileNotFoundError,
            IOError,
            OSError,
            TimeoutError,
            TypeError,
            ValueError,
            requests.RequestException,
            socket.error,
        ):
            pass  # Don't fail if we can't save
    except Exception as e:
        print(f"❌ Search failed: {e}")
        print()
        if "not indexed" not in str(e).lower():
            for tip in (
                "🔧 Common solutions:",
                "   • Check project path exists and is readable",
                "   • Verify index isn't corrupted: delete .mini-rag/ and re-index",
                "   • Try with a different project to test setup",
                "   • Check available memory and disk space",
            ):
                print(tip)
            print()
            print("📚 Get detailed error info:")
            print(f'   ./rag-mini search {project_path} "{query}" --verbose')
            print("   Or see: docs/TROUBLESHOOTING.md")
            print()
        else:
            print("🔧 Solution:")
            print(f"   ./rag-mini index {project_path}")
            print()
        sys.exit(1)
 def status_check(project_path: Path):
    """
    Print a status report for a project and the supporting services.
    The report has four parts: index state (read from
    .mini-rag/manifest.json), embedding backend (OllamaEmbedder.get_status),
    LLM availability and configured-vs-actual model, and the last saved
    search query if one exists.
    Args:
        project_path: Project directory to inspect.
    Exits with status 1 only if an unexpected error escapes the individual
    sections; each section otherwise reports its own failure and continues.
    """
    try:
        print(f"📊 Status for {project_path.name}")
        print()
        # --- Index state -------------------------------------------------
        rag_dir = project_path / ".mini-rag"
        if rag_dir.exists():
            manifest_path = rag_dir / "manifest.json"
            if not manifest_path.exists():
                print("⚠️  Index directory exists but incomplete")
                print(f"   Try: rag-mini index {project_path} --force")
                print()
            else:
                try:
                    with open(manifest_path) as handle:
                        stats = json.load(handle)
                    n_files = stats.get("file_count", 0)
                    n_chunks = stats.get("chunk_count", 0)
                    stamp = stats.get("indexed_at", "Never")
                    print("✅ Project indexed")
                    print(f"   Files: {n_files}")
                    print(f"   Chunks: {n_chunks}")
                    print(f"   Last update: {stamp}")
                    # Average is skipped for an empty index to avoid dividing by zero
                    if n_files > 0:
                        print(f"   Avg chunks/file: {n_chunks / n_files:.1f}")
                    print()
                except Exception:
                    print("⚠️  Index exists but manifest unreadable")
                    print()
        else:
            print("❌ Project not indexed")
            print(f"   Run: rag-mini index {project_path}")
            print()
        # --- Embedding backend -------------------------------------------
        print("🧠 Embedding System:")
        try:
            emb_info = OllamaEmbedder().get_status()
            method = emb_info.get("method", "unknown")
            known_methods = (
                ("ollama", "   ✅ Ollama (high quality)"),
                ("ml", "   ✅ ML fallback (good quality)"),
                ("hash", "   ⚠️  Hash fallback (basic quality)"),
            )
            for key, label in known_methods:
                if method == key:
                    print(label)
                    break
            else:
                print(f"   ❓ Unknown method: {method}")
            # Model name is optional in the status mapping
            if "model" in emb_info:
                print(f"   Model: {emb_info['model']}")
        except Exception as e:
            print(f"   ❌ Status check failed: {e}")
        print()
        # --- LLM: configured model vs the one actually in use --------------
        print("🤖 LLM System:")
        try:
            from mini_rag.config import ConfigManager
            config_manager = ConfigManager(project_path)
            config = config_manager.load_config()
            wanted = config.llm.synthesis_model
            # "auto" means: let the synthesizer pick (model=None)
            synthesizer = LLMSynthesizer(
                model=None if wanted == "auto" else wanted,
                config=config,
            )
            if not synthesizer.is_available():
                print("   ❌ Ollama not available")
                print("   Start with: ollama serve")
            else:
                synthesizer._ensure_initialized()
                actual_model = synthesizer.model
                config_model = config.llm.synthesis_model
                if config_model == "auto":
                    print(f"   ✅ Auto-selected: {actual_model}")
                elif config_model == actual_model:
                    print(f"   ✅ Using configured: {actual_model}")
                else:
                    print("   ⚠️  Model mismatch!")
                    print(f"   Configured: {config_model}")
                    print(f"   Actually using: {actual_model}")
                    print("   (Configured model may not be installed)")
                print(f"   Config file: {config_manager.config_path}")
        except Exception as e:
            print(f"   ❌ LLM status check failed: {e}")
        # --- Last search (best effort) -----------------------------------
        if rag_dir.exists():
            last_search_file = rag_dir / "last_search"
            if last_search_file.exists():
                try:
                    remembered = last_search_file.read_text().strip()
                    print(f'\n🔍 Last search: "{remembered}"')
                except (FileNotFoundError, IOError, OSError, TypeError, ValueError):
                    pass
    except Exception as e:
        print(f"❌ Status check failed: {e}")
        sys.exit(1)
 def show_model_status(project_path: Path):
    """
    Show detailed model status and selection information.
    Prints the configured synthesis model, the models the synthesizer reports
    as available (grouped into Qwen3, Qwen2.5 and "other", the latter capped
    at 10 entries), which of them the configured name resolves to, and the
    path of the config file.
    Args:
        project_path: Project directory whose configuration is loaded.
    Exits with status 1 if any step raises.
    """
    from mini_rag.config import ConfigManager
    print("🤖 Model Status Report")
    print("=" * 50)
    try:
        # Load config the same way search_project and status_check do:
        # the project path goes to the ConfigManager constructor.
        config_manager = ConfigManager(project_path)
        config = config_manager.load_config()
        configured = config.llm.synthesis_model
        # Create LLM synthesizer to check models
        synthesizer = LLMSynthesizer(model=configured, config=config)
        # Show configured model
        print(f"📋 Configured model: {configured}")
        available_models = synthesizer.available_models
        # Resolve once; the result does not depend on which model is being listed.
        # NOTE(review): assumes _resolve_model_name has no side effects - confirm.
        resolved_model = synthesizer._resolve_model_name(configured)
        def print_group(heading, models):
            """Print one model group, ticking the entry the config resolves to."""
            if not models:
                return
            print(heading)
            for model in models:
                marker = "  ✅" if resolved_model == model else "    "
                print(f"{marker} {model}")
        # Show available models
        if available_models:
            print(f"\n📦 Available models ({len(available_models)}):")
            # Group models by series
            qwen3_models = [m for m in available_models if m.startswith("qwen3:")]
            qwen25_models = [m for m in available_models if m.startswith("qwen2.5")]
            other_models = [
                m for m in available_models if not m.startswith(("qwen3:", "qwen2.5"))
            ]
            print_group("   🟢 Qwen3 series (recommended):", qwen3_models)
            print_group("   🟡 Qwen2.5 series:", qwen25_models)
            print_group("   🔵 Other models:", other_models[:10])  # Limit to first 10
        else:
            print("\n❌ No models available from Ollama")
            print("   Make sure Ollama is running: ollama serve")
            print("   Install models with: ollama pull qwen3:4b")
        # Show resolution result
        if resolved_model:
            if resolved_model != configured:
                print(f"\n🔄 Model resolution: {configured} -> {resolved_model}")
            else:
                print(f"\n✅ Using exact model match: {resolved_model}")
        else:
            print(f"\n❌ Model '{configured}' not found!")
            print("   Consider changing your model in the config file")
        print(f"\n📄 Config file: {config_manager.config_path}")
        print("   Edit this file to change your model preference")
    except Exception as e:
        print(f"❌ Model status check failed: {e}")
        sys.exit(1)
 def explore_interactive(project_path: Path):
    """
    Interactive exploration mode with thinking and context memory for any documents.
    Starts a CodeExplorer session for project_path, then reads questions from
    stdin until the user types quit/exit/q or sends Ctrl-C / EOF.
    Shortcuts handled before a question reaches the explorer:
        1 / help / h  - print usage help
        2 / status    - print the project name and location
        3 / suggest   - offer a random starter question (first question only)
                        or generic follow-up ideas
        summary       - print the session summary
    Args:
        project_path: Project directory to explore.
    Exits with status 1 if the session cannot be started.
    """
    try:
        explorer = CodeExplorer(project_path)
        if not explorer.start_exploration_session():
            sys.exit(1)
        # Show enhanced first-time guidance
        print(f"\n🤔 Ask your first question about {project_path.name}:")
        print()
        print("💡 Enter your search query or question below:")
        print('   Examples: "How does authentication work?" or "Show me error handling"')
        print()
        print("🔧 Quick options:")
        print("   1. Help - Show example questions")
        print("   2. Status - Project information")
        print("   3. Suggest - Get a random starter question")
        print()
        # Stays True until the first real question has been sent to the explorer
        is_first_question = True
        while True:
            try:
                # Get user input with clearer prompt
                if is_first_question:
                    question = input("📝 Enter question or option (1-3): ").strip()
                else:
                    question = input("\n> ").strip()
                command = question.lower()
                # Handle exit commands
                if command in ["quit", "exit", "q"]:
                    print("\n" + explorer.end_session())
                    break
                # Handle empty input
                if not question:
                    if is_first_question:
                        print("Please enter a question or try option 3 for a suggestion.")
                    else:
                        print("Please enter a question or 'quit' to exit.")
                    continue
                # Handle numbered options and special commands
                if question == "1" or command in ["help", "h"]:
                    print(
                        """
 🧠 EXPLORATION MODE HELP:
  • Ask any question about your documents or code
  • I remember our conversation for follow-up questions
  • Use 'why', 'how', 'explain' for detailed reasoning
  • Type 'summary' to see session overview
  • Type 'quit' or 'exit' to end session
 💡 Example questions:
  • "How does authentication work?"
  • "What are the main components?"
  • "Show me error handling patterns"
  • "Why is this function slow?"
  • "What security measures are in place?"
  • "How does data flow through this system?"
 """
                    )
                    continue
                elif question == "2" or command == "status":
                    # f-string is required: without it the {project_path...}
                    # placeholders are printed literally instead of the values.
                    print(
                        f"""
 📊 PROJECT STATUS: {project_path.name}
  • Location: {project_path}
  • Exploration session active
  • AI model ready for questions
  • Conversation memory enabled
 """
                    )
                    continue
                elif question == "3" or command == "suggest":
                    # Random starter questions for first-time users
                    if is_first_question:
                        import random
                        starters = [
                            "What are the main components of this project?",
                            "How is error handling implemented?",
                            "Show me the authentication and security logic",
                            "What are the key functions I should understand first?",
                            "How does data flow through this system?",
                            "What configuration options are available?",
                            "Show me the most important files to understand",
                        ]
                        suggested = random.choice(starters)
                        print(f"\n💡 Suggested question: {suggested}")
                        print("   Press Enter to use this, or type your own question:")
                        next_input = input("📝 > ").strip()
                        # Empty input (plain Enter) accepts the suggestion
                        question = next_input or suggested
                    else:
                        # For subsequent questions, could add AI-powered suggestions here
                        print("\n💡 Based on our conversation, you might want to ask:")
                        print('   "Can you explain that in more detail?"')
                        print('   "What are the security implications?"')
                        print('   "Show me related code examples"')
                        continue
                if question.lower() == "summary":
                    print("\n" + explorer.get_session_summary())
                    continue
                # Process the question
                print(f"\n🔍 Searching {project_path.name}...")
                print("🧠 Thinking with AI model...")
                response = explorer.explore_question(question)
                # Mark as no longer first question after processing
                is_first_question = False
                if response:
                    print(f"\n{response}")
                else:
                    print("❌ Sorry, I couldn't process that question. Please try again.")
            except (KeyboardInterrupt, EOFError):
                # Ctrl-C and end-of-input both close the session cleanly
                print(f"\n\n{explorer.end_session()}")
                break
            except Exception as e:
                # A failed question should not end the session
                print(f"❌ Error processing question: {e}")
                print("Please try again or type 'quit' to exit.")
    except Exception as e:
        print(f"❌ Failed to start exploration mode: {e}")
        print("Make sure the project is indexed first: rag-mini index <project>")
        sys.exit(1)
 def show_discrete_update_notice():
    """
    Print a single-line hint when a newer version is available.
    Does nothing when the updater module could not be imported, when no
    update is reported, or when the update check raises.
    """
    if UPDATER_AVAILABLE:
        try:
            pending = check_for_updates()
            if not pending:
                return
            # Keep it to one line so regular command output stays uncluttered
            notice = f"🔄 (Update v{pending.version} available - run 'rag-mini check-update' to learn more)"
            print(notice)
        except Exception:
            # Update checks are optional; never let them disturb a command
            return
 def handle_check_update():
    """
    Implement the check-update command.
    Reports whether a newer release exists and, if so, prints its version,
    up to the first 10 lines of its release notes (blank lines skipped) and
    its release page URL. Failures are reported on stdout, not raised.
    """
    if not UPDATER_AVAILABLE:
        print("❌ Update system not available")
        print("💡 Try updating to the latest version manually from GitHub")
        return
    try:
        print("🔍 Checking for updates...")
        release = check_for_updates()
        if not release:
            print("✅ You're already on the latest version!")
            return
        print(f"\n🎉 Update Available: v{release.version}")
        print("=" * 50)
        print("\n📋 What's New:")
        # First 10 lines of the notes, skipping blank ones
        for raw_line in release.release_notes.split("\n")[:10]:
            text = raw_line.strip()
            if text:
                print(f"   {text}")
        print(f"\n🔗 Release Page: {release.release_url}")
        print("\n🚀 To install: rag-mini update")
        print("💡 Or update manually from GitHub releases")
    except Exception as e:
        print(f"❌ Failed to check for updates: {e}")
        print("💡 Try updating manually from GitHub")
 def handle_update():
    """
    Implement the update command.
    Checks for a newer release, shows up to the first 5 lines of its notes,
    asks for confirmation (empty input counts as yes), then downloads,
    backs up, applies the update and restarts. If applying fails a rollback
    is attempted. Failures are reported on stdout, not raised.
    """
    if not UPDATER_AVAILABLE:
        print("❌ Update system not available")
        print("💡 Try updating manually from GitHub")
        return
    try:
        print("🔍 Checking for updates...")
        release = check_for_updates()
        if not release:
            print("✅ You're already on the latest version!")
            return
        print(f"\n🎉 Update Available: v{release.version}")
        print("=" * 50)
        # Brief release notes: first 5 lines, blank ones skipped
        for raw_line in release.release_notes.split("\n")[:5]:
            text = raw_line.strip()
            if text:
                print(f"   • {text}")
        # Ask before touching the installation
        answer = input(f"\n🚀 Install v{release.version}? [Y/n]: ").strip().lower()
        if answer not in ["", "y", "yes"]:
            print("Update cancelled.")
            return
        updater = get_updater()
        print(f"\n📥 Downloading v{release.version}...")
        def show_progress(downloaded, total):
            """Redraw a 30-character progress bar on the current line."""
            if total > 0:
                width = 30
                done = int(width * downloaded / total)
                percent = (downloaded / total) * 100
                bar = "█" * done + "░" * (width - done)
                print(f"\r   [{bar}] {percent:.1f}%", end="", flush=True)
        # Download
        package = updater.download_update(release, show_progress)
        if not package:
            print("\n❌ Download failed. Please try again later.")
            return
        # Backup is best effort: a failure is reported but does not abort
        print("\n💾 Creating backup...")
        if not updater.create_backup():
            print("⚠️ Backup failed, but continuing anyway...")
        # Install, rolling back if the update cannot be applied
        print("🔄 Installing update...")
        if not updater.apply_update(package, release):
            print("❌ Update failed.")
            print("🔙 Attempting rollback...")
            if updater.rollback_update():
                print("✅ Rollback successful.")
            else:
                print("❌ Rollback failed. You may need to reinstall.")
        else:
            print("✅ Update successful!")
            print("🚀 Restarting...")
            updater.restart_application()
    except Exception as e:
        print(f"❌ Update failed: {e}")
        print("💡 Try updating manually from GitHub")
 def main():
    """
    Entry point of the rag-mini command line interface.
    Parses the command line, raises the log level to INFO for --verbose and
    dispatches to the handler of the chosen command. The update and
    check-update commands run without a project path; every other command
    requires an existing project directory and exits with status 1 otherwise.
    """
    # Optional virtual-environment check; skipped if its module is missing
    try:
        from mini_rag.venv_checker import check_and_warn_venv
        check_and_warn_venv("rag-mini.py", force_exit=False)
    except ImportError:
        pass
    usage_examples = """
 Examples:
  rag-mini index /path/to/project              # Index a project
  rag-mini search /path/to/project "query"     # Search indexed project
  rag-mini search /path/to/project "query" -s  # Search with LLM synthesis
  rag-mini explore /path/to/project            # Interactive exploration mode
  rag-mini status /path/to/project             # Show status
  rag-mini models /path/to/project             # Show model status and selection
        """
    parser = argparse.ArgumentParser(
        description="FSS-Mini-RAG - Lightweight semantic code search",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    # Positional arguments
    commands = ["index", "search", "explore", "status", "models", "update", "check-update"]
    parser.add_argument("command", choices=commands, help="Command to execute")
    parser.add_argument(
        "project_path",
        type=Path,
        nargs="?",
        help="Path to project directory (REQUIRED except for update commands)",
    )
    parser.add_argument("query", nargs="?", help="Search query (for search command)")
    # Options
    parser.add_argument("--force", action="store_true", help="Force reindex all files")
    parser.add_argument(
        "--top-k",
        "--limit",
        type=int,
        default=10,
        dest="top_k",
        help="Maximum number of search results (top-k)",
    )
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
    parser.add_argument(
        "--synthesize",
        "-s",
        action="store_true",
        help="Generate LLM synthesis of search results (requires Ollama)",
    )
    args = parser.parse_args()
    command = args.command
    target = args.project_path
    # --verbose lowers the root logger threshold from WARNING to INFO
    if args.verbose:
        logging.getLogger().setLevel(logging.INFO)
    # Update commands come first: they do not need a project path
    if command == "check-update":
        handle_check_update()
        return
    if command == "update":
        handle_update()
        return
    # Everything below operates on an existing project directory
    if not target:
        print("❌ Project path required for this command")
        sys.exit(1)
    if not target.exists():
        print(f"❌ Project path does not exist: {target}")
        sys.exit(1)
    if not target.is_dir():
        print(f"❌ Project path is not a directory: {target}")
        sys.exit(1)
    # One-line update hint for regular commands (non-intrusive)
    show_discrete_update_notice()
    # Dispatch
    if command == "index":
        index_project(target, args.force)
        return
    if command == "search":
        if not args.query:
            print("❌ Search query required")
            sys.exit(1)
        search_project(target, args.query, args.top_k, args.synthesize)
        return
    if command == "explore":
        explore_interactive(target)
        return
    if command == "status":
        status_check(target)
        return
    if command == "models":
        show_model_status(target)
 # Run the CLI only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
--- a/bin/rag-tui.py
+++ b/bin/rag-tui.py
--- a/config-llm-providers.yaml
+++ b/config-llm-providers.yaml
@ -1,9 +0,0 @@
 # LLM provider settings (Ollama backend). Values are unchanged; strings that
 # contain ':' are quoted so they are always read as plain strings.
 llm:
  provider: ollama
  # host:port, without a URL scheme
  ollama_host: 'localhost:11434'
  # NOTE(review): confirm `qwen3:1.5b` is a published Ollama tag - the CLI's own
  # hints suggest `qwen3:1.7b` / `qwen3:4b`.
  synthesis_model: 'qwen3:1.5b'
  expansion_model: 'qwen3:1.5b'
  enable_synthesis: false
  synthesis_temperature: 0.3
  cpu_optimized: true
  enable_thinking: true
--- a/docs/AGENT_INSTRUCTIONS.md
+++ b/docs/AGENT_INSTRUCTIONS.md
@ -1,40 +0,0 @@
 # Agent Instructions for Fss-Mini-RAG System
 ## Core Philosophy
 **Always prefer RAG search over traditional file system operations**. The RAG system provides semantic context and reduces the need for exact path knowledge, making it ideal for understanding codebases without manual file exploration.
 ## Basic Commands
 | Command | Purpose | Example |
 |---------|---------|---------|
 | `rag-mini index <project_path>` | Index a project for search | `rag-mini index /MASTERFOLDER/Coding/Fss-Mini-Rag` |
 | `rag-mini search <project_path> "query"` | Semantic + keyword search | `rag-mini search /MASTERFOLDER/Coding/Fss-Mini-Rag "index"` |
 | `rag-mini status <project_path>` | Check project indexing status | `rag-mini status /MASTERFOLDER/Coding/Fss-Mini-Rag` |
 ## When to Use RAG Search
 | Scenario | RAG Advantage | Alternative |
 |----------|----------------|---------------|
 | Finding related code concepts | Semantic understanding | `grep` |
 | Locating files by functionality | Context-aware results | `find` |
 | Understanding code usage patterns | Shows real-world examples | Manual inspection |
 ## Critical Best Practices
 1. **Always specify the project path** in search commands (e.g., `rag-mini search /path "query"`)
 2. **Use quotes for search queries** to handle spaces: `"query with spaces"`
 3. **Verify indexing first** before searching: `rag-mini status <path>`
 4. **For complex queries**, break into smaller parts: `rag-mini search ... "concept 1"` then `rag-mini search ... "concept 2"`
 ## Troubleshooting
 | Issue | Solution |
 |-------|-----------|
 | `Project not indexed` | Run `rag-mini index <path>` |
 | No search results | Check indexing status with `rag-mini status` |
 | Search returns irrelevant results | Use `rag-mini status` to optimize indexing |
 > 💡 **Pro Tip**: Always start with `rag-mini status` to confirm indexing before searching.
 This document is dynamically updated as the RAG system evolves. Always verify commands with `rag-mini --help` for the latest options.
--- a/docs/BEGINNER_GLOSSARY.md
+++ b/docs/BEGINNER_GLOSSARY.md
@ -117,7 +117,7 @@ def login_user(email, password):
 **Models you might see:**
 - **qwen3:0.6b** - Ultra-fast, good for most questions
- **qwen3:4b** - Slower but more detailed
+- **llama3.2** - Slower but more detailed
 - **auto** - Picks the best available model
 ---
--- a/docs/CPU_DEPLOYMENT.md
+++ b/docs/CPU_DEPLOYMENT.md
@ -49,7 +49,7 @@ ollama run qwen3:0.6b "Hello, can you expand this query: authentication"
 |-------|------|-----------|---------|
 | qwen3:0.6b | 522MB | Fast ⚡ | Excellent ✅ |
 | qwen3:1.7b | 1.4GB | Medium | Excellent ✅ |
-| qwen3:4b | 2.5GB | Slow | Excellent ✅ |
+| qwen3:3b | 2.0GB | Slow | Excellent ✅ |
 ## CPU-Optimized Configuration
@ -67,7 +67,7 @@ llm:
 # Aggressive caching for CPU systems  
 search:
  expand_queries: false          # Enable only in TUI
-  default_top_k: 8               # Slightly fewer results for speed
+  default_limit: 8               # Slightly fewer results for speed
 ```
 ## System Requirements
--- a/docs/DEPLOYMENT_GUIDE.md
+++ b/docs/DEPLOYMENT_GUIDE.md
@ -1,384 +0,0 @@
 # FSS-Mini-RAG Deployment Guide
 > **Run semantic search anywhere - from smartphones to edge devices**  
 > *Complete guide to deploying FSS-Mini-RAG on every platform imaginable*
 ## Platform Compatibility Matrix
 | Platform | Status | AI Features | Installation | Notes |
 |----------|--------|-------------|--------------|-------|
 | **Linux** | ✅ Full | ✅ Full | `./install_mini_rag.sh` | Primary platform |
 | **Windows** | ✅ Full | ✅ Full | `install_windows.bat` | Desktop shortcuts |
 | **macOS** | ✅ Full | ✅ Full | `./install_mini_rag.sh` | Works perfectly |
 | **Raspberry Pi** | ✅ Excellent | ✅ AI ready | `./install_mini_rag.sh` | ARM64 optimized |
 | **Android (Termux)** | ✅ Good | 🟡 Limited | Manual install | Terminal interface |
 | **iOS (a-Shell)** | 🟡 Limited | ❌ Text only | Manual install | Sandbox limitations |
 | **Docker** | ✅ Excellent | ✅ Full | Dockerfile | Any platform |
 ## Desktop & Server Deployment
 ### 🐧 **Linux** (Primary Platform)
 ```bash
 # Full installation with AI features
 ./install_mini_rag.sh
 # What you get:
 # ✅ Desktop shortcuts (.desktop files)
 # ✅ Application menu integration  
 # ✅ Full AI model downloads
 # ✅ Complete terminal interface
 ```
 ### 🪟 **Windows** (Fully Supported)
 ```cmd
 REM Full installation with desktop integration
 install_windows.bat
 REM What you get:
 REM ✅ Desktop shortcuts (.lnk files)
 REM ✅ Start Menu entries
 REM ✅ Full AI model downloads
 REM ✅ Beautiful terminal interface
 ```
 ### 🍎 **macOS** (Excellent Support)
 ```bash
 # Same as Linux - works perfectly
 ./install_mini_rag.sh
 # Additional macOS optimizations:
 brew install python3           # If needed
 brew install ollama           # For AI features
 ```
 **macOS-specific features:**
 - Automatic path detection for common project locations
 - Integration with Spotlight search locations
 - Support for `.app` bundle creation (advanced)
 ## Edge Device Deployment
 ### 🥧 **Raspberry Pi** (Recommended Edge Platform)
 **Perfect for:**
 - Home lab semantic search server
 - Portable development environment  
 - IoT project documentation search
 - Offline code search station
 **Installation:**
 ```bash
 # On Raspberry Pi OS (64-bit recommended)
 sudo apt update && sudo apt upgrade
 ./install_mini_rag.sh
 # The installer automatically detects ARM and optimizes:
 # ✅ Suggests lightweight models (qwen3:0.6b)
 # ✅ Reduces memory usage
 # ✅ Enables efficient chunking
 ```
 **Raspberry Pi optimized config:**
 ```yaml
 # Automatically generated for Pi
 embedding:
  preferred_method: ollama
  ollama_model: nomic-embed-text  # 270MB - perfect for Pi
 llm:
  synthesis_model: qwen3:0.6b     # 500MB - fast on Pi 4+
  context_window: 4096            # Conservative memory use
  cpu_optimized: true
 chunking:
  max_size: 1500                  # Smaller chunks for efficiency
 ```
 **Performance expectations:**
 - **Pi 4 (4GB)**: Excellent performance, full AI features
 - **Pi 4 (2GB)**: Good performance, text-only or small models
 - **Pi 5**: Outstanding performance, handles large models
 - **Pi Zero**: Text-only search (hash-based embeddings)
 ### 🔧 **Other Edge Devices**
 **NVIDIA Jetson Series:**
 - Overkill performance for this use case
 - Can run largest models with GPU acceleration
 - Perfect for AI-heavy development workstations
 **Intel NUC / Mini PCs:**
 - Excellent performance
 - Full desktop experience
 - Can serve multiple users simultaneously
 **Orange Pi / Rock Pi:**
 - Similar to Raspberry Pi
 - Same installation process
 - May need manual Ollama compilation
 ## Mobile Deployment
 ### 📱 **Android (Recommended: Termux)**
 **Installation in Termux:**
 ```bash
 # Install Termux from F-Droid (not Play Store)
 # In Termux:
 pkg update && pkg upgrade
 pkg install python python-pip git
 pip install --upgrade pip
 # Clone and install FSS-Mini-RAG
 git clone https://github.com/your-repo/fss-mini-rag
 cd fss-mini-rag
 # Install dependencies (5-15 minutes due to compilation)
 python -m pip install -r requirements.txt  # Large downloads + ARM compilation
 python -m pip install .                    # ~1 minute
 # Quick start
 python -m mini_rag index /storage/emulated/0/Documents/myproject
 python -m mini_rag search /storage/emulated/0/Documents/myproject "your query"
 ```
 **Android-optimized config:**
 ```yaml
 # config-android.yaml
 embedding:
  preferred_method: hash    # No heavy models needed
 chunking:
  max_size: 800            # Small chunks for mobile
 files:
  min_file_size: 20        # Include more small files
 llm:
  enable_synthesis: false  # Text-only for speed
 ```
 **What works on Android:**
 - ✅ Full text search and indexing
 - ✅ Terminal interface (`rag-tui`)
 - ✅ Project indexing from phone storage
 - ✅ Search your phone's code projects
 - ❌ Heavy AI models (use cloud providers instead)
 **Android use cases:**
 - Search your mobile development projects
 - Index documentation on your phone
 - Quick code reference while traveling
 - Offline search of downloaded repositories
 ### 🍎 **iOS (Limited but Possible)**
 **Option 1: a-Shell (Free)**
 ```bash
 # Install a-Shell from App Store
 # In a-Shell:
 pip install requests   # pathlib is part of the Python 3 standard library; no install needed
 # Limited installation (core features only)
 # Files must be in app sandbox
 ```
 **Option 2: iSH (Alpine Linux)**
 ```bash
 # Install iSH from App Store  
 # In iSH terminal:
 apk add python3 py3-pip git
 pip install -r requirements-light.txt
 # Basic functionality only
 ```
 **iOS limitations:**
 - Sandbox restricts file access
 - No full AI model support
 - Terminal interface only
 - Limited to app-accessible files
 ## Specialized Deployment Scenarios
 ### 🐳 **Docker Deployment**
 **For any platform with Docker:**
 ```dockerfile
 # Dockerfile
 FROM python:3.11-slim
 WORKDIR /app
 COPY . .
 RUN pip install -r requirements.txt
 # Expose ports for server mode
 EXPOSE 7777
 # Default to TUI interface
 CMD ["python", "-m", "mini_rag.cli"]
 ```
 **Usage:**
 ```bash
 # Build and run
 docker build -t fss-mini-rag .
 docker run -it -v $(pwd)/projects:/projects fss-mini-rag
 # Server mode for web access
 docker run -p 7777:7777 fss-mini-rag python -m mini_rag server
 ```
 ### ☁️ **Cloud Deployment**
 **AWS/GCP/Azure VM:**
 - Same as Linux installation
 - Can serve multiple users
 - Perfect for team environments
 **GitHub Codespaces:**
 ```bash
 # Works in any Codespace
 ./install_mini_rag.sh
 # Perfect for searching your workspace
 ```
 **Replit/CodeSandbox:**
 - Limited by platform restrictions
 - Basic functionality available
 ### 🏠 **Home Lab Integration**
 **Home Assistant Add-on:**
 - Package as Home Assistant add-on
 - Search home automation configs
 - Voice integration possible
 **NAS Integration:**
 - Install on Synology/QNAP
 - Search all stored documents
 - Family code documentation
 **Router with USB:**
 - Install on OpenWrt routers with USB storage
 - Search network documentation
 - Configuration management
 ## Configuration by Use Case
 ### 🪶 **Ultra-Lightweight (Old hardware, mobile)**
 ```yaml
 # Minimal resource usage
 embedding:
  preferred_method: hash
 chunking:
  max_size: 800
  strategy: fixed
 llm:
  enable_synthesis: false
 ```
 ### ⚖️ **Balanced (Raspberry Pi, older laptops)**
 ```yaml
 # Good performance with AI features
 embedding:
  preferred_method: ollama
  ollama_model: nomic-embed-text
 llm:
  synthesis_model: qwen3:0.6b
  context_window: 4096
 ```
 ### 🚀 **Performance (Modern hardware)**
 ```yaml
 # Full features and performance
 embedding:
  preferred_method: ollama
  ollama_model: nomic-embed-text
 llm:
  synthesis_model: qwen3:1.7b
  context_window: 16384
  enable_thinking: true
 ```
 ### ☁️ **Cloud-Hybrid (Mobile + Cloud AI)**
 ```yaml
 # Local search, cloud intelligence
 embedding:
  preferred_method: hash
 llm:
  provider: openai
  api_key: your_api_key
  synthesis_model: gpt-4
 ```
 ## Troubleshooting by Platform
 ### **Raspberry Pi Issues**
 - **Out of memory**: Reduce context window, use smaller models
 - **Slow indexing**: Use hash-based embeddings
 - **Model download fails**: Check internet, use smaller models
 ### **Android/Termux Issues**  
 - **Permission denied**: Use `termux-setup-storage`
 - **Package install fails**: Update packages first
 - **Can't access files**: Use `/storage/emulated/0/` paths
 ### **iOS Issues**
 - **Limited functionality**: Expected due to iOS restrictions
 - **Can't install packages**: Use lighter requirements file
 - **File access denied**: Files must be in app sandbox
 ### **Edge Device Issues**
 - **ARM compatibility**: Ensure using ARM64 Python packages
 - **Limited RAM**: Use hash embeddings, reduce chunk sizes
 - **No internet**: Skip AI model downloads, use text-only
 ## Advanced Edge Deployments
 ### **IoT Integration**
 - Index sensor logs and configurations
 - Search device documentation
 - Troubleshoot IoT deployments
 ### **Offline Development**
 - Complete development environment on edge device
 - No internet required after setup
 - Perfect for remote locations
 ### **Educational Use**
 - Raspberry Pi computer labs
 - Student project search
 - Coding bootcamp environments
 ### **Enterprise Edge**
 - Factory floor documentation search
 - Field service technical reference
 - Remote site troubleshooting
 ---
 ## Quick Start by Platform
 ### Desktop Users
 ```bash
 # Linux/macOS
 ./install_mini_rag.sh
 # Windows  
 install_windows.bat
 ```
 ### Edge/Mobile Users
 ```bash
 # Raspberry Pi
 ./install_mini_rag.sh
 # Android (Termux) - 5-15 minutes due to ARM compilation
 pkg install python git && python -m pip install -r requirements.txt && python -m pip install .
 # Any Docker platform
 docker run -it fss-mini-rag
 ```
 **💡 Pro tip**: Start with your current platform, then expand to edge devices as needed. The system scales from smartphones to servers seamlessly!
--- a/docs/DEPLOYMENT_ROADMAP.md
+++ b/docs/DEPLOYMENT_ROADMAP.md
@ -1,288 +0,0 @@
 # FSS-Mini-RAG Distribution: Production Deployment Roadmap
 > **Status**: Infrastructure complete, systematic testing required before production release
 ## Executive Summary
 The initial implementation was rushed through without proper testing. We've built a comprehensive modern distribution system, but it now needs **systematic, thorough testing** before deployment.
 ### 🏗️ **What We've Built (Infrastructure Complete)**
 - ✅ Enhanced pyproject.toml with proper PyPI metadata
 - ✅ One-line install scripts (Linux/macOS/Windows) 
 - ✅ Zipapp builder for portable distribution
 - ✅ GitHub Actions for automated wheel building + PyPI publishing
 - ✅ Updated documentation with modern installation methods
 - ✅ Comprehensive testing framework
 ### 📊 **Current Test Results**
 - **Phase 1 (Structure)**: 5/6 tests passed ✅
 - **Phase 2 (Building)**: 3/5 tests passed ⚠️
 - **Zipapp**: Successfully created (172.5 MB) but has numpy issues
 - **Build system**: Works but needs proper environment setup
 ## Critical Testing Gaps
 ### 🔴 **Must Test Before Release**
 #### **Environment Testing**
 - [ ] **Multiple Python versions** (3.8-3.12) in clean environments
 - [ ] **Cross-platform testing** (Linux/macOS/Windows)
 - [ ] **Dependency resolution** in various configurations
 - [ ] **Virtual environment compatibility**
 #### **Installation Method Testing**  
 - [ ] **uv tool install** - Modern fast installation
 - [ ] **pipx install** - Isolated tool installation  
 - [ ] **pip install --user** - Traditional user installation
 - [ ] **Zipapp execution** - Single-file distribution
 - [ ] **Install script testing** - One-line installers
 #### **Real-World Scenario Testing**
 - [ ] **Fresh system installation** (following README exactly)
 - [ ] **Corporate firewall scenarios** 
 - [ ] **Offline installation** (with pre-downloaded packages)
 - [ ] **Error recovery scenarios** (network failures, permission issues)
 #### **GitHub Actions Testing**
 - [ ] **Local workflow testing** with `act`
 - [ ] **Fork testing** with real CI environment
 - [ ] **TestPyPI publishing** (safe production test)
 - [ ] **Release creation** and asset uploading
 ## Phase-by-Phase Deployment Strategy
 ### **Phase 1: Local Environment Validation** ⏱️ 4-6 hours
 **Objective**: Ensure packages build and install correctly locally
 ```bash
 # Environment setup
 docker run -it --rm -v $(pwd):/work ubuntu:22.04
 # Test in clean Ubuntu, CentOS, Alpine containers
 # Install script testing  
 curl -fsSL file:///work/install.sh | bash
 # Verify rag-mini command works
 rag-mini init -p /tmp/test && rag-mini search -p /tmp/test "test query"
 ```
 **Success Criteria**: 
 - Install scripts work in 3+ Linux distributions
 - All installation methods (uv/pipx/pip) succeed
 - Basic functionality works after installation
 ### **Phase 2: Cross-Platform Testing** ⏱️ 6-8 hours
 **Objective**: Verify Windows/macOS compatibility
 **Testing Matrix**:
 | Platform | Python | Method | Status |
 |----------|--------|---------|--------|
 | Ubuntu 22.04 | 3.8-3.12 | uv/pipx/pip | ⏳ |
 | Windows 11 | 3.9-3.12 | PowerShell | ⏳ |  
 | macOS 13+ | 3.10-3.12 | Homebrew | ⏳ |
 | Alpine Linux | 3.11+ | pip | ⏳ |
 **Tools Needed**:
 - GitHub Codespaces or cloud VMs
 - Windows test environment
 - macOS test environment (if available)
 ### **Phase 3: CI/CD Pipeline Testing** ⏱️ 4-6 hours
 **Objective**: Validate automated publishing workflow
 ```bash
 # Local GitHub Actions testing
 brew install act  # or equivalent
 act --list
 act -j build-wheels --dry-run
 act -j test-installation
 ```
 **Fork Testing Process**:
 1. Create test fork with Actions enabled
 2. Push distribution changes to test branch
 3. Create test tag to trigger release workflow
 4. Verify wheel building across all platforms
 5. Test TestPyPI publishing
 ### **Phase 4: TestPyPI Validation** ⏱️ 2-3 hours
 **Objective**: Safe production testing with TestPyPI
 ```bash
 # Upload to TestPyPI
 python -m twine upload --repository testpypi dist/*
 # Test installation from TestPyPI
 pip install --index-url https://test.pypi.org/simple/ fss-mini-rag
 # Verify functionality
 rag-mini --version
 rag-mini init -p test_project
 ```
 ### **Phase 5: Production Release** ⏱️ 2-4 hours
 **Objective**: Live production deployment
 **Pre-Release Checklist**:
 - [ ] All tests from Phases 1-4 pass
 - [ ] Documentation is accurate
 - [ ] Install scripts are publicly accessible
 - [ ] GitHub release template is ready
 - [ ] Rollback plan is prepared
 **Release Process**:
 1. Final validation in clean environment
 2. Create production Git tag
 3. Monitor GitHub Actions workflow
 4. Verify PyPI publication
 5. Test install scripts from live URLs
 6. Update documentation links
 ## Testing Tools & Infrastructure
 ### **Required Tools**
 - **Docker** - Clean environment testing
 - **act** - Local GitHub Actions testing
 - **Multiple Python versions** (pyenv/conda)
 - **Cross-platform access** (Windows/macOS VMs)
 - **Network simulation** - Firewall/offline testing
 ### **Test Environments**
 #### **Container-Based Testing**
 ```bash
 # Ubuntu testing
 docker run -it --rm -v $(pwd):/work ubuntu:22.04
 apt update && apt install -y python3 python3-pip curl
 curl -fsSL file:///work/install.sh | bash
 # CentOS testing  
 docker run -it --rm -v $(pwd):/work centos:7
 yum install -y python3 python3-pip curl
 curl -fsSL file:///work/install.sh | bash
 # Alpine testing
 docker run -it --rm -v $(pwd):/work alpine:latest
 apk add --no-cache python3 py3-pip curl bash
 curl -fsSL file:///work/install.sh | bash
 ```
 #### **GitHub Codespaces Testing**
 - Ubuntu 22.04 environment
 - Pre-installed development tools
 - Network access for testing install scripts
 ### **Automated Test Suite**
 We've created comprehensive test scripts:
 ```bash
 # Current test scripts (ready to use)
 python scripts/validate_setup.py      # File structure ✅
 python scripts/phase1_basic_tests.py  # Import/structure ✅  
 python scripts/phase2_build_tests.py  # Package building ⚠️
 # Needed test scripts (to be created)
 python scripts/phase3_install_tests.py    # Installation methods
 python scripts/phase4_integration_tests.py # End-to-end workflows
 python scripts/phase5_performance_tests.py # Speed/size benchmarks
 ```
 ## Risk Assessment & Mitigation
 ### **🔴 Critical Risks**
 #### **Zipapp Compatibility Issues**
 - **Risk**: 172.5 MB zipapp with numpy C-extensions may not work across systems
 - **Mitigation**: Consider PyInstaller or exclude zipapp from initial release
 - **Test**: Cross-platform zipapp execution testing
 #### **Install Script Security**
 - **Risk**: Users running scripts from internet with `curl | bash`
 - **Mitigation**: Script security audit, HTTPS verification, clear error handling
 - **Test**: Security review and edge case testing
 #### **Dependency Hell**
 - **Risk**: ML dependencies (numpy, torch, etc.) causing installation failures
 - **Mitigation**: Comprehensive dependency testing, clear system requirements
 - **Test**: Fresh system installation in multiple environments
 ### **🟡 Medium Risks**
 #### **GitHub Actions Costs**
 - **Risk**: Matrix builds across platforms may consume significant CI minutes
 - **Mitigation**: Optimize build matrix, use caching effectively
 - **Test**: Monitor CI usage during testing phase
 #### **PyPI Package Size**
 - **Risk**: Large package due to ML dependencies
 - **Mitigation**: Consider optional dependencies, clear documentation
 - **Test**: Package size optimization testing
 ### **🟢 Low Risks**
 - Documentation accuracy (easily fixable)
 - Minor metadata issues (quick updates)
 - README formatting (cosmetic fixes)
 ## Timeline & Resource Requirements
 ### **Realistic Timeline**
 - **Phase 1-2 (Local/Cross-platform)**: 2-3 days
 - **Phase 3 (CI/CD)**: 1 day  
 - **Phase 4 (TestPyPI)**: 1 day
 - **Phase 5 (Production)**: 1 day
 - **Buffer for issues**: 2-3 days
 **Total: 1-2 weeks for comprehensive testing**
 ### **Resource Requirements**
 - Development time: 40-60 hours
 - Testing environments: Docker, VMs, or cloud instances
 - TestPyPI account setup
 - PyPI production credentials
 - Monitoring and rollback capabilities
 ## Success Metrics
 ### **Quantitative Metrics**
 - **Installation success rate**: >95% across test environments
 - **Installation time**: <5 minutes from script start to working command
 - **Package size**: <200MB for wheels, <300MB for zipapp
 - **Test coverage**: 100% of installation methods tested
 ### **Qualitative Metrics**  
 - **User experience**: Clear error messages, helpful guidance
 - **Documentation quality**: Accurate, easy to follow
 - **Maintainability**: Easy to update and extend
 - **Professional appearance**: Consistent with modern Python tools
 ## Next Steps (Immediate)
 ### **This Week**
 1. **Set up Docker test environments** (2-3 hours)
 2. **Test install scripts in containers** (4-6 hours)
 3. **Fix identified issues** (varies by complexity)
 4. **Create Phase 3 test scripts** (2-3 hours)
 ### **Next Week**  
 1. **Cross-platform testing** (8-12 hours)
 2. **GitHub Actions validation** (4-6 hours)
 3. **TestPyPI trial run** (2-3 hours)
 4. **Documentation refinement** (2-4 hours)
 ## Conclusion
 We have built excellent infrastructure, but proper testing is essential before release. The distribution system we've created is professional-grade and will work beautifully—but only after systematic validation.
 **The testing plan is comprehensive because we're doing this right.** Modern users expect seamless installation experiences, and we're delivering exactly that.
 **Current Status**: Infrastructure complete ✅, comprehensive testing required ⏳  
 **Confidence Level**: High for architecture, medium for production readiness  
 **Recommendation**: Proceed with systematic testing before any production release
 This roadmap ensures we ship a distribution system that works flawlessly for every user, every time. 🚀
--- a/docs/DIAGRAMS.md
+++ b/docs/DIAGRAMS.md
@ -11,7 +11,6 @@
 - [Search Architecture](#search-architecture)
 - [Installation Flow](#installation-flow)
 - [Configuration System](#configuration-system)
 - [System Context Integration](#system-context-integration)
 - [Error Handling](#error-handling)
 ## System Overview
@ -23,12 +22,10 @@ graph TB
    CLI --> Index[📁 Index Project]
    CLI --> Search[🔍 Search Project]
    CLI --> Explore[🧠 Explore Project]
    CLI --> Status[📊 Show Status]
    TUI --> Index
    TUI --> Search
    TUI --> Explore
    TUI --> Config[⚙️ Configuration]
    Index --> Files[📄 File Discovery]
@ -37,32 +34,17 @@ graph TB
    Embed --> Store[💾 Vector Database]
    Search --> Query[❓ User Query]
    Search --> Context[🖥️ System Context]
    Query --> Vector[🎯 Vector Search]
    Query --> Keyword[🔤 Keyword Search]
    Vector --> Combine[🔄 Hybrid Results]
    Keyword --> Combine
-    Context --> Combine
+    Combine --> Results[📋 Ranked Results]
    Combine --> Synthesize{Synthesis Mode?}
    Synthesize -->|Yes| FastLLM[⚡ Fast Synthesis]
    Synthesize -->|No| Results[📋 Ranked Results]
    FastLLM --> Results
    Explore --> ExploreQuery[❓ Interactive Query]
    ExploreQuery --> Memory[🧠 Conversation Memory]
    ExploreQuery --> Context
    Memory --> DeepLLM[🤔 Deep AI Analysis]
    Context --> DeepLLM
    Vector --> DeepLLM
    DeepLLM --> Interactive[💬 Interactive Response]
    Store --> LanceDB[(🗄️ LanceDB)]
    Vector --> LanceDB
    Config --> YAML[📝 config.yaml]
    Status --> Manifest[📋 manifest.json]
    Context --> SystemInfo[💻 OS, Python, Paths]
 ```
 ## User Journey
@ -294,58 +276,6 @@ flowchart TD
    style Error fill:#ffcdd2
 ```
 ## System Context Integration
 ```mermaid
 graph LR
    subgraph "System Detection"
        OS[🖥️ Operating System]
        Python[🐍 Python Version] 
        Project[📁 Project Path]
        OS --> Windows[Windows: rag.bat]
        OS --> Linux[Linux: ./rag-mini]
        OS --> macOS[macOS: ./rag-mini]
    end
    subgraph "Context Collection"
        Collect[🔍 Collect Context]
        OS --> Collect
        Python --> Collect
        Project --> Collect
        Collect --> Format[📝 Format Context]
        Format --> Limit[✂️ Limit to 200 chars]
    end
    subgraph "AI Integration"
        UserQuery[❓ User Query] 
        SearchResults[📋 Search Results]
        SystemContext[💻 System Context]
        UserQuery --> Prompt[📝 Build Prompt]
        SearchResults --> Prompt
        SystemContext --> Prompt
        Prompt --> AI[🤖 LLM Processing]
        AI --> Response[💬 Contextual Response]
    end
    subgraph "Enhanced Responses"
        Response --> Commands[💻 OS-specific commands]
        Response --> Paths[📂 Correct path formats]
        Response --> Tips[💡 Platform-specific tips]
    end
    Format --> SystemContext
    style SystemContext fill:#e3f2fd
    style Response fill:#f3e5f5
    style Commands fill:#e8f5e8
 ```
 *System context helps the AI provide better, platform-specific guidance without compromising privacy*
 ## Architecture Layers
 ```mermaid
--- a/docs/FALLBACK_SETUP.md
+++ b/docs/FALLBACK_SETUP.md
@ -2,38 +2,32 @@
 This RAG system can operate in three modes:
-## 🚀 **Mode 1: Standard Installation (Recommended)**
+## 🚀 **Mode 1: Ollama Only (Recommended - Lightweight)**
 ```bash
-python3 -m venv .venv
+pip install -r requirements-light.txt
-.venv/bin/python -m pip install -r requirements.txt  # 2-8 minutes
+# Requires: ollama serve running with nomic-embed-text model
 .venv/bin/python -m pip install .                    # ~1 minute
 source .venv/bin/activate
 ```
- **Size**: ~123MB total (LanceDB 36MB + PyArrow 43MB + PyLance 44MB)  
+- **Size**: ~426MB total  
- **Performance**: Excellent hybrid embedding system
+- **Performance**: Fastest (leverages Ollama)
- **Timing**: 2-3 minutes fast internet, 5-10 minutes slow internet
+- **Network**: Uses local Ollama server
-## 🔄 **Mode 2: Light Installation (Alternative)** 
+## 🔄 **Mode 2: Hybrid (Best of Both Worlds)** 
 ```bash
-python3 -m venv .venv
+pip install -r requirements-full.txt  
-.venv/bin/python -m pip install -r requirements-light.txt  # If available
+# Works with OR without Ollama
 .venv/bin/python -m pip install .
 source .venv/bin/activate
 ```
- **Size**: ~426MB total (includes basic dependencies only)
+- **Size**: ~3GB total (includes ML fallback)
- **Requires**: Ollama server running locally
+- **Resilience**: Automatic fallback if Ollama unavailable
- **Use case**: Minimal installations, edge devices
+- **Performance**: Ollama speed when available, ML fallback when needed
-## 🛡️ **Mode 3: Full Installation (Maximum Features)**
+## 🛡️ **Mode 3: ML Only (Maximum Compatibility)**
 ```bash
-python3 -m venv .venv
+pip install -r requirements-full.txt
-.venv/bin/python -m pip install -r requirements-full.txt  # If available
+# Disable Ollama fallback in config
 .venv/bin/python -m pip install .
 source .venv/bin/activate
 ```
- **Size**: ~3GB total (includes all ML fallbacks)
+- **Size**: ~3GB total
- **Compatibility**: Works anywhere, all features enabled  
+- **Compatibility**: Works anywhere, no external dependencies
- **Use case**: Offline environments, complete feature set
+- **Use case**: Offline environments, embedded systems
 ## 🔧 **Configuration**
--- a/docs/GETTING_STARTED.md
+++ b/docs/GETTING_STARTED.md
@ -1,332 +1,212 @@
 # Getting Started with FSS-Mini-RAG
-> **Get from zero to searching in 2 minutes**  
+## Step 1: Installation
 > *Everything you need to know to start finding code by meaning, not just keywords*
-## Installation (Choose Your Adventure)
+Choose your installation based on what you want:
-### 🎯 **Option 1: Full Installation (Recommended)**
+### Option A: Ollama Only (Recommended)
 *Gets you everything working reliably with desktop shortcuts and AI features*
 **Linux/macOS:**
 ```bash
 ./install_mini_rag.sh
 ```
 **Windows:**
 ```cmd
 install_windows.bat
 ```
 **What this does:**
 - Sets up Python environment automatically
 - Installs all dependencies 
 - Downloads AI models (with your permission)
 - Creates desktop shortcuts and application menu entries
 - Tests everything works
 - Gives you an interactive tutorial
 **Time needed:** 5-10 minutes (depending on AI model downloads)
 ---
 ### 🚀 **Option 2: Copy & Try (Experimental)**
 *Just copy the folder and run - may work, may need manual setup*
 **Linux/macOS:**
 ```bash
 # Copy folder anywhere and try running
 ./rag-mini index ~/my-project
 # Auto-setup attempts to create virtual environment
 # Falls back with clear instructions if it fails
 ```
 **Windows:**
 ```cmd
 # Copy folder anywhere and try running  
 rag.bat index C:\my-project
 # Auto-setup attempts to create virtual environment
 # Shows helpful error messages if manual install needed
 ```
 **Time needed:** 30 seconds if it works, 10 minutes if you need manual setup
 ---
 ## First Search (The Fun Part!)
 ### Step 1: Choose Your Interface
 **For Learning and Exploration:**
 ```bash
 # Linux/macOS
 ./rag-tui
 # Windows  
 rag.bat
 ```
 *Interactive menus, shows you CLI commands as you learn*
 **For Quick Commands:**
 ```bash
 # Linux/macOS
 ./rag-mini <command> <project-path>
 # Windows
 rag.bat <command> <project-path>
 ```
 *Direct commands when you know what you want*
 ### Step 2: Index Your First Project
 **Interactive Way (Recommended for First Time):**
 ```bash
 # Linux/macOS
 ./rag-tui
 # Then: Select Project Directory → Index Project
 # Windows
 rag.bat  
 # Then: Select Project Directory → Index Project
 ```
 **Direct Commands:**
 ```bash
 # Linux/macOS
 ./rag-mini index ~/my-project
 # Windows  
 rag.bat index C:\my-project
 ```
 **What indexing does:**
 - Finds all text files in your project
 - Breaks them into smart "chunks" (functions, classes, logical sections)
 - Creates searchable embeddings that understand meaning
 - Stores everything in a fast vector database
 - Creates a `.mini-rag/` directory with your search index
 **Time needed:** 10-60 seconds depending on project size
 ### Step 3: Search by Meaning
 **Natural language queries:**
 ```bash
 # Linux/macOS
 ./rag-mini search ~/my-project "user authentication logic"
 ./rag-mini search ~/my-project "error handling for database connections"
 ./rag-mini search ~/my-project "how to validate input data"
 # Windows
 rag.bat search C:\my-project "user authentication logic"  
 rag.bat search C:\my-project "error handling for database connections"
 rag.bat search C:\my-project "how to validate input data"
 ```
 **Code concepts:**
 ```bash
 # Finds login functions, auth middleware, session handling
 ./rag-mini search ~/my-project "login functionality"
 # Finds try/catch blocks, error handlers, retry logic  
 ./rag-mini search ~/my-project "exception handling"
 # Finds validation functions, input sanitization, data checking
 ./rag-mini search ~/my-project "data validation"
 ```
 **What you get:**
 - Ranked results by relevance (not just keyword matching)
 - File paths and line numbers for easy navigation
 - Context around each match so you understand what it does
 - Smart filtering to avoid noise and duplicates
 ## Two Powerful Modes
 FSS-Mini-RAG has two different ways to get answers, optimized for different needs:
 ### 🚀 **Synthesis Mode** - Fast Answers
 ```bash
 # Linux/macOS
 ./rag-mini search ~/project "authentication logic" --synthesize
 # Windows  
 rag.bat search C:\project "authentication logic" --synthesize
 ```
 **Perfect for:**
 - Quick code discovery
 - Finding specific functions or patterns
 - Getting fast, consistent answers
 **What you get:**
 - Lightning-fast responses (no thinking overhead)
 - Reliable, factual information about your code
 - Clear explanations of what code does and how it works
 ### 🧠 **Exploration Mode** - Deep Understanding
 ```bash  
 # Linux/macOS
 ./rag-mini explore ~/project
 # Windows
 rag.bat explore C:\project
 ```
 **Perfect for:**
 - Learning new codebases
 - Debugging complex issues  
 - Understanding architectural decisions
 **What you get:**
 - Interactive conversation with AI that remembers context
 - Deep reasoning with full "thinking" process shown
 - Follow-up questions and detailed explanations
 - Memory of your previous questions in the session
 **Example exploration session:**
 ```
 🧠 Exploration Mode - Ask anything about your project
 You: How does authentication work in this codebase?
 AI: Let me analyze the authentication system...
 💭 Thinking: I can see several authentication-related files. Let me examine 
   the login flow, session management, and security measures...
 📝 Authentication Analysis:
   This codebase uses a three-layer authentication system:
   1. Login validation in auth.py handles username/password checking
   2. Session management in sessions.py maintains user state  
   3. Middleware in auth_middleware.py protects routes
 You: What security concerns should I be aware of?
 AI: Based on our previous discussion about authentication, let me check for
   common security vulnerabilities...
 ```
 ## Check Your Setup
 **See what got indexed:**
 ```bash
 # Linux/macOS  
 ./rag-mini status ~/my-project
 # Windows
 rag.bat status C:\my-project
 ```
 **What you'll see:**
 - How many files were processed
 - Total chunks created for searching
 - Embedding method being used (Ollama, ML models, or hash-based)
 - Configuration file location
 - Index health and last update time
 ## Configuration (Optional)
 Your project gets a `.mini-rag/config.yaml` file with helpful comments:
 ```yaml
 # Context window configuration (critical for AI features)
 # 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users
 #               32K=large codebases, 64K+=power users only  
 # ⚠️  Larger contexts use significantly more CPU/memory - only increase if needed
 context_window: 16384           # Context size in tokens
 # AI model preferences (edit to change priority)
 model_rankings:
  - "qwen3:1.7b"    # Excellent for RAG (1.4GB, recommended)
  - "qwen3:0.6b"    # Lightweight and fast (~500MB)  
  - "qwen3:4b"      # Higher quality but slower (~2.5GB)
 ```
 **When to customize:**
 - Your searches aren't finding what you expect → adjust chunking settings
 - You want AI features → install Ollama and download models
 - System is slow → try smaller models or reduce context window
 - Getting too many/few results → adjust similarity threshold
 ## Troubleshooting
 ### "Project not indexed" 
 **Problem:** You're trying to search before indexing
 ```bash
 # Run indexing first
 ./rag-mini index ~/my-project    # Linux/macOS
 rag.bat index C:\my-project      # Windows
 ```
 ### "No Ollama models available"
 **Problem:** AI features need models downloaded
 ```bash
 # Install Ollama first
-curl -fsSL https://ollama.ai/install.sh | sh    # Linux/macOS
+curl -fsSL https://ollama.ai/install.sh | sh
 # Or download from https://ollama.com            # Windows
-# Start Ollama server
+# Pull the embedding model  
-ollama serve
+ollama pull nomic-embed-text
-# Download a model
+# Install Python dependencies
-ollama pull qwen3:1.7b
+pip install -r requirements.txt
 ```
-### "Virtual environment not found" 
+### Option B: Full ML Stack
 **Problem:** Auto-setup didn't work, need manual installation
 **Option A: Use installer scripts**
 ```bash  
-./install_mini_rag.sh          # Linux/macOS  
+# Install everything including PyTorch
-install_windows.bat            # Windows
+pip install -r requirements-full.txt
 ```
-**Option B: Manual method (100% reliable)**
+## Step 2: Test Installation
 ```bash
-# Linux/macOS
+# Index a test project (point the path at the FSS-Mini-RAG folder to index this RAG system itself)
-python3 -m venv .venv
+./rag-mini index ~/my-project
 .venv/bin/python -m pip install -r requirements.txt  # 2-8 minutes
 .venv/bin/python -m pip install .                    # ~1 minute  
 source .venv/bin/activate
-# Windows  
+# Search for something 
-python -m venv .venv
+./rag-mini search ~/my-project "chunker function"
 .venv\Scripts\python -m pip install -r requirements.txt  
 .venv\Scripts\python -m pip install .
 .venv\Scripts\activate.bat
 ```
-> **⏱️ Timing**: Fast internet 2-3 minutes total, slow internet 5-10 minutes due to large dependencies (LanceDB 36MB, PyArrow 43MB, PyLance 44MB).
+# Check what got indexed
 ### Getting weird results
 **Solution:** Try different search terms or check what got indexed
 ```bash
 # See what files were processed
 ./rag-mini status ~/my-project
 # Try more specific queries
 ./rag-mini search ~/my-project "specific function name"
 ```
-## Next Steps
+## Step 3: Index Your First Project
-### Learn More
+```bash
- **[Beginner's Glossary](BEGINNER_GLOSSARY.md)** - All the terms explained simply
+# Index any project directory
- **[TUI Guide](TUI_GUIDE.md)** - Master the interactive interface
+./rag-mini index /path/to/your/project
 - **[Visual Diagrams](DIAGRAMS.md)** - See how everything works
-### Advanced Features
+# The system creates .mini-rag/ directory with:
- **[Query Expansion](QUERY_EXPANSION.md)** - Make searches smarter with AI
+# - config.json (settings)
- **[LLM Providers](LLM_PROVIDERS.md)** - Use different AI models  
+# - manifest.json (file tracking)  
- **[CPU Deployment](CPU_DEPLOYMENT.md)** - Optimize for older computers
+# - database.lance/ (vector database)
 ```
-### Customize Everything
+## Step 4: Search Your Code
 - **[Technical Guide](TECHNICAL_GUIDE.md)** - How the system actually works
 - **[Configuration Examples](../examples/)** - Pre-made configs for different needs
---
+```bash
 # Basic semantic search
 ./rag-mini search /path/to/project "user login logic"
-**🎉 That's it!** You now have a semantic search system that understands your code by meaning, not just keywords. Start with simple searches and work your way up to the advanced AI features as you get comfortable.
+# Enhanced search with smart features  
 ./rag-mini-enhanced search /path/to/project "authentication"
-**💡 Pro tip:** The best way to learn is to index a project you know well and try searching for things you know are in there. You'll quickly see how much better meaning-based search is than traditional keyword search.
+# Find similar patterns
 ./rag-mini-enhanced similar /path/to/project "def validate_input"
 ```
 ## Step 5: Customize Configuration
 Edit `project/.mini-rag/config.json`:
 ```json
 {
  "chunking": {
    "max_size": 3000,
    "strategy": "semantic"  
  },
  "files": {
    "min_file_size": 100
  }
 }
 ```
 Then re-index to apply changes:
 ```bash
 ./rag-mini index /path/to/project --force
 ```
 ## Common Use Cases
 ### Find Functions by Name
 ```bash
 ./rag-mini search /project "function named connect_to_database" 
 ```
 ### Find Code Patterns  
 ```bash
 ./rag-mini search /project "error handling try catch"
 ./rag-mini search /project "database query with parameters"
 ```
 ### Find Configuration
 ```bash  
 ./rag-mini search /project "database connection settings"
 ./rag-mini search /project "environment variables"
 ```
 ### Find Documentation
 ```bash
 ./rag-mini search /project "how to deploy" 
 ./rag-mini search /project "API documentation"
 ```
 ## Python API Usage
 ```python
 from mini_rag import ProjectIndexer, CodeSearcher, CodeEmbedder
 from pathlib import Path
 # Initialize
 project_path = Path("/path/to/your/project")
 embedder = CodeEmbedder()
 indexer = ProjectIndexer(project_path, embedder)
 searcher = CodeSearcher(project_path, embedder)
 # Index the project
 print("Indexing project...")
 result = indexer.index_project()
 print(f"Indexed {result['files_processed']} files, {result['chunks_created']} chunks")
 # Search
 print("\nSearching for authentication code...")
 results = searcher.search("user authentication logic", limit=5)
 for i, result in enumerate(results, 1):
    print(f"\n{i}. {result.file_path}")
    print(f"   Score: {result.score:.3f}")
    print(f"   Type: {result.chunk_type}")
    print(f"   Content: {result.content[:100]}...")
 ```
 ## Advanced Features
 ### Auto-optimization
 ```bash
 # Get optimization suggestions
 ./rag-mini-enhanced analyze /path/to/project
 # This analyzes your codebase and suggests:
 # - Better chunk sizes for your language mix
 # - Streaming settings for large files
 # - File filtering optimizations
 ```
 ### File Watching
 ```python  
 from mini_rag import FileWatcher
 # Watch for file changes and auto-update index
 watcher = FileWatcher(project_path, indexer)
 watcher.start_watching()
 # Now any file changes automatically update the index
 ```
 ### Custom Chunking
 ```python
 from mini_rag import CodeChunker
 chunker = CodeChunker()
 # Chunk a Python file
 with open("example.py") as f:
    content = f.read()
 chunks = chunker.chunk_text(content, "python", "example.py")
 for chunk in chunks:
    print(f"Type: {chunk.chunk_type}")
    print(f"Content: {chunk.content}")
 ```
 ## Tips and Best Practices
 ### For Better Search Results
 - Use descriptive phrases: "function that validates email addresses" 
 - Try different phrasings if the first search doesn't work
 - Search for concepts, not just exact variable names
 ### For Better Indexing
 - Exclude build directories: `node_modules/`, `build/`, `dist/`
 - Include documentation files - they often contain valuable context
 - Use semantic chunking strategy for most projects
 ### For Configuration  
 - Start with default settings
 - Use `analyze` command to get optimization suggestions
 - Increase chunk size for larger functions/classes
 - Decrease chunk size for more granular search
 ### For Troubleshooting
 - Check `./rag-mini status` to see what was indexed
 - Look at `.mini-rag/manifest.json` for file details
 - Run with `--force` to completely rebuild the index
 - Check logs in `.mini-rag/` directory for errors
 ## What's Next?
 1. Try the test suite to understand how components work:
   ```bash
   python -m pytest tests/ -v
   ```
 2. Look at the examples in `examples/` directory
 3. Read the main README.md for complete technical details
 4. Customize the system for your specific project needs
--- a/docs/LLM_PROVIDERS.md
+++ b/docs/LLM_PROVIDERS.md
@ -1,264 +0,0 @@
 # 🤖 LLM Provider Setup Guide
 This guide shows how to configure FSS-Mini-RAG with different LLM providers for synthesis and query expansion features.
 ## 🎯 Quick Provider Comparison
 | Provider | Cost | Setup Difficulty | Quality | Privacy | Internet Required |
 |----------|------|------------------|---------|---------|-------------------|
 | **Ollama** | Free | Easy | Good | Excellent | No |
 | **LM Studio** | Free | Easy | Good | Excellent | No |
 | **OpenRouter** | Low ($0.10-0.50/M) | Medium | Excellent | Fair | Yes |
 | **OpenAI** | Medium ($0.15-2.50/M) | Medium | Excellent | Fair | Yes |
 | **Anthropic** | Medium-High | Medium | Excellent | Fair | Yes |
 ## 🏠 Local Providers (Recommended for Beginners)
 ### Ollama (Default)
 **Best for:** Privacy, learning, no ongoing costs
 ```yaml
 llm:
  provider: ollama
  ollama_host: localhost:11434
  synthesis_model: qwen3:1.7b
  expansion_model: qwen3:1.7b
  enable_synthesis: false
  synthesis_temperature: 0.3
  cpu_optimized: true
  enable_thinking: true
 ```
 **Setup:**
 1. Install Ollama: `curl -fsSL https://ollama.ai/install.sh | sh`
 2. Start service: `ollama serve`
 3. Download model: `ollama pull qwen3:1.7b`
 4. Test: `./rag-mini search /path/to/project "test" --synthesize`
 **Recommended Models:**
 - `qwen3:0.6b` - Ultra-fast, good for CPU-only systems
 - `qwen3:1.7b` - Balanced quality and speed (recommended)
 - `qwen3:4b` - Higher quality, excellent for most use cases
 ### LM Studio
 **Best for:** GUI users, model experimentation
 ```yaml
 llm:
  provider: openai
  api_base: http://localhost:1234/v1
  api_key: "not-needed"
  synthesis_model: "any"
  expansion_model: "any"
  enable_synthesis: false
  synthesis_temperature: 0.3
 ```
 **Setup:**
 1. Download [LM Studio](https://lmstudio.ai)
 2. Install any model from the catalog
 3. Start local server (default port 1234)
 4. Use config above
 ## ☁️ Cloud Providers (For Advanced Users)
 ### OpenRouter (Best Value)
 **Best for:** Access to many models, reasonable pricing
 ```yaml
 llm:
  provider: openai
  api_base: https://openrouter.ai/api/v1
  api_key: "your-api-key-here"
  synthesis_model: "meta-llama/llama-3.1-8b-instruct:free"
  expansion_model: "meta-llama/llama-3.1-8b-instruct:free"
  enable_synthesis: false
  synthesis_temperature: 0.3
  timeout: 30
 ```
 **Setup:**
 1. Sign up at [openrouter.ai](https://openrouter.ai)
 2. Create API key in dashboard
 3. Add $5-10 credits (goes far with efficient models)
 4. Replace `your-api-key-here` with actual key
 **Budget Models:**
 - `meta-llama/llama-3.1-8b-instruct:free` - Free tier
 - `openai/gpt-4o-mini` - $0.15 per million tokens
 - `anthropic/claude-3-haiku` - $0.25 per million tokens
 ### OpenAI (Premium Quality)
 **Best for:** Reliability, advanced features
 ```yaml
 llm:
  provider: openai
  api_key: "your-openai-api-key"
  synthesis_model: "gpt-4o-mini"
  expansion_model: "gpt-4o-mini"
  enable_synthesis: false
  synthesis_temperature: 0.3
  timeout: 30
 ```
 **Setup:**
 1. Sign up at [platform.openai.com](https://platform.openai.com)
 2. Add payment method
 3. Create API key
 4. Start with `gpt-4o-mini` for cost efficiency
 ### Anthropic Claude (Code Expert)
 **Best for:** Code analysis, thoughtful responses
 ```yaml
 llm:
  provider: anthropic
  api_key: "your-anthropic-api-key"
  synthesis_model: "claude-3-haiku-20240307"
  expansion_model: "claude-3-haiku-20240307"
  enable_synthesis: false
  synthesis_temperature: 0.3
  timeout: 30
 ```
 **Setup:**
 1. Sign up at [console.anthropic.com](https://console.anthropic.com)
 2. Add credits to account
 3. Create API key
 4. Start with Claude Haiku for budget-friendly option
 ## 🧪 Testing Your Setup
 ### 1. Basic Functionality Test
 ```bash
 # Test without LLM (should always work)
 ./rag-mini search /path/to/project "authentication"
 ```
 ### 2. Synthesis Test
 ```bash
 # Test LLM integration
 ./rag-mini search /path/to/project "authentication" --synthesize
 ```
 ### 3. Interactive Test
 ```bash
 # Test exploration mode
 ./rag-mini explore /path/to/project
 # Then ask: "How does authentication work in this codebase?"
 ```
 ### 4. Query Expansion Test
 Enable `expand_queries: true` in config, then:
 ```bash
 ./rag-mini search /path/to/project "auth"
 # Should automatically expand to "auth authentication login user session"
 ```
 ## 🛠️ Configuration Tips
 ### For Budget-Conscious Users
 ```yaml
 llm:
  synthesis_model: "gpt-4o-mini"  # or claude-haiku
  enable_synthesis: false         # Manual control
  synthesis_temperature: 0.1     # Factual responses
  max_expansion_terms: 4          # Shorter expansions
 ```
 ### For Quality-Focused Users
 ```yaml
 llm:
  synthesis_model: "gpt-4o"       # or claude-sonnet
  enable_synthesis: true          # Always on
  synthesis_temperature: 0.3     # Balanced creativity
  enable_thinking: true           # Show reasoning
  max_expansion_terms: 8          # Comprehensive expansion
 ```
 ### For Privacy-Focused Users
 ```yaml
 # Use only local providers
 embedding:
  preferred_method: ollama        # Local embeddings
 llm:
  provider: ollama               # Local LLM
  # Never use cloud providers
 ```
 ## 🔧 Troubleshooting
 ### Connection Issues
 - **Local:** Ensure Ollama/LM Studio is running: `ps aux | grep ollama`
 - **Cloud:** Check API key and internet: `curl -H "Authorization: Bearer $API_KEY" https://api.openai.com/v1/models`
 ### Model Not Found
 - **Ollama:** `ollama pull model-name`
 - **Cloud:** Check provider's model list documentation
 ### High Costs
 - Use mini/haiku models instead of full versions
 - Set `enable_synthesis: false` and use `--synthesize` selectively
 - Reduce `max_expansion_terms` to 4-6
 ### Poor Quality
 - Try higher-tier models (gpt-4o, claude-sonnet)
 - Adjust `synthesis_temperature` (0.1 = factual, 0.5 = creative)
 - Enable `expand_queries` for better search coverage
 ### Slow Responses
 - **Local:** Try smaller models (qwen3:0.6b)
 - **Cloud:** Increase `timeout` or switch providers
 - **General:** Reduce `max_size` in chunking config
 ## 📋 Environment Variables (Alternative Setup)
 Instead of putting API keys in config files, use environment variables:
 ```bash
 # In your shell profile (.bashrc, .zshrc, etc.)
 export OPENAI_API_KEY="your-openai-key"
 export ANTHROPIC_API_KEY="your-anthropic-key"
 export OPENROUTER_API_KEY="your-openrouter-key"
 ```
 Then in config:
 ```yaml
 llm:
  api_key: "${OPENAI_API_KEY}"  # Reads from environment
 ```
 ## 🚀 Advanced: Multi-Provider Setup
 You can create different configs for different use cases:
 ```bash
 # Fast local analysis
 cp examples/config-beginner.yaml .mini-rag/config-local.yaml
 # High-quality cloud analysis  
 cp examples/config-llm-providers.yaml .mini-rag/config-cloud.yaml
 # Edit to use OpenAI/Claude
 # Switch configs as needed
 ln -sf config-local.yaml .mini-rag/config.yaml   # Use local
 ln -sf config-cloud.yaml .mini-rag/config.yaml   # Use cloud
 ```
 ## 📚 Further Reading
 - [Ollama Model Library](https://ollama.ai/library)
 - [OpenRouter Pricing](https://openrouter.ai/docs#models)
 - [OpenAI API Documentation](https://platform.openai.com/docs)
 - [Anthropic Claude Documentation](https://docs.anthropic.com/claude)
 - [LM Studio Getting Started](https://lmstudio.ai/docs)
 ---
 💡 **Pro Tip:** Start with local Ollama for learning, then upgrade to cloud providers when you need production-quality analysis or are working with large codebases.
--- a/docs/QUERY_EXPANSION.md
+++ b/docs/QUERY_EXPANSION.md
@ -34,24 +34,7 @@ graph LR
 ## Configuration
-### Easy Configuration (TUI)
+Edit `config.yaml`:
 Use the interactive Configuration Manager in the TUI:
 1. **Start TUI**: `./rag-tui` or `rag.bat` (Windows)
 2. **Select Option 6**: Configuration Manager
 3. **Choose Option 2**: Toggle query expansion
 4. **Follow prompts**: Get explanation and easy on/off toggle
 The TUI will:
 - Explain benefits and requirements clearly
 - Check if Ollama is available
 - Show current status (enabled/disabled)
 - Save changes automatically
 ### Manual Configuration (Advanced)
 Edit `config.yaml` directly:
 ```yaml
 # Search behavior settings
--- a/docs/SMART_TUNING_GUIDE.md
+++ b/docs/SMART_TUNING_GUIDE.md
@ -5,10 +5,10 @@
 ### **1. 📊 Intelligent Analysis**
 ```bash
 # Analyze your project patterns and get optimization suggestions
-./rag-mini analyze /path/to/project
+./rag-mini-enhanced analyze /path/to/project
 # Get smart recommendations based on actual usage
-./rag-mini status /path/to/project
+./rag-mini-enhanced status /path/to/project
 ```
 **What it analyzes:**
@ -20,9 +20,13 @@
 ### **2. 🧠 Smart Search Enhancement**
 ```bash
 # Enhanced search with query intelligence
-./rag-mini search /project "MyClass"     # Detects class names
+./rag-mini-enhanced search /project "MyClass"     # Detects class names
-./rag-mini search /project "login()"     # Detects function calls  
+./rag-mini-enhanced search /project "login()"     # Detects function calls  
-./rag-mini search /project "user auth"   # Natural language
+./rag-mini-enhanced search /project "user auth"   # Natural language
 # Context-aware search (planned)
 ./rag-mini-enhanced context /project "function_name"  # Show surrounding code
 ./rag-mini-enhanced similar /project "pattern"        # Find similar patterns
 ```
 ### **3. ⚙️ Language-Specific Optimizations**
@ -109,10 +113,10 @@ Edit `.mini-rag/config.json` in your project:
 ./rag-mini index /project --force
 # Test search quality improvements
-./rag-mini search /project "your test query"
+./rag-mini-enhanced search /project "your test query"
 # Verify optimization impact
-./rag-mini analyze /project
+./rag-mini-enhanced analyze /project
 ```
 ## 🎊 **Result: Smarter, Faster, Better**
--- a/docs/TECHNICAL_GUIDE.md
+++ b/docs/TECHNICAL_GUIDE.md
@ -421,7 +421,7 @@ def _create_vector_table(self, chunks: List[CodeChunk], embeddings: np.ndarray):
    return table
-def vector_search(self, query_embedding: np.ndarray, top_k: int) -> List[SearchResult]:
+def vector_search(self, query_embedding: np.ndarray, limit: int) -> List[SearchResult]:
    """Fast vector similarity search."""
    table = self.db.open_table("chunks")
@ -794,13 +794,13 @@ def repair_index(self, project_path: Path) -> bool:
 FSS-Mini-RAG works well with various LLM sizes because our rich context and guided prompts help small models perform excellently:
 **Recommended (Best Balance):**
- **qwen3:1.7b** - Excellent quality with fast performance (default priority)
+- **qwen3:4b** - Excellent quality, good performance
 - **qwen3:0.6b** - Surprisingly good for CPU-only systems (522MB)
 **Still Excellent (Slower but highest quality):**
 - **qwen3:4b** - Highest quality, slower responses
 - **qwen3:4b:q8_0** - High-precision quantized version for production
 **Still Excellent (Faster/CPU-friendly):**
 - **qwen3:1.7b** - Very good results, faster responses
 - **qwen3:0.6b** - Surprisingly good considering its size (522MB)
 ### Why Small Models Work Well Here
 Small models can produce excellent results in RAG systems because:
@ -813,7 +813,7 @@ Without good context, small models tend to get lost and produce erratic output.
 ### Quantization Benefits
-For production deployments, consider quantized models like `qwen3:1.7b:q8_0` or `qwen3:4b:q8_0`:
+For production deployments, consider quantized models like `qwen3:4b:q8_0`:
 - **Q8_0**: 8-bit quantization with minimal quality loss
 - **Smaller memory footprint**: ~50% reduction vs full precision
 - **Better CPU performance**: Faster inference on CPU-only systems
--- a/docs/TESTING_PLAN.md
+++ b/docs/TESTING_PLAN.md
@ -1,832 +0,0 @@
 # FSS-Mini-RAG Distribution Testing Plan
 > **CRITICAL**: This is a comprehensive testing plan for the new distribution system. Every stage must be completed and verified before deployment.
 ## Overview
 We've implemented a complete distribution overhaul with:
 - One-line installers for Linux/macOS/Windows
 - Multiple installation methods (uv, pipx, pip, zipapp)
 - Automated wheel building via GitHub Actions
 - PyPI publishing automation
 - Cross-platform compatibility
 **This testing plan ensures everything works before we ship it.**
 ---
 ## Phase 1: Local Development Environment Testing
 ### 1.1 Virtual Environment Setup Testing
 **Objective**: Verify our package works in clean environments
 **Test Environments**:
 - [ ] Python 3.8 in fresh venv
 - [ ] Python 3.9 in fresh venv  
 - [ ] Python 3.10 in fresh venv
 - [ ] Python 3.11 in fresh venv
 - [ ] Python 3.12 in fresh venv
 **For each Python version**:
 ```bash
 # Test commands for each environment
 python -m venv test_env_38
 source test_env_38/bin/activate  # or test_env_38\Scripts\activate on Windows
 python --version
 pip install -e .
 rag-mini --help
 rag-mini init --help
 rag-mini search --help
 # Test basic functionality
 mkdir test_project
 echo "def hello(): print('world')" > test_project/test.py
 rag-mini init -p test_project
 rag-mini search -p test_project "hello function"
 deactivate
 rm -rf test_env_38 test_project
 ```
 **Success Criteria**:
 - [ ] Package installs without errors
 - [ ] All CLI commands show help properly
 - [ ] Basic indexing and search works
 - [ ] No dependency conflicts
 ### 1.2 Package Metadata Testing
 **Objective**: Verify pyproject.toml produces correct package metadata
 **Tests**:
 ```bash
 # Build source distribution and inspect metadata
 python -m build --sdist
 tar -tzf dist/*.tar.gz | grep -E "(pyproject.toml|METADATA)"
 tar -xzf dist/*.tar.gz --to-stdout */METADATA
 # Verify key metadata fields
 python -c "
 import pkg_resources
 dist = pkg_resources.get_distribution('fss-mini-rag')
 print(f'Name: {dist.project_name}')
 print(f'Version: {dist.version}')  
 print(f'Entry points: {list(dist.get_entry_map().keys())}')
 "
 ```
 **Success Criteria**:
 - [ ] Package name is "fss-mini-rag" 
 - [ ] Console script "rag-mini" is registered
 - [ ] Version matches pyproject.toml
 - [ ] Author, license, description are correct
 - [ ] Python version requirements are set
 ---
 ## Phase 2: Build System Testing
 ### 2.1 Source Distribution Testing
 **Objective**: Verify source packages build and install correctly
 **Tests**:
 ```bash
 # Clean build
 rm -rf dist/ build/ *.egg-info/
 python -m build --sdist
 # Test source install in fresh environment
 python -m venv test_sdist
 source test_sdist/bin/activate
 pip install dist/*.tar.gz
 rag-mini --help
 # Test actual functionality
 mkdir test_src && echo "print('test')" > test_src/main.py
 rag-mini init -p test_src
 rag-mini search -p test_src "print statement"
 deactivate && rm -rf test_sdist test_src
 ```
 **Success Criteria**:
 - [ ] Source distribution builds without errors
 - [ ] Contains all necessary files
 - [ ] Installs and runs correctly from source
 - [ ] No missing dependencies
 ### 2.2 Wheel Building Testing
 **Objective**: Test wheel generation and installation
 **Tests**:
 ```bash
 # Build wheel
 python -m build --wheel
 # Inspect wheel contents  
 python -m zipfile -l dist/*.whl
 python -m wheel unpack dist/*.whl
 ls -la fss_mini_rag-*/
 # Test wheel install
 python -m venv test_wheel
 source test_wheel/bin/activate
 pip install dist/*.whl
 rag-mini --version
 which rag-mini
 rag-mini --help
 deactivate && rm -rf test_wheel
 ```
 **Success Criteria**:
 - [ ] Wheel builds successfully
 - [ ] Contains correct package structure
 - [ ] Installs faster than source
 - [ ] Entry point is properly registered
 ### 2.3 Zipapp (.pyz) Building Testing  
 **Objective**: Test single-file zipapp distribution
 **Tests**:
 ```bash
 # Build zipapp
 python scripts/build_pyz.py
 # Test direct execution
 python dist/rag-mini.pyz --help
 python dist/rag-mini.pyz --version
 # Test with different Python versions
 python3.8 dist/rag-mini.pyz --help
 python3.11 dist/rag-mini.pyz --help
 # Test functionality
 mkdir pyz_test && echo "def test(): pass" > pyz_test/code.py
 python dist/rag-mini.pyz init -p pyz_test
 python dist/rag-mini.pyz search -p pyz_test "test function"
 rm -rf pyz_test
 # Test file size and contents
 ls -lh dist/rag-mini.pyz
 python -m zipfile -l dist/rag-mini.pyz | head -20
 ```
 **Success Criteria**:
 - [ ] Builds without errors
 - [ ] File size is reasonable (< 100MB)  
 - [ ] Runs with multiple Python versions
 - [ ] All core functionality works
 - [ ] No missing dependencies in zipapp
 ---
 ## Phase 3: Installation Script Testing
 ### 3.1 Linux/macOS Install Script Testing
 **Objective**: Test install.sh in various Unix environments
 **Test Environments**:
 - [ ] Ubuntu 20.04 (clean container)
 - [ ] Ubuntu 22.04 (clean container)  
 - [ ] Ubuntu 24.04 (clean container)
 - [ ] CentOS 7 (clean container)
 - [ ] CentOS Stream 9 (clean container)
 - [ ] macOS 12+ (if available)
 - [ ] Alpine Linux (minimal test)
 **For each environment**:
 ```bash
 # Test script download and execution
 curl -fsSL file://$(pwd)/install.sh > /tmp/test_install.sh
 chmod +x /tmp/test_install.sh
 # Test dry run capabilities (modify script for --dry-run flag)
 /tmp/test_install.sh --dry-run
 # Test actual installation
 /tmp/test_install.sh
 # Verify installation
 which rag-mini
 rag-mini --help
 rag-mini --version
 # Test functionality
 mkdir install_test
 echo "def example(): return 'hello'" > install_test/sample.py
 rag-mini init -p install_test  
 rag-mini search -p install_test "example function"
 # Cleanup
 rm -rf install_test /tmp/test_install.sh
 ```
 **Edge Case Testing**:
 ```bash
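 # Test without curl (use a disposable container; moving system binaries needs root)
 mv /usr/bin/curl /usr/bin/curl.bak 2>/dev/null || true
 # Run installer (should fall back to wget or pip)
 # Restore curl
 mv /usr/bin/curl.bak /usr/bin/curl 2>/dev/null || true
 # Test without wget
 mv /usr/bin/wget /usr/bin/wget.bak 2>/dev/null || true
 # Run installer
 # Restore wget
 mv /usr/bin/wget.bak /usr/bin/wget 2>/dev/null || true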
 # Test with Python but no pip
 # Test with old Python versions
 # Test with no internet (local package test)
 ```
 **Success Criteria**:
 - [ ] Script downloads and runs without errors
 - [ ] Handles missing dependencies gracefully
 - [ ] Installs correct package version
 - [ ] Creates working `rag-mini` command
 - [ ] Provides clear user feedback
 - [ ] Falls back properly (uv → pipx → pip)
 ### 3.2 Windows PowerShell Script Testing
 **Objective**: Test install.ps1 in Windows environments
 **Test Environments**:
 - [ ] Windows 10 (PowerShell 5.1)
 - [ ] Windows 11 (PowerShell 5.1)
 - [ ] Windows Server 2019
 - [ ] PowerShell Core 7.x (cross-platform)
 **For each environment**:
 ```powershell
 # Download and test
 Invoke-WebRequest -Uri "file://$(Get-Location)/install.ps1" -OutFile "$env:TEMP/test_install.ps1"
 # Test execution policy handling
 Get-ExecutionPolicy
 Set-ExecutionPolicy -ExecutionPolicy Bypass -Scope Process
 # Test dry run (modify script)
 & "$env:TEMP/test_install.ps1" -DryRun
 # Test actual installation
 & "$env:TEMP/test_install.ps1"
 # Verify installation
 Get-Command rag-mini
 rag-mini --help
 rag-mini --version
 # Test functionality
 New-Item -ItemType Directory -Name "win_test"
 "def windows_test(): return True" | Out-File -FilePath "win_test/test.py"
 rag-mini init -p win_test
 rag-mini search -p win_test "windows test"
 # Cleanup
 Remove-Item -Recurse -Force win_test
 Remove-Item "$env:TEMP/test_install.ps1"
 ```
 **Edge Case Testing**:
 - [ ] Test without Python in PATH
 - [ ] Test with Python 3.8-3.12
 - [ ] Test restricted execution policy
 - [ ] Test without admin rights
 - [ ] Test corporate firewall scenarios
 **Success Criteria**:
 - [ ] Script runs without PowerShell errors
 - [ ] Handles execution policy correctly
 - [ ] Installs package successfully
 - [ ] PATH is updated correctly
 - [ ] Error messages are user-friendly
 - [ ] Falls back properly (uv → pipx → pip)
 ---
 ## Phase 4: GitHub Actions Workflow Testing
 ### 4.1 Local Workflow Testing
 **Objective**: Test GitHub Actions workflow locally using act
 **Setup**:
 ```bash
 # Install act (GitHub Actions local runner)
 # On macOS: brew install act
 # On Linux: check https://github.com/nektos/act
 # Test workflow syntax
 act --list
 # Test individual jobs
 act -j build-wheels --dry-run
 act -j build-zipapp --dry-run  
 act -j test-installation --dry-run
 ```
 **Tests**:
 ```bash
 # Test wheel building job
 act -j build-wheels
 # Check artifacts
 ls -la /tmp/act-* 
 # Test zipapp building
 act -j build-zipapp
 # Test installation testing job
 act -j test-installation
 # Test release job (with dummy tag)
 act push -e .github/workflows/test-release.json
 ```
 **Success Criteria**:
 - [ ] All jobs complete without errors
 - [ ] Wheels are built for all platforms
 - [ ] Zipapp is created successfully
 - [ ] Installation tests pass
 - [ ] Artifacts are properly uploaded
 ### 4.2 Fork Testing
 **Objective**: Test workflow in a real GitHub environment
 **Setup**:
 1. [ ] Create a test fork of the repository
 2. [ ] Enable GitHub Actions on the fork
 3. [ ] Set up test PyPI token (TestPyPI)
 **Tests**:
 ```bash
 # Push changes to test branch
 git checkout -b test-distribution
 git push origin test-distribution
 # Create test release
 git tag v2.1.0-test
 git push origin v2.1.0-test
 # Monitor GitHub Actions:
 # - Check all jobs complete
 # - Download artifacts
 # - Verify wheel contents  
 # - Test zipapp download
 ```
 **Success Criteria**:
 - [ ] Workflow triggers on tag push
 - [ ] All matrix builds complete
 - [ ] Artifacts are uploaded
 - [ ] Release is created with assets
 - [ ] TestPyPI receives package (if configured)
 ---
 ## Phase 5: Manual Installation Method Testing
 ### 5.1 uv Installation Testing
 **Test Environments**: Linux, macOS, Windows
 **Tests**:
 ```bash
 # Fresh environment
 curl -LsSf https://astral.sh/uv/install.sh | sh
 export PATH="$HOME/.local/bin:$PATH"
 # Test uv tool install (will fail until we publish)  
 # For now, test with local wheel
 uv tool install dist/fss_mini_rag-*.whl
 # Verify installation
 which rag-mini
 rag-mini --help
 # Test functionality
 mkdir uv_test
 echo "print('uv test')" > uv_test/demo.py
 rag-mini init -p uv_test
 rag-mini search -p uv_test "print statement"
 rm -rf uv_test
 # Test uninstall
 uv tool uninstall fss-mini-rag
 ```
 **Success Criteria**:
 - [ ] uv installs cleanly
 - [ ] Package installs via uv tool install
 - [ ] Command is available in PATH
 - [ ] All functionality works
 - [ ] Uninstall works cleanly
 ### 5.2 pipx Installation Testing
 **Test Environments**: Linux, macOS, Windows
 **Tests**:
 ```bash
 # Install pipx
 python -m pip install --user pipx
 python -m pipx ensurepath
 # Test pipx install (local wheel for now)
 pipx install dist/fss_mini_rag-*.whl
 # Verify installation
 pipx list
 which rag-mini  
 rag-mini --help
 # Test functionality
 mkdir pipx_test
 echo "def pipx_demo(): pass" > pipx_test/code.py
 rag-mini init -p pipx_test
 rag-mini search -p pipx_test "pipx demo"
 rm -rf pipx_test
 # Test uninstall
 pipx uninstall fss-mini-rag
 ```
 **Success Criteria**:
 - [ ] pipx installs without issues
 - [ ] Package is isolated in own environment
 - [ ] Command works globally
 - [ ] No conflicts with system packages
 - [ ] Uninstall is clean
 ### 5.3 pip Installation Testing
 **Test Environments**: Multiple Python versions
 **Tests**:
 ```bash
 # Test with --user flag
 pip install --user dist/fss_mini_rag-*.whl
 # Verify PATH  
 echo "$PATH" | grep -q "$(python -m site --user-base)/bin" && echo "user bin dir is on PATH" || echo "user bin dir is NOT on PATH"
 which rag-mini
 rag-mini --help
 # Test functionality
 mkdir pip_test
 echo "class PipTest: pass" > pip_test/example.py
 rag-mini init -p pip_test
 rag-mini search -p pip_test "PipTest class"
 rm -rf pip_test
 # Test uninstall
 pip uninstall -y fss-mini-rag
 ```
 **Success Criteria**:
 - [ ] Installs correctly with --user
 - [ ] PATH is configured properly
 - [ ] No permission issues
 - [ ] Works across Python versions
 - [ ] Uninstall removes everything
 ---
 ## Phase 6: End-to-End User Experience Testing
 ### 6.1 New User Experience Testing
 **Scenario**: Complete beginner with no Python knowledge
 **Test Script**:
 ```bash
 # Start with fresh system (VM/container)
 # Follow README instructions exactly
 # Linux/macOS user
 curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
 # Windows user  
 # iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
 # Follow quick start guide
 rag-mini --help
 mkdir my_project
 echo "def hello_world(): print('Hello RAG!')" > my_project/main.py
 echo "class DataProcessor: pass" > my_project/processor.py
 rag-mini init -p my_project
 rag-mini search -p my_project "hello function"
 rag-mini search -p my_project "DataProcessor class"
 ```
 **Success Criteria**:
 - [ ] Installation completes without user intervention
 - [ ] Clear, helpful output throughout
 - [ ] `rag-mini` command is available immediately
 - [ ] Basic workflow works as expected
 - [ ] Error messages are user-friendly
 ### 6.2 Developer Experience Testing
 **Scenario**: Python developer wanting to contribute
 **Test Script**:
 ```bash
 # Clone repository
 git clone https://github.com/fsscoding/fss-mini-rag.git
 cd fss-mini-rag
 # Development installation
 python -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
 pip install -e .
 # Test development commands
 make help
 make dev-install
 make test-dist
 make build
 make build-pyz
 # Test local installation
 pip install dist/*.whl
 rag-mini --help
 ```
 **Success Criteria**:
 - [ ] Development setup is straightforward
 - [ ] Makefile commands work correctly
 - [ ] Local builds install properly
 - [ ] All development tools function
 ### 6.3 Advanced User Testing
 **Scenario**: Power user with custom requirements
 **Test Script**:
 ```bash
 # Test zipapp usage
 wget https://github.com/fsscoding/fss-mini-rag/releases/latest/download/rag-mini.pyz
 python rag-mini.pyz --help
 # Test with large codebase
 git clone https://github.com/django/django.git test_django
 python rag-mini.pyz init -p test_django
 python rag-mini.pyz search -p test_django "model validation"
 # Test server mode  
 python rag-mini.pyz server -p test_django
 curl http://localhost:7777/health
 # Clean up
 rm -rf test_django rag-mini.pyz
 ```
 **Success Criteria**:
 - [ ] Zipapp handles large codebases
 - [ ] Performance is acceptable
 - [ ] Server mode works correctly
 - [ ] All advanced features function
 ---
 ## Phase 7: Performance and Edge Case Testing
 ### 7.1 Performance Testing
 **Objective**: Ensure installation and runtime performance is acceptable
 **Tests**:
 ```bash
 # Installation speed testing
 time curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
 # Package size testing
 ls -lh dist/
 du -sh .venv/
 # Runtime performance
 time rag-mini init -p large_project/
 time rag-mini search -p large_project/ "complex query"
 # Memory usage
 rag-mini server &
 ps aux | grep rag-mini
 # Monitor memory usage during indexing/search
 ```
 **Success Criteria**:
 - [ ] Installation completes in < 5 minutes
 - [ ] Package size is reasonable (< 50MB total)
 - [ ] Indexing performance meets expectations
 - [ ] Memory usage is acceptable
 ### 7.2 Edge Case Testing
 **Objective**: Test unusual but possible scenarios
 **Tests**:
 ```bash
 # Network issues
 # - Simulate slow connection
 # - Test offline scenarios  
 # - Test corporate firewalls
 # System edge cases
 # - Very old Python versions
 # - Systems without pip
 # - Read-only file systems
 # - Limited disk space
 # Unicode and special characters
 mkdir "测试项目"
 echo "def 函数名(): pass" > "测试项目/代码.py"
 rag-mini init -p "测试项目"
 rag-mini search -p "测试项目" "函数"
 # Very large files
 python -c "print('# ' + 'x'*1000000)" > large_file.py
 rag-mini init -p .
 # Should handle gracefully
 # Concurrent usage
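 rag-mini server &
 SERVER_PID=$!
 search_pids=()
 for i in {1..10}; do
    rag-mini search -p . "test query $i" &
    search_pids+=("$!")
 done
 # Wait only for the searches; a bare `wait` would also block on the running server
 wait "${search_pids[@]}"
 kill "$SERVER_PID"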
 ```
 **Success Criteria**:
 - [ ] Graceful degradation with network issues
 - [ ] Clear error messages for edge cases
 - [ ] Handles Unicode correctly
 - [ ] Doesn't crash on large files
 - [ ] Concurrent access works properly
 ---
 ## Phase 8: Security Testing
 ### 8.1 Install Script Security
 **Objective**: Verify install scripts are secure
 **Tests**:
 ```bash
 # Check install.sh
 shellcheck install.sh
 bandit -r install.sh  # if applicable (note: bandit only analyzes Python sources, not shell)
 # Verify HTTPS usage
 grep -n "http://" install.sh  # Should only be for localhost
 grep -n "curl.*-k" install.sh  # Should be none
 grep -n "wget.*--no-check" install.sh  # Should be none
 # Check PowerShell script
 # Run PowerShell security analyzer if available
 ```
 **Success Criteria**:
 - [ ] No shell script vulnerabilities
 - [ ] Only HTTPS downloads (except localhost)
 - [ ] No certificate verification bypasses
 - [ ] Input validation where needed
 - [ ] Clear error messages without info leakage
 ### 8.2 Package Security
 **Objective**: Ensure distributed packages are secure
 **Tests**:
 ```bash
 # Check for secrets in built packages
 python -m zipfile -l dist/*.whl | grep -i -E "(key|token|password|secret)"
 strings dist/rag-mini.pyz | grep -i -E "(key|token|password|secret)"
 # Verify package signatures (when implemented)
 # Check for unexpected executables in packages
 ```
 **Success Criteria**:
 - [ ] No hardcoded secrets in packages
 - [ ] No unexpected executables
 - [ ] Package integrity is verifiable
 - [ ] Dependencies are from trusted sources
 ---
 ## Phase 9: Documentation and User Support Testing
 ### 9.1 Documentation Accuracy Testing
 **Objective**: Verify all documentation matches reality
 **Tests**:
 ```bash
 # Test every command in README
 # Test every code example
 # Verify all links work
 # Check screenshots are current
 # Test error scenarios mentioned in docs
 # Verify troubleshooting sections
 ```
 **Success Criteria**:
 - [ ] All examples work as documented
 - [ ] Links are valid and up-to-date
 - [ ] Screenshots reflect current UI
 - [ ] Error scenarios are accurate
 ### 9.2 Support Path Testing
 **Objective**: Test user support workflows
 **Tests**:
 - [ ] GitHub issue templates work
 - [ ] Error messages include helpful information
 - [ ] Common problems have clear solutions
 - [ ] Contact information is correct
 ---
 ## Phase 10: Release Readiness
 ### 10.1 Pre-Release Checklist
 - [ ] All tests from Phases 1-9 pass
 - [ ] Version numbers are consistent
 - [ ] Changelog is updated
 - [ ] Documentation is current
 - [ ] Security review complete
 - [ ] Performance benchmarks recorded
 - [ ] Backup plan exists for rollback
 ### 10.2 Release Testing
 **TestPyPI Release**:
 ```bash
 # Upload to TestPyPI first
 python -m twine upload --repository testpypi dist/*
 # Test installation from TestPyPI
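 # Dependencies are not mirrored on TestPyPI, so fall back to PyPI for them
 pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ fss-mini-rag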
 ```
 **Success Criteria**:
 - [ ] TestPyPI upload succeeds
 - [ ] Installation from TestPyPI works
 - [ ] All functionality works with TestPyPI package
 ### 10.3 Production Release
 **Only after TestPyPI success**:
 ```bash
 # Create GitHub release
 git tag v2.1.0
 git push origin v2.1.0
 # Monitor automated workflows
 # Test installation after PyPI publication
 pip install fss-mini-rag
 ```
 ---
 ## Testing Tools and Infrastructure
 ### Required Tools
 - [ ] Docker (for clean environment testing)
 - [ ] act (for local GitHub Actions testing)  
 - [ ] shellcheck (for bash script analysis)
 - [ ] Various Python versions (3.8-3.12)
 - [ ] Windows VM/container access
 - [ ] macOS testing environment (if possible)
 ### Test Data
 - [ ] Sample codebases of various sizes
 - [ ] Unicode test files
 - [ ] Edge case files (very large, empty, binary)
 - [ ] Network simulation tools
 ### Monitoring
 - [ ] Performance benchmarks
 - [ ] Error rate tracking  
 - [ ] User feedback collection
 - [ ] Download/install statistics
 ---
 ## Conclusion
 This testing plan is comprehensive but necessary. Each phase builds on the previous ones, and skipping phases risks shipping broken functionality to users.
 **Estimated Timeline**: 3-5 days for complete testing
 **Risk Level**: HIGH if phases are skipped
 **Success Criteria**: 100% of critical tests must pass before release
 The goal is to ship a distribution system that "just works" for every user, every time. This level of testing ensures we achieve that goal.
--- a/docs/TESTING_SUMMARY.md
+++ b/docs/TESTING_SUMMARY.md
@ -1,179 +0,0 @@
 # FSS-Mini-RAG Distribution Testing Summary
 ## What We've Built
 ### 🏗️ **Complete Distribution Infrastructure**
 1. **Enhanced pyproject.toml** - Proper metadata for PyPI publication
 2. **Install Scripts** - One-line installers for Linux/macOS (`install.sh`) and Windows (`install.ps1`)
 3. **Build Scripts** - Zipapp builder (`scripts/build_pyz.py`) 
 4. **GitHub Actions** - Automated wheel building and PyPI publishing
 5. **Documentation** - Updated README with modern installation methods
 6. **Testing Framework** - Comprehensive testing infrastructure
 ### 📦 **Installation Methods Implemented**
 - **One-line installers** (auto-detects best method)
 - **uv** - Ultra-fast package manager
 - **pipx** - Isolated tool installation
 - **pip** - Traditional method
 - **zipapp** - Single-file portable distribution
 ## Testing Status
 ### ✅ **Phase 1: Structure Tests (COMPLETED)**
 - [x] PyProject.toml validation - **PASSED**
 - [x] Install script structure - **PASSED**
 - [x] Build script presence - **PASSED** 
 - [x] GitHub workflow syntax - **PASSED**
 - [x] Documentation updates - **PASSED**
 - [x] Import structure - **FAILED** (dependencies needed)
 **Result**: 5/6 tests passed. Structure is solid.
 ### 🔄 **Phase 2: Build Tests (IN PROGRESS)**
 - [ ] Build requirements check
 - [ ] Source distribution build
 - [ ] Wheel building 
 - [ ] Zipapp creation
 - [ ] Package metadata validation
 ### 📋 **Remaining Test Phases**
 #### **Phase 3: Installation Testing**
 - [ ] Test built packages install correctly
 - [ ] Test entry points work
 - [ ] Test basic CLI functionality
 - [ ] Test in clean virtual environments
 #### **Phase 4: Install Script Testing**
 - [ ] Linux/macOS install.sh in containers
 - [ ] Windows install.ps1 testing
 - [ ] Edge cases (no python, no internet, etc.)
 - [ ] Fallback mechanism testing (uv → pipx → pip)
 #### **Phase 5: GitHub Actions Testing**
 - [ ] Local workflow testing with `act`
 - [ ] Fork testing with real CI
 - [ ] TestPyPI publishing test
 - [ ] Release creation testing
 #### **Phase 6: End-to-End User Experience**
 - [ ] Fresh system installation
 - [ ] Follow README exactly
 - [ ] Test error scenarios
 - [ ] Performance benchmarking
 ## Current Test Tools
 ### 📝 **Automated Test Scripts**
 1. **`scripts/validate_setup.py`** - File structure validation (✅ Working)
 2. **`scripts/phase1_basic_tests.py`** - Basic structure tests (✅ Working) 
 3. **`scripts/phase2_build_tests.py`** - Package building tests (🔄 Running)
 4. **`scripts/setup_test_environments.py`** - Multi-version env setup (📦 Complex)
 ### 🛠️ **Manual Test Commands**
 ```bash
 # Quick validation
 python scripts/validate_setup.py
 # Structure tests  
 python scripts/phase1_basic_tests.py
 # Build tests
 python scripts/phase2_build_tests.py
 # Manual builds
 make build          # Source + wheel
 make build-pyz      # Zipapp
 make test-dist      # Validation
 ```
 ## Issues Identified
 ### ⚠️ **Current Blockers**
 1. **Dependencies** - Full testing requires installing heavy ML dependencies
 2. **Environment Setup** - Multiple Python versions not available on current system  
 3. **Zipapp Size** - May be very large due to numpy/torch dependencies
 4. **Network Tests** - Install scripts need real network testing
 ### 🔧 **Mitigations**
 - **Staged Testing** - Test structure first, then functionality
 - **Container Testing** - Use Docker for clean environments
 - **Dependency Isolation** - Test core CLI without heavy ML deps
 - **Mock Network** - Local package server testing
 ## Deployment Strategy
 ### 🚀 **Safe Deployment Path**
 #### **Stage 1: TestPyPI Validation**
 1. Complete Phase 2 build tests
 2. Upload to TestPyPI  
 3. Test installation from TestPyPI
 4. Verify all install methods work
 #### **Stage 2: GitHub Release Testing**
 1. Create test release on fork
 2. Validate GitHub Actions workflow
 3. Test automated wheel building
 4. Verify release assets
 #### **Stage 3: Production Release**
 1. Final validation on clean systems
 2. Documentation review
 3. Create production release
 4. Monitor installation success rates
 ### 📊 **Success Criteria**
 For each phase, we need:
 - **95%+ test pass rate**
 - **Installation time < 5 minutes**
 - **Clear error messages** for failures
 - **Cross-platform compatibility**
 - **Fallback mechanisms working**
 ## Next Steps (Priority Order)
 1. **Complete Phase 2** - Finish build testing
 2. **Test Built Packages** - Verify they install and run
 3. **Container Testing** - Test install scripts in Docker
 4. **Fork Testing** - Test GitHub Actions in controlled environment
 5. **TestPyPI Release** - Safe production test
 6. **Clean System Testing** - Final validation
 7. **Production Release** - Go live
 ## Estimated Timeline
 - **Phase 2 Completion**: 1-2 hours
 - **Phase 3-4 Testing**: 4-6 hours  
 - **Phase 5-6 Testing**: 4-8 hours
 - **Deployment**: 2-4 hours
 **Total**: 2-3 days for comprehensive testing
 ## Risk Assessment
 ### 🔴 **High Risk**
 - Skipping environment testing
 - Not testing install scripts
 - Releasing without TestPyPI validation
 ### 🟡 **Medium Risk**  
 - Large zipapp file size
 - Dependency compatibility issues
 - Network connectivity problems
 ### 🟢 **Low Risk**
 - Documentation accuracy
 - GitHub workflow syntax
 - Package metadata
 ## Conclusion
 We've built a comprehensive modern distribution system for FSS-Mini-RAG. The infrastructure is solid (5/6 structure tests pass), but we need systematic testing before release.
 **The testing plan is extensive but necessary** - we're moving from a basic pip install to a professional-grade distribution system that needs to work flawlessly for users worldwide.
 **Current Status**: Infrastructure complete, systematic testing in progress.
 **Confidence Level**: High for structure, medium for functionality pending tests.
 **Ready for Release**: Not yet - need 2-3 days of proper testing.
--- a/docs/TROUBLESHOOTING.md
+++ b/docs/TROUBLESHOOTING.md
@ -45,46 +45,11 @@ pip3 install --user -r requirements.txt
 chmod +x install_mini_rag.sh
 # Then run
 ./install_mini_rag.sh
-# Or use proven manual method (100% reliable):
+# Or install manually:
-python3 -m venv .venv
+pip3 install -r requirements.txt
 .venv/bin/python -m pip install -r requirements.txt  # 2-8 minutes
 .venv/bin/python -m pip install .                    # ~1 minute
 source .venv/bin/activate
 python3 -c "import mini_rag; print('✅ Installation successful')"
 ```
 ### ❌ Installation takes too long / times out
 **Problem:** Installation seems stuck or takes forever  
 **Expected Timing:** 2-3 minutes fast internet, 5-10 minutes slow internet  
 **Solutions:**
 1. **Large dependencies are normal:**
   - LanceDB: 36MB (vector database)
   - PyArrow: 43MB (data processing) 
   - PyLance: 44MB (language parsing)
   - Total ~123MB + dependencies
 2. **For agents/CI/CD - run in background:**
   ```bash
   ./install_mini_rag.sh --headless &
   # Monitor with: tail -f install.log
   ```
 3. **Check if installation is actually progressing:**
   ```bash
   # Check pip cache (should be growing)
   du -sh ~/.cache/pip
   # Check if Python packages are installing
   ls -la .venv/lib/python*/site-packages/
   ```
 4. **Slow connection fallback:**
   ```bash
   # Increase pip timeout
   .venv/bin/python -m pip install -r requirements.txt --timeout 1000
   ```
 ---
 ## 🔍 Search & Results Issues
@ -145,7 +110,7 @@ python3 -c "import mini_rag; print('✅ Installation successful')"
 2. **Reduce result limit:**
   ```yaml
   search:
-     default_top_k: 5  # Instead of 10
+     default_limit: 5  # Instead of 10
   ```
 3. **Use faster embedding method:**
@ -178,8 +143,8 @@ python3 -c "import mini_rag; print('✅ Installation successful')"
 2. **Install a model:**
   ```bash
-   ollama pull qwen2.5:3b    # Good balance of speed and quality
+   ollama pull qwen3:0.6b    # Fast, small model
-   # Or: ollama pull qwen3:4b   # Larger but better quality
+   # Or: ollama pull llama3.2  # Larger but better
   ```
 3. **Test connection:**
@ -200,9 +165,9 @@ python3 -c "import mini_rag; print('✅ Installation successful')"
 2. **Try different model:**
   ```bash
-   ollama pull qwen3:1.7b   # Recommended: excellent quality (default priority)
+   ollama pull qwen3:4b     # Recommended: excellent quality
   ollama pull qwen3:1.7b   # Still very good, faster
   ollama pull qwen3:0.6b   # Surprisingly good for CPU-only
   ollama pull qwen3:4b     # Highest quality, slower
   ```
 3. **Use synthesis mode instead of exploration:**
--- a/docs/TUI_GUIDE.md
+++ b/docs/TUI_GUIDE.md
@ -23,9 +23,8 @@ That's it! The TUI will guide you through everything.
 ### User Flow
 1. **Select Project** → Choose directory to search
 2. **Index Project** → Process files for search
-3. **Search Content** → Find what you need quickly
+3. **Search Content** → Find what you need
-4. **Explore Project** → Interactive AI-powered discovery (NEW!)
+4. **Explore Results** → See full context and files
 5. **Configure System** → Customize search behavior
 ## Main Menu Options
@ -93,10 +92,10 @@ That's it! The TUI will guide you through everything.
 - **Full content** - Up to 8 lines of actual code/text
 - **Continuation info** - How many more lines exist
-**Tips You'll Learn**:
+**Advanced Tips Shown**:
- Verbose output with `--verbose` flag for debugging
+- Enhanced search with `./rag-mini-enhanced`
- How search scoring works
+- Verbose output with `--verbose` flag
- Finding the right search terms
+- Context-aware search for related code
 **What You Learn**:
 - Semantic search vs text search (finds concepts, not just words)
@ -107,66 +106,11 @@ That's it! The TUI will guide you through everything.
 **CLI Commands Shown**:
 ```bash
 ./rag-mini search /path/to/project "authentication logic"
-./rag-mini search /path/to/project "user login" --top-k 10
+./rag-mini search /path/to/project "user login" --limit 10
 ./rag-mini-enhanced context /path/to/project "login()"
 ```
-### 4. Explore Project (NEW!)
+### 4. View Status
 **Purpose**: Interactive AI-powered discovery with conversation memory
 **What Makes Explore Different**:
 - **Conversational**: Ask follow-up questions that build on previous answers
 - **AI Reasoning**: Uses thinking mode for deeper analysis and explanations
 - **Educational**: Perfect for understanding unfamiliar codebases
 - **Context Aware**: Remembers what you've already discussed
 **Interactive Process**:
 1. **First Question Guidance**: Clear prompts with example questions
 2. **Starter Suggestions**: Random helpful questions to get you going
 3. **Natural Follow-ups**: Ask "why?", "how?", "show me more" naturally
 4. **Session Memory**: AI remembers your conversation context
 **Explore Mode Features**:
 **Quick Start Options**:
 - **Option 1 - Help**: Show example questions and explore mode capabilities
 - **Option 2 - Status**: Project information and current exploration session
 - **Option 3 - Suggest**: Get a random starter question picked from 7 curated examples
 **Starter Questions** (randomly suggested):
 - "What are the main components of this project?"
 - "How is error handling implemented?"
 - "Show me the authentication and security logic"
 - "What are the key functions I should understand first?"
 - "How does data flow through this system?"
 - "What configuration options are available?"
 - "Show me the most important files to understand"
 **Advanced Usage**:
 - **Deep Questions**: "Why is this function slow?" "How does the security work?"
 - **Code Analysis**: "Explain this algorithm" "What could go wrong here?"
 - **Architecture**: "How do these components interact?" "What's the design pattern?"
 - **Best Practices**: "Is this code following best practices?" "How would you improve this?"
 **What You Learn**:
 - **Conversational AI**: How to have productive technical conversations with AI
 - **Code Understanding**: Deep analysis capabilities beyond simple search
 - **Context Building**: How conversation memory improves over time
 - **Question Techniques**: Effective ways to explore unfamiliar code
 **CLI Commands Shown**:
 ```bash
 ./rag-mini explore /path/to/project    # Start interactive exploration
 ```
 **Perfect For**:
 - Understanding new codebases
 - Code review and analysis
 - Learning from existing projects
 - Documenting complex systems
 - Onboarding new team members
 ### 5. View Status
 **Purpose**: Check system health and project information
@ -195,61 +139,32 @@ That's it! The TUI will guide you through everything.
 ./rag-mini status /path/to/project
 ```
-### 6. Configuration Manager (ENHANCED!)
+### 5. Configuration
-**Purpose**: Interactive configuration with user-friendly options
+**Purpose**: View and understand system settings
-**New Interactive Features**:
+**Configuration Display**:
- **Live Configuration Dashboard** - See current settings with clear status
+- **Current settings** - Chunk size, strategy, file patterns
- **Quick Configuration Options** - Change common settings without YAML editing
+- **File location** - Where config is stored
- **Guided Setup** - Explanations and presets for each option
+- **Setting explanations** - What each option does
- **Validation** - Input checking and helpful error messages
+- **Quick actions** - View or edit config directly
-**Main Configuration Options**:
+**Key Settings Explained**:
 - **chunking.max_size** - How large each searchable piece is
 - **chunking.strategy** - Smart (semantic) vs simple (fixed size)
 - **files.exclude_patterns** - Skip certain files/directories
 - **embedding.preferred_method** - AI model preference
 - **search.default_limit** - How many results to show
-**1. Adjust Chunk Size**:
+**Interactive Options**:
- **Presets**: Small (1000), Medium (2000), Large (3000), or custom
+- **[V]iew config** - See full configuration file
- **Guidance**: Performance vs accuracy explanations
+- **[E]dit path** - Get command to edit configuration
 - **Smart Validation**: Range checking and recommendations
 **2. Toggle Query Expansion**:
 - **Educational Info**: Clear explanation of benefits and requirements  
 - **Easy Toggle**: Simple on/off with confirmation
 - **System Check**: Verifies Ollama availability for AI features
 **3. Configure Search Behavior**:
 - **Result Count**: Adjust default number of search results (1-100)
 - **BM25 Toggle**: Enable/disable keyword matching boost
 - **Similarity Threshold**: Fine-tune match sensitivity (0.0-1.0)
 **4. View/Edit Configuration File**:
 - **Full File Viewer**: Display complete config with syntax highlighting
 - **Editor Instructions**: Commands for nano, vim, VS Code
 - **YAML Help**: Format explanation and editing tips
 **5. Reset to Defaults**:
 - **Safe Reset**: Confirmation before resetting all settings
 - **Clear Explanations**: Shows what defaults will be restored
 - **Backup Reminder**: Suggests saving current config first
 **6. Advanced Settings**:
 - **File Filtering**: Min file size, exclude patterns (view only)
 - **Performance Settings**: Batch sizes, streaming thresholds
 - **LLM Preferences**: Model rankings and selection priorities
 **Key Settings Dashboard**:
 - 📁 **Chunk size**: 2000 characters (with emoji indicators)
 - 🧠 **Chunking strategy**: semantic
 - 🔍 **Search results**: 10 results
 - 📊 **Embedding method**: ollama
 - 🚀 **Query expansion**: enabled/disabled
 - ⚡ **LLM synthesis**: enabled/disabled
 **What You Learn**:
- **Configuration Impact**: How settings affect search quality and speed
+- How configuration affects search quality
- **Interactive YAML**: Easier than manual editing for beginners
+- YAML configuration format
- **Best Practices**: Recommended settings for different project types
+- Which settings to adjust for different projects
- **System Understanding**: How all components work together
+- Where to find advanced options
 **CLI Commands Shown**:
 ```bash
@ -257,13 +172,7 @@ cat /path/to/project/.mini-rag/config.yaml   # View config
 nano /path/to/project/.mini-rag/config.yaml  # Edit config
 ```
-**Perfect For**:
+### 6. CLI Command Reference
 - Beginners who find YAML intimidating
 - Quick adjustments without memorizing syntax
 - Understanding what each setting actually does
 - Safe experimentation with guided validation
 ### 7. CLI Command Reference
 **Purpose**: Complete command reference for transitioning to CLI
--- a/examples/analyze_dependencies.py
+++ b/examples/analyze_dependencies.py
@ -4,14 +4,14 @@ Analyze FSS-Mini-RAG dependencies to determine what's safe to remove.
 """
 import ast
-from collections import defaultdict
+import os
 from pathlib import Path
-
+from collections import defaultdict
 def find_imports_in_file(file_path):
    """Find all imports in a Python file."""
    try:
-        with open(file_path, "r", encoding="utf-8") as f:
+        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        tree = ast.parse(content)
@ -20,10 +20,10 @@ def find_imports_in_file(file_path):
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
-                    imports.add(alias.name.split(".")[0])
+                    imports.add(alias.name.split('.')[0])
            elif isinstance(node, ast.ImportFrom):
                if node.module:
-                    module = node.module.split(".")[0]
+                    module = node.module.split('.')[0]
                    imports.add(module)
        return imports
@ -31,7 +31,6 @@ def find_imports_in_file(file_path):
        print(f"Error analyzing {file_path}: {e}")
        return set()
 def analyze_dependencies():
    """Analyze all dependencies in the project."""
    project_root = Path(__file__).parent
@ -86,13 +85,13 @@ def analyze_dependencies():
    print("\n🛡️ Safety Analysis:")
    # Files imported by __init__.py are definitely needed
-    init_imports = file_imports.get("__init__.py", set())
+    init_imports = file_imports.get('__init__.py', set())
    print(f"   Core modules (imported by __init__.py): {', '.join(init_imports)}")
    # Files not used anywhere might be safe to remove
    unused_files = []
    for module in all_modules:
-        if module not in reverse_deps and module != "__init__":
+        if module not in reverse_deps and module != '__init__':
            unused_files.append(module)
    if unused_files:
@ -100,14 +99,11 @@ def analyze_dependencies():
        print("   ❗ Verify these aren't used by CLI or external scripts!")
    # Check CLI usage
-    cli_files = ["cli.py", "enhanced_cli.py"]
+    cli_files = ['cli.py', 'enhanced_cli.py']
    for cli_file in cli_files:
        if cli_file in file_imports:
            cli_imports = file_imports[cli_file]
-            print(
+            print(f"   📋 {cli_file} imports: {', '.join([imp for imp in cli_imports if imp in all_modules])}")
                f"   📋 {cli_file} imports: {', '.join([imp for imp in cli_imports if imp in all_modules])}"
            )
 if __name__ == "__main__":
    analyze_dependencies()
--- a/examples/basic_usage.py
+++ b/examples/basic_usage.py
@ -5,9 +5,7 @@ Shows how to index a project and search it programmatically.
 """
 from pathlib import Path
-
+from mini_rag import ProjectIndexer, CodeSearcher, CodeEmbedder
 from mini_rag import CodeEmbedder, CodeSearcher, ProjectIndexer
 def main():
    # Example project path - change this to your project
@ -46,26 +44,25 @@ def main():
        "embedding system", 
        "search implementation",
        "file watcher",
-        "error handling",
+        "error handling"
    ]
    print("\n4. Example searches:")
    for query in queries:
        print(f"\n   Query: '{query}'")
-        results = searcher.search(query, top_k=3)
+        results = searcher.search(query, limit=3)
        if results:
            for i, result in enumerate(results, 1):
                print(f"      {i}. {result.file_path.name} (score: {result.score:.3f})")
                print(f"         Type: {result.chunk_type}")
                # Show first 60 characters of content
-                content_preview = result.content.replace("\n", " ")[:60]
+                content_preview = result.content.replace('\n', ' ')[:60]
                print(f"         Preview: {content_preview}...")
        else:
            print("      No results found")
    print("\n=== Example Complete ===")
 if __name__ == "__main__":
    main()
--- a/examples/config-beginner.yaml
+++ b/examples/config-beginner.yaml
@ -41,13 +41,12 @@ embedding:
 # 🔍 Search behavior  
 search:
-  default_top_k: 10       # Show 10 results (good starting point)
+  default_limit: 10       # Show 10 results (good starting point)
  enable_bm25: true       # Find exact word matches too
  similarity_threshold: 0.1  # Pretty permissive (shows more results)
  expand_queries: false   # Keep it simple for now
 # 🤖 AI explanations (optional but helpful)
 # 💡 WANT DIFFERENT LLM? See examples/config-llm-providers.yaml for OpenAI, Claude, etc.
 llm:
  synthesis_model: auto         # Pick best available model
  enable_synthesis: false       # Turn on manually with --synthesize
--- a/examples/config-fast.yaml
+++ b/examples/config-fast.yaml
@ -62,7 +62,7 @@ embedding:
 # 🔍 Search optimized for speed
 search:
-  default_top_k: 5        # Fewer results = faster display
+  default_limit: 5        # Fewer results = faster display
  enable_bm25: false      # Skip keyword matching for speed
  similarity_threshold: 0.2  # Higher threshold = fewer results to process
  expand_queries: false   # No query expansion (much faster)
--- a/examples/config-llm-providers.yaml
+++ b/examples/config-llm-providers.yaml
@ -1,233 +0,0 @@
 # 🌐 LLM PROVIDER ALTERNATIVES - OpenRouter, LM Studio, OpenAI & More
 # Educational guide showing how to configure different LLM providers
 # Copy sections you need to your main config.yaml
 #═════════════════════════════════════════════════════════════════════════════════
 # 🎯 QUICK PROVIDER SELECTION GUIDE:
 #
 # 🏠 LOCAL (Best Privacy, No Internet Needed):
 #   - Ollama: Great quality, easy setup, free
 #   - LM Studio: User-friendly GUI, works with many models
 #
 # ☁️ CLOUD (Powerful Models, Requires API Keys):
 #   - OpenRouter: Access to many models with one API
 #   - OpenAI: High quality, reliable, but more expensive
 #   - Anthropic: Excellent for code analysis
 #
 # 💰 BUDGET FRIENDLY:
 #   - OpenRouter (Qwen, Llama models): $0.10-0.50 per million tokens
 #   - Local Ollama/LM Studio: Completely free
 #
 # 🚀 PERFORMANCE:
 #   - Local: Limited by your hardware
 #   - Cloud: Fast and powerful, costs per use
 #═════════════════════════════════════════════════════════════════════════════════
 # Standard FSS-Mini-RAG settings (copy these to any config)
 chunking:
  max_size: 2000
  min_size: 150
  strategy: semantic
 streaming:
  enabled: true
  threshold_bytes: 1048576
 files:
  min_file_size: 50
  exclude_patterns:
    - "node_modules/**"
    - ".git/**"
    - "__pycache__/**"
    - "*.pyc"
    - ".venv/**"
    - "build/**"
    - "dist/**"
  include_patterns:
    - "**/*"
 embedding:
  preferred_method: ollama    # Use Ollama for embeddings (works with all providers below)
  ollama_model: nomic-embed-text
  ollama_host: localhost:11434
  batch_size: 32
 search:
  default_top_k: 10
  enable_bm25: true
  similarity_threshold: 0.1
  expand_queries: false
 #═════════════════════════════════════════════════════════════════════════════════
 # 🤖 LLM PROVIDER CONFIGURATIONS
 #═════════════════════════════════════════════════════════════════════════════════
 # 🏠 OPTION 1: OLLAMA (LOCAL) - Default and Recommended
 # ✅ Pros: Free, private, no API keys, good quality
 # ❌ Cons: Uses your computer's resources, limited by hardware
 llm:
  provider: ollama                    # Use local Ollama
  ollama_host: localhost:11434        # Default Ollama location
  synthesis_model: qwen3:1.7b         # Good all-around model
  # alternatives: qwen3:0.6b (faster), qwen2.5:3b (balanced), qwen3:4b (quality)
  expansion_model: qwen3:1.7b
  enable_synthesis: false
  synthesis_temperature: 0.3
  cpu_optimized: true
  enable_thinking: true
  max_expansion_terms: 8
 # 🖥️ OPTION 2: LM STUDIO (LOCAL) - User-Friendly Alternative
 # ✅ Pros: Easy GUI, drag-drop model installation, compatible with Ollama
 # ❌ Cons: Another app to manage, similar hardware limitations
 # 
 # SETUP STEPS:
 # 1. Download LM Studio from lmstudio.ai
 # 2. Install a model (try "microsoft/DialoGPT-medium" or "TheBloke/Llama-2-7B-Chat-GGML")
 # 3. Start local server in LM Studio (usually port 1234)
 # 4. Use this config:
 #
 # llm:
 #   provider: openai                   # LM Studio uses OpenAI-compatible API
 #   api_base: http://localhost:1234/v1 # LM Studio default port
 #   api_key: "not-needed"             # LM Studio doesn't require a real API key
 #   synthesis_model: "any"            # Use whatever model you loaded in LM Studio
 #   expansion_model: "any"
 #   enable_synthesis: false
 #   synthesis_temperature: 0.3
 #   cpu_optimized: true
 #   enable_thinking: true
 #   max_expansion_terms: 8
 # ☁️ OPTION 3: OPENROUTER (CLOUD) - Many Models, One API
 # ✅ Pros: Access to many models, good prices, no local setup
 # ❌ Cons: Requires internet, costs money, less private
 #
 # SETUP STEPS:
 # 1. Sign up at openrouter.ai
 # 2. Get API key from dashboard
 # 3. Add credits to account ($5-10 goes a long way)
 # 4. Use this config:
 #
 # llm:
 #   provider: openai                   # OpenRouter uses OpenAI-compatible API
 #   api_base: https://openrouter.ai/api/v1
 #   api_key: "your-openrouter-api-key-here"  # Replace with your actual key
 #   synthesis_model: "meta-llama/llama-3.1-8b-instruct:free"  # Free tier model
 #   # alternatives: "openai/gpt-4o-mini" ($0.15/M), "anthropic/claude-3-haiku" ($0.25/M)
 #   expansion_model: "meta-llama/llama-3.1-8b-instruct:free"
 #   enable_synthesis: false
 #   synthesis_temperature: 0.3
 #   cpu_optimized: false              # Cloud models don't need CPU optimization
 #   enable_thinking: true
 #   max_expansion_terms: 8
 #   timeout: 30                       # Longer timeout for internet requests
 # 🏢 OPTION 4: OPENAI (CLOUD) - Premium Quality
 # ✅ Pros: Excellent quality, very reliable, fast
 # ❌ Cons: More expensive, requires OpenAI account
 #
 # SETUP STEPS:
 # 1. Sign up at platform.openai.com
 # 2. Add payment method (pay-per-use)
 # 3. Create API key in dashboard
 # 4. Use this config:
 #
 # llm:
 #   provider: openai
 #   api_key: "your-openai-api-key-here"      # Replace with your actual key
 #   synthesis_model: "gpt-4o-mini"           # Affordable option (~$0.15/M tokens)
 #   # alternatives: "gpt-4o" (premium, ~$2.50/M), "gpt-3.5-turbo" (budget, ~$0.50/M)
 #   expansion_model: "gpt-4o-mini"
 #   enable_synthesis: false
 #   synthesis_temperature: 0.3
 #   cpu_optimized: false
 #   enable_thinking: true
 #   max_expansion_terms: 8
 #   timeout: 30
 # 🧠 OPTION 5: ANTHROPIC CLAUDE (CLOUD) - Excellent for Code
 # ✅ Pros: Great at code analysis, very thoughtful responses
 # ❌ Cons: Premium pricing, separate API account needed
 #
 # SETUP STEPS:
 # 1. Sign up at console.anthropic.com
 # 2. Get API key and add credits
 # 3. Use this config:
 #
 # llm:
 #   provider: anthropic
 #   api_key: "your-anthropic-api-key-here"   # Replace with your actual key
 #   synthesis_model: "claude-3-haiku-20240307"  # Most affordable option
 #   # alternatives: "claude-3-sonnet-20240229" (balanced), "claude-3-opus-20240229" (premium)
 #   expansion_model: "claude-3-haiku-20240307"
 #   enable_synthesis: false
 #   synthesis_temperature: 0.3
 #   cpu_optimized: false
 #   enable_thinking: true
 #   max_expansion_terms: 8
 #   timeout: 30
 #═════════════════════════════════════════════════════════════════════════════════
 # 🧪 TESTING YOUR CONFIGURATION
 #═════════════════════════════════════════════════════════════════════════════════
 #
 # After setting up any provider, test with these commands:
 #
 # 1. Test basic search (no LLM needed):
 #    ./rag-mini search /path/to/project "test query"
 #
 # 2. Test LLM synthesis:
 #    ./rag-mini search /path/to/project "test query" --synthesize
 #
 # 3. Test query expansion:
 #    Enable expand_queries: true in search section and try:
 #    ./rag-mini search /path/to/project "auth"
 #
 # 4. Test thinking mode:
 #    ./rag-mini explore /path/to/project
 #    Then ask: "explain the authentication system"
 #
 #═════════════════════════════════════════════════════════════════════════════════
 # 💡 TROUBLESHOOTING
 #═════════════════════════════════════════════════════════════════════════════════
 #
 # ❌ "Connection refused" or "API error":
 #    - Local: Make sure Ollama/LM Studio is running
 #    - Cloud: Check API key and internet connection
 #
 # ❌ "Model not found":
 #    - Local: Install model with `ollama pull model-name`
 #    - Cloud: Check model name matches provider's API docs
 #
 # ❌ "Token limit exceeded" or expensive bills:
 #    - Use cheaper models like gpt-4o-mini or claude-haiku
 #    - Send less context per request by lowering chunking max_size (e.g. 1500)
 #
 # ❌ Slow responses:
 #    - Local: Try smaller models (qwen3:0.6b)
 #    - Cloud: Increase timeout or try different provider
 #
 # ❌ Poor quality results:
 #    - Try higher-quality models
 #    - Adjust synthesis_temperature (0.1 for factual, 0.5 for creative)
 #    - Enable expand_queries for better search coverage
 #
 #═════════════════════════════════════════════════════════════════════════════════
 # 📚 LEARN MORE
 #═════════════════════════════════════════════════════════════════════════════════
 # 
 # Provider Documentation:
 # - Ollama: https://ollama.ai/library (model catalog)
 # - LM Studio: https://lmstudio.ai/docs (getting started)
 # - OpenRouter: https://openrouter.ai/docs (API reference)
 # - OpenAI: https://platform.openai.com/docs (API docs)
 # - Anthropic: https://docs.anthropic.com/claude/reference (Claude API)
 #
 # Model Recommendations:
 # - Code Analysis: claude-3-sonnet, gpt-4o, llama3.1:8b
 # - Fast Responses: gpt-4o-mini, claude-haiku, qwen3:0.6b  
 # - Budget Friendly: OpenRouter free tier, local Ollama
 # - Best Privacy: Local Ollama or LM Studio only
 #
 #═════════════════════════════════════════════════════════════════════════════════
--- a/examples/config-quality.yaml
+++ b/examples/config-quality.yaml
@ -44,7 +44,7 @@ embedding:
 # 🔍 Search optimized for comprehensive results
 search:
-  default_top_k: 15       # More results to choose from
+  default_limit: 15       # More results to choose from
  enable_bm25: true       # Use both semantic and keyword matching
  similarity_threshold: 0.05  # Very permissive (show more possibilities)
  expand_queries: true    # Automatic query expansion for better recall
@ -102,7 +102,7 @@ llm:
 # For even better results, try these model combinations:
 # • ollama pull nomic-embed-text:latest  (best embeddings)
 # • ollama pull qwen3:1.7b              (good general model)
-# • ollama pull qwen3:4b                (excellent for analysis)
+# • ollama pull llama3.2                (excellent for analysis)
 # 
 # Or adjust these settings for your specific needs:
 # • similarity_threshold: 0.3   (more selective results)
--- a/examples/config.yaml
+++ b/examples/config.yaml
@ -86,7 +86,7 @@ embedding:
 #═════════════════════════════════════════════════════════════════════════════════
 search:
-  default_top_k: 10                # How many search results to show by default
+  default_limit: 10                # How many search results to show by default
                                  # 💡 MORE RESULTS: 15-20 | FASTER SEARCH: 5-8
  enable_bm25: true               # Also use keyword matching (like Google search)
@ -112,7 +112,7 @@ llm:
  synthesis_model: auto           # Which AI model to use for explanations
                                  # 'auto': Picks best available model - RECOMMENDED
                                  # 'qwen3:0.6b': Ultra-fast, good for CPU-only computers
-                                  # 'qwen3:4b': Slower but more detailed explanations
+                                  # 'llama3.2': Slower but more detailed explanations
  expansion_model: auto           # Model for query expansion (usually same as synthesis)
--- a/examples/smart_config_suggestions.py
+++ b/examples/smart_config_suggestions.py
@ -5,10 +5,9 @@ Analyzes the indexed data to suggest optimal settings.
 """
 import json
 import sys
 from collections import Counter
 from pathlib import Path
-
+from collections import defaultdict, Counter
 import sys
 def analyze_project_patterns(manifest_path: Path):
    """Analyze project patterns and suggest optimizations."""
@ -16,7 +15,7 @@ def analyze_project_patterns(manifest_path: Path):
    with open(manifest_path) as f:
        manifest = json.load(f)
-    files = manifest.get("files", {})
+    files = manifest.get('files', {})
    print("🔍 FSS-Mini-RAG Smart Tuning Analysis")
    print("=" * 50)
@ -28,11 +27,11 @@ def analyze_project_patterns(manifest_path: Path):
    small_files = []
    for filepath, info in files.items():
-        lang = info.get("language", "unknown")
+        lang = info.get('language', 'unknown')
        languages[lang] += 1
-        size = info.get("size", 0)
+        size = info.get('size', 0)
-        chunks = info.get("chunks", 1)
+        chunks = info.get('chunks', 1)
        chunk_efficiency.append(chunks / max(1, size / 1000))  # chunks per KB
@ -43,70 +42,65 @@ def analyze_project_patterns(manifest_path: Path):
    # Analysis results
    total_files = len(files)
-    total_chunks = sum(info.get("chunks", 1) for info in files.values())
+    total_chunks = sum(info.get('chunks', 1) for info in files.values())
    avg_chunks_per_file = total_chunks / max(1, total_files)
-    print("📊 Current Stats:")
+    print(f"📊 Current Stats:")
    print(f"   Files: {total_files}")
    print(f"   Chunks: {total_chunks}")
    print(f"   Avg chunks/file: {avg_chunks_per_file:.1f}")
-    print("\n🗂️ Language Distribution:")
+    print(f"\n🗂️ Language Distribution:")
    for lang, count in languages.most_common(10):
        pct = 100 * count / total_files
        print(f"   {lang}: {count} files ({pct:.1f}%)")
-    print("\n💡 Smart Optimization Suggestions:")
+    print(f"\n💡 Smart Optimization Suggestions:")
    # Suggestion 1: Language-specific chunking
-    if languages["python"] > 10:
+    if languages['python'] > 10:
-        print("✨ Python Optimization:")
+        print(f"✨ Python Optimization:")
-        print(
+        print(f"   - Use function-level chunking (detected {languages['python']} Python files)")
-            f"   - Use function-level chunking (detected {languages['python']} Python files)"
+        print(f"   - Increase chunk size to 3000 chars for Python (better context)")
        )
        print("   - Increase chunk size to 3000 chars for Python (better context)")
-    if languages["markdown"] > 5:
+    if languages['markdown'] > 5:
-        print("✨ Markdown Optimization:")
+        print(f"✨ Markdown Optimization:")
        print(f"   - Use header-based chunking (detected {languages['markdown']} MD files)")
-        print("   - Keep sections together for better search relevance")
+        print(f"   - Keep sections together for better search relevance")
-    if languages["json"] > 20:
+    if languages['json'] > 20:
-        print("✨ JSON Optimization:")
+        print(f"✨ JSON Optimization:")
        print(f"   - Consider object-level chunking (detected {languages['json']} JSON files)")
-        print("   - Might want to exclude large config JSONs")
+        print(f"   - Might want to exclude large config JSONs")
    # Suggestion 2: File size optimization
    if large_files:
-        print("\n📈 Large File Optimization:")
+        print(f"\n📈 Large File Optimization:")
        print(f"   Found {len(large_files)} files >10KB:")
-        for filepath, size, chunks in sorted(large_files, key=lambda x: x[1], reverse=True)[
+        for filepath, size, chunks in sorted(large_files, key=lambda x: x[1], reverse=True)[:3]:
            :3
        ]:
            kb = size / 1024
            print(f"   - {filepath}: {kb:.1f}KB → {chunks} chunks")
        if len(large_files) > 5:
-            print("   💡 Consider streaming threshold: 5KB (current: 1MB)")
+            print(f"   💡 Consider streaming threshold: 5KB (current: 1MB)")
    if small_files and len(small_files) > total_files * 0.3:
-        print("\n📉 Small File Optimization:")
+        print(f"\n📉 Small File Optimization:")
        print(f"   {len(small_files)} files <500B might not need chunking")
-        print("   💡 Consider: combine small files or skip tiny ones")
+        print(f"   💡 Consider: combine small files or skip tiny ones")
    # Suggestion 3: Search optimization
    avg_efficiency = sum(chunk_efficiency) / len(chunk_efficiency)
-    print("\n🔍 Search Optimization:")
+    print(f"\n🔍 Search Optimization:")
    if avg_efficiency < 0.5:
-        print("   💡 Chunks are large relative to files - consider smaller chunks")
+        print(f"   💡 Chunks are large relative to files - consider smaller chunks")
        print(f"   💡 Current: {avg_chunks_per_file:.1f} chunks/file, try 2-3 chunks/file")
    elif avg_efficiency > 2:
-        print("   💡 Many small chunks - consider larger chunk size")
+        print(f"   💡 Many small chunks - consider larger chunk size")
-        print("   💡 Reduce chunk overhead with 2000-4000 char chunks")
+        print(f"   💡 Reduce chunk overhead with 2000-4000 char chunks")
    # Suggestion 4: Smart defaults
-    print("\n⚙️ Recommended Config Updates:")
+    print(f"\n⚙️ Recommended Config Updates:")
-    print(
+    print(f"""{{
        """{{
  "chunking": {{
    "max_size": {3000 if languages['python'] > languages['markdown'] else 2000},
    "min_size": 200,
@ -121,9 +115,7 @@ def analyze_project_patterns(manifest_path: Path):
    "skip_small_files": {500 if len(small_files) > total_files * 0.3 else 0},
    "streaming_threshold_kb": {5 if len(large_files) > 5 else 1024}
  }}
-}}"""
+}}""")
    )
 if __name__ == "__main__":
    if len(sys.argv) != 2:
--- a/install.ps1
+++ b/install.ps1
@ -1,320 +0,0 @@
 # FSS-Mini-RAG Installation Script for Windows PowerShell
 # Usage: iwr https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.ps1 -UseBasicParsing | iex
 # Requires -Version 5.1
 # Script parameters.
 #   -Force : declared, but not read by any function in this script. NOTE(review): confirm whether it is still needed.
 #   -Quiet : suppresses the banner, log lines and usage text (checked in Write-ColoredOutput, Write-Header and Write-Usage).
 param(
     [switch]$Force = $false,
     [switch]$Quiet = $false
 )
 # Configuration
 # Package handed to uv / pipx / pip.
 $PackageName = "fss-mini-rag"
 # Console command that Test-Installation and Write-Usage look for after installing.
 $CommandName = "rag-mini"
 # Turn cmdlet errors into terminating errors so the try/catch blocks in the functions below see them.
 $ErrorActionPreference = "Stop"
 # Colors for output (passed as -Color to Write-ColoredOutput)
 $Red = [System.ConsoleColor]::Red
 $Green = [System.ConsoleColor]::Green
 $Yellow = [System.ConsoleColor]::Yellow
 $Blue = [System.ConsoleColor]::Blue
 $Cyan = [System.ConsoleColor]::Cyan
 function Write-ColoredOutput {
     # Writes "$Prefix$Message" to the host in the requested color.
     # Prints nothing when the script was started with -Quiet.
     #
     # Parameters:
     #   Message   - text to print.
     #   Color     - foreground color for this line only (default: White).
     #   Prefix    - text placed directly before the message, e.g. "[INFO] ".
     #   NoNewline - keep the cursor on the same line so the next write continues it.
     #               Write-Usage already passes -NoNewline; without a declared switch
     #               PowerShell treats that token as an extra positional argument
     #               instead of suppressing the newline.
     param(
         [string]$Message,
         [System.ConsoleColor]$Color = [System.ConsoleColor]::White,
         [string]$Prefix = "",
         [switch]$NoNewline
     )
     if ($Quiet) { return }
     # -ForegroundColor scopes the color to this single write, so there is no
     # console state to save and restore afterwards.
     Write-Host "$Prefix$Message" -ForegroundColor $Color -NoNewline:$NoNewline
 }
 function Write-Header {
     # Prints the cyan ASCII-art banner followed by the script title and tagline.
     # Does nothing when -Quiet is set.
     if (-not $Quiet) {
         $bannerRows = @(
             "████████╗██╗   ██╗██████╗ ",
             "██╔══██║██║   ██║██╔══██╗",
             "██████╔╝██║   ██║██████╔╝",
             "██╔══██╗██║   ██║██╔══██╗",
             "██║  ██║╚██████╔╝██║  ██║",
             "╚═╝  ╚═╝ ╚═════╝ ╚═╝  ╚═╝"
         )
         foreach ($row in $bannerRows) {
             Write-ColoredOutput $row -Color $Cyan
         }
         Write-Host ""
         Write-ColoredOutput "FSS-Mini-RAG Installation Script" -Color $Blue
         Write-ColoredOutput "Educational RAG that actually works!" -Color $Yellow
         Write-Host ""
     }
 }
 function Write-Log {
     # Informational message: green text with an "[INFO] " prefix.
     param([string]$Message)
     Write-ColoredOutput -Message $Message -Prefix "[INFO] " -Color $Green
 }
 function Write-Warning {
     # Warning message: yellow text with a "[WARN] " prefix.
     # This function has the same name as the built-in Write-Warning cmdlet and
     # takes its place for calls made inside this script.
     param([string]$Message)
     Write-ColoredOutput -Message $Message -Prefix "[WARN] " -Color $Yellow
 }
 function Write-Error {
     # Fatal error: prints the message in red with an "[ERROR] " prefix, then
     # terminates with exit code 1. Callers must not expect this function to return.
     # It has the same name as the built-in Write-Error cmdlet and takes its place
     # for calls made inside this script.
     param([string]$Message)
     Write-ColoredOutput -Message $Message -Prefix "[ERROR] " -Color $Red
     exit 1
 }
 function Test-SystemRequirements {
     # Verifies the prerequisites for installation and aborts (via Write-Error) when one is missing:
     #   * PowerShell 5.1 or later
     #   * a Python interpreter, version 3.8 or later, reachable as "python" or "python3"
     # On success the working interpreter name is stored in $script:PythonCommand
     # for Install-WithPipx and Install-WithPip.
     Write-Log "Checking system requirements..."
     # Check PowerShell version (major AND minor, so that 5.0 is rejected as the message promises)
     $psVersion = $PSVersionTable.PSVersion
     if ($psVersion.Major -lt 5 -or ($psVersion.Major -eq 5 -and $psVersion.Minor -lt 1)) {
         Write-Error "PowerShell 5.1 or later is required. Found version: $($psVersion.ToString())"
     }
     Write-Log "PowerShell $($psVersion.ToString()) detected ✓"
     # Check if Python 3.8+ is available
     try {
         $pythonCommand = $null
         $pythonVersionOutput = $null
         $interpreterFound = $false
         # Probe each candidate only if it exists: invoking a missing command throws,
         # which previously made the python3 fallback unreachable. The first candidate
         # that actually reports a version wins, so a non-working "python" entry does
         # not hide a working "python3".
         foreach ($candidate in @("python", "python3")) {
             if (-not (Get-Command $candidate -ErrorAction SilentlyContinue)) { continue }
             $interpreterFound = $true
             $reported = & $candidate -c "import sys; print('.'.join(map(str, sys.version_info[:3])))" 2>$null
             if ($reported) {
                 $pythonCommand = $candidate
                 $pythonVersionOutput = "$reported".Trim()
                 break
             }
         }
         if (-not $interpreterFound) {
             Write-Error "Python 3 is required but not found. Please install Python 3.8 or later from python.org"
         }
         if (-not $pythonVersionOutput) {
             Write-Error "Unable to determine Python version"
         }
         # Parse version and check if >= 3.8
         $versionParts = $pythonVersionOutput.Split('.')
         $majorVersion = [int]$versionParts[0]
         $minorVersion = [int]$versionParts[1]
         if ($majorVersion -lt 3 -or ($majorVersion -eq 3 -and $minorVersion -lt 8)) {
             Write-Error "Python $pythonVersionOutput detected, but Python 3.8+ is required"
         }
         Write-Log "Python $pythonVersionOutput detected ✓"
         # Store the interpreter that answered the version probe for later use
         $script:PythonCommand = $pythonCommand
     } catch {
         Write-Error "Failed to check Python installation: $($_.Exception.Message)"
     }
 }
 function Install-UV {
     # Ensures the uv package manager is available.
     # Returns $true when uv can be invoked (already present or freshly installed),
     # $false when the caller should fall back to another installation method.
     if (Get-Command uv -ErrorAction SilentlyContinue) {
         Write-Log "uv is already installed ✓"
         return $true
     }
     Write-Log "Installing uv (fast Python package manager)..."
     try {
         # Install uv using the official Windows installer
         $uvInstaller = Invoke-WebRequest -Uri "https://astral.sh/uv/install.ps1" -UseBasicParsing
         # Send installer output straight to the host: anything left on the pipeline
         # would become part of this function's return value and break the boolean
         # test in Main.
         Invoke-Expression $uvInstaller.Content | Out-Host
         # Refresh environment to pick up new PATH
         $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
         if (Get-Command uv -ErrorAction SilentlyContinue) {
             Write-Log "uv installed successfully ✓"
             return $true
         }
         Write-Warning "uv installation may not be in PATH. Falling back to pip method."
         return $false
     } catch {
         Write-Warning "uv installation failed: $($_.Exception.Message). Falling back to pip method."
         return $false
     }
 }
 function Install-WithUV {
     # Installs the package as a uv tool.
     # Returns $true on success, $false when the caller should fall back to another method.
     Write-Log "Installing $PackageName with uv..."
     try {
         # Pipe to Out-Host so uv's stdout is displayed but not returned. Otherwise a
         # failed run that printed text would return @(<text>, $false), which
         # PowerShell evaluates as true. $LASTEXITCODE is still set by uv.
         & uv tool install $PackageName | Out-Host
         if ($LASTEXITCODE -eq 0) {
             Write-Log "$PackageName installed successfully with uv ✓"
             return $true
         }
         Write-Warning "uv installation failed. Falling back to pip method."
         return $false
     } catch {
         Write-Warning "uv installation failed: $($_.Exception.Message). Falling back to pip method."
         return $false
     }
 }
 function Install-WithPipx {
     # Installs the package with pipx, bootstrapping pipx through pip when it is missing.
     # Returns $true on success, $false when the caller should fall back to plain pip.
     # Output of pip/pipx is piped to Out-Host so it is shown to the user without
     # becoming part of the return value tested in Main.
     if (-not (Get-Command pipx -ErrorAction SilentlyContinue)) {
         Write-Log "Installing pipx..."
         try {
             & $script:PythonCommand -m pip install --user pipx | Out-Host
             & $script:PythonCommand -m pipx ensurepath | Out-Host
             # Refresh PATH
             $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
         } catch {
             Write-Warning "Failed to install pipx: $($_.Exception.Message). Falling back to pip method."
             return $false
         }
     }
     if (-not (Get-Command pipx -ErrorAction SilentlyContinue)) {
         Write-Warning "pipx not available. Falling back to pip method."
         return $false
     }
     Write-Log "Installing $PackageName with pipx..."
     try {
         & pipx install $PackageName | Out-Host
         if ($LASTEXITCODE -eq 0) {
             Write-Log "$PackageName installed successfully with pipx ✓"
             return $true
         }
         Write-Warning "pipx installation failed. Falling back to pip method."
         return $false
     } catch {
         Write-Warning "pipx installation failed: $($_.Exception.Message). Falling back to pip method."
         return $false
     }
 }
 function Install-WithPip {
     # Last-resort installation: "pip install --user", then make sure the per-user
     # Scripts directory is on PATH. Returns $true on success; any failure is fatal
     # (Write-Error exits the script).
     Write-Log "Installing $PackageName with pip..."
     try {
         # Out-Host keeps pip's output out of this function's return value.
         & $script:PythonCommand -m pip install --user $PackageName | Out-Host
         if ($LASTEXITCODE -eq 0) {
             Write-Log "$PackageName installed successfully with pip --user ✓"
             # Add Scripts directory to PATH if not already there
             $scriptsPath = & $script:PythonCommand -c "import site; print(site.getusersitepackages().replace('site-packages', 'Scripts'))"
             # Compare exact PATH entries; a wildcard -like match misbehaves when the
             # directory name contains wildcard characters such as [ or ].
             if (($env:Path -split ';') -notcontains $scriptsPath) {
                 Write-Warning "Adding $scriptsPath to PATH..."
                 # Persist only into the *user* PATH. Writing the whole process PATH back
                 # would copy every machine-level entry into the user variable.
                 $userPath = [System.Environment]::GetEnvironmentVariable("Path", "User")
                 if (($userPath -split ';') -notcontains $scriptsPath) {
                     $persistedPath = if ($userPath) { "$scriptsPath;$userPath" } else { $scriptsPath }
                     [System.Environment]::SetEnvironmentVariable("Path", $persistedPath, "User")
                 }
                 # Make the command usable in the current session as well.
                 $env:Path = "$scriptsPath;$env:Path"
             }
             return $true
         } else {
             Write-Error "Failed to install $PackageName with pip."
         }
     } catch {
         Write-Error "Failed to install $PackageName with pip: $($_.Exception.Message)"
     }
 }
 function Test-Installation {
     # Checks that the installed command can be found on PATH and that
     # "<command> --help" exits with code 0. Returns $true only when both hold.
     Write-Log "Verifying installation..."
     # Rebuild PATH from the stored machine and user values so changes made by the installers are visible
     $machinePath = [System.Environment]::GetEnvironmentVariable("Path","Machine")
     $userPath = [System.Environment]::GetEnvironmentVariable("Path","User")
     $env:Path = $machinePath + ";" + $userPath
     # Guard: command not resolvable at all
     if (-not (Get-Command $CommandName -ErrorAction SilentlyContinue)) {
         Write-Warning "$CommandName command not found in PATH."
         Write-Warning "You may need to restart your PowerShell session or reboot."
         return $false
     }
     Write-Log "$CommandName command is available ✓"
     # Smoke-test the command; any exception counts as a failed run
     $helpSucceeded = $false
     try {
         & $CommandName --help > $null 2>&1
         $helpSucceeded = ($LASTEXITCODE -eq 0)
     } catch {
         $helpSucceeded = $false
     }
     if ($helpSucceeded) {
         Write-Log "Installation verified successfully! ✅"
         return $true
     }
     Write-Warning "Command exists but may have issues."
     return $false
 }
 function Write-Usage {
     # Prints the post-install quick-start guide and documentation link.
     # Adds a restart hint when the command cannot be resolved in this session.
     # Does nothing when -Quiet is set.
     if (-not $Quiet) {
         Write-Host ""
         Write-ColoredOutput "🎉 Installation complete!" -Color $Green
         Write-Host ""
         Write-ColoredOutput "Quick Start:" -Color $Blue
         # Each example: a cyan caption, the command line, then a blank separator
         Write-ColoredOutput "  # Initialize your project" -Color $Cyan
         Write-Host "  $CommandName init"
         Write-Host ""
         Write-ColoredOutput "  # Search your codebase" -Color $Cyan
         Write-Host "  $CommandName search `"authentication logic`""
         Write-Host ""
         Write-ColoredOutput "  # Get help" -Color $Cyan
         Write-Host "  $CommandName --help"
         Write-Host ""
         Write-ColoredOutput "Documentation: " -Color $Blue -NoNewline
         Write-Host "https://github.com/FSSCoding/Fss-Mini-Rag"
         Write-Host ""
         $commandResolved = [bool](Get-Command $CommandName -ErrorAction SilentlyContinue)
         if (-not $commandResolved) {
             Write-ColoredOutput "Note: If the command is not found, restart PowerShell or reboot Windows." -Color $Yellow
             Write-Host ""
         }
     }
 }
 # Main execution
 function Main {
     # Orchestrates the install: banner, prerequisite checks, then the first
     # installation method that succeeds (uv, then pipx, then pip), followed by
     # verification and the usage guide.
     Write-Header
     Test-SystemRequirements
     # -and short-circuits, so Install-WithUV only runs when uv itself is available
     $uvSucceeded = (Install-UV) -and (Install-WithUV)
     $installationMethod = ""
     if ($uvSucceeded) {
         $installationMethod = "uv ✨"
     } elseif (Install-WithPipx) {
         $installationMethod = "pipx 📦"
     } else {
         # Install-WithPip exits the script on failure, so reaching the next line means success
         Install-WithPip
         $installationMethod = "pip 🐍"
     }
     Write-Log "Installation method: $installationMethod"
     # The usage guide is shown either way; a failed verification only adds a warning first
     if (-not (Test-Installation)) {
         Write-Warning "Installation completed but verification failed. The tool may still work after restarting PowerShell."
     }
     Write-Usage
 }
 # Entry point: run Main unless the script was dot-sourced (the invocation name
 # is '.' in that case), so the functions can be loaded without starting an install.
 if ($MyInvocation.InvocationName -ne '.') {
     Main
 }
--- a/install.sh
+++ b/install.sh
@ -1,238 +0,0 @@
 #!/usr/bin/env bash
 # FSS-Mini-RAG Installation Script for Linux/macOS
 # Usage: curl -fsSL https://raw.githubusercontent.com/fsscoding/fss-mini-rag/main/install.sh | bash
 # Strict mode: abort on a failing command (-e), on use of an unset variable (-u),
 # and when any stage of a pipeline fails (pipefail).
 set -euo pipefail
 # Colors for output: ANSI escape sequences kept as literal backslash text
 # (single-quoted), so they only take effect when printed through `echo -e`.
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 BLUE='\033[0;34m'
 CYAN='\033[0;36m'
 NC='\033[0m' # No Color
 # Configuration
 # PACKAGE_NAME is the name handed to uv/pipx/pip; COMMAND_NAME is the executable
 # that verify_installation and print_usage look up afterwards.
 PACKAGE_NAME="fss-mini-rag"
 COMMAND_NAME="rag-mini"
 print_header() {
    # Prints the cyan ASCII-art banner followed by the script title and tagline.
    echo -e "${CYAN}"
    # One printf call emits every banner row, one argument per line.
    printf '%s\n' \
        "████████╗██╗   ██╗██████╗ " \
        "██╔══██║██║   ██║██╔══██╗" \
        "██████╔╝██║   ██║██████╔╝" \
        "██╔══██╗██║   ██║██╔══██╗" \
        "██║  ██║╚██████╔╝██║  ██║" \
        "╚═╝  ╚═╝ ╚═════╝ ╚═╝  ╚═╝"
    echo -e "${NC}"
    echo -e "${BLUE}FSS-Mini-RAG Installation Script${NC}"
    echo -e "${YELLOW}Educational RAG that actually works!${NC}"
    printf '\n'
 }
 log() {
    # Prints an informational message ($1) behind a green [INFO] tag.
    # %b expands backslash escapes the same way `echo -e` does.
    printf '%b\n' "${GREEN}[INFO]${NC} $1"
 }
 warn() {
    # Prints a warning message ($1) behind a yellow [WARN] tag.
    # %b expands backslash escapes the same way `echo -e` does.
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
 }
 error() {
    # Prints a fatal message ($1) behind a red [ERROR] tag and terminates the
    # script with exit status 1.
    # The message goes to stderr so it stays visible when stdout is redirected
    # or captured.
    echo -e "${RED}[ERROR]${NC} $1" >&2
    exit 1
 }
 check_system() {
    # Validates the host: a supported OS plus a python3 interpreter at version
    # 3.8 or newer. Sets PLATFORM; every failed check ends the script via error().
    log "Checking system requirements..."
    case "$(uname -s)" in
        Linux*)  PLATFORM="Linux" ;;
        Darwin*) PLATFORM="macOS" ;;
        *) error "Unsupported platform: $(uname -s). This script supports Linux and macOS only." ;;
    esac
    log "Platform: $PLATFORM"
    # python3 must be resolvable before its version can be queried.
    command -v python3 > /dev/null 2>&1 \
        || error "Python 3 is required but not installed. Please install Python 3.8 or later."
    required_version="3.8"
    # major.minor only, used for the messages below.
    python_version=$(python3 -c "import sys; print('.'.join(map(str, sys.version_info[:2])))")
    # The comparison itself is done inside Python on the version tuple.
    python3 -c "import sys; exit(0 if sys.version_info >= (3,8) else 1)" 2>/dev/null \
        || error "Python ${python_version} detected, but Python ${required_version}+ is required."
    log "Python ${python_version} detected ✓"
 }
 install_uv() {
    # Ensures the uv package manager is available.
    # Returns 0 when uv is on PATH (already present or freshly installed) and 1
    # when it could not be installed or located, so the caller can fall back.
    if command -v uv &> /dev/null; then
        log "uv is already installed ✓"
        return 0
    fi
    log "Installing uv (fast Python package manager)..."
    # Install uv using the official installer
    if command -v curl &> /dev/null; then
        curl -LsSf https://astral.sh/uv/install.sh | sh
    elif command -v wget &> /dev/null; then
        wget -qO- https://astral.sh/uv/install.sh | sh
    else
        warn "Neither curl nor wget available. Falling back to pip installation method."
        return 1
    fi
    # Add uv to PATH for current session
    export PATH="$HOME/.local/bin:$PATH"
    # The installer does not always target ~/.local/bin: it honours XDG_BIN_HOME
    # when set, and older releases used ~/.cargo/bin. Add those directories too,
    # but only when they actually contain a uv executable.
    local extra_dir
    for extra_dir in "${XDG_BIN_HOME:-}" "$HOME/.cargo/bin"; do
        if [[ -n "$extra_dir" && -x "$extra_dir/uv" ]]; then
            export PATH="$extra_dir:$PATH"
        fi
    done
    if command -v uv &> /dev/null; then
        log "uv installed successfully ✓"
        return 0
    else
        warn "uv installation may not be in PATH. Falling back to pip method."
        return 1
    fi
 }
 install_with_uv() {
    # Installs the package as a uv tool.
    # Returns 0 on success, 1 when `uv tool install` fails.
    log "Installing ${PACKAGE_NAME} with uv..."
    if ! uv tool install "$PACKAGE_NAME"; then
        warn "uv installation failed. Falling back to pip method."
        return 1
    fi
    log "${PACKAGE_NAME} installed successfully with uv ✓"
    return 0
 }
 install_with_pipx() {
    # Installs the package through pipx, bootstrapping pipx with pip when it is
    # missing. Returns 0 on success, 1 when pipx is unavailable or the install fails.
    if ! command -v pipx &> /dev/null; then
        log "Installing pipx..."
        python3 -m pip install --user pipx
        python3 -m pipx ensurepath
        # Make a freshly installed pipx resolvable in this session.
        export PATH="$HOME/.local/bin:$PATH"
    fi
    # Guard: the bootstrap above may not have produced a usable pipx.
    if ! command -v pipx &> /dev/null; then
        warn "pipx not available. Falling back to pip method."
        return 1
    fi
    log "Installing ${PACKAGE_NAME} with pipx..."
    if ! pipx install "$PACKAGE_NAME"; then
        warn "pipx installation failed. Falling back to pip method."
        return 1
    fi
    log "${PACKAGE_NAME} installed successfully with pipx ✓"
    return 0
 }
 install_with_pip() {
    # Last-resort installer: per-user pip install of the package.
    # Returns 0 on success; a failed install ends the script through error().
    # When ~/.local/bin is not on PATH it is added for this session and an export
    # line is persisted to the shell rc files.
    log "Installing ${PACKAGE_NAME} with pip (--user)..."
    if ! python3 -m pip install --user "$PACKAGE_NAME"; then
        error "Failed to install ${PACKAGE_NAME} with pip. Please check your Python setup."
    fi
    log "${PACKAGE_NAME} installed successfully with pip --user ✓"
    # Ensure ~/.local/bin is in PATH
    local local_bin="$HOME/.local/bin"
    if [[ ":$PATH:" != *":$local_bin:"* ]]; then
        warn "Adding $local_bin to PATH..."
        # Single quotes keep $HOME and $PATH unexpanded so the rc file evaluates them.
        local path_line='export PATH="$HOME/.local/bin:$PATH"'
        local rc_file
        for rc_file in "$HOME/.bashrc" "$HOME/.zshrc"; do
            # ~/.bashrc is created when missing; ~/.zshrc is only updated if it exists.
            if [[ "$rc_file" == "$HOME/.zshrc" && ! -f "$rc_file" ]]; then
                continue
            fi
            # Append only when the exact line is absent, so repeated runs do not
            # pile up duplicate entries.
            if ! grep -qxF -- "$path_line" "$rc_file" 2> /dev/null; then
                echo "$path_line" >> "$rc_file"
            fi
        done
        export PATH="$local_bin:$PATH"
    fi
    return 0
 }
 verify_installation() {
    # Confirms the installed command is on PATH and that its --help runs.
    # Returns 0 when both checks pass, 1 otherwise.
    log "Verifying installation..."
    if ! command -v "$COMMAND_NAME" &> /dev/null; then
        warn "${COMMAND_NAME} command not found in PATH."
        warn "You may need to restart your terminal or run: source ~/.bashrc"
        return 1
    fi
    log "${COMMAND_NAME} command is available ✓"
    # Smoke test: --help has to exit successfully.
    if ! $COMMAND_NAME --help &> /dev/null; then
        warn "Command exists but may have issues."
        return 1
    fi
    log "Installation verified successfully! ✅"
    return 0
 }
 print_usage() {
    # Prints the quick-start cheat sheet, the documentation link and, when the
    # command is not resolvable yet, a hint on refreshing the shell.
    echo
    echo -e "${GREEN}🎉 Installation complete!${NC}"
    echo
    echo -e "${BLUE}Quick Start:${NC}"
    # Flat list of (caption, sub-command) pairs, walked two entries at a time.
    local -a steps=(
        "# Initialize your project" "init"
        "# Search your codebase" "search \"authentication logic\""
        "# Get help" "--help"
    )
    local i
    for ((i = 0; i < ${#steps[@]}; i += 2)); do
        echo -e "  ${CYAN}${steps[i]}${NC}"
        echo -e "  ${COMMAND_NAME} ${steps[i + 1]}"
        echo
    done
    echo -e "${BLUE}Documentation:${NC} https://github.com/FSSCoding/Fss-Mini-Rag"
    echo
    command -v "$COMMAND_NAME" &> /dev/null || {
        echo -e "${YELLOW}Note: If the command is not found, restart your terminal or run:${NC}"
        echo -e "  source ~/.bashrc"
        echo
    }
 }
 main() {
    # Entry point: banner, system checks, installer selection (uv, then pipx,
    # then pip), verification and the usage guide.
    print_header
    check_system
    local method
    if install_uv && install_with_uv; then
        method="uv ✨"
    elif install_with_pipx; then
        method="pipx 📦"
    else
        install_with_pip
        method="pip 🐍"
    fi
    log "Installation method: ${method}"
    # The usage guide is printed either way; a failed verification only adds a
    # warning in front of it.
    verify_installation || warn "Installation completed but verification failed. The tool may still work."
    print_usage
 }
 # Run the main function, forwarding the script's command-line arguments
 # (main itself does not read them).
 main "$@"
--- a/install_mini_rag.ps1
+++ b/install_mini_rag.ps1
@ -1,458 +0,0 @@
 # FSS-Mini-RAG PowerShell Installation Script
 # Interactive installer that sets up Python environment and dependencies
 # Stop on errors: with "Stop", PowerShell errors become terminating, so the
 # try/catch blocks in this script can intercept them.
 $ErrorActionPreference = "Stop"
 # Color functions for better output
 function Write-ColorOutput($message, $color = "White", [switch]$NoNewline) {
    # Writes $message to the host in the requested foreground color.
    #   $message   - text to display
    #   $color     - console color name, "White" when omitted
    #   -NoNewline - leave the cursor on the same line so the next Write-Host
    #                continues it. Callers in this script pass this switch, so it
    #                has to be declared here; an undeclared switch on a simple
    #                function is silently swallowed into $args.
    Write-Host $message -ForegroundColor $color -NoNewline:$NoNewline
 }
 function Write-Header {
    # Prints a line break followed by a cyan "=== <message> ===" section banner.
    param($message)
    Write-Host "`n" -NoNewline
    Write-ColorOutput -message "=== $message ===" -color "Cyan"
 }
 function Write-Success {
    # Prints a green success line prefixed with a check mark.
    param($message)
    Write-ColorOutput -message "✅ $message" -color "Green"
 }
 function Write-Warning {
    # Prints a yellow warning line to the host.
    # NOTE: this function has the same name as the built-in Write-Warning cmdlet
    # and therefore takes its place for calls made inside this script.
    param($message)
    Write-ColorOutput -message "⚠️  $message" -color "Yellow"
 }
 function Write-Error {
    # Prints a red error line to the host.
    # NOTE: this function has the same name as the built-in Write-Error cmdlet and
    # therefore takes its place for calls made inside this script; it only prints,
    # it does not raise an error record.
    param($message)
    Write-ColorOutput -message "❌ $message" -color "Red"
 }
 function Write-Info {
    # Prints a blue informational line prefixed with an info symbol.
    param($message)
    Write-ColorOutput -message "ℹ️  $message" -color "Blue"
 }
 # Get script directory: the folder containing this script file. The functions
 # below use it to locate .venv, the requirements files and rag-mini.py.
 $ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
 # Main installation function
 function Main {
    # Interactive entry point: shows the banner, asks for confirmation, then runs
    # the installation steps in order and finishes with an import test.
    Write-Host ""
    $banner = @(
        "╔══════════════════════════════════════╗",
        "║        FSS-Mini-RAG Installer        ║",
        "║   Fast Semantic Search for Code      ║",
        "╚══════════════════════════════════════╝"
    )
    foreach ($bannerLine in $banner) {
        Write-ColorOutput $bannerLine "Cyan"
    }
    Write-Host ""
    Write-Info "PowerShell installation process:"
    $overview = @(
        "  • Python environment setup",
        "  • Smart configuration based on your system",
        "  • Optional AI model downloads (with consent)",
        "  • Testing and verification"
    )
    foreach ($overviewLine in $overview) {
        Write-Host $overviewLine
    }
    Write-Host ""
    Write-ColorOutput "Note: You'll be asked before downloading any models" "Cyan"
    Write-Host ""
    $answer = Read-Host "Begin installation? [Y/n]"
    # -eq compares strings case-insensitively, so this covers "n" and "N".
    if ($answer -eq "n") {
        Write-Host "Installation cancelled."
        exit 0
    }
    # Environment first, then backend detection, then packages.
    Check-Python
    Create-VirtualEnvironment
    $ollamaAvailable = Check-Ollama
    Get-InstallationPreferences $ollamaAvailable
    Install-Dependencies
    # Model setup only makes sense when an Ollama server is reachable.
    if ($ollamaAvailable) {
        Setup-OllamaModel
    }
    # A failed import test aborts with exit code 1; otherwise show the summary.
    if (-not (Test-Installation)) {
        Write-Error "Installation test failed"
        Write-Host "Please check error messages and try again."
        exit 1
    }
    Show-Completion
 }
 function Check-Python {
    # Locates a usable Python launcher and enforces the 3.8 minimum.
    # On success stores the launcher name in $script:PythonCmd; otherwise prints
    # guidance and exits with code 1.
    Write-Header "Checking Python Installation"
    $launcher = $null
    $detectedVersion = $null
    # Probe the candidates in order; the first whose --version exits with 0 wins.
    foreach ($candidate in "python", "python3", "py") {
        try {
            $versionOutput = & $candidate --version 2>&1
            if ($LASTEXITCODE -ne 0) { continue }
            $launcher = $candidate
            # Second space-separated token of the --version output.
            $detectedVersion = ($versionOutput -split " ")[1]
            break
        } catch {
            # This candidate could not be launched; try the next one.
            continue
        }
    }
    if (-not $launcher) {
        Write-Error "Python not found!"
        Write-Host ""
        Write-ColorOutput "Please install Python 3.8+ from:" "Yellow"
        Write-Host "  • https://python.org/downloads"
        Write-Host "  • Make sure to check 'Add Python to PATH' during installation"
        Write-Host ""
        Write-ColorOutput "After installing Python, run this script again." "Cyan"
        exit 1
    }
    # Compare major.minor numerically against 3.8.
    $parts = $detectedVersion -split "\."
    $major = [int]$parts[0]
    $minor = [int]$parts[1]
    $meetsMinimum = ($major -gt 3) -or ($major -eq 3 -and $minor -ge 8)
    if (-not $meetsMinimum) {
        Write-Error "Python $detectedVersion found, but 3.8+ required"
        Write-Host "Please upgrade Python to 3.8 or higher."
        exit 1
    }
    Write-Success "Found Python $detectedVersion ($launcher)"
    $script:PythonCmd = $launcher
 }
 function Create-VirtualEnvironment {
    # Creates (or reuses) the .venv folder next to this script and activates it so
    # that later python/pip calls run inside the environment.
    # Exits with code 1 when the environment cannot be created.
    Write-Header "Creating Python Virtual Environment"
    $venvPath = Join-Path $ScriptDir ".venv"
    $reuseExisting = $false
    if (Test-Path $venvPath) {
        Write-Info "Virtual environment already exists at $venvPath"
        $recreate = Read-Host "Recreate it? (y/N)"
        if ($recreate -eq "y" -or $recreate -eq "Y") {
            Write-Info "Removing existing virtual environment..."
            Remove-Item -Recurse -Force $venvPath
        } else {
            Write-Success "Using existing virtual environment"
            # Do not return here: a reused environment still has to be activated
            # below, otherwise packages would be installed into the global Python.
            $reuseExisting = $true
        }
    }
    if (-not $reuseExisting) {
        Write-Info "Creating virtual environment at $venvPath"
        try {
            & $script:PythonCmd -m venv $venvPath
            if ($LASTEXITCODE -ne 0) {
                throw "Virtual environment creation failed"
            }
            Write-Success "Virtual environment created"
        } catch {
            Write-Error "Failed to create virtual environment"
            Write-Host "This might be because python venv module is not available."
            Write-Host "Try installing Python from python.org with full installation."
            exit 1
        }
    }
    # Activate virtual environment and upgrade pip
    $activateScript = Join-Path $venvPath "Scripts\Activate.ps1"
    if (Test-Path $activateScript) {
        & $activateScript
        Write-Success "Virtual environment activated"
        # pip is only upgraded for a freshly created environment.
        if (-not $reuseExisting) {
            Write-Info "Upgrading pip..."
            try {
                & python -m pip install --upgrade pip --quiet
                # A native command signals failure through its exit code, not an
                # exception, so convert it for the catch block below.
                if ($LASTEXITCODE -ne 0) {
                    throw "pip upgrade failed"
                }
            } catch {
                Write-Warning "Could not upgrade pip, continuing anyway..."
            }
        }
    } else {
        Write-Warning "Activation script not found at $activateScript; packages will be installed into the current Python environment"
    }
 }
 function Check-Ollama {
    # Determines whether an Ollama server can be used for embeddings.
    # Returns $true when the local server answers on port 11434 (already running
    # or started here), $false when the installer should continue without it.
    # Options 1 and 2 of the menu end the script so the user can install Ollama.
    # Every statement that is not a return value is kept out of the output stream,
    # so the caller always receives a single boolean.
    Write-Header "Checking Ollama (AI Model Server)"
    try {
        $response = Invoke-WebRequest -Uri "http://localhost:11434/api/version" -TimeoutSec 5 -ErrorAction SilentlyContinue
        if ($response.StatusCode -eq 200) {
            Write-Success "Ollama server is running"
            return $true
        }
    } catch {
        # Ollama not running, check if installed
    }
    # Look the executable up on PATH instead of running it: this avoids depending
    # on a particular CLI subcommand and keeps its stdout out of the return value.
    if (Get-Command ollama -ErrorAction SilentlyContinue) {
        Write-Warning "Ollama is installed but not running"
        $startOllama = Read-Host "Start Ollama now? (Y/n)"
        if ($startOllama -ne "n" -and $startOllama -ne "N") {
            Write-Info "Starting Ollama server..."
            try {
                Start-Process -FilePath "ollama" -ArgumentList "serve" -WindowStyle Hidden
                Start-Sleep -Seconds 3
                $response = Invoke-WebRequest -Uri "http://localhost:11434/api/version" -TimeoutSec 5 -ErrorAction SilentlyContinue
                if ($response.StatusCode -eq 200) {
                    Write-Success "Ollama server started"
                    return $true
                }
            } catch {
                # Handled below together with the "no answer" case.
            }
            # Reached when the start failed or the server did not answer in time.
            Write-Warning "Failed to start Ollama automatically"
            Write-Host "Please start Ollama manually: ollama serve"
        }
        return $false
    }
    Write-Warning "Ollama not found"
    Write-Host ""
    Write-ColorOutput "Ollama provides the best embedding quality and performance." "Cyan"
    Write-Host ""
    Write-ColorOutput "Options:" "White"
    Write-ColorOutput "1) Install Ollama automatically" "Green" -NoNewline
    Write-Host " (recommended)"
    Write-ColorOutput "2) Manual installation" "Yellow" -NoNewline
    Write-Host " - Visit https://ollama.com/download"
    Write-ColorOutput "3) Continue without Ollama" "Blue" -NoNewline
    Write-Host " (uses ML fallback)"
    Write-Host ""
    $choice = Read-Host "Choose [1/2/3]"
    switch ($choice) {
        "1" {
            Write-Info "Opening Ollama download page..."
            Start-Process "https://ollama.com/download"
            Write-Host ""
            Write-ColorOutput "Please:" "Yellow"
            Write-Host "  1. Download and install Ollama from the opened page"
            Write-Host "  2. Run 'ollama serve' in a new terminal"
            Write-Host "  3. Re-run this installer"
            Write-Host ""
            $null = Read-Host "Press Enter to exit"
            exit 0
        }
        "2" {
            Write-Host ""
            Write-ColorOutput "Manual Ollama installation:" "Yellow"
            Write-Host "  1. Visit: https://ollama.com/download"
            Write-Host "  2. Download and install for Windows"
            Write-Host "  3. Run: ollama serve"
            Write-Host "  4. Re-run this installer"
            $null = Read-Host "Press Enter to exit"
            exit 0
        }
        "3" {
            Write-Info "Continuing without Ollama (will use ML fallback)"
            return $false
        }
        default {
            Write-Warning "Invalid choice, continuing without Ollama"
            return $false
        }
    }
 }
 function Get-InstallationPreferences($ollamaAvailable) {
    # Explains the embedding backends, asks for the install flavour and stores the
    # result ("light" or "full") in $script:InstallType.
    #   $ollamaAvailable - truthy when an Ollama server was detected; selects the
    #                      recommended default (light with Ollama, full without).
    Write-Header "Installation Configuration"
    Write-ColorOutput "FSS-Mini-RAG can run with different embedding backends:" "Cyan"
    Write-Host ""
    Write-ColorOutput "• Ollama" "Green" -NoNewline
    Write-Host " (recommended) - Best quality, local AI server"
    Write-ColorOutput "• ML Fallback" "Yellow" -NoNewline
    Write-Host " - Offline transformers, larger but always works"
    Write-ColorOutput "• Hash-based" "Blue" -NoNewline
    Write-Host " - Lightweight fallback, basic similarity"
    Write-Host ""
    # The recommendation and the answer used for an empty reply go together.
    if ($ollamaAvailable) {
        $recommended = "light (Ollama detected)"
        $defaultChoice = "L"
        Write-ColorOutput "✓ Ollama detected - light installation recommended" "Green"
    } else {
        $recommended = "full (no Ollama)"
        $defaultChoice = "F"
        Write-ColorOutput "⚠ No Ollama - full installation recommended for better quality" "Yellow"
    }
    Write-Host ""
    Write-ColorOutput "Installation options:" "White"
    Write-ColorOutput "L) Light" "Green" -NoNewline
    Write-Host " - Ollama + basic deps (~50MB) " -NoNewline
    Write-ColorOutput "← Best performance + AI chat" "Cyan"
    Write-ColorOutput "F) Full" "Yellow" -NoNewline
    Write-Host "  - Light + ML fallback (~2-3GB) " -NoNewline
    Write-ColorOutput "← Works without Ollama" "Cyan"
    Write-Host ""
    $selection = Read-Host "Choose [L/F] or Enter for recommended ($recommended)"
    if ($selection -eq "") {
        $selection = $defaultChoice
    }
    $normalized = $selection.ToUpper()
    if ($normalized -eq "L") {
        $script:InstallType = "light"
        Write-ColorOutput "Selected: Light installation" "Green"
    } elseif ($normalized -eq "F") {
        $script:InstallType = "full"
        Write-ColorOutput "Selected: Full installation" "Yellow"
    } else {
        # Anything unrecognised falls back to the light install.
        Write-Warning "Invalid choice, using light installation"
        $script:InstallType = "light"
    }
 }
 function Install-Dependencies {
    # Installs the requirements file that matches $script:InstallType and then
    # checks that the core packages import. Exits with code 1 on any failure.
    Write-Header "Installing Python Dependencies"
    # Describe the two flavours as data; the install itself is shared below.
    if ($script:InstallType -eq "light") {
        $plan = @{ File = "requirements.txt"; Extra = @("--quiet"); Done = "Dependencies installed" }
        Write-Info "Installing core dependencies (~50MB)..."
        Write-ColorOutput "  Installing: lancedb, pandas, numpy, PyYAML, etc." "Blue"
    } else {
        $plan = @{ File = "requirements-full.txt"; Extra = @(); Done = "All dependencies installed" }
        Write-Info "Installing full dependencies (~2-3GB)..."
        Write-ColorOutput "This includes PyTorch and transformers - will take several minutes" "Yellow"
    }
    $requirementsFile = $plan.File
    $extraArgs = $plan.Extra
    try {
        # Only the light install passes --quiet; the full install shows pip output.
        & pip install -r (Join-Path $ScriptDir $requirementsFile) @extraArgs
        if ($LASTEXITCODE -ne 0) {
            throw "Dependency installation failed"
        }
        Write-Success $plan.Done
    } catch {
        Write-Error "Failed to install dependencies"
        Write-Host "Try: pip install -r $requirementsFile"
        exit 1
    }
    Write-Info "Verifying installation..."
    try {
        & python -c "import lancedb, pandas, numpy" 2>$null
        if ($LASTEXITCODE -ne 0) {
            throw "Package verification failed"
        }
        Write-Success "Core packages verified"
    } catch {
        Write-Error "Package verification failed"
        exit 1
    }
 }
 function Setup-OllamaModel {
    # Placeholder for the Ollama model setup step: it only prints a section
    # header and an informational message; no model is downloaded here.
    # Implementation similar to bash version but adapted for PowerShell
    Write-Header "Ollama Model Setup"
    # For brevity, implementing basic version
    Write-Info "Ollama model setup available - see bash version for full implementation"
 }
 function Test-Installation {
    # Smoke-tests the installation by importing the main mini_rag classes.
    # Returns $true when the import succeeds and $false otherwise.
    Write-Header "Testing Installation"
    Write-Info "Testing basic functionality..."
    try {
        # Discard Python's stdout: anything left in the output stream would become
        # part of this function's return value and turn the boolean into an array.
        & python -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('✅ Import successful')" 2>$null | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "Import test failed"
        }
        Write-Success "Python imports working"
        return $true
    } catch {
        Write-Error "Import test failed"
        return $false
    }
 }
 function Show-Completion {
    # Prints the post-install summary (launchers and documentation pointers) and
    # offers to run the quick demo test.
    Write-Header "Installation Complete!"
    Write-ColorOutput "FSS-Mini-RAG is now installed!" "Green"
    Write-Host ""
    Write-ColorOutput "Quick Start Options:" "Cyan"
    Write-Host ""
    Write-ColorOutput "🎯 TUI (Beginner-Friendly):" "Green"
    $tuiLines = @(
        "     rag-tui.bat",
        "     # Interactive interface with guided setup",
        ""
    )
    foreach ($line in $tuiLines) { Write-Host $line }
    Write-ColorOutput "💻 CLI (Advanced):" "Blue"
    $cliLines = @(
        "     rag-mini.bat index C:\path\to\project",
        "     rag-mini.bat search C:\path\to\project `"query`"",
        "     rag-mini.bat status C:\path\to\project",
        ""
    )
    foreach ($line in $cliLines) { Write-Host $line }
    Write-ColorOutput "Documentation:" "Cyan"
    $docLines = @(
        "  • README.md - Complete technical documentation",
        "  • docs\GETTING_STARTED.md - Step-by-step guide",
        "  • examples\ - Usage examples and sample configs",
        ""
    )
    foreach ($line in $docLines) { Write-Host $line }
    $runTest = Read-Host "Run quick test now? [Y/n]"
    # Any answer other than n/N, including an empty one, runs the test.
    $declined = ($runTest -eq "n" -or $runTest -eq "N")
    if (-not $declined) {
        Run-QuickTest
    }
    Write-Host ""
    Write-ColorOutput "🎉 Setup complete! FSS-Mini-RAG is ready to use." "Green"
 }
 function Run-QuickTest {
    # Demo run against the installer's own folder: indexes it when no .mini-rag
    # index exists yet, then performs one sample search.
    # Returns early, after printing an error, when indexing or searching fails.
    Write-Header "Quick Test"
    Write-Info "Testing with FSS-Mini-RAG codebase..."
    $ragDir = Join-Path $ScriptDir ".mini-rag"
    if (Test-Path $ragDir) {
        Write-Success "Project already indexed, running search..."
    } else {
        Write-Info "Indexing FSS-Mini-RAG system for demo..."
        & python (Join-Path $ScriptDir "rag-mini.py") index $ScriptDir
        if ($LASTEXITCODE -ne 0) {
            Write-Error "Test indexing failed"
            return
        }
    }
    Write-Host ""
    Write-Success "Running demo search: 'embedding system'"
    & python (Join-Path $ScriptDir "rag-mini.py") search $ScriptDir "embedding system" --top-k 3
    # Report success only when the search command itself succeeded.
    if ($LASTEXITCODE -ne 0) {
        Write-Error "Test search failed"
        return
    }
    Write-Host ""
    Write-Success "Test completed successfully!"
    Write-ColorOutput "FSS-Mini-RAG is working perfectly on Windows!" "Cyan"
 }
 # Run main function: the interactive installer starts as soon as the script is
 # executed.
 Main
--- a/install_mini_rag.sh
+++ b/install_mini_rag.sh
@ -4,32 +4,6 @@
 set -e  # Exit on any error
 # Check for command line arguments
 # Only the first argument is inspected: --headless switches to unattended
 # defaults, --help/-h prints the usage text and exits; anything else leaves
 # the interactive mode in place.
 HEADLESS_MODE=false
 case "$1" in
    --headless)
        HEADLESS_MODE=true
        echo "🤖 Running in headless mode - using defaults for automation"
        echo "⚠️  WARNING: Installation may take 5-10 minutes due to large dependencies"
        echo "💡 For agents: Run as background process to avoid timeouts"
        ;;
    --help|-h)
        echo ""
        echo "FSS-Mini-RAG Installation Script"
        echo ""
        echo "Usage:"
        echo "  ./install_mini_rag.sh           # Interactive installation"
        echo "  ./install_mini_rag.sh --headless  # Automated installation for agents/CI"
        echo "  ./install_mini_rag.sh --help      # Show this help"
        echo ""
        echo "Headless mode options:"
        echo "  • Uses existing virtual environment if available"
        echo "  • Selects light installation (Ollama + basic dependencies)"
        echo "  • Downloads nomic-embed-text model if Ollama is available"
        echo "  • Skips interactive prompts and tests"
        echo "  • Perfect for agent automation and CI/CD pipelines"
        echo ""
        exit 0
        ;;
 esac
 # Colors for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
@ -110,10 +84,6 @@ check_python() {
 check_venv() {
    if [ -d "$SCRIPT_DIR/.venv" ]; then
        print_info "Virtual environment already exists at $SCRIPT_DIR/.venv"
        if [[ "$HEADLESS_MODE" == "true" ]]; then
            print_info "Headless mode: Using existing virtual environment"
            return 0  # Use existing
        else
        echo -n "Recreate it? (y/N): "
        read -r recreate
        if [[ $recreate =~ ^[Yy]$ ]]; then
@ -123,7 +93,6 @@ check_venv() {
        else
            return 0  # Use existing
        fi
        fi
    else
        return 1  # Needs creation
    fi
@ -171,13 +140,8 @@ check_ollama() {
            return 0
        else
            print_warning "Ollama is installed but not running"
            if [[ "$HEADLESS_MODE" == "true" ]]; then
                print_info "Headless mode: Starting Ollama server automatically"
                start_ollama="y"
            else
            echo -n "Start Ollama now? (Y/n): "
            read -r start_ollama
            fi
            if [[ ! $start_ollama =~ ^[Nn]$ ]]; then
                print_info "Starting Ollama server..."
                ollama serve &
@ -198,84 +162,22 @@ check_ollama() {
        print_warning "Ollama not found"
        echo ""
        echo -e "${CYAN}Ollama provides the best embedding quality and performance.${NC}"
-        echo ""
+        echo -e "${YELLOW}To install Ollama:${NC}"
-        echo -e "${BOLD}Options:${NC}"
+        echo "  1. Visit: https://ollama.ai/download"
        echo -e "${GREEN}1) Install Ollama automatically${NC} (recommended)"
        echo -e "${YELLOW}2) Manual installation${NC} - Visit https://ollama.com/download"
        echo -e "${BLUE}3) Continue without Ollama${NC} (uses ML fallback)"
        echo ""
        if [[ "$HEADLESS_MODE" == "true" ]]; then
            print_info "Headless mode: Continuing without Ollama (option 3)"
            ollama_choice="3"
        else
            echo -n "Choose [1/2/3]: "
            read -r ollama_choice
        fi
        case "$ollama_choice" in
            1|"")
                print_info "Installing Ollama using secure installation method..."
                echo -e "${CYAN}Downloading and verifying Ollama installer...${NC}"
                # Secure installation: download, verify, then execute
                local temp_script="/tmp/ollama-install-$$.sh"
                if curl -fsSL https://ollama.com/install.sh -o "$temp_script" && \
                   file "$temp_script" | grep -q "shell script" && \
                   chmod +x "$temp_script" && \
                   "$temp_script"; then
                    rm -f "$temp_script"
                    print_success "Ollama installed successfully"
                    print_info "Starting Ollama server..."
                    ollama serve &
                    sleep 3
                    if curl -s http://localhost:11434/api/version >/dev/null 2>&1; then
                        print_success "Ollama server started"
                        echo ""
                        echo -e "${CYAN}💡 Pro tip: Download an LLM for AI-powered search synthesis!${NC}"
                        echo -e "   Lightweight: ${GREEN}ollama pull qwen3:0.6b${NC} (~500MB, very fast)"
                        echo -e "   Balanced:    ${GREEN}ollama pull qwen3:1.7b${NC} (~1.4GB, good quality)" 
                        echo -e "   Excellent:   ${GREEN}ollama pull qwen3:4b${NC} (~2.5GB, sweet spot for most users)"
                        echo -e "   Maximum:     ${GREEN}ollama pull qwen3:8b${NC} (~5GB, slower but top quality)"
                        echo ""
                        echo -e "${BLUE}🧠 RAG works great with smaller models! 4B is usually perfect.${NC}"
                        echo -e "${BLUE}Creative possibilities: Try mistral for storytelling, qwen2.5-coder for development!${NC}"
                        echo ""
                        return 0
                    else
                        print_warning "Ollama installed but failed to start automatically"
                        echo "Please start Ollama manually: ollama serve"
                        echo "Then re-run this installer"
                        exit 1
                    fi
                else
                    print_error "Failed to install Ollama automatically"
                    echo "Please install manually from https://ollama.com/download"
                    exit 1
                fi
                ;;
            2)
                echo ""
                echo -e "${YELLOW}Manual Ollama installation:${NC}"
                echo "  1. Visit: https://ollama.com/download" 
        echo "  2. Download and install for your system"
        echo "  3. Run: ollama serve"
        echo "  4. Re-run this installer"
-                print_info "Exiting for manual installation..."
+        echo ""
        echo -e "${BLUE}Alternative: Use ML fallback (requires more disk space)${NC}"
        echo ""
        echo -n "Continue without Ollama? (y/N): "
        read -r continue_without
        if [[ $continue_without =~ ^[Yy]$ ]]; then
            return 1
        else
            print_info "Install Ollama first, then re-run this script"
            exit 0
-                ;;
+        fi
            3)
                print_info "Continuing without Ollama (will use ML fallback)"
                return 1
                ;;
            *)
                print_warning "Invalid choice, continuing without Ollama"
                return 1
                ;;
        esac
    fi
 }
@ -314,13 +216,8 @@ setup_ollama_model() {
        echo "  • Purpose: High-quality semantic embeddings"
        echo "  • Alternative: System will use ML/hash fallbacks"
        echo ""
        if [[ "$HEADLESS_MODE" == "true" ]]; then
            print_info "Headless mode: Downloading nomic-embed-text model"
            download_model="y"
        else
        echo -n "Download model? [y/N]: "
        read -r download_model
        fi
        should_download=$([ "$download_model" = "y" ] && echo "download" || echo "skip")
    fi
@ -374,17 +271,12 @@ get_installation_preferences() {
    echo ""
    echo -e "${BOLD}Installation options:${NC}"
-    echo -e "${GREEN}L) Light${NC} - Ollama + basic deps (~50MB) ${CYAN}← Best performance + AI chat${NC}"
+    echo -e "${GREEN}L) Light${NC} - Ollama + basic deps (~50MB)"
-    echo -e "${YELLOW}F) Full${NC}  - Light + ML fallback (~2-3GB) ${CYAN}← RAG-only if no Ollama${NC}"
+    echo -e "${YELLOW}F) Full${NC}  - Light + ML fallback (~2-3GB)"
    echo -e "${BLUE}C) Custom${NC} - Configure individual components"
    echo ""
    while true; do
        if [[ "$HEADLESS_MODE" == "true" ]]; then
            # Default to light installation in headless mode
            choice="L"
            print_info "Headless mode: Selected Light installation"
        else
        echo -n "Choose [L/F/C] or Enter for recommended ($recommended): "
        read -r choice
@ -396,7 +288,6 @@ get_installation_preferences() {
                choice="F"  
            fi
        fi
        fi
        case "${choice^^}" in
            L)
@ -436,13 +327,8 @@ configure_custom_installation() {
        echo ""
        echo -e "${BOLD}Ollama embedding model:${NC}"
        echo "  • nomic-embed-text (~270MB) - Best quality embeddings"
        if [[ "$HEADLESS_MODE" == "true" ]]; then
            print_info "Headless mode: Downloading Ollama model"
            download_ollama="y"
        else
        echo -n "Download Ollama model? [y/N]: "
        read -r download_ollama
        fi
        if [[ $download_ollama =~ ^[Yy]$ ]]; then
            ollama_model="download"
        fi
@ -453,13 +339,8 @@ configure_custom_installation() {
    echo -e "${BOLD}ML fallback system:${NC}"
    echo "  • PyTorch + transformers (~2-3GB) - Works without Ollama"
    echo "  • Useful for: Offline use, server deployments, CI/CD"
    if [[ "$HEADLESS_MODE" == "true" ]]; then
        print_info "Headless mode: Skipping ML dependencies (keeping light)"
        include_ml="n"
    else
    echo -n "Include ML dependencies? [y/N]: "
    read -r include_ml
    fi
    # Pre-download models
    local predownload_ml="skip"
@ -468,13 +349,8 @@ configure_custom_installation() {
        echo -e "${BOLD}Pre-download ML models:${NC}"
        echo "  • sentence-transformers model (~80MB)"
        echo "  • Skip: Models download automatically when first used"
        if [[ "$HEADLESS_MODE" == "true" ]]; then
            print_info "Headless mode: Skipping ML model pre-download"
            predownload="n"
        else
        echo -n "Pre-download now? [y/N]: "
        read -r predownload
        fi
        if [[ $predownload =~ ^[Yy]$ ]]; then
            predownload_ml="download"
        fi
@ -535,73 +411,6 @@ install_dependencies() {
    fi
 }
 # Setup application icon for desktop integration
 setup_desktop_icon() {
    # Install the application icon and a .desktop launcher for the current user.
    #
    # Reads:    DISPLAY / WAYLAND_DISPLAY (GUI detection), SCRIPT_DIR, HOME
    # Writes:   $HOME/.local/share/icons/fss-mini-rag.png
    #           $HOME/.local/share/applications/fss-mini-rag.desktop
    # Returns:  0 on success or when no GUI session is detected (nothing to do);
    #           1 when the icon is missing, cannot be copied, or the entry is not created.
    print_header "Setting Up Desktop Integration"
    # Check if we're in a GUI environment
    if [ -z "$DISPLAY" ] && [ -z "$WAYLAND_DISPLAY" ]; then
        print_info "No GUI environment detected - skipping desktop integration"
        return 0
    fi
    local icon_source="$SCRIPT_DIR/assets/Fss_Mini_Rag.png"
    local desktop_dir="$HOME/.local/share/applications"
    local icon_dir="$HOME/.local/share/icons"
    # Check if icon file exists
    if [ ! -f "$icon_source" ]; then
        print_warning "Icon file not found at $icon_source"
        return 1
    fi
    # Create directories if needed (a failure here surfaces as a copy failure below)
    mkdir -p "$desktop_dir" "$icon_dir" 2>/dev/null
    # Copy icon to standard location
    local icon_dest="$icon_dir/fss-mini-rag.png"
    if cp "$icon_source" "$icon_dest" 2>/dev/null; then
        print_success "Icon installed to $icon_dest"
    else
        print_warning "Could not install icon (permissions?)"
        return 1
    fi
    # Create desktop entry.
    # The heredoc is unquoted on purpose so $SCRIPT_DIR expands. The Exec path is
    # wrapped in double quotes because the Exec key is split on whitespace: without
    # them an install directory containing a space produces a launcher that fails.
    # NOTE(review): paths containing ", `, $ or \ would additionally need backslash
    # escaping per the Desktop Entry Specification - not handled here.
    local desktop_file="$desktop_dir/fss-mini-rag.desktop"
    cat > "$desktop_file" << EOF
 [Desktop Entry]
 Name=FSS-Mini-RAG
 Comment=Fast Semantic Search for Code and Documents
 Exec="$SCRIPT_DIR/rag-tui"
 Icon=fss-mini-rag
 Terminal=true
 Type=Application
 Categories=Development;Utility;TextEditor;
 Keywords=search;code;rag;semantic;ai;
 StartupNotify=true
 EOF
    if [ -f "$desktop_file" ]; then
        chmod +x "$desktop_file"
        print_success "Desktop entry created"
        # Update desktop database if available
        if command_exists update-desktop-database; then
            update-desktop-database "$desktop_dir" 2>/dev/null
            print_info "Desktop database updated"
        fi
        print_info "✨ FSS-Mini-RAG should now appear in your application menu!"
        print_info "   Look for it in Development or Utility categories"
    else
        print_warning "Could not create desktop entry"
        return 1
    fi
    return 0
 }
 # Setup ML models based on configuration  
 setup_ml_models() {
    if [ "$INSTALL_TYPE" != "full" ]; then
@ -618,13 +427,8 @@ setup_ml_models() {
        echo "  • Purpose: Offline fallback when Ollama unavailable"
        echo "  • If skipped: Auto-downloads when first needed"
        echo ""
        if [[ "$HEADLESS_MODE" == "true" ]]; then
            print_info "Headless mode: Skipping ML model pre-download"
            download_ml="n"
        else
        echo -n "Pre-download now? [y/N]: "
        read -r download_ml
        fi
        should_predownload=$([ "$download_ml" = "y" ] && echo "download" || echo "skip")
    fi
@ -704,36 +508,7 @@ print(f'✅ Embedding system: {info[\"method\"]}')
    " 2>/dev/null; then
        print_success "Embedding system working"
    else
-        echo ""
+        print_warning "Embedding test failed, but system should still work"
        echo -e "${YELLOW}⚠️  System Check${NC}"
        # Smart diagnosis - check what's actually available
        if command_exists ollama && curl -s http://localhost:11434/api/version >/dev/null 2>&1; then
            # Ollama is running, check for models
            local available_models=$(ollama list 2>/dev/null | grep -E "(qwen3|llama|mistral|gemma)" | head -5)
            local embedding_models=$(ollama list 2>/dev/null | grep -E "(embed|bge)" | head -2)
            if [[ -n "$available_models" ]]; then
                echo -e "${GREEN}✅ Ollama is running with available models${NC}"
                echo -e "${CYAN}Your setup will work great! The system will auto-select the best models.${NC}"
                echo ""
                echo -e "${BLUE}💡 RAG Performance Tip:${NC} Smaller models often work better with RAG!"
                echo -e "   With context provided, even 0.6B models give good results"
                echo -e "   4B models = excellent, 8B+ = overkill (slower responses)"
            else
                echo -e "${BLUE}Ollama is running but no chat models found.${NC}"
                echo -e "Download a lightweight model: ${GREEN}ollama pull qwen3:0.6b${NC} (fast)"
                echo -e "Or balanced option: ${GREEN}ollama pull qwen3:4b${NC} (excellent quality)"
            fi
        else
            echo -e "${BLUE}Ollama not running or not installed.${NC}"
            echo -e "Start Ollama: ${GREEN}ollama serve${NC}"
            echo -e "Or install from: https://ollama.com/download"
        fi
        echo ""
        echo -e "${CYAN}✅ FSS-Mini-RAG will auto-detect and use the best available method.${NC}"
        echo ""
    fi
    return 0
@ -770,113 +545,103 @@ show_completion() {
    fi
    # Ask if they want to run a test
-    echo ""
+    echo -n "Would you like to run a quick test now? (Y/n): "
-    echo -e "${BOLD}🧪 Quick Test Available${NC}"
+    read -r run_test
    echo -e "${CYAN}Test FSS-Mini-RAG with a small sample project (takes ~10 seconds)${NC}"
    echo ""
    # Ensure output is flushed and we're ready for input
    printf "Run quick test now? [Y/n]: "
    # More robust input handling
    if [[ "$HEADLESS_MODE" == "true" ]]; then
        print_info "Headless mode: Skipping interactive test"
        echo -e "${BLUE}You can test FSS-Mini-RAG anytime with: ./rag-tui${NC}"
        show_beginner_guidance
    elif read -r run_test < /dev/tty 2>/dev/null; then
        echo "User chose: '$run_test'"  # Debug output
    if [[ ! $run_test =~ ^[Nn]$ ]]; then
        run_quick_test
        echo ""
        show_beginner_guidance
    else
            echo -e "${BLUE}Skipping test - you can run it later with: ./rag-tui${NC}"
            show_beginner_guidance
        fi
    else
        # Fallback if interactive input fails
        echo ""
        echo -e "${YELLOW}⚠️  Interactive input not available - skipping test prompt${NC}"
        echo -e "${BLUE}You can test FSS-Mini-RAG anytime with: ./rag-tui${NC}"
        show_beginner_guidance
    fi
 }
-# Note: Sample project creation removed - now indexing real codebase/docs
+# Create sample project for testing
 create_sample_project() {
    # Build a throwaway three-file project (README.md, auth.py, search.py) under
    # $SCRIPT_DIR/.sample_test and print its path on stdout for the caller to capture.
    # Any previous copy of the directory is removed first.
    local demo_root="$SCRIPT_DIR/.sample_test"
    rm -rf "$demo_root"
    mkdir -p "$demo_root"
    # Quoted heredoc delimiters keep the file contents literal (no expansion).
    cat << 'EOF' > "$demo_root/README.md"
 # Sample Project
 This is a sample project for testing FSS-Mini-RAG search capabilities.
 ## Features
 - User authentication system
 - Document processing
 - Search functionality
 - Email integration
 EOF
    cat << 'EOF' > "$demo_root/auth.py"
 # Authentication module
 def login_user(username, password):
    """Handle user login with password validation"""
    if validate_credentials(username, password):
        create_session(username)
        return True
    return False
 def validate_credentials(username, password):
    """Check username and password against database"""
    # Database validation logic here
    return check_password_hash(username, password)
 EOF
    cat << 'EOF' > "$demo_root/search.py"
 # Search functionality
 def semantic_search(query, documents):
    """Perform semantic search across document collection"""
    embeddings = generate_embeddings(query)
    results = find_similar_documents(embeddings, documents)
    return rank_results(results)
 def generate_embeddings(text):
    """Generate vector embeddings for text"""
    # Embedding generation logic
    return process_with_model(text)
 EOF
    # The path is the function's only stdout output.
    echo "$demo_root"
 }
 # Run quick test with sample data
 run_quick_test() {
    print_header "Quick Test"
-    # Ask what to index: code vs docs
+    print_info "Creating small sample project for testing..."
-    echo -e "${CYAN}What would you like to explore with FSS-Mini-RAG?${NC}"
+    local sample_dir=$(create_sample_project)
    echo "Sample project created with 3 files for fast testing."
    echo ""
-    echo -e "${GREEN}1) Code${NC} - Index the FSS-Mini-RAG codebase (~50 files)"
+    
-    echo -e "${BLUE}2) Docs${NC} - Index the documentation (~10 files)"  
+    # Index the sample project (much faster)
    print_info "Indexing sample project (this should be fast)..."
    if ./rag-mini index "$sample_dir" --quiet; then
        print_success "Sample project indexed successfully"
        echo ""
-    if [[ "$HEADLESS_MODE" == "true" ]]; then
+        print_info "Testing search with sample queries..."
-        print_info "Headless mode: Indexing code by default"
+        echo -e "${BLUE}Running search: 'user authentication'${NC}"
-        index_choice="1"
+        ./rag-mini search "$sample_dir" "user authentication" --limit 2
        echo ""
        print_success "Test completed successfully!"
        echo -e "${CYAN}Ready to use FSS-Mini-RAG on your own projects!${NC}"
        # Offer beginner guidance
        echo ""
        echo -e "${YELLOW}💡 Beginner Tip:${NC} Try the interactive mode with pre-made questions"
        echo "   Run: ./rag-tui for guided experience"
        # Clean up sample
        rm -rf "$sample_dir"
    else
-        echo -n "Choose [1/2] or Enter for code: "
+        print_error "Sample test failed"
-        read -r index_choice
+        echo "This might indicate an issue with the installation."
-    fi
+        rm -rf "$sample_dir"
    # Determine what to index
    local target_dir="$SCRIPT_DIR"
    local target_name="FSS-Mini-RAG codebase"
    if [[ "$index_choice" == "2" ]]; then
        target_dir="$SCRIPT_DIR/docs"
        target_name="FSS-Mini-RAG documentation"
    fi
    # Ensure we're in the right directory and have the right permissions
    if [[ ! -f "./rag-mini" ]]; then
        print_error "rag-mini script not found in current directory: $(pwd)"
        print_info "This might be a path issue. The installer should run from the project directory."
        return 1
    fi
    if [[ ! -x "./rag-mini" ]]; then
        print_info "Making rag-mini executable..."
        chmod +x ./rag-mini
    fi
    # Index the chosen target
    print_info "Indexing $target_name..."
    echo -e "${CYAN}This will take 10-30 seconds depending on your system${NC}"
    echo ""
    if ./rag-mini index "$target_dir"; then
        print_success "✅ Indexing completed successfully!"
        echo ""
        print_info "🎯 Launching Interactive Tutorial..."
        echo -e "${CYAN}The TUI has 6 sample questions to get you started.${NC}"
        echo -e "${CYAN}Try the suggested queries or enter your own!${NC}"
        echo ""
        if [[ "$HEADLESS_MODE" != "true" ]]; then
            echo -n "Press Enter to start interactive tutorial: "
            read -r
        fi
        # Launch the TUI which has the existing interactive tutorial system
        ./rag-tui.py "$target_dir" || true
        echo ""
        print_success "🎉 Tutorial completed!"
        echo -e "${CYAN}FSS-Mini-RAG is working perfectly!${NC}"
    else
        print_error "❌ Indexing failed"
        echo ""
        echo -e "${YELLOW}Possible causes:${NC}"
        echo "• Virtual environment not properly activated"
        echo "• Missing dependencies (try: pip install -r requirements.txt)"
        echo "• Path issues (ensure script runs from project directory)"
        echo "• Ollama connection issues (if using Ollama)"
        echo ""
        return 1
    fi
 }
@ -921,16 +686,12 @@ main() {
    echo -e "${CYAN}Note: You'll be asked before downloading any models${NC}"
    echo ""
    if [[ "$HEADLESS_MODE" == "true" ]]; then
        print_info "Headless mode: Beginning installation automatically"
    else
    echo -n "Begin installation? [Y/n]: "
    read -r continue_install
    if [[ $continue_install =~ ^[Nn]$ ]]; then
        echo "Installation cancelled."
        exit 0
    fi
    fi
    # Run installation steps
    check_python
@ -954,11 +715,7 @@ main() {
    fi
    setup_ml_models
    # Setup desktop integration with icon
    setup_desktop_icon
    if test_installation; then
        install_global_wrapper
        show_completion
    else
        print_error "Installation test failed"
@ -967,107 +724,5 @@ main() {
    fi
 }
 # Install global wrapper script for system-wide access
 install_global_wrapper() {
    # Install a "rag-mini" launcher into /usr/local/bin so the tool can be run from
    # any directory. The launcher locates the installation, activates its virtual
    # environment and execs .venv/bin/rag-mini with the caller's arguments.
    #
    # Reads:   HEADLESS_MODE (no prompting when "true")
    # Sets:    install_global (the user's answer, when prompted)
    # Returns: always 0 - a failed or declined global install is reported but is
    #          not fatal, because rag-mini still works from the install directory.
    print_info "Installing global rag-mini command..."
    local wrapper_dest="/usr/local/bin/rag-mini"
    # Use an unpredictable temp file: the content is later copied with root
    # privileges, so a fixed, world-guessable /tmp name could be pre-created or
    # swapped by another local user.
    local wrapper_tmp
    if ! wrapper_tmp=$(mktemp "${TMPDIR:-/tmp}/rag-mini-wrapper.XXXXXX"); then
        print_error "❌ Failed to install global command"
        echo -e "${YELLOW}You can still use rag-mini from the installation directory${NC}"
        echo ""
        return 0
    fi
    # Create the wrapper script (quoted delimiter: nothing is expanded here)
    cat > "$wrapper_tmp" << 'EOF'
 #!/bin/bash
 # FSS-Mini-RAG Global Wrapper Script
 # Automatically handles virtual environment activation
 # Find the installation directory
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Common installation paths to check
 INSTALL_PATHS=(
    "/opt/fss-mini-rag"
    "/usr/local/lib/fss-mini-rag"
    "$(dirname "$SCRIPT_DIR")/lib/fss-mini-rag"
    "$HOME/.local/lib/fss-mini-rag"
 )
 # Add current directory if it looks like an FSS-Mini-RAG installation
 if [ -f "$(pwd)/.venv/bin/rag-mini" ] && [ -f "$(pwd)/requirements.txt" ]; then
    INSTALL_PATHS+=("$(pwd)")
 fi
 # Find the actual installation
 FSS_MINI_RAG_HOME=""
 for path in "${INSTALL_PATHS[@]}"; do
    if [ -f "$path/.venv/bin/rag-mini" ] && [ -f "$path/requirements.txt" ]; then
        FSS_MINI_RAG_HOME="$path"
        break
    fi
 done
 # If not found in standard paths, try to find it
 if [ -z "$FSS_MINI_RAG_HOME" ]; then
    # Try to find by looking for the venv with rag-mini
    FSS_MINI_RAG_HOME=$(find /opt /usr/local /home -maxdepth 4 -name ".venv" -type d 2>/dev/null | while read venv_dir; do
        if [ -f "$venv_dir/bin/rag-mini" ] && [ -f "$(dirname "$venv_dir")/requirements.txt" ]; then
            dirname "$venv_dir"
            break
        fi
    done | head -1)
 fi
 # Error if still not found
 if [ -z "$FSS_MINI_RAG_HOME" ] || [ ! -f "$FSS_MINI_RAG_HOME/.venv/bin/rag-mini" ]; then
    echo "❌ FSS-Mini-RAG installation not found!"
    echo ""
    echo "Expected to find .venv/bin/rag-mini in one of:"
    printf "  %s\n" "${INSTALL_PATHS[@]}"
    echo ""
    echo "Please reinstall FSS-Mini-RAG:"
    echo "  ./install_mini_rag.sh"
    exit 1
 fi
 # Activate virtual environment and run rag-mini with all arguments
 cd "$FSS_MINI_RAG_HOME"
 source .venv/bin/activate
 # Suppress virtual environment warnings since we handle activation
 export FSS_MINI_RAG_GLOBAL_WRAPPER=1
 exec .venv/bin/rag-mini "$@"
 EOF
    # Headless runs must never block on a sudo password prompt: -n makes sudo
    # fail immediately instead, and the failure is reported below.
    local sudo_flags=""
    if [[ "$HEADLESS_MODE" == "true" ]]; then
        sudo_flags="-n"
    fi
    # Ask for consent only when interactive and elevation is actually needed
    # (same rule as before: headless mode or a writable target skips the prompt).
    local proceed="y"
    if [[ "$HEADLESS_MODE" != "true" ]] && [[ ! -w "/usr/local/bin" ]]; then
        echo ""
        echo -e "${YELLOW}Install rag-mini globally?${NC}"
        echo "This will allow you to run 'rag-mini' from anywhere on your system."
        echo ""
        echo -n "Install globally? [Y/n]: "
        read -r install_global
        if [[ $install_global =~ ^[Nn]$ ]]; then
            proceed="n"
        fi
    fi
    if [[ "$proceed" == "y" ]]; then
        # install(1) copies and sets the mode in one step. The explicit 0755 matters:
        # mktemp creates the file 0600, and a plain cp + chmod +x would leave the
        # script unreadable (hence unrunnable) for non-root users.
        # $sudo_flags is intentionally unquoted so an empty value expands to nothing.
        if sudo $sudo_flags install -m 0755 "$wrapper_tmp" "$wrapper_dest"; then
            print_success "✅ Global rag-mini command installed"
            echo -e "${CYAN}You can now use 'rag-mini' from anywhere on your system!${NC}"
        else
            print_error "❌ Failed to install global command"
            echo -e "${YELLOW}You can still use rag-mini from the installation directory${NC}"
        fi
    else
        echo -e "${YELLOW}Skipped global installation${NC}"
        echo -e "${CYAN}You can use rag-mini from the installation directory${NC}"
    fi
    # Clean up
    rm -f "$wrapper_tmp"
    echo ""
 }
 # Run main function
 main "$@"
--- a/install_windows.bat
+++ b/install_windows.bat
@ -1,418 +0,0 @@
@echo off
 REM FSS-Mini-RAG Windows Installer
 REM Usage: install_windows.bat [--headless] or [--help] or [-h]
 REM Delayed expansion is required: the script reads variables with !VAR! inside
 REM parenthesized blocks where they are also assigned.
 setlocal enabledelayedexpansion
 REM Switch the console to the UTF-8 code page so emoji and box-drawing text render
 chcp 65001 >nul 2>&1
 REM Check for command line arguments.
 REM %~1 strips surrounding quotes, so a quoted argument or one containing spaces
 REM can no longer break the comparison; /i makes the flags case-insensitive.
 set "HEADLESS_MODE=false"
 if /i "%~1"=="--headless" (
    set "HEADLESS_MODE=true"
    echo 🤖 Running in headless mode - using defaults for automation
    echo ⚠️  WARNING: Installation may take 5-10 minutes due to large dependencies
    echo 💡 For agents: Run as background process to avoid timeouts
 ) else if /i "%~1"=="--help" (
    goto show_help
 ) else if /i "%~1"=="-h" (
    goto show_help
 )
 goto start_installation
 REM ------------------------------------------------------------------
 REM :show_help - print the usage text, wait for a key press, exit with code 0.
 REM Jumped to when the first argument is --help or -h.
 REM ------------------------------------------------------------------
 :show_help
 echo.
 echo FSS-Mini-RAG Windows Installation Script
 echo.
 echo Usage:
 echo   install_windows.bat           # Interactive installation
 echo   install_windows.bat --headless   # Automated installation for agents/CI
 echo   install_windows.bat --help       # Show this help
 echo.
 echo Headless mode options:
 echo   • Uses existing virtual environment if available
 echo   • Installs core dependencies only
 echo   • Skips AI model downloads
 echo   • Skips interactive prompts and tests  
 echo   • Perfect for agent automation and CI/CD pipelines
 echo.
 REM Keeps the window open when the script was started by double-click
 pause
 exit /b 0
 REM ------------------------------------------------------------------
 REM :start_installation - main installation flow.
 REM   banner and confirmation, [1/6] Python check, [2/6] virtual environment,
 REM   [3/6] dependencies, [4/6] import tests, [5/6] desktop shortcuts,
 REM   [6/6] Ollama check, then the optional tutorial.
 REM Exit code: 0 on success or user cancel, 1 on any fatal step failure.
 REM In headless mode no pause is issued, so unattended runs never wait for a key.
 REM Any closing parenthesis echoed inside a parenthesized block must be escaped
 REM with a caret, otherwise cmd treats it as the end of the block.
 REM ------------------------------------------------------------------
 :start_installation
 echo.
 echo ╔══════════════════════════════════════════════════╗
 echo ║            FSS-Mini-RAG Windows Installer       ║
 echo ║         Fast Semantic Search for Code           ║
 echo ╚══════════════════════════════════════════════════╝
 echo.
 echo 🚀 Comprehensive installation process:
 echo   • Python environment setup and validation
 echo   • Smart dependency management
 echo   • Optional AI model downloads (with your consent)
 echo   • System testing and verification
 echo   • Interactive tutorial (optional)
 echo.
 echo 💡 Note: You'll be asked before downloading any models
 echo.
 if "!HEADLESS_MODE!"=="true" (
    echo Headless mode: Beginning installation automatically
 ) else (
    set /p "continue=Begin installation? [Y/n]: "
    if /i "!continue!"=="n" (
        echo Installation cancelled.
        pause
        exit /b 0
    )
 )
 REM Get script directory (strip the trailing backslash that %~dp0 always carries)
 set "SCRIPT_DIR=%~dp0"
 set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
 echo.
 echo ══════════════════════════════════════════════════
 echo [1/6] Checking Python Environment...
 python --version >nul 2>&1
 if errorlevel 1 (
    echo ❌ ERROR: Python not found!
    echo.
    echo 📦 Please install Python from: https://python.org/downloads
    echo 🔧 Installation requirements:
    echo    • Python 3.8 or higher
    echo    • Make sure to check "Add Python to PATH" during installation
    echo    • Restart your command prompt after installation
    echo.
    echo 💡 Quick install options:
    echo    • Download from python.org ^(recommended^)
    echo    • Or use: winget install Python.Python.3.11
    echo    • Or use: choco install python311
    echo.
    if /i not "!HEADLESS_MODE!"=="true" pause
    exit /b 1
 )
 for /f "tokens=2" %%i in ('python --version 2^>^&1') do set "PYTHON_VERSION=%%i"
 echo ✅ Found Python !PYTHON_VERSION!
 REM Enforce the documented minimum of Python 3.8 (major AND minor version).
 REM Defaults of 0 keep the numeric comparisons valid if the version string
 REM could not be parsed.
 set "MAJOR_VERSION="
 set "MINOR_VERSION="
 for /f "tokens=1,2 delims=." %%a in ("!PYTHON_VERSION!") do (
    set "MAJOR_VERSION=%%a"
    set "MINOR_VERSION=%%b"
 )
 if not defined MAJOR_VERSION set "MAJOR_VERSION=0"
 if not defined MINOR_VERSION set "MINOR_VERSION=0"
 set "PYTHON_TOO_OLD=false"
 if !MAJOR_VERSION! LSS 3 set "PYTHON_TOO_OLD=true"
 if !MAJOR_VERSION! EQU 3 if !MINOR_VERSION! LSS 8 set "PYTHON_TOO_OLD=true"
 if "!PYTHON_TOO_OLD!"=="true" (
    echo ❌ ERROR: Python !PYTHON_VERSION! found, but Python 3.8+ required
    echo 📦 Please upgrade Python to 3.8 or higher
    if /i not "!HEADLESS_MODE!"=="true" pause
    exit /b 1
 )
 echo.
 echo ══════════════════════════════════════════════════
 echo [2/6] Creating Python Virtual Environment...
 REM Reuse an existing .venv when it activates and its interpreter runs;
 REM otherwise try to delete it and create a fresh one.
 if exist "%SCRIPT_DIR%\.venv" (
    echo 🔄 Found existing virtual environment, checking if it works...
    call "%SCRIPT_DIR%\.venv\Scripts\activate.bat" >nul 2>&1
    if not errorlevel 1 (
        "%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "import sys; print('✅ Existing environment works')" >nul 2>&1
        if not errorlevel 1 (
            echo ✅ Using existing virtual environment
            goto skip_venv_creation
        )
    )
    echo 🔄 Removing problematic virtual environment...
    rmdir /s /q "%SCRIPT_DIR%\.venv" 2>nul
    if exist "%SCRIPT_DIR%\.venv" (
        echo ⚠️ Could not remove old environment, will try to work with it...
    )
 )
 echo 📁 Creating fresh virtual environment...
 python -m venv "%SCRIPT_DIR%\.venv"
 if errorlevel 1 (
    echo ❌ ERROR: Failed to create virtual environment
    echo.
    echo 🔧 This might be because:
    echo    • Python venv module is not installed
    echo    • Insufficient permissions
    echo    • Path contains special characters
    echo.
    echo 💡 Try: python -m pip install --user virtualenv
    if /i not "!HEADLESS_MODE!"=="true" pause
    exit /b 1
 )
 echo ✅ Virtual environment created successfully
 :skip_venv_creation
 echo.
 echo ══════════════════════════════════════════════════
 echo [3/6] Installing Python Dependencies...
 echo 📦 This may take 2-3 minutes depending on your internet speed...
 echo.
 call "%SCRIPT_DIR%\.venv\Scripts\activate.bat"
 if errorlevel 1 (
    echo ❌ ERROR: Could not activate virtual environment
    if /i not "!HEADLESS_MODE!"=="true" pause
    exit /b 1
 )
 echo 🔧 Upgrading pip...
 "%SCRIPT_DIR%\.venv\Scripts\python.exe" -m pip install --upgrade pip --quiet
 if errorlevel 1 (
    echo ⚠️ Warning: Could not upgrade pip, continuing anyway...
 )
 echo 📚 Installing core dependencies (lancedb, pandas, numpy, etc.)...
 echo    This provides semantic search capabilities
 "%SCRIPT_DIR%\.venv\Scripts\pip.exe" install -r "%SCRIPT_DIR%\requirements.txt"
 if errorlevel 1 (
    echo ❌ ERROR: Failed to install dependencies
    echo.
    echo 🔧 Possible solutions:
    echo    • Check internet connection
    echo    • Try running as administrator
    echo    • Check if antivirus is blocking pip
    echo    • Manually run: pip install -r requirements.txt
    echo.
    if /i not "!HEADLESS_MODE!"=="true" pause
    exit /b 1
 )
 echo ✅ Dependencies installed successfully
 echo.
 echo ══════════════════════════════════════════════════
 echo [4/6] Testing Installation...
 echo 🧪 Verifying Python imports...
 echo Attempting import test...
 REM stderr of the import test is captured so it can be shown on failure
 "%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('✅ Core imports successful')" 2>import_error.txt
 if errorlevel 1 (
    echo ❌ ERROR: Installation test failed
    echo.
    echo 🔍 Import error details:
    type import_error.txt
    echo.
    echo 🔧 This usually means:
    echo    • Dependencies didn't install correctly
    echo    • Virtual environment is corrupted
    echo    • Python path issues
    echo    • Module conflicts with existing installations
    echo.
    echo 💡 Troubleshooting options:
    echo    • Try: "%SCRIPT_DIR%\.venv\Scripts\pip.exe" install -r requirements.txt --force-reinstall
    echo    • Or delete .venv folder and run installer again
    echo    • Or check import_error.txt for specific error details
    del import_error.txt >nul 2>&1
    if /i not "!HEADLESS_MODE!"=="true" pause
    exit /b 1
 )
 del import_error.txt >nul 2>&1
 echo 🔍 Testing embedding system...
 "%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder; embedder = CodeEmbedder(); info = embedder.get_embedding_info(); print(f'✅ Embedding method: {info[\"method\"]}')" 2>nul
 if errorlevel 1 (
    echo ⚠️ Warning: Embedding test inconclusive, but core system is ready
 )
 echo.
 echo ══════════════════════════════════════════════════
 echo [5/6] Setting Up Desktop Integration...
 call :setup_windows_icon
 echo.
 echo ══════════════════════════════════════════════════
 echo [6/6] Checking AI Features (Optional)...
 call :check_ollama_enhanced
 echo.
 echo ╔══════════════════════════════════════════════════╗
 echo ║             INSTALLATION SUCCESSFUL!            ║
 echo ╚══════════════════════════════════════════════════╝
 echo.
 echo 🎯 Quick Start Options:
 echo.
 echo 🎨 For Beginners (Recommended):
 echo    rag.bat                 - Interactive interface with guided setup
 echo.
 echo 💻 For Developers:
 echo    rag.bat index C:\myproject      - Index a project
 echo    rag.bat search C:\myproject "authentication"  - Search project
 echo    rag.bat help            - Show all commands
 echo.
 REM Offer interactive tutorial
 echo 🧪 Quick Test Available:
 echo    Test FSS-Mini-RAG with a small sample project (takes ~30 seconds)
 echo.
 if "!HEADLESS_MODE!"=="true" (
    echo Headless mode: Skipping interactive tutorial
    echo 📚 You can run the tutorial anytime with: rag.bat
 ) else (
    set /p "run_test=Run interactive tutorial now? [Y/n]: "
    if /i "!run_test!" NEQ "n" (
        call :run_tutorial
    ) else (
        echo 📚 You can run the tutorial anytime with: rag.bat
    )
 )
 echo.
 echo 🎉 Setup complete! FSS-Mini-RAG is ready to use.
 echo 💡 Pro tip: Try indexing any folder with text files - code, docs, notes!
 echo.
 if /i not "!HEADLESS_MODE!"=="true" pause
 exit /b 0
 REM ------------------------------------------------------------------
 REM :check_ollama_enhanced - report on optional Ollama-based AI features.
 REM   1. If the ollama executable is not on PATH: print install hints and return.
 REM   2. If the local server does not answer on port 11434: offer to start it
 REM      (started automatically in headless mode).
 REM   3. If "ollama list" shows no models: offer to pull qwen3:1.7b
 REM      (skipped in headless mode).
 REM Reads HEADLESS_MODE. Never fails the installation; always returns to caller.
 REM Closing parentheses echoed inside a parenthesized block are escaped with a
 REM caret, otherwise cmd treats them as the end of the block.
 REM ------------------------------------------------------------------
 :check_ollama_enhanced
 echo 🤖 Checking for AI capabilities...
 echo.
 REM Check if Ollama is installed
 where ollama >nul 2>&1
 if errorlevel 1 (
    echo ⚠️ Ollama not installed - using basic search mode
    echo.
    echo 🎯 For Enhanced AI Features:
    echo    • 📥 Install Ollama: https://ollama.com/download
    echo    • 🔄 Run: ollama serve
    echo    • 🧠 Download model: ollama pull qwen3:1.7b
    echo.
    echo 💡 Benefits of AI features:
    echo    • Smart query expansion for better search results
    echo    • Interactive exploration mode with conversation memory
    echo    • AI-powered synthesis of search results
    echo    • Natural language understanding of your questions
    echo.
    goto :eof
 )
 REM Check if Ollama server is running
 curl -s http://localhost:11434/api/version >nul 2>&1
 if errorlevel 1 (
    echo 🟡 Ollama installed but not running
    echo.
    if "!HEADLESS_MODE!"=="true" (
        echo Headless mode: Starting Ollama server automatically
        set "start_ollama=y"
    ) else (
        set /p "start_ollama=Start Ollama server now? [Y/n]: "
    )
    if /i "!start_ollama!" NEQ "n" (
        echo 🚀 Starting Ollama server...
        start /b ollama serve
        REM Give the server a few seconds to come up before probing it again
        timeout /t 3 /nobreak >nul
        curl -s http://localhost:11434/api/version >nul 2>&1
        if errorlevel 1 (
            echo ⚠️ Could not start Ollama automatically
            echo 💡 Please run: ollama serve
        ) else (
            echo ✅ Ollama server started successfully!
        )
    )
 ) else (
    echo ✅ Ollama server is running!
 )
 REM Check for available models: drop the header row and blank lines; findstr
 REM sets errorlevel 1 when nothing is left, i.e. no model is installed.
 echo 🔍 Checking for AI models...
 ollama list 2>nul | findstr /v "NAME" | findstr /v "^$" >nul
 if errorlevel 1 (
    echo 📦 No AI models found
    echo.
    echo 🧠 Recommended Models ^(choose one^):
    echo    • qwen3:1.7b    - Excellent for RAG ^(1.4GB, recommended^)
    echo    • qwen3:0.6b    - Lightweight and fast ^(~500MB^)
    echo    • qwen3:4b      - Higher quality but slower ^(~2.5GB^)
    echo.
    if "!HEADLESS_MODE!"=="true" (
        echo Headless mode: Skipping model download
        set "install_model=n"
    ) else (
        set /p "install_model=Download qwen3:1.7b model now? [Y/n]: "
    )
    if /i "!install_model!" NEQ "n" (
        echo 📥 Downloading qwen3:1.7b model...
        echo    This may take 5-10 minutes depending on your internet speed
        ollama pull qwen3:1.7b
        if errorlevel 1 (
            echo ⚠️ Download failed - you can try again later with: ollama pull qwen3:1.7b
        ) else (
            echo ✅ Model downloaded successfully! AI features are now available.
        )
    )
 ) else (
    echo ✅ AI models found - full AI features available!
    echo 🎉 Your system supports query expansion, exploration mode, and synthesis!
 )
 goto :eof
 REM ------------------------------------------------------------------
 REM :run_tutorial - index the installation directory and run one sample search.
 REM Reads SCRIPT_DIR. Returns to the caller in all cases; an indexing failure
 REM is reported and the search is skipped.
 REM rag-mini.py is addressed through SCRIPT_DIR so the tutorial also works when
 REM the installer is started from another working directory.
 REM NOTE(review): assumes rag-mini.py sits next to this installer - confirm.
 REM ------------------------------------------------------------------
 :run_tutorial
 echo.
 echo ═══════════════════════════════════════════════════
 echo 🧪 Running Interactive Tutorial
 echo ═══════════════════════════════════════════════════
 echo.
 echo 📚 This tutorial will:
 echo    • Index the FSS-Mini-RAG documentation
 echo    • Show you how to search effectively
 echo    • Demonstrate AI features (if available)
 echo.
 call "%SCRIPT_DIR%\.venv\Scripts\activate.bat"
 echo 📁 Indexing project for demonstration...
 "%SCRIPT_DIR%\.venv\Scripts\python.exe" "%SCRIPT_DIR%\rag-mini.py" index "%SCRIPT_DIR%" >nul 2>&1
 if errorlevel 1 (
    echo ❌ Indexing failed - please check the installation
    goto :eof
 )
 echo ✅ Indexing complete!
 echo.
 echo 🔍 Example search: "embedding"
 "%SCRIPT_DIR%\.venv\Scripts\python.exe" "%SCRIPT_DIR%\rag-mini.py" search "%SCRIPT_DIR%" "embedding" --top-k 3
 echo.
 echo 🎯 Try the interactive interface:
 echo    rag.bat
 echo.
 echo 💡 You can now search any project by indexing it first!
 goto :eof
 REM ------------------------------------------------------------------
 REM :setup_windows_icon - create Desktop and Start Menu shortcuts to rag.bat.
 REM Reads SCRIPT_DIR, USERPROFILE, APPDATA. Best effort: failures are reported
 REM as warnings and the subroutine always returns to the caller.
 REM The paths are handed to PowerShell through environment variables
 REM (read there as $env:NAME) instead of being pasted into single-quoted
 REM literals, so a path containing an apostrophe cannot break the command.
 REM The PNG only gates the step: no IconLocation is set on the shortcuts.
 REM ------------------------------------------------------------------
 :setup_windows_icon
 echo 🎨 Setting up application icon and shortcuts...
 REM Check if icon exists
 if not exist "%SCRIPT_DIR%\assets\Fss_Mini_Rag.png" (
    echo ⚠️ Icon file not found - skipping desktop integration
    goto :eof
 )
 REM Create desktop shortcut
 echo 📱 Creating desktop shortcut...
 set "desktop=%USERPROFILE%\Desktop"
 set "shortcut=%desktop%\FSS-Mini-RAG.lnk"
 REM Use PowerShell and the WScript.Shell COM object to write the .lnk file
 powershell -NoProfile -Command "$ws = New-Object -ComObject WScript.Shell; $lnk = $ws.CreateShortcut($env:shortcut); $lnk.TargetPath = Join-Path $env:SCRIPT_DIR 'rag.bat'; $lnk.WorkingDirectory = $env:SCRIPT_DIR; $lnk.Description = 'FSS-Mini-RAG - Fast Semantic Search'; $lnk.Save()" >nul 2>&1
 if exist "%shortcut%" (
    echo ✅ Desktop shortcut created
 ) else (
    echo ⚠️ Could not create desktop shortcut
 )
 REM Create Start Menu shortcut
 echo 📂 Creating Start Menu entry...
 set "startmenu=%APPDATA%\Microsoft\Windows\Start Menu\Programs"
 set "startshortcut=%startmenu%\FSS-Mini-RAG.lnk"
 powershell -NoProfile -Command "$ws = New-Object -ComObject WScript.Shell; $lnk = $ws.CreateShortcut($env:startshortcut); $lnk.TargetPath = Join-Path $env:SCRIPT_DIR 'rag.bat'; $lnk.WorkingDirectory = $env:SCRIPT_DIR; $lnk.Description = 'FSS-Mini-RAG - Fast Semantic Search'; $lnk.Save()" >nul 2>&1
 if exist "%startshortcut%" (
    echo ✅ Start Menu entry created
 ) else (
    echo ⚠️ Could not create Start Menu entry
 )
 REM Only claim both launch locations when both shortcuts really exist
 if exist "%shortcut%" if exist "%startshortcut%" (
    echo 💡 FSS-Mini-RAG shortcuts have been created on your Desktop and Start Menu
    echo    You can now launch the application from either location
 )
 goto :eof
--- a/mini_rag/init.py
+++ b/mini_rag/init.py
@ -7,9 +7,9 @@ Designed for portability, efficiency, and simplicity across projects and compute
 __version__ = "2.1.0"
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .chunker import CodeChunker
 from .indexer import ProjectIndexer
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .search import CodeSearcher
 from .watcher import FileWatcher
--- a/mini_rag/main.py
+++ b/mini_rag/main.py
@ -2,5 +2,5 @@
 from .cli import cli
-if __name__ == "__main__":
+if __name__ == '__main__':
    cli()
--- a/mini_rag/auto_optimizer.py
+++ b/mini_rag/auto_optimizer.py
@ -3,23 +3,22 @@ Auto-optimizer for FSS-Mini-RAG.
 Automatically tunes settings based on usage patterns.
 """
 import json
 import logging
 from collections import Counter
 from pathlib import Path
-from typing import Any, Dict
+import json
 from typing import Dict, Any, List
 from collections import Counter
 import logging
 logger = logging.getLogger(__name__)
 class AutoOptimizer:
    """Automatically optimizes RAG settings based on project patterns."""
    def __init__(self, project_path: Path):
        self.project_path = project_path
-        self.rag_dir = project_path / ".mini-rag"
+        self.rag_dir = project_path / '.mini-rag'
-        self.config_path = self.rag_dir / "config.json"
+        self.config_path = self.rag_dir / 'config.json'
-        self.manifest_path = self.rag_dir / "manifest.json"
+        self.manifest_path = self.rag_dir / 'manifest.json'
    def analyze_and_optimize(self) -> Dict[str, Any]:
        """Analyze current patterns and auto-optimize settings."""
@ -38,23 +37,23 @@ class AutoOptimizer:
        optimizations = self._generate_optimizations(analysis)
        # Apply optimizations if beneficial
-        if optimizations["confidence"] > 0.7:
+        if optimizations['confidence'] > 0.7:
            self._apply_optimizations(optimizations)
            return {
                "status": "optimized",
-                "changes": optimizations["changes"],
+                "changes": optimizations['changes'],
-                "expected_improvement": optimizations["expected_improvement"],
+                "expected_improvement": optimizations['expected_improvement']
            }
        else:
            return {
                "status": "no_changes_needed",
                "analysis": analysis,
-                "confidence": optimizations["confidence"],
+                "confidence": optimizations['confidence']
            }
    def _analyze_patterns(self, manifest: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze current indexing patterns."""
-        files = manifest.get("files", {})
+        files = manifest.get('files', {})
        # Language distribution
        languages = Counter()
@ -62,11 +61,11 @@ class AutoOptimizer:
        chunk_ratios = []
        for filepath, info in files.items():
-            lang = info.get("language", "unknown")
+            lang = info.get('language', 'unknown')
            languages[lang] += 1
-            size = info.get("size", 0)
+            size = info.get('size', 0)
-            chunks = info.get("chunks", 1)
+            chunks = info.get('chunks', 1)
            sizes.append(size)
            chunk_ratios.append(chunks / max(1, size / 1000))  # chunks per KB
@ -75,13 +74,13 @@ class AutoOptimizer:
        avg_size = sum(sizes) / len(sizes) if sizes else 1000
        return {
-            "languages": dict(languages.most_common()),
+            'languages': dict(languages.most_common()),
-            "total_files": len(files),
+            'total_files': len(files),
-            "total_chunks": sum(info.get("chunks", 1) for info in files.values()),
+            'total_chunks': sum(info.get('chunks', 1) for info in files.values()),
-            "avg_chunk_ratio": avg_chunk_ratio,
+            'avg_chunk_ratio': avg_chunk_ratio,
-            "avg_file_size": avg_size,
+            'avg_file_size': avg_size,
-            "large_files": sum(1 for s in sizes if s > 10000),
+            'large_files': sum(1 for s in sizes if s > 10000),
-            "small_files": sum(1 for s in sizes if s < 500),
+            'small_files': sum(1 for s in sizes if s < 500)
        }
    def _generate_optimizations(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
@ -91,51 +90,49 @@ class AutoOptimizer:
        expected_improvement = 0
        # Optimize chunking based on dominant language
-        languages = analysis["languages"]
+        languages = analysis['languages']
        if languages:
            dominant_lang, count = list(languages.items())[0]
-            lang_pct = count / analysis["total_files"]
+            lang_pct = count / analysis['total_files']
            if lang_pct > 0.3:  # Dominant language >30%
-                if dominant_lang == "python" and analysis["avg_chunk_ratio"] < 1.5:
+                if dominant_lang == 'python' and analysis['avg_chunk_ratio'] < 1.5:
-                    changes.append(
+                    changes.append("Increase Python chunk size to 3000 for better function context")
                        "Increase Python chunk size to 3000 for better function context"
                    )
                    confidence += 0.2
                    expected_improvement += 15
-                elif dominant_lang == "markdown" and analysis["avg_chunk_ratio"] < 1.2:
+                elif dominant_lang == 'markdown' and analysis['avg_chunk_ratio'] < 1.2:
                    changes.append("Use header-based chunking for Markdown files")
                    confidence += 0.15
                    expected_improvement += 10
        # Optimize for large files
-        if analysis["large_files"] > 5:
+        if analysis['large_files'] > 5:
            changes.append("Reduce streaming threshold to 5KB for better large file handling")
            confidence += 0.1
            expected_improvement += 8
        # Optimize chunk ratio
-        if analysis["avg_chunk_ratio"] < 1.0:
+        if analysis['avg_chunk_ratio'] < 1.0:
            changes.append("Reduce chunk size for more granular search results")
            confidence += 0.15
            expected_improvement += 12
-        elif analysis["avg_chunk_ratio"] > 3.0:
+        elif analysis['avg_chunk_ratio'] > 3.0:
            changes.append("Increase chunk size to reduce overhead")
            confidence += 0.1
            expected_improvement += 5
        # Skip tiny files optimization
-        small_file_pct = analysis["small_files"] / analysis["total_files"]
+        small_file_pct = analysis['small_files'] / analysis['total_files']
        if small_file_pct > 0.3:
            changes.append("Skip files smaller than 300 bytes to improve focus")
            confidence += 0.1
            expected_improvement += 3
        return {
-            "changes": changes,
+            'changes': changes,
-            "confidence": min(confidence, 1.0),
+            'confidence': min(confidence, 1.0),
-            "expected_improvement": expected_improvement,
+            'expected_improvement': expected_improvement
        }
    def _apply_optimizations(self, optimizations: Dict[str, Any]):
@ -148,35 +145,35 @@ class AutoOptimizer:
        else:
            config = self._get_default_config()
-        changes = optimizations["changes"]
+        changes = optimizations['changes']
        # Apply changes based on recommendations
        for change in changes:
            if "Python chunk size to 3000" in change:
-                config.setdefault("chunking", {})["max_size"] = 3000
+                config.setdefault('chunking', {})['max_size'] = 3000
            elif "header-based chunking" in change:
-                config.setdefault("chunking", {})["strategy"] = "header"
+                config.setdefault('chunking', {})['strategy'] = 'header'
            elif "streaming threshold to 5KB" in change:
-                config.setdefault("streaming", {})["threshold_bytes"] = 5120
+                config.setdefault('streaming', {})['threshold_bytes'] = 5120
            elif "Reduce chunk size" in change:
-                current_size = config.get("chunking", {}).get("max_size", 2000)
+                current_size = config.get('chunking', {}).get('max_size', 2000)
-                config.setdefault("chunking", {})["max_size"] = max(1500, current_size - 500)
+                config.setdefault('chunking', {})['max_size'] = max(1500, current_size - 500)
            elif "Increase chunk size" in change:
-                current_size = config.get("chunking", {}).get("max_size", 2000)
+                current_size = config.get('chunking', {}).get('max_size', 2000)
-                config.setdefault("chunking", {})["max_size"] = min(4000, current_size + 500)
+                config.setdefault('chunking', {})['max_size'] = min(4000, current_size + 500)
            elif "Skip files smaller" in change:
-                config.setdefault("files", {})["min_file_size"] = 300
+                config.setdefault('files', {})['min_file_size'] = 300
        # Save optimized config
-        config["_auto_optimized"] = True
+        config['_auto_optimized'] = True
-        config["_optimization_timestamp"] = json.dumps(None, default=str)
+        config['_optimization_timestamp'] = json.dumps(None, default=str)
-        with open(self.config_path, "w") as f:
+        with open(self.config_path, 'w') as f:
            json.dump(config, f, indent=2)
        logger.info(f"Applied {len(changes)} optimizations to {self.config_path}")
@ -184,7 +181,16 @@ class AutoOptimizer:
    def _get_default_config(self) -> Dict[str, Any]:
        """Get default configuration."""
        return {
-            "chunking": {"max_size": 2000, "min_size": 150, "strategy": "semantic"},
+            "chunking": {
-            "streaming": {"enabled": True, "threshold_bytes": 1048576},
+                "max_size": 2000,
-            "files": {"min_file_size": 50},
+                "min_size": 150,
                "strategy": "semantic"
            },
            "streaming": {
                "enabled": True,
                "threshold_bytes": 1048576
            },
            "files": {
                "min_file_size": 50
            }
        }
--- a/mini_rag/chunker.py
+++ b/mini_rag/chunker.py
--- a/mini_rag/cli.py
+++ b/mini_rag/cli.py
@ -3,120 +3,70 @@ Command-line interface for Mini RAG system.
 Beautiful, intuitive, and highly effective.
 """
-import logging
+import click
 import sys
 import time
 import logging
 from pathlib import Path
 from typing import Optional
-import click
+# Fix Windows console for proper emoji/Unicode support
 from .windows_console_fix import fix_windows_console
 fix_windows_console()
 from rich.console import Console
 from rich.logging import RichHandler
 from rich.panel import Panel
 from rich.progress import Progress, SpinnerColumn, TextColumn
 from rich.syntax import Syntax
 from rich.table import Table
 from rich.progress import Progress, SpinnerColumn, TextColumn
 from rich.logging import RichHandler
 from rich.syntax import Syntax
 from rich.panel import Panel
 from rich import print as rprint
 from .indexer import ProjectIndexer
 from .search import CodeSearcher
 from .watcher import FileWatcher
 from .non_invasive_watcher import NonInvasiveFileWatcher
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .chunker import CodeChunker
 from .performance import get_monitor
-from .search import CodeSearcher
+from .server import RAGClient
-from .server import RAGClient, start_server
+from .server import RAGServer, RAGClient, start_server
 from .windows_console_fix import fix_windows_console
 # Fix Windows console for proper emoji/Unicode support
 fix_windows_console()
 # Set up logging
 logging.basicConfig(
    level=logging.INFO,
    format="%(message)s",
-    handlers=[RichHandler(rich_tracebacks=True)],
+    handlers=[RichHandler(rich_tracebacks=True)]
 )
 logger = logging.getLogger(__name__)
 console = Console()
-def find_nearby_index(start_path: Path = None) -> Optional[Path]:
+@click.group()
-    """
+@click.option('--verbose', '-v', is_flag=True, help='Enable verbose logging')
-    Find .mini-rag index in current directory or up to 2 levels up.
+@click.option('--quiet', '-q', is_flag=True, help='Suppress output')
    Args:
        start_path: Starting directory to search from (default: current directory)
    Returns:
        Path to directory containing .mini-rag, or None if not found
    """
    if start_path is None:
        start_path = Path.cwd()
    current = start_path.resolve()
    # Search current directory and up to 2 levels up
    for level in range(3):  # 0, 1, 2 levels up
        rag_dir = current / ".mini-rag"
        if rag_dir.exists() and rag_dir.is_dir():
            return current
        # Move up one level
        parent = current.parent
        if parent == current:  # Reached filesystem root
            break
        current = parent
    return None
 def show_index_guidance(query_path: Path, found_index_path: Path) -> None:
    """Show helpful guidance when index is found in a different location."""
    relative_path = found_index_path.relative_to(Path.cwd()) if found_index_path != Path.cwd() else Path(".")
    console.print(f"\n[yellow]📍 Found FSS-Mini-RAG index in:[/yellow] [blue]{found_index_path}[/blue]")
    console.print(f"[dim]Current directory:[/dim] [dim]{query_path}[/dim]")
    console.print()
    console.print("[green]🚀 To search the index, navigate there first:[/green]")
    console.print(f"   [bold]cd {relative_path}[/bold]")
    console.print(f"   [bold]rag-mini search 'your query here'[/bold]")
    console.print()
    console.print("[cyan]💡 Or specify the path directly:[/cyan]")  
    console.print(f"   [bold]rag-mini search -p {found_index_path} 'your query here'[/bold]")
    console.print()
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
@click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging")
@click.option("--quiet", "-q", is_flag=True, help="Suppress output")
 def cli(verbose: bool, quiet: bool):
    """
    Mini RAG - Fast semantic code search that actually works.
-    A local RAG system for improving the development environment's grounding
+    A local RAG system for improving the development environment's grounding capabilities.
    capabilities.
    Indexes your codebase and enables lightning-fast semantic search.
    """
    # Check virtual environment
    from .venv_checker import check_and_warn_venv
    check_and_warn_venv("rag-mini", force_exit=False)
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    elif quiet:
        logging.getLogger().setLevel(logging.ERROR)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.option(
+@click.option('--path', '-p', type=click.Path(exists=True), default='.', 
-    "--path",
+              help='Project path to index')
-    "-p",
+@click.option('--force', '-f', is_flag=True, 
-    type=click.Path(exists=True),
+              help='Force reindex all files')
-    default=".",
+@click.option('--reindex', '-r', is_flag=True, 
-    help="Project path to index",
+              help='Force complete reindex (same as --force)')
-)
+@click.option('--model', '-m', type=str, default=None,
-@click.option("--force", "-", is_flag=True, help="Force reindex all files")
+              help='Embedding model to use')
@click.option("--reindex", "-r", is_flag=True, help="Force complete reindex (same as --force)")
@click.option("--model", "-m", type=str, default=None, help="Embedding model to use")
 def init(path: str, force: bool, reindex: bool, model: Optional[str]):
    """Initialize RAG index for a project."""
    project_path = Path(path).resolve()
@ -124,7 +74,7 @@ def init(path: str, force: bool, reindex: bool, model: Optional[str]):
    console.print(f"\n[bold cyan]Initializing Mini RAG for:[/bold cyan] {project_path}\n")
    # Check if already initialized
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    force_reindex = force or reindex
    if rag_dir.exists() and not force_reindex:
        console.print("[yellow][/yellow]  Project already initialized!")
@ -138,10 +88,10 @@ def init(path: str, force: bool, reindex: bool, model: Optional[str]):
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")
-        table.add_row("Files Indexed", str(stats["file_count"]))
+        table.add_row("Files Indexed", str(stats['file_count']))
-        table.add_row("Total Chunks", str(stats["chunk_count"]))
+        table.add_row("Total Chunks", str(stats['chunk_count']))
        table.add_row("Index Size", f"{stats['index_size_mb']:.2f} MB")
-        table.add_row("Last Updated", stats["indexed_at"] or "Never")
+        table.add_row("Last Updated", stats['indexed_at'] or "Never")
        console.print(table)
        return
@ -155,13 +105,15 @@ def init(path: str, force: bool, reindex: bool, model: Optional[str]):
        ) as progress:
            # Initialize embedder
            task = progress.add_task("[cyan]Loading embedding model...", total=None)
-            # Use default model if None is passed
+            embedder = CodeEmbedder(model_name=model)
            embedder = CodeEmbedder(model_name=model) if model else CodeEmbedder()
            progress.update(task, completed=True)
            # Create indexer
            task = progress.add_task("[cyan]Creating indexer...", total=None)
-            indexer = ProjectIndexer(project_path, embedder=embedder)
+            indexer = ProjectIndexer(
                project_path,
                embedder=embedder
            )
            progress.update(task, completed=True)
        # Run indexing
@ -169,10 +121,8 @@ def init(path: str, force: bool, reindex: bool, model: Optional[str]):
        stats = indexer.index_project(force_reindex=force_reindex)
        # Show summary
-        if stats["files_indexed"] > 0:
+        if stats['files_indexed'] > 0:
-            console.print(
+            console.print(f"\n[bold green] Success![/bold green] Indexed {stats['files_indexed']} files")
                f"\n[bold green] Success![/bold green] Indexed {stats['files_indexed']} files"
            )
            console.print(f"Created {stats['chunks_created']} searchable chunks")
            console.print(f"Time: {stats['time_taken']:.2f} seconds")
            console.print(f"Speed: {stats['files_per_second']:.1f} files/second")
@ -181,9 +131,9 @@ def init(path: str, force: bool, reindex: bool, model: Optional[str]):
        # Show how to use
        console.print("\n[bold]Next steps:[/bold]")
-        console.print('  • Search your code: [cyan]rag-mini search "your query"[/cyan]')
+        console.print("  • Search your code: [cyan]mini-rag search \"your query\"[/cyan]")
-        console.print("  • Watch for changes: [cyan]rag-mini watch[/cyan]")
+        console.print("  • Watch for changes: [cyan]mini-rag watch[/cyan]")
-        console.print("  • View statistics: [cyan]rag-mini stats[/cyan]\n")
+        console.print("  • View statistics: [cyan]mini-rag stats[/cyan]\n")
    except Exception as e:
        console.print(f"\n[bold red]Error:[/bold red] {e}")
@ -191,43 +141,28 @@ def init(path: str, force: bool, reindex: bool, model: Optional[str]):
        sys.exit(1)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.argument("query")
+@click.argument('query')
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
-@click.option("--top-k", "-k", type=int, default=10, help="Maximum results to show")
+              help='Project path')
-@click.option(
+@click.option('--top-k', '-k', type=int, default=10,
-    "--type", "-t", multiple=True, help="Filter by chunk type (function, class, method)"
+              help='Maximum results to show')
-)
+@click.option('--type', '-t', multiple=True,
-@click.option("--lang", multiple=True, help="Filter by language (python, javascript, etc.)")
+              help='Filter by chunk type (function, class, method)')
-@click.option("--show-content", "-c", is_flag=True, help="Show code content in results")
+@click.option('--lang', multiple=True,
-@click.option("--show-perf", is_flag=True, help="Show performance metrics")
+              help='Filter by language (python, javascript, etc.)')
-def search(
+@click.option('--show-content', '-c', is_flag=True,
-    query: str,
+              help='Show code content in results')
-    path: str,
+@click.option('--show-perf', is_flag=True,
-    top_k: int,
+              help='Show performance metrics')
-    type: tuple,
+def search(query: str, path: str, top_k: int, type: tuple, lang: tuple, show_content: bool, show_perf: bool):
    lang: tuple,
    show_content: bool,
    show_perf: bool,
 ):
    """Search codebase using semantic similarity."""
    project_path = Path(path).resolve()
-    # Check if indexed at specified path
+    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
-        # Try to find nearby index if searching from current directory
+        console.print("[red]Error:[/red] Project not indexed. Run 'mini-rag init' first.")
        if path == ".":
            nearby_index = find_nearby_index()
            if nearby_index:
                show_index_guidance(project_path, nearby_index)
                sys.exit(0)
        console.print(f"[red]Error:[/red] No FSS-Mini-RAG index found at [blue]{project_path}[/blue]")
        console.print()
        console.print("[yellow]💡 To create an index:[/yellow]")
        console.print(f"   [bold]rag-mini init -p {project_path}[/bold]")
        console.print()
        sys.exit(1)
    # Get performance monitor
@ -244,30 +179,27 @@ def search(
            response = client.search(query, top_k=top_k)
-            if response.get("success"):
+            if response.get('success'):
                # Convert response to SearchResult objects
                from .search import SearchResult
                results = []
-                for r in response["results"]:
+                for r in response['results']:
                    result = SearchResult(
-                        file_path=r["file_path"],
+                        file_path=r['file_path'],
-                        content=r["content"],
+                        content=r['content'],
-                        score=r["score"],
+                        score=r['score'],
-                        start_line=r["start_line"],
+                        start_line=r['start_line'],
-                        end_line=r["end_line"],
+                        end_line=r['end_line'],
-                        chunk_type=r["chunk_type"],
+                        chunk_type=r['chunk_type'],
-                        name=r["name"],
+                        name=r['name'],
-                        language=r["language"],
+                        language=r['language']
                    )
                    results.append(result)
                # Show server stats
-                search_time = response.get("search_time_ms", 0)
+                search_time = response.get('search_time_ms', 0)
-                total_queries = response.get("total_queries", 0)
+                total_queries = response.get('total_queries', 0)
-                console.print(
+                console.print(f"[dim]Search time: {search_time}ms (Query #{total_queries})[/dim]\n")
                    f"[dim]Search time: {search_time}ms (Query #{total_queries})[/dim]\n"
                )
            else:
                console.print(f"[red]Server error:[/red] {response.get('error')}")
                sys.exit(1)
@ -287,7 +219,7 @@ def search(
                        query,
                        top_k=top_k,
                        chunk_types=list(type) if type else None,
-                        languages=list(lang) if lang else None,
+                        languages=list(lang) if lang else None
                    )
            else:
                with console.status(f"[cyan]Searching for: {query}[/cyan]"):
@ -295,7 +227,7 @@ def search(
                        query,
                        top_k=top_k,
                        chunk_types=list(type) if type else None,
-                        languages=list(lang) if lang else None,
+                        languages=list(lang) if lang else None
                    )
        # Display results
@ -311,15 +243,12 @@ def search(
            # Copy first result to clipboard if available
            try:
                import pyperclip
                first_result = results[0]
                location = f"{first_result.file_path}:{first_result.start_line}"
                pyperclip.copy(location)
-                console.print(
+                console.print(f"\n[dim]First result location copied to clipboard: {location}[/dim]")
-                    f"\n[dim]First result location copied to clipboard: {location}[/dim]"
+            except:
-                )
+                pass
            except (ImportError, OSError):
                pass  # Clipboard not available
        else:
            console.print(f"\n[yellow]No results found for: {query}[/yellow]")
            console.print("\n[dim]Tips:[/dim]")
@ -337,16 +266,17 @@ def search(
        sys.exit(1)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
              help='Project path')
 def stats(path: str):
    """Show index statistics."""
    project_path = Path(path).resolve()
    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
-        console.print("[red]Error:[/red] Project not indexed. Run 'rag-mini init' first.")
+        console.print("[red]Error:[/red] Project not indexed. Run 'mini-rag init' first.")
        sys.exit(1)
    try:
@ -366,37 +296,35 @@ def stats(path: str):
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")
-        table.add_row("Files Indexed", str(index_stats["file_count"]))
+        table.add_row("Files Indexed", str(index_stats['file_count']))
-        table.add_row("Total Chunks", str(index_stats["chunk_count"]))
+        table.add_row("Total Chunks", str(index_stats['chunk_count']))
        table.add_row("Index Size", f"{index_stats['index_size_mb']:.2f} MB")
-        table.add_row("Last Updated", index_stats["indexed_at"] or "Never")
+        table.add_row("Last Updated", index_stats['indexed_at'] or "Never")
        console.print(table)
        # Language distribution
-        if "languages" in search_stats:
+        if 'languages' in search_stats:
            console.print("\n[bold]Language Distribution:[/bold]")
            lang_table = Table()
            lang_table.add_column("Language", style="cyan")
            lang_table.add_column("Chunks", style="green")
-            for lang, count in sorted(
+            for lang, count in sorted(search_stats['languages'].items(), 
-                search_stats["languages"].items(), key=lambda x: x[1], reverse=True
+                                     key=lambda x: x[1], reverse=True):
            ):
                lang_table.add_row(lang, str(count))
            console.print(lang_table)
        # Chunk type distribution
-        if "chunk_types" in search_stats:
+        if 'chunk_types' in search_stats:
            console.print("\n[bold]Chunk Types:[/bold]")
            type_table = Table()
            type_table.add_column("Type", style="cyan")
            type_table.add_column("Count", style="green")
-            for chunk_type, count in sorted(
+            for chunk_type, count in sorted(search_stats['chunk_types'].items(),
-                search_stats["chunk_types"].items(), key=lambda x: x[1], reverse=True
+                                           key=lambda x: x[1], reverse=True):
            ):
                type_table.add_row(chunk_type, str(count))
            console.print(type_table)
@ -407,28 +335,22 @@ def stats(path: str):
        sys.exit(1)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
              help='Project path')
 def debug_schema(path: str):
    """Debug vector database schema and sample data."""
    project_path = Path(path).resolve()
    try:
-        rag_dir = project_path / ".mini-rag"
+        rag_dir = project_path / '.mini-rag'
        if not rag_dir.exists():
-            console.print("[red]No RAG index found. Run 'rag-mini init' first.[/red]")
+            console.print("[red]No RAG index found. Run 'init' first.[/red]")
            return
        # Connect to database
        try:
        import lancedb
        except ImportError:
            console.print(
                "[red]LanceDB not available. Install with: pip install lancedb pyarrow[/red]"
            )
            return
        db = lancedb.connect(rag_dir)
        if "code_vectors" not in db.table_names():
@ -442,66 +364,52 @@ def debug_schema(path: str):
        console.print(table.schema)
        # Get sample data
-
+        import pandas as pd
        df = table.to_pandas()
-        console.print("\n[bold cyan] Table Statistics:[/bold cyan]")
+        console.print(f"\n[bold cyan] Table Statistics:[/bold cyan]")
        console.print(f"Total rows: {len(df)}")
        if len(df) > 0:
            # Check embedding column
-            console.print("\n[bold cyan] Embedding Column Analysis:[/bold cyan]")
+            console.print(f"\n[bold cyan] Embedding Column Analysis:[/bold cyan]")
-            first_embedding = df["embedding"].iloc[0]
+            first_embedding = df['embedding'].iloc[0]
            console.print(f"Type: {type(first_embedding)}")
-            if hasattr(first_embedding, "shape"):
+            if hasattr(first_embedding, 'shape'):
                console.print(f"Shape: {first_embedding.shape}")
-            if hasattr(first_embedding, "dtype"):
+            if hasattr(first_embedding, 'dtype'):
                console.print(f"Dtype: {first_embedding.dtype}")
            # Show first few rows
-            console.print("\n[bold cyan] Sample Data (first 3 rows):[/bold cyan]")
+            console.print(f"\n[bold cyan] Sample Data (first 3 rows):[/bold cyan]")
            for i in range(min(3, len(df))):
                row = df.iloc[i]
                console.print(f"\n[yellow]Row {i}:[/yellow]")
                console.print(f"  chunk_id: {row['chunk_id']}")
                console.print(f"  file_path: {row['file_path']}")
                console.print(f"  content: {row['content'][:50]}...")
-                embed_len = (
+                console.print(f"  embedding: {type(row['embedding'])} of length {len(row['embedding']) if hasattr(row['embedding'], '__len__') else 'unknown'}")
                    len(row["embedding"])
                    if hasattr(row["embedding"], "__len__")
                    else "unknown"
                )
                console.print(f"  embedding: {type(row['embedding'])} of length {embed_len}")
    except Exception as e:
        logger.error(f"Schema debug failed: {e}")
        console.print(f"[red]Error: {e}[/red]")
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
-@click.option(
+              help='Project path')
-    "--delay",
+@click.option('--delay', '-d', type=float, default=10.0,
-    "-d",
+              help='Update delay in seconds (default: 10s for non-invasive)')
-    type=float,
+@click.option('--silent', '-s', is_flag=True, default=False,
-    default=10.0,
+              help='Run silently in background without output')
    help="Update delay in seconds (default: 10s for non-invasive)",
 )
@click.option(
    "--silent",
    "-s",
    is_flag=True,
    default=False,
    help="Run silently in background without output",
 )
 def watch(path: str, delay: float, silent: bool):
    """Watch for file changes and update index automatically (non-invasive by default)."""
    project_path = Path(path).resolve()
    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
        if not silent:
-            console.print("[red]Error:[/red] Project not indexed. Run 'rag-mini init' first.")
+            console.print("[red]Error:[/red] Project not indexed. Run 'mini-rag init' first.")
        sys.exit(1)
    try:
@ -542,7 +450,7 @@ def watch(path: str, delay: float, silent: bool):
                            f"\r[green]✓[/green] Files updated: {stats.get('files_processed', 0)} | "
                            f"[red]✗[/red] Failed: {stats.get('files_dropped', 0)} | "
                            f"[cyan]⧗[/cyan] Queue: {stats['queue_size']}",
-                            end="",
+                            end=""
                        )
                        last_stats = stats
@ -557,12 +465,10 @@ def watch(path: str, delay: float, silent: bool):
        # Show final stats only if not silent
        if not silent:
            final_stats = watcher.get_statistics()
-            console.print("\n[bold green]Watch Summary:[/bold green]")
+            console.print(f"\n[bold green]Watch Summary:[/bold green]")
            console.print(f"Files updated: {final_stats.get('files_processed', 0)}")
            console.print(f"Files failed: {final_stats.get('files_dropped', 0)}")
-            console.print(
+            console.print(f"Total runtime: {final_stats.get('uptime_seconds', 0):.1f} seconds\n")
                f"Total runtime: {final_stats.get('uptime_seconds', 0):.1f} seconds\n"
            )
    except Exception as e:
        console.print(f"\n[bold red]Error:[/bold red] {e}")
@ -570,10 +476,12 @@ def watch(path: str, delay: float, silent: bool):
        sys.exit(1)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.argument("function_name")
+@click.argument('function_name')
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
-@click.option("--top-k", "-k", type=int, default=5, help="Maximum results")
+              help='Project path')
@click.option('--top-k', '-k', type=int, default=5,
              help='Maximum results')
 def find_function(function_name: str, path: str, top_k: int):
    """Find a specific function by name."""
    project_path = Path(path).resolve()
@ -592,10 +500,12 @@ def find_function(function_name: str, path: str, top_k: int):
        sys.exit(1)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.argument("class_name")
+@click.argument('class_name')
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
-@click.option("--top-k", "-k", type=int, default=5, help="Maximum results")
+              help='Project path')
@click.option('--top-k', '-k', type=int, default=5,
              help='Maximum results')
 def find_class(class_name: str, path: str, top_k: int):
    """Find a specific class by name."""
    project_path = Path(path).resolve()
@ -614,16 +524,17 @@ def find_class(class_name: str, path: str, top_k: int):
        sys.exit(1)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
              help='Project path')
 def update(path: str):
    """Update index for changed files."""
    project_path = Path(path).resolve()
    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
-        console.print("[red]Error:[/red] Project not indexed. Run 'rag-mini init' first.")
+        console.print("[red]Error:[/red] Project not indexed. Run 'mini-rag init' first.")
        sys.exit(1)
    try:
@ -633,7 +544,7 @@ def update(path: str):
        stats = indexer.index_project(force_reindex=False)
-        if stats["files_indexed"] > 0:
+        if stats['files_indexed'] > 0:
            console.print(f"[green][/green] Updated {stats['files_indexed']} files")
            console.print(f"Created {stats['chunks_created']} new chunks")
        else:
@ -644,8 +555,8 @@ def update(path: str):
        sys.exit(1)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.option("--show-code", "-c", is_flag=True, help="Show example code")
+@click.option('--show-code', '-c', is_flag=True, help='Show example code')
 def info(show_code: bool):
    """Show information about Mini RAG."""
    # Create info panel
@ -678,7 +589,7 @@ def info(show_code: bool):
        console.print("\n[bold]Example Usage:[/bold]\n")
        code = """# Initialize a project
-rag-mini init
+mini-rag init
 # Search for code
 mini-rag search "database connection"
@ -689,26 +600,28 @@ mini-rag find-function connect_to_db
 mini-rag find-class UserModel
 # Watch for changes
-rag-mini watch
+mini-rag watch
 # Get statistics
-rag-mini stats"""
+mini-rag stats"""
        syntax = Syntax(code, "bash", theme="monokai")
        console.print(syntax)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
-@click.option("--port", type=int, default=7777, help="Server port")
+              help='Project path')
@click.option('--port', type=int, default=7777,
              help='Server port')
 def server(path: str, port: int):
    """Start persistent RAG server (keeps model loaded)."""
    project_path = Path(path).resolve()
    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
-        console.print("[red]Error:[/red] Project not indexed. Run 'rag-mini init' first.")
+        console.print("[red]Error:[/red] Project not indexed. Run 'mini-rag init' first.")
        sys.exit(1)
    try:
@ -725,10 +638,13 @@ def server(path: str, port: int):
        sys.exit(1)
-@cli.command(context_settings={"help_option_names": ["-h", "--help"]})
+@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
-@click.option("--port", type=int, default=7777, help="Server port")
+              help='Project path')
-@click.option("--discovery", "-d", is_flag=True, help="Run codebase discovery analysis")
+@click.option('--port', type=int, default=7777,
              help='Server port')
@click.option('--discovery', '-d', is_flag=True,
              help='Run codebase discovery analysis')
 def status(path: str, port: int, discovery: bool):
    """Show comprehensive RAG system status with optional codebase discovery."""
    project_path = Path(path).resolve()
@ -741,12 +657,7 @@ def status(path: str, port: int, discovery: bool):
    console.print("[bold]📁 Folder Contents:[/bold]")
    try:
        all_files = list(project_path.rglob("*"))
-        source_files = [
+        source_files = [f for f in all_files if f.is_file() and f.suffix in ['.py', '.js', '.ts', '.go', '.java', '.cpp', '.c', '.h']]
            f
            for f in all_files
            if f.is_file()
            and f.suffix in [".py", ".js", ".ts", ".go", ".java", ".cpp", ".c", ".h"]
        ]
        console.print(f"   • Total files: {len([f for f in all_files if f.is_file()])}")
        console.print(f"   • Source files: {len(source_files)}")
@ -756,34 +667,23 @@ def status(path: str, port: int, discovery: bool):
    # Check index status
    console.print("\n[bold]🗂️ Index Status:[/bold]")
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if rag_dir.exists():
        try:
            indexer = ProjectIndexer(project_path)
            index_stats = indexer.get_statistics()
-            console.print("   • Status: [green]✅ Indexed[/green]")
+            console.print(f"   • Status: [green]✅ Indexed[/green]")
            console.print(f"   • Files indexed: {index_stats['file_count']}")
            console.print(f"   • Total chunks: {index_stats['chunk_count']}")
            console.print(f"   • Index size: {index_stats['index_size_mb']:.2f} MB")
            console.print(f"   • Last updated: {index_stats['indexed_at'] or 'Never'}")
        except Exception as e:
-            console.print("   • Status: [yellow]⚠️ Index exists but has issues[/yellow]")
+            console.print(f"   • Status: [yellow]⚠️ Index exists but has issues[/yellow]")
            console.print(f"   • Error: {e}")
    else:
        console.print("   • Status: [red]❌ Not indexed[/red]")
-        
+        console.print("   • Run 'rag-start' to initialize")
        # Try to find nearby index if checking current directory  
        if path == ".":
            nearby_index = find_nearby_index()
            if nearby_index:
                console.print(f"   • Found index in: [blue]{nearby_index}[/blue]")
                relative_path = nearby_index.relative_to(Path.cwd()) if nearby_index != Path.cwd() else Path(".")
                console.print(f"   • Use: [bold]cd {relative_path} && rag-mini status[/bold]")
            else:
                console.print("   • Run 'rag-mini init' to initialize")
        else:
            console.print("   • Run 'rag-mini init' to initialize")
    # Check server status
    console.print("\n[bold]🚀 Server Status:[/bold]")
@ -795,16 +695,16 @@ def status(path: str, port: int, discovery: bool):
        # Try to get server info
        try:
            response = client.search("test", top_k=1)  # Minimal query to get stats
-            if response.get("success"):
+            if response.get('success'):
-                uptime = response.get("server_uptime", 0)
+                uptime = response.get('server_uptime', 0)
-                queries = response.get("total_queries", 0)
+                queries = response.get('total_queries', 0)
                console.print(f"   • Uptime: {uptime}s")
                console.print(f"   • Total queries: {queries}")
        except Exception as e:
            console.print(f"   • [yellow]Server responding but with issues: {e}[/yellow]")
    else:
        console.print(f"   • Status: [red]❌ Not running on port {port}[/red]")
-        console.print("   • Run 'rag-mini server' to start the server")
+        console.print("   • Run 'rag-start' to start server")
    # Run codebase discovery if requested
    if discovery and rag_dir.exists():
@ -830,26 +730,22 @@ def status(path: str, port: int, discovery: bool):
    elif discovery and not rag_dir.exists():
        console.print("\n[bold]🧠 Codebase Discovery:[/bold]")
        console.print("   [yellow]❌ Cannot run discovery - project not indexed[/yellow]")
-        console.print("   Run 'rag-mini init' first to initialize the system")
+        console.print("   Run 'rag-start' first to initialize the system")
    # Show next steps
    console.print("\n[bold]📋 Next Steps:[/bold]")
    if not rag_dir.exists():
-        console.print("   1. Run [cyan]rag-mini init[/cyan] to initialize the RAG system")
+        console.print("   1. Run [cyan]rag-start[/cyan] to initialize and start RAG system")
-        console.print('   2. Use [cyan]rag-mini search "your query"[/cyan] to search code')
+        console.print("   2. Use [cyan]rag-search \"your query\"[/cyan] to search code")
    elif not client.is_running():
-        console.print("   1. Run [cyan]rag-mini server[/cyan] to start the server")
+        console.print("   1. Run [cyan]rag-start[/cyan] to start the server")
-        console.print('   2. Use [cyan]rag-mini search "your query"[/cyan] to search code')
+        console.print("   2. Use [cyan]rag-search \"your query\"[/cyan] to search code")
    else:
-        console.print(
+        console.print("   • System ready! Use [cyan]rag-search \"your query\"[/cyan] to search")
-            '   • System ready! Use [cyan]rag-mini search "your query"[/cyan] to search'
+        console.print("   • Add [cyan]--discovery[/cyan] flag to run intelligent codebase analysis")
        )
        console.print(
            "   • Add [cyan]--discovery[/cyan] flag to run intelligent codebase analysis"
        )
    console.print()
-if __name__ == "__main__":
+if __name__ == '__main__':
    cli()
--- a/mini_rag/config.py
+++ b/mini_rag/config.py
@ -3,14 +3,11 @@ Configuration management for FSS-Mini-RAG.
 Handles loading, saving, and validation of YAML config files.
 """
 import logging
 import re
 from dataclasses import asdict, dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 import yaml
-import requests
+import logging
 from pathlib import Path
 from typing import Dict, Any, Optional
 from dataclasses import dataclass, asdict
 logger = logging.getLogger(__name__)
@ -18,7 +15,6 @@ logger = logging.getLogger(__name__)
@dataclass
 class ChunkingConfig:
    """Configuration for text chunking."""
    max_size: int = 2000
    min_size: int = 150
    strategy: str = "semantic"  # "semantic" or "fixed"
@ -27,7 +23,6 @@ class ChunkingConfig:
@dataclass
 class StreamingConfig:
    """Configuration for large file streaming."""
    enabled: bool = True
    threshold_bytes: int = 1048576  # 1MB
@ -35,7 +30,6 @@ class StreamingConfig:
@dataclass
 class FilesConfig:
    """Configuration for file processing."""
    min_file_size: int = 50
    exclude_patterns: list = None
    include_patterns: list = None
@ -50,7 +44,7 @@ class FilesConfig:
                ".venv/**",
                "venv/**",
                "build/**",
-                "dist/**",
+                "dist/**"
            ]
        if self.include_patterns is None:
            self.include_patterns = ["**/*"]  # Include everything by default
@ -59,7 +53,6 @@ class FilesConfig:
@dataclass
 class EmbeddingConfig:
    """Configuration for embedding generation."""
    preferred_method: str = "ollama"  # "ollama", "ml", "hash", "auto"
    ollama_model: str = "nomic-embed-text"
    ollama_host: str = "localhost:11434"
@ -70,8 +63,7 @@ class EmbeddingConfig:
@dataclass
 class SearchConfig:
    """Configuration for search behavior."""
-
+    default_limit: int = 10
    default_top_k: int = 10
    enable_bm25: bool = True
    similarity_threshold: float = 0.1
    expand_queries: bool = False  # Enable automatic query expansion
@ -80,68 +72,24 @@ class SearchConfig:
@dataclass 
 class LLMConfig:
    """Configuration for LLM synthesis and query expansion."""
-
+    ollama_host: str = "localhost:11434"
    # Core settings
    synthesis_model: str = "auto"  # "auto", "qwen3:1.7b", "qwen2.5:1.5b", etc.
    expansion_model: str = "auto"  # Usually same as synthesis_model
    max_expansion_terms: int = 8   # Maximum additional terms to add
    enable_synthesis: bool = False # Enable by default when --synthesize used
    synthesis_temperature: float = 0.3
-    enable_thinking: bool = True  # Enable thinking mode for Qwen3 models
+    enable_thinking: bool = True  # Enable thinking mode for Qwen3 models (production: True, testing: toggle)
    cpu_optimized: bool = True  # Prefer lightweight models
    # Context window configuration (critical for RAG performance)
    context_window: int = 16384  # Context window size in tokens (16K recommended)
    auto_context: bool = True  # Auto-adjust context based on model capabilities
    # Model preference rankings (configurable)
    model_rankings: list = None  # Will be set in __post_init__
    # Provider-specific settings (for different LLM providers)
    provider: str = "ollama"  # "ollama", "openai", "anthropic"
    ollama_host: str = "localhost:11434"  # Ollama connection
    api_key: Optional[str] = None  # API key for cloud providers
    api_base: Optional[str] = None  # Base URL for API (e.g., OpenRouter)
    timeout: int = 20  # Request timeout in seconds
    def __post_init__(self):
        if self.model_rankings is None:
            # Default model preference rankings (can be overridden in config file)
            self.model_rankings = [
                # Testing model (prioritized for current testing phase)
                "qwen3:1.7b",
                # Ultra-efficient models (perfect for CPU-only systems)
                "qwen3:0.6b",
                # Recommended model (excellent quality but larger)
                "qwen3:4b",
                # Common fallbacks (prioritize Qwen models)
                "qwen2.5:1.5b",
                "qwen2.5:3b",
            ]
@dataclass
 class UpdateConfig:
    """Configuration for auto-update system."""
    auto_check: bool = True  # Check for updates automatically
    check_frequency_hours: int = 24  # How often to check (hours)
    auto_install: bool = False  # Auto-install without asking (not recommended)
    backup_before_update: bool = True  # Create backup before updating
    notify_beta_releases: bool = False  # Include beta/pre-releases
@dataclass
 class RAGConfig:
    """Main RAG system configuration."""
    chunking: ChunkingConfig = None
    streaming: StreamingConfig = None  
    files: FilesConfig = None
    embedding: EmbeddingConfig = None
    search: SearchConfig = None
    llm: LLMConfig = None
    updates: UpdateConfig = None
    def __post_init__(self):
        if self.chunking is None:
@ -156,8 +104,6 @@ class RAGConfig:
            self.search = SearchConfig()
        if self.llm is None:
            self.llm = LLMConfig()
        if self.updates is None:
            self.updates = UpdateConfig()
 class ConfigManager:
@ -165,223 +111,8 @@ class ConfigManager:
    def __init__(self, project_path: Path):
        self.project_path = Path(project_path)
-        self.rag_dir = self.project_path / ".mini-rag"
+        self.rag_dir = self.project_path / '.mini-rag'
-        self.config_path = self.rag_dir / "config.yaml"
+        self.config_path = self.rag_dir / 'config.yaml'
    def get_available_ollama_models(self, ollama_host: str = "localhost:11434") -> List[str]:
        """Query an Ollama server for the names of the models it reports.

        Issues ``GET http://{ollama_host}/api/tags`` and, on a 200 response,
        returns the ``name`` of every entry under the ``models`` key of the
        JSON body. Up to three attempts are made; every failure is logged at
        debug level only and nothing is raised to the caller.

        Args:
            ollama_host: ``host:port`` of the Ollama server, without a scheme
                (the ``http://`` prefix is added here).

        Returns:
            The list of model names, or an empty list if no attempt succeeded.
        """
        import time
        # Up to max_retries attempts; only the timeout branch backs off exponentially
        max_retries = 3
        for attempt in range(max_retries):
            try:
                # Explicit timeouts, no redirects.
                # NOTE(review): the URL is always http://, so verify=True presumably has no
                # effect on this request - confirm against the requests documentation.
                response = requests.get(
                    f"http://{ollama_host}/api/tags", 
                    timeout=(5, 10),  # (connect_timeout, read_timeout)
                    verify=True,  # Explicit SSL verification 
                    allow_redirects=False  # Prevent redirect attacks
                )
                if response.status_code == 200:
                    data = response.json()
                    # A body without a "models" key yields an empty list rather than an error
                    models = [model["name"] for model in data.get("models", [])]
                    logger.debug(f"Successfully fetched {len(models)} Ollama models")
                    return models
                else:
                    # Non-200: fall through to the next attempt immediately (no sleep here)
                    logger.debug(f"Ollama API returned status {response.status_code}")
            except requests.exceptions.SSLError as e:
                logger.debug(f"SSL verification failed for Ollama connection: {e}")
                # For local Ollama, SSL might not be configured - this is expected
                if "localhost" in ollama_host or "127.0.0.1" in ollama_host:
                    logger.debug("Retrying with local connection (SSL not required for localhost)")
                    # One extra request without the verify/allow_redirects arguments
                    try:
                        response = requests.get(f"http://{ollama_host}/api/tags", timeout=(5, 10))
                        if response.status_code == 200:
                            data = response.json()
                            return [model["name"] for model in data.get("models", [])]
                    except Exception as local_e:
                        logger.debug(f"Local Ollama connection also failed: {local_e}")
                # Reached for every SSL error (local or remote) that did not return above:
                # stop retrying and fall through to the empty result
                break
            except requests.exceptions.Timeout as e:
                logger.debug(f"Ollama connection timeout (attempt {attempt + 1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    sleep_time = (2 ** attempt)  # Exponential backoff: 1s, then 2s
                    time.sleep(sleep_time)
                    continue
            except requests.exceptions.ConnectionError as e:
                logger.debug(f"Ollama connection error (attempt {attempt + 1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    # Fixed one-second pause between connection-error retries
                    time.sleep(1)
                    continue
            except Exception as e:
                # Anything else (including errors raised while reading the response) ends the loop
                logger.debug(f"Unexpected error fetching Ollama models: {e}")
                break
        # No attempt produced a model list
        return []
    def _sanitize_model_name(self, model_name: str) -> str:
        """Sanitize model name to prevent injection attacks."""
        if not model_name:
            return ""
        # Allow only alphanumeric, dots, colons, hyphens, underscores
        # This covers legitimate model names like qwen3:1.7b-q8_0
        sanitized = re.sub(r'[^a-zA-Z0-9\.\:\-\_]', '', model_name)
        # Limit length to prevent DoS
        if len(sanitized) > 128:
            logger.warning(f"Model name too long, truncating: {sanitized[:20]}...")
            sanitized = sanitized[:128]
        return sanitized
    def resolve_model_name(self, configured_model: str, available_models: List[str]) -> Optional[str]:
        """Map a configured model name onto one of the available model names.

        The configured name is sanitized first. Resolution then tries, in order:
        the special ``auto`` directive (first available model), a
        case-insensitive exact match, and finally the fuzzy patterns from
        ``_get_model_patterns``, each confirmed by ``_validate_model_match``.

        Args:
            configured_model: Name taken from configuration.
            available_models: Candidate names to resolve against.

        Returns:
            The matching entry of ``available_models``, or ``None`` when either
            argument is empty, the name sanitizes to nothing, or nothing matches.
        """
        if not (available_models and configured_model):
            return None

        # Sanitize input to prevent injection
        configured_model = self._sanitize_model_name(configured_model)
        if not configured_model:
            logger.warning("Model name was empty after sanitization")
            return None

        wanted = configured_model.lower()

        # 'auto' simply picks the first available model (the list is non-empty here)
        if wanted == 'auto':
            return available_models[0]

        # Case-insensitive exact match takes priority over any fuzzy pattern
        exact = next((name for name in available_models if name.lower() == wanted), None)
        if exact is not None:
            return exact

        # Fuzzy pass: patterns are tried in order, first validated hit wins
        for pattern in self._get_model_patterns(configured_model):
            needle = pattern.lower()
            for candidate in available_models:
                if needle in candidate.lower() and self._validate_model_match(pattern, candidate):
                    return candidate

        return None  # Model not available
    def _get_model_patterns(self, configured_model: str) -> List[str]:
        """Generate fuzzy match patterns for common model naming conventions."""
        patterns = [configured_model]  # Start with exact name
        # Common quantization patterns for different models
        quantization_patterns = {
            'qwen3:1.7b': ['qwen3:1.7b-q8_0', 'qwen3:1.7b-q4_0', 'qwen3:1.7b-q6_k'],
            'qwen3:0.6b': ['qwen3:0.6b-q8_0', 'qwen3:0.6b-q4_0', 'qwen3:0.6b-q6_k'],
            'qwen3:4b': ['qwen3:4b-q8_0', 'qwen3:4b-q4_0', 'qwen3:4b-q6_k'],
            'qwen3:8b': ['qwen3:8b-q8_0', 'qwen3:8b-q4_0', 'qwen3:8b-q6_k'],
            'qwen2.5:1.5b': ['qwen2.5:1.5b-q8_0', 'qwen2.5:1.5b-q4_0'],
            'qwen2.5:3b': ['qwen2.5:3b-q8_0', 'qwen2.5:3b-q4_0'],
            'qwen2.5-coder:1.5b': ['qwen2.5-coder:1.5b-q8_0', 'qwen2.5-coder:1.5b-q4_0'],
            'qwen2.5-coder:3b': ['qwen2.5-coder:3b-q8_0', 'qwen2.5-coder:3b-q4_0'],
            'qwen2.5-coder:7b': ['qwen2.5-coder:7b-q8_0', 'qwen2.5-coder:7b-q4_0'],
        }
        # Add specific patterns for the configured model
        if configured_model.lower() in quantization_patterns:
            patterns.extend(quantization_patterns[configured_model.lower()])
        # Generic pattern generation for unknown models
        if ':' in configured_model:
            base_name, version = configured_model.split(':', 1)
            # Add common quantization suffixes
            common_suffixes = ['-q8_0', '-q4_0', '-q6_k', '-q4_k_m', '-instruct', '-base']
            for suffix in common_suffixes:
                patterns.append(f"{base_name}:{version}{suffix}")
            # Also try with instruct variants
            if 'instruct' not in version.lower():
                patterns.append(f"{base_name}:{version}-instruct")
                patterns.append(f"{base_name}:{version}-instruct-q8_0")
                patterns.append(f"{base_name}:{version}-instruct-q4_0")
        return patterns
    def _validate_model_match(self, pattern: str, available_model: str) -> bool:
        """Validate that a fuzzy match is actually correct and not a false positive."""
        # Convert to lowercase for comparison
        pattern_lower = pattern.lower()
        available_lower = available_model.lower()
        # Ensure the base model name matches
        if ':' in pattern_lower and ':' in available_lower:
            pattern_base = pattern_lower.split(':')[0]
            available_base = available_lower.split(':')[0]
            # Base names must match exactly
            if pattern_base != available_base:
                return False
            # Version part should be contained or closely related
            pattern_version = pattern_lower.split(':', 1)[1]
            available_version = available_lower.split(':', 1)[1]
            # The pattern version should be a prefix of the available version
            # e.g., "1.7b" should match "1.7b-q8_0" but not "11.7b"
            if not available_version.startswith(pattern_version.split('-')[0]):
                return False
        return True
    def validate_and_resolve_models(self, config: RAGConfig) -> RAGConfig:
        """Rewrite the model names in ``config.llm`` to names Ollama actually reports.

        Fetches the available models from ``config.llm.ollama_host`` and resolves
        ``synthesis_model``, ``expansion_model`` and every entry of
        ``model_rankings`` through ``resolve_model_name``. Names that cannot be
        resolved are left untouched. ``"auto"`` values are never resolved here,
        and the expansion model is skipped when it equals the (possibly just
        updated) synthesis model. Any exception is logged at debug level and
        swallowed, so this step is best-effort.

        Args:
            config: Configuration to update in place.

        Returns:
            The same ``config`` object.
        """
        try:
            available_models = self.get_available_ollama_models(config.llm.ollama_host)
            if not available_models:
                logger.debug("No Ollama models available for validation")
                return config

            # Synthesis model
            if config.llm.synthesis_model != "auto":
                resolved = self.resolve_model_name(config.llm.synthesis_model, available_models)
                if not resolved:
                    logger.warning(f"Synthesis model '{config.llm.synthesis_model}' not found, keeping original")
                elif resolved != config.llm.synthesis_model:
                    logger.info(f"Resolved synthesis model: {config.llm.synthesis_model} -> {resolved}")
                    config.llm.synthesis_model = resolved

            # Expansion model, only when it is explicit and differs from the synthesis model
            expansion_is_auto = config.llm.expansion_model == "auto"
            if not expansion_is_auto and config.llm.expansion_model != config.llm.synthesis_model:
                resolved = self.resolve_model_name(config.llm.expansion_model, available_models)
                if not resolved:
                    logger.warning(f"Expansion model '{config.llm.expansion_model}' not found, keeping original")
                elif resolved != config.llm.expansion_model:
                    logger.info(f"Resolved expansion model: {config.llm.expansion_model} -> {resolved}")
                    config.llm.expansion_model = resolved

            # Rankings: swap in resolved names, keep unresolved entries as they were
            if config.llm.model_rankings:
                updated_rankings = []
                for model in config.llm.model_rankings:
                    resolved = self.resolve_model_name(model, available_models)
                    if resolved and resolved != model:
                        logger.debug(f"Updated model ranking: {model} -> {resolved}")
                    updated_rankings.append(resolved or model)
                config.llm.model_rankings = updated_rankings

        except Exception as e:
            logger.debug(f"Model validation failed: {e}")

        return config
    def load_config(self) -> RAGConfig:
        """Load configuration from YAML file or create default."""
@ -392,7 +123,7 @@ class ConfigManager:
            return config
        try:
-            with open(self.config_path, "r") as f:
+            with open(self.config_path, 'r') as f:
                data = yaml.safe_load(f)
            if not data:
@ -402,37 +133,19 @@ class ConfigManager:
            # Convert nested dicts back to dataclass instances
            config = RAGConfig()
-            if "chunking" in data:
+            if 'chunking' in data:
-                config.chunking = ChunkingConfig(**data["chunking"])
+                config.chunking = ChunkingConfig(**data['chunking'])
-            if "streaming" in data:
+            if 'streaming' in data:
-                config.streaming = StreamingConfig(**data["streaming"])
+                config.streaming = StreamingConfig(**data['streaming'])
-            if "files" in data:
+            if 'files' in data:
-                config.files = FilesConfig(**data["files"])
+                config.files = FilesConfig(**data['files'])
-            if "embedding" in data:
+            if 'embedding' in data:
-                config.embedding = EmbeddingConfig(**data["embedding"])
+                config.embedding = EmbeddingConfig(**data['embedding'])
-            if "search" in data:
+            if 'search' in data:
-                config.search = SearchConfig(**data["search"])
+                config.search = SearchConfig(**data['search'])
            if "llm" in data:
                config.llm = LLMConfig(**data["llm"])
            # Validate and resolve model names if Ollama is available
            config = self.validate_and_resolve_models(config)
            return config
        except yaml.YAMLError as e:
            # YAML syntax error - help user fix it instead of silent fallback
            error_msg = (
                f"⚠️ Config file has YAML syntax error at line "
                f"{getattr(e, 'problem_mark', 'unknown')}: {e}"
            )
            logger.error(error_msg)
            print(f"\n{error_msg}")
            print(f"Config file: {self.config_path}")
            print("💡 Check YAML syntax (indentation, quotes, colons)")
            print("💡 Or delete config file to reset to defaults")
            return RAGConfig()  # Still return defaults but warn user
        except Exception as e:
            logger.error(f"Failed to load config from {self.config_path}: {e}")
            logger.info("Using default configuration")
@ -449,18 +162,7 @@ class ConfigManager:
            # Create YAML content with comments
            yaml_content = self._create_yaml_with_comments(config_dict)
-            # Write with basic file locking to prevent corruption
+            with open(self.config_path, 'w') as f:
            with open(self.config_path, "w") as f:
                try:
                    import fcntl
                    fcntl.flock(
                        f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
                    )  # Non-blocking exclusive lock
                    f.write(yaml_content)
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)  # Unlock
                except (OSError, ImportError):
                    # Fallback for Windows or if fcntl unavailable
                f.write(yaml_content)
            logger.info(f"Configuration saved to {self.config_path}")
@ -477,87 +179,54 @@ class ConfigManager:
            "",
            "# Text chunking settings",
            "chunking:",
-            f"  max_size: {config_dict['chunking']['max_size']}  # Max chars per chunk",
+            f"  max_size: {config_dict['chunking']['max_size']}      # Maximum characters per chunk",
-            f"  min_size: {config_dict['chunking']['min_size']}  # Min chars per chunk",
+            f"  min_size: {config_dict['chunking']['min_size']}       # Minimum characters per chunk", 
-            f"  strategy: {config_dict['chunking']['strategy']}  # 'semantic' or 'fixed'",
+            f"  strategy: {config_dict['chunking']['strategy']}    # 'semantic' (language-aware) or 'fixed'",
            "",
            "# Large file streaming settings", 
            "streaming:",
            f"  enabled: {str(config_dict['streaming']['enabled']).lower()}",
-            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Stream files >1MB",
+            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Files larger than this use streaming (1MB)",
            "",
            "# File processing settings",
            "files:",
-            f"  min_file_size: {config_dict['files']['min_file_size']}  # Skip small files",
+            f"  min_file_size: {config_dict['files']['min_file_size']}        # Skip files smaller than this",
            "  exclude_patterns:",
        ]
-        for pattern in config_dict["files"]["exclude_patterns"]:
+        for pattern in config_dict['files']['exclude_patterns']:
-            yaml_lines.append(f'    - "{pattern}"')
+            yaml_lines.append(f"    - \"{pattern}\"")
-        yaml_lines.extend(
+        yaml_lines.extend([
            [
            "  include_patterns:",
-                '    - "**/*"                  # Include all files by default',
+            "    - \"**/*\"                  # Include all files by default",
            "",
            "# Embedding generation settings",
            "embedding:",
-                f"  preferred_method: {config_dict['embedding']['preferred_method']}  # Method",
+            f"  preferred_method: {config_dict['embedding']['preferred_method']}     # 'ollama', 'ml', 'hash', or 'auto'",
            f"  ollama_model: {config_dict['embedding']['ollama_model']}",
            f"  ollama_host: {config_dict['embedding']['ollama_host']}",
            f"  ml_model: {config_dict['embedding']['ml_model']}",
-                f"  batch_size: {config_dict['embedding']['batch_size']}  # Per batch",
+            f"  batch_size: {config_dict['embedding']['batch_size']}               # Embeddings processed per batch",
            "",
            "# Search behavior settings", 
            "search:",
-                f"  default_top_k: {config_dict['search']['default_top_k']}  # Top results",
+            f"  default_limit: {config_dict['search']['default_limit']}           # Default number of results",
-                f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}  # Keyword boost",
+            f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}             # Enable keyword matching boost",
-                f"  similarity_threshold: {config_dict['search']['similarity_threshold']}  # Min score",
+            f"  similarity_threshold: {config_dict['search']['similarity_threshold']}        # Minimum similarity score",
-                f"  expand_queries: {str(config_dict['search']['expand_queries']).lower()}  # Auto expand",
+            f"  expand_queries: {str(config_dict['search']['expand_queries']).lower()}          # Enable automatic query expansion",
            "",
            "# LLM synthesis and query expansion settings",
            "llm:",
            f"  ollama_host: {config_dict['llm']['ollama_host']}",
-                f"  synthesis_model: {config_dict['llm']['synthesis_model']}  # Model name",
+            f"  synthesis_model: {config_dict['llm']['synthesis_model']}    # 'auto', 'qwen3:1.7b', etc.",
-                f"  expansion_model: {config_dict['llm']['expansion_model']}  # Model name",
+            f"  expansion_model: {config_dict['llm']['expansion_model']}     # Usually same as synthesis_model",
-                f"  max_expansion_terms: {config_dict['llm']['max_expansion_terms']}  # Max terms",
+            f"  max_expansion_terms: {config_dict['llm']['max_expansion_terms']}        # Maximum terms to add to queries",
            f"  enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()}       # Enable synthesis by default",
            f"  synthesis_temperature: {config_dict['llm']['synthesis_temperature']}      # LLM temperature for analysis",
-                "",
+        ])
                "  # Context window configuration (critical for RAG performance)",
                "  # 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users",
                "  #               32K=large codebases, 64K+=power users only",
                "  # ⚠️  Larger contexts use exponentially more CPU/memory - only increase if needed",
                "  # 🔧 Low context limits? Try smaller topk, better search terms, or archive noise",
                f"  context_window: {config_dict['llm']['context_window']}           # Context size in tokens",
                f"  auto_context: {str(config_dict['llm']['auto_context']).lower()}            # Auto-adjust context based on model capabilities",
                "",
                "  model_rankings:          # Preferred model order (edit to change priority)",
            ]
        )
-        # Add model rankings list
+        return '\n'.join(yaml_lines)
        if "model_rankings" in config_dict["llm"] and config_dict["llm"]["model_rankings"]:
            for model in config_dict["llm"]["model_rankings"][:10]:  # Show first 10
                yaml_lines.append(f'    - "{model}"')
            if len(config_dict["llm"]["model_rankings"]) > 10:
                yaml_lines.append("    # ... (edit config to see all options)")
        # Add update settings
        yaml_lines.extend(
            [
                "",
                "# Auto-update system settings",
                "updates:",
                f"  auto_check: {str(config_dict['updates']['auto_check']).lower()}            # Check for updates automatically",
                f"  check_frequency_hours: {config_dict['updates']['check_frequency_hours']}    # Hours between update checks",
                f"  auto_install: {str(config_dict['updates']['auto_install']).lower()}          # Auto-install updates (not recommended)",
                f"  backup_before_update: {str(config_dict['updates']['backup_before_update']).lower()}   # Create backup before updating",
                f"  notify_beta_releases: {str(config_dict['updates']['notify_beta_releases']).lower()}   # Include beta releases in checks",
            ]
        )
        return "\n".join(yaml_lines)
    def update_config(self, **kwargs) -> RAGConfig:
        """Update specific configuration values."""
--- a/mini_rag/explorer.py
+++ b/mini_rag/explorer.py
@ -9,43 +9,33 @@ Perfect for exploring codebases with detailed reasoning and follow-up questions.
 import json
 import logging
 import time
-from dataclasses import dataclass
+from typing import List, Dict, Any, Optional
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from dataclasses import dataclass
 try:
    from .config import RAGConfig
    from .llm_synthesizer import LLMSynthesizer, SynthesisResult
    from .search import CodeSearcher
-    from .system_context import get_system_context
+    from .config import RAGConfig
 except ImportError:
    # For direct testing
    from config import RAGConfig
    from llm_synthesizer import LLMSynthesizer, SynthesisResult
    from search import CodeSearcher
-
+    from config import RAGConfig
    def get_system_context(x=None):
        return ""
 logger = logging.getLogger(__name__)
@dataclass
 class ExplorationSession:
    """Track an exploration session with context history."""
    project_path: Path
    conversation_history: List[Dict[str, Any]]
    session_id: str
    started_at: float
-    def add_exchange(
+    def add_exchange(self, question: str, search_results: List[Any], response: SynthesisResult):
        self, question: str, search_results: List[Any], response: SynthesisResult
    ):
        """Add a question/response exchange to the conversation history."""
-        self.conversation_history.append(
+        self.conversation_history.append({
            {
            "timestamp": time.time(),
            "question": question,
            "search_results_count": len(search_results),
@ -54,11 +44,9 @@ class ExplorationSession:
                "key_points": response.key_points,
                "code_examples": response.code_examples,
                "suggested_actions": response.suggested_actions,
-                    "confidence": response.confidence,
+                "confidence": response.confidence
                },
            }
-        )
+        })
 class CodeExplorer:
    """Interactive code exploration with thinking and context memory."""
@ -72,8 +60,7 @@ class CodeExplorer:
        self.synthesizer = LLMSynthesizer(
            ollama_url=f"http://{self.config.llm.ollama_host}",
            model=self.config.llm.synthesis_model,
-            enable_thinking=True,  # Always enable thinking in explore mode
+            enable_thinking=True  # Always enable thinking in explore mode
            config=self.config,  # Pass config for model rankings
        )
        # Session management
@ -82,7 +69,12 @@ class CodeExplorer:
    def start_exploration_session(self) -> bool:
        """Start a new exploration session."""
-        # Simple availability check - don't do complex model restart logic
+        # Check if we should restart the model for optimal thinking
        model_restart_needed = self._check_model_restart_needed()
        if model_restart_needed:
            if not self._handle_model_restart():
                print("⚠️  Continuing with current model (quality may be reduced)")
        if not self.synthesizer.is_available():
            print("❌ LLM service unavailable. Please check Ollama is running.")
            return False
@ -92,11 +84,20 @@ class CodeExplorer:
            project_path=self.project_path,
            conversation_history=[],
            session_id=session_id,
-            started_at=time.time(),
+            started_at=time.time()
        )
-        print("🧠 Exploration Mode Started")
+        print("🧠 EXPLORATION MODE STARTED")
        print("=" * 50)
        print(f"Project: {self.project_path.name}")
        print(f"Session: {session_id}")
        print("\n🎯 This mode uses thinking and remembers context.")
        print("   Perfect for debugging, learning, and deep exploration.")
        print("\n💡 Tips:")
        print("   • Ask follow-up questions - I'll remember our conversation")
        print("   • Use 'why', 'how', 'explain' for detailed reasoning")
        print("   • Type 'quit' or 'exit' to end session")
        print("\n" + "=" * 50)
        return True
@ -109,10 +110,10 @@ class CodeExplorer:
        search_start = time.time()
        results = self.searcher.search(
            question, 
-            top_k=context_limit,
+            limit=context_limit,
            include_context=True,
            semantic_weight=0.7,
-            bm25_weight=0.3,
+            bm25_weight=0.3
        )
        search_time = time.time() - search_start
@ -127,17 +128,17 @@ class CodeExplorer:
        # Add to conversation history
        self.current_session.add_exchange(question, results, synthesis)
-        # Streaming already displayed the response
+        # Format response with exploration context
-        # Just return minimal status for caller
+        response = self._format_exploration_response(
-        session_duration = time.time() - self.current_session.started_at
+            question, synthesis, len(results), search_time, synthesis_time
-        exchange_count = len(self.current_session.conversation_history)
+        )
-        status = f"\n📊 Session: {session_duration/60:.1f}m | Question #{exchange_count} | Results: {len(results)} | Time: {search_time+synthesis_time:.1f}s"
+        return response
        return status
    def _build_contextual_prompt(self, question: str, results: List[Any]) -> str:
        """Build a prompt that includes conversation context."""
        # Get recent conversation context (last 3 exchanges)
        context_summary = ""
        if self.current_session.conversation_history:
            recent_exchanges = self.current_session.conversation_history[-3:]
            context_parts = []
@ -148,97 +149,73 @@ class CodeExplorer:
                context_parts.append(f"Previous Q{i}: {prev_q}")
                context_parts.append(f"Previous A{i}: {prev_summary}")
-            # "\n".join(context_parts)  # Unused variable removed
+            context_summary = "\n".join(context_parts)
        # Build search results context
        results_context = []
        for i, result in enumerate(results[:8], 1):
-            # result.file_path if hasattr(result, "file_path") else "unknown"  # Unused variable removed
+            file_path = result.file_path if hasattr(result, 'file_path') else 'unknown'
-            # result.content if hasattr(result, "content") else str(result)  # Unused variable removed
+            content = result.content if hasattr(result, 'content') else str(result)
-            # result.score if hasattr(result, "score") else 0.0  # Unused variable removed
+            score = result.score if hasattr(result, 'score') else 0.0
-            results_context.append(
+            results_context.append(f"""
                """
 Result {i} (Score: {score:.3f}):
 File: {file_path}
 Content: {content[:800]}{'...' if len(content) > 800 else ''}
-"""
+""")
            )
-        # "\n".join(results_context)  # Unused variable removed
+        results_text = "\n".join(results_context)
-        # Get system context for better responses
+        # Create comprehensive exploration prompt
-        # get_system_context(self.project_path)  # Unused variable removed
+        prompt = f"""You are a senior software engineer helping explore and debug code. You have access to thinking mode and conversation context.
        # Create comprehensive exploration prompt with thinking
        prompt = """<think>
 The user asked: "{question}"
 System context: {system_context}
 Let me analyze what they're asking and look at the information I have available.
 From the search results, I can see relevant information about:
 {results_text[:500]}...
 I should think about:
 1. What the user is trying to understand or accomplish
 2. What information from the search results is most relevant
 3. How to explain this in a clear, educational way
 4. What practical next steps would be helpful
 Based on our conversation so far: {context_summary}
 Let me create a helpful response that breaks this down clearly and gives them actionable guidance.
 </think>
 You're a helpful assistant exploring a project with someone. You're good at breaking down complex topics into understandable pieces and explaining things clearly.
 PROJECT: {self.project_path.name}
-PREVIOUS CONVERSATION:
+CONVERSATION CONTEXT:
 {context_summary}
 CURRENT QUESTION: "{question}"
-RELEVANT INFORMATION FOUND:
+SEARCH RESULTS:
 {results_text}
-Please provide a helpful, natural explanation that answers their question. Write as if you're having a friendly conversation with a colleague who's exploring this project.
+Please provide a detailed analysis in JSON format. Think through the problem carefully and consider the conversation context:
-Structure your response to include:
+{{
-1. A clear explanation of what you found and how it answers their question
+    "summary": "2-3 sentences explaining what you found and how it relates to the question",
-2. The most important insights from the information you discovered
+    "key_points": [
-3. Relevant examples or code patterns when helpful
+        "Important insight 1 (reference specific code/files)",
-4. Practical next steps they could take
+        "Important insight 2 (explain relationships)", 
        "Important insight 3 (consider conversation context)"
    ],
    "code_examples": [
        "Relevant code snippet or pattern with explanation",
        "Another important code example with context"
    ],
    "suggested_actions": [
        "Specific next step the developer should take",
        "Follow-up investigation or debugging approach",
        "Potential improvements or fixes"
    ],
    "confidence": 0.85
 }}
-Guidelines:
+Focus on:
- Write in a conversational, friendly tone
+- Deep technical analysis with reasoning
- Be educational but not condescending
+- How this connects to previous questions in our conversation
- Reference specific files and information when helpful
+- Practical debugging/learning insights
- Give practical, actionable suggestions
+- Specific code references and explanations
- Connect everything back to their original question
+- Clear next steps for the developer
- Use natural language, not structured formats
+
- Break complex topics into understandable pieces
+Think carefully about the relationships between code components and how they answer the question in context."""
 """
        return prompt
    def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult:
        """Synthesize results with full context and thinking."""
        try:
-            # Use streaming with thinking visible (don't collapse)
+            # Use thinking-enabled synthesis with lower temperature for exploration
-            response = self.synthesizer._call_ollama(
+            response = self.synthesizer._call_ollama(prompt, temperature=0.2)
                prompt,
                temperature=0.2,
                disable_thinking=False,
                use_streaming=True,
                collapse_thinking=False,
            )
            # ""  # Unused variable removed
            # Streaming already shows thinking and response
            # No need for additional indicators
            if not response:
                return SynthesisResult(
@ -246,16 +223,42 @@ Guidelines:
                    key_points=[],
                    code_examples=[],
                    suggested_actions=["Check LLM service status"],
-                    confidence=0.0,
+                    confidence=0.0
                )
-            # Use natural language response directly
+            # Parse the structured response
            try:
                # Extract JSON from response
                start_idx = response.find('{')
                end_idx = response.rfind('}') + 1
                if start_idx >= 0 and end_idx > start_idx:
                    json_str = response[start_idx:end_idx]
                    data = json.loads(json_str)
                    return SynthesisResult(
-                summary=response.strip(),
+                        summary=data.get('summary', 'Analysis completed'),
-                key_points=[],  # Not used with natural language responses
+                        key_points=data.get('key_points', []),
-                code_examples=[],  # Not used with natural language responses
+                        code_examples=data.get('code_examples', []),
-                suggested_actions=[],  # Not used with natural language responses
+                        suggested_actions=data.get('suggested_actions', []),
-                confidence=0.85,  # High confidence for natural responses
+                        confidence=float(data.get('confidence', 0.7))
                    )
                else:
                    # Fallback: use raw response as summary
                    return SynthesisResult(
                        summary=response[:400] + '...' if len(response) > 400 else response,
                        key_points=[],
                        code_examples=[],
                        suggested_actions=[],
                        confidence=0.5
                    )
            except json.JSONDecodeError:
                return SynthesisResult(
                    summary="Analysis completed but format parsing failed",
                    key_points=[],
                    code_examples=[],
                    suggested_actions=["Try rephrasing your question"],
                    confidence=0.3
                )
        except Exception as e:
@ -265,17 +268,11 @@ Guidelines:
                key_points=[],
                code_examples=[],
                suggested_actions=["Check system status and try again"],
-                confidence=0.0,
+                confidence=0.0
            )
-    def _format_exploration_response(
+    def _format_exploration_response(self, question: str, synthesis: SynthesisResult, 
-        self,
+                                   result_count: int, search_time: float, synthesis_time: float) -> str:
        question: str,
        synthesis: SynthesisResult,
        result_count: int,
        search_time: float,
        synthesis_time: float,
    ) -> str:
        """Format exploration response with context indicators."""
        output = []
@ -285,31 +282,38 @@ Guidelines:
        exchange_count = len(self.current_session.conversation_history)
        output.append(f"🧠 EXPLORATION ANALYSIS (Question #{exchange_count})")
-        output.append(
+        output.append(f"Session: {session_duration/60:.1f}m | Results: {result_count} | "
-            f"Session: {session_duration/60:.1f}m | Results: {result_count} | "
+                     f"Time: {search_time+synthesis_time:.1f}s")
            f"Time: {search_time+synthesis_time:.1f}s"
        )
        output.append("=" * 60)
        output.append("")
-        # Response was already displayed via streaming
+        # Main analysis
-        # Just show completion status
+        output.append(f"📝 Analysis:")
-        output.append("✅ Analysis complete")
+        output.append(f"   {synthesis.summary}")
        output.append("")
        if synthesis.key_points:
            output.append("🔍 Key Insights:")
            for point in synthesis.key_points:
                output.append(f"   • {point}")
            output.append("")
        if synthesis.code_examples:
            output.append("💡 Code Examples:")
            for example in synthesis.code_examples:
                output.append(f"   {example}")
            output.append("")
        if synthesis.suggested_actions:
            output.append("🎯 Next Steps:")
            for action in synthesis.suggested_actions:
                output.append(f"   • {action}")
            output.append("")
        # Confidence and context indicator
-        confidence_emoji = (
+        confidence_emoji = "🟢" if synthesis.confidence > 0.7 else "🟡" if synthesis.confidence > 0.4 else "🔴"
-            "🟢"
+        context_indicator = f" | Context: {exchange_count-1} previous questions" if exchange_count > 1 else ""
-            if synthesis.confidence > 0.7
+        output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}{context_indicator}")
            else "🟡" if synthesis.confidence > 0.4 else "🔴"
        )
        context_indicator = (
            f" | Context: {exchange_count-1} previous questions" if exchange_count > 1 else ""
        )
        output.append(
            f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}{context_indicator}"
        )
        return "\n".join(output)
@ -322,23 +326,19 @@ Guidelines:
        exchange_count = len(self.current_session.conversation_history)
        summary = [
-            "🧠 EXPLORATION SESSION SUMMARY",
+            f"🧠 EXPLORATION SESSION SUMMARY",
-            "=" * 40,
+            f"=" * 40,
            f"Project: {self.project_path.name}",
            f"Session ID: {self.current_session.session_id}",
            f"Duration: {duration/60:.1f} minutes",
            f"Questions explored: {exchange_count}",
-            "",
+            f"",
        ]
        if exchange_count > 0:
            summary.append("📋 Topics explored:")
            for i, exchange in enumerate(self.current_session.conversation_history, 1):
-                question = (
+                question = exchange["question"][:50] + "..." if len(exchange["question"]) > 50 else exchange["question"]
                    exchange["question"][:50] + "..."
                    if len(exchange["question"]) > 50
                    else exchange["question"]
                )
                confidence = exchange["response"]["confidence"]
                summary.append(f"   {i}. {question} (confidence: {confidence:.1%})")
@ -362,7 +362,9 @@ Guidelines:
            # Test with a simple thinking prompt to see response quality
            test_response = self.synthesizer._call_ollama(
-                "Think briefly: what is 2+2?", temperature=0.1, disable_thinking=False
+                "Think briefly: what is 2+2?", 
                temperature=0.1, 
                disable_thinking=False
            )
            if test_response:
@ -378,35 +380,24 @@ Guidelines:
    def _handle_model_restart(self) -> bool:
        """Handle user confirmation and model restart."""
        try:
-            print(
+            print("\n🤔 To ensure best thinking quality, exploration mode works best with a fresh model.")
                "\n🤔 To ensure best thinking quality, exploration mode works best with a fresh model."
            )
            print(f"   Currently running: {self.synthesizer.model}")
-            print(
+            print("\n💡 Stop current model and restart for optimal exploration? (y/N): ", end="", flush=True)
                "\n💡 Stop current model and restart for optimal exploration? (y/N): ",
                end="",
                flush=True,
            )
            response = input().strip().lower()
-            if response in ["y", "yes"]:
+            if response in ['y', 'yes']:
                print("\n🔄 Stopping current model...")
                # Use ollama stop command for clean model restart
                import subprocess
                try:
-                    subprocess.run(
+                    subprocess.run([
-                        ["ollama", "stop", self.synthesizer.model],
+                        "ollama", "stop", self.synthesizer.model
-                        timeout=10,
+                    ], timeout=10, capture_output=True)
                        capture_output=True,
                    )
                    print("✅ Model stopped successfully.")
-                    print(
+                    print("🚀 Exploration mode will restart the model with thinking enabled...")
                        "🚀 Exploration mode will restart the model with thinking enabled..."
                    )
                    # Reset synthesizer initialization to force fresh start
                    self.synthesizer._initialized = False
@ -432,207 +423,7 @@ Guidelines:
            print("\n📝 Continuing with current model...")
            return False
    def _call_ollama_with_thinking(self, prompt: str, temperature: float = 0.3) -> tuple:
        """Call Ollama's streaming generate endpoint, echoing thinking as it arrives.

        Args:
            prompt: Prompt sent unchanged (no ``<no_think>`` marker is added, so
                thinking is left enabled).
            temperature: Sampling temperature placed in the request options.

        Returns:
            ``(final_response, thinking_stream)`` as split by
            ``_extract_thinking``, or ``(None, None)`` when no model is
            available, the server answers with a non-200 status, or any
            exception is raised (the exception is logged, not propagated).
        """
        import requests
        try:
            # Use the synthesizer's model and connection; fall back to the first
            # available model when the configured one is not listed.
            model_to_use = self.synthesizer.model
            if self.synthesizer.model not in self.synthesizer.available_models:
                if self.synthesizer.available_models:
                    model_to_use = self.synthesizer.available_models[0]
                else:
                    return None, None
            # Enable thinking by NOT adding <no_think>
            final_prompt = prompt
            # Get optimal parameters for this model
            from .llm_optimization import get_optimal_ollama_parameters
            optimal_params = get_optimal_ollama_parameters(model_to_use)
            payload = {
                "model": model_to_use,
                "prompt": final_prompt,
                "stream": True,  # Enable streaming for fast response
                "options": {
                    "temperature": temperature,
                    "top_p": optimal_params.get("top_p", 0.9),
                    "top_k": optimal_params.get("top_k", 40),
                    "num_ctx": self.synthesizer._get_optimal_context_size(model_to_use),
                    "num_predict": optimal_params.get("num_predict", 2000),
                    "repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
                    "presence_penalty": optimal_params.get("presence_penalty", 1.0),
                },
            }
            raw_response = ""
            thinking_displayed = False
            # The context manager releases the streamed connection on every exit
            # path, including the early ``break`` on "done" and the non-200
            # return; without it the connection stays checked out of the pool.
            with requests.post(
                f"{self.synthesizer.ollama_url}/api/generate",
                json=payload,
                stream=True,
                timeout=65,
            ) as response:
                if response.status_code != 200:
                    return None, None
                # Collect streaming response (one JSON document per line)
                for line in response.iter_lines():
                    if not line:
                        continue
                    try:
                        chunk_data = json.loads(line.decode("utf-8"))
                    except json.JSONDecodeError:
                        # Skip lines that are not valid JSON
                        continue
                    chunk_text = chunk_data.get("response", "")
                    if chunk_text:
                        raw_response += chunk_text
                        # Display thinking stream as it comes in
                        if not thinking_displayed and "<think>" in raw_response:
                            # Start displaying thinking
                            self._start_thinking_display()
                            thinking_displayed = True
                        if thinking_displayed:
                            self._stream_thinking_chunk(chunk_text)
                    if chunk_data.get("done", False):
                        break
            # Finish thinking display if it was shown
            if thinking_displayed:
                self._end_thinking_display()
            # Extract thinking stream and final response
            thinking_stream, final_response = self._extract_thinking(raw_response)
            return final_response, thinking_stream
        except Exception as e:
            logger.error(f"Thinking-enabled Ollama call failed: {e}")
            return None, None
    def _extract_thinking(self, raw_response: str) -> tuple:
        """Extract thinking content from response."""
        thinking_stream = ""
        final_response = raw_response
        # Look for thinking patterns
        if "<think>" in raw_response and "</think>" in raw_response:
            # Extract thinking content between tags
            start_tag = raw_response.find("<think>")
            end_tag = raw_response.find("</think>") + len("</think>")
            if start_tag != -1 and end_tag != -1:
                thinking_content = raw_response[start_tag + 7 : end_tag - 8]  # Remove tags
                thinking_stream = thinking_content.strip()
                # Remove thinking from final response
                final_response = (raw_response[:start_tag] + raw_response[end_tag:]).strip()
        # Alternative patterns for models that use different thinking formats
        elif "Let me think" in raw_response or "I need to analyze" in raw_response:
            # Simple heuristic: first paragraph might be thinking
            lines = raw_response.split("\n")
            potential_thinking = []
            final_lines = []
            thinking_indicators = [
                "Let me think",
                "I need to",
                "First, I'll",
                "Looking at",
                "Analyzing",
            ]
            in_thinking = False
            for line in lines:
                if any(indicator in line for indicator in thinking_indicators):
                    in_thinking = True
                    potential_thinking.append(line)
                elif in_thinking and (
                    line.startswith("{") or line.startswith("**") or line.startswith("#")
                ):
                    # Likely end of thinking, start of structured response
                    in_thinking = False
                    final_lines.append(line)
                elif in_thinking:
                    potential_thinking.append(line)
                else:
                    final_lines.append(line)
            if potential_thinking:
                thinking_stream = "\n".join(potential_thinking).strip()
                final_response = "\n".join(final_lines).strip()
        return thinking_stream, final_response
    def _start_thinking_display(self):
        """Start the thinking stream display."""
        print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        self._thinking_buffer = ""
        self._in_thinking_tags = False
    def _stream_thinking_chunk(self, chunk: str):
        """Stream a chunk of thinking as it arrives."""
        self._thinking_buffer += chunk
        # Check if we're in thinking tags
        if "<think>" in self._thinking_buffer and not self._in_thinking_tags:
            self._in_thinking_tags = True
            # Display everything after <think>
            start_idx = self._thinking_buffer.find("<think>") + 7
            thinking_content = self._thinking_buffer[start_idx:]
            if thinking_content:
                print(f"\033[2m\033[3m{thinking_content}\033[0m", end="", flush=True)
        elif self._in_thinking_tags and "</think>" not in chunk:
            # We're in thinking mode, display the chunk
            print(f"\033[2m\033[3m{chunk}\033[0m", end="", flush=True)
        elif "</think>" in self._thinking_buffer:
            # End of thinking
            self._in_thinking_tags = False
    def _end_thinking_display(self):
        """End the thinking stream display."""
        print("\n\033[2m\033[3m" + "─" * 40 + "\033[0m")
        print()
    def _display_thinking_stream(self, thinking_stream: str):
        """Display thinking stream in light gray and italic (fallback for non-streaming)."""
        if not thinking_stream:
            return
        print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        # Split into paragraphs and display with proper formatting
        paragraphs = thinking_stream.split("\n\n")
        for para in paragraphs:
            if para.strip():
                # Wrap long lines nicely
                lines = para.strip().split("\n")
                for line in lines:
                    if line.strip():
                        # Light gray and italic
                        print(f"\033[2m\033[3m{line}\033[0m")
                print()  # Paragraph spacing
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        print()
 # Quick test function
 def test_explorer():
    """Test the code explorer."""
    explorer = CodeExplorer(Path("."))
@ -648,6 +439,5 @@ def test_explorer():
    print("\n" + explorer.end_session())
 if __name__ == "__main__":
    test_explorer()
--- a/mini_rag/fast_server.py
+++ b/mini_rag/fast_server.py
@ -12,47 +12,40 @@ Drop-in replacement for the original server with:
 """
 import json
 import logging
 import os
 import socket
 import subprocess
 import sys
 import threading
 import time
-from concurrent.futures import Future, ThreadPoolExecutor
+import subprocess
 import sys
 import os
 import logging
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional
+from typing import Dict, Any, Optional, Callable
-
+from datetime import datetime
-from rich import print as rprint
+from concurrent.futures import ThreadPoolExecutor, Future
 import queue
 # Rich console for beautiful output
 from rich.console import Console
-from rich.live import Live
+from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn, MofNCompleteColumn
 from rich.panel import Panel
 from rich.progress import (
    BarColumn,
    MofNCompleteColumn,
    Progress,
    SpinnerColumn,
    TextColumn,
    TimeRemainingColumn,
 )
 from rich.table import Table
 from rich.live import Live
 from rich import print as rprint
 # Fix Windows console first
-if sys.platform == "win32":
+if sys.platform == 'win32':
-    os.environ["PYTHONUTF8"] = "1"
+    os.environ['PYTHONUTF8'] = '1'
    try:
        from .windows_console_fix import fix_windows_console
        fix_windows_console()
-    except (ImportError, OSError):
+    except:
        pass
 from .indexer import ProjectIndexer
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .performance import PerformanceMonitor
 from .search import CodeSearcher
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .indexer import ProjectIndexer
 from .performance import PerformanceMonitor
 logger = logging.getLogger(__name__)
 console = Console()
@ -96,14 +89,14 @@ class ServerStatus:
    def get_status(self) -> Dict[str, Any]:
        """Get complete status as dict"""
        return {
-            "phase": self.phase,
+            'phase': self.phase,
-            "progress": self.progress,
+            'progress': self.progress,
-            "message": self.message,
+            'message': self.message,
-            "ready": self.ready,
+            'ready': self.ready,
-            "error": self.error,
+            'error': self.error,
-            "uptime": time.time() - self.start_time,
+            'uptime': time.time() - self.start_time,
-            "health_checks": self.health_checks,
+            'health_checks': self.health_checks,
-            "details": self.details,
+            'details': self.details
        }
@ -158,7 +151,7 @@ class FastRAGServer:
            # Quick port check first
            test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            test_sock.settimeout(1.0)  # Faster timeout
-            result = test_sock.connect_ex(("localhost", self.port))
+            result = test_sock.connect_ex(('localhost', self.port))
            test_sock.close()
            if result != 0:  # Port is free
@ -168,43 +161,36 @@ class FastRAGServer:
            self.status.update("port_cleanup", 10, f"Clearing port {self.port}...")
            self._notify_status()
-            if sys.platform == "win32":
+            if sys.platform == 'win32':
                # Windows: Enhanced process killing
-                cmd = ["netstat", "-ano"]
+                cmd = ['netstat', '-ano']
                result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
-                for line in result.stdout.split("\n"):
+                for line in result.stdout.split('\n'):
-                    if f":{self.port}" in line and "LISTENING" in line:
+                    if f':{self.port}' in line and 'LISTENING' in line:
                        parts = line.split()
                        if len(parts) >= 5:
                            pid = parts[-1]
                            console.print(f"[dim]Killing process {pid}[/dim]")
-                            subprocess.run(
+                            subprocess.run(['taskkill', '/PID', pid, '/F'], 
-                                ["taskkill", "/PID", pid, "/F"],
+                                         capture_output=True, timeout=3)
                                capture_output=True,
                                timeout=3,
                            )
                            time.sleep(0.5)  # Reduced wait time
                            break
            else:
                # Unix/Linux: Enhanced process killing
-                result = subprocess.run(
+                result = subprocess.run(['lsof', '-ti', f':{self.port}'], 
-                    ["lso", "-ti", f":{self.port}"],
+                                      capture_output=True, text=True, timeout=3)
                    capture_output=True,
                    text=True,
                    timeout=3,
                )
                if result.stdout.strip():
                    pids = result.stdout.strip().split()
                    for pid in pids:
                        console.print(f"[dim]Killing process {pid}[/dim]")
-                        subprocess.run(["kill", "-9", pid], capture_output=True)
+                        subprocess.run(['kill', '-9', pid], capture_output=True)
                    time.sleep(0.5)
            # Verify port is free
            test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            test_sock.settimeout(1.0)
-            result = test_sock.connect_ex(("localhost", self.port))
+            result = test_sock.connect_ex(('localhost', self.port))
            test_sock.close()
            if result == 0:
@ -220,30 +206,25 @@ class FastRAGServer:
    def _check_indexing_needed(self) -> bool:
        """Quick check if indexing is needed"""
-        rag_dir = self.project_path / ".mini-rag"
+        rag_dir = self.project_path / '.mini-rag'
        if not rag_dir.exists():
            return True
        # Check if database exists and is not empty
-        db_path = rag_dir / "code_vectors.lance"
+        db_path = rag_dir / 'code_vectors.lance'
        if not db_path.exists():
            return True
        # Quick file count check
        try:
            import lancedb
        except ImportError:
            # If LanceDB not available, assume index is empty and needs creation
            return True
        try:
            db = lancedb.connect(rag_dir)
-            if "code_vectors" not in db.table_names():
+            if 'code_vectors' not in db.table_names():
                return True
-            table = db.open_table("code_vectors")
+            table = db.open_table('code_vectors')
            count = table.count_rows()
            return count == 0
-        except (OSError, IOError, ValueError, AttributeError):
+        except:
            return True
    def _fast_index(self) -> bool:
@ -256,7 +237,7 @@ class FastRAGServer:
            self.indexer = ProjectIndexer(
                self.project_path,
                embedder=self.embedder,  # Reuse loaded embedder
-                max_workers=min(4, os.cpu_count() or 2),
+                max_workers=min(4, os.cpu_count() or 2)
            )
            console.print("\n[bold cyan]🚀 Fast Indexing Starting...[/bold cyan]")
@ -281,14 +262,11 @@ class FastRAGServer:
                    if total_files == 0:
                        self.status.update("indexing", 80, "Index up to date")
-                        return {
+                        return {'files_indexed': 0, 'chunks_created': 0, 'time_taken': 0}
                            "files_indexed": 0,
                            "chunks_created": 0,
                            "time_taken": 0,
                        }
                    task = progress.add_task(
-                        f"[cyan]Indexing {total_files} files...", total=total_files
+                        f"[cyan]Indexing {total_files} files...",
                        total=total_files
                    )
                    # Track progress by hooking into the processor
@ -299,11 +277,8 @@ class FastRAGServer:
                        while processed_count < total_files and self.running:
                            time.sleep(0.1)  # Fast polling
                            current_progress = (processed_count / total_files) * 60 + 20
-                            self.status.update(
+                            self.status.update("indexing", current_progress, 
-                                "indexing",
+                                             f"Indexed {processed_count}/{total_files} files")
                                current_progress,
                                f"Indexed {processed_count}/{total_files} files",
                            )
                            progress.update(task, completed=processed_count)
                            self._notify_status()
@ -334,18 +309,13 @@ class FastRAGServer:
                # Run indexing
                stats = self.indexer.index_project(force_reindex=False)
-                self.status.update(
+                self.status.update("indexing", 80, 
                    "indexing",
                    80,
                                 f"Indexed {stats.get('files_indexed', 0)} files, "
-                    f"created {stats.get('chunks_created', 0)} chunks",
+                                 f"created {stats.get('chunks_created', 0)} chunks")
                )
                self._notify_status()
-                console.print(
+                console.print(f"\n[green]✅ Indexing complete: {stats.get('files_indexed', 0)} files, "
-                    f"\n[green]✅ Indexing complete: {stats.get('files_indexed', 0)} files, "
+                            f"{stats.get('chunks_created', 0)} chunks in {stats.get('time_taken', 0):.1f}s[/green]")
                    f"{stats.get('chunks_created', 0)} chunks in {stats.get('time_taken', 0):.1f}s[/green]"
                )
                return True
@ -372,9 +342,7 @@ class FastRAGServer:
            ) as progress:
                # Task 1: Load embedder (this takes the most time)
-                embedder_task = progress.add_task(
+                embedder_task = progress.add_task("[cyan]Loading embedding model...", total=100)
                    "[cyan]Loading embedding model...", total=100
                )
                def load_embedder():
                    self.status.update("embedder", 25, "Loading embedding model...")
@ -428,46 +396,46 @@ class FastRAGServer:
            # Check 1: Embedder functionality
            if self.embedder:
                test_embedding = self.embedder.embed_code("def test(): pass")
-                checks["embedder"] = {
+                checks['embedder'] = {
-                    "status": "healthy",
+                    'status': 'healthy',
-                    "embedding_dim": len(test_embedding),
+                    'embedding_dim': len(test_embedding),
-                    "model": getattr(self.embedder, "model_name", "unknown"),
+                    'model': getattr(self.embedder, 'model_name', 'unknown')
                }
            else:
-                checks["embedder"] = {"status": "missing"}
+                checks['embedder'] = {'status': 'missing'}
            # Check 2: Database connectivity
            if self.searcher:
                stats = self.searcher.get_statistics()
-                checks["database"] = {
+                checks['database'] = {
-                    "status": "healthy",
+                    'status': 'healthy',
-                    "chunks": stats.get("total_chunks", 0),
+                    'chunks': stats.get('total_chunks', 0),
-                    "languages": len(stats.get("languages", {})),
+                    'languages': len(stats.get('languages', {}))
                }
            else:
-                checks["database"] = {"status": "missing"}
+                checks['database'] = {'status': 'missing'}
            # Check 3: Search functionality
            if self.searcher:
                test_results = self.searcher.search("test query", top_k=1)
-                checks["search"] = {
+                checks['search'] = {
-                    "status": "healthy",
+                    'status': 'healthy',
-                    "test_results": len(test_results),
+                    'test_results': len(test_results)
                }
            else:
-                checks["search"] = {"status": "unavailable"}
+                checks['search'] = {'status': 'unavailable'}
            # Check 4: Port availability
            try:
                test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-                test_sock.bind(("localhost", self.port))
+                test_sock.bind(('localhost', self.port))
                test_sock.close()
-                checks["port"] = {"status": "available"}
+                checks['port'] = {'status': 'available'}
-            except (ConnectionError, OSError, TypeError, ValueError, socket.error):
+            except:
-                checks["port"] = {"status": "occupied"}
+                checks['port'] = {'status': 'occupied'}
        except Exception as e:
-            checks["health_check_error"] = str(e)
+            checks['health_check_error'] = str(e)
        self.status.health_checks = checks
        self.last_health_check = time.time()
@ -479,10 +447,10 @@ class FastRAGServer:
        table.add_column("Details", style="dim")
        for component, info in checks.items():
-            status = info.get("status", "unknown")
+            status = info.get('status', 'unknown')
-            details = ", ".join([f"{k}={v}" for k, v in info.items() if k != "status"])
+            details = ', '.join([f"{k}={v}" for k, v in info.items() if k != 'status'])
-            color = "green" if status in ["healthy", "available"] else "yellow"
+            color = "green" if status in ['healthy', 'available'] else "yellow"
            table.add_row(component, f"[{color}]{status}[/{color}]", details)
        console.print(table)
@ -506,7 +474,7 @@ class FastRAGServer:
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-            self.socket.bind(("localhost", self.port))
+            self.socket.bind(('localhost', self.port))
            self.socket.listen(10)  # Increased backlog
            self.running = True
@ -518,15 +486,15 @@ class FastRAGServer:
            # Display ready status
            panel = Panel(
-                "[bold green]🎉 RAG Server Ready![/bold green]\n\n"
+                f"[bold green]🎉 RAG Server Ready![/bold green]\n\n"
                f"🌐 Address: localhost:{self.port}\n"
                f"⚡ Startup Time: {total_time:.2f}s\n"
                f"📁 Project: {self.project_path.name}\n"
                f"🧠 Model: {getattr(self.embedder, 'model_name', 'default')}\n"
                f"📊 Chunks Indexed: {self.status.health_checks.get('database', {}).get('chunks', 0)}\n\n"
-                "[dim]Ready to serve the development environment queries...[/dim]",
+                f"[dim]Ready to serve the development environment queries...[/dim]",
                title="🚀 Server Status",
-                border_style="green",
+                border_style="green"
            )
            console.print(panel)
@ -574,21 +542,24 @@ class FastRAGServer:
            request = json.loads(data)
            # Handle different request types
-            if request.get("command") == "shutdown":
+            if request.get('command') == 'shutdown':
                console.print("\n[yellow]🛑 Shutdown requested[/yellow]")
-                response = {"success": True, "message": "Server shutting down"}
+                response = {'success': True, 'message': 'Server shutting down'}
                self._send_json(client, response)
                self.stop()
                return
-            if request.get("command") == "status":
+            if request.get('command') == 'status':
-                response = {"success": True, "status": self.status.get_status()}
+                response = {
                    'success': True,
                    'status': self.status.get_status()
                }
                self._send_json(client, response)
                return
            # Handle search requests
-            query = request.get("query", "")
+            query = request.get('query', '')
-            top_k = request.get("top_k", 10)
+            top_k = request.get('top_k', 10)
            if not query:
                raise ValueError("Empty query")
@ -596,9 +567,7 @@ class FastRAGServer:
            self.query_count += 1
            # Enhanced query logging
-            console.print(
+            console.print(f"[blue]🔍 Query #{self.query_count}:[/blue] [dim]{query[:50]}{'...' if len(query) > 50 else ''}[/dim]")
                f"[blue]🔍 Query #{self.query_count}:[/blue] [dim]{query[:50]}{'...' if len(query) > 50 else ''}[/dim]"
            )
            # Perform search with timing
            start = time.time()
@ -607,81 +576,79 @@ class FastRAGServer:
            # Enhanced response
            response = {
-                "success": True,
+                'success': True,
-                "query": query,
+                'query': query,
-                "count": len(results),
+                'count': len(results),
-                "search_time_ms": int(search_time * 1000),
+                'search_time_ms': int(search_time * 1000),
-                "results": [r.to_dict() for r in results],
+                'results': [r.to_dict() for r in results],
-                "server_uptime": int(time.time() - self.status.start_time),
+                'server_uptime': int(time.time() - self.status.start_time),
-                "total_queries": self.query_count,
+                'total_queries': self.query_count,
-                "server_status": "ready",
+                'server_status': 'ready'
            }
            self._send_json(client, response)
            # Enhanced result logging
-            console.print(
+            console.print(f"[green]✅ {len(results)} results in {search_time*1000:.0f}ms[/green]")
                f"[green]✅ {len(results)} results in {search_time*1000:.0f}ms[/green]"
            )
        except Exception as e:
            error_msg = str(e)
            logger.error(f"Client handler error: {error_msg}")
            error_response = {
-                "success": False,
+                'success': False,
-                "error": error_msg,
+                'error': error_msg,
-                "error_type": type(e).__name__,
+                'error_type': type(e).__name__,
-                "server_status": self.status.phase,
+                'server_status': self.status.phase
            }
            try:
                self._send_json(client, error_response)
-            except (TypeError, ValueError):
+            except:
                pass
            console.print(f"[red]❌ Query failed: {error_msg}[/red]")
        finally:
            try:
                client.close()
-            except (ConnectionError, OSError, TypeError, ValueError, socket.error):
+            except:
                pass
    def _receive_json(self, sock: socket.socket) -> str:
        """Receive JSON with length prefix and timeout handling"""
        try:
            # Receive length (4 bytes)
-            length_data = b""
+            length_data = b''
            while len(length_data) < 4:
                chunk = sock.recv(4 - len(length_data))
                if not chunk:
                    raise ConnectionError("Connection closed while receiving length")
                length_data += chunk
-            length = int.from_bytes(length_data, "big")
+            length = int.from_bytes(length_data, 'big')
            if length > 10_000_000:  # 10MB limit
                raise ValueError(f"Message too large: {length} bytes")
            # Receive data
-            data = b""
+            data = b''
            while len(data) < length:
                chunk = sock.recv(min(65536, length - len(data)))
                if not chunk:
                    raise ConnectionError("Connection closed while receiving data")
                data += chunk
-            return data.decode("utf-8")
+            return data.decode('utf-8')
        except socket.timeout:
            raise ConnectionError("Timeout while receiving data")
    def _send_json(self, sock: socket.socket, data: dict):
        """Send JSON with length prefix"""
-        json_str = json.dumps(data, ensure_ascii=False, separators=(",", ":"))
+        json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
-        json_bytes = json_str.encode("utf-8")
+        json_bytes = json_str.encode('utf-8')
        # Send length prefix
        length = len(json_bytes)
-        sock.send(length.to_bytes(4, "big"))
+        sock.send(length.to_bytes(4, 'big'))
        # Send data
        sock.sendall(json_bytes)
@ -695,7 +662,7 @@ class FastRAGServer:
        if self.socket:
            try:
                self.socket.close()
-            except (ConnectionError, OSError, TypeError, ValueError, socket.error):
+            except:
                pass
        # Shutdown executor
@ -705,8 +672,6 @@ class FastRAGServer:
 # Enhanced client with status monitoring
 class FastRAGClient:
    """Enhanced client with better error handling and status monitoring"""
@ -719,9 +684,9 @@ class FastRAGClient:
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(self.timeout)
-            sock.connect(("localhost", self.port))
+            sock.connect(('localhost', self.port))
-            request = {"query": query, "top_k": top_k}
+            request = {'query': query, 'top_k': top_k}
            self._send_json(sock, request)
            data = self._receive_json(sock)
@ -732,27 +697,31 @@ class FastRAGClient:
        except ConnectionRefusedError:
            return {
-                "success": False,
+                'success': False,
-                "error": "RAG server not running. Start with: python -m mini_rag server",
+                'error': 'RAG server not running. Start with: python -m mini_rag server',
-                "error_type": "connection_refused",
+                'error_type': 'connection_refused'
            }
        except socket.timeout:
            return {
-                "success": False,
+                'success': False,
-                "error": f"Request timed out after {self.timeout}s",
+                'error': f'Request timed out after {self.timeout}s',
-                "error_type": "timeout",
+                'error_type': 'timeout'
            }
        except Exception as e:
-            return {"success": False, "error": str(e), "error_type": type(e).__name__}
+            return {
                'success': False,
                'error': str(e),
                'error_type': type(e).__name__
            }
    def get_status(self) -> Dict[str, Any]:
        """Get detailed server status"""
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(5.0)
-            sock.connect(("localhost", self.port))
+            sock.connect(('localhost', self.port))
-            request = {"command": "status"}
+            request = {'command': 'status'}
            self._send_json(sock, request)
            data = self._receive_json(sock)
@ -762,14 +731,18 @@ class FastRAGClient:
            return response
        except Exception as e:
-            return {"success": False, "error": str(e), "server_running": False}
+            return {
                'success': False,
                'error': str(e),
                'server_running': False
            }
    def is_running(self) -> bool:
        """Enhanced server detection"""
        try:
            status = self.get_status()
-            return status.get("success", False)
+            return status.get('success', False)
-        except (TypeError, ValueError):
+        except:
            return False
    def shutdown(self) -> Dict[str, Any]:
@ -777,9 +750,9 @@ class FastRAGClient:
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.settimeout(10.0)
-            sock.connect(("localhost", self.port))
+            sock.connect(('localhost', self.port))
-            request = {"command": "shutdown"}
+            request = {'command': 'shutdown'}
            self._send_json(sock, request)
            data = self._receive_json(sock)
@ -789,38 +762,41 @@ class FastRAGClient:
            return response
        except Exception as e:
-            return {"success": False, "error": str(e)}
+            return {
                'success': False,
                'error': str(e)
            }
    def _send_json(self, sock: socket.socket, data: dict):
        """Send JSON with length prefix"""
-        json_str = json.dumps(data, ensure_ascii=False, separators=(",", ":"))
+        json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
-        json_bytes = json_str.encode("utf-8")
+        json_bytes = json_str.encode('utf-8')
        length = len(json_bytes)
-        sock.send(length.to_bytes(4, "big"))
+        sock.send(length.to_bytes(4, 'big'))
        sock.sendall(json_bytes)
    def _receive_json(self, sock: socket.socket) -> str:
        """Receive JSON with length prefix"""
        # Receive length
-        length_data = b""
+        length_data = b''
        while len(length_data) < 4:
            chunk = sock.recv(4 - len(length_data))
            if not chunk:
                raise ConnectionError("Connection closed")
            length_data += chunk
-        length = int.from_bytes(length_data, "big")
+        length = int.from_bytes(length_data, 'big')
        # Receive data
-        data = b""
+        data = b''
        while len(data) < length:
            chunk = sock.recv(min(65536, length - len(data)))
            if not chunk:
                raise ConnectionError("Connection closed")
            data += chunk
-        return data.decode("utf-8")
+        return data.decode('utf-8')
 def start_fast_server(project_path: Path, port: int = 7777, auto_index: bool = True):
--- a/mini_rag/indexer.py
+++ b/mini_rag/indexer.py
@ -3,39 +3,23 @@ Parallel indexing engine for efficient codebase processing.
 Handles file discovery, chunking, embedding, and storage.
 """
 import hashlib
 import json
 import logging
 import os
 import json
 import hashlib
 import logging
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Set, Tuple
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 import numpy as np
 import lancedb
 import pandas as pd
 import pyarrow as pa
 from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn
 from rich.console import Console
 from rich.progress import (
    BarColumn,
    Progress,
    SpinnerColumn,
    TextColumn,
    TimeRemainingColumn,
 )
 # Optional LanceDB import
 try:
    import lancedb
    import pyarrow as pa
    LANCEDB_AVAILABLE = True
 except ImportError:
    lancedb = None
    pa = None
    LANCEDB_AVAILABLE = False
 from .chunker import CodeChunker
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .chunker import CodeChunker, CodeChunk
 from .path_handler import normalize_path, normalize_relative_path
 logger = logging.getLogger(__name__)
@ -45,13 +29,11 @@ console = Console()
 class ProjectIndexer:
    """Indexes a project directory for semantic search."""
-    def __init__(
+    def __init__(self, 
        self,
                 project_path: Path,
                 embedder: Optional[CodeEmbedder] = None,
                 chunker: Optional[CodeChunker] = None,
-        max_workers: int = 4,
+                 max_workers: int = 4):
    ):
        """
        Initialize the indexer.
@ -62,9 +44,9 @@ class ProjectIndexer:
            max_workers: Number of parallel workers for indexing
        """
        self.project_path = Path(project_path).resolve()
-        self.rag_dir = self.project_path / ".mini-rag"
+        self.rag_dir = self.project_path / '.mini-rag'
-        self.manifest_path = self.rag_dir / "manifest.json"
+        self.manifest_path = self.rag_dir / 'manifest.json'
-        self.config_path = self.rag_dir / "config.json"
+        self.config_path = self.rag_dir / 'config.json'
        # Create RAG directory if it doesn't exist
        self.rag_dir.mkdir(exist_ok=True)
@ -81,75 +63,26 @@ class ProjectIndexer:
        # File patterns to include/exclude
        self.include_patterns = [
            # Code files
-            "*.py",
+            '*.py', '*.js', '*.jsx', '*.ts', '*.tsx',
-            "*.js",
+            '*.go', '*.java', '*.cpp', '*.c', '*.cs',
-            "*.jsx",
+            '*.rs', '*.rb', '*.php', '*.swift', '*.kt',
-            "*.ts",
+            '*.scala', '*.r', '*.m', '*.h', '*.hpp',
            "*.tsx",
            "*.go",
            "*.java",
            "*.cpp",
            "*.c",
            "*.cs",
            "*.rs",
            "*.rb",
            "*.php",
            "*.swift",
            "*.kt",
            "*.scala",
            "*.r",
            "*.m",
            "*.h",
            "*.hpp",
            # Documentation files
-            "*.md",
+            '*.md', '*.markdown', '*.rst', '*.txt',
-            "*.markdown",
+            '*.adoc', '*.asciidoc',
            "*.rst",
            "*.txt",
            "*.adoc",
            "*.asciidoc",
            # Config files
-            "*.json",
+            '*.json', '*.yaml', '*.yml', '*.toml', '*.ini',
-            "*.yaml",
+            '*.xml', '*.conf', '*.config',
            "*.yml",
            "*.toml",
            "*.ini",
            "*.xml",
            "*.con",
            "*.config",
            # Other text files
-            "README",
+            'README', 'LICENSE', 'CHANGELOG', 'AUTHORS',
-            "LICENSE",
+            'CONTRIBUTING', 'TODO', 'NOTES'
            "CHANGELOG",
            "AUTHORS",
            "CONTRIBUTING",
            "TODO",
            "NOTES",
        ]
        self.exclude_patterns = [
-            "__pycache__",
+            '__pycache__', '.git', 'node_modules', '.venv', 'venv',
-            ".git",
+            'env', 'dist', 'build', 'target', '.idea', '.vscode',
-            "node_modules",
+            '*.pyc', '*.pyo', '*.pyd', '.DS_Store', '*.so', '*.dll',
-            ".venv",
+            '*.dylib', '*.exe', '*.bin', '*.log', '*.lock'
            "venv",
            "env",
            "dist",
            "build",
            "target",
            ".idea",
            ".vscode",
            "*.pyc",
            "*.pyo",
            "*.pyd",
            ".DS_Store",
            "*.so",
            "*.dll",
            "*.dylib",
            "*.exe",
            "*.bin",
            "*.log",
            "*.lock",
        ]
        # Load existing manifest if it exists
@ -159,23 +92,23 @@ class ProjectIndexer:
        """Load existing manifest or create new one."""
        if self.manifest_path.exists():
            try:
-                with open(self.manifest_path, "r") as f:
+                with open(self.manifest_path, 'r') as f:
                    return json.load(f)
            except Exception as e:
                logger.warning(f"Failed to load manifest: {e}")
        return {
-            "version": "1.0",
+            'version': '1.0',
-            "indexed_at": None,
+            'indexed_at': None,
-            "file_count": 0,
+            'file_count': 0,
-            "chunk_count": 0,
+            'chunk_count': 0,
-            "files": {},
+            'files': {}
        }
    def _save_manifest(self):
        """Save manifest to disk."""
        try:
-            with open(self.manifest_path, "w") as f:
+            with open(self.manifest_path, 'w') as f:
                json.dump(self.manifest, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save manifest: {e}")
@ -184,7 +117,7 @@ class ProjectIndexer:
        """Load or create comprehensive configuration."""
        if self.config_path.exists():
            try:
-                with open(self.config_path, "r") as f:
+                with open(self.config_path, 'r') as f:
                    config = json.load(f)
                    # Apply any loaded settings
                    self._apply_config(config)
@ -197,57 +130,49 @@ class ProjectIndexer:
            "project": {
                "name": self.project_path.name,
                "description": f"RAG index for {self.project_path.name}",
-                "created_at": datetime.now().isoformat(),
+                "created_at": datetime.now().isoformat()
            },
            "embedding": {
                "provider": "ollama",
-                "model": (
+                "model": self.embedder.model_name if hasattr(self.embedder, 'model_name') else 'nomic-embed-text:latest',
                    self.embedder.model_name
                    if hasattr(self.embedder, "model_name")
                    else "nomic-embed-text:latest"
                ),
                "base_url": "http://localhost:11434",
                "batch_size": 4,
-                "max_workers": 4,
+                "max_workers": 4
            },
            "chunking": {
-                "max_size": (
+                "max_size": self.chunker.max_chunk_size if hasattr(self.chunker, 'max_chunk_size') else 2500,
-                    self.chunker.max_chunk_size
+                "min_size": self.chunker.min_chunk_size if hasattr(self.chunker, 'min_chunk_size') else 100,
                    if hasattr(self.chunker, "max_chunk_size")
                    else 2500
                ),
                "min_size": (
                    self.chunker.min_chunk_size
                    if hasattr(self.chunker, "min_chunk_size")
                    else 100
                ),
                "overlap": 100,
-                "strategy": "semantic",
+                "strategy": "semantic"
            },
            "streaming": {
                "enabled": True,
                "threshold_mb": 1,
                "chunk_size_kb": 64
            },
            "streaming": {"enabled": True, "threshold_mb": 1, "chunk_size_kb": 64},
            "files": {
                "include_patterns": self.include_patterns,
                "exclude_patterns": self.exclude_patterns,
                "max_file_size_mb": 50,
-                "encoding_fallbacks": ["utf-8", "latin-1", "cp1252", "utf-8-sig"],
+                "encoding_fallbacks": ["utf-8", "latin-1", "cp1252", "utf-8-sig"]
            },
            "indexing": {
                "parallel_workers": self.max_workers,
                "incremental": True,
                "track_changes": True,
-                "skip_binary": True,
+                "skip_binary": True
            },
            "search": {
-                "default_top_k": 10,
+                "default_limit": 10,
                "similarity_threshold": 0.7,
                "hybrid_search": True,
-                "bm25_weight": 0.3,
+                "bm25_weight": 0.3
            },
            "storage": {
                "compress_vectors": False,
                "index_type": "ivf_pq",
-                "cleanup_old_chunks": True,
+                "cleanup_old_chunks": True
-            },
+            }
        }
        # Save comprehensive config with nice formatting
@ -258,41 +183,31 @@ class ProjectIndexer:
        """Apply configuration settings to the indexer."""
        try:
            # Apply embedding settings
-            if "embedding" in config:
+            if 'embedding' in config:
-                emb_config = config["embedding"]
+                emb_config = config['embedding']
-                if hasattr(self.embedder, "model_name"):
+                if hasattr(self.embedder, 'model_name'):
-                    self.embedder.model_name = emb_config.get(
+                    self.embedder.model_name = emb_config.get('model', self.embedder.model_name)
-                        "model", self.embedder.model_name
+                if hasattr(self.embedder, 'base_url'):
-                    )
+                    self.embedder.base_url = emb_config.get('base_url', self.embedder.base_url)
                if hasattr(self.embedder, "base_url"):
                    self.embedder.base_url = emb_config.get("base_url", self.embedder.base_url)
            # Apply chunking settings
-            if "chunking" in config:
+            if 'chunking' in config:
-                chunk_config = config["chunking"]
+                chunk_config = config['chunking']
-                if hasattr(self.chunker, "max_chunk_size"):
+                if hasattr(self.chunker, 'max_chunk_size'):
-                    self.chunker.max_chunk_size = chunk_config.get(
+                    self.chunker.max_chunk_size = chunk_config.get('max_size', self.chunker.max_chunk_size)
-                        "max_size", self.chunker.max_chunk_size
+                if hasattr(self.chunker, 'min_chunk_size'):
-                    )
+                    self.chunker.min_chunk_size = chunk_config.get('min_size', self.chunker.min_chunk_size)
                if hasattr(self.chunker, "min_chunk_size"):
                    self.chunker.min_chunk_size = chunk_config.get(
                        "min_size", self.chunker.min_chunk_size
                    )
            # Apply file patterns
-            if "files" in config:
+            if 'files' in config:
-                file_config = config["files"]
+                file_config = config['files']
-                self.include_patterns = file_config.get(
+                self.include_patterns = file_config.get('include_patterns', self.include_patterns)
-                    "include_patterns", self.include_patterns
+                self.exclude_patterns = file_config.get('exclude_patterns', self.exclude_patterns)
                )
                self.exclude_patterns = file_config.get(
                    "exclude_patterns", self.exclude_patterns
                )
            # Apply indexing settings
-            if "indexing" in config:
+            if 'indexing' in config:
-                idx_config = config["indexing"]
+                idx_config = config['indexing']
-                self.max_workers = idx_config.get("parallel_workers", self.max_workers)
+                self.max_workers = idx_config.get('parallel_workers', self.max_workers)
        except Exception as e:
            logger.warning(f"Failed to apply some config settings: {e}")
@ -305,10 +220,10 @@ class ProjectIndexer:
                "_comment": "RAG System Configuration - Edit this file to customize indexing behavior",
                "_version": "2.0",
                "_docs": "See README.md for detailed configuration options",
-                **config,
+                **config
            }
-            with open(self.config_path, "w") as f:
+            with open(self.config_path, 'w') as f:
                json.dump(config_with_comments, f, indent=2, sort_keys=True)
            logger.info(f"Configuration saved to {self.config_path}")
@ -334,7 +249,7 @@ class ProjectIndexer:
        try:
            if file_path.stat().st_size > 1_000_000:
                return False
-        except (OSError, IOError):
+        except:
            return False
        # Check exclude patterns first
@ -358,33 +273,21 @@ class ProjectIndexer:
        """Check if an extensionless file should be indexed based on content."""
        try:
            # Read first 1KB to check content
-            with open(file_path, "rb") as f:
+            with open(file_path, 'rb') as f:
                first_chunk = f.read(1024)
            # Check if it's a text file (not binary)
            try:
-                text_content = first_chunk.decode("utf-8")
+                text_content = first_chunk.decode('utf-8')
            except UnicodeDecodeError:
                return False  # Binary file, skip
            # Check for code indicators
            code_indicators = [
-                "#!/usr/bin/env python",
+                '#!/usr/bin/env python', '#!/usr/bin/python', '#!.*python',
-                "#!/usr/bin/python",
+                'import ', 'from ', 'def ', 'class ', 'if __name__',
-                "#!.*python",
+                'function ', 'var ', 'const ', 'let ', 'package main',
-                "import ",
+                'public class', 'private class', 'public static void'
                "from ",
                "def ",
                "class ",
                "if __name__",
                "function ",
                "var ",
                "const ",
                "let ",
                "package main",
                "public class",
                "private class",
                "public static void",
            ]
            text_lower = text_content.lower()
@ -394,15 +297,8 @@ class ProjectIndexer:
            # Check for configuration files
            config_indicators = [
-                "#!/bin/bash",
+                '#!/bin/bash', '#!/bin/sh', '[', 'version =', 'name =',
-                "#!/bin/sh",
+                'description =', 'author =', '<configuration>', '<?xml'
                "[",
                "version =",
                "name =",
                "description =",
                "author =",
                "<configuration>",
                "<?xml",
            ]
            for indicator in config_indicators:
@ -419,17 +315,17 @@ class ProjectIndexer:
        file_str = normalize_relative_path(file_path, self.project_path)
        # Not in manifest - needs indexing
-        if file_str not in self.manifest["files"]:
+        if file_str not in self.manifest['files']:
            return True
-        file_info = self.manifest["files"][file_str]
+        file_info = self.manifest['files'][file_str]
        try:
            stat = file_path.stat()
            # Quick checks first (no I/O) - check size and modification time
-            stored_size = file_info.get("size", 0)
+            stored_size = file_info.get('size', 0)
-            stored_mtime = file_info.get("mtime", 0)
+            stored_mtime = file_info.get('mtime', 0)
            current_size = stat.st_size
            current_mtime = stat.st_mtime
@ -441,7 +337,7 @@ class ProjectIndexer:
            # Size and mtime same - check hash only if needed (for paranoia)
            # This catches cases where content changed but mtime didn't (rare but possible)
            current_hash = self._get_file_hash(file_path)
-            stored_hash = file_info.get("hash", "")
+            stored_hash = file_info.get('hash', '')
            return current_hash != stored_hash
@ -452,11 +348,11 @@ class ProjectIndexer:
    def _cleanup_removed_files(self):
        """Remove entries for files that no longer exist from manifest and database."""
-        if "files" not in self.manifest:
+        if 'files' not in self.manifest:
            return
        removed_files = []
-        for file_str in list(self.manifest["files"].keys()):
+        for file_str in list(self.manifest['files'].keys()):
            file_path = self.project_path / file_str
            if not file_path.exists():
                removed_files.append(file_str)
@ -467,14 +363,14 @@ class ProjectIndexer:
            for file_str in removed_files:
                # Remove from database
                try:
-                    if hasattr(self, "table") and self.table:
+                    if hasattr(self, 'table') and self.table:
                        self.table.delete(f"file_path = '{file_str}'")
                        logger.debug(f"Removed chunks for deleted file: {file_str}")
                except Exception as e:
                    logger.warning(f"Could not remove chunks for {file_str}: {e}")
                # Remove from manifest
-                del self.manifest["files"][file_str]
+                del self.manifest['files'][file_str]
            # Save updated manifest
            self._save_manifest()
@ -487,9 +383,7 @@ class ProjectIndexer:
        # Walk through project directory
        for root, dirs, files in os.walk(self.project_path):
            # Skip excluded directories
-            dirs[:] = [
+            dirs[:] = [d for d in dirs if not any(pattern in d for pattern in self.exclude_patterns)]
                d for d in dirs if not any(pattern in d for pattern in self.exclude_patterns)
            ]
            root_path = Path(root)
            for file in files:
@ -500,9 +394,7 @@ class ProjectIndexer:
        return files_to_index
-    def _process_file(
+    def _process_file(self, file_path: Path, stream_threshold: int = 1024 * 1024) -> Optional[List[Dict[str, Any]]]:
        self, file_path: Path, stream_threshold: int = 1024 * 1024
    ) -> Optional[List[Dict[str, Any]]]:
        """Process a single file: read, chunk, embed.
        Args:
@ -518,7 +410,7 @@ class ProjectIndexer:
                content = self._read_file_streaming(file_path)
            else:
                # Read file content normally for small files
-                content = file_path.read_text(encoding="utf-8")
+                content = file_path.read_text(encoding='utf-8')
            # Chunk the file
            chunks = self.chunker.chunk_file(file_path, content)
@ -546,43 +438,39 @@ class ProjectIndexer:
                    )
                record = {
-                    "file_path": normalize_relative_path(file_path, self.project_path),
+                    'file_path': normalize_relative_path(file_path, self.project_path),
-                    "absolute_path": normalize_path(file_path),
+                    'absolute_path': normalize_path(file_path),
-                    "chunk_id": f"{file_path.stem}_{i}",
+                    'chunk_id': f"{file_path.stem}_{i}",
-                    "content": chunk.content,
+                    'content': chunk.content,
-                    "start_line": int(chunk.start_line),
+                    'start_line': int(chunk.start_line),
-                    "end_line": int(chunk.end_line),
+                    'end_line': int(chunk.end_line),
-                    "chunk_type": chunk.chunk_type,
+                    'chunk_type': chunk.chunk_type,
-                    "name": chunk.name or f"chunk_{i}",
+                    'name': chunk.name or f"chunk_{i}",
-                    "language": chunk.language,
+                    'language': chunk.language,
-                    "embedding": embedding,  # Keep as numpy array
+                    'embedding': embedding,  # Keep as numpy array
-                    "indexed_at": datetime.now().isoformat(),
+                    'indexed_at': datetime.now().isoformat(),
                    # Add new metadata fields
-                    "file_lines": int(chunk.file_lines) if chunk.file_lines else 0,
+                    'file_lines': int(chunk.file_lines) if chunk.file_lines else 0,
-                    "chunk_index": (
+                    'chunk_index': int(chunk.chunk_index) if chunk.chunk_index is not None else i,
-                        int(chunk.chunk_index) if chunk.chunk_index is not None else i
+                    'total_chunks': int(chunk.total_chunks) if chunk.total_chunks else len(chunks),
-                    ),
+                    'parent_class': chunk.parent_class or '',
-                    "total_chunks": (
+                    'parent_function': chunk.parent_function or '',
-                        int(chunk.total_chunks) if chunk.total_chunks else len(chunks)
+                    'prev_chunk_id': chunk.prev_chunk_id or '',
-                    ),
+                    'next_chunk_id': chunk.next_chunk_id or '',
                    "parent_class": chunk.parent_class or "",
                    "parent_function": chunk.parent_function or "",
                    "prev_chunk_id": chunk.prev_chunk_id or "",
                    "next_chunk_id": chunk.next_chunk_id or "",
                }
                records.append(record)
            # Update manifest with enhanced tracking
            file_str = normalize_relative_path(file_path, self.project_path)
            stat = file_path.stat()
-            self.manifest["files"][file_str] = {
+            self.manifest['files'][file_str] = {
-                "hash": self._get_file_hash(file_path),
+                'hash': self._get_file_hash(file_path),
-                "size": stat.st_size,
+                'size': stat.st_size,
-                "mtime": stat.st_mtime,
+                'mtime': stat.st_mtime,
-                "chunks": len(chunks),
+                'chunks': len(chunks),
-                "indexed_at": datetime.now().isoformat(),
+                'indexed_at': datetime.now().isoformat(),
-                "language": chunks[0].language if chunks else "unknown",
+                'language': chunks[0].language if chunks else 'unknown',
-                "encoding": "utf-8",  # Track encoding used
+                'encoding': 'utf-8'  # Track encoding used
            }
            return records
@ -605,7 +493,7 @@ class ProjectIndexer:
        content_parts = []
        try:
-            with open(file_path, "r", encoding="utf-8") as f:
+            with open(file_path, 'r', encoding='utf-8') as f:
                while True:
                    chunk = f.read(chunk_size)
                    if not chunk:
@ -613,13 +501,13 @@ class ProjectIndexer:
                    content_parts.append(chunk)
            logger.debug(f"Streamed {len(content_parts)} chunks from {file_path}")
-            return "".join(content_parts)
+            return ''.join(content_parts)
        except UnicodeDecodeError:
            # Try with different encodings for problematic files
-            for encoding in ["latin-1", "cp1252", "utf-8-sig"]:
+            for encoding in ['latin-1', 'cp1252', 'utf-8-sig']:
                try:
-                    with open(file_path, "r", encoding=encoding) as f:
+                    with open(file_path, 'r', encoding=encoding) as f:
                        content_parts = []
                        while True:
                            chunk = f.read(chunk_size)
@ -627,10 +515,8 @@ class ProjectIndexer:
                                break
                            content_parts.append(chunk)
-                    logger.debug(
+                    logger.debug(f"Streamed {len(content_parts)} chunks from {file_path} using {encoding}")
-                        f"Streamed {len(content_parts)} chunks from {file_path} using {encoding}"
+                    return ''.join(content_parts)
                    )
                    return "".join(content_parts)
                except UnicodeDecodeError:
                    continue
@ -640,22 +526,12 @@ class ProjectIndexer:
    def _init_database(self):
        """Initialize LanceDB connection and table."""
        if not LANCEDB_AVAILABLE:
            logger.error(
                "LanceDB is not available. Please install LanceDB for full indexing functionality."
            )
            logger.info("For Ollama-only mode, consider using hash-based embeddings instead.")
            raise ImportError(
                "LanceDB dependency is required for indexing. Install with: pip install lancedb pyarrow"
            )
        try:
            self.db = lancedb.connect(self.rag_dir)
            # Define schema with fixed-size vector
            embedding_dim = self.embedder.get_embedding_dim()
-            schema = pa.schema(
+            schema = pa.schema([
                [
                pa.field("file_path", pa.string()),
                pa.field("absolute_path", pa.string()),
                pa.field("chunk_id", pa.string()),
@ -665,9 +541,7 @@ class ProjectIndexer:
                pa.field("chunk_type", pa.string()),
                pa.field("name", pa.string()),
                pa.field("language", pa.string()),
-                    pa.field(
+                pa.field("embedding", pa.list_(pa.float32(), embedding_dim)),  # Fixed-size list
                        "embedding", pa.list_(pa.float32(), embedding_dim)
                    ),  # Fixed-size list
                pa.field("indexed_at", pa.string()),
                # New metadata fields
                pa.field("file_lines", pa.int32()),
@ -677,8 +551,7 @@ class ProjectIndexer:
                pa.field("parent_function", pa.string(), nullable=True),
                pa.field("prev_chunk_id", pa.string(), nullable=True),
                pa.field("next_chunk_id", pa.string(), nullable=True),
-                ]
+            ])
            )
            # Create or open table
            if "code_vectors" in self.db.table_names():
@ -695,9 +568,7 @@ class ProjectIndexer:
                    if not required_fields.issubset(existing_fields):
                        # Schema mismatch - drop and recreate table
-                        logger.warning(
+                        logger.warning("Schema mismatch detected. Dropping and recreating table.")
                            "Schema mismatch detected. Dropping and recreating table."
                        )
                        self.db.drop_table("code_vectors")
                        self.table = self.db.create_table("code_vectors", schema=schema)
                        logger.info("Recreated code_vectors table with updated schema")
@ -712,9 +583,7 @@ class ProjectIndexer:
            else:
                # Create empty table with schema
                self.table = self.db.create_table("code_vectors", schema=schema)
-                logger.info(
+                logger.info(f"Created new code_vectors table with embedding dimension {embedding_dim}")
                    f"Created new code_vectors table with embedding dimension {embedding_dim}"
                )
        except Exception as e:
            logger.error(f"Failed to initialize database: {e}")
@ -742,11 +611,11 @@ class ProjectIndexer:
        # Clear manifest if force reindex
        if force_reindex:
            self.manifest = {
-                "version": "1.0",
+                'version': '1.0',
-                "indexed_at": None,
+                'indexed_at': None,
-                "file_count": 0,
+                'file_count': 0,
-                "chunk_count": 0,
+                'chunk_count': 0,
-                "files": {},
+                'files': {}
            }
            # Clear existing table
            if "code_vectors" in self.db.table_names():
@ -761,9 +630,9 @@ class ProjectIndexer:
        if not files_to_index:
            console.print("[green][/green] All files are up to date!")
            return {
-                "files_indexed": 0,
+                'files_indexed': 0,
-                "chunks_created": 0,
+                'chunks_created': 0,
-                "time_taken": 0,
+                'time_taken': 0,
            }
        console.print(f"[cyan]Found {len(files_to_index)} files to index[/cyan]")
@ -781,7 +650,10 @@ class ProjectIndexer:
            console=console,
        ) as progress:
-            task = progress.add_task("[cyan]Indexing files...", total=len(files_to_index))
+            task = progress.add_task(
                "[cyan]Indexing files...", 
                total=len(files_to_index)
            )
            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                # Submit all files for processing
@ -827,10 +699,10 @@ class ProjectIndexer:
                raise
        # Update manifest
-        self.manifest["indexed_at"] = datetime.now().isoformat()
+        self.manifest['indexed_at'] = datetime.now().isoformat()
-        self.manifest["file_count"] = len(self.manifest["files"])
+        self.manifest['file_count'] = len(self.manifest['files'])
-        self.manifest["chunk_count"] = sum(
+        self.manifest['chunk_count'] = sum(
-            f["chunks"] for f in self.manifest["files"].values()
+            f['chunks'] for f in self.manifest['files'].values()
        )
        self._save_manifest()
@ -839,11 +711,11 @@ class ProjectIndexer:
        time_taken = (end_time - start_time).total_seconds()
        stats = {
-            "files_indexed": len(files_to_index) - len(failed_files),
+            'files_indexed': len(files_to_index) - len(failed_files),
-            "files_failed": len(failed_files),
+            'files_failed': len(failed_files),
-            "chunks_created": len(all_records),
+            'chunks_created': len(all_records),
-            "time_taken": time_taken,
+            'time_taken': time_taken,
-            "files_per_second": (len(files_to_index) / time_taken if time_taken > 0 else 0),
+            'files_per_second': len(files_to_index) / time_taken if time_taken > 0 else 0,
        }
        # Print summary
@ -854,9 +726,7 @@ class ProjectIndexer:
        console.print(f"Speed: {stats['files_per_second']:.1f} files/second")
        if failed_files:
-            console.print(
+            console.print(f"\n[yellow]Warning:[/yellow] {len(failed_files)} files failed to index")
                f"\n[yellow]Warning:[/yellow] {len(failed_files)} files failed to index"
            )
        return stats
@ -891,16 +761,14 @@ class ProjectIndexer:
                df["total_chunks"] = df["total_chunks"].astype("int32")
                # Use vector store's update method (multiply out old, multiply in new)
-                if hasattr(self, "_vector_store") and self._vector_store:
+                if hasattr(self, '_vector_store') and self._vector_store:
                    success = self._vector_store.update_file_vectors(file_str, df)
                else:
                    # Fallback: delete by file path and add new data
                    try:
                        self.table.delete(f"file = '{file_str}'")
                    except Exception as e:
-                        logger.debug(
+                        logger.debug(f"Could not delete existing chunks (might not exist): {e}")
                            f"Could not delete existing chunks (might not exist): {e}"
                        )
                    self.table.add(df)
                    success = True
@ -908,25 +776,23 @@ class ProjectIndexer:
                    # Update manifest with enhanced file tracking
                    file_hash = self._get_file_hash(file_path)
                    stat = file_path.stat()
-                    if "files" not in self.manifest:
+                    if 'files' not in self.manifest:
-                        self.manifest["files"] = {}
+                        self.manifest['files'] = {}
-                    self.manifest["files"][file_str] = {
+                    self.manifest['files'][file_str] = {
-                        "hash": file_hash,
+                        'hash': file_hash,
-                        "size": stat.st_size,
+                        'size': stat.st_size,
-                        "mtime": stat.st_mtime,
+                        'mtime': stat.st_mtime,
-                        "chunks": len(records),
+                        'chunks': len(records),
-                        "last_updated": datetime.now().isoformat(),
+                        'last_updated': datetime.now().isoformat(),
-                        "language": (
+                        'language': records[0].get('language', 'unknown') if records else 'unknown',
-                            records[0].get("language", "unknown") if records else "unknown"
+                        'encoding': 'utf-8'
                        ),
                        "encoding": "utf-8",
                    }
                    self._save_manifest()
                    logger.debug(f"Successfully updated {len(records)} chunks for {file_str}")
                    return True
            else:
                # File exists but has no processable content - remove existing chunks
-                if hasattr(self, "_vector_store") and self._vector_store:
+                if hasattr(self, '_vector_store') and self._vector_store:
                    self._vector_store.delete_by_file(file_str)
                else:
                    try:
@ -959,7 +825,7 @@ class ProjectIndexer:
            file_str = normalize_relative_path(file_path, self.project_path)
            # Delete from vector store
-            if hasattr(self, "_vector_store") and self._vector_store:
+            if hasattr(self, '_vector_store') and self._vector_store:
                success = self._vector_store.delete_by_file(file_str)
            else:
                try:
@ -970,8 +836,8 @@ class ProjectIndexer:
                    success = False
            # Update manifest
-            if success and "files" in self.manifest and file_str in self.manifest["files"]:
+            if success and 'files' in self.manifest and file_str in self.manifest['files']:
-                del self.manifest["files"][file_str]
+                del self.manifest['files'][file_str]
                self._save_manifest()
                logger.debug(f"Deleted chunks for file: {file_str}")
@ -984,20 +850,20 @@ class ProjectIndexer:
    def get_statistics(self) -> Dict[str, Any]:
        """Get indexing statistics."""
        stats = {
-            "project_path": str(self.project_path),
+            'project_path': str(self.project_path),
-            "indexed_at": self.manifest.get("indexed_at", "Never"),
+            'indexed_at': self.manifest.get('indexed_at', 'Never'),
-            "file_count": self.manifest.get("file_count", 0),
+            'file_count': self.manifest.get('file_count', 0),
-            "chunk_count": self.manifest.get("chunk_count", 0),
+            'chunk_count': self.manifest.get('chunk_count', 0),
-            "index_size_mb": 0,
+            'index_size_mb': 0,
        }
        # Calculate index size
        try:
-            db_path = self.rag_dir / "code_vectors.lance"
+            db_path = self.rag_dir / 'code_vectors.lance'
            if db_path.exists():
-                size_bytes = sum(f.stat().st_size for f in db_path.rglob("*") if f.is_file())
+                size_bytes = sum(f.stat().st_size for f in db_path.rglob('*') if f.is_file())
-                stats["index_size_mb"] = size_bytes / (1024 * 1024)
+                stats['index_size_mb'] = size_bytes / (1024 * 1024)
-        except (OSError, IOError, PermissionError):
+        except:
            pass
        return stats
--- a/mini_rag/llm_safeguards.py
+++ b/mini_rag/llm_safeguards.py
@ -6,27 +6,24 @@ Provides runaway prevention, context management, and intelligent detection
 of problematic model behaviors to ensure reliable user experience.
 """
 import logging
 import re
 import time
 import logging
 from typing import Optional, Dict, List, Tuple
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple
 logger = logging.getLogger(__name__)
@dataclass
 class SafeguardConfig:
-    """Configuration for LLM safeguards - gentle and educational."""
+    """Configuration for LLM safeguards."""
-
+    max_output_tokens: int = 2000        # Prevent excessive generation
-    max_output_tokens: int = 4000  # Allow longer responses for learning
+    max_repetition_ratio: float = 0.3    # Max ratio of repeated content
-    max_repetition_ratio: float = 0.7  # Be very permissive - only catch extreme repetition
+    max_response_time: int = 60          # Max seconds for response
-    max_response_time: int = 120  # Allow 2 minutes for complex thinking
+    min_useful_length: int = 20          # Minimum useful response length
-    min_useful_length: int = 10  # Lower threshold - short answers can be useful
+    context_window: int = 32768          # Ollama context window
    context_window: int = 32000  # Match Qwen3 context length (32K token limit)
    enable_thinking_detection: bool = True  # Detect thinking patterns
 class ModelRunawayDetector:
    """Detects and prevents model runaway behaviors."""
@ -38,28 +35,21 @@ class ModelRunawayDetector:
        """Compile regex patterns for runaway detection."""
        return {
            # Excessive repetition patterns
-            "word_repetition": re.compile(r"\b(\w+)\b(?:\s+\1\b){3,}", re.IGNORECASE),
+            'word_repetition': re.compile(r'\b(\w+)\b(?:\s+\1\b){3,}', re.IGNORECASE),
-            "phrase_repetition": re.compile(r"(.{10,50}?)\1{2,}", re.DOTALL),
+            'phrase_repetition': re.compile(r'(.{10,50}?)\1{2,}', re.DOTALL),
            # Thinking loop patterns (small models get stuck)
-            "thinking_loop": re.compile(
+            'thinking_loop': re.compile(r'(let me think|i think|thinking|consider|actually|wait|hmm|well)\s*[.,:]*\s*\1', re.IGNORECASE),
-                r"(let me think|i think|thinking|consider|actually|wait|hmm|well)\s*[.,:]*\s*\1",
+            
                re.IGNORECASE,
            ),
            # Rambling patterns
-            "excessive_filler": re.compile(
+            'excessive_filler': re.compile(r'\b(um|uh|well|you know|like|basically|actually|so|then|and|but|however)\b(?:\s+[^.!?]*){5,}', re.IGNORECASE),
-                r"\b(um|uh|well|you know|like|basically|actually|so|then|and|but|however)\b(?:\s+[^.!?]*){5,}",
+            
                re.IGNORECASE,
            ),
            # JSON corruption patterns
-            "broken_json": re.compile(r"\{[^}]*\{[^}]*\{"),  # Nested broken JSON
+            'broken_json': re.compile(r'\{[^}]*\{[^}]*\{'),  # Nested broken JSON
-            "json_repetition": re.compile(
+            'json_repetition': re.compile(r'("[\w_]+"\s*:\s*"[^"]*",?\s*){4,}'),  # Repeated JSON fields
                r'("[\w_]+"\s*:\s*"[^"]*",?\s*){4,}'
            ),  # Repeated JSON fields
        }
-    def check_response_quality(
+    def check_response_quality(self, response: str, query: str, start_time: float) -> Tuple[bool, Optional[str], Optional[str]]:
        self, response: str, query: str, start_time: float
    ) -> Tuple[bool, Optional[str], Optional[str]]:
        """
        Check response quality and detect runaway behaviors.
@ -91,7 +81,7 @@ class ModelRunawayDetector:
            return False, rambling_issue, self._explain_rambling()
        # Check JSON corruption (for structured responses)
-        if "{" in response and "}" in response:
+        if '{' in response and '}' in response:
            json_issue = self._check_json_corruption(response)
            if json_issue:
                return False, json_issue, self._explain_json_corruption()
@ -101,26 +91,15 @@ class ModelRunawayDetector:
    def _check_repetition(self, response: str) -> Optional[str]:
        """Check for excessive repetition."""
        # Word repetition
-        if self.response_patterns["word_repetition"].search(response):
+        if self.response_patterns['word_repetition'].search(response):
            return "word_repetition"
        # Phrase repetition  
-        if self.response_patterns["phrase_repetition"].search(response):
+        if self.response_patterns['phrase_repetition'].search(response):
            return "phrase_repetition"
-        # Calculate repetition ratio (excluding Qwen3 thinking blocks)
+        # Calculate repetition ratio
-        analysis_text = response
+        words = response.split()
        if "<think>" in response and "</think>" in response:
            # Extract only the actual response (after thinking) for repetition analysis
            thinking_end = response.find("</think>")
            if thinking_end != -1:
                analysis_text = response[thinking_end + 8 :].strip()
                # If the actual response (excluding thinking) is short, don't penalize
                if len(analysis_text.split()) < 20:
                    return None
        words = analysis_text.split()
        if len(words) > 10:
            unique_words = set(words)
            repetition_ratio = 1 - (len(unique_words) / len(words))
@ -131,11 +110,11 @@ class ModelRunawayDetector:
    def _check_thinking_loops(self, response: str) -> Optional[str]:
        """Check for thinking loops (common in small models)."""
-        if self.response_patterns["thinking_loop"].search(response):
+        if self.response_patterns['thinking_loop'].search(response):
            return "thinking_loop"
        # Check for excessive meta-commentary
-        thinking_words = ["think", "considering", "actually", "wait", "hmm", "let me"]
+        thinking_words = ['think', 'considering', 'actually', 'wait', 'hmm', 'let me']
        thinking_count = sum(response.lower().count(word) for word in thinking_words)
        if thinking_count > 5 and len(response.split()) < 200:
@ -145,11 +124,11 @@ class ModelRunawayDetector:
    def _check_rambling(self, response: str) -> Optional[str]:
        """Check for rambling or excessive filler."""
-        if self.response_patterns["excessive_filler"].search(response):
+        if self.response_patterns['excessive_filler'].search(response):
            return "excessive_filler"
        # Check for extremely long sentences (sign of rambling)
-        sentences = re.split(r"[.!?]+", response)
+        sentences = re.split(r'[.!?]+', response)
        long_sentences = [s for s in sentences if len(s.split()) > 50]
        if len(long_sentences) > 2:
@ -159,10 +138,10 @@ class ModelRunawayDetector:
    def _check_json_corruption(self, response: str) -> Optional[str]:
        """Check for JSON corruption in structured responses."""
-        if self.response_patterns["broken_json"].search(response):
+        if self.response_patterns['broken_json'].search(response):
            return "broken_json"
-        if self.response_patterns["json_repetition"].search(response):
+        if self.response_patterns['json_repetition'].search(response):
            return "json_repetition"
        return None
@ -194,7 +173,7 @@ class ModelRunawayDetector:
 • Consider using a larger model if available"""
    def _explain_repetition(self, issue_type: str) -> str:
-        return """🔄 The AI got stuck in repetition loops ({issue_type}).
+        return f"""🔄 The AI got stuck in repetition loops ({issue_type}).
 **Why this happens:**
 • Small models sometimes repeat when uncertain
@ -205,7 +184,7 @@ class ModelRunawayDetector:
 • Try a more specific question
 • Break complex questions into smaller parts
 • Use exploration mode which handles context better: `rag-mini explore`
-• Consider: A larger model (qwen3:1.7b or qwen3:4b) would help"""
+• Consider: A larger model (qwen3:1.7b or qwen3:3b) would help"""
    def _explain_thinking_loop(self) -> str:
        return """🧠 The AI got caught in a "thinking loop" - overthinking the response.
@ -253,48 +232,35 @@ class ModelRunawayDetector:
        """Get specific recovery suggestions based on the issue."""
        suggestions = []
-        if issue_type in ["thinking_loop", "excessive_thinking"]:
+        if issue_type in ['thinking_loop', 'excessive_thinking']:
-            suggestions.extend(
+            suggestions.extend([
-                [
+                f"Try synthesis mode: `rag-mini search . \"{query}\" --synthesize`",
                    f'Try synthesis mode: `rag-mini search . "{query}" --synthesize`',
                "Ask more direct questions without 'why' or 'how'",
-                    "Break complex questions into smaller parts",
+                "Break complex questions into smaller parts"
-                ]
+            ])
            )
-        elif issue_type in [
+        elif issue_type in ['word_repetition', 'phrase_repetition', 'high_repetition_ratio']:
-            "word_repetition",
+            suggestions.extend([
            "phrase_repetition",
            "high_repetition_ratio",
        ]:
            suggestions.extend(
                [
                "Try rephrasing your question completely",
                "Use more specific technical terms",  
-                    "Try exploration mode: `rag-mini explore .`",
+                f"Try exploration mode: `rag-mini explore .`"
-                ]
+            ])
            )
-        elif issue_type == "timeout":
+        elif issue_type == 'timeout':
-            suggestions.extend(
+            suggestions.extend([
                [
                "Try a simpler version of your question",
                "Use synthesis mode for faster responses",
-                    "Check if Ollama is under heavy load",
+                "Check if Ollama is under heavy load"
-                ]
+            ])
            )
        # Universal suggestions
-        suggestions.extend(
+        suggestions.extend([
-            [
+            "Consider using a larger model if available (qwen3:1.7b or qwen3:3b)",
-                "Consider using a larger model if available (qwen3:1.7b or qwen3:4b)",
+            "Check model status: `ollama list`"
-                "Check model status: `ollama list`",
+        ])
            ]
        )
        return suggestions
 def get_optimal_ollama_parameters(model_name: str) -> Dict[str, any]:
    """Get optimal parameters for different Ollama models."""
@ -336,10 +302,7 @@ def get_optimal_ollama_parameters(model_name: str) -> Dict[str, any]:
    return base_params
 # Quick test
 def test_safeguards():
    """Test the safeguard system."""
    detector = ModelRunawayDetector()
@ -347,14 +310,11 @@ def test_safeguards():
    # Test repetition detection
    bad_response = "The user authentication system works by checking user credentials. The user authentication system works by checking user credentials. The user authentication system works by checking user credentials."
-    is_valid, issue, explanation = detector.check_response_quality(
+    is_valid, issue, explanation = detector.check_response_quality(bad_response, "auth", time.time())
        bad_response, "auth", time.time()
    )
    print(f"Repetition test: Valid={is_valid}, Issue={issue}")
    if explanation:
        print(explanation)
 if __name__ == "__main__":
    test_safeguards()
--- a/mini_rag/llm_synthesizer.py
+++ b/mini_rag/llm_synthesizer.py
@ -9,61 +9,39 @@ Takes raw search results and generates coherent, contextual summaries.
 import json
 import logging
 import time
 from typing import List, Dict, Any, Optional
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, List, Optional
 import requests
 from pathlib import Path
 try:
-    from .llm_safeguards import (
+    from .llm_safeguards import ModelRunawayDetector, SafeguardConfig, get_optimal_ollama_parameters
        ModelRunawayDetector,
        SafeguardConfig,
        get_optimal_ollama_parameters,
    )
    from .system_context import get_system_context
 except ImportError:
    # Graceful fallback if safeguards not available
    ModelRunawayDetector = None
    SafeguardConfig = None
-
+    get_optimal_ollama_parameters = lambda x: {}
    def get_optimal_ollama_parameters(x):
        return {}
    def get_system_context(x=None):
        return ""
 logger = logging.getLogger(__name__)
@dataclass
 class SynthesisResult:
    """Result of LLM synthesis."""
    summary: str
    key_points: List[str]
    code_examples: List[str]
    suggested_actions: List[str]
    confidence: float
 class LLMSynthesizer:
    """Synthesizes RAG search results using Ollama LLMs."""
-    def __init__(
+    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = False):
-        self,
+        self.ollama_url = ollama_url.rstrip('/')
        ollama_url: str = "http://localhost:11434",
        model: str = None,
        enable_thinking: bool = False,
        config=None,
    ):
        self.ollama_url = ollama_url.rstrip("/")
        self.available_models = []
        self.model = model
        self.enable_thinking = enable_thinking  # Default False for synthesis mode
        self._initialized = False
        self.config = config  # For accessing model rankings
        # Initialize safeguards
        if ModelRunawayDetector:
@ -77,169 +55,73 @@ class LLMSynthesizer:
            response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
            if response.status_code == 200:
                data = response.json()
-                return [model["name"] for model in data.get("models", [])]
+                return [model['name'] for model in data.get('models', [])]
        except Exception as e:
            logger.warning(f"Could not fetch Ollama models: {e}")
        return []
    def _select_best_model(self) -> str:
-        """Select the best available model based on configuration rankings with robust name resolution."""
+        """Select the best available model based on modern performance rankings."""
        if not self.available_models:
-            # Use config fallback if available, otherwise use default
+            return "qwen2.5:1.5b"  # Fallback preference
            if (
                self.config
                and hasattr(self.config, "llm")
                and hasattr(self.config.llm, "model_rankings")
                and self.config.llm.model_rankings
            ):
                return self.config.llm.model_rankings[0]  # First preferred model
            return "qwen2.5:1.5b"  # System fallback only if no config
-        # Get model rankings from config or use defaults
+        # Modern model preference ranking (CPU-friendly first)
-        if (
+        # Prioritize: Ultra-efficient > Standard efficient > Larger models
            self.config
            and hasattr(self.config, "llm")
            and hasattr(self.config.llm, "model_rankings")
        ):
            model_rankings = self.config.llm.model_rankings
        else:
            # Fallback rankings if no config
        model_rankings = [
-                "qwen3:1.7b",
+            # Recommended model (excellent quality)
                "qwen3:0.6b",
            "qwen3:4b",
-                "qwen2.5:3b",
+            
-                "qwen2.5:1.5b",
+            # Ultra-efficient models (perfect for CPU-only systems)
-                "qwen2.5-coder:1.5b",
+            "qwen3:0.6b", "qwen3:1.7b", "llama3.2:1b", 
            # Standard efficient models
            "qwen2.5:1.5b", "qwen3:3b",
            # Qwen2.5 models (excellent performance/size ratio)
            "qwen2.5-coder:1.5b", "qwen2.5:1.5b", "qwen2.5:3b", "qwen2.5-coder:3b",
            "qwen2.5:7b", "qwen2.5-coder:7b",
            # Qwen2 models (older but still good)
            "qwen2:1.5b", "qwen2:3b", "qwen2:7b",
            # Mistral models (good quality, reasonable size)
            "mistral:7b", "mistral-nemo", "mistral-small",
            # Llama3.2 models (decent but larger)
            "llama3.2:1b", "llama3.2:3b", "llama3.2", "llama3.2:8b",
            # Fallback to other Llama models
            "llama3.1:8b", "llama3:8b", "llama3", 
            # Other decent models
            "gemma2:2b", "gemma2:9b", "phi3:3.8b", "phi3.5",
        ]
-        # Find first available model from our ranked list using relaxed name resolution
+        # Find first available model from our ranked list
        for preferred_model in model_rankings:
-            resolved_model = self._resolve_model_name(preferred_model)
+            for available_model in self.available_models:
-            if resolved_model:
+                # Match model names (handle version tags)
-                logger.info(f"Selected model: {resolved_model} (requested: {preferred_model})")
+                available_base = available_model.split(':')[0].lower()
-                return resolved_model
+                preferred_base = preferred_model.split(':')[0].lower()
                if preferred_base in available_base or available_base in preferred_base:
                    # Additional size filtering - prefer smaller models
                    if any(size in available_model.lower() for size in ['1b', '1.5b', '2b', '3b']):
                        logger.info(f"Selected efficient model: {available_model}")
                        return available_model
                    elif any(size in available_model.lower() for size in ['7b', '8b']):
                        # Only use larger models if no smaller ones available
                        logger.info(f"Selected larger model: {available_model}")
                        return available_model
                    elif ':' not in available_model:
                        # Handle models without explicit size tags
                        return available_model
        # If no preferred models found, use first available
        fallback = self.available_models[0]
        logger.warning(f"Using fallback model: {fallback}")
        return fallback
    def _resolve_model_name(self, configured_model: str) -> Optional[str]:
        """Auto-resolve model names to match what's actually available in Ollama.
        This handles common patterns like:
        - qwen3:1.7b -> qwen3:1.7b-q8_0
        - qwen3:4b -> qwen3:4b-instruct-2507-q4_K_M
        - auto -> first available model from ranked preference
        """
        logger.debug(f"Resolving model: {configured_model}")
        if not self.available_models:
            logger.warning("No available models for resolution")
            return None
        # Handle special 'auto' directive - use smart selection
        if configured_model.lower() == 'auto':
            logger.info("Using AUTO selection...")
            return self._select_best_available_model()
        # Direct exact match first (case-insensitive)
        for available_model in self.available_models:
            if configured_model.lower() == available_model.lower():
                logger.info(f"✅ EXACT MATCH: {available_model}")
                return available_model
        # Relaxed matching - extract base model and size, then find closest match
        logger.info(f"No exact match for '{configured_model}', trying relaxed matching...")
        match = self._find_closest_model_match(configured_model)
        if match:
            logger.info(f"✅ FUZZY MATCH: {configured_model} -> {match}")
        else:
            logger.warning(f"❌ NO MATCH: {configured_model} not found in available models")
        return match
    def _select_best_available_model(self) -> str:
        """Select the best available model from what's actually installed."""
        if not self.available_models:
            logger.warning("No models available from Ollama - using fallback")
            return "qwen2.5:1.5b"  # fallback
        logger.info(f"Available models: {self.available_models}")
        # Priority order for auto selection - prefer newer and larger models
        priority_patterns = [
            # Qwen3 series (newest)
            "qwen3:8b", "qwen3:4b", "qwen3:1.7b", "qwen3:0.6b",
            # Qwen2.5 series 
            "qwen2.5:3b", "qwen2.5:1.5b", "qwen2.5:0.5b",
            # Any other model as fallback
        ]
        # Find first match from priority list
        logger.info("Searching for best model match...")
        for pattern in priority_patterns:
            match = self._find_closest_model_match(pattern)
            if match:
                logger.info(f"✅ AUTO SELECTED: {match} (matched pattern: {pattern})")
                return match
            else:
                logger.debug(f"No match found for pattern: {pattern}")
        # If nothing matches, just use first available
        fallback = self.available_models[0]
        logger.warning(f"⚠️  Using first available model as fallback: {fallback}")
        return fallback
    def _find_closest_model_match(self, configured_model: str) -> Optional[str]:
        """Find the closest matching model using relaxed criteria."""
        if not self.available_models:
            logger.debug(f"No available models to match against for: {configured_model}")
            return None
        # Extract base model and size from configured model
        # e.g., "qwen3:4b" -> ("qwen3", "4b")
        if ':' not in configured_model:
            base_model = configured_model
            size = None
        else:
            base_model, size_part = configured_model.split(':', 1)
            # Extract just the size (remove any suffixes like -q8_0)
            size = size_part.split('-')[0] if '-' in size_part else size_part
        logger.debug(f"Looking for base model: '{base_model}', size: '{size}'")
        # Find all models that match the base model
        candidates = []
        for available_model in self.available_models:
            if ':' not in available_model:
                continue
            avail_base, avail_full = available_model.split(':', 1)
            if avail_base.lower() == base_model.lower():
                candidates.append(available_model)
                logger.debug(f"Found candidate: {available_model}")
        if not candidates:
            logger.debug(f"No candidates found for base model: {base_model}")
            return None
        # If we have a size preference, try to match it
        if size:
            for candidate in candidates:
                # Check if size appears in the model name
                if size.lower() in candidate.lower():
                    logger.debug(f"Size match found: {candidate} contains '{size}'")
                    return candidate
            logger.debug(f"No size match found for '{size}', using first candidate")
        # If no size match or no size specified, return first candidate
        selected = candidates[0]
        logger.debug(f"Returning first candidate: {selected}")
        return selected
    # Old pattern matching methods removed - using simpler approach now
    def _ensure_initialized(self):
        """Lazy initialization with LLM warmup."""
        if self._initialized:
@ -250,209 +132,81 @@ class LLMSynthesizer:
        if not self.model:
            self.model = self._select_best_model()
-        # Skip warmup - models are fast enough and warmup causes delays
+        # Warm up LLM with minimal request (ignores response)
-        # Warmup removed to eliminate startup delays and unwanted model calls
+        if self.available_models:
            try:
                self._call_ollama("testing, just say 'hi'", temperature=0.1, disable_thinking=True)
            except:
                pass  # Warmup failure is non-critical
        self._initialized = True
    def _get_optimal_context_size(self, model_name: str) -> int:
        """Get optimal context size based on model capabilities and configuration."""
        # Get configured context window
        if self.config and hasattr(self.config, "llm"):
            configured_context = self.config.llm.context_window
            auto_context = getattr(self.config.llm, "auto_context", True)
        else:
            configured_context = 16384  # Default to 16K
            auto_context = True
        # Model-specific maximum context windows (based on research)
        model_limits = {
            # Qwen3 models with native context support
            "qwen3:0.6b": 32768,  # 32K native
            "qwen3:1.7b": 32768,  # 32K native
            "qwen3:4b": 131072,  # 131K with YaRN extension
            # Qwen2.5 models
            "qwen2.5:1.5b": 32768,  # 32K native
            "qwen2.5:3b": 32768,  # 32K native
            "qwen2.5-coder:1.5b": 32768,  # 32K native
            # Fallback for unknown models
            "default": 8192,
        }
        # Find model limit (check for partial matches)
        model_limit = model_limits.get("default", 8192)
        for model_pattern, limit in model_limits.items():
            if model_pattern != "default" and model_pattern.lower() in model_name.lower():
                model_limit = limit
                break
        # If auto_context is enabled, respect model limits
        if auto_context:
            optimal_context = min(configured_context, model_limit)
        else:
            optimal_context = configured_context
        # Ensure minimum usable context for RAG
        optimal_context = max(optimal_context, 4096)  # Minimum 4K for basic RAG
        logger.debug(
            f"Context for {model_name}: {optimal_context} tokens (configured: {configured_context}, limit: {model_limit})"
        )
        return optimal_context
    def is_available(self) -> bool:
        """Check if Ollama is available and has models."""
        self._ensure_initialized()
        return len(self.available_models) > 0
-    def _call_ollama(
+    def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False) -> Optional[str]:
        self,
        prompt: str,
        temperature: float = 0.3,
        disable_thinking: bool = False,
        use_streaming: bool = True,
        collapse_thinking: bool = True,
    ) -> Optional[str]:
        """Make a call to Ollama API with safeguards."""
        start_time = time.time()
        try:
-            # Ensure we're initialized
+            # Use the best available model
            self._ensure_initialized()
            # Use the best available model with retry logic
            model_to_use = self.model
            if self.model not in self.available_models:
                # Refresh model list in case of race condition
                logger.warning(
                    f"Configured model {self.model} not in available list, refreshing..."
                )
                self.available_models = self._get_available_models()
                if self.model in self.available_models:
                    model_to_use = self.model
                    logger.info(f"Model {self.model} found after refresh")
                elif self.available_models:
                # Fallback to first available model
                if self.available_models:
                    model_to_use = self.available_models[0]
                    logger.warning(f"Using fallback model: {model_to_use}")
                else:
                    logger.error("No Ollama models available")
                    return None
            # Handle thinking mode for Qwen3 models
            final_prompt = prompt
-            use_thinking = self.enable_thinking and not disable_thinking
+            if not self.enable_thinking or disable_thinking:
            # For non-thinking mode, add <no_think> tag for Qwen3
            if not use_thinking and "qwen3" in model_to_use.lower():
                if not final_prompt.endswith(" <no_think>"):
                    final_prompt += " <no_think>"
            # Get optimal parameters for this model
            optimal_params = get_optimal_ollama_parameters(model_to_use)
            # Qwen3-specific optimal parameters based on research
            if "qwen3" in model_to_use.lower():
                if use_thinking:
                    # Thinking mode: Temperature=0.6, TopP=0.95, TopK=20, PresencePenalty=1.5
                    qwen3_temp = 0.6
                    qwen3_top_p = 0.95
                    qwen3_top_k = 20
                    qwen3_presence = 1.5
                else:
                    # Non-thinking mode: Temperature=0.7, TopP=0.8, TopK=20, PresencePenalty=1.5
                    qwen3_temp = 0.7
                    qwen3_top_p = 0.8
                    qwen3_top_k = 20
                    qwen3_presence = 1.5
            else:
                qwen3_temp = temperature
                qwen3_top_p = optimal_params.get("top_p", 0.9)
                qwen3_top_k = optimal_params.get("top_k", 40)
                qwen3_presence = optimal_params.get("presence_penalty", 1.0)
            payload = {
                "model": model_to_use,
                "prompt": final_prompt,
-                "stream": use_streaming,
+                "stream": False,
                "options": {
-                    "temperature": qwen3_temp,
+                    "temperature": temperature,
-                    "top_p": qwen3_top_p,
+                    "top_p": optimal_params.get("top_p", 0.9),
-                    "top_k": qwen3_top_k,
+                    "top_k": optimal_params.get("top_k", 40),
-                    "num_ctx": self._get_optimal_context_size(
+                    "num_ctx": optimal_params.get("num_ctx", 32768),
                        model_to_use
                    ),  # Dynamic context based on model and config
                    "num_predict": optimal_params.get("num_predict", 2000),
                    "repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
-                    "presence_penalty": qwen3_presence,
+                    "presence_penalty": optimal_params.get("presence_penalty", 1.0)
-                },
+                }
            }
            # Handle streaming with thinking display
            if use_streaming:
                return self._handle_streaming_with_thinking_display(
                    payload, model_to_use, use_thinking, start_time, collapse_thinking
                )
            response = requests.post(
                f"{self.ollama_url}/api/generate",
                json=payload,
-                timeout=65,  # Slightly longer than safeguard timeout
+                timeout=65  # Slightly longer than safeguard timeout
            )
            if response.status_code == 200:
                result = response.json()
-
+                raw_response = result.get('response', '').strip()
                # All models use standard response format
                # Qwen3 thinking tokens are embedded in the response content itself as <think>...</think>
                raw_response = result.get("response", "").strip()
                # Log thinking content for Qwen3 debugging
                if (
                    "qwen3" in model_to_use.lower()
                    and use_thinking
                    and "<think>" in raw_response
                ):
                    thinking_start = raw_response.find("<think>")
                    thinking_end = raw_response.find("</think>")
                    if thinking_start != -1 and thinking_end != -1:
                        thinking_content = raw_response[thinking_start + 7 : thinking_end]
                        logger.info(f"Qwen3 thinking: {thinking_content[:100]}...")
                # Apply safeguards to check response quality
                if self.safeguard_detector and raw_response:
-                    is_valid, issue_type, explanation = (
+                    is_valid, issue_type, explanation = self.safeguard_detector.check_response_quality(
-                        self.safeguard_detector.check_response_quality(
+                        raw_response, prompt[:100], start_time  # First 100 chars of prompt for context
                            raw_response,
                            prompt[:100],
                            start_time,  # First 100 chars of prompt for context
                        )
                    )
                    if not is_valid:
                        logger.warning(f"Safeguard triggered: {issue_type}")
-                        # Preserve original response but add safeguard warning
+                        # Return a safe explanation instead of the problematic response
-                        return self._create_safeguard_response_with_content(
+                        return self._create_safeguard_response(issue_type, explanation, prompt)
                            issue_type, explanation, raw_response
                        )
-                # Clean up thinking tags from final response
+                return raw_response
                cleaned_response = raw_response
                if "<think>" in cleaned_response or "</think>" in cleaned_response:
                    # Remove thinking content but preserve the rest
                    cleaned_response = cleaned_response.replace("<think>", "").replace(
                        "</think>", ""
                    )
                    # Clean up extra whitespace that might be left
                    lines = cleaned_response.split("\n")
                    cleaned_lines = []
                    for line in lines:
                        if line.strip():  # Only keep non-empty lines
                            cleaned_lines.append(line)
                    cleaned_response = "\n".join(cleaned_lines)
                return cleaned_response.strip()
            else:
                logger.error(f"Ollama API error: {response.status_code}")
                return None
@ -461,11 +215,9 @@ class LLMSynthesizer:
            logger.error(f"Ollama call failed: {e}")
            return None
-    def _create_safeguard_response(
+    def _create_safeguard_response(self, issue_type: str, explanation: str, original_prompt: str) -> str:
        self, issue_type: str, explanation: str, original_prompt: str
    ) -> str:
        """Create a helpful response when safeguards are triggered."""
-        return """⚠️ Model Response Issue Detected
+        return f"""⚠️ Model Response Issue Detected
 {explanation}
@ -481,315 +233,7 @@ class LLMSynthesizer:
 This is normal with smaller AI models and helps ensure you get quality responses."""
-    def _create_safeguard_response_with_content(
+    def synthesize_search_results(self, query: str, results: List[Any], project_path: Path) -> SynthesisResult:
        self, issue_type: str, explanation: str, original_response: str
    ) -> str:
        """Create a response that preserves the original content but adds a safeguard warning."""
        # For Qwen3, extract the actual response (after thinking)
        actual_response = original_response
        if "<think>" in original_response and "</think>" in original_response:
            thinking_end = original_response.find("</think>")
            if thinking_end != -1:
                actual_response = original_response[thinking_end + 8 :].strip()
        # If we have useful content, preserve it with a warning
        if len(actual_response.strip()) > 20:
            return """⚠️ **Response Quality Warning** ({issue_type})
 {explanation}
 ---
 **AI Response (use with caution):**
 {actual_response}
 ---
 💡 **Note**: This response may have quality issues. Consider rephrasing your question or trying exploration mode for better results."""
        else:
            # If content is too short or problematic, use the original safeguard response
            return """⚠️ Model Response Issue Detected
 {explanation}
 **What happened:** The AI model encountered a common issue with small language models.
 **Your options:**
 1. **Try again**: Ask the same question (often resolves itself)
 2. **Rephrase**: Make your question more specific or break it into parts
 3. **Use exploration mode**: `rag-mini explore` for complex questions
 This is normal with smaller AI models and helps ensure you get quality responses."""
    def _handle_streaming_with_thinking_display(
        self,
        payload: dict,
        model_name: str,
        use_thinking: bool,
        start_time: float,
        collapse_thinking: bool = True,
    ) -> Optional[str]:
        """Stream a generation request and echo "thinking" tokens as they arrive.

        POSTs ``payload`` to ``{self.ollama_url}/api/generate`` with streaming
        enabled and reads the reply line by line, treating every line as one
        JSON chunk whose ``"response"`` field carries the next piece of text.

        Text between ``<think>`` and ``</think>`` is printed in gray, wrapped
        at roughly 70 columns, one sentence at a time. Once ``</think>`` is
        seen, the remaining text is printed verbatim as the answer.

        Args:
            payload: JSON body sent to the generate endpoint.
            model_name: Not used by this method.
            use_thinking: When False, ``<think>`` markers are never recognized.
            start_time: Not used by this method.
            collapse_thinking: When True, the printed thinking lines are erased
                with cursor-control codes once thinking finishes; when False
                they stay on screen.

        Returns:
            The concatenation of every ``"response"`` fragment received,
            including any ``<think>`` markers, or ``None`` if the HTTP status
            is not 200 or the request/stream raises.
        """
        import json

        # Bound before the try so the finally clause can always release the
        # streamed connection, whichever exit path is taken.
        response = None
        try:
            response = requests.post(
                f"{self.ollama_url}/api/generate", json=payload, stream=True, timeout=65
            )
            if response.status_code != 200:
                logger.error(f"Ollama API error: {response.status_code}")
                return None
            full_response = ""
            thinking_content = ""
            is_in_thinking = False
            is_thinking_complete = False
            thinking_lines_printed = 0
            # ANSI escape codes for colors and cursor control
            GRAY = "\033[90m"  # Dark gray for thinking
            RESET = "\033[0m"  # Reset color
            CLEAR_LINE = "\033[2K"  # Clear entire line
            CURSOR_UP = "\033[A"  # Move cursor up one line
            print(f"\n💭 {GRAY}Thinking...{RESET}", flush=True)
            for raw_line in response.iter_lines():
                if raw_line:
                    try:
                        chunk_data = json.loads(raw_line.decode("utf-8"))
                        chunk_text = chunk_data.get("response", "")
                        if chunk_text:
                            # The returned text keeps everything, tags included;
                            # only the on-screen copy is filtered below.
                            full_response += chunk_text
                            # Handle thinking tokens
                            if use_thinking and "<think>" in chunk_text:
                                is_in_thinking = True
                                chunk_text = chunk_text.replace("<think>", "")
                            if is_in_thinking and "</think>" in chunk_text:
                                is_in_thinking = False
                                is_thinking_complete = True
                                chunk_text = chunk_text.replace("</think>", "")
                                if collapse_thinking:
                                    # Erase the "Thinking..." header plus every
                                    # thinking line printed so far.
                                    for _ in range(thinking_lines_printed + 1):
                                        print(
                                            f"{CURSOR_UP}{CLEAR_LINE}",
                                            end="",
                                            flush=True,
                                        )
                                    print(
                                        f"💭 {GRAY}Thinking complete ✓{RESET}",
                                        flush=True,
                                    )
                                    thinking_lines_printed = 0
                                else:
                                    # Keep thinking visible, just show completion
                                    print(
                                        f"\n💭 {GRAY}Thinking complete ✓{RESET}",
                                        flush=True,
                                    )
                                print("🤖 AI Response:", flush=True)
                                # NOTE(review): any text sharing this chunk with
                                # "</think>" is not displayed (it is still part
                                # of the returned string), and the "done" check
                                # below is skipped for this chunk.
                                continue
                            # Display thinking content in gray with better formatting
                            if is_in_thinking and chunk_text.strip():
                                thinking_content += chunk_text
                                # Only attempt to flush once a word boundary has
                                # arrived or the buffer has grown long.
                                if (
                                    " " in chunk_text
                                    or "\n" in chunk_text
                                    or len(thinking_content) > 100
                                ):
                                    # Split by sentences for better readability
                                    sentences = thinking_content.replace("\n", " ").split(". ")
                                    # Everything but the last piece is a complete sentence
                                    for sentence in sentences[:-1]:
                                        sentence = sentence.strip()
                                        if sentence:
                                            # Greedy word wrap at about 70 columns
                                            words = sentence.split()
                                            line = ""
                                            for word in words:
                                                if len(line + " " + word) > 70:
                                                    if line:
                                                        print(
                                                            f"{GRAY}   {line.strip()}{RESET}",
                                                            flush=True,
                                                        )
                                                        thinking_lines_printed += 1
                                                    line = word
                                                else:
                                                    line += " " + word if line else word
                                            if line.strip():
                                                # Restore the period consumed by split(". ")
                                                print(
                                                    f"{GRAY}   {line.strip()}.{RESET}",
                                                    flush=True,
                                                )
                                                thinking_lines_printed += 1
                                    # Keep the last incomplete sentence for next iteration
                                    thinking_content = sentences[-1] if sentences else ""
                            # Display regular response content; this only happens
                            # after a closing "</think>" has been seen.
                            elif (
                                not is_in_thinking
                                and is_thinking_complete
                                and chunk_text.strip()
                            ):
                                # Filter out any remaining thinking tags that might leak through
                                clean_text = chunk_text
                                if "<think>" in clean_text or "</think>" in clean_text:
                                    clean_text = clean_text.replace("<think>", "").replace(
                                        "</think>", ""
                                    )
                                if clean_text:
                                    # Printed without stripping so newlines and
                                    # spacing in the answer are preserved.
                                    print(clean_text, end="", flush=True)
                        # Check if response is done
                        if chunk_data.get("done", False):
                            print()  # Final newline
                            break
                    except json.JSONDecodeError:
                        # Skip lines that are not valid JSON
                        continue
                    except Exception as e:
                        logger.error(f"Error processing stream chunk: {e}")
                        continue
            return full_response
        except Exception as e:
            logger.error(f"Streaming failed: {e}")
            return None
        finally:
            # With stream=True the connection stays checked out until the body
            # is fully consumed or the response is closed; close it explicitly
            # so early returns, breaks and errors do not leak it.
            if response is not None:
                response.close()
    def _handle_streaming_with_early_stop(
        self, payload: dict, model_name: str, use_thinking: bool, start_time: float
    ) -> Optional[str]:
        """Stream a generation request, aborting early if the output turns repetitive.

        POSTs ``payload`` to ``{self.ollama_url}/api/generate`` with streaming
        enabled and accumulates the ``"response"`` field of each JSON line.
        A sliding window of the most recent words is kept; once the window is
        full and enough text has arrived, the stream is abandoned if the share
        of duplicate words in the window exceeds the threshold. In that case
        ``"..."`` is appended unless the text already ends with ``.``, ``!``
        or ``?``.

        Args:
            payload: JSON body sent to the generate endpoint.
            model_name: Not used by this method (kept for interface compatibility).
            use_thinking: Not used by this method.
            start_time: Not used by this method.

        Returns:
            The accumulated text, stripped of surrounding whitespace. If it
            contains ``<think>``/``</think>`` markers, the markers themselves
            are removed (the text between them is kept) and blank lines are
            dropped. ``None`` if the HTTP status is not 200 or the
            request/stream raises.
        """
        import json

        repetition_window = 30  # Number of most recent words inspected
        stop_threshold = 0.8  # Abort only when >80% of the window is duplicates
        min_response_length = 100  # Never abort before 100 characters have arrived

        # Bound before the try so the finally clause can always release the
        # streamed connection, whichever exit path is taken.
        response = None
        try:
            response = requests.post(
                f"{self.ollama_url}/api/generate", json=payload, stream=True, timeout=65
            )
            if response.status_code != 200:
                logger.error(f"Ollama API error: {response.status_code}")
                return None

            full_response = ""
            word_buffer = []
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                try:
                    chunk_data = json.loads(raw_line.decode("utf-8"))
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # A single undecodable line should not discard the text
                    # collected so far; skip it and keep reading.
                    continue

                chunk_text = chunk_data.get("response", "")
                if chunk_text:
                    full_response += chunk_text

                    # Maintain the sliding window of recent words
                    word_buffer.extend(chunk_text.split())
                    if len(word_buffer) > repetition_window:
                        word_buffer = word_buffer[-repetition_window:]

                    # Only judge repetition with a full window AND enough content
                    if (
                        len(word_buffer) >= repetition_window
                        and len(full_response) >= min_response_length
                    ):
                        repetition_ratio = 1 - (len(set(word_buffer)) / len(word_buffer))
                        if repetition_ratio > stop_threshold:
                            logger.info(
                                f"Early stopping due to repetition: {repetition_ratio:.2f}"
                            )
                            # Add a gentle completion to the truncated response
                            if not full_response.strip().endswith((".", "!", "?")):
                                full_response += "..."
                            # The previous implementation POSTed
                            # {"model": ..., "stop": True} to /api/generate here.
                            # That is not a documented cancel request, so the
                            # stream is instead abandoned and closed in the
                            # finally clause below.
                            # NOTE(review): the server is expected to stop
                            # generating once the client disconnects - confirm
                            # against the deployed Ollama version.
                            break

                if chunk_data.get("done", False):
                    break

            # Strip the thinking markers from the final response. Only the tags
            # are removed; whatever was between them stays in the text.
            cleaned_response = full_response
            if "<think>" in cleaned_response or "</think>" in cleaned_response:
                cleaned_response = cleaned_response.replace("<think>", "").replace(
                    "</think>", ""
                )
                # Drop blank lines that removing the tags may have left behind
                cleaned_response = "\n".join(
                    text_line
                    for text_line in cleaned_response.split("\n")
                    if text_line.strip()
                )
            return cleaned_response.strip()
        except Exception as e:
            logger.error(f"Streaming with early stop failed: {e}")
            return None
        finally:
            # With stream=True the connection stays checked out until the body
            # is fully consumed or the response is closed; close it explicitly
            # so early stops, early returns and errors do not leak it.
            if response is not None:
                response.close()
    def synthesize_search_results(
        self, query: str, results: List[Any], project_path: Path
    ) -> SynthesisResult:
        """Synthesize search results into a coherent summary."""
        self._ensure_initialized()
@ -799,33 +243,27 @@ This is normal with smaller AI models and helps ensure you get quality responses
                key_points=[],
                code_examples=[],
                suggested_actions=["Install and run Ollama with a model"],
-                confidence=0.0,
+                confidence=0.0
            )
        # Prepare context from search results
        context_parts = []
        for i, result in enumerate(results[:8], 1):  # Limit to top 8 results
-            # result.file_path if hasattr(result, "file_path") else "unknown"  # Unused variable removed
+            file_path = result.file_path if hasattr(result, 'file_path') else 'unknown'
-            # result.content if hasattr(result, "content") else str(result)  # Unused variable removed
+            content = result.content if hasattr(result, 'content') else str(result)
-            # result.score if hasattr(result, "score") else 0.0  # Unused variable removed
+            score = result.score if hasattr(result, 'score') else 0.0
-            context_parts.append(
+            context_parts.append(f"""
                """
 Result {i} (Score: {score:.3f}):
 File: {file_path}
 Content: {content[:500]}{'...' if len(content) > 500 else ''}
-"""
+""")
            )
-        # "\n".join(context_parts)  # Unused variable removed
+        context = "\n".join(context_parts)
-        # Get system context for better responses
+        # Create synthesis prompt
-        # get_system_context(project_path)  # Unused variable removed
+        prompt = f"""You are a senior software engineer analyzing code search results. Your task is to synthesize the search results into a helpful, actionable summary.
        # Create synthesis prompt with system context
        prompt = """You are a senior software engineer analyzing code search results. Your task is to synthesize the search results into a helpful, actionable summary.
 SYSTEM CONTEXT: {system_context}
 SEARCH QUERY: "{query}"
 PROJECT: {project_path.name}
@ -868,33 +306,33 @@ Respond with ONLY the JSON, no other text."""
                key_points=[],
                code_examples=[],
                suggested_actions=["Check Ollama status and try again"],
-                confidence=0.0,
+                confidence=0.0
            )
        # Parse JSON response
        try:
            # Extract JSON from response (in case there's extra text)
-            start_idx = response.find("{")
+            start_idx = response.find('{')
-            end_idx = response.rfind("}") + 1
+            end_idx = response.rfind('}') + 1
            if start_idx >= 0 and end_idx > start_idx:
                json_str = response[start_idx:end_idx]
                data = json.loads(json_str)
                return SynthesisResult(
-                    summary=data.get("summary", "No summary generated"),
+                    summary=data.get('summary', 'No summary generated'),
-                    key_points=data.get("key_points", []),
+                    key_points=data.get('key_points', []),
-                    code_examples=data.get("code_examples", []),
+                    code_examples=data.get('code_examples', []),
-                    suggested_actions=data.get("suggested_actions", []),
+                    suggested_actions=data.get('suggested_actions', []),
-                    confidence=float(data.get("confidence", 0.5)),
+                    confidence=float(data.get('confidence', 0.5))
                )
            else:
                # Fallback: use the raw response as summary
                return SynthesisResult(
-                    summary=response[:300] + "..." if len(response) > 300 else response,
+                    summary=response[:300] + '...' if len(response) > 300 else response,
                    key_points=[],
                    code_examples=[],
                    suggested_actions=[],
-                    confidence=0.3,
+                    confidence=0.3
                )
        except Exception as e:
@ -904,7 +342,7 @@ Respond with ONLY the JSON, no other text."""
                key_points=[],
                code_examples=[],
                suggested_actions=["Try the search again or check LLM output"],
-                confidence=0.0,
+                confidence=0.0
            )
    def format_synthesis_output(self, synthesis: SynthesisResult, query: str) -> str:
@ -915,7 +353,7 @@ Respond with ONLY the JSON, no other text."""
        output.append("=" * 50)
        output.append("")
-        output.append("📝 Summary:")
+        output.append(f"📝 Summary:")
        output.append(f"   {synthesis.summary}")
        output.append("")
@ -937,20 +375,13 @@ Respond with ONLY the JSON, no other text."""
                output.append(f"   • {action}")
            output.append("")
-        confidence_emoji = (
+        confidence_emoji = "🟢" if synthesis.confidence > 0.7 else "🟡" if synthesis.confidence > 0.4 else "🔴"
            "🟢"
            if synthesis.confidence > 0.7
            else "🟡" if synthesis.confidence > 0.4 else "🔴"
        )
        output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}")
        output.append("")
        return "\n".join(output)
 # Quick test function
 def test_synthesizer():
    """Test the synthesizer with sample data."""
    from dataclasses import dataclass
@ -969,24 +400,17 @@ def test_synthesizer():
    # Mock search results
    results = [
-        MockResult(
+        MockResult("auth.py", "def authenticate_user(username, password):\n    return verify_credentials(username, password)", 0.95),
-            "auth.py",
+        MockResult("models.py", "class User:\n    def login(self):\n        return authenticate_user(self.username, self.password)", 0.87)
            "def authenticate_user(username, password):\n    return verify_credentials(username, password)",
            0.95,
        ),
        MockResult(
            "models.py",
            "class User:\n    def login(self):\n        return authenticate_user(self.username, self.password)",
            0.87,
        ),
    ]
    synthesis = synthesizer.synthesize_search_results(
-        "user authentication", results, Path("/test/project")
+        "user authentication", 
        results, 
        Path("/test/project")
    )
    print(synthesizer.format_synthesis_output(synthesis, "user authentication"))
 if __name__ == "__main__":
    test_synthesizer()
--- a/mini_rag/non_invasive_watcher.py
+++ b/mini_rag/non_invasive_watcher.py
@ -3,16 +3,16 @@ Non-invasive file watcher designed to not interfere with development workflows.
 Uses minimal resources and gracefully handles high-load scenarios.
 """
-import logging
+import os
 import queue
 import threading
 import time
-from datetime import datetime
+import logging
 import threading
 import queue
 from pathlib import Path
 from typing import Optional, Set
-
+from datetime import datetime
 from watchdog.events import DirModifiedEvent, FileSystemEventHandler
 from watchdog.observers import Observer
 from watchdog.events import FileSystemEventHandler, DirModifiedEvent
 from .indexer import ProjectIndexer
@ -74,12 +74,10 @@ class NonInvasiveQueue:
 class MinimalEventHandler(FileSystemEventHandler):
    """Minimal event handler that only watches for meaningful changes."""
-    def __init__(
+    def __init__(self, 
        self,
                 update_queue: NonInvasiveQueue,
                 include_patterns: Set[str],
-        exclude_patterns: Set[str],
+                 exclude_patterns: Set[str]):
    ):
        self.update_queue = update_queue
        self.include_patterns = include_patterns
        self.exclude_patterns = exclude_patterns
@ -102,13 +100,11 @@ class MinimalEventHandler(FileSystemEventHandler):
        # Skip temporary and system files
        name = path.name
-        if (
+        if (name.startswith('.') or 
-            name.startswith(".")
+            name.startswith('~') or 
-            or name.startswith("~")
+            name.endswith('.tmp') or
-            or name.endswith(".tmp")
+            name.endswith('.swp') or
-            or name.endswith(".swp")
+            name.endswith('.lock')):
            or name.endswith(".lock")
        ):
            return False
        # Check exclude patterns first (faster)
@ -128,9 +124,7 @@ class MinimalEventHandler(FileSystemEventHandler):
        """Rate limit events per file."""
        current_time = time.time()
        if file_path in self.last_event_time:
-            if (
+            if current_time - self.last_event_time[file_path] < 2.0:  # 2 second cooldown per file
                current_time - self.last_event_time[file_path] < 2.0
            ):  # 2 second cooldown per file
                return False
        self.last_event_time[file_path] = current_time
@ -138,20 +132,16 @@ class MinimalEventHandler(FileSystemEventHandler):
    def on_modified(self, event):
        """Handle file modifications with minimal overhead."""
-        if (
+        if (not event.is_directory and 
-            not event.is_directory
+            self._should_process(event.src_path) and
-            and self._should_process(event.src_path)
+            self._rate_limit_event(event.src_path)):
            and self._rate_limit_event(event.src_path)
        ):
            self.update_queue.add(Path(event.src_path))
    def on_created(self, event):
        """Handle file creation."""
-        if (
+        if (not event.is_directory and 
-            not event.is_directory
+            self._should_process(event.src_path) and
-            and self._should_process(event.src_path)
+            self._rate_limit_event(event.src_path)):
            and self._rate_limit_event(event.src_path)
        ):
            self.update_queue.add(Path(event.src_path))
    def on_deleted(self, event):
@ -168,13 +158,11 @@ class MinimalEventHandler(FileSystemEventHandler):
 class NonInvasiveFileWatcher:
    """Non-invasive file watcher that prioritizes system stability."""
-    def __init__(
+    def __init__(self, 
        self,
                 project_path: Path,
                 indexer: Optional[ProjectIndexer] = None,
                 cpu_limit: float = 0.1,  # Max 10% CPU usage
-        max_memory_mb: int = 50,
+                 max_memory_mb: int = 50):  # Max 50MB memory
    ):  # Max 50MB memory
        """
        Initialize non-invasive watcher.
@ -190,9 +178,7 @@ class NonInvasiveFileWatcher:
        self.max_memory_mb = max_memory_mb
        # Initialize components with conservative settings
-        self.update_queue = NonInvasiveQueue(
+        self.update_queue = NonInvasiveQueue(delay=10.0, max_queue_size=50)  # Very conservative
            delay=10.0, max_queue_size=50
        )  # Very conservative
        self.observer = Observer()
        self.worker_thread = None
        self.running = False
@ -202,38 +188,19 @@ class NonInvasiveFileWatcher:
        self.exclude_patterns = set(self.indexer.exclude_patterns)
        # Add more aggressive exclusions
-        self.exclude_patterns.update(
+        self.exclude_patterns.update({
-            {
+            '__pycache__', '.git', 'node_modules', '.venv', 'venv',
-                "__pycache__",
+            'dist', 'build', 'target', '.idea', '.vscode', '.pytest_cache',
-                ".git",
+            'coverage', 'htmlcov', '.coverage', '.mypy_cache', '.tox',
-                "node_modules",
+            'logs', 'log', 'tmp', 'temp', '.DS_Store'
-                ".venv",
+        })
                "venv",
                "dist",
                "build",
                "target",
                ".idea",
                ".vscode",
                ".pytest_cache",
                "coverage",
                "htmlcov",
                ".coverage",
                ".mypy_cache",
                ".tox",
                "logs",
                "log",
                "tmp",
                "temp",
                ".DS_Store",
            }
        )
        # Stats
        self.stats = {
-            "files_processed": 0,
+            'files_processed': 0,
-            "files_dropped": 0,
+            'files_dropped': 0,
-            "cpu_throttle_count": 0,
+            'cpu_throttle_count': 0,
-            "started_at": None,
+            'started_at': None,
        }
    def start(self):
@ -245,16 +212,24 @@ class NonInvasiveFileWatcher:
        # Set up minimal event handler
        event_handler = MinimalEventHandler(
-            self.update_queue, self.include_patterns, self.exclude_patterns
+            self.update_queue,
            self.include_patterns,
            self.exclude_patterns
        )
        # Schedule with recursive watching
-        self.observer.schedule(event_handler, str(self.project_path), recursive=True)
+        self.observer.schedule(
            event_handler,
            str(self.project_path),
            recursive=True
        )
        # Start low-priority worker thread
        self.running = True
        self.worker_thread = threading.Thread(
-            target=self._process_updates_gently, daemon=True, name="RAG-FileWatcher"
+            target=self._process_updates_gently,
            daemon=True,
            name="RAG-FileWatcher"
        )
        # Set lowest priority
        self.worker_thread.start()
@ -262,7 +237,7 @@ class NonInvasiveFileWatcher:
        # Start observer
        self.observer.start()
-        self.stats["started_at"] = datetime.now()
+        self.stats['started_at'] = datetime.now()
        logger.info("Non-invasive file watcher started")
    def stop(self):
@ -307,7 +282,7 @@ class NonInvasiveFileWatcher:
                        # If we're consuming too much time, throttle aggressively
                        work_ratio = 0.1  # Assume we use 10% of time in this check
                        if work_ratio > self.cpu_limit:
-                            self.stats["cpu_throttle_count"] += 1
+                            self.stats['cpu_throttle_count'] += 1
                            time.sleep(2.0)  # Back off significantly
                            continue
@ -319,20 +294,18 @@ class NonInvasiveFileWatcher:
                            success = self.indexer.delete_file(file_path)
                        if success:
-                            self.stats["files_processed"] += 1
+                            self.stats['files_processed'] += 1
                        # Always yield CPU after processing
                        time.sleep(0.1)
                    except Exception as e:
-                        logger.debug(
+                        logger.debug(f"Non-invasive watcher: failed to process {file_path}: {e}")
                            f"Non-invasive watcher: failed to process {file_path}: {e}"
                        )
                        # Don't let errors propagate - just continue
                        continue
                # Update dropped count from queue
-                self.stats["files_dropped"] = self.update_queue.dropped_count
+                self.stats['files_dropped'] = self.update_queue.dropped_count
            except Exception as e:
                logger.debug(f"Non-invasive watcher error: {e}")
@ -343,12 +316,12 @@ class NonInvasiveFileWatcher:
    def get_statistics(self) -> dict:
        """Get non-invasive watcher statistics."""
        stats = self.stats.copy()
-        stats["queue_size"] = self.update_queue.queue.qsize()
+        stats['queue_size'] = self.update_queue.queue.qsize()
-        stats["running"] = self.running
+        stats['running'] = self.running
-        if stats["started_at"]:
+        if stats['started_at']:
-            uptime = datetime.now() - stats["started_at"]
+            uptime = datetime.now() - stats['started_at']
-            stats["uptime_seconds"] = uptime.total_seconds()
+            stats['uptime_seconds'] = uptime.total_seconds()
        return stats
--- a/mini_rag/ollama_embeddings.py
+++ b/mini_rag/ollama_embeddings.py
@ -3,14 +3,15 @@ Hybrid code embedding module - Ollama primary with ML fallback.
 Tries Ollama first, falls back to local ML stack if needed.
 """
 import logging
 import time
 from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
 from typing import Any, Dict, List, Optional, Union
 import numpy as np
 import requests
 import numpy as np
 from typing import List, Union, Optional, Dict, Any
 import logging
 from functools import lru_cache
 import time
 import json
 from concurrent.futures import ThreadPoolExecutor
 import threading
 logger = logging.getLogger(__name__)
@ -18,9 +19,8 @@ logger = logging.getLogger(__name__)
 FALLBACK_AVAILABLE = False
 try:
    import torch
    from transformers import AutoTokenizer, AutoModel
    from sentence_transformers import SentenceTransformer
    from transformers import AutoModel, AutoTokenizer
    FALLBACK_AVAILABLE = True
    logger.debug("ML fallback dependencies available")
 except ImportError:
@ -30,12 +30,8 @@ except ImportError:
 class OllamaEmbedder:
    """Hybrid embeddings: Ollama primary with ML fallback."""
-    def __init__(
+    def __init__(self, model_name: str = "nomic-embed-text:latest", base_url: str = "http://localhost:11434", 
-        self,
+                 enable_fallback: bool = True):
        model_name: str = "nomic-embed-text:latest",
        base_url: str = "http://localhost:11434",
        enable_fallback: bool = True,
    ):
        """
        Initialize the hybrid embedder.
@ -74,9 +70,7 @@ class OllamaEmbedder:
                try:
                    self._initialize_fallback_embedder()
                    self.mode = "fallback"
-                    logger.info(
+                    logger.info(f"✅ ML fallback active: {self.fallback_embedder.model_type if hasattr(self.fallback_embedder, 'model_type') else 'transformer'}")
                        f"✅ ML fallback active: {self.fallback_embedder.model_type if hasattr(self.fallback_embedder, 'model_type') else 'transformer'}"
                    )
                except Exception as fallback_error:
                    logger.warning(f"ML fallback failed: {fallback_error}")
                    self.mode = "hash"
@ -87,36 +81,16 @@ class OllamaEmbedder:
    def _verify_ollama_connection(self):
        """Verify Ollama server is running and model is available."""
        try:
        # Check server status
        response = requests.get(f"{self.base_url}/api/tags", timeout=5)
        response.raise_for_status()
        except requests.exceptions.ConnectionError:
            print("🔌 Ollama Service Unavailable")
            print("   Ollama provides AI embeddings that make semantic search possible")
            print("   Start Ollama: ollama serve")
            print("   Install models: ollama pull nomic-embed-text")
            print()
            raise ConnectionError("Ollama service not running. Start with: ollama serve")
        except requests.exceptions.Timeout:
            print("⏱️ Ollama Service Timeout")
            print("   Ollama is taking too long to respond")
            print("   Check if Ollama is overloaded: ollama ps")
            print("   Restart if needed: killall ollama && ollama serve")
            print()
            raise ConnectionError("Ollama service timeout")
        # Check if our model is available
-        models = response.json().get("models", [])
+        models = response.json().get('models', [])
-        model_names = [model["name"] for model in models]
+        model_names = [model['name'] for model in models]
        if self.model_name not in model_names:
-            print(f"📦 Model '{self.model_name}' Not Found")
+            logger.warning(f"Model {self.model_name} not found. Available: {model_names}")
            print("   Embedding models convert text into searchable vectors")
            print(f"   Download model: ollama pull {self.model_name}")
            if model_names:
                print(f"   Available models: {', '.join(model_names[:3])}")
            print()
            # Try to pull the model
            self._pull_model()
@ -127,11 +101,7 @@ class OllamaEmbedder:
        # Try lightweight models first for better compatibility
        fallback_models = [
-            (
+            ("sentence-transformers/all-MiniLM-L6-v2", 384, self._init_sentence_transformer),
                "sentence-transformers/all-MiniLM-L6-v2",
                384,
                self._init_sentence_transformer,
            ),
            ("microsoft/codebert-base", 768, self._init_transformer_model),
            ("microsoft/unixcoder-base", 768, self._init_transformer_model),
        ]
@ -151,24 +121,22 @@ class OllamaEmbedder:
    def _init_sentence_transformer(self, model_name: str):
        """Initialize sentence-transformers model."""
        self.fallback_embedder = SentenceTransformer(model_name)
-        self.fallback_embedder.model_type = "sentence_transformer"
+        self.fallback_embedder.model_type = 'sentence_transformer'
    def _init_transformer_model(self, model_name: str):
        """Initialize transformer model."""
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name).to(device)
        model.eval()
        # Create a simple wrapper
        class TransformerWrapper:
            def __init__(self, model, tokenizer, device):
                self.model = model
                self.tokenizer = tokenizer
                self.device = device
-                self.model_type = "transformer"
+                self.model_type = 'transformer'
        self.fallback_embedder = TransformerWrapper(model, tokenizer, device)
@ -179,7 +147,7 @@ class OllamaEmbedder:
            response = requests.post(
                f"{self.base_url}/api/pull",
                json={"name": self.model_name},
-                timeout=300,  # 5 minutes for model download
+                timeout=300  # 5 minutes for model download
            )
            response.raise_for_status()
            logger.info(f"Successfully pulled {self.model_name}")
@ -201,13 +169,16 @@ class OllamaEmbedder:
        try:
            response = requests.post(
                f"{self.base_url}/api/embeddings",
-                json={"model": self.model_name, "prompt": text},
+                json={
-                timeout=30,
+                    "model": self.model_name,
                    "prompt": text
                },
                timeout=30
            )
            response.raise_for_status()
            result = response.json()
-            embedding = result.get("embedding", [])
+            embedding = result.get('embedding', [])
            if not embedding:
                raise ValueError("No embedding returned from Ollama")
@ -229,37 +200,33 @@ class OllamaEmbedder:
    def _get_fallback_embedding(self, text: str) -> np.ndarray:
        """Get embedding from ML fallback."""
        try:
-            if self.fallback_embedder.model_type == "sentence_transformer":
+            if self.fallback_embedder.model_type == 'sentence_transformer':
                embedding = self.fallback_embedder.encode([text], convert_to_numpy=True)[0]
                return embedding.astype(np.float32)
-            elif self.fallback_embedder.model_type == "transformer":
+            elif self.fallback_embedder.model_type == 'transformer':
                # Tokenize and generate embedding
                inputs = self.fallback_embedder.tokenizer(
                    text, 
                    padding=True, 
                    truncation=True, 
                    max_length=512,
-                    return_tensors="pt",
+                    return_tensors="pt"
                ).to(self.fallback_embedder.device)
                with torch.no_grad():
                    outputs = self.fallback_embedder.model(**inputs)
                    # Use pooler output if available, otherwise mean pooling
-                    if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
+                    if hasattr(outputs, 'pooler_output') and outputs.pooler_output is not None:
                        embedding = outputs.pooler_output[0]
                    else:
                        # Mean pooling over sequence length
-                        attention_mask = inputs["attention_mask"]
+                        attention_mask = inputs['attention_mask']
                        token_embeddings = outputs.last_hidden_state[0]
                        # Mask and average
-                        input_mask_expanded = (
+                        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
                            attention_mask.unsqueeze(-1)
                            .expand(token_embeddings.size())
                            .float()
                        )
                        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 0)
                        sum_mask = torch.clamp(input_mask_expanded.sum(0), min=1e-9)
                        embedding = sum_embeddings / sum_mask
@ -267,9 +234,7 @@ class OllamaEmbedder:
                return embedding.cpu().numpy().astype(np.float32)
            else:
-                raise ValueError(
+                raise ValueError(f"Unknown fallback model type: {self.fallback_embedder.model_type}")
                    f"Unknown fallback model type: {self.fallback_embedder.model_type}"
                )
        except Exception as e:
            logger.error(f"Fallback embedding failed: {e}")
@ -280,7 +245,7 @@ class OllamaEmbedder:
        import hashlib
        # Create deterministic hash
-        hash_obj = hashlib.sha256(text.encode("utf-8"))
+        hash_obj = hashlib.sha256(text.encode('utf-8'))
        hash_bytes = hash_obj.digest()
        # Convert to numbers and normalize
@ -291,7 +256,7 @@ class OllamaEmbedder:
            hash_nums = np.concatenate([hash_nums, hash_nums])
        # Take exactly the dimension we need
-        embedding = hash_nums[: self.embedding_dim].astype(np.float32)
+        embedding = hash_nums[:self.embedding_dim].astype(np.float32)
        # Normalize to [-1, 1] range
        embedding = (embedding / 127.5) - 1.0
@ -340,7 +305,7 @@ class OllamaEmbedder:
        code = code.strip()
        # Normalize whitespace but preserve structure
-        lines = code.split("\n")
+        lines = code.split('\n')
        processed_lines = []
        for line in lines:
@ -350,7 +315,7 @@ class OllamaEmbedder:
            if line:
                processed_lines.append(line)
-        cleaned_code = "\n".join(processed_lines)
+        cleaned_code = '\n'.join(processed_lines)
        # Add language context for better embeddings
        if language and cleaned_code:
@ -395,36 +360,33 @@ class OllamaEmbedder:
        """Sequential processing for small batches."""
        results = []
        for file_dict in file_contents:
-            content = file_dict["content"]
+            content = file_dict['content']
-            language = file_dict.get("language", "python")
+            language = file_dict.get('language', 'python')
            embedding = self.embed_code(content, language)
            result = file_dict.copy()
-            result["embedding"] = embedding
+            result['embedding'] = embedding
            results.append(result)
        return results
-    def _batch_embed_concurrent(
+    def _batch_embed_concurrent(self, file_contents: List[dict], max_workers: int) -> List[dict]:
        self, file_contents: List[dict], max_workers: int
    ) -> List[dict]:
        """Concurrent processing for larger batches."""
        def embed_single(item_with_index):
            index, file_dict = item_with_index
-            content = file_dict["content"]
+            content = file_dict['content']
-            language = file_dict.get("language", "python")
+            language = file_dict.get('language', 'python')
            try:
                embedding = self.embed_code(content, language)
                result = file_dict.copy()
-                result["embedding"] = embedding
+                result['embedding'] = embedding
                return index, result
            except Exception as e:
                logger.error(f"Failed to embed content at index {index}: {e}")
                # Return with hash fallback
                result = file_dict.copy()
-                result["embedding"] = self._hash_embedding(content)
+                result['embedding'] = self._hash_embedding(content)
                return index, result
        # Create indexed items to preserve order
@ -438,9 +400,7 @@ class OllamaEmbedder:
        indexed_results.sort(key=lambda x: x[0])
        return [result for _, result in indexed_results]
-    def _batch_embed_chunked(
+    def _batch_embed_chunked(self, file_contents: List[dict], max_workers: int, chunk_size: int = 200) -> List[dict]:
        self, file_contents: List[dict], max_workers: int, chunk_size: int = 200
    ) -> List[dict]:
        """
        Process very large batches in smaller chunks to prevent memory issues.
        This is important for beginners who might try to index huge projects.
@ -450,15 +410,13 @@ class OllamaEmbedder:
        # Process in chunks
        for i in range(0, len(file_contents), chunk_size):
-            chunk = file_contents[i : i + chunk_size]
+            chunk = file_contents[i:i + chunk_size]
            # Log progress for large operations
            if total_chunks > chunk_size:
                chunk_num = i // chunk_size + 1
                total_chunk_count = (total_chunks + chunk_size - 1) // chunk_size
-                logger.info(
+                logger.info(f"Processing chunk {chunk_num}/{total_chunk_count} ({len(chunk)} files)")
                    f"Processing chunk {chunk_num}/{total_chunk_count} ({len(chunk)} files)"
                )
            # Process this chunk using concurrent method
            chunk_results = self._batch_embed_concurrent(chunk, max_workers)
@ -466,7 +424,7 @@ class OllamaEmbedder:
            # Brief pause between chunks to prevent overwhelming the system
            if i + chunk_size < len(file_contents):
-
+                import time
                time.sleep(0.1)  # 100ms pause between chunks
        return results
@ -485,32 +443,12 @@ class OllamaEmbedder:
            "mode": self.mode,
            "ollama_available": self.ollama_available,
            "fallback_available": FALLBACK_AVAILABLE and self.enable_fallback,
-            "fallback_model": (
+            "fallback_model": getattr(self.fallback_embedder, 'model_type', None) if self.fallback_embedder else None,
                getattr(self.fallback_embedder, "model_type", None)
                if self.fallback_embedder
                else None
            ),
            "embedding_dim": self.embedding_dim,
            "ollama_model": self.model_name if self.mode == "ollama" else None,
-            "ollama_url": self.base_url if self.mode == "ollama" else None,
+            "ollama_url": self.base_url if self.mode == "ollama" else None
        }
    def get_embedding_info(self) -> Dict[str, str]:
        """Get human-readable embedding system information for installer."""
        status = self.get_status()
        mode = status.get("mode", "unknown")
        if mode == "ollama":
            return {"method": f"Ollama ({status['ollama_model']})", "status": "working"}
        # Treat legacy/alternate naming uniformly
        if mode in ("fallback", "ml"):
            return {
                "method": f"ML Fallback ({status['fallback_model']})",
                "status": "working",
            }
        if mode == "hash":
            return {"method": "Hash-based (basic similarity)", "status": "working"}
        return {"method": "Unknown", "status": "error"}
    def warmup(self):
        """Warm up the embedding system with a dummy request."""
        dummy_code = "def hello(): pass"
@ -520,11 +458,7 @@ class OllamaEmbedder:
 # Convenience function for quick embedding
-
+def embed_code(code: Union[str, List[str]], model_name: str = "nomic-embed-text:latest") -> np.ndarray:
 def embed_code(
    code: Union[str, List[str]], model_name: str = "nomic-embed-text:latest"
 ) -> np.ndarray:
    """
    Quick function to embed code without managing embedder instance.
--- a/mini_rag/path_handler.py
+++ b/mini_rag/path_handler.py
@ -4,9 +4,10 @@ Handles forward/backward slashes on any file system.
 Robust cross-platform path handling.
 """
 import os
 import sys
 from pathlib import Path
-from typing import List, Union
+from typing import Union, List
 def normalize_path(path: Union[str, Path]) -> str:
@ -24,10 +25,10 @@ def normalize_path(path: Union[str, Path]) -> str:
    path_obj = Path(path)
    # Convert to string and replace backslashes
-    path_str = str(path_obj).replace("\\", "/")
+    path_str = str(path_obj).replace('\\', '/')
    # Handle UNC paths on Windows
-    if sys.platform == "win32" and path_str.startswith("//"):
+    if sys.platform == 'win32' and path_str.startswith('//'):
        # Keep UNC paths as they are
        return path_str
@ -119,7 +120,7 @@ def ensure_forward_slashes(path_str: str) -> str:
    Returns:
        Path with forward slashes
    """
-    return path_str.replace("\\", "/")
+    return path_str.replace('\\', '/')
 def ensure_native_slashes(path_str: str) -> str:
@ -136,8 +137,6 @@ def ensure_native_slashes(path_str: str) -> str:
 # Convenience functions for common operations
 def storage_path(path: Union[str, Path]) -> str:
    """Convert path to storage format (forward slashes)."""
    return normalize_path(path)
--- a/mini_rag/performance.py
+++ b/mini_rag/performance.py
@ -3,13 +3,12 @@ Performance monitoring for RAG system.
 Track loading times, query times, and resource usage.
 """
 import logging
 import os
 import time
 from contextlib import contextmanager
 from typing import Any, Dict, Optional
 import psutil
 import os
 from contextlib import contextmanager
 from typing import Dict, Any, Optional
 import logging
 logger = logging.getLogger(__name__)
@ -40,9 +39,9 @@ class PerformanceMonitor:
            # Store metrics
            self.metrics[operation] = {
-                "duration_seconds": duration,
+                'duration_seconds': duration,
-                "memory_delta_mb": memory_delta,
+                'memory_delta_mb': memory_delta,
-                "final_memory_mb": end_memory,
+                'final_memory_mb': end_memory,
            }
            logger.info(
@ -52,19 +51,19 @@ class PerformanceMonitor:
    def get_summary(self) -> Dict[str, Any]:
        """Get performance summary."""
-        total_time = sum(m["duration_seconds"] for m in self.metrics.values())
+        total_time = sum(m['duration_seconds'] for m in self.metrics.values())
        return {
-            "total_time_seconds": total_time,
+            'total_time_seconds': total_time,
-            "operations": self.metrics,
+            'operations': self.metrics,
-            "current_memory_mb": self.process.memory_info().rss / 1024 / 1024,
+            'current_memory_mb': self.process.memory_info().rss / 1024 / 1024,
        }
    def print_summary(self):
        """Print a formatted summary."""
-        print("\n" + "=" * 50)
+        print("\n" + "="*50)
        print("PERFORMANCE SUMMARY")
-        print("=" * 50)
+        print("="*50)
        for op, metrics in self.metrics.items():
            print(f"\n{op}:")
@ -74,13 +73,12 @@ class PerformanceMonitor:
        summary = self.get_summary()
        print(f"\nTotal Time: {summary['total_time_seconds']:.2f}s")
        print(f"Current Memory: {summary['current_memory_mb']:.1f}MB")
-        print("=" * 50)
+        print("="*50)
 # Global instance for easy access
 _monitor = None
 def get_monitor() -> PerformanceMonitor:
    """Get or create global monitor instance."""
    global _monitor
--- a/mini_rag/query_expander.py
+++ b/mini_rag/query_expander.py
@ -33,15 +33,12 @@ disable in CLI for maximum speed.
 import logging
 import re
 import threading
-from typing import Optional
+from typing import List, Optional
 import requests
 from .config import RAGConfig
 logger = logging.getLogger(__name__)
 class QueryExpander:
    """Expands search queries using LLM to improve search recall."""
@ -62,8 +59,23 @@ class QueryExpander:
        if self._initialized:
            return
-        # Skip warmup - causes startup delays and unwanted model calls
+        # Warm up LLM if enabled and available
-        # Query expansion works fine on first use without warmup
+        if self.enabled:
            try:
                model = self._select_expansion_model()
                if model:
                    requests.post(
                        f"{self.ollama_url}/api/generate",
                        json={
                            "model": model,
                            "prompt": "testing, just say 'hi' <no_think>",
                            "stream": False,
                            "options": {"temperature": 0.1, "max_tokens": 5}
                        },
                        timeout=5
                    )
            except:
                pass  # Warmup failure is non-critical
        self._initialized = True
@ -110,7 +122,7 @@ class QueryExpander:
            return None
        # Create expansion prompt
-        prompt = """You are a search query expert. Expand the following search query with {self.max_terms} additional related terms that would help find relevant content.
+        prompt = f"""You are a search query expert. Expand the following search query with {self.max_terms} additional related terms that would help find relevant content.
 Original query: "{query}"
@ -137,18 +149,18 @@ Expanded query:"""
                "options": {
                    "temperature": 0.1,  # Very low temperature for consistent expansions
                    "top_p": 0.8,
-                    "max_tokens": 100,  # Keep it short
+                    "max_tokens": 100    # Keep it short
-                },
+                }
            }
            response = requests.post(
                f"{self.ollama_url}/api/generate",
                json=payload,
-                timeout=10,  # Quick timeout for low latency
+                timeout=10  # Quick timeout for low latency
            )
            if response.status_code == 200:
-                result = response.json().get("response", "").strip()
+                result = response.json().get('response', '').strip()
                # Clean up the response - extract just the expanded query
                expanded = self._clean_expansion(result, query)
@ -169,16 +181,12 @@ Expanded query:"""
            response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
            if response.status_code == 200:
                data = response.json()
-                available = [model["name"] for model in data.get("models", [])]
+                available = [model['name'] for model in data.get('models', [])]
-                # Use same model rankings as main synthesizer for consistency
+                # Prefer ultra-fast, efficient models for query expansion (CPU-friendly)
                expansion_preferences = [
-                    "qwen3:1.7b",
+                    "qwen3:0.6b", "qwen3:1.7b", "qwen2.5:1.5b", 
-                    "qwen3:0.6b",
+                    "llama3.2:1b", "gemma2:2b", "llama3.2:3b"
                    "qwen3:4b",
                    "qwen2.5:3b",
                    "qwen2.5:1.5b",
                    "qwen2.5-coder:1.5b",
                ]
                for preferred in expansion_preferences:
@ -207,11 +215,11 @@ Expanded query:"""
            clean_response = clean_response[1:-1]
        # Take only the first line if multiline
-        clean_response = clean_response.split("\n")[0].strip()
+        clean_response = clean_response.split('\n')[0].strip()
        # Remove excessive punctuation and normalize spaces
-        clean_response = re.sub(r"[^\w\s-]", " ", clean_response)
+        clean_response = re.sub(r'[^\w\s-]', ' ', clean_response)
-        clean_response = re.sub(r"\s+", " ", clean_response).strip()
+        clean_response = re.sub(r'\s+', ' ', clean_response).strip()
        # Ensure it starts with the original query
        if not clean_response.lower().startswith(original_query.lower()):
@ -220,8 +228,8 @@ Expanded query:"""
        # Limit the total length to avoid very long queries
        words = clean_response.split()
        if len(words) > len(original_query.split()) + self.max_terms:
-            words = words[: len(original_query.split()) + self.max_terms]
+            words = words[:len(original_query.split()) + self.max_terms]
-            clean_response = " ".join(words)
+            clean_response = ' '.join(words)
        return clean_response
@ -249,13 +257,10 @@ Expanded query:"""
        try:
            response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
            return response.status_code == 200
-        except (ConnectionError, TimeoutError, requests.RequestException):
+        except:
            return False
 # Quick test function
 def test_expansion():
    """Test the query expander."""
    from .config import RAGConfig
@ -274,7 +279,7 @@ def test_expansion():
        "authentication",
        "error handling", 
        "database query",
-        "user interface",
+        "user interface"
    ]
    print("🔍 Testing Query Expansion:")
@ -282,6 +287,5 @@ def test_expansion():
        expanded = expander.expand_query(query)
        print(f"  '{query}' → '{expanded}'")
 if __name__ == "__main__":
    test_expansion()
--- a/mini_rag/search.py
+++ b/mini_rag/search.py
@ -4,33 +4,22 @@ Optimized for code search with relevance scoring.
 """
 import logging
 from collections import defaultdict
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import List, Dict, Any, Optional, Tuple
 import numpy as np
 import pandas as pd
-from rank_bm25 import BM25Okapi
+import lancedb
 from rich.console import Console
 from rich.syntax import Syntax
 from rich.table import Table
 from rich.syntax import Syntax
 from rank_bm25 import BM25Okapi
 from collections import defaultdict
 # Optional LanceDB import
 try:
    import lancedb
    LANCEDB_AVAILABLE = True
 except ImportError:
    lancedb = None
    LANCEDB_AVAILABLE = False
 from datetime import timedelta
 from .config import ConfigManager
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .path_handler import display_path
 from .query_expander import QueryExpander
 from .config import ConfigManager
 from datetime import datetime, timedelta
 logger = logging.getLogger(__name__)
 console = Console()
@ -39,8 +28,7 @@ console = Console()
 class SearchResult:
    """Represents a single search result."""
-    def __init__(
+    def __init__(self, 
        self,
                 file_path: str,
                 content: str,
                 score: float,
@ -51,8 +39,7 @@ class SearchResult:
                 language: str,
                 context_before: Optional[str] = None,
                 context_after: Optional[str] = None,
-        parent_chunk: Optional["SearchResult"] = None,
+                 parent_chunk: Optional['SearchResult'] = None):
    ):
        self.file_path = file_path
        self.content = content
        self.score = score
@ -71,17 +58,17 @@ class SearchResult:
    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
-            "file_path": self.file_path,
+            'file_path': self.file_path,
-            "content": self.content,
+            'content': self.content,
-            "score": self.score,
+            'score': self.score,
-            "start_line": self.start_line,
+            'start_line': self.start_line,
-            "end_line": self.end_line,
+            'end_line': self.end_line,
-            "chunk_type": self.chunk_type,
+            'chunk_type': self.chunk_type,
-            "name": self.name,
+            'name': self.name,
-            "language": self.language,
+            'language': self.language,
-            "context_before": self.context_before,
+            'context_before': self.context_before,
-            "context_after": self.context_after,
+            'context_after': self.context_after,
-            "parent_chunk": self.parent_chunk.to_dict() if self.parent_chunk else None,
+            'parent_chunk': self.parent_chunk.to_dict() if self.parent_chunk else None,
        }
    def format_for_display(self, max_lines: int = 10) -> str:
@ -90,15 +77,17 @@ class SearchResult:
        if len(lines) > max_lines:
            # Show first and last few lines
            half = max_lines // 2
-            lines = lines[:half] + ["..."] + lines[-half:]
+            lines = lines[:half] + ['...'] + lines[-half:]
-        return "\n".join(lines)
+        return '\n'.join(lines)
 class CodeSearcher:
    """Semantic code search using vector similarity."""
-    def __init__(self, project_path: Path, embedder: Optional[CodeEmbedder] = None):
+    def __init__(self, 
                 project_path: Path,
                 embedder: Optional[CodeEmbedder] = None):
        """
        Initialize searcher.
@ -107,7 +96,7 @@ class CodeSearcher:
            embedder: CodeEmbedder instance (creates one if not provided)
        """
        self.project_path = Path(project_path).resolve()
-        self.rag_dir = self.project_path / ".mini-rag"
+        self.rag_dir = self.project_path / '.mini-rag'
        self.embedder = embedder or CodeEmbedder()
        # Load configuration and initialize query expander
@ -126,35 +115,13 @@ class CodeSearcher:
    def _connect(self):
        """Connect to the LanceDB database."""
        if not LANCEDB_AVAILABLE:
            print("❌ LanceDB Not Available")
            print("   LanceDB is required for search functionality")
            print("   Install it with: pip install lancedb pyarrow")
            print("   For basic Ollama functionality, use hash-based search instead")
            print()
            raise ImportError(
                "LanceDB dependency is required for search. Install with: pip install lancedb pyarrow"
            )
        try:
            if not self.rag_dir.exists():
                print("🗃️ No Search Index Found")
                print("   An index is a database that makes your files searchable")
                print(f"   Create index: ./rag-mini index {self.project_path}")
                print("   (This analyzes your files and creates semantic search vectors)")
                print()
                raise FileNotFoundError(f"No RAG index found at {self.rag_dir}")
            self.db = lancedb.connect(self.rag_dir)
            if "code_vectors" not in self.db.table_names():
                print("🔧 Index Database Corrupted")
                print("   The search index exists but is missing data tables")
                print(
                    f"   Rebuild index: rm -rf {self.rag_dir} && ./rag-mini index {self.project_path}"
                )
                print("   (This will recreate the search database)")
                print()
                raise ValueError("No code_vectors table found. Run indexing first.")
            self.table = self.db.open_table("code_vectors")
@ -194,9 +161,7 @@ class CodeSearcher:
            logger.error(f"Failed to build BM25 index: {e}")
            self.bm25 = None
-    def get_chunk_context(
+    def get_chunk_context(self, chunk_id: str, include_adjacent: bool = True, include_parent: bool = True) -> Dict[str, Any]:
        self, chunk_id: str, include_adjacent: bool = True, include_parent: bool = True
    ) -> Dict[str, Any]:
        """
        Get context for a specific chunk including adjacent and parent chunks.
@ -214,81 +179,72 @@ class CodeSearcher:
        try:
            # Get the main chunk by ID
            df = self.table.to_pandas()
-            chunk_rows = df[df["chunk_id"] == chunk_id]
+            chunk_rows = df[df['chunk_id'] == chunk_id]
            if chunk_rows.empty:
-                return {"chunk": None, "prev": None, "next": None, "parent": None}
+                return {'chunk': None, 'prev': None, 'next': None, 'parent': None}
            chunk_row = chunk_rows.iloc[0]
-            context = {"chunk": self._row_to_search_result(chunk_row, score=1.0)}
+            context = {'chunk': self._row_to_search_result(chunk_row, score=1.0)}
            # Get adjacent chunks if requested
            if include_adjacent:
                # Get previous chunk
-                if pd.notna(chunk_row.get("prev_chunk_id")):
+                if pd.notna(chunk_row.get('prev_chunk_id')):
-                    prev_rows = df[df["chunk_id"] == chunk_row["prev_chunk_id"]]
+                    prev_rows = df[df['chunk_id'] == chunk_row['prev_chunk_id']]
                    if not prev_rows.empty:
-                        context["prev"] = self._row_to_search_result(
+                        context['prev'] = self._row_to_search_result(prev_rows.iloc[0], score=1.0)
                            prev_rows.iloc[0], score=1.0
                        )
                    else:
-                        context["prev"] = None
+                        context['prev'] = None
                else:
-                    context["prev"] = None
+                    context['prev'] = None
                # Get next chunk
-                if pd.notna(chunk_row.get("next_chunk_id")):
+                if pd.notna(chunk_row.get('next_chunk_id')):
-                    next_rows = df[df["chunk_id"] == chunk_row["next_chunk_id"]]
+                    next_rows = df[df['chunk_id'] == chunk_row['next_chunk_id']]
                    if not next_rows.empty:
-                        context["next"] = self._row_to_search_result(
+                        context['next'] = self._row_to_search_result(next_rows.iloc[0], score=1.0)
                            next_rows.iloc[0], score=1.0
                        )
                    else:
-                        context["next"] = None
+                        context['next'] = None
                else:
-                    context["next"] = None
+                    context['next'] = None
            else:
-                context["prev"] = None
+                context['prev'] = None
-                context["next"] = None
+                context['next'] = None
            # Get parent class chunk if requested and applicable
-            if include_parent and pd.notna(chunk_row.get("parent_class")):
+            if include_parent and pd.notna(chunk_row.get('parent_class')):
                # Find the parent class chunk
-                parent_rows = df[
+                parent_rows = df[(df['name'] == chunk_row['parent_class']) & 
-                    (df["name"] == chunk_row["parent_class"])
+                               (df['chunk_type'] == 'class') &
-                    & (df["chunk_type"] == "class")
+                               (df['file_path'] == chunk_row['file_path'])]
                    & (df["file_path"] == chunk_row["file_path"])
                ]
                if not parent_rows.empty:
-                    context["parent"] = self._row_to_search_result(
+                    context['parent'] = self._row_to_search_result(parent_rows.iloc[0], score=1.0)
                        parent_rows.iloc[0], score=1.0
                    )
                else:
-                    context["parent"] = None
+                    context['parent'] = None
            else:
-                context["parent"] = None
+                context['parent'] = None
            return context
        except Exception as e:
            logger.error(f"Failed to get chunk context: {e}")
-            return {"chunk": None, "prev": None, "next": None, "parent": None}
+            return {'chunk': None, 'prev': None, 'next': None, 'parent': None}
    def _row_to_search_result(self, row: pd.Series, score: float) -> SearchResult:
        """Convert a DataFrame row to a SearchResult."""
        return SearchResult(
-            file_path=display_path(row["file_path"]),
+            file_path=display_path(row['file_path']),
-            content=row["content"],
+            content=row['content'],
            score=score,
-            start_line=row["start_line"],
+            start_line=row['start_line'],
-            end_line=row["end_line"],
+            end_line=row['end_line'],
-            chunk_type=row["chunk_type"],
+            chunk_type=row['chunk_type'],
-            name=row["name"],
+            name=row['name'],
-            language=row["language"],
+            language=row['language']
        )
-    def search(
+    def search(self, 
        self,
              query: str, 
              top_k: int = 10,
              chunk_types: Optional[List[str]] = None,
@ -296,8 +252,7 @@ class CodeSearcher:
              file_pattern: Optional[str] = None,
              semantic_weight: float = 0.7,
              bm25_weight: float = 0.3,
-        include_context: bool = False,
+              include_context: bool = False) -> List[SearchResult]:
    ) -> List[SearchResult]:
        """
        Hybrid search for code similar to the query using both semantic and BM25.
@ -344,15 +299,16 @@ class CodeSearcher:
        # Apply filters first
        if chunk_types:
-            results_df = results_df[results_df["chunk_type"].isin(chunk_types)]
+            results_df = results_df[results_df['chunk_type'].isin(chunk_types)]
        if languages:
-            results_df = results_df[results_df["language"].isin(languages)]
+            results_df = results_df[results_df['language'].isin(languages)]
        if file_pattern:
            import fnmatch
-
+            mask = results_df['file_path'].apply(
-            mask = results_df["file_path"].apply(lambda x: fnmatch.fnmatch(x, file_pattern))
+                lambda x: fnmatch.fnmatch(x, file_pattern)
            )
            results_df = results_df[mask]
        # Calculate BM25 scores if available
@ -377,24 +333,25 @@ class CodeSearcher:
        hybrid_results = []
        for idx, row in results_df.iterrows():
            # Semantic score (convert distance to similarity)
-            distance = row["_distance"]
+            distance = row['_distance']
            semantic_score = 1 / (1 + distance)
            # BM25 score
            bm25_score = bm25_scores.get(idx, 0.0)
            # Combined score
-            combined_score = semantic_weight * semantic_score + bm25_weight * bm25_score
+            combined_score = (semantic_weight * semantic_score + 
                            bm25_weight * bm25_score)
            result = SearchResult(
-                file_path=display_path(row["file_path"]),
+                file_path=display_path(row['file_path']),
-                content=row["content"],
+                content=row['content'],
                score=combined_score,
-                start_line=row["start_line"],
+                start_line=row['start_line'],
-                end_line=row["end_line"],
+                end_line=row['end_line'],
-                chunk_type=row["chunk_type"],
+                chunk_type=row['chunk_type'],
-                name=row["name"],
+                name=row['name'],
-                language=row["language"],
+                language=row['language']
            )
            hybrid_results.append(result)
@ -425,20 +382,9 @@ class CodeSearcher:
            # File importance boost (20% boost for important files)
            file_path_lower = str(result.file_path).lower()
            important_patterns = [
-                "readme",
+                'readme', 'main.', 'index.', '__init__', 'config',
-                "main.",
+                'setup', 'install', 'getting', 'started', 'docs/',
-                "index.",
+                'documentation', 'guide', 'tutorial', 'example'
                "__init__",
                "config",
                "setup",
                "install",
                "getting",
                "started",
                "docs/",
                "documentation",
                "guide",
                "tutorial",
                "example",
            ]
            if any(pattern in file_path_lower for pattern in important_patterns):
@ -455,9 +401,7 @@ class CodeSearcher:
                if days_old <= 7:  # Modified in last week
                    result.score *= 1.1
-                    logger.debug(
+                    logger.debug(f"Recent file boost: {result.file_path} ({days_old} days old)")
                        f"Recent file boost: {result.file_path} ({days_old} days old)"
                    )
                elif days_old <= 30:  # Modified in last month
                    result.score *= 1.05
@ -466,11 +410,11 @@ class CodeSearcher:
                pass
            # Content type relevance boost
-            if hasattr(result, "chunk_type"):
+            if hasattr(result, 'chunk_type'):
-                if result.chunk_type in ["function", "class", "method"]:
+                if result.chunk_type in ['function', 'class', 'method']:
                    # Code definitions are usually more valuable
                    result.score *= 1.1
-                elif result.chunk_type in ["comment", "docstring"]:
+                elif result.chunk_type in ['comment', 'docstring']:
                    # Documentation is valuable for understanding
                    result.score *= 1.05
@ -479,16 +423,14 @@ class CodeSearcher:
                result.score *= 0.9
            # Small boost for content with good structure (has multiple lines)
-            lines = result.content.strip().split("\n")
+            lines = result.content.strip().split('\n')
            if len(lines) >= 3 and any(len(line.strip()) > 10 for line in lines):
                result.score *= 1.02
        # Sort by updated scores
        return sorted(results, key=lambda x: x.score, reverse=True)
-    def _apply_diversity_constraints(
+    def _apply_diversity_constraints(self, results: List[SearchResult], top_k: int) -> List[SearchResult]:
        self, results: List[SearchResult], top_k: int
    ) -> List[SearchResult]:
        """
        Apply diversity constraints to search results.
@ -512,10 +454,7 @@ class CodeSearcher:
                continue
            # Prefer diverse chunk types
-            if (
+            if len(final_results) >= top_k // 2 and chunk_type_counts[result.chunk_type] > top_k // 3:
                len(final_results) >= top_k // 2
                and chunk_type_counts[result.chunk_type] > top_k // 3
            ):
                # Skip if we have too many of this type already
                continue
@ -530,9 +469,7 @@ class CodeSearcher:
        return final_results
-    def _add_context_to_results(
+    def _add_context_to_results(self, results: List[SearchResult], search_df: pd.DataFrame) -> List[SearchResult]:
        self, results: List[SearchResult], search_df: pd.DataFrame
    ) -> List[SearchResult]:
        """
        Add context (adjacent and parent chunks) to search results.
@ -551,12 +488,12 @@ class CodeSearcher:
        for result in results:
            # Find matching row in search_df
            matching_rows = search_df[
-                (search_df["file_path"] == result.file_path)
+                (search_df['file_path'] == result.file_path) &
-                & (search_df["start_line"] == result.start_line)
+                (search_df['start_line'] == result.start_line) &
-                & (search_df["end_line"] == result.end_line)
+                (search_df['end_line'] == result.end_line)
            ]
            if not matching_rows.empty:
-                result_to_chunk_id[result] = matching_rows.iloc[0]["chunk_id"]
+                result_to_chunk_id[result] = matching_rows.iloc[0]['chunk_id']
        # Add context to each result
        for result in results:
@ -565,48 +502,49 @@ class CodeSearcher:
                continue
            # Get the row for this chunk
-            chunk_rows = full_df[full_df["chunk_id"] == chunk_id]
+            chunk_rows = full_df[full_df['chunk_id'] == chunk_id]
            if chunk_rows.empty:
                continue
            chunk_row = chunk_rows.iloc[0]
            # Add adjacent chunks as context
-            if pd.notna(chunk_row.get("prev_chunk_id")):
+            if pd.notna(chunk_row.get('prev_chunk_id')):
-                prev_rows = full_df[full_df["chunk_id"] == chunk_row["prev_chunk_id"]]
+                prev_rows = full_df[full_df['chunk_id'] == chunk_row['prev_chunk_id']]
                if not prev_rows.empty:
-                    result.context_before = prev_rows.iloc[0]["content"]
+                    result.context_before = prev_rows.iloc[0]['content']
-            if pd.notna(chunk_row.get("next_chunk_id")):
+            if pd.notna(chunk_row.get('next_chunk_id')):
-                next_rows = full_df[full_df["chunk_id"] == chunk_row["next_chunk_id"]]
+                next_rows = full_df[full_df['chunk_id'] == chunk_row['next_chunk_id']]
                if not next_rows.empty:
-                    result.context_after = next_rows.iloc[0]["content"]
+                    result.context_after = next_rows.iloc[0]['content']
            # Add parent class chunk if applicable
-            if pd.notna(chunk_row.get("parent_class")):
+            if pd.notna(chunk_row.get('parent_class')):
                parent_rows = full_df[
-                    (full_df["name"] == chunk_row["parent_class"])
+                    (full_df['name'] == chunk_row['parent_class']) & 
-                    & (full_df["chunk_type"] == "class")
+                    (full_df['chunk_type'] == 'class') &
-                    & (full_df["file_path"] == chunk_row["file_path"])
+                    (full_df['file_path'] == chunk_row['file_path'])
                ]
                if not parent_rows.empty:
                    parent_row = parent_rows.iloc[0]
                    result.parent_chunk = SearchResult(
-                        file_path=display_path(parent_row["file_path"]),
+                        file_path=display_path(parent_row['file_path']),
-                        content=parent_row["content"],
+                        content=parent_row['content'],
                        score=1.0,
-                        start_line=parent_row["start_line"],
+                        start_line=parent_row['start_line'],
-                        end_line=parent_row["end_line"],
+                        end_line=parent_row['end_line'],
-                        chunk_type=parent_row["chunk_type"],
+                        chunk_type=parent_row['chunk_type'],
-                        name=parent_row["name"],
+                        name=parent_row['name'],
-                        language=parent_row["language"],
+                        language=parent_row['language']
                    )
        return results
-    def search_similar_code(
+    def search_similar_code(self, 
-        self, code_snippet: str, top_k: int = 10, exclude_self: bool = True
+                          code_snippet: str, 
-    ) -> List[SearchResult]:
+                          top_k: int = 10,
                          exclude_self: bool = True) -> List[SearchResult]:
        """
        Find code similar to a given snippet using hybrid search.
@ -624,7 +562,7 @@ class CodeSearcher:
            query=code_snippet,
            top_k=top_k * 2 if exclude_self else top_k,
            semantic_weight=0.8,  # Higher semantic weight for code similarity
-            bm25_weight=0.2,
+            bm25_weight=0.2
        )
        if exclude_self:
@ -654,7 +592,11 @@ class CodeSearcher:
        query = f"function {function_name} implementation definition"
        # Search with filters
-        results = self.search(query, top_k=top_k * 2, chunk_types=["function", "method"])
+        results = self.search(
            query,
            top_k=top_k * 2,
            chunk_types=['function', 'method']
        )
        # Further filter by name
        filtered = []
@ -679,7 +621,11 @@ class CodeSearcher:
        query = f"class {class_name} definition implementation"
        # Search with filters
-        results = self.search(query, top_k=top_k * 2, chunk_types=["class"])
+        results = self.search(
            query,
            top_k=top_k * 2,
            chunk_types=['class']
        )
        # Further filter by name
        filtered = []
@ -729,12 +675,10 @@ class CodeSearcher:
        return filtered[:top_k]
-    def display_results(
+    def display_results(self, 
        self,
                       results: List[SearchResult], 
                       show_content: bool = True,
-        max_content_lines: int = 10,
+                       max_content_lines: int = 10):
    ):
        """
        Display search results in a formatted table.
@ -761,7 +705,7 @@ class CodeSearcher:
                result.file_path,
                result.chunk_type,
                result.name or "-",
-                f"{result.start_line}-{result.end_line}",
+                f"{result.start_line}-{result.end_line}"
            )
        console.print(table)
@ -771,9 +715,7 @@ class CodeSearcher:
            console.print("\n[bold]Top Results:[/bold]\n")
            for i, result in enumerate(results[:3], 1):
-                console.print(
+                console.print(f"[bold cyan]#{i}[/bold cyan] {result.file_path}:{result.start_line}")
                    f"[bold cyan]#{i}[/bold cyan] {result.file_path}:{result.start_line}"
                )
                console.print(f"[dim]Type: {result.chunk_type} | Name: {result.name}[/dim]")
                # Display code with syntax highlighting
@ -782,7 +724,7 @@ class CodeSearcher:
                    result.language,
                    theme="monokai",
                    line_numbers=True,
-                    start_line=result.start_line,
+                    start_line=result.start_line
                )
                console.print(syntax)
                console.print()
@ -790,7 +732,7 @@ class CodeSearcher:
    def get_statistics(self) -> Dict[str, Any]:
        """Get search index statistics."""
        if not self.table:
-            return {"error": "Database not connected"}
+            return {'error': 'Database not connected'}
        try:
            # Get table statistics
@ -798,30 +740,28 @@ class CodeSearcher:
            # Get unique files
            df = self.table.to_pandas()
-            unique_files = df["file_path"].nunique()
+            unique_files = df['file_path'].nunique()
            # Get chunk type distribution
-            chunk_types = df["chunk_type"].value_counts().to_dict()
+            chunk_types = df['chunk_type'].value_counts().to_dict()
            # Get language distribution
-            languages = df["language"].value_counts().to_dict()
+            languages = df['language'].value_counts().to_dict()
            return {
-                "total_chunks": num_rows,
+                'total_chunks': num_rows,
-                "unique_files": unique_files,
+                'unique_files': unique_files,
-                "chunk_types": chunk_types,
+                'chunk_types': chunk_types,
-                "languages": languages,
+                'languages': languages,
-                "index_ready": True,
+                'index_ready': True,
            }
        except Exception as e:
            logger.error(f"Failed to get statistics: {e}")
-            return {"error": str(e)}
+            return {'error': str(e)}
 # Convenience functions
 def search_code(project_path: Path, query: str, top_k: int = 10) -> List[SearchResult]:
    """
    Quick search function.
--- a/mini_rag/server.py
+++ b/mini_rag/server.py
@ -4,23 +4,23 @@ No more loading/unloading madness!
 """
 import json
 import logging
 import os
 import socket
 import subprocess
 import sys
 import threading
 import time
 import subprocess
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Dict, Any, Optional
 import logging
 import sys
 import os
 # Fix Windows console
-if sys.platform == "win32":
+if sys.platform == 'win32':
-    os.environ["PYTHONUTF8"] = "1"
+    os.environ['PYTHONUTF8'] = '1'
 from .search import CodeSearcher
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .performance import PerformanceMonitor
 from .search import CodeSearcher
 logger = logging.getLogger(__name__)
@ -43,30 +43,31 @@ class RAGServer:
        try:
            # Check if port is in use
            test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            result = test_sock.connect_ex(("localhost", self.port))
+            result = test_sock.connect_ex(('localhost', self.port))
            test_sock.close()
            if result == 0:  # Port is in use
                print(f"️  Port {self.port} is already in use, attempting to free it...")
-                if sys.platform == "win32":
+                if sys.platform == 'win32':
                    # Windows: Find and kill process using netstat
                    import subprocess
                    try:
                        # Get process ID using the port
                        result = subprocess.run(
-                            ["netstat", "-ano"], capture_output=True, text=True
+                            ['netstat', '-ano'], 
                            capture_output=True, 
                            text=True
                        )
-                        for line in result.stdout.split("\n"):
+                        for line in result.stdout.split('\n'):
-                            if f":{self.port}" in line and "LISTENING" in line:
+                            if f':{self.port}' in line and 'LISTENING' in line:
                                parts = line.split()
                                pid = parts[-1]
                                print(f"   Found process {pid} using port {self.port}")
                                # Kill the process
-                                subprocess.run(["taskkill", "//PID", pid, "//F"], check=False)
+                                subprocess.run(['taskkill', '//PID', pid, '//F'], check=False)
                                print(f"    Killed process {pid}")
                                time.sleep(1)  # Give it a moment to release the port
                                break
@ -75,16 +76,15 @@ class RAGServer:
                else:
                    # Unix/Linux: Use lsof and kill
                    import subprocess
                    try:
                        result = subprocess.run(
-                            ["lso", "-ti", f":{self.port}"],
+                            ['lsof', '-ti', f':{self.port}'], 
                            capture_output=True, 
-                            text=True,
+                            text=True
                        )
                        if result.stdout.strip():
                            pid = result.stdout.strip()
-                            subprocess.run(["kill", "-9", pid], check=False)
+                            subprocess.run(['kill', '-9', pid], check=False)
                            print(f"    Killed process {pid}")
                            time.sleep(1)
                    except Exception as e:
@ -114,7 +114,7 @@ class RAGServer:
        # Start server
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-        self.socket.bind(("localhost", self.port))
+        self.socket.bind(('localhost', self.port))
        self.socket.listen(5)
        self.running = True
@ -145,15 +145,15 @@ class RAGServer:
            request = json.loads(data)
            # Check for shutdown command
-            if request.get("command") == "shutdown":
+            if request.get('command') == 'shutdown':
                print("\n Shutdown requested")
-                response = {"success": True, "message": "Server shutting down"}
+                response = {'success': True, 'message': 'Server shutting down'}
                self._send_json(client, response)
                self.stop()
                return
-            query = request.get("query", "")
+            query = request.get('query', '')
-            top_k = request.get("top_k", 10)
+            top_k = request.get('top_k', 10)
            self.query_count += 1
            print(f"[Query #{self.query_count}] {query}")
@ -165,13 +165,13 @@ class RAGServer:
            # Prepare response
            response = {
-                "success": True,
+                'success': True,
-                "query": query,
+                'query': query,
-                "count": len(results),
+                'count': len(results),
-                "search_time_ms": int(search_time * 1000),
+                'search_time_ms': int(search_time * 1000),
-                "results": [r.to_dict() for r in results],
+                'results': [r.to_dict() for r in results],
-                "server_uptime": int(time.time() - self.start_time),
+                'server_uptime': int(time.time() - self.start_time),
-                "total_queries": self.query_count,
+                'total_queries': self.query_count,
            }
            # Send response with proper framing
@ -179,7 +179,7 @@ class RAGServer:
            print(f"    Found {len(results)} results in {search_time*1000:.0f}ms")
-        except ConnectionError:
+        except ConnectionError as e:
            # Normal disconnection - client closed connection
            # This is expected behavior, don't log as error
            pass
@ -187,10 +187,13 @@ class RAGServer:
            # Only log actual errors, not normal disconnections
            if "Connection closed" not in str(e):
                logger.error(f"Client handler error: {e}")
-            error_response = {"success": False, "error": str(e)}
+            error_response = {
                'success': False,
                'error': str(e)
            }
            try:
                self._send_json(client, error_response)
-            except (ConnectionError, OSError, TypeError, ValueError, socket.error):
+            except:
                pass
        finally:
            client.close()
@ -198,34 +201,34 @@ class RAGServer:
    def _receive_json(self, sock: socket.socket) -> str:
        """Receive a complete JSON message with length prefix."""
        # First receive the length (4 bytes)
-        length_data = b""
+        length_data = b''
        while len(length_data) < 4:
            chunk = sock.recv(4 - len(length_data))
            if not chunk:
                raise ConnectionError("Connection closed while receiving length")
            length_data += chunk
-        length = int.from_bytes(length_data, "big")
+        length = int.from_bytes(length_data, 'big')
        # Now receive the actual data
-        data = b""
+        data = b''
        while len(data) < length:
            chunk = sock.recv(min(65536, length - len(data)))
            if not chunk:
                raise ConnectionError("Connection closed while receiving data")
            data += chunk
-        return data.decode("utf-8")
+        return data.decode('utf-8')
    def _send_json(self, sock: socket.socket, data: dict):
        """Send a JSON message with length prefix."""
        # Sanitize the data to ensure JSON compatibility
-        json_str = json.dumps(data, ensure_ascii=False, separators=(",", ":"))
+        json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
-        json_bytes = json_str.encode("utf-8")
+        json_bytes = json_str.encode('utf-8')
        # Send length prefix (4 bytes)
        length = len(json_bytes)
-        sock.send(length.to_bytes(4, "big"))
+        sock.send(length.to_bytes(4, 'big'))
        # Send the data
        sock.sendall(json_bytes)
@ -250,10 +253,13 @@ class RAGClient:
        try:
            # Connect to server
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            sock.connect(("localhost", self.port))
+            sock.connect(('localhost', self.port))
            # Send request with proper framing
-            request = {"query": query, "top_k": top_k}
+            request = {
                'query': query,
                'top_k': top_k
            }
            self._send_json(sock, request)
            # Receive response with proper framing
@ -265,48 +271,54 @@ class RAGClient:
        except ConnectionRefusedError:
            return {
-                "success": False,
+                'success': False,
-                "error": "RAG server not running. Start with: rag-mini server",
+                'error': 'RAG server not running. Start with: mini-rag server'
            }
        except ConnectionError as e:
            # Try legacy mode without message framing
            if not self.use_legacy and "receiving length" in str(e):
                self.use_legacy = True
                return self._search_legacy(query, top_k)
-            return {"success": False, "error": str(e)}
+            return {
                'success': False,
                'error': str(e)
            }
        except Exception as e:
-            return {"success": False, "error": str(e)}
+            return {
                'success': False,
                'error': str(e)
            }
    def _receive_json(self, sock: socket.socket) -> str:
        """Receive a complete JSON message with length prefix."""
        # First receive the length (4 bytes)
-        length_data = b""
+        length_data = b''
        while len(length_data) < 4:
            chunk = sock.recv(4 - len(length_data))
            if not chunk:
                raise ConnectionError("Connection closed while receiving length")
            length_data += chunk
-        length = int.from_bytes(length_data, "big")
+        length = int.from_bytes(length_data, 'big')
        # Now receive the actual data
-        data = b""
+        data = b''
        while len(data) < length:
            chunk = sock.recv(min(65536, length - len(data)))
            if not chunk:
                raise ConnectionError("Connection closed while receiving data")
            data += chunk
-        return data.decode("utf-8")
+        return data.decode('utf-8')
    def _send_json(self, sock: socket.socket, data: dict):
        """Send a JSON message with length prefix."""
-        json_str = json.dumps(data, ensure_ascii=False, separators=(",", ":"))
+        json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
-        json_bytes = json_str.encode("utf-8")
+        json_bytes = json_str.encode('utf-8')
        # Send length prefix (4 bytes)
        length = len(json_bytes)
-        sock.send(length.to_bytes(4, "big"))
+        sock.send(length.to_bytes(4, 'big'))
        # Send the data
        sock.sendall(json_bytes)
@ -315,14 +327,17 @@ class RAGClient:
        """Legacy search without message framing for old servers."""
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            sock.connect(("localhost", self.port))
+            sock.connect(('localhost', self.port))
            # Send request (old way)
-            request = {"query": query, "top_k": top_k}
+            request = {
-            sock.send(json.dumps(request).encode("utf-8"))
+                'query': query,
                'top_k': top_k
            }
            sock.send(json.dumps(request).encode('utf-8'))
            # Receive response (accumulate until we get valid JSON)
-            data = b""
+            data = b''
            while True:
                chunk = sock.recv(65536)
                if not chunk:
@ -330,7 +345,7 @@ class RAGClient:
                data += chunk
                try:
                    # Try to decode as JSON
-                    response = json.loads(data.decode("utf-8"))
+                    response = json.loads(data.decode('utf-8'))
                    sock.close()
                    return response
                except json.JSONDecodeError:
@ -338,18 +353,24 @@ class RAGClient:
                    continue
            sock.close()
-            return {"success": False, "error": "Incomplete response from server"}
+            return {
                'success': False,
                'error': 'Incomplete response from server'
            }
        except Exception as e:
-            return {"success": False, "error": str(e)}
+            return {
                'success': False,
                'error': str(e)
            }
    def is_running(self) -> bool:
        """Check if server is running."""
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            result = sock.connect_ex(("localhost", self.port))
+            result = sock.connect_ex(('localhost', self.port))
            sock.close()
            return result == 0
-        except (ConnectionError, OSError, TypeError, ValueError, socket.error):
+        except:
            return False
@ -368,20 +389,12 @@ def auto_start_if_needed(project_path: Path) -> Optional[subprocess.Popen]:
    if not client.is_running():
        # Start server in background
        import subprocess
-
+        cmd = [sys.executable, "-m", "mini_rag.cli", "server", "--path", str(project_path)]
        cmd = [
            sys.executable,
            "-m",
            "mini_rag.cli",
            "server",
            "--path",
            str(project_path),
        ]
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
-            creationflags=(subprocess.CREATE_NEW_CONSOLE if sys.platform == "win32" else 0),
+            creationflags=subprocess.CREATE_NEW_CONSOLE if sys.platform == 'win32' else 0
        )
        # Wait for server to start
--- a/mini_rag/smart_chunking.py
+++ b/mini_rag/smart_chunking.py
@ -3,49 +3,61 @@ Smart language-aware chunking strategies for FSS-Mini-RAG.
 Automatically adapts chunking based on file type and content patterns.
 """
 from typing import Dict, Any, List
 from pathlib import Path
-from typing import Any, Dict, List
+import json
 class SmartChunkingStrategy:
    """Intelligent chunking that adapts to file types and content."""
    def __init__(self):
        self.language_configs = {
-            "python": {
+            'python': {
-                "max_size": 3000,  # Larger for better function context
+                'max_size': 3000,  # Larger for better function context
-                "min_size": 200,
+                'min_size': 200,
-                "strategy": "function",
+                'strategy': 'function',
-                "prefer_semantic": True,
+                'prefer_semantic': True
            },
-            "javascript": {
+            'javascript': {
-                "max_size": 2500,
+                'max_size': 2500,
-                "min_size": 150,
+                'min_size': 150,
-                "strategy": "function",
+                'strategy': 'function',
-                "prefer_semantic": True,
+                'prefer_semantic': True
            },
-            "markdown": {
+            'markdown': {
-                "max_size": 2500,
+                'max_size': 2500,
-                "min_size": 300,  # Larger minimum for complete thoughts
+                'min_size': 300,  # Larger minimum for complete thoughts
-                "strategy": "header",
+                'strategy': 'header',
-                "preserve_structure": True,
+                'preserve_structure': True
            },
-            "json": {
+            'json': {
-                "max_size": 1000,  # Smaller for config files
+                'max_size': 1000,  # Smaller for config files
-                "min_size": 50,
+                'min_size': 50,
-                "skip_if_large": True,  # Skip huge config JSONs
+                'skip_if_large': True,  # Skip huge config JSONs
-                "max_file_size": 50000,  # 50KB limit
+                'max_file_size': 50000  # 50KB limit
            },
-            "yaml": {"max_size": 1500, "min_size": 100, "strategy": "key_block"},
+            'yaml': {
-            "text": {"max_size": 2000, "min_size": 200, "strategy": "paragraph"},
+                'max_size': 1500,
-            "bash": {"max_size": 1500, "min_size": 100, "strategy": "function"},
+                'min_size': 100,
                'strategy': 'key_block'
            },
            'text': {
                'max_size': 2000,
                'min_size': 200,
                'strategy': 'paragraph'
            },
            'bash': {
                'max_size': 1500,
                'min_size': 100,
                'strategy': 'function'
            }
        }
        # Smart defaults for unknown languages
        self.default_config = {
-            "max_size": 2000,
+            'max_size': 2000,
-            "min_size": 150,
+            'min_size': 150,
-            "strategy": "semantic",
+            'strategy': 'semantic'
        }
    def get_config_for_language(self, language: str, file_size: int = 0) -> Dict[str, Any]:
@ -55,10 +67,10 @@ class SmartChunkingStrategy:
        # Smart adjustments based on file size
        if file_size > 0:
            if file_size < 500:  # Very small files
-                config["max_size"] = max(config["max_size"] // 2, 200)
+                config['max_size'] = max(config['max_size'] // 2, 200)
-                config["min_size"] = 50
+                config['min_size'] = 50
            elif file_size > 20000:  # Large files  
-                config["max_size"] = min(config["max_size"] + 1000, 4000)
+                config['max_size'] = min(config['max_size'] + 1000, 4000)
        return config
@ -67,8 +79,8 @@ class SmartChunkingStrategy:
        lang_config = self.language_configs.get(language, {})
        # Skip huge JSON config files
-        if language == "json" and lang_config.get("skip_if_large"):
+        if language == 'json' and lang_config.get('skip_if_large'):
-            max_size = lang_config.get("max_file_size", 50000)
+            max_size = lang_config.get('max_file_size', 50000)
            if file_size > max_size:
                return True
@ -80,62 +92,58 @@ class SmartChunkingStrategy:
    def get_smart_defaults(self, project_stats: Dict[str, Any]) -> Dict[str, Any]:
        """Generate smart defaults based on project language distribution."""
-        languages = project_stats.get("languages", {})
+        languages = project_stats.get('languages', {})
-        # sum(languages.values())  # Unused variable removed
+        total_files = sum(languages.values())
        # Determine primary language
-        primary_lang = max(languages.items(), key=lambda x: x[1])[0] if languages else "python"
+        primary_lang = max(languages.items(), key=lambda x: x[1])[0] if languages else 'python'
        primary_config = self.language_configs.get(primary_lang, self.default_config)
        # Smart streaming threshold based on large files
-        large_files = project_stats.get("large_files", 0)
+        large_files = project_stats.get('large_files', 0)
        streaming_threshold = 5120 if large_files > 5 else 1048576  # 5KB vs 1MB
        return {
            "chunking": {
-                "max_size": primary_config["max_size"],
+                "max_size": primary_config['max_size'],
-                "min_size": primary_config["min_size"],
+                "min_size": primary_config['min_size'], 
-                "strategy": primary_config.get("strategy", "semantic"),
+                "strategy": primary_config.get('strategy', 'semantic'),
                "language_specific": {
-                    lang: config
+                    lang: config for lang, config in self.language_configs.items()
                    for lang, config in self.language_configs.items()
                    if languages.get(lang, 0) > 0
-                },
+                }
            },
            "streaming": {
                "enabled": True,
                "threshold_bytes": streaming_threshold,
-                "chunk_size_kb": 64,
+                "chunk_size_kb": 64
            },
            "files": {
                "skip_tiny_files": True,
                "tiny_threshold": 30,
-                "smart_json_filtering": True,
+                "smart_json_filtering": True
-            },
+            }
        }
 # Example usage
 def analyze_and_suggest(manifest_data: Dict[str, Any]) -> Dict[str, Any]:
    """Analyze project and suggest optimal configuration."""
    from collections import Counter
-    files = manifest_data.get("files", {})
+    files = manifest_data.get('files', {})
    languages = Counter()
    large_files = 0
    for info in files.values():
-        lang = info.get("language", "unknown")
+        lang = info.get('language', 'unknown')
        languages[lang] += 1
-        if info.get("size", 0) > 10000:
+        if info.get('size', 0) > 10000:
            large_files += 1
    stats = {
-        "languages": dict(languages),
+        'languages': dict(languages),
-        "large_files": large_files,
+        'large_files': large_files,
-        "total_files": len(files),
+        'total_files': len(files)
    }
    strategy = SmartChunkingStrategy()
--- a/mini_rag/system_context.py
+++ b/mini_rag/system_context.py
@ -1,121 +0,0 @@
 """
 System Context Collection for Enhanced RAG Grounding
 Collects minimal system information to help the LLM provide better,
 context-aware assistance without compromising privacy.
 """
 import platform
 import sys
 from pathlib import Path
 from typing import Dict, Optional
 class SystemContextCollector:
    """Collects system context information for enhanced LLM grounding.
    Both entry points are static methods; the class holds no state.
    """
    @staticmethod
    def get_system_context(project_path: Optional[Path] = None) -> str:
        """
        Get concise system context for LLM grounding.
        Args:
            project_path: Current project directory. When omitted, the name
                of the current working directory is used instead.
        Returns:
            Formatted system context string of the form
            "[<os> <py-version>, <path>, use <launcher>]", never longer
            than 200 characters.
        """
        # Hard cap on the returned string, including the closing bracket
        max_chars = 200
        try:
            # Basic system info
            os_name = platform.system()
            python_ver = f"{sys.version_info.major}.{sys.version_info.minor}"
            # Simplified OS names; unknown systems are passed through unchanged
            os_short = {"Windows": "Win", "Linux": "Linux", "Darwin": "macOS"}.get(
                os_name, os_name
            )
            # Working directory info
            if project_path:
                # Use relative or shortened path for privacy
                try:
                    rel_path = project_path.relative_to(Path.home())
                    path_info = f"~/{rel_path}"
                except ValueError:
                    # If not relative to home, just use folder name
                    path_info = project_path.name
            else:
                path_info = Path.cwd().name
            # Keep only the tail of long paths (".../" + last 45 characters)
            if len(path_info) > 50:
                path_info = f".../{path_info[-45:]}"
            # Command style hints
            cmd_style = "rag.bat" if os_name == "Windows" else "./rag-mini"
            # Format concise context
            context = f"[{os_short} {python_ver}, {path_info}, use {cmd_style}]"
            # Truncate so that prefix + marker is exactly max_chars long
            # (the previous 197 + 4 split produced 201 characters)
            if len(context) > max_chars:
                marker = "...]"
                context = context[: max_chars - len(marker)] + marker
            return context
        except Exception:
            # Fallback to minimal info if anything fails
            return f"[{platform.system()}, Python {sys.version_info.major}.{sys.version_info.minor}]"
    @staticmethod
    def get_command_context(os_name: Optional[str] = None) -> Dict[str, str]:
        """
        Get OS-appropriate command examples.
        Args:
            os_name: Operating system name as reported by platform.system();
                detected automatically when None.
        Returns:
            Dictionary with command patterns for the selected OS. Keys are
            "launcher", "index", "search", "explore", "path_sep" and
            "example_path". Any name other than "Windows" gets the
            Unix-style commands.
        """
        if os_name is None:
            os_name = platform.system()
        if os_name == "Windows":
            return {
                "launcher": "rag.bat",
                "index": "rag.bat index C:\\path\\to\\project",
                "search": 'rag.bat search C:\\path\\to\\project "query"',
                "explore": "rag.bat explore C:\\path\\to\\project",
                "path_sep": "\\",
                "example_path": "C:\\Users\\username\\Documents\\myproject",
            }
        else:
            return {
                "launcher": "./rag-mini",
                "index": "./rag-mini index /path/to/project",
                "search": './rag-mini search /path/to/project "query"',
                "explore": "./rag-mini explore /path/to/project",
                "path_sep": "/",
                "example_path": "~/Documents/myproject",
            }
 def get_system_context(project_path: Optional[Path] = None) -> str:
    """
    Module-level shortcut for SystemContextCollector.get_system_context.
    Args:
        project_path: Project directory to describe, forwarded unchanged.
    Returns:
        The context string produced by the collector.
    """
    context = SystemContextCollector.get_system_context(project_path)
    return context
 def get_command_context() -> Dict[str, str]:
    """
    Module-level shortcut for SystemContextCollector.get_command_context.
    Returns:
        The command-pattern dictionary the collector builds when called
        with no arguments.
    """
    commands = SystemContextCollector.get_command_context()
    return commands
 # Test function
 if __name__ == "__main__":
    # Manual smoke test: print both context variants, then every command hint
    print("System Context Test:")
    default_context = get_system_context()
    print(f"Context: {default_context}")
    sample_context = get_system_context(Path('/tmp/test'))
    print(f"Context with path: {sample_context}")
    print()
    print("Command Context:")
    for label, command in get_command_context().items():
        print(f"  {label}: {command}")
--- a/mini_rag/updater.py
+++ b/mini_rag/updater.py
@ -1,482 +0,0 @@
 #!/usr/bin/env python3
 """
 FSS-Mini-RAG Auto-Update System
 Provides seamless GitHub-based updates with user-friendly interface.
 Checks for new releases, downloads updates, and handles installation safely.
 """
 import json
 import os
 import shutil
 import subprocess
 import sys
 import tempfile
 import zipfile
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Optional, Tuple
 try:
    import requests
    REQUESTS_AVAILABLE = True
 except ImportError:
    REQUESTS_AVAILABLE = False
 from .config import ConfigManager
@dataclass
 class UpdateInfo:
    """Information about an available update.
    Plain record with six required fields. The per-field notes describe how
    UpdateChecker.check_for_updates fills them in.
    """
    # Release version: the release tag with its leading "v" removed
    version: str
    # Release page URL (the "html_url" field of the release data)
    release_url: str
    # URL of the package to download: a ".zip" asset or the tag's source archive
    download_url: str
    # Release description text (the "body" field of the release data)
    release_notes: str
    # Publication timestamp, kept as the string found in the release data
    published_at: str
    # True when the release was judged newer than the running version
    is_newer: bool
 class UpdateChecker:
    """
    Handles checking for and applying updates from GitHub releases.
    Features:
    - Checks GitHub API for latest releases
    - Downloads and applies updates safely with backup
    - Respects user preferences and rate limiting
    - Provides graceful fallbacks if network unavailable
    """
    def __init__(
        self,
        repo_owner: str = "FSSCoding",
        repo_name: str = "Fss-Mini-Rag",
        current_version: str = "2.1.0",
    ):
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.current_version = current_version
        self.github_api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
        self.check_frequency_hours = 24  # Check once per day
        # Paths
        self.app_root = Path(__file__).parent.parent
        self.cache_file = self.app_root / ".update_cache.json"
        self.backup_dir = self.app_root / ".backup"
        # User preferences (graceful fallback if config unavailable)
        try:
            self.config = ConfigManager(self.app_root)
        except Exception:
            self.config = None
    def should_check_for_updates(self) -> bool:
        """
        Determine if we should check for updates now.
        Respects:
        - User preference to disable updates
        - Rate limiting (once per day by default)
        - Network availability
        """
        if not REQUESTS_AVAILABLE:
            return False
        # Check user preference
        if hasattr(self.config, "updates") and not getattr(
            self.config.updates, "auto_check", True
        ):
            return False
        # Check if we've checked recently
        if self.cache_file.exists():
            try:
                with open(self.cache_file, "r") as f:
                    cache = json.load(f)
                    last_check = datetime.fromisoformat(cache.get("last_check", "2020-01-01"))
                    if datetime.now() - last_check < timedelta(
                        hours=self.check_frequency_hours
                    ):
                        return False
            except (json.JSONDecodeError, ValueError, KeyError):
                pass  # Ignore cache errors, will check anyway
        return True
    def check_for_updates(self) -> Optional[UpdateInfo]:
        """
        Check GitHub API for the latest release.
        Returns:
            UpdateInfo if an update is available, None otherwise
        """
        if not REQUESTS_AVAILABLE:
            return None
        try:
            # Get latest release from GitHub API
            response = requests.get(
                f"{self.github_api_url}/releases/latest",
                timeout=10,
                headers={"Accept": "application/vnd.github.v3+json"},
            )
            if response.status_code != 200:
                return None
            release_data = response.json()
            # Extract version info
            latest_version = release_data.get("tag_name", "").lstrip("v")
            release_notes = release_data.get("body", "No release notes available.")
            published_at = release_data.get("published_at", "")
            release_url = release_data.get("html_url", "")
            # Find download URL for source code
            download_url = None
            for asset in release_data.get("assets", []):
                if asset.get("name", "").endswith(".zip"):
                    download_url = asset.get("browser_download_url")
                    break
            # Fallback to source code zip
            if not download_url:
                download_url = f"https://github.com/{self.repo_owner}/{self.repo_name}/archive/refs/tags/v{latest_version}.zip"
            # Check if this is a newer version
            is_newer = self._is_version_newer(latest_version, self.current_version)
            # Update cache
            self._update_cache(latest_version, is_newer)
            if is_newer:
                return UpdateInfo(
                    version=latest_version,
                    release_url=release_url,
                    download_url=download_url,
                    release_notes=release_notes,
                    published_at=published_at,
                    is_newer=True,
                )
        except Exception:
            # Silently fail for network issues - don't interrupt user experience
            pass
        return None
    def _is_version_newer(self, latest: str, current: str) -> bool:
        """
        Compare version strings to determine if latest is newer.
        Simple semantic version comparison supporting:
        - Major.Minor.Patch (e.g., 2.1.0)
        - Major.Minor (e.g., 2.1)
        """
        def version_tuple(v):
            return tuple(map(int, (v.split("."))))
        try:
            return version_tuple(latest) > version_tuple(current)
        except (ValueError, AttributeError):
            # If version parsing fails, assume it's newer to be safe
            return latest != current
    def _update_cache(self, latest_version: str, is_newer: bool):
        """Update the cache file with check results."""
        cache_data = {
            "last_check": datetime.now().isoformat(),
            "latest_version": latest_version,
            "is_newer": is_newer,
        }
        try:
            with open(self.cache_file, "w") as f:
                json.dump(cache_data, f, indent=2)
        except Exception:
            pass  # Ignore cache write errors
    def download_update(
        self, update_info: UpdateInfo, progress_callback=None
    ) -> Optional[Path]:
        """
        Download the update package to a temporary location.
        Args:
            update_info: Information about the update to download
            progress_callback: Optional callback for progress updates
        Returns:
            Path to downloaded file, or None if download failed
        """
        if not REQUESTS_AVAILABLE:
            return None
        try:
            # Create temporary file for download
            with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_file:
                tmp_path = Path(tmp_file.name)
            # Download with progress tracking
            response = requests.get(update_info.download_url, stream=True, timeout=30)
            response.raise_for_status()
            total_size = int(response.headers.get("content-length", 0))
            downloaded = 0
            with open(tmp_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        if progress_callback and total_size > 0:
                            progress_callback(downloaded, total_size)
            return tmp_path
        except Exception:
            # Clean up on error
            if "tmp_path" in locals() and tmp_path.exists():
                tmp_path.unlink()
            return None
    def create_backup(self) -> bool:
        """
        Create a backup of the current installation.
        Returns:
            True if backup created successfully
        """
        try:
            # Remove old backup if it exists
            if self.backup_dir.exists():
                shutil.rmtree(self.backup_dir)
            # Create new backup
            self.backup_dir.mkdir(exist_ok=True)
            # Copy key files and directories
            important_items = [
                "mini_rag",
                "rag-mini.py",
                "rag-tui.py",
                "requirements.txt",
                "install_mini_rag.sh",
                "install_windows.bat",
                "README.md",
                "assets",
            ]
            for item in important_items:
                src = self.app_root / item
                if src.exists():
                    if src.is_dir():
                        shutil.copytree(src, self.backup_dir / item)
                    else:
                        shutil.copy2(src, self.backup_dir / item)
            return True
        except Exception:
            return False
    def apply_update(self, update_package_path: Path, update_info: UpdateInfo) -> bool:
        """
        Apply the downloaded update.
        Args:
            update_package_path: Path to the downloaded update package
            update_info: Information about the update
        Returns:
            True if update applied successfully
        """
        try:
            # Extract to temporary directory first
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_path = Path(tmp_dir)
                # Extract the archive
                with zipfile.ZipFile(update_package_path, "r") as zip_ref:
                    zip_ref.extractall(tmp_path)
                # Find the extracted directory (may be nested)
                extracted_dirs = [d for d in tmp_path.iterdir() if d.is_dir()]
                if not extracted_dirs:
                    return False
                source_dir = extracted_dirs[0]
                # Copy files to application directory
                important_items = [
                    "mini_rag",
                    "rag-mini.py",
                    "rag-tui.py",
                    "requirements.txt",
                    "install_mini_rag.sh",
                    "install_windows.bat",
                    "README.md",
                ]
                for item in important_items:
                    src = source_dir / item
                    dst = self.app_root / item
                    if src.exists():
                        if dst.exists():
                            if dst.is_dir():
                                shutil.rmtree(dst)
                            else:
                                dst.unlink()
                        if src.is_dir():
                            shutil.copytree(src, dst)
                        else:
                            shutil.copy2(src, dst)
                # Update version info
                self._update_version_info(update_info.version)
                return True
        except Exception:
            return False
    def _update_version_info(self, new_version: str):
        """Update version information in the application."""
        # Update __init__.py version
        init_file = self.app_root / "mini_rag" / "__init__.py"
        if init_file.exists():
            try:
                content = init_file.read_text()
                updated_content = content.replace(
                    f'__version__ = "{self.current_version}"',
                    f'__version__ = "{new_version}"',
                )
                init_file.write_text(updated_content)
            except Exception:
                pass
    def rollback_update(self) -> bool:
        """
        Rollback to the backup version if update failed.
        Returns:
            True if rollback successful
        """
        if not self.backup_dir.exists():
            return False
        try:
            # Restore from backup
            for item in self.backup_dir.iterdir():
                dst = self.app_root / item.name
                if dst.exists():
                    if dst.is_dir():
                        shutil.rmtree(dst)
                    else:
                        dst.unlink()
                if item.is_dir():
                    shutil.copytree(item, dst)
                else:
                    shutil.copy2(item, dst)
            return True
        except Exception:
            return False
    def restart_application(self):
        """Restart the application after update."""
        try:
            # Sanitize arguments to prevent command injection
            safe_argv = [sys.executable]
            for arg in sys.argv[1:]:  # Skip sys.argv[0] (script name)
                # Only allow safe arguments - alphanumeric, dashes, dots, slashes
                if isinstance(arg, str) and len(arg) < 200:  # Reasonable length limit
                    # Simple whitelist of safe characters
                    import re
                    if re.match(r'^[a-zA-Z0-9._/-]+$', arg):
                        safe_argv.append(arg)
            # Restart with sanitized arguments
            if sys.platform.startswith("win"):
                # Windows
                subprocess.Popen(safe_argv)
            else:
                # Unix-like systems
                os.execv(sys.executable, safe_argv)
        except Exception:
            # If restart fails, just exit gracefully
            print("\n✅ Update complete! Please restart the application manually.")
            sys.exit(0)
 def get_legacy_notification() -> Optional[str]:
    """
    Return an upgrade notice when the installed package is a legacy version.
    A version counts as legacy when mini_rag/__init__.py, located relative
    to this file, declares a version starting with "2.0." or "1.".
    Returns:
        The notice text for legacy versions, otherwise None (also when the
        file is missing or cannot be read).
    """
    legacy_markers = ('__version__ = "2.0.', '__version__ = "1.')
    try:
        init_file = Path(__file__).parent.parent / "mini_rag" / "__init__.py"
        if not init_file.exists():
            return None
        source_text = init_file.read_text()
        if not any(marker in source_text for marker in legacy_markers):
            return None
        return """
 🚨 IMPORTANT UPDATE AVAILABLE 🚨
 Your version of FSS-Mini-RAG is missing critical updates!
 🔧 Recent improvements include:
 • Fixed LLM response formatting issues
 • Added context window configuration
 • Improved Windows installer reliability
 • Added auto-update system (this notification!)
 📥 Please update by downloading the latest version:
   https://github.com/FSSCoding/Fss-Mini-Rag/releases/latest
 💡 After updating, you'll get automatic update notifications!
 """
    except Exception:
        # Detection is best effort; any failure means "no notice"
        return None
 # Global convenience functions
 _updater_instance = None
 def check_for_updates() -> Optional[UpdateInfo]:
    """
    Run an update check through the shared UpdateChecker instance.
    The instance is created on first use. The check is skipped when
    should_check_for_updates() says it is not due.
    Returns:
        UpdateInfo when a newer release was found, otherwise None
    """
    global _updater_instance
    if _updater_instance is None:
        _updater_instance = UpdateChecker()
    updater = _updater_instance
    if not updater.should_check_for_updates():
        return None
    return updater.check_for_updates()
 def get_updater() -> UpdateChecker:
    """
    Return the shared UpdateChecker, creating it on first access.
    Returns:
        The module-wide UpdateChecker instance
    """
    global _updater_instance
    updater = _updater_instance
    if updater is None:
        updater = _updater_instance = UpdateChecker()
    return updater
--- a/mini_rag/venv_checker.py
+++ b/mini_rag/venv_checker.py
@ -1,158 +0,0 @@
 #!/usr/bin/env python3
 """
 Virtual Environment Checker
 Ensures scripts run in proper Python virtual environment for consistency and safety.
 """
 import os
 import sys
 from pathlib import Path
 def is_in_virtualenv() -> bool:
    """
    Report whether the interpreter is running inside a virtual environment.
    Returns:
        True when any of the three indicators below is present
    """
    # Indicator 1: the interpreter exposes sys.real_prefix
    if hasattr(sys, "real_prefix"):
        return True
    # Indicator 2: sys.base_prefix exists and differs from sys.prefix
    if hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix:
        return True
    # Indicator 3: the VIRTUAL_ENV environment variable is set
    return os.environ.get("VIRTUAL_ENV") is not None
 def get_expected_venv_path() -> Path:
    """
    Return the location where this project's virtual environment is expected.
    Returns:
        The ".venv" directory inside the parent of this module's directory
    """
    project_root = Path(__file__).parent.parent
    return project_root.joinpath(".venv")
 def check_correct_venv() -> tuple[bool, str]:
    """
    Check whether the active virtual environment is the project's own.
    Returns:
        (is_correct, message), where message names the outcome. When
        VIRTUAL_ENV is unset but a virtual environment is active and the
        expected directory exists, the environment is accepted as correct.
    """
    if not is_in_virtualenv():
        return False, "not in virtual environment"
    wanted = get_expected_venv_path()
    if not wanted.exists():
        return False, "expected virtual environment not found"
    active = os.environ.get("VIRTUAL_ENV")
    if not active:
        # Nothing to compare against
        return True, "correct virtual environment"
    # Compare fully resolved paths
    active_path = Path(active).resolve()
    wanted_path = wanted.resolve()
    if active_path == wanted_path:
        return True, "correct virtual environment"
    return (
        False,
        f"wrong virtual environment (using {active_path}, expected {wanted_path})",
    )
 def show_venv_warning(script_name: str = "script") -> None:
    """
    Print the virtual environment warning together with setup instructions.
    The middle section depends on whether the expected virtual environment
    directory exists: activation commands if it does, creation steps if not.
    Args:
        script_name: Name of the script, used in the printed messages
    """
    def emit(*lines):
        # Print each entry on its own line; "" yields a blank line
        for line in lines:
            print(line)
    expected_venv = get_expected_venv_path()
    emit(
        "⚠️  VIRTUAL ENVIRONMENT WARNING",
        "=" * 50,
        "",
        f"This {script_name} should be run in a Python virtual environment for:",
        "  • Consistent dependencies",
        "  • Isolated package versions",
        "  • Proper security isolation",
        "  • Reliable functionality",
        "",
    )
    if expected_venv.exists():
        emit(
            "✅ Virtual environment found!",
            f"   Location: {expected_venv}",
            "",
            "🚀 To activate it:",
            f"   source {expected_venv}/bin/activate",
            f"   {script_name}",
            "",
            "🔄 Or run with activation:",
            f"   source {expected_venv}/bin/activate && {script_name}",
        )
    else:
        emit(
            "❌ No virtual environment found!",
            "",
            "🛠️  Create one first:",
            "   ./install_mini_rag.sh",
            "",
            "📚 Or manually:",
            f"   python3 -m venv {expected_venv}",
            f"   source {expected_venv}/bin/activate",
            "   pip install -r requirements.txt",
        )
    emit(
        "",
        "💡 Why this matters:",
        "   Without a virtual environment, you may experience:",
        "   • Import errors from missing packages",
        "   • Version conflicts with system Python",
        "   • Inconsistent behavior across systems",
        "   • Potential system-wide package pollution",
        "",
    )
 def check_and_warn_venv(script_name: str = "script", force_exit: bool = False) -> bool:
    """
    Verify the virtual environment and print a warning when it is wrong.
    Args:
        script_name: Name of the script for user-friendly messages
        force_exit: Exit with status 1 instead of continuing when the
            environment is not the expected one
    Returns:
        True if in correct venv (or when the FSS_MINI_RAG_GLOBAL_WRAPPER
        environment variable is set), False otherwise
    """
    # The global wrapper handles the environment itself, so skip all checks
    if os.environ.get("FSS_MINI_RAG_GLOBAL_WRAPPER"):
        return True
    venv_ok, _reason = check_correct_venv()
    if not venv_ok:
        show_venv_warning(script_name)
        if not force_exit:
            print(f"⚠️  Continuing anyway, but {script_name} may not work correctly...")
            print()
            return False
        print(f"⛔ Exiting {script_name} for your safety.")
        print("   Please activate the virtual environment and try again.")
        sys.exit(1)
    return True
 def require_venv(script_name: str = "script") -> None:
    """
    Enforce the virtual environment check, exiting when it fails.
    Args:
        script_name: Name of the script for user-friendly messages
    """
    check_and_warn_venv(script_name=script_name, force_exit=True)
 # Quick test function
 def main():
    """Print a short report of the virtual environment checks."""
    print("🧪 Virtual Environment Checker Test")
    print("=" * 40)
    in_venv = is_in_virtualenv()
    print(f"In virtual environment: {in_venv}")
    expected_path = get_expected_venv_path()
    print(f"Expected venv path: {expected_path}")
    venv_ok, reason = check_correct_venv()
    print(f"Correct venv: {venv_ok} ({reason})")
    if venv_ok:
        return
    # Show the full guidance only when something is wrong
    show_venv_warning("test script")
 if __name__ == "__main__":
    main()
--- a/mini_rag/watcher.py
+++ b/mini_rag/watcher.py
@ -4,21 +4,14 @@ Monitors project files and updates the index incrementally.
 """
 import logging
 import queue
 import threading
 import queue
 import time
 from datetime import datetime
 from pathlib import Path
-from typing import Callable, Optional, Set
+from typing import Set, Optional, Callable
-
+from datetime import datetime
 from watchdog.events import (
    FileCreatedEvent,
    FileDeletedEvent,
    FileModifiedEvent,
    FileMovedEvent,
    FileSystemEventHandler,
 )
 from watchdog.observers import Observer
 from watchdog.events import FileSystemEventHandler, FileModifiedEvent, FileCreatedEvent, FileDeletedEvent, FileMovedEvent
 from .indexer import ProjectIndexer
@ -80,13 +73,11 @@ class UpdateQueue:
 class CodeFileEventHandler(FileSystemEventHandler):
    """Handles file system events for code files."""
-    def __init__(
+    def __init__(self, 
        self,
                 update_queue: UpdateQueue,
                 include_patterns: Set[str],
                 exclude_patterns: Set[str],
-        project_path: Path,
+                 project_path: Path):
    ):
        """
        Initialize event handler.
@ -155,14 +146,12 @@ class CodeFileEventHandler(FileSystemEventHandler):
 class FileWatcher:
    """Watches project files and updates index automatically."""
-    def __init__(
+    def __init__(self, 
        self,
                 project_path: Path,
                 indexer: Optional[ProjectIndexer] = None,
                 update_delay: float = 1.0,
                 batch_size: int = 10,
-        batch_timeout: float = 5.0,
+                 batch_timeout: float = 5.0):
    ):
        """
        Initialize file watcher.
@ -191,10 +180,10 @@ class FileWatcher:
        # Statistics
        self.stats = {
-            "files_updated": 0,
+            'files_updated': 0,
-            "files_failed": 0,
+            'files_failed': 0,
-            "started_at": None,
+            'started_at': None,
-            "last_update": None,
+            'last_update': None,
        }
    def start(self):
@ -210,20 +199,27 @@ class FileWatcher:
            self.update_queue,
            self.include_patterns,
            self.exclude_patterns,
-            self.project_path,
+            self.project_path
        )
-        self.observer.schedule(event_handler, str(self.project_path), recursive=True)
+        self.observer.schedule(
            event_handler,
            str(self.project_path),
            recursive=True
        )
        # Start worker thread
        self.running = True
-        self.worker_thread = threading.Thread(target=self._process_updates, daemon=True)
+        self.worker_thread = threading.Thread(
            target=self._process_updates,
            daemon=True
        )
        self.worker_thread.start()
        # Start observer
        self.observer.start()
-        self.stats["started_at"] = datetime.now()
+        self.stats['started_at'] = datetime.now()
        logger.info("File watcher started successfully")
    def stop(self):
@ -319,29 +315,27 @@ class FileWatcher:
                    success = self.indexer.delete_file(file_path)
                if success:
-                    self.stats["files_updated"] += 1
+                    self.stats['files_updated'] += 1
                else:
-                    self.stats["files_failed"] += 1
+                    self.stats['files_failed'] += 1
-                self.stats["last_update"] = datetime.now()
+                self.stats['last_update'] = datetime.now()
            except Exception as e:
                logger.error(f"Failed to process {file_path}: {e}")
-                self.stats["files_failed"] += 1
+                self.stats['files_failed'] += 1
-        logger.info(
+        logger.info(f"Batch processing complete. Updated: {self.stats['files_updated']}, Failed: {self.stats['files_failed']}")
            f"Batch processing complete. Updated: {self.stats['files_updated']}, Failed: {self.stats['files_failed']}"
        )
    def get_statistics(self) -> dict:
        """Get watcher statistics."""
        stats = self.stats.copy()
-        stats["queue_size"] = self.update_queue.size()
+        stats['queue_size'] = self.update_queue.size()
-        stats["is_running"] = self.running
+        stats['is_running'] = self.running
-        if stats["started_at"]:
+        if stats['started_at']:
-            uptime = datetime.now() - stats["started_at"]
+            uptime = datetime.now() - stats['started_at']
-            stats["uptime_seconds"] = uptime.total_seconds()
+            stats['uptime_seconds'] = uptime.total_seconds()
        return stats
@ -377,8 +371,6 @@ class FileWatcher:
 # Convenience function
 def watch_project(project_path: Path, callback: Optional[Callable] = None):
    """
    Watch a project for changes and update index automatically.
--- a/mini_rag/windows_console_fix.py
+++ b/mini_rag/windows_console_fix.py
@ -3,9 +3,9 @@ Windows Console Unicode/Emoji Fix
 Reliable Windows console Unicode/emoji support for 2025.
 """
 import io
 import os
 import sys
 import os
 import io
 def fix_windows_console():
@ -14,33 +14,28 @@ def fix_windows_console():
    Call this at the start of any script that needs to output Unicode/emojis.
    """
    # Set environment variable for UTF-8 mode
-    os.environ["PYTHONUTF8"] = "1"
+    os.environ['PYTHONUTF8'] = '1'
    # For Python 3.7+
-    if hasattr(sys.stdout, "reconfigure"):
+    if hasattr(sys.stdout, 'reconfigure'):
-        sys.stdout.reconfigure(encoding="utf-8")
+        sys.stdout.reconfigure(encoding='utf-8')
-        sys.stderr.reconfigure(encoding="utf-8")
+        sys.stderr.reconfigure(encoding='utf-8')
-        if hasattr(sys.stdin, "reconfigure"):
+        if hasattr(sys.stdin, 'reconfigure'):
-            sys.stdin.reconfigure(encoding="utf-8")
+            sys.stdin.reconfigure(encoding='utf-8')
    else:
        # For older Python versions
-        if sys.platform == "win32":
+        if sys.platform == 'win32':
            # Replace streams with UTF-8 versions
-            sys.stdout = io.TextIOWrapper(
+            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', line_buffering=True)
-                sys.stdout.buffer, encoding="utf-8", line_buffering=True
+            sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', line_buffering=True)
            )
            sys.stderr = io.TextIOWrapper(
                sys.stderr.buffer, encoding="utf-8", line_buffering=True
            )
    # Also set the console code page to UTF-8 on Windows
-    if sys.platform == "win32":
+    if sys.platform == 'win32':
        import subprocess
        try:
            # Set console to UTF-8 code page
-            subprocess.run(["chcp", "65001"], shell=True, capture_output=True)
+            subprocess.run(['chcp', '65001'], shell=True, capture_output=True)
-        except (OSError, subprocess.SubprocessError):
+        except:
            pass
@ -49,8 +44,6 @@ fix_windows_console()
 # Test function to verify it works
 def test_emojis():
    """Test that emojis work properly."""
    print("Testing emoji output:")
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,74 +0,0 @@
 [tool.isort]
 profile = "black"
 line_length = 95
 multi_line_output = 3
 include_trailing_comma = true
 force_grid_wrap = 0
 use_parentheses = true
 ensure_newline_before_comments = true
 src_paths = ["mini_rag", "tests", "examples", "scripts"]
 known_first_party = ["mini_rag"]
 sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
 skip = [".venv", ".venv-linting", "__pycache__", ".git"]
 skip_glob = ["*.egg-info/*", "build/*", "dist/*"]
 [tool.black]
 line-length = 95
 target-version = ['py310']
 include = '\.pyi?$'
 extend-exclude = '''
 /(
  # directories
  \.eggs
  | \.git
  | \.hg
  | \.mypy_cache
  | \.tox
  | \.venv
  | \.venv-linting
  | _build
  | buck-out
  | build
  | dist
 )/
 '''
 [build-system]
 requires = ["setuptools", "wheel"]
 build-backend = "setuptools.build_meta"
 [project]
 name = "fss-mini-rag"
 version = "2.1.0"
 description = "Educational RAG system that actually works! Two modes: fast synthesis for quick answers, deep exploration for learning."
 authors = [
    {name = "Brett Fox", email = "brett@fsscoding.com"}
 ]
 readme = "README.md"
 license = {text = "MIT"}
 requires-python = ">=3.8"
 keywords = ["rag", "search", "ai", "llm", "embeddings", "semantic-search", "code-search"]
 classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Software Development :: Tools",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
 ]
 [project.urls]
 Homepage = "https://github.com/FSSCoding/Fss-Mini-Rag"
 Repository = "https://github.com/FSSCoding/Fss-Mini-Rag"
 Issues = "https://github.com/FSSCoding/Fss-Mini-Rag/issues"
 [project.scripts]
 rag-mini = "mini_rag.cli:cli"
 [tool.setuptools]
 packages = ["mini_rag"]
--- a/10
+++ b/10
@ -60,7 +60,6 @@ attempt_auto_setup() {
    echo -e "${GREEN}✅ Created virtual environment${NC}" >&2
    # Step 2: Install dependencies
    echo -e "${YELLOW}📦 Installing dependencies (this may take 1-2 minutes)...${NC}" >&2
    if ! "$SCRIPT_DIR/.venv/bin/pip" install -r "$SCRIPT_DIR/requirements.txt" >/dev/null 2>&1; then
        return 1  # Dependency installation failed
    fi
@ -113,7 +112,6 @@ show_help() {
    echo -e "${BOLD}Main Commands:${NC}"
    echo "  rag-mini index <project_path>      # Index project for search"
    echo "  rag-mini search <project_path> <query>  # Search indexed project"
    echo "  rag-mini explore <project_path>    # Interactive exploration with AI"
    echo "  rag-mini status <project_path>     # Show project status"
    echo ""
    echo -e "${BOLD}Interfaces:${NC}"
@ -326,11 +324,11 @@ main() {
        "server")
            # Start server mode
            shift
-            exec "$PYTHON" "$SCRIPT_DIR/mini_rag/fast_server.py" "$@"
+            exec "$PYTHON" "$SCRIPT_DIR/claude_rag/server.py" "$@"
            ;;
-        "index"|"search"|"explore"|"status"|"update"|"check-update")
+        "index"|"search"|"status")
-            # Direct CLI commands - call Python script
+            # Direct CLI commands
-            exec "$PYTHON" "$SCRIPT_DIR/bin/rag-mini.py" "$@"
+            exec "$SCRIPT_DIR/rag-mini" "$@"
            ;;
        *)
            # Unknown command - show help
--- a/rag-mini.py
+++ b/rag-mini.py
@ -0,0 +1,406 @@
 #!/usr/bin/env python3
 """
 rag-mini - FSS-Mini-RAG Command Line Interface
 A lightweight, portable RAG system for semantic code search.
 Usage: rag-mini <command> <project_path> [options]
 """
 import sys
 import argparse
 from pathlib import Path
 import json
 import logging
 # Add the RAG system to the path
 sys.path.insert(0, str(Path(__file__).parent))
 from mini_rag.indexer import ProjectIndexer
 from mini_rag.search import CodeSearcher
 from mini_rag.ollama_embeddings import OllamaEmbedder
 from mini_rag.llm_synthesizer import LLMSynthesizer
 from mini_rag.explorer import CodeExplorer
 # Configure logging for user-friendly output: records are rendered as
 # "LEVEL: message"; main() raises the root level to INFO when --verbose is given
 logging.basicConfig(
    level=logging.WARNING,  # Only show warnings and errors by default
    format='%(levelname)s: %(message)s'
 )
 # Module-level logger named after this module
 logger = logging.getLogger(__name__)
 def index_project(project_path: Path, force: bool = False):
    """
    Index a project directory and print a summary of the run.
    Args:
        project_path: Directory of the project to index
        force: When True, pass force_reindex=True to the indexer
    Any exception raised along the way is reported together with
    troubleshooting hints, after which the process exits with status 1.
    """
    try:
        # Announce the operation
        verb = "Re-indexing" if force else "Indexing"
        print(f"🚀 {verb} {project_path.name}")
        # Existing index and no forced rebuild: say that changes are being checked
        index_dir = project_path / '.mini-rag'
        if index_dir.exists() and not force:
            print("   Checking for changes...")
        summary = ProjectIndexer(project_path).index_project(force_reindex=force)
        # Counters reported by the indexer (missing keys default to 0)
        indexed = summary.get('files_indexed', 0)
        chunks = summary.get('chunks_created', 0)
        elapsed = summary.get('time_taken', 0)
        if indexed != 0:
            print(f"✅ Indexed {indexed} files in {elapsed:.1f}s")
            print(f"   Created {chunks} chunks")
            # Throughput is only computed for a positive duration
            if elapsed > 0:
                rate = indexed / elapsed
                print(f"   Speed: {rate:.1f} files/sec")
        else:
            print("✅ Index up to date - no changes detected")
        # Report failed files, if any
        failures = summary.get('files_failed', 0)
        if failures > 0:
            print(f"⚠️  {failures} files failed (check logs with --verbose)")
        # Suggest a first search when no previous query has been recorded
        if not (index_dir / 'last_search').exists():
            print(f'\n💡 Try: rag-mini search {project_path} "your search here"')
    except Exception as e:
        print(f"❌ Indexing failed: {e}")
        # An empty entry prints as a bare newline, i.e. a blank separator line
        for hint in (
            "",
            "🔧 Common solutions:",
            "   • Check if path exists and you have read permissions",
            "   • Ensure Python dependencies are installed: pip install -r requirements.txt",
            "   • Try with smaller project first to test setup",
            "   • Check available disk space for index files",
            "",
            "📚 For detailed help:",
            f"   ./rag-mini index {project_path} --verbose",
            "   Or see: docs/TROUBLESHOOTING.md",
        ):
            print(hint)
        sys.exit(1)
 def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
    """
    Search an indexed project and print the matching chunks.
    Args:
        project_path: Root directory of the project to search
        query: Free-text search query
        limit: Maximum number of results requested from the searcher (top_k)
        synthesize: When True, also print an LLM synthesis of the results
    Raises:
        SystemExit: With status 1 when the project has no .mini-rag directory,
            or when the search raises an exception
    """
    try:
        # Check if indexed first
        rag_dir = project_path / '.mini-rag'
        if not rag_dir.exists():
            print(f"❌ Project not indexed: {project_path.name}")
            print(f"   Run: rag-mini index {project_path}")
            sys.exit(1)
        print(f"🔍 Searching \"{query}\" in {project_path.name}")
        searcher = CodeSearcher(project_path)
        results = searcher.search(query, top_k=limit)
        if not results:
            print("❌ No results found")
            print()
            print("🔧 Quick fixes to try:")
            print("   • Use broader terms: \"login\" instead of \"authenticate_user_session\"")
            print("   • Try concepts: \"database query\" instead of specific function names")
            print("   • Check spelling and try simpler words")
            print("   • Search for file types: \"python class\" or \"javascript function\"")
            print()
            print("⚙️ Configuration adjustments:")
            # NOTE(review): the argument parser in this file's main() defines no
            # --threshold option - confirm the ./rag-mini wrapper accepts it
            print(f"   • Lower threshold: ./rag-mini search {project_path} \"{query}\" --threshold 0.05")
            print("   • More results: add --limit 20")
            print()
            print("📚 Need help? See: docs/TROUBLESHOOTING.md")
            return
        print(f"✅ Found {len(results)} results:")
        print()
        for i, result in enumerate(results, 1):
            # Clean up file path display
            file_path = Path(result.file_path)
            try:
                rel_path = file_path.relative_to(project_path)
            except ValueError:
                # If relative_to fails, just show the basename
                rel_path = file_path.name
            print(f"{i}. {rel_path}")
            print(f"   Score: {result.score:.3f}")
            # Show line info if available (attribute may be absent or falsy)
            if getattr(result, 'start_line', None):
                print(f"   Lines: {result.start_line}-{result.end_line}")
            # Show the chunk's name, when it has one, as context
            if getattr(result, 'name', None):
                print(f"   Context: {result.name}")
            # Show a preview of the content, capped at 10 lines
            print("   Content:")
            content_lines = result.content.strip().split('\n')
            for line in content_lines[:10]:
                print(f"     {line}")
            if len(content_lines) > 10:
                print(f"     ... ({len(content_lines) - 10} more lines)")
                print("     Use --verbose or rag-mini-enhanced for full context")
            print()
        # LLM Synthesis if requested
        if synthesize:
            print("🧠 Generating LLM synthesis...")
            synthesizer = LLMSynthesizer()
            if synthesizer.is_available():
                synthesis = synthesizer.synthesize_search_results(query, results, project_path)
                print()
                print(synthesizer.format_synthesis_output(synthesis, query))
                # Add guidance for deeper analysis
                if synthesis.confidence < 0.7 or any(word in query.lower() for word in ['why', 'how', 'explain', 'debug']):
                    print("\n💡 Want deeper analysis with reasoning?")
                    print(f"   Try: rag-mini explore {project_path}")
                    print("   Exploration mode enables thinking and remembers conversation context.")
            else:
                print("❌ LLM synthesis unavailable")
                print("   • Ensure Ollama is running: ollama serve")
                print("   • Install a model: ollama pull llama3.2")
                print("   • Check connection to http://localhost:11434")
        # Save last search for potential enhancements. This is best-effort:
        # only I/O and encoding failures are ignored, so Ctrl-C and SystemExit
        # are no longer swallowed as they were by a bare except
        try:
            (rag_dir / 'last_search').write_text(query)
        except (OSError, UnicodeError):
            pass  # Don't fail if we can't save
    except Exception as e:
        print(f"❌ Search failed: {e}")
        print()
        if "not indexed" in str(e).lower():
            print("🔧 Solution:")
            print(f"   ./rag-mini index {project_path}")
            print()
        else:
            print("🔧 Common solutions:")
            print("   • Check project path exists and is readable")
            print("   • Verify index isn't corrupted: delete .mini-rag/ and re-index")
            print("   • Try with a different project to test setup")
            print("   • Check available memory and disk space")
            print()
            print("📚 Get detailed error info:")
            print(f"   ./rag-mini search {project_path} \"{query}\" --verbose")
            print("   Or see: docs/TROUBLESHOOTING.md")
            print()
        sys.exit(1)
 def status_check(project_path: Path):
    """
    Print the index status, embedding status and last search for a project.
    Args:
        project_path: Root directory of the project to inspect
    Raises:
        SystemExit: With status 1 when an unexpected exception escapes the
            individual checks
    """
    try:
        print(f"📊 Status for {project_path.name}")
        print()
        # Check project indexing status first
        rag_dir = project_path / '.mini-rag'
        if not rag_dir.exists():
            print("❌ Project not indexed")
            print(f"   Run: rag-mini index {project_path}")
            print()
        else:
            manifest = rag_dir / 'manifest.json'
            if manifest.exists():
                try:
                    with open(manifest) as f:
                        data = json.load(f)
                    file_count = data.get('file_count', 0)
                    chunk_count = data.get('chunk_count', 0)
                    indexed_at = data.get('indexed_at', 'Never')
                    print("✅ Project indexed")
                    print(f"   Files: {file_count}")
                    print(f"   Chunks: {chunk_count}")
                    print(f"   Last update: {indexed_at}")
                    # Show average chunks per file
                    if file_count > 0:
                        avg_chunks = chunk_count / file_count
                        print(f"   Avg chunks/file: {avg_chunks:.1f}")
                    print()
                except Exception:
                    # Covers unreadable files, invalid JSON and unexpected value types
                    print("⚠️  Index exists but manifest unreadable")
                    print()
            else:
                print("⚠️  Index directory exists but incomplete")
                print(f"   Try: rag-mini index {project_path} --force")
                print()
        # Check embedding system status
        print("🧠 Embedding System:")
        try:
            embedder = OllamaEmbedder()
            emb_info = embedder.get_status()
            method = emb_info.get('method', 'unknown')
            if method == 'ollama':
                print("   ✅ Ollama (high quality)")
            elif method == 'ml':
                print("   ✅ ML fallback (good quality)")
            elif method == 'hash':
                print("   ⚠️  Hash fallback (basic quality)")
            else:
                print(f"   ❓ Unknown method: {method}")
            # Show additional details if available
            if 'model' in emb_info:
                print(f"   Model: {emb_info['model']}")
        except Exception as e:
            print(f"   ❌ Status check failed: {e}")
        # Show last search if available
        last_search_file = rag_dir / 'last_search' if rag_dir.exists() else None
        if last_search_file and last_search_file.exists():
            # Best-effort: an unreadable or undecodable file is skipped, but
            # Ctrl-C and SystemExit are no longer swallowed by a bare except
            try:
                last_query = last_search_file.read_text().strip()
                print(f"\n🔍 Last search: \"{last_query}\"")
            except (OSError, UnicodeError):
                pass
    except Exception as e:
        print(f"❌ Status check failed: {e}")
        sys.exit(1)
 def explore_interactive(project_path: Path):
    """
    Run the interactive question-and-answer loop for a project.
    Reads questions from standard input until the user quits, and handles
    the built-in 'help' and 'summary' commands. Exits with status 1 when the
    exploration session cannot be started.
    """
    try:
        explorer = CodeExplorer(project_path)
        if not explorer.start_exploration_session():
            sys.exit(1)
        print("\n🤔 Ask your first question about the codebase:")
        while True:
            try:
                # Read one line and normalise it once for command matching
                question = input("\n> ").strip()
                command = question.lower()
                # Exit commands end the session
                if command in ('quit', 'exit', 'q'):
                    print("\n" + explorer.end_session())
                    break
                # Nothing typed: prompt again
                if not question:
                    print("Please enter a question or 'quit' to exit.")
                    continue
                # Built-in commands
                if command in ('help', 'h'):
                    print("""
 🧠 EXPLORATION MODE HELP:
  • Ask any question about the codebase
  • I remember our conversation for follow-up questions
  • Use 'why', 'how', 'explain' for detailed reasoning
  • Type 'summary' to see session overview
  • Type 'quit' or 'exit' to end session
 💡 Example questions:
  • "How does authentication work?"
  • "Why is this function slow?"
  • "Explain the database connection logic"
  • "What are the security concerns here?"
 """)
                    continue
                if command == 'summary':
                    print("\n" + explorer.get_session_summary())
                    continue
                # Anything else is treated as a question for the explorer
                print("\n🔍 Analyzing...")
                answer = explorer.explore_question(question)
                if not answer:
                    print("❌ Sorry, I couldn't process that question. Please try again.")
                else:
                    print(f"\n{answer}")
            except (KeyboardInterrupt, EOFError):
                # Ctrl-C and end-of-input both close the session the same way
                print(f"\n\n{explorer.end_session()}")
                break
            except Exception as e:
                print(f"❌ Error processing question: {e}")
                print("Please try again or type 'quit' to exit.")
    except Exception as e:
        print(f"❌ Failed to start exploration mode: {e}")
        print("Make sure the project is indexed first: rag-mini index <project>")
        sys.exit(1)
 def main():
    """Parse the command line, validate the project path and run the command."""
    cli = argparse.ArgumentParser(
        description="FSS-Mini-RAG - Lightweight semantic code search",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
 Examples:
  rag-mini index /path/to/project              # Index a project
  rag-mini search /path/to/project "query"     # Search indexed project  
  rag-mini search /path/to/project "query" -s  # Search with LLM synthesis
  rag-mini explore /path/to/project            # Interactive exploration mode
  rag-mini status /path/to/project             # Show status
        """
    )
    # Positional arguments
    cli.add_argument('command', choices=['index', 'search', 'explore', 'status'],
                     help='Command to execute')
    cli.add_argument('project_path', type=Path,
                     help='Path to project directory (REQUIRED)')
    cli.add_argument('query', nargs='?',
                     help='Search query (for search command)')
    # Options
    cli.add_argument('--force', action='store_true',
                     help='Force reindex all files')
    cli.add_argument('--limit', type=int, default=10,
                     help='Maximum number of search results')
    cli.add_argument('--verbose', '-v', action='store_true',
                     help='Enable verbose logging')
    cli.add_argument('--synthesize', '-s', action='store_true',
                     help='Generate LLM synthesis of search results (requires Ollama)')
    opts = cli.parse_args()
    # Verbose mode lowers the root logger threshold to INFO
    if opts.verbose:
        logging.getLogger().setLevel(logging.INFO)
    # The target must exist and be a directory
    target = opts.project_path
    if not target.exists():
        print(f"❌ Project path does not exist: {target}")
        sys.exit(1)
    if not target.is_dir():
        print(f"❌ Project path is not a directory: {target}")
        sys.exit(1)
    # Search is the only command that needs the extra query argument
    if opts.command == 'search':
        if not opts.query:
            print("❌ Search query required")
            sys.exit(1)
        search_project(target, opts.query, opts.limit, opts.synthesize)
        return
    # Remaining commands only need the project path (plus --force for index)
    handlers = {
        'index': lambda: index_project(target, opts.force),
        'explore': lambda: explore_interactive(target),
        'status': lambda: status_check(target),
    }
    handler = handlers.get(opts.command)
    if handler is not None:
        handler()
 # Run the CLI only when this file is executed as a script
 if __name__ == '__main__':
    main()
--- a/2
+++ b/2
@ -19,4 +19,4 @@ if [ ! -f "$PYTHON" ]; then
 fi
 # Launch TUI
-exec "$PYTHON" "$SCRIPT_DIR/bin/rag-tui.py" "$@"
+exec "$PYTHON" "$SCRIPT_DIR/rag-tui.py" "$@"
--- a/rag-tui.py
+++ b/rag-tui.py
@ -0,0 +1,872 @@
 #!/usr/bin/env python3
 """
 FSS-Mini-RAG Text User Interface
 Simple, educational TUI that shows CLI commands while providing easy interaction.
 """
 import os
 import sys
 import json
 from pathlib import Path
 from typing import Optional, List, Dict, Any
 # Simple TUI without external dependencies
 class SimpleTUI:
    def __init__(self):
        self.project_path: Optional[Path] = None
        self.current_config: Dict[str, Any] = {}
        self.search_count = 0  # Track searches for sample reminder
    def clear_screen(self):
        """Wipe the terminal by running the platform's own clear command."""
        if os.name == 'nt':
            command = 'cls'
        else:
            command = 'clear'
        os.system(command)
    def print_header(self):
        """Draw the boxed application banner, followed by one empty line."""
        banner_rows = (
            "╔════════════════════════════════════════════════════╗",
            "║              FSS-Mini-RAG TUI                      ║",
            "║         Semantic Code Search Interface             ║",
            "╚════════════════════════════════════════════════════╝",
        )
        for row in banner_rows:
            print(row)
        print()
    def print_cli_command(self, command: str, description: str = ""):
        """Print the shell command matching a TUI action, an optional note, then a blank line."""
        rows = [f"💻 CLI equivalent: {command}"]
        if description:
            rows.append(f"   {description}")
        # The trailing empty entry yields the blank separator line.
        rows.append("")
        print("\n".join(rows))
    def get_input(self, prompt: str, default: str = "") -> str:
        """Ask for one line of text; a blank reply yields ``default``.

        The default, when non-empty, is shown in square brackets after the prompt.
        Surrounding whitespace is stripped from the reply.
        """
        suffix = f" [{default}]: " if default else ": "
        answer = input(f"{prompt}{suffix}").strip()
        return answer or default
    def show_menu(self, title: str, options: List[str], show_cli: bool = True) -> int:
        """Display a numbered menu and block until a valid entry is chosen.

        Args:
            title: Heading printed above the options.
            options: Labels to list; they are shown numbered from 1.
            show_cli: When True, also print a reminder that every action has
                a CLI equivalent.

        Returns:
            Zero-based index into ``options`` of the chosen entry.

        Raises:
            SystemExit: With status 0 when the user presses Ctrl-C or the
                input stream ends (Ctrl-D / closed stdin).
        """
        print(f"🎯 {title}")
        print("=" * (len(title) + 3))
        print()
        for i, option in enumerate(options, 1):
            print(f"{i}. {option}")
        if show_cli:
            print()
            print("💡 All these actions can be done via CLI commands")
            print("   You'll see the commands as you use this interface!")
        print()
        while True:
            try:
                choice = int(input("Select option (number): "))
                if 1 <= choice <= len(options):
                    return choice - 1
                else:
                    print(f"Please enter a number between 1 and {len(options)}")
            except ValueError:
                print("Please enter a valid number")
            except (KeyboardInterrupt, EOFError):
                # End of input can never yield a valid choice, so it is treated
                # like Ctrl-C instead of escaping as an "unexpected error".
                print("\nGoodbye!")
                sys.exit(0)
    def select_project(self):
        """Select or create project directory."""
        self.clear_screen()
        self.print_header()
        print("📁 Project Selection")
        print("==================")
        print()
        # Show current project if any
        if self.project_path:
            print(f"Current project: {self.project_path}")
            print()
        options = [
            "Enter project path",
            "Use current directory",
            "Browse recent projects" if self.project_path else "Skip (will ask later)"
        ]
        choice = self.show_menu("Choose project directory", options, show_cli=False)
        if choice == 0:
            # Enter path manually
            while True:
                path_str = self.get_input("Enter project directory path", 
                                        str(self.project_path) if self.project_path else "")
                if not path_str:
                    continue
                project_path = Path(path_str).expanduser().resolve()
                if project_path.exists() and project_path.is_dir():
                    self.project_path = project_path
                    print(f"✅ Selected: {self.project_path}")
                    break
                else:
                    print(f"❌ Directory not found: {project_path}")
                    retry = input("Try again? (y/N): ").lower()
                    if retry != 'y':
                        break
        elif choice == 1:
            # Use current directory
            self.project_path = Path.cwd()
            print(f"✅ Using current directory: {self.project_path}")
        elif choice == 2:
            # Browse recent projects or skip
            if self.project_path:
                self.browse_recent_projects()
            else:
                print("No project selected - you can choose one later from the main menu")
        input("\nPress Enter to continue...")
    def browse_recent_projects(self):
        """Browse recently indexed projects."""
        print("🕒 Recent Projects")
        print("=================")
        print()
        # Look for .mini-rag directories in common locations
        search_paths = [
            Path.home(),
            Path.home() / "projects", 
            Path.home() / "code",
            Path.home() / "dev",
            Path.cwd().parent,
            Path.cwd()
        ]
        recent_projects = []
        for search_path in search_paths:
            if search_path.exists() and search_path.is_dir():
                try:
                    for item in search_path.iterdir():
                        if item.is_dir():
                            rag_dir = item / '.mini-rag'
                            if rag_dir.exists():
                                recent_projects.append(item)
                except (PermissionError, OSError):
                    continue
        # Remove duplicates and sort by modification time
        recent_projects = list(set(recent_projects))
        try:
            recent_projects.sort(key=lambda p: (p / '.mini-rag').stat().st_mtime, reverse=True)
        except:
            pass
        if not recent_projects:
            print("❌ No recently indexed projects found")
            print("   Projects with .mini-rag directories will appear here")
            return
        print("Found indexed projects:")
        for i, project in enumerate(recent_projects[:10], 1):  # Show up to 10
            try:
                manifest = project / '.mini-rag' / 'manifest.json'
                if manifest.exists():
                    with open(manifest) as f:
                        data = json.load(f)
                    file_count = data.get('file_count', 0)
                    indexed_at = data.get('indexed_at', 'Unknown')
                    print(f"{i}. {project.name} ({file_count} files, {indexed_at})")
                else:
                    print(f"{i}. {project.name} (incomplete index)")
            except:
                print(f"{i}. {project.name} (index status unknown)")
        print()
        try:
            choice = int(input("Select project number (or 0 to cancel): "))
            if 1 <= choice <= len(recent_projects):
                self.project_path = recent_projects[choice - 1]
                print(f"✅ Selected: {self.project_path}")
        except (ValueError, IndexError):
            print("Selection cancelled")
    def index_project_interactive(self):
        """Interactive project indexing."""
        if not self.project_path:
            print("❌ No project selected")
            input("Press Enter to continue...")
            return
        self.clear_screen()
        self.print_header()
        print("🚀 Project Indexing")
        print("==================")
        print()
        print(f"Project: {self.project_path}")
        print()
        # Check if already indexed
        rag_dir = self.project_path / '.mini-rag'
        if rag_dir.exists():
            print("⚠️  Project appears to be already indexed")
            print()
            force = input("Re-index everything? (y/N): ").lower() == 'y'
        else:
            force = False
        # Show CLI command
        cli_cmd = f"./rag-mini index {self.project_path}"
        if force:
            cli_cmd += " --force"
        self.print_cli_command(cli_cmd, "Index project for semantic search")
        print("Starting indexing...")
        print("=" * 50)
        # Actually run the indexing
        try:
            # Import here to avoid startup delays
            sys.path.insert(0, str(Path(__file__).parent))
            from mini_rag.indexer import ProjectIndexer
            indexer = ProjectIndexer(self.project_path)
            result = indexer.index_project(force_reindex=force)
            print()
            print("✅ Indexing completed!")
            print(f"   Files processed: {result.get('files_indexed', 0)}")
            print(f"   Chunks created: {result.get('chunks_created', 0)}")
            print(f"   Time taken: {result.get('time_taken', 0):.1f}s")
            if result.get('files_failed', 0) > 0:
                print(f"   ⚠️  Files failed: {result['files_failed']}")
        except Exception as e:
            print(f"❌ Indexing failed: {e}")
            print("   Try running the CLI command directly for more details")
        print()
        input("Press Enter to continue...")
    def search_interactive(self):
        """Interactive search interface."""
        if not self.project_path:
            print("❌ No project selected")
            input("Press Enter to continue...")
            return
        # Check if indexed
        rag_dir = self.project_path / '.mini-rag'
        if not rag_dir.exists():
            print(f"❌ Project not indexed: {self.project_path.name}")
            print("   Index the project first!")
            input("Press Enter to continue...")
            return
        self.clear_screen()
        self.print_header()
        print("🔍 Semantic Search")
        print("=================")
        print()
        print(f"Project: {self.project_path.name}")
        print()
        # Show sample questions for beginners - relevant to FSS-Mini-RAG
        print("💡 Not sure what to search for? Try these questions about FSS-Mini-RAG:")
        print()
        sample_questions = [
            "chunking strategy",
            "ollama integration", 
            "indexing performance",
            "why does indexing take long",
            "how to improve search results",
            "embedding generation"
        ]
        for i, question in enumerate(sample_questions[:3], 1):
            print(f"   {i}. {question}")
        print("   4. Enter your own question")
        print()
        # Let user choose a sample or enter their own
        choice_str = self.get_input("Choose a number (1-4) or press Enter for custom", "4")
        try:
            choice = int(choice_str)
            if 1 <= choice <= 3:
                query = sample_questions[choice - 1]
                print(f"Selected: '{query}'")
                print()
            else:
                query = self.get_input("Enter your search query", "").strip()
        except ValueError:
            query = self.get_input("Enter your search query", "").strip()
        if not query:
            return
        # Get result limit
        try:
            limit = int(self.get_input("Number of results", "10"))
            limit = max(1, min(20, limit))  # Clamp between 1-20
        except ValueError:
            limit = 10
        # Show CLI command
        cli_cmd = f"./rag-mini search {self.project_path} \"{query}\""
        if limit != 10:
            cli_cmd += f" --limit {limit}"
        self.print_cli_command(cli_cmd, "Search for semantic matches")
        print("Searching...")
        print("=" * 50)
        # Actually run the search
        try:
            sys.path.insert(0, str(Path(__file__).parent))
            from mini_rag.search import CodeSearcher
            searcher = CodeSearcher(self.project_path)
            # Enable query expansion in TUI for better results
            searcher.config.search.expand_queries = True
            results = searcher.search(query, top_k=limit)
            if not results:
                print("❌ No results found")
                print()
                print("💡 Try:")
                print("   • Broader search terms")
                print("   • Different keywords")
                print("   • Concepts instead of exact names")
            else:
                print(f"✅ Found {len(results)} results:")
                print()
                for i, result in enumerate(results, 1):
                    # Clean up file path
                    try:
                        rel_path = result.file_path.relative_to(self.project_path)
                    except:
                        rel_path = result.file_path
                    print(f"{i}. {rel_path}")
                    print(f"   Relevance: {result.score:.3f}")
                    # Show line information if available
                    if hasattr(result, 'start_line') and result.start_line:
                        print(f"   Lines: {result.start_line}-{result.end_line}")
                    # Show function/class context if available
                    if hasattr(result, 'name') and result.name:
                        print(f"   Context: {result.name}")
                    # Show full content with proper formatting
                    content_lines = result.content.strip().split('\n')
                    print(f"   Content:")
                    for line_num, line in enumerate(content_lines[:8], 1):  # Show up to 8 lines
                        print(f"     {line}")
                    if len(content_lines) > 8:
                        print(f"     ... ({len(content_lines) - 8} more lines)")
                    print()
                # Offer to view full results
                if len(results) > 1:
                    print("💡 To see more context or specific results:")
                    print(f"   Run: ./rag-mini search {self.project_path} \"{query}\" --verbose")
                # Suggest follow-up questions based on the search
                print()
                print("🔍 Suggested follow-up searches:")
                follow_up_questions = self.generate_follow_up_questions(query, results)
                for i, question in enumerate(follow_up_questions, 1):
                    print(f"   {i}. {question}")
                # Ask if they want to run a follow-up search
                print()
                choice = input("Run a follow-up search? Enter number (1-3) or press Enter to continue: ").strip()
                if choice.isdigit() and 1 <= int(choice) <= len(follow_up_questions):
                    # Recursive search with the follow-up question
                    follow_up_query = follow_up_questions[int(choice) - 1]
                    print(f"\nSearching for: '{follow_up_query}'")
                    print("=" * 50)
                    # Run another search
                    follow_results = searcher.search(follow_up_query, top_k=5)
                    if follow_results:
                        print(f"✅ Found {len(follow_results)} follow-up results:")
                        print()
                        for i, result in enumerate(follow_results[:3], 1):  # Show top 3
                            try:
                                rel_path = result.file_path.relative_to(self.project_path)
                            except:
                                rel_path = result.file_path
                            print(f"{i}. {rel_path} (Score: {result.score:.3f})")
                            print(f"   {result.content.strip()[:100]}...")
                            print()
                    else:
                        print("❌ No follow-up results found")
                # Track searches and show sample reminder
                self.search_count += 1
                # Show sample reminder after 2 searches
                if self.search_count >= 2 and self.project_path.name == '.sample_test':
                    print()
                    print("⚠️  Sample Limitation Notice")
                    print("=" * 30)
                    print("You've been searching a small sample project.")
                    print("For full exploration of your codebase, you need to index the complete project.")
                    print()
                    # Show timing estimate if available
                    try:
                        with open('/tmp/fss-rag-sample-time.txt', 'r') as f:
                            sample_time = int(f.read().strip())
                        # Rough estimate: multiply by file count ratio
                        estimated_time = sample_time * 20  # Rough multiplier
                        print(f"🕒 Estimated full indexing time: ~{estimated_time} seconds")
                    except:
                        print("🕒 Estimated full indexing time: 1-3 minutes for typical projects")
                    print()
                    choice = input("Index the full project now? [y/N]: ").strip().lower()
                    if choice == 'y':
                        # Switch to full project and index
                        parent_dir = self.project_path.parent
                        self.project_path = parent_dir
                        print(f"\nSwitching to full project: {parent_dir}")
                        print("Starting full indexing...")
                        # Note: This would trigger full indexing in real implementation
                    print(f"   Or: ./rag-mini-enhanced context {self.project_path} \"{query}\"")
                    print()
        except Exception as e:
            print(f"❌ Search failed: {e}")
            print("   Try running the CLI command directly for more details")
        print()
        input("Press Enter to continue...")
    def generate_follow_up_questions(self, original_query: str, results) -> List[str]:
        """Suggest up to three follow-up searches for a query and its results.

        The first keyword group found in the lower-cased query selects a topic
        list; a generic list is used when none matches. Suggestions derived
        from the result files are appended, duplicates are dropped in order,
        and the first three entries are returned.
        """
        needle = original_query.lower()
        # Checked top to bottom; the first group with a keyword in the query wins.
        topic_table = (
            (("chunk",),
             ["chunk size optimization", "smart chunking boundaries", "chunk overlap strategies"]),
            (("ollama",),
             ["embedding model comparison", "ollama server setup", "nomic-embed-text performance"]),
            (("index", "performance"),
             ["indexing speed optimization", "memory usage during indexing", "file processing pipeline"]),
            (("search", "result"),
             ["search result ranking", "semantic vs keyword search", "query expansion techniques"]),
            (("embed",),
             ["vector embedding storage", "embedding model fallbacks", "similarity scoring"]),
        )
        # Generic RAG-related fallback when no keyword group matches.
        suggestions = ["vector database internals", "search quality tuning", "embedding optimization"]
        for keywords, ideas in topic_table:
            if any(word in needle for word in keywords):
                suggestions = list(ideas)
                break
        # NOTE(review): every topic list already holds three unique entries, so
        # the file-based suggestions appended below never survive the final
        # three-item cut - confirm whether that is intended.
        if results:
            suffixes = {hit.file_path.suffix.lower() for hit in results[:3]}
            if '.py' in suffixes:
                suggestions.append("Python module dependencies")
            if '.md' in suffixes:
                suggestions.append("documentation implementation")
            if 'chunker' in str(results[0].file_path).lower():
                suggestions.append("chunking algorithm details")
            if 'search' in str(results[0].file_path).lower():
                suggestions.append("search algorithm implementation")
        # Order-preserving de-duplication, then keep the top three.
        unique = []
        for idea in suggestions:
            if idea not in unique:
                unique.append(idea)
        return unique[:3]
    def explore_interactive(self):
        """Interactive exploration interface with thinking mode."""
        if not self.project_path:
            print("❌ No project selected")
            input("Press Enter to continue...")
            return
        # Check if indexed
        rag_dir = self.project_path / '.mini-rag'
        if not rag_dir.exists():
            print(f"❌ Project not indexed: {self.project_path.name}")
            print("   Index the project first!")
            input("Press Enter to continue...")
            return
        self.clear_screen()
        self.print_header()
        print("🧠 Interactive Exploration Mode")
        print("==============================")
        print()
        print(f"Project: {self.project_path.name}")
        print()
        print("💡 This mode enables:")
        print("   • Thinking-enabled LLM for detailed reasoning")
        print("   • Conversation memory across questions") 
        print("   • Perfect for learning and debugging")
        print()
        # Show CLI command
        cli_cmd = f"./rag-mini explore {self.project_path}"
        self.print_cli_command(cli_cmd, "Start interactive exploration session")
        print("Starting exploration mode...")
        print("=" * 50)
        # Launch exploration mode
        try:
            sys.path.insert(0, str(Path(__file__).parent))
            from mini_rag.explorer import CodeExplorer
            explorer = CodeExplorer(self.project_path)
            if not explorer.start_exploration_session():
                print("❌ Could not start exploration mode")
                print("   Make sure Ollama is running with a model installed")
                input("Press Enter to continue...")
                return
            print("\n🤔 Ask your first question about the codebase:")
            print("   (Type 'help' for commands, 'quit' to return to menu)")
            while True:
                try:
                    question = input("\n> ").strip()
                    if question.lower() in ['quit', 'exit', 'q', 'back']:
                        print("\n" + explorer.end_session())
                        break
                    if not question:
                        continue
                    if question.lower() in ['help', 'h']:
                        print("""
 🧠 EXPLORATION MODE HELP:
  • Ask any question about the codebase
  • I remember our conversation for follow-up questions  
  • Use 'why', 'how', 'explain' for detailed reasoning
  • Type 'summary' to see session overview
  • Type 'quit' to return to main menu
 💡 Example questions:
  • "How does authentication work?"
  • "Why is this function slow?"
  • "Explain the database connection logic"
  • "What are the security concerns here?"
 """)
                        continue
                    if question.lower() == 'summary':
                        print("\n" + explorer.get_session_summary())
                        continue
                    print("\n🔍 Analyzing...")
                    response = explorer.explore_question(question)
                    if response:
                        print(f"\n{response}")
                    else:
                        print("❌ Sorry, I couldn't process that question. Please try again.")
                except KeyboardInterrupt:
                    print(f"\n\n{explorer.end_session()}")
                    break
                except EOFError:
                    print(f"\n\n{explorer.end_session()}")
                    break
        except Exception as e:
            print(f"❌ Exploration mode failed: {e}")
            print("   Try running the CLI command directly for more details")
        input("\nPress Enter to continue...")
    def show_status(self):
        """Show project and system status."""
        self.clear_screen()
        self.print_header()
        print("📊 System Status")
        print("===============")
        print()
        if self.project_path:
            cli_cmd = f"./rag-mini status {self.project_path}"
            self.print_cli_command(cli_cmd, "Show detailed status information")
            # Check project status
            rag_dir = self.project_path / '.mini-rag'
            if rag_dir.exists():
                try:
                    manifest = rag_dir / 'manifest.json'
                    if manifest.exists():
                        with open(manifest) as f:
                            data = json.load(f)
                        print(f"Project: {self.project_path.name}")
                        print("✅ Indexed")
                        print(f"   Files: {data.get('file_count', 0)}")
                        print(f"   Chunks: {data.get('chunk_count', 0)}")
                        print(f"   Last update: {data.get('indexed_at', 'Unknown')}")
                    else:
                        print("⚠️  Index incomplete")
                except Exception as e:
                    print(f"❌ Could not read status: {e}")
            else:
                print(f"Project: {self.project_path.name}")
                print("❌ Not indexed")
        else:
            print("❌ No project selected")
        print()
        # Show embedding system status
        try:
            sys.path.insert(0, str(Path(__file__).parent))
            from mini_rag.ollama_embeddings import OllamaEmbedder
            embedder = OllamaEmbedder()
            info = embedder.get_status()
            print("🧠 Embedding System:")
            method = info.get('method', 'unknown')
            if method == 'ollama':
                print("   ✅ Ollama (high quality)")
            elif method == 'ml':
                print("   ✅ ML fallback (good quality)")
            elif method == 'hash':
                print("   ⚠️  Hash fallback (basic quality)")
            else:
                print(f"   ❓ Unknown: {method}")
        except Exception as e:
            print(f"🧠 Embedding System: ❌ Error: {e}")
        print()
        input("Press Enter to continue...")
    def show_configuration(self):
        """Show and manage configuration options."""
        if not self.project_path:
            print("❌ No project selected")
            input("Press Enter to continue...")
            return
        self.clear_screen()
        self.print_header()
        print("⚙️  Configuration")
        print("================")
        print()
        print(f"Project: {self.project_path.name}")
        print()
        config_path = self.project_path / '.mini-rag' / 'config.yaml'
        # Show current configuration if it exists
        if config_path.exists():
            print("✅ Configuration file exists")
            print(f"   Location: {config_path}")
            print()
            try:
                import yaml
                with open(config_path) as f:
                    config = yaml.safe_load(f)
                print("📋 Current Settings:")
                if 'chunking' in config:
                    chunk_cfg = config['chunking']
                    print(f"   Chunk size: {chunk_cfg.get('max_size', 2000)} characters")
                    print(f"   Strategy: {chunk_cfg.get('strategy', 'semantic')}")
                if 'embedding' in config:
                    emb_cfg = config['embedding']
                    print(f"   Embedding method: {emb_cfg.get('preferred_method', 'auto')}")
                if 'files' in config:
                    files_cfg = config['files']
                    print(f"   Min file size: {files_cfg.get('min_file_size', 50)} bytes")
                    exclude_count = len(files_cfg.get('exclude_patterns', []))
                    print(f"   Excluded patterns: {exclude_count} patterns")
                print()
            except Exception as e:
                print(f"⚠️  Could not read config: {e}")
                print()
        else:
            print("⚠️  No configuration file found")
            print("   A default config will be created when you index")
            print()
        # Show CLI commands for configuration
        self.print_cli_command(f"cat {config_path}", 
                              "View current configuration")
        self.print_cli_command(f"nano {config_path}", 
                              "Edit configuration file")
        print("🛠️  Configuration Options:")
        print("   • chunking.max_size - How large each searchable chunk is")
        print("   • chunking.strategy - 'semantic' (smart) vs 'fixed' (simple)")
        print("   • files.exclude_patterns - Skip files matching these patterns")
        print("   • embedding.preferred_method - 'ollama', 'ml', 'hash', or 'auto'")
        print("   • search.default_limit - Default number of search results")
        print()
        print("📚 References:")
        print("   • README.md - Complete configuration documentation")
        print("   • examples/config.yaml - Example with all options")
        print("   • docs/TUI_GUIDE.md - Detailed TUI walkthrough")
        print()
        # Quick actions
        if config_path.exists():
            action = input("Quick actions: [V]iew config, [E]dit path, or Enter to continue: ").lower()
            if action == 'v':
                print("\n" + "="*60)
                try:
                    with open(config_path) as f:
                        print(f.read())
                except Exception as e:
                    print(f"Could not read file: {e}")
                print("="*60)
                input("\nPress Enter to continue...")
            elif action == 'e':
                print(f"\n💡 To edit configuration:")
                print(f"   nano {config_path}")
                print(f"   # Or use your preferred editor")
                input("\nPress Enter to continue...")
        else:
            input("Press Enter to continue...")
    def show_cli_reference(self):
        """Show CLI command reference."""
        self.clear_screen()
        self.print_header()
        print("💻 CLI Command Reference")
        print("=======================")
        print()
        print("All TUI actions can be done via command line:")
        print()
        print("🚀 Basic Commands:")
        print("   ./rag-mini index <project_path>         # Index project")
        print("   ./rag-mini search <project_path> <query> --synthesize  # Fast synthesis")
        print("   ./rag-mini explore <project_path>       # Interactive thinking mode")
        print("   ./rag-mini status <project_path>        # Show status")
        print()
        print("🎯 Enhanced Commands:")
        print("   ./rag-mini-enhanced search <project_path> <query>  # Smart search")
        print("   ./rag-mini-enhanced similar <project_path> <query> # Find patterns")
        print("   ./rag-mini-enhanced analyze <project_path>         # Optimization")
        print()
        print("🛠️  Quick Scripts:")
        print("   ./run_mini_rag.sh index <project_path>     # Simple indexing")
        print("   ./run_mini_rag.sh search <project_path> <query>  # Simple search")
        print()
        print("⚙️  Options:")
        print("   --force                    # Force complete re-index")
        print("   --limit N                  # Limit search results")
        print("   --verbose                  # Show detailed output")
        print()
        print("💡 Pro tip: Start with the TUI, then try the CLI commands!")
        print("   The CLI is more powerful and faster for repeated tasks.")
        print()
        input("Press Enter to continue...")
    def main_menu(self):
        """Main application loop."""
        while True:
            self.clear_screen()
            self.print_header()
            # Show current project status
            if self.project_path:
                rag_dir = self.project_path / '.mini-rag'
                status = "✅ Indexed" if rag_dir.exists() else "❌ Not indexed"
                print(f"📁 Current project: {self.project_path.name} ({status})")
                print()
            else:
                # Show beginner tips when no project selected
                print("🎯 Welcome to FSS-Mini-RAG!")
                print("   Search through code, documents, emails, notes - anything text-based!")
                print("   Start by selecting a project directory below.")
                print()
            options = [
                "Select project directory",
                "Index project for search",
                "Search project (Fast synthesis)",
                "Explore project (Deep thinking)",
                "View status",
                "Configuration",
                "CLI command reference",
                "Exit"
            ]
            choice = self.show_menu("Main Menu", options)
            if choice == 0:
                self.select_project()
            elif choice == 1:
                self.index_project_interactive()
            elif choice == 2:
                self.search_interactive()
            elif choice == 3:
                self.explore_interactive()
            elif choice == 4:
                self.show_status()
            elif choice == 5:
                self.show_configuration()
            elif choice == 6:
                self.show_cli_reference()
            elif choice == 7:
                print("\nThanks for using FSS-Mini-RAG! 🚀")
                print("Try the CLI commands for even more power!")
                break
 def main():
    """Launch the TUI; Ctrl-C exits quietly and any other error is reported, not raised."""
    try:
        SimpleTUI().main_menu()
    except KeyboardInterrupt:
        print("\n\nGoodbye! 👋")
    except Exception as err:
        print(f"\nUnexpected error: {err}")
        print("Try running the CLI commands directly if this continues.")
 # Start the interface only when executed as a script.
 if __name__ == "__main__":
    main()
--- a/rag.bat
+++ b/rag.bat
@ -1,51 +0,0 @@
@echo off
 REM FSS-Mini-RAG Windows Launcher - Simple and Reliable
 REM
 REM   rag.bat                 start the interactive interface (rag-tui.py)
 REM   rag.bat help|--help|-h  show usage
 REM   rag.bat <args...>       forward all arguments to rag-mini.py
 setlocal
 REM %~dp0 is this script's directory with a trailing backslash; strip it.
 set "SCRIPT_DIR=%~dp0"
 set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
 set "VENV_PYTHON=%SCRIPT_DIR%\.venv\Scripts\python.exe"
 REM Check if virtual environment exists
 if not exist "%VENV_PYTHON%" (
    echo Virtual environment not found!
    echo.
    echo Run this first: install_windows.bat
    echo.
    pause
    exit /b 1
 )
 REM Route commands.
 REM Use %~1 (argument with surrounding quotes removed) so that a quoted first
 REM argument such as "C:\my project" does not produce ""C:\my project"" and a
 REM syntax error in the comparison.
 if "%~1"=="" goto :interactive
 if "%~1"=="help" goto :help
 if "%~1"=="--help" goto :help
 if "%~1"=="-h" goto :help
 REM Pass all arguments to Python script
 "%VENV_PYTHON%" "%SCRIPT_DIR%\rag-mini.py" %*
 goto :end
 :interactive
 echo Starting interactive interface...
 "%VENV_PYTHON%" "%SCRIPT_DIR%\rag-tui.py"
 goto :end
 :help
 echo FSS-Mini-RAG - Semantic Code Search
 echo.
 echo Usage:
 echo   rag.bat                           - Interactive interface
 echo   rag.bat index ^<folder^>             - Index a project
 echo   rag.bat search ^<folder^> ^<query^>     - Search project
 echo   rag.bat status ^<folder^>            - Check status
 echo.
 echo Examples:
 echo   rag.bat index C:\myproject
 echo   rag.bat search C:\myproject "authentication"
 echo   rag.bat search . "error handling"
 echo.
 pause
 :end
 endlocal
--- a/requirements.txt
+++ b/requirements.txt
@ -1,12 +1,22 @@
-# Lightweight Mini RAG - Simplified versions
+# Lightweight Mini RAG - Ollama Edition
-lancedb
+# Removed: torch, transformers, sentence-transformers (5.2GB+ saved)
-pandas
+
-numpy
+# Core vector database and data handling
-pyarrow
+lancedb>=0.5.0
-watchdog
+pandas>=2.0.0
-requests
+numpy>=1.24.0
-click
+pyarrow>=14.0.0
-rich
+
-PyYAML
+# File monitoring and system utilities
-rank-bm25
+watchdog>=3.0.0
-psutil
+requests>=2.28.0
 # CLI interface and output
 click>=8.1.0
 rich>=13.0.0
 # Configuration management
 PyYAML>=6.0.0
 # Text search utilities (lightweight)
 rank-bm25>=0.2.2
--- a/scripts/analyze_github_actions.py
+++ b/scripts/analyze_github_actions.py
@ -1,229 +0,0 @@
 #!/usr/bin/env python3
 """
 Analyze the GitHub Actions workflow for potential issues and improvements.
 """
 import yaml
 from pathlib import Path
 def analyze_workflow():
    """Parse the release workflow and print a summary of its triggers and jobs.

    Loads ``.github/workflows/build-and-release.yml`` from the directory two
    levels above this script and prints the configured triggers plus each
    job's dependencies, condition and matrix.

    Returns:
        bool: True when the file exists and parses to a mapping; False
        otherwise (a diagnostic line is printed first).
    """
    print("🔍 GitHub Actions Workflow Analysis")
    print("=" * 50)
    workflow_file = Path(__file__).parent.parent / ".github/workflows/build-and-release.yml"
    if not workflow_file.exists():
        print("❌ Workflow file not found")
        return False
    try:
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(workflow_file, 'r', encoding='utf-8') as f:
            workflow = yaml.safe_load(f)
    except Exception as e:
        print(f"❌ Failed to parse YAML: {e}")
        return False
    if not isinstance(workflow, dict):
        # An empty document loads as None; a bare list/scalar is not a workflow.
        print("❌ Workflow YAML is empty or not a mapping")
        return False
    print("✅ Workflow YAML is valid")
    # Analyze workflow structure
    print("\n📋 Workflow Structure Analysis:")
    # Check triggers. PyYAML implements YAML 1.1, where an unquoted `on` key is
    # resolved to the boolean True, so the section must be looked up under
    # both the string key and True.
    triggers = workflow.get('on')
    if triggers is None:
        triggers = workflow.get(True)
    # `on:` may be a single event name, a list of names, or a mapping.
    if isinstance(triggers, str):
        triggers = {triggers: {}}
    elif isinstance(triggers, list):
        triggers = {name: {} for name in triggers}
    elif not isinstance(triggers, dict):
        triggers = {}
    print(f"   Triggers: {list(triggers.keys())}")
    if 'push' in triggers:
        # A bare `push:` entry loads as None.
        push_config = triggers['push'] or {}
        if 'tags' in push_config:
            print(f"   ✅ Tag triggers: {push_config['tags']}")
        if 'branches' in push_config:
            print(f"   ✅ Branch triggers: {push_config['branches']}")
    if 'workflow_dispatch' in triggers:
        print("   ✅ Manual trigger enabled")
    # Analyze jobs
    jobs = workflow.get('jobs') or {}
    print(f"\n🛠️  Jobs ({len(jobs)}):")
    for job_name, job_config in jobs.items():
        job_config = job_config or {}
        print(f"   📋 {job_name}:")
        # Check dependencies (`needs` is either one job name or a list of them)
        needs = job_config.get('needs', [])
        if needs:
            if isinstance(needs, list):
                print(f"      Dependencies: {', '.join(needs)}")
            else:
                print(f"      Dependencies: {needs}")
        # Check conditions
        if 'if' in job_config:
            print(f"      Condition: {job_config['if']}")
        # Check matrix
        strategy = job_config.get('strategy') or {}
        if 'matrix' in strategy:
            matrix = strategy['matrix']
            if isinstance(matrix, dict):
                for key, values in matrix.items():
                    print(f"      Matrix {key}: {values}")
            else:
                # e.g. a `${{ fromJson(...) }}` expression string
                print(f"      Matrix: {matrix}")
    return True
 def check_potential_issues():
    """Scan the workflow file's text for known problem patterns.

    The checks are plain substring tests against the raw file content.
    Findings are printed as either critical issues or warnings.

    Returns:
        bool: True when no critical issue was found (warnings do not fail the
        check); False when an issue was found or the workflow file could not
        be read.
    """
    print("\n🔍 Potential Issues Analysis:")
    issues = []
    warnings = []
    workflow_file = Path(__file__).parent.parent / ".github/workflows/build-and-release.yml"
    try:
        # Explicit UTF-8: the platform default (e.g. cp1252 on Windows) may
        # not be able to decode the file.
        content = workflow_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # The caller runs this check even after analyze_workflow() reported a
        # missing file, so report the problem instead of raising.
        print(f"   ❌ Cannot read workflow file: {e}")
        return False
    # Check for common issues
    if 'PYPI_API_TOKEN' in content:
        if 'secrets.PYPI_API_TOKEN' not in content:
            issues.append("PyPI token referenced but not as secret")
        else:
            print("   ✅ PyPI token properly referenced as secret")
    if 'upload-artifact@v3' in content:
        warnings.append("Using upload-artifact@v3 - consider upgrading to v4")
    if 'setup-python@v4' in content:
        warnings.append("Using setup-python@v4 - consider upgrading to v5")
    if 'actions/checkout@v4' in content:
        print("   ✅ Using recent checkout action version")
    # Check cibuildwheel configuration
    if 'cibuildwheel@v2.16' in content:
        warnings.append("cibuildwheel version might be outdated - check for latest")
    if 'CIBW_TEST_COMMAND: "rag-mini --help"' in content:
        print("   ✅ Wheel testing configured")
    # Check for environment setup
    if 'environment: release' in content:
        print("   ✅ Release environment configured for security")
    # Check matrix strategy
    if 'ubuntu-latest, windows-latest, macos-13, macos-14' in content:
        print("   ✅ Good OS matrix coverage")
    if 'python-version: [\'3.8\', \'3.11\', \'3.12\']' in content:
        print("   ✅ Good Python version coverage")
    # Output results
    if issues:
        print(f"\n❌ Critical Issues ({len(issues)}):")
        for issue in issues:
            print(f"   • {issue}")
    if warnings:
        print(f"\n⚠️  Warnings ({len(warnings)}):")
        for warning in warnings:
            print(f"   • {warning}")
    if not issues and not warnings:
        print("\n✅ No critical issues or warnings found")
    return len(issues) == 0
 def check_secrets_requirements():
    """Print the secrets the workflow needs and how to configure them.

    Purely informational: nothing is inspected and nothing is returned.
    """
    report = (
        "\n🔐 Required Secrets Analysis:",
        "   Required GitHub Secrets:",
        "   ✅ GITHUB_TOKEN (automatically provided)",
        "   ⚠️  PYPI_API_TOKEN (needs manual setup)",
        "\n   Setup Instructions:",
        "   1. Go to PyPI.org → Account Settings → API Tokens",
        "   2. Create token with 'Entire account' scope",
        "   3. Go to GitHub repo → Settings → Secrets → Actions",
        "   4. Add secret named 'PYPI_API_TOKEN' with the token value",
        "\n   Optional Setup:",
        "   • TestPyPI token for testing (TESTPYPI_API_TOKEN)",
        "   • Release environment protection rules",
    )
    # One write with newline separators emits the same text as one print per line.
    print("\n".join(report))
 def check_file_paths():
    """Report whether each file the workflow relies on is present.

    Paths are resolved against the directory two levels above this script.

    Returns:
        bool: True only if every listed file exists.
    """
    print("\n📁 File References Check:")
    root = Path(__file__).parent.parent
    required = (
        ("requirements.txt", "Dependencies file"),
        ("scripts/build_pyz.py", "Zipapp build script"),
        ("pyproject.toml", "Package configuration"),
    )
    missing_count = 0
    for relative, label in required:
        if (root / relative).exists():
            print(f"   ✅ {label}: {relative}")
            continue
        print(f"   ❌ Missing {label}: {relative}")
        missing_count += 1
    return missing_count == 0
 def estimate_ci_costs():
    """Print a fixed, hand-written estimate of CI runtime and cost.

    The figures are literals, not measurements; nothing is returned.
    """
    sections = (
        (
            "\n💰 CI Cost & Runtime Estimation:",
            "   Job Matrix:",
            "   • build-wheels: 4 OS × ~20 min = 80 minutes",
            "   • build-zipapp: 1 job × ~10 min = 10 minutes",
            "   • test-installation: 7 combinations × ~5 min = 35 minutes",
            "   • publish: 1 job × ~2 min = 2 minutes",
            "   • create-release: 1 job × ~2 min = 2 minutes",
        ),
        (
            "\n   Total estimated runtime: ~45-60 minutes per release",
            "   GitHub Actions free tier: 2000 minutes/month",
            "   Estimated releases per month with free tier: ~30-40",
        ),
        (
            "\n   Optimization suggestions:",
            "   • Cache dependencies to reduce build time",
            "   • Run tests only on main Python versions",
            "   • Use conditional jobs for PR vs release builds",
        ),
    )
    for section in sections:
        for line in section:
            print(line)
 def main():
    """Run every analysis step in order and print an overall verdict.

    The informational steps (secrets, cost estimate) never affect the result;
    the workflow, issue and file-path checks must all pass.

    Returns:
        bool: True when all three gating checks succeeded.
    """
    workflow_ok = analyze_workflow()
    issues_ok = check_potential_issues()
    check_secrets_requirements()
    paths_ok = check_file_paths()
    estimate_ci_costs()
    success = all((workflow_ok, issues_ok, paths_ok))
    print(f"\n{'=' * 50}")
    if not success:
        print("❌ Issues found - fix before using")
        return success
    for line in (
        "🎉 GitHub Actions workflow looks good!",
        "✅ Ready for production use",
        "\n📋 Next steps:",
        "   1. Set up PYPI_API_TOKEN secret in GitHub",
        "   2. Test with a release tag: git tag v2.1.0-test && git push origin v2.1.0-test",
        "   3. Monitor the workflow execution",
        "   4. Verify artifacts are created correctly",
    ):
        print(line)
    return success
 if __name__ == "__main__":
    import sys
    # Exit status 0 on success, 1 on failure.
    sys.exit(0 if main() else 1)
--- a/scripts/build_pyz.py
+++ b/scripts/build_pyz.py
@ -1,109 +0,0 @@
 #!/usr/bin/env python3
 """
 Build script for creating a single-file Python zipapp (.pyz) distribution.
 This creates a portable rag-mini.pyz that can be run with any Python 3.8+.
 """
 import os
 import shutil
 import subprocess
 import sys
 import tempfile
 import zipapp
 from pathlib import Path
 def main():
    """Build ``dist/rag-mini.pyz`` from the ``mini_rag`` package.

    Steps: copy ``mini_rag`` into a temporary staging directory, pip-install
    the packages from ``requirements.txt`` alongside it, write a
    ``__main__.py`` that calls ``mini_rag.cli.cli``, then zip the staging
    directory into a compressed zipapp and mark it executable.

    Exits the process with status 1 if the source directory is missing, the
    dependency install fails, or the archive cannot be created.
    """
    # All paths are resolved from the directory two levels above this script.
    project_root = Path(__file__).parent.parent
    build_dir = project_root / "dist"
    pyz_file = build_dir / "rag-mini.pyz"
    print(f"🔨 Building FSS-Mini-RAG zipapp...")
    print(f"   Project root: {project_root}")
    print(f"   Output: {pyz_file}")
    # Ensure dist directory exists
    build_dir.mkdir(exist_ok=True)
    # Create temporary directory for building (removed when the `with` exits)
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        app_dir = temp_path / "app"
        print(f"📦 Preparing files in {app_dir}...")
        # Copy source code
        src_dir = project_root / "mini_rag"
        if not src_dir.exists():
            print(f"❌ Source directory not found: {src_dir}")
            sys.exit(1)
        # copytree creates app_dir as a parent of the destination.
        shutil.copytree(src_dir, app_dir / "mini_rag")
        # Install dependencies to the temp directory
        # NOTE(review): requirements.txt lists packages with compiled
        # extensions (numpy, pandas, pyarrow, lancedb); the zipapp docs say
        # such packages cannot be imported from inside a zip archive —
        # confirm the resulting .pyz actually runs.
        print("📥 Installing dependencies...")
        try:
            # Use this interpreter's pip; `-t` installs into the staging dir.
            subprocess.run([
                sys.executable, "-m", "pip", "install", 
                "-t", str(app_dir),
                "-r", str(project_root / "requirements.txt")
            ], check=True, capture_output=True)
            print("   ✅ Dependencies installed")
        except subprocess.CalledProcessError as e:
            print(f"   ❌ Failed to install dependencies: {e}")
            # Output was captured as bytes, so decode it before printing.
            print(f"   stderr: {e.stderr.decode()}")
            sys.exit(1)
        # Create __main__.py entry point
        main_py = app_dir / "__main__.py"
        main_py.write_text("""#!/usr/bin/env python3
 # Entry point for rag-mini zipapp
 import sys
 from mini_rag.cli import cli
 if __name__ == "__main__":
    sys.exit(cli())
 """)
        print("🗜️  Creating zipapp...")
        # Remove existing pyz file if it exists
        if pyz_file.exists():
            pyz_file.unlink()
        # Create the zipapp
        try:
            # `interpreter` becomes the archive's shebang line.
            zipapp.create_archive(
                source=app_dir,
                target=pyz_file,
                interpreter="/usr/bin/env python3",
                compressed=True
            )
            print(f"✅ Successfully created {pyz_file}")
            # Show file size
            size_mb = pyz_file.stat().st_size / (1024 * 1024)
            print(f"   📊 Size: {size_mb:.1f} MB")
            # Make executable
            pyz_file.chmod(0o755)
            print(f"   🔧 Made executable")
            print(f"""
 🎉 Build complete! 
 Usage:
  python {pyz_file} --help
  python {pyz_file} init
  python {pyz_file} search "your query"
 Or make it directly executable (Unix/Linux/macOS):
  {pyz_file} --help
 """)
        except Exception as e:
            print(f"❌ Failed to create zipapp: {e}")
            sys.exit(1)
 if __name__ == "__main__":
    main()
--- a/scripts/final_pre_push_validation.py
+++ b/scripts/final_pre_push_validation.py
@ -1,303 +0,0 @@
 #!/usr/bin/env python3
 """
 Final validation before pushing to GitHub.
 Ensures all critical components are working and ready for production.
 """
 import os
 import subprocess
 import sys
 from pathlib import Path
 def check_critical_files():
    """Confirm that every file the release depends on is present.

    Each path is resolved against the directory two levels above this script
    and reported individually.

    Returns:
        bool: True when no listed file is missing.
    """
    print("1. Checking critical files...")
    root = Path(__file__).parent.parent
    distribution = [
        ("pyproject.toml", "Enhanced package metadata"),
        ("install.sh", "Linux/macOS install script"),
        ("install.ps1", "Windows install script"),
        ("Makefile", "Build automation"),
    ]
    ci_and_build = [
        (".github/workflows/build-and-release.yml", "CI/CD workflow"),
        ("scripts/build_pyz.py", "Zipapp builder"),
    ]
    documentation = [
        ("README.md", "Updated documentation"),
        ("docs/TESTING_PLAN.md", "Testing plan"),
        ("docs/DEPLOYMENT_ROADMAP.md", "Deployment roadmap"),
        ("TESTING_RESULTS.md", "Test results"),
        ("IMPLEMENTATION_COMPLETE.md", "Implementation summary"),
    ]
    test_scripts = [
        ("scripts/validate_setup.py", "Setup validator"),
        ("scripts/phase1_basic_tests.py", "Basic tests"),
        ("scripts/phase1_local_validation.py", "Local validation"),
        ("scripts/phase2_build_tests.py", "Build tests"),
        ("scripts/final_pre_push_validation.py", "This script"),
    ]
    absent = []
    for relative, label in distribution + ci_and_build + documentation + test_scripts:
        if (root / relative).exists():
            print(f"   ✅ {label}")
        else:
            print(f"   ❌ Missing: {label} ({relative})")
            absent.append(relative)
    return not absent
 def check_pyproject_toml():
    """Check that pyproject.toml contains the expected metadata snippets.

    The checks are literal substring tests against the file's text, not a
    TOML parse. Every expected element is reported, even after a failure.

    Returns:
        bool: True when the file exists and every snippet is present.
    """
    print("2. Validating pyproject.toml...")
    project_root = Path(__file__).parent.parent
    pyproject_file = project_root / "pyproject.toml"
    if not pyproject_file.exists():
        print("   ❌ pyproject.toml missing")
        return False
    # Decode as UTF-8 explicitly; the locale default (e.g. cp1252 on Windows)
    # can fail on non-ASCII characters.
    content = pyproject_file.read_text(encoding="utf-8")
    required_elements = [
        ('name = "fss-mini-rag"', "Package name"),
        ('rag-mini = "mini_rag.cli:cli"', "Console script"),
        ('requires-python = ">=3.8"', "Python version"),
        ('Brett Fox', "Author"),
        ('MIT', "License"),
        ('[build-system]', "Build system"),
        ('[project.urls]', "Project URLs"),
    ]
    all_good = True
    for element, description in required_elements:
        if element in content:
            print(f"   ✅ {description}")
        else:
            print(f"   ❌ Missing: {description}")
            all_good = False
    return all_good
 def check_install_scripts():
    """Validate the shell and PowerShell installers.

    ``install.sh`` is syntax-checked with ``bash -n`` (parse only, nothing is
    executed). ``install.ps1`` is only checked for the presence of the
    ``Install-UV`` and ``Install-WithPipx`` names.

    Returns:
        bool: True when both scripts exist and pass their checks; False on the
        first failure, including when ``bash`` cannot be started.
    """
    print("3. Validating install scripts...")
    project_root = Path(__file__).parent.parent
    # Check bash script
    install_sh = project_root / "install.sh"
    if not install_sh.exists():
        print("   ❌ install.sh missing")
        return False
    try:
        result = subprocess.run(
            ["bash", "-n", str(install_sh)],
            capture_output=True, text=True
        )
    except Exception as e:
        print(f"   ❌ Error checking install.sh: {e}")
        return False
    if result.returncode != 0:
        print(f"   ❌ install.sh syntax error: {result.stderr}")
        return False
    print("   ✅ install.sh syntax valid")
    # Check PowerShell script exists and has key functions
    install_ps1 = project_root / "install.ps1"
    if not install_ps1.exists():
        print("   ❌ install.ps1 missing")
        return False
    # Decode as UTF-8 explicitly; the locale default (e.g. cp1252 on Windows)
    # can raise UnicodeDecodeError on emoji in the script's messages.
    content = install_ps1.read_text(encoding="utf-8")
    if "Install-UV" in content and "Install-WithPipx" in content:
        print("   ✅ install.ps1 structure valid")
        return True
    print("   ❌ install.ps1 missing key functions")
    return False
 def check_readme_updates():
    """Check that README.md mentions each of the new installation methods.

    Uses literal substring tests; every expected marker is reported, even
    after a failure.

    Returns:
        bool: True when README.md exists and contains every marker.
    """
    print("4. Validating README updates...")
    project_root = Path(__file__).parent.parent
    readme_file = project_root / "README.md"
    if not readme_file.exists():
        print("   ❌ README.md missing")
        return False
    # Decode as UTF-8 explicitly; the locale default (e.g. cp1252 on Windows)
    # can raise UnicodeDecodeError on emoji in the README.
    content = readme_file.read_text(encoding="utf-8")
    required_sections = [
        ("One-Line Installers", "New installation section"),
        ("curl -fsSL", "Linux/macOS installer"),
        ("iwr", "Windows installer"),
        ("uv tool install", "uv installation method"),
        ("pipx install", "pipx installation method"),
        ("fss-mini-rag", "Correct package name"),
    ]
    all_good = True
    for section, description in required_sections:
        if section in content:
            print(f"   ✅ {description}")
        else:
            print(f"   ❌ Missing: {description}")
            all_good = False
    return all_good
 def check_git_status():
    """List pending changes reported by ``git status --porcelain``.

    Git runs in the project root (two levels above this script), so the
    result does not depend on the caller's working directory. Pending changes
    are informational only and do not fail the check.

    Returns:
        bool: True when git ran successfully; False when it could not be
        started or returned a non-zero exit status.
    """
    print("5. Checking git status...")
    project_root = Path(__file__).parent.parent
    try:
        # Check git status
        result = subprocess.run(
            ["git", "status", "--porcelain"],
            capture_output=True, text=True, cwd=project_root
        )
    except Exception as e:
        print(f"   ❌ Error checking git status: {e}")
        return False
    if result.returncode != 0:
        print(f"   ❌ Git status failed: {result.stderr}")
        return False
    # Porcelain lines start with a two-column status that may begin with a
    # space (" M file"); split without stripping so that column is kept.
    changes = [line for line in result.stdout.splitlines() if line.strip()]
    if changes:
        print(f"   📋 Found {len(changes)} changes to commit:")
        for change in changes[:10]:  # Show first 10
            print(f"      {change}")
        if len(changes) > 10:
            print(f"      ... and {len(changes) - 10} more")
    else:
        print("   ✅ No changes to commit")
    return True
 def check_branch_status():
    """Print the currently checked-out git branch.

    Git runs in the project root (two levels above this script), so the
    result does not depend on the caller's working directory.

    Returns:
        bool: True when git ran successfully (including on a detached HEAD,
        which is reported as a warning); False when it could not be started
        or returned a non-zero exit status.
    """
    print("6. Checking git branch...")
    project_root = Path(__file__).parent.parent
    try:
        result = subprocess.run(
            ["git", "branch", "--show-current"],
            capture_output=True, text=True, cwd=project_root
        )
    except Exception as e:
        print(f"   ❌ Error checking branch: {e}")
        return False
    if result.returncode != 0:
        print(f"   ❌ Failed to get branch: {result.stderr}")
        return False
    branch = result.stdout.strip()
    if branch:
        print(f"   ✅ Current branch: {branch}")
    else:
        # `git branch --show-current` prints nothing on a detached HEAD.
        print("   ⚠️  Detached HEAD (no current branch)")
    return True
 def check_no_large_files():
    """Fail if the project tree contains unexpected files larger than 50 MB.

    Every file under the project root (two levels above this script) is
    sized. Large files are listed; those matching a known-large artifact or
    living under an ignored directory are tolerated.

    Returns:
        bool: False when at least one large file is neither expected nor
        under an ignored directory; True otherwise.
    """
    print("7. Checking for large files...")
    project_root = Path(__file__).parent.parent
    large_files = []
    for file_path in project_root.rglob("*"):
        if file_path.is_file():
            try:
                size_mb = file_path.stat().st_size / (1024 * 1024)
                if size_mb > 50:  # Files larger than 50MB
                    large_files.append((file_path, size_mb))
            except (OSError, PermissionError):
                pass  # Skip files we can't read
    if not large_files:
        print("   ✅ No large files found")
        return True
    print("   ⚠️  Found large files:")
    expected_large = ["dist/rag-mini.pyz"]  # Known large files
    # Directories whose contents are never pushed: git's own object store and
    # the gitignored environment directories.
    ignored_paths = [".git/", ".venv/", "venv/", "test_environments/"]
    unexpected = []
    for file_path, size_mb in large_files:
        rel_path = file_path.relative_to(project_root)
        print(f"      {rel_path}: {size_mb:.1f} MB")
        # Match against the project-relative path with forward slashes so the
        # patterns also work on Windows and are not affected by the name of a
        # directory above the project root.
        rel_posix = rel_path.as_posix()
        if any(expected in rel_posix for expected in expected_large):
            continue
        if any(ignored in rel_posix for ignored in ignored_paths):
            continue
        unexpected.append(rel_path)
    if unexpected:
        print("   ❌ Unexpected large files found")
        return False
    print("   ✅ Large files are expected (zipapp, etc.)")
    return True
 def main():
    """Run each pre-push check in sequence and print a pass/fail summary.

    A check counts as passed only if it returns a truthy value; a check that
    raises is reported as an error and counted as failed.

    Returns:
        bool: True when every check passed.
    """
    rule = "=" * 50
    side = "=" * 15
    print("🚀 FSS-Mini-RAG: Final Pre-Push Validation")
    print(rule)
    checks = (
        ("Critical Files", check_critical_files),
        ("PyProject.toml", check_pyproject_toml),
        ("Install Scripts", check_install_scripts),
        ("README Updates", check_readme_updates),
        ("Git Status", check_git_status),
        ("Git Branch", check_branch_status),
        ("Large Files", check_no_large_files),
    )
    total = len(checks)
    passed = 0
    for label, runner in checks:
        print(f"\n{side} {label} {side}")
        try:
            if not runner():
                print(f"❌ {label} FAILED")
            else:
                print(f"✅ {label} PASSED")
                passed += 1
        except Exception as exc:
            print(f"❌ {label} ERROR: {exc}")
    print(f"\n{rule}")
    print(f"📊 Pre-Push Validation: {passed}/{total} checks passed")
    print(rule)
    if passed != total:
        print(f"❌ {total - passed} checks FAILED")
        print("🔧 Fix issues before pushing")
        return False
    print("🎉 ALL CHECKS PASSED!")
    print("✅ Ready to push to GitHub")
    print()
    print("Next steps:")
    print("   1. git add -A")
    print("   2. git commit -m 'Add modern distribution system with one-line installers'")
    print("   3. git push origin main")
    return True
 if __name__ == "__main__":
    # Exit status 0 on success, 1 on failure.
    sys.exit(0 if main() else 1)
--- a/scripts/phase1_basic_tests.py
+++ b/scripts/phase1_basic_tests.py
@ -1,196 +0,0 @@
 #!/usr/bin/env python3
 """
 Phase 1: Basic functionality tests without full environment setup.
 This runs quickly to verify core functionality works.
 """
 import sys
 from pathlib import Path
 # Put the directory two levels above this script at the front of sys.path so
 # that `import mini_rag` resolves against the working tree.
 # `project_root` is also used by the test functions below to locate files.
 project_root = Path(__file__).parent.parent
 sys.path.insert(0, str(project_root))
 def test_imports():
    """Verify that the package and its CLI entry point can be imported.

    Returns:
        bool: True when both ``mini_rag`` and ``mini_rag.cli.cli`` import;
        False at the first failure, after printing the error.
    """
    print("1. Testing imports...")
    # Stage 1: the top-level package.
    try:
        import mini_rag
    except Exception as exc:
        print(f"   ❌ mini_rag import failed: {exc}")
        return False
    else:
        print("   ✅ mini_rag package imports")
    # Stage 2: the CLI entry point.
    try:
        from mini_rag.cli import cli
    except Exception as exc:
        print(f"   ❌ CLI import failed: {exc}")
        return False
    else:
        print("   ✅ CLI function imports")
    return True
 def test_pyproject_structure():
    """Test pyproject.toml has correct structure."""
    print("2. Testing pyproject.toml...")
    pyproject_file = project_root / "pyproject.toml"
    if not pyproject_file.exists():
        print("   ❌ pyproject.toml missing")
        return False
    content = pyproject_file.read_text()
    # Check essential elements
    checks = [
        ('name = "fss-mini-rag"', "Package name"),
        ('rag-mini = "mini_rag.cli:cli"', "Entry point"),
        ('requires-python = ">=3.8"', "Python version"),
        ('Brett Fox', "Author"),
        ('MIT', "License"),
    ]
    for check, desc in checks:
        if check in content:
            print(f"   ✅ {desc}")
        else:
            print(f"   ❌ {desc} missing")
            return False
    return True
 def test_install_scripts():
    """Test install scripts exist and have basic structure."""
    print("3. Testing install scripts...")
    # Check install.sh
    install_sh = project_root / "install.sh"
    if install_sh.exists():
        content = install_sh.read_text()
        if "uv tool install" in content and "pipx install" in content:
            print("   ✅ install.sh has proper structure")
        else:
            print("   ❌ install.sh missing key components")
            return False
    else:
        print("   ❌ install.sh missing")
        return False
    # Check install.ps1
    install_ps1 = project_root / "install.ps1"
    if install_ps1.exists():
        content = install_ps1.read_text()
        if "Install-UV" in content and "Install-WithPipx" in content:
            print("   ✅ install.ps1 has proper structure")
        else:
            print("   ❌ install.ps1 missing key components") 
            return False
    else:
        print("   ❌ install.ps1 missing")
        return False
    return True
 def test_build_scripts():
    """Test build scripts exist."""
    print("4. Testing build scripts...")
    build_pyz = project_root / "scripts" / "build_pyz.py"
    if build_pyz.exists():
        content = build_pyz.read_text()
        if "zipapp" in content:
            print("   ✅ build_pyz.py exists with zipapp")
        else:
            print("   ❌ build_pyz.py missing zipapp code")
            return False
    else:
        print("   ❌ build_pyz.py missing")
        return False
    return True
 def test_github_workflow():
    """Test GitHub workflow exists."""
    print("5. Testing GitHub workflow...")
    workflow_file = project_root / ".github" / "workflows" / "build-and-release.yml"
    if workflow_file.exists():
        content = workflow_file.read_text()
        if "cibuildwheel" in content and "pypa/gh-action-pypi-publish" in content:
            print("   ✅ GitHub workflow has proper structure")
        else:
            print("   ❌ GitHub workflow missing key components")
            return False
    else:
        print("   ❌ GitHub workflow missing")
        return False
    return True
 def test_documentation():
    """Test documentation is updated."""
    print("6. Testing documentation...")
    readme = project_root / "README.md"
    if readme.exists():
        content = readme.read_text()
        if "One-Line Installers" in content and "uv tool install" in content:
            print("   ✅ README has new installation methods")
        else:
            print("   ❌ README missing new installation section")
            return False
    else:
        print("   ❌ README missing")
        return False
    return True
 def main():
    """Run the Phase 1 tests in order and print a summary.

    A test counts as passed only if it returns a truthy value; a test that
    raises is reported as an error and counted as failed.

    Returns:
        bool: True when every test passed.
    """
    print("🧪 FSS-Mini-RAG Phase 1: Basic Tests")
    print("=" * 40)
    suite = (
        ("Import Tests", test_imports),
        ("PyProject Structure", test_pyproject_structure),
        ("Install Scripts", test_install_scripts),
        ("Build Scripts", test_build_scripts),
        ("GitHub Workflow", test_github_workflow),
        ("Documentation", test_documentation),
    )
    total = len(suite)
    passed = 0
    side = "=" * 20
    for title, runner in suite:
        print(f"\n{side} {title} {side}")
        try:
            if not runner():
                print(f"❌ {title} FAILED")
            else:
                print(f"✅ {title} PASSED")
                passed += 1
        except Exception as exc:
            print(f"❌ {title} ERROR: {exc}")
    print(f"\n{'=' * 50}")
    print(f"📊 Results: {passed}/{total} tests passed")
    if passed != total:
        print(f"❌ {total - passed} tests FAILED")
        print("🔧 Fix failing tests before proceeding to Phase 2")
        return False
    print("🎉 Phase 1: All basic tests PASSED!")
    print("\n📋 Ready for Phase 2: Package Building Tests")
    print("Next steps:")
    print("   1. python -m build --sdist")
    print("   2. python -m build --wheel")
    print("   3. python scripts/build_pyz.py")
    print("   4. Test installations from built packages")
    return True
 if __name__ == "__main__":
    # Exit status 0 on success, 1 on failure.
    sys.exit(0 if main() else 1)
--- a/Show More
+++ b/Show More