80 changed files with 5556 additions and 8654 deletions
--- a/.flake8
+++ b/.flake8
@ -1,19 +0,0 @@
-[flake8]
-# Professional Python code style - balances quality with readability
-max-line-length = 95
-extend-ignore = E203,W503,W605
-exclude = 
-    .venv,
-    .venv-linting,
-    __pycache__,
-    *.egg-info,
-    .git,
-    build,
-    dist,
-    .mini-rag
-
-# Per-file ignores for practical development
-per-file-ignores =
-    tests/*.py:F401,F841
-    examples/*.py:F401,F841
-    fix_*.py:F401,F841,E501
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -33,106 +33,50 @@ jobs:
        restore-keys: |
          ${{ runner.os }}-python-${{ matrix.python-version }}-
          
-    - name: Create virtual environment
-      run: |
-        python -m venv .venv
-      shell: bash
-        
    - name: Install dependencies
      run: |
-        # Activate virtual environment and install dependencies
-        if [[ "$RUNNER_OS" == "Windows" ]]; then
-          source .venv/Scripts/activate
-        else
-          source .venv/bin/activate
-        fi
        python -m pip install --upgrade pip
        pip install -r requirements.txt
-      shell: bash
        
-    - name: Run comprehensive tests
+    - name: Run tests
      run: |
-        # Set OS-appropriate emojis and activate venv
-        if [[ "$RUNNER_OS" == "Windows" ]]; then
-          source .venv/Scripts/activate
-          OK="[OK]"
-          SKIP="[SKIP]"
-        else
-          source .venv/bin/activate
-          OK="✅"
-          SKIP="⚠️"
-        fi
-        
-        echo "$OK Virtual environment activated"
-        
        # Run basic import tests
-        python -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('$OK Core imports successful')"
+        python -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('✅ Core imports successful')"
        
-        # Run the actual test suite
-        if [ -f "tests/test_fixes.py" ]; then
-          echo "$OK Running comprehensive test suite..."
-          python tests/test_fixes.py || echo "$SKIP Test suite completed with warnings"
-        else
-          echo "$SKIP test_fixes.py not found, running basic tests only"
-        fi
-        
-        # Test config system with proper venv
+        # Test basic functionality without venv requirements
        python -c "
-        import os
-        ok_emoji = '$OK' if os.name != 'nt' else '[OK]'
-        
        try:
            from mini_rag.config import ConfigManager
-            import tempfile
-            with tempfile.TemporaryDirectory() as tmpdir:
-                config_manager = ConfigManager(tmpdir)
-                config = config_manager.load_config()
-                print(f'{ok_emoji} Config system works with proper dependencies')
+            print('✅ Config system imports work')
        except Exception as e:
-            print(f'Error in config test: {e}')
-            raise
+            print(f'⚠️ Config test skipped: {e}')
+        
+        try:
+            from mini_rag.chunker import CodeChunker
+            print('✅ Chunker imports work')
+        except Exception as e:
+            print(f'⚠️ Chunker test skipped: {e}')
        "
        
-        echo "$OK All tests completed successfully"
+        echo "✅ Core functionality tests completed"
      shell: bash
      
    - name: Test auto-update system
      run: |
-        # Set OS-appropriate emojis
-        if [[ "$RUNNER_OS" == "Windows" ]]; then
-          OK="[OK]"
-          SKIP="[SKIP]"
-        else
-          OK="✅"
-          SKIP="⚠️"
-        fi
-        
        python -c "
-        import os
-        ok_emoji = '$OK' if os.name != 'nt' else '[OK]'
-        skip_emoji = '$SKIP' if os.name != 'nt' else '[SKIP]'
-        
        try:
            from mini_rag.updater import UpdateChecker
            updater = UpdateChecker()
-            print(f'{ok_emoji} Auto-update system available')
+            print('✅ Auto-update system available')
        except ImportError:
-            print(f'{skip_emoji} Auto-update system not available (legacy version)')
+            print('⚠️ Auto-update system not available (legacy version)')
        "
-      shell: bash
        
    - name: Test CLI commands
      run: |
-        # Set OS-appropriate emojis
-        if [[ "$RUNNER_OS" == "Windows" ]]; then
-          OK="[OK]"
-        else
-          OK="✅"
-        fi
-        
-        echo "$OK Checking for CLI files..."
+        echo "✅ Checking for CLI files..."
        ls -la rag* || dir rag* || echo "CLI files may not be present"
-        echo "$OK CLI check completed - this is expected in CI environment"
+        echo "✅ CLI check completed - this is expected in CI environment"
      shell: bash

  security-scan:
--- a/.gitignore
+++ b/.gitignore
@ -105,13 +105,4 @@ dmypy.json
 .idea/

 # Project specific ignores
-REPOSITORY_SUMMARY.md
-
-# Analysis and scanning results (should not be committed)
-docs/live-analysis/
-docs/analysis-history/
-**/live-analysis/
-**/analysis-history/
-*.analysis.json
-*.analysis.html
-**/analysis_*/
+REPOSITORY_SUMMARY.md
--- a/.mini-rag/config.yaml
+++ b/.mini-rag/config.yaml
@ -1,18 +1,5 @@
 # FSS-Mini-RAG Configuration
-# 
-# 🔧 EDIT THIS FILE TO CUSTOMIZE YOUR RAG SYSTEM
-# 
-# This file controls all behavior of your Mini-RAG system.
-# Changes take effect immediately - no restart needed!
-# 
-# 💡 IMPORTANT: To change the AI model, edit the 'synthesis_model' line below
-# 
-# Common model options:
-#   synthesis_model: auto              # Let system choose best available
-#   synthesis_model: qwen3:0.6b        # Ultra-fast (500MB)
-#   synthesis_model: qwen3:1.7b        # Balanced (1.4GB) - recommended
-#   synthesis_model: qwen3:4b          # High quality (2.5GB)
-#
+# Edit this file to customize indexing and search behavior
 # See docs/GETTING_STARTED.md for detailed explanations

 # Text chunking settings
@ -59,7 +46,7 @@ search:
 # LLM synthesis and query expansion settings
 llm:
  ollama_host: localhost:11434
-  synthesis_model: qwen3:1.7b    # 'auto', 'qwen3:1.7b', etc.
+  synthesis_model: auto    # 'auto', 'qwen3:1.7b', etc.
  expansion_model: auto     # Usually same as synthesis_model
  max_expansion_terms: 8        # Maximum terms to add to queries
  enable_synthesis: false       # Enable synthesis by default
--- a/.mini-rag/last_search
+++ b/.mini-rag/last_search
@ -1 +1 @@
-test
+how to run tests
--- a/.venv-linting/bin/Activate.ps1
+++ b/.venv-linting/bin/Activate.ps1
@ -1,247 +0,0 @@
-<#
-.Synopsis
-Activate a Python virtual environment for the current PowerShell session.
-
-.Description
-Pushes the python executable for a virtual environment to the front of the
-$Env:PATH environment variable and sets the prompt to signify that you are
-in a Python virtual environment. Makes use of the command line switches as
-well as the `pyvenv.cfg` file values present in the virtual environment.
-
-.Parameter VenvDir
-Path to the directory that contains the virtual environment to activate. The
-default value for this is the parent of the directory that the Activate.ps1
-script is located within.
-
-.Parameter Prompt
-The prompt prefix to display when this virtual environment is activated. By
-default, this prompt is the name of the virtual environment folder (VenvDir)
-surrounded by parentheses and followed by a single space (ie. '(.venv) ').
-
-.Example
-Activate.ps1
-Activates the Python virtual environment that contains the Activate.ps1 script.
-
-.Example
-Activate.ps1 -Verbose
-Activates the Python virtual environment that contains the Activate.ps1 script,
-and shows extra information about the activation as it executes.
-
-.Example
-Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
-Activates the Python virtual environment located in the specified location.
-
-.Example
-Activate.ps1 -Prompt "MyPython"
-Activates the Python virtual environment that contains the Activate.ps1 script,
-and prefixes the current prompt with the specified string (surrounded in
-parentheses) while the virtual environment is active.
-
-.Notes
-On Windows, it may be required to enable this Activate.ps1 script by setting the
-execution policy for the user. You can do this by issuing the following PowerShell
-command:
-
-PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
-
-For more information on Execution Policies: 
-https://go.microsoft.com/fwlink/?LinkID=135170
-
-#>
-Param(
-    [Parameter(Mandatory = $false)]
-    [String]
-    $VenvDir,
-    [Parameter(Mandatory = $false)]
-    [String]
-    $Prompt
-)
-
-<# Function declarations --------------------------------------------------- #>
-
-<#
-.Synopsis
-Remove all shell session elements added by the Activate script, including the
-addition of the virtual environment's Python executable from the beginning of
-the PATH variable.
-
-.Parameter NonDestructive
-If present, do not remove this function from the global namespace for the
-session.
-
-#>
-function global:deactivate ([switch]$NonDestructive) {
-    # Revert to original values
-
-    # The prior prompt:
-    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
-        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
-        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
-    }
-
-    # The prior PYTHONHOME:
-    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
-        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
-        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
-    }
-
-    # The prior PATH:
-    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
-        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
-        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
-    }
-
-    # Just remove the VIRTUAL_ENV altogether:
-    if (Test-Path -Path Env:VIRTUAL_ENV) {
-        Remove-Item -Path env:VIRTUAL_ENV
-    }
-
-    # Just remove VIRTUAL_ENV_PROMPT altogether.
-    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
-        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
-    }
-
-    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
-    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
-        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
-    }
-
-    # Leave deactivate function in the global namespace if requested:
-    if (-not $NonDestructive) {
-        Remove-Item -Path function:deactivate
-    }
-}
-
-<#
-.Description
-Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
-given folder, and returns them in a map.
-
-For each line in the pyvenv.cfg file, if that line can be parsed into exactly
-two strings separated by `=` (with any amount of whitespace surrounding the =)
-then it is considered a `key = value` line. The left hand string is the key,
-the right hand is the value.
-
-If the value starts with a `'` or a `"` then the first and last character is
-stripped from the value before being captured.
-
-.Parameter ConfigDir
-Path to the directory that contains the `pyvenv.cfg` file.
-#>
-function Get-PyVenvConfig(
-    [String]
-    $ConfigDir
-) {
-    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
-
-    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
-    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
-
-    # An empty map will be returned if no config file is found.
-    $pyvenvConfig = @{ }
-
-    if ($pyvenvConfigPath) {
-
-        Write-Verbose "File exists, parse `key = value` lines"
-        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
-
-        $pyvenvConfigContent | ForEach-Object {
-            $keyval = $PSItem -split "\s*=\s*", 2
-            if ($keyval[0] -and $keyval[1]) {
-                $val = $keyval[1]
-
-                # Remove extraneous quotations around a string value.
-                if ("'""".Contains($val.Substring(0, 1))) {
-                    $val = $val.Substring(1, $val.Length - 2)
-                }
-
-                $pyvenvConfig[$keyval[0]] = $val
-                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
-            }
-        }
-    }
-    return $pyvenvConfig
-}
-
-
-<# Begin Activate script --------------------------------------------------- #>
-
-# Determine the containing directory of this script
-$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
-$VenvExecDir = Get-Item -Path $VenvExecPath
-
-Write-Verbose "Activation script is located in path: '$VenvExecPath'"
-Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
-Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
-
-# Set values required in priority: CmdLine, ConfigFile, Default
-# First, get the location of the virtual environment, it might not be
-# VenvExecDir if specified on the command line.
-if ($VenvDir) {
-    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
-}
-else {
-    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
-    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
-    Write-Verbose "VenvDir=$VenvDir"
-}
-
-# Next, read the `pyvenv.cfg` file to determine any required value such
-# as `prompt`.
-$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
-
-# Next, set the prompt from the command line, or the config file, or
-# just use the name of the virtual environment folder.
-if ($Prompt) {
-    Write-Verbose "Prompt specified as argument, using '$Prompt'"
-}
-else {
-    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
-    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
-        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
-        $Prompt = $pyvenvCfg['prompt'];
-    }
-    else {
-        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
-        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
-        $Prompt = Split-Path -Path $venvDir -Leaf
-    }
-}
-
-Write-Verbose "Prompt = '$Prompt'"
-Write-Verbose "VenvDir='$VenvDir'"
-
-# Deactivate any currently active virtual environment, but leave the
-# deactivate function in place.
-deactivate -nondestructive
-
-# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
-# that there is an activated venv.
-$env:VIRTUAL_ENV = $VenvDir
-
-if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
-
-    Write-Verbose "Setting prompt to '$Prompt'"
-
-    # Set the prompt to include the env name
-    # Make sure _OLD_VIRTUAL_PROMPT is global
-    function global:_OLD_VIRTUAL_PROMPT { "" }
-    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
-    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
-
-    function global:prompt {
-        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
-        _OLD_VIRTUAL_PROMPT
-    }
-    $env:VIRTUAL_ENV_PROMPT = $Prompt
-}
-
-# Clear PYTHONHOME
-if (Test-Path -Path Env:PYTHONHOME) {
-    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
-    Remove-Item -Path Env:PYTHONHOME
-}
-
-# Add the venv to the PATH
-Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
-$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
--- a/.venv-linting/bin/activate
+++ b/.venv-linting/bin/activate
@ -1,70 +0,0 @@
-# This file must be used with "source bin/activate" *from bash*
-# You cannot run it directly
-
-deactivate () {
-    # reset old environment variables
-    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
-        PATH="${_OLD_VIRTUAL_PATH:-}"
-        export PATH
-        unset _OLD_VIRTUAL_PATH
-    fi
-    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
-        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
-        export PYTHONHOME
-        unset _OLD_VIRTUAL_PYTHONHOME
-    fi
-
-    # Call hash to forget past commands. Without forgetting
-    # past commands the $PATH changes we made may not be respected
-    hash -r 2> /dev/null
-
-    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
-        PS1="${_OLD_VIRTUAL_PS1:-}"
-        export PS1
-        unset _OLD_VIRTUAL_PS1
-    fi
-
-    unset VIRTUAL_ENV
-    unset VIRTUAL_ENV_PROMPT
-    if [ ! "${1:-}" = "nondestructive" ] ; then
-    # Self destruct!
-        unset -f deactivate
-    fi
-}
-
-# unset irrelevant variables
-deactivate nondestructive
-
-# on Windows, a path can contain colons and backslashes and has to be converted:
-if [ "${OSTYPE:-}" = "cygwin" ] || [ "${OSTYPE:-}" = "msys" ] ; then
-    # transform D:\path\to\venv to /d/path/to/venv on MSYS
-    # and to /cygdrive/d/path/to/venv on Cygwin
-    export VIRTUAL_ENV=$(cygpath /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting)
-else
-    # use the path as-is
-    export VIRTUAL_ENV=/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
-fi
-
-_OLD_VIRTUAL_PATH="$PATH"
-PATH="$VIRTUAL_ENV/"bin":$PATH"
-export PATH
-
-# unset PYTHONHOME if set
-# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
-# could use `if (set -u; : $PYTHONHOME) ;` in bash
-if [ -n "${PYTHONHOME:-}" ] ; then
-    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
-    unset PYTHONHOME
-fi
-
-if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
-    _OLD_VIRTUAL_PS1="${PS1:-}"
-    PS1='(.venv-linting) '"${PS1:-}"
-    export PS1
-    VIRTUAL_ENV_PROMPT='(.venv-linting) '
-    export VIRTUAL_ENV_PROMPT
-fi
-
-# Call hash to forget past commands. Without forgetting
-# past commands the $PATH changes we made may not be respected
-hash -r 2> /dev/null
--- a/.venv-linting/bin/activate.csh
+++ b/.venv-linting/bin/activate.csh
@ -1,27 +0,0 @@
-# This file must be used with "source bin/activate.csh" *from csh*.
-# You cannot run it directly.
-
-# Created by Davide Di Blasi <davidedb@gmail.com>.
-# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
-
-alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
-
-# Unset irrelevant variables.
-deactivate nondestructive
-
-setenv VIRTUAL_ENV /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
-
-set _OLD_VIRTUAL_PATH="$PATH"
-setenv PATH "$VIRTUAL_ENV/"bin":$PATH"
-
-
-set _OLD_VIRTUAL_PROMPT="$prompt"
-
-if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
-    set prompt = '(.venv-linting) '"$prompt"
-    setenv VIRTUAL_ENV_PROMPT '(.venv-linting) '
-endif
-
-alias pydoc python -m pydoc
-
-rehash
--- a/.venv-linting/bin/activate.fish
+++ b/.venv-linting/bin/activate.fish
@ -1,69 +0,0 @@
-# This file must be used with "source <venv>/bin/activate.fish" *from fish*
-# (https://fishshell.com/). You cannot run it directly.
-
-function deactivate  -d "Exit virtual environment and return to normal shell environment"
-    # reset old environment variables
-    if test -n "$_OLD_VIRTUAL_PATH"
-        set -gx PATH $_OLD_VIRTUAL_PATH
-        set -e _OLD_VIRTUAL_PATH
-    end
-    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
-        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
-        set -e _OLD_VIRTUAL_PYTHONHOME
-    end
-
-    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
-        set -e _OLD_FISH_PROMPT_OVERRIDE
-        # prevents error when using nested fish instances (Issue #93858)
-        if functions -q _old_fish_prompt
-            functions -e fish_prompt
-            functions -c _old_fish_prompt fish_prompt
-            functions -e _old_fish_prompt
-        end
-    end
-
-    set -e VIRTUAL_ENV
-    set -e VIRTUAL_ENV_PROMPT
-    if test "$argv[1]" != "nondestructive"
-        # Self-destruct!
-        functions -e deactivate
-    end
-end
-
-# Unset irrelevant variables.
-deactivate nondestructive
-
-set -gx VIRTUAL_ENV /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
-
-set -gx _OLD_VIRTUAL_PATH $PATH
-set -gx PATH "$VIRTUAL_ENV/"bin $PATH
-
-# Unset PYTHONHOME if set.
-if set -q PYTHONHOME
-    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
-    set -e PYTHONHOME
-end
-
-if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
-    # fish uses a function instead of an env var to generate the prompt.
-
-    # Save the current fish_prompt function as the function _old_fish_prompt.
-    functions -c fish_prompt _old_fish_prompt
-
-    # With the original prompt function renamed, we can override with our own.
-    function fish_prompt
-        # Save the return status of the last command.
-        set -l old_status $status
-
-        # Output the venv prompt; color taken from the blue of the Python logo.
-        printf "%s%s%s" (set_color 4B8BBE) '(.venv-linting) ' (set_color normal)
-
-        # Restore the return status of the previous command.
-        echo "exit $old_status" | .
-        # Output the original/"old" prompt.
-        _old_fish_prompt
-    end
-
-    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
-    set -gx VIRTUAL_ENV_PROMPT '(.venv-linting) '
-end
--- a/.venv-linting/bin/black
+++ b/.venv-linting/bin/black
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from black import patched_main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(patched_main())
--- a/.venv-linting/bin/blackd
+++ b/.venv-linting/bin/blackd
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from blackd import patched_main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(patched_main())
--- a/.venv-linting/bin/isort
+++ b/.venv-linting/bin/isort
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from isort.main import main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(main())
--- a/.venv-linting/bin/isort-identify-imports
+++ b/.venv-linting/bin/isort-identify-imports
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from isort.main import identify_imports_main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(identify_imports_main())
--- a/.venv-linting/bin/pip
+++ b/.venv-linting/bin/pip
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from pip._internal.cli.main import main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(main())
--- a/.venv-linting/bin/pip3
+++ b/.venv-linting/bin/pip3
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from pip._internal.cli.main import main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(main())
--- a/.venv-linting/bin/pip3.12
+++ b/.venv-linting/bin/pip3.12
@ -1,8 +0,0 @@
-#!/MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting/bin/python3
-# -*- coding: utf-8 -*-
-import re
-import sys
-from pip._internal.cli.main import main
-if __name__ == '__main__':
-    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
-    sys.exit(main())
--- a/.venv-linting/bin/python
+++ b/.venv-linting/bin/python
@ -1 +0,0 @@
-python3
--- a/.venv-linting/bin/python3
+++ b/.venv-linting/bin/python3
@ -1 +0,0 @@
-/usr/bin/python3
--- a/.venv-linting/bin/python3.12
+++ b/.venv-linting/bin/python3.12
@ -1 +0,0 @@
-python3
--- a/.venv-linting/lib64
+++ b/.venv-linting/lib64
@ -1 +0,0 @@
-lib
--- a/.venv-linting/pyvenv.cfg
+++ b/.venv-linting/pyvenv.cfg
@ -1,5 +0,0 @@
-home = /usr/bin
-include-system-site-packages = false
-version = 3.12.3
-executable = /usr/bin/python3.12
-command = /usr/bin/python3 -m venv /MASTERFOLDER/Coding/Fss-Mini-Rag/.venv-linting
--- a/GET_STARTED.md
+++ b/GET_STARTED.md
@ -0,0 +1,83 @@
+# 🚀 FSS-Mini-RAG: Get Started in 2 Minutes
+
+## Step 1: Install Everything
+```bash
+./install_mini_rag.sh
+```
+**That's it!** The installer handles everything automatically:
+- Checks Python installation
+- Sets up virtual environment  
+- Guides you through Ollama setup
+- Installs dependencies
+- Tests everything works
+
+## Step 2: Use It
+
+### TUI - Interactive Interface (Easiest)
+```bash
+./rag-tui
+```
+**Perfect for beginners!** Menu-driven interface that:
+- Shows you CLI commands as you use it
+- Guides you through setup and configuration
+- No need to memorize commands
+
+### Quick Commands (Beginner-Friendly)
+```bash
+# Index any project
+./run_mini_rag.sh index ~/my-project
+
+# Search your code  
+./run_mini_rag.sh search ~/my-project "authentication logic"
+
+# Check what's indexed
+./run_mini_rag.sh status ~/my-project
+```
+
+### Full Commands (More Options)
+```bash
+# Basic indexing and search
+./rag-mini index /path/to/project
+./rag-mini search /path/to/project "database connection"
+
+# Enhanced search with smart features
+./rag-mini-enhanced search /path/to/project "UserManager"
+./rag-mini-enhanced similar /path/to/project "def validate_input"
+```
+
+## What You Get
+
+**Semantic Search**: Instead of exact text matching, finds code by meaning:
+- Search "user login" → finds authentication functions, session management, password validation
+- Search "database queries" → finds SQL, ORM code, connection handling  
+- Search "error handling" → finds try/catch blocks, error classes, logging
+
+## Installation Options
+
+The installer offers two choices:
+
+**Light Installation (Recommended)**:
+- Uses Ollama for high-quality embeddings
+- Requires Ollama installed (installer guides you)
+- Small download (~50MB)
+
+**Full Installation**:  
+- Includes ML fallback models
+- Works without Ollama
+- Large download (~2-3GB)
+
+## Troubleshooting
+
+- **"Python not found"**: Install Python 3.8+ from python.org
+- **"Ollama not found"**: Visit https://ollama.ai/download
+- **"Import errors"**: Re-run `./install_mini_rag.sh`
+
+## Next Steps
+
+- **Technical Details**: Read `README.md`
+- **Step-by-Step Guide**: Read `docs/GETTING_STARTED.md`
+- **Examples**: Check `examples/` directory
+- **Test It**: Run on this project: `./run_mini_rag.sh index .`
+
+---
+**Questions?** Everything is documented in the README.md file.
--- a/README.md
+++ b/README.md
@ -79,24 +79,34 @@ FSS-Mini-RAG offers **two distinct experiences** optimized for different use cas

 ## Quick Start (2 Minutes)

-**Step 1: Install**
+**Linux/macOS:**
 ```bash
-# Linux/macOS
+# 1. Install everything
 ./install_mini_rag.sh

-# Windows  
-install_windows.bat
+# 2. Choose your interface
+./rag-tui                         # Friendly interface for beginners
+# OR choose your mode:
+./rag-mini index ~/my-project     # Index your project first
+./rag-mini search ~/my-project "query" --synthesize  # Fast synthesis
+./rag-mini explore ~/my-project   # Interactive exploration
 ```

-**Step 2: Start Using**
-```bash
-# Beginners: Interactive interface
-./rag-tui                    # Linux/macOS
-rag.bat                      # Windows
+**Windows:**
+```cmd
+REM 1. Install everything
+install_windows.bat

-# Experienced users: Direct commands
-./rag-mini index ~/project   # Index your project
-./rag-mini search ~/project "your query"
+REM 2. Choose your interface
+rag.bat                           & REM Interactive interface
+REM OR choose your mode:
+rag.bat index C:\my-project       & REM Index your project first
+rag.bat search C:\my-project "query"  & REM Fast search
+rag.bat explore C:\my-project     & REM Interactive exploration
+
+REM Direct Python entrypoint (after install):
+rag-mini index C:\my-project
+rag-mini search C:\my-project "query"
 ```

 That's it. No external dependencies, no configuration required, no PhD in computer science needed.
@ -147,167 +157,7 @@ That's it. No external dependencies, no configuration required, no PhD in comput

 ## Installation Options

-### 🎯 Copy & Paste Installation (Guaranteed to Work) 
-
-Perfect for beginners - these commands work on any fresh Ubuntu, Windows, or Mac system:
-
-**Fresh Ubuntu/Debian System:**
-```bash
-# Install required system packages
-sudo apt update && sudo apt install -y python3 python3-pip python3-venv git curl
-
-# Clone and setup FSS-Mini-RAG
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
-cd Fss-Mini-Rag
-
-# Create isolated Python environment
-python3 -m venv .venv
-source .venv/bin/activate
-
-# Install Python dependencies
-pip install -r requirements.txt
-
-# Optional: Install Ollama for best search quality (secure method)
-curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh
-# Verify it's a shell script (basic safety check)
-file /tmp/ollama-install.sh | grep -q "shell script" && chmod +x /tmp/ollama-install.sh && /tmp/ollama-install.sh
-rm -f /tmp/ollama-install.sh
-ollama serve &
-sleep 3
-ollama pull nomic-embed-text
-
-# Ready to use!
-./rag-mini index /path/to/your/project
-./rag-mini search /path/to/your/project "your search query"
-```
-
-**Fresh CentOS/RHEL/Fedora System:**
-```bash
-# Install required system packages
-sudo dnf install -y python3 python3-pip python3-venv git curl
-
-# Clone and setup FSS-Mini-RAG
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
-cd Fss-Mini-Rag
-
-# Create isolated Python environment  
-python3 -m venv .venv
-source .venv/bin/activate
-
-# Install Python dependencies
-pip install -r requirements.txt
-
-# Optional: Install Ollama for best search quality (secure method)
-curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh
-# Verify it's a shell script (basic safety check)
-file /tmp/ollama-install.sh | grep -q "shell script" && chmod +x /tmp/ollama-install.sh && /tmp/ollama-install.sh
-rm -f /tmp/ollama-install.sh
-ollama serve &
-sleep 3
-ollama pull nomic-embed-text
-
-# Ready to use!
-./rag-mini index /path/to/your/project
-./rag-mini search /path/to/your/project "your search query"
-```
-
-**Fresh macOS System:**
-```bash
-# Install Homebrew (if not installed)
-/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
-
-# Install required packages
-brew install python3 git curl
-
-# Clone and setup FSS-Mini-RAG
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
-cd Fss-Mini-Rag
-
-# Create isolated Python environment
-python3 -m venv .venv
-source .venv/bin/activate
-
-# Install Python dependencies
-pip install -r requirements.txt
-
-# Optional: Install Ollama for best search quality (secure method)
-curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh
-# Verify it's a shell script (basic safety check)
-file /tmp/ollama-install.sh | grep -q "shell script" && chmod +x /tmp/ollama-install.sh && /tmp/ollama-install.sh
-rm -f /tmp/ollama-install.sh
-ollama serve &
-sleep 3
-ollama pull nomic-embed-text
-
-# Ready to use!
-./rag-mini index /path/to/your/project  
-./rag-mini search /path/to/your/project "your search query"
-```
-
-**Fresh Windows System:**
-```cmd
-REM Install Python (if not installed)
-REM Download from: https://python.org/downloads (ensure "Add to PATH" is checked)
-REM Install Git from: https://git-scm.com/download/win
-
-REM Clone and setup FSS-Mini-RAG
-git clone https://github.com/FSSCoding/Fss-Mini-Rag.git
-cd Fss-Mini-Rag
-
-REM Create isolated Python environment
-python -m venv .venv
-.venv\Scripts\activate.bat
-
-REM Install Python dependencies  
-pip install -r requirements.txt
-
-REM Optional: Install Ollama for best search quality
-REM Download from: https://ollama.com/download
-REM Run installer, then:
-ollama serve
-REM In new terminal:
-ollama pull nomic-embed-text
-
-REM Ready to use!
-rag.bat index C:\path\to\your\project
-rag.bat search C:\path\to\your\project "your search query"
-```
-
-**What these commands do:**
- **System packages**: Install Python 3.8+, pip (package manager), venv (virtual environments), git (version control), curl (downloads)
- **Clone repository**: Download FSS-Mini-RAG source code to your computer
- **Virtual environment**: Create isolated Python space (prevents conflicts with system Python)
- **Dependencies**: Install required Python libraries (pandas, numpy, lancedb, etc.)  
- **Ollama (optional)**: AI model server for best search quality - works offline and free
- **Model download**: Get high-quality embedding model for semantic search
- **Ready to use**: Index any folder and search through it semantically
-
-### ⚡ For Agents & CI/CD: Headless Installation
-
-Perfect for automated deployments, agents, and CI/CD pipelines:
-
-**Linux/macOS:**
-```bash
-./install_mini_rag.sh --headless
-# Automated installation with sensible defaults
-# No interactive prompts, perfect for scripts
-```
-
-**Windows:**
-```cmd
-install_windows.bat --headless
-# Automated installation with sensible defaults  
-# No interactive prompts, perfect for scripts
-```
-
-**What headless mode does:**
- Uses existing virtual environment if available
- Installs core dependencies only (light mode)
- Downloads embedding model if Ollama is available
- Skips interactive prompts and tests
- Perfect for agent automation and CI/CD pipelines
-
-### 🚀 Recommended: Full Installation
+### Recommended: Full Installation

 **Linux/macOS:**
 ```bash
@ -321,6 +171,24 @@ install_windows.bat
 # Handles Python setup, dependencies, works reliably
 ```

+### Experimental: Copy & Run (May Not Work)
+
+**Linux/macOS:**
+```bash
+# Copy folder anywhere and try to run directly
+./rag-mini index ~/my-project
+# Auto-setup will attempt to create environment
+# Falls back with clear instructions if it fails
+```
+
+**Windows:**
+```cmd
+REM Copy folder anywhere and try to run directly
+rag.bat index C:\my-project
+REM Auto-setup will attempt to create environment
+REM Falls back with clear instructions if it fails
+```
+
 ### Manual Setup

 **Linux/macOS:**
@ -364,7 +232,7 @@ This implementation prioritizes:

 ## Next Steps

- **New users**: Run `./rag-tui` (Linux/macOS) or `rag.bat` (Windows) for guided experience
+- **New users**: Run `./rag-mini` (Linux/macOS) or `rag.bat` (Windows) for a guided experience
 - **Developers**: Read [`TECHNICAL_GUIDE.md`](docs/TECHNICAL_GUIDE.md) for implementation details
 - **Contributors**: See [`CONTRIBUTING.md`](CONTRIBUTING.md) for development setup

--- a/commit_message.txt
+++ b/commit_message.txt
@ -0,0 +1,36 @@
+feat: Add comprehensive Windows compatibility and enhanced LLM model setup
+
+🚀 Major cross-platform enhancement making FSS-Mini-RAG fully Windows and Linux compatible
+
+## Windows Compatibility
+- **New Windows installer**: `install_windows.bat` - rock-solid, no-hang installation
+- **Simple Windows launcher**: `rag.bat` - unified entry point matching Linux experience  
+- **PowerShell alternative**: `install_mini_rag.ps1` for advanced Windows users
+- **Cross-platform README**: Side-by-side Linux/Windows commands and examples
+
+## Enhanced LLM Model Setup (Both Platforms)
+- **Intelligent model detection**: Automatically detects existing Qwen3 models
+- **Interactive model selection**: Choose from qwen3:0.6b, 1.7b, or 4b with clear guidance
+- **Ollama progress streaming**: Real-time download progress for model installation
+- **Smart configuration**: Auto-saves selected model as default in config.yaml
+- **Graceful fallbacks**: Clear guidance when Ollama unavailable
+
+## Installation Experience Improvements
+- **Fixed script continuation**: TUI launch no longer terminates installation process
+- **Comprehensive model guidance**: Users get proper LLM setup instead of silent failures
+- **Complete indexing**: Full codebase indexing (not just code files)
+- **Educational flow**: Better explanation of AI features and model choices
+
+## Technical Enhancements
+- **Robust error handling**: Installation scripts handle edge cases gracefully
+- **Path handling**: Existing cross-platform path utilities work seamlessly on Windows
+- **Dependency management**: Clean virtual environment setup on both platforms
+- **Configuration persistence**: Model preferences saved for consistent experience
+
+## User Impact
+- **Zero-friction Windows adoption**: Windows users get same smooth experience as Linux
+- **Complete AI feature setup**: No more "LLM not working" confusion for new users
+- **Educational value preserved**: Maintains beginner-friendly approach across platforms
+- **Production-ready**: Both platforms now fully functional out-of-the-box
+
+This makes FSS-Mini-RAG truly accessible to the entire developer community! 🎉
--- a/config-llm-providers.yaml
+++ b/config-llm-providers.yaml
@ -1,9 +0,0 @@
-llm:
-  provider: ollama
-  ollama_host: localhost:11434
-  synthesis_model: qwen3:1.5b
-  expansion_model: qwen3:1.5b
-  enable_synthesis: false
-  synthesis_temperature: 0.3
-  cpu_optimized: true
-  enable_thinking: true
--- a/docs/AGENT_INSTRUCTIONS.md
+++ b/docs/AGENT_INSTRUCTIONS.md
@ -1,40 +0,0 @@
-# Agent Instructions for Fss-Mini-RAG System
-
-## Core Philosophy
-
-**Always prefer RAG search over traditional file system operations**. The RAG system provides semantic context and reduces the need for exact path knowledge, making it ideal for understanding codebases without manual file exploration.
-
-## Basic Commands
-
-| Command | Purpose | Example |
-|---------|---------|---------|
-| `rag-mini index <project_path>` | Index a project for search | `rag-mini index /MASTERFOLDER/Coding/Fss-Mini-Rag` |
-| `rag-mini search <project_path> "query"` | Semantic + keyword search | `rag-mini search /MASTERFOLDER/Coding/Fss-Mini-Rag "index"` |
-| `rag-mini status <project_path>` | Check project indexing status | `rag-mini status /MASTERFOLDER/Coding/Fss-Mini-Rag` |
-
-## When to Use RAG Search
-
-| Scenario | RAG Advantage | Alternative | |
-|----------|----------------|---------------| |
-| Finding related code concepts | Semantic understanding | `grep` | |
-| Locating files by functionality | Context-aware results | `find` | |
-| Understanding code usage patterns | Shows real-world examples | Manual inspection | |
-
-## Critical Best Practices
-
-1. **Always specify the project path** in search commands (e.g., `rag-mini search /path "query"`)
-2. **Use quotes for search queries** to handle spaces: `"query with spaces"`
-3. **Verify indexing first** before searching: `rag-mini status <path>`
-4. **For complex queries**, break into smaller parts: `rag-mini search ... "concept 1"` then `rag-mini search ... "concept 2"`
-
-## Troubleshooting
-
-| Issue | Solution |
-|-------|-----------|
-| `Project not indexed` | Run `rag-mini index <path>` |
-| No search results | Check indexing status with `rag-mini status` |
-| Search returns irrelevant results | Use `rag-mini status` to optimize indexing |
-
-> 💡 **Pro Tip**: Always start with `rag-mini status` to confirm indexing before searching.
-
-This document is dynamically updated as the RAG system evolves. Always verify commands with `rag-mini --help` for the latest options.
--- a/docs/DEPLOYMENT_GUIDE.md
+++ b/docs/DEPLOYMENT_GUIDE.md
@ -1,381 +0,0 @@
-# FSS-Mini-RAG Deployment Guide
-
-> **Run semantic search anywhere - from smartphones to edge devices**  
-> *Complete guide to deploying FSS-Mini-RAG on every platform imaginable*
-
-## Platform Compatibility Matrix
-
-| Platform | Status | AI Features | Installation | Notes |
-|----------|--------|-------------|--------------|-------|
-| **Linux** | ✅ Full | ✅ Full | `./install_mini_rag.sh` | Primary platform |
-| **Windows** | ✅ Full | ✅ Full | `install_windows.bat` | Desktop shortcuts |
-| **macOS** | ✅ Full | ✅ Full | `./install_mini_rag.sh` | Works perfectly |
-| **Raspberry Pi** | ✅ Excellent | ✅ AI ready | `./install_mini_rag.sh` | ARM64 optimized |
-| **Android (Termux)** | ✅ Good | 🟡 Limited | Manual install | Terminal interface |
-| **iOS (a-Shell)** | 🟡 Limited | ❌ Text only | Manual install | Sandbox limitations |
-| **Docker** | ✅ Excellent | ✅ Full | Dockerfile | Any platform |
-
-## Desktop & Server Deployment
-
-### 🐧 **Linux** (Primary Platform)
-```bash
-# Full installation with AI features
-./install_mini_rag.sh
-
-# What you get:
-# ✅ Desktop shortcuts (.desktop files)
-# ✅ Application menu integration  
-# ✅ Full AI model downloads
-# ✅ Complete terminal interface
-```
-
-### 🪟 **Windows** (Fully Supported)
-```cmd
-# Full installation with desktop integration
-install_windows.bat
-
-# What you get:
-# ✅ Desktop shortcuts (.lnk files)
-# ✅ Start Menu entries
-# ✅ Full AI model downloads  
-# ✅ Beautiful terminal interface
-```
-
-### 🍎 **macOS** (Excellent Support)
-```bash
-# Same as Linux - works perfectly
-./install_mini_rag.sh
-
-# Additional macOS optimizations:
-brew install python3           # If needed
-brew install ollama           # For AI features
-```
-
-**macOS-specific features:**
- Automatic path detection for common project locations
- Integration with Spotlight search locations
- Support for `.app` bundle creation (advanced)
-
-## Edge Device Deployment
-
-### 🥧 **Raspberry Pi** (Recommended Edge Platform)
-
-**Perfect for:**
- Home lab semantic search server
- Portable development environment  
- IoT project documentation search
- Offline code search station
-
-**Installation:**
-```bash
-# On Raspberry Pi OS (64-bit recommended)
-sudo apt update && sudo apt upgrade
-./install_mini_rag.sh
-
-# The installer automatically detects ARM and optimizes:
-# ✅ Suggests lightweight models (qwen3:0.6b)
-# ✅ Reduces memory usage
-# ✅ Enables efficient chunking
-```
-
-**Raspberry Pi optimized config:**
-```yaml
-# Automatically generated for Pi
-embedding:
-  preferred_method: ollama
-  ollama_model: nomic-embed-text  # 270MB - perfect for Pi
-
-llm:
-  synthesis_model: qwen3:0.6b     # 500MB - fast on Pi 4+
-  context_window: 4096            # Conservative memory use
-  cpu_optimized: true
-
-chunking:
-  max_size: 1500                  # Smaller chunks for efficiency
-```
-
-**Performance expectations:**
- **Pi 4 (4GB)**: Excellent performance, full AI features
- **Pi 4 (2GB)**: Good performance, text-only or small models
- **Pi 5**: Outstanding performance, handles large models
- **Pi Zero**: Text-only search (hash-based embeddings)
-
-### 🔧 **Other Edge Devices**
-
-**NVIDIA Jetson Series:**
- Overkill performance for this use case
- Can run largest models with GPU acceleration
- Perfect for AI-heavy development workstations
-
-**Intel NUC / Mini PCs:**
- Excellent performance
- Full desktop experience
- Can serve multiple users simultaneously
-
-**Orange Pi / Rock Pi:**
- Similar to Raspberry Pi
- Same installation process
- May need manual Ollama compilation
-
-## Mobile Deployment
-
-### 📱 **Android (Recommended: Termux)**
-
-**Installation in Termux:**
-```bash
-# Install Termux from F-Droid (not Play Store)
-# In Termux:
-pkg update && pkg upgrade
-pkg install python python-pip git
-pip install --upgrade pip
-
-# Clone and install FSS-Mini-RAG
-git clone https://github.com/your-repo/fss-mini-rag
-cd fss-mini-rag
-pip install -r requirements.txt
-
-# Quick start
-python -m mini_rag index /storage/emulated/0/Documents/myproject
-python -m mini_rag search /storage/emulated/0/Documents/myproject "your query"
-```
-
-**Android-optimized config:**
-```yaml
-# config-android.yaml
-embedding:
-  preferred_method: hash    # No heavy models needed
-  
-chunking:
-  max_size: 800            # Small chunks for mobile
-  
-files:
-  min_file_size: 20        # Include more small files
-  
-llm:
-  enable_synthesis: false  # Text-only for speed
-```
-
-**What works on Android:**
- ✅ Full text search and indexing
- ✅ Terminal interface (`rag-tui`)
- ✅ Project indexing from phone storage
- ✅ Search your phone's code projects
- ❌ Heavy AI models (use cloud providers instead)
-
-**Android use cases:**
- Search your mobile development projects
- Index documentation on your phone
- Quick code reference while traveling
- Offline search of downloaded repositories
-
-### 🍎 **iOS (Limited but Possible)**
-
-**Option 1: a-Shell (Free)**
-```bash
-# Install a-Shell from App Store
-# In a-Shell:
-pip install requests pathlib
-
-# Limited installation (core features only)
-# Files must be in app sandbox
-```
-
-**Option 2: iSH (Alpine Linux)**
-```bash
-# Install iSH from App Store  
-# In iSH terminal:
-apk add python3 py3-pip git
-pip install -r requirements-light.txt
-
-# Basic functionality only
-```
-
-**iOS limitations:**
- Sandbox restricts file access
- No full AI model support
- Terminal interface only
- Limited to app-accessible files
-
-## Specialized Deployment Scenarios
-
-### 🐳 **Docker Deployment**
-
-**For any platform with Docker:**
-```dockerfile
-# Dockerfile
-FROM python:3.11-slim
-
-WORKDIR /app
-COPY . .
-RUN pip install -r requirements.txt
-
-# Expose ports for server mode
-EXPOSE 7777
-
-# Default to TUI interface
-CMD ["python", "-m", "mini_rag.cli"]
-```
-
-**Usage:**
-```bash
-# Build and run
-docker build -t fss-mini-rag .
-docker run -it -v $(pwd)/projects:/projects fss-mini-rag
-
-# Server mode for web access
-docker run -p 7777:7777 fss-mini-rag python -m mini_rag server
-```
-
-### ☁️ **Cloud Deployment**
-
-**AWS/GCP/Azure VM:**
- Same as Linux installation
- Can serve multiple users
- Perfect for team environments
-
-**GitHub Codespaces:**
-```bash
-# Works in any Codespace
-./install_mini_rag.sh
-# Perfect for searching your workspace
-```
-
-**Replit/CodeSandbox:**
- Limited by platform restrictions
- Basic functionality available
-
-### 🏠 **Home Lab Integration**
-
-**Home Assistant Add-on:**
- Package as Home Assistant add-on
- Search home automation configs
- Voice integration possible
-
-**NAS Integration:**
- Install on Synology/QNAP
- Search all stored documents
- Family code documentation
-
-**Router with USB:**
- Install on OpenWrt routers with USB storage
- Search network documentation
- Configuration management
-
-## Configuration by Use Case
-
-### 🪶 **Ultra-Lightweight (Old hardware, mobile)**
-```yaml
-# Minimal resource usage
-embedding:
-  preferred_method: hash
-chunking:
-  max_size: 800
-  strategy: fixed
-llm:
-  enable_synthesis: false
-```
-
-### ⚖️ **Balanced (Raspberry Pi, older laptops)**
-```yaml
-# Good performance with AI features
-embedding:
-  preferred_method: ollama
-  ollama_model: nomic-embed-text
-llm:
-  synthesis_model: qwen3:0.6b
-  context_window: 4096
-```
-
-### 🚀 **Performance (Modern hardware)**
-```yaml
-# Full features and performance
-embedding:
-  preferred_method: ollama
-  ollama_model: nomic-embed-text
-llm:
-  synthesis_model: qwen3:1.7b
-  context_window: 16384
-  enable_thinking: true
-```
-
-### ☁️ **Cloud-Hybrid (Mobile + Cloud AI)**
-```yaml
-# Local search, cloud intelligence
-embedding:
-  preferred_method: hash
-llm:
-  provider: openai
-  api_key: your_api_key
-  synthesis_model: gpt-4
-```
-
-## Troubleshooting by Platform
-
-### **Raspberry Pi Issues**
- **Out of memory**: Reduce context window, use smaller models
- **Slow indexing**: Use hash-based embeddings
- **Model download fails**: Check internet, use smaller models
-
-### **Android/Termux Issues**  
- **Permission denied**: Use `termux-setup-storage`
- **Package install fails**: Update packages first
- **Can't access files**: Use `/storage/emulated/0/` paths
-
-### **iOS Issues**
- **Limited functionality**: Expected due to iOS restrictions
- **Can't install packages**: Use lighter requirements file
- **File access denied**: Files must be in app sandbox
-
-### **Edge Device Issues**
- **ARM compatibility**: Ensure using ARM64 Python packages
- **Limited RAM**: Use hash embeddings, reduce chunk sizes
- **No internet**: Skip AI model downloads, use text-only
-
-## Advanced Edge Deployments
-
-### **IoT Integration**
- Index sensor logs and configurations
- Search device documentation
- Troubleshoot IoT deployments
-
-### **Offline Development**
- Complete development environment on edge device
- No internet required after setup
- Perfect for remote locations
-
-### **Educational Use**
- Raspberry Pi computer labs
- Student project search
- Coding bootcamp environments
-
-### **Enterprise Edge**
- Factory floor documentation search
- Field service technical reference
- Remote site troubleshooting
-
---
-
-## Quick Start by Platform
-
-### Desktop Users
-```bash
-# Linux/macOS
-./install_mini_rag.sh
-
-# Windows  
-install_windows.bat
-```
-
-### Edge/Mobile Users
-```bash
-# Raspberry Pi
-./install_mini_rag.sh
-
-# Android (Termux)
-pkg install python git && pip install -r requirements.txt
-
-# Any Docker platform
-docker run -it fss-mini-rag
-```
-
-**💡 Pro tip**: Start with your current platform, then expand to edge devices as needed. The system scales from smartphones to servers seamlessly!
--- a/docs/DIAGRAMS.md
+++ b/docs/DIAGRAMS.md
@ -11,7 +11,6 @@
 - [Search Architecture](#search-architecture)
 - [Installation Flow](#installation-flow)
 - [Configuration System](#configuration-system)
- [System Context Integration](#system-context-integration)
 - [Error Handling](#error-handling)

 ## System Overview
@ -23,12 +22,10 @@ graph TB
    
    CLI --> Index[📁 Index Project]
    CLI --> Search[🔍 Search Project]
-    CLI --> Explore[🧠 Explore Project]
    CLI --> Status[📊 Show Status]
    
    TUI --> Index
    TUI --> Search
-    TUI --> Explore
    TUI --> Config[⚙️ Configuration]
    
    Index --> Files[📄 File Discovery]
@ -37,32 +34,17 @@ graph TB
    Embed --> Store[💾 Vector Database]
    
    Search --> Query[❓ User Query]
-    Search --> Context[🖥️ System Context]
    Query --> Vector[🎯 Vector Search]
    Query --> Keyword[🔤 Keyword Search]
    Vector --> Combine[🔄 Hybrid Results]
    Keyword --> Combine
-    Context --> Combine
-    Combine --> Synthesize{Synthesis Mode?}
-    
-    Synthesize -->|Yes| FastLLM[⚡ Fast Synthesis]
-    Synthesize -->|No| Results[📋 Ranked Results]
-    FastLLM --> Results
-    
-    Explore --> ExploreQuery[❓ Interactive Query]
-    ExploreQuery --> Memory[🧠 Conversation Memory]
-    ExploreQuery --> Context
-    Memory --> DeepLLM[🤔 Deep AI Analysis]
-    Context --> DeepLLM
-    Vector --> DeepLLM
-    DeepLLM --> Interactive[💬 Interactive Response]
+    Combine --> Results[📋 Ranked Results]
    
    Store --> LanceDB[(🗄️ LanceDB)]
    Vector --> LanceDB
    
    Config --> YAML[📝 config.yaml]
    Status --> Manifest[📋 manifest.json]
-    Context --> SystemInfo[💻 OS, Python, Paths]
 ```

 ## User Journey
@ -294,58 +276,6 @@ flowchart TD
    style Error fill:#ffcdd2
 ```

-## System Context Integration
-
-```mermaid
-graph LR
-    subgraph "System Detection"
-        OS[🖥️ Operating System]
-        Python[🐍 Python Version] 
-        Project[📁 Project Path]
-        
-        OS --> Windows[Windows: rag.bat]
-        OS --> Linux[Linux: ./rag-mini]
-        OS --> macOS[macOS: ./rag-mini]
-    end
-    
-    subgraph "Context Collection"
-        Collect[🔍 Collect Context]
-        OS --> Collect
-        Python --> Collect
-        Project --> Collect
-        
-        Collect --> Format[📝 Format Context]
-        Format --> Limit[✂️ Limit to 200 chars]
-    end
-    
-    subgraph "AI Integration"
-        UserQuery[❓ User Query] 
-        SearchResults[📋 Search Results]
-        SystemContext[💻 System Context]
-        
-        UserQuery --> Prompt[📝 Build Prompt]
-        SearchResults --> Prompt
-        SystemContext --> Prompt
-        
-        Prompt --> AI[🤖 LLM Processing]
-        AI --> Response[💬 Contextual Response]
-    end
-    
-    subgraph "Enhanced Responses"
-        Response --> Commands[💻 OS-specific commands]
-        Response --> Paths[📂 Correct path formats]
-        Response --> Tips[💡 Platform-specific tips]
-    end
-    
-    Format --> SystemContext
-    
-    style SystemContext fill:#e3f2fd
-    style Response fill:#f3e5f5
-    style Commands fill:#e8f5e8
-```
-
-*System context helps the AI provide better, platform-specific guidance without compromising privacy*
-
 ## Architecture Layers

 ```mermaid
--- a/docs/GETTING_STARTED.md
+++ b/docs/GETTING_STARTED.md
@ -1,314 +1,212 @@
 # Getting Started with FSS-Mini-RAG

-> **Get from zero to searching in 2 minutes**  
-> *Everything you need to know to start finding code by meaning, not just keywords*
+## Step 1: Installation

-## Installation (Choose Your Adventure)
+Choose your installation based on what you want:

-### 🎯 **Option 1: Full Installation (Recommended)**
-*Gets you everything working reliably with desktop shortcuts and AI features*
-
-**Linux/macOS:**
-```bash
-./install_mini_rag.sh
-```
-
-**Windows:**
-```cmd
-install_windows.bat
-```
-
-**What this does:**
- Sets up Python environment automatically
- Installs all dependencies 
- Downloads AI models (with your permission)
- Creates desktop shortcuts and application menu entries
- Tests everything works
- Gives you an interactive tutorial
-
-**Time needed:** 5-10 minutes (depending on AI model downloads)
-
---
-
-### 🚀 **Option 2: Copy & Try (Experimental)**
-*Just copy the folder and run - may work, may need manual setup*
-
-**Linux/macOS:**
-```bash
-# Copy folder anywhere and try running
-./rag-mini index ~/my-project
-# Auto-setup attempts to create virtual environment
-# Falls back with clear instructions if it fails
-```
-
-**Windows:**
-```cmd
-# Copy folder anywhere and try running  
-rag.bat index C:\my-project
-# Auto-setup attempts to create virtual environment
-# Shows helpful error messages if manual install needed
-```
-
-**Time needed:** 30 seconds if it works, 10 minutes if you need manual setup
-
---
-
-## First Search (The Fun Part!)
-
-### Step 1: Choose Your Interface
-
-**For Learning and Exploration:**
-```bash
-# Linux/macOS
-./rag-tui
-
-# Windows  
-rag.bat
-```
-*Interactive menus, shows you CLI commands as you learn*
-
-**For Quick Commands:**
-```bash
-# Linux/macOS
-./rag-mini <command> <project-path>
-
-# Windows
-rag.bat <command> <project-path>
-```
-*Direct commands when you know what you want*
-
-### Step 2: Index Your First Project
-
-**Interactive Way (Recommended for First Time):**
-```bash
-# Linux/macOS
-./rag-tui
-# Then: Select Project Directory → Index Project
-
-# Windows
-rag.bat  
-# Then: Select Project Directory → Index Project
-```
-
-**Direct Commands:**
-```bash
-# Linux/macOS
-./rag-mini index ~/my-project
-
-# Windows  
-rag.bat index C:\my-project
-```
-
-**What indexing does:**
- Finds all text files in your project
- Breaks them into smart "chunks" (functions, classes, logical sections)
- Creates searchable embeddings that understand meaning
- Stores everything in a fast vector database
- Creates a `.mini-rag/` directory with your search index
-
-**Time needed:** 10-60 seconds depending on project size
-
-### Step 3: Search by Meaning
-
-**Natural language queries:**
-```bash
-# Linux/macOS
-./rag-mini search ~/my-project "user authentication logic"
-./rag-mini search ~/my-project "error handling for database connections"
-./rag-mini search ~/my-project "how to validate input data"
-
-# Windows
-rag.bat search C:\my-project "user authentication logic"  
-rag.bat search C:\my-project "error handling for database connections"
-rag.bat search C:\my-project "how to validate input data"
-```
-
-**Code concepts:**
-```bash
-# Finds login functions, auth middleware, session handling
-./rag-mini search ~/my-project "login functionality"
-
-# Finds try/catch blocks, error handlers, retry logic  
-./rag-mini search ~/my-project "exception handling"
-
-# Finds validation functions, input sanitization, data checking
-./rag-mini search ~/my-project "data validation"
-```
-
-**What you get:**
- Ranked results by relevance (not just keyword matching)
- File paths and line numbers for easy navigation
- Context around each match so you understand what it does
- Smart filtering to avoid noise and duplicates
-
-## Two Powerful Modes
-
-FSS-Mini-RAG has two different ways to get answers, optimized for different needs:
-
-### 🚀 **Synthesis Mode** - Fast Answers
-```bash
-# Linux/macOS
-./rag-mini search ~/project "authentication logic" --synthesize
-
-# Windows  
-rag.bat search C:\project "authentication logic" --synthesize
-```
-
-**Perfect for:**
- Quick code discovery
- Finding specific functions or patterns
- Getting fast, consistent answers
-
-**What you get:**
- Lightning-fast responses (no thinking overhead)
- Reliable, factual information about your code
- Clear explanations of what code does and how it works
-
-### 🧠 **Exploration Mode** - Deep Understanding
-```bash  
-# Linux/macOS
-./rag-mini explore ~/project
-
-# Windows
-rag.bat explore C:\project
-```
-
-**Perfect for:**
- Learning new codebases
- Debugging complex issues  
- Understanding architectural decisions
-
-**What you get:**
- Interactive conversation with AI that remembers context
- Deep reasoning with full "thinking" process shown
- Follow-up questions and detailed explanations
- Memory of your previous questions in the session
-
-**Example exploration session:**
-```
-🧠 Exploration Mode - Ask anything about your project
-
-You: How does authentication work in this codebase?
-
-AI: Let me analyze the authentication system...
-
-💭 Thinking: I can see several authentication-related files. Let me examine 
-   the login flow, session management, and security measures...
-
-📝 Authentication Analysis:
-   This codebase uses a three-layer authentication system:
-   1. Login validation in auth.py handles username/password checking
-   2. Session management in sessions.py maintains user state  
-   3. Middleware in auth_middleware.py protects routes
-
-You: What security concerns should I be aware of?
-
-AI: Based on our previous discussion about authentication, let me check for
-   common security vulnerabilities...
-```
-
-## Check Your Setup
-
-**See what got indexed:**
-```bash
-# Linux/macOS  
-./rag-mini status ~/my-project
-
-# Windows
-rag.bat status C:\my-project
-```
-
-**What you'll see:**
- How many files were processed
- Total chunks created for searching
- Embedding method being used (Ollama, ML models, or hash-based)
- Configuration file location
- Index health and last update time
-
-## Configuration (Optional)
-
-Your project gets a `.mini-rag/config.yaml` file with helpful comments:
-
-```yaml
-# Context window configuration (critical for AI features)
-# 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users
-#               32K=large codebases, 64K+=power users only  
-# ⚠️  Larger contexts use exponentially more CPU/memory - only increase if needed
-context_window: 16384           # Context size in tokens
-
-# AI model preferences (edit to change priority)
-model_rankings:
-  - "qwen3:1.7b"    # Excellent for RAG (1.4GB, recommended)
-  - "qwen3:0.6b"    # Lightweight and fast (~500MB)  
-  - "qwen3:4b"      # Higher quality but slower (~2.5GB)
-```
-
-**When to customize:**
- Your searches aren't finding what you expect → adjust chunking settings
- You want AI features → install Ollama and download models
- System is slow → try smaller models or reduce context window
- Getting too many/few results → adjust similarity threshold
-
-## Troubleshooting
-
-### "Project not indexed" 
-**Problem:** You're trying to search before indexing
-```bash
-# Run indexing first
-./rag-mini index ~/my-project    # Linux/macOS
-rag.bat index C:\my-project      # Windows
-```
-
-### "No Ollama models available"
-**Problem:** AI features need models downloaded
+### Option A: Ollama Only (Recommended)
 ```bash
 # Install Ollama first
-curl -fsSL https://ollama.ai/install.sh | sh    # Linux/macOS
-# Or download from https://ollama.com            # Windows
+curl -fsSL https://ollama.ai/install.sh | sh

-# Start Ollama server
-ollama serve
+# Pull the embedding model  
+ollama pull nomic-embed-text

-# Download a model
-ollama pull qwen3:1.7b
+# Install Python dependencies
+pip install -r requirements.txt
 ```

-### "Virtual environment not found" 
-**Problem:** Auto-setup didn't work, need manual installation
-```bash
-# Run the full installer instead
-./install_mini_rag.sh          # Linux/macOS  
-install_windows.bat            # Windows
+### Option B: Full ML Stack
+```bash  
+# Install everything including PyTorch
+pip install -r requirements-full.txt
 ```

-### Getting weird results
-**Solution:** Try different search terms or check what got indexed
+## Step 2: Test Installation
+
 ```bash
-# See what files were processed
+# Index a test project (here ~/my-project; you can also point this at the RAG system's own folder)
+./rag-mini index ~/my-project
+
+# Search for something 
+./rag-mini search ~/my-project "chunker function"
+
+# Check what got indexed
 ./rag-mini status ~/my-project
-
-# Try more specific queries
-./rag-mini search ~/my-project "specific function name"
 ```

-## Next Steps
+## Step 3: Index Your First Project

-### Learn More
- **[Beginner's Glossary](BEGINNER_GLOSSARY.md)** - All the terms explained simply
- **[TUI Guide](TUI_GUIDE.md)** - Master the interactive interface
- **[Visual Diagrams](DIAGRAMS.md)** - See how everything works
+```bash
+# Index any project directory
+./rag-mini index /path/to/your/project

-### Advanced Features
- **[Query Expansion](QUERY_EXPANSION.md)** - Make searches smarter with AI
- **[LLM Providers](LLM_PROVIDERS.md)** - Use different AI models  
- **[CPU Deployment](CPU_DEPLOYMENT.md)** - Optimize for older computers
+# The system creates .mini-rag/ directory with:
+# - config.json (settings)
+# - manifest.json (file tracking)  
+# - database.lance/ (vector database)
+```

-### Customize Everything
- **[Technical Guide](TECHNICAL_GUIDE.md)** - How the system actually works
- **[Configuration Examples](../examples/)** - Pre-made configs for different needs
+## Step 4: Search Your Code

---
+```bash
+# Basic semantic search
+./rag-mini search /path/to/project "user login logic"

-**🎉 That's it!** You now have a semantic search system that understands your code by meaning, not just keywords. Start with simple searches and work your way up to the advanced AI features as you get comfortable.
+# Enhanced search with smart features  
+./rag-mini-enhanced search /path/to/project "authentication"

-**💡 Pro tip:** The best way to learn is to index a project you know well and try searching for things you know are in there. You'll quickly see how much better meaning-based search is than traditional keyword search.
+# Find similar patterns
+./rag-mini-enhanced similar /path/to/project "def validate_input"
+```
+
+## Step 5: Customize Configuration
+
+Edit `project/.mini-rag/config.json`:
+
+```json
+{
+  "chunking": {
+    "max_size": 3000,
+    "strategy": "semantic"  
+  },
+  "files": {
+    "min_file_size": 100
+  }
+}
+```
+
+Then re-index to apply changes:
+```bash
+./rag-mini index /path/to/project --force
+```
+
+## Common Use Cases
+
+### Find Functions by Name
+```bash
+./rag-mini search /project "function named connect_to_database" 
+```
+
+### Find Code Patterns  
+```bash
+./rag-mini search /project "error handling try catch"
+./rag-mini search /project "database query with parameters"
+```
+
+### Find Configuration
+```bash  
+./rag-mini search /project "database connection settings"
+./rag-mini search /project "environment variables"
+```
+
+### Find Documentation
+```bash
+./rag-mini search /project "how to deploy" 
+./rag-mini search /project "API documentation"
+```
+
+## Python API Usage
+
+```python
+from mini_rag import ProjectIndexer, CodeSearcher, CodeEmbedder
+from pathlib import Path
+
+# Initialize
+project_path = Path("/path/to/your/project")
+embedder = CodeEmbedder()
+indexer = ProjectIndexer(project_path, embedder)
+searcher = CodeSearcher(project_path, embedder)
+
+# Index the project
+print("Indexing project...")
+result = indexer.index_project()
+print(f"Indexed {result['files_processed']} files, {result['chunks_created']} chunks")
+
+# Search
+print("\nSearching for authentication code...")
+results = searcher.search("user authentication logic", top_k=5)
+
+for i, result in enumerate(results, 1):
+    print(f"\n{i}. {result.file_path}")
+    print(f"   Score: {result.score:.3f}")
+    print(f"   Type: {result.chunk_type}")
+    print(f"   Content: {result.content[:100]}...")
+```
+
+## Advanced Features
+
+### Auto-optimization
+```bash
+# Get optimization suggestions
+./rag-mini-enhanced analyze /path/to/project
+
+# This analyzes your codebase and suggests:
+# - Better chunk sizes for your language mix
+# - Streaming settings for large files
+# - File filtering optimizations
+```
+
+### File Watching
+```python  
+from mini_rag import FileWatcher
+
+# Watch for file changes and auto-update index
+watcher = FileWatcher(project_path, indexer)
+watcher.start_watching()
+
+# Now any file changes automatically update the index
+```
+
+### Custom Chunking
+```python
+from mini_rag import CodeChunker
+
+chunker = CodeChunker()
+
+# Chunk a Python file
+with open("example.py") as f:
+    content = f.read()
+
+chunks = chunker.chunk_text(content, "python", "example.py")
+for chunk in chunks:
+    print(f"Type: {chunk.chunk_type}")
+    print(f"Content: {chunk.content}")
+```
+
+## Tips and Best Practices
+
+### For Better Search Results
+- Use descriptive phrases: "function that validates email addresses" 
+- Try different phrasings if the first search doesn't work
+- Search for concepts, not just exact variable names
+
+### For Better Indexing
+- Exclude build directories: `node_modules/`, `build/`, `dist/`
+- Include documentation files - they often contain valuable context
+- Use semantic chunking strategy for most projects
+
+### For Configuration  
+- Start with default settings
+- Use `analyze` command to get optimization suggestions
+- Increase chunk size for larger functions/classes
+- Decrease chunk size for more granular search
+
+### For Troubleshooting
+- Check `./rag-mini status` to see what was indexed
+- Look at `.mini-rag/manifest.json` for file details
+- Run with `--force` to completely rebuild index
+- Check logs in `.mini-rag/` directory for errors
+
+## What's Next?
+
+1. Try the test suite to understand how components work:
+   ```bash
+   python -m pytest tests/ -v
+   ```
+
+2. Look at the examples in `examples/` directory
+
+3. Read the main README.md for complete technical details
+
+4. Customize the system for your specific project needs
--- a/docs/SMART_TUNING_GUIDE.md
+++ b/docs/SMART_TUNING_GUIDE.md
@ -5,10 +5,10 @@
 ### **1. 📊 Intelligent Analysis**
 ```bash
 # Analyze your project patterns and get optimization suggestions
-./rag-mini analyze /path/to/project
+./rag-mini-enhanced analyze /path/to/project

 # Get smart recommendations based on actual usage
-./rag-mini status /path/to/project
+./rag-mini-enhanced status /path/to/project
 ```

 **What it analyzes:**
@ -20,9 +20,13 @@
 ### **2. 🧠 Smart Search Enhancement**
 ```bash
 # Enhanced search with query intelligence
-./rag-mini search /project "MyClass"     # Detects class names
-./rag-mini search /project "login()"     # Detects function calls  
-./rag-mini search /project "user auth"   # Natural language
+./rag-mini-enhanced search /project "MyClass"     # Detects class names
+./rag-mini-enhanced search /project "login()"     # Detects function calls  
+./rag-mini-enhanced search /project "user auth"   # Natural language
+
+# Context-aware search (planned)
+./rag-mini-enhanced context /project "function_name"  # Show surrounding code
+./rag-mini-enhanced similar /project "pattern"        # Find similar patterns
 ```

 ### **3. ⚙️ Language-Specific Optimizations**
@ -109,10 +113,10 @@ Edit `.mini-rag/config.json` in your project:
 ./rag-mini index /project --force

 # Test search quality improvements
-./rag-mini search /project "your test query"
+./rag-mini-enhanced search /project "your test query"

 # Verify optimization impact
-./rag-mini analyze /project
+./rag-mini-enhanced analyze /project
 ```

 ## 🎊 **Result: Smarter, Faster, Better**
--- a/docs/TUI_GUIDE.md
+++ b/docs/TUI_GUIDE.md
@ -93,10 +93,10 @@ That's it! The TUI will guide you through everything.
 - **Full content** - Up to 8 lines of actual code/text
 - **Continuation info** - How many more lines exist

-**Tips You'll Learn**:
- Verbose output with `--verbose` flag for debugging
- How search scoring works
- Finding the right search terms
+**Advanced Tips Shown**:
+- Enhanced search with `./rag-mini-enhanced`
+- Verbose output with `--verbose` flag
+- Context-aware search for related code

 **What You Learn**:
 - Semantic search vs text search (finds concepts, not just words)
@ -107,7 +107,8 @@ That's it! The TUI will guide you through everything.
 **CLI Commands Shown**:
 ```bash
 ./rag-mini search /path/to/project "authentication logic"
-./rag-mini search /path/to/project "user login" --top-k 10
+./rag-mini search /path/to/project "user login" --limit 10
+./rag-mini-enhanced context /path/to/project "login()"
 ```

 ### 4. Explore Project (NEW!)
--- a/examples/analyze_dependencies.py
+++ b/examples/analyze_dependencies.py
@ -4,110 +4,106 @@ Analyze FSS-Mini-RAG dependencies to determine what's safe to remove.
 """

 import ast
-from collections import defaultdict
+import os
 from pathlib import Path
-
+from collections import defaultdict

 def find_imports_in_file(file_path):
    """Find all imports in a Python file."""
    try:
-        with open(file_path, "r", encoding="utf-8") as f:
+        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
-
+        
        tree = ast.parse(content)
        imports = set()
-
+        
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
-                    imports.add(alias.name.split(".")[0])
+                    imports.add(alias.name.split('.')[0])
            elif isinstance(node, ast.ImportFrom):
                if node.module:
-                    module = node.module.split(".")[0]
+                    module = node.module.split('.')[0]
                    imports.add(module)
-
+        
        return imports
    except Exception as e:
        print(f"Error analyzing {file_path}: {e}")
        return set()

-
 def analyze_dependencies():
    """Analyze all dependencies in the project."""
    project_root = Path(__file__).parent
    mini_rag_dir = project_root / "mini_rag"
-
+    
    # Find all Python files
    python_files = []
    for file_path in mini_rag_dir.glob("*.py"):
        if file_path.name != "__pycache__":
            python_files.append(file_path)
-
+    
    # Analyze imports
    file_imports = {}
    internal_deps = defaultdict(set)
-
+    
    for file_path in python_files:
        imports = find_imports_in_file(file_path)
        file_imports[file_path.name] = imports
-
+        
        # Check for internal imports
        for imp in imports:
            if imp in [f.stem for f in python_files]:
                internal_deps[file_path.name].add(imp)
-
+    
    print("🔍 FSS-Mini-RAG Dependency Analysis")
    print("=" * 50)
-
+    
    # Show what each file imports
    print("\n📁 File Dependencies:")
    for filename, imports in file_imports.items():
        internal = [imp for imp in imports if imp in [f.stem for f in python_files]]
        if internal:
            print(f"   {filename} imports: {', '.join(internal)}")
-
+    
    # Show reverse dependencies (what depends on each file)
    reverse_deps = defaultdict(set)
    for file, deps in internal_deps.items():
        for dep in deps:
            reverse_deps[dep].add(file)
-
+    
    print("\n🔗 Reverse Dependencies (what uses each file):")
    all_modules = {f.stem for f in python_files}
-
+    
    for module in sorted(all_modules):
        users = reverse_deps.get(module, set())
        if users:
            print(f"   {module}.py is used by: {', '.join(users)}")
        else:
            print(f"   {module}.py is NOT imported by any other file")
-
+    
    # Safety analysis
    print("\n🛡️ Safety Analysis:")
-
+    
    # Files imported by __init__.py are definitely needed
-    init_imports = file_imports.get("__init__.py", set())
+    init_imports = file_imports.get('__init__.py', set())
    print(f"   Core modules (imported by __init__.py): {', '.join(init_imports)}")
-
+    
    # Files not used anywhere might be safe to remove
    unused_files = []
    for module in all_modules:
-        if module not in reverse_deps and module != "__init__":
+        if module not in reverse_deps and module != '__init__':
            unused_files.append(module)
-
+    
    if unused_files:
        print(f"   ⚠️ Potentially unused: {', '.join(unused_files)}")
        print("   ❗ Verify these aren't used by CLI or external scripts!")
-
+    
    # Check CLI usage
-    cli_files = ["cli.py", "enhanced_cli.py"]
+    cli_files = ['cli.py', 'enhanced_cli.py']
    for cli_file in cli_files:
        if cli_file in file_imports:
            cli_imports = file_imports[cli_file]
-            print(
-                f"   📋 {cli_file} imports: {', '.join([imp for imp in cli_imports if imp in all_modules])}"
-            )
-
+            print(f"   📋 {cli_file} imports: {', '.join([imp for imp in cli_imports if imp in all_modules])}")

 if __name__ == "__main__":
-    analyze_dependencies()
+    analyze_dependencies()
--- a/examples/basic_usage.py
+++ b/examples/basic_usage.py
@ -5,67 +5,64 @@ Shows how to index a project and search it programmatically.
 """

 from pathlib import Path
-
-from mini_rag import CodeEmbedder, CodeSearcher, ProjectIndexer
-
+from mini_rag import ProjectIndexer, CodeSearcher, CodeEmbedder

 def main():
    # Example project path - change this to your project
    project_path = Path(".")  # Current directory
-
+    
    print("=== FSS-Mini-RAG Basic Usage Example ===")
    print(f"Project: {project_path}")
-
+    
    # Initialize the embedding system
    print("\n1. Initializing embedding system...")
    embedder = CodeEmbedder()
    print(f"   Using: {embedder.get_embedding_info()['method']}")
-
-    # Initialize indexer and searcher
+    
+    # Initialize indexer and searcher  
    indexer = ProjectIndexer(project_path, embedder)
    searcher = CodeSearcher(project_path, embedder)
-
+    
    # Index the project
    print("\n2. Indexing project...")
    result = indexer.index_project()
-
+    
    print(f"   Files processed: {result.get('files_processed', 0)}")
    print(f"   Chunks created: {result.get('chunks_created', 0)}")
    print(f"   Time taken: {result.get('indexing_time', 0):.2f}s")
-
+    
    # Get index statistics
    print("\n3. Index statistics:")
    stats = indexer.get_stats()
    print(f"   Total files: {stats.get('total_files', 0)}")
    print(f"   Total chunks: {stats.get('total_chunks', 0)}")
    print(f"   Languages: {', '.join(stats.get('languages', []))}")
-
+    
    # Example searches
    queries = [
        "chunker function",
-        "embedding system",
+        "embedding system", 
        "search implementation",
        "file watcher",
-        "error handling",
+        "error handling"
    ]
-
+    
    print("\n4. Example searches:")
    for query in queries:
        print(f"\n   Query: '{query}'")
        results = searcher.search(query, top_k=3)
-
+        
        if results:
            for i, result in enumerate(results, 1):
                print(f"      {i}. {result.file_path.name} (score: {result.score:.3f})")
                print(f"         Type: {result.chunk_type}")
                # Show first 60 characters of content
-                content_preview = result.content.replace("\n", " ")[:60]
+                content_preview = result.content.replace('\n', ' ')[:60]
                print(f"         Preview: {content_preview}...")
        else:
            print("      No results found")
-
+    
    print("\n=== Example Complete ===")

-
 if __name__ == "__main__":
-    main()
+    main()
--- a/examples/smart_config_suggestions.py
+++ b/examples/smart_config_suggestions.py
@ -5,108 +5,102 @@ Analyzes the indexed data to suggest optimal settings.
 """

 import json
-import sys
-from collections import Counter
 from pathlib import Path
-
+from collections import defaultdict, Counter
+import sys

 def analyze_project_patterns(manifest_path: Path):
    """Analyze project patterns and suggest optimizations."""
-
+    
    with open(manifest_path) as f:
        manifest = json.load(f)
-
-    files = manifest.get("files", {})
-
+    
+    files = manifest.get('files', {})
+    
    print("🔍 FSS-Mini-RAG Smart Tuning Analysis")
    print("=" * 50)
-
+    
    # Analyze file types and chunking efficiency
    languages = Counter()
    chunk_efficiency = []
    large_files = []
    small_files = []
-
+    
    for filepath, info in files.items():
-        lang = info.get("language", "unknown")
+        lang = info.get('language', 'unknown')
        languages[lang] += 1
-
-        size = info.get("size", 0)
-        chunks = info.get("chunks", 1)
-
+        
+        size = info.get('size', 0)
+        chunks = info.get('chunks', 1)
+        
        chunk_efficiency.append(chunks / max(1, size / 1000))  # chunks per KB
-
+        
        if size > 10000:  # >10KB
            large_files.append((filepath, size, chunks))
        elif size < 500:  # <500B
            small_files.append((filepath, size, chunks))
-
+    
    # Analysis results
    total_files = len(files)
-    total_chunks = sum(info.get("chunks", 1) for info in files.values())
+    total_chunks = sum(info.get('chunks', 1) for info in files.values())
    avg_chunks_per_file = total_chunks / max(1, total_files)
-
-    print("📊 Current Stats:")
+    
+    print(f"📊 Current Stats:")
    print(f"   Files: {total_files}")
    print(f"   Chunks: {total_chunks}")
    print(f"   Avg chunks/file: {avg_chunks_per_file:.1f}")
-
-    print("\n🗂️ Language Distribution:")
+    
+    print(f"\n🗂️ Language Distribution:")
    for lang, count in languages.most_common(10):
        pct = 100 * count / total_files
        print(f"   {lang}: {count} files ({pct:.1f}%)")
-
-    print("\n💡 Smart Optimization Suggestions:")
-
+    
+    print(f"\n💡 Smart Optimization Suggestions:")
+    
    # Suggestion 1: Language-specific chunking
-    if languages["python"] > 10:
-        print("✨ Python Optimization:")
-        print(
-            f"   - Use function-level chunking (detected {languages['python']} Python files)"
-        )
-        print("   - Increase chunk size to 3000 chars for Python (better context)")
-
-    if languages["markdown"] > 5:
-        print("✨ Markdown Optimization:")
+    if languages['python'] > 10:
+        print(f"✨ Python Optimization:")
+        print(f"   - Use function-level chunking (detected {languages['python']} Python files)")
+        print(f"   - Increase chunk size to 3000 chars for Python (better context)")
+    
+    if languages['markdown'] > 5:
+        print(f"✨ Markdown Optimization:")
        print(f"   - Use header-based chunking (detected {languages['markdown']} MD files)")
-        print("   - Keep sections together for better search relevance")
-
-    if languages["json"] > 20:
-        print("✨ JSON Optimization:")
+        print(f"   - Keep sections together for better search relevance")
+    
+    if languages['json'] > 20:
+        print(f"✨ JSON Optimization:")
        print(f"   - Consider object-level chunking (detected {languages['json']} JSON files)")
-        print("   - Might want to exclude large config JSONs")
-
+        print(f"   - Might want to exclude large config JSONs")
+    
    # Suggestion 2: File size optimization
    if large_files:
-        print("\n📈 Large File Optimization:")
+        print(f"\n📈 Large File Optimization:")
        print(f"   Found {len(large_files)} files >10KB:")
-        for filepath, size, chunks in sorted(large_files, key=lambda x: x[1], reverse=True)[
-            :3
-        ]:
+        for filepath, size, chunks in sorted(large_files, key=lambda x: x[1], reverse=True)[:3]:
            kb = size / 1024
            print(f"   - {filepath}: {kb:.1f}KB → {chunks} chunks")
        if len(large_files) > 5:
-            print("   💡 Consider streaming threshold: 5KB (current: 1MB)")
-
+            print(f"   💡 Consider streaming threshold: 5KB (current: 1MB)")
+    
    if small_files and len(small_files) > total_files * 0.3:
-        print("\n📉 Small File Optimization:")
+        print(f"\n📉 Small File Optimization:")
        print(f"   {len(small_files)} files <500B might not need chunking")
-        print("   💡 Consider: combine small files or skip tiny ones")
-
+        print(f"   💡 Consider: combine small files or skip tiny ones")
+    
    # Suggestion 3: Search optimization
    avg_efficiency = sum(chunk_efficiency) / len(chunk_efficiency)
-    print("\n🔍 Search Optimization:")
+    print(f"\n🔍 Search Optimization:")
    if avg_efficiency < 0.5:
-        print("   💡 Chunks are large relative to files - consider smaller chunks")
+        print(f"   💡 Chunks are large relative to files - consider smaller chunks")
        print(f"   💡 Current: {avg_chunks_per_file:.1f} chunks/file, try 2-3 chunks/file")
    elif avg_efficiency > 2:
-        print("   💡 Many small chunks - consider larger chunk size")
-        print("   💡 Reduce chunk overhead with 2000-4000 char chunks")
-
+        print(f"   💡 Many small chunks - consider larger chunk size")
+        print(f"   💡 Reduce chunk overhead with 2000-4000 char chunks")
+    
    # Suggestion 4: Smart defaults
-    print("\n⚙️ Recommended Config Updates:")
-    print(
-        """{{
+    print(f"\n⚙️ Recommended Config Updates:")
+    print(f"""{{
  "chunking": {{
    "max_size": {3000 if languages['python'] > languages['markdown'] else 2000},
    "min_size": 200,
@ -121,18 +115,16 @@ def analyze_project_patterns(manifest_path: Path):
    "skip_small_files": {500 if len(small_files) > total_files * 0.3 else 0},
    "streaming_threshold_kb": {5 if len(large_files) > 5 else 1024}
  }}
-}}"""
-    )
-
+}}""")

 if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python smart_config_suggestions.py <path_to_manifest.json>")
        sys.exit(1)
-
+    
    manifest_path = Path(sys.argv[1])
    if not manifest_path.exists():
        print(f"Manifest not found: {manifest_path}")
        sys.exit(1)
-
-    analyze_project_patterns(manifest_path)
+        
+    analyze_project_patterns(manifest_path)
--- a/install_mini_rag.sh
+++ b/install_mini_rag.sh
@ -4,30 +4,6 @@

 set -e  # Exit on any error

-# Check for command line arguments
-HEADLESS_MODE=false
-if [[ "$1" == "--headless" ]]; then
-    HEADLESS_MODE=true
-    echo "🤖 Running in headless mode - using defaults for automation"
-elif [[ "$1" == "--help" || "$1" == "-h" ]]; then
-    echo ""
-    echo "FSS-Mini-RAG Installation Script"
-    echo ""
-    echo "Usage:"
-    echo "  ./install_mini_rag.sh           # Interactive installation"
-    echo "  ./install_mini_rag.sh --headless  # Automated installation for agents/CI"
-    echo "  ./install_mini_rag.sh --help      # Show this help"
-    echo ""
-    echo "Headless mode options:"
-    echo "  • Uses existing virtual environment if available"
-    echo "  • Selects light installation (Ollama + basic dependencies)"  
-    echo "  • Downloads nomic-embed-text model if Ollama is available"
-    echo "  • Skips interactive prompts and tests"
-    echo "  • Perfect for agent automation and CI/CD pipelines"
-    echo ""
-    exit 0
-fi
-
 # Colors for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
@ -108,19 +84,14 @@ check_python() {
 check_venv() {
    if [ -d "$SCRIPT_DIR/.venv" ]; then
        print_info "Virtual environment already exists at $SCRIPT_DIR/.venv"
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Using existing virtual environment"
-            return 0  # Use existing
+        echo -n "Recreate it? (y/N): "
+        read -r recreate
+        if [[ $recreate =~ ^[Yy]$ ]]; then
+            print_info "Removing existing virtual environment..."
+            rm -rf "$SCRIPT_DIR/.venv"
+            return 1  # Needs creation
        else
-            echo -n "Recreate it? (y/N): "
-            read -r recreate
-            if [[ $recreate =~ ^[Yy]$ ]]; then
-                print_info "Removing existing virtual environment..."
-                rm -rf "$SCRIPT_DIR/.venv"
-                return 1  # Needs creation
-            else
-                return 0  # Use existing
-            fi
+            return 0  # Use existing
        fi
    else
        return 1  # Needs creation
@ -169,13 +140,8 @@ check_ollama() {
            return 0
        else
            print_warning "Ollama is installed but not running"
-            if [[ "$HEADLESS_MODE" == "true" ]]; then
-                print_info "Headless mode: Starting Ollama server automatically"
-                start_ollama="y"
-            else
-                echo -n "Start Ollama now? (Y/n): "
-                read -r start_ollama
-            fi
+            echo -n "Start Ollama now? (Y/n): "
+            read -r start_ollama
            if [[ ! $start_ollama =~ ^[Nn]$ ]]; then
                print_info "Starting Ollama server..."
                ollama serve &
@ -202,26 +168,15 @@ check_ollama() {
        echo -e "${YELLOW}2) Manual installation${NC} - Visit https://ollama.com/download"
        echo -e "${BLUE}3) Continue without Ollama${NC} (uses ML fallback)"
        echo ""
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Continuing without Ollama (option 3)"
-            ollama_choice="3"
-        else
-            echo -n "Choose [1/2/3]: "
-            read -r ollama_choice
-        fi
+        echo -n "Choose [1/2/3]: "
+        read -r ollama_choice
        
        case "$ollama_choice" in
            1|"")
-                print_info "Installing Ollama using secure installation method..."
-                echo -e "${CYAN}Downloading and verifying Ollama installer...${NC}"
+                print_info "Installing Ollama using official installer..."
+                echo -e "${CYAN}Running: curl -fsSL https://ollama.com/install.sh | sh${NC}"
                
-                # Secure installation: download, verify, then execute
-                local temp_script="/tmp/ollama-install-$$.sh"
-                if curl -fsSL https://ollama.com/install.sh -o "$temp_script" && \
-                   file "$temp_script" | grep -q "shell script" && \
-                   chmod +x "$temp_script" && \
-                   "$temp_script"; then
-                    rm -f "$temp_script"
+                if curl -fsSL https://ollama.com/install.sh | sh; then
                    print_success "Ollama installed successfully"
                    
                    print_info "Starting Ollama server..."
@ -312,13 +267,8 @@ setup_ollama_model() {
        echo "  • Purpose: High-quality semantic embeddings"
        echo "  • Alternative: System will use ML/hash fallbacks"
        echo ""
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Downloading nomic-embed-text model"
-            download_model="y"
-        else
-            echo -n "Download model? [y/N]: "
-            read -r download_model
-        fi
+        echo -n "Download model? [y/N]: "
+        read -r download_model
        should_download=$([ "$download_model" = "y" ] && echo "download" || echo "skip")
    fi
    
@ -378,21 +328,15 @@ get_installation_preferences() {
    echo ""
    
    while true; do
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            # Default to light installation in headless mode
-            choice="L"
-            print_info "Headless mode: Selected Light installation"
-        else
-            echo -n "Choose [L/F/C] or Enter for recommended ($recommended): "
-            read -r choice
-            
-            # Default to recommendation if empty
-            if [ -z "$choice" ]; then
-                if [ "$ollama_available" = true ]; then
-                    choice="L"
-                else
-                    choice="F"  
-                fi
+        echo -n "Choose [L/F/C] or Enter for recommended ($recommended): "
+        read -r choice
+        
+        # Default to recommendation if empty
+        if [ -z "$choice" ]; then
+            if [ "$ollama_available" = true ]; then
+                choice="L"
+            else
+                choice="F"  
            fi
        fi
        
@ -434,13 +378,8 @@ configure_custom_installation() {
        echo ""
        echo -e "${BOLD}Ollama embedding model:${NC}"
        echo "  • nomic-embed-text (~270MB) - Best quality embeddings"
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Downloading Ollama model"
-            download_ollama="y"
-        else
-            echo -n "Download Ollama model? [y/N]: "
-            read -r download_ollama
-        fi
+        echo -n "Download Ollama model? [y/N]: "
+        read -r download_ollama
        if [[ $download_ollama =~ ^[Yy]$ ]]; then
            ollama_model="download"
        fi
@ -451,13 +390,8 @@ configure_custom_installation() {
    echo -e "${BOLD}ML fallback system:${NC}"
    echo "  • PyTorch + transformers (~2-3GB) - Works without Ollama"
    echo "  • Useful for: Offline use, server deployments, CI/CD"
-    if [[ "$HEADLESS_MODE" == "true" ]]; then
-        print_info "Headless mode: Skipping ML dependencies (keeping light)"
-        include_ml="n"
-    else
-        echo -n "Include ML dependencies? [y/N]: "
-        read -r include_ml
-    fi
+    echo -n "Include ML dependencies? [y/N]: "
+    read -r include_ml
    
    # Pre-download models
    local predownload_ml="skip"
@ -466,13 +400,8 @@ configure_custom_installation() {
        echo -e "${BOLD}Pre-download ML models:${NC}"
        echo "  • sentence-transformers model (~80MB)"
        echo "  • Skip: Models download automatically when first used"
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Skipping ML model pre-download"
-            predownload="n"
-        else
-            echo -n "Pre-download now? [y/N]: "
-            read -r predownload
-        fi
+        echo -n "Pre-download now? [y/N]: "
+        read -r predownload
        if [[ $predownload =~ ^[Yy]$ ]]; then
            predownload_ml="download"
        fi
@ -616,13 +545,8 @@ setup_ml_models() {
        echo "  • Purpose: Offline fallback when Ollama unavailable"
        echo "  • If skipped: Auto-downloads when first needed"
        echo ""
-        if [[ "$HEADLESS_MODE" == "true" ]]; then
-            print_info "Headless mode: Skipping ML model pre-download"
-            download_ml="n"
-        else
-            echo -n "Pre-download now? [y/N]: "
-            read -r download_ml
-        fi
+        echo -n "Pre-download now? [y/N]: "
+        read -r download_ml
        should_predownload=$([ "$download_ml" = "y" ] && echo "download" || echo "skip")
    fi
    
@ -777,11 +701,7 @@ show_completion() {
    printf "Run quick test now? [Y/n]: "
    
    # More robust input handling
-    if [[ "$HEADLESS_MODE" == "true" ]]; then
-        print_info "Headless mode: Skipping interactive test"
-        echo -e "${BLUE}You can test FSS-Mini-RAG anytime with: ./rag-tui${NC}"
-        show_beginner_guidance
-    elif read -r run_test < /dev/tty 2>/dev/null; then
+    if read -r run_test < /dev/tty 2>/dev/null; then
        echo "User chose: '$run_test'"  # Debug output
        if [[ ! $run_test =~ ^[Nn]$ ]]; then
            run_quick_test
@ -812,13 +732,8 @@ run_quick_test() {
    echo -e "${GREEN}1) Code${NC} - Index the FSS-Mini-RAG codebase (~50 files)"
    echo -e "${BLUE}2) Docs${NC} - Index the documentation (~10 files)"  
    echo ""
-    if [[ "$HEADLESS_MODE" == "true" ]]; then
-        print_info "Headless mode: Indexing code by default"
-        index_choice="1"
-    else
-        echo -n "Choose [1/2] or Enter for code: "
-        read -r index_choice
-    fi
+    echo -n "Choose [1/2] or Enter for code: "
+    read -r index_choice
    
    # Determine what to index
    local target_dir="$SCRIPT_DIR"
@ -853,10 +768,8 @@ run_quick_test() {
        echo -e "${CYAN}The TUI has 6 sample questions to get you started.${NC}"
        echo -e "${CYAN}Try the suggested queries or enter your own!${NC}"
        echo ""
-        if [[ "$HEADLESS_MODE" != "true" ]]; then
-            echo -n "Press Enter to start interactive tutorial: "
-            read -r
-        fi
+        echo -n "Press Enter to start interactive tutorial: "
+        read -r
        
        # Launch the TUI which has the existing interactive tutorial system
        ./rag-tui.py "$target_dir" || true
@ -919,15 +832,11 @@ main() {
    echo -e "${CYAN}Note: You'll be asked before downloading any models${NC}"
    echo ""
    
-    if [[ "$HEADLESS_MODE" == "true" ]]; then
-        print_info "Headless mode: Beginning installation automatically"
-    else
-        echo -n "Begin installation? [Y/n]: "
-        read -r continue_install
-        if [[ $continue_install =~ ^[Nn]$ ]]; then
-            echo "Installation cancelled."
-            exit 0
-        fi
+    echo -n "Begin installation? [Y/n]: "
+    read -r continue_install
+    if [[ $continue_install =~ ^[Nn]$ ]]; then
+        echo "Installation cancelled."
+        exit 0
    fi
    
    # Run installation steps
--- a/install_windows.bat
+++ b/install_windows.bat
@ -5,40 +5,6 @@ setlocal enabledelayedexpansion
 REM Enable colors and unicode for modern Windows
 chcp 65001 >nul 2>&1

-REM Check for command line arguments
-set "HEADLESS_MODE=false"
-if "%1"=="--headless" (
-    set "HEADLESS_MODE=true"
-    echo 🤖 Running in headless mode - using defaults for automation
-) else if "%1"=="--help" (
-    goto show_help
-) else if "%1"=="-h" (
-    goto show_help
-)
-
-goto start_installation
-
-:show_help
-echo.
-echo FSS-Mini-RAG Windows Installation Script
-echo.
-echo Usage:
-echo   install_windows.bat           # Interactive installation
-echo   install_windows.bat --headless   # Automated installation for agents/CI
-echo   install_windows.bat --help       # Show this help
-echo.
-echo Headless mode options:
-echo   • Uses existing virtual environment if available
-echo   • Installs core dependencies only
-echo   • Skips AI model downloads
-echo   • Skips interactive prompts and tests  
-echo   • Perfect for agent automation and CI/CD pipelines
-echo.
-pause
-exit /b 0
-
-:start_installation
-
 echo.
 echo ╔══════════════════════════════════════════════════╗
 echo ║            FSS-Mini-RAG Windows Installer       ║
@ -55,15 +21,11 @@ echo.
 echo 💡 Note: You'll be asked before downloading any models
 echo.

-if "!HEADLESS_MODE!"=="true" (
-    echo Headless mode: Beginning installation automatically
-) else (
-    set /p "continue=Begin installation? [Y/n]: "
-    if /i "!continue!"=="n" (
-        echo Installation cancelled.
-        pause
-        exit /b 0
-    )
+set /p "continue=Begin installation? [Y/n]: "
+if /i "!continue!"=="n" (
+    echo Installation cancelled.
+    pause
+    exit /b 0
 )

 REM Get script directory
@ -241,16 +203,11 @@ REM Offer interactive tutorial
 echo 🧪 Quick Test Available:
 echo    Test FSS-Mini-RAG with a small sample project (takes ~30 seconds)
 echo.
-if "!HEADLESS_MODE!"=="true" (
-    echo Headless mode: Skipping interactive tutorial
-    echo 📚 You can run the tutorial anytime with: rag.bat
+set /p "run_test=Run interactive tutorial now? [Y/n]: "
+if /i "!run_test!" NEQ "n" (
+    call :run_tutorial
 ) else (
-    set /p "run_test=Run interactive tutorial now? [Y/n]: "
-    if /i "!run_test!" NEQ "n" (
-        call :run_tutorial
-    ) else (
-        echo 📚 You can run the tutorial anytime with: rag.bat
-    )
+    echo 📚 You can run the tutorial anytime with: rag.bat
 )

 echo.
@ -288,12 +245,7 @@ curl -s http://localhost:11434/api/version >nul 2>&1
 if errorlevel 1 (
    echo 🟡 Ollama installed but not running
    echo.
-    if "!HEADLESS_MODE!"=="true" (
-        echo Headless mode: Starting Ollama server automatically
-        set "start_ollama=y"
-    ) else (
-        set /p "start_ollama=Start Ollama server now? [Y/n]: "
-    )
+    set /p "start_ollama=Start Ollama server now? [Y/n]: "
    if /i "!start_ollama!" NEQ "n" (
        echo 🚀 Starting Ollama server...
        start /b ollama serve
@ -321,12 +273,7 @@ if errorlevel 1 (
    echo    • qwen3:0.6b    - Lightweight and fast (~500MB)  
    echo    • qwen3:4b      - Higher quality but slower (~2.5GB)
    echo.
-    if "!HEADLESS_MODE!"=="true" (
-        echo Headless mode: Skipping model download
-        set "install_model=n"
-    ) else (
-        set /p "install_model=Download qwen3:1.7b model now? [Y/n]: "
-    )
+    set /p "install_model=Download qwen3:1.7b model now? [Y/n]: "
    if /i "!install_model!" NEQ "n" (
        echo 📥 Downloading qwen3:1.7b model...
        echo    This may take 5-10 minutes depending on your internet speed
--- a/mini_rag/init.py
+++ b/mini_rag/init.py
@ -7,16 +7,30 @@ Designed for portability, efficiency, and simplicity across projects and compute

 __version__ = "2.1.0"

+from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .chunker import CodeChunker
 from .indexer import ProjectIndexer
-from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .search import CodeSearcher
 from .watcher import FileWatcher

-__all__ = [
-    "CodeEmbedder",
-    "CodeChunker",
-    "ProjectIndexer",
-    "CodeSearcher",
-    "FileWatcher",
-]
+# Auto-update system (graceful import for legacy versions)
+try:
+    from .updater import UpdateChecker, check_for_updates, get_updater
+    __all__ = [
+        "CodeEmbedder",
+        "CodeChunker", 
+        "ProjectIndexer",
+        "CodeSearcher",
+        "FileWatcher",
+        "UpdateChecker",
+        "check_for_updates", 
+        "get_updater",
+    ]
+except ImportError:
+    __all__ = [
+        "CodeEmbedder",
+        "CodeChunker", 
+        "ProjectIndexer",
+        "CodeSearcher",
+        "FileWatcher",
+    ]
--- a/mini_rag/main.py
+++ b/mini_rag/main.py
@ -2,5 +2,5 @@

 from .cli import cli

-if __name__ == "__main__":
-    cli()
+if __name__ == '__main__':
+    cli()
--- a/mini_rag/auto_optimizer.py
+++ b/mini_rag/auto_optimizer.py
@ -3,188 +3,194 @@ Auto-optimizer for FSS-Mini-RAG.
 Automatically tunes settings based on usage patterns.
 """

-import json
-import logging
-from collections import Counter
 from pathlib import Path
-from typing import Any, Dict
+import json
+from typing import Dict, Any, List
+from collections import Counter
+import logging

 logger = logging.getLogger(__name__)

-
 class AutoOptimizer:
    """Automatically optimizes RAG settings based on project patterns."""
-
+    
    def __init__(self, project_path: Path):
        self.project_path = project_path
-        self.rag_dir = project_path / ".mini-rag"
-        self.config_path = self.rag_dir / "config.json"
-        self.manifest_path = self.rag_dir / "manifest.json"
-
+        self.rag_dir = project_path / '.mini-rag'
+        self.config_path = self.rag_dir / 'config.json'
+        self.manifest_path = self.rag_dir / 'manifest.json'
+    
    def analyze_and_optimize(self) -> Dict[str, Any]:
        """Analyze current patterns and auto-optimize settings."""
-
+        
        if not self.manifest_path.exists():
            return {"error": "No index found - run indexing first"}
-
+        
        # Load current data
        with open(self.manifest_path) as f:
            manifest = json.load(f)
-
+        
        # Analyze patterns
        analysis = self._analyze_patterns(manifest)
-
+        
        # Generate optimizations
        optimizations = self._generate_optimizations(analysis)
-
+        
        # Apply optimizations if beneficial
-        if optimizations["confidence"] > 0.7:
+        if optimizations['confidence'] > 0.7:
            self._apply_optimizations(optimizations)
            return {
                "status": "optimized",
-                "changes": optimizations["changes"],
-                "expected_improvement": optimizations["expected_improvement"],
+                "changes": optimizations['changes'],
+                "expected_improvement": optimizations['expected_improvement']
            }
        else:
            return {
                "status": "no_changes_needed",
                "analysis": analysis,
-                "confidence": optimizations["confidence"],
+                "confidence": optimizations['confidence']
            }
-
+    
    def _analyze_patterns(self, manifest: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze current indexing patterns."""
-        files = manifest.get("files", {})
-
+        files = manifest.get('files', {})
+        
        # Language distribution
        languages = Counter()
        sizes = []
        chunk_ratios = []
-
+        
        for filepath, info in files.items():
-            lang = info.get("language", "unknown")
+            lang = info.get('language', 'unknown')
            languages[lang] += 1
-
-            size = info.get("size", 0)
-            chunks = info.get("chunks", 1)
-
+            
+            size = info.get('size', 0)
+            chunks = info.get('chunks', 1)
+            
            sizes.append(size)
            chunk_ratios.append(chunks / max(1, size / 1000))  # chunks per KB
-
+        
        avg_chunk_ratio = sum(chunk_ratios) / len(chunk_ratios) if chunk_ratios else 1
        avg_size = sum(sizes) / len(sizes) if sizes else 1000
-
+        
        return {
-            "languages": dict(languages.most_common()),
-            "total_files": len(files),
-            "total_chunks": sum(info.get("chunks", 1) for info in files.values()),
-            "avg_chunk_ratio": avg_chunk_ratio,
-            "avg_file_size": avg_size,
-            "large_files": sum(1 for s in sizes if s > 10000),
-            "small_files": sum(1 for s in sizes if s < 500),
+            'languages': dict(languages.most_common()),
+            'total_files': len(files),
+            'total_chunks': sum(info.get('chunks', 1) for info in files.values()),
+            'avg_chunk_ratio': avg_chunk_ratio,
+            'avg_file_size': avg_size,
+            'large_files': sum(1 for s in sizes if s > 10000),
+            'small_files': sum(1 for s in sizes if s < 500)
        }
-
+    
    def _generate_optimizations(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Generate optimization recommendations."""
        changes = []
        confidence = 0.5
        expected_improvement = 0
-
+        
        # Optimize chunking based on dominant language
-        languages = analysis["languages"]
+        languages = analysis['languages']
        if languages:
            dominant_lang, count = list(languages.items())[0]
-            lang_pct = count / analysis["total_files"]
-
+            lang_pct = count / analysis['total_files']
+            
            if lang_pct > 0.3:  # Dominant language >30%
-                if dominant_lang == "python" and analysis["avg_chunk_ratio"] < 1.5:
-                    changes.append(
-                        "Increase Python chunk size to 3000 for better function context"
-                    )
+                if dominant_lang == 'python' and analysis['avg_chunk_ratio'] < 1.5:
+                    changes.append("Increase Python chunk size to 3000 for better function context")
                    confidence += 0.2
                    expected_improvement += 15
-
-                elif dominant_lang == "markdown" and analysis["avg_chunk_ratio"] < 1.2:
+                
+                elif dominant_lang == 'markdown' and analysis['avg_chunk_ratio'] < 1.2:
                    changes.append("Use header-based chunking for Markdown files")
                    confidence += 0.15
                    expected_improvement += 10
-
+        
        # Optimize for large files
-        if analysis["large_files"] > 5:
+        if analysis['large_files'] > 5:
            changes.append("Reduce streaming threshold to 5KB for better large file handling")
            confidence += 0.1
            expected_improvement += 8
-
+        
        # Optimize chunk ratio
-        if analysis["avg_chunk_ratio"] < 1.0:
+        if analysis['avg_chunk_ratio'] < 1.0:
            changes.append("Reduce chunk size for more granular search results")
            confidence += 0.15
            expected_improvement += 12
-        elif analysis["avg_chunk_ratio"] > 3.0:
+        elif analysis['avg_chunk_ratio'] > 3.0:
            changes.append("Increase chunk size to reduce overhead")
            confidence += 0.1
            expected_improvement += 5
-
+        
        # Skip tiny files optimization
-        small_file_pct = analysis["small_files"] / analysis["total_files"]
+        small_file_pct = analysis['small_files'] / max(1, analysis['total_files'])  # empty index: avoid 0/0
        if small_file_pct > 0.3:
            changes.append("Skip files smaller than 300 bytes to improve focus")
            confidence += 0.1
            expected_improvement += 3
-
+        
        return {
-            "changes": changes,
-            "confidence": min(confidence, 1.0),
-            "expected_improvement": expected_improvement,
+            'changes': changes,
+            'confidence': min(confidence, 1.0),
+            'expected_improvement': expected_improvement
        }
-
+    
    def _apply_optimizations(self, optimizations: Dict[str, Any]):
        """Apply the recommended optimizations."""
-
+        
        # Load existing config or create default
        if self.config_path.exists():
            with open(self.config_path) as f:
                config = json.load(f)
        else:
            config = self._get_default_config()
-
-        changes = optimizations["changes"]
-
+        
+        changes = optimizations['changes']
+        
        # Apply changes based on recommendations
        for change in changes:
            if "Python chunk size to 3000" in change:
-                config.setdefault("chunking", {})["max_size"] = 3000
-
+                config.setdefault('chunking', {})['max_size'] = 3000
+                
            elif "header-based chunking" in change:
-                config.setdefault("chunking", {})["strategy"] = "header"
-
+                config.setdefault('chunking', {})['strategy'] = 'header'
+                
            elif "streaming threshold to 5KB" in change:
-                config.setdefault("streaming", {})["threshold_bytes"] = 5120
-
+                config.setdefault('streaming', {})['threshold_bytes'] = 5120
+                
            elif "Reduce chunk size" in change:
-                current_size = config.get("chunking", {}).get("max_size", 2000)
-                config.setdefault("chunking", {})["max_size"] = max(1500, current_size - 500)
-
+                current_size = config.get('chunking', {}).get('max_size', 2000)
+                config.setdefault('chunking', {})['max_size'] = max(1500, current_size - 500)
+                
            elif "Increase chunk size" in change:
-                current_size = config.get("chunking", {}).get("max_size", 2000)
-                config.setdefault("chunking", {})["max_size"] = min(4000, current_size + 500)
-
+                current_size = config.get('chunking', {}).get('max_size', 2000)
+                config.setdefault('chunking', {})['max_size'] = min(4000, current_size + 500)
+                
            elif "Skip files smaller" in change:
-                config.setdefault("files", {})["min_file_size"] = 300
-
+                config.setdefault('files', {})['min_file_size'] = 300
+        
        # Save optimized config
-        config["_auto_optimized"] = True
-        config["_optimization_timestamp"] = json.dumps(None, default=str)
-
-        with open(self.config_path, "w") as f:
+        from datetime import datetime, timezone  # local import: only needed when saving
+        config['_auto_optimized'] = True
+        config['_optimization_timestamp'] = datetime.now(timezone.utc).isoformat()
+        with open(self.config_path, 'w') as f:
            json.dump(config, f, indent=2)
-
+        
        logger.info(f"Applied {len(changes)} optimizations to {self.config_path}")
-
+    
    def _get_default_config(self) -> Dict[str, Any]:
        """Get default configuration."""
        return {
-            "chunking": {"max_size": 2000, "min_size": 150, "strategy": "semantic"},
-            "streaming": {"enabled": True, "threshold_bytes": 1048576},
-            "files": {"min_file_size": 50},
-        }
+            "chunking": {
+                "max_size": 2000,
+                "min_size": 150,
+                "strategy": "semantic"
+            },
+            "streaming": {
+                "enabled": True,
+                "threshold_bytes": 1048576
+            },
+            "files": {
+                "min_file_size": 50
+            }
+        }
--- a/mini_rag/chunker.py
+++ b/mini_rag/chunker.py
--- a/mini_rag/cli.py
+++ b/mini_rag/cli.py
@ -3,57 +3,59 @@ Command-line interface for Mini RAG system.
 Beautiful, intuitive, and highly effective.
 """

-import logging
+import click
 import sys
 import time
+import logging
 from pathlib import Path
 from typing import Optional

-import click
+# Fix Windows console for proper emoji/Unicode support
+from .windows_console_fix import fix_windows_console
+fix_windows_console()
+
 from rich.console import Console
-from rich.logging import RichHandler
-from rich.panel import Panel
-from rich.progress import Progress, SpinnerColumn, TextColumn
-from rich.syntax import Syntax
 from rich.table import Table
+from rich.progress import Progress, SpinnerColumn, TextColumn
+from rich.logging import RichHandler
+from rich.syntax import Syntax
+from rich.panel import Panel
+from rich import print as rprint

 from .indexer import ProjectIndexer
+from .search import CodeSearcher
+from .watcher import FileWatcher
 from .non_invasive_watcher import NonInvasiveFileWatcher
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
+from .chunker import CodeChunker
 from .performance import get_monitor
-from .search import CodeSearcher
-from .server import RAGClient, start_server
-from .windows_console_fix import fix_windows_console
-
-# Fix Windows console for proper emoji/Unicode support
-fix_windows_console()
+from .server import RAGClient
+from .server import RAGServer, RAGClient, start_server

 # Set up logging
 logging.basicConfig(
    level=logging.INFO,
    format="%(message)s",
-    handlers=[RichHandler(rich_tracebacks=True)],
+    handlers=[RichHandler(rich_tracebacks=True)]
 )
 logger = logging.getLogger(__name__)
 console = Console()


@click.group()
-@click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging")
-@click.option("--quiet", "-q", is_flag=True, help="Suppress output")
+@click.option('--verbose', '-v', is_flag=True, help='Enable verbose logging')
+@click.option('--quiet', '-q', is_flag=True, help='Suppress output')
 def cli(verbose: bool, quiet: bool):
    """
    Mini RAG - Fast semantic code search that actually works.
-
-    A local RAG system for improving the development environment's grounding
-    capabilities.
+    
+    A local RAG system for improving the development environment's grounding capabilities.
    Indexes your codebase and enables lightning-fast semantic search.
    """
    # Check virtual environment
    from .venv_checker import check_and_warn_venv
-
    check_and_warn_venv("rag-mini", force_exit=False)
-
+    
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    elif quiet:
@ -61,45 +63,43 @@ def cli(verbose: bool, quiet: bool):


@cli.command()
-@click.option(
-    "--path",
-    "-p",
-    type=click.Path(exists=True),
-    default=".",
-    help="Project path to index",
-)
-@click.option("--force", "-", is_flag=True, help="Force reindex all files")
-@click.option("--reindex", "-r", is_flag=True, help="Force complete reindex (same as --force)")
-@click.option("--model", "-m", type=str, default=None, help="Embedding model to use")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.', 
+              help='Project path to index')
+@click.option('--force', '-f', is_flag=True, 
+              help='Force reindex all files')
+@click.option('--reindex', '-r', is_flag=True, 
+              help='Force complete reindex (same as --force)')
+@click.option('--model', '-m', type=str, default=None,
+              help='Embedding model to use')
 def init(path: str, force: bool, reindex: bool, model: Optional[str]):
    """Initialize RAG index for a project."""
    project_path = Path(path).resolve()
-
+    
    console.print(f"\n[bold cyan]Initializing Mini RAG for:[/bold cyan] {project_path}\n")
-
+    
    # Check if already initialized
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    force_reindex = force or reindex
    if rag_dir.exists() and not force_reindex:
        console.print("[yellow][/yellow]  Project already initialized!")
        console.print("Use --force or --reindex to reindex all files\n")
-
+        
        # Show current stats
        indexer = ProjectIndexer(project_path)
        stats = indexer.get_statistics()
-
+        
        table = Table(title="Current Index Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")
-
-        table.add_row("Files Indexed", str(stats["file_count"]))
-        table.add_row("Total Chunks", str(stats["chunk_count"]))
+        
+        table.add_row("Files Indexed", str(stats['file_count']))
+        table.add_row("Total Chunks", str(stats['chunk_count']))
        table.add_row("Index Size", f"{stats['index_size_mb']:.2f} MB")
-        table.add_row("Last Updated", stats["indexed_at"] or "Never")
-
+        table.add_row("Last Updated", stats['indexed_at'] or "Never")
+        
        console.print(table)
        return
-
+    
    # Initialize components
    try:
        with Progress(
@ -111,33 +111,34 @@ def init(path: str, force: bool, reindex: bool, model: Optional[str]):
            task = progress.add_task("[cyan]Loading embedding model...", total=None)
            embedder = CodeEmbedder(model_name=model)
            progress.update(task, completed=True)
-
+            
            # Create indexer
            task = progress.add_task("[cyan]Creating indexer...", total=None)
-            indexer = ProjectIndexer(project_path, embedder=embedder)
+            indexer = ProjectIndexer(
+                project_path,
+                embedder=embedder
+            )
            progress.update(task, completed=True)
-
+        
        # Run indexing
        console.print("\n[bold green]Starting indexing...[/bold green]\n")
        stats = indexer.index_project(force_reindex=force_reindex)
-
+        
        # Show summary
-        if stats["files_indexed"] > 0:
-            console.print(
-                f"\n[bold green] Success![/bold green] Indexed {stats['files_indexed']} files"
-            )
+        if stats['files_indexed'] > 0:
+            console.print(f"\n[bold green] Success![/bold green] Indexed {stats['files_indexed']} files")
            console.print(f"Created {stats['chunks_created']} searchable chunks")
            console.print(f"Time: {stats['time_taken']:.2f} seconds")
            console.print(f"Speed: {stats['files_per_second']:.1f} files/second")
        else:
            console.print("\n[green] All files are already up to date![/green]")
-
+        
        # Show how to use
        console.print("\n[bold]Next steps:[/bold]")
-        console.print('  • Search your code: [cyan]rag-mini search "your query"[/cyan]')
+        console.print("  • Search your code: [cyan]rag-mini search \"your query\"[/cyan]")
        console.print("  • Watch for changes: [cyan]rag-mini watch[/cyan]")
        console.print("  • View statistics: [cyan]rag-mini stats[/cyan]\n")
-
+        
    except Exception as e:
        console.print(f"\n[bold red]Error:[/bold red] {e}")
        logger.exception("Initialization failed")
@ -145,71 +146,64 @@ def init(path: str, force: bool, reindex: bool, model: Optional[str]):


@cli.command()
-@click.argument("query")
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
-@click.option("--top-k", "-k", type=int, default=10, help="Maximum results to show")
-@click.option(
-    "--type", "-t", multiple=True, help="Filter by chunk type (function, class, method)"
-)
-@click.option("--lang", multiple=True, help="Filter by language (python, javascript, etc.)")
-@click.option("--show-content", "-c", is_flag=True, help="Show code content in results")
-@click.option("--show-per", is_flag=True, help="Show performance metrics")
-def search(
-    query: str,
-    path: str,
-    top_k: int,
-    type: tuple,
-    lang: tuple,
-    show_content: bool,
-    show_perf: bool,
-):
+@click.argument('query')
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
+              help='Project path')
+@click.option('--top-k', '-k', type=int, default=10,
+              help='Maximum results to show')
+@click.option('--type', '-t', multiple=True,
+              help='Filter by chunk type (function, class, method)')
+@click.option('--lang', multiple=True,
+              help='Filter by language (python, javascript, etc.)')
+@click.option('--show-content', '-c', is_flag=True,
+              help='Show code content in results')
+@click.option('--show-perf', is_flag=True,
+              help='Show performance metrics')
+def search(query: str, path: str, top_k: int, type: tuple, lang: tuple, show_content: bool, show_perf: bool):
    """Search codebase using semantic similarity."""
    project_path = Path(path).resolve()
-
+    
    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
        console.print("[red]Error:[/red] Project not indexed. Run 'rag-mini init' first.")
        sys.exit(1)
-
+    
    # Get performance monitor
    monitor = get_monitor() if show_perf else None
-
+    
    # Check if server is running
    client = RAGClient()
    use_server = client.is_running()
-
+    
    try:
        if use_server:
            # Use server for fast queries
            console.print("[dim]Using RAG server...[/dim]")
-
+            
            response = client.search(query, top_k=top_k)
-
-            if response.get("success"):
+            
+            if response.get('success'):
                # Convert response to SearchResult objects
                from .search import SearchResult
-
                results = []
-                for r in response["results"]:
+                for r in response['results']:
                    result = SearchResult(
-                        file_path=r["file_path"],
-                        content=r["content"],
-                        score=r["score"],
-                        start_line=r["start_line"],
-                        end_line=r["end_line"],
-                        chunk_type=r["chunk_type"],
-                        name=r["name"],
-                        language=r["language"],
+                        file_path=r['file_path'],
+                        content=r['content'],
+                        score=r['score'],
+                        start_line=r['start_line'],
+                        end_line=r['end_line'],
+                        chunk_type=r['chunk_type'],
+                        name=r['name'],
+                        language=r['language']
                    )
                    results.append(result)
-
+                
                # Show server stats
-                search_time = response.get("search_time_ms", 0)
-                total_queries = response.get("total_queries", 0)
-                console.print(
-                    f"[dim]Search time: {search_time}ms (Query #{total_queries})[/dim]\n"
-                )
+                search_time = response.get('search_time_ms', 0)
+                total_queries = response.get('total_queries', 0)
+                console.print(f"[dim]Search time: {search_time}ms (Query #{total_queries})[/dim]\n")
            else:
                console.print(f"[red]Server error:[/red] {response.get('error')}")
                sys.exit(1)
@ -221,7 +215,7 @@ def search(
                    searcher = CodeSearcher(project_path)
            else:
                searcher = CodeSearcher(project_path)
-
+            
            # Perform search with timing
            if monitor:
                with monitor.measure("Execute Vector Search"):
@ -229,7 +223,7 @@ def search(
                        query,
                        top_k=top_k,
                        chunk_types=list(type) if type else None,
-                        languages=list(lang) if lang else None,
+                        languages=list(lang) if lang else None
                    )
            else:
                with console.status(f"[cyan]Searching for: {query}[/cyan]"):
@ -237,9 +231,9 @@ def search(
                        query,
                        top_k=top_k,
                        chunk_types=list(type) if type else None,
-                        languages=list(lang) if lang else None,
+                        languages=list(lang) if lang else None
                    )
-
+        
        # Display results
        if results:
            if use_server:
@ -249,30 +243,27 @@ def search(
                display_searcher.display_results(results, show_content=show_content)
            else:
                searcher.display_results(results, show_content=show_content)
-
+            
            # Copy first result to clipboard if available
            try:
                import pyperclip
-
                first_result = results[0]
                location = f"{first_result.file_path}:{first_result.start_line}"
                pyperclip.copy(location)
-                console.print(
-                    f"\n[dim]First result location copied to clipboard: {location}[/dim]"
-                )
-            except (ImportError, OSError):
-                pass  # Clipboard not available
+                console.print(f"\n[dim]First result location copied to clipboard: {location}[/dim]")
+            except Exception:
+                pass  # Clipboard is best-effort (pyperclip missing or no backend); never fail the search
        else:
            console.print(f"\n[yellow]No results found for: {query}[/yellow]")
            console.print("\n[dim]Tips:[/dim]")
            console.print("  • Try different keywords")
            console.print("  • Use natural language queries")
-
+        
        # Show performance summary
        if monitor:
            monitor.print_summary()
            console.print("  • Check if files are indexed with 'mini-rag stats'")
-
+        
    except Exception as e:
        console.print(f"\n[bold red]Search error:[/bold red] {e}")
        logger.exception("Search failed")
@ -280,69 +271,68 @@ def search(


@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
+              help='Project path')
 def stats(path: str):
    """Show index statistics."""
    project_path = Path(path).resolve()
-
+    
    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
        console.print("[red]Error:[/red] Project not indexed. Run 'rag-mini init' first.")
        sys.exit(1)
-
+    
    try:
        # Get statistics
        indexer = ProjectIndexer(project_path)
        index_stats = indexer.get_statistics()
-
+        
        searcher = CodeSearcher(project_path)
        search_stats = searcher.get_statistics()
-
+        
        # Display project info
        console.print(f"\n[bold cyan]Project:[/bold cyan] {project_path.name}")
        console.print(f"[dim]Path: {project_path}[/dim]\n")
-
+        
        # Index statistics table
        table = Table(title="Index Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")
-
-        table.add_row("Files Indexed", str(index_stats["file_count"]))
-        table.add_row("Total Chunks", str(index_stats["chunk_count"]))
+        
+        table.add_row("Files Indexed", str(index_stats['file_count']))
+        table.add_row("Total Chunks", str(index_stats['chunk_count']))
        table.add_row("Index Size", f"{index_stats['index_size_mb']:.2f} MB")
-        table.add_row("Last Updated", index_stats["indexed_at"] or "Never")
-
+        table.add_row("Last Updated", index_stats['indexed_at'] or "Never")
+        
        console.print(table)
-
+        
        # Language distribution
-        if "languages" in search_stats:
+        if 'languages' in search_stats:
            console.print("\n[bold]Language Distribution:[/bold]")
            lang_table = Table()
            lang_table.add_column("Language", style="cyan")
            lang_table.add_column("Chunks", style="green")
-
-            for lang, count in sorted(
-                search_stats["languages"].items(), key=lambda x: x[1], reverse=True
-            ):
+            
+            for lang, count in sorted(search_stats['languages'].items(), 
+                                     key=lambda x: x[1], reverse=True):
                lang_table.add_row(lang, str(count))
-
+            
            console.print(lang_table)
-
+        
        # Chunk type distribution
-        if "chunk_types" in search_stats:
+        if 'chunk_types' in search_stats:
            console.print("\n[bold]Chunk Types:[/bold]")
            type_table = Table()
            type_table.add_column("Type", style="cyan")
            type_table.add_column("Count", style="green")
-
-            for chunk_type, count in sorted(
-                search_stats["chunk_types"].items(), key=lambda x: x[1], reverse=True
-            ):
+            
+            for chunk_type, count in sorted(search_stats['chunk_types'].items(),
+                                           key=lambda x: x[1], reverse=True):
                type_table.add_row(chunk_type, str(count))
-
+            
            console.print(type_table)
-
+        
    except Exception as e:
        console.print(f"\n[bold red]Error:[/bold red] {e}")
        logger.exception("Failed to get statistics")
@ -350,116 +340,101 @@ def stats(path: str):


@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
+              help='Project path')
 def debug_schema(path: str):
    """Debug vector database schema and sample data."""
    project_path = Path(path).resolve()
-
+    
    try:
-        rag_dir = project_path / ".mini-rag"
-
+        rag_dir = project_path / '.mini-rag'
+        
        if not rag_dir.exists():
            console.print("[red]No RAG index found. Run 'rag-mini init' first.[/red]")
            return
-
+        
        # Connect to database
        try:
            import lancedb
        except ImportError:
-            console.print(
-                "[red]LanceDB not available. Install with: pip install lancedb pyarrow[/red]"
-            )
+            console.print("[red]LanceDB not available. Install with: pip install lancedb pyarrow[/red]")
            return
-
+        
        db = lancedb.connect(rag_dir)
-
+        
        if "code_vectors" not in db.table_names():
            console.print("[red]No code_vectors table found.[/red]")
            return
-
+        
        table = db.open_table("code_vectors")
-
+        
        # Print schema
        console.print("\n[bold cyan] Table Schema:[/bold cyan]")
        console.print(table.schema)
-
+        
        # Get sample data
-
+        import pandas as pd
        df = table.to_pandas()
-        console.print("\n[bold cyan] Table Statistics:[/bold cyan]")
+        console.print(f"\n[bold cyan] Table Statistics:[/bold cyan]")
        console.print(f"Total rows: {len(df)}")
-
+        
        if len(df) > 0:
            # Check embedding column
-            console.print("\n[bold cyan] Embedding Column Analysis:[/bold cyan]")
-            first_embedding = df["embedding"].iloc[0]
+            console.print(f"\n[bold cyan] Embedding Column Analysis:[/bold cyan]")
+            first_embedding = df['embedding'].iloc[0]
            console.print(f"Type: {type(first_embedding)}")
-            if hasattr(first_embedding, "shape"):
+            if hasattr(first_embedding, 'shape'):
                console.print(f"Shape: {first_embedding.shape}")
-            if hasattr(first_embedding, "dtype"):
+            if hasattr(first_embedding, 'dtype'):
                console.print(f"Dtype: {first_embedding.dtype}")
-
+            
            # Show first few rows
-            console.print("\n[bold cyan] Sample Data (first 3 rows):[/bold cyan]")
+            console.print(f"\n[bold cyan] Sample Data (first 3 rows):[/bold cyan]")
            for i in range(min(3, len(df))):
                row = df.iloc[i]
                console.print(f"\n[yellow]Row {i}:[/yellow]")
                console.print(f"  chunk_id: {row['chunk_id']}")
                console.print(f"  file_path: {row['file_path']}")
                console.print(f"  content: {row['content'][:50]}...")
-                embed_len = (
-                    len(row["embedding"])
-                    if hasattr(row["embedding"], "__len__")
-                    else "unknown"
-                )
-                console.print(f"  embedding: {type(row['embedding'])} of length {embed_len}")
-
+                console.print(f"  embedding: {type(row['embedding'])} of length {len(row['embedding']) if hasattr(row['embedding'], '__len__') else 'unknown'}")
+        
    except Exception as e:
        logger.error(f"Schema debug failed: {e}")
        console.print(f"[red]Error: {e}[/red]")


@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
-@click.option(
-    "--delay",
-    "-d",
-    type=float,
-    default=10.0,
-    help="Update delay in seconds (default: 10s for non-invasive)",
-)
-@click.option(
-    "--silent",
-    "-s",
-    is_flag=True,
-    default=False,
-    help="Run silently in background without output",
-)
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
+              help='Project path')
+@click.option('--delay', '-d', type=float, default=10.0,
+              help='Update delay in seconds (default: 10s for non-invasive)')
+@click.option('--silent', '-s', is_flag=True, default=False,
+              help='Run silently in background without output')
 def watch(path: str, delay: float, silent: bool):
    """Watch for file changes and update index automatically (non-invasive by default)."""
    project_path = Path(path).resolve()
-
+    
    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
        if not silent:
            console.print("[red]Error:[/red] Project not indexed. Run 'rag-mini init' first.")
        sys.exit(1)
-
+    
    try:
        # Always use non-invasive watcher
        watcher = NonInvasiveFileWatcher(project_path)
-
+        
        # Only show startup messages if not silent
        if not silent:
            console.print(f"\n[bold green]🕊️ Non-Invasive Watcher:[/bold green] {project_path}")
            console.print("[dim]Low CPU/memory usage - won't interfere with development[/dim]")
            console.print(f"[dim]Update delay: {delay}s[/dim]")
            console.print("\n[yellow]Press Ctrl+C to stop watching[/yellow]\n")
-
+        
        # Start watching
        watcher.start()
-
+        
        if silent:
            # Silent mode: just wait for interrupt without any output
            try:
@ -473,10 +448,10 @@ def watch(path: str, delay: float, silent: bool):
            while True:
                try:
                    time.sleep(1)
-
+                    
                    # Get current statistics
                    stats = watcher.get_statistics()
-
+                    
                    # Only update display if something changed
                    if stats != last_stats:
                        # Clear previous line
@ -484,28 +459,26 @@ def watch(path: str, delay: float, silent: bool):
                            f"\r[green]✓[/green] Files updated: {stats.get('files_processed', 0)} | "
                            f"[red]✗[/red] Failed: {stats.get('files_dropped', 0)} | "
                            f"[cyan]⧗[/cyan] Queue: {stats['queue_size']}",
-                            end="",
+                            end=""
                        )
                        last_stats = stats
-
+                    
                except KeyboardInterrupt:
                    break
-
+        
        # Stop watcher
        if not silent:
            console.print("\n\n[yellow]Stopping watcher...[/yellow]")
        watcher.stop()
-
+        
        # Show final stats only if not silent
        if not silent:
            final_stats = watcher.get_statistics()
-            console.print("\n[bold green]Watch Summary:[/bold green]")
+            console.print("\n[bold green]Watch Summary:[/bold green]")
            console.print(f"Files updated: {final_stats.get('files_processed', 0)}")
            console.print(f"Files failed: {final_stats.get('files_dropped', 0)}")
-            console.print(
-                f"Total runtime: {final_stats.get('uptime_seconds', 0):.1f} seconds\n"
-            )
-
+            console.print(f"Total runtime: {final_stats.get('uptime_seconds', 0):.1f} seconds\n")
+        
    except Exception as e:
        console.print(f"\n[bold red]Error:[/bold red] {e}")
        logger.exception("Watch failed")
@ -513,81 +486,86 @@ def watch(path: str, delay: float, silent: bool):


@cli.command()
-@click.argument("function_name")
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
-@click.option("--top-k", "-k", type=int, default=5, help="Maximum results")
+@click.argument('function_name')
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
+              help='Project path')
+@click.option('--top-k', '-k', type=int, default=5,
+              help='Maximum results')
 def find_function(function_name: str, path: str, top_k: int):
    """Find a specific function by name."""
    project_path = Path(path).resolve()
-
+    
    try:
        searcher = CodeSearcher(project_path)
        results = searcher.get_function(function_name, top_k=top_k)
-
+        
        if results:
            searcher.display_results(results, show_content=True)
        else:
            console.print(f"[yellow]No functions found matching: {function_name}[/yellow]")
-
+            
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)


@cli.command()
-@click.argument("class_name")
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
-@click.option("--top-k", "-k", type=int, default=5, help="Maximum results")
+@click.argument('class_name')
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
+              help='Project path')
+@click.option('--top-k', '-k', type=int, default=5,
+              help='Maximum results')
 def find_class(class_name: str, path: str, top_k: int):
    """Find a specific class by name."""
    project_path = Path(path).resolve()
-
+    
    try:
        searcher = CodeSearcher(project_path)
        results = searcher.get_class(class_name, top_k=top_k)
-
+        
        if results:
            searcher.display_results(results, show_content=True)
        else:
            console.print(f"[yellow]No classes found matching: {class_name}[/yellow]")
-
+            
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)


@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
+              help='Project path')
 def update(path: str):
    """Update index for changed files."""
    project_path = Path(path).resolve()
-
+    
    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
        console.print("[red]Error:[/red] Project not indexed. Run 'rag-mini init' first.")
        sys.exit(1)
-
+    
    try:
        indexer = ProjectIndexer(project_path)
-
+        
        console.print(f"\n[cyan]Checking for changes in {project_path}...[/cyan]\n")
-
+        
        stats = indexer.index_project(force_reindex=False)
-
-        if stats["files_indexed"] > 0:
+        
+        if stats['files_indexed'] > 0:
            console.print(f"[green][/green] Updated {stats['files_indexed']} files")
            console.print(f"Created {stats['chunks_created']} new chunks")
        else:
            console.print("[green] All files are up to date![/green]")
-
+            
    except Exception as e:
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)


@cli.command()
-@click.option("--show-code", "-c", is_flag=True, help="Show example code")
+@click.option('--show-code', '-c', is_flag=True, help='Show example code')
 def info(show_code: bool):
    """Show information about Mini RAG."""
    # Create info panel
@ -612,13 +590,13 @@ def info(show_code: bool):
 • Search: <50ms latency
 • Storage: ~200MB for 10k files
 """
-
+    
    panel = Panel(info_text, title="About Mini RAG", border_style="cyan")
    console.print(panel)
-
+    
    if show_code:
        console.print("\n[bold]Example Usage:[/bold]\n")
-
+        
        code = """# Initialize a project
 rag-mini init

@ -635,30 +613,32 @@ rag-mini watch

 # Get statistics
 rag-mini stats"""
-
+        
        syntax = Syntax(code, "bash", theme="monokai")
        console.print(syntax)


@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
-@click.option("--port", type=int, default=7777, help="Server port")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
+              help='Project path')
+@click.option('--port', type=int, default=7777,
+              help='Server port')
 def server(path: str, port: int):
    """Start persistent RAG server (keeps model loaded)."""
    project_path = Path(path).resolve()
-
+    
    # Check if indexed
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if not rag_dir.exists():
        console.print("[red]Error:[/red] Project not indexed. Run 'rag-mini init' first.")
        sys.exit(1)
-
+    
    try:
        console.print(f"[bold cyan]Starting RAG server for:[/bold cyan] {project_path}")
        console.print(f"[dim]Port: {port}[/dim]\n")
-
+        
        start_server(project_path, port)
-
+        
    except KeyboardInterrupt:
        console.print("\n[yellow]Server stopped by user[/yellow]")
    except Exception as e:
@ -668,67 +648,65 @@ def server(path: str, port: int):


@cli.command()
-@click.option("--path", "-p", type=click.Path(exists=True), default=".", help="Project path")
-@click.option("--port", type=int, default=7777, help="Server port")
-@click.option("--discovery", "-d", is_flag=True, help="Run codebase discovery analysis")
+@click.option('--path', '-p', type=click.Path(exists=True), default='.',
+              help='Project path')
+@click.option('--port', type=int, default=7777,
+              help='Server port')
+@click.option('--discovery', '-d', is_flag=True,
+              help='Run codebase discovery analysis')
 def status(path: str, port: int, discovery: bool):
    """Show comprehensive RAG system status with optional codebase discovery."""
    project_path = Path(path).resolve()
-
+    
    # Print header
    console.print(f"\n[bold cyan]RAG System Status for:[/bold cyan] {project_path.name}")
    console.print(f"[dim]Path: {project_path}[/dim]\n")
-
+    
    # Check folder contents
    console.print("[bold]📁 Folder Contents:[/bold]")
    try:
        all_files = list(project_path.rglob("*"))
-        source_files = [
-            f
-            for f in all_files
-            if f.is_file()
-            and f.suffix in [".py", ".js", ".ts", ".go", ".java", ".cpp", ".c", ".h"]
-        ]
-
+        source_files = [f for f in all_files if f.is_file() and f.suffix in ['.py', '.js', '.ts', '.go', '.java', '.cpp', '.c', '.h']]
+        
        console.print(f"   • Total files: {len([f for f in all_files if f.is_file()])}")
        console.print(f"   • Source files: {len(source_files)}")
        console.print(f"   • Directories: {len([f for f in all_files if f.is_dir()])}")
    except Exception as e:
        console.print(f"   [red]Error reading folder: {e}[/red]")
-
+    
    # Check index status
    console.print("\n[bold]🗂️ Index Status:[/bold]")
-    rag_dir = project_path / ".mini-rag"
+    rag_dir = project_path / '.mini-rag'
    if rag_dir.exists():
        try:
            indexer = ProjectIndexer(project_path)
            index_stats = indexer.get_statistics()
-
-            console.print("   • Status: [green]✅ Indexed[/green]")
+            
+            console.print("   • Status: [green]✅ Indexed[/green]")
            console.print(f"   • Files indexed: {index_stats['file_count']}")
            console.print(f"   • Total chunks: {index_stats['chunk_count']}")
            console.print(f"   • Index size: {index_stats['index_size_mb']:.2f} MB")
            console.print(f"   • Last updated: {index_stats['indexed_at'] or 'Never'}")
        except Exception as e:
-            console.print("   • Status: [yellow]⚠️ Index exists but has issues[/yellow]")
+            console.print("   • Status: [yellow]⚠️ Index exists but has issues[/yellow]")
            console.print(f"   • Error: {e}")
    else:
        console.print("   • Status: [red]❌ Not indexed[/red]")
        console.print("   • Run 'rag-mini init' to initialize")
-
+    
    # Check server status
    console.print("\n[bold]🚀 Server Status:[/bold]")
    client = RAGClient(port)
-
+    
    if client.is_running():
        console.print(f"   • Status: [green]✅ Running on port {port}[/green]")
-
+        
        # Try to get server info
        try:
            response = client.search("test", top_k=1)  # Minimal query to get stats
-            if response.get("success"):
-                uptime = response.get("server_uptime", 0)
-                queries = response.get("total_queries", 0)
+            if response.get('success'):
+                uptime = response.get('server_uptime', 0)
+                queries = response.get('total_queries', 0)
                console.print(f"   • Uptime: {uptime}s")
                console.print(f"   • Total queries: {queries}")
        except Exception as e:
@ -736,51 +714,47 @@ def status(path: str, port: int, discovery: bool):
    else:
        console.print(f"   • Status: [red]❌ Not running on port {port}[/red]")
        console.print("   • Run 'rag-mini server' to start the server")
-
+    
    # Run codebase discovery if requested
    if discovery and rag_dir.exists():
        console.print("\n[bold]🧠 Codebase Discovery:[/bold]")
        try:
            # Import and run intelligent discovery
            import sys
-
-            # Add tools directory to path
+            
+            # Add tools directory to path  
            tools_path = Path(__file__).parent.parent.parent / "tools"
            if tools_path.exists():
                sys.path.insert(0, str(tools_path))
                from intelligent_codebase_discovery import IntelligentCodebaseDiscovery
-
+                
                discovery_system = IntelligentCodebaseDiscovery(project_path)
                discovery_system.run_lightweight_discovery()
            else:
                console.print("   [yellow]Discovery system not found[/yellow]")
-
+                
        except Exception as e:
            console.print(f"   [red]Discovery failed: {e}[/red]")
-
+    
    elif discovery and not rag_dir.exists():
        console.print("\n[bold]🧠 Codebase Discovery:[/bold]")
        console.print("   [yellow]❌ Cannot run discovery - project not indexed[/yellow]")
        console.print("   Run 'rag-mini init' first to initialize the system")
-
+    
    # Show next steps
    console.print("\n[bold]📋 Next Steps:[/bold]")
    if not rag_dir.exists():
        console.print("   1. Run [cyan]rag-mini init[/cyan] to initialize the RAG system")
-        console.print('   2. Use [cyan]rag-mini search "your query"[/cyan] to search code')
+        console.print("   2. Use [cyan]rag-mini search \"your query\"[/cyan] to search code")
    elif not client.is_running():
        console.print("   1. Run [cyan]rag-mini server[/cyan] to start the server")
-        console.print('   2. Use [cyan]rag-mini search "your query"[/cyan] to search code')
+        console.print("   2. Use [cyan]rag-mini search \"your query\"[/cyan] to search code")
    else:
-        console.print(
-            '   • System ready! Use [cyan]rag-mini search "your query"[/cyan] to search'
-        )
-        console.print(
-            "   • Add [cyan]--discovery[/cyan] flag to run intelligent codebase analysis"
-        )
-
+        console.print("   • System ready! Use [cyan]rag-mini search \"your query\"[/cyan] to search")
+        console.print("   • Add [cyan]--discovery[/cyan] flag to run intelligent codebase analysis")
+    
    console.print()


-if __name__ == "__main__":
-    cli()
+if __name__ == '__main__':
+    cli()
--- a/mini_rag/config.py
+++ b/mini_rag/config.py
@ -3,14 +3,11 @@ Configuration management for FSS-Mini-RAG.
 Handles loading, saving, and validation of YAML config files.
 """

-import logging
-import re
-from dataclasses import asdict, dataclass
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
 import yaml
-import requests
+import logging
+from pathlib import Path
+from typing import Dict, Any, Optional
+from dataclasses import dataclass, asdict

 logger = logging.getLogger(__name__)

@ -18,7 +15,6 @@ logger = logging.getLogger(__name__)
@dataclass
 class ChunkingConfig:
    """Configuration for text chunking."""
-
    max_size: int = 2000
    min_size: int = 150
    strategy: str = "semantic"  # "semantic" or "fixed"
@ -27,7 +23,6 @@ class ChunkingConfig:
@dataclass
 class StreamingConfig:
    """Configuration for large file streaming."""
-
    enabled: bool = True
    threshold_bytes: int = 1048576  # 1MB

@ -35,22 +30,21 @@ class StreamingConfig:
@dataclass
 class FilesConfig:
    """Configuration for file processing."""
-
    min_file_size: int = 50
    exclude_patterns: list = None
    include_patterns: list = None
-
+    
    def __post_init__(self):
        if self.exclude_patterns is None:
            self.exclude_patterns = [
                "node_modules/**",
-                ".git/**",
+                ".git/**", 
                "__pycache__/**",
                "*.pyc",
                ".venv/**",
                "venv/**",
                "build/**",
-                "dist/**",
+                "dist/**"
            ]
        if self.include_patterns is None:
            self.include_patterns = ["**/*"]  # Include everything by default
@ -59,7 +53,6 @@ class FilesConfig:
@dataclass
 class EmbeddingConfig:
    """Configuration for embedding generation."""
-
    preferred_method: str = "ollama"  # "ollama", "ml", "hash", "auto"
    ollama_model: str = "nomic-embed-text"
    ollama_host: str = "localhost:11434"
@ -70,51 +63,52 @@ class EmbeddingConfig:
@dataclass
 class SearchConfig:
    """Configuration for search behavior."""
-
    default_top_k: int = 10
    enable_bm25: bool = True
    similarity_threshold: float = 0.1
    expand_queries: bool = False  # Enable automatic query expansion


-@dataclass
+@dataclass 
 class LLMConfig:
    """Configuration for LLM synthesis and query expansion."""
-
    # Core settings
    synthesis_model: str = "auto"  # "auto", "qwen3:1.7b", "qwen2.5:1.5b", etc.
    expansion_model: str = "auto"  # Usually same as synthesis_model
-    max_expansion_terms: int = 8  # Maximum additional terms to add
-    enable_synthesis: bool = False  # Enable by default when --synthesize used
+    max_expansion_terms: int = 8   # Maximum additional terms to add
+    enable_synthesis: bool = False # Enable by default when --synthesize used
    synthesis_temperature: float = 0.3
    enable_thinking: bool = True  # Enable thinking mode for Qwen3 models
-    cpu_optimized: bool = True  # Prefer lightweight models
-
+    cpu_optimized: bool = True     # Prefer lightweight models
+    
    # Context window configuration (critical for RAG performance)
-    context_window: int = 16384  # Context window size in tokens (16K recommended)
-    auto_context: bool = True  # Auto-adjust context based on model capabilities
-
+    context_window: int = 16384    # Context window size in tokens (16K recommended)
+    auto_context: bool = True      # Auto-adjust context based on model capabilities
+    
    # Model preference rankings (configurable)
-    model_rankings: list = None  # Will be set in __post_init__
-
+    model_rankings: list = None    # Will be set in __post_init__
+    
    # Provider-specific settings (for different LLM providers)
-    provider: str = "ollama"  # "ollama", "openai", "anthropic"
+    provider: str = "ollama"       # "ollama", "openai", "anthropic"
    ollama_host: str = "localhost:11434"  # Ollama connection
    api_key: Optional[str] = None  # API key for cloud providers
-    api_base: Optional[str] = None  # Base URL for API (e.g., OpenRouter)
-    timeout: int = 20  # Request timeout in seconds
-
+    api_base: Optional[str] = None # Base URL for API (e.g., OpenRouter)
+    timeout: int = 20              # Request timeout in seconds
+    
    def __post_init__(self):
        if self.model_rankings is None:
            # Default model preference rankings (can be overridden in config file)
            self.model_rankings = [
                # Testing model (prioritized for current testing phase)
                "qwen3:1.7b",
+                
                # Ultra-efficient models (perfect for CPU-only systems)
-                "qwen3:0.6b",
+                "qwen3:0.6b", 
+                
                # Recommended model (excellent quality but larger)
                "qwen3:4b",
-                # Common fallbacks (prioritize Qwen models)
+                
+                # Common fallbacks (prioritize Qwen models)  
                "qwen2.5:1.5b",
                "qwen2.5:3b",
            ]
@ -123,26 +117,24 @@ class LLMConfig:
@dataclass
 class UpdateConfig:
    """Configuration for auto-update system."""
-
-    auto_check: bool = True  # Check for updates automatically
+    auto_check: bool = True          # Check for updates automatically
    check_frequency_hours: int = 24  # How often to check (hours)
-    auto_install: bool = False  # Auto-install without asking (not recommended)
-    backup_before_update: bool = True  # Create backup before updating
-    notify_beta_releases: bool = False  # Include beta/pre-releases
+    auto_install: bool = False       # Auto-install without asking (not recommended)
+    backup_before_update: bool = True # Create backup before updating
+    notify_beta_releases: bool = False # Include beta/pre-releases


@dataclass
 class RAGConfig:
    """Main RAG system configuration."""
-
    chunking: ChunkingConfig = None
-    streaming: StreamingConfig = None
+    streaming: StreamingConfig = None  
    files: FilesConfig = None
    embedding: EmbeddingConfig = None
    search: SearchConfig = None
    llm: LLMConfig = None
    updates: UpdateConfig = None
-
+    
    def __post_init__(self):
        if self.chunking is None:
            self.chunking = ChunkingConfig()
@ -162,227 +154,12 @@ class RAGConfig:

 class ConfigManager:
    """Manages configuration loading, saving, and validation."""
-
+    
    def __init__(self, project_path: Path):
        self.project_path = Path(project_path)
-        self.rag_dir = self.project_path / ".mini-rag"
-        self.config_path = self.rag_dir / "config.yaml"
-
-    def get_available_ollama_models(self, ollama_host: str = "localhost:11434") -> List[str]:
-        """Get list of available Ollama models for validation with secure connection handling."""
-        import time
+        self.rag_dir = self.project_path / '.mini-rag'
+        self.config_path = self.rag_dir / 'config.yaml'
        
-        # Retry logic with exponential backoff
-        max_retries = 3
-        for attempt in range(max_retries):
-            try:
-                # Use explicit timeout and SSL verification for security
-                response = requests.get(
-                    f"http://{ollama_host}/api/tags", 
-                    timeout=(5, 10),  # (connect_timeout, read_timeout)
-                    verify=True,  # Explicit SSL verification 
-                    allow_redirects=False  # Prevent redirect attacks
-                )
-                if response.status_code == 200:
-                    data = response.json()
-                    models = [model["name"] for model in data.get("models", [])]
-                    logger.debug(f"Successfully fetched {len(models)} Ollama models")
-                    return models
-                else:
-                    logger.debug(f"Ollama API returned status {response.status_code}")
-                    
-            except requests.exceptions.SSLError as e:
-                logger.debug(f"SSL verification failed for Ollama connection: {e}")
-                # For local Ollama, SSL might not be configured - this is expected
-                if "localhost" in ollama_host or "127.0.0.1" in ollama_host:
-                    logger.debug("Retrying with local connection (SSL not required for localhost)")
-                    # Local connections don't need SSL verification
-                    try:
-                        response = requests.get(f"http://{ollama_host}/api/tags", timeout=(5, 10))
-                        if response.status_code == 200:
-                            data = response.json()
-                            return [model["name"] for model in data.get("models", [])]
-                    except Exception as local_e:
-                        logger.debug(f"Local Ollama connection also failed: {local_e}")
-                break  # Don't retry SSL errors for remote hosts
-                
-            except requests.exceptions.Timeout as e:
-                logger.debug(f"Ollama connection timeout (attempt {attempt + 1}/{max_retries}): {e}")
-                if attempt < max_retries - 1:
-                    sleep_time = (2 ** attempt)  # Exponential backoff
-                    time.sleep(sleep_time)
-                    continue
-                    
-            except requests.exceptions.ConnectionError as e:
-                logger.debug(f"Ollama connection error (attempt {attempt + 1}/{max_retries}): {e}")
-                if attempt < max_retries - 1:
-                    time.sleep(1)
-                    continue
-                    
-            except Exception as e:
-                logger.debug(f"Unexpected error fetching Ollama models: {e}")
-                break
-                
-        return []
-
-    def _sanitize_model_name(self, model_name: str) -> str:
-        """Sanitize model name to prevent injection attacks."""
-        if not model_name:
-            return ""
-        
-        # Allow only alphanumeric, dots, colons, hyphens, underscores
-        # This covers legitimate model names like qwen3:1.7b-q8_0
-        sanitized = re.sub(r'[^a-zA-Z0-9\.\:\-\_]', '', model_name)
-        
-        # Limit length to prevent DoS
-        if len(sanitized) > 128:
-            logger.warning(f"Model name too long, truncating: {sanitized[:20]}...")
-            sanitized = sanitized[:128]
-            
-        return sanitized
-
-    def resolve_model_name(self, configured_model: str, available_models: List[str]) -> Optional[str]:
-        """Resolve configured model name to actual available model with input sanitization."""
-        if not available_models or not configured_model:
-            return None
-        
-        # Sanitize input to prevent injection
-        configured_model = self._sanitize_model_name(configured_model)
-        if not configured_model:
-            logger.warning("Model name was empty after sanitization")
-            return None
-            
-        # Handle special 'auto' directive
-        if configured_model.lower() == 'auto':
-            return available_models[0] if available_models else None
-            
-        # Direct exact match first (case-insensitive)
-        for available_model in available_models:
-            if configured_model.lower() == available_model.lower():
-                return available_model
-        
-        # Fuzzy matching for common patterns
-        model_patterns = self._get_model_patterns(configured_model)
-        
-        for pattern in model_patterns:
-            for available_model in available_models:
-                if pattern.lower() in available_model.lower():
-                    # Additional validation: ensure it's not a partial match of something else
-                    if self._validate_model_match(pattern, available_model):
-                        return available_model
-        
-        return None  # Model not available
-
-    def _get_model_patterns(self, configured_model: str) -> List[str]:
-        """Generate fuzzy match patterns for common model naming conventions."""
-        patterns = [configured_model]  # Start with exact name
-        
-        # Common quantization patterns for different models
-        quantization_patterns = {
-            'qwen3:1.7b': ['qwen3:1.7b-q8_0', 'qwen3:1.7b-q4_0', 'qwen3:1.7b-q6_k'],
-            'qwen3:0.6b': ['qwen3:0.6b-q8_0', 'qwen3:0.6b-q4_0', 'qwen3:0.6b-q6_k'],
-            'qwen3:4b': ['qwen3:4b-q8_0', 'qwen3:4b-q4_0', 'qwen3:4b-q6_k'],
-            'qwen3:8b': ['qwen3:8b-q8_0', 'qwen3:8b-q4_0', 'qwen3:8b-q6_k'],
-            'qwen2.5:1.5b': ['qwen2.5:1.5b-q8_0', 'qwen2.5:1.5b-q4_0'],
-            'qwen2.5:3b': ['qwen2.5:3b-q8_0', 'qwen2.5:3b-q4_0'],
-            'qwen2.5-coder:1.5b': ['qwen2.5-coder:1.5b-q8_0', 'qwen2.5-coder:1.5b-q4_0'],
-            'qwen2.5-coder:3b': ['qwen2.5-coder:3b-q8_0', 'qwen2.5-coder:3b-q4_0'],
-            'qwen2.5-coder:7b': ['qwen2.5-coder:7b-q8_0', 'qwen2.5-coder:7b-q4_0'],
-        }
-        
-        # Add specific patterns for the configured model
-        if configured_model.lower() in quantization_patterns:
-            patterns.extend(quantization_patterns[configured_model.lower()])
-        
-        # Generic pattern generation for unknown models
-        if ':' in configured_model:
-            base_name, version = configured_model.split(':', 1)
-            
-            # Add common quantization suffixes
-            common_suffixes = ['-q8_0', '-q4_0', '-q6_k', '-q4_k_m', '-instruct', '-base']
-            for suffix in common_suffixes:
-                patterns.append(f"{base_name}:{version}{suffix}")
-                
-            # Also try with instruct variants
-            if 'instruct' not in version.lower():
-                patterns.append(f"{base_name}:{version}-instruct")
-                patterns.append(f"{base_name}:{version}-instruct-q8_0")
-                patterns.append(f"{base_name}:{version}-instruct-q4_0")
-        
-        return patterns
-
-    def _validate_model_match(self, pattern: str, available_model: str) -> bool:
-        """Validate that a fuzzy match is actually correct and not a false positive."""
-        # Convert to lowercase for comparison
-        pattern_lower = pattern.lower()
-        available_lower = available_model.lower()
-        
-        # Ensure the base model name matches
-        if ':' in pattern_lower and ':' in available_lower:
-            pattern_base = pattern_lower.split(':')[0]
-            available_base = available_lower.split(':')[0]
-            
-            # Base names must match exactly
-            if pattern_base != available_base:
-                return False
-                
-            # Version part should be contained or closely related
-            pattern_version = pattern_lower.split(':', 1)[1]
-            available_version = available_lower.split(':', 1)[1]
-            
-            # The pattern version should be a prefix of the available version
-            # e.g., "1.7b" should match "1.7b-q8_0" but not "11.7b"
-            if not available_version.startswith(pattern_version.split('-')[0]):
-                return False
-                
-        return True
-
-    def validate_and_resolve_models(self, config: RAGConfig) -> RAGConfig:
-        """Validate and resolve model names in configuration."""
-        try:
-            available_models = self.get_available_ollama_models(config.llm.ollama_host)
-            
-            if not available_models:
-                logger.debug("No Ollama models available for validation")
-                return config
-                
-            # Resolve synthesis model
-            if config.llm.synthesis_model != "auto":
-                resolved = self.resolve_model_name(config.llm.synthesis_model, available_models)
-                if resolved and resolved != config.llm.synthesis_model:
-                    logger.info(f"Resolved synthesis model: {config.llm.synthesis_model} -> {resolved}")
-                    config.llm.synthesis_model = resolved
-                elif not resolved:
-                    logger.warning(f"Synthesis model '{config.llm.synthesis_model}' not found, keeping original")
-                    
-            # Resolve expansion model (if different from synthesis)
-            if (config.llm.expansion_model != "auto" and 
-                config.llm.expansion_model != config.llm.synthesis_model):
-                resolved = self.resolve_model_name(config.llm.expansion_model, available_models)
-                if resolved and resolved != config.llm.expansion_model:
-                    logger.info(f"Resolved expansion model: {config.llm.expansion_model} -> {resolved}")
-                    config.llm.expansion_model = resolved
-                elif not resolved:
-                    logger.warning(f"Expansion model '{config.llm.expansion_model}' not found, keeping original")
-            
-            # Update model rankings with resolved names
-            if config.llm.model_rankings:
-                updated_rankings = []
-                for model in config.llm.model_rankings:
-                    resolved = self.resolve_model_name(model, available_models)
-                    if resolved:
-                        updated_rankings.append(resolved)
-                        if resolved != model:
-                            logger.debug(f"Updated model ranking: {model} -> {resolved}")
-                    else:
-                        updated_rankings.append(model)  # Keep original if not resolved
-                config.llm.model_rankings = updated_rankings
-                        
-        except Exception as e:
-            logger.debug(f"Model validation failed: {e}")
-            
-        return config
-
    def load_config(self) -> RAGConfig:
        """Load configuration from YAML file or create default."""
        if not self.config_path.exists():
@ -390,84 +167,57 @@ class ConfigManager:
            config = RAGConfig()
            self.save_config(config)
            return config
-
+            
        try:
-            with open(self.config_path, "r") as f:
+            with open(self.config_path, 'r') as f:
                data = yaml.safe_load(f)
-
+                
            if not data:
                logger.warning("Empty config file, using defaults")
                return RAGConfig()
-
+                
            # Convert nested dicts back to dataclass instances
            config = RAGConfig()
-
-            if "chunking" in data:
-                config.chunking = ChunkingConfig(**data["chunking"])
-            if "streaming" in data:
-                config.streaming = StreamingConfig(**data["streaming"])
-            if "files" in data:
-                config.files = FilesConfig(**data["files"])
-            if "embedding" in data:
-                config.embedding = EmbeddingConfig(**data["embedding"])
-            if "search" in data:
-                config.search = SearchConfig(**data["search"])
-            if "llm" in data:
-                config.llm = LLMConfig(**data["llm"])
-
-            # Validate and resolve model names if Ollama is available
-            config = self.validate_and_resolve_models(config)
-
+            
+            if 'chunking' in data:
+                config.chunking = ChunkingConfig(**data['chunking'])
+            if 'streaming' in data:
+                config.streaming = StreamingConfig(**data['streaming'])
+            if 'files' in data:
+                config.files = FilesConfig(**data['files'])
+            if 'embedding' in data:
+                config.embedding = EmbeddingConfig(**data['embedding'])
+            if 'search' in data:
+                config.search = SearchConfig(**data['search'])
+            if 'llm' in data:
+                config.llm = LLMConfig(**data['llm'])
+                
            return config
-
-        except yaml.YAMLError as e:
-            # YAML syntax error - help user fix it instead of silent fallback
-            error_msg = (
-                f"⚠️ Config file has YAML syntax error at line "
-                f"{getattr(e, 'problem_mark', 'unknown')}: {e}"
-            )
-            logger.error(error_msg)
-            print(f"\n{error_msg}")
-            print(f"Config file: {self.config_path}")
-            print("💡 Check YAML syntax (indentation, quotes, colons)")
-            print("💡 Or delete config file to reset to defaults")
-            return RAGConfig()  # Still return defaults but warn user
-
+            
        except Exception as e:
            logger.error(f"Failed to load config from {self.config_path}: {e}")
            logger.info("Using default configuration")
            return RAGConfig()
-
+    
    def save_config(self, config: RAGConfig):
        """Save configuration to YAML file with comments."""
        try:
            self.rag_dir.mkdir(exist_ok=True)
-
+            
            # Convert to dict for YAML serialization
            config_dict = asdict(config)
-
+            
            # Create YAML content with comments
            yaml_content = self._create_yaml_with_comments(config_dict)
-
-            # Write with basic file locking to prevent corruption
-            with open(self.config_path, "w") as f:
-                try:
-                    import fcntl
-
-                    fcntl.flock(
-                        f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
-                    )  # Non-blocking exclusive lock
-                    f.write(yaml_content)
-                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)  # Unlock
-                except (OSError, ImportError):
-                    # Fallback for Windows or if fcntl unavailable
-                    f.write(yaml_content)
-
+            
+            with open(self.config_path, 'w') as f:
+                f.write(yaml_content)
+                
            logger.info(f"Configuration saved to {self.config_path}")
-
+            
        except Exception as e:
            logger.error(f"Failed to save config to {self.config_path}: {e}")
-
+    
    def _create_yaml_with_comments(self, config_dict: Dict[str, Any]) -> str:
        """Create YAML content with helpful comments."""
        yaml_lines = [
@ -477,97 +227,89 @@ class ConfigManager:
            "",
            "# Text chunking settings",
            "chunking:",
-            f"  max_size: {config_dict['chunking']['max_size']}  # Max chars per chunk",
-            f"  min_size: {config_dict['chunking']['min_size']}  # Min chars per chunk",
-            f"  strategy: {config_dict['chunking']['strategy']}  # 'semantic' or 'fixed'",
+            f"  max_size: {config_dict['chunking']['max_size']}      # Maximum characters per chunk",
+            f"  min_size: {config_dict['chunking']['min_size']}       # Minimum characters per chunk", 
+            f"  strategy: {config_dict['chunking']['strategy']}    # 'semantic' (language-aware) or 'fixed'",
            "",
-            "# Large file streaming settings",
+            "# Large file streaming settings", 
            "streaming:",
            f"  enabled: {str(config_dict['streaming']['enabled']).lower()}",
-            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Stream files >1MB",
+            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Files larger than this use streaming (1MB)",
            "",
            "# File processing settings",
            "files:",
-            f"  min_file_size: {config_dict['files']['min_file_size']}  # Skip small files",
+            f"  min_file_size: {config_dict['files']['min_file_size']}        # Skip files smaller than this",
            "  exclude_patterns:",
        ]
-
-        for pattern in config_dict["files"]["exclude_patterns"]:
-            yaml_lines.append(f'    - "{pattern}"')
-
-        yaml_lines.extend(
-            [
-                "  include_patterns:",
-                '    - "**/*"                  # Include all files by default',
-                "",
-                "# Embedding generation settings",
-                "embedding:",
-                f"  preferred_method: {config_dict['embedding']['preferred_method']}  # Method",
-                f"  ollama_model: {config_dict['embedding']['ollama_model']}",
-                f"  ollama_host: {config_dict['embedding']['ollama_host']}",
-                f"  ml_model: {config_dict['embedding']['ml_model']}",
-                f"  batch_size: {config_dict['embedding']['batch_size']}  # Per batch",
-                "",
-                "# Search behavior settings",
-                "search:",
-                f"  default_top_k: {config_dict['search']['default_top_k']}  # Top results",
-                f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}  # Keyword boost",
-                f"  similarity_threshold: {config_dict['search']['similarity_threshold']}  # Min score",
-                f"  expand_queries: {str(config_dict['search']['expand_queries']).lower()}  # Auto expand",
-                "",
-                "# LLM synthesis and query expansion settings",
-                "llm:",
-                f"  ollama_host: {config_dict['llm']['ollama_host']}",
-                f"  synthesis_model: {config_dict['llm']['synthesis_model']}  # Model name",
-                f"  expansion_model: {config_dict['llm']['expansion_model']}  # Model name",
-                f"  max_expansion_terms: {config_dict['llm']['max_expansion_terms']}  # Max terms",
-                f"  enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()}       # Enable synthesis by default",
-                f"  synthesis_temperature: {config_dict['llm']['synthesis_temperature']}      # LLM temperature for analysis",
-                "",
-                "  # Context window configuration (critical for RAG performance)",
-                "  # 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users",
-                "  #               32K=large codebases, 64K+=power users only",
-                "  # ⚠️  Larger contexts use exponentially more CPU/memory - only increase if needed",
-                "  # 🔧 Low context limits? Try smaller topk, better search terms, or archive noise",
-                f"  context_window: {config_dict['llm']['context_window']}           # Context size in tokens",
-                f"  auto_context: {str(config_dict['llm']['auto_context']).lower()}            # Auto-adjust context based on model capabilities",
-                "",
-                "  model_rankings:          # Preferred model order (edit to change priority)",
-            ]
-        )
-
+        
+        for pattern in config_dict['files']['exclude_patterns']:
+            yaml_lines.append(f"    - \"{pattern}\"")
+        
+        yaml_lines.extend([
+            "  include_patterns:",
+            "    - \"**/*\"                  # Include all files by default",
+            "",
+            "# Embedding generation settings",
+            "embedding:",
+            f"  preferred_method: {config_dict['embedding']['preferred_method']}     # 'ollama', 'ml', 'hash', or 'auto'",
+            f"  ollama_model: {config_dict['embedding']['ollama_model']}",
+            f"  ollama_host: {config_dict['embedding']['ollama_host']}",
+            f"  ml_model: {config_dict['embedding']['ml_model']}",
+            f"  batch_size: {config_dict['embedding']['batch_size']}               # Embeddings processed per batch",
+            "",
+            "# Search behavior settings", 
+            "search:",
+            f"  default_top_k: {config_dict['search']['default_top_k']}           # Default number of top results",
+            f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}             # Enable keyword matching boost",
+            f"  similarity_threshold: {config_dict['search']['similarity_threshold']}        # Minimum similarity score",
+            f"  expand_queries: {str(config_dict['search']['expand_queries']).lower()}          # Enable automatic query expansion",
+            "",
+            "# LLM synthesis and query expansion settings",
+            "llm:",
+            f"  ollama_host: {config_dict['llm']['ollama_host']}",
+            f"  synthesis_model: {config_dict['llm']['synthesis_model']}    # 'auto', 'qwen3:1.7b', etc.",
+            f"  expansion_model: {config_dict['llm']['expansion_model']}     # Usually same as synthesis_model",
+            f"  max_expansion_terms: {config_dict['llm']['max_expansion_terms']}        # Maximum terms to add to queries",
+            f"  enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()}       # Enable synthesis by default",
+            f"  synthesis_temperature: {config_dict['llm']['synthesis_temperature']}      # LLM temperature for analysis",
+            "",
+            "  # Context window configuration (critical for RAG performance)",
+            f"  context_window: {config_dict['llm']['context_window']}           # Context size in tokens (8K=fast, 16K=balanced, 32K=advanced)",
+            f"  auto_context: {str(config_dict['llm']['auto_context']).lower()}            # Auto-adjust context based on model capabilities",
+            "",
+            "  model_rankings:          # Preferred model order (edit to change priority)",
+        ])
+        
        # Add model rankings list
-        if "model_rankings" in config_dict["llm"] and config_dict["llm"]["model_rankings"]:
-            for model in config_dict["llm"]["model_rankings"][:10]:  # Show first 10
-                yaml_lines.append(f'    - "{model}"')
-            if len(config_dict["llm"]["model_rankings"]) > 10:
+        if 'model_rankings' in config_dict['llm'] and config_dict['llm']['model_rankings']:
+            for model in config_dict['llm']['model_rankings'][:10]:  # Show first 10
+                yaml_lines.append(f"    - \"{model}\"")
+            if len(config_dict['llm']['model_rankings']) > 10:
                yaml_lines.append("    # ... (edit config to see all options)")
-
+        
        # Add update settings
-        yaml_lines.extend(
-            [
-                "",
-                "# Auto-update system settings",
-                "updates:",
-                f"  auto_check: {str(config_dict['updates']['auto_check']).lower()}            # Check for updates automatically",
-                f"  check_frequency_hours: {config_dict['updates']['check_frequency_hours']}    # Hours between update checks",
-                f"  auto_install: {str(config_dict['updates']['auto_install']).lower()}          # Auto-install updates (not recommended)",
-                f"  backup_before_update: {str(config_dict['updates']['backup_before_update']).lower()}   # Create backup before updating",
-                f"  notify_beta_releases: {str(config_dict['updates']['notify_beta_releases']).lower()}   # Include beta releases in checks",
-            ]
-        )
-
-        return "\n".join(yaml_lines)
-
+        yaml_lines.extend([
+            "",
+            "# Auto-update system settings",
+            "updates:",
+            f"  auto_check: {str(config_dict['updates']['auto_check']).lower()}            # Check for updates automatically",
+            f"  check_frequency_hours: {config_dict['updates']['check_frequency_hours']}    # Hours between update checks",
+            f"  auto_install: {str(config_dict['updates']['auto_install']).lower()}          # Auto-install updates (not recommended)",
+            f"  backup_before_update: {str(config_dict['updates']['backup_before_update']).lower()}   # Create backup before updating",
+            f"  notify_beta_releases: {str(config_dict['updates']['notify_beta_releases']).lower()}   # Include beta releases in checks",
+        ])
+        
+        return '\n'.join(yaml_lines)
+    
    def update_config(self, **kwargs) -> RAGConfig:
        """Update specific configuration values."""
        config = self.load_config()
-
+        
        for key, value in kwargs.items():
            if hasattr(config, key):
                setattr(config, key, value)
            else:
                logger.warning(f"Unknown config key: {key}")
-
+        
        self.save_config(config)
-        return config
+        return config
--- a/mini_rag/explorer.py
+++ b/mini_rag/explorer.py
@ -9,173 +9,155 @@ Perfect for exploring codebases with detailed reasoning and follow-up questions.
 import json
 import logging
 import time
-from dataclasses import dataclass
+from typing import List, Dict, Any, Optional
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from dataclasses import dataclass

 try:
-    from .config import RAGConfig
    from .llm_synthesizer import LLMSynthesizer, SynthesisResult
    from .search import CodeSearcher
-    from .system_context import get_system_context
+    from .config import RAGConfig
 except ImportError:
    # For direct testing
-    from config import RAGConfig
    from llm_synthesizer import LLMSynthesizer, SynthesisResult
    from search import CodeSearcher
-
-    def get_system_context(x=None):
-        return ""
-
+    from config import RAGConfig

 logger = logging.getLogger(__name__)

-
@dataclass
 class ExplorationSession:
    """Track an exploration session with context history."""
-
    project_path: Path
    conversation_history: List[Dict[str, Any]]
    session_id: str
    started_at: float
-
-    def add_exchange(
-        self, question: str, search_results: List[Any], response: SynthesisResult
-    ):
+    
+    def add_exchange(self, question: str, search_results: List[Any], response: SynthesisResult):
        """Add a question/response exchange to the conversation history."""
-        self.conversation_history.append(
-            {
-                "timestamp": time.time(),
-                "question": question,
-                "search_results_count": len(search_results),
-                "response": {
-                    "summary": response.summary,
-                    "key_points": response.key_points,
-                    "code_examples": response.code_examples,
-                    "suggested_actions": response.suggested_actions,
-                    "confidence": response.confidence,
-                },
+        self.conversation_history.append({
+            "timestamp": time.time(),
+            "question": question,
+            "search_results_count": len(search_results),
+            "response": {
+                "summary": response.summary,
+                "key_points": response.key_points,
+                "code_examples": response.code_examples,
+                "suggested_actions": response.suggested_actions,
+                "confidence": response.confidence
            }
-        )
-
+        })

 class CodeExplorer:
    """Interactive code exploration with thinking and context memory."""
-
+    
    def __init__(self, project_path: Path, config: RAGConfig = None):
        self.project_path = project_path
        self.config = config or RAGConfig()
-
+        
        # Initialize components with thinking enabled
        self.searcher = CodeSearcher(project_path)
        self.synthesizer = LLMSynthesizer(
            ollama_url=f"http://{self.config.llm.ollama_host}",
            model=self.config.llm.synthesis_model,
            enable_thinking=True,  # Always enable thinking in explore mode
-            config=self.config,  # Pass config for model rankings
+            config=self.config  # Pass config for model rankings
        )
-
+        
        # Session management
        self.current_session: Optional[ExplorationSession] = None
-
+        
    def start_exploration_session(self) -> bool:
        """Start a new exploration session."""
-
+        
        # Simple availability check - don't do complex model restart logic
        if not self.synthesizer.is_available():
            print("❌ LLM service unavailable. Please check Ollama is running.")
            return False
-
+            
        session_id = f"explore_{int(time.time())}"
        self.current_session = ExplorationSession(
            project_path=self.project_path,
            conversation_history=[],
            session_id=session_id,
-            started_at=time.time(),
+            started_at=time.time()
        )
-
+        
        print("🧠 Exploration Mode Started")
        print(f"Project: {self.project_path.name}")
-
+        
        return True
-
+    
    def explore_question(self, question: str, context_limit: int = 10) -> Optional[str]:
        """Explore a question with full thinking and context."""
        if not self.current_session:
            return "❌ No exploration session active. Start one first."
-
+            
        # Search for relevant information
        search_start = time.time()
        results = self.searcher.search(
-            question,
+            question, 
            top_k=context_limit,
            include_context=True,
            semantic_weight=0.7,
-            bm25_weight=0.3,
+            bm25_weight=0.3
        )
        search_time = time.time() - search_start
-
+        
        # Build enhanced prompt with conversation context
        synthesis_prompt = self._build_contextual_prompt(question, results)
-
+        
        # Get thinking-enabled analysis
        synthesis_start = time.time()
        synthesis = self._synthesize_with_context(synthesis_prompt, results)
        synthesis_time = time.time() - synthesis_start
-
+        
        # Add to conversation history
        self.current_session.add_exchange(question, results, synthesis)
-
+        
        # Streaming already displayed the response
        # Just return minimal status for caller
        session_duration = time.time() - self.current_session.started_at
        exchange_count = len(self.current_session.conversation_history)
-
+        
        status = f"\n📊 Session: {session_duration/60:.1f}m | Question #{exchange_count} | Results: {len(results)} | Time: {search_time+synthesis_time:.1f}s"
        return status
-
+    
    def _build_contextual_prompt(self, question: str, results: List[Any]) -> str:
        """Build a prompt that includes conversation context."""
        # Get recent conversation context (last 3 exchanges)
+        context_summary = ""
        if self.current_session.conversation_history:
            recent_exchanges = self.current_session.conversation_history[-3:]
            context_parts = []
-
+            
            for i, exchange in enumerate(recent_exchanges, 1):
                prev_q = exchange["question"]
                prev_summary = exchange["response"]["summary"]
                context_parts.append(f"Previous Q{i}: {prev_q}")
                context_parts.append(f"Previous A{i}: {prev_summary}")
-
-            # "\n".join(context_parts)  # Unused variable removed
-
+            
+            context_summary = "\n".join(context_parts)
+        
        # Build search results context
        results_context = []
        for i, result in enumerate(results[:8], 1):
-            # result.file_path if hasattr(result, "file_path") else "unknown"  # Unused variable removed
-            # result.content if hasattr(result, "content") else str(result)  # Unused variable removed
-            # result.score if hasattr(result, "score") else 0.0  # Unused variable removed
-
-            results_context.append(
-                """
+            file_path = result.file_path if hasattr(result, 'file_path') else 'unknown'
+            content = result.content if hasattr(result, 'content') else str(result)
+            score = result.score if hasattr(result, 'score') else 0.0
+            
+            results_context.append(f"""
 Result {i} (Score: {score:.3f}):
 File: {file_path}
 Content: {content[:800]}{'...' if len(content) > 800 else ''}
-"""
-            )
-
-        # "\n".join(results_context)  # Unused variable removed
-
-        # Get system context for better responses
-        # get_system_context(self.project_path)  # Unused variable removed
-
+""")
+        
+        results_text = "\n".join(results_context)
+        
        # Create comprehensive exploration prompt with thinking
-        prompt = """<think>
+        prompt = f"""<think>
 The user asked: "{question}"

-System context: {system_context}
-
 Let me analyze what they're asking and look at the information I have available.

 From the search results, I can see relevant information about:
@ -208,7 +190,7 @@ Please provide a helpful, natural explanation that answers their question. Write

 Structure your response to include:
 1. A clear explanation of what you found and how it answers their question
-2. The most important insights from the information you discovered
+2. The most important insights from the information you discovered  
 3. Relevant examples or code patterns when helpful
 4. Practical next steps they could take

@ -221,43 +203,37 @@ Guidelines:
 - Use natural language, not structured formats
 - Break complex topics into understandable pieces
 """
-
+        
        return prompt
-
+    
    def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult:
        """Synthesize results with full context and thinking."""
        try:
            # Use streaming with thinking visible (don't collapse)
-            response = self.synthesizer._call_ollama(
-                prompt,
-                temperature=0.2,
-                disable_thinking=False,
-                use_streaming=True,
-                collapse_thinking=False,
-            )
-            # ""  # Unused variable removed
-
+            response = self.synthesizer._call_ollama(prompt, temperature=0.2, disable_thinking=False, use_streaming=True, collapse_thinking=False)
+            thinking_stream = ""
+            
            # Streaming already shows thinking and response
            # No need for additional indicators
-
+            
            if not response:
                return SynthesisResult(
                    summary="Analysis unavailable (LLM service error)",
                    key_points=[],
                    code_examples=[],
                    suggested_actions=["Check LLM service status"],
-                    confidence=0.0,
+                    confidence=0.0
                )
-
+            
            # Use natural language response directly
            return SynthesisResult(
                summary=response.strip(),
                key_points=[],  # Not used with natural language responses
                code_examples=[],  # Not used with natural language responses
                suggested_actions=[],  # Not used with natural language responses
-                confidence=0.85,  # High confidence for natural responses
+                confidence=0.85  # High confidence for natural responses
            )
-
+                
        except Exception as e:
            logger.error(f"Context synthesis failed: {e}")
            return SynthesisResult(
@ -265,153 +241,124 @@ Guidelines:
                key_points=[],
                code_examples=[],
                suggested_actions=["Check system status and try again"],
-                confidence=0.0,
+                confidence=0.0
            )
-
-    def _format_exploration_response(
-        self,
-        question: str,
-        synthesis: SynthesisResult,
-        result_count: int,
-        search_time: float,
-        synthesis_time: float,
-    ) -> str:
+    
+    def _format_exploration_response(self, question: str, synthesis: SynthesisResult, 
+                                   result_count: int, search_time: float, synthesis_time: float) -> str:
        """Format exploration response with context indicators."""
-
+        
        output = []
-
+        
        # Header with session context
        session_duration = time.time() - self.current_session.started_at
        exchange_count = len(self.current_session.conversation_history)
-
+        
        output.append(f"🧠 EXPLORATION ANALYSIS (Question #{exchange_count})")
-        output.append(
-            f"Session: {session_duration/60:.1f}m | Results: {result_count} | "
-            f"Time: {search_time+synthesis_time:.1f}s"
-        )
+        output.append(f"Session: {session_duration/60:.1f}m | Results: {result_count} | "
+                     f"Time: {search_time+synthesis_time:.1f}s")
        output.append("=" * 60)
        output.append("")
-
+        
        # Response was already displayed via streaming
        # Just show completion status
        output.append("✅ Analysis complete")
        output.append("")
        output.append("")
-
+        
        # Confidence and context indicator
-        confidence_emoji = (
-            "🟢"
-            if synthesis.confidence > 0.7
-            else "🟡" if synthesis.confidence > 0.4 else "🔴"
-        )
-        context_indicator = (
-            f" | Context: {exchange_count-1} previous questions" if exchange_count > 1 else ""
-        )
-        output.append(
-            f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}{context_indicator}"
-        )
-
+        confidence_emoji = "🟢" if synthesis.confidence > 0.7 else "🟡" if synthesis.confidence > 0.4 else "🔴"
+        context_indicator = f" | Context: {exchange_count-1} previous questions" if exchange_count > 1 else ""
+        output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}{context_indicator}")
+        
        return "\n".join(output)
-
+    
    def get_session_summary(self) -> str:
        """Get a summary of the current exploration session."""
        if not self.current_session:
            return "No active exploration session."
-
+            
        duration = time.time() - self.current_session.started_at
        exchange_count = len(self.current_session.conversation_history)
-
+        
        summary = [
-            "🧠 EXPLORATION SESSION SUMMARY",
-            "=" * 40,
+            f"🧠 EXPLORATION SESSION SUMMARY",
+            f"=" * 40,
            f"Project: {self.project_path.name}",
            f"Session ID: {self.current_session.session_id}",
            f"Duration: {duration/60:.1f} minutes",
            f"Questions explored: {exchange_count}",
-            "",
+            f"",
        ]
-
+        
        if exchange_count > 0:
            summary.append("📋 Topics explored:")
            for i, exchange in enumerate(self.current_session.conversation_history, 1):
-                question = (
-                    exchange["question"][:50] + "..."
-                    if len(exchange["question"]) > 50
-                    else exchange["question"]
-                )
+                question = exchange["question"][:50] + "..." if len(exchange["question"]) > 50 else exchange["question"]
                confidence = exchange["response"]["confidence"]
                summary.append(f"   {i}. {question} (confidence: {confidence:.1%})")
-
+        
        return "\n".join(summary)
-
+    
    def end_session(self) -> str:
        """End the current exploration session."""
        if not self.current_session:
            return "No active session to end."
-
+            
        summary = self.get_session_summary()
        self.current_session = None
-
+        
        return summary + "\n\n✅ Exploration session ended."
-
+    
    def _check_model_restart_needed(self) -> bool:
        """Check if model restart would improve thinking quality."""
        try:
-            # Simple heuristic: if we can detect the model was recently used
+            # Simple heuristic: if we can detect the model was recently used 
            # with <no_think>, suggest restart for better thinking quality
-
+            
            # Test with a simple thinking prompt to see response quality
            test_response = self.synthesizer._call_ollama(
-                "Think briefly: what is 2+2?", temperature=0.1, disable_thinking=False
+                "Think briefly: what is 2+2?", 
+                temperature=0.1, 
+                disable_thinking=False
            )
-
+            
            if test_response:
                # If response is suspiciously short or shows signs of no-think behavior
                if len(test_response.strip()) < 10 or "4" == test_response.strip():
                    return True
-
+                    
        except Exception:
            pass
-
+            
        return False
-
+    
    def _handle_model_restart(self) -> bool:
        """Handle user confirmation and model restart."""
        try:
-            print(
-                "\n🤔 To ensure best thinking quality, exploration mode works best with a fresh model."
-            )
+            print("\n🤔 To ensure best thinking quality, exploration mode works best with a fresh model.")
            print(f"   Currently running: {self.synthesizer.model}")
-            print(
-                "\n💡 Stop current model and restart for optimal exploration? (y/N): ",
-                end="",
-                flush=True,
-            )
-
+            print("\n💡 Stop current model and restart for optimal exploration? (y/N): ", end="", flush=True)
+            
            response = input().strip().lower()
-
-            if response in ["y", "yes"]:
+            
+            if response in ['y', 'yes']:
                print("\n🔄 Stopping current model...")
-
+                
                # Use ollama stop command for clean model restart
                import subprocess
-
                try:
-                    subprocess.run(
-                        ["ollama", "stop", self.synthesizer.model],
-                        timeout=10,
-                        capture_output=True,
-                    )
-
+                    subprocess.run([
+                        "ollama", "stop", self.synthesizer.model
+                    ], timeout=10, capture_output=True)
+                    
                    print("✅ Model stopped successfully.")
-                    print(
-                        "🚀 Exploration mode will restart the model with thinking enabled..."
-                    )
-
+                    print("🚀 Exploration mode will restart the model with thinking enabled...")
+                    
                    # Reset synthesizer initialization to force fresh start
                    self.synthesizer._initialized = False
                    return True
-
+                    
                except subprocess.TimeoutExpired:
                    print("⚠️  Model stop timed out, continuing anyway...")
                    return False
@ -424,18 +371,19 @@ Guidelines:
            else:
                print("📝 Continuing with current model...")
                return False
-
+                
        except KeyboardInterrupt:
            print("\n📝 Continuing with current model...")
            return False
        except EOFError:
            print("\n📝 Continuing with current model...")
            return False
-
+    
    def _call_ollama_with_thinking(self, prompt: str, temperature: float = 0.3) -> tuple:
        """Call Ollama with streaming for fast time-to-first-token."""
        import requests
-
+        import json
+        
        try:
            # Use the synthesizer's model and connection
            model_to_use = self.synthesizer.model
@ -444,15 +392,14 @@ Guidelines:
                    model_to_use = self.synthesizer.available_models[0]
                else:
                    return None, None
-
+            
            # Enable thinking by NOT adding <no_think>
            final_prompt = prompt
-
+            
            # Get optimal parameters for this model
            from .llm_optimization import get_optimal_ollama_parameters
-
            optimal_params = get_optimal_ollama_parameters(model_to_use)
-
+            
            payload = {
                "model": model_to_use,
                "prompt": final_prompt,
@ -464,102 +411,94 @@ Guidelines:
                    "num_ctx": self.synthesizer._get_optimal_context_size(model_to_use),
                    "num_predict": optimal_params.get("num_predict", 2000),
                    "repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
-                    "presence_penalty": optimal_params.get("presence_penalty", 1.0),
-                },
+                    "presence_penalty": optimal_params.get("presence_penalty", 1.0)
+                }
            }
-
+            
            response = requests.post(
                f"{self.synthesizer.ollama_url}/api/generate",
                json=payload,
                stream=True,
-                timeout=65,
+                timeout=65
            )
-
+            
            if response.status_code == 200:
                # Collect streaming response
                raw_response = ""
                thinking_displayed = False
-
+                
                for line in response.iter_lines():
                    if line:
                        try:
-                            chunk_data = json.loads(line.decode("utf-8"))
-                            chunk_text = chunk_data.get("response", "")
-
+                            chunk_data = json.loads(line.decode('utf-8'))
+                            chunk_text = chunk_data.get('response', '')
+                            
                            if chunk_text:
                                raw_response += chunk_text
-
+                                
                                # Display thinking stream as it comes in
-                                if not thinking_displayed and "<think>" in raw_response:
+                                if not thinking_displayed and '<think>' in raw_response:
                                    # Start displaying thinking
                                    self._start_thinking_display()
                                    thinking_displayed = True
-
+                                
                                if thinking_displayed:
                                    self._stream_thinking_chunk(chunk_text)
-
-                            if chunk_data.get("done", False):
+                                
+                            if chunk_data.get('done', False):
                                break
-
+                                
                        except json.JSONDecodeError:
                            continue
-
+                
                # Finish thinking display if it was shown
                if thinking_displayed:
                    self._end_thinking_display()
-
+                
                # Extract thinking stream and final response
                thinking_stream, final_response = self._extract_thinking(raw_response)
-
+                
                return final_response, thinking_stream
            else:
                return None, None
-
+                
        except Exception as e:
            logger.error(f"Thinking-enabled Ollama call failed: {e}")
            return None, None
-
+    
    def _extract_thinking(self, raw_response: str) -> tuple:
        """Extract thinking content from response."""
        thinking_stream = ""
        final_response = raw_response
-
+        
        # Look for thinking patterns
        if "<think>" in raw_response and "</think>" in raw_response:
            # Extract thinking content between tags
            start_tag = raw_response.find("<think>")
            end_tag = raw_response.find("</think>") + len("</think>")
-
+            
            if start_tag != -1 and end_tag != -1:
-                thinking_content = raw_response[start_tag + 7 : end_tag - 8]  # Remove tags
+                thinking_content = raw_response[start_tag + 7:end_tag - 8]  # Remove tags
                thinking_stream = thinking_content.strip()
-
+                
                # Remove thinking from final response
                final_response = (raw_response[:start_tag] + raw_response[end_tag:]).strip()
-
+        
        # Alternative patterns for models that use different thinking formats
        elif "Let me think" in raw_response or "I need to analyze" in raw_response:
            # Simple heuristic: first paragraph might be thinking
-            lines = raw_response.split("\n")
+            lines = raw_response.split('\n')
            potential_thinking = []
            final_lines = []
-
-            thinking_indicators = [
-                "Let me think",
-                "I need to",
-                "First, I'll",
-                "Looking at",
-                "Analyzing",
-            ]
+            
+            thinking_indicators = ["Let me think", "I need to", "First, I'll", "Looking at", "Analyzing"]
            in_thinking = False
-
+            
            for line in lines:
                if any(indicator in line for indicator in thinking_indicators):
                    in_thinking = True
                    potential_thinking.append(line)
-                elif in_thinking and (
-                    line.startswith("{") or line.startswith("**") or line.startswith("#")
-                ):
+                elif in_thinking and (line.startswith('{') or line.startswith('**') or line.startswith('#')):
                    # Likely end of thinking, start of structured response
                    in_thinking = False
                    final_lines.append(line)
@ -567,87 +506,84 @@ Guidelines:
                    potential_thinking.append(line)
                else:
                    final_lines.append(line)
-
+            
            if potential_thinking:
-                thinking_stream = "\n".join(potential_thinking).strip()
-                final_response = "\n".join(final_lines).strip()
-
+                thinking_stream = '\n'.join(potential_thinking).strip()
+                final_response = '\n'.join(final_lines).strip()
+        
        return thinking_stream, final_response
-
+    
    def _start_thinking_display(self):
        """Start the thinking stream display."""
        print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        self._thinking_buffer = ""
        self._in_thinking_tags = False
-
+    
    def _stream_thinking_chunk(self, chunk: str):
        """Stream a chunk of thinking as it arrives."""
-
+        import sys
+        
        self._thinking_buffer += chunk
-
+        
        # Check if we're in thinking tags
-        if "<think>" in self._thinking_buffer and not self._in_thinking_tags:
+        if '<think>' in self._thinking_buffer and not self._in_thinking_tags:
            self._in_thinking_tags = True
            # Display everything after <think>
-            start_idx = self._thinking_buffer.find("<think>") + 7
+            start_idx = self._thinking_buffer.find('<think>') + 7
            thinking_content = self._thinking_buffer[start_idx:]
            if thinking_content:
-                print(f"\033[2m\033[3m{thinking_content}\033[0m", end="", flush=True)
-        elif self._in_thinking_tags and "</think>" not in chunk:
+                print(f"\033[2m\033[3m{thinking_content}\033[0m", end='', flush=True)
+        elif self._in_thinking_tags and '</think>' not in chunk:
            # We're in thinking mode, display the chunk
-            print(f"\033[2m\033[3m{chunk}\033[0m", end="", flush=True)
-        elif "</think>" in self._thinking_buffer:
+            print(f"\033[2m\033[3m{chunk}\033[0m", end='', flush=True)
+        elif '</think>' in self._thinking_buffer:
            # End of thinking
            self._in_thinking_tags = False
-
+    
    def _end_thinking_display(self):
        """End the thinking stream display."""
-        print("\n\033[2m\033[3m" + "─" * 40 + "\033[0m")
+        print(f"\n\033[2m\033[3m" + "─" * 40 + "\033[0m")
        print()
-
+    
    def _display_thinking_stream(self, thinking_stream: str):
        """Display thinking stream in light gray and italic (fallback for non-streaming)."""
        if not thinking_stream:
            return
-
+            
        print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
-
+        
        # Split into paragraphs and display with proper formatting
-        paragraphs = thinking_stream.split("\n\n")
+        paragraphs = thinking_stream.split('\n\n')
        for para in paragraphs:
            if para.strip():
                # Wrap long lines nicely
-                lines = para.strip().split("\n")
+                lines = para.strip().split('\n')
                for line in lines:
                    if line.strip():
                        # Light gray and italic
                        print(f"\033[2m\033[3m{line}\033[0m")
                print()  # Paragraph spacing
-
+        
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        print()

-
 # Quick test function
-
-
 def test_explorer():
    """Test the code explorer."""
    explorer = CodeExplorer(Path("."))
-
+    
    if not explorer.start_exploration_session():
        print("❌ Could not start exploration session")
        return
-
+        
    # Test question
    response = explorer.explore_question("How does authentication work in this codebase?")
    if response:
        print(response)
-
+        
    print("\n" + explorer.end_session())

-
 if __name__ == "__main__":
-    test_explorer()
+    test_explorer()
--- a/mini_rag/fast_server.py
+++ b/mini_rag/fast_server.py
--- a/mini_rag/indexer.py
+++ b/mini_rag/indexer.py
--- a/mini_rag/llm_safeguards.py
+++ b/mini_rag/llm_safeguards.py
@ -6,173 +6,163 @@ Provides runaway prevention, context management, and intelligent detection
 of problematic model behaviors to ensure reliable user experience.
 """

-import logging
 import re
 import time
+import logging
+from typing import Optional, Dict, List, Tuple
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple

 logger = logging.getLogger(__name__)

-
@dataclass
 class SafeguardConfig:
    """Configuration for LLM safeguards - gentle and educational."""
-
-    max_output_tokens: int = 4000  # Allow longer responses for learning
-    max_repetition_ratio: float = 0.7  # Be very permissive - only catch extreme repetition
-    max_response_time: int = 120  # Allow 2 minutes for complex thinking
-    min_useful_length: int = 10  # Lower threshold - short answers can be useful
-    context_window: int = 32000  # Match Qwen3 context length (32K token limit)
+    max_output_tokens: int = 4000        # Allow longer responses for learning
+    max_repetition_ratio: float = 0.7    # Be very permissive - only catch extreme repetition
+    max_response_time: int = 120         # Allow 2 minutes for complex thinking
+    min_useful_length: int = 10          # Lower threshold - short answers can be useful
+    context_window: int = 32000          # Match Qwen3 context length (32K token limit)
    enable_thinking_detection: bool = True  # Detect thinking patterns
-
-
+    
 class ModelRunawayDetector:
    """Detects and prevents model runaway behaviors."""
-
+    
    def __init__(self, config: SafeguardConfig = None):
        self.config = config or SafeguardConfig()
        self.response_patterns = self._compile_patterns()
-
+    
    def _compile_patterns(self) -> Dict[str, re.Pattern]:
        """Compile regex patterns for runaway detection."""
        return {
            # Excessive repetition patterns
-            "word_repetition": re.compile(r"\b(\w+)\b(?:\s+\1\b){3,}", re.IGNORECASE),
-            "phrase_repetition": re.compile(r"(.{10,50}?)\1{2,}", re.DOTALL),
+            'word_repetition': re.compile(r'\b(\w+)\b(?:\s+\1\b){3,}', re.IGNORECASE),
+            'phrase_repetition': re.compile(r'(.{10,50}?)\1{2,}', re.DOTALL),
+            
            # Thinking loop patterns (small models get stuck)
-            "thinking_loop": re.compile(
-                r"(let me think|i think|thinking|consider|actually|wait|hmm|well)\s*[.,:]*\s*\1",
-                re.IGNORECASE,
-            ),
+            'thinking_loop': re.compile(r'(let me think|i think|thinking|consider|actually|wait|hmm|well)\s*[.,:]*\s*\1', re.IGNORECASE),
+            
            # Rambling patterns
-            "excessive_filler": re.compile(
-                r"\b(um|uh|well|you know|like|basically|actually|so|then|and|but|however)\b(?:\s+[^.!?]*){5,}",
-                re.IGNORECASE,
-            ),
+            'excessive_filler': re.compile(r'\b(um|uh|well|you know|like|basically|actually|so|then|and|but|however)\b(?:\s+[^.!?]*){5,}', re.IGNORECASE),
+            
            # JSON corruption patterns
-            "broken_json": re.compile(r"\{[^}]*\{[^}]*\{"),  # Nested broken JSON
-            "json_repetition": re.compile(
-                r'("[\w_]+"\s*:\s*"[^"]*",?\s*){4,}'
-            ),  # Repeated JSON fields
+            'broken_json': re.compile(r'\{[^}]*\{[^}]*\{'),  # Nested broken JSON
+            'json_repetition': re.compile(r'("[\w_]+"\s*:\s*"[^"]*",?\s*){4,}'),  # Repeated JSON fields
        }
-
-    def check_response_quality(
-        self, response: str, query: str, start_time: float
-    ) -> Tuple[bool, Optional[str], Optional[str]]:
+    
+    def check_response_quality(self, response: str, query: str, start_time: float) -> Tuple[bool, Optional[str], Optional[str]]:
        """
        Check response quality and detect runaway behaviors.
-
+        
        Returns:
            (is_valid, issue_type, user_explanation)
        """
        if not response or len(response.strip()) < self.config.min_useful_length:
            return False, "too_short", self._explain_too_short()
-
+        
        # Check response time
        elapsed = time.time() - start_time
        if elapsed > self.config.max_response_time:
            return False, "timeout", self._explain_timeout()
-
+        
        # Check for repetition issues
        repetition_issue = self._check_repetition(response)
        if repetition_issue:
            return False, repetition_issue, self._explain_repetition(repetition_issue)
-
+        
        # Check for thinking loops
        if self.config.enable_thinking_detection:
            thinking_issue = self._check_thinking_loops(response)
            if thinking_issue:
                return False, thinking_issue, self._explain_thinking_loop()
-
+        
        # Check for rambling
        rambling_issue = self._check_rambling(response)
        if rambling_issue:
            return False, rambling_issue, self._explain_rambling()
-
+        
        # Check JSON corruption (for structured responses)
-        if "{" in response and "}" in response:
+        if '{' in response and '}' in response:
            json_issue = self._check_json_corruption(response)
            if json_issue:
                return False, json_issue, self._explain_json_corruption()
-
+        
        return True, None, None
-
+    
    def _check_repetition(self, response: str) -> Optional[str]:
        """Check for excessive repetition."""
        # Word repetition
-        if self.response_patterns["word_repetition"].search(response):
+        if self.response_patterns['word_repetition'].search(response):
            return "word_repetition"
-
-        # Phrase repetition
-        if self.response_patterns["phrase_repetition"].search(response):
+        
+        # Phrase repetition  
+        if self.response_patterns['phrase_repetition'].search(response):
            return "phrase_repetition"
-
+        
        # Calculate repetition ratio (excluding Qwen3 thinking blocks)
        analysis_text = response
        if "<think>" in response and "</think>" in response:
            # Extract only the actual response (after thinking) for repetition analysis
            thinking_end = response.find("</think>")
            if thinking_end != -1:
-                analysis_text = response[thinking_end + 8 :].strip()
-
+                analysis_text = response[thinking_end + 8:].strip()
+                
                # If the actual response (excluding thinking) is short, don't penalize
                if len(analysis_text.split()) < 20:
                    return None
-
+        
        words = analysis_text.split()
        if len(words) > 10:
            unique_words = set(words)
            repetition_ratio = 1 - (len(unique_words) / len(words))
            if repetition_ratio > self.config.max_repetition_ratio:
                return "high_repetition_ratio"
-
+        
        return None
-
+    
    def _check_thinking_loops(self, response: str) -> Optional[str]:
        """Check for thinking loops (common in small models)."""
-        if self.response_patterns["thinking_loop"].search(response):
+        if self.response_patterns['thinking_loop'].search(response):
            return "thinking_loop"
-
+        
        # Check for excessive meta-commentary
-        thinking_words = ["think", "considering", "actually", "wait", "hmm", "let me"]
+        thinking_words = ['think', 'considering', 'actually', 'wait', 'hmm', 'let me']
        thinking_count = sum(response.lower().count(word) for word in thinking_words)
-
+        
        if thinking_count > 5 and len(response.split()) < 200:
            return "excessive_thinking"
-
+        
        return None
-
+    
    def _check_rambling(self, response: str) -> Optional[str]:
        """Check for rambling or excessive filler."""
-        if self.response_patterns["excessive_filler"].search(response):
+        if self.response_patterns['excessive_filler'].search(response):
            return "excessive_filler"
-
+        
        # Check for extremely long sentences (sign of rambling)
-        sentences = re.split(r"[.!?]+", response)
+        sentences = re.split(r'[.!?]+', response)
        long_sentences = [s for s in sentences if len(s.split()) > 50]
-
+        
        if len(long_sentences) > 2:
            return "excessive_rambling"
-
+        
        return None
-
+    
    def _check_json_corruption(self, response: str) -> Optional[str]:
        """Check for JSON corruption in structured responses."""
-        if self.response_patterns["broken_json"].search(response):
+        if self.response_patterns['broken_json'].search(response):
            return "broken_json"
-
-        if self.response_patterns["json_repetition"].search(response):
+        
+        if self.response_patterns['json_repetition'].search(response):
            return "json_repetition"
-
+        
        return None
-
+    
    def _explain_too_short(self) -> str:
        return """🤔 The AI response was too short to be helpful.

 **Why this happens:**
 • The model might be confused by the query
-• Context might be insufficient
+• Context might be insufficient  
 • Model might be overloaded

 **What to try:**
@ -190,11 +180,11 @@ class ModelRunawayDetector:

 **What to try:**
 • Try a simpler, more direct question
-• Use synthesis mode for faster responses: `--synthesize`
+• Use synthesis mode for faster responses: `--synthesize`  
 • Consider using a larger model if available"""

    def _explain_repetition(self, issue_type: str) -> str:
-        return """🔄 The AI got stuck in repetition loops ({issue_type}).
+        return f"""🔄 The AI got stuck in repetition loops ({issue_type}).

 **Why this happens:**
 • Small models sometimes repeat when uncertain
@ -226,7 +216,7 @@ class ModelRunawayDetector:

 **Why this happens:**
 • Small models sometimes lose focus on complex topics
-• Query might be too broad or vague
+• Query might be too broad or vague  
 • Model trying to cover too much at once

 **What to try:**
@ -243,7 +233,7 @@ class ModelRunawayDetector:
 • Context limits can cause format errors
 • Complex analysis might overwhelm formatting

-**What to try:**
+**What to try:**  
 • Try the question again (often resolves itself)
 • Use simpler questions for better formatting
 • Synthesis mode sometimes gives cleaner output
@ -252,109 +242,90 @@ class ModelRunawayDetector:
    def get_recovery_suggestions(self, issue_type: str, query: str) -> List[str]:
        """Get specific recovery suggestions based on the issue."""
        suggestions = []
-
-        if issue_type in ["thinking_loop", "excessive_thinking"]:
-            suggestions.extend(
-                [
-                    f'Try synthesis mode: `rag-mini search . "{query}" --synthesize`',
-                    "Ask more direct questions without 'why' or 'how'",
-                    "Break complex questions into smaller parts",
-                ]
-            )
-
-        elif issue_type in [
-            "word_repetition",
-            "phrase_repetition",
-            "high_repetition_ratio",
-        ]:
-            suggestions.extend(
-                [
-                    "Try rephrasing your question completely",
-                    "Use more specific technical terms",
-                    "Try exploration mode: `rag-mini explore .`",
-                ]
-            )
-
-        elif issue_type == "timeout":
-            suggestions.extend(
-                [
-                    "Try a simpler version of your question",
-                    "Use synthesis mode for faster responses",
-                    "Check if Ollama is under heavy load",
-                ]
-            )
-
+        
+        if issue_type in ['thinking_loop', 'excessive_thinking']:
+            suggestions.extend([
+                f"Try synthesis mode: `rag-mini search . \"{query}\" --synthesize`",
+                "Ask more direct questions without 'why' or 'how'",
+                "Break complex questions into smaller parts"
+            ])
+        
+        elif issue_type in ['word_repetition', 'phrase_repetition', 'high_repetition_ratio']:
+            suggestions.extend([
+                "Try rephrasing your question completely",
+                "Use more specific technical terms",  
+                "Try exploration mode: `rag-mini explore .`"
+            ])
+        
+        elif issue_type == 'timeout':
+            suggestions.extend([
+                "Try a simpler version of your question",
+                "Use synthesis mode for faster responses",
+                "Check if Ollama is under heavy load"
+            ])
+        
        # Universal suggestions
-        suggestions.extend(
-            [
-                "Consider using a larger model if available (qwen3:1.7b or qwen3:4b)",
-                "Check model status: `ollama list`",
-            ]
-        )
-
+        suggestions.extend([
+            "Consider using a larger model if available (qwen3:1.7b or qwen3:4b)",
+            "Check model status: `ollama list`"
+        ])
+        
        return suggestions

-
 def get_optimal_ollama_parameters(model_name: str) -> Dict[str, any]:
    """Get optimal parameters for different Ollama models."""
-
+    
    base_params = {
-        "num_ctx": 32768,  # Good context window for most uses
-        "num_predict": 2000,  # Reasonable response length
-        "temperature": 0.3,  # Balanced creativity/consistency
+        "num_ctx": 32768,      # Good context window for most uses
+        "num_predict": 2000,   # Reasonable response length
+        "temperature": 0.3,    # Balanced creativity/consistency
    }
-
+    
    # Model-specific optimizations
    if "qwen3:0.6b" in model_name.lower():
        return {
            **base_params,
-            "repeat_penalty": 1.15,  # Prevent repetition in small model
-            "presence_penalty": 1.5,  # Suppress repetitive outputs
-            "top_p": 0.8,  # Focused sampling
-            "top_k": 20,  # Limit choices
-            "num_predict": 1500,  # Shorter responses for reliability
+            "repeat_penalty": 1.15,      # Prevent repetition in small model
+            "presence_penalty": 1.5,     # Suppress repetitive outputs 
+            "top_p": 0.8,               # Focused sampling
+            "top_k": 20,                # Limit choices
+            "num_predict": 1500,        # Shorter responses for reliability
        }
-
+    
    elif "qwen3:1.7b" in model_name.lower():
        return {
            **base_params,
-            "repeat_penalty": 1.1,  # Less aggressive for larger model
-            "presence_penalty": 1.0,  # Balanced
-            "top_p": 0.9,  # More creative
-            "top_k": 40,  # More choices
+            "repeat_penalty": 1.1,       # Less aggressive for larger model
+            "presence_penalty": 1.0,     # Balanced
+            "top_p": 0.9,               # More creative
+            "top_k": 40,                # More choices
        }
-
+    
    elif any(size in model_name.lower() for size in ["3b", "7b", "8b"]):
        return {
            **base_params,
-            "repeat_penalty": 1.05,  # Minimal for larger models
-            "presence_penalty": 0.5,  # Light touch
-            "top_p": 0.95,  # High creativity
-            "top_k": 50,  # Many choices
-            "num_predict": 3000,  # Longer responses OK
+            "repeat_penalty": 1.05,      # Minimal for larger models
+            "presence_penalty": 0.5,     # Light touch
+            "top_p": 0.95,              # High creativity
+            "top_k": 50,                # Many choices
+            "num_predict": 3000,        # Longer responses OK
        }
-
+    
    return base_params

-
 # Quick test
-
-
 def test_safeguards():
    """Test the safeguard system."""
    detector = ModelRunawayDetector()
-
+    
    # Test repetition detection
    bad_response = "The user authentication system works by checking user credentials. The user authentication system works by checking user credentials. The user authentication system works by checking user credentials."
-
-    is_valid, issue, explanation = detector.check_response_quality(
-        bad_response, "auth", time.time()
-    )
-
+    
+    is_valid, issue, explanation = detector.check_response_quality(bad_response, "auth", time.time())
+    
    print(f"Repetition test: Valid={is_valid}, Issue={issue}")
    if explanation:
        print(explanation)

-
 if __name__ == "__main__":
-    test_safeguards()
+    test_safeguards()
--- a/mini_rag/llm_synthesizer.py
+++ b/mini_rag/llm_synthesizer.py
--- a/mini_rag/non_invasive_watcher.py
+++ b/mini_rag/non_invasive_watcher.py
@ -3,16 +3,16 @@ Non-invasive file watcher designed to not interfere with development workflows.
 Uses minimal resources and gracefully handles high-load scenarios.
 """

-import logging
-import queue
-import threading
+import os
 import time
-from datetime import datetime
+import logging
+import threading
+import queue
 from pathlib import Path
 from typing import Optional, Set
-
-from watchdog.events import DirModifiedEvent, FileSystemEventHandler
+from datetime import datetime
 from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler, DirModifiedEvent

 from .indexer import ProjectIndexer

@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)

 class NonInvasiveQueue:
    """Ultra-lightweight queue with aggressive deduplication and backoff."""
-
+    
    def __init__(self, delay: float = 5.0, max_queue_size: int = 100):
        self.queue = queue.Queue(maxsize=max_queue_size)
        self.pending = set()
@ -29,28 +29,28 @@ class NonInvasiveQueue:
        self.delay = delay
        self.last_update = {}
        self.dropped_count = 0
-
+    
    def add(self, file_path: Path) -> bool:
        """Add file to queue with aggressive filtering."""
        with self.lock:
            file_str = str(file_path)
            current_time = time.time()
-
+            
            # Skip if recently processed
            if file_str in self.last_update:
                if current_time - self.last_update[file_str] < self.delay:
                    return False
-
+            
            # Skip if already pending
            if file_str in self.pending:
                return False
-
+            
            # Skip if queue is getting full (backpressure)
            if self.queue.qsize() > self.queue.maxsize * 0.8:
                self.dropped_count += 1
                logger.debug(f"Dropping update for {file_str} - queue overloaded")
                return False
-
+            
            try:
                self.queue.put_nowait(file_path)
                self.pending.add(file_str)
@ -59,7 +59,7 @@ class NonInvasiveQueue:
            except queue.Full:
                self.dropped_count += 1
                return False
-
+    
    def get(self, timeout: float = 0.1) -> Optional[Path]:
        """Get next file with very short timeout."""
        try:
@ -73,87 +73,77 @@ class NonInvasiveQueue:

 class MinimalEventHandler(FileSystemEventHandler):
    """Minimal event handler that only watches for meaningful changes."""
-
-    def __init__(
-        self,
-        update_queue: NonInvasiveQueue,
-        include_patterns: Set[str],
-        exclude_patterns: Set[str],
-    ):
+    
+    def __init__(self, 
+                 update_queue: NonInvasiveQueue,
+                 include_patterns: Set[str],
+                 exclude_patterns: Set[str]):
        self.update_queue = update_queue
        self.include_patterns = include_patterns
        self.exclude_patterns = exclude_patterns
        self.last_event_time = {}
-
+        
    def _should_process(self, file_path: str) -> bool:
        """Ultra-conservative file filtering."""
        path = Path(file_path)
-
+        
        # Only process files, not directories
        if not path.is_file():
            return False
-
+        
        # Skip if too large (>1MB)
        try:
            if path.stat().st_size > 1024 * 1024:
                return False
        except (OSError, PermissionError):
            return False
-
+        
        # Skip temporary and system files
        name = path.name
-        if (
-            name.startswith(".")
-            or name.startswith("~")
-            or name.endswith(".tmp")
-            or name.endswith(".swp")
-            or name.endswith(".lock")
-        ):
+        if (name.startswith('.') or 
+            name.startswith('~') or 
+            name.endswith('.tmp') or
+            name.endswith('.swp') or
+            name.endswith('.lock')):
            return False
-
+        
        # Check exclude patterns first (faster)
        path_str = str(path)
        for pattern in self.exclude_patterns:
            if pattern in path_str:
                return False
-
+        
        # Check include patterns
        for pattern in self.include_patterns:
            if path.match(pattern):
                return True
-
+        
        return False
-
+    
    def _rate_limit_event(self, file_path: str) -> bool:
        """Rate limit events per file."""
        current_time = time.time()
        if file_path in self.last_event_time:
-            if (
-                current_time - self.last_event_time[file_path] < 2.0
-            ):  # 2 second cooldown per file
+            if current_time - self.last_event_time[file_path] < 2.0:  # 2 second cooldown per file
                return False
-
+        
        self.last_event_time[file_path] = current_time
        return True
-
+    
    def on_modified(self, event):
        """Handle file modifications with minimal overhead."""
-        if (
-            not event.is_directory
-            and self._should_process(event.src_path)
-            and self._rate_limit_event(event.src_path)
-        ):
+        if (not event.is_directory and 
+            self._should_process(event.src_path) and
+            self._rate_limit_event(event.src_path)):
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_created(self, event):
        """Handle file creation."""
-        if (
-            not event.is_directory
-            and self._should_process(event.src_path)
-            and self._rate_limit_event(event.src_path)
-        ):
+        if (not event.is_directory and 
+            self._should_process(event.src_path) and
+            self._rate_limit_event(event.src_path)):
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_deleted(self, event):
        """Handle file deletion."""
        if not event.is_directory and self._rate_limit_event(event.src_path):
@ -167,17 +157,15 @@ class MinimalEventHandler(FileSystemEventHandler):

 class NonInvasiveFileWatcher:
    """Non-invasive file watcher that prioritizes system stability."""
-
-    def __init__(
-        self,
-        project_path: Path,
-        indexer: Optional[ProjectIndexer] = None,
-        cpu_limit: float = 0.1,  # Max 10% CPU usage
-        max_memory_mb: int = 50,
-    ):  # Max 50MB memory
+    
+    def __init__(self, 
+                 project_path: Path,
+                 indexer: Optional[ProjectIndexer] = None,
+                 cpu_limit: float = 0.1,  # Max 10% CPU usage
+                 max_memory_mb: int = 50):  # Max 50MB memory
        """
        Initialize non-invasive watcher.
-
+        
        Args:
            project_path: Path to watch
            indexer: ProjectIndexer instance
@ -188,173 +176,158 @@ class NonInvasiveFileWatcher:
        self.indexer = indexer or ProjectIndexer(self.project_path)
        self.cpu_limit = cpu_limit
        self.max_memory_mb = max_memory_mb
-
+        
        # Initialize components with conservative settings
-        self.update_queue = NonInvasiveQueue(
-            delay=10.0, max_queue_size=50
-        )  # Very conservative
+        self.update_queue = NonInvasiveQueue(delay=10.0, max_queue_size=50)  # Very conservative
        self.observer = Observer()
        self.worker_thread = None
        self.running = False
-
+        
        # Get patterns from indexer
        self.include_patterns = set(self.indexer.include_patterns)
        self.exclude_patterns = set(self.indexer.exclude_patterns)
-
+        
        # Add more aggressive exclusions
-        self.exclude_patterns.update(
-            {
-                "__pycache__",
-                ".git",
-                "node_modules",
-                ".venv",
-                "venv",
-                "dist",
-                "build",
-                "target",
-                ".idea",
-                ".vscode",
-                ".pytest_cache",
-                "coverage",
-                "htmlcov",
-                ".coverage",
-                ".mypy_cache",
-                ".tox",
-                "logs",
-                "log",
-                "tmp",
-                "temp",
-                ".DS_Store",
-            }
-        )
-
+        self.exclude_patterns.update({
+            '__pycache__', '.git', 'node_modules', '.venv', 'venv',
+            'dist', 'build', 'target', '.idea', '.vscode', '.pytest_cache',
+            'coverage', 'htmlcov', '.coverage', '.mypy_cache', '.tox',
+            'logs', 'log', 'tmp', 'temp', '.DS_Store'
+        })
+        
        # Stats
        self.stats = {
-            "files_processed": 0,
-            "files_dropped": 0,
-            "cpu_throttle_count": 0,
-            "started_at": None,
+            'files_processed': 0,
+            'files_dropped': 0,
+            'cpu_throttle_count': 0,
+            'started_at': None,
        }
-
+    
    def start(self):
        """Start non-invasive watching."""
        if self.running:
            return
-
+        
        logger.info(f"Starting non-invasive file watcher for {self.project_path}")
-
+        
        # Set up minimal event handler
        event_handler = MinimalEventHandler(
-            self.update_queue, self.include_patterns, self.exclude_patterns
+            self.update_queue,
+            self.include_patterns,
+            self.exclude_patterns
        )
-
+        
        # Schedule with recursive watching
-        self.observer.schedule(event_handler, str(self.project_path), recursive=True)
-
+        self.observer.schedule(
+            event_handler,
+            str(self.project_path),
+            recursive=True
+        )
+        
        # Start low-priority worker thread
        self.running = True
        self.worker_thread = threading.Thread(
-            target=self._process_updates_gently, daemon=True, name="RAG-FileWatcher"
+            target=self._process_updates_gently,
+            daemon=True,
+            name="RAG-FileWatcher"
        )
        # Set lowest priority
        self.worker_thread.start()
-
+        
        # Start observer
        self.observer.start()
-
-        self.stats["started_at"] = datetime.now()
+        
+        self.stats['started_at'] = datetime.now()
        logger.info("Non-invasive file watcher started")
-
+    
    def stop(self):
        """Stop watching gracefully."""
        if not self.running:
            return
-
+        
        logger.info("Stopping non-invasive file watcher...")
-
+        
        # Stop observer first
        self.observer.stop()
        self.observer.join(timeout=2.0)  # Don't wait too long
-
+        
        # Stop worker thread
        self.running = False
        if self.worker_thread and self.worker_thread.is_alive():
            self.worker_thread.join(timeout=3.0)  # Don't block shutdown
-
+        
        logger.info("Non-invasive file watcher stopped")
-
+    
    def _process_updates_gently(self):
        """Process updates with extreme care not to interfere."""
        logger.debug("Non-invasive update processor started")
-
+        
        process_start_time = time.time()
-
+        
        while self.running:
            try:
                # Yield CPU frequently
                time.sleep(0.5)  # Always sleep between operations
-
+                
                # Get next file with very short timeout
                file_path = self.update_queue.get(timeout=0.1)
-
+                
                if file_path:
                    # Check CPU usage before processing
                    current_time = time.time()
                    elapsed = current_time - process_start_time
-
+                    
                    # Simple CPU throttling: if we've been working too much, back off
                    if elapsed > 0:
                        # If we're consuming too much time, throttle aggressively
                        work_ratio = 0.1  # Assume we use 10% of time in this check
                        if work_ratio > self.cpu_limit:
-                            self.stats["cpu_throttle_count"] += 1
+                            self.stats['cpu_throttle_count'] += 1
                            time.sleep(2.0)  # Back off significantly
                            continue
-
+                    
                    # Process single file with error isolation
                    try:
                        if file_path.exists():
                            success = self.indexer.update_file(file_path)
                        else:
                            success = self.indexer.delete_file(file_path)
-
+                        
                        if success:
-                            self.stats["files_processed"] += 1
-
+                            self.stats['files_processed'] += 1
+                        
                        # Always yield CPU after processing
                        time.sleep(0.1)
-
+                        
                    except Exception as e:
-                        logger.debug(
-                            f"Non-invasive watcher: failed to process {file_path}: {e}"
-                        )
+                        logger.debug(f"Non-invasive watcher: failed to process {file_path}: {e}")
                        # Don't let errors propagate - just continue
                        continue
-
+                
                # Update dropped count from queue
-                self.stats["files_dropped"] = self.update_queue.dropped_count
-
+                self.stats['files_dropped'] = self.update_queue.dropped_count
+                
            except Exception as e:
                logger.debug(f"Non-invasive watcher error: {e}")
                time.sleep(1.0)  # Back off on errors
-
+        
        logger.debug("Non-invasive update processor stopped")
-
+    
    def get_statistics(self) -> dict:
        """Get non-invasive watcher statistics."""
        stats = self.stats.copy()
-        stats["queue_size"] = self.update_queue.queue.qsize()
-        stats["running"] = self.running
-
-        if stats["started_at"]:
-            uptime = datetime.now() - stats["started_at"]
-            stats["uptime_seconds"] = uptime.total_seconds()
-
+        stats['queue_size'] = self.update_queue.queue.qsize()
+        stats['running'] = self.running
+        
+        if stats['started_at']:
+            uptime = datetime.now() - stats['started_at']
+            stats['uptime_seconds'] = uptime.total_seconds()
+        
        return stats
-
+    
    def __enter__(self):
        self.start()
        return self
-
+    
    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.stop()
+        self.stop()
--- a/mini_rag/ollama_embeddings.py
+++ b/mini_rag/ollama_embeddings.py
@ -3,14 +3,15 @@ Hybrid code embedding module - Ollama primary with ML fallback.
 Tries Ollama first, falls back to local ML stack if needed.
 """

-import logging
-import time
-from concurrent.futures import ThreadPoolExecutor
-from functools import lru_cache
-from typing import Any, Dict, List, Optional, Union
-
-import numpy as np
 import requests
+import numpy as np
+from typing import List, Union, Optional, Dict, Any
+import logging
+from functools import lru_cache
+import time
+import json
+from concurrent.futures import ThreadPoolExecutor
+import threading

 logger = logging.getLogger(__name__)

@ -18,9 +19,8 @@ logger = logging.getLogger(__name__)
 FALLBACK_AVAILABLE = False
 try:
    import torch
+    from transformers import AutoTokenizer, AutoModel
    from sentence_transformers import SentenceTransformer
-    from transformers import AutoModel, AutoTokenizer
-
    FALLBACK_AVAILABLE = True
    logger.debug("ML fallback dependencies available")
 except ImportError:
@ -29,16 +29,12 @@ except ImportError:

 class OllamaEmbedder:
    """Hybrid embeddings: Ollama primary with ML fallback."""
-
-    def __init__(
-        self,
-        model_name: str = "nomic-embed-text:latest",
-        base_url: str = "http://localhost:11434",
-        enable_fallback: bool = True,
-    ):
+    
+    def __init__(self, model_name: str = "nomic-embed-text:latest", base_url: str = "http://localhost:11434", 
+                 enable_fallback: bool = True):
        """
        Initialize the hybrid embedder.
-
+        
        Args:
            model_name: Ollama model to use for embeddings
            base_url: Base URL for Ollama API
@ -48,15 +44,15 @@ class OllamaEmbedder:
        self.base_url = base_url
        self.embedding_dim = 768  # Standard for nomic-embed-text
        self.enable_fallback = enable_fallback and FALLBACK_AVAILABLE
-
+        
        # State tracking
        self.ollama_available = False
        self.fallback_embedder = None
        self.mode = "unknown"  # "ollama", "fallback", or "hash"
-
+        
        # Try to initialize Ollama first
        self._initialize_providers()
-
+        
    def _initialize_providers(self):
        """Initialize embedding providers in priority order."""
        # Try Ollama first
@ -68,15 +64,13 @@ class OllamaEmbedder:
        except Exception as e:
            logger.debug(f"Ollama not available: {e}")
            self.ollama_available = False
-
+            
            # Try ML fallback
            if self.enable_fallback:
                try:
                    self._initialize_fallback_embedder()
                    self.mode = "fallback"
-                    logger.info(
-                        f"✅ ML fallback active: {self.fallback_embedder.model_type if hasattr(self.fallback_embedder, 'model_type') else 'transformer'}"
-                    )
+                    logger.info(f"✅ ML fallback active: {self.fallback_embedder.model_type if hasattr(self.fallback_embedder, 'model_type') else 'transformer'}")
                except Exception as fallback_error:
                    logger.warning(f"ML fallback failed: {fallback_error}")
                    self.mode = "hash"
@ -84,7 +78,7 @@ class OllamaEmbedder:
            else:
                self.mode = "hash"
                logger.info("⚠️ Using hash-based embeddings (no fallback enabled)")
-
+    
    def _verify_ollama_connection(self):
        """Verify Ollama server is running and model is available."""
        try:
@ -99,17 +93,17 @@ class OllamaEmbedder:
            print()
            raise ConnectionError("Ollama service not running. Start with: ollama serve")
        except requests.exceptions.Timeout:
-            print("⏱️ Ollama Service Timeout")
+            print("⏱️ Ollama Service Timeout")  
            print("   Ollama is taking too long to respond")
            print("   Check if Ollama is overloaded: ollama ps")
            print("   Restart if needed: killall ollama && ollama serve")
            print()
            raise ConnectionError("Ollama service timeout")
-
+        
        # Check if our model is available
-        models = response.json().get("models", [])
-        model_names = [model["name"] for model in models]
-
+        models = response.json().get('models', [])
+        model_names = [model['name'] for model in models]
+        
        if self.model_name not in model_names:
            print(f"📦 Model '{self.model_name}' Not Found")
            print("   Embedding models convert text into searchable vectors")
@ -119,23 +113,19 @@ class OllamaEmbedder:
            print()
            # Try to pull the model
            self._pull_model()
-
+        
    def _initialize_fallback_embedder(self):
        """Initialize the ML fallback embedder."""
        if not FALLBACK_AVAILABLE:
            raise RuntimeError("ML dependencies not available for fallback")
-
+        
        # Try lightweight models first for better compatibility
        fallback_models = [
-            (
-                "sentence-transformers/all-MiniLM-L6-v2",
-                384,
-                self._init_sentence_transformer,
-            ),
+            ("sentence-transformers/all-MiniLM-L6-v2", 384, self._init_sentence_transformer),
            ("microsoft/codebert-base", 768, self._init_transformer_model),
            ("microsoft/unixcoder-base", 768, self._init_transformer_model),
        ]
-
+        
        for model_name, dim, init_func in fallback_models:
            try:
                init_func(model_name)
@ -145,33 +135,31 @@ class OllamaEmbedder:
            except Exception as e:
                logger.debug(f"Failed to load {model_name}: {e}")
                continue
-
+                
        raise RuntimeError("Could not initialize any fallback embedding model")
-
+    
    def _init_sentence_transformer(self, model_name: str):
        """Initialize sentence-transformers model."""
        self.fallback_embedder = SentenceTransformer(model_name)
-        self.fallback_embedder.model_type = "sentence_transformer"
-
+        self.fallback_embedder.model_type = 'sentence_transformer'
+        
    def _init_transformer_model(self, model_name: str):
        """Initialize transformer model."""
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name).to(device)
        model.eval()
-
+        
        # Create a simple wrapper
-
        class TransformerWrapper:
-
            def __init__(self, model, tokenizer, device):
                self.model = model
                self.tokenizer = tokenizer
                self.device = device
-                self.model_type = "transformer"
-
+                self.model_type = 'transformer'
+        
        self.fallback_embedder = TransformerWrapper(model, tokenizer, device)
-
+    
    def _pull_model(self):
        """Pull the embedding model if not available."""
        logger.info(f"Pulling model {self.model_name}...")
@ -179,13 +167,13 @@ class OllamaEmbedder:
            response = requests.post(
                f"{self.base_url}/api/pull",
                json={"name": self.model_name},
-                timeout=300,  # 5 minutes for model download
+                timeout=300  # 5 minutes for model download
            )
            response.raise_for_status()
            logger.info(f"Successfully pulled {self.model_name}")
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"Failed to pull model {self.model_name}: {e}")
-
+    
    def _get_embedding(self, text: str) -> np.ndarray:
        """Get embedding using the best available provider."""
        if self.mode == "ollama" and self.ollama_available:
@ -195,25 +183,28 @@ class OllamaEmbedder:
        else:
            # Hash fallback
            return self._hash_embedding(text)
-
+    
    def _get_ollama_embedding(self, text: str) -> np.ndarray:
        """Get embedding from Ollama API."""
        try:
            response = requests.post(
                f"{self.base_url}/api/embeddings",
-                json={"model": self.model_name, "prompt": text},
-                timeout=30,
+                json={
+                    "model": self.model_name,
+                    "prompt": text
+                },
+                timeout=30
            )
            response.raise_for_status()
-
+            
            result = response.json()
-            embedding = result.get("embedding", [])
-
+            embedding = result.get('embedding', [])
+            
            if not embedding:
                raise ValueError("No embedding returned from Ollama")
-
+            
            return np.array(embedding, dtype=np.float32)
-
+            
        except requests.exceptions.RequestException as e:
            logger.error(f"Ollama API request failed: {e}")
            # Degrade gracefully - try fallback if available
@ -225,88 +216,82 @@ class OllamaEmbedder:
        except (ValueError, KeyError) as e:
            logger.error(f"Invalid response from Ollama: {e}")
            return self._hash_embedding(text)
-
+    
    def _get_fallback_embedding(self, text: str) -> np.ndarray:
        """Get embedding from ML fallback."""
        try:
-            if self.fallback_embedder.model_type == "sentence_transformer":
+            if self.fallback_embedder.model_type == 'sentence_transformer':
                embedding = self.fallback_embedder.encode([text], convert_to_numpy=True)[0]
                return embedding.astype(np.float32)
-
-            elif self.fallback_embedder.model_type == "transformer":
+            
+            elif self.fallback_embedder.model_type == 'transformer':
                # Tokenize and generate embedding
                inputs = self.fallback_embedder.tokenizer(
-                    text,
-                    padding=True,
-                    truncation=True,
+                    text, 
+                    padding=True, 
+                    truncation=True, 
                    max_length=512,
-                    return_tensors="pt",
+                    return_tensors="pt"
                ).to(self.fallback_embedder.device)
-
+                
                with torch.no_grad():
                    outputs = self.fallback_embedder.model(**inputs)
-
+                    
                    # Use pooler output if available, otherwise mean pooling
-                    if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
+                    if hasattr(outputs, 'pooler_output') and outputs.pooler_output is not None:
                        embedding = outputs.pooler_output[0]
                    else:
                        # Mean pooling over sequence length
-                        attention_mask = inputs["attention_mask"]
+                        attention_mask = inputs['attention_mask']
                        token_embeddings = outputs.last_hidden_state[0]
-
+                        
                        # Mask and average
-                        input_mask_expanded = (
-                            attention_mask.unsqueeze(-1)
-                            .expand(token_embeddings.size())
-                            .float()
-                        )
+                        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
                        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 0)
                        sum_mask = torch.clamp(input_mask_expanded.sum(0), min=1e-9)
                        embedding = sum_embeddings / sum_mask
-
+                
                return embedding.cpu().numpy().astype(np.float32)
-
+            
            else:
-                raise ValueError(
-                    f"Unknown fallback model type: {self.fallback_embedder.model_type}"
-                )
-
+                raise ValueError(f"Unknown fallback model type: {self.fallback_embedder.model_type}")
+                
        except Exception as e:
            logger.error(f"Fallback embedding failed: {e}")
            return self._hash_embedding(text)
-
+    
    def _hash_embedding(self, text: str) -> np.ndarray:
        """Generate deterministic hash-based embedding as fallback."""
        import hashlib
-
+        
        # Create deterministic hash
-        hash_obj = hashlib.sha256(text.encode("utf-8"))
+        hash_obj = hashlib.sha256(text.encode('utf-8'))
        hash_bytes = hash_obj.digest()
-
+        
        # Convert to numbers and normalize
        hash_nums = np.frombuffer(hash_bytes, dtype=np.uint8)
-
+        
        # Expand to target dimension using repetition
        while len(hash_nums) < self.embedding_dim:
            hash_nums = np.concatenate([hash_nums, hash_nums])
-
+        
        # Take exactly the dimension we need
-        embedding = hash_nums[: self.embedding_dim].astype(np.float32)
-
+        embedding = hash_nums[:self.embedding_dim].astype(np.float32)
+        
        # Normalize to [-1, 1] range
        embedding = (embedding / 127.5) - 1.0
-
+        
        logger.debug(f"Using hash fallback embedding for text: {text[:50]}...")
        return embedding
-
+    
    def embed_code(self, code: Union[str, List[str]], language: str = "python") -> np.ndarray:
        """
        Generate embeddings for code snippet(s).
-
+        
        Args:
            code: Single code string or list of code strings
            language: Programming language (used for context)
-
+            
        Returns:
            Embedding vector(s) as numpy array
        """
@ -315,22 +300,22 @@ class OllamaEmbedder:
            single_input = True
        else:
            single_input = False
-
+        
        # Preprocess code for better embeddings
        processed_code = [self._preprocess_code(c, language) for c in code]
-
+        
        # Generate embeddings
        embeddings = []
        for text in processed_code:
            embedding = self._get_embedding(text)
            embeddings.append(embedding)
-
+        
        embeddings = np.array(embeddings, dtype=np.float32)
-
+        
        if single_input:
            return embeddings[0]
        return embeddings
-
+    
    def _preprocess_code(self, code: str, language: str = "python") -> str:
        """
        Preprocess code for better embedding quality.
@ -338,25 +323,25 @@ class OllamaEmbedder:
        """
        # Remove leading/trailing whitespace
        code = code.strip()
-
+        
        # Normalize whitespace but preserve structure
-        lines = code.split("\n")
+        lines = code.split('\n')
        processed_lines = []
-
+        
        for line in lines:
            # Remove trailing whitespace
            line = line.rstrip()
            # Keep non-empty lines
            if line:
                processed_lines.append(line)
-
-        cleaned_code = "\n".join(processed_lines)
-
+        
+        cleaned_code = '\n'.join(processed_lines)
+        
        # Add language context for better embeddings
        if language and cleaned_code:
            return f"```{language}\n{cleaned_code}\n```"
        return cleaned_code
-
+    
    @lru_cache(maxsize=1000)
    def embed_query(self, query: str) -> np.ndarray:
        """
@ -366,151 +351,149 @@ class OllamaEmbedder:
        # Enhance query for code search
        enhanced_query = f"Search for code related to: {query}"
        return self._get_embedding(enhanced_query)
-
+    
    def batch_embed_files(self, file_contents: List[dict], max_workers: int = 4) -> List[dict]:
        """
        Embed multiple files efficiently using concurrent requests to Ollama.
-
+        
        Args:
            file_contents: List of dicts with 'content' and optionally 'language' keys
            max_workers: Maximum number of concurrent Ollama requests
-
+            
        Returns:
            List of dicts with added 'embedding' key (preserves original order)
        """
        if not file_contents:
            return []
-
+        
        # For small batches, use sequential processing to avoid overhead
        if len(file_contents) <= 2:
            return self._batch_embed_sequential(file_contents)
-
+        
        # For very large batches, use chunked processing to prevent memory issues
        if len(file_contents) > 500:  # Process in chunks to manage memory
            return self._batch_embed_chunked(file_contents, max_workers)
-
+        
        return self._batch_embed_concurrent(file_contents, max_workers)
-
+    
    def _batch_embed_sequential(self, file_contents: List[dict]) -> List[dict]:
        """Sequential processing for small batches."""
        results = []
        for file_dict in file_contents:
-            content = file_dict["content"]
-            language = file_dict.get("language", "python")
+            content = file_dict['content']
+            language = file_dict.get('language', 'python')
            embedding = self.embed_code(content, language)
-
+            
            result = file_dict.copy()
-            result["embedding"] = embedding
+            result['embedding'] = embedding
            results.append(result)
-
+        
        return results
-
-    def _batch_embed_concurrent(
-        self, file_contents: List[dict], max_workers: int
-    ) -> List[dict]:
+    
+    def _batch_embed_concurrent(self, file_contents: List[dict], max_workers: int) -> List[dict]:
        """Concurrent processing for larger batches."""
-
        def embed_single(item_with_index):
            index, file_dict = item_with_index
-            content = file_dict["content"]
-            language = file_dict.get("language", "python")
-
+            content = file_dict['content']
+            language = file_dict.get('language', 'python')
+            
            try:
                embedding = self.embed_code(content, language)
                result = file_dict.copy()
-                result["embedding"] = embedding
+                result['embedding'] = embedding
                return index, result
            except Exception as e:
                logger.error(f"Failed to embed content at index {index}: {e}")
                # Return with hash fallback
                result = file_dict.copy()
-                result["embedding"] = self._hash_embedding(content)
+                result['embedding'] = self._hash_embedding(content)
                return index, result
-
+        
        # Create indexed items to preserve order
        indexed_items = list(enumerate(file_contents))
-
+        
        # Process concurrently
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            indexed_results = list(executor.map(embed_single, indexed_items))
-
+        
        # Sort by original index and extract results
        indexed_results.sort(key=lambda x: x[0])
        return [result for _, result in indexed_results]
-
-    def _batch_embed_chunked(
-        self, file_contents: List[dict], max_workers: int, chunk_size: int = 200
-    ) -> List[dict]:
+    
+    def _batch_embed_chunked(self, file_contents: List[dict], max_workers: int, chunk_size: int = 200) -> List[dict]:
        """
        Process very large batches in smaller chunks to prevent memory issues.
        This is important for beginners who might try to index huge projects.
        """
        results = []
        total_chunks = len(file_contents)
-
+        
        # Process in chunks
        for i in range(0, len(file_contents), chunk_size):
-            chunk = file_contents[i : i + chunk_size]
-
+            chunk = file_contents[i:i + chunk_size]
+            
            # Log progress for large operations
            if total_chunks > chunk_size:
                chunk_num = i // chunk_size + 1
                total_chunk_count = (total_chunks + chunk_size - 1) // chunk_size
-                logger.info(
-                    f"Processing chunk {chunk_num}/{total_chunk_count} ({len(chunk)} files)"
-                )
-
+                logger.info(f"Processing chunk {chunk_num}/{total_chunk_count} ({len(chunk)} files)")
+            
            # Process this chunk using concurrent method
            chunk_results = self._batch_embed_concurrent(chunk, max_workers)
            results.extend(chunk_results)
-
+            
            # Brief pause between chunks to prevent overwhelming the system
            if i + chunk_size < len(file_contents):
-
+                import time
                time.sleep(0.1)  # 100ms pause between chunks
-
+        
        return results
-
+    
    def get_embedding_dim(self) -> int:
        """Return the dimension of embeddings produced by this model."""
        return self.embedding_dim
-
+    
    def get_mode(self) -> str:
        """Return current embedding mode: 'ollama', 'fallback', or 'hash'."""
        return self.mode
-
+    
    def get_status(self) -> Dict[str, Any]:
        """Get detailed status of the embedding system."""
        return {
            "mode": self.mode,
            "ollama_available": self.ollama_available,
            "fallback_available": FALLBACK_AVAILABLE and self.enable_fallback,
-            "fallback_model": (
-                getattr(self.fallback_embedder, "model_type", None)
-                if self.fallback_embedder
-                else None
-            ),
+            "fallback_model": getattr(self.fallback_embedder, 'model_type', None) if self.fallback_embedder else None,
            "embedding_dim": self.embedding_dim,
            "ollama_model": self.model_name if self.mode == "ollama" else None,
-            "ollama_url": self.base_url if self.mode == "ollama" else None,
+            "ollama_url": self.base_url if self.mode == "ollama" else None
        }
-
+    
    def get_embedding_info(self) -> Dict[str, str]:
        """Get human-readable embedding system information for installer."""
        status = self.get_status()
        mode = status.get("mode", "unknown")
        if mode == "ollama":
-            return {"method": f"Ollama ({status['ollama_model']})", "status": "working"}
+            return {
+                "method": f"Ollama ({status['ollama_model']})",
+                "status": "working"
+            }
        # Treat legacy/alternate naming uniformly
        if mode in ("fallback", "ml"):
            return {
                "method": f"ML Fallback ({status['fallback_model']})",
-                "status": "working",
+                "status": "working"
            }
        if mode == "hash":
-            return {"method": "Hash-based (basic similarity)", "status": "working"}
-        return {"method": "Unknown", "status": "error"}
-
+            return {
+                "method": "Hash-based (basic similarity)",
+                "status": "working"
+            }
+        return {
+            "method": "Unknown",
+            "status": "error"
+        }
+    
    def warmup(self):
        """Warm up the embedding system with a dummy request."""
        dummy_code = "def hello(): pass"
@ -520,18 +503,14 @@ class OllamaEmbedder:


 # Convenience function for quick embedding
-
-
-def embed_code(
-    code: Union[str, List[str]], model_name: str = "nomic-embed-text:latest"
-) -> np.ndarray:
+def embed_code(code: Union[str, List[str]], model_name: str = "nomic-embed-text:latest") -> np.ndarray:
    """
    Quick function to embed code without managing embedder instance.
-
+    
    Args:
        code: Code string(s) to embed
        model_name: Ollama model name to use
-
+        
    Returns:
        Embedding vector(s)
    """
@ -540,4 +519,4 @@ def embed_code(


 # Compatibility alias for drop-in replacement
-CodeEmbedder = OllamaEmbedder
+CodeEmbedder = OllamaEmbedder
--- a/mini_rag/path_handler.py
+++ b/mini_rag/path_handler.py
@ -4,50 +4,51 @@ Handles forward/backward slashes on any file system.
 Robust cross-platform path handling.
 """

+import os
 import sys
 from pathlib import Path
-from typing import List, Union
+from typing import Union, List


 def normalize_path(path: Union[str, Path]) -> str:
    """
    Normalize a path to always use forward slashes.
    This ensures consistency across platforms in storage.
-
+    
    Args:
        path: Path as string or Path object
-
+        
    Returns:
        Path string with forward slashes
    """
    # Convert to Path object first
    path_obj = Path(path)
-
+    
    # Convert to string and replace backslashes
-    path_str = str(path_obj).replace("\\", "/")
-
+    path_str = str(path_obj).replace('\\', '/')
+    
    # Handle UNC paths on Windows
-    if sys.platform == "win32" and path_str.startswith("//"):
+    if sys.platform == 'win32' and path_str.startswith('//'):
        # Keep UNC paths as they are
        return path_str
-
+    
    return path_str


 def normalize_relative_path(path: Union[str, Path], base: Union[str, Path]) -> str:
    """
    Get a normalized relative path.
-
+    
    Args:
        path: Path to make relative
        base: Base path to be relative to
-
+        
    Returns:
        Relative path with forward slashes
    """
    path_obj = Path(path).resolve()
    base_obj = Path(base).resolve()
-
+    
    try:
        rel_path = path_obj.relative_to(base_obj)
        return normalize_path(rel_path)
@ -60,10 +61,10 @@ def denormalize_path(path_str: str) -> Path:
    """
    Convert a normalized path string back to a Path object.
    This handles the conversion from storage format to OS format.
-
+    
    Args:
        path_str: Normalized path string with forward slashes
-
+        
    Returns:
        Path object appropriate for the OS
    """
@ -74,10 +75,10 @@ def denormalize_path(path_str: str) -> Path:
 def join_paths(*parts: Union[str, Path]) -> str:
    """
    Join path parts and return normalized result.
-
+    
    Args:
        *parts: Path parts to join
-
+        
    Returns:
        Normalized joined path
    """
@ -89,46 +90,46 @@ def join_paths(*parts: Union[str, Path]) -> str:
 def split_path(path: Union[str, Path]) -> List[str]:
    """
    Split a path into its components.
-
+    
    Args:
        path: Path to split
-
+        
    Returns:
        List of path components
    """
    path_obj = Path(path)
    parts = []
-
+    
    # Handle drive on Windows
    if path_obj.drive:
        parts.append(path_obj.drive)
-
+    
    # Add all other parts
    parts.extend(path_obj.parts[1:] if path_obj.drive else path_obj.parts)
-
+    
    return parts


 def ensure_forward_slashes(path_str: str) -> str:
    """
    Quick function to ensure a path string uses forward slashes.
-
+    
    Args:
        path_str: Path string
-
+        
    Returns:
        Path with forward slashes
    """
-    return path_str.replace("\\", "/")
+    return path_str.replace('\\', '/')


 def ensure_native_slashes(path_str: str) -> str:
    """
    Ensure a path uses the native separator for the OS.
-
+    
    Args:
        path_str: Path string
-
+        
    Returns:
        Path with native separators
    """
@ -136,8 +137,6 @@ def ensure_native_slashes(path_str: str) -> str:


 # Convenience functions for common operations
-
-
 def storage_path(path: Union[str, Path]) -> str:
    """Convert path to storage format (forward slashes)."""
    return normalize_path(path)
@ -150,4 +149,4 @@ def display_path(path: Union[str, Path]) -> str:

 def from_storage_path(path_str: str) -> Path:
    """Convert from storage format to Path object."""
-    return denormalize_path(path_str)
+    return denormalize_path(path_str)
--- a/mini_rag/performance.py
+++ b/mini_rag/performance.py
@ -3,87 +3,85 @@ Performance monitoring for RAG system.
 Track loading times, query times, and resource usage.
 """

-import logging
-import os
 import time
-from contextlib import contextmanager
-from typing import Any, Dict, Optional
-
 import psutil
+import os
+from contextlib import contextmanager
+from typing import Dict, Any, Optional
+import logging

 logger = logging.getLogger(__name__)


 class PerformanceMonitor:
    """Track performance metrics for RAG operations."""
-
+    
    def __init__(self):
        self.metrics = {}
        self.process = psutil.Process(os.getpid())
-
+        
    @contextmanager
    def measure(self, operation: str):
        """Context manager to measure operation time and memory."""
        # Get initial state
        start_time = time.time()
        start_memory = self.process.memory_info().rss / 1024 / 1024  # MB
-
+        
        try:
            yield self
        finally:
            # Calculate metrics
            end_time = time.time()
            end_memory = self.process.memory_info().rss / 1024 / 1024  # MB
-
+            
            duration = end_time - start_time
            memory_delta = end_memory - start_memory
-
+            
            # Store metrics
            self.metrics[operation] = {
-                "duration_seconds": duration,
-                "memory_delta_mb": memory_delta,
-                "final_memory_mb": end_memory,
+                'duration_seconds': duration,
+                'memory_delta_mb': memory_delta,
+                'final_memory_mb': end_memory,
            }
-
+            
            logger.info(
                f"[PERF] {operation}: {duration:.2f}s, "
                f"Memory: {end_memory:.1f}MB (+{memory_delta:+.1f}MB)"
            )
-
+    
    def get_summary(self) -> Dict[str, Any]:
        """Get performance summary."""
-        total_time = sum(m["duration_seconds"] for m in self.metrics.values())
-
+        total_time = sum(m['duration_seconds'] for m in self.metrics.values())
+        
        return {
-            "total_time_seconds": total_time,
-            "operations": self.metrics,
-            "current_memory_mb": self.process.memory_info().rss / 1024 / 1024,
+            'total_time_seconds': total_time,
+            'operations': self.metrics,
+            'current_memory_mb': self.process.memory_info().rss / 1024 / 1024,
        }
-
+    
    def print_summary(self):
        """Print a formatted summary."""
-        print("\n" + "=" * 50)
+        print("\n" + "="*50)
        print("PERFORMANCE SUMMARY")
-        print("=" * 50)
-
+        print("="*50)
+        
        for op, metrics in self.metrics.items():
            print(f"\n{op}:")
            print(f"  Time: {metrics['duration_seconds']:.2f}s")
            print(f"  Memory: +{metrics['memory_delta_mb']:+.1f}MB")
-
+        
        summary = self.get_summary()
        print(f"\nTotal Time: {summary['total_time_seconds']:.2f}s")
        print(f"Current Memory: {summary['current_memory_mb']:.1f}MB")
-        print("=" * 50)
+        print("="*50)


 # Global instance for easy access
 _monitor = None

-
 def get_monitor() -> PerformanceMonitor:
    """Get or create global monitor instance."""
    global _monitor
    if _monitor is None:
        _monitor = PerformanceMonitor()
-    return _monitor
+    return _monitor
--- a/mini_rag/query_expander.py
+++ b/mini_rag/query_expander.py
@ -7,7 +7,7 @@ Automatically expands search queries to find more relevant results.

 Example: "authentication" becomes "authentication login user verification credentials"

-## How It Helps
+## How It Helps  
 - 2-3x more relevant search results
 - Works with any content (code, docs, notes, etc.)
 - Completely transparent to users
@ -26,25 +26,22 @@ expanded = expander.expand_query("error handling")
 # Result: "error handling exception try catch fault tolerance"
 ```

-Perfect for beginners - enable in TUI for exploration,
+Perfect for beginners - enable in TUI for exploration, 
 disable in CLI for maximum speed.
 """

 import logging
 import re
 import threading
-from typing import Optional
-
+from typing import List, Optional
 import requests
-
 from .config import RAGConfig

 logger = logging.getLogger(__name__)

-
 class QueryExpander:
    """Expands search queries using LLM to improve search recall."""
-
+    
    def __init__(self, config: RAGConfig):
        self.config = config
        self.ollama_url = f"http://{config.llm.ollama_host}"
@ -52,37 +49,37 @@ class QueryExpander:
        self.max_terms = config.llm.max_expansion_terms
        self.enabled = config.search.expand_queries
        self._initialized = False
-
+        
        # Cache for expanded queries to avoid repeated API calls
        self._cache = {}
        self._cache_lock = threading.RLock()  # Thread-safe cache access
-
+    
    def _ensure_initialized(self):
        """Lazy initialization with LLM warmup."""
        if self._initialized:
            return
-
+            
        # Skip warmup - causes startup delays and unwanted model calls
        # Query expansion works fine on first use without warmup
-
+                
        self._initialized = True
-
+    
    def expand_query(self, query: str) -> str:
        """Expand a search query with related terms."""
        if not self.enabled or not query.strip():
            return query
-
+            
        self._ensure_initialized()
-
+            
        # Check cache first (thread-safe)
        with self._cache_lock:
            if query in self._cache:
                return self._cache[query]
-
+        
        # Don't expand very short queries or obvious keywords
        if len(query.split()) <= 1 or len(query) <= 3:
            return query
-
+            
        try:
            expanded = self._llm_expand_query(query)
            if expanded and expanded != query:
@ -94,23 +91,23 @@ class QueryExpander:
                        self._manage_cache_size()
                logger.info(f"Expanded query: '{query}' → '{expanded}'")
                return expanded
-
+            
        except Exception as e:
            logger.warning(f"Query expansion failed: {e}")
-
+        
        # Return original query if expansion fails
        return query
-
+    
    def _llm_expand_query(self, query: str) -> Optional[str]:
        """Use LLM to expand the query with related terms."""
-
+        
        # Use best available model
        model_to_use = self._select_expansion_model()
        if not model_to_use:
            return None
-
+        
        # Create expansion prompt
-        prompt = """You are a search query expert. Expand the following search query with {self.max_terms} additional related terms that would help find relevant content.
+        prompt = f"""You are a search query expert. Expand the following search query with {self.max_terms} additional related terms that would help find relevant content.

 Original query: "{query}"

@ -137,99 +134,95 @@ Expanded query:"""
                "options": {
                    "temperature": 0.1,  # Very low temperature for consistent expansions
                    "top_p": 0.8,
-                    "max_tokens": 100,  # Keep it short
-                },
+                    "max_tokens": 100    # Keep it short
+                }
            }
-
+            
            response = requests.post(
                f"{self.ollama_url}/api/generate",
                json=payload,
-                timeout=10,  # Quick timeout for low latency
+                timeout=10  # Quick timeout for low latency
            )
-
+            
            if response.status_code == 200:
-                result = response.json().get("response", "").strip()
-
+                result = response.json().get('response', '').strip()
+                
                # Clean up the response - extract just the expanded query
                expanded = self._clean_expansion(result, query)
                return expanded
-
+                
        except Exception as e:
            logger.warning(f"LLM expansion failed: {e}")
            return None
-
+    
    def _select_expansion_model(self) -> Optional[str]:
        """Select the best available model for query expansion."""
-
+        
        if self.model != "auto":
            return self.model
-
+        
        try:
            # Get available models
            response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
            if response.status_code == 200:
                data = response.json()
-                available = [model["name"] for model in data.get("models", [])]
-
+                available = [model['name'] for model in data.get('models', [])]
+                
                # Use same model rankings as main synthesizer for consistency
                expansion_preferences = [
-                    "qwen3:1.7b",
-                    "qwen3:0.6b",
-                    "qwen3:4b",
-                    "qwen2.5:3b",
-                    "qwen2.5:1.5b",
-                    "qwen2.5-coder:1.5b",
+                    "qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "qwen2.5:3b", 
+                    "qwen2.5:1.5b", "qwen2.5-coder:1.5b"
                ]
-
+                
                for preferred in expansion_preferences:
                    for available_model in available:
                        if preferred in available_model:
                            logger.debug(f"Using {available_model} for query expansion")
                            return available_model
-
+                
                # Fallback to first available model
                if available:
                    return available[0]
-
+                    
        except Exception as e:
            logger.warning(f"Could not select expansion model: {e}")
-
+        
        return None
-
+    
    def _clean_expansion(self, raw_response: str, original_query: str) -> str:
        """Clean the LLM response to extract just the expanded query."""
-
+        
        # Remove common response artifacts
        clean_response = raw_response.strip()
-
+        
        # Remove quotes if the entire response is quoted
        if clean_response.startswith('"') and clean_response.endswith('"'):
            clean_response = clean_response[1:-1]
-
+        
        # Take only the first line if multiline
-        clean_response = clean_response.split("\n")[0].strip()
-
+        clean_response = clean_response.split('\n')[0].strip()
+        
        # Remove excessive punctuation and normalize spaces
-        clean_response = re.sub(r"[^\w\s-]", " ", clean_response)
-        clean_response = re.sub(r"\s+", " ", clean_response).strip()
-
+        clean_response = re.sub(r'[^\w\s-]', ' ', clean_response)
+        clean_response = re.sub(r'\s+', ' ', clean_response).strip()
+        
        # Ensure it starts with the original query
        if not clean_response.lower().startswith(original_query.lower()):
            clean_response = f"{original_query} {clean_response}"
-
+        
        # Limit the total length to avoid very long queries
        words = clean_response.split()
        if len(words) > len(original_query.split()) + self.max_terms:
-            words = words[: len(original_query.split()) + self.max_terms]
-            clean_response = " ".join(words)
-
+            words = words[:len(original_query.split()) + self.max_terms]
+            clean_response = ' '.join(words)
+        
        return clean_response
-
+    
    def clear_cache(self):
        """Clear the expansion cache (thread-safe)."""
        with self._cache_lock:
            self._cache.clear()
-
+    
    def _manage_cache_size(self, max_size: int = 1000):
        """Keep cache from growing too large (prevents memory leaks)."""
        with self._cache_lock:
@ -239,49 +232,45 @@ Expanded query:"""
                keep_count = max_size // 2
                self._cache = dict(items[-keep_count:])
                logger.debug(f"Cache trimmed from {len(items)} to {len(self._cache)} entries")
-
+    
    def is_available(self) -> bool:
        """Check if query expansion is available."""
        if not self.enabled:
            return False
-
+            
        self._ensure_initialized()
        try:
            response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
            return response.status_code == 200
-        except (ConnectionError, TimeoutError, requests.RequestException):
+        except:
            return False

-
 # Quick test function
-
-
 def test_expansion():
    """Test the query expander."""
    from .config import RAGConfig
-
+    
    config = RAGConfig()
    config.search.expand_queries = True
    config.llm.max_expansion_terms = 6
-
+    
    expander = QueryExpander(config)
-
+    
    if not expander.is_available():
        print("❌ Ollama not available for testing")
        return
-
+    
    test_queries = [
        "authentication",
-        "error handling",
+        "error handling", 
        "database query",
-        "user interface",
+        "user interface"
    ]
-
+    
    print("🔍 Testing Query Expansion:")
    for query in test_queries:
        expanded = expander.expand_query(query)
        print(f"  '{query}' → '{expanded}'")

-
 if __name__ == "__main__":
-    test_expansion()
+    test_expansion()
--- a/mini_rag/search.py
+++ b/mini_rag/search.py
--- a/mini_rag/server.py
+++ b/mini_rag/server.py
@ -4,30 +4,30 @@ No more loading/unloading madness!
 """

 import json
-import logging
-import os
 import socket
-import subprocess
-import sys
 import threading
 import time
+import subprocess
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Dict, Any, Optional
+import logging
+import sys
+import os

 # Fix Windows console
-if sys.platform == "win32":
-    os.environ["PYTHONUTF8"] = "1"
+if sys.platform == 'win32':
+    os.environ['PYTHONUTF8'] = '1'

+from .search import CodeSearcher
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .performance import PerformanceMonitor
-from .search import CodeSearcher

 logger = logging.getLogger(__name__)


 class RAGServer:
    """Persistent server that keeps embeddings and DB loaded."""
-
+    
    def __init__(self, project_path: Path, port: int = 7777):
        self.project_path = project_path
        self.port = port
@ -37,36 +37,37 @@ class RAGServer:
        self.socket = None
        self.start_time = None
        self.query_count = 0
-
+        
    def _kill_existing_server(self):
        """Kill any existing process using our port."""
        try:
            # Check if port is in use
            test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            result = test_sock.connect_ex(("localhost", self.port))
+            result = test_sock.connect_ex(('localhost', self.port))
            test_sock.close()
-
+            
            if result == 0:  # Port is in use
                print(f"️  Port {self.port} is already in use, attempting to free it...")
-
-                if sys.platform == "win32":
+                
+                if sys.platform == 'win32':
                    # Windows: Find and kill process using netstat
                    import subprocess
-
                    try:
                        # Get process ID using the port
                        result = subprocess.run(
-                            ["netstat", "-ano"], capture_output=True, text=True
+                            ['netstat', '-ano'], 
+                            capture_output=True, 
+                            text=True
                        )
-
-                        for line in result.stdout.split("\n"):
-                            if f":{self.port}" in line and "LISTENING" in line:
+                        
+                        for line in result.stdout.split('\n'):
+                            if f':{self.port}' in line and 'LISTENING' in line:
                                parts = line.split()
                                pid = parts[-1]
                                print(f"   Found process {pid} using port {self.port}")
-
+                                
                                # Kill the process
-                                subprocess.run(["taskkill", "//PID", pid, "//F"], check=False)
+                                subprocess.run(['taskkill', '//PID', pid, '//F'], check=False)
                                print(f"    Killed process {pid}")
                                time.sleep(1)  # Give it a moment to release the port
                                break
@ -75,16 +76,15 @@ class RAGServer:
                else:
                    # Unix/Linux: Use lsof and kill
                    import subprocess
-
                    try:
                        result = subprocess.run(
-                            ["lso", "-ti", f":{self.port}"],
-                            capture_output=True,
-                            text=True,
+                            ['lsof', '-ti', f':{self.port}'], 
+                            capture_output=True, 
+                            text=True
                        )
                        if result.stdout.strip():
                            pid = result.stdout.strip()
-                            subprocess.run(["kill", "-9", pid], check=False)
+                            subprocess.run(['kill', '-9', pid], check=False)
                            print(f"    Killed process {pid}")
                            time.sleep(1)
                    except Exception as e:
@ -92,38 +92,38 @@ class RAGServer:
        except Exception as e:
            # Non-critical error, just log it
            logger.debug(f"Error checking port: {e}")
-
+        
    def start(self):
        """Start the RAG server."""
        # Kill any existing process on our port first
        self._kill_existing_server()
-
+        
        print(f" Starting RAG server on port {self.port}...")
-
+        
        # Load everything once
        perf = PerformanceMonitor()
-
+        
        with perf.measure("Load Embedder"):
            self.embedder = CodeEmbedder()
-
+            
        with perf.measure("Connect Database"):
            self.searcher = CodeSearcher(self.project_path, embedder=self.embedder)
-
+        
        perf.print_summary()
-
+        
        # Start server
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-        self.socket.bind(("localhost", self.port))
+        self.socket.bind(('localhost', self.port))
        self.socket.listen(5)
-
+        
        self.running = True
        self.start_time = time.time()
-
+        
        print(f"\n RAG server ready on localhost:{self.port}")
        print("   Model loaded, database connected")
        print("   Waiting for queries...\n")
-
+        
        # Handle connections
        while self.running:
            try:
@ -136,50 +136,50 @@ class RAGServer:
            except Exception as e:
                if self.running:
                    logger.error(f"Server error: {e}")
-
+    
    def _handle_client(self, client: socket.socket):
        """Handle a client connection."""
        try:
            # Receive query with proper message framing
            data = self._receive_json(client)
            request = json.loads(data)
-
+            
            # Check for shutdown command
-            if request.get("command") == "shutdown":
+            if request.get('command') == 'shutdown':
                print("\n Shutdown requested")
-                response = {"success": True, "message": "Server shutting down"}
+                response = {'success': True, 'message': 'Server shutting down'}
                self._send_json(client, response)
                self.stop()
                return
-
-            query = request.get("query", "")
-            top_k = request.get("top_k", 10)
-
+            
+            query = request.get('query', '')
+            top_k = request.get('top_k', 10)
+            
            self.query_count += 1
            print(f"[Query #{self.query_count}] {query}")
-
+            
            # Perform search
            start = time.time()
            results = self.searcher.search(query, top_k=top_k)
            search_time = time.time() - start
-
+            
            # Prepare response
            response = {
-                "success": True,
-                "query": query,
-                "count": len(results),
-                "search_time_ms": int(search_time * 1000),
-                "results": [r.to_dict() for r in results],
-                "server_uptime": int(time.time() - self.start_time),
-                "total_queries": self.query_count,
+                'success': True,
+                'query': query,
+                'count': len(results),
+                'search_time_ms': int(search_time * 1000),
+                'results': [r.to_dict() for r in results],
+                'server_uptime': int(time.time() - self.start_time),
+                'total_queries': self.query_count,
            }
-
+            
            # Send response with proper framing
            self._send_json(client, response)
-
+            
            print(f"    Found {len(results)} results in {search_time*1000:.0f}ms")
-
-        except ConnectionError:
+            
+        except ConnectionError as e:
            # Normal disconnection - client closed connection
            # This is expected behavior, don't log as error
            pass
@ -187,49 +187,52 @@ class RAGServer:
            # Only log actual errors, not normal disconnections
            if "Connection closed" not in str(e):
                logger.error(f"Client handler error: {e}")
-            error_response = {"success": False, "error": str(e)}
+            error_response = {
+                'success': False,
+                'error': str(e)
+            }
            try:
                self._send_json(client, error_response)
-            except (ConnectionError, OSError, TypeError, ValueError, socket.error):
+            except:
                pass
        finally:
            client.close()
-
+    
    def _receive_json(self, sock: socket.socket) -> str:
        """Receive a complete JSON message with length prefix."""
        # First receive the length (4 bytes)
-        length_data = b""
+        length_data = b''
        while len(length_data) < 4:
            chunk = sock.recv(4 - len(length_data))
            if not chunk:
                raise ConnectionError("Connection closed while receiving length")
            length_data += chunk
-
-        length = int.from_bytes(length_data, "big")
-
+        
+        length = int.from_bytes(length_data, 'big')
+        
        # Now receive the actual data
-        data = b""
+        data = b''
        while len(data) < length:
            chunk = sock.recv(min(65536, length - len(data)))
            if not chunk:
                raise ConnectionError("Connection closed while receiving data")
            data += chunk
-
-        return data.decode("utf-8")
-
+        
+        return data.decode('utf-8')
+    
    def _send_json(self, sock: socket.socket, data: dict):
        """Send a JSON message with length prefix."""
        # Sanitize the data to ensure JSON compatibility
-        json_str = json.dumps(data, ensure_ascii=False, separators=(",", ":"))
-        json_bytes = json_str.encode("utf-8")
-
+        json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
+        json_bytes = json_str.encode('utf-8')
+        
        # Send length prefix (4 bytes)
        length = len(json_bytes)
-        sock.send(length.to_bytes(4, "big"))
-
+        sock.send(length.to_bytes(4, 'big'))
+        
        # Send the data
        sock.sendall(json_bytes)
-
+    
    def stop(self):
        """Stop the server."""
        self.running = False
@ -240,89 +243,101 @@ class RAGServer:

 class RAGClient:
    """Client to communicate with RAG server."""
-
+    
    def __init__(self, port: int = 7777):
        self.port = port
        self.use_legacy = False
-
+        
    def search(self, query: str, top_k: int = 10) -> Dict[str, Any]:
        """Send search query to server."""
        try:
            # Connect to server
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            sock.connect(("localhost", self.port))
-
+            sock.connect(('localhost', self.port))
+            
            # Send request with proper framing
-            request = {"query": query, "top_k": top_k}
+            request = {
+                'query': query,
+                'top_k': top_k
+            }
            self._send_json(sock, request)
-
+            
            # Receive response with proper framing
            data = self._receive_json(sock)
            response = json.loads(data)
-
+            
            sock.close()
            return response
-
+            
        except ConnectionRefusedError:
            return {
-                "success": False,
-                "error": "RAG server not running. Start with: rag-mini server",
+                'success': False,
+                'error': 'RAG server not running. Start with: rag-mini server'
            }
        except ConnectionError as e:
            # Try legacy mode without message framing
            if not self.use_legacy and "receiving length" in str(e):
                self.use_legacy = True
                return self._search_legacy(query, top_k)
-            return {"success": False, "error": str(e)}
+            return {
+                'success': False,
+                'error': str(e)
+            }
        except Exception as e:
-            return {"success": False, "error": str(e)}
-
+            return {
+                'success': False,
+                'error': str(e)
+            }
+    
    def _receive_json(self, sock: socket.socket) -> str:
        """Receive a complete JSON message with length prefix."""
        # First receive the length (4 bytes)
-        length_data = b""
+        length_data = b''
        while len(length_data) < 4:
            chunk = sock.recv(4 - len(length_data))
            if not chunk:
                raise ConnectionError("Connection closed while receiving length")
            length_data += chunk
-
-        length = int.from_bytes(length_data, "big")
-
+        
+        length = int.from_bytes(length_data, 'big')
+        
        # Now receive the actual data
-        data = b""
+        data = b''
        while len(data) < length:
            chunk = sock.recv(min(65536, length - len(data)))
            if not chunk:
                raise ConnectionError("Connection closed while receiving data")
            data += chunk
-
-        return data.decode("utf-8")
-
+        
+        return data.decode('utf-8')
+    
    def _send_json(self, sock: socket.socket, data: dict):
        """Send a JSON message with length prefix."""
-        json_str = json.dumps(data, ensure_ascii=False, separators=(",", ":"))
-        json_bytes = json_str.encode("utf-8")
-
+        json_str = json.dumps(data, ensure_ascii=False, separators=(',', ':'))
+        json_bytes = json_str.encode('utf-8')
+        
        # Send length prefix (4 bytes)
        length = len(json_bytes)
-        sock.send(length.to_bytes(4, "big"))
-
+        sock.send(length.to_bytes(4, 'big'))
+        
        # Send the data
        sock.sendall(json_bytes)
-
+    
    def _search_legacy(self, query: str, top_k: int = 10) -> Dict[str, Any]:
        """Legacy search without message framing for old servers."""
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            sock.connect(("localhost", self.port))
-
+            sock.connect(('localhost', self.port))
+            
            # Send request (old way)
-            request = {"query": query, "top_k": top_k}
-            sock.send(json.dumps(request).encode("utf-8"))
-
+            request = {
+                'query': query,
+                'top_k': top_k
+            }
+            sock.send(json.dumps(request).encode('utf-8'))
+            
            # Receive response (accumulate until we get valid JSON)
-            data = b""
+            data = b''
            while True:
                chunk = sock.recv(65536)
                if not chunk:
@ -330,26 +345,32 @@ class RAGClient:
                data += chunk
                try:
                    # Try to decode as JSON
-                    response = json.loads(data.decode("utf-8"))
+                    response = json.loads(data.decode('utf-8'))
                    sock.close()
                    return response
                except json.JSONDecodeError:
                    # Keep receiving
                    continue
-
+            
            sock.close()
-            return {"success": False, "error": "Incomplete response from server"}
+            return {
+                'success': False,
+                'error': 'Incomplete response from server'
+            }
        except Exception as e:
-            return {"success": False, "error": str(e)}
-
+            return {
+                'success': False,
+                'error': str(e)
+            }
+    
    def is_running(self) -> bool:
        """Check if server is running."""
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            result = sock.connect_ex(("localhost", self.port))
+            result = sock.connect_ex(('localhost', self.port))
            sock.close()
            return result == 0
-        except (ConnectionError, OSError, TypeError, ValueError, socket.error):
+        except:
            return False


@ -368,31 +389,23 @@ def auto_start_if_needed(project_path: Path) -> Optional[subprocess.Popen]:
    if not client.is_running():
        # Start server in background
        import subprocess
-
-        cmd = [
-            sys.executable,
-            "-m",
-            "mini_rag.cli",
-            "server",
-            "--path",
-            str(project_path),
-        ]
+        cmd = [sys.executable, "-m", "mini_rag.cli", "server", "--path", str(project_path)]
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
-            creationflags=(subprocess.CREATE_NEW_CONSOLE if sys.platform == "win32" else 0),
+            creationflags=subprocess.CREATE_NEW_CONSOLE if sys.platform == 'win32' else 0
        )
-
+        
        # Wait for server to start
        for _ in range(30):  # 30 second timeout
            time.sleep(1)
            if client.is_running():
                print(" RAG server started automatically")
                return process
-
+        
        # Failed to start
        process.terminate()
        raise RuntimeError("Failed to start RAG server")
-
-    return None
+    
+    return None
--- a/mini_rag/smart_chunking.py
+++ b/mini_rag/smart_chunking.py
@ -3,140 +3,148 @@ Smart language-aware chunking strategies for FSS-Mini-RAG.
 Automatically adapts chunking based on file type and content patterns.
 """

+from typing import Dict, Any, List
 from pathlib import Path
-from typing import Any, Dict, List
-
+import json

 class SmartChunkingStrategy:
    """Intelligent chunking that adapts to file types and content."""
-
+    
    def __init__(self):
        self.language_configs = {
-            "python": {
-                "max_size": 3000,  # Larger for better function context
-                "min_size": 200,
-                "strategy": "function",
-                "prefer_semantic": True,
+            'python': {
+                'max_size': 3000,  # Larger for better function context
+                'min_size': 200,
+                'strategy': 'function',
+                'prefer_semantic': True
            },
-            "javascript": {
-                "max_size": 2500,
-                "min_size": 150,
-                "strategy": "function",
-                "prefer_semantic": True,
+            'javascript': {
+                'max_size': 2500,
+                'min_size': 150,
+                'strategy': 'function',
+                'prefer_semantic': True
            },
-            "markdown": {
-                "max_size": 2500,
-                "min_size": 300,  # Larger minimum for complete thoughts
-                "strategy": "header",
-                "preserve_structure": True,
+            'markdown': {
+                'max_size': 2500,
+                'min_size': 300,  # Larger minimum for complete thoughts
+                'strategy': 'header',
+                'preserve_structure': True
            },
-            "json": {
-                "max_size": 1000,  # Smaller for config files
-                "min_size": 50,
-                "skip_if_large": True,  # Skip huge config JSONs
-                "max_file_size": 50000,  # 50KB limit
+            'json': {
+                'max_size': 1000,  # Smaller for config files
+                'min_size': 50,
+                'skip_if_large': True,  # Skip huge config JSONs
+                'max_file_size': 50000  # 50KB limit
            },
-            "yaml": {"max_size": 1500, "min_size": 100, "strategy": "key_block"},
-            "text": {"max_size": 2000, "min_size": 200, "strategy": "paragraph"},
-            "bash": {"max_size": 1500, "min_size": 100, "strategy": "function"},
+            'yaml': {
+                'max_size': 1500,
+                'min_size': 100,
+                'strategy': 'key_block'
+            },
+            'text': {
+                'max_size': 2000,
+                'min_size': 200,
+                'strategy': 'paragraph'
+            },
+            'bash': {
+                'max_size': 1500,
+                'min_size': 100,
+                'strategy': 'function'
+            }
        }
-
+        
        # Smart defaults for unknown languages
        self.default_config = {
-            "max_size": 2000,
-            "min_size": 150,
-            "strategy": "semantic",
+            'max_size': 2000,
+            'min_size': 150,
+            'strategy': 'semantic'
        }
-
+    
    def get_config_for_language(self, language: str, file_size: int = 0) -> Dict[str, Any]:
        """Get optimal chunking config for a specific language."""
        config = self.language_configs.get(language, self.default_config).copy()
-
+        
        # Smart adjustments based on file size
        if file_size > 0:
            if file_size < 500:  # Very small files
-                config["max_size"] = max(config["max_size"] // 2, 200)
-                config["min_size"] = 50
-            elif file_size > 20000:  # Large files
-                config["max_size"] = min(config["max_size"] + 1000, 4000)
-
+                config['max_size'] = max(config['max_size'] // 2, 200)
+                config['min_size'] = 50
+            elif file_size > 20000:  # Large files  
+                config['max_size'] = min(config['max_size'] + 1000, 4000)
+        
        return config
-
+    
    def should_skip_file(self, language: str, file_size: int) -> bool:
        """Determine if a file should be skipped entirely."""
        lang_config = self.language_configs.get(language, {})
-
+        
        # Skip huge JSON config files
-        if language == "json" and lang_config.get("skip_if_large"):
-            max_size = lang_config.get("max_file_size", 50000)
+        if language == 'json' and lang_config.get('skip_if_large'):
+            max_size = lang_config.get('max_file_size', 50000)
            if file_size > max_size:
                return True
-
+        
        # Skip tiny files that won't provide good context
        if file_size < 30:
            return True
-
+            
        return False
-
+    
    def get_smart_defaults(self, project_stats: Dict[str, Any]) -> Dict[str, Any]:
        """Generate smart defaults based on project language distribution."""
-        languages = project_stats.get("languages", {})
-        # sum(languages.values())  # Unused variable removed
-
+        languages = project_stats.get('languages', {})
+        total_files = sum(languages.values())
+        
        # Determine primary language
-        primary_lang = max(languages.items(), key=lambda x: x[1])[0] if languages else "python"
+        primary_lang = max(languages.items(), key=lambda x: x[1])[0] if languages else 'python'
        primary_config = self.language_configs.get(primary_lang, self.default_config)
-
+        
        # Smart streaming threshold based on large files
-        large_files = project_stats.get("large_files", 0)
+        large_files = project_stats.get('large_files', 0)
        streaming_threshold = 5120 if large_files > 5 else 1048576  # 5KB vs 1MB
-
+        
        return {
            "chunking": {
-                "max_size": primary_config["max_size"],
-                "min_size": primary_config["min_size"],
-                "strategy": primary_config.get("strategy", "semantic"),
+                "max_size": primary_config['max_size'],
+                "min_size": primary_config['min_size'], 
+                "strategy": primary_config.get('strategy', 'semantic'),
                "language_specific": {
-                    lang: config
-                    for lang, config in self.language_configs.items()
+                    lang: config for lang, config in self.language_configs.items()
                    if languages.get(lang, 0) > 0
-                },
+                }
            },
            "streaming": {
                "enabled": True,
                "threshold_bytes": streaming_threshold,
-                "chunk_size_kb": 64,
+                "chunk_size_kb": 64
            },
            "files": {
                "skip_tiny_files": True,
                "tiny_threshold": 30,
-                "smart_json_filtering": True,
-            },
+                "smart_json_filtering": True
+            }
        }

-
 # Example usage
-
-
 def analyze_and_suggest(manifest_data: Dict[str, Any]) -> Dict[str, Any]:
    """Analyze project and suggest optimal configuration."""
    from collections import Counter
-
-    files = manifest_data.get("files", {})
+    
+    files = manifest_data.get('files', {})
    languages = Counter()
    large_files = 0
-
+    
    for info in files.values():
-        lang = info.get("language", "unknown")
+        lang = info.get('language', 'unknown')
        languages[lang] += 1
-        if info.get("size", 0) > 10000:
+        if info.get('size', 0) > 10000:
            large_files += 1
-
+    
    stats = {
-        "languages": dict(languages),
-        "large_files": large_files,
-        "total_files": len(files),
+        'languages': dict(languages),
+        'large_files': large_files,
+        'total_files': len(files)
    }
-
+    
    strategy = SmartChunkingStrategy()
-    return strategy.get_smart_defaults(stats)
+    return strategy.get_smart_defaults(stats)
--- a/mini_rag/system_context.py
+++ b/mini_rag/system_context.py
@ -1,121 +0,0 @@
-"""
-System Context Collection for Enhanced RAG Grounding
-
-Collects minimal system information to help the LLM provide better,
-context-aware assistance without compromising privacy.
-"""
-
-import platform
-import sys
-from pathlib import Path
-from typing import Dict, Optional
-
-
-class SystemContextCollector:
-    """Collects system context information for enhanced LLM grounding."""
-
-    @staticmethod
-    def get_system_context(project_path: Optional[Path] = None) -> str:
-        """
-        Get concise system context for LLM grounding.
-
-        Args:
-            project_path: Current project directory
-
-        Returns:
-            Formatted system context string (max 200 chars for privacy)
-        """
-        try:
-            # Basic system info
-            os_name = platform.system()
-            python_ver = f"{sys.version_info.major}.{sys.version_info.minor}"
-
-            # Simplified OS names
-            os_short = {"Windows": "Win", "Linux": "Linux", "Darwin": "macOS"}.get(
-                os_name, os_name
-            )
-
-            # Working directory info
-            if project_path:
-                # Use relative or shortened path for privacy
-                try:
-                    rel_path = project_path.relative_to(Path.home())
-                    path_info = f"~/{rel_path}"
-                except ValueError:
-                    # If not relative to home, just use folder name
-                    path_info = project_path.name
-            else:
-                path_info = Path.cwd().name
-
-            # Trim path if too long for our 200-char limit
-            if len(path_info) > 50:
-                path_info = f".../{path_info[-45:]}"
-
-            # Command style hints
-            cmd_style = "rag.bat" if os_name == "Windows" else "./rag-mini"
-
-            # Format concise context
-            context = f"[{os_short} {python_ver}, {path_info}, use {cmd_style}]"
-
-            # Ensure we stay under 200 chars
-            if len(context) > 200:
-                context = context[:197] + "...]"
-
-            return context
-
-        except Exception:
-            # Fallback to minimal info if anything fails
-            return f"[{platform.system()}, Python {sys.version_info.major}.{sys.version_info.minor}]"
-
-    @staticmethod
-    def get_command_context(os_name: Optional[str] = None) -> Dict[str, str]:
-        """
-        Get OS-appropriate command examples.
-
-        Returns:
-            Dictionary with command patterns for the current OS
-        """
-        if os_name is None:
-            os_name = platform.system()
-
-        if os_name == "Windows":
-            return {
-                "launcher": "rag.bat",
-                "index": "rag.bat index C:\\path\\to\\project",
-                "search": 'rag.bat search C:\\path\\to\\project "query"',
-                "explore": "rag.bat explore C:\\path\\to\\project",
-                "path_sep": "\\",
-                "example_path": "C:\\Users\\username\\Documents\\myproject",
-            }
-        else:
-            return {
-                "launcher": "./rag-mini",
-                "index": "./rag-mini index /path/to/project",
-                "search": './rag-mini search /path/to/project "query"',
-                "explore": "./rag-mini explore /path/to/project",
-                "path_sep": "/",
-                "example_path": "~/Documents/myproject",
-            }
-
-
-def get_system_context(project_path: Optional[Path] = None) -> str:
-    """Convenience function to get system context."""
-    return SystemContextCollector.get_system_context(project_path)
-
-
-def get_command_context() -> Dict[str, str]:
-    """Convenience function to get command context."""
-    return SystemContextCollector.get_command_context()
-
-
-# Test function
-
-if __name__ == "__main__":
-    print("System Context Test:")
-    print(f"Context: {get_system_context()}")
-    print(f"Context with path: {get_system_context(Path('/tmp/test'))}")
-    print()
-    print("Command Context:")
-    cmds = get_command_context()
-    for key, value in cmds.items():
-        print(f"  {key}: {value}")
--- a/mini_rag/updater.py
+++ b/mini_rag/updater.py
@ -6,32 +6,30 @@ Provides seamless GitHub-based updates with user-friendly interface.
 Checks for new releases, downloads updates, and handles installation safely.
 """

-import json
 import os
-import shutil
-import subprocess
 import sys
-import tempfile
+import json
+import time
+import shutil
 import zipfile
-from dataclasses import dataclass
-from datetime import datetime, timedelta
+import tempfile
+import subprocess
 from pathlib import Path
-from typing import Optional, Tuple
+from typing import Optional, Dict, Any, Tuple
+from datetime import datetime, timedelta
+from dataclasses import dataclass

 try:
    import requests
-
    REQUESTS_AVAILABLE = True
 except ImportError:
    REQUESTS_AVAILABLE = False

 from .config import ConfigManager

-
@dataclass
 class UpdateInfo:
    """Information about an available update."""
-
    version: str
    release_url: str
    download_url: str
@ -39,45 +37,42 @@ class UpdateInfo:
    published_at: str
    is_newer: bool

-
 class UpdateChecker:
    """
    Handles checking for and applying updates from GitHub releases.
-
+    
    Features:
    - Checks GitHub API for latest releases
    - Downloads and applies updates safely with backup
    - Respects user preferences and rate limiting
    - Provides graceful fallbacks if network unavailable
    """
-
-    def __init__(
-        self,
-        repo_owner: str = "FSSCoding",
-        repo_name: str = "Fss-Mini-Rag",
-        current_version: str = "2.1.0",
-    ):
+    
+    def __init__(self, 
+                 repo_owner: str = "FSSCoding",
+                 repo_name: str = "Fss-Mini-Rag",
+                 current_version: str = "2.1.0"):
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.current_version = current_version
        self.github_api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
        self.check_frequency_hours = 24  # Check once per day
-
+        
        # Paths
        self.app_root = Path(__file__).parent.parent
        self.cache_file = self.app_root / ".update_cache.json"
        self.backup_dir = self.app_root / ".backup"
-
+        
        # User preferences (graceful fallback if config unavailable)
        try:
            self.config = ConfigManager(self.app_root)
        except Exception:
            self.config = None
-
+        
    def should_check_for_updates(self) -> bool:
        """
        Determine if we should check for updates now.
-
+        
        Respects:
        - User preference to disable updates
        - Rate limiting (once per day by default)
@ -85,74 +80,70 @@ class UpdateChecker:
        """
        if not REQUESTS_AVAILABLE:
            return False
-
+            
        # Check user preference
-        if hasattr(self.config, "updates") and not getattr(
-            self.config.updates, "auto_check", True
-        ):
+        if hasattr(self.config, 'updates') and not getattr(self.config.updates, 'auto_check', True):
            return False
-
+            
        # Check if we've checked recently
        if self.cache_file.exists():
            try:
-                with open(self.cache_file, "r") as f:
+                with open(self.cache_file, 'r') as f:
                    cache = json.load(f)
-                    last_check = datetime.fromisoformat(cache.get("last_check", "2020-01-01"))
-                    if datetime.now() - last_check < timedelta(
-                        hours=self.check_frequency_hours
-                    ):
+                    last_check = datetime.fromisoformat(cache.get('last_check', '2020-01-01'))
+                    if datetime.now() - last_check < timedelta(hours=self.check_frequency_hours):
                        return False
            except (json.JSONDecodeError, ValueError, KeyError):
                pass  # Ignore cache errors, will check anyway
-
+                
        return True
-
+    
    def check_for_updates(self) -> Optional[UpdateInfo]:
        """
        Check GitHub API for the latest release.
-
+        
        Returns:
            UpdateInfo if an update is available, None otherwise
        """
        if not REQUESTS_AVAILABLE:
            return None
-
+            
        try:
            # Get latest release from GitHub API
            response = requests.get(
                f"{self.github_api_url}/releases/latest",
                timeout=10,
-                headers={"Accept": "application/vnd.github.v3+json"},
+                headers={"Accept": "application/vnd.github.v3+json"}
            )
-
+            
            if response.status_code != 200:
                return None
-
+                
            release_data = response.json()
-
+            
            # Extract version info
-            latest_version = release_data.get("tag_name", "").lstrip("v")
-            release_notes = release_data.get("body", "No release notes available.")
-            published_at = release_data.get("published_at", "")
-            release_url = release_data.get("html_url", "")
-
+            latest_version = release_data.get('tag_name', '').lstrip('v')
+            release_notes = release_data.get('body', 'No release notes available.')
+            published_at = release_data.get('published_at', '')
+            release_url = release_data.get('html_url', '')
+            
            # Find download URL for source code
            download_url = None
-            for asset in release_data.get("assets", []):
-                if asset.get("name", "").endswith(".zip"):
-                    download_url = asset.get("browser_download_url")
+            for asset in release_data.get('assets', []):
+                if asset.get('name', '').endswith('.zip'):
+                    download_url = asset.get('browser_download_url')
                    break
-
+            
            # Fallback to source code zip
            if not download_url:
                download_url = f"https://github.com/{self.repo_owner}/{self.repo_name}/archive/refs/tags/v{latest_version}.zip"
-
+            
            # Check if this is a newer version
            is_newer = self._is_version_newer(latest_version, self.current_version)
-
+            
            # Update cache
            self._update_cache(latest_version, is_newer)
-
+            
            if is_newer:
                return UpdateInfo(
                    version=latest_version,
@ -160,95 +151,92 @@ class UpdateChecker:
                    download_url=download_url,
                    release_notes=release_notes,
                    published_at=published_at,
-                    is_newer=True,
+                    is_newer=True
                )
-
-        except Exception:
+                
+        except Exception as e:
            # Silently fail for network issues - don't interrupt user experience
            pass
-
+            
        return None
-
+    
    def _is_version_newer(self, latest: str, current: str) -> bool:
        """
        Compare version strings to determine if latest is newer.
-
+        
        Simple semantic version comparison supporting:
        - Major.Minor.Patch (e.g., 2.1.0)
        - Major.Minor (e.g., 2.1)
        """
-
        def version_tuple(v):
            return tuple(map(int, (v.split("."))))
-
+        
        try:
            return version_tuple(latest) > version_tuple(current)
        except (ValueError, AttributeError):
            # If version parsing fails, assume it's newer to be safe
            return latest != current
-
+    
    def _update_cache(self, latest_version: str, is_newer: bool):
        """Update the cache file with check results."""
        cache_data = {
-            "last_check": datetime.now().isoformat(),
-            "latest_version": latest_version,
-            "is_newer": is_newer,
+            'last_check': datetime.now().isoformat(),
+            'latest_version': latest_version,
+            'is_newer': is_newer
        }
-
+        
        try:
-            with open(self.cache_file, "w") as f:
+            with open(self.cache_file, 'w') as f:
                json.dump(cache_data, f, indent=2)
        except Exception:
            pass  # Ignore cache write errors
-
-    def download_update(
-        self, update_info: UpdateInfo, progress_callback=None
-    ) -> Optional[Path]:
+    
+    def download_update(self, update_info: UpdateInfo, progress_callback=None) -> Optional[Path]:
        """
        Download the update package to a temporary location.
-
+        
        Args:
            update_info: Information about the update to download
            progress_callback: Optional callback for progress updates
-
+            
        Returns:
            Path to downloaded file, or None if download failed
        """
        if not REQUESTS_AVAILABLE:
            return None
-
+            
        try:
            # Create temporary file for download
-            with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_file:
+            with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as tmp_file:
                tmp_path = Path(tmp_file.name)
-
+                
            # Download with progress tracking
            response = requests.get(update_info.download_url, stream=True, timeout=30)
            response.raise_for_status()
-
-            total_size = int(response.headers.get("content-length", 0))
+            
+            total_size = int(response.headers.get('content-length', 0))
            downloaded = 0
-
-            with open(tmp_path, "wb") as f:
+            
+            with open(tmp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        if progress_callback and total_size > 0:
                            progress_callback(downloaded, total_size)
-
+                            
            return tmp_path
-
-        except Exception:
+            
+        except Exception as e:
            # Clean up on error
-            if "tmp_path" in locals() and tmp_path.exists():
+            if 'tmp_path' in locals() and tmp_path.exists():
                tmp_path.unlink()
            return None
-
+    
    def create_backup(self) -> bool:
        """
        Create a backup of the current installation.
-
+        
        Returns:
            True if backup created successfully
        """
@ -256,22 +244,22 @@ class UpdateChecker:
            # Remove old backup if it exists
            if self.backup_dir.exists():
                shutil.rmtree(self.backup_dir)
-
+                
            # Create new backup
            self.backup_dir.mkdir(exist_ok=True)
-
+            
            # Copy key files and directories
            important_items = [
-                "mini_rag",
-                "rag-mini.py",
-                "rag-tui.py",
-                "requirements.txt",
-                "install_mini_rag.sh",
-                "install_windows.bat",
-                "README.md",
-                "assets",
+                'mini_rag',
+                'rag-mini.py',
+                'rag-tui.py', 
+                'requirements.txt',
+                'install_mini_rag.sh',
+                'install_windows.bat',
+                'README.md',
+                'assets'
            ]
-
+            
            for item in important_items:
                src = self.app_root / item
                if src.exists():
@ -279,20 +267,20 @@ class UpdateChecker:
                        shutil.copytree(src, self.backup_dir / item)
                    else:
                        shutil.copy2(src, self.backup_dir / item)
-
+                        
            return True
-
-        except Exception:
+            
+        except Exception as e:
            return False
-
+    
    def apply_update(self, update_package_path: Path, update_info: UpdateInfo) -> bool:
        """
        Apply the downloaded update.
-
+        
        Args:
            update_package_path: Path to the downloaded update package
            update_info: Information about the update
-
+            
        Returns:
            True if update applied successfully
        """
@ -300,140 +288,133 @@ class UpdateChecker:
            # Extract to temporary directory first
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_path = Path(tmp_dir)
-
+                
                # Extract the archive
-                with zipfile.ZipFile(update_package_path, "r") as zip_ref:
+                with zipfile.ZipFile(update_package_path, 'r') as zip_ref:
                    zip_ref.extractall(tmp_path)
-
+                
                # Find the extracted directory (may be nested)
                extracted_dirs = [d for d in tmp_path.iterdir() if d.is_dir()]
                if not extracted_dirs:
                    return False
-
+                    
                source_dir = extracted_dirs[0]
-
+                
                # Copy files to application directory
                important_items = [
-                    "mini_rag",
-                    "rag-mini.py",
-                    "rag-tui.py",
-                    "requirements.txt",
-                    "install_mini_rag.sh",
-                    "install_windows.bat",
-                    "README.md",
+                    'mini_rag',
+                    'rag-mini.py',
+                    'rag-tui.py',
+                    'requirements.txt',
+                    'install_mini_rag.sh', 
+                    'install_windows.bat',
+                    'README.md'
                ]
-
+                
                for item in important_items:
                    src = source_dir / item
                    dst = self.app_root / item
-
+                    
                    if src.exists():
                        if dst.exists():
                            if dst.is_dir():
                                shutil.rmtree(dst)
                            else:
                                dst.unlink()
-
+                                
                        if src.is_dir():
                            shutil.copytree(src, dst)
                        else:
                            shutil.copy2(src, dst)
-
+                
                # Update version info
                self._update_version_info(update_info.version)
-
+                
                return True
-
-        except Exception:
+                
+        except Exception as e:
            return False
-
+    
    def _update_version_info(self, new_version: str):
        """Update version information in the application."""
        # Update __init__.py version
-        init_file = self.app_root / "mini_rag" / "__init__.py"
+        init_file = self.app_root / 'mini_rag' / '__init__.py'
        if init_file.exists():
            try:
                content = init_file.read_text()
                updated_content = content.replace(
                    f'__version__ = "{self.current_version}"',
-                    f'__version__ = "{new_version}"',
+                    f'__version__ = "{new_version}"'
                )
                init_file.write_text(updated_content)
            except Exception:
                pass
-
+    
    def rollback_update(self) -> bool:
        """
        Rollback to the backup version if update failed.
-
+        
        Returns:
            True if rollback successful
        """
        if not self.backup_dir.exists():
            return False
-
+            
        try:
            # Restore from backup
            for item in self.backup_dir.iterdir():
                dst = self.app_root / item.name
-
+                
                if dst.exists():
                    if dst.is_dir():
                        shutil.rmtree(dst)
                    else:
                        dst.unlink()
-
+                        
                if item.is_dir():
                    shutil.copytree(item, dst)
                else:
                    shutil.copy2(item, dst)
-
+                    
            return True
-
-        except Exception:
+            
+        except Exception as e:
            return False
-
+    
    def restart_application(self):
        """Restart the application after update."""
        try:
-            # Sanitize arguments to prevent command injection
-            safe_argv = [sys.executable]
-            for arg in sys.argv[1:]:  # Skip sys.argv[0] (script name)
-                # Only allow safe arguments - alphanumeric, dashes, dots, slashes
-                if isinstance(arg, str) and len(arg) < 200:  # Reasonable length limit
-                    # Simple whitelist of safe characters
-                    import re
-                    if re.match(r'^[a-zA-Z0-9._/-]+$', arg):
-                        safe_argv.append(arg)
-
-            # Restart with sanitized arguments
-            if sys.platform.startswith("win"):
+            # Get the current script path
+            current_script = sys.argv[0]
+            
+            # Restart with the same arguments
+            if sys.platform.startswith('win'):
                # Windows
-                subprocess.Popen(safe_argv)
+                subprocess.Popen([sys.executable] + sys.argv)
            else:
                # Unix-like systems
-                os.execv(sys.executable, safe_argv)
-
-        except Exception:
+                os.execv(sys.executable, [sys.executable] + sys.argv)
+                
+        except Exception as e:
            # If restart fails, just exit gracefully
-            print("\n✅ Update complete! Please restart the application manually.")
+            print(f"\n✅ Update complete! Please restart the application manually.")
            sys.exit(0)


 def get_legacy_notification() -> Optional[str]:
    """
    Check if this is a legacy version that needs urgent notification.
-
+    
    For users who downloaded before the auto-update system.
    """
    try:
        # Check if this is a very old version by looking for cache file
        # Old versions won't have update cache, so we can detect them
        app_root = Path(__file__).parent.parent
-        # app_root / ".update_cache.json"  # Unused variable removed
-
+        cache_file = app_root / ".update_cache.json"
+        
        # Also check version in __init__.py to see if it's old
-        init_file = app_root / "mini_rag" / "__init__.py"
+        init_file = app_root / 'mini_rag' / '__init__.py'
        if init_file.exists():
            content = init_file.read_text()
            if '__version__ = "2.0.' in content or '__version__ = "1.' in content:
@ -443,7 +424,7 @@ def get_legacy_notification() -> Optional[str]:
 Your version of FSS-Mini-RAG is missing critical updates!

 🔧 Recent improvements include:
-• Fixed LLM response formatting issues
+• Fixed LLM response formatting issues  
 • Added context window configuration
 • Improved Windows installer reliability
 • Added auto-update system (this notification!)
@ -455,28 +436,26 @@ Your version of FSS-Mini-RAG is missing critical updates!
 """
    except Exception:
        pass
-
+        
    return None


 # Global convenience functions
 _updater_instance = None

-
 def check_for_updates() -> Optional[UpdateInfo]:
    """Global function to check for updates."""
    global _updater_instance
    if _updater_instance is None:
        _updater_instance = UpdateChecker()
-
+    
    if _updater_instance.should_check_for_updates():
        return _updater_instance.check_for_updates()
    return None

-
 def get_updater() -> UpdateChecker:
    """Get the global updater instance."""
    global _updater_instance
    if _updater_instance is None:
        _updater_instance = UpdateChecker()
-    return _updater_instance
+    return _updater_instance
--- a/mini_rag/venv_checker.py
+++ b/mini_rag/venv_checker.py
@ -4,70 +4,64 @@ Virtual Environment Checker
 Ensures scripts run in proper Python virtual environment for consistency and safety.
 """

-import os
 import sys
+import os
+import sysconfig
 from pathlib import Path

-
 def is_in_virtualenv() -> bool:
    """Check if we're running in a virtual environment."""
    # Check for virtual environment indicators
    return (
-        hasattr(sys, "real_prefix")
-        or (hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix)  # virtualenv
-        or os.environ.get("VIRTUAL_ENV") is not None  # venv/pyvenv  # Environment variable
+        hasattr(sys, 'real_prefix') or  # virtualenv
+        (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix) or  # venv/pyvenv
+        os.environ.get('VIRTUAL_ENV') is not None  # Environment variable
    )

-
 def get_expected_venv_path() -> Path:
    """Get the expected virtual environment path for this project."""
    # Assume .venv in the same directory as the script
    script_dir = Path(__file__).parent.parent
-    return script_dir / ".venv"
-
+    return script_dir / '.venv'

 def check_correct_venv() -> tuple[bool, str]:
    """
    Check if we're in the correct virtual environment.
-
+    
    Returns:
        (is_correct, message)
    """
    if not is_in_virtualenv():
        return False, "not in virtual environment"
-
+    
    expected_venv = get_expected_venv_path()
    if not expected_venv.exists():
        return False, "expected virtual environment not found"
-
-    current_venv = os.environ.get("VIRTUAL_ENV")
+    
+    current_venv = os.environ.get('VIRTUAL_ENV')
    if current_venv:
        current_venv_path = Path(current_venv).resolve()
        expected_venv_path = expected_venv.resolve()
-
+        
        if current_venv_path != expected_venv_path:
-            return (
-                False,
-                f"wrong virtual environment (using {current_venv_path}, expected {expected_venv_path})",
-            )
-
+            return False, f"wrong virtual environment (using {current_venv_path}, expected {expected_venv_path})"
+    
    return True, "correct virtual environment"

-
 def show_venv_warning(script_name: str = "script") -> None:
    """Show virtual environment warning with helpful instructions."""
    expected_venv = get_expected_venv_path()
-
+    
    print("⚠️  VIRTUAL ENVIRONMENT WARNING")
    print("=" * 50)
    print()
    print(f"This {script_name} should be run in a Python virtual environment for:")
    print("  • Consistent dependencies")
-    print("  • Isolated package versions")
+    print("  • Isolated package versions") 
    print("  • Proper security isolation")
    print("  • Reliable functionality")
    print()
-
+    
    if expected_venv.exists():
        print("✅ Virtual environment found!")
        print(f"   Location: {expected_venv}")
@ -88,7 +82,7 @@ def show_venv_warning(script_name: str = "script") -> None:
        print(f"   python3 -m venv {expected_venv}")
        print(f"   source {expected_venv}/bin/activate")
        print("   pip install -r requirements.txt")
-
+    
    print()
    print("💡 Why this matters:")
    print("   Without a virtual environment, you may experience:")
@ -98,23 +92,22 @@ def show_venv_warning(script_name: str = "script") -> None:
    print("   • Potential system-wide package pollution")
    print()

-
 def check_and_warn_venv(script_name: str = "script", force_exit: bool = False) -> bool:
    """
    Check virtual environment and warn if needed.
-
+    
    Args:
        script_name: Name of the script for user-friendly messages
        force_exit: Whether to exit if not in correct venv
-
+        
    Returns:
        True if in correct venv, False otherwise
    """
    is_correct, message = check_correct_venv()
-
+    
    if not is_correct:
        show_venv_warning(script_name)
-
+        
        if force_exit:
            print(f"⛔ Exiting {script_name} for your safety.")
            print("   Please activate the virtual environment and try again.")
@ -123,32 +116,27 @@ def check_and_warn_venv(script_name: str = "script", force_exit: bool = False) -
            print(f"⚠️  Continuing anyway, but {script_name} may not work correctly...")
            print()
            return False
-
+    
    return True

-
 def require_venv(script_name: str = "script") -> None:
    """Require virtual environment or exit."""
    check_and_warn_venv(script_name, force_exit=True)

-
 # Quick test function
-
-
 def main():
    """Test the virtual environment checker."""
    print("🧪 Virtual Environment Checker Test")
    print("=" * 40)
-
+    
    print(f"In virtual environment: {is_in_virtualenv()}")
    print(f"Expected venv path: {get_expected_venv_path()}")
-
+    
    is_correct, message = check_correct_venv()
    print(f"Correct venv: {is_correct} ({message})")
-
+    
    if not is_correct:
        show_venv_warning("test script")

-
 if __name__ == "__main__":
-    main()
+    main()
--- a/mini_rag/watcher.py
+++ b/mini_rag/watcher.py
@ -4,21 +4,14 @@ Monitors project files and updates the index incrementally.
 """

 import logging
-import queue
 import threading
+import queue
 import time
-from datetime import datetime
 from pathlib import Path
-from typing import Callable, Optional, Set
-
-from watchdog.events import (
-    FileCreatedEvent,
-    FileDeletedEvent,
-    FileModifiedEvent,
-    FileMovedEvent,
-    FileSystemEventHandler,
-)
+from typing import Set, Optional, Callable
+from datetime import datetime
 from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler, FileModifiedEvent, FileCreatedEvent, FileDeletedEvent, FileMovedEvent

 from .indexer import ProjectIndexer

@ -27,11 +20,11 @@ logger = logging.getLogger(__name__)

 class UpdateQueue:
    """Thread-safe queue for file updates with deduplication."""
-
+    
    def __init__(self, delay: float = 1.0):
        """
        Initialize update queue.
-
+        
        Args:
            delay: Delay in seconds before processing updates (for debouncing)
        """
@ -40,24 +33,24 @@ class UpdateQueue:
        self.lock = threading.Lock()
        self.delay = delay
        self.last_update = {}  # Track last update time per file
-
+    
    def add(self, file_path: Path):
        """Add a file to the update queue."""
        with self.lock:
            file_str = str(file_path)
            current_time = time.time()
-
+            
            # Check if we should debounce this update
            if file_str in self.last_update:
                if current_time - self.last_update[file_str] < self.delay:
                    return  # Skip this update
-
+            
            self.last_update[file_str] = current_time
-
+            
            if file_str not in self.pending:
                self.pending.add(file_str)
                self.queue.put(file_path)
-
+    
    def get(self, timeout: Optional[float] = None) -> Optional[Path]:
        """Get next file from queue."""
        try:
@ -67,11 +60,11 @@ class UpdateQueue:
            return file_path
        except queue.Empty:
            return None
-
+    
    def empty(self) -> bool:
        """Check if queue is empty."""
        return self.queue.empty()
-
+    
    def size(self) -> int:
        """Get queue size."""
        return self.queue.qsize()
@ -79,17 +72,15 @@ class UpdateQueue:

 class CodeFileEventHandler(FileSystemEventHandler):
    """Handles file system events for code files."""
-
-    def __init__(
-        self,
-        update_queue: UpdateQueue,
-        include_patterns: Set[str],
-        exclude_patterns: Set[str],
-        project_path: Path,
-    ):
+    
+    def __init__(self, 
+                 update_queue: UpdateQueue,
+                 include_patterns: Set[str],
+                 exclude_patterns: Set[str],
+                 project_path: Path):
        """
        Initialize event handler.
-
+        
        Args:
            update_queue: Queue for file updates
            include_patterns: File patterns to include
@ -100,47 +91,47 @@ class CodeFileEventHandler(FileSystemEventHandler):
        self.include_patterns = include_patterns
        self.exclude_patterns = exclude_patterns
        self.project_path = project_path
-
+    
    def _should_process(self, file_path: str) -> bool:
        """Check if file should be processed."""
        path = Path(file_path)
-
+        
        # Check if it's a file (not directory)
        if not path.is_file():
            return False
-
+        
        # Check exclude patterns
        path_str = str(path)
        for pattern in self.exclude_patterns:
            if pattern in path_str:
                return False
-
+        
        # Check include patterns
        for pattern in self.include_patterns:
            if path.match(pattern):
                return True
-
+        
        return False
-
+    
    def on_modified(self, event: FileModifiedEvent):
        """Handle file modification."""
        if not event.is_directory and self._should_process(event.src_path):
            logger.debug(f"File modified: {event.src_path}")
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_created(self, event: FileCreatedEvent):
        """Handle file creation."""
        if not event.is_directory and self._should_process(event.src_path):
            logger.debug(f"File created: {event.src_path}")
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_deleted(self, event: FileDeletedEvent):
        """Handle file deletion."""
        if not event.is_directory and self._should_process(event.src_path):
            logger.debug(f"File deleted: {event.src_path}")
            # Add deletion task to queue (we'll handle it differently)
            self.update_queue.add(Path(event.src_path))
-
+    
    def on_moved(self, event: FileMovedEvent):
        """Handle file move/rename."""
        if not event.is_directory:
@ -154,18 +145,16 @@ class CodeFileEventHandler(FileSystemEventHandler):

 class FileWatcher:
    """Watches project files and updates index automatically."""
-
-    def __init__(
-        self,
-        project_path: Path,
-        indexer: Optional[ProjectIndexer] = None,
-        update_delay: float = 1.0,
-        batch_size: int = 10,
-        batch_timeout: float = 5.0,
-    ):
+    
+    def __init__(self, 
+                 project_path: Path,
+                 indexer: Optional[ProjectIndexer] = None,
+                 update_delay: float = 1.0,
+                 batch_size: int = 10,
+                 batch_timeout: float = 5.0):
        """
        Initialize file watcher.
-
+        
        Args:
            project_path: Path to project to watch
            indexer: ProjectIndexer instance (creates one if not provided)
@ -178,79 +167,86 @@ class FileWatcher:
        self.update_delay = update_delay
        self.batch_size = batch_size
        self.batch_timeout = batch_timeout
-
+        
        # Initialize components
        self.update_queue = UpdateQueue(delay=update_delay)
        self.observer = Observer()
        self.worker_thread = None
        self.running = False
-
+        
        # Get patterns from indexer
        self.include_patterns = set(self.indexer.include_patterns)
        self.exclude_patterns = set(self.indexer.exclude_patterns)
-
+        
        # Statistics
        self.stats = {
-            "files_updated": 0,
-            "files_failed": 0,
-            "started_at": None,
-            "last_update": None,
+            'files_updated': 0,
+            'files_failed': 0,
+            'started_at': None,
+            'last_update': None,
        }
-
+    
    def start(self):
        """Start watching for file changes."""
        if self.running:
            logger.warning("Watcher is already running")
            return
-
+        
        logger.info(f"Starting file watcher for {self.project_path}")
-
+        
        # Set up file system observer
        event_handler = CodeFileEventHandler(
            self.update_queue,
            self.include_patterns,
            self.exclude_patterns,
-            self.project_path,
+            self.project_path
        )
-
-        self.observer.schedule(event_handler, str(self.project_path), recursive=True)
-
+        
+        self.observer.schedule(
+            event_handler,
+            str(self.project_path),
+            recursive=True
+        )
+        
        # Start worker thread
        self.running = True
-        self.worker_thread = threading.Thread(target=self._process_updates, daemon=True)
+        self.worker_thread = threading.Thread(
+            target=self._process_updates,
+            daemon=True
+        )
        self.worker_thread.start()
-
+        
        # Start observer
        self.observer.start()
-
-        self.stats["started_at"] = datetime.now()
+        
+        self.stats['started_at'] = datetime.now()
        logger.info("File watcher started successfully")
-
+    
    def stop(self):
        """Stop watching for file changes."""
        if not self.running:
            return
-
+        
        logger.info("Stopping file watcher...")
-
+        
        # Stop observer
        self.observer.stop()
        self.observer.join()
-
+        
        # Stop worker thread
        self.running = False
        if self.worker_thread:
            self.worker_thread.join(timeout=5.0)
-
+        
        logger.info("File watcher stopped")
-
+    
    def _process_updates(self):
        """Worker thread that processes file updates."""
        logger.info("Update processor thread started")
-
+        
        batch = []
        batch_start_time = None
-
+        
        while self.running:
            try:
                # Calculate timeout for getting next item
@ -267,46 +263,46 @@ class FileWatcher:
                    else:
                        # Wait for more items or timeout
                        timeout = min(0.1, self.batch_timeout - elapsed)
-
+                
                # Get next file from queue
                file_path = self.update_queue.get(timeout=timeout)
-
+                
                if file_path:
                    # Add to batch
                    if not batch:
                        batch_start_time = time.time()
                    batch.append(file_path)
-
+                    
                    # Check if batch is full
                    if len(batch) >= self.batch_size:
                        self._process_batch(batch)
                        batch = []
                        batch_start_time = None
-
+                
            except queue.Empty:
                # Check if we have a pending batch that's timed out
                if batch and (time.time() - batch_start_time) >= self.batch_timeout:
                    self._process_batch(batch)
                    batch = []
                    batch_start_time = None
-
+            
            except Exception as e:
                logger.error(f"Error in update processor: {e}")
                time.sleep(1)  # Prevent tight loop on error
-
+        
        # Process any remaining items
        if batch:
            self._process_batch(batch)
-
+        
        logger.info("Update processor thread stopped")
-
+    
    def _process_batch(self, files: list[Path]):
        """Process a batch of file updates."""
        if not files:
            return
-
+        
        logger.info(f"Processing batch of {len(files)} file updates")
-
+        
        for file_path in files:
            try:
                if file_path.exists():
@ -317,91 +313,87 @@ class FileWatcher:
                    # File doesn't exist - delete from index
                    logger.debug(f"Deleting {file_path} from index - file no longer exists")
                    success = self.indexer.delete_file(file_path)
-
+                
                if success:
-                    self.stats["files_updated"] += 1
+                    self.stats['files_updated'] += 1
                else:
-                    self.stats["files_failed"] += 1
-
-                self.stats["last_update"] = datetime.now()
-
+                    self.stats['files_failed'] += 1
+                
+                self.stats['last_update'] = datetime.now()
+                
            except Exception as e:
                logger.error(f"Failed to process {file_path}: {e}")
-                self.stats["files_failed"] += 1
-
-        logger.info(
-            f"Batch processing complete. Updated: {self.stats['files_updated']}, Failed: {self.stats['files_failed']}"
-        )
-
+                self.stats['files_failed'] += 1
+        
+        logger.info(f"Batch processing complete. Updated: {self.stats['files_updated']}, Failed: {self.stats['files_failed']}")
+    
    def get_statistics(self) -> dict:
        """Get watcher statistics."""
        stats = self.stats.copy()
-        stats["queue_size"] = self.update_queue.size()
-        stats["is_running"] = self.running
-
-        if stats["started_at"]:
-            uptime = datetime.now() - stats["started_at"]
-            stats["uptime_seconds"] = uptime.total_seconds()
-
+        stats['queue_size'] = self.update_queue.size()
+        stats['is_running'] = self.running
+        
+        if stats['started_at']:
+            uptime = datetime.now() - stats['started_at']
+            stats['uptime_seconds'] = uptime.total_seconds()
+        
        return stats
-
+    
    def wait_for_updates(self, timeout: Optional[float] = None) -> bool:
        """
        Wait for pending updates to complete.
-
+        
        Args:
            timeout: Maximum time to wait in seconds
-
+            
        Returns:
            True if all updates completed, False if timeout
        """
        start_time = time.time()
-
+        
        while not self.update_queue.empty():
            if timeout and (time.time() - start_time) > timeout:
                return False
            time.sleep(0.1)
-
+        
        # Wait a bit more to ensure batch processing completes
        time.sleep(self.batch_timeout + 0.5)
        return True
-
+    
    def __enter__(self):
        """Context manager entry."""
        self.start()
        return self
-
+    
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        self.stop()


 # Convenience function
-
-
 def watch_project(project_path: Path, callback: Optional[Callable] = None):
    """
    Watch a project for changes and update index automatically.
-
+    
    Args:
        project_path: Path to project
        callback: Optional callback function called after each update
    """
    watcher = FileWatcher(project_path)
-
+    
    try:
        watcher.start()
        logger.info(f"Watching {project_path} for changes. Press Ctrl+C to stop.")
-
+        
        while True:
            time.sleep(1)
-
+            
            # Call callback if provided
            if callback:
                stats = watcher.get_statistics()
                callback(stats)
-
+            
    except KeyboardInterrupt:
        logger.info("Stopping watcher...")
    finally:
-        watcher.stop()
+        watcher.stop()
--- a/mini_rag/windows_console_fix.py
+++ b/mini_rag/windows_console_fix.py
@ -3,9 +3,9 @@ Windows Console Unicode/Emoji Fix
 Reliable Windows console Unicode/emoji support for 2025.
 """

-import io
-import os
 import sys
+import os
+import io


 def fix_windows_console():
@ -14,33 +14,28 @@ def fix_windows_console():
    Call this at the start of any script that needs to output Unicode/emojis.
    """
    # Set environment variable for UTF-8 mode
-    os.environ["PYTHONUTF8"] = "1"
-
+    os.environ['PYTHONUTF8'] = '1'
+    
    # For Python 3.7+
-    if hasattr(sys.stdout, "reconfigure"):
-        sys.stdout.reconfigure(encoding="utf-8")
-        sys.stderr.reconfigure(encoding="utf-8")
-        if hasattr(sys.stdin, "reconfigure"):
-            sys.stdin.reconfigure(encoding="utf-8")
+    if hasattr(sys.stdout, 'reconfigure'):
+        sys.stdout.reconfigure(encoding='utf-8')
+        sys.stderr.reconfigure(encoding='utf-8')
+        if hasattr(sys.stdin, 'reconfigure'):
+            sys.stdin.reconfigure(encoding='utf-8')
    else:
        # For older Python versions
-        if sys.platform == "win32":
+        if sys.platform == 'win32':
            # Replace streams with UTF-8 versions
-            sys.stdout = io.TextIOWrapper(
-                sys.stdout.buffer, encoding="utf-8", line_buffering=True
-            )
-            sys.stderr = io.TextIOWrapper(
-                sys.stderr.buffer, encoding="utf-8", line_buffering=True
-            )
-
+            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', line_buffering=True)
+            sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', line_buffering=True)
+    
    # Also set the console code page to UTF-8 on Windows
-    if sys.platform == "win32":
+    if sys.platform == 'win32':
        import subprocess
-
        try:
            # Set console to UTF-8 code page
-            subprocess.run(["chcp", "65001"], shell=True, capture_output=True)
+            subprocess.run(['chcp', '65001'], shell=True, capture_output=True)
        except (OSError, subprocess.SubprocessError):
            pass


@ -49,14 +44,12 @@ fix_windows_console()


 # Test function to verify it works
-
-
 def test_emojis():
    """Test that emojis work properly."""
    print("Testing emoji output:")
    print(" Check mark")
    print(" Cross mark")
-    print(" Rocket")
+    print(" Rocket") 
    print(" Fire")
    print(" Computer")
    print(" Python")
@ -64,7 +57,7 @@ def test_emojis():
    print(" Search")
    print(" Lightning")
    print(" Sparkles")
-
+    

 if __name__ == "__main__":
-    test_emojis()
+    test_emojis()
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,61 +0,0 @@
-[tool.isort]
-profile = "black"
-line_length = 95
-multi_line_output = 3
-include_trailing_comma = true
-force_grid_wrap = 0
-use_parentheses = true
-ensure_newline_before_comments = true
-src_paths = ["mini_rag", "tests", "examples", "scripts"]
-known_first_party = ["mini_rag"]
-sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
-skip = [".venv", ".venv-linting", "__pycache__", ".git"]
-skip_glob = ["*.egg-info/*", "build/*", "dist/*"]
-
-[tool.black]
-line-length = 95
-target-version = ['py310']
-include = '\.pyi?$'
-extend-exclude = '''
-/(
-  # directories
-  \.eggs
-  | \.git
-  | \.hg
-  | \.mypy_cache
-  | \.tox
-  | \.venv
-  | \.venv-linting
-  | _build
-  | buck-out
-  | build
-  | dist
-)/
-'''
-
-[build-system]
-requires = ["setuptools>=61.0"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "mini-rag"
-version = "2.1.0"
-dependencies = [
-    "lancedb>=0.5.0",
-    "pandas>=2.0.0",
-    "numpy>=1.24.0",
-    "pyarrow>=14.0.0",
-    "watchdog>=3.0.0",
-    "requests>=2.28.0",
-    "click>=8.1.0",
-    "rich>=13.0.0",
-    "PyYAML>=6.0.0",
-    "rank-bm25>=0.2.2",
-    "psutil"
-]
-
-[project.scripts]
-rag-mini = "mini_rag.cli:cli"
-
-[tool.setuptools]
-packages = ["mini_rag"]
--- a/3
+++ b/3
@ -60,7 +60,6 @@ attempt_auto_setup() {
    echo -e "${GREEN}✅ Created virtual environment${NC}" >&2
    
    # Step 2: Install dependencies
-    echo -e "${YELLOW}📦 Installing dependencies (this may take 1-2 minutes)...${NC}" >&2
    if ! "$SCRIPT_DIR/.venv/bin/pip" install -r "$SCRIPT_DIR/requirements.txt" >/dev/null 2>&1; then
        return 1  # Dependency installation failed
    fi
@ -330,7 +329,7 @@ main() {
            ;;
        "index"|"search"|"explore"|"status"|"update"|"check-update")
            # Direct CLI commands - call Python script
-            exec "$PYTHON" "$SCRIPT_DIR/bin/rag-mini.py" "$@"
+            exec "$PYTHON" "$SCRIPT_DIR/rag-mini.py" "$@"
            ;;
        *)
            # Unknown command - show help
--- a/bin/rag-mini.py
+++ b/bin/rag-mini.py
@ -6,32 +6,24 @@ A lightweight, portable RAG system for semantic code search.
 Usage: rag-mini <command> <project_path> [options]
 """

+import sys
 import argparse
+from pathlib import Path
 import json
 import logging
-import socket
-import sys
-from pathlib import Path
-
-# Add parent directory to path so we can import mini_rag
-sys.path.insert(0, str(Path(__file__).parent.parent))
-
-import requests

 # Add the RAG system to the path
 sys.path.insert(0, str(Path(__file__).parent))

 try:
-    from mini_rag.explorer import CodeExplorer
    from mini_rag.indexer import ProjectIndexer
-    from mini_rag.llm_synthesizer import LLMSynthesizer
-    from mini_rag.ollama_embeddings import OllamaEmbedder
    from mini_rag.search import CodeSearcher
-
+    from mini_rag.ollama_embeddings import OllamaEmbedder
+    from mini_rag.llm_synthesizer import LLMSynthesizer
+    from mini_rag.explorer import CodeExplorer
    # Update system (graceful import)
    try:
        from mini_rag.updater import check_for_updates, get_updater
-
        UPDATER_AVAILABLE = True
    except ImportError:
        UPDATER_AVAILABLE = False
@ -56,51 +48,50 @@ except ImportError as e:
 # Configure logging for user-friendly output
 logging.basicConfig(
    level=logging.WARNING,  # Only show warnings and errors by default
-    format="%(levelname)s: %(message)s",
+    format='%(levelname)s: %(message)s'
 )
 logger = logging.getLogger(__name__)

-
 def index_project(project_path: Path, force: bool = False):
    """Index a project directory."""
    try:
        # Show what's happening
        action = "Re-indexing" if force else "Indexing"
        print(f"🚀 {action} {project_path.name}")
-
+        
        # Quick pre-check
-        rag_dir = project_path / ".mini-rag"
+        rag_dir = project_path / '.mini-rag'
        if rag_dir.exists() and not force:
            print("   Checking for changes...")
-
+        
        indexer = ProjectIndexer(project_path)
        result = indexer.index_project(force_reindex=force)
-
+        
        # Show results with context
-        files_count = result.get("files_indexed", 0)
-        chunks_count = result.get("chunks_created", 0)
-        time_taken = result.get("time_taken", 0)
-
+        files_count = result.get('files_indexed', 0)
+        chunks_count = result.get('chunks_created', 0)
+        time_taken = result.get('time_taken', 0)
+        
        if files_count == 0:
            print("✅ Index up to date - no changes detected")
        else:
            print(f"✅ Indexed {files_count} files in {time_taken:.1f}s")
            print(f"   Created {chunks_count} chunks")
-
+            
            # Show efficiency
            if time_taken > 0:
                speed = files_count / time_taken
                print(f"   Speed: {speed:.1f} files/sec")
-
+        
        # Show warnings if any
-        failed_count = result.get("files_failed", 0)
+        failed_count = result.get('files_failed', 0)
        if failed_count > 0:
            print(f"⚠️  {failed_count} files failed (check logs with --verbose)")
-
+        
        # Quick tip for first-time users
-        if not (project_path / ".mini-rag" / "last_search").exists():
-            print(f'\n💡 Try: rag-mini search {project_path} "your search here"')
-
+        if not (project_path / '.mini-rag' / 'last_search').exists():
+            print(f"\n💡 Try: rag-mini search {project_path} \"your search here\"")
+            
    except FileNotFoundError:
        print(f"📁 Directory Not Found: {project_path}")
        print("   Make sure the path exists and you're in the right location")
@ -119,7 +110,7 @@ def index_project(project_path: Path, force: bool = False):
        # Connection errors are handled in the embedding module
        if "ollama" in str(e).lower() or "connection" in str(e).lower():
            sys.exit(1)  # Error already displayed
-
+            
        print(f"❌ Indexing failed: {e}")
        print()
        print("🔧 Common solutions:")
@ -133,44 +124,39 @@ def index_project(project_path: Path, force: bool = False):
        print("   Or see: docs/TROUBLESHOOTING.md")
        sys.exit(1)

-
 def search_project(project_path: Path, query: str, top_k: int = 10, synthesize: bool = False):
    """Search a project directory."""
    try:
        # Check if indexed first
-        rag_dir = project_path / ".mini-rag"
+        rag_dir = project_path / '.mini-rag'
        if not rag_dir.exists():
            print(f"❌ Project not indexed: {project_path.name}")
            print(f"   Run: rag-mini index {project_path}")
            sys.exit(1)
-
-        print(f'🔍 Searching "{query}" in {project_path.name}')
+        
+        print(f"🔍 Searching \"{query}\" in {project_path.name}")
        searcher = CodeSearcher(project_path)
        results = searcher.search(query, top_k=top_k)
-
+        
        if not results:
            print("❌ No results found")
            print()
            print("🔧 Quick fixes to try:")
-            print('   • Use broader terms: "login" instead of "authenticate_user_session"')
-            print('   • Try concepts: "database query" instead of specific function names')
+            print("   • Use broader terms: \"login\" instead of \"authenticate_user_session\"")
+            print("   • Try concepts: \"database query\" instead of specific function names")
            print("   • Check spelling and try simpler words")
-            print('   • Search for file types: "python class" or "javascript function"')
+            print("   • Search for file types: \"python class\" or \"javascript function\"")
            print()
            print("⚙️ Configuration adjustments:")
-            print(
-                f'   • Lower threshold: ./rag-mini search "{project_path}" "{query}" --threshold 0.05'
-            )
-            print(
-                f'   • More results: ./rag-mini search "{project_path}" "{query}" --top-k 20'
-            )
+            print(f"   • Lower threshold: ./rag-mini search \"{project_path}\" \"{query}\" --threshold 0.05")
+            print(f"   • More results: ./rag-mini search \"{project_path}\" \"{query}\" --top-k 20")
            print()
            print("📚 Need help? See: docs/TROUBLESHOOTING.md")
            return
-
+            
        print(f"✅ Found {len(results)} results:")
        print()
-
+        
        for i, result in enumerate(results, 1):
            # Clean up file path display
            file_path = Path(result.file_path)
@ -179,89 +165,61 @@ def search_project(project_path: Path, query: str, top_k: int = 10, synthesize:
            except ValueError:
                # If relative_to fails, just show the basename
                rel_path = file_path.name
-
+            
            print(f"{i}. {rel_path}")
            print(f"   Score: {result.score:.3f}")
-
+            
            # Show line info if available
-            if hasattr(result, "start_line") and result.start_line:
+            if hasattr(result, 'start_line') and result.start_line:
                print(f"   Lines: {result.start_line}-{result.end_line}")
-
-            # Show content preview
-            if hasattr(result, "name") and result.name:
+            
+            # Show content preview  
+            if hasattr(result, 'name') and result.name:
                print(f"   Context: {result.name}")
-
+            
            # Show full content with proper formatting
-            print("   Content:")
-            content_lines = result.content.strip().split("\n")
+            print(f"   Content:")
+            content_lines = result.content.strip().split('\n')
            for line in content_lines[:10]:  # Show up to 10 lines
                print(f"     {line}")
-
+            
            if len(content_lines) > 10:
                print(f"     ... ({len(content_lines) - 10} more lines)")
-                print("     Use --verbose or rag-mini-enhanced for full context")
-
+                print(f"     Use --verbose or rag-mini-enhanced for full context")
+            
            print()
-
+        
        # LLM Synthesis if requested
        if synthesize:
            print("🧠 Generating LLM synthesis...")
-
-            # Load config to respect user's model preferences
-            from mini_rag.config import ConfigManager
-
-            config_manager = ConfigManager(project_path)
-            config = config_manager.load_config()
-
-            synthesizer = LLMSynthesizer(
-                model=(
-                    config.llm.synthesis_model
-                    if config.llm.synthesis_model != "auto"
-                    else None
-                ),
-                config=config,
-            )
-
+            synthesizer = LLMSynthesizer()
+            
            if synthesizer.is_available():
                synthesis = synthesizer.synthesize_search_results(query, results, project_path)
                print()
                print(synthesizer.format_synthesis_output(synthesis, query))
-
+                
                # Add guidance for deeper analysis
-                if synthesis.confidence < 0.7 or any(
-                    word in query.lower() for word in ["why", "how", "explain", "debug"]
-                ):
+                if synthesis.confidence < 0.7 or any(word in query.lower() for word in ['why', 'how', 'explain', 'debug']):
                    print("\n💡 Want deeper analysis with reasoning?")
                    print(f"   Try: rag-mini explore {project_path}")
-                    print(
-                        "   Exploration mode enables thinking and remembers conversation context."
-                    )
+                    print("   Exploration mode enables thinking and remembers conversation context.")
            else:
                print("❌ LLM synthesis unavailable")
                print("   • Ensure Ollama is running: ollama serve")
                print("   • Install a model: ollama pull qwen3:1.7b")
                print("   • Check connection to http://localhost:11434")
-
+        
        # Save last search for potential enhancements
        try:
-            (rag_dir / "last_search").write_text(query)
-        except (
-            ConnectionError,
-            FileNotFoundError,
-            IOError,
-            OSError,
-            TimeoutError,
-            TypeError,
-            ValueError,
-            requests.RequestException,
-            socket.error,
-        ):
+            (rag_dir / 'last_search').write_text(query)
+        except:
            pass  # Don't fail if we can't save
-
+            
    except Exception as e:
        print(f"❌ Search failed: {e}")
        print()
-
+        
        if "not indexed" in str(e).lower():
            print("🔧 Solution:")
            print(f"   ./rag-mini index {project_path}")
@ -274,45 +232,44 @@ def search_project(project_path: Path, query: str, top_k: int = 10, synthesize:
            print("   • Check available memory and disk space")
            print()
            print("📚 Get detailed error info:")
-            print(f'   ./rag-mini search {project_path} "{query}" --verbose')
+            print(f"   ./rag-mini search {project_path} \"{query}\" --verbose")
            print("   Or see: docs/TROUBLESHOOTING.md")
            print()
        sys.exit(1)

-
 def status_check(project_path: Path):
    """Show status of RAG system."""
    try:
        print(f"📊 Status for {project_path.name}")
        print()
-
+        
        # Check project indexing status first
-        rag_dir = project_path / ".mini-rag"
+        rag_dir = project_path / '.mini-rag'
        if not rag_dir.exists():
            print("❌ Project not indexed")
            print(f"   Run: rag-mini index {project_path}")
            print()
        else:
-            manifest = rag_dir / "manifest.json"
+            manifest = rag_dir / 'manifest.json'
            if manifest.exists():
                try:
                    with open(manifest) as f:
                        data = json.load(f)
-
-                    file_count = data.get("file_count", 0)
-                    chunk_count = data.get("chunk_count", 0)
-                    indexed_at = data.get("indexed_at", "Never")
-
+                    
+                    file_count = data.get('file_count', 0)
+                    chunk_count = data.get('chunk_count', 0)
+                    indexed_at = data.get('indexed_at', 'Never')
+                    
                    print("✅ Project indexed")
                    print(f"   Files: {file_count}")
                    print(f"   Chunks: {chunk_count}")
                    print(f"   Last update: {indexed_at}")
-
+                    
                    # Show average chunks per file
                    if file_count > 0:
                        avg_chunks = chunk_count / file_count
                        print(f"   Avg chunks/file: {avg_chunks:.1f}")
-
+                    
                    print()
                except Exception:
                    print("⚠️  Index exists but manifest unreadable")
@ -321,166 +278,51 @@ def status_check(project_path: Path):
                print("⚠️  Index directory exists but incomplete")
                print(f"   Try: rag-mini index {project_path} --force")
                print()
-
+        
        # Check embedding system status
        print("🧠 Embedding System:")
        try:
            embedder = OllamaEmbedder()
            emb_info = embedder.get_status()
-            method = emb_info.get("method", "unknown")
-
-            if method == "ollama":
+            method = emb_info.get('method', 'unknown')
+            
+            if method == 'ollama':
                print("   ✅ Ollama (high quality)")
-            elif method == "ml":
+            elif method == 'ml':
                print("   ✅ ML fallback (good quality)")
-            elif method == "hash":
+            elif method == 'hash':
                print("   ⚠️  Hash fallback (basic quality)")
            else:
                print(f"   ❓ Unknown method: {method}")
-
+                
            # Show additional details if available
-            if "model" in emb_info:
+            if 'model' in emb_info:
                print(f"   Model: {emb_info['model']}")
-
+                
        except Exception as e:
            print(f"   ❌ Status check failed: {e}")
-
-        print()
-
-        # Check LLM status and show actual vs configured model
-        print("🤖 LLM System:")
-        try:
-            from mini_rag.config import ConfigManager
-
-            config_manager = ConfigManager(project_path)
-            config = config_manager.load_config()
-
-            synthesizer = LLMSynthesizer(
-                model=(
-                    config.llm.synthesis_model
-                    if config.llm.synthesis_model != "auto"
-                    else None
-                ),
-                config=config,
-            )
-
-            if synthesizer.is_available():
-                synthesizer._ensure_initialized()
-                actual_model = synthesizer.model
-                config_model = config.llm.synthesis_model
-
-                if config_model == "auto":
-                    print(f"   ✅ Auto-selected: {actual_model}")
-                elif config_model == actual_model:
-                    print(f"   ✅ Using configured: {actual_model}")
-                else:
-                    print("   ⚠️  Model mismatch!")
-                    print(f"   Configured: {config_model}")
-                    print(f"   Actually using: {actual_model}")
-                    print("   (Configured model may not be installed)")
-
-                print(f"   Config file: {config_manager.config_path}")
-            else:
-                print("   ❌ Ollama not available")
-                print("   Start with: ollama serve")
-
-        except Exception as e:
-            print(f"   ❌ LLM status check failed: {e}")
-
+            
        # Show last search if available
-        last_search_file = rag_dir / "last_search" if rag_dir.exists() else None
+        last_search_file = rag_dir / 'last_search' if rag_dir.exists() else None
        if last_search_file and last_search_file.exists():
            try:
                last_query = last_search_file.read_text().strip()
-                print(f'\n🔍 Last search: "{last_query}"')
-            except (FileNotFoundError, IOError, OSError, TypeError, ValueError):
+                print(f"\n🔍 Last search: \"{last_query}\"")
+            except:
                pass
-
+            
    except Exception as e:
        print(f"❌ Status check failed: {e}")
        sys.exit(1)

-
-def show_model_status(project_path: Path):
-    """Show detailed model status and selection information."""
-    from mini_rag.config import ConfigManager
-    
-    print("🤖 Model Status Report")
-    print("=" * 50)
-    
-    try:
-        # Load config
-        config_manager = ConfigManager()
-        config = config_manager.load_config(project_path)
-        
-        # Create LLM synthesizer to check models
-        synthesizer = LLMSynthesizer(model=config.llm.synthesis_model, config=config)
-        
-        # Show configured model
-        print(f"📋 Configured model: {config.llm.synthesis_model}")
-        
-        # Show available models
-        available_models = synthesizer.available_models
-        if available_models:
-            print(f"\n📦 Available models ({len(available_models)}):")
-            
-            # Group models by series
-            qwen3_models = [m for m in available_models if m.startswith('qwen3:')]
-            qwen25_models = [m for m in available_models if m.startswith('qwen2.5')]
-            other_models = [m for m in available_models if not (m.startswith('qwen3:') or m.startswith('qwen2.5'))]
-            
-            if qwen3_models:
-                print("   🟢 Qwen3 series (recommended):")
-                for model in qwen3_models:
-                    is_selected = synthesizer._resolve_model_name(config.llm.synthesis_model) == model
-                    marker = "  ✅" if is_selected else "    "
-                    print(f"{marker} {model}")
-            
-            if qwen25_models:
-                print("   🟡 Qwen2.5 series:")
-                for model in qwen25_models:
-                    is_selected = synthesizer._resolve_model_name(config.llm.synthesis_model) == model
-                    marker = "  ✅" if is_selected else "    "
-                    print(f"{marker} {model}")
-                    
-            if other_models:
-                print("   🔵 Other models:")
-                for model in other_models[:10]:  # Limit to first 10
-                    is_selected = synthesizer._resolve_model_name(config.llm.synthesis_model) == model
-                    marker = "  ✅" if is_selected else "    "
-                    print(f"{marker} {model}")
-        else:
-            print("\n❌ No models available from Ollama")
-            print("   Make sure Ollama is running: ollama serve")
-            print("   Install models with: ollama pull qwen3:4b")
-            
-        # Show resolution result
-        resolved_model = synthesizer._resolve_model_name(config.llm.synthesis_model)
-        if resolved_model:
-            if resolved_model != config.llm.synthesis_model:
-                print(f"\n🔄 Model resolution: {config.llm.synthesis_model} -> {resolved_model}")
-            else:
-                print(f"\n✅ Using exact model match: {resolved_model}")
-        else:
-            print(f"\n❌ Model '{config.llm.synthesis_model}' not found!")
-            print("   Consider changing your model in the config file")
-            
-        print(f"\n📄 Config file: {config_manager.config_path}")
-        print("   Edit this file to change your model preference")
-        
-    except Exception as e:
-        print(f"❌ Model status check failed: {e}")
-        sys.exit(1)
-
-
 def explore_interactive(project_path: Path):
    """Interactive exploration mode with thinking and context memory for any documents."""
    try:
        explorer = CodeExplorer(project_path)
-
+        
        if not explorer.start_exploration_session():
            sys.exit(1)
-
+        
        # Show enhanced first-time guidance
        print(f"\n🤔 Ask your first question about {project_path.name}:")
        print()
@ -489,12 +331,12 @@ def explore_interactive(project_path: Path):
        print()
        print("🔧 Quick options:")
        print("   1. Help - Show example questions")
-        print("   2. Status - Project information")
+        print("   2. Status - Project information")  
        print("   3. Suggest - Get a random starter question")
        print()
-
+        
        is_first_question = True
-
+        
        while True:
            try:
                # Get user input with clearer prompt
@ -502,12 +344,12 @@ def explore_interactive(project_path: Path):
                    question = input("📝 Enter question or option (1-3): ").strip()
                else:
                    question = input("\n> ").strip()
-
+                
                # Handle exit commands
-                if question.lower() in ["quit", "exit", "q"]:
+                if question.lower() in ['quit', 'exit', 'q']:
                    print("\n" + explorer.end_session())
                    break
-
+                
                # Handle empty input
                if not question:
                    if is_first_question:
@ -515,18 +357,17 @@ def explore_interactive(project_path: Path):
                    else:
                        print("Please enter a question or 'quit' to exit.")
                    continue
-
+                
                # Handle numbered options and special commands
-                if question in ["1"] or question.lower() in ["help", "h"]:
-                    print(
-                        """
+                if question in ['1'] or question.lower() in ['help', 'h']:
+                    print("""
 🧠 EXPLORATION MODE HELP:
  • Ask any question about your documents or code
  • I remember our conversation for follow-up questions
  • Use 'why', 'how', 'explain' for detailed reasoning
  • Type 'summary' to see session overview
  • Type 'quit' or 'exit' to end session
-
+  
 💡 Example questions:
  • "How does authentication work?"
  • "What are the main components?"
@ -534,40 +375,36 @@ def explore_interactive(project_path: Path):
  • "Why is this function slow?"
  • "What security measures are in place?"
  • "How does data flow through this system?"
-"""
-                    )
+""")
                    continue
-
-                elif question in ["2"] or question.lower() == "status":
-                    print(
-                        """
+                    
+                elif question in ['2'] or question.lower() == 'status':
+                    print(f"""
 📊 PROJECT STATUS: {project_path.name}
  • Location: {project_path}
  • Exploration session active
  • AI model ready for questions
  • Conversation memory enabled
-"""
-                    )
+""")
                    continue
-
-                elif question in ["3"] or question.lower() == "suggest":
+                    
+                elif question in ['3'] or question.lower() == 'suggest':
                    # Random starter questions for first-time users
                    if is_first_question:
                        import random
-
                        starters = [
                            "What are the main components of this project?",
-                            "How is error handling implemented?",
+                            "How is error handling implemented?", 
                            "Show me the authentication and security logic",
                            "What are the key functions I should understand first?",
                            "How does data flow through this system?",
                            "What configuration options are available?",
-                            "Show me the most important files to understand",
+                            "Show me the most important files to understand"
                        ]
                        suggested = random.choice(starters)
                        print(f"\n💡 Suggested question: {suggested}")
                        print("   Press Enter to use this, or type your own question:")
-
+                        
                        next_input = input("📝 > ").strip()
                        if not next_input:  # User pressed Enter to use suggestion
                            question = suggested
@ -580,24 +417,24 @@ def explore_interactive(project_path: Path):
                        print('   "What are the security implications?"')
                        print('   "Show me related code examples"')
                        continue
-
-                if question.lower() == "summary":
+                
+                if question.lower() == 'summary':
                    print("\n" + explorer.get_session_summary())
                    continue
-
+                
                # Process the question
                print(f"\n🔍 Searching {project_path.name}...")
                print("🧠 Thinking with AI model...")
                response = explorer.explore_question(question)
-
+                
                # Mark as no longer first question after processing
                is_first_question = False
-
+                
                if response:
                    print(f"\n{response}")
                else:
                    print("❌ Sorry, I couldn't process that question. Please try again.")
-
+                
            except KeyboardInterrupt:
                print(f"\n\n{explorer.end_session()}")
                break
@ -607,94 +444,88 @@ def explore_interactive(project_path: Path):
            except Exception as e:
                print(f"❌ Error processing question: {e}")
                print("Please try again or type 'quit' to exit.")
-
+        
    except Exception as e:
        print(f"❌ Failed to start exploration mode: {e}")
        print("Make sure the project is indexed first: rag-mini index <project>")
        sys.exit(1)

-
 def show_discrete_update_notice():
    """Show a discrete, non-intrusive update notice for CLI users."""
    if not UPDATER_AVAILABLE:
        return
-
+        
    try:
        update_info = check_for_updates()
        if update_info:
            # Very discrete notice - just one line
-            print(
-                f"🔄 (Update v{update_info.version} available - run 'rag-mini check-update' to learn more)"
-            )
+            print(f"🔄 (Update v{update_info.version} available - run 'rag-mini check-update' to learn more)")
    except Exception:
        # Silently ignore any update check failures
        pass

-
 def handle_check_update():
    """Handle the check-update command."""
    if not UPDATER_AVAILABLE:
        print("❌ Update system not available")
        print("💡 Try updating to the latest version manually from GitHub")
        return
-
+        
    try:
        print("🔍 Checking for updates...")
        update_info = check_for_updates()
-
+        
        if update_info:
            print(f"\n🎉 Update Available: v{update_info.version}")
            print("=" * 50)
            print("\n📋 What's New:")
-            notes_lines = update_info.release_notes.split("\n")[:10]  # First 10 lines
+            notes_lines = update_info.release_notes.split('\n')[:10]  # First 10 lines
            for line in notes_lines:
                if line.strip():
                    print(f"   {line.strip()}")
-
+            
            print(f"\n🔗 Release Page: {update_info.release_url}")
-            print("\n🚀 To install: rag-mini update")
+            print(f"\n🚀 To install: rag-mini update")
            print("💡 Or update manually from GitHub releases")
        else:
            print("✅ You're already on the latest version!")
-
+            
    except Exception as e:
        print(f"❌ Failed to check for updates: {e}")
        print("💡 Try updating manually from GitHub")

-
 def handle_update():
    """Handle the update command."""
    if not UPDATER_AVAILABLE:
        print("❌ Update system not available")
        print("💡 Try updating manually from GitHub")
        return
-
+        
    try:
        print("🔍 Checking for updates...")
        update_info = check_for_updates()
-
+        
        if not update_info:
            print("✅ You're already on the latest version!")
            return
-
+            
        print(f"\n🎉 Update Available: v{update_info.version}")
        print("=" * 50)
-
+        
        # Show brief release notes
-        notes_lines = update_info.release_notes.split("\n")[:5]
+        notes_lines = update_info.release_notes.split('\n')[:5]
        for line in notes_lines:
            if line.strip():
                print(f"   • {line.strip()}")
-
+        
        # Confirm update
        confirm = input(f"\n🚀 Install v{update_info.version}? [Y/n]: ").strip().lower()
-        if confirm in ["", "y", "yes"]:
+        if confirm in ['', 'y', 'yes']:
            updater = get_updater()
-
+            
            print(f"\n📥 Downloading v{update_info.version}...")
-
+            
            # Progress callback
-
            def show_progress(downloaded, total):
                if total > 0:
                    percent = (downloaded / total) * 100
@ -702,17 +533,17 @@ def handle_update():
                    filled = int(bar_length * downloaded / total)
                    bar = "█" * filled + "░" * (bar_length - filled)
                    print(f"\r   [{bar}] {percent:.1f}%", end="", flush=True)
-
+            
            # Download and install
            update_package = updater.download_update(update_info, show_progress)
            if not update_package:
                print("\n❌ Download failed. Please try again later.")
                return
-
+                
            print("\n💾 Creating backup...")
            if not updater.create_backup():
                print("⚠️ Backup failed, but continuing anyway...")
-
+                
            print("🔄 Installing update...")
            if updater.apply_update(update_package, update_info):
                print("✅ Update successful!")
@ -727,111 +558,91 @@ def handle_update():
                    print("❌ Rollback failed. You may need to reinstall.")
        else:
            print("Update cancelled.")
-
+            
    except Exception as e:
        print(f"❌ Update failed: {e}")
        print("💡 Try updating manually from GitHub")

-
 def main():
    """Main CLI interface."""
    # Check virtual environment
    try:
        from mini_rag.venv_checker import check_and_warn_venv
-
        check_and_warn_venv("rag-mini.py", force_exit=False)
    except ImportError:
        pass  # If venv checker can't be imported, continue anyway
-
+    
    parser = argparse.ArgumentParser(
        description="FSS-Mini-RAG - Lightweight semantic code search",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
 Examples:
  rag-mini index /path/to/project              # Index a project
-  rag-mini search /path/to/project "query"     # Search indexed project
+  rag-mini search /path/to/project "query"     # Search indexed project  
  rag-mini search /path/to/project "query" -s  # Search with LLM synthesis
  rag-mini explore /path/to/project            # Interactive exploration mode
  rag-mini status /path/to/project             # Show status
-  rag-mini models /path/to/project             # Show model status and selection
-        """,
+        """
    )
-
-    parser.add_argument(
-        "command",
-        choices=["index", "search", "explore", "status", "models", "update", "check-update"],
-        help="Command to execute",
-    )
-    parser.add_argument(
-        "project_path",
-        type=Path,
-        nargs="?",
-        help="Path to project directory (REQUIRED except for update commands)",
-    )
-    parser.add_argument("query", nargs="?", help="Search query (for search command)")
-    parser.add_argument("--force", action="store_true", help="Force reindex all files")
-    parser.add_argument(
-        "--top-k",
-        "--limit",
-        type=int,
-        default=10,
-        dest="top_k",
-        help="Maximum number of search results (top-k)",
-    )
-    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
-    parser.add_argument(
-        "--synthesize",
-        "-s",
-        action="store_true",
-        help="Generate LLM synthesis of search results (requires Ollama)",
-    )
-
+    
+    parser.add_argument('command', choices=['index', 'search', 'explore', 'status', 'update', 'check-update'],
+                       help='Command to execute')
+    parser.add_argument('project_path', type=Path, nargs='?',
+                       help='Path to project directory (REQUIRED except for update commands)')
+    parser.add_argument('query', nargs='?',
+                       help='Search query (for search command)')
+    parser.add_argument('--force', action='store_true',
+                       help='Force reindex all files')
+    parser.add_argument('--top-k', '--limit', type=int, default=10, dest='top_k',
+                       help='Maximum number of search results (top-k)')
+    parser.add_argument('--verbose', '-v', action='store_true',
+                       help='Enable verbose logging')
+    parser.add_argument('--synthesize', '-s', action='store_true',
+                       help='Generate LLM synthesis of search results (requires Ollama)')
+    
    args = parser.parse_args()
-
+    
    # Set logging level
    if args.verbose:
        logging.getLogger().setLevel(logging.INFO)
-
+    
    # Handle update commands first (don't require project_path)
-    if args.command == "check-update":
+    if args.command == 'check-update':
        handle_check_update()
        return
-    elif args.command == "update":
+    elif args.command == 'update':
        handle_update()
        return
-
+    
    # All other commands require project_path
    if not args.project_path:
        print("❌ Project path required for this command")
        sys.exit(1)
-
+    
    # Validate project path
    if not args.project_path.exists():
        print(f"❌ Project path does not exist: {args.project_path}")
        sys.exit(1)
-
+        
    if not args.project_path.is_dir():
        print(f"❌ Project path is not a directory: {args.project_path}")
        sys.exit(1)
-
+    
    # Show discrete update notification for regular commands (non-intrusive)
    show_discrete_update_notice()
-
+    
    # Execute command
-    if args.command == "index":
+    if args.command == 'index':
        index_project(args.project_path, args.force)
-    elif args.command == "search":
+    elif args.command == 'search':
        if not args.query:
            print("❌ Search query required")
            sys.exit(1)
        search_project(args.project_path, args.query, args.top_k, args.synthesize)
-    elif args.command == "explore":
+    elif args.command == 'explore':
        explore_interactive(args.project_path)
-    elif args.command == "status":
+    elif args.command == 'status':
        status_check(args.project_path)
-    elif args.command == "models":
-        show_model_status(args.project_path)

-
-if __name__ == "__main__":
-    main()
+if __name__ == '__main__':
+    main()
--- a/2
+++ b/2
@ -19,4 +19,4 @@ if [ ! -f "$PYTHON" ]; then
 fi

 # Launch TUI
-exec "$PYTHON" "$SCRIPT_DIR/bin/rag-tui.py" "$@"
+exec "$PYTHON" "$SCRIPT_DIR/rag-tui.py" "$@"
--- a/bin/rag-tui.py
+++ b/bin/rag-tui.py
--- a/scripts/setup-github-template.py
+++ b/scripts/setup-github-template.py
@ -6,67 +6,67 @@ Converts a project to use the auto-update template system.
 This script helps migrate projects from Gitea to GitHub with auto-update capability.
 """

-import argparse
+import os
+import sys
 import json
 import shutil
-import sys
+import argparse
 from pathlib import Path
-from typing import Dict, Optional
-
+from typing import Dict, Any, Optional

 def setup_project_template(
    project_path: Path,
    repo_owner: str,
    repo_name: str,
    project_type: str = "python",
-    include_auto_update: bool = True,
+    include_auto_update: bool = True
 ) -> bool:
    """
    Setup a project to use the GitHub auto-update template system.
-
+    
    Args:
        project_path: Path to the project directory
        repo_owner: GitHub username/organization
-        repo_name: GitHub repository name
+        repo_name: GitHub repository name  
        project_type: Type of project (python, general)
        include_auto_update: Whether to include auto-update system
-
+        
    Returns:
        True if setup successful
    """
-
+    
    print(f"🚀 Setting up GitHub template for: {repo_owner}/{repo_name}")
    print(f"📁 Project path: {project_path}")
    print(f"🔧 Project type: {project_type}")
    print(f"🔄 Auto-update: {'Enabled' if include_auto_update else 'Disabled'}")
    print()
-
+    
    try:
        # Create .github directory structure
        github_dir = project_path / ".github"
        workflows_dir = github_dir / "workflows"
        templates_dir = github_dir / "ISSUE_TEMPLATE"
-
+        
        # Ensure directories exist
        workflows_dir.mkdir(parents=True, exist_ok=True)
        templates_dir.mkdir(parents=True, exist_ok=True)
-
+        
        # 1. Setup GitHub Actions workflows
        setup_workflows(workflows_dir, repo_owner, repo_name, project_type)
-
+        
        # 2. Setup auto-update system if requested
        if include_auto_update:
            setup_auto_update_system(project_path, repo_owner, repo_name)
-
+            
        # 3. Create issue templates
        setup_issue_templates(templates_dir)
-
+        
        # 4. Create/update project configuration
        setup_project_config(project_path, repo_owner, repo_name, include_auto_update)
-
+        
        # 5. Create README template if needed
        setup_readme_template(project_path, repo_owner, repo_name)
-
+        
        print("✅ GitHub template setup completed successfully!")
        print()
        print("📋 Next Steps:")
@ -75,21 +75,20 @@ def setup_project_template(
        print("3. Test auto-update system: ./project check-update")
        print("4. Enable GitHub Pages for documentation (optional)")
        print()
-
+        
        return True
-
+        
    except Exception as e:
        print(f"❌ Setup failed: {e}")
        return False

-
 def setup_workflows(workflows_dir: Path, repo_owner: str, repo_name: str, project_type: str):
    """Setup GitHub Actions workflow files."""
-
+    
    print("🔧 Setting up GitHub Actions workflows...")
-
+    
    # Release workflow
-    release_workflow = """name: Auto Release & Update System
+    release_workflow = f"""name: Auto Release & Update System
 on:
  push:
    tags:
@ -106,18 +105,18 @@ jobs:
    runs-on: ubuntu-latest
    permissions:
      contents: write
-
+      
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
-
+        
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'
-
+        
    - name: Extract version
      id: version
      run: |
@ -128,18 +127,18 @@ jobs:
        fi
        echo "version=$VERSION" >> $GITHUB_OUTPUT
        echo "clean_version=${{VERSION#v}}" >> $GITHUB_OUTPUT
-
+        
    - name: Update version in code
      run: |
        VERSION="${{{{ steps.version.outputs.clean_version }}}}"
        # Update version files
        find . -name "__init__.py" -exec sed -i 's/__version__ = ".*"/__version__ = "'$VERSION'"/' {{}} +
-
+        
    - name: Generate release notes
      id: release_notes
      run: |
        VERSION="${{{{ steps.version.outputs.version }}}}"
-
+        
        # Get commits since last tag
        LAST_TAG=$(git describe --tags --abbrev=0 HEAD~1 2>/dev/null || echo "")
        if [ -n "$LAST_TAG" ]; then
@ -147,28 +146,28 @@ jobs:
        else
          COMMITS=$(git log --oneline --pretty=format:"• %s" | head -10)
        fi
-
+        
        # Create release notes
        cat > release_notes.md << EOF
        ## What's New in $VERSION
-
+        
        ### 🚀 Changes
        $COMMITS
-
+        
        ### 📥 Installation
        Download and install the latest version:
-        ```bash
+        \`\`\`bash
        curl -sSL https://github.com/{repo_owner}/{repo_name}/releases/latest/download/install.sh | bash
-        ```
-
+        \`\`\`
+        
        ### 🔄 Auto-Update
        If you have auto-update support:
-        ```bash
+        \`\`\`bash
        ./{repo_name} check-update
        ./{repo_name} update
-        ```
+        \`\`\`
        EOF
-
+        
    - name: Create GitHub Release
      uses: softprops/action-gh-release@v2
      with:
@ -182,12 +181,12 @@ jobs:
          *.bat
          requirements.txt
 """
-
+    
    (workflows_dir / "release.yml").write_text(release_workflow)
-
+    
    # CI workflow for Python projects
    if project_type == "python":
-        ci_workflow = """name: CI/CD Pipeline
+        ci_workflow = f"""name: CI/CD Pipeline
 on:
  push:
    branches: [ main, develop ]
@ -202,25 +201,25 @@ jobs:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
-
+    
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
-
+      
    - name: Set up Python ${{{{ matrix.python-version }}}}
      uses: actions/setup-python@v5
      with:
        python-version: ${{{{ matrix.python-version }}}}
-
+        
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
-
+        
    - name: Run tests
      run: |
        python -c "import {repo_name.replace('-', '_')}; print('✅ Import successful')"
-
+        
    - name: Test auto-update system
      run: |
        python -c "
@ -232,38 +231,33 @@ jobs:
        "
 """
        (workflows_dir / "ci.yml").write_text(ci_workflow)
-
+    
    print("  ✅ GitHub Actions workflows created")

-
 def setup_auto_update_system(project_path: Path, repo_owner: str, repo_name: str):
    """Setup the auto-update system for the project."""
-
+    
    print("🔄 Setting up auto-update system...")
-
+    
    # Copy updater.py from FSS-Mini-RAG as template
    template_updater = Path(__file__).parent.parent / "mini_rag" / "updater.py"
-
+    
    if template_updater.exists():
        # Create project module directory if needed
-        module_name = repo_name.replace("-", "_")
+        module_name = repo_name.replace('-', '_')
        module_dir = project_path / module_name
        module_dir.mkdir(exist_ok=True)
-
+        
        # Copy and customize updater
        target_updater = module_dir / "updater.py"
        shutil.copy2(template_updater, target_updater)
-
+        
        # Customize for this project
        content = target_updater.read_text()
-        content = content.replace(
-            'repo_owner: str = "FSSCoding"', f'repo_owner: str = "{repo_owner}"'
-        )
-        content = content.replace(
-            'repo_name: str = "Fss-Mini-Rag"', f'repo_name: str = "{repo_name}"'
-        )
+        content = content.replace('repo_owner: str = "FSSCoding"', f'repo_owner: str = "{repo_owner}"')
+        content = content.replace('repo_name: str = "Fss-Mini-Rag"', f'repo_name: str = "{repo_name}"')
        target_updater.write_text(content)
-
+        
        # Update __init__.py to include updater
        init_file = module_dir / "__init__.py"
        if init_file.exists():
@ -278,17 +272,16 @@ except ImportError:
    pass
 """
                init_file.write_text(content)
-
+        
        print("  ✅ Auto-update system configured")
    else:
        print("  ⚠️ Template updater not found, you'll need to implement manually")

-
 def setup_issue_templates(templates_dir: Path):
    """Setup GitHub issue templates."""
-
+    
    print("📝 Setting up issue templates...")
-
+    
    bug_template = """---
 name: Bug Report
 about: Create a report to help us improve
@ -319,7 +312,7 @@ A clear and concise description of what you expected to happen.
 **Additional context**
 Add any other context about the problem here.
 """
-
+    
    feature_template = """---
 name: Feature Request
 about: Suggest an idea for this project
@ -341,50 +334,46 @@ A clear and concise description of any alternative solutions you've considered.
 **Additional context**
 Add any other context or screenshots about the feature request here.
 """
-
+    
    (templates_dir / "bug_report.md").write_text(bug_template)
    (templates_dir / "feature_request.md").write_text(feature_template)
-
+    
    print("  ✅ Issue templates created")

-
-def setup_project_config(
-    project_path: Path, repo_owner: str, repo_name: str, include_auto_update: bool
-):
+def setup_project_config(project_path: Path, repo_owner: str, repo_name: str, include_auto_update: bool):
    """Setup project configuration file."""
-
+    
    print("⚙️ Setting up project configuration...")
-
+    
    config = {
        "project": {
            "name": repo_name,
            "owner": repo_owner,
            "github_url": f"https://github.com/{repo_owner}/{repo_name}",
-            "auto_update_enabled": include_auto_update,
+            "auto_update_enabled": include_auto_update
        },
        "github": {
            "template_version": "1.0.0",
            "last_sync": None,
-            "workflows_enabled": True,
-        },
+            "workflows_enabled": True
+        }
    }
-
+    
    config_file = project_path / ".github" / "project-config.json"
-    with open(config_file, "w") as f:
+    with open(config_file, 'w') as f:
        json.dump(config, f, indent=2)
-
+    
    print("  ✅ Project configuration created")

-
 def setup_readme_template(project_path: Path, repo_owner: str, repo_name: str):
    """Setup README template if one doesn't exist."""
-
+    
    readme_file = project_path / "README.md"
-
+    
    if not readme_file.exists():
        print("📖 Creating README template...")
-
-        readme_content = """# {repo_name}
+        
+        readme_content = f"""# {repo_name}

 > A brief description of your project

@ -401,7 +390,7 @@ curl -sSL https://github.com/{repo_owner}/{repo_name}/releases/latest/download/i
 ## Features

 - ✨ Feature 1
- 🚀 Feature 2
+- 🚀 Feature 2  
 - 🔧 Feature 3

 ## Installation
@ -452,11 +441,10 @@ This project includes automatic update checking:

 🤖 **Auto-Update Enabled**: This project will notify you of new versions automatically!
 """
-
+        
        readme_file.write_text(readme_content)
        print("  ✅ README template created")

-
 def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
@ -466,38 +454,32 @@ def main():
 Examples:
  python setup-github-template.py myproject --owner username --name my-project
  python setup-github-template.py /path/to/project --owner org --name cool-tool --no-auto-update
-        """,
+        """
    )
-
-    parser.add_argument("project_path", type=Path, help="Path to project directory")
-    parser.add_argument("--owner", required=True, help="GitHub username or organization")
-    parser.add_argument("--name", required=True, help="GitHub repository name")
-    parser.add_argument(
-        "--type",
-        choices=["python", "general"],
-        default="python",
-        help="Project type (default: python)",
-    )
-    parser.add_argument(
-        "--no-auto-update", action="store_true", help="Disable auto-update system"
-    )
-
+    
+    parser.add_argument('project_path', type=Path, help='Path to project directory')
+    parser.add_argument('--owner', required=True, help='GitHub username or organization')
+    parser.add_argument('--name', required=True, help='GitHub repository name')
+    parser.add_argument('--type', choices=['python', 'general'], default='python', 
+                       help='Project type (default: python)')
+    parser.add_argument('--no-auto-update', action='store_true', 
+                       help='Disable auto-update system')
+    
    args = parser.parse_args()
-
+    
    if not args.project_path.exists():
        print(f"❌ Project path does not exist: {args.project_path}")
        sys.exit(1)
-
+        
    success = setup_project_template(
        project_path=args.project_path,
        repo_owner=args.owner,
        repo_name=args.name,
        project_type=args.type,
-        include_auto_update=not args.no_auto_update,
+        include_auto_update=not args.no_auto_update
    )
-
+    
    sys.exit(0 if success else 1)

-
 if __name__ == "__main__":
-    main()
+    main()
--- a/scripts/test-configs.py
+++ b/scripts/test-configs.py
@ -4,87 +4,80 @@ Test script to validate all config examples are syntactically correct
 and contain required fields for FSS-Mini-RAG.
 """

+import yaml
 import sys
 from pathlib import Path
-from typing import Any, Dict, List
-
-import yaml
-
+from typing import Dict, Any, List

 def validate_config_structure(config: Dict[str, Any], config_name: str) -> List[str]:
    """Validate that config has required structure."""
    errors = []
-
+    
    # Required sections
-    required_sections = ["chunking", "streaming", "files", "embedding", "search"]
+    required_sections = ['chunking', 'streaming', 'files', 'embedding', 'search']
    for section in required_sections:
        if section not in config:
            errors.append(f"{config_name}: Missing required section '{section}'")
-
+    
    # Validate chunking section
-    if "chunking" in config:
-        chunking = config["chunking"]
-        required_chunking = ["max_size", "min_size", "strategy"]
+    if 'chunking' in config:
+        chunking = config['chunking']
+        required_chunking = ['max_size', 'min_size', 'strategy']
        for field in required_chunking:
            if field not in chunking:
                errors.append(f"{config_name}: Missing chunking.{field}")
-
+        
        # Validate types and ranges
-        if "max_size" in chunking and not isinstance(chunking["max_size"], int):
+        if 'max_size' in chunking and not isinstance(chunking['max_size'], int):
            errors.append(f"{config_name}: chunking.max_size must be integer")
-        if "min_size" in chunking and not isinstance(chunking["min_size"], int):
+        if 'min_size' in chunking and not isinstance(chunking['min_size'], int):
            errors.append(f"{config_name}: chunking.min_size must be integer")
-        if "strategy" in chunking and chunking["strategy"] not in ["semantic", "fixed"]:
+        if 'strategy' in chunking and chunking['strategy'] not in ['semantic', 'fixed']:
            errors.append(f"{config_name}: chunking.strategy must be 'semantic' or 'fixed'")
-
+    
    # Validate embedding section
-    if "embedding" in config:
-        embedding = config["embedding"]
-        if "preferred_method" in embedding:
-            valid_methods = ["ollama", "ml", "hash", "auto"]
-            if embedding["preferred_method"] not in valid_methods:
-                errors.append(
-                    f"{config_name}: embedding.preferred_method must be one of {valid_methods}"
-                )
-
+    if 'embedding' in config:
+        embedding = config['embedding']
+        if 'preferred_method' in embedding:
+            valid_methods = ['ollama', 'ml', 'hash', 'auto']
+            if embedding['preferred_method'] not in valid_methods:
+                errors.append(f"{config_name}: embedding.preferred_method must be one of {valid_methods}")
+    
    # Validate LLM section (if present)
-    if "llm" in config:
-        llm = config["llm"]
-        if "synthesis_temperature" in llm:
-            temp = llm["synthesis_temperature"]
+    if 'llm' in config:
+        llm = config['llm']
+        if 'synthesis_temperature' in llm:
+            temp = llm['synthesis_temperature']
            if not isinstance(temp, (int, float)) or temp < 0 or temp > 1:
-                errors.append(
-                    f"{config_name}: llm.synthesis_temperature must be number between 0-1"
-                )
-
+                errors.append(f"{config_name}: llm.synthesis_temperature must be number between 0-1")
+    
    return errors

-
 def test_config_file(config_path: Path) -> bool:
    """Test a single config file."""
    print(f"Testing {config_path.name}...")
-
+    
    try:
        # Test YAML parsing
-        with open(config_path, "r") as f:
+        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)
-
+        
        if not config:
            print(f"  ❌ {config_path.name}: Empty or invalid YAML")
            return False
-
+        
        # Test structure
        errors = validate_config_structure(config, config_path.name)
-
+        
        if errors:
            print(f"  ❌ {config_path.name}: Structure errors:")
            for error in errors:
                print(f"     • {error}")
            return False
-
+        
        print(f"  ✅ {config_path.name}: Valid")
        return True
-
+        
    except yaml.YAMLError as e:
        print(f"  ❌ {config_path.name}: YAML parsing error: {e}")
        return False
@ -92,32 +85,31 @@ def test_config_file(config_path: Path) -> bool:
        print(f"  ❌ {config_path.name}: Unexpected error: {e}")
        return False

-
 def main():
    """Test all config examples."""
    script_dir = Path(__file__).parent
    project_root = script_dir.parent
-    examples_dir = project_root / "examples"
-
+    examples_dir = project_root / 'examples'
+    
    if not examples_dir.exists():
        print(f"❌ Examples directory not found: {examples_dir}")
        sys.exit(1)
-
+    
    # Find all config files
-    config_files = list(examples_dir.glob("config*.yaml"))
-
+    config_files = list(examples_dir.glob('config*.yaml'))
+    
    if not config_files:
        print(f"❌ No config files found in {examples_dir}")
        sys.exit(1)
-
+    
    print(f"🧪 Testing {len(config_files)} config files...\n")
-
+    
    all_passed = True
    for config_file in sorted(config_files):
        passed = test_config_file(config_file)
        if not passed:
            all_passed = False
-
+    
    print(f"\n{'='*50}")
    if all_passed:
        print("✅ All config files are valid!")
@ -128,6 +120,5 @@ def main():
        print("❌ Some config files have issues - please fix before release")
        sys.exit(1)

-
-if __name__ == "__main__":
-    main()
+if __name__ == '__main__':
+    main()
--- a/tests/test_fixes.py
+++ b/tests/test_fixes.py
@ -10,61 +10,55 @@ Or run directly with venv:
    source .venv/bin/activate && python test_fixes.py
 """

-import os
 import sys
+import os
 import tempfile
 from pathlib import Path

 # Check if virtual environment is activated
-
-
 def check_venv():
-    if "VIRTUAL_ENV" not in os.environ:
+    if 'VIRTUAL_ENV' not in os.environ:
        print("⚠️  WARNING: Virtual environment not detected!")
        print("   This test requires the virtual environment to be activated.")
        print("   Run: source .venv/bin/activate && python test_fixes.py")
        print("   Continuing anyway...\n")

-
 check_venv()

 # Add current directory to Python path
-sys.path.insert(0, ".")
-
+sys.path.insert(0, '.')

 def test_config_model_rankings():
    """Test that model rankings are properly configured."""
    print("=" * 60)
    print("TESTING CONFIG AND MODEL RANKINGS")
    print("=" * 60)
-
+    
    try:
        # Test config loading without heavy dependencies
        from mini_rag.config import ConfigManager, LLMConfig
-
+        
        # Create a temporary directory for testing
        with tempfile.TemporaryDirectory() as tmpdir:
            config_manager = ConfigManager(tmpdir)
            config = config_manager.load_config()
-
+            
            print("✓ Config loads successfully")
-
+            
            # Check LLM config and model rankings
-            if hasattr(config, "llm"):
+            if hasattr(config, 'llm'):
                llm_config = config.llm
                print(f"✓ LLM config found: {type(llm_config)}")
-
-                if hasattr(llm_config, "model_rankings"):
+                
+                if hasattr(llm_config, 'model_rankings'):
                    rankings = llm_config.model_rankings
                    print(f"✓ Model rankings: {rankings}")
-
+                    
                    if rankings and rankings[0] == "qwen3:1.7b":
                        print("✓ qwen3:1.7b is FIRST priority - CORRECT!")
                        return True
                    else:
-                        print(
-                            f"✗ WRONG: First model is {rankings[0] if rankings else 'None'}, should be qwen3:1.7b"
-                        )
+                        print(f"✗ WRONG: First model is {rankings[0] if rankings else 'None'}, should be qwen3:1.7b")
                        return False
                else:
                    print("✗ Model rankings not found in LLM config")
@ -72,7 +66,7 @@ def test_config_model_rankings():
            else:
                print("✗ LLM config not found")
                return False
-
+                
    except ImportError as e:
        print(f"✗ Import error: {e}")
        return False
@ -80,18 +74,17 @@ def test_config_model_rankings():
        print(f"✗ Error: {e}")
        return False

-
 def test_context_length_fix():
    """Test that context length is correctly set to 32K."""
    print("\n" + "=" * 60)
    print("TESTING CONTEXT LENGTH FIXES")
    print("=" * 60)
-
+    
    try:
        # Read the synthesizer file and check for 32000
-        with open("mini_rag/llm_synthesizer.py", "r") as f:
+        with open('mini_rag/llm_synthesizer.py', 'r') as f:
            synthesizer_content = f.read()
-
+        
        if '"num_ctx": 32000' in synthesizer_content:
            print("✓ LLM Synthesizer: num_ctx is correctly set to 32000")
        elif '"num_ctx": 80000' in synthesizer_content:
@ -99,139 +92,133 @@ def test_context_length_fix():
            return False
        else:
            print("? LLM Synthesizer: num_ctx setting not found clearly")
-
+        
        # Read the safeguards file and check for 32000
-        with open("mini_rag/llm_safeguards.py", "r") as f:
+        with open('mini_rag/llm_safeguards.py', 'r') as f:
            safeguards_content = f.read()
-
-        if "context_window: int = 32000" in safeguards_content:
+        
+        if 'context_window: int = 32000' in safeguards_content:
            print("✓ Safeguards: context_window is correctly set to 32000")
            return True
-        elif "context_window: int = 80000" in safeguards_content:
+        elif 'context_window: int = 80000' in safeguards_content:
            print("✗ Safeguards: context_window is still 80000 - NEEDS FIX")
            return False
        else:
            print("? Safeguards: context_window setting not found clearly")
            return False
-
+            
    except Exception as e:
        print(f"✗ Error checking context length: {e}")
        return False

-
 def test_safeguard_preservation():
    """Test that safeguards preserve content instead of dropping it."""
    print("\n" + "=" * 60)
    print("TESTING SAFEGUARD CONTENT PRESERVATION")
    print("=" * 60)
-
+    
    try:
        # Read the synthesizer file and check for the preservation method
-        with open("mini_rag/llm_synthesizer.py", "r") as f:
+        with open('mini_rag/llm_synthesizer.py', 'r') as f:
            synthesizer_content = f.read()
-
-        if "_create_safeguard_response_with_content" in synthesizer_content:
+        
+        if '_create_safeguard_response_with_content' in synthesizer_content:
            print("✓ Safeguard content preservation method exists")
        else:
            print("✗ Safeguard content preservation method missing")
            return False
-
+        
        # Check for the specific preservation logic
-        if "AI Response (use with caution):" in synthesizer_content:
+        if 'AI Response (use with caution):' in synthesizer_content:
            print("✓ Content preservation warning format found")
        else:
            print("✗ Content preservation warning format missing")
            return False
-
+            
        # Check that it's being called instead of dropping content
-        if (
-            "return self._create_safeguard_response_with_content(" in synthesizer_content
-            and "issue_type, explanation, raw_response" in synthesizer_content
-        ):
+        if 'return self._create_safeguard_response_with_content(issue_type, explanation, raw_response)' in synthesizer_content:
            print("✓ Preservation method is called when safeguards trigger")
            return True
        else:
            print("✗ Preservation method not called properly")
            return False
-
+            
    except Exception as e:
        print(f"✗ Error checking safeguard preservation: {e}")
        return False

-
 def test_import_fixes():
    """Test that import statements are fixed from claude_rag to mini_rag."""
    print("\n" + "=" * 60)
    print("TESTING IMPORT STATEMENT FIXES")
    print("=" * 60)
-
+    
    test_files = [
-        "tests/test_rag_integration.py",
-        "tests/01_basic_integration_test.py",
-        "tests/test_hybrid_search.py",
-        "tests/test_context_retrieval.py",
+        'tests/test_rag_integration.py',
+        'tests/01_basic_integration_test.py',
+        'tests/test_hybrid_search.py',
+        'tests/test_context_retrieval.py'
    ]
-
+    
    all_good = True
-
+    
    for test_file in test_files:
        if Path(test_file).exists():
            try:
-                with open(test_file, "r") as f:
+                with open(test_file, 'r') as f:
                    content = f.read()
-
-                if "claude_rag" in content:
+                
+                if 'claude_rag' in content:
                    print(f"✗ {test_file}: Still contains 'claude_rag' imports")
                    all_good = False
-                elif "mini_rag" in content:
+                elif 'mini_rag' in content:
                    print(f"✓ {test_file}: Uses correct 'mini_rag' imports")
                else:
                    print(f"? {test_file}: No rag imports found")
-
+                    
            except Exception as e:
                print(f"✗ Error reading {test_file}: {e}")
                all_good = False
        else:
            print(f"? {test_file}: File not found")
-
+    
    return all_good

-
 def main():
    """Run all tests."""
    print("FSS-Mini-RAG Fix Verification Tests")
    print("Testing all the critical fixes...")
-
+    
    tests = [
        ("Model Rankings", test_config_model_rankings),
-        ("Context Length", test_context_length_fix),
+        ("Context Length", test_context_length_fix),  
        ("Safeguard Preservation", test_safeguard_preservation),
-        ("Import Fixes", test_import_fixes),
+        ("Import Fixes", test_import_fixes)
    ]
-
+    
    results = {}
-
+    
    for test_name, test_func in tests:
        try:
            results[test_name] = test_func()
        except Exception as e:
            print(f"✗ {test_name} test crashed: {e}")
            results[test_name] = False
-
+    
    # Summary
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)
-
+    
    passed = sum(1 for result in results.values() if result)
    total = len(results)
-
+    
    for test_name, result in results.items():
        status = "✓ PASS" if result else "✗ FAIL"
        print(f"{status} {test_name}")
-
+    
    print(f"\nOverall: {passed}/{total} tests passed")
-
+    
    if passed == total:
        print("🎉 ALL TESTS PASSED - System should be working properly!")
        return 0
@ -239,6 +226,5 @@ def main():
        print("❌ SOME TESTS FAILED - System needs more fixes!")
        return 1

-
 if __name__ == "__main__":
-    sys.exit(main())
+    sys.exit(main())
--- a/tests/01_basic_integration_test.py
+++ b/tests/01_basic_integration_test.py
@ -14,82 +14,74 @@ import sys
 import tempfile
 from pathlib import Path

-from mini_rag.chunker import CodeChunker
-from mini_rag.indexer import ProjectIndexer
-from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder
-from mini_rag.search import CodeSearcher
-
 # Check if virtual environment is activated
-
-
 def check_venv():
-    if "VIRTUAL_ENV" not in os.environ:
+    if 'VIRTUAL_ENV' not in os.environ:
        print("⚠️  WARNING: Virtual environment not detected!")
        print("   This test requires the virtual environment to be activated.")
-        print(
-            "   Run: source .venv/bin/activate && PYTHONPATH=. python tests/01_basic_integration_test.py"
-        )
+        print("   Run: source .venv/bin/activate && PYTHONPATH=. python tests/01_basic_integration_test.py")
        print("   Continuing anyway...\n")

-
 check_venv()

 # Fix Windows encoding
-if sys.platform == "win32":
-    os.environ["PYTHONUTF8"] = "1"
-    sys.stdout.reconfigure(encoding="utf-8")
+if sys.platform == 'win32':
+    os.environ['PYTHONUTF8'] = '1'
+    sys.stdout.reconfigure(encoding='utf-8')

+from mini_rag.chunker import CodeChunker
+from mini_rag.indexer import ProjectIndexer
+from mini_rag.search import CodeSearcher
+from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder

 def main():
    print("=" * 60)
    print("RAG System Integration Demo")
    print("=" * 60)
-
+    
    with tempfile.TemporaryDirectory() as tmpdir:
        project_path = Path(tmpdir)
-
+        
        # Create sample project files
        print("\n1. Creating sample project files...")
-
+        
        # Main calculator module
-        (project_path / "calculator.py").write_text(
-            '''"""
+        (project_path / "calculator.py").write_text('''"""
 Advanced calculator module with various mathematical operations.
 """

 import math
 from typing import List, Union

-
 class BasicCalculator:
    """Basic calculator with fundamental operations."""
-
+    
    def __init__(self):
        """Initialize calculator with result history."""
        self.history = []
        self.last_result = 0
-
+    
    def add(self, a: float, b: float) -> float:
        """Add two numbers and store result."""
        result = a + b
        self.history.append(f"{a} + {b} = {result}")
        self.last_result = result
        return result
-
+    
    def subtract(self, a: float, b: float) -> float:
        """Subtract b from a."""
        result = a - b
        self.history.append(f"{a} - {b} = {result}")
        self.last_result = result
        return result
-
+    
    def multiply(self, a: float, b: float) -> float:
        """Multiply two numbers."""
        result = a * b
        self.history.append(f"{a} * {b} = {result}")
        self.last_result = result
        return result
-
+    
    def divide(self, a: float, b: float) -> float:
        """Divide a by b with zero check."""
        if b == 0:
@ -99,17 +91,16 @@ class BasicCalculator:
        self.last_result = result
        return result

-
 class ScientificCalculator(BasicCalculator):
    """Scientific calculator extending basic operations."""
-
+    
    def power(self, base: float, exponent: float) -> float:
        """Calculate base raised to exponent."""
        result = math.pow(base, exponent)
        self.history.append(f"{base} ^ {exponent} = {result}")
        self.last_result = result
        return result
-
+    
    def sqrt(self, n: float) -> float:
        """Calculate square root."""
        if n < 0:
@ -118,7 +109,7 @@ class ScientificCalculator(BasicCalculator):
        self.history.append(f"sqrt({n}) = {result}")
        self.last_result = result
        return result
-
+    
    def logarithm(self, n: float, base: float = 10) -> float:
        """Calculate logarithm with specified base."""
        result = math.log(n, base)
@ -132,7 +123,6 @@ def calculate_mean(numbers: List[float]) -> float:
        return 0.0
    return sum(numbers) / len(numbers)

-
 def calculate_median(numbers: List[float]) -> float:
    """Calculate median of a list of numbers."""
    if not numbers:
@ -143,7 +133,6 @@ def calculate_median(numbers: List[float]) -> float:
        return (sorted_nums[n//2-1] + sorted_nums[n//2]) / 2
    return sorted_nums[n//2]

-
 def calculate_mode(numbers: List[float]) -> float:
    """Calculate mode (most frequent value)."""
    if not numbers:
@ -153,88 +142,79 @@ def calculate_mode(numbers: List[float]) -> float:
        frequency[num] = frequency.get(num, 0) + 1
    mode = max(frequency.keys(), key=frequency.get)
    return mode
-'''
-        )
-
+''')
+        
        # Test file for the calculator
-        (project_path / "test_calculator.py").write_text(
-            '''"""
+        (project_path / "test_calculator.py").write_text('''"""
 Unit tests for calculator module.
 """

 import unittest
 from calculator import BasicCalculator, ScientificCalculator, calculate_mean

-
 class TestBasicCalculator(unittest.TestCase):
    """Test cases for BasicCalculator."""
-
+    
    def setUp(self):
        """Set up test calculator."""
        self.calc = BasicCalculator()
-
+    
    def test_addition(self):
        """Test addition operation."""
        result = self.calc.add(5, 3)
        self.assertEqual(result, 8)
        self.assertEqual(self.calc.last_result, 8)
-
+    
    def test_division_by_zero(self):
        """Test division by zero raises error."""
        with self.assertRaises(ValueError):
            self.calc.divide(10, 0)

-
 class TestStatistics(unittest.TestCase):
    """Test statistical functions."""
-
+    
    def test_mean(self):
        """Test mean calculation."""
        numbers = [1, 2, 3, 4, 5]
        self.assertEqual(calculate_mean(numbers), 3.0)
-
+    
    def test_empty_list(self):
        """Test mean of empty list."""
        self.assertEqual(calculate_mean([]), 0.0)

 if __name__ == "__main__":
    unittest.main()
-'''
-        )
-
+''')
+        
        print("    Created 2 Python files")
-
+        
        # 2. Index the project
        print("\n2. Indexing project with intelligent chunking...")
-
+        
        # Use realistic chunk size
        chunker = CodeChunker(min_chunk_size=10, max_chunk_size=100)
        indexer = ProjectIndexer(project_path, chunker=chunker)
        stats = indexer.index_project()
-
+        
        print(f"    Indexed {stats['files_indexed']} files")
        print(f"    Created {stats['chunks_created']} chunks")
        print(f"    Time: {stats['time_taken']:.2f} seconds")
-
+        
        # 3. Demonstrate search capabilities
        print("\n3. Testing search capabilities...")
        searcher = CodeSearcher(project_path)
-
+        
        # Test different search types
        print("\n   a) Semantic search for 'calculate average':")
        results = searcher.search("calculate average", top_k=3)
        for i, result in enumerate(results, 1):
-            print(
-                f"      {i}. {result.chunk_type} '{result.name}' in {result.file_path} (score: {result.score:.3f})"
-            )
-
+            print(f"      {i}. {result.chunk_type} '{result.name}' in {result.file_path} (score: {result.score:.3f})")
+        
        print("\n   b) BM25-weighted search for 'divide zero':")
        results = searcher.search("divide zero", top_k=3, semantic_weight=0.2, bm25_weight=0.8)
        for i, result in enumerate(results, 1):
-            print(
-                f"      {i}. {result.chunk_type} '{result.name}' in {result.file_path} (score: {result.score:.3f})"
-            )
-
+            print(f"      {i}. {result.chunk_type} '{result.name}' in {result.file_path} (score: {result.score:.3f})")
+        
        print("\n   c) Search with context for 'test addition':")
        results = searcher.search("test addition", top_k=2, include_context=True)
        for i, result in enumerate(results, 1):
@ -245,39 +225,39 @@ if __name__ == "__main__":
                print(f"         Has previous context: {len(result.context_before)} chars")
            if result.context_after:
                print(f"         Has next context: {len(result.context_after)} chars")
-
+        
        # 4. Test chunk navigation
        print("\n4. Testing chunk navigation...")
-
+        
        # Get all chunks to find a method
        df = searcher.table.to_pandas()
-        method_chunks = df[df["chunk_type"] == "method"]
-
+        method_chunks = df[df['chunk_type'] == 'method']
+        
        if len(method_chunks) > 0:
            # Pick a method in the middle
            mid_idx = len(method_chunks) // 2
-            chunk_id = method_chunks.iloc[mid_idx]["chunk_id"]
-            chunk_name = method_chunks.iloc[mid_idx]["name"]
-
+            chunk_id = method_chunks.iloc[mid_idx]['chunk_id']
+            chunk_name = method_chunks.iloc[mid_idx]['name']
+            
            print(f"\n   Getting context for method '{chunk_name}':")
            context = searcher.get_chunk_context(chunk_id)
-
-            if context["chunk"]:
+            
+            if context['chunk']:
                print(f"    Current: {context['chunk'].name}")
-            if context["prev"]:
+            if context['prev']:
                print(f"    Previous: {context['prev'].name}")
-            if context["next"]:
+            if context['next']:
                print(f"    Next: {context['next'].name}")
-            if context["parent"]:
+            if context['parent']:
                print(f"    Parent class: {context['parent'].name}")
-
+        
        # 5. Show statistics
        print("\n5. Index Statistics:")
        stats = searcher.get_statistics()
        print(f"   - Total chunks: {stats['total_chunks']}")
        print(f"   - Unique files: {stats['unique_files']}")
        print(f"   - Chunk types: {stats['chunk_types']}")
-
+        
        print("\n" + "=" * 60)
        print(" All features working correctly!")
        print("=" * 60)
@ -288,6 +268,5 @@ if __name__ == "__main__":
        print("- Context-aware search with adjacent chunks")
        print("- Chunk navigation following code relationships")

-
 if __name__ == "__main__":
-    main()
+    main()
--- a/tests/02_search_examples.py
+++ b/tests/02_search_examples.py
@ -5,10 +5,9 @@ Simple demo of the hybrid search system showing real results.

 import sys
 from pathlib import Path
-
 from rich.console import Console
-from rich.panel import Panel
 from rich.syntax import Syntax
+from rich.panel import Panel
 from rich.table import Table

 from mini_rag.search import CodeSearcher
@ -18,110 +17,102 @@ console = Console()

 def demo_search(project_path: Path):
    """Run demo searches showing the hybrid system in action."""
-
+    
    console.print("\n[bold cyan]Mini RAG Hybrid Search Demo[/bold cyan]\n")
-
+    
    # Initialize searcher
    console.print("Initializing search system...")
    searcher = CodeSearcher(project_path)
-
+    
    # Get index stats
    stats = searcher.get_statistics()
-    if "error" not in stats:
-        console.print(
-            f"\n[green] Index ready:[/green] {stats['total_chunks']} chunks from {stats['unique_files']} files"
-        )
+    if 'error' not in stats:
+        console.print(f"\n[green] Index ready:[/green] {stats['total_chunks']} chunks from {stats['unique_files']} files")
        console.print(f"[dim]Languages: {', '.join(stats['languages'].keys())}[/dim]")
        console.print(f"[dim]Chunk types: {', '.join(stats['chunk_types'].keys())}[/dim]\n")
-
+    
    # Demo queries
    demos = [
        {
-            "title": "Keyword-Heavy Search",
-            "query": "BM25Okapi rank_bm25 search scoring",
-            "description": "This query has specific technical keywords that BM25 excels at finding",
-            "top_k": 5,
+            'title': 'Keyword-Heavy Search',
+            'query': 'BM25Okapi rank_bm25 search scoring',
+            'description': 'This query has specific technical keywords that BM25 excels at finding',
+            'top_k': 5
        },
        {
-            "title": "Natural Language Query",
-            "query": "how to build search index from database chunks",
-            "description": "This semantic query benefits from transformer embeddings understanding intent",
-            "top_k": 5,
+            'title': 'Natural Language Query',
+            'query': 'how to build search index from database chunks',
+            'description': 'This semantic query benefits from transformer embeddings understanding intent',
+            'top_k': 5
        },
        {
-            "title": "Mixed Technical Query",
-            "query": "vector embeddings for semantic code search with transformers",
-            "description": "This hybrid query combines technical terms with conceptual understanding",
-            "top_k": 5,
+            'title': 'Mixed Technical Query',
+            'query': 'vector embeddings for semantic code search with transformers',
+            'description': 'This hybrid query combines technical terms with conceptual understanding',
+            'top_k': 5
        },
        {
-            "title": "Function Search",
-            "query": "search method implementation with filters",
-            "description": "Looking for specific function implementations",
-            "top_k": 5,
-        },
+            'title': 'Function Search',
+            'query': 'search method implementation with filters',
+            'description': 'Looking for specific function implementations',
+            'top_k': 5
+        }
    ]
-
+    
    for demo in demos:
        console.rule(f"\n[bold yellow]{demo['title']}[/bold yellow]")
        console.print(f"[dim]{demo['description']}[/dim]")
        console.print(f"\n[cyan]Query:[/cyan] '{demo['query']}'")
-
+        
        # Run search with hybrid mode
        results = searcher.search(
-            query=demo["query"],
-            top_k=demo["top_k"],
+            query=demo['query'],
+            top_k=demo['top_k'],
            semantic_weight=0.7,
-            bm25_weight=0.3,
+            bm25_weight=0.3
        )
-
+        
        if not results:
            console.print("[red]No results found![/red]")
            continue
-
+        
        console.print(f"\n[green]Found {len(results)} results:[/green]\n")
-
+        
        # Show each result
        for i, result in enumerate(results, 1):
            # Create result panel
            header = f"#{i} {result.file_path}:{result.start_line}-{result.end_line}"
-
+            
            # Get code preview
            lines = result.content.splitlines()
            if len(lines) > 10:
-                preview_lines = lines[:8] + ["..."] + lines[-2:]
+                preview_lines = lines[:8] + ['...'] + lines[-2:]
            else:
                preview_lines = lines
-
-            preview = "\n".join(preview_lines)
-
+            
+            preview = '\n'.join(preview_lines)
+            
            # Create info table
            info = Table.grid(padding=0)
            info.add_column(style="cyan", width=12)
            info.add_column(style="white")
-
+            
            info.add_row("Score:", f"{result.score:.3f}")
            info.add_row("Type:", result.chunk_type)
            info.add_row("Name:", result.name or "N/A")
            info.add_row("Language:", result.language)
-
+            
            # Display result
-            console.print(
-                Panel(
-                    f"{info}\n\n[dim]{preview}[/dim]",
-                    title=header,
-                    title_align="left",
-                    border_style="blue",
-                )
-            )
-
+            console.print(Panel(
+                f"{info}\n\n[dim]{preview}[/dim]",
+                title=header,
+                title_align="left",
+                border_style="blue"
+            ))
+        
        # Show scoring breakdown for top result
        if results:
-            console.print(
-                "\n[dim]Top result hybrid score: {:.3f} (70% semantic + 30% BM25)[/dim]".format(
-                    results[0].score
-                )
-            )
+            console.print("\n[dim]Top result hybrid score: {:.3f} (70% semantic + 30% BM25)[/dim]".format(results[0].score))


 def main():
@ -131,14 +122,14 @@ def main():
    else:
        # Use the RAG system itself as the demo project
        project_path = Path(__file__).parent
-
-    if not (project_path / ".mini-rag").exists():
+    
+    if not (project_path / '.mini-rag').exists():
        console.print("[red]Error: No RAG index found. Run 'rag-mini index' first.[/red]")
        console.print(f"[dim]Looked in: {project_path / '.mini-rag'}[/dim]")
        return
-
+    
    demo_search(project_path)


 if __name__ == "__main__":
-    main()
+    main()
--- a/tests/03_system_validation.py
+++ b/tests/03_system_validation.py
@ -2,55 +2,53 @@
 Integration test to verify all three agents' work integrates properly.
 """

-import os
 import sys
+import os
 import tempfile
 from pathlib import Path

 # Fix Windows encoding
-if sys.platform == "win32":
-    os.environ["PYTHONUTF8"] = "1"
-    sys.stdout.reconfigure(encoding="utf-8")
+if sys.platform == 'win32':
+    os.environ['PYTHONUTF8'] = '1'
+    sys.stdout.reconfigure(encoding='utf-8')

 from mini_rag.chunker import CodeChunker
-from mini_rag.config import RAGConfig
 from mini_rag.indexer import ProjectIndexer
+from mini_rag.search import CodeSearcher
 from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from mini_rag.query_expander import QueryExpander
-from mini_rag.search import CodeSearcher
-
+from mini_rag.config import RAGConfig

 def test_chunker():
    """Test that chunker creates chunks with all required metadata."""
    print("1. Testing Chunker...")
-
+    
    # Create test Python file with more substantial content
    test_code = '''"""Test module for integration testing the chunker."""

 import os
 import sys

-
 class TestClass:
    """A test class with multiple methods."""
-
+    
    def __init__(self):
        """Initialize the test class."""
        self.value = 42
        self.name = "test"
-
+    
    def method_one(self):
        """First method with some logic."""
        result = self.value * 2
        return result
-
+    
    def method_two(self, x):
        """Second method that takes a parameter."""
        if x > 0:
            return self.value + x
        else:
            return self.value - x
-
+    
    def method_three(self):
        """Third method for testing."""
        data = []
@ -58,14 +56,13 @@ class TestClass:
            data.append(i * self.value)
        return data

-
 class AnotherClass:
    """Another test class."""
-
+    
    def __init__(self, name):
        """Initialize with name."""
        self.name = name
-
+    
    def process(self):
        """Process something."""
        return f"Processing {self.name}"
@ -75,25 +72,22 @@ def standalone_function(arg1, arg2):
    result = arg1 + arg2
    return result * 2

-
 def another_function():
    """Another standalone function."""
    data = {"key": "value", "number": 123}
    return data
 '''
-
+    
    chunker = CodeChunker(min_chunk_size=1)  # Use small chunk size for testing
    chunks = chunker.chunk_file(Path("test.py"), test_code)
-
+    
    print(f"    Created {len(chunks)} chunks")
-
+    
    # Debug: Show what chunks were created
    print("   Chunks created:")
    for chunk in chunks:
-        print(
-            f"     - Type: {chunk.chunk_type}, Name: {chunk.name}, Lines: {chunk.start_line}-{chunk.end_line}"
-        )
-
+        print(f"     - Type: {chunk.chunk_type}, Name: {chunk.name}, Lines: {chunk.start_line}-{chunk.end_line}")
+    
    # Check metadata
    issues = []
    for i, chunk in enumerate(chunks):
@ -103,82 +97,68 @@ def another_function():
            issues.append(f"Chunk {i} missing total_chunks")
        if chunk.file_lines is None:
            issues.append(f"Chunk {i} missing file_lines")
-
+        
        # Check links (except first/last)
        if i > 0 and chunk.prev_chunk_id is None:
            issues.append(f"Chunk {i} missing prev_chunk_id")
        if i < len(chunks) - 1 and chunk.next_chunk_id is None:
            issues.append(f"Chunk {i} missing next_chunk_id")
-
+        
        # Check parent_class for methods
-        if chunk.chunk_type == "method" and chunk.parent_class is None:
+        if chunk.chunk_type == 'method' and chunk.parent_class is None:
            issues.append(f"Method chunk {chunk.name} missing parent_class")
-
-        print(
-            f"   - Chunk {i}: {chunk.chunk_type} '{chunk.name}' "
-            f"[{chunk.chunk_index}/{chunk.total_chunks}] "
-            f"prev={chunk.prev_chunk_id} next={chunk.next_chunk_id}"
-        )
-
+            
+        print(f"   - Chunk {i}: {chunk.chunk_type} '{chunk.name}' "
+              f"[{chunk.chunk_index}/{chunk.total_chunks}] "
+              f"prev={chunk.prev_chunk_id} next={chunk.next_chunk_id}")
+    
    if issues:
        print("    Issues found:")
        for issue in issues:
            print(f"      - {issue}")
    else:
        print("    All metadata present")
-
+    
    return len(issues) == 0

-
 def test_indexer_storage():
    """Test that indexer stores the new metadata."""
    print("\n2. Testing Indexer Storage...")
-
+    
    with tempfile.TemporaryDirectory() as tmpdir:
        project_path = Path(tmpdir)
-
+        
        # Create test file
        test_file = project_path / "test.py"
-        test_file.write_text(
-            """
-
-
+        test_file.write_text('''
 class MyClass:
-
    def my_method(self):
        return 42
-"""
-        )
-
+''')
+        
        # Index the project with small chunk size for testing
        from mini_rag.chunker import CodeChunker
-
        chunker = CodeChunker(min_chunk_size=1)
        indexer = ProjectIndexer(project_path, chunker=chunker)
        stats = indexer.index_project()
-
+        
        print(f"    Indexed {stats['chunks_created']} chunks")
-
+        
        # Check what was stored
        if indexer.table:
            df = indexer.table.to_pandas()
            columns = df.columns.tolist()
-
-            required_fields = [
-                "chunk_id",
-                "prev_chunk_id",
-                "next_chunk_id",
-                "parent_class",
-            ]
+            
+            required_fields = ['chunk_id', 'prev_chunk_id', 'next_chunk_id', 'parent_class']
            missing_fields = [f for f in required_fields if f not in columns]
-
+            
            if missing_fields:
                print(f"    Missing fields in database: {missing_fields}")
                print(f"   Current fields: {columns}")
                return False
            else:
                print("    All required fields in database schema")
-
+                
                # Check if data is actually stored
                sample = df.iloc[0] if len(df) > 0 else None
                if sample is not None:
@ -186,41 +166,38 @@ class MyClass:
                    print(f"   Sample prev_chunk_id: {sample.get('prev_chunk_id', 'MISSING')}")
                    print(f"   Sample next_chunk_id: {sample.get('next_chunk_id', 'MISSING')}")
                    print(f"   Sample parent_class: {sample.get('parent_class', 'MISSING')}")
-
+        
        return len(missing_fields) == 0

-
 def test_search_integration():
    """Test that search uses the new metadata."""
    print("\n3. Testing Search Integration...")
-
+    
    with tempfile.TemporaryDirectory() as tmpdir:
        project_path = Path(tmpdir)
-
+        
        # Create test files with proper content that will create multiple chunks
-        (project_path / "math_utils.py").write_text(
-            '''"""Math utilities module."""
+        (project_path / "math_utils.py").write_text('''"""Math utilities module."""

 import math

-
 class Calculator:
    """A simple calculator class."""
-
+    
    def __init__(self):
        """Initialize calculator."""
        self.result = 0
-
+    
    def add(self, a, b):
        """Add two numbers."""
        self.result = a + b
        return self.result
-
+    
    def multiply(self, a, b):
        """Multiply two numbers."""
        self.result = a * b
        return self.result
-
+    
    def divide(self, a, b):
        """Divide two numbers."""
        if b == 0:
@ -228,15 +205,14 @@ class Calculator:
        self.result = a / b
        return self.result

-
 class AdvancedCalculator(Calculator):
    """Advanced calculator with more operations."""
-
+    
    def power(self, a, b):
        """Raise a to power b."""
        self.result = a ** b
        return self.result
-
+    
    def sqrt(self, a):
        """Calculate square root."""
        self.result = math.sqrt(a)
@ -248,7 +224,6 @@ def compute_average(numbers):
        return 0
    return sum(numbers) / len(numbers)

-
 def compute_median(numbers):
    """Compute median of a list."""
    if not numbers:
@ -258,22 +233,20 @@ def compute_median(numbers):
    if n % 2 == 0:
        return (sorted_nums[n//2-1] + sorted_nums[n//2]) / 2
    return sorted_nums[n//2]
-'''
-        )
-
+''')
+        
        # Index with small chunk size for testing
        chunker = CodeChunker(min_chunk_size=1)
        indexer = ProjectIndexer(project_path, chunker=chunker)
        indexer.index_project()
-
+        
        # Search
        searcher = CodeSearcher(project_path)
-
+        
        # Test BM25 integration
-        results = searcher.search(
-            "multiply numbers", top_k=5, semantic_weight=0.3, bm25_weight=0.7
-        )
-
+        results = searcher.search("multiply numbers", top_k=5, 
+                                 semantic_weight=0.3, bm25_weight=0.7)
+        
        if results:
            print(f"    BM25 + semantic search returned {len(results)} results")
            for r in results[:2]:
@ -281,50 +254,45 @@ def compute_median(numbers):
        else:
            print("    No search results returned")
            return False
-
+        
        # Test context retrieval
        print("\n   Testing context retrieval...")
        if searcher.table:
            df = searcher.table.to_pandas()
            print(f"   Total chunks in DB: {len(df)}")
-
-            # Find a method/function chunk to test parent context
-            method_chunks = df[df["chunk_type"].isin(["method", "function"])]
+            
+            # Find a method chunk to test parent context
+            method_chunks = df[df['chunk_type'] == 'method']
            if len(method_chunks) > 0:
-                method_chunk_id = method_chunks.iloc[0]["chunk_id"]
+                method_chunk_id = method_chunks.iloc[0]['chunk_id']
                context = searcher.get_chunk_context(method_chunk_id)
-
-                if context["chunk"]:
+                
+                if context['chunk']:
                    print(f"    Got main chunk: {context['chunk'].name}")
-                if context["prev"]:
+                if context['prev']:
                    print(f"    Got previous chunk: {context['prev'].name}")
                else:
-                    print("   - No previous chunk (might be first)")
-                if context["next"]:
+                    print(f"   - No previous chunk (might be first)")
+                if context['next']:
                    print(f"    Got next chunk: {context['next'].name}")
                else:
-                    print("   - No next chunk (might be last)")
-                if context["parent"]:
+                    print(f"   - No next chunk (might be last)")
+                if context['parent']:
                    print(f"    Got parent chunk: {context['parent'].name}")
                else:
-                    print("   - No parent chunk")
-
+                    print(f"   - No parent chunk")
+                    
                # Test include_context in search
                results_with_context = searcher.search("add", include_context=True, top_k=2)
                if results_with_context:
                    print(f"   Found {len(results_with_context)} results with context")
                    for r in results_with_context:
-                        # Check if result has context (unused variable removed)
-                        print(
-                            f"     - {r.name}: context_before={bool(r.context_before)}, "
-                            f"context_after={bool(r.context_after)}, parent={bool(r.parent_chunk)}"
-                        )
-
+                        has_context = bool(r.context_before or r.context_after or r.parent_chunk)
+                        print(f"     - {r.name}: context_before={bool(r.context_before)}, "
+                              f"context_after={bool(r.context_after)}, parent={bool(r.parent_chunk)}")
+                    
                    # Check if at least one result has some context
-                    if any(
-                        r.context_before or r.context_after or r.parent_chunk
-                        for r in results_with_context
-                    ):
+                    if any(r.context_before or r.context_after or r.parent_chunk for r in results_with_context):
                        print("    Search with context working")
                        return True
                    else:
@ -336,117 +304,112 @@ def compute_median(numbers):
            else:
                print("    No method chunks found in database")
                return False
-
+        
        return True

-
 def test_server():
    """Test that server still works."""
    print("\n4. Testing Server...")
-
+    
    # Just check if we can import and create server instance
    try:
        from mini_rag.server import RAGServer
-
-        # RAGServer(Path("."), port=7778)  # Unused variable removed
+        server = RAGServer(Path("."), port=7778)
        print("    Server can be instantiated")
        return True
    except Exception as e:
        print(f"    Server error: {e}")
        return False

-
 def test_new_features():
    """Test new features: query expansion and smart ranking."""
    print("\n5. Testing New Features (Query Expansion & Smart Ranking)...")
-
+    
    try:
        # Test configuration loading
        config = RAGConfig()
-        print("    ✅ Configuration loaded successfully")
+        print(f"    ✅ Configuration loaded successfully")
        print(f"       Query expansion enabled: {config.search.expand_queries}")
        print(f"       Max expansion terms: {config.llm.max_expansion_terms}")
-
+        
        # Test query expander (will use mock if Ollama unavailable)
        expander = QueryExpander(config)
        test_query = "authentication"
-
+        
        if expander.is_available():
            expanded = expander.expand_query(test_query)
            print(f"    ✅ Query expansion working: '{test_query}' → '{expanded}'")
        else:
-            print("    ⚠️  Query expansion offline (Ollama not available)")
+            print(f"    ⚠️  Query expansion offline (Ollama not available)")
            # Test that it still returns original query
            expanded = expander.expand_query(test_query)
            if expanded == test_query:
-                print("    ✅ Graceful degradation working: returns original query")
+                print(f"    ✅ Graceful degradation working: returns original query")
            else:
-                print("    ❌ Error: should return original query when offline")
+                print(f"    ❌ Error: should return original query when offline")
                return False
-
+        
        # Test smart ranking (this always works as it's zero-overhead)
        print("    🧮 Testing smart ranking...")
-
+        
        # Create a simple test to verify the method exists and can be called
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
-
+            
            # Create a simple test project
            test_file = temp_path / "README.md"
            test_file.write_text("# Test Project\nThis is a test README file.")
-
+            
            try:
                searcher = CodeSearcher(temp_path)
                # Test that the _smart_rerank method exists
-                if hasattr(searcher, "_smart_rerank"):
+                if hasattr(searcher, '_smart_rerank'):
                    print("    ✅ Smart ranking method available")
                    return True
                else:
                    print("    ❌ Smart ranking method not found")
                    return False
-
+                    
            except Exception as e:
                print(f"    ❌ Smart ranking test failed: {e}")
                return False
-
+        
    except Exception as e:
        print(f"    ❌ New features test failed: {e}")
        return False

-
 def main():
    """Run all integration tests."""
    print("=" * 50)
    print("RAG System Integration Check")
    print("=" * 50)
-
+    
    results = {
        "Chunker": test_chunker(),
-        "Indexer": test_indexer_storage(),
+        "Indexer": test_indexer_storage(), 
        "Search": test_search_integration(),
        "Server": test_server(),
-        "New Features": test_new_features(),
+        "New Features": test_new_features()
    }
-
+    
    print("\n" + "=" * 50)
    print("SUMMARY:")
    print("=" * 50)
-
+    
    all_passed = True
    for component, passed in results.items():
        status = " PASS" if passed else " FAIL"
        print(f"{component}: {status}")
        if not passed:
            all_passed = False
-
+    
    if all_passed:
        print("\n All integration tests passed!")
    else:
        print("\n️  Some tests failed - fixes needed!")
-
+    
    return all_passed

-
 if __name__ == "__main__":
    success = main()
-    sys.exit(0 if success else 1)
+    sys.exit(0 if success else 1)
--- a/tests/show_index_contents.py
+++ b/tests/show_index_contents.py
@ -3,19 +3,19 @@
 Show what files are actually indexed in the RAG system.
 """

-import os
 import sys
-from collections import Counter
+import os
 from pathlib import Path

-from mini_rag.vector_store import VectorStore
-
-if sys.platform == "win32":
-    os.environ["PYTHONUTF8"] = "1"
-    sys.stdout.reconfigure(encoding="utf-8")
+if sys.platform == 'win32':
+    os.environ['PYTHONUTF8'] = '1'
+    sys.stdout.reconfigure(encoding='utf-8')

 sys.path.insert(0, str(Path(__file__).parent))

+from mini_rag.vector_store import VectorStore
+from collections import Counter
+
 project_path = Path.cwd()
 store = VectorStore(project_path)
 store._connect()
@ -32,16 +32,16 @@ for row in store.table.to_pandas().itertuples():

 unique_files = sorted(set(files))

-print("\n Indexed Files Summary")
+print(f"\n Indexed Files Summary")
 print(f"Total files: {len(unique_files)}")
 print(f"Total chunks: {len(files)}")
 print(f"\nChunk types: {dict(chunk_types)}")

-print("\n Files with most chunks:")
+print(f"\n Files with most chunks:")
 for file, count in chunks_by_file.most_common(10):
    print(f"  {count:3d} chunks: {file}")

-print("\n Text-to-speech files:")
-tts_files = [f for f in unique_files if "text-to-speech" in f or "speak" in f.lower()]
+print(f"\n Text-to-speech files:")
+tts_files = [f for f in unique_files if 'text-to-speech' in f or 'speak' in f.lower()]
 for f in tts_files:
-    print(f"  - {f} ({chunks_by_file[f]} chunks)")
+    print(f"  - {f} ({chunks_by_file[f]} chunks)")
--- a/tests/test_context_retrieval.py
+++ b/tests/test_context_retrieval.py
@ -12,37 +12,30 @@ Or run directly with venv:

 import os
 from pathlib import Path
-
-from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from mini_rag.search import CodeSearcher
+from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder

 # Check if virtual environment is activated
-
-
 def check_venv():
-    if "VIRTUAL_ENV" not in os.environ:
+    if 'VIRTUAL_ENV' not in os.environ:
        print("⚠️  WARNING: Virtual environment not detected!")
        print("   This test requires the virtual environment to be activated.")
-        print(
-            "   Run: source .venv/bin/activate && PYTHONPATH=. python tests/test_context_retrieval.py"
-        )
+        print("   Run: source .venv/bin/activate && PYTHONPATH=. python tests/test_context_retrieval.py")
        print("   Continuing anyway...\n")

-
 check_venv()

-
 def test_context_retrieval():
    """Test the new context retrieval functionality."""
-
+    
    # Initialize searcher
    project_path = Path(__file__).parent
    try:
        embedder = CodeEmbedder()
        searcher = CodeSearcher(project_path, embedder)
-
+        
        print("Testing search with context...")
-
+        
        # Test 1: Search without context
        print("\n1. Search WITHOUT context:")
        results = searcher.search("chunk metadata", top_k=3, include_context=False)
@ -52,7 +45,7 @@ def test_context_retrieval():
            print(f"    Has context_before: {result.context_before is not None}")
            print(f"    Has context_after: {result.context_after is not None}")
            print(f"    Has parent_chunk: {result.parent_chunk is not None}")
-
+        
        # Test 2: Search with context
        print("\n2. Search WITH context:")
        results = searcher.search("chunk metadata", top_k=3, include_context=True)
@ -62,51 +55,39 @@ def test_context_retrieval():
            print(f"    Has context_before: {result.context_before is not None}")
            print(f"    Has context_after: {result.context_after is not None}")
            print(f"    Has parent_chunk: {result.parent_chunk is not None}")
-
+            
            if result.context_before:
                print(f"    Context before preview: {result.context_before[:50]}...")
            if result.context_after:
                print(f"    Context after preview: {result.context_after[:50]}...")
            if result.parent_chunk:
-                print(
-                    f"    Parent chunk: {result.parent_chunk.name} ({result.parent_chunk.chunk_type})"
-                )
-
+                print(f"    Parent chunk: {result.parent_chunk.name} ({result.parent_chunk.chunk_type})")
+        
        # Test 3: get_chunk_context method
        print("\n3. Testing get_chunk_context method:")
        # Get a sample chunk_id from the first result
        df = searcher.table.to_pandas()
        if not df.empty:
-            sample_chunk_id = df.iloc[0]["chunk_id"]
+            sample_chunk_id = df.iloc[0]['chunk_id']
            print(f"  Getting context for chunk_id: {sample_chunk_id}")
-
+            
            context = searcher.get_chunk_context(sample_chunk_id)
-
-            if context["chunk"]:
-                print(
-                    f"    Main chunk: {context['chunk'].file_path}:{context['chunk'].start_line}"
-                )
-            if context["prev"]:
-                print(
-                    f"    Previous chunk: lines {context['prev'].start_line}-{context['prev'].end_line}"
-                )
-            if context["next"]:
-                print(
-                    f"    Next chunk: lines {context['next'].start_line}-{context['next'].end_line}"
-                )
-            if context["parent"]:
-                print(
-                    f"    Parent chunk: {context['parent'].name} ({context['parent'].chunk_type})"
-                )
-
+            
+            if context['chunk']:
+                print(f"    Main chunk: {context['chunk'].file_path}:{context['chunk'].start_line}")
+            if context['prev']:
+                print(f"    Previous chunk: lines {context['prev'].start_line}-{context['prev'].end_line}")
+            if context['next']:
+                print(f"    Next chunk: lines {context['next'].start_line}-{context['next'].end_line}")
+            if context['parent']:
+                print(f"    Parent chunk: {context['parent'].name} ({context['parent'].chunk_type})")
+        
        print("\nAll tests completed successfully!")
-
+        
    except Exception as e:
        print(f"Error during testing: {e}")
        import traceback
-
        traceback.print_exc()

-
 if __name__ == "__main__":
-    test_context_retrieval()
+    test_context_retrieval()
--- a/tests/test_hybrid_search.py
+++ b/tests/test_hybrid_search.py
@ -12,49 +12,46 @@ Or run directly with venv:
 """

 import time
+import json
 from pathlib import Path
-from typing import Any, Dict
-
+from typing import List, Dict, Any
 from rich.console import Console
-from rich.progress import track
 from rich.table import Table
+from rich.panel import Panel
+from rich.columns import Columns
+from rich.syntax import Syntax
+from rich.progress import track

+from mini_rag.search import CodeSearcher, SearchResult
 from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder
-from mini_rag.search import CodeSearcher

 console = Console()


 class SearchTester:
    """Test harness for hybrid search evaluation."""
-
+    
    def __init__(self, project_path: Path):
        self.project_path = project_path
        console.print(f"\n[cyan]Initializing search system for: {project_path}[/cyan]")
-
+        
        # Initialize searcher
        start = time.time()
        self.searcher = CodeSearcher(project_path)
        init_time = time.time() - start
-
+        
        console.print(f"[green] Initialized in {init_time:.2f}s[/green]")
-
+        
        # Get statistics
        stats = self.searcher.get_statistics()
-        if "error" not in stats:
-            console.print(
-                f"[dim]Index contains {stats['total_chunks']} chunks from {stats['unique_files']} files[/dim]\n"
-            )
-
-    def run_query(
-        self,
-        query: str,
-        top_k: int = 10,
-        semantic_only: bool = False,
-        bm25_only: bool = False,
-    ) -> Dict[str, Any]:
+        if 'error' not in stats:
+            console.print(f"[dim]Index contains {stats['total_chunks']} chunks from {stats['unique_files']} files[/dim]\n")
+    
+    def run_query(self, query: str, top_k: int = 10, 
+                  semantic_only: bool = False,
+                  bm25_only: bool = False) -> Dict[str, Any]:
        """Run a single query and return metrics."""
-
+        
        # Set weights based on mode
        if semantic_only:
            semantic_weight, bm25_weight = 1.0, 0.0
@ -65,156 +62,150 @@ class SearchTester:
        else:
            semantic_weight, bm25_weight = 0.7, 0.3
            mode = "Hybrid (70/30)"
-
+        
        # Run search
        start = time.time()
        results = self.searcher.search(
            query=query,
            top_k=top_k,
            semantic_weight=semantic_weight,
-            bm25_weight=bm25_weight,
+            bm25_weight=bm25_weight
        )
        search_time = time.time() - start
-
+        
        return {
-            "query": query,
-            "mode": mode,
-            "results": results,
-            "search_time_ms": search_time * 1000,
-            "num_results": len(results),
-            "top_score": results[0].score if results else 0,
-            "avg_score": sum(r.score for r in results) / len(results) if results else 0,
+            'query': query,
+            'mode': mode,
+            'results': results,
+            'search_time_ms': search_time * 1000,
+            'num_results': len(results),
+            'top_score': results[0].score if results else 0,
+            'avg_score': sum(r.score for r in results) / len(results) if results else 0,
        }
-
+    
    def compare_search_modes(self, query: str, top_k: int = 5):
        """Compare results across different search modes."""
        console.print(f"\n[bold cyan]Query:[/bold cyan] '{query}'")
        console.print(f"[dim]Top {top_k} results per mode[/dim]\n")
-
+        
        # Run searches in all modes
        modes = [
-            ("hybrid", False, False),
-            ("semantic", True, False),
-            ("bm25", False, True),
+            ('hybrid', False, False),
+            ('semantic', True, False),
+            ('bm25', False, True)
        ]
-
+        
        all_results = {}
        for mode_name, semantic_only, bm25_only in modes:
            result = self.run_query(query, top_k, semantic_only, bm25_only)
            all_results[mode_name] = result
-
+        
        # Create comparison table
        table = Table(title="Search Mode Comparison")
        table.add_column("Metric", style="cyan", width=20)
        table.add_column("Hybrid (70/30)", style="green")
        table.add_column("Semantic Only", style="blue")
        table.add_column("BM25 Only", style="magenta")
-
+        
        # Add metrics
        table.add_row(
            "Search Time (ms)",
            f"{all_results['hybrid']['search_time_ms']:.1f}",
            f"{all_results['semantic']['search_time_ms']:.1f}",
-            f"{all_results['bm25']['search_time_ms']:.1f}",
+            f"{all_results['bm25']['search_time_ms']:.1f}"
        )
-
+        
        table.add_row(
            "Results Found",
-            str(all_results["hybrid"]["num_results"]),
-            str(all_results["semantic"]["num_results"]),
-            str(all_results["bm25"]["num_results"]),
+            str(all_results['hybrid']['num_results']),
+            str(all_results['semantic']['num_results']),
+            str(all_results['bm25']['num_results'])
        )
-
+        
        table.add_row(
            "Top Score",
            f"{all_results['hybrid']['top_score']:.3f}",
            f"{all_results['semantic']['top_score']:.3f}",
-            f"{all_results['bm25']['top_score']:.3f}",
+            f"{all_results['bm25']['top_score']:.3f}"
        )
-
+        
        table.add_row(
            "Avg Score",
            f"{all_results['hybrid']['avg_score']:.3f}",
            f"{all_results['semantic']['avg_score']:.3f}",
-            f"{all_results['bm25']['avg_score']:.3f}",
+            f"{all_results['bm25']['avg_score']:.3f}"
        )
-
+        
        console.print(table)
-
+        
        # Show top results from each mode
        console.print("\n[bold]Top Results by Mode:[/bold]")
-
+        
        for mode_name, result_data in all_results.items():
            console.print(f"\n[bold cyan]{result_data['mode']}:[/bold cyan]")
-            for i, result in enumerate(result_data["results"][:3], 1):
-                console.print(
-                    f"\n{i}. [green]{result.file_path}[/green]:{result.start_line}-{result.end_line}"
-                )
-                console.print(
-                    f"   [dim]Type: {result.chunk_type} | Name: {result.name} | Score: {result.score:.3f}[/dim]"
-                )
-
+            for i, result in enumerate(result_data['results'][:3], 1):
+                console.print(f"\n{i}. [green]{result.file_path}[/green]:{result.start_line}-{result.end_line}")
+                console.print(f"   [dim]Type: {result.chunk_type} | Name: {result.name} | Score: {result.score:.3f}[/dim]")
+                
                # Show snippet
                lines = result.content.splitlines()[:5]
                for line in lines:
-                    console.print(
-                        f"   [dim]{line[:80]}{'...' if len(line) > 80 else ''}[/dim]"
-                    )
-
+                    console.print(f"   [dim]{line[:80]}{'...' if len(line) > 80 else ''}[/dim]")
+    
    def test_query_types(self):
        """Test different types of queries to show system capabilities."""
        test_queries = [
            # Keyword-heavy queries (should benefit from BM25)
            {
-                "query": "class CodeSearcher search method",
-                "description": "Specific class and method names",
-                "expected": "Should find exact matches with BM25 boost",
+                'query': 'class CodeSearcher search method',
+                'description': 'Specific class and method names',
+                'expected': 'Should find exact matches with BM25 boost'
            },
            {
-                "query": "import pandas numpy torch",
-                "description": "Multiple import keywords",
-                "expected": "BM25 should excel at finding import statements",
+                'query': 'import pandas numpy torch',
+                'description': 'Multiple import keywords',
+                'expected': 'BM25 should excel at finding import statements'
            },
+            
            # Semantic queries (should benefit from embeddings)
            {
-                "query": "find similar code chunks using vector similarity",
-                "description": "Natural language description",
-                "expected": "Semantic search should understand intent",
+                'query': 'find similar code chunks using vector similarity',
+                'description': 'Natural language description',
+                'expected': 'Semantic search should understand intent'
            },
            {
-                "query": "how to initialize database connection",
-                "description": "How-to question",
-                "expected": "Semantic search should find relevant implementations",
+                'query': 'how to initialize database connection',
+                'description': 'How-to question',
+                'expected': 'Semantic search should find relevant implementations'
            },
+            
            # Mixed queries (benefit from hybrid)
            {
-                "query": "BM25 scoring implementation for search ranking",
-                "description": "Technical terms + intent",
-                "expected": "Hybrid should balance keyword and semantic matching",
+                'query': 'BM25 scoring implementation for search ranking',
+                'description': 'Technical terms + intent',
+                'expected': 'Hybrid should balance keyword and semantic matching'
            },
            {
-                "query": "embedding vectors for code search with transformers",
-                "description": "Domain-specific terminology",
-                "expected": "Hybrid should leverage both approaches",
-            },
+                'query': 'embedding vectors for code search with transformers',
+                'description': 'Domain-specific terminology',
+                'expected': 'Hybrid should leverage both approaches'
+            }
        ]
-
+        
        console.print("\n[bold yellow]Query Type Analysis[/bold yellow]")
-        console.print(
-            "[dim]Testing different query patterns to demonstrate hybrid search benefits[/dim]\n"
-        )
-
+        console.print("[dim]Testing different query patterns to demonstrate hybrid search benefits[/dim]\n")
+        
        for test_case in test_queries:
            console.rule(f"\n[cyan]{test_case['description']}[/cyan]")
            console.print(f"[dim]{test_case['expected']}[/dim]")
-            self.compare_search_modes(test_case["query"], top_k=3)
+            self.compare_search_modes(test_case['query'], top_k=3)
            time.sleep(0.5)  # Brief pause between tests
-
+    
    def benchmark_performance(self, num_queries: int = 50):
        """Run performance benchmarks."""
        console.print("\n[bold yellow]Performance Benchmark[/bold yellow]")
        console.print(f"[dim]Running {num_queries} queries to measure performance[/dim]\n")
-
+        
        # Sample queries for benchmarking
        benchmark_queries = [
            "search function implementation",
@ -226,28 +217,28 @@ class SearchTester:
            "test cases unit testing",
            "configuration settings",
            "logging and debugging",
-            "performance optimization",
+            "performance optimization"
        ] * (num_queries // 10 + 1)
-
+        
        benchmark_queries = benchmark_queries[:num_queries]
-
+        
        # Benchmark each mode
        modes = [
-            ("Hybrid (70/30)", 0.7, 0.3),
-            ("Semantic Only", 1.0, 0.0),
-            ("BM25 Only", 0.0, 1.0),
+            ('Hybrid (70/30)', 0.7, 0.3),
+            ('Semantic Only', 1.0, 0.0),
+            ('BM25 Only', 0.0, 1.0)
        ]
-
+        
        results_table = Table(title="Performance Benchmark Results")
        results_table.add_column("Mode", style="cyan")
        results_table.add_column("Avg Time (ms)", style="green")
        results_table.add_column("Min Time (ms)", style="blue")
        results_table.add_column("Max Time (ms)", style="red")
        results_table.add_column("Total Time (s)", style="magenta")
-
+        
        for mode_name, sem_weight, bm25_weight in modes:
            times = []
-
+            
            console.print(f"[cyan]Testing {mode_name}...[/cyan]")
            for query in track(benchmark_queries, description=f"Running {mode_name}"):
                start = time.time()
@ -255,75 +246,69 @@ class SearchTester:
                    query=query,
                    limit=10,
                    semantic_weight=sem_weight,
-                    bm25_weight=bm25_weight,
+                    bm25_weight=bm25_weight
                )
                elapsed = (time.time() - start) * 1000
                times.append(elapsed)
-
+            
            # Calculate statistics
            avg_time = sum(times) / len(times)
            min_time = min(times)
            max_time = max(times)
            total_time = sum(times) / 1000
-
+            
            results_table.add_row(
                mode_name,
                f"{avg_time:.2f}",
                f"{min_time:.2f}",
                f"{max_time:.2f}",
-                f"{total_time:.2f}",
+                f"{total_time:.2f}"
            )
-
+        
        console.print("\n")
        console.print(results_table)
-
+    
    def test_diversity_constraints(self):
        """Test the diversity constraints in search results."""
        console.print("\n[bold yellow]Diversity Constraints Test[/bold yellow]")
        console.print("[dim]Verifying max 2 chunks per file and chunk type diversity[/dim]\n")
-
+        
        # Query that might return many results from same files
        query = "function implementation code search"
        results = self.searcher.search(query, top_k=20)
-
+        
        # Analyze diversity
        file_counts = {}
        chunk_types = {}
-
+        
        for result in results:
            file_counts[result.file_path] = file_counts.get(result.file_path, 0) + 1
            chunk_types[result.chunk_type] = chunk_types.get(result.chunk_type, 0) + 1
-
+        
        # Create diversity report
        table = Table(title="Result Diversity Analysis")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")
-
+        
        table.add_row("Total Results", str(len(results)))
        table.add_row("Unique Files", str(len(file_counts)))
-        table.add_row(
-            "Max Chunks per File", str(max(file_counts.values()) if file_counts else 0)
-        )
+        table.add_row("Max Chunks per File", str(max(file_counts.values()) if file_counts else 0))
        table.add_row("Unique Chunk Types", str(len(chunk_types)))
-
+        
        console.print(table)
-
+        
        # Show file distribution
        if len(file_counts) > 0:
            console.print("\n[bold]File Distribution:[/bold]")
-            for file_path, count in sorted(
-                file_counts.items(), key=lambda x: x[1], reverse=True
-            )[:5]:
+            for file_path, count in sorted(file_counts.items(), key=lambda x: x[1], reverse=True)[:5]:
                console.print(f"  {count}x {file_path}")
-
+        
        # Show chunk type distribution
        if len(chunk_types) > 0:
            console.print("\n[bold]Chunk Type Distribution:[/bold]")
-            for chunk_type, count in sorted(
-                chunk_types.items(), key=lambda x: x[1], reverse=True
-            ):
+            for chunk_type, count in sorted(chunk_types.items(), key=lambda x: x[1], reverse=True):
                console.print(f"  {chunk_type}: {count} chunks")
-
+        
        # Verify constraints
        console.print("\n[bold]Constraint Verification:[/bold]")
        max_per_file = max(file_counts.values()) if file_counts else 0
@ -336,45 +321,45 @@ class SearchTester:
 def main():
    """Run comprehensive hybrid search tests."""
    import sys
-
+    
    if len(sys.argv) > 1:
        project_path = Path(sys.argv[1])
    else:
        project_path = Path.cwd()
-
-    if not (project_path / ".mini-rag").exists():
+    
+    if not (project_path / '.mini-rag').exists():
        console.print("[red]Error: No RAG index found. Run 'rag-mini index' first.[/red]")
        return
-
+    
    # Create tester
    tester = SearchTester(project_path)
-
+    
    # Run all tests
-    console.print("\n" + "=" * 80)
+    console.print("\n" + "="*80)
    console.print("[bold green]Mini RAG Hybrid Search Test Suite[/bold green]")
-    console.print("=" * 80)
-
+    console.print("="*80)
+    
    # Test 1: Query type analysis
    tester.test_query_types()
-
+    
    # Test 2: Performance benchmark
-    console.print("\n" + "-" * 80)
+    console.print("\n" + "-"*80)
    tester.benchmark_performance(num_queries=30)
-
+    
    # Test 3: Diversity constraints
-    console.print("\n" + "-" * 80)
+    console.print("\n" + "-"*80)
    tester.test_diversity_constraints()
-
+    
    # Summary
-    console.print("\n" + "=" * 80)
+    console.print("\n" + "="*80)
    console.print("[bold green]Test Suite Complete![/bold green]")
    console.print("\n[dim]The hybrid search combines:")
    console.print("  • Semantic understanding from transformer embeddings")
    console.print("  • Keyword relevance from BM25 scoring")
    console.print("  • Result diversity through intelligent filtering")
    console.print("  • Performance optimization through concurrent processing[/dim]")
-    console.print("=" * 80 + "\n")
+    console.print("="*80 + "\n")


 if __name__ == "__main__":
-    main()
+    main()
--- a/tests/test_min_chunk_size.py
+++ b/tests/test_min_chunk_size.py
@ -1,16 +1,13 @@
 """Test with smaller min_chunk_size."""

-from pathlib import Path
-
 from mini_rag.chunker import CodeChunker
+from pathlib import Path

 test_code = '''"""Test module."""

 import os

-
 class MyClass:
-
    def method(self):
        return 42

@ -27,4 +24,4 @@ for i, chunk in enumerate(chunks):
    print(f"\nChunk {i}: {chunk.chunk_type} '{chunk.name}'")
    print(f"Lines {chunk.start_line}-{chunk.end_line}")
    print(f"Size: {len(chunk.content.splitlines())} lines")
-    print("-" * 40)
+    print("-" * 40)
--- a/tests/test_mode_separation.py
+++ b/tests/test_mode_separation.py
@ -7,6 +7,7 @@ between thinking and no-thinking modes.
 """

 import sys
+import os
 import tempfile
 import unittest
 from pathlib import Path
@ -15,54 +16,51 @@ from pathlib import Path
 sys.path.insert(0, str(Path(__file__).parent.parent))

 try:
-    from mini_rag.config import RAGConfig
+    from mini_rag.llm_synthesizer import LLMSynthesizer  
    from mini_rag.explorer import CodeExplorer
+    from mini_rag.config import RAGConfig
    from mini_rag.indexer import ProjectIndexer
-    from mini_rag.llm_synthesizer import LLMSynthesizer
    from mini_rag.search import CodeSearcher
 except ImportError as e:
    print(f"❌ Could not import RAG components: {e}")
    print("   This test requires the full RAG system to be installed")
    sys.exit(1)

-
 class TestModeSeparation(unittest.TestCase):
    """Test the clean separation between synthesis and exploration modes."""
-
+    
    def setUp(self):
        """Set up test environment."""
        self.temp_dir = tempfile.mkdtemp()
        self.project_path = Path(self.temp_dir)
-
+        
        # Create a simple test project
        test_file = self.project_path / "test_module.py"
-        test_file.write_text(
-            '''"""Test module for mode separation testing."""
+        test_file.write_text('''"""Test module for mode separation testing."""

 def authenticate_user(username: str, password: str) -> bool:
    """Authenticate a user with username and password."""
    # Simple authentication logic
    if not username or not password:
        return False
-
+    
    # Check against database (simplified)
    valid_users = {"admin": "secret", "user": "password"}
    return valid_users.get(username) == password

-
 class UserManager:
    """Manages user operations."""
-
+    
    def __init__(self):
        self.users = {}
-
+    
    def create_user(self, username: str) -> bool:
        """Create a new user."""
        if username in self.users:
            return False
        self.users[username] = {"created": True}
        return True
-
+    
    def get_user_info(self, username: str) -> dict:
        """Get user information."""
        return self.users.get(username, {})
@ -73,216 +71,196 @@ def process_login_request(username: str, password: str) -> dict:
        return {"success": True, "message": "Login successful"}
    else:
        return {"success": False, "message": "Invalid credentials"}
-'''
-        )
-
+''')
+        
        # Index the project for testing
        try:
            indexer = ProjectIndexer(self.project_path)
            indexer.index_project()
        except Exception as e:
            self.skipTest(f"Could not index test project: {e}")
-
+    
    def tearDown(self):
        """Clean up test environment."""
        import shutil
-
        shutil.rmtree(self.temp_dir, ignore_errors=True)
-
+    
    def test_01_synthesis_mode_defaults(self):
        """Test that synthesis mode has correct defaults."""
        synthesizer = LLMSynthesizer()
-
+        
        # Should default to no thinking
-        self.assertFalse(
-            synthesizer.enable_thinking, "Synthesis mode should default to no thinking"
-        )
-
+        self.assertFalse(synthesizer.enable_thinking, 
+                        "Synthesis mode should default to no thinking")
+        
        print("✅ Synthesis mode defaults to no thinking")
-
+    
    def test_02_exploration_mode_defaults(self):
        """Test that exploration mode enables thinking."""
        config = RAGConfig()
        explorer = CodeExplorer(self.project_path, config)
-
+        
        # Should enable thinking in exploration mode
-        self.assertTrue(
-            explorer.synthesizer.enable_thinking,
-            "Exploration mode should enable thinking",
-        )
-
+        self.assertTrue(explorer.synthesizer.enable_thinking,
+                       "Exploration mode should enable thinking")
+        
        print("✅ Exploration mode enables thinking by default")
-
+    
    def test_03_no_runtime_thinking_toggle(self):
        """Test that thinking mode cannot be toggled at runtime."""
        synthesizer = LLMSynthesizer(enable_thinking=False)
-
+        
        # Should not have public methods to toggle thinking
-        thinking_methods = [
-            method
-            for method in dir(synthesizer)
-            if "thinking" in method.lower() and not method.startswith("_")
-        ]
-
+        thinking_methods = [method for method in dir(synthesizer) 
+                           if 'thinking' in method.lower() and not method.startswith('_')]
+        
        # The only thinking-related attribute should be the readonly enable_thinking
-        self.assertEqual(
-            len(thinking_methods), 0, "Should not have public thinking toggle methods"
-        )
-
+        self.assertEqual(len(thinking_methods), 0,
+                        "Should not have public thinking toggle methods")
+        
        print("✅ No runtime thinking toggle methods available")
-
+    
    def test_04_mode_contamination_prevention(self):
        """Test that modes don't contaminate each other."""
        if not self._ollama_available():
            self.skipTest("Ollama not available for contamination testing")
-
+        
        # Create synthesis mode synthesizer
        synthesis_synthesizer = LLMSynthesizer(enable_thinking=False)
-
-        # Create exploration mode synthesizer
+        
+        # Create exploration mode synthesizer  
        exploration_synthesizer = LLMSynthesizer(enable_thinking=True)
-
+        
        # Both should maintain their thinking settings
-        self.assertFalse(
-            synthesis_synthesizer.enable_thinking,
-            "Synthesis synthesizer should remain no-thinking",
-        )
-        self.assertTrue(
-            exploration_synthesizer.enable_thinking,
-            "Exploration synthesizer should remain thinking-enabled",
-        )
-
+        self.assertFalse(synthesis_synthesizer.enable_thinking,
+                        "Synthesis synthesizer should remain no-thinking")
+        self.assertTrue(exploration_synthesizer.enable_thinking,
+                       "Exploration synthesizer should remain thinking-enabled")
+        
        print("✅ Mode contamination prevented")
-
+    
    def test_05_exploration_session_management(self):
        """Test exploration session management."""
        config = RAGConfig()
        explorer = CodeExplorer(self.project_path, config)
-
+        
        # Should start with no active session
-        self.assertIsNone(explorer.current_session, "Should start with no active session")
-
+        self.assertIsNone(explorer.current_session, 
+                         "Should start with no active session")
+        
        # Should be able to create session summary even without session
        summary = explorer.get_session_summary()
-        self.assertIn("No active", summary, "Should handle no active session gracefully")
-
+        self.assertIn("No active", summary,
+                     "Should handle no active session gracefully")
+        
        print("✅ Session management working correctly")
-
+    
    def test_06_context_memory_structure(self):
        """Test that exploration mode has context memory structure."""
        config = RAGConfig()
        explorer = CodeExplorer(self.project_path, config)
-
+        
        # Should have context tracking attributes
-        self.assertTrue(
-            hasattr(explorer, "current_session"),
-            "Explorer should have session tracking",
-        )
-
+        self.assertTrue(hasattr(explorer, 'current_session'),
+                       "Explorer should have session tracking")
+        
        print("✅ Context memory structure present")
-
+    
    def test_07_synthesis_mode_no_thinking_prompts(self):
        """Test that synthesis mode properly handles no-thinking."""
        if not self._ollama_available():
            self.skipTest("Ollama not available for prompt testing")
-
+        
        synthesizer = LLMSynthesizer(enable_thinking=False)
-
+        
        # Test the _call_ollama method handling
-        if hasattr(synthesizer, "_call_ollama"):
+        if hasattr(synthesizer, '_call_ollama'):
            # Should append <no_think> when thinking disabled
            # This is a white-box test of the implementation
            try:
                # Mock test - just verify the method exists and can be called
-                # Test call (result unused)
-                synthesizer._call_ollama("test", temperature=0.1, disable_thinking=True)
+                result = synthesizer._call_ollama("test", temperature=0.1, disable_thinking=True)
                # Don't assert on result since Ollama might not be available
                print("✅ No-thinking prompt handling available")
            except Exception as e:
                print(f"⚠️  Prompt handling test skipped: {e}")
        else:
            self.fail("Synthesizer should have _call_ollama method")
-
+    
    def test_08_mode_specific_initialization(self):
        """Test that modes initialize correctly with lazy loading."""
        # Synthesis mode
        synthesis_synthesizer = LLMSynthesizer(enable_thinking=False)
-        self.assertFalse(
-            synthesis_synthesizer._initialized,
-            "Should start uninitialized for lazy loading",
-        )
-
-        # Exploration mode
+        self.assertFalse(synthesis_synthesizer._initialized,
+                        "Should start uninitialized for lazy loading")
+        
+        # Exploration mode  
        config = RAGConfig()
        explorer = CodeExplorer(self.project_path, config)
-        self.assertFalse(
-            explorer.synthesizer._initialized,
-            "Should start uninitialized for lazy loading",
-        )
-
+        self.assertFalse(explorer.synthesizer._initialized,
+                        "Should start uninitialized for lazy loading")
+        
        print("✅ Lazy initialization working correctly")
-
+    
    def test_09_search_vs_exploration_integration(self):
        """Test integration differences between search and exploration."""
        # Regular search (synthesis mode)
        searcher = CodeSearcher(self.project_path)
        search_results = searcher.search("authentication", top_k=3)
-
-        self.assertGreater(len(search_results), 0, "Search should return results")
-
+        
+        self.assertGreater(len(search_results), 0, 
+                          "Search should return results")
+        
        # Exploration mode setup
        config = RAGConfig()
        explorer = CodeExplorer(self.project_path, config)
-
+        
        # Both should work with same project but different approaches
-        self.assertTrue(
-            hasattr(explorer, "synthesizer"),
-            "Explorer should have thinking-enabled synthesizer",
-        )
-
+        self.assertTrue(hasattr(explorer, 'synthesizer'),
+                       "Explorer should have thinking-enabled synthesizer")
+        
        print("✅ Search and exploration integration working")
-
+    
    def test_10_mode_guidance_detection(self):
        """Test that the system can detect when to recommend different modes."""
        # Words that should trigger exploration mode recommendation
-        exploration_triggers = ["why", "how", "explain", "debug"]
-
+        exploration_triggers = ['why', 'how', 'explain', 'debug']
+        
        for trigger in exploration_triggers:
            query = f"{trigger} does authentication work"
            # This would typically be tested in the main CLI
            # Here we just verify the trigger detection logic exists
            has_trigger = any(word in query.lower() for word in exploration_triggers)
-            self.assertTrue(has_trigger, f"Should detect '{trigger}' as exploration trigger")
-
+            self.assertTrue(has_trigger, 
+                           f"Should detect '{trigger}' as exploration trigger")
+        
        print("✅ Mode guidance detection working")
-
+    
    def _ollama_available(self) -> bool:
        """Check if Ollama is available for testing."""
        try:
            import requests
-
            response = requests.get("http://localhost:11434/api/tags", timeout=5)
            return response.status_code == 200
        except Exception:
            return False

-
 def main():
    """Run mode separation tests."""
    print("🧪 Testing Mode Separation")
    print("=" * 40)
-
+    
    # Check if we're in the right environment
    if not Path("mini_rag").exists():
        print("❌ Tests must be run from the FSS-Mini-RAG root directory")
        sys.exit(1)
-
+    
    # Run tests
    loader = unittest.TestLoader()
    suite = loader.loadTestsFromTestCase(TestModeSeparation)
    runner = unittest.TextTestRunner(verbosity=2)
    result = runner.run(suite)
-
+    
    # Summary
    print("\n" + "=" * 40)
    if result.wasSuccessful():
@ -291,10 +269,9 @@ def main():
    else:
        print("❌ Some tests failed")
        print(f"   Failed: {len(result.failures)}, Errors: {len(result.errors)}")
-
+    
    return result.wasSuccessful()

-
 if __name__ == "__main__":
    success = main()
-    sys.exit(0 if success else 1)
+    sys.exit(0 if success else 1)
--- a/tests/test_ollama_integration.py
+++ b/tests/test_ollama_integration.py
@ -8,71 +8,72 @@ what's working and what needs attention.
 Run with: python3 tests/test_ollama_integration.py
 """

-import sys
 import unittest
-from pathlib import Path
-from unittest.mock import MagicMock, patch
-
 import requests
-
-from mini_rag.config import RAGConfig
-from mini_rag.llm_synthesizer import LLMSynthesizer
-from mini_rag.query_expander import QueryExpander
+import json
+import sys
+from pathlib import Path
+from unittest.mock import patch, MagicMock

 # Add project to path
 sys.path.insert(0, str(Path(__file__).parent.parent))

+from mini_rag.query_expander import QueryExpander
+from mini_rag.llm_synthesizer import LLMSynthesizer  
+from mini_rag.config import RAGConfig
+

 class TestOllamaIntegration(unittest.TestCase):
    """
    Tests to help beginners troubleshoot their Ollama setup.
-
+    
    Each test explains what it's checking and gives clear feedback
    about what's working or needs to be fixed.
    """
-
+    
    def setUp(self):
        """Set up test configuration."""
        self.config = RAGConfig()
        print(f"\n🧪 Testing with Ollama host: {self.config.llm.ollama_host}")
-
+    
    def test_01_ollama_server_running(self):
        """
        ✅ Check if Ollama server is running and responding.
-
+        
        This test verifies that:
        - Ollama is installed and running
        - The API endpoint is accessible
        - Basic connectivity works
        """
        print("\n📡 Testing Ollama server connectivity...")
-
+        
        try:
            response = requests.get(
-                f"http://{self.config.llm.ollama_host}/api/tags", timeout=5
+                f"http://{self.config.llm.ollama_host}/api/tags", 
+                timeout=5
            )
-
+            
            if response.status_code == 200:
                data = response.json()
-                models = data.get("models", [])
-                print("   ✅ Ollama server is running!")
+                models = data.get('models', [])
+                print(f"   ✅ Ollama server is running!")
                print(f"   📦 Found {len(models)} models available")
-
+                
                if models:
                    print("   🎯 Available models:")
                    for model in models[:5]:  # Show first 5
-                        name = model.get("name", "unknown")
-                        size = model.get("size", 0)
+                        name = model.get('name', 'unknown')
+                        size = model.get('size', 0) 
                        print(f"      • {name} ({size//1000000:.0f}MB)")
                    if len(models) > 5:
                        print(f"      ... and {len(models)-5} more")
                else:
                    print("   ⚠️  No models found. Install with: ollama pull qwen3:4b")
-
+                
                self.assertTrue(True)
            else:
                self.fail(f"Ollama server responded with status {response.status_code}")
-
+                
        except requests.exceptions.ConnectionError:
            self.fail(
                "❌ Cannot connect to Ollama server.\n"
@ -83,32 +84,35 @@ class TestOllamaIntegration(unittest.TestCase):
            )
        except Exception as e:
            self.fail(f"❌ Unexpected error: {e}")
-
+    
    def test_02_embedding_model_available(self):
        """
        ✅ Check if embedding model is available.
-
+        
        This test verifies that:
        - The embedding model (nomic-embed-text) is installed
        - Embedding API calls work correctly
        - Model responds with valid embeddings
        """
        print("\n🧠 Testing embedding model availability...")
-
+        
        try:
            # Test embedding generation
            response = requests.post(
                f"http://{self.config.llm.ollama_host}/api/embeddings",
-                json={"model": "nomic-embed-text", "prompt": "test embedding"},
-                timeout=10,
+                json={
+                    "model": "nomic-embed-text",
+                    "prompt": "test embedding"
+                },
+                timeout=10
            )
-
+            
            if response.status_code == 200:
                data = response.json()
-                embedding = data.get("embedding", [])
-
+                embedding = data.get('embedding', [])
+                
                if embedding and len(embedding) > 0:
-                    print("   ✅ Embedding model working!")
+                    print(f"   ✅ Embedding model working!")
                    print(f"   📊 Generated {len(embedding)}-dimensional vectors")
                    self.assertTrue(len(embedding) > 100)  # Should be substantial vectors
                else:
@ -122,283 +126,285 @@ class TestOllamaIntegration(unittest.TestCase):
                    )
                else:
                    self.fail(f"Embedding API error: {response.status_code}")
-
+                    
        except Exception as e:
            self.fail(f"❌ Embedding test failed: {e}")
-
+    
    def test_03_llm_model_available(self):
        """
        ✅ Check if LLM models are available for synthesis/expansion.
-
+        
        This test verifies that:
        - At least one LLM model is available
        - The model can generate text responses
        - Response quality is reasonable
        """
        print("\n🤖 Testing LLM model availability...")
-
+        
        synthesizer = LLMSynthesizer(config=self.config)
-
+        
        if not synthesizer.is_available():
            self.fail(
                "❌ No LLM models available.\n"
                "   💡 Install a model like: ollama pull qwen3:4b"
            )
-
+        
        print(f"   ✅ Found {len(synthesizer.available_models)} LLM models")
        print(f"   🎯 Will use: {synthesizer.model}")
-
+        
        # Test basic text generation
        try:
            response = synthesizer._call_ollama(
-                "Complete this: The capital of France is", temperature=0.1
+                "Complete this: The capital of France is", 
+                temperature=0.1
            )
-
+            
            if response and len(response.strip()) > 0:
-                print("   ✅ Model generating responses!")
+                print(f"   ✅ Model generating responses!")
                print(f"   💬 Sample response: '{response[:50]}...'")
-
+                
                # Basic quality check
                if "paris" in response.lower():
                    print("   🎯 Response quality looks good!")
                else:
                    print("   ⚠️  Response quality might be low")
-
+                    
                self.assertTrue(len(response) > 5)
            else:
                self.fail("Model produced empty response")
-
+                
        except Exception as e:
            self.fail(f"❌ LLM generation test failed: {e}")
-
+    
    def test_04_query_expansion_working(self):
        """
        ✅ Check if query expansion is working correctly.
-
+        
        This test verifies that:
        - QueryExpander can connect to Ollama
        - Expansion produces reasonable results
        - Caching is working
        """
        print("\n🔍 Testing query expansion...")
-
+        
        # Enable expansion for testing
        self.config.search.expand_queries = True
        expander = QueryExpander(self.config)
-
+        
        if not expander.is_available():
            self.skipTest("⏭️  Skipping - Ollama not available (tested above)")
-
+        
        # Test expansion
        test_query = "authentication"
        expanded = expander.expand_query(test_query)
-
+        
        print(f"   📝 Original: '{test_query}'")
        print(f"   ➡️  Expanded: '{expanded}'")
-
+        
        # Quality checks
        if expanded == test_query:
            print("   ⚠️  No expansion occurred (might be normal for simple queries)")
        else:
            # Should contain original query
            self.assertIn(test_query.lower(), expanded.lower())
-
+            
            # Should be longer
            self.assertGreater(len(expanded.split()), len(test_query.split()))
-
+            
            # Test caching
            cached = expander.expand_query(test_query)
            self.assertEqual(expanded, cached)
            print("   ✅ Expansion and caching working!")
-
+    
    def test_05_synthesis_mode_no_thinking(self):
        """
        ✅ Test synthesis mode operates without thinking.
-
+        
        Verifies that LLMSynthesizer in synthesis mode:
        - Defaults to no thinking
        - Handles <no_think> tokens properly
        - Works independently of exploration mode
        """
        print("\n🚀 Testing synthesis mode (no thinking)...")
-
+        
        # Create synthesis mode synthesizer (default behavior)
        synthesizer = LLMSynthesizer()
-
+        
        # Should default to no thinking
-        self.assertFalse(
-            synthesizer.enable_thinking, "Synthesis mode should default to no thinking"
-        )
+        self.assertFalse(synthesizer.enable_thinking, 
+                        "Synthesis mode should default to no thinking")
        print("   ✅ Defaults to no thinking")
-
+        
        if synthesizer.is_available():
            print("   📝 Testing with live Ollama...")
-
+            
            # Create mock search results
            from dataclasses import dataclass
-
+            
            @dataclass
            class MockResult:
                file_path: str
                content: str
                score: float
-
-            results = [MockResult("auth.py", "def authenticate(user): return True", 0.95)]
-
-            # Test synthesis
+            
+            results = [
+                MockResult("auth.py", "def authenticate(user): return True", 0.95)
+            ]
+            
+            # Test synthesis 
            synthesis = synthesizer.synthesize_search_results(
                "user authentication", results, Path(".")
            )
-
+            
            # Should get reasonable synthesis
            self.assertIsNotNone(synthesis)
            self.assertGreater(len(synthesis.summary), 10)
            print("   ✅ Synthesis mode working without thinking")
        else:
            print("   ⏭️  Live test skipped - Ollama not available")
-
+    
    def test_06_exploration_mode_thinking(self):
        """
        ✅ Test exploration mode enables thinking.
-
+        
        Verifies that CodeExplorer:
        - Enables thinking by default
        - Has session management
        - Works independently of synthesis mode
        """
        print("\n🧠 Testing exploration mode (with thinking)...")
-
+        
        try:
            from mini_rag.explorer import CodeExplorer
        except ImportError:
            self.skipTest("⏭️  CodeExplorer not available")
-
+        
        # Create exploration mode
        explorer = CodeExplorer(Path("."), self.config)
-
+        
        # Should enable thinking
-        self.assertTrue(
-            explorer.synthesizer.enable_thinking,
-            "Exploration mode should enable thinking",
-        )
+        self.assertTrue(explorer.synthesizer.enable_thinking,
+                       "Exploration mode should enable thinking")
        print("   ✅ Enables thinking by default")
-
+        
        # Should have session management
-        self.assertIsNone(explorer.current_session, "Should start with no active session")
+        self.assertIsNone(explorer.current_session,
+                         "Should start with no active session")
        print("   ✅ Session management available")
-
+        
        # Should handle session summary gracefully
        summary = explorer.get_session_summary()
        self.assertIn("No active", summary)
        print("   ✅ Graceful session handling")
-
+    
    def test_07_mode_separation(self):
        """
        ✅ Test that synthesis and exploration modes don't interfere.
-
+        
        Verifies clean separation:
        - Different thinking settings
        - Independent operation
        - No cross-contamination
        """
        print("\n🔄 Testing mode separation...")
-
+        
        # Create both modes
        synthesizer = LLMSynthesizer(enable_thinking=False)
-
+        
        try:
            from mini_rag.explorer import CodeExplorer
-
            explorer = CodeExplorer(Path("."), self.config)
        except ImportError:
            self.skipTest("⏭️  CodeExplorer not available")
-
+        
        # Should have different thinking settings
-        self.assertFalse(synthesizer.enable_thinking, "Synthesis should not use thinking")
-        self.assertTrue(
-            explorer.synthesizer.enable_thinking, "Exploration should use thinking"
-        )
-
+        self.assertFalse(synthesizer.enable_thinking,
+                        "Synthesis should not use thinking")
+        self.assertTrue(explorer.synthesizer.enable_thinking,
+                       "Exploration should use thinking")
+        
        # Both should be uninitialized (lazy loading)
-        self.assertFalse(synthesizer._initialized, "Should use lazy loading")
-        self.assertFalse(explorer.synthesizer._initialized, "Should use lazy loading")
-
+        self.assertFalse(synthesizer._initialized,
+                        "Should use lazy loading")
+        self.assertFalse(explorer.synthesizer._initialized,
+                        "Should use lazy loading")
+        
        print("   ✅ Clean mode separation confirmed")
-
+    
    def test_08_with_mocked_ollama(self):
        """
        ✅ Test components work with mocked Ollama (for offline testing).
-
+        
        This test verifies that:
        - System gracefully handles Ollama being unavailable
        - Fallback behaviors work correctly
        - Error messages are helpful
        """
        print("\n🎭 Testing with mocked Ollama responses...")
-
+        
        # Mock successful embedding response
        mock_embedding_response = MagicMock()
        mock_embedding_response.status_code = 200
        mock_embedding_response.json.return_value = {
-            "embedding": [0.1] * 768  # Standard embedding size
+            'embedding': [0.1] * 768  # Standard embedding size
        }
-
+        
        # Mock LLM response
        mock_llm_response = MagicMock()
        mock_llm_response.status_code = 200
        mock_llm_response.json.return_value = {
-            "response": "authentication login user verification credentials"
+            'response': 'authentication login user verification credentials'
        }
-
-        with patch("requests.post", side_effect=[mock_embedding_response, mock_llm_response]):
+        
+        with patch('requests.post', side_effect=[mock_embedding_response, mock_llm_response]):
            # Test query expansion with mocked response
            expander = QueryExpander(self.config)
            expander.enabled = True
-
+            
            expanded = expander._llm_expand_query("authentication")
            if expanded:
                print(f"   ✅ Mocked expansion: '{expanded}'")
                self.assertIn("authentication", expanded)
            else:
                print("   ⚠️  Expansion returned None (might be expected)")
-
+        
        # Test graceful degradation when Ollama unavailable
-        with patch("requests.get", side_effect=requests.exceptions.ConnectionError()):
+        with patch('requests.get', side_effect=requests.exceptions.ConnectionError()):
            expander_offline = QueryExpander(self.config)
-
+            
            # Should handle unavailable server gracefully
            self.assertFalse(expander_offline.is_available())
-
+            
            # Should return original query when offline
            result = expander_offline.expand_query("test query")
            self.assertEqual(result, "test query")
            print("   ✅ Graceful offline behavior working!")
-
+    
    def test_06_configuration_validation(self):
        """
        ✅ Check if configuration is valid and complete.
-
+        
        This test verifies that:
        - All required config sections exist
        - Values are reasonable
        - Host/port settings are valid
        """
        print("\n⚙️  Testing configuration validation...")
-
+        
        # Check LLM config
        self.assertIsNotNone(self.config.llm)
        self.assertTrue(self.config.llm.ollama_host)
        self.assertTrue(isinstance(self.config.llm.max_expansion_terms, int))
        self.assertGreater(self.config.llm.max_expansion_terms, 0)
-
-        print("   ✅ LLM config valid")
+        
+        print(f"   ✅ LLM config valid")
        print(f"      Host: {self.config.llm.ollama_host}")
        print(f"      Max expansion terms: {self.config.llm.max_expansion_terms}")
-
-        # Check search config
+        
+        # Check search config  
        self.assertIsNotNone(self.config.search)
        self.assertGreater(self.config.search.default_top_k, 0)
-        print("   ✅ Search config valid")
+        print(f"   ✅ Search config valid")
        print(f"      Default top-k: {self.config.search.default_top_k}")
        print(f"      Query expansion: {self.config.search.expand_queries}")

@ -412,10 +418,10 @@ def run_troubleshooting():
    print("These tests help you troubleshoot your Ollama setup.")
    print("Each test explains what it's checking and how to fix issues.")
    print()
-
+    
    # Run tests with detailed output
    unittest.main(verbosity=2, exit=False)
-
+    
    print("\n" + "=" * 50)
    print("💡 Common Solutions:")
    print("   • Install Ollama: https://ollama.ai/download")
@ -426,5 +432,5 @@ def run_troubleshooting():
    print("📚 For more help, see docs/QUERY_EXPANSION.md")


-if __name__ == "__main__":
-    run_troubleshooting()
+if __name__ == '__main__':
+    run_troubleshooting()
--- a/tests/test_rag_integration.py
+++ b/tests/test_rag_integration.py
@ -10,26 +10,21 @@ Or run directly with venv:
    source .venv/bin/activate && PYTHONPATH=. python tests/test_rag_integration.py
 """

-import os
 import tempfile
+import shutil
+import os
 from pathlib import Path
-
 from mini_rag.indexer import ProjectIndexer
 from mini_rag.search import CodeSearcher

 # Check if virtual environment is activated
-
-
 def check_venv():
-    if "VIRTUAL_ENV" not in os.environ:
+    if 'VIRTUAL_ENV' not in os.environ:
        print("⚠️  WARNING: Virtual environment not detected!")
        print("   This test requires the virtual environment to be activated.")
-        print(
-            "   Run: source .venv/bin/activate && PYTHONPATH=. python tests/test_rag_integration.py"
-        )
+        print("   Run: source .venv/bin/activate && PYTHONPATH=. python tests/test_rag_integration.py")
        print("   Continuing anyway...\n")

-
 check_venv()

 # Sample Python file with proper structure
@ -40,16 +35,15 @@ This module demonstrates various Python constructs.

 import os
 import sys
-from typing import List, Optional
+from typing import List, Dict, Optional
 from dataclasses import dataclass

 # Module-level constants
 DEFAULT_TIMEOUT = 30
 MAX_RETRIES = 3

+
@dataclass
-
-
 class Config:
    """Configuration dataclass."""
    timeout: int = DEFAULT_TIMEOUT
@ -59,71 +53,73 @@ class Config:
 class DataProcessor:
    """
    Main data processor class.
-
+    
    This class handles the processing of various data types
    and provides a unified interface for data operations.
    """
-
+    
    def __init__(self, config: Config):
        """
        Initialize the processor with configuration.
-
+        
        Args:
            config: Configuration object
        """
        self.config = config
        self._cache = {}
        self._initialized = False
-
+        
    def process(self, data: List[Dict]) -> List[Dict]:
        """
        Process a list of data items.
-
+        
        Args:
            data: List of dictionaries to process
-
+            
        Returns:
            Processed data list
        """
        if not self._initialized:
            self._initialize()
-
+            
        results = []
        for item in data:
            processed = self._process_item(item)
            results.append(processed)
-
+            
        return results
-
+    
    def _initialize(self):
        """Initialize internal state."""
        self._cache.clear()
        self._initialized = True
-
+        
    def _process_item(self, item: Dict) -> Dict:
        """Process a single item."""
        # Implementation details
        return {**item, 'processed': True}

+
 def main():
    """Main entry point."""
    config = Config()
    processor = DataProcessor(config)
-
+    
    test_data = [
        {'id': 1, 'value': 'test1'},
        {'id': 2, 'value': 'test2'},
    ]
-
+    
    results = processor.process(test_data)
    print(f"Processed {len(results)} items")

+
 if __name__ == "__main__":
    main()
 '''

 # Sample markdown file
-sample_markdown = """# RAG System Documentation
+sample_markdown = '''# RAG System Documentation

 ## Overview

@ -179,103 +175,103 @@ Main class for indexing projects.
 ### CodeSearcher

 Provides semantic search capabilities.
-"""
+'''


 def test_integration():
    """Test the complete RAG system with smart chunking."""
-
+    
    # Create temporary project directory
    with tempfile.TemporaryDirectory() as tmpdir:
        project_path = Path(tmpdir)
-
+        
        # Create test files
        (project_path / "processor.py").write_text(sample_code)
        (project_path / "README.md").write_text(sample_markdown)
-
+        
        print("=" * 60)
        print("TESTING RAG SYSTEM INTEGRATION")
        print("=" * 60)
-
+        
        # Index the project
        print("\n1. Indexing project...")
        indexer = ProjectIndexer(project_path)
        stats = indexer.index_project()
-
+        
        print(f"   - Files indexed: {stats['files_indexed']}")
        print(f"   - Total chunks: {stats['chunks_created']}")
        print(f"   - Indexing time: {stats['time_taken']:.2f}s")
-
+        
        # Verify chunks were created properly
        print("\n2. Verifying chunk metadata...")
-
+        
        # Initialize searcher
        searcher = CodeSearcher(project_path)
-
+        
        # Search for specific content
        print("\n3. Testing search functionality...")
-
+        
        # Test 1: Search for class with docstring
        results = searcher.search("data processor class unified interface", top_k=3)
-        print("\n   Test 1 - Class search:")
+        print(f"\n   Test 1 - Class search:")
        for i, result in enumerate(results[:1]):
            print(f"   - Match {i+1}: {result.file_path}")
            print(f"     Chunk type: {result.chunk_type}")
            print(f"     Score: {result.score:.3f}")
-            if "This class handles" in result.content:
+            if 'This class handles' in result.content:
                print("     [OK] Docstring included with class")
            else:
                print("     [FAIL] Docstring not found")
-
+        
        # Test 2: Search for method with docstring
        results = searcher.search("process list of data items", top_k=3)
-        print("\n   Test 2 - Method search:")
+        print(f"\n   Test 2 - Method search:")
        for i, result in enumerate(results[:1]):
            print(f"   - Match {i+1}: {result.file_path}")
            print(f"     Chunk type: {result.chunk_type}")
            print(f"     Parent class: {getattr(result, 'parent_class', 'N/A')}")
-            if "Args:" in result.content and "Returns:" in result.content:
+            if 'Args:' in result.content and 'Returns:' in result.content:
                print("     [OK] Docstring included with method")
            else:
                print("     [FAIL] Method docstring not complete")
-
+        
        # Test 3: Search markdown content
        results = searcher.search("smart chunking capabilities markdown", top_k=3)
-        print("\n   Test 3 - Markdown search:")
+        print(f"\n   Test 3 - Markdown search:")
        for i, result in enumerate(results[:1]):
            print(f"   - Match {i+1}: {result.file_path}")
            print(f"     Chunk type: {result.chunk_type}")
            print(f"     Lines: {result.start_line}-{result.end_line}")
-
+        
        # Test 4: Verify chunk navigation
-        print("\n   Test 4 - Chunk navigation:")
+        print(f"\n   Test 4 - Chunk navigation:")
        all_results = searcher.search("", top_k=100)  # Get all chunks
-        py_chunks = [r for r in all_results if r.file_path.endswith(".py")]
-
+        py_chunks = [r for r in all_results if r.file_path.endswith('.py')]
+        
        if py_chunks:
            first_chunk = py_chunks[0]
            print(f"   - First chunk: index={getattr(first_chunk, 'chunk_index', 'N/A')}")
            print(f"     Next chunk ID: {getattr(first_chunk, 'next_chunk_id', 'N/A')}")
-
+            
            # Verify chain
            valid_chain = True
            for i in range(len(py_chunks) - 1):
                curr = py_chunks[i]
-                # py_chunks[i + 1]  # Unused variable removed
+                next_chunk = py_chunks[i + 1]
                expected_next = f"processor_{i+1}"
-                if getattr(curr, "next_chunk_id", None) != expected_next:
+                if getattr(curr, 'next_chunk_id', None) != expected_next:
                    valid_chain = False
                    break
-
+            
            if valid_chain:
                print("     [OK] Chunk navigation chain is valid")
            else:
                print("     [FAIL] Chunk navigation chain broken")
-
+        
        print("\n" + "=" * 60)
        print("INTEGRATION TEST COMPLETED")
        print("=" * 60)


 if __name__ == "__main__":
-    test_integration()
+    test_integration()
--- a/tests/test_smart_ranking.py
+++ b/tests/test_smart_ranking.py
@ -8,26 +8,26 @@ and producing better quality results.
 Run with: python3 tests/test_smart_ranking.py
 """

-import sys
 import unittest
-from datetime import datetime, timedelta
+import sys
 from pathlib import Path
-from unittest.mock import MagicMock, patch
-
-from mini_rag.search import CodeSearcher, SearchResult
+from datetime import datetime, timedelta
+from unittest.mock import patch, MagicMock

 # Add project to path
 sys.path.insert(0, str(Path(__file__).parent.parent))

+from mini_rag.search import SearchResult, CodeSearcher
+

 class TestSmartRanking(unittest.TestCase):
    """
    Test smart result re-ranking for better search quality.
-
+    
    These tests verify that important files, recent files, and
    well-structured content get appropriate boosts.
    """
-
+    
    def setUp(self):
        """Set up test results for ranking."""
        # Create mock search results with different characteristics
@ -40,31 +40,27 @@ class TestSmartRanking(unittest.TestCase):
                end_line=2,
                chunk_type="text",
                name="temp",
-                language="text",
+                language="text"
            ),
            SearchResult(
-                file_path=Path("README.md"),
-                content=(
-                    "This is a comprehensive README file\n"
-                    "with detailed installation instructions\n"
-                    "and usage examples for beginners."
-                ),
+                file_path=Path("README.md"), 
+                content="This is a comprehensive README file\nwith detailed installation instructions\nand usage examples for beginners.",
                score=0.7,  # Lower initial score
                start_line=1,
                end_line=5,
                chunk_type="markdown",
                name="Installation Guide",
-                language="markdown",
+                language="markdown"
            ),
            SearchResult(
                file_path=Path("src/main.py"),
-                content='def main():\n    """Main application entry point."""\n    app = create_app()\n    return app.run()',
+                content="def main():\n    \"\"\"Main application entry point.\"\"\"\n    app = create_app()\n    return app.run()",
                score=0.75,
                start_line=10,
                end_line=15,
                chunk_type="function",
                name="main",
-                language="python",
+                language="python"
            ),
            SearchResult(
                file_path=Path("temp/cache_123.log"),
@ -72,123 +68,123 @@ class TestSmartRanking(unittest.TestCase):
                score=0.85,
                start_line=1,
                end_line=1,
-                chunk_type="text",
+                chunk_type="text", 
                name="log",
-                language="text",
-            ),
+                language="text"
+            )
        ]
-
+    
    def test_01_important_file_boost(self):
        """
        ✅ Test that important files get ranking boosts.
-
+        
        README files, main files, config files, etc. should be
        ranked higher than random temporary files.
        """
        print("\n📈 Testing important file boost...")
-
+        
        # Create a minimal CodeSearcher to test ranking
        searcher = MagicMock()
        searcher._smart_rerank = CodeSearcher._smart_rerank.__get__(searcher)
-
+        
        # Test re-ranking
        ranked = searcher._smart_rerank(self.mock_results.copy())
-
+        
        # Find README and temp file results
-        readme_result = next((r for r in ranked if "README" in str(r.file_path)), None)
-        temp_result = next((r for r in ranked if "temp" in str(r.file_path)), None)
-
+        readme_result = next((r for r in ranked if 'README' in str(r.file_path)), None)
+        temp_result = next((r for r in ranked if 'temp' in str(r.file_path)), None)
+        
        self.assertIsNotNone(readme_result)
        self.assertIsNotNone(temp_result)
-
+        
        # README should be boosted (original 0.7 * 1.2 = 0.84)
        self.assertGreater(readme_result.score, 0.8)
-
+        
        # README should now rank higher than the temp file
        readme_index = ranked.index(readme_result)
        temp_index = ranked.index(temp_result)
        self.assertLess(readme_index, temp_index)
-
+        
        print(f"   ✅ README boosted from 0.7 to {readme_result.score:.3f}")
        print(f"   📊 README now ranks #{readme_index + 1}, temp file ranks #{temp_index + 1}")
-
+    
    def test_02_content_quality_boost(self):
        """
        ✅ Test that well-structured content gets boosts.
-
+        
        Content with multiple lines and good structure should
        rank higher than very short snippets.
        """
        print("\n📝 Testing content quality boost...")
-
+        
        searcher = MagicMock()
        searcher._smart_rerank = CodeSearcher._smart_rerank.__get__(searcher)
-
+        
        ranked = searcher._smart_rerank(self.mock_results.copy())
-
+        
        # Find short and long content results
        short_result = next((r for r in ranked if len(r.content.strip()) < 20), None)
-        structured_result = next((r for r in ranked if "README" in str(r.file_path)), None)
-
+        structured_result = next((r for r in ranked if 'README' in str(r.file_path)), None)
+        
        if short_result:
            # Short content should be penalized (score * 0.9)
            print(f"   📉 Short content penalized: {short_result.score:.3f}")
            # Original was likely reduced
-
+        
        if structured_result:
            # Well-structured content gets small boost (score * 1.02)
-            lines = structured_result.content.strip().split("\n")
+            lines = structured_result.content.strip().split('\n')
            if len(lines) >= 3:
                print(f"   📈 Structured content boosted: {structured_result.score:.3f}")
                print(f"      ({len(lines)} lines of content)")
-
+        
        self.assertTrue(True)  # Test passes if no exceptions
-
+    
    def test_03_chunk_type_relevance(self):
        """
        ✅ Test that relevant chunk types get appropriate boosts.
-
+        
        Functions, classes, and documentation should be ranked
        higher than random text snippets.
        """
        print("\n🏷️  Testing chunk type relevance...")
-
+        
        searcher = MagicMock()
        searcher._smart_rerank = CodeSearcher._smart_rerank.__get__(searcher)
-
+        
        ranked = searcher._smart_rerank(self.mock_results.copy())
-
+        
        # Find function result
-        function_result = next((r for r in ranked if r.chunk_type == "function"), None)
-
+        function_result = next((r for r in ranked if r.chunk_type == 'function'), None)
+        
        if function_result:
            # Function should get boost (original score * 1.1)
            print(f"   ✅ Function chunk boosted: {function_result.score:.3f}")
            print(f"      Function: {function_result.name}")
-
+            
            # Should rank well compared to original score
            original_score = 0.75
            self.assertGreater(function_result.score, original_score)
-
+        
        self.assertTrue(True)
-
-    @patch("pathlib.Path.stat")
+    
+    @patch('pathlib.Path.stat')
    def test_04_recency_boost(self, mock_stat):
        """
        ✅ Test that recently modified files get ranking boosts.
-
+        
        Files modified in the last week should rank higher than
        very old files.
        """
        print("\n⏰ Testing recency boost...")
-
+        
        # Mock file stats for different modification times
        now = datetime.now()
-
+        
        def mock_stat_side_effect(file_path):
            mock_stat_obj = MagicMock()
-
-            if "README" in str(file_path):
+            
+            if 'README' in str(file_path):
                # Recent file (2 days ago)
                recent_time = (now - timedelta(days=2)).timestamp()
                mock_stat_obj.st_mtime = recent_time
@ -196,102 +192,98 @@ class TestSmartRanking(unittest.TestCase):
                # Old file (2 months ago)
                old_time = (now - timedelta(days=60)).timestamp()
                mock_stat_obj.st_mtime = old_time
-
+                
            return mock_stat_obj
-
+        
        # Apply mock to Path.stat for each result
        mock_stat.side_effect = lambda: mock_stat_side_effect("dummy")
-
+        
        # Patch the Path constructor to return mocked paths
-        with patch.object(Path, "stat", side_effect=mock_stat_side_effect):
+        with patch.object(Path, 'stat', side_effect=mock_stat_side_effect):
            searcher = MagicMock()
            searcher._smart_rerank = CodeSearcher._smart_rerank.__get__(searcher)
-
+            
            ranked = searcher._smart_rerank(self.mock_results.copy())
-
-            readme_result = next((r for r in ranked if "README" in str(r.file_path)), None)
-
+            
+            readme_result = next((r for r in ranked if 'README' in str(r.file_path)), None)
+            
            if readme_result:
                # Recent file should get boost
                # Original 0.7 * 1.2 (important) * 1.1 (recent) * 1.02 (structured) ≈ 0.88
                print(f"   ✅ Recent file boosted: {readme_result.score:.3f}")
                self.assertGreater(readme_result.score, 0.8)
-
+            
        print("   📅 Recency boost system working!")
-
+    
    def test_05_overall_ranking_quality(self):
        """
        ✅ Test that overall ranking produces sensible results.
-
+        
        After all boosts and penalties, the ranking should make sense:
        - Important, recent, well-structured files should rank highest
        - Short, temporary, old files should rank lowest
        """
        print("\n🏆 Testing overall ranking quality...")
-
+        
        searcher = MagicMock()
        searcher._smart_rerank = CodeSearcher._smart_rerank.__get__(searcher)
-
+        
        # Test with original unsorted results
        unsorted = self.mock_results.copy()
        ranked = searcher._smart_rerank(unsorted)
-
+        
        print("   📊 Final ranking:")
        for i, result in enumerate(ranked, 1):
            file_name = Path(result.file_path).name
            print(f"      {i}. {file_name} (score: {result.score:.3f})")
-
+        
        # Quality checks:
        # 1. Results should be sorted by score (descending)
        scores = [r.score for r in ranked]
        self.assertEqual(scores, sorted(scores, reverse=True))
-
+        
        # 2. README should rank higher than temp files
-        readme_pos = next(
-            (i for i, r in enumerate(ranked) if "README" in str(r.file_path)), None
-        )
-        temp_pos = next((i for i, r in enumerate(ranked) if "temp" in str(r.file_path)), None)
-
+        readme_pos = next((i for i, r in enumerate(ranked) if 'README' in str(r.file_path)), None)
+        temp_pos = next((i for i, r in enumerate(ranked) if 'temp' in str(r.file_path)), None)
+        
        if readme_pos is not None and temp_pos is not None:
            self.assertLess(readme_pos, temp_pos)
            print(f"   ✅ README ranks #{readme_pos + 1}, temp file ranks #{temp_pos + 1}")
-
+        
        # 3. Function/code should rank well
-        function_pos = next(
-            (i for i, r in enumerate(ranked) if r.chunk_type == "function"), None
-        )
+        function_pos = next((i for i, r in enumerate(ranked) if r.chunk_type == 'function'), None)
        if function_pos is not None:
            self.assertLess(function_pos, len(ranked) // 2)  # Should be in top half
            print(f"   ✅ Function code ranks #{function_pos + 1}")
-
+        
        print("   🎯 Ranking quality looks good!")
-
+    
    def test_06_zero_overhead_verification(self):
        """
        ✅ Verify that smart ranking adds zero overhead.
-
+        
        The ranking should only use existing data and lightweight operations.
        No additional API calls or expensive operations.
        """
        print("\n⚡ Testing zero overhead...")
-
+        
        searcher = MagicMock()
        searcher._smart_rerank = CodeSearcher._smart_rerank.__get__(searcher)
-
+        
        import time
-
+        
        # Time the ranking operation
        start_time = time.time()
-        # searcher._smart_rerank(self.mock_results.copy())  # Unused variable removed
+        ranked = searcher._smart_rerank(self.mock_results.copy())
        end_time = time.time()
-
+        
        ranking_time = (end_time - start_time) * 1000  # Convert to milliseconds
-
+        
        print(f"   ⏱️  Ranking took {ranking_time:.2f}ms for {len(self.mock_results)} results")
-
+        
        # Should be very fast (< 10ms for small result sets)
        self.assertLess(ranking_time, 50)  # Very generous upper bound
-
+        
        # Verify no external calls were made (check that we only use existing data)
        # This is implicitly tested by the fact that we're using mock objects
        print("   ✅ Zero overhead verified - only uses existing result data!")
@ -305,18 +297,18 @@ def run_ranking_tests():
    print("=" * 40)
    print("Testing the zero-overhead ranking improvements.")
    print()
-
+    
    unittest.main(verbosity=2, exit=False)
-
+    
    print("\n" + "=" * 40)
    print("💡 Smart Ranking Features:")
    print("   • Important files (README, main, config) get 20% boost")
-    print("   • Recent files (< 1 week) get 10% boost")
+    print("   • Recent files (< 1 week) get 10% boost") 
    print("   • Functions/classes get 10% boost")
    print("   • Well-structured content gets 2% boost")
    print("   • Very short content gets 10% penalty")
    print("   • All boosts are cumulative for maximum quality")


-if __name__ == "__main__":
-    run_ranking_tests()
+if __name__ == '__main__':
+    run_ranking_tests()
--- a/tests/troubleshoot.py
+++ b/tests/troubleshoot.py
@ -8,22 +8,21 @@ and helps identify what's working and what needs attention.
 Run with: python3 tests/troubleshoot.py
 """

-import subprocess
 import sys
+import subprocess
 from pathlib import Path

 # Add project to path
 sys.path.insert(0, str(Path(__file__).parent.parent))

-
 def main():
    """Run comprehensive troubleshooting checks."""
-
+    
    print("🔧 FSS-Mini-RAG Troubleshooting Tool")
    print("=" * 50)
    print("This tool checks your setup and helps fix common issues.")
    print()
-
+    
    # Menu of available tests
    print("Available tests:")
    print("  1. Full Ollama Integration Test")
@ -31,21 +30,21 @@ def main():
    print("  3. Basic System Validation")
    print("  4. All Tests (recommended)")
    print()
-
+    
    choice = input("Select test (1-4, or Enter for all): ").strip()
-
+    
    if choice == "1" or choice == "" or choice == "4":
        print("\n" + "🤖 OLLAMA INTEGRATION TESTS".center(50, "="))
        run_test("test_ollama_integration.py")
-
+    
    if choice == "2" or choice == "" or choice == "4":
        print("\n" + "🧮 SMART RANKING TESTS".center(50, "="))
        run_test("test_smart_ranking.py")
-
+    
    if choice == "3" or choice == "" or choice == "4":
        print("\n" + "🔍 SYSTEM VALIDATION TESTS".center(50, "="))
        run_test("03_system_validation.py")
-
+    
    print("\n" + "✅ TROUBLESHOOTING COMPLETE".center(50, "="))
    print("💡 If you're still having issues:")
    print("   • Check docs/QUERY_EXPANSION.md for setup help")
@ -53,37 +52,35 @@ def main():
    print("   • Start Ollama server: ollama serve")
    print("   • Install models: ollama pull qwen3:4b")

-
 def run_test(test_file):
    """Run a specific test file."""
    test_path = Path(__file__).parent / test_file
-
+    
    if not test_path.exists():
        print(f"❌ Test file not found: {test_file}")
        return
-
+    
    try:
        # Run the test
-        result = subprocess.run(
-            [sys.executable, str(test_path)], capture_output=True, text=True, timeout=60
-        )
-
+        result = subprocess.run([
+            sys.executable, str(test_path)
+        ], capture_output=True, text=True, timeout=60)
+        
        # Show output
        if result.stdout:
            print(result.stdout)
        if result.stderr:
            print("STDERR:", result.stderr)
-
+            
        if result.returncode == 0:
            print(f"✅ {test_file} completed successfully!")
        else:
            print(f"⚠️  {test_file} had some issues (return code: {result.returncode})")
-
+            
    except subprocess.TimeoutExpired:
        print(f"⏰ {test_file} timed out after 60 seconds")
    except Exception as e:
        print(f"❌ Error running {test_file}: {e}")

-
 if __name__ == "__main__":
-    main()
+    main()