From a84ff94fba70342cd8a08d2364f3e9c8e3cd5cea Mon Sep 17 00:00:00 2001 From: BobAi Date: Fri, 15 Aug 2025 12:20:06 +1000 Subject: [PATCH] Improve UX with streaming tokens, fix model references, and add icon integration This comprehensive update enhances user experience with several key improvements: ## Enhanced Streaming & Thinking Display - Implement real-time streaming with gray thinking tokens that collapse after completion - Fix thinking token redisplay bug with proper content filtering - Add clear "AI Response:" headers to separate thinking from responses - Enable streaming by default for better user engagement - Keep thinking visible for exploration, collapse only for suggested questions ## Natural Conversation Responses - Convert clunky JSON exploration responses to natural, conversational format - Improve exploration prompts for friendly, colleague-style interactions - Update summary generation with better context handling - Eliminate double response display issues ## Model Reference Updates - Remove all llama3.2 references in favor of qwen3 models - Fix non-existent qwen3:3b references, replace with proper model names - Update model rankings to prioritize working qwen models across all components - Ensure consistent model recommendations in docs and examples ## Cross-Platform Icon Integration - Add desktop icon setup to Linux installer with .desktop entry - Add Windows shortcuts for desktop and Start Menu integration - Improve installer user experience with visual branding ## Configuration & Navigation Fixes - Fix "0" option in configuration menu to properly go back - Improve configuration menu user-friendliness - Update troubleshooting guides with correct model suggestions These changes significantly improve the beginner experience while maintaining technical accuracy and system reliability. 
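A minimal sketch of the collapsing "thinking" display described above (illustrative only — the chunk source here is simulated rather than read from Ollama's streaming API, and the function and variable names are not taken from the actual implementation):

```python
# Illustrative sketch: stream chunks, show <think> text in gray, then erase
# those lines with ANSI cursor-up + clear-line once </think> arrives.
import sys
import time

GRAY, RESET = "\033[90m", "\033[0m"
CURSOR_UP, CLEAR_LINE = "\033[A", "\033[2K"

def stream_with_collapse(chunks):
    in_thinking = False
    gray_lines = 0
    for chunk in chunks:
        if "<think>" in chunk:
            in_thinking = True
            chunk = chunk.replace("<think>", "")
        if "</think>" in chunk:
            in_thinking = False
            chunk = chunk.replace("</think>", "")
            for _ in range(gray_lines):          # collapse the gray block
                sys.stdout.write(CURSOR_UP + CLEAR_LINE)
            sys.stdout.flush()
            print("💭 Thinking complete ✓")
            print("🤖 AI Response:")
            gray_lines = 0
        if in_thinking and chunk.strip():
            print(f"{GRAY}  {chunk.strip()}{RESET}")
            gray_lines += 1
        elif not in_thinking and chunk:
            sys.stdout.write(chunk)
            sys.stdout.flush()
        time.sleep(0.05)                         # simulate token latency
    print()

# Simulated stream; a real run would iterate Ollama's streamed JSON chunks.
stream_with_collapse([
    "<think>", "User asked about auth.", "Check the login() helper.",
    "</think>", "Authentication lives in ", "auth/login.py.",
])
```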
--- docs/BEGINNER_GLOSSARY.md | 2 +- docs/CPU_DEPLOYMENT.md | 2 +- docs/LLM_PROVIDERS.md | 10 +- docs/QUERY_EXPANSION.md | 19 +- docs/TROUBLESHOOTING.md | 4 +- docs/TUI_GUIDE.md | 140 +++++-- examples/config-llm-providers.yaml | 6 +- examples/config-quality.yaml | 2 +- examples/config.yaml | 2 +- install_mini_rag.sh | 70 ++++ install_windows.bat | 385 ++++++++++++++---- mini_rag/config.py | 4 +- mini_rag/explorer.py | 123 ++---- mini_rag/llm_safeguards.py | 4 +- mini_rag/llm_synthesizer.py | 134 ++++++- mini_rag/query_expander.py | 4 +- rag-mini.py | 85 +++- rag-tui.py | 611 ++++++++++++++++++++++++----- 18 files changed, 1280 insertions(+), 327 deletions(-) diff --git a/docs/BEGINNER_GLOSSARY.md b/docs/BEGINNER_GLOSSARY.md index 24f24dc..ff8cf25 100644 --- a/docs/BEGINNER_GLOSSARY.md +++ b/docs/BEGINNER_GLOSSARY.md @@ -117,7 +117,7 @@ def login_user(email, password): **Models you might see:** - **qwen3:0.6b** - Ultra-fast, good for most questions -- **llama3.2** - Slower but more detailed +- **qwen3:4b** - Slower but more detailed - **auto** - Picks the best available model --- diff --git a/docs/CPU_DEPLOYMENT.md b/docs/CPU_DEPLOYMENT.md index 48458be..e533d4f 100644 --- a/docs/CPU_DEPLOYMENT.md +++ b/docs/CPU_DEPLOYMENT.md @@ -49,7 +49,7 @@ ollama run qwen3:0.6b "Hello, can you expand this query: authentication" |-------|------|-----------|---------| | qwen3:0.6b | 522MB | Fast ⚡ | Excellent ✅ | | qwen3:1.7b | 1.4GB | Medium | Excellent ✅ | -| qwen3:3b | 2.0GB | Slow | Excellent ✅ | +| qwen3:4b | 2.5GB | Slow | Excellent ✅ | ## CPU-Optimized Configuration diff --git a/docs/LLM_PROVIDERS.md b/docs/LLM_PROVIDERS.md index c0c68ff..ce6b9cc 100644 --- a/docs/LLM_PROVIDERS.md +++ b/docs/LLM_PROVIDERS.md @@ -22,8 +22,8 @@ This guide shows how to configure FSS-Mini-RAG with different LLM providers for llm: provider: ollama ollama_host: localhost:11434 - synthesis_model: llama3.2 - expansion_model: llama3.2 + synthesis_model: qwen3:1.7b + expansion_model: qwen3:1.7b enable_synthesis: false synthesis_temperature: 0.3 cpu_optimized: true @@ -33,13 +33,13 @@ llm: **Setup:** 1. Install Ollama: `curl -fsSL https://ollama.ai/install.sh | sh` 2. Start service: `ollama serve` -3. Download model: `ollama pull llama3.2` +3. Download model: `ollama pull qwen3:1.7b` 4. Test: `./rag-mini search /path/to/project "test" --synthesize` **Recommended Models:** - `qwen3:0.6b` - Ultra-fast, good for CPU-only systems -- `llama3.2` - Balanced quality and speed -- `llama3.1:8b` - Higher quality, needs more RAM +- `qwen3:1.7b` - Balanced quality and speed (recommended) +- `qwen3:4b` - Higher quality, excellent for most use cases ### LM Studio diff --git a/docs/QUERY_EXPANSION.md b/docs/QUERY_EXPANSION.md index d697e31..05cc795 100644 --- a/docs/QUERY_EXPANSION.md +++ b/docs/QUERY_EXPANSION.md @@ -34,7 +34,24 @@ graph LR ## Configuration -Edit `config.yaml`: +### Easy Configuration (TUI) + +Use the interactive Configuration Manager in the TUI: + +1. **Start TUI**: `./rag-tui` or `rag.bat` (Windows) +2. **Select Option 6**: Configuration Manager +3. **Choose Option 2**: Toggle query expansion +4. 
**Follow prompts**: Get explanation and easy on/off toggle + +The TUI will: +- Explain benefits and requirements clearly +- Check if Ollama is available +- Show current status (enabled/disabled) +- Save changes automatically + +### Manual Configuration (Advanced) + +Edit `config.yaml` directly: ```yaml # Search behavior settings diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 6ab3416..9b2607c 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -143,8 +143,8 @@ python3 -c "import mini_rag; print('✅ Installation successful')" 2. **Install a model:** ```bash - ollama pull qwen3:0.6b # Fast, small model - # Or: ollama pull llama3.2 # Larger but better + ollama pull qwen2.5:3b # Good balance of speed and quality + # Or: ollama pull qwen3:4b # Larger but better quality ``` 3. **Test connection:** diff --git a/docs/TUI_GUIDE.md b/docs/TUI_GUIDE.md index 96cf2a4..d58f32c 100644 --- a/docs/TUI_GUIDE.md +++ b/docs/TUI_GUIDE.md @@ -23,8 +23,9 @@ That's it! The TUI will guide you through everything. ### User Flow 1. **Select Project** → Choose directory to search 2. **Index Project** → Process files for search -3. **Search Content** → Find what you need -4. **Explore Results** → See full context and files +3. **Search Content** → Find what you need quickly +4. **Explore Project** → Interactive AI-powered discovery (NEW!) +5. **Configure System** → Customize search behavior ## Main Menu Options @@ -110,7 +111,63 @@ That's it! The TUI will guide you through everything. ./rag-mini-enhanced context /path/to/project "login()" ``` -### 4. View Status +### 4. Explore Project (NEW!) + +**Purpose**: Interactive AI-powered discovery with conversation memory + +**What Makes Explore Different**: +- **Conversational**: Ask follow-up questions that build on previous answers +- **AI Reasoning**: Uses thinking mode for deeper analysis and explanations +- **Educational**: Perfect for understanding unfamiliar codebases +- **Context Aware**: Remembers what you've already discussed + +**Interactive Process**: +1. **First Question Guidance**: Clear prompts with example questions +2. **Starter Suggestions**: Random helpful questions to get you going +3. **Natural Follow-ups**: Ask "why?", "how?", "show me more" naturally +4. **Session Memory**: AI remembers your conversation context + +**Explore Mode Features**: + +**Quick Start Options**: +- **Option 1 - Help**: Show example questions and explore mode capabilities +- **Option 2 - Status**: Project information and current exploration session +- **Option 3 - Suggest**: Get a random starter question picked from 7 curated examples + +**Starter Questions** (randomly suggested): +- "What are the main components of this project?" +- "How is error handling implemented?" +- "Show me the authentication and security logic" +- "What are the key functions I should understand first?" +- "How does data flow through this system?" +- "What configuration options are available?" +- "Show me the most important files to understand" + +**Advanced Usage**: +- **Deep Questions**: "Why is this function slow?" "How does the security work?" +- **Code Analysis**: "Explain this algorithm" "What could go wrong here?" +- **Architecture**: "How do these components interact?" "What's the design pattern?" +- **Best Practices**: "Is this code following best practices?" "How would you improve this?" 
+ +**What You Learn**: +- **Conversational AI**: How to have productive technical conversations with AI +- **Code Understanding**: Deep analysis capabilities beyond simple search +- **Context Building**: How conversation memory improves over time +- **Question Techniques**: Effective ways to explore unfamiliar code + +**CLI Commands Shown**: +```bash +./rag-mini explore /path/to/project # Start interactive exploration +``` + +**Perfect For**: +- Understanding new codebases +- Code review and analysis +- Learning from existing projects +- Documenting complex systems +- Onboarding new team members + +### 5. View Status **Purpose**: Check system health and project information @@ -139,32 +196,61 @@ That's it! The TUI will guide you through everything. ./rag-mini status /path/to/project ``` -### 5. Configuration +### 6. Configuration Manager (ENHANCED!) -**Purpose**: View and understand system settings +**Purpose**: Interactive configuration with user-friendly options -**Configuration Display**: -- **Current settings** - Chunk size, strategy, file patterns -- **File location** - Where config is stored -- **Setting explanations** - What each option does -- **Quick actions** - View or edit config directly +**New Interactive Features**: +- **Live Configuration Dashboard** - See current settings with clear status +- **Quick Configuration Options** - Change common settings without YAML editing +- **Guided Setup** - Explanations and presets for each option +- **Validation** - Input checking and helpful error messages -**Key Settings Explained**: -- **chunking.max_size** - How large each searchable piece is -- **chunking.strategy** - Smart (semantic) vs simple (fixed size) -- **files.exclude_patterns** - Skip certain files/directories -- **embedding.preferred_method** - AI model preference -- **search.default_top_k** - How many results to show +**Main Configuration Options**: -**Interactive Options**: -- **[V]iew config** - See full configuration file -- **[E]dit path** - Get command to edit configuration +**1. Adjust Chunk Size**: +- **Presets**: Small (1000), Medium (2000), Large (3000), or custom +- **Guidance**: Performance vs accuracy explanations +- **Smart Validation**: Range checking and recommendations + +**2. Toggle Query Expansion**: +- **Educational Info**: Clear explanation of benefits and requirements +- **Easy Toggle**: Simple on/off with confirmation +- **System Check**: Verifies Ollama availability for AI features + +**3. Configure Search Behavior**: +- **Result Count**: Adjust default number of search results (1-100) +- **BM25 Toggle**: Enable/disable keyword matching boost +- **Similarity Threshold**: Fine-tune match sensitivity (0.0-1.0) + +**4. View/Edit Configuration File**: +- **Full File Viewer**: Display complete config with syntax highlighting +- **Editor Instructions**: Commands for nano, vim, VS Code +- **YAML Help**: Format explanation and editing tips + +**5. Reset to Defaults**: +- **Safe Reset**: Confirmation before resetting all settings +- **Clear Explanations**: Shows what defaults will be restored +- **Backup Reminder**: Suggests saving current config first + +**6. 
Advanced Settings**: +- **File Filtering**: Min file size, exclude patterns (view only) +- **Performance Settings**: Batch sizes, streaming thresholds +- **LLM Preferences**: Model rankings and selection priorities + +**Key Settings Dashboard**: +- 📁 **Chunk size**: 2000 characters (with emoji indicators) +- 🧠 **Chunking strategy**: semantic +- 🔍 **Search results**: 10 results +- 📊 **Embedding method**: ollama +- 🚀 **Query expansion**: enabled/disabled +- ⚡ **LLM synthesis**: enabled/disabled **What You Learn**: -- How configuration affects search quality -- YAML configuration format -- Which settings to adjust for different projects -- Where to find advanced options +- **Configuration Impact**: How settings affect search quality and speed +- **Interactive YAML**: Easier than manual editing for beginners +- **Best Practices**: Recommended settings for different project types +- **System Understanding**: How all components work together **CLI Commands Shown**: ```bash @@ -172,7 +258,13 @@ cat /path/to/project/.mini-rag/config.yaml # View config nano /path/to/project/.mini-rag/config.yaml # Edit config ``` -### 6. CLI Command Reference +**Perfect For**: +- Beginners who find YAML intimidating +- Quick adjustments without memorizing syntax +- Understanding what each setting actually does +- Safe experimentation with guided validation + +### 7. CLI Command Reference **Purpose**: Complete command reference for transitioning to CLI diff --git a/examples/config-llm-providers.yaml b/examples/config-llm-providers.yaml index 7d8dbfc..1413e12 100644 --- a/examples/config-llm-providers.yaml +++ b/examples/config-llm-providers.yaml @@ -68,9 +68,9 @@ search: llm: provider: ollama # Use local Ollama ollama_host: localhost:11434 # Default Ollama location - synthesis_model: llama3.2 # Good all-around model - # alternatives: qwen3:0.6b (faster), llama3.2:3b (balanced), llama3.1:8b (quality) - expansion_model: llama3.2 + synthesis_model: qwen3:1.7b # Good all-around model + # alternatives: qwen3:0.6b (faster), qwen2.5:3b (balanced), qwen3:4b (quality) + expansion_model: qwen3:1.7b enable_synthesis: false synthesis_temperature: 0.3 cpu_optimized: true diff --git a/examples/config-quality.yaml b/examples/config-quality.yaml index 844f121..b394733 100644 --- a/examples/config-quality.yaml +++ b/examples/config-quality.yaml @@ -102,7 +102,7 @@ llm: # For even better results, try these model combinations: # • ollama pull nomic-embed-text:latest (best embeddings) # • ollama pull qwen3:1.7b (good general model) -# • ollama pull llama3.2 (excellent for analysis) +# • ollama pull qwen3:4b (excellent for analysis) # # Or adjust these settings for your specific needs: # • similarity_threshold: 0.3 (more selective results) diff --git a/examples/config.yaml b/examples/config.yaml index 1c80d79..8cfef52 100644 --- a/examples/config.yaml +++ b/examples/config.yaml @@ -112,7 +112,7 @@ llm: synthesis_model: auto # Which AI model to use for explanations # 'auto': Picks best available model - RECOMMENDED # 'qwen3:0.6b': Ultra-fast, good for CPU-only computers - # 'llama3.2': Slower but more detailed explanations + # 'qwen3:4b': Slower but more detailed explanations expansion_model: auto # Model for query expansion (usually same as synthesis) diff --git a/install_mini_rag.sh b/install_mini_rag.sh index 4414ad7..0e1f838 100755 --- a/install_mini_rag.sh +++ b/install_mini_rag.sh @@ -462,6 +462,73 @@ install_dependencies() { fi } +# Setup application icon for desktop integration +setup_desktop_icon() { + print_header "Setting Up 
Desktop Integration" + + # Check if we're in a GUI environment + if [ -z "$DISPLAY" ] && [ -z "$WAYLAND_DISPLAY" ]; then + print_info "No GUI environment detected - skipping desktop integration" + return 0 + fi + + local icon_source="$SCRIPT_DIR/assets/Fss_Mini_Rag.png" + local desktop_dir="$HOME/.local/share/applications" + local icon_dir="$HOME/.local/share/icons" + + # Check if icon file exists + if [ ! -f "$icon_source" ]; then + print_warning "Icon file not found at $icon_source" + return 1 + fi + + # Create directories if needed + mkdir -p "$desktop_dir" "$icon_dir" 2>/dev/null + + # Copy icon to standard location + local icon_dest="$icon_dir/fss-mini-rag.png" + if cp "$icon_source" "$icon_dest" 2>/dev/null; then + print_success "Icon installed to $icon_dest" + else + print_warning "Could not install icon (permissions?)" + return 1 + fi + + # Create desktop entry + local desktop_file="$desktop_dir/fss-mini-rag.desktop" + cat > "$desktop_file" << EOF +[Desktop Entry] +Name=FSS-Mini-RAG +Comment=Fast Semantic Search for Code and Documents +Exec=$SCRIPT_DIR/rag-tui +Icon=fss-mini-rag +Terminal=true +Type=Application +Categories=Development;Utility;TextEditor; +Keywords=search;code;rag;semantic;ai; +StartupNotify=true +EOF + + if [ -f "$desktop_file" ]; then + chmod +x "$desktop_file" + print_success "Desktop entry created" + + # Update desktop database if available + if command_exists update-desktop-database; then + update-desktop-database "$desktop_dir" 2>/dev/null + print_info "Desktop database updated" + fi + + print_info "✨ FSS-Mini-RAG should now appear in your application menu!" + print_info " Look for it in Development or Utility categories" + else + print_warning "Could not create desktop entry" + return 1 + fi + + return 0 +} + # Setup ML models based on configuration setup_ml_models() { if [ "$INSTALL_TYPE" != "full" ]; then @@ -794,6 +861,9 @@ main() { fi setup_ml_models + # Setup desktop integration with icon + setup_desktop_icon + if test_installation; then show_completion else diff --git a/install_windows.bat b/install_windows.bat index 20db0cb..81b20aa 100644 --- a/install_windows.bat +++ b/install_windows.bat @@ -1,124 +1,343 @@ @echo off -REM FSS-Mini-RAG Windows Installer - Simple & Reliable +REM FSS-Mini-RAG Windows Installer - Beautiful & Comprehensive +setlocal enabledelayedexpansion + +REM Enable colors and unicode for modern Windows +chcp 65001 >nul 2>&1 echo. -echo =================================================== -echo FSS-Mini-RAG Windows Setup -echo =================================================== +echo ╔══════════════════════════════════════════════════╗ +echo ║ FSS-Mini-RAG Windows Installer ║ +echo ║ Fast Semantic Search for Code ║ +echo ╚══════════════════════════════════════════════════╝ echo. +echo 🚀 Comprehensive installation process: +echo • Python environment setup and validation +echo • Smart dependency management +echo • Optional AI model downloads (with your consent) +echo • System testing and verification +echo • Interactive tutorial (optional) +echo. +echo 💡 Note: You'll be asked before downloading any models +echo. + +set /p "continue=Begin installation? [Y/n]: " +if /i "!continue!"=="n" ( + echo Installation cancelled. + pause + exit /b 0 +) REM Get script directory set "SCRIPT_DIR=%~dp0" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" -echo [1/4] Checking Python... +echo. +echo ══════════════════════════════════════════════════ +echo [1/5] Checking Python Environment... python --version >nul 2>&1 if errorlevel 1 ( - echo ERROR: Python not found! 
+ echo ❌ ERROR: Python not found! echo. - echo Please install Python from: https://python.org/downloads - echo Make sure to check "Add Python to PATH" during installation + echo 📦 Please install Python from: https://python.org/downloads + echo 🔧 Installation requirements: + echo • Python 3.8 or higher + echo • Make sure to check "Add Python to PATH" during installation + echo • Restart your command prompt after installation + echo. + echo 💡 Quick install options: + echo • Download from python.org (recommended) + echo • Or use: winget install Python.Python.3.11 + echo • Or use: choco install python311 echo. pause exit /b 1 ) for /f "tokens=2" %%i in ('python --version 2^>^&1') do set "PYTHON_VERSION=%%i" -echo Found Python %PYTHON_VERSION% +echo ✅ Found Python !PYTHON_VERSION! + +REM Check Python version (basic check for 3.x) +for /f "tokens=1 delims=." %%a in ("!PYTHON_VERSION!") do set "MAJOR_VERSION=%%a" +if !MAJOR_VERSION! LSS 3 ( + echo ❌ ERROR: Python !PYTHON_VERSION! found, but Python 3.8+ required + echo 📦 Please upgrade Python to 3.8 or higher + pause + exit /b 1 +) echo. -echo [2/4] Creating virtual environment... +echo ══════════════════════════════════════════════════ +echo [2/5] Creating Python Virtual Environment... if exist "%SCRIPT_DIR%\.venv" ( - echo Removing old virtual environment... + echo 🔄 Removing old virtual environment... rmdir /s /q "%SCRIPT_DIR%\.venv" 2>nul -) - -python -m venv "%SCRIPT_DIR%\.venv" -if errorlevel 1 ( - echo ERROR: Failed to create virtual environment - pause - exit /b 1 -) -echo Virtual environment created successfully - -echo. -echo [3/4] Installing dependencies... -echo This may take a few minutes... -call "%SCRIPT_DIR%\.venv\Scripts\activate.bat" -"%SCRIPT_DIR%\.venv\Scripts\python.exe" -m pip install --upgrade pip --quiet -"%SCRIPT_DIR%\.venv\Scripts\pip.exe" install -r "%SCRIPT_DIR%\requirements.txt" -if errorlevel 1 ( - echo ERROR: Failed to install dependencies - pause - exit /b 1 -) -echo Dependencies installed successfully - -echo. -echo [4/4] Testing installation... -"%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder; print('Import test: OK')" 2>nul -if errorlevel 1 ( - echo ERROR: Installation test failed - pause - exit /b 1 -) - -echo. -echo =================================================== -echo INSTALLATION SUCCESSFUL! -echo =================================================== -echo. -echo Quick start: -echo rag.bat - Interactive interface -echo rag.bat help - Show all commands -echo. - -REM Check for Ollama and offer model setup -call :check_ollama - -echo. -echo Setup complete! FSS-Mini-RAG is ready to use. -set /p choice="Press Enter to continue or 'test' to run quick test: " -if /i "%choice%"=="test" ( - echo. - echo Running quick test... - call "%SCRIPT_DIR%\.venv\Scripts\activate.bat" - "%SCRIPT_DIR%\.venv\Scripts\python.exe" rag-mini.py index . --force - if not errorlevel 1 ( - "%SCRIPT_DIR%\.venv\Scripts\python.exe" rag-mini.py search . "embedding" --top-k 3 + if exist "%SCRIPT_DIR%\.venv" ( + echo ⚠️ Could not remove old environment, creating anyway... ) ) +echo 📁 Creating fresh virtual environment... +python -m venv "%SCRIPT_DIR%\.venv" +if errorlevel 1 ( + echo ❌ ERROR: Failed to create virtual environment + echo. + echo 🔧 This might be because: + echo • Python venv module is not installed + echo • Insufficient permissions + echo • Path contains special characters + echo. 
+ echo 💡 Try: python -m pip install --user virtualenv + pause + exit /b 1 +) +echo ✅ Virtual environment created successfully + +echo. +echo ══════════════════════════════════════════════════ +echo [3/5] Installing Python Dependencies... +echo 📦 This may take 2-3 minutes depending on your internet speed... +echo. + +call "%SCRIPT_DIR%\.venv\Scripts\activate.bat" +if errorlevel 1 ( + echo ❌ ERROR: Could not activate virtual environment + pause + exit /b 1 +) + +echo 🔧 Upgrading pip... +"%SCRIPT_DIR%\.venv\Scripts\python.exe" -m pip install --upgrade pip --quiet +if errorlevel 1 ( + echo ⚠️ Warning: Could not upgrade pip, continuing anyway... +) + +echo 📚 Installing core dependencies (lancedb, pandas, numpy, etc.)... +echo This provides semantic search capabilities +"%SCRIPT_DIR%\.venv\Scripts\pip.exe" install -r "%SCRIPT_DIR%\requirements.txt" +if errorlevel 1 ( + echo ❌ ERROR: Failed to install dependencies + echo. + echo 🔧 Possible solutions: + echo • Check internet connection + echo • Try running as administrator + echo • Check if antivirus is blocking pip + echo • Manually run: pip install -r requirements.txt + echo. + pause + exit /b 1 +) +echo ✅ Dependencies installed successfully + +echo. +echo ══════════════════════════════════════════════════ +echo [4/5] Testing Installation... +echo 🧪 Verifying Python imports... +"%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('✅ Core imports successful')" 2>nul +if errorlevel 1 ( + echo ❌ ERROR: Installation test failed + echo. + echo 🔧 This usually means: + echo • Dependencies didn't install correctly + echo • Virtual environment is corrupted + echo • Python path issues + echo. + echo 💡 Try running: pip install -r requirements.txt + pause + exit /b 1 +) + +echo 🔍 Testing embedding system... +"%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder; embedder = CodeEmbedder(); info = embedder.get_embedding_info(); print(f'✅ Embedding method: {info[\"method\"]}')" 2>nul +if errorlevel 1 ( + echo ⚠️ Warning: Embedding test inconclusive, but core system is ready +) + +echo. +echo ══════════════════════════════════════════════════ +echo [5/6] Setting Up Desktop Integration... +call :setup_windows_icon + +echo. +echo ══════════════════════════════════════════════════ +echo [6/6] Checking AI Features (Optional)... +call :check_ollama_enhanced + +echo. +echo ╔══════════════════════════════════════════════════╗ +echo ║ INSTALLATION SUCCESSFUL! ║ +echo ╚══════════════════════════════════════════════════╝ +echo. +echo 🎯 Quick Start Options: +echo. +echo 🎨 For Beginners (Recommended): +echo rag.bat - Interactive interface with guided setup +echo. +echo 💻 For Developers: +echo rag.bat index C:\myproject - Index a project +echo rag.bat search C:\myproject "authentication" - Search project +echo rag.bat help - Show all commands +echo. + +REM Offer interactive tutorial +echo 🧪 Quick Test Available: +echo Test FSS-Mini-RAG with a small sample project (takes ~30 seconds) +echo. +set /p "run_test=Run interactive tutorial now? [Y/n]: " +if /i "!run_test!" NEQ "n" ( + call :run_tutorial +) else ( + echo 📚 You can run the tutorial anytime with: rag.bat +) + +echo. +echo 🎉 Setup complete! FSS-Mini-RAG is ready to use. +echo 💡 Pro tip: Try indexing any folder with text files - code, docs, notes! echo. pause exit /b 0 -:check_ollama +:check_ollama_enhanced +echo 🤖 Checking for AI capabilities... echo. -echo Checking for AI features... 
-REM Simple Ollama check +REM Check if Ollama is installed +where ollama >nul 2>&1 +if errorlevel 1 ( + echo ⚠️ Ollama not installed - using basic search mode + echo. + echo 🎯 For Enhanced AI Features: + echo • 📥 Install Ollama: https://ollama.com/download + echo • 🔄 Run: ollama serve + echo • 🧠 Download model: ollama pull qwen3:1.7b + echo. + echo 💡 Benefits of AI features: + echo • Smart query expansion for better search results + echo • Interactive exploration mode with conversation memory + echo • AI-powered synthesis of search results + echo • Natural language understanding of your questions + echo. + goto :eof +) + +REM Check if Ollama server is running curl -s http://localhost:11434/api/version >nul 2>&1 if errorlevel 1 ( - echo Ollama not detected - basic search mode available + echo 🟡 Ollama installed but not running echo. - echo For AI features (synthesis, exploration): - echo 1. Install Ollama: https://ollama.com/download - echo 2. Run: ollama serve - echo 3. Run: ollama pull qwen3:1.7b - return + set /p "start_ollama=Start Ollama server now? [Y/n]: " + if /i "!start_ollama!" NEQ "n" ( + echo 🚀 Starting Ollama server... + start /b ollama serve + timeout /t 3 /nobreak >nul + curl -s http://localhost:11434/api/version >nul 2>&1 + if errorlevel 1 ( + echo ⚠️ Could not start Ollama automatically + echo 💡 Please run: ollama serve + ) else ( + echo ✅ Ollama server started successfully! + ) + ) +) else ( + echo ✅ Ollama server is running! ) -echo Ollama detected! - -REM Check for any LLM models +REM Check for available models +echo 🔍 Checking for AI models... ollama list 2>nul | findstr /v "NAME" | findstr /v "^$" >nul if errorlevel 1 ( - echo No LLM models found + echo 📦 No AI models found echo. - echo Recommended: ollama pull qwen3:1.7b - echo This enables AI synthesis and exploration features + echo 🧠 Recommended Models (choose one): + echo • qwen3:1.7b - Excellent for RAG (1.4GB, recommended) + echo • qwen3:0.6b - Lightweight and fast (~500MB) + echo • qwen3:4b - Higher quality but slower (~2.5GB) + echo. + set /p "install_model=Download qwen3:1.7b model now? [Y/n]: " + if /i "!install_model!" NEQ "n" ( + echo 📥 Downloading qwen3:1.7b model... + echo This may take 5-10 minutes depending on your internet speed + ollama pull qwen3:1.7b + if errorlevel 1 ( + echo ⚠️ Download failed - you can try again later with: ollama pull qwen3:1.7b + ) else ( + echo ✅ Model downloaded successfully! AI features are now available. + ) + ) ) else ( - echo LLM models found - AI features available! + echo ✅ AI models found - full AI features available! + echo 🎉 Your system supports query expansion, exploration mode, and synthesis! ) -return \ No newline at end of file +goto :eof + +:run_tutorial +echo. +echo ═══════════════════════════════════════════════════ +echo 🧪 Running Interactive Tutorial +echo ═══════════════════════════════════════════════════ +echo. +echo 📚 This tutorial will: +echo • Index the FSS-Mini-RAG documentation +echo • Show you how to search effectively +echo • Demonstrate AI features (if available) +echo. + +call "%SCRIPT_DIR%\.venv\Scripts\activate.bat" + +echo 📁 Indexing project for demonstration... +"%SCRIPT_DIR%\.venv\Scripts\python.exe" rag-mini.py index "%SCRIPT_DIR%" >nul 2>&1 +if errorlevel 1 ( + echo ❌ Indexing failed - please check the installation + goto :eof +) + +echo ✅ Indexing complete! +echo. +echo 🔍 Example search: "embedding" +"%SCRIPT_DIR%\.venv\Scripts\python.exe" rag-mini.py search "%SCRIPT_DIR%" "embedding" --top-k 3 +echo. 
+echo 🎯 Try the interactive interface: +echo rag.bat +echo. +echo 💡 You can now search any project by indexing it first! +goto :eof + +:setup_windows_icon +echo 🎨 Setting up application icon and shortcuts... + +REM Check if icon exists +if not exist "%SCRIPT_DIR%\assets\Fss_Mini_Rag.png" ( + echo ⚠️ Icon file not found - skipping desktop integration + goto :eof +) + +REM Create desktop shortcut +echo 📱 Creating desktop shortcut... +set "desktop=%USERPROFILE%\Desktop" +set "shortcut=%desktop%\FSS-Mini-RAG.lnk" + +REM Use PowerShell to create shortcut with icon +powershell -Command "& {$WshShell = New-Object -comObject WScript.Shell; $Shortcut = $WshShell.CreateShortcut('%shortcut%'); $Shortcut.TargetPath = '%SCRIPT_DIR%\rag.bat'; $Shortcut.WorkingDirectory = '%SCRIPT_DIR%'; $Shortcut.Description = 'FSS-Mini-RAG - Fast Semantic Search'; $Shortcut.Save()}" >nul 2>&1 + +if exist "%shortcut%" ( + echo ✅ Desktop shortcut created +) else ( + echo ⚠️ Could not create desktop shortcut +) + +REM Create Start Menu shortcut +echo 📂 Creating Start Menu entry... +set "startmenu=%APPDATA%\Microsoft\Windows\Start Menu\Programs" +set "startshortcut=%startmenu%\FSS-Mini-RAG.lnk" + +powershell -Command "& {$WshShell = New-Object -comObject WScript.Shell; $Shortcut = $WshShell.CreateShortcut('%startshortcut%'); $Shortcut.TargetPath = '%SCRIPT_DIR%\rag.bat'; $Shortcut.WorkingDirectory = '%SCRIPT_DIR%'; $Shortcut.Description = 'FSS-Mini-RAG - Fast Semantic Search'; $Shortcut.Save()}" >nul 2>&1 + +if exist "%startshortcut%" ( + echo ✅ Start Menu entry created +) else ( + echo ⚠️ Could not create Start Menu entry +) + +echo 💡 FSS-Mini-RAG shortcuts have been created on your Desktop and Start Menu +echo You can now launch the application from either location +goto :eof \ No newline at end of file diff --git a/mini_rag/config.py b/mini_rag/config.py index 81926ad..1eab053 100644 --- a/mini_rag/config.py +++ b/mini_rag/config.py @@ -104,9 +104,9 @@ class LLMConfig: # Recommended model (excellent quality but larger) "qwen3:4b", - # Common fallbacks (only include models we know exist) - "llama3.2:1b", + # Common fallbacks (prioritize Qwen models) "qwen2.5:1.5b", + "qwen2.5:3b", ] diff --git a/mini_rag/explorer.py b/mini_rag/explorer.py index 9e4c379..c95a7ec 100644 --- a/mini_rag/explorer.py +++ b/mini_rag/explorer.py @@ -115,12 +115,13 @@ class CodeExplorer: # Add to conversation history self.current_session.add_exchange(question, results, synthesis) - # Format response with exploration context - response = self._format_exploration_response( - question, synthesis, len(results), search_time, synthesis_time - ) + # Streaming already displayed the response + # Just return minimal status for caller + session_duration = time.time() - self.current_session.started_at + exchange_count = len(self.current_session.conversation_history) - return response + status = f"\n📊 Session: {session_duration/60:.1f}m | Question #{exchange_count} | Results: {len(results)} | Time: {search_time+synthesis_time:.1f}s" + return status def _build_contextual_prompt(self, question: str, results: List[Any]) -> str: """Build a prompt that includes conversation context.""" @@ -185,33 +186,22 @@ CURRENT QUESTION: "{question}" RELEVANT INFORMATION FOUND: {results_text} -Please provide a helpful analysis in JSON format: +Please provide a helpful, natural explanation that answers their question. Write as if you're having a friendly conversation with a colleague who's exploring this project. 
-{{ - "summary": "Clear explanation of what you found and how it answers their question", - "key_points": [ - "Most important insight from the information", - "Secondary important point or relationship", - "Third key point or practical consideration" - ], - "code_examples": [ - "Relevant example or pattern from the information", - "Another useful example or demonstration" - ], - "suggested_actions": [ - "Specific next step they could take", - "Additional exploration or investigation suggestion", - "Practical way to apply this information" - ], - "confidence": 0.85 -}} +Structure your response to include: +1. A clear explanation of what you found and how it answers their question +2. The most important insights from the information you discovered +3. Relevant examples or code patterns when helpful +4. Practical next steps they could take Guidelines: -- Be educational and break things down clearly +- Write in a conversational, friendly tone +- Be educational but not condescending - Reference specific files and information when helpful - Give practical, actionable suggestions -- Keep explanations beginner-friendly but not condescending -- Connect information to their question directly +- Connect everything back to their original question +- Use natural language, not structured formats +- Break complex topics into understandable pieces """ return prompt @@ -219,16 +209,12 @@ Guidelines: def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult: """Synthesize results with full context and thinking.""" try: - # TEMPORARILY: Use simple non-streaming call to avoid flow issues - # TODO: Re-enable streaming once flow is stable - response = self.synthesizer._call_ollama(prompt, temperature=0.2, disable_thinking=False) + # Use streaming with thinking visible (don't collapse) + response = self.synthesizer._call_ollama(prompt, temperature=0.2, disable_thinking=False, use_streaming=True, collapse_thinking=False) thinking_stream = "" - # Display simple thinking indicator - if response and len(response) > 200: - print("\n💭 Analysis in progress...") - - # Don't display thinking stream again - keeping it simple for now + # Streaming already shows thinking and response + # No need for additional indicators if not response: return SynthesisResult( @@ -239,40 +225,14 @@ Guidelines: confidence=0.0 ) - # Parse the structured response - try: - # Extract JSON from response - start_idx = response.find('{') - end_idx = response.rfind('}') + 1 - if start_idx >= 0 and end_idx > start_idx: - json_str = response[start_idx:end_idx] - data = json.loads(json_str) - - return SynthesisResult( - summary=data.get('summary', 'Analysis completed'), - key_points=data.get('key_points', []), - code_examples=data.get('code_examples', []), - suggested_actions=data.get('suggested_actions', []), - confidence=float(data.get('confidence', 0.7)) - ) - else: - # Fallback: use raw response as summary - return SynthesisResult( - summary=response[:400] + '...' 
if len(response) > 400 else response, - key_points=[], - code_examples=[], - suggested_actions=[], - confidence=0.5 - ) - - except json.JSONDecodeError: - return SynthesisResult( - summary="Analysis completed but format parsing failed", - key_points=[], - code_examples=[], - suggested_actions=["Try rephrasing your question"], - confidence=0.3 - ) + # Use natural language response directly + return SynthesisResult( + summary=response.strip(), + key_points=[], # Not used with natural language responses + code_examples=[], # Not used with natural language responses + suggested_actions=[], # Not used with natural language responses + confidence=0.85 # High confidence for natural responses + ) except Exception as e: logger.error(f"Context synthesis failed: {e}") @@ -300,28 +260,11 @@ Guidelines: output.append("=" * 60) output.append("") - # Main analysis - output.append(f"📝 Analysis:") - output.append(f" {synthesis.summary}") + # Response was already displayed via streaming + # Just show completion status + output.append("✅ Analysis complete") + output.append("") output.append("") - - if synthesis.key_points: - output.append("🔍 Key Insights:") - for point in synthesis.key_points: - output.append(f" • {point}") - output.append("") - - if synthesis.code_examples: - output.append("💡 Code Examples:") - for example in synthesis.code_examples: - output.append(f" {example}") - output.append("") - - if synthesis.suggested_actions: - output.append("🎯 Next Steps:") - for action in synthesis.suggested_actions: - output.append(f" • {action}") - output.append("") # Confidence and context indicator confidence_emoji = "🟢" if synthesis.confidence > 0.7 else "🟡" if synthesis.confidence > 0.4 else "🔴" diff --git a/mini_rag/llm_safeguards.py b/mini_rag/llm_safeguards.py index eb0f8f2..2fc5238 100644 --- a/mini_rag/llm_safeguards.py +++ b/mini_rag/llm_safeguards.py @@ -195,7 +195,7 @@ class ModelRunawayDetector: • Try a more specific question • Break complex questions into smaller parts • Use exploration mode which handles context better: `rag-mini explore` -• Consider: A larger model (qwen3:1.7b or qwen3:3b) would help""" +• Consider: A larger model (qwen3:1.7b or qwen3:4b) would help""" def _explain_thinking_loop(self) -> str: return """🧠 The AI got caught in a "thinking loop" - overthinking the response. 
@@ -266,7 +266,7 @@ class ModelRunawayDetector: # Universal suggestions suggestions.extend([ - "Consider using a larger model if available (qwen3:1.7b or qwen3:3b)", + "Consider using a larger model if available (qwen3:1.7b or qwen3:4b)", "Check model status: `ollama list`" ]) diff --git a/mini_rag/llm_synthesizer.py b/mini_rag/llm_synthesizer.py index 0dcda93..b85056e 100644 --- a/mini_rag/llm_synthesizer.py +++ b/mini_rag/llm_synthesizer.py @@ -72,8 +72,8 @@ class LLMSynthesizer: else: # Fallback rankings if no config model_rankings = [ - "qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "llama3.2:1b", - "qwen2.5:1.5b", "qwen3:3b", "qwen2.5-coder:1.5b" + "qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "qwen2.5:3b", + "qwen2.5:1.5b", "qwen2.5-coder:1.5b" ] # Find first available model from our ranked list (exact matches first) @@ -119,7 +119,7 @@ class LLMSynthesizer: self._ensure_initialized() return len(self.available_models) > 0 - def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False, use_streaming: bool = False) -> Optional[str]: + def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False, use_streaming: bool = True, collapse_thinking: bool = True) -> Optional[str]: """Make a call to Ollama API with safeguards.""" start_time = time.time() @@ -181,9 +181,9 @@ class LLMSynthesizer: } } - # Handle streaming with early stopping + # Handle streaming with thinking display if use_streaming: - return self._handle_streaming_with_early_stop(payload, model_to_use, use_thinking, start_time) + return self._handle_streaming_with_thinking_display(payload, model_to_use, use_thinking, start_time, collapse_thinking) response = requests.post( f"{self.ollama_url}/api/generate", @@ -284,6 +284,130 @@ This is normal with smaller AI models and helps ensure you get quality responses This is normal with smaller AI models and helps ensure you get quality responses.""" + def _handle_streaming_with_thinking_display(self, payload: dict, model_name: str, use_thinking: bool, start_time: float, collapse_thinking: bool = True) -> Optional[str]: + """Handle streaming response with real-time thinking token display.""" + import json + import sys + + try: + response = requests.post( + f"{self.ollama_url}/api/generate", + json=payload, + stream=True, + timeout=65 + ) + + if response.status_code != 200: + logger.error(f"Ollama API error: {response.status_code}") + return None + + full_response = "" + thinking_content = "" + is_in_thinking = False + is_thinking_complete = False + thinking_lines_printed = 0 + + # ANSI escape codes for colors and cursor control + GRAY = '\033[90m' # Dark gray for thinking + LIGHT_GRAY = '\033[37m' # Light gray alternative + RESET = '\033[0m' # Reset color + CLEAR_LINE = '\033[2K' # Clear entire line + CURSOR_UP = '\033[A' # Move cursor up one line + + print(f"\n💭 {GRAY}Thinking...{RESET}", flush=True) + + for line in response.iter_lines(): + if line: + try: + chunk_data = json.loads(line.decode('utf-8')) + chunk_text = chunk_data.get('response', '') + + if chunk_text: + full_response += chunk_text + + # Handle thinking tokens + if use_thinking and '<think>' in chunk_text: + is_in_thinking = True + chunk_text = chunk_text.replace('<think>', '') + + if is_in_thinking and '</think>' in chunk_text: + is_in_thinking = False + is_thinking_complete = True + chunk_text = chunk_text.replace('</think>', '') + + if collapse_thinking: + # Clear thinking content and show completion + # Move cursor up to clear thinking lines + for _ in range(thinking_lines_printed + 1): + 
print(f"{CURSOR_UP}{CLEAR_LINE}", end='', flush=True) + + print(f"💭 {GRAY}Thinking complete ✓{RESET}", flush=True) + thinking_lines_printed = 0 + else: + # Keep thinking visible, just show completion + print(f"\n💭 {GRAY}Thinking complete ✓{RESET}", flush=True) + + print("🤖 AI Response:", flush=True) + continue + + # Display thinking content in gray with better formatting + if is_in_thinking and chunk_text.strip(): + thinking_content += chunk_text + + # Handle line breaks and word wrapping properly + if ' ' in chunk_text or '\n' in chunk_text or len(thinking_content) > 100: + # Split by sentences for better readability + sentences = thinking_content.replace('\n', ' ').split('. ') + + for sentence in sentences[:-1]: # Process complete sentences + sentence = sentence.strip() + if sentence: + # Word wrap long sentences + words = sentence.split() + line = "" + for word in words: + if len(line + " " + word) > 70: + if line: + print(f"{GRAY} {line.strip()}{RESET}", flush=True) + thinking_lines_printed += 1 + line = word + else: + line += " " + word if line else word + + if line.strip(): + print(f"{GRAY} {line.strip()}.{RESET}", flush=True) + thinking_lines_printed += 1 + + # Keep the last incomplete sentence for next iteration + thinking_content = sentences[-1] if sentences else "" + + # Display regular response content (skip any leftover thinking) + elif not is_in_thinking and is_thinking_complete and chunk_text.strip(): + # Filter out any remaining thinking tags that might leak through + clean_text = chunk_text + if '' in clean_text or '' in clean_text: + clean_text = clean_text.replace('', '').replace('', '') + + if clean_text.strip(): + print(clean_text, end='', flush=True) + + # Check if response is done + if chunk_data.get('done', False): + print() # Final newline + break + + except json.JSONDecodeError: + continue + except Exception as e: + logger.error(f"Error processing stream chunk: {e}") + continue + + return full_response + + except Exception as e: + logger.error(f"Streaming failed: {e}") + return None + def _handle_streaming_with_early_stop(self, payload: dict, model_name: str, use_thinking: bool, start_time: float) -> Optional[str]: """Handle streaming response with intelligent early stopping.""" import json diff --git a/mini_rag/query_expander.py b/mini_rag/query_expander.py index c2a8e44..95c00b0 100644 --- a/mini_rag/query_expander.py +++ b/mini_rag/query_expander.py @@ -170,8 +170,8 @@ Expanded query:""" # Use same model rankings as main synthesizer for consistency expansion_preferences = [ - "qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "llama3.2:1b", - "qwen2.5:1.5b", "qwen3:3b", "qwen2.5-coder:1.5b" + "qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "qwen2.5:3b", + "qwen2.5:1.5b", "qwen2.5-coder:1.5b" ] for preferred in expansion_preferences: diff --git a/rag-mini.py b/rag-mini.py index 4d7451e..51bb166 100644 --- a/rag-mini.py +++ b/rag-mini.py @@ -142,8 +142,8 @@ def search_project(project_path: Path, query: str, top_k: int = 10, synthesize: print(" • Search for file types: \"python class\" or \"javascript function\"") print() print("⚙️ Configuration adjustments:") - print(f" • Lower threshold: ./rag-mini search {project_path} \"{query}\" --threshold 0.05") - print(" • More results: add --top-k 20") + print(f" • Lower threshold: ./rag-mini search \"{project_path}\" \"{query}\" --threshold 0.05") + print(f" • More results: ./rag-mini search \"{project_path}\" \"{query}\" --top-k 20") print() print("📚 Need help? 
See: docs/TROUBLESHOOTING.md") return @@ -201,7 +201,7 @@ def search_project(project_path: Path, query: str, top_k: int = 10, synthesize: else: print("❌ LLM synthesis unavailable") print(" • Ensure Ollama is running: ollama serve") - print(" • Install a model: ollama pull llama3.2") + print(" • Install a model: ollama pull qwen3:1.7b") print(" • Check connection to http://localhost:11434") # Save last search for potential enhancements @@ -317,12 +317,27 @@ def explore_interactive(project_path: Path): if not explorer.start_exploration_session(): sys.exit(1) + # Show enhanced first-time guidance print(f"\n🤔 Ask your first question about {project_path.name}:") + print() + print("💡 Enter your search query or question below:") + print(' Examples: "How does authentication work?" or "Show me error handling"') + print() + print("🔧 Quick options:") + print(" 1. Help - Show example questions") + print(" 2. Status - Project information") + print(" 3. Suggest - Get a random starter question") + print() + + is_first_question = True while True: try: - # Get user input - question = input("\n> ").strip() + # Get user input with clearer prompt + if is_first_question: + question = input("📝 Enter question or option (1-3): ").strip() + else: + question = input("\n> ").strip() # Handle exit commands if question.lower() in ['quit', 'exit', 'q']: @@ -331,14 +346,17 @@ def explore_interactive(project_path: Path): # Handle empty input if not question: - print("Please enter a question or 'quit' to exit.") + if is_first_question: + print("Please enter a question or try option 3 for a suggestion.") + else: + print("Please enter a question or 'quit' to exit.") continue - # Special commands - if question.lower() in ['help', 'h']: + # Handle numbered options and special commands + if question in ['1'] or question.lower() in ['help', 'h']: print(""" 🧠 EXPLORATION MODE HELP: - • Ask any question about the codebase + • Ask any question about your documents or code • I remember our conversation for follow-up questions • Use 'why', 'how', 'explain' for detailed reasoning • Type 'summary' to see session overview @@ -346,11 +364,53 @@ def explore_interactive(project_path: Path): 💡 Example questions: • "How does authentication work?" + • "What are the main components?" + • "Show me error handling patterns" • "Why is this function slow?" - • "Explain the database connection logic" - • "What are the security concerns here?" + • "What security measures are in place?" + • "How does data flow through this system?" 
""") continue + + elif question in ['2'] or question.lower() == 'status': + print(f""" +📊 PROJECT STATUS: {project_path.name} + • Location: {project_path} + • Exploration session active + • AI model ready for questions + • Conversation memory enabled +""") + continue + + elif question in ['3'] or question.lower() == 'suggest': + # Random starter questions for first-time users + if is_first_question: + import random + starters = [ + "What are the main components of this project?", + "How is error handling implemented?", + "Show me the authentication and security logic", + "What are the key functions I should understand first?", + "How does data flow through this system?", + "What configuration options are available?", + "Show me the most important files to understand" + ] + suggested = random.choice(starters) + print(f"\n💡 Suggested question: {suggested}") + print(" Press Enter to use this, or type your own question:") + + next_input = input("📝 > ").strip() + if not next_input: # User pressed Enter to use suggestion + question = suggested + else: + question = next_input + else: + # For subsequent questions, could add AI-powered suggestions here + print("\n💡 Based on our conversation, you might want to ask:") + print(' "Can you explain that in more detail?"') + print(' "What are the security implications?"') + print(' "Show me related code examples"') + continue if question.lower() == 'summary': print("\n" + explorer.get_session_summary()) @@ -361,6 +421,9 @@ def explore_interactive(project_path: Path): print("🧠 Thinking with AI model...") response = explorer.explore_question(question) + # Mark as no longer first question after processing + is_first_question = False + if response: print(f"\n{response}") else: diff --git a/rag-tui.py b/rag-tui.py index aeba78a..5895310 100755 --- a/rag-tui.py +++ b/rag-tui.py @@ -916,28 +916,36 @@ class SimpleTUI: input("Press Enter to continue...") return - print("\n🤔 Ask questions about the codebase:") - print(" Quick: 0=quit, 1=summary, 2=history, 3=suggest next question") + # Show initial prompt + self._show_exploration_prompt(explorer, is_first=True) + is_first_question = True while True: try: - question = input("\n> ").strip() + question = input("➤ ").strip() # Handle numbered options if question == '0': print(explorer.end_session()) break elif question == '1': - print("\n" + explorer.get_session_summary()) + # Use improved summary function + summary = self._generate_conversation_summary(explorer) + print(f"\n{summary}") + self._show_exploration_prompt(explorer) continue elif question == '2': if hasattr(explorer.current_session, 'conversation_history') and explorer.current_session.conversation_history: - print("\n🔍 Recent questions:") - for i, exchange in enumerate(explorer.current_session.conversation_history[-3:], 1): - q = exchange["question"][:50] + "..." if len(exchange["question"]) > 50 else exchange["question"] - print(f" {i}. {q}") + print("\n📋 Recent Question History:") + print("═" * 40) + for i, exchange in enumerate(explorer.current_session.conversation_history[-5:], 1): + q = exchange["question"][:60] + "..." if len(exchange["question"]) > 60 else exchange["question"] + confidence = exchange["response"].get("confidence", 0) + print(f" {i}. 
{q} (confidence: {confidence:.0f}%)") + print() else: print("\n📝 No questions asked yet") + self._show_exploration_prompt(explorer) continue elif question == '3': # Generate smart suggestion @@ -945,13 +953,14 @@ class SimpleTUI: if suggested_question: print(f"\n💡 Suggested question: {suggested_question}") print(" Press Enter to use this, or type your own question:") - next_input = input("> ").strip() + next_input = input("➤ ").strip() if not next_input: # User pressed Enter to use suggestion question = suggested_question else: question = next_input else: print("\n💡 No suggestions available yet. Ask a question first!") + self._show_exploration_prompt(explorer) continue # Simple exit handling @@ -961,23 +970,31 @@ class SimpleTUI: # Skip empty input if not question: + print("💡 Please enter a question or choose an option (0-3)") continue # Simple help if question.lower() in ['help', 'h', '?']: - print("\n💡 Just ask any question about the codebase!") - print(" Examples: 'how does search work?' or 'explain the indexing'") - print(" Quick: 0=quit, 1=summary, 2=history, 3=suggest") + print("\n💡 Exploration Help:") + print(" • Just ask any question about the codebase!") + print(" • Examples: 'how does search work?' or 'explain the indexing'") + print(" • Use options 0-3 for quick actions") + self._show_exploration_prompt(explorer) continue - # Process the question immediately - print("🔍 Thinking...") + # Process the question with streaming + print("\n🔍 Starting analysis...") response = explorer.explore_question(question) if response: - print(f"\n{response}\n") + print(f"\n{response}") + is_first_question = False + # Show prompt for next question + self._show_exploration_prompt(explorer) else: - print("❌ Sorry, I couldn't process that question.\n") + print("❌ Sorry, I couldn't process that question.") + print("💡 Try rephrasing or using simpler terms.") + self._show_exploration_prompt(explorer) except KeyboardInterrupt: print(f"\n{explorer.end_session()}") @@ -994,10 +1011,173 @@ class SimpleTUI: # Exploration session completed successfully, return to menu without extra prompt + def _get_context_tokens_estimate(self, explorer): + """Estimate the total tokens used in the conversation context.""" + if not explorer.current_session or not explorer.current_session.conversation_history: + return 0 + + total_chars = 0 + for exchange in explorer.current_session.conversation_history: + total_chars += len(exchange["question"]) + # Estimate response character count (summary + key points) + response = exchange["response"] + total_chars += len(response.get("summary", "")) + for point in response.get("key_points", []): + total_chars += len(point) + + # Rough estimate: 4 characters = 1 token + return total_chars // 4 + + def _get_context_limit_estimate(self): + """Get estimated context limit for current model.""" + # Conservative estimates for common models + return 32000 # Most models we use have 32k context + + def _format_token_display(self, used_tokens, limit_tokens): + """Format token usage display with color coding.""" + percentage = (used_tokens / limit_tokens) * 100 if limit_tokens > 0 else 0 + + if percentage < 50: + color = "🟢" # Green - plenty of space + elif percentage < 75: + color = "🟡" # Yellow - getting full + else: + color = "🔴" # Red - almost full + + return f"{color} Context: {used_tokens}/{limit_tokens} tokens ({percentage:.0f}%)" + + def _show_exploration_prompt(self, explorer, is_first=False): + """Show standardized input prompt for exploration mode.""" + print() + print("═" * 60) + if 
is_first: + print("🤔 Ask your first question about the codebase:") + else: + print("🤔 What would you like to explore next?") + print() + + # Show context usage + used_tokens = self._get_context_tokens_estimate(explorer) + limit_tokens = self._get_context_limit_estimate() + token_display = self._format_token_display(used_tokens, limit_tokens) + print(f"📊 {token_display}") + print() + + print("🔧 Quick Options:") + print(" 0 = Quit exploration 1 = Summarize conversation") + print(" 2 = Show question history 3 = Suggest next question") + print() + print("💬 Enter your question or choose an option:") + + def _generate_conversation_summary(self, explorer): + """Generate a detailed summary of the conversation history.""" + if not explorer.current_session or not explorer.current_session.conversation_history: + return "📝 No conversation to summarize yet. Ask a question first!" + + try: + # Build conversation context + conversation_text = "" + for i, exchange in enumerate(explorer.current_session.conversation_history, 1): + conversation_text += f"Question {i}: {exchange['question']}\n" + conversation_text += f"Response {i}: {exchange['response']['summary']}\n" + # Add key points if available + if exchange['response'].get('key_points'): + for point in exchange['response']['key_points']: + conversation_text += f"- {point}\n" + conversation_text += "\n" + + # Determine summary length based on conversation length + char_count = len(conversation_text) + if char_count < 500: + target_length = "brief" + target_words = "50-80" + elif char_count < 2000: + target_length = "moderate" + target_words = "100-150" + else: + target_length = "comprehensive" + target_words = "200-300" + + # Create summary prompt for natural conversation style + prompt = f"""Please summarize this conversation about the project we've been exploring. Write a {target_length} summary ({target_words} words) in a natural, conversational style that captures: + +1. Main topics we explored together +2. Key insights we discovered +3. Important details we learned +4. Overall understanding we gained + +Conversation: +{conversation_text.strip()} + +Write your summary as if you're explaining to a colleague what we discussed. Use a friendly, informative tone and avoid JSON or structured formats.""" + + # Use the synthesizer to generate summary with streaming and thinking + print("\n💭 Generating summary...") + response = explorer.synthesizer._call_ollama(prompt, temperature=0.1, disable_thinking=False, use_streaming=True) + + if response: + return f"📋 **Conversation Summary**\n\n{response.strip()}" + else: + # Fallback summary + return self._generate_fallback_summary(explorer.current_session.conversation_history) + + except Exception as e: + logger.error(f"Summary generation failed: {e}") + return self._generate_fallback_summary(explorer.current_session.conversation_history) + + def _generate_fallback_summary(self, conversation_history): + """Generate a simple fallback summary when AI summary fails.""" + if not conversation_history: + return "📝 No conversation to summarize yet." 
+ + question_count = len(conversation_history) + topics = [] + + # Extract simple topics from questions + for exchange in conversation_history: + question = exchange["question"].lower() + if "component" in question or "part" in question: + topics.append("system components") + elif "error" in question or "bug" in question: + topics.append("error handling") + elif "security" in question or "auth" in question: + topics.append("security/authentication") + elif "test" in question: + topics.append("testing") + elif "config" in question or "setting" in question: + topics.append("configuration") + elif "performance" in question or "speed" in question: + topics.append("performance") + else: + # Extract first few words as topic + words = question.split()[:3] + topics.append(" ".join(words)) + + unique_topics = list(dict.fromkeys(topics)) # Remove duplicates while preserving order + + summary = f"📋 **Conversation Summary**\n\n" + summary += f"Questions asked: {question_count}\n" + summary += f"Topics explored: {', '.join(unique_topics[:5])}\n" + summary += f"Session duration: {len(conversation_history) * 2} minutes (estimated)\n\n" + summary += "💡 Use option 2 to see recent question history for more details." + + return summary + def _generate_smart_suggestion(self, explorer): """Generate a smart follow-up question based on conversation context.""" if not explorer.current_session or not explorer.current_session.conversation_history: - return None + # First question - provide a random starter question + import random + starters = [ + "What are the main components of this project?", + "How is error handling implemented?", + "Show me the authentication and security logic", + "What are the key functions I should understand first?", + "How does data flow through this system?", + "What configuration options are available?", + "Show me the most important files to understand" + ] + return random.choice(starters) try: # Get recent conversation context @@ -1023,8 +1203,8 @@ Respond with ONLY a single short question that would logically explore deeper or Your suggested question (under 10 words):""" - # Use the synthesizer to generate suggestion - response = explorer.synthesizer._call_ollama(prompt, temperature=0.3, disable_thinking=True) + # Use the synthesizer to generate suggestion with thinking collapse + response = explorer.synthesizer._call_ollama(prompt, temperature=0.3, disable_thinking=False, use_streaming=True, collapse_thinking=True) if response: # Clean up the response - extract just the question @@ -1149,99 +1329,344 @@ Your suggested question (under 10 words):""" input("Press Enter to continue...") def show_configuration(self): - """Show and manage configuration options.""" + """Show and manage configuration options with interactive editing.""" if not self.project_path: print("❌ No project selected") input("Press Enter to continue...") return - self.clear_screen() - self.print_header() - - print("⚙️ Configuration") - print("================") - print() - print(f"Project: {self.project_path.name}") - print() - - config_path = self.project_path / '.mini-rag' / 'config.yaml' - - # Show current configuration if it exists - if config_path.exists(): - print("✅ Configuration file exists") - print(f" Location: {config_path}") + while True: + self.clear_screen() + self.print_header() + + print("⚙️ Configuration Manager") + print("========================") + print() + print(f"Project: {self.project_path.name}") print() + # Load current configuration try: - import yaml - with open(config_path) as f: - config 
= yaml.safe_load(f) + from mini_rag.config import ConfigManager + config_manager = ConfigManager(self.project_path) + config = config_manager.load_config() + config_path = self.project_path / '.mini-rag' / 'config.yaml' print("📋 Current Settings:") - if 'chunking' in config: - chunk_cfg = config['chunking'] - print(f" Chunk size: {chunk_cfg.get('max_size', 2000)} characters") - print(f" Strategy: {chunk_cfg.get('strategy', 'semantic')}") - - if 'embedding' in config: - emb_cfg = config['embedding'] - print(f" Embedding method: {emb_cfg.get('preferred_method', 'auto')}") - - if 'files' in config: - files_cfg = config['files'] - print(f" Min file size: {files_cfg.get('min_file_size', 50)} bytes") - exclude_count = len(files_cfg.get('exclude_patterns', [])) - print(f" Excluded patterns: {exclude_count} patterns") - + print(f" 📁 Chunk size: {config.chunking.max_size} characters") + print(f" 🧠 Chunking strategy: {config.chunking.strategy}") + print(f" 🔍 Search results: {config.search.default_top_k} results") + print(f" 📊 Embedding method: {config.embedding.preferred_method}") + print(f" 🚀 Query expansion: {'enabled' if config.search.expand_queries else 'disabled'}") + print(f" ⚡ LLM synthesis: {'enabled' if config.llm.enable_synthesis else 'disabled'}") print() + print("🛠️ Quick Configuration Options:") + print(" 1. Adjust chunk size (performance vs accuracy)") + print(" 2. Toggle query expansion (smarter searches)") + print(" 3. Configure search behavior") + print(" 4. View/edit full configuration file") + print(" 5. Reset to defaults") + print(" 6. Advanced settings") + print() + print(" V. View current config file") + print(" B. Back to main menu") + except Exception as e: - print(f"⚠️ Could not read config: {e}") + print(f"❌ Error loading configuration: {e}") + print(" A default config will be created when needed") print() - else: - print("⚠️ No configuration file found") - print(" A default config will be created when you index") + print(" B. Back to main menu") + print() - - # Show CLI commands for configuration - self.print_cli_command(f"cat {config_path}", - "View current configuration") - self.print_cli_command(f"nano {config_path}", - "Edit configuration file") - - print("🛠️ Configuration Options:") - print(" • chunking.max_size - How large each searchable chunk is") - print(" • chunking.strategy - 'semantic' (smart) vs 'fixed' (simple)") - print(" • files.exclude_patterns - Skip files matching these patterns") - print(" • embedding.preferred_method - 'ollama', 'ml', 'hash', or 'auto'") - print(" • search.default_top_k - Default number of search results (top-k)") + choice = input("Choose option: ").strip().lower() + + if choice == 'b' or choice == '' or choice == '0': + break + elif choice == 'v': + self._show_config_file(config_path) + elif choice == '1': + self._configure_chunk_size(config_manager, config) + elif choice == '2': + self._toggle_query_expansion(config_manager, config) + elif choice == '3': + self._configure_search_behavior(config_manager, config) + elif choice == '4': + self._edit_config_file(config_path) + elif choice == '5': + self._reset_config(config_manager) + elif choice == '6': + self._advanced_settings(config_manager, config) + else: + print("Invalid option. 
Press Enter to continue...") + input() + + def _show_config_file(self, config_path): + """Display the full configuration file.""" + self.clear_screen() + print("📄 Configuration File Contents") + print("=" * 50) print() - print("📚 References:") - print(" • README.md - Complete configuration documentation") - print(" • examples/config.yaml - Example with all options") - print(" • docs/TUI_GUIDE.md - Detailed TUI walkthrough") - - print() - - # Quick actions if config_path.exists(): - action = input("Quick actions: [V]iew config, [E]dit path, or Enter to continue: ").lower() - if action == 'v': - print("\n" + "="*60) - try: - with open(config_path) as f: - print(f.read()) - except Exception as e: - print(f"Could not read file: {e}") - print("="*60) - input("\nPress Enter to continue...") - elif action == 'e': - print(f"\n💡 To edit configuration:") - print(f" nano {config_path}") - print(f" # Or use your preferred editor") - input("\nPress Enter to continue...") + try: + with open(config_path) as f: + content = f.read() + print(content) + except Exception as e: + print(f"❌ Could not read file: {e}") else: + print("⚠️ Configuration file doesn't exist yet") + print(" It will be created when you first index a project") + + print("\n" + "=" * 50) + input("Press Enter to continue...") + + def _configure_chunk_size(self, config_manager, config): + """Interactive chunk size configuration.""" + self.clear_screen() + print("📁 Chunk Size Configuration") + print("===========================") + print() + print("Chunk size affects both performance and search accuracy:") + print("• Smaller chunks (500-1000): More precise but may miss context") + print("• Medium chunks (1500-2500): Good balance (recommended)") + print("• Larger chunks (3000+): More context but less precise") + print() + print(f"Current chunk size: {config.chunking.max_size} characters") + print() + + print("Quick presets:") + print(" 1. Small (1000) - Precise searching") + print(" 2. Medium (2000) - Balanced (default)") + print(" 3. Large (3000) - More context") + print(" 4. Custom size") + print() + + choice = input("Choose preset or enter custom size: ").strip() + + new_size = None + if choice == '1': + new_size = 1000 + elif choice == '2': + new_size = 2000 + elif choice == '3': + new_size = 3000 + elif choice == '4': + try: + new_size = int(input("Enter custom chunk size (500-5000): ")) + if new_size < 500 or new_size > 5000: + print("❌ Size must be between 500 and 5000") + input("Press Enter to continue...") + return + except ValueError: + print("❌ Invalid number") + input("Press Enter to continue...") + return + elif choice.isdigit(): + try: + new_size = int(choice) + if new_size < 500 or new_size > 5000: + print("❌ Size must be between 500 and 5000") + input("Press Enter to continue...") + return + except ValueError: + pass + + if new_size and new_size != config.chunking.max_size: + config.chunking.max_size = new_size + config_manager.save_config(config) + print(f"\n✅ Chunk size updated to {new_size} characters") + print("💡 Tip: Re-index your project for changes to take effect") + input("Press Enter to continue...") + + def _toggle_query_expansion(self, config_manager, config): + """Toggle query expansion on/off.""" + self.clear_screen() + print("🚀 Query Expansion Configuration") + print("================================") + print() + print("Query expansion automatically adds related terms to your searches") + print("to improve results quality. 
This uses an LLM to understand your") + print("intent and find related concepts.") + print() + print("Benefits:") + print("• Find relevant results even with different terminology") + print("• Better semantic understanding of queries") + print("• Improved search for complex technical concepts") + print() + print("Requirements:") + print("• Ollama with a language model (e.g., qwen3:1.7b)") + print("• Slightly slower search (1-2 seconds)") + print() + + current_status = "enabled" if config.search.expand_queries else "disabled" + print(f"Current status: {current_status}") + print() + + if config.search.expand_queries: + choice = input("Query expansion is currently ON. Turn OFF? [y/N]: ").lower() + if choice == 'y': + config.search.expand_queries = False + config_manager.save_config(config) + print("✅ Query expansion disabled") + else: + choice = input("Query expansion is currently OFF. Turn ON? [y/N]: ").lower() + if choice == 'y': + config.search.expand_queries = True + config_manager.save_config(config) + print("✅ Query expansion enabled") + print("💡 Make sure Ollama is running with a language model") + + input("\nPress Enter to continue...") + + def _configure_search_behavior(self, config_manager, config): + """Configure search behavior settings.""" + self.clear_screen() + print("🔍 Search Behavior Configuration") + print("================================") + print() + print(f"Current settings:") + print(f"• Default results: {config.search.default_top_k}") + print(f"• BM25 keyword boost: {'enabled' if config.search.enable_bm25 else 'disabled'}") + print(f"• Similarity threshold: {config.search.similarity_threshold}") + print() + + print("Configuration options:") + print(" 1. Change default number of results") + print(" 2. Toggle BM25 keyword matching") + print(" 3. Adjust similarity threshold") + print(" B. 
Back") + print() + + choice = input("Choose option: ").strip().lower() + + if choice == '1': + try: + new_top_k = int(input(f"Enter default number of results (current: {config.search.default_top_k}): ")) + if 1 <= new_top_k <= 100: + config.search.default_top_k = new_top_k + config_manager.save_config(config) + print(f"✅ Default results updated to {new_top_k}") + else: + print("❌ Number must be between 1 and 100") + except ValueError: + print("❌ Invalid number") + elif choice == '2': + config.search.enable_bm25 = not config.search.enable_bm25 + config_manager.save_config(config) + status = "enabled" if config.search.enable_bm25 else "disabled" + print(f"✅ BM25 keyword matching {status}") + elif choice == '3': + try: + new_threshold = float(input(f"Enter similarity threshold 0.0-1.0 (current: {config.search.similarity_threshold}): ")) + if 0.0 <= new_threshold <= 1.0: + config.search.similarity_threshold = new_threshold + config_manager.save_config(config) + print(f"✅ Similarity threshold updated to {new_threshold}") + else: + print("❌ Threshold must be between 0.0 and 1.0") + except ValueError: + print("❌ Invalid number") + + if choice != 'b' and choice != '': + input("Press Enter to continue...") + + def _edit_config_file(self, config_path): + """Provide instructions for editing the config file.""" + self.clear_screen() + print("📝 Edit Configuration File") + print("=========================") + print() + + if config_path.exists(): + print(f"Configuration file location:") + print(f" {config_path}") + print() + print("To edit the configuration:") + print(" • Use any text editor (nano, vim, VS Code, etc.)") + print(" • The file is in YAML format with helpful comments") + print(" • Changes take effect after saving") + print() + print("Quick edit commands:") + self.print_cli_command(f"nano {config_path}", "Edit with nano") + self.print_cli_command(f"code {config_path}", "Edit with VS Code") + self.print_cli_command(f"vim {config_path}", "Edit with vim") + else: + print("⚠️ Configuration file doesn't exist yet") + print(" It will be created automatically when you index a project") + + input("\nPress Enter to continue...") + + def _reset_config(self, config_manager): + """Reset configuration to defaults.""" + self.clear_screen() + print("🔄 Reset Configuration") + print("=====================") + print() + print("This will reset all settings to default values:") + print("• Chunk size: 2000 characters") + print("• Chunking strategy: semantic") + print("• Query expansion: disabled") + print("• Search results: 10") + print("• Embedding method: auto") + print() + + confirm = input("Are you sure you want to reset to defaults? 
[y/N]: ").lower() + if confirm == 'y': + from mini_rag.config import RAGConfig + default_config = RAGConfig() + config_manager.save_config(default_config) + print("✅ Configuration reset to defaults") + print("💡 You may want to re-index for changes to take effect") + else: + print("❌ Reset cancelled") + + input("Press Enter to continue...") + + def _advanced_settings(self, config_manager, config): + """Configure advanced settings.""" + self.clear_screen() + print("⚙️ Advanced Configuration") + print("==========================") + print() + print("Advanced settings for power users:") + print() + print(f"Current advanced settings:") + print(f"• Min file size: {config.files.min_file_size} bytes") + print(f"• Streaming threshold: {config.streaming.threshold_bytes} bytes") + print(f"• Embedding batch size: {config.embedding.batch_size}") + print(f"• LLM synthesis: {'enabled' if config.llm.enable_synthesis else 'disabled'}") + print() + + print("Advanced options:") + print(" 1. Configure file filtering") + print(" 2. Adjust performance settings") + print(" 3. LLM model preferences") + print(" B. Back") + print() + + choice = input("Choose option: ").strip().lower() + + if choice == '1': + print("\n📁 File filtering settings:") + print(f"Minimum file size: {config.files.min_file_size} bytes") + print(f"Excluded patterns: {len(config.files.exclude_patterns)} patterns") + print("\n💡 Edit the config file directly for detailed file filtering") + elif choice == '2': + print("\n⚡ Performance settings:") + print(f"Embedding batch size: {config.embedding.batch_size}") + print(f"Streaming threshold: {config.streaming.threshold_bytes}") + print("\n💡 Higher batch sizes = faster indexing but more memory") + elif choice == '3': + print("\n🧠 LLM model preferences:") + if hasattr(config.llm, 'model_rankings') and config.llm.model_rankings: + print("Current model priority order:") + for i, model in enumerate(config.llm.model_rankings[:5], 1): + print(f" {i}. {model}") + print("\n💡 Edit config file to change model preferences") + + if choice != 'b' and choice != '': input("Press Enter to continue...") def show_cli_reference(self):
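+        # Assumption, inferred from the method name and the print_cli_command
+        # pattern used elsewhere in this TUI: this screen lists the copy-paste
+        # rag-mini CLI equivalents of the menu actions.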