diff --git a/docs/DEPLOYMENT_GUIDE.md b/docs/DEPLOYMENT_GUIDE.md new file mode 100644 index 0000000..8e4a54e --- /dev/null +++ b/docs/DEPLOYMENT_GUIDE.md @@ -0,0 +1,381 @@ +# FSS-Mini-RAG Deployment Guide + +> **Run semantic search anywhere - from smartphones to edge devices** +> *Complete guide to deploying FSS-Mini-RAG on every platform imaginable* + +## Platform Compatibility Matrix + +| Platform | Status | AI Features | Installation | Notes | +|----------|--------|-------------|--------------|-------| +| **Linux** | ✅ Full | ✅ Full | `./install_mini_rag.sh` | Primary platform | +| **Windows** | ✅ Full | ✅ Full | `install_windows.bat` | Desktop shortcuts | +| **macOS** | ✅ Full | ✅ Full | `./install_mini_rag.sh` | Works perfectly | +| **Raspberry Pi** | ✅ Excellent | ✅ AI ready | `./install_mini_rag.sh` | ARM64 optimized | +| **Android (Termux)** | ✅ Good | 🟡 Limited | Manual install | Terminal interface | +| **iOS (a-Shell)** | 🟡 Limited | ❌ Text only | Manual install | Sandbox limitations | +| **Docker** | ✅ Excellent | ✅ Full | Dockerfile | Any platform | + +## Desktop & Server Deployment + +### 🐧 **Linux** (Primary Platform) +```bash +# Full installation with AI features +./install_mini_rag.sh + +# What you get: +# ✅ Desktop shortcuts (.desktop files) +# ✅ Application menu integration +# ✅ Full AI model downloads +# ✅ Complete terminal interface +``` + +### 🪟 **Windows** (Fully Supported) +```cmd +# Full installation with desktop integration +install_windows.bat + +# What you get: +# ✅ Desktop shortcuts (.lnk files) +# ✅ Start Menu entries +# ✅ Full AI model downloads +# ✅ Beautiful terminal interface +``` + +### 🍎 **macOS** (Excellent Support) +```bash +# Same as Linux - works perfectly +./install_mini_rag.sh + +# Additional macOS optimizations: +brew install python3 # If needed +brew install ollama # For AI features +``` + +**macOS-specific features:** +- Automatic path detection for common project locations +- Integration with Spotlight search locations 
+- Support for `.app` bundle creation (advanced) + +## Edge Device Deployment + +### 🥧 **Raspberry Pi** (Recommended Edge Platform) + +**Perfect for:** +- Home lab semantic search server +- Portable development environment +- IoT project documentation search +- Offline code search station + +**Installation:** +```bash +# On Raspberry Pi OS (64-bit recommended) +sudo apt update && sudo apt upgrade +./install_mini_rag.sh + +# The installer automatically detects ARM and optimizes: +# ✅ Suggests lightweight models (qwen3:0.6b) +# ✅ Reduces memory usage +# ✅ Enables efficient chunking +``` + +**Raspberry Pi optimized config:** +```yaml +# Automatically generated for Pi +embedding: + preferred_method: ollama + ollama_model: nomic-embed-text # 270MB - perfect for Pi + +llm: + synthesis_model: qwen3:0.6b # 500MB - fast on Pi 4+ + context_window: 4096 # Conservative memory use + cpu_optimized: true + +chunking: + max_size: 1500 # Smaller chunks for efficiency +``` + +**Performance expectations:** +- **Pi 4 (4GB)**: Excellent performance, full AI features +- **Pi 4 (2GB)**: Good performance, text-only or small models +- **Pi 5**: Outstanding performance, handles large models +- **Pi Zero**: Text-only search (hash-based embeddings) + +### 🔧 **Other Edge Devices** + +**NVIDIA Jetson Series:** +- Overkill performance for this use case +- Can run largest models with GPU acceleration +- Perfect for AI-heavy development workstations + +**Intel NUC / Mini PCs:** +- Excellent performance +- Full desktop experience +- Can serve multiple users simultaneously + +**Orange Pi / Rock Pi:** +- Similar to Raspberry Pi +- Same installation process +- May need manual Ollama compilation + +## Mobile Deployment + +### 📱 **Android (Recommended: Termux)** + +**Installation in Termux:** +```bash +# Install Termux from F-Droid (not Play Store) +# In Termux: +pkg update && pkg upgrade +pkg install python python-pip git +pip install --upgrade pip + +# Clone and install FSS-Mini-RAG +git clone 
https://github.com/your-repo/fss-mini-rag +cd fss-mini-rag +pip install -r requirements.txt + +# Quick start +python -m mini_rag index /storage/emulated/0/Documents/myproject +python -m mini_rag search /storage/emulated/0/Documents/myproject "your query" +``` + +**Android-optimized config:** +```yaml +# config-android.yaml +embedding: + preferred_method: hash # No heavy models needed + +chunking: + max_size: 800 # Small chunks for mobile + +files: + min_file_size: 20 # Include more small files + +llm: + enable_synthesis: false # Text-only for speed +``` + +**What works on Android:** +- ✅ Full text search and indexing +- ✅ Terminal interface (`rag-tui`) +- ✅ Project indexing from phone storage +- ✅ Search your phone's code projects +- ❌ Heavy AI models (use cloud providers instead) + +**Android use cases:** +- Search your mobile development projects +- Index documentation on your phone +- Quick code reference while traveling +- Offline search of downloaded repositories + +### 🍎 **iOS (Limited but Possible)** + +**Option 1: a-Shell (Free)** +```bash +# Install a-Shell from App Store +# In a-Shell: +pip install requests # pathlib is built into Python 3 - no separate install needed + +# Limited installation (core features only) +# Files must be in app sandbox +``` + +**Option 2: iSH (Alpine Linux)** +```bash +# Install iSH from App Store +# In iSH terminal: +apk add python3 py3-pip git +pip install -r requirements-light.txt + +# Basic functionality only +``` + +**iOS limitations:** +- Sandbox restricts file access +- No full AI model support +- Terminal interface only +- Limited to app-accessible files + +## Specialized Deployment Scenarios + +### 🐳 **Docker Deployment** + +**For any platform with Docker:** +```dockerfile +# Dockerfile +FROM python:3.11-slim + +WORKDIR /app +COPY . . +RUN pip install -r requirements.txt + +# Expose ports for server mode +EXPOSE 7777 + +# Default to TUI interface +CMD ["python", "-m", "mini_rag.cli"] +``` + +**Usage:** +```bash +# Build and run +docker build -t fss-mini-rag . 
+docker run -it -v $(pwd)/projects:/projects fss-mini-rag + +# Server mode for web access +docker run -p 7777:7777 fss-mini-rag python -m mini_rag server +``` + +### ☁️ **Cloud Deployment** + +**AWS/GCP/Azure VM:** +- Same as Linux installation +- Can serve multiple users +- Perfect for team environments + +**GitHub Codespaces:** +```bash +# Works in any Codespace +./install_mini_rag.sh +# Perfect for searching your workspace +``` + +**Replit/CodeSandbox:** +- Limited by platform restrictions +- Basic functionality available + +### 🏠 **Home Lab Integration** + +**Home Assistant Add-on:** +- Package as Home Assistant add-on +- Search home automation configs +- Voice integration possible + +**NAS Integration:** +- Install on Synology/QNAP +- Search all stored documents +- Family code documentation + +**Router with USB:** +- Install on OpenWrt routers with USB storage +- Search network documentation +- Configuration management + +## Configuration by Use Case + +### 🪶 **Ultra-Lightweight (Old hardware, mobile)** +```yaml +# Minimal resource usage +embedding: + preferred_method: hash +chunking: + max_size: 800 + strategy: fixed +llm: + enable_synthesis: false +``` + +### ⚖️ **Balanced (Raspberry Pi, older laptops)** +```yaml +# Good performance with AI features +embedding: + preferred_method: ollama + ollama_model: nomic-embed-text +llm: + synthesis_model: qwen3:0.6b + context_window: 4096 +``` + +### 🚀 **Performance (Modern hardware)** +```yaml +# Full features and performance +embedding: + preferred_method: ollama + ollama_model: nomic-embed-text +llm: + synthesis_model: qwen3:1.7b + context_window: 16384 + enable_thinking: true +``` + +### ☁️ **Cloud-Hybrid (Mobile + Cloud AI)** +```yaml +# Local search, cloud intelligence +embedding: + preferred_method: hash +llm: + provider: openai + api_key: your_api_key + synthesis_model: gpt-4 +``` + +## Troubleshooting by Platform + +### **Raspberry Pi Issues** +- **Out of memory**: Reduce context window, use smaller models +- 
**Slow indexing**: Use hash-based embeddings +- **Model download fails**: Check internet, use smaller models + +### **Android/Termux Issues** +- **Permission denied**: Use `termux-setup-storage` +- **Package install fails**: Update packages first +- **Can't access files**: Use `/storage/emulated/0/` paths + +### **iOS Issues** +- **Limited functionality**: Expected due to iOS restrictions +- **Can't install packages**: Use lighter requirements file +- **File access denied**: Files must be in app sandbox + +### **Edge Device Issues** +- **ARM compatibility**: Ensure using ARM64 Python packages +- **Limited RAM**: Use hash embeddings, reduce chunk sizes +- **No internet**: Skip AI model downloads, use text-only + +## Advanced Edge Deployments + +### **IoT Integration** +- Index sensor logs and configurations +- Search device documentation +- Troubleshoot IoT deployments + +### **Offline Development** +- Complete development environment on edge device +- No internet required after setup +- Perfect for remote locations + +### **Educational Use** +- Raspberry Pi computer labs +- Student project search +- Coding bootcamp environments + +### **Enterprise Edge** +- Factory floor documentation search +- Field service technical reference +- Remote site troubleshooting + +--- + +## Quick Start by Platform + +### Desktop Users +```bash +# Linux/macOS +./install_mini_rag.sh + +# Windows +install_windows.bat +``` + +### Edge/Mobile Users +```bash +# Raspberry Pi +./install_mini_rag.sh + +# Android (Termux) +pkg install python git && pip install -r requirements.txt + +# Any Docker platform +docker run -it fss-mini-rag +``` + +**💡 Pro tip**: Start with your current platform, then expand to edge devices as needed. The system scales from smartphones to servers seamlessly! 
\ No newline at end of file diff --git a/docs/DIAGRAMS.md b/docs/DIAGRAMS.md index 6d0f278..8bbce9d 100644 --- a/docs/DIAGRAMS.md +++ b/docs/DIAGRAMS.md @@ -11,6 +11,7 @@ - [Search Architecture](#search-architecture) - [Installation Flow](#installation-flow) - [Configuration System](#configuration-system) +- [System Context Integration](#system-context-integration) - [Error Handling](#error-handling) ## System Overview @@ -22,10 +23,12 @@ graph TB CLI --> Index[📁 Index Project] CLI --> Search[🔍 Search Project] + CLI --> Explore[🧠 Explore Project] CLI --> Status[📊 Show Status] TUI --> Index TUI --> Search + TUI --> Explore TUI --> Config[⚙️ Configuration] Index --> Files[📄 File Discovery] @@ -34,17 +37,32 @@ graph TB Embed --> Store[💾 Vector Database] Search --> Query[❓ User Query] + Search --> Context[🖥️ System Context] Query --> Vector[🎯 Vector Search] Query --> Keyword[🔤 Keyword Search] Vector --> Combine[🔄 Hybrid Results] Keyword --> Combine - Combine --> Results[📋 Ranked Results] + Context --> Combine + Combine --> Synthesize{Synthesis Mode?} + + Synthesize -->|Yes| FastLLM[⚡ Fast Synthesis] + Synthesize -->|No| Results[📋 Ranked Results] + FastLLM --> Results + + Explore --> ExploreQuery[❓ Interactive Query] + ExploreQuery --> Memory[🧠 Conversation Memory] + ExploreQuery --> Context + Memory --> DeepLLM[🤔 Deep AI Analysis] + Context --> DeepLLM + Vector --> DeepLLM + DeepLLM --> Interactive[💬 Interactive Response] Store --> LanceDB[(🗄️ LanceDB)] Vector --> LanceDB Config --> YAML[📝 config.yaml] Status --> Manifest[📋 manifest.json] + Context --> SystemInfo[💻 OS, Python, Paths] ``` ## User Journey @@ -276,6 +294,58 @@ flowchart TD style Error fill:#ffcdd2 ``` +## System Context Integration + +```mermaid +graph LR + subgraph "System Detection" + OS[🖥️ Operating System] + Python[🐍 Python Version] + Project[📁 Project Path] + + OS --> Windows[Windows: rag.bat] + OS --> Linux[Linux: ./rag-mini] + OS --> macOS[macOS: ./rag-mini] + end + + subgraph "Context Collection" 
+ Collect[🔍 Collect Context] + OS --> Collect + Python --> Collect + Project --> Collect + + Collect --> Format[📝 Format Context] + Format --> Limit[✂️ Limit to 200 chars] + end + + subgraph "AI Integration" + UserQuery[❓ User Query] + SearchResults[📋 Search Results] + SystemContext[💻 System Context] + + UserQuery --> Prompt[📝 Build Prompt] + SearchResults --> Prompt + SystemContext --> Prompt + + Prompt --> AI[🤖 LLM Processing] + AI --> Response[💬 Contextual Response] + end + + subgraph "Enhanced Responses" + Response --> Commands[💻 OS-specific commands] + Response --> Paths[📂 Correct path formats] + Response --> Tips[💡 Platform-specific tips] + end + + Format --> SystemContext + + style SystemContext fill:#e3f2fd + style Response fill:#f3e5f5 + style Commands fill:#e8f5e8 +``` + +*System context helps the AI provide better, platform-specific guidance without compromising privacy* + ## Architecture Layers ```mermaid diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md index 63af487..1352165 100644 --- a/docs/GETTING_STARTED.md +++ b/docs/GETTING_STARTED.md @@ -1,212 +1,314 @@ # Getting Started with FSS-Mini-RAG -## Step 1: Installation +> **Get from zero to searching in 2 minutes** +> *Everything you need to know to start finding code by meaning, not just keywords* -Choose your installation based on what you want: +## Installation (Choose Your Adventure) -### Option A: Ollama Only (Recommended) +### 🎯 **Option 1: Full Installation (Recommended)** +*Gets you everything working reliably with desktop shortcuts and AI features* + +**Linux/macOS:** ```bash -# Install Ollama first -curl -fsSL https://ollama.ai/install.sh | sh - -# Pull the embedding model -ollama pull nomic-embed-text - -# Install Python dependencies -pip install -r requirements.txt +./install_mini_rag.sh ``` -### Option B: Full ML Stack -```bash -# Install everything including PyTorch -pip install -r requirements-full.txt +**Windows:** +```cmd +install_windows.bat ``` -## Step 2: Test 
Installation +**What this does:** +- Sets up Python environment automatically +- Installs all dependencies +- Downloads AI models (with your permission) +- Creates desktop shortcuts and application menu entries +- Tests everything works +- Gives you an interactive tutorial +**Time needed:** 5-10 minutes (depending on AI model downloads) + +--- + +### 🚀 **Option 2: Copy & Try (Experimental)** +*Just copy the folder and run - may work, may need manual setup* + +**Linux/macOS:** ```bash -# Index this RAG system itself +# Copy folder anywhere and try running +./rag-mini index ~/my-project +# Auto-setup attempts to create virtual environment +# Falls back with clear instructions if it fails +``` + +**Windows:** +```cmd +# Copy folder anywhere and try running +rag.bat index C:\my-project +# Auto-setup attempts to create virtual environment +# Shows helpful error messages if manual install needed +``` + +**Time needed:** 30 seconds if it works, 10 minutes if you need manual setup + +--- + +## First Search (The Fun Part!) 
+ +### Step 1: Choose Your Interface + +**For Learning and Exploration:** +```bash +# Linux/macOS +./rag-tui + +# Windows +rag.bat +``` +*Interactive menus, shows you CLI commands as you learn* + +**For Quick Commands:** +```bash +# Linux/macOS +./rag-mini + +# Windows +rag.bat +``` +*Direct commands when you know what you want* + +### Step 2: Index Your First Project + +**Interactive Way (Recommended for First Time):** +```bash +# Linux/macOS +./rag-tui +# Then: Select Project Directory → Index Project + +# Windows +rag.bat +# Then: Select Project Directory → Index Project +``` + +**Direct Commands:** +```bash +# Linux/macOS ./rag-mini index ~/my-project -# Search for something -./rag-mini search ~/my-project "chunker function" - -# Check what got indexed -./rag-mini status ~/my-project +# Windows +rag.bat index C:\my-project ``` -## Step 3: Index Your First Project +**What indexing does:** +- Finds all text files in your project +- Breaks them into smart "chunks" (functions, classes, logical sections) +- Creates searchable embeddings that understand meaning +- Stores everything in a fast vector database +- Creates a `.mini-rag/` directory with your search index +**Time needed:** 10-60 seconds depending on project size + +### Step 3: Search by Meaning + +**Natural language queries:** ```bash -# Index any project directory -./rag-mini index /path/to/your/project +# Linux/macOS +./rag-mini search ~/my-project "user authentication logic" +./rag-mini search ~/my-project "error handling for database connections" +./rag-mini search ~/my-project "how to validate input data" -# The system creates .mini-rag/ directory with: -# - config.json (settings) -# - manifest.json (file tracking) -# - database.lance/ (vector database) +# Windows +rag.bat search C:\my-project "user authentication logic" +rag.bat search C:\my-project "error handling for database connections" +rag.bat search C:\my-project "how to validate input data" ``` -## Step 4: Search Your Code - +**Code 
concepts:** ```bash -# Basic semantic search -./rag-mini search /path/to/project "user login logic" +# Finds login functions, auth middleware, session handling +./rag-mini search ~/my-project "login functionality" -# Enhanced search with smart features -./rag-mini-enhanced search /path/to/project "authentication" +# Finds try/catch blocks, error handlers, retry logic +./rag-mini search ~/my-project "exception handling" -# Find similar patterns -./rag-mini-enhanced similar /path/to/project "def validate_input" +# Finds validation functions, input sanitization, data checking +./rag-mini search ~/my-project "data validation" ``` -## Step 5: Customize Configuration +**What you get:** +- Ranked results by relevance (not just keyword matching) +- File paths and line numbers for easy navigation +- Context around each match so you understand what it does +- Smart filtering to avoid noise and duplicates -Edit `project/.mini-rag/config.json`: +## Two Powerful Modes -```json -{ - "chunking": { - "max_size": 3000, - "strategy": "semantic" - }, - "files": { - "min_file_size": 100 - } -} -``` +FSS-Mini-RAG has two different ways to get answers, optimized for different needs: -Then re-index to apply changes: +### 🚀 **Synthesis Mode** - Fast Answers ```bash -./rag-mini index /path/to/project --force +# Linux/macOS +./rag-mini search ~/project "authentication logic" --synthesize + +# Windows +rag.bat search C:\project "authentication logic" --synthesize ``` -## Common Use Cases +**Perfect for:** +- Quick code discovery +- Finding specific functions or patterns +- Getting fast, consistent answers -### Find Functions by Name -```bash -./rag-mini search /project "function named connect_to_database" -``` +**What you get:** +- Lightning-fast responses (no thinking overhead) +- Reliable, factual information about your code +- Clear explanations of what code does and how it works -### Find Code Patterns -```bash -./rag-mini search /project "error handling try catch" -./rag-mini search 
/project "database query with parameters" -``` - -### Find Configuration +### 🧠 **Exploration Mode** - Deep Understanding ```bash -./rag-mini search /project "database connection settings" -./rag-mini search /project "environment variables" +# Linux/macOS +./rag-mini explore ~/project + +# Windows +rag.bat explore C:\project ``` -### Find Documentation +**Perfect for:** +- Learning new codebases +- Debugging complex issues +- Understanding architectural decisions + +**What you get:** +- Interactive conversation with AI that remembers context +- Deep reasoning with full "thinking" process shown +- Follow-up questions and detailed explanations +- Memory of your previous questions in the session + +**Example exploration session:** +``` +🧠 Exploration Mode - Ask anything about your project + +You: How does authentication work in this codebase? + +AI: Let me analyze the authentication system... + +💭 Thinking: I can see several authentication-related files. Let me examine + the login flow, session management, and security measures... + +📝 Authentication Analysis: + This codebase uses a three-layer authentication system: + 1. Login validation in auth.py handles username/password checking + 2. Session management in sessions.py maintains user state + 3. Middleware in auth_middleware.py protects routes + +You: What security concerns should I be aware of? + +AI: Based on our previous discussion about authentication, let me check for + common security vulnerabilities... 
+``` + +## Check Your Setup + +**See what got indexed:** ```bash -./rag-mini search /project "how to deploy" -./rag-mini search /project "API documentation" +# Linux/macOS +./rag-mini status ~/my-project + +# Windows +rag.bat status C:\my-project ``` -## Python API Usage +**What you'll see:** +- How many files were processed +- Total chunks created for searching +- Embedding method being used (Ollama, ML models, or hash-based) +- Configuration file location +- Index health and last update time -```python -from mini_rag import ProjectIndexer, CodeSearcher, CodeEmbedder -from pathlib import Path +## Configuration (Optional) -# Initialize -project_path = Path("/path/to/your/project") -embedder = CodeEmbedder() -indexer = ProjectIndexer(project_path, embedder) -searcher = CodeSearcher(project_path, embedder) +Your project gets a `.mini-rag/config.yaml` file with helpful comments: -# Index the project -print("Indexing project...") -result = indexer.index_project() -print(f"Indexed {result['files_processed']} files, {result['chunks_created']} chunks") +```yaml +# Context window configuration (critical for AI features) +# 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users +# 32K=large codebases, 64K+=power users only +# ⚠️ Larger contexts use exponentially more CPU/memory - only increase if needed +context_window: 16384 # Context size in tokens -# Search -print("\nSearching for authentication code...") -results = searcher.search("user authentication logic", top_k=5) - -for i, result in enumerate(results, 1): - print(f"\n{i}. 
{result.file_path}") - print(f" Score: {result.score:.3f}") - print(f" Type: {result.chunk_type}") - print(f" Content: {result.content[:100]}...") +# AI model preferences (edit to change priority) +model_rankings: + - "qwen3:1.7b" # Excellent for RAG (1.4GB, recommended) + - "qwen3:0.6b" # Lightweight and fast (~500MB) + - "qwen3:4b" # Higher quality but slower (~2.5GB) ``` -## Advanced Features +**When to customize:** +- Your searches aren't finding what you expect → adjust chunking settings +- You want AI features → install Ollama and download models +- System is slow → try smaller models or reduce context window +- Getting too many/few results → adjust similarity threshold -### Auto-optimization +## Troubleshooting + +### "Project not indexed" +**Problem:** You're trying to search before indexing ```bash -# Get optimization suggestions -./rag-mini-enhanced analyze /path/to/project - -# This analyzes your codebase and suggests: -# - Better chunk sizes for your language mix -# - Streaming settings for large files -# - File filtering optimizations +# Run indexing first +./rag-mini index ~/my-project # Linux/macOS +rag.bat index C:\my-project # Windows ``` -### File Watching -```python -from mini_rag import FileWatcher +### "No Ollama models available" +**Problem:** AI features need models downloaded +```bash +# Install Ollama first +curl -fsSL https://ollama.ai/install.sh | sh # Linux/macOS +# Or download from https://ollama.com # Windows -# Watch for file changes and auto-update index -watcher = FileWatcher(project_path, indexer) -watcher.start_watching() +# Start Ollama server +ollama serve -# Now any file changes automatically update the index +# Download a model +ollama pull qwen3:1.7b ``` -### Custom Chunking -```python -from mini_rag import CodeChunker - -chunker = CodeChunker() - -# Chunk a Python file -with open("example.py") as f: - content = f.read() - -chunks = chunker.chunk_text(content, "python", "example.py") -for chunk in chunks: - print(f"Type: 
{chunk.chunk_type}") - print(f"Content: {chunk.content}") +### "Virtual environment not found" +**Problem:** Auto-setup didn't work, need manual installation +```bash +# Run the full installer instead +./install_mini_rag.sh # Linux/macOS +install_windows.bat # Windows ``` -## Tips and Best Practices +### Getting weird results +**Solution:** Try different search terms or check what got indexed +```bash +# See what files were processed +./rag-mini status ~/my-project -### For Better Search Results -- Use descriptive phrases: "function that validates email addresses" -- Try different phrasings if first search doesn't work -- Search for concepts, not just exact variable names +# Try more specific queries +./rag-mini search ~/my-project "specific function name" +``` -### For Better Indexing -- Exclude build directories: `node_modules/`, `build/`, `dist/` -- Include documentation files - they often contain valuable context -- Use semantic chunking strategy for most projects +## Next Steps -### For Configuration -- Start with default settings -- Use `analyze` command to get optimization suggestions -- Increase chunk size for larger functions/classes -- Decrease chunk size for more granular search +### Learn More +- **[Beginner's Glossary](BEGINNER_GLOSSARY.md)** - All the terms explained simply +- **[TUI Guide](TUI_GUIDE.md)** - Master the interactive interface +- **[Visual Diagrams](DIAGRAMS.md)** - See how everything works -### For Troubleshooting -- Check `./rag-mini status` to see what was indexed -- Look at `.mini-rag/manifest.json` for file details -- Run with `--force` to completely rebuild index -- Check logs in `.mini-rag/` directory for errors +### Advanced Features +- **[Query Expansion](QUERY_EXPANSION.md)** - Make searches smarter with AI +- **[LLM Providers](LLM_PROVIDERS.md)** - Use different AI models +- **[CPU Deployment](CPU_DEPLOYMENT.md)** - Optimize for older computers -## What's Next? 
+### Customize Everything +- **[Technical Guide](TECHNICAL_GUIDE.md)** - How the system actually works +- **[Configuration Examples](../examples/)** - Pre-made configs for different needs -1. Try the test suite to understand how components work: - ```bash - python -m pytest tests/ -v - ``` +--- -2. Look at the examples in `examples/` directory +**🎉 That's it!** You now have a semantic search system that understands your code by meaning, not just keywords. Start with simple searches and work your way up to the advanced AI features as you get comfortable. -3. Read the main README.md for complete technical details - -4. Customize the system for your specific project needs \ No newline at end of file +**💡 Pro tip:** The best way to learn is to index a project you know well and try searching for things you know are in there. You'll quickly see how much better meaning-based search is than traditional keyword search. \ No newline at end of file diff --git a/mini_rag/config.py b/mini_rag/config.py index e80c421..77c4c1e 100644 --- a/mini_rag/config.py +++ b/mini_rag/config.py @@ -194,6 +194,16 @@ class ConfigManager: return config + except yaml.YAMLError as e: + # YAML syntax error - help user fix it instead of silent fallback + error_msg = f"⚠️ Config file has YAML syntax error at line {getattr(e, 'problem_mark', 'unknown')}: {e}" + logger.error(error_msg) + print(f"\n{error_msg}") + print(f"Config file: {self.config_path}") + print("💡 Check YAML syntax (indentation, quotes, colons)") + print("💡 Or delete config file to reset to defaults") + return RAGConfig() # Still return defaults but warn user + except Exception as e: logger.error(f"Failed to load config from {self.config_path}: {e}") logger.info("Using default configuration") @@ -210,8 +220,16 @@ class ConfigManager: # Create YAML content with comments yaml_content = self._create_yaml_with_comments(config_dict) + # Write with basic file locking to prevent corruption with open(self.config_path, 'w') as f: - 
f.write(yaml_content) + try: + import fcntl + fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) # Non-blocking exclusive lock + f.write(yaml_content) + fcntl.flock(f.fileno(), fcntl.LOCK_UN) # Unlock + except (OSError, ImportError): + # Fallback for Windows or if fcntl unavailable + f.write(yaml_content) logger.info(f"Configuration saved to {self.config_path}") @@ -274,7 +292,11 @@ class ConfigManager: f" synthesis_temperature: {config_dict['llm']['synthesis_temperature']} # LLM temperature for analysis", "", " # Context window configuration (critical for RAG performance)", - f" context_window: {config_dict['llm']['context_window']} # Context size in tokens (8K=fast, 16K=balanced, 32K=advanced)", + " # 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users", + " # 32K=large codebases, 64K+=power users only", + " # ⚠️ Larger contexts use exponentially more CPU/memory - only increase if needed", + " # 🔧 Low context limits? Try smaller topk, better search terms, or archive noise", + f" context_window: {config_dict['llm']['context_window']} # Context size in tokens", f" auto_context: {str(config_dict['llm']['auto_context']).lower()} # Auto-adjust context based on model capabilities", "", " model_rankings: # Preferred model order (edit to change priority)", diff --git a/mini_rag/explorer.py b/mini_rag/explorer.py index b50d040..be5b3c8 100644 --- a/mini_rag/explorer.py +++ b/mini_rag/explorer.py @@ -17,11 +17,13 @@ try: from .llm_synthesizer import LLMSynthesizer, SynthesisResult from .search import CodeSearcher from .config import RAGConfig + from .system_context import get_system_context except ImportError: # For direct testing from llm_synthesizer import LLMSynthesizer, SynthesisResult from search import CodeSearcher from config import RAGConfig + get_system_context = lambda x=None: "" logger = logging.getLogger(__name__) @@ -154,10 +156,15 @@ Content: {content[:800]}{'...' 
if len(content) > 800 else ''} results_text = "\n".join(results_context) + # Get system context for better responses + system_context = get_system_context(self.project_path) + # Create comprehensive exploration prompt with thinking prompt = f""" The user asked: "{question}" +System context: {system_context} + Let me analyze what they're asking and look at the information I have available. From the search results, I can see relevant information about: diff --git a/mini_rag/llm_synthesizer.py b/mini_rag/llm_synthesizer.py index 60a622a..8653904 100644 --- a/mini_rag/llm_synthesizer.py +++ b/mini_rag/llm_synthesizer.py @@ -16,11 +16,13 @@ from pathlib import Path try: from .llm_safeguards import ModelRunawayDetector, SafeguardConfig, get_optimal_ollama_parameters + from .system_context import get_system_context except ImportError: # Graceful fallback if safeguards not available ModelRunawayDetector = None SafeguardConfig = None get_optimal_ollama_parameters = lambda x: {} + get_system_context = lambda x=None: "" logger = logging.getLogger(__name__) @@ -175,12 +177,20 @@ class LLMSynthesizer: # Ensure we're initialized self._ensure_initialized() - # Use the best available model + # Use the best available model with retry logic model_to_use = self.model if self.model not in self.available_models: - # Fallback to first available model - if self.available_models: + # Refresh model list in case of race condition + logger.warning(f"Configured model {self.model} not in available list, refreshing...") + self.available_models = self._get_available_models() + + if self.model in self.available_models: + model_to_use = self.model + logger.info(f"Model {self.model} found after refresh") + elif self.available_models: + # Fallback to first available model model_to_use = self.available_models[0] + logger.warning(f"Using fallback model: {model_to_use}") else: logger.error("No Ollama models available") return None @@ -587,9 +597,13 @@ Content: {content[:500]}{'...' 
if len(content) > 500 else ''} context = "\n".join(context_parts) - # Create synthesis prompt + # Get system context for better responses + system_context = get_system_context(project_path) + + # Create synthesis prompt with system context prompt = f"""You are a senior software engineer analyzing code search results. Your task is to synthesize the search results into a helpful, actionable summary. +SYSTEM CONTEXT: {system_context} SEARCH QUERY: "{query}" PROJECT: {project_path.name} diff --git a/mini_rag/system_context.py b/mini_rag/system_context.py new file mode 100644 index 0000000..98fe9f9 --- /dev/null +++ b/mini_rag/system_context.py @@ -0,0 +1,123 @@ +""" +System Context Collection for Enhanced RAG Grounding + +Collects minimal system information to help the LLM provide better, +context-aware assistance without compromising privacy. +""" + +import platform +import sys +import os +from pathlib import Path +from typing import Dict, Optional + + +class SystemContextCollector: + """Collects system context information for enhanced LLM grounding.""" + + @staticmethod + def get_system_context(project_path: Optional[Path] = None) -> str: + """ + Get concise system context for LLM grounding. 
+ + Args: + project_path: Current project directory + + Returns: + Formatted system context string (max 200 chars for privacy) + """ + try: + # Basic system info + os_name = platform.system() + python_ver = f"{sys.version_info.major}.{sys.version_info.minor}" + + # Simplified OS names + os_short = { + 'Windows': 'Win', + 'Linux': 'Linux', + 'Darwin': 'macOS' + }.get(os_name, os_name) + + # Working directory info + if project_path: + # Use relative or shortened path for privacy + try: + rel_path = project_path.relative_to(Path.home()) + path_info = f"~/{rel_path}" + except ValueError: + # If not relative to home, just use folder name + path_info = project_path.name + else: + path_info = Path.cwd().name + + # Trim path if too long for our 200-char limit + if len(path_info) > 50: + path_info = f".../{path_info[-45:]}" + + # Command style hints + cmd_style = "rag.bat" if os_name == "Windows" else "./rag-mini" + + # Format concise context + context = f"[{os_short} {python_ver}, {path_info}, use {cmd_style}]" + + # Ensure we stay under 200 chars + if len(context) > 200: + context = context[:197] + "...]" + + return context + + except Exception: + # Fallback to minimal info if anything fails + return f"[{platform.system()}, Python {sys.version_info.major}.{sys.version_info.minor}]" + + @staticmethod + def get_command_context(os_name: Optional[str] = None) -> Dict[str, str]: + """ + Get OS-appropriate command examples. 
+ + Returns: + Dictionary with command patterns for the current OS + """ + if os_name is None: + os_name = platform.system() + + if os_name == "Windows": + return { + "launcher": "rag.bat", + "index": "rag.bat index C:\\path\\to\\project", + "search": "rag.bat search C:\\path\\to\\project \"query\"", + "explore": "rag.bat explore C:\\path\\to\\project", + "path_sep": "\\", + "example_path": "C:\\Users\\username\\Documents\\myproject" + } + else: + return { + "launcher": "./rag-mini", + "index": "./rag-mini index /path/to/project", + "search": "./rag-mini search /path/to/project \"query\"", + "explore": "./rag-mini explore /path/to/project", + "path_sep": "/", + "example_path": "~/Documents/myproject" + } + + +def get_system_context(project_path: Optional[Path] = None) -> str: + """Convenience function to get system context.""" + return SystemContextCollector.get_system_context(project_path) + + +def get_command_context() -> Dict[str, str]: + """Convenience function to get command context.""" + return SystemContextCollector.get_command_context() + + +# Test function +if __name__ == "__main__": + print("System Context Test:") + print(f"Context: {get_system_context()}") + print(f"Context with path: {get_system_context(Path('/tmp/test'))}") + print() + print("Command Context:") + cmds = get_command_context() + for key, value in cmds.items(): + print(f" {key}: {value}") \ No newline at end of file