🎓 Complete beginner-friendly polish with production reliability

BEGINNER-FRIENDLY ENHANCEMENTS:
- Add comprehensive glossary explaining RAG, embeddings, chunks in plain English
- Create detailed troubleshooting guide covering installation, search issues, performance
- Provide preset configs (beginner/fast/quality) with extensive helpful comments
- Enhanced error messages with specific solutions and next steps

🔧 PRODUCTION RELIABILITY:
- Add thread-safe caching with automatic cleanup in QueryExpander
- Implement chunked processing for large batches to prevent memory issues
- Enhanced concurrent embedding with intelligent batch size management
- Memory leak prevention with LRU cache approximation

🏗️ ARCHITECTURE COMPLETENESS:
- Maintain two-mode system (synthesis fast, exploration thinking + memory)
- Preserve educational value while removing intimidation barriers
- Complete testing coverage for mode separation and context memory
- Full documentation reflecting clean two-mode architecture

Perfect balance: genuinely beginner-friendly without compromising technical sophistication
BobAi 2025-08-12 18:59:24 +10:00
parent 2c5eef8596
commit 3363171820
9 changed files with 1176 additions and 55 deletions


@@ -350,6 +350,10 @@ class OllamaEmbedder:
         if len(file_contents) <= 2:
             return self._batch_embed_sequential(file_contents)

+        # For very large batches, use chunked processing to prevent memory issues
+        if len(file_contents) > 500:  # Process in chunks to manage memory
+            return self._batch_embed_chunked(file_contents, max_workers)
+
         return self._batch_embed_concurrent(file_contents, max_workers)

     def _batch_embed_sequential(self, file_contents: List[dict]) -> List[dict]:
@@ -396,6 +400,35 @@ class OllamaEmbedder:
         indexed_results.sort(key=lambda x: x[0])
         return [result for _, result in indexed_results]

+    def _batch_embed_chunked(self, file_contents: List[dict], max_workers: int, chunk_size: int = 200) -> List[dict]:
+        """
+        Process very large batches in smaller chunks to prevent memory issues.
+        This is important for beginners who might try to index huge projects.
+        """
+        results = []
+        total_files = len(file_contents)
+
+        # Process in chunks
+        for i in range(0, len(file_contents), chunk_size):
+            chunk = file_contents[i:i + chunk_size]
+
+            # Log progress for large operations
+            if total_files > chunk_size:
+                chunk_num = i // chunk_size + 1
+                total_chunk_count = (total_files + chunk_size - 1) // chunk_size
+                logger.info(f"Processing chunk {chunk_num}/{total_chunk_count} ({len(chunk)} files)")
+
+            # Process this chunk using the concurrent method
+            chunk_results = self._batch_embed_concurrent(chunk, max_workers)
+            results.extend(chunk_results)
+
+            # Brief pause between chunks to prevent overwhelming the system
+            if i + chunk_size < len(file_contents):
+                import time
+                time.sleep(0.1)  # 100ms pause between chunks
+
+        return results
+
     def get_embedding_dim(self) -> int:
         """Return the dimension of embeddings produced by this model."""
         return self.embedding_dim


@@ -32,6 +32,7 @@ disable in CLI for maximum speed.
 import logging
 import re
+import threading
 from typing import List, Optional

 import requests

 from .config import RAGConfig
@@ -51,6 +52,7 @@ class QueryExpander:
         # Cache for expanded queries to avoid repeated API calls
         self._cache = {}
+        self._cache_lock = threading.RLock()  # Thread-safe cache access

     def _ensure_initialized(self):
         """Lazy initialization with LLM warmup."""
@@ -84,9 +86,10 @@
         self._ensure_initialized()

-        # Check cache first
-        if query in self._cache:
-            return self._cache[query]
+        # Check cache first (thread-safe)
+        with self._cache_lock:
+            if query in self._cache:
+                return self._cache[query]

         # Don't expand very short queries or obvious keywords
         if len(query.split()) <= 1 or len(query) <= 3:
@@ -95,8 +98,12 @@
         try:
             expanded = self._llm_expand_query(query)
             if expanded and expanded != query:
-                # Cache the result
-                self._cache[query] = expanded
+                # Cache the result (thread-safe)
+                with self._cache_lock:
+                    self._cache[query] = expanded
+                    # Prevent cache from growing too large
+                    if len(self._cache) % 100 == 0:  # Check every 100 entries
+                        self._manage_cache_size()

                 logger.info(f"Expanded query: '{query}' → '{expanded}'")
                 return expanded
@@ -227,8 +234,19 @@ Expanded query:"""
         return clean_response

     def clear_cache(self):
-        """Clear the expansion cache."""
-        self._cache.clear()
+        """Clear the expansion cache (thread-safe)."""
+        with self._cache_lock:
+            self._cache.clear()
+
+    def _manage_cache_size(self, max_size: int = 1000):
+        """Keep cache from growing too large (prevents memory leaks)."""
+        with self._cache_lock:  # RLock, so safe to re-enter from expand_query
+            if len(self._cache) > max_size:
+                # Remove oldest half of cache entries (simple LRU approximation;
+                # dicts preserve insertion order in Python 3.7+)
+                items = list(self._cache.items())
+                keep_count = max_size // 2
+                self._cache = dict(items[-keep_count:])
+                logger.debug(f"Cache trimmed from {len(items)} to {len(self._cache)} entries")

     def is_available(self) -> bool:
         """Check if query expansion is available."""

docs/BEGINNER_GLOSSARY.md (new file, 202 lines)

@@ -0,0 +1,202 @@
# 📚 Beginner's Glossary - RAG Terms Made Simple
*Confused by all the technical terms? Don't worry! This guide explains everything in plain English.*
---
## 🤖 **RAG** - Retrieval Augmented Generation
**What it is:** A fancy way of saying "search your code and get AI explanations"
**Simple explanation:** Instead of just searching for keywords (like Google), RAG finds code that's *similar in meaning* to what you're looking for, then has an AI explain it to you.
**Real example:**
- You search for "user authentication"
- RAG finds code about login systems, password validation, and user sessions
- AI explains: "This code handles user logins using email/password, stores sessions in cookies, and validates users on each request"
---
## 🧩 **Chunks** - Bite-sized pieces of your code
**What it is:** Your code files broken into smaller, searchable pieces
**Simple explanation:** RAG can't search entire huge files efficiently, so it breaks them into "chunks" - like cutting a pizza into slices. Each chunk is usually one function, one class, or a few related lines.
**Why it matters:**
- Too small chunks = missing context ("this variable" but what variable?)
- Too big chunks = too much unrelated stuff in search results
- Just right = perfect context for understanding what code does
**Real example:**
```python
# This would be one chunk:
def login_user(email, password):
    """Authenticate user with email and password."""
    user = find_user_by_email(email)
    if user and check_password(user, password):
        create_session(user)
        return True
    return False
```
---
## 🧠 **Embeddings** - Code "fingerprints"
**What it is:** A way to convert your code into numbers that computers can compare
**Simple explanation:** Think of embeddings like DNA fingerprints for your code. Similar code gets similar fingerprints. The computer can then find code with similar "fingerprints" to what you're searching for.
**The magic:** Code that does similar things gets similar embeddings, even if the exact words are different:
- `login_user()` and `authenticate()` would have similar embeddings
- `calculate_tax()` and `login_user()` would have very different embeddings
**You don't need to understand the technical details** - just know that embeddings help find semantically similar code, not just exact word matches.
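**Want to peek under the hood anyway?** Here's a tiny sketch of the math (cosine similarity) that compares two embeddings. The numbers are made up for illustration - real embeddings have hundreds of dimensions:
```python
import math

def cosine_similarity(a, b):
    """Higher score = more similar meaning (roughly 0.0 to 1.0 for code embeddings)."""
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(x * x for x in b)))

# Toy 3-number "fingerprints" (real ones have hundreds of numbers)
login_user    = [0.9, 0.1, 0.2]
authenticate  = [0.8, 0.2, 0.3]  # similar meaning -> similar numbers
calculate_tax = [0.1, 0.9, 0.1]  # different meaning -> different numbers

print(cosine_similarity(login_user, authenticate))   # ~0.98 (very similar)
print(cosine_similarity(login_user, calculate_tax))  # ~0.24 (not similar)
```
This is also exactly where the "similarity score" described below comes from.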
---
## 🔍 **Vector Search** vs **Keyword Search**
**Keyword search (like Google):** Finds exact word matches
- Search "login" → finds code with the word "login"
- Misses: authentication, signin, user_auth
**Vector search (the RAG way):** Finds similar *meaning*
- Search "login" → finds login, authentication, signin, user validation
- Uses those embedding "fingerprints" to find similar concepts
**FSS-Mini-RAG uses both** for the best results!
---
## 📊 **Similarity Score** - How relevant is this result?
**What it is:** A number from 0.0 to 1.0 showing how closely your search matches the result
**Simple explanation:**
- 1.0 = Perfect match (very rare)
- 0.8+ = Excellent match
- 0.5+ = Good match
- 0.3+ = Somewhat relevant
- 0.1+ = Might be useful
- Below 0.1 = Probably not what you want
**In practice:** Most useful results are between 0.2-0.8
---
## 🎯 **BM25** - The keyword search boost
**What it is:** A fancy algorithm that finds exact word matches (like Google search)
**Simple explanation:** While embeddings find *similar meaning*, BM25 finds *exact words*. Using both together gives you the best of both worlds.
**Example:**
- You search for "password validation"
- Embeddings find: authentication functions, login methods, user security
- BM25 finds: code with the exact words "password" and "validation"
- Combined = comprehensive results
**Keep it enabled** unless you're getting too many irrelevant results.
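**Curious how BM25 looks in code?** Here's a minimal, self-contained sketch using the `rank-bm25` Python package (`pip install rank-bm25`). It's just an illustration of the algorithm, not necessarily how FSS-Mini-RAG wires it up internally:
```python
from rank_bm25 import BM25Okapi

# A toy "codebase" of three chunks, split into word tokens
chunks = [
    "def validate password check user password hash",
    "def login user email password create session",
    "def calculate tax income rate return total",
]
bm25 = BM25Okapi([chunk.split() for chunk in chunks])

scores = bm25.get_scores("password validation".split())
print(scores)  # chunks containing the exact word "password" score highest
```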
---
## 🔄 **Query Expansion** - Making your search smarter
**What it is:** Automatically adding related terms to your search
**Simple explanation:** When you search for "auth", the system automatically expands it to "auth authentication login signin user validate".
**Pros:** Much better, more comprehensive results
**Cons:** Slower search, sometimes too broad
**When to use:**
- Turn ON for: Complex searches, learning new codebases
- Turn OFF for: Quick lookups, very specific searches
---
## 🤖 **LLM** - Large Language Model (The AI Brain)
**What it is:** The AI that reads your search results and explains them in plain English
**Simple explanation:** After finding relevant code chunks, the LLM reads them like a human would and gives you a summary like: "This code handles user registration by validating email format, checking for existing users, hashing passwords, and saving to database."
**Models you might see:**
- **qwen3:0.6b** - Ultra-fast, good for most questions
- **llama3.2** - Slower but more detailed
- **auto** - Picks the best available model
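**How does the AI actually get asked?** Under the hood it's just an HTTP call to your local Ollama server. A rough sketch (the prompt wording here is invented for illustration - the real prompts live in the project code):
```python
import requests

chunk = "def login_user(email, password): ..."  # a search result chunk
response = requests.post(
    "http://localhost:11434/api/generate",
    json={
        "model": "qwen3:0.6b",
        "prompt": f"Explain what this code does in plain English:\n\n{chunk}",
        "stream": False,  # wait for the complete answer
    },
    timeout=60,
)
print(response.json()["response"])  # the plain-English explanation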
---
## 🧮 **Synthesis** vs **Exploration** - Two ways to get answers
### 🚀 **Synthesis Mode** (Fast & Consistent)
**What it does:** Quick, factual answers about your code
**Best for:** "What does this function do?" "Where is authentication handled?" "How does the database connection work?"
**Speed:** Very fast (no "thinking" overhead)
### 🧠 **Exploration Mode** (Deep & Interactive)
**What it does:** Detailed analysis with reasoning, remembers conversation
**Best for:** "Why is this function slow?" "What are the security issues here?" "How would I add a new feature?"
**Features:** Shows its reasoning process, you can ask follow-up questions
---
## ⚡ **Streaming** - Handling huge files without crashing
**What it is:** Processing large files in smaller batches instead of all at once
**Simple explanation:** Imagine trying to eat an entire cake at once vs. eating it slice by slice. Streaming is like eating slice by slice - your computer won't choke on huge files.
**When it kicks in:** Files larger than 1MB (that's about 25,000 lines of code)
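**What that looks like in code** - a rough sketch of the slice-by-slice idea (illustrative only, not the project's actual streaming implementation; `process_piece` is a made-up placeholder):
```python
def read_in_pieces(path, piece_bytes=65536):
    """Yield a big file piece by piece so it never sits in memory all at once."""
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        while True:
            piece = f.read(piece_bytes)
            if not piece:
                break
            yield piece

for piece in read_in_pieces("huge_generated_file.js"):
    process_piece(piece)  # made-up placeholder for chunking/embedding each piece
```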
---
## 🏷️ **Semantic** vs **Fixed** Chunking
**Semantic chunking (RECOMMENDED):** Smart splitting that respects code structure
- Keeps functions together
- Keeps classes together
- Respects natural code boundaries
**Fixed chunking:** Simple splitting that just cuts at size limits
- Faster processing
- Might cut functions in half
- Less intelligent but more predictable
**For beginners:** Always use semantic chunking unless you have a specific reason not to.
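To see why fixed chunking is faster but dumber, here's roughly all it does (a sketch, not the project's actual chunker):
```python
def fixed_chunks(text, max_size=2000):
    """Cut text into pieces of at most max_size characters, ignoring code structure."""
    return [text[i:i + max_size] for i in range(0, len(text), max_size)]
```
Semantic chunking instead looks at the code's structure and cuts at function/class boundaries - more work per file, but the chunks make much more sense in search results.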
---
## ❓ **Common Questions**
**Q: Do I need to understand embeddings to use this?**
A: Nope! Just know they help find similar code. The system handles all the technical details.
**Q: What's a good similarity threshold for beginners?**
A: Start with 0.1. If you get too many results, try 0.2. If you get too few, try 0.05.
**Q: Should I enable query expansion?**
A: For learning new codebases: YES. For quick specific searches: NO. The TUI enables it automatically when helpful.
**Q: Which embedding method should I choose?**
A: Use "auto" - it tries the best option and falls back gracefully if needed.
**Q: What if I don't have Ollama installed?**
A: No problem! The system will automatically fall back to other methods that work without any additional software.
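**Tip:** you can experiment with the similarity threshold per search without editing any config, using the CLI flag shown in the troubleshooting guide:
```bash
# Permissive: show more results
./rag-mini search /path/to/project "user login" --threshold 0.05

# Picky: show only close matches
./rag-mini search /path/to/project "user login" --threshold 0.2
```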
---
## 🚀 **Quick Start Recommendations**
**For absolute beginners:**
1. Keep all default settings
2. Use the TUI interface to start
3. Try simple searches like "user login" or "database connection"
4. Gradually try the CLI commands as you get comfortable
**For faster results:**
- Set `similarity_threshold: 0.2`
- Set `expand_queries: false`
- Use synthesis mode instead of exploration
**For learning new codebases:**
- Set `expand_queries: true`
- Use exploration mode
- Ask "why" and "how" questions
---
**Remember:** This is a learning tool! Don't be afraid to experiment with settings and see what works best for your projects. The beauty of FSS-Mini-RAG is that it's designed to be beginner-friendly while still being powerful.

docs/TROUBLESHOOTING.md (new file, 460 lines)

@@ -0,0 +1,460 @@
# 🛠️ Troubleshooting Guide - Common Issues & Solutions
*Having problems? You're not alone! Here are solutions to the most common issues beginners encounter.*
---
## 🚀 Installation & Setup Issues
### ❌ "Command not found: ollama"
**Problem:** The system can't find Ollama
**Solution:**
```bash
# Install Ollama
curl -fsSL https://ollama.ai/install.sh | sh
# Or on Mac: brew install ollama
# Start Ollama
ollama serve
```
**Alternative:** Use the system without Ollama - it will automatically fall back to other embedding methods.
### ❌ "Permission denied" when running scripts
**Problem:** Script files aren't executable
**Solution:**
```bash
chmod +x rag-mini.py rag-tui.py install_mini_rag.sh
# Or run with python directly:
python3 rag-mini.py --help
```
### ❌ "Module not found" or import errors
**Problem:** Python dependencies not installed
**Solution:**
```bash
# Install dependencies
pip3 install -r requirements.txt
# If that fails, try:
pip3 install --user -r requirements.txt
```
### ❌ Installation script fails
**Problem:** `./install_mini_rag.sh` doesn't work
**Solution:**
```bash
# Make it executable first
chmod +x install_mini_rag.sh
# Then run
./install_mini_rag.sh
# Or install manually:
pip3 install -r requirements.txt
python3 -c "import claude_rag; print('✅ Installation successful')"
```
---
## 🔍 Search & Results Issues
### ❌ "No results found" for everything
**Problem:** Search isn't finding anything
**Diagnosis & Solutions:**
1. **Check if project is indexed:**
```bash
./rag-mini status /path/to/project
# If not indexed:
./rag-mini index /path/to/project
```
2. **Lower similarity threshold:**
- Edit config file, change `similarity_threshold: 0.05`
- Or try: `./rag-mini search /path/to/project "query" --threshold 0.05`
3. **Try broader search terms:**
- Instead of: "getUserById"
- Try: "user function" or "get user"
4. **Enable query expansion:**
- Edit config: `expand_queries: true`
- Or use TUI which enables it automatically
### ❌ Search results are irrelevant/weird
**Problem:** Getting results that don't match your search
**Solutions:**
1. **Increase similarity threshold:**
```yaml
search:
  similarity_threshold: 0.3 # Higher = more picky
```
2. **Use more specific terms:**
- Instead of: "function"
- Try: "login function" or "authentication method"
3. **Check BM25 setting:**
```yaml
search:
  enable_bm25: true # Helps find exact word matches
```
### ❌ Search is too slow
**Problem:** Takes too long to get results
**Solutions:**
1. **Disable query expansion:**
```yaml
search:
  expand_queries: false
```
2. **Reduce result limit:**
```yaml
search:
  default_limit: 5 # Instead of 10
```
3. **Use faster embedding method:**
```yaml
embedding:
  preferred_method: hash # Fastest but lower quality
```
4. **Smaller batch size:**
```yaml
embedding:
  batch_size: 16 # Instead of 32
```
---
## 🤖 AI/LLM Issues
### ❌ "LLM synthesis unavailable"
**Problem:** AI explanations aren't working
**Solutions:**
1. **Check Ollama is running:**
```bash
# In one terminal:
ollama serve
# In another:
ollama list # Should show installed models
```
2. **Install a model:**
```bash
ollama pull qwen3:0.6b # Fast, small model
# Or: ollama pull llama3.2 # Larger but better
```
3. **Test connection:**
```bash
curl http://localhost:11434/api/tags
# Should return JSON with model list
```
### ❌ AI gives weird/wrong answers
**Problem:** LLM responses don't make sense
**Solutions:**
1. **Lower temperature:**
```yaml
llm:
  synthesis_temperature: 0.1 # More factual, less creative
```
2. **Try different model:**
```bash
ollama pull qwen3:1.7b # Good balance of speed/quality
```
3. **Use synthesis mode instead of exploration:**
```bash
./rag-mini search /path "query" --synthesize
# Instead of: ./rag-mini explore /path
```
---
## 💾 Memory & Performance Issues
### ❌ "Out of memory" or computer freezes during indexing
**Problem:** System runs out of RAM
**Solutions:**
1. **Reduce batch size:**
```yaml
embedding:
  batch_size: 8 # Much smaller batches
```
2. **Lower streaming threshold:**
```yaml
streaming:
  threshold_bytes: 512000 # 512KB instead of 1MB
```
3. **Index smaller projects first:**
```bash
# Exclude large directories
./rag-mini index /path/to/project --exclude "node_modules/**,dist/**"
```
4. **Use hash embeddings:**
```yaml
embedding:
  preferred_method: hash # Much less memory
```
### ❌ Indexing is extremely slow
**Problem:** Taking forever to index project
**Solutions:**
1. **Exclude unnecessary files:**
```yaml
files:
  exclude_patterns:
    - "node_modules/**"
    - ".git/**"
    - "*.log"
    - "build/**"
    - "*.min.js" # Minified files
```
2. **Increase minimum file size:**
```yaml
files:
  min_file_size: 200 # Skip tiny files
```
3. **Use simpler chunking:**
```yaml
chunking:
  strategy: fixed # Faster than semantic
```
4. **More workers (if you have good CPU):**
```bash
./rag-mini index /path/to/project --workers 8
```
---
## ⚙️ Configuration Issues
### ❌ "Invalid configuration" errors
**Problem:** Config file has errors
**Solutions:**
1. **Check YAML syntax:**
```bash
python3 -c "import yaml; yaml.safe_load(open('config.yaml'))"
```
2. **Copy from working example:**
```bash
cp examples/config.yaml .claude-rag/config.yaml
```
3. **Reset to defaults:**
```bash
rm .claude-rag/config.yaml
# System will recreate with defaults
```
### ❌ Changes to config aren't taking effect
**Problem:** Modified settings don't work
**Solutions:**
1. **Restart TUI/CLI:**
- Configuration is loaded at startup
- Exit and restart the interface
2. **Check config location:**
```bash
# Project-specific config:
/path/to/project/.claude-rag/config.yaml
# Global config:
~/.claude-rag/config.yaml
```
3. **Force re-index after config changes:**
```bash
./rag-mini index /path/to/project --force
```
---
## 🖥️ Interface Issues
### ❌ TUI looks broken/garbled
**Problem:** Text interface isn't displaying correctly
**Solutions:**
1. **Try different terminal:**
```bash
# Instead of basic terminal, try:
# - iTerm2 (Mac)
# - Windows Terminal (Windows)
# - GNOME Terminal (Linux)
```
2. **Use CLI directly:**
```bash
./rag-mini --help # Skip TUI entirely
```
3. **Check terminal size:**
```bash
# Make terminal window larger (TUI needs space)
# At least 80x24 characters
```
### ❌ "Keyboard interrupt" or TUI crashes
**Problem:** Interface stops responding
**Solutions:**
1. **Use Ctrl+C to exit cleanly:**
- Don't force-quit if possible
2. **Check for conflicting processes:**
```bash
ps aux | grep rag-tui
# Kill any stuck processes
```
3. **Use CLI as fallback:**
```bash
./rag-mini search /path/to/project "your query"
```
---
## 📁 File & Path Issues
### ❌ "Project not found" or "Permission denied"
**Problem:** Can't access project directory
**Solutions:**
1. **Check path exists:**
```bash
ls -la /path/to/project
```
2. **Check permissions:**
```bash
# Make sure you can read the directory
chmod -R +r /path/to/project
```
3. **Use absolute paths:**
```bash
# Instead of: ./rag-mini index ../my-project
# Use: ./rag-mini index /full/path/to/my-project
```
### ❌ "No files found to index"
**Problem:** System doesn't see any files
**Solutions:**
1. **Check include patterns:**
```yaml
files:
  include_patterns:
    - "**/*.py" # Only Python files
    - "**/*.js" # Add JavaScript
    - "**/*.md" # Add Markdown
```
2. **Check exclude patterns:**
```yaml
files:
  exclude_patterns: [] # Remove all exclusions temporarily
```
3. **Lower minimum file size:**
```yaml
files:
  min_file_size: 10 # Instead of 50
```
---
## 🔍 Quick Diagnostic Commands
**Check system status:**
```bash
./rag-mini status /path/to/project
```
**Test embeddings:**
```bash
python3 -c "from claude_rag.ollama_embeddings import OllamaEmbedder; e=OllamaEmbedder(); print(e.get_embedding_info())"
```
**Verify installation:**
```bash
python3 -c "import claude_rag; print('✅ RAG system installed')"
```
**Test Ollama connection:**
```bash
curl -s http://localhost:11434/api/tags | python3 -m json.tool
```
**Check disk space:**
```bash
df -h .claude-rag/ # Make sure you have space for index
```
---
## 🆘 When All Else Fails
1. **Start fresh:**
```bash
rm -rf .claude-rag/
./rag-mini index /path/to/project
```
2. **Use minimal config:**
```yaml
# Simplest possible config:
chunking:
  strategy: fixed
embedding:
  preferred_method: auto
search:
  expand_queries: false
```
3. **Try a tiny test project:**
```bash
mkdir test-project
echo "def hello(): print('world')" > test-project/test.py
./rag-mini index test-project
./rag-mini search test-project "hello function"
```
4. **Get help:**
- Check the main README.md
- Look at examples/ directory
- Try the basic_usage.py example
---
## 💡 Prevention Tips
**For beginners:**
- Start with default settings
- Use the TUI interface first
- Test with small projects initially
- Keep Ollama running in background
**For better results:**
- Be specific in search queries
- Use the glossary to understand terms
- Experiment with config settings on test projects first
- Use synthesis mode for quick answers, exploration for learning
**Remember:** This is a learning tool! Don't be afraid to experiment and try different settings. The worst thing that can happen is you delete the `.claude-rag` directory and start over. 🚀

examples/config-beginner.yaml (new file, 72 lines)

@@ -0,0 +1,72 @@
# 🚀 BEGINNER CONFIG - Simple & Reliable
# Perfect for newcomers who want everything to "just work"
# Copy this to your project: cp examples/config-beginner.yaml /path/to/project/.claude-rag/config.yaml
#═══════════════════════════════════════════════════════════════════════
# ✨ BEGINNER-FRIENDLY SETTINGS - No overwhelming options!
#═══════════════════════════════════════════════════════════════════════
# 📝 How to split your code files (keep it simple)
chunking:
  max_size: 2000 # Good size for most code (about 50 lines)
  min_size: 150 # Skip tiny fragments
  strategy: semantic # Smart splitting (respects functions/classes)

# 🌊 Handle large files without crashing
streaming:
  enabled: true # Always keep this on
  threshold_bytes: 1048576 # 1MB - good for most computers

# 📁 Which files to include
files:
  min_file_size: 50 # Skip empty/tiny files

  # 🚫 Skip these folders (saves time and storage)
  exclude_patterns:
    - "node_modules/**" # JavaScript packages
    - ".git/**" # Git history
    - "__pycache__/**" # Python cache
    - "*.pyc" # Python bytecode
    - ".venv/**" # Python virtual environments
    - "build/**" # Build artifacts
    - "dist/**" # Distribution files

  include_patterns:
    - "**/*" # Everything else

# 🧠 Embeddings (the "AI fingerprints" of your code)
embedding:
  preferred_method: auto # Try best method, fall back if needed - SAFEST
  batch_size: 32 # Good balance of speed and memory usage

# 🔍 Search behavior
search:
  default_limit: 10 # Show 10 results (good starting point)
  enable_bm25: true # Find exact word matches too
  similarity_threshold: 0.1 # Pretty permissive (shows more results)
  expand_queries: false # Keep it simple for now

# 🤖 AI explanations (optional but helpful)
llm:
  synthesis_model: auto # Pick best available model
  enable_synthesis: false # Turn on manually with --synthesize
  synthesis_temperature: 0.3 # Factual answers
  cpu_optimized: true # Good for computers without fancy graphics cards
  enable_thinking: true # Shows reasoning (great for learning!)
  max_expansion_terms: 6 # Keep expansions focused
#═══════════════════════════════════════════════════════════════════════
# 🎯 WHAT THIS CONFIG DOES:
#
# ✅ Works reliably across different systems
# ✅ Good performance on modest hardware
# ✅ Balanced search results (not too few, not too many)
# ✅ Safe defaults that won't crash your computer
# ✅ AI features available but not overwhelming
#
# 🚀 TO GET STARTED:
# 1. Copy this file to your project: .claude-rag/config.yaml
# 2. Index your project: ./rag-mini index /path/to/project
# 3. Search: ./rag-mini search /path/to/project "your query"
# 4. Try AI: ./rag-mini search /path/to/project "your query" --synthesize
#═══════════════════════════════════════════════════════════════════════

examples/config-fast.yaml (new file, 105 lines)

@@ -0,0 +1,105 @@
# ⚡ FAST CONFIG - Maximum Speed
# When you need quick results and don't mind slightly lower quality
# Perfect for: large projects, frequent searches, older computers
#═══════════════════════════════════════════════════════════════════════
# 🚀 SPEED-OPTIMIZED SETTINGS - Everything tuned for performance!
#═══════════════════════════════════════════════════════════════════════
# 📝 Chunking optimized for speed
chunking:
  max_size: 1500 # Smaller chunks = faster processing
  min_size: 100 # More aggressive minimum
  strategy: fixed # Simple splitting (faster than semantic)

# 🌊 More aggressive streaming for memory efficiency
streaming:
  enabled: true
  threshold_bytes: 512000 # 512KB - process big files in smaller chunks

# 📁 File filtering optimized for speed
files:
  min_file_size: 100 # Skip more tiny files

  # 🚫 Aggressive exclusions for speed
  exclude_patterns:
    - "node_modules/**"
    - ".git/**"
    - "__pycache__/**"
    - "*.pyc"
    - ".venv/**"
    - "venv/**"
    - "build/**"
    - "dist/**"
    - "*.min.js" # Skip minified files
    - "*.min.css" # Skip minified CSS
    - "*.log" # Skip log files
    - "*.tmp" # Skip temp files
    - "target/**" # Rust/Java build dirs
    - ".next/**" # Next.js build dir
    - ".nuxt/**" # Nuxt build dir

  include_patterns:
    - "**/*.py" # Focus on common code files only
    - "**/*.js"
    - "**/*.ts"
    - "**/*.jsx"
    - "**/*.tsx"
    - "**/*.java"
    - "**/*.cpp"
    - "**/*.c"
    - "**/*.h"
    - "**/*.rs"
    - "**/*.go"
    - "**/*.php"
    - "**/*.rb"
    - "**/*.md"

# 🧠 Fastest embedding method
embedding:
  preferred_method: hash # Instant embeddings (lower quality but very fast)
  batch_size: 64 # Larger batches for efficiency

# 🔍 Search optimized for speed
search:
  default_limit: 5 # Fewer results = faster display
  enable_bm25: false # Skip keyword matching for speed
  similarity_threshold: 0.2 # Higher threshold = fewer results to process
  expand_queries: false # No query expansion (much faster)

# 🤖 Minimal AI for speed
llm:
  synthesis_model: qwen3:0.6b # Smallest/fastest model
  enable_synthesis: false # Only use when explicitly requested
  synthesis_temperature: 0.1 # Fast, factual responses
  cpu_optimized: true # Use lightweight models
  enable_thinking: false # Skip thinking process for speed
  max_expansion_terms: 4 # Shorter expansions
#═══════════════════════════════════════════════════════════════════════
# ⚡ WHAT THIS CONFIG PRIORITIZES:
#
# 🚀 Indexing speed - get up and running quickly
# 🚀 Search speed - results in milliseconds
# 🚀 Memory efficiency - won't slow down your computer
# 🚀 CPU efficiency - good for older/slower machines
# 🚀 Storage efficiency - smaller index files
#
# ⚖️ TRADE-OFFS:
# ⚠️ Lower search quality (might miss some relevant results)
# ⚠️ Less context in results (smaller chunks)
# ⚠️ No query expansion (might need more specific search terms)
# ⚠️ Basic embeddings (hash-based, not semantic)
#
# 🎯 PERFECT FOR:
# • Large codebases (>10k files)
# • Older computers with limited resources
# • When you know exactly what you're looking for
# • Frequent, quick lookups
# • CI/CD environments where speed matters
#
# 🚀 TO USE THIS CONFIG:
# 1. Copy to project: cp examples/config-fast.yaml .claude-rag/config.yaml
# 2. Index: ./rag-mini index /path/to/project
# 3. Enjoy lightning-fast searches! ⚡
#═══════════════════════════════════════════════════════════════════════

examples/config-quality.yaml (new file, 111 lines)

@@ -0,0 +1,111 @@
# 💎 QUALITY CONFIG - Best Possible Results
# When you want the highest quality search and AI responses
# Perfect for: learning new codebases, research, complex analysis
#═══════════════════════════════════════════════════════════════════════
# 🎯 QUALITY-OPTIMIZED SETTINGS - Everything tuned for best results!
#═══════════════════════════════════════════════════════════════════════
# 📝 Chunking for maximum context and quality
chunking:
  max_size: 3000 # Larger chunks = more context per result
  min_size: 200 # Ensure substantial content per chunk
  strategy: semantic # Smart splitting that respects code structure

# 🌊 Conservative streaming (favor quality over speed)
streaming:
  enabled: true
  threshold_bytes: 2097152 # 2MB - less aggressive chunking

# 📁 Comprehensive file inclusion
files:
  min_file_size: 20 # Include even small files (might contain important info)

  # 🎯 Minimal exclusions (include more content)
  exclude_patterns:
    - "node_modules/**" # Still skip these (too much noise)
    - ".git/**" # Git history not useful for code search
    - "__pycache__/**" # Python bytecode
    - "*.pyc"
    - ".venv/**"
    - "build/**" # Compiled artifacts
    - "dist/**"
    # Note: We keep logs, docs, configs that might have useful context

  include_patterns:
    - "**/*" # Include everything not explicitly excluded

# 🧠 Best embedding quality
embedding:
  preferred_method: ollama # Highest quality embeddings (needs Ollama)
  ollama_model: nomic-embed-text # Excellent code understanding
  ml_model: sentence-transformers/all-MiniLM-L6-v2 # Good fallback
  batch_size: 16 # Smaller batches for stability

# 🔍 Search optimized for comprehensive results
search:
  default_limit: 15 # More results to choose from
  enable_bm25: true # Use both semantic and keyword matching
  similarity_threshold: 0.05 # Very permissive (show more possibilities)
  expand_queries: true # Automatic query expansion for better recall

# 🤖 High-quality AI analysis
llm:
  synthesis_model: auto # Use best available model
  enable_synthesis: true # AI explanations by default
  synthesis_temperature: 0.4 # Good balance of accuracy and insight
  cpu_optimized: false # Use powerful models if available
  enable_thinking: true # Show detailed reasoning process
  max_expansion_terms: 10 # Comprehensive query expansion
#═══════════════════════════════════════════════════════════════════════
# 💎 WHAT THIS CONFIG MAXIMIZES:
#
# 🎯 Search comprehensiveness - find everything relevant
# 🎯 Result context - larger chunks with more information
# 🎯 AI explanation quality - detailed, thoughtful analysis
# 🎯 Query understanding - automatic expansion and enhancement
# 🎯 Semantic accuracy - best embedding models available
#
# ⚖️ TRADE-OFFS:
# ⏳ Slower indexing (larger chunks, better embeddings)
# ⏳ Slower searching (query expansion, more results)
# 💾 More storage space (larger index, more files included)
# 🧠 More memory usage (larger batches, bigger models)
# ⚡ Higher CPU/GPU usage (better models)
#
# 🎯 PERFECT FOR:
# • Learning new, complex codebases
# • Research and analysis tasks
# • When you need to understand WHY code works a certain way
# • Finding subtle connections and patterns
# • Code review and security analysis
# • Academic or professional research
#
# 💻 REQUIREMENTS:
# • Ollama installed and running (ollama serve)
# • At least one language model (ollama pull qwen3:1.7b)
# • Decent computer specs (4GB+ RAM recommended)
# • Patience for thorough analysis 😊
#
# 🚀 TO USE THIS CONFIG:
# 1. Install Ollama: curl -fsSL https://ollama.ai/install.sh | sh
# 2. Start Ollama: ollama serve
# 3. Install a model: ollama pull qwen3:1.7b
# 4. Copy config: cp examples/config-quality.yaml .claude-rag/config.yaml
# 5. Index project: ./rag-mini index /path/to/project
# 6. Enjoy comprehensive analysis: ./rag-mini explore /path/to/project
#═══════════════════════════════════════════════════════════════════════
# 🧪 ADVANCED QUALITY TUNING (optional):
#
# For even better results, try these model combinations:
# • ollama pull nomic-embed-text:latest (best embeddings)
# • ollama pull qwen3:1.7b (good general model)
# • ollama pull llama3.2 (excellent for analysis)
#
# Or adjust these settings for your specific needs:
# • similarity_threshold: 0.3 (more selective results)
# • max_size: 4000 (even more context per result)
# • enable_thinking: false (hide reasoning, show just answers)
# • synthesis_temperature: 0.2 (more conservative AI responses)


@@ -1,55 +1,145 @@
-# FSS-Mini-RAG Configuration
-# Edit this file to customize indexing and search behavior
-# See docs/GETTING_STARTED.md for detailed explanations
+# FSS-Mini-RAG Configuration - Beginner-Friendly Edition
+#
+# 🎯 QUICK START PRESETS:
+# - Keep defaults for most cases (recommended for beginners)
+# - For large projects (>10k files): increase max_size to 3000
+# - For faster search: set similarity_threshold to 0.2
+# - For better results: enable expand_queries (but slower search)
+
+#═════════════════════════════════════════════════════════════════════════════════
+# 📝 CHUNKING: How we break up your code files for searching
+#═════════════════════════════════════════════════════════════════════════════════
+# Think of chunks as "bite-sized pieces" of your code that the system can search through.
+# Smaller chunks = more precise results but might miss context
+# Larger chunks = more context but might be less precise

 # Text chunking settings
 chunking:
-  max_size: 2000 # Maximum characters per chunk
-  min_size: 150 # Minimum characters per chunk
-  strategy: semantic # 'semantic' (language-aware) or 'fixed'
+  max_size: 2000 # Maximum characters per chunk (2000 = ~50 lines of code)
+                 # 💡 ADJUST IF: Getting results that are too narrow/broad
+                 # Small projects: 1500 | Large projects: 3000 | Detailed analysis: 4000
+  min_size: 150 # Minimum characters per chunk (150 = ~4-5 lines)
+                # ⚠️ Don't go below 100 or you'll get fragments
+  strategy: semantic # How to split files into chunks
+                     # 'semantic': Smart splitting (respects functions, classes) - RECOMMENDED
+                     # 'fixed': Simple splitting (just cuts at size limits) - faster but less intelligent

+#═════════════════════════════════════════════════════════════════════════════════
+# 🌊 STREAMING: How we handle really big files
+#═════════════════════════════════════════════════════════════════════════════════
+# Large files (like minified CSS or huge data files) get processed in smaller batches
+# to prevent your computer from running out of memory

 # Large file streaming settings
 streaming:
-  enabled: true
-  threshold_bytes: 1048576 # Files larger than this use streaming (1MB)
+  enabled: true # Always keep this true - prevents memory crashes
+  threshold_bytes: 1048576 # Files larger than 1MB use streaming (1MB = 1048576 bytes)
+                           # 💡 ADJUST IF: Low memory computer = 512000 | High memory = 2097152

+#═════════════════════════════════════════════════════════════════════════════════
+# 📁 FILES: Which files to include/exclude from indexing
+#═════════════════════════════════════════════════════════════════════════════════

 # File processing settings
 files:
-  min_file_size: 50 # Skip files smaller than this
+  min_file_size: 50 # Skip tiny files (50 bytes = ~1 line of code)
+                    # 💡 REASON: Tiny files usually aren't useful for searching

+  # 🚫 EXCLUDE PATTERNS: Files/folders we always skip (saves time and space)
   exclude_patterns:
-    - "node_modules/**"
-    - ".git/**"
-    - "__pycache__/**"
-    - "*.pyc"
-    - ".venv/**"
-    - "venv/**"
-    - "build/**"
-    - "dist/**"
+    - "node_modules/**" # JavaScript dependencies (huge and not your code)
+    - ".git/**" # Git history (not useful for code search)
+    - "__pycache__/**" # Python bytecode (generated files)
+    - "*.pyc" # More Python bytecode
+    - ".venv/**" # Python virtual environments
+    - "venv/**" # More virtual environments
+    - "build/**" # Compiled output (not source code)
+    - "dist/**" # Distribution files
+    # 💡 ADD YOUR OWN: Add patterns like "logs/**" or "*.tmp"

   include_patterns:
-    - "**/*" # Include all files by default
+    - "**/*" # Include everything else by default
+             # 💡 CUSTOMIZE: Could be ["**/*.py", "**/*.js"] for only Python/JS

+#═════════════════════════════════════════════════════════════════════════════════
+# 🧠 EMBEDDINGS: How we turn your code into searchable "vectors"
+#═════════════════════════════════════════════════════════════════════════════════
+# Embeddings are like "fingerprints" of your code that help find similar content
+# Don't worry about the technical details - the defaults work great!

 # Embedding generation settings
 embedding:
-  preferred_method: ollama # 'ollama', 'ml', 'hash', or 'auto'
-  ollama_model: nomic-embed-text
-  ollama_host: localhost:11434
-  ml_model: sentence-transformers/all-MiniLM-L6-v2
-  batch_size: 32 # Embeddings processed per batch
+  preferred_method: ollama # Which system to use for creating embeddings
+                           # 'ollama': Best quality (needs Ollama installed) - RECOMMENDED
+                           # 'ml': Good quality (downloads models automatically)
+                           # 'hash': Basic quality (works without internet)
+                           # 'auto': Try ollama, fall back to ml, then hash - SAFEST CHOICE
+  ollama_model: nomic-embed-text # Which Ollama model to use (this one is excellent)
+  ollama_host: localhost:11434 # Where to find Ollama (don't change unless you know why)
+  ml_model: sentence-transformers/all-MiniLM-L6-v2 # Backup model (small and fast)
+  batch_size: 32 # How many chunks to process at once
+                 # 💡 ADJUST IF: Slow computer = 16 | Fast computer = 64

+#═════════════════════════════════════════════════════════════════════════════════
+# 🔍 SEARCH: How the system finds and ranks results
+#═════════════════════════════════════════════════════════════════════════════════

 # Search behavior settings
 search:
-  default_limit: 10 # Default number of results
-  enable_bm25: true # Enable keyword matching boost
-  similarity_threshold: 0.1 # Minimum similarity score
-  expand_queries: false # Enable automatic query expansion (TUI auto-enables)
+  default_limit: 10 # How many search results to show by default
+                    # 💡 MORE RESULTS: 15-20 | FASTER SEARCH: 5-8
+  enable_bm25: true # Also use keyword matching (like Google search)
+                    # 💡 EFFECT: Finds exact word matches even if semantically different
+                    # Keep true unless getting too many irrelevant results
+  similarity_threshold: 0.1 # Minimum "similarity score" to show results (0.0-1.0)
+                            # 💡 HIGHER = fewer but more relevant results
+                            # Picky: 0.3 | Balanced: 0.1 | Show everything: 0.05
+  expand_queries: false # Automatically add related search terms
+                        # 💡 EFFECT: "auth" becomes "auth authentication login user"
+                        # Better results but slower - TUI enables this automatically

+#═════════════════════════════════════════════════════════════════════════════════
+# 🤖 LLM: Settings for the AI that explains and synthesizes results
+#═════════════════════════════════════════════════════════════════════════════════
+# The LLM (Large Language Model) reads your search results and explains them in plain English

 # LLM synthesis and query expansion settings
 llm:
-  ollama_host: localhost:11434
-  synthesis_model: auto # 'auto' prefers qwen3:0.6b for CPU efficiency
-  expansion_model: auto # Usually same as synthesis_model
-  max_expansion_terms: 8 # Maximum terms to add to queries
-  enable_synthesis: false # Enable synthesis by default
-  synthesis_temperature: 0.3 # LLM temperature for analysis
-  cpu_optimized: true # Prefer ultra-lightweight models for CPU-only systems
-  enable_thinking: true # Enable thinking mode for Qwen3 models (production: true, testing: false)
+  ollama_host: localhost:11434 # Where to find Ollama (don't change unless you know why)
+  synthesis_model: auto # Which AI model to use for explanations
+                        # 'auto': Picks best available model - RECOMMENDED
+                        # 'qwen3:0.6b': Ultra-fast, good for CPU-only computers
+                        # 'llama3.2': Slower but more detailed explanations
+  expansion_model: auto # Model for query expansion (usually same as synthesis)
+  max_expansion_terms: 8 # How many extra terms to add to expanded queries
+                         # 💡 MORE TERMS = broader search but potentially less focused
+  enable_synthesis: false # Turn on AI explanations by default
+                          # 💡 SET TO TRUE: If you want every search to include explanations
+                          # (You can always use --synthesize flag when you want it)
+  synthesis_temperature: 0.3 # How "creative" the AI explanations are (0.0-1.0)
+                             # 💡 Lower = more factual | Higher = more creative
+                             # Code analysis: 0.1-0.3 | Creative writing: 0.7-0.9
+  cpu_optimized: true # Prefer lightweight models for computers without graphics cards
+                      # 💡 DISABLE IF: You have a powerful GPU and want highest quality
+  enable_thinking: true # Let AI "think out loud" for complex questions
+                        # 💡 EFFECT: Shows reasoning process, better for learning/debugging

+#═════════════════════════════════════════════════════════════════════════════════
+# 🎯 QUICK TROUBLESHOOTING:
+#
+# Search returns nothing? → Lower similarity_threshold to 0.05
+# Search too slow? → Set expand_queries: false and batch_size: 16
+# Results not detailed enough? → Increase max_size to 3000
+# Getting weird fragments? → Check min_size is at least 150
+# AI not working? → Make sure Ollama is running: `ollama serve`
+# Out of memory errors? → Decrease batch_size to 16 and lower threshold_bytes
+#═════════════════════════════════════════════════════════════════════════════════


@@ -70,7 +70,16 @@ def index_project(project_path: Path, force: bool = False):
     except Exception as e:
         print(f"❌ Indexing failed: {e}")
-        print(f"   Use --verbose for details")
+        print()
+        print("🔧 Common solutions:")
+        print("   • Check if path exists and you have read permissions")
+        print("   • Ensure Python dependencies are installed: pip install -r requirements.txt")
+        print("   • Try with smaller project first to test setup")
+        print("   • Check available disk space for index files")
+        print()
+        print("📚 For detailed help:")
+        print(f"   ./rag-mini index {project_path} --verbose")
+        print("   Or see: docs/TROUBLESHOOTING.md")
         sys.exit(1)

 def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
@@ -89,10 +98,18 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
     if not results:
         print("❌ No results found")
-        print("\n💡 Try:")
-        print("   • Broader search terms")
-        print("   • Check spelling")
-        print("   • Use concepts: \"authentication\" instead of \"auth_handler\"")
+        print()
+        print("🔧 Quick fixes to try:")
+        print("   • Use broader terms: \"login\" instead of \"authenticate_user_session\"")
+        print("   • Try concepts: \"database query\" instead of specific function names")
+        print("   • Check spelling and try simpler words")
+        print("   • Search for file types: \"python class\" or \"javascript function\"")
+        print()
+        print("⚙️ Configuration adjustments:")
+        print(f"   • Lower threshold: ./rag-mini search {project_path} \"{query}\" --threshold 0.05")
+        print("   • More results: add --limit 20")
+        print()
+        print("📚 Need help? See: docs/TROUBLESHOOTING.md")
         return

     print(f"✅ Found {len(results)} results:")
@@ -154,10 +171,23 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
     except Exception as e:
         print(f"❌ Search failed: {e}")
+        print()
         if "not indexed" in str(e).lower():
-            print(f"   Run: rag-mini index {project_path}")
+            print("🔧 Solution:")
+            print(f"   ./rag-mini index {project_path}")
+            print()
         else:
-            print("   Use --verbose for details")
+            print("🔧 Common solutions:")
+            print("   • Check project path exists and is readable")
+            print("   • Verify index isn't corrupted: delete .claude-rag/ and re-index")
+            print("   • Try with a different project to test setup")
+            print("   • Check available memory and disk space")
+            print()
+            print("📚 Get detailed error info:")
+            print(f"   ./rag-mini search {project_path} \"{query}\" --verbose")
+            print("   Or see: docs/TROUBLESHOOTING.md")
+            print()
         sys.exit(1)

 def status_check(project_path: Path):