🎓 Complete beginner-friendly polish with production reliability

BEGINNER-FRIENDLY ENHANCEMENTS:
- Add comprehensive glossary explaining RAG, embeddings, chunks in plain English
- Create detailed troubleshooting guide covering installation, search issues, performance
- Provide preset configs (beginner/fast/quality) with extensive helpful comments
- Enhanced error messages with specific solutions and next steps

🔧 PRODUCTION RELIABILITY:
- Add thread-safe caching with automatic cleanup in QueryExpander
- Implement chunked processing for large batches to prevent memory issues
- Enhanced concurrent embedding with intelligent batch size management
- Memory leak prevention with LRU cache approximation

🏗️ ARCHITECTURE COMPLETENESS:
- Maintain two-mode system (synthesis fast, exploration thinking + memory)
- Preserve educational value while removing intimidation barriers
- Complete testing coverage for mode separation and context memory
- Full documentation reflecting clean two-mode architecture

Perfect balance: genuinely beginner-friendly without compromising technical sophistication
BobAi 2025-08-12 18:59:24 +10:00
parent 2c5eef8596
commit 3363171820
9 changed files with 1176 additions and 55 deletions


@@ -350,6 +350,10 @@ class OllamaEmbedder:
         if len(file_contents) <= 2:
             return self._batch_embed_sequential(file_contents)

+        # For very large batches, use chunked processing to prevent memory issues
+        if len(file_contents) > 500:  # Process in chunks to manage memory
+            return self._batch_embed_chunked(file_contents, max_workers)
+
         return self._batch_embed_concurrent(file_contents, max_workers)

     def _batch_embed_sequential(self, file_contents: List[dict]) -> List[dict]:
@@ -396,6 +400,35 @@ class OllamaEmbedder:
         indexed_results.sort(key=lambda x: x[0])
         return [result for _, result in indexed_results]

+    def _batch_embed_chunked(self, file_contents: List[dict], max_workers: int, chunk_size: int = 200) -> List[dict]:
+        """
+        Process very large batches in smaller chunks to prevent memory issues.
+        This is important for beginners who might try to index huge projects.
+        """
+        results = []
+        total_files = len(file_contents)
+
+        # Process in chunks
+        for i in range(0, len(file_contents), chunk_size):
+            chunk = file_contents[i:i + chunk_size]
+
+            # Log progress for large operations
+            if total_files > chunk_size:
+                chunk_num = i // chunk_size + 1
+                total_chunk_count = (total_files + chunk_size - 1) // chunk_size
+                logger.info(f"Processing chunk {chunk_num}/{total_chunk_count} ({len(chunk)} files)")
+
+            # Process this chunk using the concurrent method
+            chunk_results = self._batch_embed_concurrent(chunk, max_workers)
+            results.extend(chunk_results)
+
+            # Brief pause between chunks to prevent overwhelming the system
+            if i + chunk_size < len(file_contents):
+                import time
+                time.sleep(0.1)  # 100ms pause between chunks
+
+        return results
+
     def get_embedding_dim(self) -> int:
         """Return the dimension of embeddings produced by this model."""
         return self.embedding_dim


@@ -32,6 +32,7 @@ disable in CLI for maximum speed.
 import logging
 import re
+import threading
 from typing import List, Optional

 import requests

 from .config import RAGConfig
@@ -51,6 +52,7 @@ class QueryExpander:
         # Cache for expanded queries to avoid repeated API calls
         self._cache = {}
+        self._cache_lock = threading.RLock()  # Thread-safe cache access

     def _ensure_initialized(self):
         """Lazy initialization with LLM warmup."""
@@ -84,9 +86,10 @@
         self._ensure_initialized()

-        # Check cache first
-        if query in self._cache:
-            return self._cache[query]
+        # Check cache first (thread-safe)
+        with self._cache_lock:
+            if query in self._cache:
+                return self._cache[query]

         # Don't expand very short queries or obvious keywords
         if len(query.split()) <= 1 or len(query) <= 3:
@@ -95,8 +98,12 @@
         try:
             expanded = self._llm_expand_query(query)
             if expanded and expanded != query:
-                # Cache the result
-                self._cache[query] = expanded
+                # Cache the result (thread-safe)
+                with self._cache_lock:
+                    self._cache[query] = expanded
+                    # Prevent cache from growing too large
+                    if len(self._cache) % 100 == 0:  # Check every 100 entries
+                        self._manage_cache_size()

                 logger.info(f"Expanded query: '{query}' → '{expanded}'")
                 return expanded
@@ -227,8 +234,19 @@ Expanded query:"""
         return clean_response

     def clear_cache(self):
-        """Clear the expansion cache."""
-        self._cache.clear()
+        """Clear the expansion cache (thread-safe)."""
+        with self._cache_lock:
+            self._cache.clear()
+
+    def _manage_cache_size(self, max_size: int = 1000):
+        """Keep cache from growing too large (prevents memory leaks)."""
+        with self._cache_lock:  # RLock, so safe to re-enter from expand_query
+            if len(self._cache) > max_size:
+                # Remove oldest half of cache entries (simple LRU approximation;
+                # dicts preserve insertion order in Python 3.7+)
+                items = list(self._cache.items())
+                keep_count = max_size // 2
+                self._cache = dict(items[-keep_count:])
+                logger.debug(f"Cache trimmed from {len(items)} to {len(self._cache)} entries")

     def is_available(self) -> bool:
         """Check if query expansion is available."""

docs/BEGINNER_GLOSSARY.md (new file, 202 lines)

@@ -0,0 +1,202 @@
# 📚 Beginner's Glossary - RAG Terms Made Simple
*Confused by all the technical terms? Don't worry! This guide explains everything in plain English.*
---
## 🤖 **RAG** - Retrieval Augmented Generation
**What it is:** A fancy way of saying "search your code and get AI explanations"
**Simple explanation:** Instead of just searching for keywords (like Google), RAG finds code that's *similar in meaning* to what you're looking for, then has an AI explain it to you.
**Real example:**
- You search for "user authentication"
- RAG finds code about login systems, password validation, and user sessions
- AI explains: "This code handles user logins using email/password, stores sessions in cookies, and validates users on each request"
---
## 🧩 **Chunks** - Bite-sized pieces of your code
**What it is:** Your code files broken into smaller, searchable pieces
**Simple explanation:** RAG can't search entire huge files efficiently, so it breaks them into "chunks" - like cutting a pizza into slices. Each chunk is usually one function, one class, or a few related lines.
**Why it matters:**
- Too small chunks = missing context ("this variable" but what variable?)
- Too big chunks = too much unrelated stuff in search results
- Just right = perfect context for understanding what code does
**Real example:**
```python
# This would be one chunk:
def login_user(email, password):
    """Authenticate user with email and password."""
    user = find_user_by_email(email)
    if user and check_password(user, password):
        create_session(user)
        return True
    return False
```
---
## 🧠 **Embeddings** - Code "fingerprints"
**What it is:** A way to convert your code into numbers that computers can compare
**Simple explanation:** Think of embeddings like DNA fingerprints for your code. Similar code gets similar fingerprints. The computer can then find code with similar "fingerprints" to what you're searching for.
**The magic:** Code that does similar things gets similar embeddings, even if the exact words are different:
- `login_user()` and `authenticate()` would have similar embeddings
- `calculate_tax()` and `login_user()` would have very different embeddings
**You don't need to understand the technical details** - just know that embeddings help find semantically similar code, not just exact word matches.
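**Want to peek under the hood anyway?** Here's a tiny sketch of the math (cosine similarity) that compares two embeddings. The numbers are made up for illustration - real embeddings have hundreds of dimensions:
```python
import math

def cosine_similarity(a, b):
    """Higher score = more similar meaning (roughly 0.0 to 1.0 for code embeddings)."""
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(x * x for x in b)))

# Toy 3-number "fingerprints" (real ones have hundreds of numbers)
login_user    = [0.9, 0.1, 0.2]
authenticate  = [0.8, 0.2, 0.3]  # similar meaning -> similar numbers
calculate_tax = [0.1, 0.9, 0.1]  # different meaning -> different numbers

print(cosine_similarity(login_user, authenticate))   # ~0.98 (very similar)
print(cosine_similarity(login_user, calculate_tax))  # ~0.24 (not similar)
```
This is also exactly where the "similarity score" described below comes from.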
---
## 🔍 **Vector Search** vs **Keyword Search**
**Keyword search (like Google):** Finds exact word matches
- Search "login" → finds code with the word "login"
- Misses: authentication, signin, user_auth
**Vector search (the RAG way):** Finds similar *meaning*
- Search "login" → finds login, authentication, signin, user validation
- Uses those embedding "fingerprints" to find similar concepts
**FSS-Mini-RAG uses both** for the best results!
---
## 📊 **Similarity Score** - How relevant is this result?
**What it is:** A number from 0.0 to 1.0 showing how closely your search matches the result
**Simple explanation:**
- 1.0 = Perfect match (very rare)
- 0.8+ = Excellent match
- 0.5+ = Good match
- 0.3+ = Somewhat relevant
- 0.1+ = Might be useful
- Below 0.1 = Probably not what you want
**In practice:** Most useful results are between 0.2-0.8
---
## 🎯 **BM25** - The keyword search boost
**What it is:** A fancy algorithm that finds exact word matches (like Google search)
**Simple explanation:** While embeddings find *similar meaning*, BM25 finds *exact words*. Using both together gives you the best of both worlds.
**Example:**
- You search for "password validation"
- Embeddings find: authentication functions, login methods, user security
- BM25 finds: code with the exact words "password" and "validation"
- Combined = comprehensive results
**Keep it enabled** unless you're getting too many irrelevant results.
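**Curious how BM25 looks in code?** Here's a minimal, self-contained sketch using the `rank-bm25` Python package (`pip install rank-bm25`). It's just an illustration of the algorithm, not necessarily how FSS-Mini-RAG wires it up internally:
```python
from rank_bm25 import BM25Okapi

# A toy "codebase" of three chunks, split into word tokens
chunks = [
    "def validate password check user password hash",
    "def login user email password create session",
    "def calculate tax income rate return total",
]
bm25 = BM25Okapi([chunk.split() for chunk in chunks])

scores = bm25.get_scores("password validation".split())
print(scores)  # chunks containing the exact word "password" score highest
```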
---
## 🔄 **Query Expansion** - Making your search smarter
**What it is:** Automatically adding related terms to your search
**Simple explanation:** When you search for "auth", the system automatically expands it to "auth authentication login signin user validate".
**Pros:** Much better, more comprehensive results
**Cons:** Slower search, sometimes too broad
**When to use:**
- Turn ON for: Complex searches, learning new codebases
- Turn OFF for: Quick lookups, very specific searches
---
## 🤖 **LLM** - Large Language Model (The AI Brain)
**What it is:** The AI that reads your search results and explains them in plain English
**Simple explanation:** After finding relevant code chunks, the LLM reads them like a human would and gives you a summary like: "This code handles user registration by validating email format, checking for existing users, hashing passwords, and saving to database."
**Models you might see:**
- **qwen3:0.6b** - Ultra-fast, good for most questions
- **llama3.2** - Slower but more detailed
- **auto** - Picks the best available model
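**How does the AI actually get asked?** Under the hood it's just an HTTP call to your local Ollama server. A rough sketch (the prompt wording here is invented for illustration - the real prompts live in the project code):
```python
import requests

chunk = "def login_user(email, password): ..."  # a search result chunk
response = requests.post(
    "http://localhost:11434/api/generate",
    json={
        "model": "qwen3:0.6b",
        "prompt": f"Explain what this code does in plain English:\n\n{chunk}",
        "stream": False,  # wait for the complete answer
    },
    timeout=60,
)
print(response.json()["response"])  # the plain-English explanation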
---
## 🧮 **Synthesis** vs **Exploration** - Two ways to get answers
### 🚀 **Synthesis Mode** (Fast & Consistent)
**What it does:** Quick, factual answers about your code
**Best for:** "What does this function do?" "Where is authentication handled?" "How does the database connection work?"
**Speed:** Very fast (no "thinking" overhead)
### 🧠 **Exploration Mode** (Deep & Interactive)
**What it does:** Detailed analysis with reasoning, remembers conversation
**Best for:** "Why is this function slow?" "What are the security issues here?" "How would I add a new feature?"
**Features:** Shows its reasoning process, you can ask follow-up questions
---
## ⚡ **Streaming** - Handling huge files without crashing
**What it is:** Processing large files in smaller batches instead of all at once
**Simple explanation:** Imagine trying to eat an entire cake at once vs. eating it slice by slice. Streaming is like eating slice by slice - your computer won't choke on huge files.
**When it kicks in:** Files larger than 1MB (that's about 25,000 lines of code)
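**What that looks like in code** - a rough sketch of the slice-by-slice idea (illustrative only, not the project's actual streaming implementation; `process_piece` is a made-up placeholder):
```python
def read_in_pieces(path, piece_bytes=65536):
    """Yield a big file piece by piece so it never sits in memory all at once."""
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        while True:
            piece = f.read(piece_bytes)
            if not piece:
                break
            yield piece

for piece in read_in_pieces("huge_generated_file.js"):
    process_piece(piece)  # made-up placeholder for chunking/embedding each piece
```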
---
## 🏷️ **Semantic** vs **Fixed** Chunking
**Semantic chunking (RECOMMENDED):** Smart splitting that respects code structure
- Keeps functions together
- Keeps classes together
- Respects natural code boundaries
**Fixed chunking:** Simple splitting that just cuts at size limits
- Faster processing
- Might cut functions in half
- Less intelligent but more predictable
**For beginners:** Always use semantic chunking unless you have a specific reason not to.
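To see why fixed chunking is faster but dumber, here's roughly all it does (a sketch, not the project's actual chunker):
```python
def fixed_chunks(text, max_size=2000):
    """Cut text into pieces of at most max_size characters, ignoring code structure."""
    return [text[i:i + max_size] for i in range(0, len(text), max_size)]
```
Semantic chunking instead looks at the code's structure and cuts at function/class boundaries - more work per file, but the chunks make much more sense in search results.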
---
## ❓ **Common Questions**
**Q: Do I need to understand embeddings to use this?**
A: Nope! Just know they help find similar code. The system handles all the technical details.
**Q: What's a good similarity threshold for beginners?**
A: Start with 0.1. If you get too many results, try 0.2. If you get too few, try 0.05.
**Q: Should I enable query expansion?**
A: For learning new codebases: YES. For quick specific searches: NO. The TUI enables it automatically when helpful.
**Q: Which embedding method should I choose?**
A: Use "auto" - it tries the best option and falls back gracefully if needed.
**Q: What if I don't have Ollama installed?**
A: No problem! The system will automatically fall back to other methods that work without any additional software.
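**Tip:** you can experiment with the similarity threshold per search without editing any config, using the CLI flag shown in the troubleshooting guide:
```bash
# Permissive: show more results
./rag-mini search /path/to/project "user login" --threshold 0.05

# Picky: show only close matches
./rag-mini search /path/to/project "user login" --threshold 0.2
```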
---
## 🚀 **Quick Start Recommendations**
**For absolute beginners:**
1. Keep all default settings
2. Use the TUI interface to start
3. Try simple searches like "user login" or "database connection"
4. Gradually try the CLI commands as you get comfortable
**For faster results:**
- Set `similarity_threshold: 0.2`
- Set `expand_queries: false`
- Use synthesis mode instead of exploration
**For learning new codebases:**
- Set `expand_queries: true`
- Use exploration mode
- Ask "why" and "how" questions
---
**Remember:** This is a learning tool! Don't be afraid to experiment with settings and see what works best for your projects. The beauty of FSS-Mini-RAG is that it's designed to be beginner-friendly while still being powerful.

docs/TROUBLESHOOTING.md (new file, 460 lines)

@@ -0,0 +1,460 @@
# 🛠️ Troubleshooting Guide - Common Issues & Solutions
*Having problems? You're not alone! Here are solutions to the most common issues beginners encounter.*
---
## 🚀 Installation & Setup Issues
### ❌ "Command not found: ollama"
**Problem:** The system can't find Ollama
**Solution:**
```bash
# Install Ollama
curl -fsSL https://ollama.ai/install.sh | sh
# Or on Mac: brew install ollama
# Start Ollama
ollama serve
```
**Alternative:** Use the system without Ollama - it will automatically fall back to other embedding methods.
### ❌ "Permission denied" when running scripts
**Problem:** Script files aren't executable
**Solution:**
```bash
chmod +x rag-mini.py rag-tui.py install_mini_rag.sh
# Or run with python directly:
python3 rag-mini.py --help
```
### ❌ "Module not found" or import errors
**Problem:** Python dependencies not installed
**Solution:**
```bash
# Install dependencies
pip3 install -r requirements.txt
# If that fails, try:
pip3 install --user -r requirements.txt
```
### ❌ Installation script fails
**Problem:** `./install_mini_rag.sh` doesn't work
**Solution:**
```bash
# Make it executable first
chmod +x install_mini_rag.sh
# Then run
./install_mini_rag.sh
# Or install manually:
pip3 install -r requirements.txt
python3 -c "import claude_rag; print('✅ Installation successful')"
```
---
## 🔍 Search & Results Issues
### ❌ "No results found" for everything
**Problem:** Search isn't finding anything
**Diagnosis & Solutions:**
1. **Check if project is indexed:**
```bash
./rag-mini status /path/to/project
# If not indexed:
./rag-mini index /path/to/project
```
2. **Lower similarity threshold:**
- Edit config file, change `similarity_threshold: 0.05`
- Or try: `./rag-mini search /path/to/project "query" --threshold 0.05`
3. **Try broader search terms:**
- Instead of: "getUserById"
- Try: "user function" or "get user"
4. **Enable query expansion:**
- Edit config: `expand_queries: true`
- Or use TUI which enables it automatically
### ❌ Search results are irrelevant/weird
**Problem:** Getting results that don't match your search
**Solutions:**
1. **Increase similarity threshold:**
```yaml
search:
  similarity_threshold: 0.3 # Higher = more picky
```
2. **Use more specific terms:**
- Instead of: "function"
- Try: "login function" or "authentication method"
3. **Check BM25 setting:**
```yaml
search:
  enable_bm25: true # Helps find exact word matches
```
### ❌ Search is too slow
**Problem:** Takes too long to get results
**Solutions:**
1. **Disable query expansion:**
```yaml
search:
  expand_queries: false
```
2. **Reduce result limit:**
```yaml
search:
  default_limit: 5 # Instead of 10
```
3. **Use faster embedding method:**
```yaml
embedding:
  preferred_method: hash # Fastest but lower quality
```
4. **Smaller batch size:**
```yaml
embedding:
  batch_size: 16 # Instead of 32
```
---
## 🤖 AI/LLM Issues
### ❌ "LLM synthesis unavailable"
**Problem:** AI explanations aren't working
**Solutions:**
1. **Check Ollama is running:**
```bash
# In one terminal:
ollama serve
# In another:
ollama list # Should show installed models
```
2. **Install a model:**
```bash
ollama pull qwen3:0.6b # Fast, small model
# Or: ollama pull llama3.2 # Larger but better
```
3. **Test connection:**
```bash
curl http://localhost:11434/api/tags
# Should return JSON with model list
```
### ❌ AI gives weird/wrong answers
**Problem:** LLM responses don't make sense
**Solutions:**
1. **Lower temperature:**
```yaml
llm:
  synthesis_temperature: 0.1 # More factual, less creative
```
2. **Try different model:**
```bash
ollama pull qwen3:1.7b # Good balance of speed/quality
```
3. **Use synthesis mode instead of exploration:**
```bash
./rag-mini search /path "query" --synthesize
# Instead of: ./rag-mini explore /path
```
---
## 💾 Memory & Performance Issues
### ❌ "Out of memory" or computer freezes during indexing
**Problem:** System runs out of RAM
**Solutions:**
1. **Reduce batch size:**
```yaml
embedding:
  batch_size: 8 # Much smaller batches
```
2. **Lower streaming threshold:**
```yaml
streaming:
  threshold_bytes: 512000 # 512KB instead of 1MB
```
3. **Index smaller projects first:**
```bash
# Exclude large directories
./rag-mini index /path/to/project --exclude "node_modules/**,dist/**"
```
4. **Use hash embeddings:**
```yaml
embedding:
  preferred_method: hash # Much less memory
```
### ❌ Indexing is extremely slow
**Problem:** Taking forever to index project
**Solutions:**
1. **Exclude unnecessary files:**
```yaml
files:
  exclude_patterns:
    - "node_modules/**"
    - ".git/**"
    - "*.log"
    - "build/**"
    - "*.min.js" # Minified files
```
2. **Increase minimum file size:**
```yaml
files:
  min_file_size: 200 # Skip tiny files
```
3. **Use simpler chunking:**
```yaml
chunking:
  strategy: fixed # Faster than semantic
```
4. **More workers (if you have good CPU):**
```bash
./rag-mini index /path/to/project --workers 8
```
---
## ⚙️ Configuration Issues
### ❌ "Invalid configuration" errors
**Problem:** Config file has errors
**Solutions:**
1. **Check YAML syntax:**
```bash
python3 -c "import yaml; yaml.safe_load(open('config.yaml'))"
```
2. **Copy from working example:**
```bash
cp examples/config.yaml .claude-rag/config.yaml
```
3. **Reset to defaults:**
```bash
rm .claude-rag/config.yaml
# System will recreate with defaults
```
### ❌ Changes to config aren't taking effect
**Problem:** Modified settings don't work
**Solutions:**
1. **Restart TUI/CLI:**
- Configuration is loaded at startup
- Exit and restart the interface
2. **Check config location:**
```bash
# Project-specific config:
/path/to/project/.claude-rag/config.yaml
# Global config:
~/.claude-rag/config.yaml
```
3. **Force re-index after config changes:**
```bash
./rag-mini index /path/to/project --force
```
---
## 🖥️ Interface Issues
### ❌ TUI looks broken/garbled
**Problem:** Text interface isn't displaying correctly
**Solutions:**
1. **Try different terminal:**
```bash
# Instead of basic terminal, try:
# - iTerm2 (Mac)
# - Windows Terminal (Windows)
# - GNOME Terminal (Linux)
```
2. **Use CLI directly:**
```bash
./rag-mini --help # Skip TUI entirely
```
3. **Check terminal size:**
```bash
# Make terminal window larger (TUI needs space)
# At least 80x24 characters
```
### ❌ "Keyboard interrupt" or TUI crashes
**Problem:** Interface stops responding
**Solutions:**
1. **Use Ctrl+C to exit cleanly:**
- Don't force-quit if possible
2. **Check for conflicting processes:**
```bash
ps aux | grep rag-tui
# Kill any stuck processes
```
3. **Use CLI as fallback:**
```bash
./rag-mini search /path/to/project "your query"
```
---
## 📁 File & Path Issues
### ❌ "Project not found" or "Permission denied"
**Problem:** Can't access project directory
**Solutions:**
1. **Check path exists:**
```bash
ls -la /path/to/project
```
2. **Check permissions:**
```bash
# Make sure you can read the directory
chmod -R +r /path/to/project
```
3. **Use absolute paths:**
```bash
# Instead of: ./rag-mini index ../my-project
# Use: ./rag-mini index /full/path/to/my-project
```
### ❌ "No files found to index"
**Problem:** System doesn't see any files
**Solutions:**
1. **Check include patterns:**
```yaml
files:
  include_patterns:
    - "**/*.py" # Only Python files
    - "**/*.js" # Add JavaScript
    - "**/*.md" # Add Markdown
```
2. **Check exclude patterns:**
```yaml
files:
  exclude_patterns: [] # Remove all exclusions temporarily
```
3. **Lower minimum file size:**
```yaml
files:
  min_file_size: 10 # Instead of 50
```
---
## 🔍 Quick Diagnostic Commands
**Check system status:**
```bash
./rag-mini status /path/to/project
```
**Test embeddings:**
```bash
python3 -c "from claude_rag.ollama_embeddings import OllamaEmbedder; e=OllamaEmbedder(); print(e.get_embedding_info())"
```
**Verify installation:**
```bash
python3 -c "import claude_rag; print('✅ RAG system installed')"
```
**Test Ollama connection:**
```bash
curl -s http://localhost:11434/api/tags | python3 -m json.tool
```
**Check disk space:**
```bash
df -h .claude-rag/ # Make sure you have space for index
```
---
## 🆘 When All Else Fails
1. **Start fresh:**
```bash
rm -rf .claude-rag/
./rag-mini index /path/to/project
```
2. **Use minimal config:**
```yaml
# Simplest possible config:
chunking:
  strategy: fixed
embedding:
  preferred_method: auto
search:
  expand_queries: false
```
3. **Try a tiny test project:**
```bash
mkdir test-project
echo "def hello(): print('world')" > test-project/test.py
./rag-mini index test-project
./rag-mini search test-project "hello function"
```
4. **Get help:**
- Check the main README.md
- Look at examples/ directory
- Try the basic_usage.py example
---
## 💡 Prevention Tips
**For beginners:**
- Start with default settings
- Use the TUI interface first
- Test with small projects initially
- Keep Ollama running in background
**For better results:**
- Be specific in search queries
- Use the glossary to understand terms
- Experiment with config settings on test projects first
- Use synthesis mode for quick answers, exploration for learning
**Remember:** This is a learning tool! Don't be afraid to experiment and try different settings. The worst thing that can happen is you delete the `.claude-rag` directory and start over. 🚀

examples/config-beginner.yaml (new file, 72 lines)

@@ -0,0 +1,72 @@
# 🚀 BEGINNER CONFIG - Simple & Reliable
# Perfect for newcomers who want everything to "just work"
# Copy this to your project: cp examples/config-beginner.yaml /path/to/project/.claude-rag/config.yaml
#═══════════════════════════════════════════════════════════════════════
# ✨ BEGINNER-FRIENDLY SETTINGS - No overwhelming options!
#═══════════════════════════════════════════════════════════════════════
# 📝 How to split your code files (keep it simple)
chunking:
  max_size: 2000 # Good size for most code (about 50 lines)
  min_size: 150 # Skip tiny fragments
  strategy: semantic # Smart splitting (respects functions/classes)

# 🌊 Handle large files without crashing
streaming:
  enabled: true # Always keep this on
  threshold_bytes: 1048576 # 1MB - good for most computers

# 📁 Which files to include
files:
  min_file_size: 50 # Skip empty/tiny files

  # 🚫 Skip these folders (saves time and storage)
  exclude_patterns:
    - "node_modules/**" # JavaScript packages
    - ".git/**" # Git history
    - "__pycache__/**" # Python cache
    - "*.pyc" # Python bytecode
    - ".venv/**" # Python virtual environments
    - "build/**" # Build artifacts
    - "dist/**" # Distribution files

  include_patterns:
    - "**/*" # Everything else

# 🧠 Embeddings (the "AI fingerprints" of your code)
embedding:
  preferred_method: auto # Try best method, fall back if needed - SAFEST
  batch_size: 32 # Good balance of speed and memory usage

# 🔍 Search behavior
search:
  default_limit: 10 # Show 10 results (good starting point)
  enable_bm25: true # Find exact word matches too
  similarity_threshold: 0.1 # Pretty permissive (shows more results)
  expand_queries: false # Keep it simple for now

# 🤖 AI explanations (optional but helpful)
llm:
  synthesis_model: auto # Pick best available model
  enable_synthesis: false # Turn on manually with --synthesize
  synthesis_temperature: 0.3 # Factual answers
  cpu_optimized: true # Good for computers without fancy graphics cards
  enable_thinking: true # Shows reasoning (great for learning!)
  max_expansion_terms: 6 # Keep expansions focused
#═══════════════════════════════════════════════════════════════════════
# 🎯 WHAT THIS CONFIG DOES:
#
# ✅ Works reliably across different systems
# ✅ Good performance on modest hardware
# ✅ Balanced search results (not too few, not too many)
# ✅ Safe defaults that won't crash your computer
# ✅ AI features available but not overwhelming
#
# 🚀 TO GET STARTED:
# 1. Copy this file to your project: .claude-rag/config.yaml
# 2. Index your project: ./rag-mini index /path/to/project
# 3. Search: ./rag-mini search /path/to/project "your query"
# 4. Try AI: ./rag-mini search /path/to/project "your query" --synthesize
#═══════════════════════════════════════════════════════════════════════

examples/config-fast.yaml (new file, 105 lines)

@@ -0,0 +1,105 @@
# ⚡ FAST CONFIG - Maximum Speed
# When you need quick results and don't mind slightly lower quality
# Perfect for: large projects, frequent searches, older computers
#═══════════════════════════════════════════════════════════════════════
# 🚀 SPEED-OPTIMIZED SETTINGS - Everything tuned for performance!
#═══════════════════════════════════════════════════════════════════════
# 📝 Chunking optimized for speed
chunking:
  max_size: 1500 # Smaller chunks = faster processing
  min_size: 100 # More aggressive minimum
  strategy: fixed # Simple splitting (faster than semantic)

# 🌊 More aggressive streaming for memory efficiency
streaming:
  enabled: true
  threshold_bytes: 512000 # 512KB - process big files in smaller chunks

# 📁 File filtering optimized for speed
files:
  min_file_size: 100 # Skip more tiny files

  # 🚫 Aggressive exclusions for speed
  exclude_patterns:
    - "node_modules/**"
    - ".git/**"
    - "__pycache__/**"
    - "*.pyc"
    - ".venv/**"
    - "venv/**"
    - "build/**"
    - "dist/**"
    - "*.min.js" # Skip minified files
    - "*.min.css" # Skip minified CSS
    - "*.log" # Skip log files
    - "*.tmp" # Skip temp files
    - "target/**" # Rust/Java build dirs
    - ".next/**" # Next.js build dir
    - ".nuxt/**" # Nuxt build dir

  include_patterns:
    - "**/*.py" # Focus on common code files only
    - "**/*.js"
    - "**/*.ts"
    - "**/*.jsx"
    - "**/*.tsx"
    - "**/*.java"
    - "**/*.cpp"
    - "**/*.c"
    - "**/*.h"
    - "**/*.rs"
    - "**/*.go"
    - "**/*.php"
    - "**/*.rb"
    - "**/*.md"

# 🧠 Fastest embedding method
embedding:
  preferred_method: hash # Instant embeddings (lower quality but very fast)
  batch_size: 64 # Larger batches for efficiency

# 🔍 Search optimized for speed
search:
  default_limit: 5 # Fewer results = faster display
  enable_bm25: false # Skip keyword matching for speed
  similarity_threshold: 0.2 # Higher threshold = fewer results to process
  expand_queries: false # No query expansion (much faster)

# 🤖 Minimal AI for speed
llm:
  synthesis_model: qwen3:0.6b # Smallest/fastest model
  enable_synthesis: false # Only use when explicitly requested
  synthesis_temperature: 0.1 # Fast, factual responses
  cpu_optimized: true # Use lightweight models
  enable_thinking: false # Skip thinking process for speed
  max_expansion_terms: 4 # Shorter expansions
#═══════════════════════════════════════════════════════════════════════
# ⚡ WHAT THIS CONFIG PRIORITIZES:
#
# 🚀 Indexing speed - get up and running quickly
# 🚀 Search speed - results in milliseconds
# 🚀 Memory efficiency - won't slow down your computer
# 🚀 CPU efficiency - good for older/slower machines
# 🚀 Storage efficiency - smaller index files
#
# ⚖️ TRADE-OFFS:
# ⚠️ Lower search quality (might miss some relevant results)
# ⚠️ Less context in results (smaller chunks)
# ⚠️ No query expansion (might need more specific search terms)
# ⚠️ Basic embeddings (hash-based, not semantic)
#
# 🎯 PERFECT FOR:
# • Large codebases (>10k files)
# • Older computers with limited resources
# • When you know exactly what you're looking for
# • Frequent, quick lookups
# • CI/CD environments where speed matters
#
# 🚀 TO USE THIS CONFIG:
# 1. Copy to project: cp examples/config-fast.yaml .claude-rag/config.yaml
# 2. Index: ./rag-mini index /path/to/project
# 3. Enjoy lightning-fast searches! ⚡
#═══════════════════════════════════════════════════════════════════════

examples/config-quality.yaml (new file, 111 lines)

@@ -0,0 +1,111 @@
# 💎 QUALITY CONFIG - Best Possible Results
# When you want the highest quality search and AI responses
# Perfect for: learning new codebases, research, complex analysis
#═══════════════════════════════════════════════════════════════════════
# 🎯 QUALITY-OPTIMIZED SETTINGS - Everything tuned for best results!
#═══════════════════════════════════════════════════════════════════════
# 📝 Chunking for maximum context and quality
chunking:
  max_size: 3000 # Larger chunks = more context per result
  min_size: 200 # Ensure substantial content per chunk
  strategy: semantic # Smart splitting that respects code structure

# 🌊 Conservative streaming (favor quality over speed)
streaming:
  enabled: true
  threshold_bytes: 2097152 # 2MB - less aggressive chunking

# 📁 Comprehensive file inclusion
files:
  min_file_size: 20 # Include even small files (might contain important info)

  # 🎯 Minimal exclusions (include more content)
  exclude_patterns:
    - "node_modules/**" # Still skip these (too much noise)
    - ".git/**" # Git history not useful for code search
    - "__pycache__/**" # Python bytecode
    - "*.pyc"
    - ".venv/**"
    - "build/**" # Compiled artifacts
    - "dist/**"
    # Note: We keep logs, docs, configs that might have useful context

  include_patterns:
    - "**/*" # Include everything not explicitly excluded

# 🧠 Best embedding quality
embedding:
  preferred_method: ollama # Highest quality embeddings (needs Ollama)
  ollama_model: nomic-embed-text # Excellent code understanding
  ml_model: sentence-transformers/all-MiniLM-L6-v2 # Good fallback
  batch_size: 16 # Smaller batches for stability

# 🔍 Search optimized for comprehensive results
search:
  default_limit: 15 # More results to choose from
  enable_bm25: true # Use both semantic and keyword matching
  similarity_threshold: 0.05 # Very permissive (show more possibilities)
  expand_queries: true # Automatic query expansion for better recall

# 🤖 High-quality AI analysis
llm:
  synthesis_model: auto # Use best available model
  enable_synthesis: true # AI explanations by default
  synthesis_temperature: 0.4 # Good balance of accuracy and insight
  cpu_optimized: false # Use powerful models if available
  enable_thinking: true # Show detailed reasoning process
  max_expansion_terms: 10 # Comprehensive query expansion
#═══════════════════════════════════════════════════════════════════════
# 💎 WHAT THIS CONFIG MAXIMIZES:
#
# 🎯 Search comprehensiveness - find everything relevant
# 🎯 Result context - larger chunks with more information
# 🎯 AI explanation quality - detailed, thoughtful analysis
# 🎯 Query understanding - automatic expansion and enhancement
# 🎯 Semantic accuracy - best embedding models available
#
# ⚖️ TRADE-OFFS:
# ⏳ Slower indexing (larger chunks, better embeddings)
# ⏳ Slower searching (query expansion, more results)
# 💾 More storage space (larger index, more files included)
# 🧠 More memory usage (larger batches, bigger models)
# ⚡ Higher CPU/GPU usage (better models)
#
# 🎯 PERFECT FOR:
# • Learning new, complex codebases
# • Research and analysis tasks
# • When you need to understand WHY code works a certain way
# • Finding subtle connections and patterns
# • Code review and security analysis
# • Academic or professional research
#
# 💻 REQUIREMENTS:
# • Ollama installed and running (ollama serve)
# • At least one language model (ollama pull qwen3:1.7b)
# • Decent computer specs (4GB+ RAM recommended)
# • Patience for thorough analysis 😊
#
# 🚀 TO USE THIS CONFIG:
# 1. Install Ollama: curl -fsSL https://ollama.ai/install.sh | sh
# 2. Start Ollama: ollama serve
# 3. Install a model: ollama pull qwen3:1.7b
# 4. Copy config: cp examples/config-quality.yaml .claude-rag/config.yaml
# 5. Index project: ./rag-mini index /path/to/project
# 6. Enjoy comprehensive analysis: ./rag-mini explore /path/to/project
#═══════════════════════════════════════════════════════════════════════
# 🧪 ADVANCED QUALITY TUNING (optional):
#
# For even better results, try these model combinations:
# • ollama pull nomic-embed-text:latest (best embeddings)
# • ollama pull qwen3:1.7b (good general model)
# • ollama pull llama3.2 (excellent for analysis)
#
# Or adjust these settings for your specific needs:
# • similarity_threshold: 0.3 (more selective results)
# • max_size: 4000 (even more context per result)
# • enable_thinking: false (hide reasoning, show just answers)
# • synthesis_temperature: 0.2 (more conservative AI responses)


@@ -1,55 +1,145 @@
-# FSS-Mini-RAG Configuration
-# Edit this file to customize indexing and search behavior
-# See docs/GETTING_STARTED.md for detailed explanations
+# FSS-Mini-RAG Configuration - Beginner-Friendly Edition
+#
+# 🎯 QUICK START PRESETS:
+# - Keep defaults for most cases (recommended for beginners)
+# - For large projects (>10k files): increase max_size to 3000
+# - For faster search: set similarity_threshold to 0.2
+# - For better results: enable expand_queries (but slower search)
+
+#═════════════════════════════════════════════════════════════════════════════════
+# 📝 CHUNKING: How we break up your code files for searching
+#═════════════════════════════════════════════════════════════════════════════════
+# Think of chunks as "bite-sized pieces" of your code that the system can search through.
+# Smaller chunks = more precise results but might miss context
+# Larger chunks = more context but might be less precise

 # Text chunking settings
 chunking:
-  max_size: 2000 # Maximum characters per chunk
-  min_size: 150 # Minimum characters per chunk
-  strategy: semantic # 'semantic' (language-aware) or 'fixed'
+  max_size: 2000 # Maximum characters per chunk (2000 = ~50 lines of code)
+                 # 💡 ADJUST IF: Getting results that are too narrow/broad
+                 # Small projects: 1500 | Large projects: 3000 | Detailed analysis: 4000
+  min_size: 150 # Minimum characters per chunk (150 = ~4-5 lines)
+                # ⚠️ Don't go below 100 or you'll get fragments
+  strategy: semantic # How to split files into chunks
+                     # 'semantic': Smart splitting (respects functions, classes) - RECOMMENDED
+                     # 'fixed': Simple splitting (just cuts at size limits) - faster but less intelligent

+#═════════════════════════════════════════════════════════════════════════════════
+# 🌊 STREAMING: How we handle really big files
+#═════════════════════════════════════════════════════════════════════════════════
+# Large files (like minified CSS or huge data files) get processed in smaller batches
+# to prevent your computer from running out of memory

 # Large file streaming settings
 streaming:
-  enabled: true
-  threshold_bytes: 1048576 # Files larger than this use streaming (1MB)
+  enabled: true # Always keep this true - prevents memory crashes
+  threshold_bytes: 1048576 # Files larger than 1MB use streaming (1MB = 1048576 bytes)
+                           # 💡 ADJUST IF: Low memory computer = 512000 | High memory = 2097152

+#═════════════════════════════════════════════════════════════════════════════════
+# 📁 FILES: Which files to include/exclude from indexing
+#═════════════════════════════════════════════════════════════════════════════════

 # File processing settings
 files:
-  min_file_size: 50 # Skip files smaller than this
+  min_file_size: 50 # Skip tiny files (50 bytes = ~1 line of code)
+                    # 💡 REASON: Tiny files usually aren't useful for searching

+  # 🚫 EXCLUDE PATTERNS: Files/folders we always skip (saves time and space)
   exclude_patterns:
-    - "node_modules/**"
-    - ".git/**"
-    - "__pycache__/**"
-    - "*.pyc"
-    - ".venv/**"
-    - "venv/**"
-    - "build/**"
-    - "dist/**"
+    - "node_modules/**" # JavaScript dependencies (huge and not your code)
+    - ".git/**" # Git history (not useful for code search)
+    - "__pycache__/**" # Python bytecode (generated files)
+    - "*.pyc" # More Python bytecode
+    - ".venv/**" # Python virtual environments
+    - "venv/**" # More virtual environments
+    - "build/**" # Compiled output (not source code)
+    - "dist/**" # Distribution files
+    # 💡 ADD YOUR OWN: Add patterns like "logs/**" or "*.tmp"

   include_patterns:
-    - "**/*" # Include all files by default
+    - "**/*" # Include everything else by default
+             # 💡 CUSTOMIZE: Could be ["**/*.py", "**/*.js"] for only Python/JS

+#═════════════════════════════════════════════════════════════════════════════════
+# 🧠 EMBEDDINGS: How we turn your code into searchable "vectors"
+#═════════════════════════════════════════════════════════════════════════════════
+# Embeddings are like "fingerprints" of your code that help find similar content
+# Don't worry about the technical details - the defaults work great!

 # Embedding generation settings
 embedding:
-  preferred_method: ollama # 'ollama', 'ml', 'hash', or 'auto'
-  ollama_model: nomic-embed-text
-  ollama_host: localhost:11434
-  ml_model: sentence-transformers/all-MiniLM-L6-v2
-  batch_size: 32 # Embeddings processed per batch
+  preferred_method: ollama # Which system to use for creating embeddings
+                           # 'ollama': Best quality (needs Ollama installed) - RECOMMENDED
+                           # 'ml': Good quality (downloads models automatically)
+                           # 'hash': Basic quality (works without internet)
+                           # 'auto': Try ollama, fall back to ml, then hash - SAFEST CHOICE
+  ollama_model: nomic-embed-text # Which Ollama model to use (this one is excellent)
+  ollama_host: localhost:11434 # Where to find Ollama (don't change unless you know why)
+  ml_model: sentence-transformers/all-MiniLM-L6-v2 # Backup model (small and fast)
+  batch_size: 32 # How many chunks to process at once
+                 # 💡 ADJUST IF: Slow computer = 16 | Fast computer = 64

+#═════════════════════════════════════════════════════════════════════════════════
+# 🔍 SEARCH: How the system finds and ranks results
+#═════════════════════════════════════════════════════════════════════════════════

 # Search behavior settings
 search:
-  default_limit: 10 # Default number of results
-  enable_bm25: true # Enable keyword matching boost
-  similarity_threshold: 0.1 # Minimum similarity score
-  expand_queries: false # Enable automatic query expansion (TUI auto-enables)
+  default_limit: 10 # How many search results to show by default
+                    # 💡 MORE RESULTS: 15-20 | FASTER SEARCH: 5-8
+  enable_bm25: true # Also use keyword matching (like Google search)
+                    # 💡 EFFECT: Finds exact word matches even if semantically different
+                    # Keep true unless getting too many irrelevant results
+  similarity_threshold: 0.1 # Minimum "similarity score" to show results (0.0-1.0)
+                            # 💡 HIGHER = fewer but more relevant results
+                            # Picky: 0.3 | Balanced: 0.1 | Show everything: 0.05
+  expand_queries: false # Automatically add related search terms
+                        # 💡 EFFECT: "auth" becomes "auth authentication login user"
+                        # Better results but slower - TUI enables this automatically

+#═════════════════════════════════════════════════════════════════════════════════
+# 🤖 LLM: Settings for the AI that explains and synthesizes results
+#═════════════════════════════════════════════════════════════════════════════════
+# The LLM (Large Language Model) reads your search results and explains them in plain English

 # LLM synthesis and query expansion settings
 llm:
-  ollama_host: localhost:11434
-  synthesis_model: auto # 'auto' prefers qwen3:0.6b for CPU efficiency
-  expansion_model: auto # Usually same as synthesis_model
-  max_expansion_terms: 8 # Maximum terms to add to queries
-  enable_synthesis: false # Enable synthesis by default
-  synthesis_temperature: 0.3 # LLM temperature for analysis
-  cpu_optimized: true # Prefer ultra-lightweight models for CPU-only systems
-  enable_thinking: true # Enable thinking mode for Qwen3 models (production: true, testing: false)
+  ollama_host: localhost:11434 # Where to find Ollama (don't change unless you know why)
+  synthesis_model: auto # Which AI model to use for explanations
+                        # 'auto': Picks best available model - RECOMMENDED
+                        # 'qwen3:0.6b': Ultra-fast, good for CPU-only computers
+                        # 'llama3.2': Slower but more detailed explanations
+  expansion_model: auto # Model for query expansion (usually same as synthesis)
+  max_expansion_terms: 8 # How many extra terms to add to expanded queries
+                         # 💡 MORE TERMS = broader search but potentially less focused
+  enable_synthesis: false # Turn on AI explanations by default
+                          # 💡 SET TO TRUE: If you want every search to include explanations
+                          # (You can always use --synthesize flag when you want it)
+  synthesis_temperature: 0.3 # How "creative" the AI explanations are (0.0-1.0)
+                             # 💡 Lower = more factual | Higher = more creative
+                             # Code analysis: 0.1-0.3 | Creative writing: 0.7-0.9
+  cpu_optimized: true # Prefer lightweight models for computers without graphics cards
+                      # 💡 DISABLE IF: You have a powerful GPU and want highest quality
+  enable_thinking: true # Let AI "think out loud" for complex questions
+                        # 💡 EFFECT: Shows reasoning process, better for learning/debugging

+#═════════════════════════════════════════════════════════════════════════════════
+# 🎯 QUICK TROUBLESHOOTING:
+#
+# Search returns nothing? → Lower similarity_threshold to 0.05
+# Search too slow? → Set expand_queries: false and batch_size: 16
+# Results not detailed enough? → Increase max_size to 3000
+# Getting weird fragments? → Check min_size is at least 150
+# AI not working? → Make sure Ollama is running: `ollama serve`
+# Out of memory errors? → Decrease batch_size to 16 and lower threshold_bytes
+#═════════════════════════════════════════════════════════════════════════════════


@@ -70,7 +70,16 @@ def index_project(project_path: Path, force: bool = False):
     except Exception as e:
         print(f"❌ Indexing failed: {e}")
-        print(f"   Use --verbose for details")
+        print()
+        print("🔧 Common solutions:")
+        print("   • Check if path exists and you have read permissions")
+        print("   • Ensure Python dependencies are installed: pip install -r requirements.txt")
+        print("   • Try with smaller project first to test setup")
+        print("   • Check available disk space for index files")
+        print()
+        print("📚 For detailed help:")
+        print(f"   ./rag-mini index {project_path} --verbose")
+        print("   Or see: docs/TROUBLESHOOTING.md")
         sys.exit(1)

 def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
@@ -89,10 +98,18 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
     if not results:
         print("❌ No results found")
-        print("\n💡 Try:")
-        print("   • Broader search terms")
-        print("   • Check spelling")
-        print("   • Use concepts: \"authentication\" instead of \"auth_handler\"")
+        print()
+        print("🔧 Quick fixes to try:")
+        print("   • Use broader terms: \"login\" instead of \"authenticate_user_session\"")
+        print("   • Try concepts: \"database query\" instead of specific function names")
+        print("   • Check spelling and try simpler words")
+        print("   • Search for file types: \"python class\" or \"javascript function\"")
+        print()
+        print("⚙️ Configuration adjustments:")
+        print(f"   • Lower threshold: ./rag-mini search {project_path} \"{query}\" --threshold 0.05")
+        print("   • More results: add --limit 20")
+        print()
+        print("📚 Need help? See: docs/TROUBLESHOOTING.md")
         return

     print(f"✅ Found {len(results)} results:")
@@ -154,10 +171,23 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
     except Exception as e:
         print(f"❌ Search failed: {e}")
+        print()
         if "not indexed" in str(e).lower():
-            print(f"   Run: rag-mini index {project_path}")
+            print("🔧 Solution:")
+            print(f"   ./rag-mini index {project_path}")
+            print()
         else:
-            print("   Use --verbose for details")
+            print("🔧 Common solutions:")
+            print("   • Check project path exists and is readable")
+            print("   • Verify index isn't corrupted: delete .claude-rag/ and re-index")
+            print("   • Try with a different project to test setup")
+            print("   • Check available memory and disk space")
+            print()
+            print("📚 Get detailed error info:")
+            print(f"   ./rag-mini search {project_path} \"{query}\" --verbose")
+            print("   Or see: docs/TROUBLESHOOTING.md")
+            print()
         sys.exit(1)

 def status_check(project_path: Path):