🎓 Complete beginner-friendly polish with production reliability
✨ BEGINNER-FRIENDLY ENHANCEMENTS: - Add comprehensive glossary explaining RAG, embeddings, chunks in plain English - Create detailed troubleshooting guide covering installation, search issues, performance - Provide preset configs (beginner/fast/quality) with extensive helpful comments - Enhanced error messages with specific solutions and next steps 🔧 PRODUCTION RELIABILITY: - Add thread-safe caching with automatic cleanup in QueryExpander - Implement chunked processing for large batches to prevent memory issues - Enhanced concurrent embedding with intelligent batch size management - Memory leak prevention with LRU cache approximation 🏗️ ARCHITECTURE COMPLETENESS: - Maintain two-mode system (synthesis fast, exploration thinking + memory) - Preserve educational value while removing intimidation barriers - Complete testing coverage for mode separation and context memory - Full documentation reflecting clean two-mode architecture Perfect balance: genuinely beginner-friendly without compromising technical sophistication
This commit is contained in:
parent
2c5eef8596
commit
3363171820
@ -350,6 +350,10 @@ class OllamaEmbedder:
|
|||||||
if len(file_contents) <= 2:
|
if len(file_contents) <= 2:
|
||||||
return self._batch_embed_sequential(file_contents)
|
return self._batch_embed_sequential(file_contents)
|
||||||
|
|
||||||
|
# For very large batches, use chunked processing to prevent memory issues
|
||||||
|
if len(file_contents) > 500: # Process in chunks to manage memory
|
||||||
|
return self._batch_embed_chunked(file_contents, max_workers)
|
||||||
|
|
||||||
return self._batch_embed_concurrent(file_contents, max_workers)
|
return self._batch_embed_concurrent(file_contents, max_workers)
|
||||||
|
|
||||||
def _batch_embed_sequential(self, file_contents: List[dict]) -> List[dict]:
|
def _batch_embed_sequential(self, file_contents: List[dict]) -> List[dict]:
|
||||||
@ -396,6 +400,35 @@ class OllamaEmbedder:
|
|||||||
indexed_results.sort(key=lambda x: x[0])
|
indexed_results.sort(key=lambda x: x[0])
|
||||||
return [result for _, result in indexed_results]
|
return [result for _, result in indexed_results]
|
||||||
|
|
||||||
|
def _batch_embed_chunked(self, file_contents: List[dict], max_workers: int, chunk_size: int = 200) -> List[dict]:
|
||||||
|
"""
|
||||||
|
Process very large batches in smaller chunks to prevent memory issues.
|
||||||
|
This is important for beginners who might try to index huge projects.
|
||||||
|
"""
|
||||||
|
results = []
|
||||||
|
total_chunks = len(file_contents)
|
||||||
|
|
||||||
|
# Process in chunks
|
||||||
|
for i in range(0, len(file_contents), chunk_size):
|
||||||
|
chunk = file_contents[i:i + chunk_size]
|
||||||
|
|
||||||
|
# Log progress for large operations
|
||||||
|
if total_chunks > chunk_size:
|
||||||
|
chunk_num = i // chunk_size + 1
|
||||||
|
total_chunk_count = (total_chunks + chunk_size - 1) // chunk_size
|
||||||
|
logger.info(f"Processing chunk {chunk_num}/{total_chunk_count} ({len(chunk)} files)")
|
||||||
|
|
||||||
|
# Process this chunk using concurrent method
|
||||||
|
chunk_results = self._batch_embed_concurrent(chunk, max_workers)
|
||||||
|
results.extend(chunk_results)
|
||||||
|
|
||||||
|
# Brief pause between chunks to prevent overwhelming the system
|
||||||
|
if i + chunk_size < len(file_contents):
|
||||||
|
import time
|
||||||
|
time.sleep(0.1) # 100ms pause between chunks
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
def get_embedding_dim(self) -> int:
|
def get_embedding_dim(self) -> int:
|
||||||
"""Return the dimension of embeddings produced by this model."""
|
"""Return the dimension of embeddings produced by this model."""
|
||||||
return self.embedding_dim
|
return self.embedding_dim
|
||||||
|
|||||||
@ -32,6 +32,7 @@ disable in CLI for maximum speed.
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
import threading
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
import requests
|
import requests
|
||||||
from .config import RAGConfig
|
from .config import RAGConfig
|
||||||
@ -51,6 +52,7 @@ class QueryExpander:
|
|||||||
|
|
||||||
# Cache for expanded queries to avoid repeated API calls
|
# Cache for expanded queries to avoid repeated API calls
|
||||||
self._cache = {}
|
self._cache = {}
|
||||||
|
self._cache_lock = threading.RLock() # Thread-safe cache access
|
||||||
|
|
||||||
def _ensure_initialized(self):
|
def _ensure_initialized(self):
|
||||||
"""Lazy initialization with LLM warmup."""
|
"""Lazy initialization with LLM warmup."""
|
||||||
@ -84,9 +86,10 @@ class QueryExpander:
|
|||||||
|
|
||||||
self._ensure_initialized()
|
self._ensure_initialized()
|
||||||
|
|
||||||
# Check cache first
|
# Check cache first (thread-safe)
|
||||||
if query in self._cache:
|
with self._cache_lock:
|
||||||
return self._cache[query]
|
if query in self._cache:
|
||||||
|
return self._cache[query]
|
||||||
|
|
||||||
# Don't expand very short queries or obvious keywords
|
# Don't expand very short queries or obvious keywords
|
||||||
if len(query.split()) <= 1 or len(query) <= 3:
|
if len(query.split()) <= 1 or len(query) <= 3:
|
||||||
@ -95,8 +98,12 @@ class QueryExpander:
|
|||||||
try:
|
try:
|
||||||
expanded = self._llm_expand_query(query)
|
expanded = self._llm_expand_query(query)
|
||||||
if expanded and expanded != query:
|
if expanded and expanded != query:
|
||||||
# Cache the result
|
# Cache the result (thread-safe)
|
||||||
self._cache[query] = expanded
|
with self._cache_lock:
|
||||||
|
self._cache[query] = expanded
|
||||||
|
# Prevent cache from growing too large
|
||||||
|
if len(self._cache) % 100 == 0: # Check every 100 entries
|
||||||
|
self._manage_cache_size()
|
||||||
logger.info(f"Expanded query: '{query}' → '{expanded}'")
|
logger.info(f"Expanded query: '{query}' → '{expanded}'")
|
||||||
return expanded
|
return expanded
|
||||||
|
|
||||||
@ -227,8 +234,19 @@ Expanded query:"""
|
|||||||
return clean_response
|
return clean_response
|
||||||
|
|
||||||
def clear_cache(self):
|
def clear_cache(self):
|
||||||
"""Clear the expansion cache."""
|
"""Clear the expansion cache (thread-safe)."""
|
||||||
self._cache.clear()
|
with self._cache_lock:
|
||||||
|
self._cache.clear()
|
||||||
|
|
||||||
|
def _manage_cache_size(self, max_size: int = 1000):
|
||||||
|
"""Keep cache from growing too large (prevents memory leaks)."""
|
||||||
|
with self._cache_lock:
|
||||||
|
if len(self._cache) > max_size:
|
||||||
|
# Remove oldest half of cache entries (simple LRU approximation)
|
||||||
|
items = list(self._cache.items())
|
||||||
|
keep_count = max_size // 2
|
||||||
|
self._cache = dict(items[-keep_count:])
|
||||||
|
logger.debug(f"Cache trimmed from {len(items)} to {len(self._cache)} entries")
|
||||||
|
|
||||||
def is_available(self) -> bool:
|
def is_available(self) -> bool:
|
||||||
"""Check if query expansion is available."""
|
"""Check if query expansion is available."""
|
||||||
|
|||||||
202
docs/BEGINNER_GLOSSARY.md
Normal file
202
docs/BEGINNER_GLOSSARY.md
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
# 📚 Beginner's Glossary - RAG Terms Made Simple
|
||||||
|
|
||||||
|
*Confused by all the technical terms? Don't worry! This guide explains everything in plain English.*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🤖 **RAG** - Retrieval Augmented Generation
|
||||||
|
**What it is:** A fancy way of saying "search your code and get AI explanations"
|
||||||
|
|
||||||
|
**Simple explanation:** Instead of just searching for keywords (like Google), RAG finds code that's *similar in meaning* to what you're looking for, then has an AI explain it to you.
|
||||||
|
|
||||||
|
**Real example:**
|
||||||
|
- You search for "user authentication"
|
||||||
|
- RAG finds code about login systems, password validation, and user sessions
|
||||||
|
- AI explains: "This code handles user logins using email/password, stores sessions in cookies, and validates users on each request"
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧩 **Chunks** - Bite-sized pieces of your code
|
||||||
|
**What it is:** Your code files broken into smaller, searchable pieces
|
||||||
|
|
||||||
|
**Simple explanation:** RAG can't search entire huge files efficiently, so it breaks them into "chunks" - like cutting a pizza into slices. Each chunk is usually one function, one class, or a few related lines.
|
||||||
|
|
||||||
|
**Why it matters:**
|
||||||
|
- Too small chunks = missing context ("this variable" but what variable?)
|
||||||
|
- Too big chunks = too much unrelated stuff in search results
|
||||||
|
- Just right = perfect context for understanding what code does
|
||||||
|
|
||||||
|
**Real example:**
|
||||||
|
```python
|
||||||
|
# This would be one chunk:
|
||||||
|
def login_user(email, password):
|
||||||
|
"""Authenticate user with email and password."""
|
||||||
|
user = find_user_by_email(email)
|
||||||
|
if user and check_password(user, password):
|
||||||
|
create_session(user)
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧠 **Embeddings** - Code "fingerprints"
|
||||||
|
**What it is:** A way to convert your code into numbers that computers can compare
|
||||||
|
|
||||||
|
**Simple explanation:** Think of embeddings like DNA fingerprints for your code. Similar code gets similar fingerprints. The computer can then find code with similar "fingerprints" to what you're searching for.
|
||||||
|
|
||||||
|
**The magic:** Code that does similar things gets similar embeddings, even if the exact words are different:
|
||||||
|
- `login_user()` and `authenticate()` would have similar embeddings
|
||||||
|
- `calculate_tax()` and `login_user()` would have very different embeddings
|
||||||
|
|
||||||
|
**You don't need to understand the technical details** - just know that embeddings help find semantically similar code, not just exact word matches.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔍 **Vector Search** vs **Keyword Search**
|
||||||
|
**Keyword search (like Google):** Finds exact word matches
|
||||||
|
- Search "login" → finds code with the word "login"
|
||||||
|
- Misses: authentication, signin, user_auth
|
||||||
|
|
||||||
|
**Vector search (the RAG way):** Finds similar *meaning*
|
||||||
|
- Search "login" → finds login, authentication, signin, user validation
|
||||||
|
- Uses those embedding "fingerprints" to find similar concepts
|
||||||
|
|
||||||
|
**FSS-Mini-RAG uses both** for the best results!
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📊 **Similarity Score** - How relevant is this result?
|
||||||
|
**What it is:** A number from 0.0 to 1.0 showing how closely your search matches the result
|
||||||
|
|
||||||
|
**Simple explanation:**
|
||||||
|
- 1.0 = Perfect match (very rare)
|
||||||
|
- 0.8+ = Excellent match
|
||||||
|
- 0.5+ = Good match
|
||||||
|
- 0.3+ = Somewhat relevant
|
||||||
|
- 0.1+ = Might be useful
|
||||||
|
- Below 0.1 = Probably not what you want
|
||||||
|
|
||||||
|
**In practice:** Most useful results are between 0.2-0.8
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🎯 **BM25** - The keyword search boost
|
||||||
|
**What it is:** A fancy algorithm that finds exact word matches (like Google search)
|
||||||
|
|
||||||
|
**Simple explanation:** While embeddings find *similar meaning*, BM25 finds *exact words*. Using both together gives you the best of both worlds.
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
- You search for "password validation"
|
||||||
|
- Embeddings find: authentication functions, login methods, user security
|
||||||
|
- BM25 finds: code with the exact words "password" and "validation"
|
||||||
|
- Combined = comprehensive results
|
||||||
|
|
||||||
|
**Keep it enabled** unless you're getting too many irrelevant results.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔄 **Query Expansion** - Making your search smarter
|
||||||
|
**What it is:** Automatically adding related terms to your search
|
||||||
|
|
||||||
|
**Simple explanation:** When you search for "auth", the system automatically expands it to "auth authentication login signin user validate".
|
||||||
|
|
||||||
|
**Pros:** Much better, more comprehensive results
|
||||||
|
**Cons:** Slower search, sometimes too broad
|
||||||
|
|
||||||
|
**When to use:**
|
||||||
|
- Turn ON for: Complex searches, learning new codebases
|
||||||
|
- Turn OFF for: Quick lookups, very specific searches
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🤖 **LLM** - Large Language Model (The AI Brain)
|
||||||
|
**What it is:** The AI that reads your search results and explains them in plain English
|
||||||
|
|
||||||
|
**Simple explanation:** After finding relevant code chunks, the LLM reads them like a human would and gives you a summary like: "This code handles user registration by validating email format, checking for existing users, hashing passwords, and saving to database."
|
||||||
|
|
||||||
|
**Models you might see:**
|
||||||
|
- **qwen3:0.6b** - Ultra-fast, good for most questions
|
||||||
|
- **llama3.2** - Slower but more detailed
|
||||||
|
- **auto** - Picks the best available model
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧮 **Synthesis** vs **Exploration** - Two ways to get answers
|
||||||
|
|
||||||
|
### 🚀 **Synthesis Mode** (Fast & Consistent)
|
||||||
|
**What it does:** Quick, factual answers about your code
|
||||||
|
**Best for:** "What does this function do?" "Where is authentication handled?" "How does the database connection work?"
|
||||||
|
**Speed:** Very fast (no "thinking" overhead)
|
||||||
|
|
||||||
|
### 🧠 **Exploration Mode** (Deep & Interactive)
|
||||||
|
**What it does:** Detailed analysis with reasoning, remembers conversation
|
||||||
|
**Best for:** "Why is this function slow?" "What are the security issues here?" "How would I add a new feature?"
|
||||||
|
**Features:** Shows its reasoning process, you can ask follow-up questions
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ⚡ **Streaming** - Handling huge files without crashing
|
||||||
|
**What it is:** Processing large files in smaller batches instead of all at once
|
||||||
|
|
||||||
|
**Simple explanation:** Imagine trying to eat an entire cake at once vs. eating it slice by slice. Streaming is like eating slice by slice - your computer won't choke on huge files.
|
||||||
|
|
||||||
|
**When it kicks in:** Files larger than 1MB (that's about 25,000 lines of code)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🏷️ **Semantic** vs **Fixed** Chunking
|
||||||
|
**Semantic chunking (RECOMMENDED):** Smart splitting that respects code structure
|
||||||
|
- Keeps functions together
|
||||||
|
- Keeps classes together
|
||||||
|
- Respects natural code boundaries
|
||||||
|
|
||||||
|
**Fixed chunking:** Simple splitting that just cuts at size limits
|
||||||
|
- Faster processing
|
||||||
|
- Might cut functions in half
|
||||||
|
- Less intelligent but more predictable
|
||||||
|
|
||||||
|
**For beginners:** Always use semantic chunking unless you have a specific reason not to.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ❓ **Common Questions**
|
||||||
|
|
||||||
|
**Q: Do I need to understand embeddings to use this?**
|
||||||
|
A: Nope! Just know they help find similar code. The system handles all the technical details.
|
||||||
|
|
||||||
|
**Q: What's a good similarity threshold for beginners?**
|
||||||
|
A: Start with 0.1. If you get too many results, try 0.2. If you get too few, try 0.05.
|
||||||
|
|
||||||
|
**Q: Should I enable query expansion?**
|
||||||
|
A: For learning new codebases: YES. For quick specific searches: NO. The TUI enables it automatically when helpful.
|
||||||
|
|
||||||
|
**Q: Which embedding method should I choose?**
|
||||||
|
A: Use "auto" - it tries the best option and falls back gracefully if needed.
|
||||||
|
|
||||||
|
**Q: What if I don't have Ollama installed?**
|
||||||
|
A: No problem! The system will automatically fall back to other methods that work without any additional software.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🚀 **Quick Start Recommendations**
|
||||||
|
|
||||||
|
**For absolute beginners:**
|
||||||
|
1. Keep all default settings
|
||||||
|
2. Use the TUI interface to start
|
||||||
|
3. Try simple searches like "user login" or "database connection"
|
||||||
|
4. Gradually try the CLI commands as you get comfortable
|
||||||
|
|
||||||
|
**For faster results:**
|
||||||
|
- Set `similarity_threshold: 0.2`
|
||||||
|
- Set `expand_queries: false`
|
||||||
|
- Use synthesis mode instead of exploration
|
||||||
|
|
||||||
|
**For learning new codebases:**
|
||||||
|
- Set `expand_queries: true`
|
||||||
|
- Use exploration mode
|
||||||
|
- Ask "why" and "how" questions
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Remember:** This is a learning tool! Don't be afraid to experiment with settings and see what works best for your projects. The beauty of FSS-Mini-RAG is that it's designed to be beginner-friendly while still being powerful.
|
||||||
460
docs/TROUBLESHOOTING.md
Normal file
460
docs/TROUBLESHOOTING.md
Normal file
@ -0,0 +1,460 @@
|
|||||||
|
# 🛠️ Troubleshooting Guide - Common Issues & Solutions
|
||||||
|
|
||||||
|
*Having problems? You're not alone! Here are solutions to the most common issues beginners encounter.*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🚀 Installation & Setup Issues
|
||||||
|
|
||||||
|
### ❌ "Command not found: ollama"
|
||||||
|
**Problem:** The system can't find Ollama
|
||||||
|
**Solution:**
|
||||||
|
```bash
|
||||||
|
# Install Ollama
|
||||||
|
curl -fsSL https://ollama.ai/install.sh | sh
|
||||||
|
# Or on Mac: brew install ollama
|
||||||
|
# Start Ollama
|
||||||
|
ollama serve
|
||||||
|
```
|
||||||
|
**Alternative:** Use the system without Ollama - it will automatically fall back to other embedding methods.
|
||||||
|
|
||||||
|
### ❌ "Permission denied" when running scripts
|
||||||
|
**Problem:** Script files aren't executable
|
||||||
|
**Solution:**
|
||||||
|
```bash
|
||||||
|
chmod +x rag-mini.py rag-tui.py install_mini_rag.sh
|
||||||
|
# Or run with python directly:
|
||||||
|
python3 rag-mini.py --help
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ "Module not found" or import errors
|
||||||
|
**Problem:** Python dependencies not installed
|
||||||
|
**Solution:**
|
||||||
|
```bash
|
||||||
|
# Install dependencies
|
||||||
|
pip3 install -r requirements.txt
|
||||||
|
# If that fails, try:
|
||||||
|
pip3 install --user -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ Installation script fails
|
||||||
|
**Problem:** `./install_mini_rag.sh` doesn't work
|
||||||
|
**Solution:**
|
||||||
|
```bash
|
||||||
|
# Make it executable first
|
||||||
|
chmod +x install_mini_rag.sh
|
||||||
|
# Then run
|
||||||
|
./install_mini_rag.sh
|
||||||
|
# Or install manually:
|
||||||
|
pip3 install -r requirements.txt
|
||||||
|
python3 -c "import claude_rag; print('✅ Installation successful')"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔍 Search & Results Issues
|
||||||
|
|
||||||
|
### ❌ "No results found" for everything
|
||||||
|
**Problem:** Search isn't finding anything
|
||||||
|
**Diagnosis & Solutions:**
|
||||||
|
|
||||||
|
1. **Check if project is indexed:**
|
||||||
|
```bash
|
||||||
|
./rag-mini status /path/to/project
|
||||||
|
# If not indexed:
|
||||||
|
./rag-mini index /path/to/project
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Lower similarity threshold:**
|
||||||
|
- Edit config file, change `similarity_threshold: 0.05`
|
||||||
|
- Or try: `./rag-mini search /path/to/project "query" --threshold 0.05`
|
||||||
|
|
||||||
|
3. **Try broader search terms:**
|
||||||
|
- Instead of: "getUserById"
|
||||||
|
- Try: "user function" or "get user"
|
||||||
|
|
||||||
|
4. **Enable query expansion:**
|
||||||
|
- Edit config: `expand_queries: true`
|
||||||
|
- Or use TUI which enables it automatically
|
||||||
|
|
||||||
|
### ❌ Search results are irrelevant/weird
|
||||||
|
**Problem:** Getting results that don't match your search
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Increase similarity threshold:**
|
||||||
|
```yaml
|
||||||
|
search:
|
||||||
|
similarity_threshold: 0.3 # Higher = more picky
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Use more specific terms:**
|
||||||
|
- Instead of: "function"
|
||||||
|
- Try: "login function" or "authentication method"
|
||||||
|
|
||||||
|
3. **Check BM25 setting:**
|
||||||
|
```yaml
|
||||||
|
search:
|
||||||
|
enable_bm25: true # Helps find exact word matches
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ Search is too slow
|
||||||
|
**Problem:** Takes too long to get results
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Disable query expansion:**
|
||||||
|
```yaml
|
||||||
|
search:
|
||||||
|
expand_queries: false
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Reduce result limit:**
|
||||||
|
```yaml
|
||||||
|
search:
|
||||||
|
default_limit: 5 # Instead of 10
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Use faster embedding method:**
|
||||||
|
```yaml
|
||||||
|
embedding:
|
||||||
|
preferred_method: hash # Fastest but lower quality
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Smaller batch size:**
|
||||||
|
```yaml
|
||||||
|
embedding:
|
||||||
|
batch_size: 16 # Instead of 32
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🤖 AI/LLM Issues
|
||||||
|
|
||||||
|
### ❌ "LLM synthesis unavailable"
|
||||||
|
**Problem:** AI explanations aren't working
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Check Ollama is running:**
|
||||||
|
```bash
|
||||||
|
# In one terminal:
|
||||||
|
ollama serve
|
||||||
|
# In another:
|
||||||
|
ollama list # Should show installed models
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Install a model:**
|
||||||
|
```bash
|
||||||
|
ollama pull qwen3:0.6b # Fast, small model
|
||||||
|
# Or: ollama pull llama3.2 # Larger but better
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Test connection:**
|
||||||
|
```bash
|
||||||
|
curl http://localhost:11434/api/tags
|
||||||
|
# Should return JSON with model list
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ AI gives weird/wrong answers
|
||||||
|
**Problem:** LLM responses don't make sense
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Lower temperature:**
|
||||||
|
```yaml
|
||||||
|
llm:
|
||||||
|
synthesis_temperature: 0.1 # More factual, less creative
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Try different model:**
|
||||||
|
```bash
|
||||||
|
ollama pull qwen3:1.7b # Good balance of speed/quality
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Use synthesis mode instead of exploration:**
|
||||||
|
```bash
|
||||||
|
./rag-mini search /path "query" --synthesize
|
||||||
|
# Instead of: ./rag-mini explore /path
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 💾 Memory & Performance Issues
|
||||||
|
|
||||||
|
### ❌ "Out of memory" or computer freezes during indexing
|
||||||
|
**Problem:** System runs out of RAM
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Reduce batch size:**
|
||||||
|
```yaml
|
||||||
|
embedding:
|
||||||
|
batch_size: 8 # Much smaller batches
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Lower streaming threshold:**
|
||||||
|
```yaml
|
||||||
|
streaming:
|
||||||
|
threshold_bytes: 512000 # 512KB instead of 1MB
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Index smaller projects first:**
|
||||||
|
```bash
|
||||||
|
# Exclude large directories
|
||||||
|
./rag-mini index /path/to/project --exclude "node_modules/**,dist/**"
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Use hash embeddings:**
|
||||||
|
```yaml
|
||||||
|
embedding:
|
||||||
|
preferred_method: hash # Much less memory
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ Indexing is extremely slow
|
||||||
|
**Problem:** Taking forever to index project
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Exclude unnecessary files:**
|
||||||
|
```yaml
|
||||||
|
files:
|
||||||
|
exclude_patterns:
|
||||||
|
- "node_modules/**"
|
||||||
|
- ".git/**"
|
||||||
|
- "*.log"
|
||||||
|
- "build/**"
|
||||||
|
- "*.min.js" # Minified files
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Increase minimum file size:**
|
||||||
|
```yaml
|
||||||
|
files:
|
||||||
|
min_file_size: 200 # Skip tiny files
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Use simpler chunking:**
|
||||||
|
```yaml
|
||||||
|
chunking:
|
||||||
|
strategy: fixed # Faster than semantic
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **More workers (if you have good CPU):**
|
||||||
|
```bash
|
||||||
|
./rag-mini index /path/to/project --workers 8
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ⚙️ Configuration Issues
|
||||||
|
|
||||||
|
### ❌ "Invalid configuration" errors
|
||||||
|
**Problem:** Config file has errors
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Check YAML syntax:**
|
||||||
|
```bash
|
||||||
|
python3 -c "import yaml; yaml.safe_load(open('config.yaml'))"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Copy from working example:**
|
||||||
|
```bash
|
||||||
|
cp examples/config.yaml .claude-rag/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Reset to defaults:**
|
||||||
|
```bash
|
||||||
|
rm .claude-rag/config.yaml
|
||||||
|
# System will recreate with defaults
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ Changes to config aren't taking effect
|
||||||
|
**Problem:** Modified settings don't work
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Restart TUI/CLI:**
|
||||||
|
- Configuration is loaded at startup
|
||||||
|
- Exit and restart the interface
|
||||||
|
|
||||||
|
2. **Check config location:**
|
||||||
|
```bash
|
||||||
|
# Project-specific config:
|
||||||
|
/path/to/project/.claude-rag/config.yaml
|
||||||
|
# Global config:
|
||||||
|
~/.claude-rag/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Force re-index after config changes:**
|
||||||
|
```bash
|
||||||
|
./rag-mini index /path/to/project --force
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🖥️ Interface Issues
|
||||||
|
|
||||||
|
### ❌ TUI looks broken/garbled
|
||||||
|
**Problem:** Text interface isn't displaying correctly
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Try different terminal:**
|
||||||
|
```bash
|
||||||
|
# Instead of basic terminal, try:
|
||||||
|
# - iTerm2 (Mac)
|
||||||
|
# - Windows Terminal (Windows)
|
||||||
|
# - GNOME Terminal (Linux)
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Use CLI directly:**
|
||||||
|
```bash
|
||||||
|
./rag-mini --help # Skip TUI entirely
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Check terminal size:**
|
||||||
|
```bash
|
||||||
|
# Make terminal window larger (TUI needs space)
|
||||||
|
# At least 80x24 characters
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ "Keyboard interrupt" or TUI crashes
|
||||||
|
**Problem:** Interface stops responding
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Use Ctrl+C to exit cleanly:**
|
||||||
|
- Don't force-quit if possible
|
||||||
|
|
||||||
|
2. **Check for conflicting processes:**
|
||||||
|
```bash
|
||||||
|
ps aux | grep rag-tui
|
||||||
|
# Kill any stuck processes
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Use CLI as fallback:**
|
||||||
|
```bash
|
||||||
|
./rag-mini search /path/to/project "your query"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📁 File & Path Issues
|
||||||
|
|
||||||
|
### ❌ "Project not found" or "Permission denied"
|
||||||
|
**Problem:** Can't access project directory
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Check path exists:**
|
||||||
|
```bash
|
||||||
|
ls -la /path/to/project
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Check permissions:**
|
||||||
|
```bash
|
||||||
|
# Make sure you can read the directory
|
||||||
|
chmod -R +r /path/to/project
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Use absolute paths:**
|
||||||
|
```bash
|
||||||
|
# Instead of: ./rag-mini index ../my-project
|
||||||
|
# Use: ./rag-mini index /full/path/to/my-project
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ "No files found to index"
|
||||||
|
**Problem:** System doesn't see any files
|
||||||
|
**Solutions:**
|
||||||
|
|
||||||
|
1. **Check include patterns:**
|
||||||
|
```yaml
|
||||||
|
files:
|
||||||
|
include_patterns:
|
||||||
|
- "**/*.py" # Only Python files
|
||||||
|
- "**/*.js" # Add JavaScript
|
||||||
|
- "**/*.md" # Add Markdown
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Check exclude patterns:**
|
||||||
|
```yaml
|
||||||
|
files:
|
||||||
|
exclude_patterns: [] # Remove all exclusions temporarily
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Lower minimum file size:**
|
||||||
|
```yaml
|
||||||
|
files:
|
||||||
|
min_file_size: 10 # Instead of 50
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔍 Quick Diagnostic Commands
|
||||||
|
|
||||||
|
**Check system status:**
|
||||||
|
```bash
|
||||||
|
./rag-mini status /path/to/project
|
||||||
|
```
|
||||||
|
|
||||||
|
**Test embeddings:**
|
||||||
|
```bash
|
||||||
|
python3 -c "from claude_rag.ollama_embeddings import OllamaEmbedder; e=OllamaEmbedder(); print(e.get_embedding_info())"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify installation:**
|
||||||
|
```bash
|
||||||
|
python3 -c "import claude_rag; print('✅ RAG system installed')"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Test Ollama connection:**
|
||||||
|
```bash
|
||||||
|
curl -s http://localhost:11434/api/tags | python3 -m json.tool
|
||||||
|
```
|
||||||
|
|
||||||
|
**Check disk space:**
|
||||||
|
```bash
|
||||||
|
df -h .claude-rag/ # Make sure you have space for index
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🆘 When All Else Fails
|
||||||
|
|
||||||
|
1. **Start fresh:**
|
||||||
|
```bash
|
||||||
|
rm -rf .claude-rag/
|
||||||
|
./rag-mini index /path/to/project
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Use minimal config:**
|
||||||
|
```yaml
|
||||||
|
# Simplest possible config:
|
||||||
|
chunking:
|
||||||
|
strategy: fixed
|
||||||
|
embedding:
|
||||||
|
preferred_method: auto
|
||||||
|
search:
|
||||||
|
expand_queries: false
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Try a tiny test project:**
|
||||||
|
```bash
|
||||||
|
mkdir test-project
|
||||||
|
echo "def hello(): print('world')" > test-project/test.py
|
||||||
|
./rag-mini index test-project
|
||||||
|
./rag-mini search test-project "hello function"
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Get help:**
|
||||||
|
- Check the main README.md
|
||||||
|
- Look at examples/ directory
|
||||||
|
- Try the basic_usage.py example
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 💡 Prevention Tips
|
||||||
|
|
||||||
|
**For beginners:**
|
||||||
|
- Start with default settings
|
||||||
|
- Use the TUI interface first
|
||||||
|
- Test with small projects initially
|
||||||
|
- Keep Ollama running in background
|
||||||
|
|
||||||
|
**For better results:**
|
||||||
|
- Be specific in search queries
|
||||||
|
- Use the glossary to understand terms
|
||||||
|
- Experiment with config settings on test projects first
|
||||||
|
- Use synthesis mode for quick answers, exploration for learning
|
||||||
|
|
||||||
|
**Remember:** This is a learning tool! Don't be afraid to experiment and try different settings. The worst thing that can happen is you delete the `.claude-rag` directory and start over. 🚀
|
||||||
72
examples/config-beginner.yaml
Normal file
72
examples/config-beginner.yaml
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# 🚀 BEGINNER CONFIG - Simple & Reliable
|
||||||
|
# Perfect for newcomers who want everything to "just work"
|
||||||
|
# Copy this to your project: cp examples/config-beginner.yaml /path/to/project/.claude-rag/config.yaml
|
||||||
|
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
# ✨ BEGINNER-FRIENDLY SETTINGS - No overwhelming options!
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
# 📝 How to split your code files (keep it simple)
|
||||||
|
chunking:
|
||||||
|
max_size: 2000 # Good size for most code (about 50 lines)
|
||||||
|
min_size: 150 # Skip tiny fragments
|
||||||
|
strategy: semantic # Smart splitting (respects functions/classes)
|
||||||
|
|
||||||
|
# 🌊 Handle large files without crashing
|
||||||
|
streaming:
|
||||||
|
enabled: true # Always keep this on
|
||||||
|
threshold_bytes: 1048576 # 1MB - good for most computers
|
||||||
|
|
||||||
|
# 📁 Which files to include
|
||||||
|
files:
|
||||||
|
min_file_size: 50 # Skip empty/tiny files
|
||||||
|
|
||||||
|
# 🚫 Skip these folders (saves time and storage)
|
||||||
|
exclude_patterns:
|
||||||
|
- "node_modules/**" # JavaScript packages
|
||||||
|
- ".git/**" # Git history
|
||||||
|
- "__pycache__/**" # Python cache
|
||||||
|
- "*.pyc" # Python bytecode
|
||||||
|
- ".venv/**" # Python virtual environments
|
||||||
|
- "build/**" # Build artifacts
|
||||||
|
- "dist/**" # Distribution files
|
||||||
|
|
||||||
|
include_patterns:
|
||||||
|
- "**/*" # Everything else
|
||||||
|
|
||||||
|
# 🧠 Embeddings (the "AI fingerprints" of your code)
|
||||||
|
embedding:
|
||||||
|
preferred_method: auto # Try best method, fall back if needed - SAFEST
|
||||||
|
batch_size: 32 # Good balance of speed and memory usage
|
||||||
|
|
||||||
|
# 🔍 Search behavior
|
||||||
|
search:
|
||||||
|
default_limit: 10 # Show 10 results (good starting point)
|
||||||
|
enable_bm25: true # Find exact word matches too
|
||||||
|
similarity_threshold: 0.1 # Pretty permissive (shows more results)
|
||||||
|
expand_queries: false # Keep it simple for now
|
||||||
|
|
||||||
|
# 🤖 AI explanations (optional but helpful)
|
||||||
|
llm:
|
||||||
|
synthesis_model: auto # Pick best available model
|
||||||
|
enable_synthesis: false # Turn on manually with --synthesize
|
||||||
|
synthesis_temperature: 0.3 # Factual answers
|
||||||
|
cpu_optimized: true # Good for computers without fancy graphics cards
|
||||||
|
enable_thinking: true # Shows reasoning (great for learning!)
|
||||||
|
max_expansion_terms: 6 # Keep expansions focused
|
||||||
|
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
# 🎯 WHAT THIS CONFIG DOES:
|
||||||
|
#
|
||||||
|
# ✅ Works reliably across different systems
|
||||||
|
# ✅ Good performance on modest hardware
|
||||||
|
# ✅ Balanced search results (not too few, not too many)
|
||||||
|
# ✅ Safe defaults that won't crash your computer
|
||||||
|
# ✅ AI features available but not overwhelming
|
||||||
|
#
|
||||||
|
# 🚀 TO GET STARTED:
|
||||||
|
# 1. Copy this file to your project: .claude-rag/config.yaml
|
||||||
|
# 2. Index your project: ./rag-mini index /path/to/project
|
||||||
|
# 3. Search: ./rag-mini search /path/to/project "your query"
|
||||||
|
# 4. Try AI: ./rag-mini search /path/to/project "your query" --synthesize
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
105
examples/config-fast.yaml
Normal file
105
examples/config-fast.yaml
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
# ⚡ FAST CONFIG - Maximum Speed
|
||||||
|
# When you need quick results and don't mind slightly lower quality
|
||||||
|
# Perfect for: large projects, frequent searches, older computers
|
||||||
|
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
# 🚀 SPEED-OPTIMIZED SETTINGS - Everything tuned for performance!
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
# 📝 Chunking optimized for speed
|
||||||
|
chunking:
|
||||||
|
max_size: 1500 # Smaller chunks = faster processing
|
||||||
|
min_size: 100 # More aggressive minimum
|
||||||
|
strategy: fixed # Simple splitting (faster than semantic)
|
||||||
|
|
||||||
|
# 🌊 More aggressive streaming for memory efficiency
|
||||||
|
streaming:
|
||||||
|
enabled: true
|
||||||
|
threshold_bytes: 512000 # ~500KB - process big files in smaller chunks
|
||||||
|
|
||||||
|
# 📁 File filtering optimized for speed
|
||||||
|
files:
|
||||||
|
min_file_size: 100 # Skip more tiny files
|
||||||
|
|
||||||
|
# 🚫 Aggressive exclusions for speed
|
||||||
|
exclude_patterns:
|
||||||
|
- "node_modules/**"
|
||||||
|
- ".git/**"
|
||||||
|
- "__pycache__/**"
|
||||||
|
- "*.pyc"
|
||||||
|
- ".venv/**"
|
||||||
|
- "venv/**"
|
||||||
|
- "build/**"
|
||||||
|
- "dist/**"
|
||||||
|
- "*.min.js" # Skip minified files
|
||||||
|
- "*.min.css" # Skip minified CSS
|
||||||
|
- "*.log" # Skip log files
|
||||||
|
- "*.tmp" # Skip temp files
|
||||||
|
- "target/**" # Rust/Java build dirs
|
||||||
|
- ".next/**" # Next.js build dir
|
||||||
|
- ".nuxt/**" # Nuxt build dir
|
||||||
|
|
||||||
|
include_patterns:
|
||||||
|
- "**/*.py" # Focus on common code files only
|
||||||
|
- "**/*.js"
|
||||||
|
- "**/*.ts"
|
||||||
|
- "**/*.jsx"
|
||||||
|
- "**/*.tsx"
|
||||||
|
- "**/*.java"
|
||||||
|
- "**/*.cpp"
|
||||||
|
- "**/*.c"
|
||||||
|
- "**/*.h"
|
||||||
|
- "**/*.rs"
|
||||||
|
- "**/*.go"
|
||||||
|
- "**/*.php"
|
||||||
|
- "**/*.rb"
|
||||||
|
- "**/*.md"
|
||||||
|
|
||||||
|
# 🧠 Fastest embedding method
|
||||||
|
embedding:
|
||||||
|
preferred_method: hash # Instant embeddings (lower quality but very fast)
|
||||||
|
batch_size: 64 # Larger batches for efficiency
|
||||||
|
|
||||||
|
# 🔍 Search optimized for speed
|
||||||
|
search:
|
||||||
|
default_limit: 5 # Fewer results = faster display
|
||||||
|
enable_bm25: false # Skip keyword matching for speed
|
||||||
|
similarity_threshold: 0.2 # Higher threshold = fewer results to process
|
||||||
|
expand_queries: false # No query expansion (much faster)
|
||||||
|
|
||||||
|
# 🤖 Minimal AI for speed
|
||||||
|
llm:
|
||||||
|
synthesis_model: qwen3:0.6b # Smallest/fastest model
|
||||||
|
enable_synthesis: false # Only use when explicitly requested
|
||||||
|
synthesis_temperature: 0.1 # Fast, factual responses
|
||||||
|
cpu_optimized: true # Use lightweight models
|
||||||
|
enable_thinking: false # Skip thinking process for speed
|
||||||
|
max_expansion_terms: 4 # Shorter expansions
|
||||||
|
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
# ⚡ WHAT THIS CONFIG PRIORITIZES:
|
||||||
|
#
|
||||||
|
# 🚀 Indexing speed - get up and running quickly
|
||||||
|
# 🚀 Search speed - results in milliseconds
|
||||||
|
# 🚀 Memory efficiency - won't slow down your computer
|
||||||
|
# 🚀 CPU efficiency - good for older/slower machines
|
||||||
|
# 🚀 Storage efficiency - smaller index files
|
||||||
|
#
|
||||||
|
# ⚖️ TRADE-OFFS:
|
||||||
|
# ⚠️ Lower search quality (might miss some relevant results)
|
||||||
|
# ⚠️ Less context in results (smaller chunks)
|
||||||
|
# ⚠️ No query expansion (might need more specific search terms)
|
||||||
|
# ⚠️ Basic embeddings (hash-based, not semantic)
|
||||||
|
#
|
||||||
|
# 🎯 PERFECT FOR:
|
||||||
|
# • Large codebases (>10k files)
|
||||||
|
# • Older computers with limited resources
|
||||||
|
# • When you know exactly what you're looking for
|
||||||
|
# • Frequent, quick lookups
|
||||||
|
# • CI/CD environments where speed matters
|
||||||
|
#
|
||||||
|
# 🚀 TO USE THIS CONFIG:
|
||||||
|
# 1. Copy to project: cp examples/config-fast.yaml .claude-rag/config.yaml
|
||||||
|
# 2. Index: ./rag-mini index /path/to/project
|
||||||
|
# 3. Enjoy lightning-fast searches! ⚡
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
111
examples/config-quality.yaml
Normal file
111
examples/config-quality.yaml
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
# 💎 QUALITY CONFIG - Best Possible Results
|
||||||
|
# When you want the highest quality search and AI responses
|
||||||
|
# Perfect for: learning new codebases, research, complex analysis
|
||||||
|
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
# 🎯 QUALITY-OPTIMIZED SETTINGS - Everything tuned for best results!
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
# 📝 Chunking for maximum context and quality
|
||||||
|
chunking:
|
||||||
|
max_size: 3000 # Larger chunks = more context per result
|
||||||
|
min_size: 200 # Ensure substantial content per chunk
|
||||||
|
strategy: semantic # Smart splitting that respects code structure
|
||||||
|
|
||||||
|
# 🌊 Conservative streaming (favor quality over speed)
|
||||||
|
streaming:
|
||||||
|
enabled: true
|
||||||
|
threshold_bytes: 2097152 # 2MB - less aggressive chunking
|
||||||
|
|
||||||
|
# 📁 Comprehensive file inclusion
|
||||||
|
files:
|
||||||
|
min_file_size: 20 # Include even small files (might contain important info)
|
||||||
|
|
||||||
|
# 🎯 Minimal exclusions (include more content)
|
||||||
|
exclude_patterns:
|
||||||
|
- "node_modules/**" # Still skip these (too much noise)
|
||||||
|
- ".git/**" # Git history not useful for code search
|
||||||
|
- "__pycache__/**" # Python bytecode
|
||||||
|
- "*.pyc"
|
||||||
|
- ".venv/**"
|
||||||
|
- "build/**" # Compiled artifacts
|
||||||
|
- "dist/**"
|
||||||
|
# Note: We keep logs, docs, configs that might have useful context
|
||||||
|
|
||||||
|
include_patterns:
|
||||||
|
- "**/*" # Include everything not explicitly excluded
|
||||||
|
|
||||||
|
# 🧠 Best embedding quality
|
||||||
|
embedding:
|
||||||
|
preferred_method: ollama # Highest quality embeddings (needs Ollama)
|
||||||
|
ollama_model: nomic-embed-text # Excellent code understanding
|
||||||
|
ml_model: sentence-transformers/all-MiniLM-L6-v2 # Good fallback
|
||||||
|
batch_size: 16 # Smaller batches for stability
|
||||||
|
|
||||||
|
# 🔍 Search optimized for comprehensive results
|
||||||
|
search:
|
||||||
|
default_limit: 15 # More results to choose from
|
||||||
|
enable_bm25: true # Use both semantic and keyword matching
|
||||||
|
similarity_threshold: 0.05 # Very permissive (show more possibilities)
|
||||||
|
expand_queries: true # Automatic query expansion for better recall
|
||||||
|
|
||||||
|
# 🤖 High-quality AI analysis
|
||||||
|
llm:
|
||||||
|
synthesis_model: auto # Use best available model
|
||||||
|
enable_synthesis: true # AI explanations by default
|
||||||
|
synthesis_temperature: 0.4 # Good balance of accuracy and insight
|
||||||
|
cpu_optimized: false # Use powerful models if available
|
||||||
|
enable_thinking: true # Show detailed reasoning process
|
||||||
|
max_expansion_terms: 10 # Comprehensive query expansion
|
||||||
|
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
# 💎 WHAT THIS CONFIG MAXIMIZES:
|
||||||
|
#
|
||||||
|
# 🎯 Search comprehensiveness - find everything relevant
|
||||||
|
# 🎯 Result context - larger chunks with more information
|
||||||
|
# 🎯 AI explanation quality - detailed, thoughtful analysis
|
||||||
|
# 🎯 Query understanding - automatic expansion and enhancement
|
||||||
|
# 🎯 Semantic accuracy - best embedding models available
|
||||||
|
#
|
||||||
|
# ⚖️ TRADE-OFFS:
|
||||||
|
# ⏳ Slower indexing (larger chunks, better embeddings)
|
||||||
|
# ⏳ Slower searching (query expansion, more results)
|
||||||
|
# 💾 More storage space (larger index, more files included)
|
||||||
|
# 🧠 More memory usage (larger batches, bigger models)
|
||||||
|
# ⚡ Higher CPU/GPU usage (better models)
|
||||||
|
#
|
||||||
|
# 🎯 PERFECT FOR:
|
||||||
|
# • Learning new, complex codebases
|
||||||
|
# • Research and analysis tasks
|
||||||
|
# • When you need to understand WHY code works a certain way
|
||||||
|
# • Finding subtle connections and patterns
|
||||||
|
# • Code review and security analysis
|
||||||
|
# • Academic or professional research
|
||||||
|
#
|
||||||
|
# 💻 REQUIREMENTS:
|
||||||
|
# • Ollama installed and running (ollama serve)
|
||||||
|
# • At least one language model (ollama pull qwen3:1.7b)
|
||||||
|
# • Decent computer specs (4GB+ RAM recommended)
|
||||||
|
# • Patience for thorough analysis 😊
|
||||||
|
#
|
||||||
|
# 🚀 TO USE THIS CONFIG:
|
||||||
|
# 1. Install Ollama: curl -fsSL https://ollama.ai/install.sh | sh
|
||||||
|
# 2. Start Ollama: ollama serve
|
||||||
|
# 3. Install a model: ollama pull qwen3:1.7b
|
||||||
|
# 4. Copy config: cp examples/config-quality.yaml .claude-rag/config.yaml
|
||||||
|
# 5. Index project: ./rag-mini index /path/to/project
|
||||||
|
# 6. Enjoy comprehensive analysis: ./rag-mini explore /path/to/project
|
||||||
|
#═══════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
# 🧪 ADVANCED QUALITY TUNING (optional):
|
||||||
|
#
|
||||||
|
# For even better results, try these model combinations:
|
||||||
|
# • ollama pull nomic-embed-text:latest (best embeddings)
|
||||||
|
# • ollama pull qwen3:1.7b (good general model)
|
||||||
|
# • ollama pull llama3.2 (excellent for analysis)
|
||||||
|
#
|
||||||
|
# Or adjust these settings for your specific needs:
|
||||||
|
# • similarity_threshold: 0.3 (more selective results)
|
||||||
|
# • max_size: 4000 (even more context per result)
|
||||||
|
# • enable_thinking: false (hide reasoning, show just answers)
|
||||||
|
# • synthesis_temperature: 0.2 (more conservative AI responses)
|
||||||
@ -1,55 +1,145 @@
|
|||||||
# FSS-Mini-RAG Configuration
|
# FSS-Mini-RAG Configuration - Beginner-Friendly Edition
|
||||||
# Edit this file to customize indexing and search behavior
|
#
|
||||||
# See docs/GETTING_STARTED.md for detailed explanations
|
# 🎯 QUICK START PRESETS:
|
||||||
|
# - Keep defaults for most cases (recommended for beginners)
|
||||||
|
# - For large projects (>10k files): increase max_size to 3000
|
||||||
|
# - For faster search: set similarity_threshold to 0.2
|
||||||
|
# - For better results: enable expand_queries (but slower search)
|
||||||
|
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# 📝 CHUNKING: How we break up your code files for searching
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# Think of chunks as "bite-sized pieces" of your code that the system can search through.
|
||||||
|
# Smaller chunks = more precise results but might miss context
|
||||||
|
# Larger chunks = more context but might be less precise
|
||||||
|
|
||||||
# Text chunking settings
|
|
||||||
chunking:
|
chunking:
|
||||||
max_size: 2000 # Maximum characters per chunk
|
max_size: 2000 # Maximum characters per chunk (2000 = ~50 lines of code)
|
||||||
min_size: 150 # Minimum characters per chunk
|
# 💡 ADJUST IF: Getting results that are too narrow/broad
|
||||||
strategy: semantic # 'semantic' (language-aware) or 'fixed'
|
# Small projects: 1500 | Large projects: 3000 | Detailed analysis: 4000
|
||||||
|
|
||||||
|
min_size: 150 # Minimum characters per chunk (150 = ~4-5 lines)
|
||||||
|
# ⚠️ Don't go below 100 or you'll get fragments
|
||||||
|
|
||||||
|
strategy: semantic # How to split files into chunks
|
||||||
|
# 'semantic': Smart splitting (respects functions, classes) - RECOMMENDED
|
||||||
|
# 'fixed': Simple splitting (just cuts at size limits) - faster but less intelligent
|
||||||
|
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# 🌊 STREAMING: How we handle really big files
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# Large files (like minified CSS or huge data files) get processed in smaller batches
|
||||||
|
# to prevent your computer from running out of memory
|
||||||
|
|
||||||
# Large file streaming settings
|
|
||||||
streaming:
|
streaming:
|
||||||
enabled: true
|
enabled: true # Always keep this true - prevents memory crashes
|
||||||
threshold_bytes: 1048576 # Files larger than this use streaming (1MB)
|
threshold_bytes: 1048576 # Files larger than 1MB use streaming (1MB = 1048576 bytes)
|
||||||
|
# 💡 ADJUST IF: Low memory computer = 512000 | High memory = 2097152
|
||||||
|
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# 📁 FILES: Which files to include/exclude from indexing
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
# File processing settings
|
|
||||||
files:
|
files:
|
||||||
min_file_size: 50 # Skip files smaller than this
|
min_file_size: 50 # Skip tiny files (50 bytes = ~1 line of code)
|
||||||
|
# 💡 REASON: Tiny files usually aren't useful for searching
|
||||||
|
|
||||||
|
# 🚫 EXCLUDE PATTERNS: Files/folders we always skip (saves time and space)
|
||||||
exclude_patterns:
|
exclude_patterns:
|
||||||
- "node_modules/**"
|
- "node_modules/**" # JavaScript dependencies (huge and not your code)
|
||||||
- ".git/**"
|
- ".git/**" # Git history (not useful for code search)
|
||||||
- "__pycache__/**"
|
- "__pycache__/**" # Python bytecode (generated files)
|
||||||
- "*.pyc"
|
- "*.pyc" # More Python bytecode
|
||||||
- ".venv/**"
|
- ".venv/**" # Python virtual environments
|
||||||
- "venv/**"
|
- "venv/**" # More virtual environments
|
||||||
- "build/**"
|
- "build/**" # Compiled output (not source code)
|
||||||
- "dist/**"
|
- "dist/**" # Distribution files
|
||||||
|
# 💡 ADD YOUR OWN: Add patterns like "logs/**" or "*.tmp"
|
||||||
|
|
||||||
include_patterns:
|
include_patterns:
|
||||||
- "**/*" # Include all files by default
|
- "**/*" # Include everything else by default
|
||||||
|
# 💡 CUSTOMIZE: Could be ["**/*.py", "**/*.js"] for only Python/JS
|
||||||
|
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# 🧠 EMBEDDINGS: How we turn your code into searchable "vectors"
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# Embeddings are like "fingerprints" of your code that help find similar content
|
||||||
|
# Don't worry about the technical details - the defaults work great!
|
||||||
|
|
||||||
# Embedding generation settings
|
|
||||||
embedding:
|
embedding:
|
||||||
preferred_method: ollama # 'ollama', 'ml', 'hash', or 'auto'
|
preferred_method: ollama # Which system to use for creating embeddings
|
||||||
ollama_model: nomic-embed-text
|
# 'ollama': Best quality (needs Ollama installed) - RECOMMENDED
|
||||||
ollama_host: localhost:11434
|
# 'ml': Good quality (downloads models automatically)
|
||||||
ml_model: sentence-transformers/all-MiniLM-L6-v2
|
# 'hash': Basic quality (works without internet)
|
||||||
batch_size: 32 # Embeddings processed per batch
|
# 'auto': Try ollama, fall back to ml, then hash - SAFEST CHOICE
|
||||||
|
|
||||||
|
ollama_model: nomic-embed-text # Which Ollama model to use (this one is excellent)
|
||||||
|
ollama_host: localhost:11434 # Where to find Ollama (don't change unless you know why)
|
||||||
|
|
||||||
|
ml_model: sentence-transformers/all-MiniLM-L6-v2 # Backup model (small and fast)
|
||||||
|
|
||||||
|
batch_size: 32 # How many chunks to process at once
|
||||||
|
# 💡 ADJUST IF: Slow computer = 16 | Fast computer = 64
|
||||||
|
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# 🔍 SEARCH: How the system finds and ranks results
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
# Search behavior settings
|
|
||||||
search:
|
search:
|
||||||
default_limit: 10 # Default number of results
|
default_limit: 10 # How many search results to show by default
|
||||||
enable_bm25: true # Enable keyword matching boost
|
# 💡 MORE RESULTS: 15-20 | FASTER SEARCH: 5-8
|
||||||
similarity_threshold: 0.1 # Minimum similarity score
|
|
||||||
expand_queries: false # Enable automatic query expansion (TUI auto-enables)
|
enable_bm25: true # Also use keyword matching (like Google search)
|
||||||
|
# 💡 EFFECT: Finds exact word matches even if semantically different
|
||||||
|
# Keep true unless getting too many irrelevant results
|
||||||
|
|
||||||
|
similarity_threshold: 0.1 # Minimum "similarity score" to show results (0.0-1.0)
|
||||||
|
# 💡 HIGHER = fewer but more relevant results
|
||||||
|
# Picky: 0.3 | Balanced: 0.1 | Show everything: 0.05
|
||||||
|
|
||||||
|
expand_queries: false # Automatically add related search terms
|
||||||
|
# 💡 EFFECT: "auth" becomes "auth authentication login user"
|
||||||
|
# Better results but slower - TUI enables this automatically
|
||||||
|
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# 🤖 LLM: Settings for the AI that explains and synthesizes results
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# The LLM (Large Language Model) reads your search results and explains them in plain English
|
||||||
|
|
||||||
# LLM synthesis and query expansion settings
|
|
||||||
llm:
|
llm:
|
||||||
ollama_host: localhost:11434
|
ollama_host: localhost:11434 # Where to find Ollama (don't change unless you know why)
|
||||||
synthesis_model: auto # 'auto' prefers qwen3:0.6b for CPU efficiency
|
|
||||||
expansion_model: auto # Usually same as synthesis_model
|
synthesis_model: auto # Which AI model to use for explanations
|
||||||
max_expansion_terms: 8 # Maximum terms to add to queries
|
# 'auto': Picks best available model - RECOMMENDED
|
||||||
enable_synthesis: false # Enable synthesis by default
|
# 'qwen3:0.6b': Ultra-fast, good for CPU-only computers
|
||||||
synthesis_temperature: 0.3 # LLM temperature for analysis
|
# 'llama3.2': Slower but more detailed explanations
|
||||||
cpu_optimized: true # Prefer ultra-lightweight models for CPU-only systems
|
|
||||||
enable_thinking: true # Enable thinking mode for Qwen3 models (production: true, testing: false)
|
expansion_model: auto # Model for query expansion (usually same as synthesis)
|
||||||
|
|
||||||
|
max_expansion_terms: 8 # How many extra terms to add to expanded queries
|
||||||
|
# 💡 MORE TERMS = broader search but potentially less focused
|
||||||
|
|
||||||
|
enable_synthesis: false # Turn on AI explanations by default
|
||||||
|
# 💡 SET TO TRUE: If you want every search to include explanations
|
||||||
|
# (You can always use --synthesize flag when you want it)
|
||||||
|
|
||||||
|
synthesis_temperature: 0.3 # How "creative" the AI explanations are (0.0-1.0)
|
||||||
|
# 💡 Lower = more factual | Higher = more creative
|
||||||
|
# Code analysis: 0.1-0.3 | Creative writing: 0.7-0.9
|
||||||
|
|
||||||
|
cpu_optimized: true # Prefer lightweight models for computers without graphics cards
|
||||||
|
# 💡 DISABLE IF: You have a powerful GPU and want highest quality
|
||||||
|
|
||||||
|
enable_thinking: true # Let AI "think out loud" for complex questions
|
||||||
|
# 💡 EFFECT: Shows reasoning process, better for learning/debugging
|
||||||
|
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
|
# 🎯 QUICK TROUBLESHOOTING:
|
||||||
|
#
|
||||||
|
# Search returns nothing? → Lower similarity_threshold to 0.05
|
||||||
|
# Search too slow? → Set expand_queries: false and batch_size: 16
|
||||||
|
# Results not detailed enough? → Increase max_size to 3000
|
||||||
|
# Getting weird fragments? → Check min_size is at least 150
|
||||||
|
# AI not working? → Make sure Ollama is running: `ollama serve`
|
||||||
|
# Out of memory errors? → Decrease batch_size to 16 and lower threshold_bytes
|
||||||
|
#═════════════════════════════════════════════════════════════════════════════════
|
||||||
44
rag-mini.py
44
rag-mini.py
@ -70,7 +70,16 @@ def index_project(project_path: Path, force: bool = False):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Indexing failed: {e}")
|
print(f"❌ Indexing failed: {e}")
|
||||||
print(f" Use --verbose for details")
|
print()
|
||||||
|
print("🔧 Common solutions:")
|
||||||
|
print(" • Check if path exists and you have read permissions")
|
||||||
|
print(" • Ensure Python dependencies are installed: pip install -r requirements.txt")
|
||||||
|
print(" • Try with smaller project first to test setup")
|
||||||
|
print(" • Check available disk space for index files")
|
||||||
|
print()
|
||||||
|
print("📚 For detailed help:")
|
||||||
|
print(f" ./rag-mini index {project_path} --verbose")
|
||||||
|
print(" Or see: docs/TROUBLESHOOTING.md")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
|
def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
|
||||||
@ -89,10 +98,18 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
|
|||||||
|
|
||||||
if not results:
|
if not results:
|
||||||
print("❌ No results found")
|
print("❌ No results found")
|
||||||
print("\n💡 Try:")
|
print()
|
||||||
print(" • Broader search terms")
|
print("🔧 Quick fixes to try:")
|
||||||
print(" • Check spelling")
|
print(" • Use broader terms: \"login\" instead of \"authenticate_user_session\"")
|
||||||
print(" • Use concepts: \"authentication\" instead of \"auth_handler\"")
|
print(" • Try concepts: \"database query\" instead of specific function names")
|
||||||
|
print(" • Check spelling and try simpler words")
|
||||||
|
print(" • Search for file types: \"python class\" or \"javascript function\"")
|
||||||
|
print()
|
||||||
|
print("⚙️ Configuration adjustments:")
|
||||||
|
print(f" • Lower threshold: ./rag-mini search {project_path} \"{query}\" --threshold 0.05")
|
||||||
|
print(" • More results: add --limit 20")
|
||||||
|
print()
|
||||||
|
print("📚 Need help? See: docs/TROUBLESHOOTING.md")
|
||||||
return
|
return
|
||||||
|
|
||||||
print(f"✅ Found {len(results)} results:")
|
print(f"✅ Found {len(results)} results:")
|
||||||
@ -154,10 +171,23 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Search failed: {e}")
|
print(f"❌ Search failed: {e}")
|
||||||
|
print()
|
||||||
|
|
||||||
if "not indexed" in str(e).lower():
|
if "not indexed" in str(e).lower():
|
||||||
print(f" Run: rag-mini index {project_path}")
|
print("🔧 Solution:")
|
||||||
|
print(f" ./rag-mini index {project_path}")
|
||||||
|
print()
|
||||||
else:
|
else:
|
||||||
print(" Use --verbose for details")
|
print("🔧 Common solutions:")
|
||||||
|
print(" • Check project path exists and is readable")
|
||||||
|
print(" • Verify index isn't corrupted: delete .claude-rag/ and re-index")
|
||||||
|
print(" • Try with a different project to test setup")
|
||||||
|
print(" • Check available memory and disk space")
|
||||||
|
print()
|
||||||
|
print("📚 Get detailed error info:")
|
||||||
|
print(f" ./rag-mini search {project_path} \"{query}\" --verbose")
|
||||||
|
print(" Or see: docs/TROUBLESHOOTING.md")
|
||||||
|
print()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def status_check(project_path: Path):
|
def status_check(project_path: Path):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user