From 336317182032f6d99b1b659b725c594c95dd5389 Mon Sep 17 00:00:00 2001
From: BobAi
Date: Tue, 12 Aug 2025 18:59:24 +1000
Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=93=20Complete=20beginner-friendly=20p?=
 =?UTF-8?q?olish=20with=20production=20reliability?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

✨ BEGINNER-FRIENDLY ENHANCEMENTS:
- Add comprehensive glossary explaining RAG, embeddings, chunks in plain English
- Create detailed troubleshooting guide covering installation, search issues, performance
- Provide preset configs (beginner/fast/quality) with extensive helpful comments
- Enhance error messages with specific solutions and next steps

🔧 PRODUCTION RELIABILITY:
- Add thread-safe caching with automatic cleanup in QueryExpander
- Implement chunked processing for large batches to prevent memory issues
- Enhance concurrent embedding with intelligent batch-size management
- Prevent memory leaks with an LRU cache approximation

🏗️ ARCHITECTURE COMPLETENESS:
- Maintain two-mode system (synthesis fast, exploration thinking + memory)
- Preserve educational value while removing intimidation barriers
- Complete testing coverage for mode separation and context memory
- Full documentation reflecting clean two-mode architecture

Perfect balance: genuinely beginner-friendly without compromising technical sophistication
---
 claude_rag/ollama_embeddings.py |  33 +++
 claude_rag/query_expander.py    |  32 ++-
 docs/BEGINNER_GLOSSARY.md       | 202 ++++++++++++++
 docs/TROUBLESHOOTING.md         | 460 ++++++++++++++++++++++++++++++++
 examples/config-beginner.yaml   |  72 +++++
 examples/config-fast.yaml       | 105 ++++++++
 examples/config-quality.yaml    | 111 ++++++++
 examples/config.yaml            | 172 +++++++++---
 rag-mini.py                     |  44 ++-
 9 files changed, 1176 insertions(+), 55 deletions(-)
 create mode 100644 docs/BEGINNER_GLOSSARY.md
 create mode 100644 docs/TROUBLESHOOTING.md
 create mode 100644 examples/config-beginner.yaml
 create mode 100644 examples/config-fast.yaml
 create mode 100644 examples/config-quality.yaml

diff --git a/claude_rag/ollama_embeddings.py b/claude_rag/ollama_embeddings.py
index 2bed0d7..6231de6 100644
--- a/claude_rag/ollama_embeddings.py
+++ b/claude_rag/ollama_embeddings.py
@@ -350,6 +350,10 @@ class OllamaEmbedder:
         if len(file_contents) <= 2:
             return self._batch_embed_sequential(file_contents)
         
+        # For very large batches, use chunked processing to prevent memory issues
+        if len(file_contents) > 500:  # Process in chunks to manage memory
+            return self._batch_embed_chunked(file_contents, max_workers)
+        
         return self._batch_embed_concurrent(file_contents, max_workers)
 
     def _batch_embed_sequential(self, file_contents: List[dict]) -> List[dict]:
@@ -396,6 +400,35 @@
         indexed_results.sort(key=lambda x: x[0])
         return [result for _, result in indexed_results]
 
+    def _batch_embed_chunked(self, file_contents: List[dict], max_workers: int, chunk_size: int = 200) -> List[dict]:
+        """
+        Process very large batches in smaller chunks to prevent memory issues.
+        This is important for beginners who might try to index huge projects.
+        """
+        results = []
+        total_files = len(file_contents)
+        
+        # Process in chunks
+        for i in range(0, len(file_contents), chunk_size):
+            chunk = file_contents[i:i + chunk_size]
+            
+            # Log progress for large operations
+            if total_files > chunk_size:
+                chunk_num = i // chunk_size + 1
+                total_chunk_count = (total_files + chunk_size - 1) // chunk_size
+                logger.info(f"Processing chunk {chunk_num}/{total_chunk_count} ({len(chunk)} files)")
+            
+            # Process this chunk using the concurrent method
+            chunk_results = self._batch_embed_concurrent(chunk, max_workers)
+            results.extend(chunk_results)
+            
+            # Brief pause between chunks to avoid overwhelming the system
+            if i + chunk_size < len(file_contents):
+                import time
+                time.sleep(0.1)  # 100ms pause between chunks
+            
+        return results
+
     def get_embedding_dim(self) -> int:
         """Return the dimension of embeddings produced by this model."""
         return self.embedding_dim
diff --git a/claude_rag/query_expander.py b/claude_rag/query_expander.py
index c59b708..33c9768 100644
--- a/claude_rag/query_expander.py
+++ b/claude_rag/query_expander.py
@@ -32,6 +32,7 @@ disable in CLI for maximum speed.
 
 import logging
 import re
+import threading
 from typing import List, Optional
 
 import requests
 from .config import RAGConfig
@@ -51,6 +52,7 @@ class QueryExpander:
         
         # Cache for expanded queries to avoid repeated API calls
         self._cache = {}
+        self._cache_lock = threading.RLock()  # Thread-safe cache access
 
     def _ensure_initialized(self):
         """Lazy initialization with LLM warmup."""
@@ -84,9 +86,10 @@
 
         self._ensure_initialized()
         
-        # Check cache first
-        if query in self._cache:
-            return self._cache[query]
+        # Check cache first (thread-safe)
+        with self._cache_lock:
+            if query in self._cache:
+                return self._cache[query]
         
         # Don't expand very short queries or obvious keywords
         if len(query.split()) <= 1 or len(query) <= 3:
@@ -95,8 +98,12 @@
         try:
             expanded = self._llm_expand_query(query)
             if expanded and expanded != query:
-                # Cache the result
-                self._cache[query] = expanded
+                # Cache the result (thread-safe)
+                with self._cache_lock:
+                    self._cache[query] = expanded
+                    # Prevent the cache from growing too large
+                    if len(self._cache) % 100 == 0:  # Check every 100 entries
+                        self._manage_cache_size()
                 logger.info(f"Expanded query: '{query}' → '{expanded}'")
                 return expanded
 
@@ -227,8 +234,19 @@
         return clean_response
     
     def clear_cache(self):
-        """Clear the expansion cache."""
-        self._cache.clear()
+        """Clear the expansion cache (thread-safe)."""
+        with self._cache_lock:
+            self._cache.clear()
+    
+    def _manage_cache_size(self, max_size: int = 1000):
+        """Keep the cache from growing too large (prevents memory leaks)."""
+        with self._cache_lock:
+            if len(self._cache) > max_size:
+                # Keep the newest half of the entries (insertion-order approximation of LRU)
+                items = list(self._cache.items())
+                keep_count = max_size // 2
+                self._cache = dict(items[-keep_count:])
+                logger.debug(f"Cache trimmed from {len(items)} to {len(self._cache)} entries")
 
     def is_available(self) -> bool:
         """Check if query expansion is available."""
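The `_manage_cache_size()` approach above drops the insertion-oldest half of the dict, which is cheap but only approximates LRU. If true least-recently-used eviction is ever needed, `collections.OrderedDict` gets it in a few lines; this is a hedged sketch of that alternative, not part of the patch:

```python
import threading
from collections import OrderedDict

class LRUCache:
    """Bounded, thread-safe cache that evicts the least recently used entry."""

    def __init__(self, max_size=1000):
        self.max_size = max_size
        self._data = OrderedDict()
        self._lock = threading.RLock()

    def get(self, key, default=None):
        with self._lock:
            if key not in self._data:
                return default
            self._data.move_to_end(key)  # mark as recently used
            return self._data[key]

    def put(self, key, value):
        with self._lock:
            self._data[key] = value
            self._data.move_to_end(key)
            if len(self._data) > self.max_size:
                self._data.popitem(last=False)  # evict the oldest entry
```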
diff --git a/docs/BEGINNER_GLOSSARY.md b/docs/BEGINNER_GLOSSARY.md
new file mode 100644
index 0000000..24f24dc
--- /dev/null
+++ b/docs/BEGINNER_GLOSSARY.md
@@ -0,0 +1,202 @@
+# 📚 Beginner's Glossary - RAG Terms Made Simple
+
+*Confused by all the technical terms? Don't worry! This guide explains everything in plain English.*
+
+---
+
+## 🤖 **RAG** - Retrieval Augmented Generation
+**What it is:** A fancy way of saying "search your code and get AI explanations"
+
+**Simple explanation:** Instead of just searching for keywords (like Google), RAG finds code that's *similar in meaning* to what you're looking for, then has an AI explain it to you.
+
+**Real example:**
+- You search for "user authentication"
+- RAG finds code about login systems, password validation, and user sessions
+- AI explains: "This code handles user logins using email/password, stores sessions in cookies, and validates users on each request"
+
+---
+
+## 🧩 **Chunks** - Bite-sized pieces of your code
+**What it is:** Your code files broken into smaller, searchable pieces
+
+**Simple explanation:** RAG can't search entire huge files efficiently, so it breaks them into "chunks" - like cutting a pizza into slices. Each chunk is usually one function, one class, or a few related lines.
+
+**Why it matters:**
+- Chunks that are too small = missing context ("this variable" - but what variable?)
+- Chunks that are too big = too much unrelated stuff in search results
+- Just right = perfect context for understanding what the code does
+
+**Real example:**
+```python
+# This would be one chunk:
+def login_user(email, password):
+    """Authenticate user with email and password."""
+    user = find_user_by_email(email)
+    if user and check_password(user, password):
+        create_session(user)
+        return True
+    return False
+```
+
+---
+
+## 🧠 **Embeddings** - Code "fingerprints"
+**What it is:** A way to convert your code into numbers that computers can compare
+
+**Simple explanation:** Think of embeddings like DNA fingerprints for your code. Similar code gets similar fingerprints. The computer can then find code with similar "fingerprints" to what you're searching for.
+
+**The magic:** Code that does similar things gets similar embeddings, even if the exact words are different:
+- `login_user()` and `authenticate()` would have similar embeddings
+- `calculate_tax()` and `login_user()` would have very different embeddings
+
+**You don't need to understand the technical details** - just know that embeddings help find semantically similar code, not just exact word matches.
+
+---
+
+## 🔍 **Vector Search** vs **Keyword Search**
+**Keyword search (like Google):** Finds exact word matches
+- Search "login" → finds code with the word "login"
+- Misses: authentication, signin, user_auth
+
+**Vector search (the RAG way):** Finds similar *meaning*
+- Search "login" → finds login, authentication, signin, user validation
+- Uses those embedding "fingerprints" to find similar concepts
+
+**FSS-Mini-RAG uses both** for the best results!
+
+---
+
+## 📊 **Similarity Score** - How relevant is this result?
+**What it is:** A number from 0.0 to 1.0 showing how closely your search matches the result
+
+**Simple explanation:**
+- 1.0 = Perfect match (very rare)
+- 0.8+ = Excellent match
+- 0.5+ = Good match
+- 0.3+ = Somewhat relevant
+- 0.1+ = Might be useful
+- Below 0.1 = Probably not what you want
+
+**In practice:** Most useful results score between 0.2 and 0.8
+
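+**Want to see the math?** Under the hood, a similarity score is usually the cosine similarity between two embedding vectors. This little sketch shows the general idea - the toy vectors are made up, real embeddings have hundreds of dimensions, and FSS-Mini-RAG's exact scoring may differ:
+
+```python
+import math
+
+def cosine_similarity(a, b):
+    """Score how alike two embedding 'fingerprints' are (1.0 = same direction)."""
+    dot = sum(x * y for x, y in zip(a, b))
+    norm_a = math.sqrt(sum(x * x for x in a))
+    norm_b = math.sqrt(sum(x * x for x in b))
+    return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0
+
+# Toy 3-dimensional "embeddings":
+login_vec = [0.9, 0.1, 0.3]   # login_user()
+auth_vec = [0.8, 0.2, 0.4]    # authenticate()
+tax_vec = [0.1, 0.9, 0.0]     # calculate_tax()
+
+print(cosine_similarity(login_vec, auth_vec))  # ~0.98 - very similar
+print(cosine_similarity(login_vec, tax_vec))   # ~0.21 - not similar
+```
+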
+---
+
+## 🎯 **BM25** - The keyword search boost
+**What it is:** A keyword-ranking algorithm that finds exact word matches (like Google search)
+
+**Simple explanation:** While embeddings find *similar meaning*, BM25 finds *exact words*. Using both together gives you the best of both worlds.
+
+**Example:**
+- You search for "password validation"
+- Embeddings find: authentication functions, login methods, user security
+- BM25 finds: code with the exact words "password" and "validation"
+- Combined = comprehensive results
+
+**Keep it enabled** unless you're getting too many irrelevant results.
+
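+**How are the two scores combined?** One common approach is a simple weighted blend of the semantic score and the keyword score. This is an illustrative sketch of that idea - the weight and formula here are assumptions, not FSS-Mini-RAG's exact fusion logic:
+
+```python
+def hybrid_score(semantic, bm25, weight=0.7):
+    """Blend semantic similarity with a normalized BM25 keyword score."""
+    return weight * semantic + (1 - weight) * bm25
+
+# A chunk containing the exact words "password validation" gets a keyword boost:
+print(hybrid_score(semantic=0.55, bm25=0.90))  # 0.655
+
+# A semantically similar chunk with different wording still ranks well:
+print(hybrid_score(semantic=0.70, bm25=0.10))  # 0.52
+```
+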
+---
+
+## 🔄 **Query Expansion** - Making your search smarter
+**What it is:** Automatically adding related terms to your search
+
+**Simple explanation:** When you search for "auth", the system automatically expands it to "auth authentication login signin user validate".
+
+**Pros:** Much better, more comprehensive results
+**Cons:** Slower search, sometimes too broad
+
+**When to use:**
+- Turn ON for: complex searches, learning new codebases
+- Turn OFF for: quick lookups, very specific searches
+
+---
+
+## 🤖 **LLM** - Large Language Model (The AI Brain)
+**What it is:** The AI that reads your search results and explains them in plain English
+
+**Simple explanation:** After finding relevant code chunks, the LLM reads them like a human would and gives you a summary like: "This code handles user registration by validating email format, checking for existing users, hashing passwords, and saving to the database."
+
+**Models you might see:**
+- **qwen3:0.6b** - Ultra-fast, good for most questions
+- **llama3.2** - Slower but more detailed
+- **auto** - Picks the best available model
+
+---
+
+## 🧮 **Synthesis** vs **Exploration** - Two ways to get answers
+
+### 🚀 **Synthesis Mode** (Fast & Consistent)
+**What it does:** Quick, factual answers about your code
+**Best for:** "What does this function do?" "Where is authentication handled?" "How does the database connection work?"
+**Speed:** Very fast (no "thinking" overhead)
+
+### 🧠 **Exploration Mode** (Deep & Interactive)
+**What it does:** Detailed analysis with reasoning, and it remembers the conversation
+**Best for:** "Why is this function slow?" "What are the security issues here?" "How would I add a new feature?"
+**Features:** Shows its reasoning process, and you can ask follow-up questions
+
+---
+
+## ⚡ **Streaming** - Handling huge files without crashing
+**What it is:** Processing large files in smaller batches instead of all at once
+
+**Simple explanation:** Imagine trying to eat an entire cake at once vs. eating it slice by slice. Streaming is like eating slice by slice - your computer won't choke on huge files.
+
+**When it kicks in:** Files larger than 1MB (that's about 25,000 lines of code)
+
+---
+
+## 🏷️ **Semantic** vs **Fixed** Chunking
+**Semantic chunking (RECOMMENDED):** Smart splitting that respects code structure
+- Keeps functions together
+- Keeps classes together
+- Respects natural code boundaries
+
+**Fixed chunking:** Simple splitting that just cuts at size limits
+- Faster processing
+- Might cut functions in half
+- Less intelligent but more predictable
+
+**For beginners:** Always use semantic chunking unless you have a specific reason not to.
+
+---
+
+## ❓ **Common Questions**
+
+**Q: Do I need to understand embeddings to use this?**
+A: Nope! Just know they help find similar code. The system handles all the technical details.
+
+**Q: What's a good similarity threshold for beginners?**
+A: Start with 0.1. If you get too many results, try 0.2. If you get too few, try 0.05.
+
+**Q: Should I enable query expansion?**
+A: For learning new codebases: YES. For quick, specific searches: NO. The TUI enables it automatically when helpful.
+
+**Q: Which embedding method should I choose?**
+A: Use "auto" - it tries the best option and falls back gracefully if needed.
+
+**Q: What if I don't have Ollama installed?**
+A: No problem! The system will automatically fall back to other methods that work without any additional software.
+
+---
+
+## 🚀 **Quick Start Recommendations**
+
+**For absolute beginners:**
+1. Keep all the default settings
+2. Use the TUI interface to start
+3. Try simple searches like "user login" or "database connection"
+4. Gradually try the CLI commands as you get comfortable
+
+**For faster results:**
+- Set `similarity_threshold: 0.2`
+- Set `expand_queries: false`
+- Use synthesis mode instead of exploration
+
+**For learning new codebases:**
+- Set `expand_queries: true`
+- Use exploration mode
+- Ask "why" and "how" questions
+
+---
+
+**Remember:** This is a learning tool! Don't be afraid to experiment with settings and see what works best for your projects. The beauty of FSS-Mini-RAG is that it's designed to be beginner-friendly while still being powerful.
\ No newline at end of file
diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md
new file mode 100644
index 0000000..eaca99c
--- /dev/null
+++ b/docs/TROUBLESHOOTING.md
@@ -0,0 +1,460 @@
+# 🛠️ Troubleshooting Guide - Common Issues & Solutions
+
+*Having problems? You're not alone! Here are solutions to the most common issues beginners encounter.*
+
+---
+
+## 🚀 Installation & Setup Issues
+
+### ❌ "Command not found: ollama"
+**Problem:** The system can't find Ollama
+**Solution:**
+```bash
+# Install Ollama
+curl -fsSL https://ollama.ai/install.sh | sh
+# Or on Mac: brew install ollama
+
+# Start Ollama
+ollama serve
+```
+**Alternative:** Use the system without Ollama - it will automatically fall back to other embedding methods.
+
+### ❌ "Permission denied" when running scripts
+**Problem:** Script files aren't executable
+**Solution:**
+```bash
+chmod +x rag-mini.py rag-tui.py install_mini_rag.sh
+# Or run with python directly:
+python3 rag-mini.py --help
+```
+
+### ❌ "Module not found" or import errors
+**Problem:** Python dependencies are not installed
+**Solution:**
+```bash
+# Install dependencies
+pip3 install -r requirements.txt
+
+# If that fails, try:
+pip3 install --user -r requirements.txt
+```
+
+### ❌ Installation script fails
+**Problem:** `./install_mini_rag.sh` doesn't work
+**Solution:**
+```bash
+# Make it executable first
+chmod +x install_mini_rag.sh
+# Then run
+./install_mini_rag.sh
+
+# Or install manually:
+pip3 install -r requirements.txt
+python3 -c "import claude_rag; print('✅ Installation successful')"
+```
+
+---
+
+## 🔍 Search & Results Issues
+
+### ❌ "No results found" for everything
+**Problem:** Search isn't finding anything
+**Diagnosis & Solutions:**
+
+1. **Check whether the project is indexed:**
+   ```bash
+   ./rag-mini status /path/to/project
+   # If not indexed:
+   ./rag-mini index /path/to/project
+   ```
+
+2. **Lower the similarity threshold:**
+   - Edit the config file and change `similarity_threshold: 0.05`
+   - Or try: `./rag-mini search /path/to/project "query" --threshold 0.05`
+
+3. **Try broader search terms:**
+   - Instead of: "getUserById"
+   - Try: "user function" or "get user"
+
+4. **Enable query expansion:**
+   - Edit the config: `expand_queries: true`
+   - Or use the TUI, which enables it automatically
+
+### ❌ Search results are irrelevant/weird
+**Problem:** Getting results that don't match your search
+**Solutions:**
+
+1. **Increase the similarity threshold:**
+   ```yaml
+   search:
+     similarity_threshold: 0.3  # Higher = more picky
+   ```
+
+2. **Use more specific terms:**
+   - Instead of: "function"
+   - Try: "login function" or "authentication method"
+
+3. **Check the BM25 setting:**
+   ```yaml
+   search:
+     enable_bm25: true  # Helps find exact word matches
+   ```
+
+### ❌ Search is too slow
+**Problem:** It takes too long to get results
+**Solutions:**
+
+1. **Disable query expansion:**
+   ```yaml
+   search:
+     expand_queries: false
+   ```
+
+2. **Reduce the result limit:**
+   ```yaml
+   search:
+     default_limit: 5  # Instead of 10
+   ```
+
+3. **Use a faster embedding method:**
+   ```yaml
+   embedding:
+     preferred_method: hash  # Fastest but lower quality
+   ```
+
+4. **Use a smaller batch size:**
+   ```yaml
+   embedding:
+     batch_size: 16  # Instead of 32
+   ```
+
+---
+
+## 🤖 AI/LLM Issues
+
+### ❌ "LLM synthesis unavailable"
+**Problem:** AI explanations aren't working
+**Solutions:**
+
+1. **Check that Ollama is running:**
+   ```bash
+   # In one terminal:
+   ollama serve
+   # In another:
+   ollama list  # Should show installed models
+   ```
+
+2. **Install a model:**
+   ```bash
+   ollama pull qwen3:0.6b  # Fast, small model
+   # Or: ollama pull llama3.2  # Larger but better
+   ```
+
+3. **Test the connection:**
+   ```bash
+   curl http://localhost:11434/api/tags
+   # Should return JSON with the model list
+   ```
+
+### ❌ AI gives weird/wrong answers
+**Problem:** LLM responses don't make sense
+**Solutions:**
+
+1. **Lower the temperature:**
+   ```yaml
+   llm:
+     synthesis_temperature: 0.1  # More factual, less creative
+   ```
+
+2. **Try a different model:**
+   ```bash
+   ollama pull qwen3:1.7b  # Good balance of speed/quality
+   ```
+
+3. **Use synthesis mode instead of exploration:**
+   ```bash
+   ./rag-mini search /path "query" --synthesize
+   # Instead of: ./rag-mini explore /path
+   ```
+
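+**Prefer Python over curl?** The same `/api/tags` check can be scripted. A minimal sketch using the `requests` package (which the RAG system already depends on):
+
+```python
+import requests
+
+def ollama_models(host="localhost:11434"):
+    """Return installed Ollama model names, or [] if Ollama isn't reachable."""
+    try:
+        resp = requests.get(f"http://{host}/api/tags", timeout=5)
+        resp.raise_for_status()
+        return [m["name"] for m in resp.json().get("models", [])]
+    except requests.RequestException:
+        return []
+
+models = ollama_models()
+if models:
+    print("✅ Ollama is running with:", ", ".join(models))
+else:
+    print("❌ Ollama unreachable - start it with: ollama serve")
+```
+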
+---
+
+## 💾 Memory & Performance Issues
+
+### ❌ "Out of memory" or computer freezes during indexing
+**Problem:** The system runs out of RAM
+**Solutions:**
+
+1. **Reduce the batch size:**
+   ```yaml
+   embedding:
+     batch_size: 8  # Much smaller batches
+   ```
+
+2. **Lower the streaming threshold:**
+   ```yaml
+   streaming:
+     threshold_bytes: 512000  # 512KB instead of 1MB
+   ```
+
+3. **Index smaller projects first:**
+   ```bash
+   # Exclude large directories
+   ./rag-mini index /path/to/project --exclude "node_modules/**,dist/**"
+   ```
+
+4. **Use hash embeddings:**
+   ```yaml
+   embedding:
+     preferred_method: hash  # Much less memory
+   ```
+
+### ❌ Indexing is extremely slow
+**Problem:** It's taking forever to index the project
+**Solutions:**
+
+1. **Exclude unnecessary files:**
+   ```yaml
+   files:
+     exclude_patterns:
+       - "node_modules/**"
+       - ".git/**"
+       - "*.log"
+       - "build/**"
+       - "*.min.js"  # Minified files
+   ```
+
+2. **Increase the minimum file size:**
+   ```yaml
+   files:
+     min_file_size: 200  # Skip tiny files
+   ```
+
+3. **Use simpler chunking:**
+   ```yaml
+   chunking:
+     strategy: fixed  # Faster than semantic
+   ```
+
+4. **Use more workers (if you have a good CPU):**
+   ```bash
+   ./rag-mini index /path/to/project --workers 8
+   ```
+
+---
+
+## ⚙️ Configuration Issues
+
+### ❌ "Invalid configuration" errors
+**Problem:** The config file has errors
+**Solutions:**
+
+1. **Check the YAML syntax:**
+   ```bash
+   python3 -c "import yaml; yaml.safe_load(open('config.yaml'))"
+   ```
+
+2. **Copy from a working example:**
+   ```bash
+   cp examples/config.yaml .claude-rag/config.yaml
+   ```
+
+3. **Reset to defaults:**
+   ```bash
+   rm .claude-rag/config.yaml
+   # The system will recreate it with defaults
+   ```
+
+### ❌ Changes to the config aren't taking effect
+**Problem:** Modified settings don't work
+**Solutions:**
+
+1. **Restart the TUI/CLI:**
+   - Configuration is loaded at startup
+   - Exit and restart the interface
+
+2. **Check the config location:**
+   ```bash
+   # Project-specific config:
+   /path/to/project/.claude-rag/config.yaml
+   # Global config:
+   ~/.claude-rag/config.yaml
+   ```
+
+3. **Force a re-index after config changes:**
+   ```bash
+   ./rag-mini index /path/to/project --force
+   ```
+
+---
+
+## 🖥️ Interface Issues
+
+### ❌ TUI looks broken/garbled
+**Problem:** The text interface isn't displaying correctly
+**Solutions:**
+
+1. **Try a different terminal:**
+   ```bash
+   # Instead of a basic terminal, try:
+   # - iTerm2 (Mac)
+   # - Windows Terminal (Windows)
+   # - GNOME Terminal (Linux)
+   ```
+
+2. **Use the CLI directly:**
+   ```bash
+   ./rag-mini --help  # Skip the TUI entirely
+   ```
+
+3. **Check the terminal size:**
+   ```bash
+   # Make the terminal window larger (the TUI needs space)
+   # At least 80x24 characters
+   ```
+
+### ❌ "Keyboard interrupt" or TUI crashes
+**Problem:** The interface stops responding
+**Solutions:**
+
+1. **Use Ctrl+C to exit cleanly:**
+   - Don't force-quit if possible
+
+2. **Check for conflicting processes:**
+   ```bash
+   ps aux | grep rag-tui
+   # Kill any stuck processes
+   ```
+
+3. **Use the CLI as a fallback:**
+   ```bash
+   ./rag-mini search /path/to/project "your query"
+   ```
+
+---
+
+## 📁 File & Path Issues
+
+### ❌ "Project not found" or "Permission denied"
+**Problem:** Can't access the project directory
+**Solutions:**
+
+1. **Check that the path exists:**
+   ```bash
+   ls -la /path/to/project
+   ```
+
+2. **Check permissions:**
+   ```bash
+   # Make sure you can read the directory
+   chmod -R +r /path/to/project
+   ```
+
+3. **Use absolute paths:**
+   ```bash
+   # Instead of: ./rag-mini index ../my-project
+   # Use: ./rag-mini index /full/path/to/my-project
+   ```
+
+### ❌ "No files found to index"
+**Problem:** The system doesn't see any files
+**Solutions:**
+
+1. **Check the include patterns:**
+   ```yaml
+   files:
+     include_patterns:
+       - "**/*.py"  # Only Python files
+       - "**/*.js"  # Add JavaScript
+       - "**/*.md"  # Add Markdown
+   ```
+
+2. **Check the exclude patterns:**
+   ```yaml
+   files:
+     exclude_patterns: []  # Remove all exclusions temporarily
+   ```
+
+3. **Lower the minimum file size:**
+   ```yaml
+   files:
+     min_file_size: 10  # Instead of 50
+   ```
+
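+**How patterns are matched:** The include/exclude patterns are shell-style globs applied to project-relative paths. If you want to sanity-check a pattern, this sketch mimics the general semantics - it is not the indexer's actual matcher, and note that `fnmatch`'s `*` crosses `/`, so `node_modules/**` matches nested files too:
+
+```python
+from fnmatch import fnmatch
+from pathlib import Path
+
+EXCLUDE_PATTERNS = ["node_modules/**", ".git/**", "__pycache__/**", "*.pyc"]
+
+def should_index(path, root):
+    """Rough check of whether a file survives the exclude patterns."""
+    rel = path.relative_to(root).as_posix()
+    return not any(fnmatch(rel, pattern) for pattern in EXCLUDE_PATTERNS)
+
+root = Path("/path/to/project")
+print(should_index(root / "src/auth.py", root))                   # True
+print(should_index(root / "node_modules/lodash/index.js", root))  # False
+```
+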
+---
+
+## 🔍 Quick Diagnostic Commands
+
+**Check system status:**
+```bash
+./rag-mini status /path/to/project
+```
+
+**Test embeddings:**
+```bash
+python3 -c "from claude_rag.ollama_embeddings import OllamaEmbedder; e=OllamaEmbedder(); print(e.get_embedding_info())"
+```
+
+**Verify the installation:**
+```bash
+python3 -c "import claude_rag; print('✅ RAG system installed')"
+```
+
+**Test the Ollama connection:**
+```bash
+curl -s http://localhost:11434/api/tags | python3 -m json.tool
+```
+
+**Check disk space:**
+```bash
+df -h .claude-rag/  # Make sure you have space for the index
+```
+
+---
+
+## 🆘 When All Else Fails
+
+1. **Start fresh:**
+   ```bash
+   rm -rf .claude-rag/
+   ./rag-mini index /path/to/project
+   ```
+
+2. **Use a minimal config:**
+   ```yaml
+   # Simplest possible config:
+   chunking:
+     strategy: fixed
+   embedding:
+     preferred_method: auto
+   search:
+     expand_queries: false
+   ```
+
+3. **Try a tiny test project:**
+   ```bash
+   mkdir test-project
+   echo "def hello(): print('world')" > test-project/test.py
+   ./rag-mini index test-project
+   ./rag-mini search test-project "hello function"
+   ```
+
+4. **Get help:**
+   - Check the main README.md
+   - Look at the examples/ directory
+   - Try the basic_usage.py example
+
+---
+
+## 💡 Prevention Tips
+
+**For beginners:**
+- Start with the default settings
+- Use the TUI interface first
+- Test with small projects initially
+- Keep Ollama running in the background
+
+**For better results:**
+- Be specific in search queries
+- Use the glossary to understand terms
+- Experiment with config settings on test projects first
+- Use synthesis mode for quick answers, exploration for learning
+
+**Remember:** This is a learning tool! Don't be afraid to experiment and try different settings. The worst that can happen is that you delete the `.claude-rag` directory and start over. 🚀
\ No newline at end of file
diff --git a/examples/config-beginner.yaml b/examples/config-beginner.yaml
new file mode 100644
index 0000000..0a4986c
--- /dev/null
+++ b/examples/config-beginner.yaml
@@ -0,0 +1,72 @@
+# 🚀 BEGINNER CONFIG - Simple & Reliable
+# Perfect for newcomers who want everything to "just work"
+# Copy this to your project: cp examples/config-beginner.yaml /path/to/project/.claude-rag/config.yaml
+
+#══════════════════════════════════════════════════════════════════════
+# ✨ BEGINNER-FRIENDLY SETTINGS - No overwhelming options!
+#══════════════════════════════════════════════════════════════════════
+
+# 📏 How to split your code files (keep it simple)
+chunking:
+  max_size: 2000  # Good size for most code (about 50 lines)
+  min_size: 150  # Skip tiny fragments
+  strategy: semantic  # Smart splitting (respects functions/classes)
+
+# 🌊 Handle large files without crashing
+streaming:
+  enabled: true  # Always keep this on
+  threshold_bytes: 1048576  # 1MB - good for most computers
+
+# 📁 Which files to include
+files:
+  min_file_size: 50  # Skip empty/tiny files
+
+  # 🚫 Skip these folders (saves time and storage)
+  exclude_patterns:
+    - "node_modules/**"  # JavaScript packages
+    - ".git/**"  # Git history
+    - "__pycache__/**"  # Python cache
+    - "*.pyc"  # Python bytecode
+    - ".venv/**"  # Python virtual environments
+    - "build/**"  # Build artifacts
+    - "dist/**"  # Distribution files
+
+  include_patterns:
+    - "**/*"  # Everything else
+
+# 🧠 Embeddings (the "AI fingerprints" of your code)
+embedding:
+  preferred_method: auto  # Try the best method, fall back if needed - SAFEST
+  batch_size: 32  # Good balance of speed and memory usage
+
+# 🔍 Search behavior
+search:
+  default_limit: 10  # Show 10 results (a good starting point)
+  enable_bm25: true  # Find exact word matches too
+  similarity_threshold: 0.1  # Fairly permissive (shows more results)
+  expand_queries: false  # Keep it simple for now
+
+# 🤖 AI explanations (optional but helpful)
+llm:
+  synthesis_model: auto  # Pick the best available model
+  enable_synthesis: false  # Turn on manually with --synthesize
+  synthesis_temperature: 0.3  # Factual answers
+  cpu_optimized: true  # Good for computers without fancy graphics cards
+  enable_thinking: true  # Shows reasoning (great for learning!)
+  max_expansion_terms: 6  # Keep expansions focused
+
+#══════════════════════════════════════════════════════════════════════
+# 🎯 WHAT THIS CONFIG DOES:
+#
+# ✅ Works reliably across different systems
+# ✅ Good performance on modest hardware
+# ✅ Balanced search results (not too few, not too many)
+# ✅ Safe defaults that won't crash your computer
+# ✅ AI features available but not overwhelming
+#
+# 🚀 TO GET STARTED:
+# 1. Copy this file to your project: .claude-rag/config.yaml
+# 2. Index your project: ./rag-mini index /path/to/project
+# 3. Search: ./rag-mini search /path/to/project "your query"
+# 4. Try AI: ./rag-mini search /path/to/project "your query" --synthesize
+#══════════════════════════════════════════════════════════════════════
\ No newline at end of file
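For readers wiring these presets in by hand: the file the tools read is `.claude-rag/config.yaml` inside the project. A small sketch of how to inspect which values a project ended up with, assuming PyYAML (already used in the troubleshooting guide above); the fallback defaults shown mirror this preset:

```python
from pathlib import Path
import yaml

def load_rag_config(project):
    """Read a project's .claude-rag/config.yaml, returning {} if it's missing."""
    config_path = Path(project) / ".claude-rag" / "config.yaml"
    if not config_path.exists():
        return {}
    with config_path.open() as f:
        return yaml.safe_load(f) or {}

cfg = load_rag_config("/path/to/project")
print("chunk size:", cfg.get("chunking", {}).get("max_size", 2000))
print("threshold:", cfg.get("search", {}).get("similarity_threshold", 0.1))
print("expansion:", cfg.get("search", {}).get("expand_queries", False))
```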
diff --git a/examples/config-fast.yaml b/examples/config-fast.yaml
new file mode 100644
index 0000000..6fe646f
--- /dev/null
+++ b/examples/config-fast.yaml
@@ -0,0 +1,105 @@
+# ⚡ FAST CONFIG - Maximum Speed
+# When you need quick results and don't mind slightly lower quality
+# Perfect for: large projects, frequent searches, older computers
+
+#══════════════════════════════════════════════════════════════════════
+# 🚀 SPEED-OPTIMIZED SETTINGS - Everything tuned for performance!
+#══════════════════════════════════════════════════════════════════════
+
+# 📏 Chunking optimized for speed
+chunking:
+  max_size: 1500  # Smaller chunks = faster processing
+  min_size: 100  # A more aggressive minimum
+  strategy: fixed  # Simple splitting (faster than semantic)
+
+# 🌊 More aggressive streaming for memory efficiency
+streaming:
+  enabled: true
+  threshold_bytes: 512000  # 512KB - process big files in smaller chunks
+
+# 📁 File filtering optimized for speed
+files:
+  min_file_size: 100  # Skip more tiny files
+
+  # 🚫 Aggressive exclusions for speed
+  exclude_patterns:
+    - "node_modules/**"
+    - ".git/**"
+    - "__pycache__/**"
+    - "*.pyc"
+    - ".venv/**"
+    - "venv/**"
+    - "build/**"
+    - "dist/**"
+    - "*.min.js"  # Skip minified files
+    - "*.min.css"  # Skip minified CSS
+    - "*.log"  # Skip log files
+    - "*.tmp"  # Skip temp files
+    - "target/**"  # Rust/Java build dirs
+    - ".next/**"  # Next.js build dir
+    - ".nuxt/**"  # Nuxt build dir
+
+  include_patterns:
+    - "**/*.py"  # Focus on common code files only
+    - "**/*.js"
+    - "**/*.ts"
+    - "**/*.jsx"
+    - "**/*.tsx"
+    - "**/*.java"
+    - "**/*.cpp"
+    - "**/*.c"
+    - "**/*.h"
+    - "**/*.rs"
+    - "**/*.go"
+    - "**/*.php"
+    - "**/*.rb"
+    - "**/*.md"
+
+# 🧠 Fastest embedding method
+embedding:
+  preferred_method: hash  # Instant embeddings (lower quality but very fast)
+  batch_size: 64  # Larger batches for efficiency
+
+# 🔍 Search optimized for speed
+search:
+  default_limit: 5  # Fewer results = faster display
+  enable_bm25: false  # Skip keyword matching for speed
+  similarity_threshold: 0.2  # Higher threshold = fewer results to process
+  expand_queries: false  # No query expansion (much faster)
+
+# 🤖 Minimal AI for speed
+llm:
+  synthesis_model: qwen3:0.6b  # Smallest/fastest model
+  enable_synthesis: false  # Only use when explicitly requested
+  synthesis_temperature: 0.1  # Fast, factual responses
+  cpu_optimized: true  # Use lightweight models
+  enable_thinking: false  # Skip the thinking process for speed
+  max_expansion_terms: 4  # Shorter expansions
+
+#══════════════════════════════════════════════════════════════════════
+# ⚡ WHAT THIS CONFIG PRIORITIZES:
+#
+# 🚀 Indexing speed - get up and running quickly
+# 🚀 Search speed - results in milliseconds
+# 🚀 Memory efficiency - won't slow down your computer
+# 🚀 CPU efficiency - good for older/slower machines
+# 🚀 Storage efficiency - smaller index files
+#
+# ⚖️ TRADE-OFFS:
+# ⚠️ Lower search quality (might miss some relevant results)
+# ⚠️ Less context in results (smaller chunks)
+# ⚠️ No query expansion (might need more specific search terms)
+# ⚠️ Basic embeddings (hash-based, not semantic)
+#
+# 🎯 PERFECT FOR:
+# • Large codebases (>10k files)
+# • Older computers with limited resources
+# • When you know exactly what you're looking for
+# • Frequent, quick lookups
+# • CI/CD environments where speed matters
+#
+# 🚀 TO USE THIS CONFIG:
+# 1. Copy to project: cp examples/config-fast.yaml .claude-rag/config.yaml
+# 2. Index: ./rag-mini index /path/to/project
+# 3. Enjoy lightning-fast searches! ⚡
+#══════════════════════════════════════════════════════════════════════
\ No newline at end of file
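What `preferred_method: hash` trades away: hash embeddings just bucket tokens into a fixed-size vector, so identical words match but synonyms don't. An illustrative sketch of the general feature-hashing technique - not FSS-Mini-RAG's actual implementation:

```python
import hashlib
import math
import re

def hash_embedding(text, dim=256):
    """Deterministic bag-of-words vector: fast and offline, but not semantic."""
    vec = [0.0] * dim
    for token in re.findall(r"\w+", text.lower()):
        digest = hashlib.md5(token.encode()).digest()
        vec[int.from_bytes(digest[:4], "little") % dim] += 1.0
    norm = math.sqrt(sum(v * v for v in vec)) or 1.0
    return [v / norm for v in vec]

# "login" matches "login" perfectly, but "signin" lands in a different
# bucket - exactly the weakness the ollama/ml methods avoid.
print(hash_embedding("login user")[:8])
```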
diff --git a/examples/config-quality.yaml b/examples/config-quality.yaml
new file mode 100644
index 0000000..e2ce2fc
--- /dev/null
+++ b/examples/config-quality.yaml
@@ -0,0 +1,111 @@
+# 💎 QUALITY CONFIG - Best Possible Results
+# When you want the highest quality search and AI responses
+# Perfect for: learning new codebases, research, complex analysis
+
+#══════════════════════════════════════════════════════════════════════
+# 🎯 QUALITY-OPTIMIZED SETTINGS - Everything tuned for the best results!
+#══════════════════════════════════════════════════════════════════════
+
+# 📏 Chunking for maximum context and quality
+chunking:
+  max_size: 3000  # Larger chunks = more context per result
+  min_size: 200  # Ensure substantial content per chunk
+  strategy: semantic  # Smart splitting that respects code structure
+
+# 🌊 Conservative streaming (favor quality over speed)
+streaming:
+  enabled: true
+  threshold_bytes: 2097152  # 2MB - less aggressive chunking
+
+# 📁 Comprehensive file inclusion
+files:
+  min_file_size: 20  # Include even small files (they might contain important info)
+
+  # 🎯 Minimal exclusions (include more content)
+  exclude_patterns:
+    - "node_modules/**"  # Still skip these (too much noise)
+    - ".git/**"  # Git history isn't useful for code search
+    - "__pycache__/**"  # Python bytecode
+    - "*.pyc"
+    - ".venv/**"
+    - "build/**"  # Compiled artifacts
+    - "dist/**"
+    # Note: We keep logs, docs, and configs that might have useful context
+
+  include_patterns:
+    - "**/*"  # Include everything not explicitly excluded
+
+# 🧠 Best embedding quality
+embedding:
+  preferred_method: ollama  # Highest quality embeddings (needs Ollama)
+  ollama_model: nomic-embed-text  # Excellent code understanding
+  ml_model: sentence-transformers/all-MiniLM-L6-v2  # Good fallback
+  batch_size: 16  # Smaller batches for stability
+
+# 🔍 Search optimized for comprehensive results
+search:
+  default_limit: 15  # More results to choose from
+  enable_bm25: true  # Use both semantic and keyword matching
+  similarity_threshold: 0.05  # Very permissive (show more possibilities)
+  expand_queries: true  # Automatic query expansion for better recall
+
+# 🤖 High-quality AI analysis
+llm:
+  synthesis_model: auto  # Use the best available model
+  enable_synthesis: true  # AI explanations by default
+  synthesis_temperature: 0.4  # A good balance of accuracy and insight
+  cpu_optimized: false  # Use powerful models if available
+  enable_thinking: true  # Show the detailed reasoning process
+  max_expansion_terms: 10  # Comprehensive query expansion
+
+#══════════════════════════════════════════════════════════════════════
+# 💎 WHAT THIS CONFIG MAXIMIZES:
+#
+# 🎯 Search comprehensiveness - find everything relevant
+# 🎯 Result context - larger chunks with more information
+# 🎯 AI explanation quality - detailed, thoughtful analysis
+# 🎯 Query understanding - automatic expansion and enhancement
+# 🎯 Semantic accuracy - the best embedding models available
+#
+# ⚖️ TRADE-OFFS:
+# ⏳ Slower indexing (larger chunks, better embeddings)
+# ⏳ Slower searching (query expansion, more results)
+# 💾 More storage space (larger index, more files included)
+# 🧠 More memory usage (larger batches, bigger models)
+# ⚡ Higher CPU/GPU usage (better models)
+#
+# 🎯 PERFECT FOR:
+# • Learning new, complex codebases
+# • Research and analysis tasks
+# • When you need to understand WHY code works a certain way
+# • Finding subtle connections and patterns
+# • Code review and security analysis
+# • Academic or professional research
+#
+# 💻 REQUIREMENTS:
+# • Ollama installed and running (ollama serve)
+# • At least one language model (ollama pull qwen3:1.7b)
+# • Decent computer specs (4GB+ RAM recommended)
+# • Patience for thorough analysis 😊
+#
+# 🚀 TO USE THIS CONFIG:
+# 1. Install Ollama: curl -fsSL https://ollama.ai/install.sh | sh
+# 2. Start Ollama: ollama serve
+# 3. Install a model: ollama pull qwen3:1.7b
+# 4. Copy config: cp examples/config-quality.yaml .claude-rag/config.yaml
+# 5. Index project: ./rag-mini index /path/to/project
+# 6. Enjoy comprehensive analysis: ./rag-mini explore /path/to/project
+#══════════════════════════════════════════════════════════════════════
+
+# 🧪 ADVANCED QUALITY TUNING (optional):
+#
+# For even better results, try these model combinations:
+# • ollama pull nomic-embed-text:latest (best embeddings)
+# • ollama pull qwen3:1.7b (good general model)
+# • ollama pull llama3.2 (excellent for analysis)
+#
+# Or adjust these settings for your specific needs:
+# • similarity_threshold: 0.3 (more selective results)
+# • max_size: 4000 (even more context per result)
+# • enable_thinking: false (hide reasoning, show just answers)
+# • synthesis_temperature: 0.2 (more conservative AI responses)
\ No newline at end of file
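The presets above mostly differ on `chunking.strategy`. To see why `semantic` is worth the extra indexing time for code, compare the two splitting styles on the same text; this is a sketch of the idea (using blank lines as a stand-in for function/class boundaries), not the project's actual chunker:

```python
def fixed_chunks(text, max_size=2000):
    """Cut at hard size limits - fast, but can split a function in half."""
    return [text[i:i + max_size] for i in range(0, len(text), max_size)]

def semantic_chunks(text, max_size=2000):
    """Cut at natural boundaries, only splitting when a chunk gets too big."""
    chunks, current = [], ""
    for block in text.split("\n\n"):
        if current and len(current) + len(block) > max_size:
            chunks.append(current)
            current = ""
        current = f"{current}\n\n{block}" if current else block
    if current:
        chunks.append(current)
    return chunks
```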
diff --git a/examples/config.yaml b/examples/config.yaml
index 9fe59fd..50ddca8 100644
--- a/examples/config.yaml
+++ b/examples/config.yaml
@@ -1,55 +1,145 @@
-# FSS-Mini-RAG Configuration
-# Edit this file to customize indexing and search behavior
-# See docs/GETTING_STARTED.md for detailed explanations
+# FSS-Mini-RAG Configuration - Beginner-Friendly Edition
+#
+# 🎯 QUICK START PRESETS:
+# - Keep the defaults for most cases (recommended for beginners)
+# - For large projects (>10k files): increase max_size to 3000
+# - For faster search: set similarity_threshold to 0.2
+# - For better results: enable expand_queries (but slower search)
+
+#══════════════════════════════════════════════════════════════════════
+# 📏 CHUNKING: How we break up your code files for searching
+#══════════════════════════════════════════════════════════════════════
+# Think of chunks as "bite-sized pieces" of your code that the system can search through.
+# Smaller chunks = more precise results, but they might miss context
+# Larger chunks = more context, but they might be less precise
 
-# Text chunking settings
 chunking:
-  max_size: 2000  # Maximum characters per chunk
-  min_size: 150  # Minimum characters per chunk
-  strategy: semantic  # 'semantic' (language-aware) or 'fixed'
+  max_size: 2000  # Maximum characters per chunk (2000 = ~50 lines of code)
+  # 💡 ADJUST IF: Getting results that are too narrow/broad
+  # Small projects: 1500 | Large projects: 3000 | Detailed analysis: 4000
+
+  min_size: 150  # Minimum characters per chunk (150 = ~4-5 lines)
+  # ⚠️ Don't go below 100 or you'll get fragments
+
+  strategy: semantic  # How to split files into chunks
+  # 'semantic': Smart splitting (respects functions, classes) - RECOMMENDED
+  # 'fixed': Simple splitting (just cuts at size limits) - faster but less intelligent
+
+#══════════════════════════════════════════════════════════════════════
+# 🌊 STREAMING: How we handle really big files
+#══════════════════════════════════════════════════════════════════════
+# Large files (like minified CSS or huge data files) get processed in smaller batches
+# to prevent your computer from running out of memory
 
-# Large file streaming settings
 streaming:
-  enabled: true
-  threshold_bytes: 1048576  # Files larger than this use streaming (1MB)
+  enabled: true  # Always keep this true - prevents memory crashes
+  threshold_bytes: 1048576  # Files larger than 1MB use streaming (1MB = 1048576 bytes)
+  # 💡 ADJUST IF: Low-memory computer = 512000 | High memory = 2097152
+
+#══════════════════════════════════════════════════════════════════════
+# 📁 FILES: Which files to include/exclude from indexing
+#══════════════════════════════════════════════════════════════════════
 
-# File processing settings
 files:
-  min_file_size: 50  # Skip files smaller than this
+  min_file_size: 50  # Skip tiny files (50 bytes = ~1 line of code)
+  # 💡 REASON: Tiny files usually aren't useful for searching
+
+  # 🚫 EXCLUDE PATTERNS: Files/folders we always skip (saves time and space)
   exclude_patterns:
-    - "node_modules/**"
-    - ".git/**"
-    - "__pycache__/**"
-    - "*.pyc"
-    - ".venv/**"
-    - "venv/**"
-    - "build/**"
-    - "dist/**"
+    - "node_modules/**"  # JavaScript dependencies (huge, and not your code)
+    - ".git/**"  # Git history (not useful for code search)
+    - "__pycache__/**"  # Python bytecode (generated files)
+    - "*.pyc"  # More Python bytecode
+    - ".venv/**"  # Python virtual environments
+    - "venv/**"  # More virtual environments
+    - "build/**"  # Compiled output (not source code)
+    - "dist/**"  # Distribution files
+    # 💡 ADD YOUR OWN: Add patterns like "logs/**" or "*.tmp"
+
   include_patterns:
-    - "**/*"  # Include all files by default
+    - "**/*"  # Include everything else by default
+    # 💡 CUSTOMIZE: Could be ["**/*.py", "**/*.js"] for only Python/JS
+
+#══════════════════════════════════════════════════════════════════════
+# 🧠 EMBEDDINGS: How we turn your code into searchable "vectors"
+#══════════════════════════════════════════════════════════════════════
+# Embeddings are like "fingerprints" of your code that help find similar content
+# Don't worry about the technical details - the defaults work great!
 
-# Embedding generation settings
 embedding:
-  preferred_method: ollama  # 'ollama', 'ml', 'hash', or 'auto'
-  ollama_model: nomic-embed-text
-  ollama_host: localhost:11434
-  ml_model: sentence-transformers/all-MiniLM-L6-v2
-  batch_size: 32  # Embeddings processed per batch
+  preferred_method: ollama  # Which system to use for creating embeddings
+  # 'ollama': Best quality (needs Ollama installed) - RECOMMENDED
+  # 'ml': Good quality (downloads models automatically)
+  # 'hash': Basic quality (works without internet)
+  # 'auto': Try ollama, fall back to ml, then hash - SAFEST CHOICE
+
+  ollama_model: nomic-embed-text  # Which Ollama model to use (this one is excellent)
+  ollama_host: localhost:11434  # Where to find Ollama (don't change this unless you know why)
+
+  ml_model: sentence-transformers/all-MiniLM-L6-v2  # Backup model (small and fast)
+
+  batch_size: 32  # How many chunks to process at once
+  # 💡 ADJUST IF: Slow computer = 16 | Fast computer = 64
+
+#══════════════════════════════════════════════════════════════════════
+# 🔍 SEARCH: How the system finds and ranks results
+#══════════════════════════════════════════════════════════════════════
 
-# Search behavior settings
 search:
-  default_limit: 10  # Default number of results
-  enable_bm25: true  # Enable keyword matching boost
-  similarity_threshold: 0.1  # Minimum similarity score
-  expand_queries: false  # Enable automatic query expansion (TUI auto-enables)
+  default_limit: 10  # How many search results to show by default
+  # 💡 MORE RESULTS: 15-20 | FASTER SEARCH: 5-8
+
+  enable_bm25: true  # Also use keyword matching (like Google search)
+  # 💡 EFFECT: Finds exact word matches even if semantically different
+  # Keep this true unless you're getting too many irrelevant results
+
+  similarity_threshold: 0.1  # Minimum "similarity score" to show results (0.0-1.0)
+  # 💡 HIGHER = fewer but more relevant results
+  # Picky: 0.3 | Balanced: 0.1 | Show everything: 0.05
+
+  expand_queries: false  # Automatically add related search terms
+  # 💡 EFFECT: "auth" becomes "auth authentication login user"
+  # Better results but slower - the TUI enables this automatically
+
+#══════════════════════════════════════════════════════════════════════
+# 🤖 LLM: Settings for the AI that explains and synthesizes results
+#══════════════════════════════════════════════════════════════════════
+# The LLM (Large Language Model) reads your search results and explains them in plain English
 
-# LLM synthesis and query expansion settings
 llm:
-  ollama_host: localhost:11434
-  synthesis_model: auto  # 'auto' prefers qwen3:0.6b for CPU efficiency
-  expansion_model: auto  # Usually same as synthesis_model
-  max_expansion_terms: 8  # Maximum terms to add to queries
-  enable_synthesis: false  # Enable synthesis by default
-  synthesis_temperature: 0.3  # LLM temperature for analysis
-  cpu_optimized: true  # Prefer ultra-lightweight models for CPU-only systems
-  enable_thinking: true  # Enable thinking mode for Qwen3 models (production: true, testing: false)
\ No newline at end of file
+  ollama_host: localhost:11434  # Where to find Ollama (don't change this unless you know why)
+
+  synthesis_model: auto  # Which AI model to use for explanations
+  # 'auto': Picks the best available model - RECOMMENDED
+  # 'qwen3:0.6b': Ultra-fast, good for CPU-only computers
+  # 'llama3.2': Slower but more detailed explanations
+
+  expansion_model: auto  # Model for query expansion (usually the same as synthesis)
+
+  max_expansion_terms: 8  # How many extra terms to add to expanded queries
+  # 💡 MORE TERMS = broader search but potentially less focused
+
+  enable_synthesis: false  # Turn on AI explanations by default
+  # 💡 SET TO TRUE: If you want every search to include explanations
+  # (You can always use the --synthesize flag when you want it)
+
+  synthesis_temperature: 0.3  # How "creative" the AI explanations are (0.0-1.0)
+  # 💡 Lower = more factual | Higher = more creative
+  # Code analysis: 0.1-0.3 | Creative writing: 0.7-0.9
+
+  cpu_optimized: true  # Prefer lightweight models for computers without graphics cards
+  # 💡 DISABLE IF: You have a powerful GPU and want the highest quality
+
+  enable_thinking: true  # Let the AI "think out loud" for complex questions
+  # 💡 EFFECT: Shows the reasoning process, better for learning/debugging
+
+#══════════════════════════════════════════════════════════════════════
+# 🎯 QUICK TROUBLESHOOTING:
+#
+# Search returns nothing? → Lower similarity_threshold to 0.05
+# Search too slow? → Set expand_queries: false and batch_size: 16
+# Results not detailed enough? → Increase max_size to 3000
+# Getting weird fragments? → Check that min_size is at least 150
+# AI not working? → Make sure Ollama is running: `ollama serve`
+# Out of memory errors? → Decrease batch_size to 16 and lower threshold_bytes
+#══════════════════════════════════════════════════════════════════════
\ No newline at end of file
diff --git a/rag-mini.py b/rag-mini.py
index 7ec27ec..a9c559d 100644
--- a/rag-mini.py
+++ b/rag-mini.py
@@ -70,7 +70,16 @@ def index_project(project_path: Path, force: bool = False):
     
     except Exception as e:
         print(f"❌ Indexing failed: {e}")
-        print(f"   Use --verbose for details")
+        print()
+        print("🔧 Common solutions:")
+        print("   • Check that the path exists and you have read permissions")
+        print("   • Ensure Python dependencies are installed: pip install -r requirements.txt")
+        print("   • Try a smaller project first to test your setup")
+        print("   • Check available disk space for index files")
+        print()
+        print("📚 For detailed help:")
+        print(f"   ./rag-mini index {project_path} --verbose")
+        print("   Or see: docs/TROUBLESHOOTING.md")
         sys.exit(1)
 
 def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
@@ -89,10 +98,18 @@
 
         if not results:
             print("❌ No results found")
-            print("\n💡 Try:")
-            print("   • Broader search terms")
-            print("   • Check spelling")
-            print("   • Use concepts: \"authentication\" instead of \"auth_handler\"")
+            print()
+            print("🔧 Quick fixes to try:")
+            print("   • Use broader terms: \"login\" instead of \"authenticate_user_session\"")
+            print("   • Try concepts: \"database query\" instead of specific function names")
+            print("   • Check spelling and try simpler words")
+            print("   • Search for file types: \"python class\" or \"javascript function\"")
+            print()
+            print("⚙️ Configuration adjustments:")
+            print(f"   • Lower the threshold: ./rag-mini search {project_path} \"{query}\" --threshold 0.05")
+            print("   • More results: add --limit 20")
+            print()
+            print("📚 Need help? See: docs/TROUBLESHOOTING.md")
             return
 
         print(f"✅ Found {len(results)} results:")
@@ -154,10 +171,23 @@
 
     except Exception as e:
         print(f"❌ Search failed: {e}")
+        print()
+
         if "not indexed" in str(e).lower():
-            print(f"   Run: rag-mini index {project_path}")
+            print("🔧 Solution:")
+            print(f"   ./rag-mini index {project_path}")
+            print()
         else:
-            print("   Use --verbose for details")
+            print("🔧 Common solutions:")
+            print("   • Check that the project path exists and is readable")
+            print("   • Verify the index isn't corrupted: delete .claude-rag/ and re-index")
+            print("   • Try a different project to test your setup")
+            print("   • Check available memory and disk space")
+            print()
+            print("📚 Get detailed error info:")
+            print(f"   ./rag-mini search {project_path} \"{query}\" --verbose")
+            print("   Or see: docs/TROUBLESHOOTING.md")
+            print()
         sys.exit(1)
 
 def status_check(project_path: Path):