Major fixes:
- Fix model selection to prioritize qwen3:1.7b instead of qwen3:4b for testing
- Correct context length from 80,000 to 32,000 tokens (proper Qwen3 limit)
- Implement content-preserving safeguards instead of dropping responses
- Fix all test imports from claude_rag to mini_rag module naming
- Add virtual environment warnings to all test entry points
- Fix TUI EOF crash handling with proper error handling
- Remove warmup delays that were causing startup lag and unwanted model calls
- Fix command mappings between bash wrapper and Python script
- Update documentation to reflect qwen3:1.7b as primary recommendation
- Improve TUI box alignment and formatting
- Make language generic for any documents, not just codebases
- Add proper folder names in user feedback instead of generic terms

Technical improvements:
- Unified model rankings across all components
- Better error handling for missing dependencies
- Comprehensive testing and validation of all fixes
- All tests now pass and system is deployment-ready

All major crashes and deployment issues resolved.
73 lines
3.6 KiB
YAML
73 lines
3.6 KiB
YAML
# 🚀 BEGINNER CONFIG - Simple & Reliable
# Perfect for newcomers who want everything to "just work"
# Copy this to your project: cp examples/config-beginner.yaml /path/to/project/.mini-rag/config.yaml

#═══════════════════════════════════════════════════════════════════════
# ✨ BEGINNER-FRIENDLY SETTINGS - No overwhelming options!
#═══════════════════════════════════════════════════════════════════════

# 📝 How to split your code files (keep it simple)
chunking:
  max_size: 2000      # Good size for most code (about 50 lines)
  min_size: 150       # Skip tiny fragments
  strategy: semantic  # Smart splitting (respects functions/classes)

# 🌊 Handle large files without crashing
streaming:
  enabled: true             # Always keep this on
  threshold_bytes: 1048576  # 1MB - good for most computers

# 📁 Which files to include
files:
  min_file_size: 50  # Skip empty/tiny files

  # 🚫 Skip these folders (saves time and storage)
  exclude_patterns:
    - "node_modules/**"  # JavaScript packages
    - ".git/**"          # Git history
    - "__pycache__/**"   # Python cache
    - "*.pyc"            # Python bytecode
    - ".venv/**"         # Python virtual environments
    - "build/**"         # Build artifacts
    - "dist/**"          # Distribution files

  include_patterns:
    - "**/*"  # Everything else

# 🧠 Embeddings (the "AI fingerprints" of your code)
embedding:
  preferred_method: auto  # Try best method, fall back if needed - SAFEST
  batch_size: 32          # Good balance of speed and memory usage

# 🔍 Search behavior
search:
  default_top_k: 10          # Show 10 results (good starting point)
  enable_bm25: true          # Find exact word matches too
  similarity_threshold: 0.1  # Pretty permissive (shows more results)
  expand_queries: false      # Keep it simple for now

# 🤖 AI explanations (optional but helpful)
# 💡 WANT DIFFERENT LLM? See examples/config-llm-providers.yaml for OpenAI, Claude, etc.
llm:
  synthesis_model: auto       # Pick best available model
  enable_synthesis: false     # Turn on manually with --synthesize
  synthesis_temperature: 0.3  # Factual answers
  cpu_optimized: true         # Good for computers without fancy graphics cards
  enable_thinking: true       # Shows reasoning (great for learning!)
  max_expansion_terms: 6      # Keep expansions focused

#═══════════════════════════════════════════════════════════════════════
# 🎯 WHAT THIS CONFIG DOES:
#
# ✅ Works reliably across different systems
# ✅ Good performance on modest hardware
# ✅ Balanced search results (not too few, not too many)
# ✅ Safe defaults that won't crash your computer
# ✅ AI features available but not overwhelming
#
# 🚀 TO GET STARTED:
# 1. Copy this file to your project: .mini-rag/config.yaml
# 2. Index your project: ./rag-mini index /path/to/project
# 3. Search: ./rag-mini search /path/to/project "your query"
# 4. Try AI: ./rag-mini search /path/to/project "your query" --synthesize
#═══════════════════════════════════════════════════════════════════════