Major fixes:

- Fix model selection to prioritize qwen3:1.7b instead of qwen3:4b for testing
- Correct context length from 80,000 to 32,000 tokens (proper Qwen3 limit)
- Implement content-preserving safeguards instead of dropping responses
- Fix all test imports from claude_rag to mini_rag module naming
- Add virtual environment warnings to all test entry points
- Fix TUI EOF crash handling with proper error handling
- Remove warmup delays that were causing startup lag and unwanted model calls
- Fix command mappings between bash wrapper and Python script
- Update documentation to reflect qwen3:1.7b as primary recommendation
- Improve TUI box alignment and formatting
- Make language generic for any documents, not just codebases
- Add proper folder names in user feedback instead of generic terms

Technical improvements:

- Unified model rankings across all components
- Better error handling for missing dependencies
- Comprehensive testing and validation of all fixes
- All tests now pass and system is deployment-ready

All major crashes and deployment issues resolved.
53 lines · 1.8 KiB · YAML
# FSS-Mini-RAG Configuration
# Edit this file to customize indexing and search behavior
# See docs/GETTING_STARTED.md for detailed explanations
---
# Text chunking settings
chunking:
  max_size: 2000      # Maximum characters per chunk
  min_size: 150       # Minimum characters per chunk
  strategy: semantic  # 'semantic' (language-aware) or 'fixed'

# Large file streaming settings
streaming:
  enabled: true
  threshold_bytes: 1048576  # Files larger than this use streaming (1MB)

# File processing settings
files:
  min_file_size: 50  # Skip files smaller than this
  exclude_patterns:
    - "node_modules/**"
    - ".git/**"
    - "__pycache__/**"
    - "*.pyc"
    - ".venv/**"
    - "venv/**"
    - "build/**"
    - "dist/**"
  include_patterns:
    - "**/*"  # Include all files by default

# Embedding generation settings
embedding:
  preferred_method: ollama  # 'ollama', 'ml', 'hash', or 'auto'
  ollama_model: nomic-embed-text
  # Quoted: colon-containing host:port scalars are safest as explicit strings
  ollama_host: "localhost:11434"
  ml_model: sentence-transformers/all-MiniLM-L6-v2
  batch_size: 32  # Embeddings processed per batch

# Search behavior settings
search:
  default_top_k: 10          # Default number of top results
  enable_bm25: true          # Enable keyword matching boost
  similarity_threshold: 0.1  # Minimum similarity score
  expand_queries: false      # Enable automatic query expansion

# LLM synthesis and query expansion settings
llm:
  ollama_host: "localhost:11434"
  synthesis_model: auto       # 'auto', 'qwen3:1.7b', etc.
  expansion_model: auto       # Usually same as synthesis_model
  max_expansion_terms: 8      # Maximum terms to add to queries
  enable_synthesis: false     # Enable synthesis by default
  synthesis_temperature: 0.3  # LLM temperature for analysis