Fss-Rag-Mini/examples/config.yaml
BobAi 16199375fc Add CPU-only deployment support with qwen3:0.6b model
- Update model rankings to prioritize ultra-efficient CPU models (qwen3:0.6b first)
- Add comprehensive CPU deployment documentation with performance benchmarks
- Configure CPU-optimized settings in default config
- Enable 796MB total model footprint for standard systems
- Support Raspberry Pi, older laptops, and CPU-only environments
- Maintain excellent quality with 522MB qwen3:0.6b model
2025-08-12 17:49:02 +10:00

54 lines
1.9 KiB
YAML

# FSS-Mini-RAG Configuration
# Edit this file to customize indexing and search behavior
# See docs/GETTING_STARTED.md for detailed explanations
# Text chunking settings
chunking:
max_size: 2000 # Maximum characters per chunk
min_size: 150 # Minimum characters per chunk
strategy: semantic # 'semantic' (language-aware) or 'fixed'
# Large file streaming settings
streaming:
enabled: true
threshold_bytes: 1048576 # Files larger than this use streaming (1MB)
# File processing settings
files:
min_file_size: 50 # Skip files smaller than this
exclude_patterns:
- "node_modules/**"
- ".git/**"
- "__pycache__/**"
- "*.pyc"
- ".venv/**"
- "venv/**"
- "build/**"
- "dist/**"
include_patterns:
- "**/*" # Include all files by default
# Embedding generation settings
embedding:
preferred_method: ollama # 'ollama', 'ml', 'hash', or 'auto'
ollama_model: nomic-embed-text
ollama_host: localhost:11434
ml_model: sentence-transformers/all-MiniLM-L6-v2
batch_size: 32 # Embeddings processed per batch
# Search behavior settings
search:
default_limit: 10 # Default number of results
enable_bm25: true # Enable keyword matching boost
similarity_threshold: 0.1 # Minimum similarity score
expand_queries: false # Enable automatic query expansion (TUI auto-enables)
# LLM synthesis and query expansion settings
llm:
ollama_host: localhost:11434
synthesis_model: auto # 'auto' prefers qwen3:0.6b for CPU efficiency
expansion_model: auto # Usually same as synthesis_model
max_expansion_terms: 8 # Maximum terms to add to queries
enable_synthesis: false # Enable synthesis by default
synthesis_temperature: 0.3 # LLM temperature for analysis
cpu_optimized: true # Prefer ultra-lightweight models for CPU-only systems