# FSS-Mini-RAG Configuration
# Edit this file to customize indexing and search behavior
# See docs/GETTING_STARTED.md for detailed explanations

# Text chunking settings
chunking:
  max_size: 2000      # Maximum characters per chunk
  min_size: 150       # Minimum characters per chunk
  strategy: semantic  # 'semantic' (language-aware) or 'fixed'
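
# Example override (illustrative values, not defaults): for very large source
# files, bigger fixed-size chunks can index faster at the cost of retrieval
# precision:
# chunking:
#   max_size: 4000
#   strategy: fixed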

# Large file streaming settings
streaming:
  enabled: true
  threshold_bytes: 1048576  # Files larger than this use streaming (1MB)
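
# Example override (assumed value): to stream only very large files, raise the
# threshold:
# streaming:
#   threshold_bytes: 10485760  # 10MB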

# File processing settings
files:
  min_file_size: 50  # Skip files smaller than this (bytes)
  exclude_patterns:
    - "node_modules/**"
    - ".git/**"
    - "__pycache__/**"
    - "*.pyc"
    - ".venv/**"
    - "venv/**"
    - "build/**"
    - "dist/**"
  include_patterns:
    - "**/*"  # Include all files by default

# Embedding generation settings
embedding:
  preferred_method: ollama  # 'ollama', 'ml', 'hash', or 'auto'
  ollama_model: nomic-embed-text
  ollama_host: localhost:11434
  ml_model: sentence-transformers/all-MiniLM-L6-v2
  batch_size: 32  # Embeddings processed per batch
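
# Example override (assumption): to index without a running Ollama server,
# force the local sentence-transformers model configured above:
# embedding:
#   preferred_method: ml
# 'hash' needs no model at all, but presumably gives weaker semantic matching.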

# Search behavior settings
search:
  default_limit: 10          # Default number of results
  enable_bm25: true          # Enable keyword matching boost
  similarity_threshold: 0.1  # Minimum similarity score
  expand_queries: false      # Automatic query expansion (the TUI enables this automatically)
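
# Example override (illustrative values): for fewer, higher-confidence
# matches, tighten the limit and threshold:
# search:
#   default_limit: 5
#   similarity_threshold: 0.3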

# LLM synthesis and query expansion settings
llm:
  ollama_host: localhost:11434
  synthesis_model: auto       # 'auto' prefers qwen3:0.6b for CPU efficiency
  expansion_model: auto       # Usually same as synthesis_model
  max_expansion_terms: 8      # Maximum terms to add to queries
  enable_synthesis: false     # Run LLM synthesis on search results (off by default)
  synthesis_temperature: 0.3  # LLM temperature for analysis
  cpu_optimized: true         # Prefer ultra-lightweight models on CPU-only systems
  enable_thinking: true       # Thinking mode for Qwen3 models (production: true, testing: false)
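
# Example override (illustrative values): to synthesize an answer on every
# search with more deterministic output, you might set:
# llm:
#   enable_synthesis: true
#   synthesis_temperature: 0.1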