Add intelligent context window management for optimal RAG performance

## Core Features
- Dynamic context sizing based on model capabilities
- User-friendly configuration menu with Development/Production/Advanced presets
- Automatic validation against model limits (qwen3:0.6b/1.7b = 32K, qwen3:4b = 131K)
- Educational content explaining context window importance for RAG

## Technical Implementation
- Enhanced LLMConfig with context_window and auto_context parameters
- Intelligent _get_optimal_context_size() method with model-specific limits
- Consistent context application across synthesizer and explorer
- YAML configuration output with helpful context explanations

## User Experience Improvements
- Clear context window display in configuration status
- Guided selection: Development (8K), Production (16K), Advanced (32K)
- Memory usage estimates and performance guidance
- Validation prevents invalid context/model combinations

## Educational Value
- Explains why the default 2048 tokens fails for RAG
- Shows the relationship between context size and conversation length
- Guides users toward optimal settings for their use case
- Highlights advanced capabilities (15+ results, 4000+ character chunks)

This addresses the critical issue where Ollama's default context severely limits RAG performance, giving users proper configuration tools and an understanding of this crucial parameter. A sketch of the sizing logic is shown below.
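The `_get_optimal_context_size()` method itself lives in the synthesizer, not in the config file below. As a minimal sketch of the clamping behavior this message describes (the per-model limits and preset sizes are taken from the message; the function shape, name, and the 8K floor are assumptions):

```python
# Sketch only -- not the shipped implementation.
# Limits stated above: qwen3:0.6b/1.7b = 32K, qwen3:4b = 131K.
MODEL_CONTEXT_LIMITS = {
    "qwen3:0.6b": 32_768,
    "qwen3:1.7b": 32_768,
    "qwen3:4b": 131_072,
}
DEFAULT_LIMIT = 32_768  # assumed fallback for unknown models


def get_optimal_context_size(model: str, requested: int, auto_context: bool = True) -> int:
    """Clamp a requested context window to what the model supports."""
    limit = MODEL_CONTEXT_LIMITS.get(model, DEFAULT_LIMIT)
    if auto_context:
        # Stay within the model's limit, but never below the 8K (Development) preset.
        return max(8_192, min(requested, limit))
    return min(requested, limit)  # still validate explicit settings


# get_optimal_context_size("qwen3:1.7b", 131_072) -> 32768
```

For scale: at roughly 4 characters per token, 15 results of 4,000+ characters is already ~15,000 tokens of retrieved context alone, far beyond Ollama's 2,048-token default, which is why the 16K and 32K presets matter.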
"""
|
|
Configuration management for FSS-Mini-RAG.
|
|
Handles loading, saving, and validation of YAML config files.
|
|
"""
|
|
|
|
import yaml
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional
|
|
from dataclasses import dataclass, asdict
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class ChunkingConfig:
    """Configuration for text chunking."""
    max_size: int = 2000
    min_size: int = 150
    strategy: str = "semantic"  # "semantic" or "fixed"


@dataclass
class StreamingConfig:
    """Configuration for large file streaming."""
    enabled: bool = True
    threshold_bytes: int = 1048576  # 1MB


@dataclass
class FilesConfig:
    """Configuration for file processing."""
    min_file_size: int = 50
    exclude_patterns: Optional[list] = None
    include_patterns: Optional[list] = None

    def __post_init__(self):
        if self.exclude_patterns is None:
            self.exclude_patterns = [
                "node_modules/**",
                ".git/**",
                "__pycache__/**",
                "*.pyc",
                ".venv/**",
                "venv/**",
                "build/**",
                "dist/**",
            ]
        if self.include_patterns is None:
            self.include_patterns = ["**/*"]  # Include everything by default


@dataclass
class EmbeddingConfig:
    """Configuration for embedding generation."""
    preferred_method: str = "ollama"  # "ollama", "ml", "hash", or "auto"
    ollama_model: str = "nomic-embed-text"
    ollama_host: str = "localhost:11434"
    ml_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    batch_size: int = 32


@dataclass
class SearchConfig:
    """Configuration for search behavior."""
    default_top_k: int = 10
    enable_bm25: bool = True
    similarity_threshold: float = 0.1
    expand_queries: bool = False  # Enable automatic query expansion


@dataclass
class LLMConfig:
    """Configuration for LLM synthesis and query expansion."""
    # Core settings
    synthesis_model: str = "auto"  # "auto", "qwen3:1.7b", "qwen2.5:1.5b", etc.
    expansion_model: str = "auto"  # Usually same as synthesis_model
    max_expansion_terms: int = 8  # Maximum additional terms to add
    enable_synthesis: bool = False  # Off by default; enabled per run via --synthesize
    synthesis_temperature: float = 0.3
    enable_thinking: bool = True  # Enable thinking mode for Qwen3 models
    cpu_optimized: bool = True  # Prefer lightweight models

    # Context window configuration (critical for RAG performance)
    context_window: int = 16384  # Context window size in tokens (16K recommended)
    auto_context: bool = True  # Auto-adjust context based on model capabilities

    # Model preference rankings (configurable)
    model_rankings: Optional[list] = None  # Set in __post_init__

    # Provider-specific settings (for different LLM providers)
    provider: str = "ollama"  # "ollama", "openai", "anthropic"
    ollama_host: str = "localhost:11434"  # Ollama connection
    api_key: Optional[str] = None  # API key for cloud providers
    api_base: Optional[str] = None  # Base URL for API (e.g., OpenRouter)
    timeout: int = 20  # Request timeout in seconds

    def __post_init__(self):
        if self.model_rankings is None:
            # Default model preference rankings (can be overridden in config file)
            self.model_rankings = [
                # Testing model (prioritized for current testing phase)
                "qwen3:1.7b",

                # Ultra-efficient model (perfect for CPU-only systems)
                "qwen3:0.6b",

                # Recommended model (excellent quality but larger)
                "qwen3:4b",

                # Common fallbacks (prioritize Qwen models)
                "qwen2.5:1.5b",
                "qwen2.5:3b",
            ]


@dataclass
class RAGConfig:
    """Main RAG system configuration."""
    chunking: Optional[ChunkingConfig] = None
    streaming: Optional[StreamingConfig] = None
    files: Optional[FilesConfig] = None
    embedding: Optional[EmbeddingConfig] = None
    search: Optional[SearchConfig] = None
    llm: Optional[LLMConfig] = None

    def __post_init__(self):
        if self.chunking is None:
            self.chunking = ChunkingConfig()
        if self.streaming is None:
            self.streaming = StreamingConfig()
        if self.files is None:
            self.files = FilesConfig()
        if self.embedding is None:
            self.embedding = EmbeddingConfig()
        if self.search is None:
            self.search = SearchConfig()
        if self.llm is None:
            self.llm = LLMConfig()


class ConfigManager:
    """Manages configuration loading, saving, and validation."""

    def __init__(self, project_path: Path):
        self.project_path = Path(project_path)
        self.rag_dir = self.project_path / '.mini-rag'
        self.config_path = self.rag_dir / 'config.yaml'

    def load_config(self) -> RAGConfig:
        """Load configuration from YAML file or create default."""
        if not self.config_path.exists():
            logger.info(f"No config found at {self.config_path}, creating default")
            config = RAGConfig()
            self.save_config(config)
            return config

        try:
            with open(self.config_path, 'r') as f:
                data = yaml.safe_load(f)

            if not data:
                logger.warning("Empty config file, using defaults")
                return RAGConfig()

            # Convert nested dicts back to dataclass instances
            config = RAGConfig()

            if 'chunking' in data:
                config.chunking = ChunkingConfig(**data['chunking'])
            if 'streaming' in data:
                config.streaming = StreamingConfig(**data['streaming'])
            if 'files' in data:
                config.files = FilesConfig(**data['files'])
            if 'embedding' in data:
                config.embedding = EmbeddingConfig(**data['embedding'])
            if 'search' in data:
                config.search = SearchConfig(**data['search'])
            if 'llm' in data:
                config.llm = LLMConfig(**data['llm'])

            return config

        except Exception as e:
            logger.error(f"Failed to load config from {self.config_path}: {e}")
            logger.info("Using default configuration")
            return RAGConfig()

    def save_config(self, config: RAGConfig):
        """Save configuration to YAML file with comments."""
        try:
            # parents=True so saving works even if .mini-rag's parent is missing
            self.rag_dir.mkdir(parents=True, exist_ok=True)

            # Convert to dict for YAML serialization
            config_dict = asdict(config)

            # Create YAML content with comments
            yaml_content = self._create_yaml_with_comments(config_dict)

            with open(self.config_path, 'w') as f:
                f.write(yaml_content)

            logger.info(f"Configuration saved to {self.config_path}")

        except Exception as e:
            logger.error(f"Failed to save config to {self.config_path}: {e}")

    def _create_yaml_with_comments(self, config_dict: Dict[str, Any]) -> str:
        """Create YAML content with helpful comments."""
        yaml_lines = [
            "# FSS-Mini-RAG Configuration",
            "# Edit this file to customize indexing and search behavior",
            "# See docs/GETTING_STARTED.md for detailed explanations",
            "",
            "# Text chunking settings",
            "chunking:",
            f"  max_size: {config_dict['chunking']['max_size']}  # Maximum characters per chunk",
            f"  min_size: {config_dict['chunking']['min_size']}  # Minimum characters per chunk",
            f"  strategy: {config_dict['chunking']['strategy']}  # 'semantic' (language-aware) or 'fixed'",
            "",
            "# Large file streaming settings",
            "streaming:",
            f"  enabled: {str(config_dict['streaming']['enabled']).lower()}",
            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Files larger than this use streaming (1MB)",
            "",
            "# File processing settings",
            "files:",
            f"  min_file_size: {config_dict['files']['min_file_size']}  # Skip files smaller than this",
            "  exclude_patterns:",
        ]

        for pattern in config_dict['files']['exclude_patterns']:
            yaml_lines.append(f"    - \"{pattern}\"")

        yaml_lines.extend([
            "  include_patterns:",
            "    - \"**/*\"  # Include all files by default",
            "",
            "# Embedding generation settings",
            "embedding:",
            f"  preferred_method: {config_dict['embedding']['preferred_method']}  # 'ollama', 'ml', 'hash', or 'auto'",
            f"  ollama_model: {config_dict['embedding']['ollama_model']}",
            f"  ollama_host: {config_dict['embedding']['ollama_host']}",
            f"  ml_model: {config_dict['embedding']['ml_model']}",
            f"  batch_size: {config_dict['embedding']['batch_size']}  # Embeddings processed per batch",
            "",
            "# Search behavior settings",
            "search:",
            f"  default_top_k: {config_dict['search']['default_top_k']}  # Default number of top results",
            f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}  # Enable keyword matching boost",
            f"  similarity_threshold: {config_dict['search']['similarity_threshold']}  # Minimum similarity score",
            f"  expand_queries: {str(config_dict['search']['expand_queries']).lower()}  # Enable automatic query expansion",
            "",
            "# LLM synthesis and query expansion settings",
            "llm:",
            f"  ollama_host: {config_dict['llm']['ollama_host']}",
            f"  synthesis_model: {config_dict['llm']['synthesis_model']}  # 'auto', 'qwen3:1.7b', etc.",
            f"  expansion_model: {config_dict['llm']['expansion_model']}  # Usually same as synthesis_model",
            f"  max_expansion_terms: {config_dict['llm']['max_expansion_terms']}  # Maximum terms to add to queries",
            f"  enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()}  # Enable synthesis by default",
            f"  synthesis_temperature: {config_dict['llm']['synthesis_temperature']}  # LLM temperature for analysis",
            "",
            "  # Context window configuration (critical for RAG performance)",
            f"  context_window: {config_dict['llm']['context_window']}  # Context size in tokens (8K=fast, 16K=balanced, 32K=advanced)",
            f"  auto_context: {str(config_dict['llm']['auto_context']).lower()}  # Auto-adjust context based on model capabilities",
            "",
            "  model_rankings:  # Preferred model order (edit to change priority)",
        ])

        # Add model rankings list
        if 'model_rankings' in config_dict['llm'] and config_dict['llm']['model_rankings']:
            for model in config_dict['llm']['model_rankings'][:10]:  # Show first 10
                yaml_lines.append(f"    - \"{model}\"")
            if len(config_dict['llm']['model_rankings']) > 10:
                yaml_lines.append("    # ... (edit config to see all options)")

        return '\n'.join(yaml_lines)

    def update_config(self, **kwargs) -> RAGConfig:
        """Update specific configuration values."""
        config = self.load_config()

        for key, value in kwargs.items():
            if hasattr(config, key):
                setattr(config, key, value)
            else:
                logger.warning(f"Unknown config key: {key}")

        self.save_config(config)
        return config
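

# Example usage (illustrative sketch; the project path is hypothetical):
#
#   manager = ConfigManager(Path("/path/to/project"))
#   config = manager.load_config()        # creates .mini-rag/config.yaml on first run
#   config.llm.context_window = 32768     # e.g. raise the context window for RAG
#   manager.save_config(config)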