Major enhancements:
• Add comprehensive deployment guide covering all platforms (mobile, edge, cloud)
• Implement system context collection for enhanced AI responses
• Update documentation with current workflows and deployment scenarios
• Fix Windows compatibility bugs in the file-locking system
• Enhance diagrams with the system-context integration flow
• Improve exploration mode with better context handling

Platform support expanded:
• Full macOS compatibility verified
• Raspberry Pi deployment with ARM64 optimizations
• Android deployment via Termux, with configuration examples
• Edge-device deployment strategies and performance guidelines
• Docker containerization for universal deployment

Technical improvements:
• System context module provides OS/environment awareness to the AI
• Context-aware prompts improve response relevance
• Enhanced error handling and graceful fallbacks
• Better integration between synthesis and exploration modes

Documentation updates:
• Complete deployment guide with troubleshooting
• Updated getting-started guide with current installation flows
• Enhanced visual diagrams showing system architecture
• Platform-specific configuration examples

Ready for extended deployment testing and user feedback.
"""
|
|
Configuration management for FSS-Mini-RAG.
|
|
Handles loading, saving, and validation of YAML config files.
|
|
"""
|
|
|
|
import yaml
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional
|
|
from dataclasses import dataclass, asdict
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class ChunkingConfig:
    """Configuration for text chunking."""
    max_size: int = 2000
    min_size: int = 150
    strategy: str = "semantic"  # "semantic" or "fixed"


@dataclass
class StreamingConfig:
    """Configuration for large file streaming."""
    enabled: bool = True
    threshold_bytes: int = 1048576  # 1MB


@dataclass
class FilesConfig:
    """Configuration for file processing."""
    min_file_size: int = 50
    exclude_patterns: Optional[list] = None
    include_patterns: Optional[list] = None

    def __post_init__(self):
        if self.exclude_patterns is None:
            self.exclude_patterns = [
                "node_modules/**",
                ".git/**",
                "__pycache__/**",
                "*.pyc",
                ".venv/**",
                "venv/**",
                "build/**",
                "dist/**",
            ]
        if self.include_patterns is None:
            self.include_patterns = ["**/*"]  # Include everything by default


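# Note on overrides: pattern lists supplied in .mini-rag/config.yaml replace the
# defaults above wholesale (see ConfigManager.load_config below), so a custom
# exclude list must repeat any defaults you still want. Illustrative YAML sketch
# ("*.log" is an example, not one of the defaults):
#
#   files:
#     min_file_size: 50
#     exclude_patterns:
#       - "node_modules/**"
#       - ".git/**"
#       - "*.log"
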
@dataclass
class EmbeddingConfig:
    """Configuration for embedding generation."""
    preferred_method: str = "ollama"  # "ollama", "ml", "hash", or "auto"
    ollama_model: str = "nomic-embed-text"
    ollama_host: str = "localhost:11434"
    ml_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    batch_size: int = 32


@dataclass
class SearchConfig:
    """Configuration for search behavior."""
    default_top_k: int = 10
    enable_bm25: bool = True
    similarity_threshold: float = 0.1
    expand_queries: bool = False  # Enable automatic query expansion


@dataclass
class LLMConfig:
    """Configuration for LLM synthesis and query expansion."""
    # Core settings
    synthesis_model: str = "auto"  # "auto", "qwen3:1.7b", "qwen2.5:1.5b", etc.
    expansion_model: str = "auto"  # Usually the same as synthesis_model
    max_expansion_terms: int = 8  # Maximum additional terms to add to a query
    enable_synthesis: bool = False  # When False, synthesis runs only with the --synthesize flag
    synthesis_temperature: float = 0.3
    enable_thinking: bool = True  # Enable thinking mode for Qwen3 models
    cpu_optimized: bool = True  # Prefer lightweight models

    # Context window configuration (critical for RAG performance)
    context_window: int = 16384  # Context window size in tokens (16K recommended)
    auto_context: bool = True  # Auto-adjust context based on model capabilities

    # Model preference rankings (configurable)
    model_rankings: Optional[list] = None  # Set in __post_init__

    # Provider-specific settings (for different LLM providers)
    provider: str = "ollama"  # "ollama", "openai", "anthropic"
    ollama_host: str = "localhost:11434"  # Ollama connection
    api_key: Optional[str] = None  # API key for cloud providers
    api_base: Optional[str] = None  # Base URL for API (e.g., OpenRouter)
    timeout: int = 20  # Request timeout in seconds

    def __post_init__(self):
        if self.model_rankings is None:
            # Default model preference rankings (can be overridden in config file)
            self.model_rankings = [
                # Testing model (prioritized for current testing phase)
                "qwen3:1.7b",
                # Ultra-efficient model (well suited to CPU-only systems)
                "qwen3:0.6b",
                # Recommended model (excellent quality but larger)
                "qwen3:4b",
                # Common fallbacks (prioritize Qwen models)
                "qwen2.5:1.5b",
                "qwen2.5:3b",
            ]


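# Illustrative sketch: reordering model preference via .mini-rag/config.yaml.
# Any model tag available in the local Ollama install can be listed; the names
# below are examples drawn from the defaults above:
#
#   llm:
#     synthesis_model: auto
#     model_rankings:
#       - "qwen3:4b"
#       - "qwen2.5:1.5b"
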
@dataclass
class UpdateConfig:
    """Configuration for the auto-update system."""
    auto_check: bool = True  # Check for updates automatically
    check_frequency_hours: int = 24  # How often to check (hours)
    auto_install: bool = False  # Auto-install without asking (not recommended)
    backup_before_update: bool = True  # Create backup before updating
    notify_beta_releases: bool = False  # Include beta/pre-releases


@dataclass
class RAGConfig:
    """Main RAG system configuration."""
    chunking: Optional[ChunkingConfig] = None
    streaming: Optional[StreamingConfig] = None
    files: Optional[FilesConfig] = None
    embedding: Optional[EmbeddingConfig] = None
    search: Optional[SearchConfig] = None
    llm: Optional[LLMConfig] = None
    updates: Optional[UpdateConfig] = None

    def __post_init__(self):
        if self.chunking is None:
            self.chunking = ChunkingConfig()
        if self.streaming is None:
            self.streaming = StreamingConfig()
        if self.files is None:
            self.files = FilesConfig()
        if self.embedding is None:
            self.embedding = EmbeddingConfig()
        if self.search is None:
            self.search = SearchConfig()
        if self.llm is None:
            self.llm = LLMConfig()
        if self.updates is None:
            self.updates = UpdateConfig()


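# Programmatic construction (a minimal sketch using only names defined above):
#
#   config = RAGConfig(search=SearchConfig(default_top_k=5))
#   config.llm.context_window = 8192  # other sections were filled in by __post_init__
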
class ConfigManager:
    """Manages configuration loading, saving, and validation."""

    def __init__(self, project_path: Path):
        self.project_path = Path(project_path)
        self.rag_dir = self.project_path / '.mini-rag'
        self.config_path = self.rag_dir / 'config.yaml'

    def load_config(self) -> RAGConfig:
        """Load configuration from YAML file or create default."""
        if not self.config_path.exists():
            logger.info(f"No config found at {self.config_path}, creating default")
            config = RAGConfig()
            self.save_config(config)
            return config

        try:
            with open(self.config_path, 'r') as f:
                data = yaml.safe_load(f)

            if not data:
                logger.warning("Empty config file, using defaults")
                return RAGConfig()

            # Convert nested dicts back to dataclass instances
            config = RAGConfig()

            if 'chunking' in data:
                config.chunking = ChunkingConfig(**data['chunking'])
            if 'streaming' in data:
                config.streaming = StreamingConfig(**data['streaming'])
            if 'files' in data:
                config.files = FilesConfig(**data['files'])
            if 'embedding' in data:
                config.embedding = EmbeddingConfig(**data['embedding'])
            if 'search' in data:
                config.search = SearchConfig(**data['search'])
            if 'llm' in data:
                config.llm = LLMConfig(**data['llm'])
            if 'updates' in data:
                # Bug fix: 'updates' was saved by save_config but never loaded
                # back, so saved update settings were silently dropped on reload.
                config.updates = UpdateConfig(**data['updates'])

            return config

        except yaml.YAMLError as e:
            # YAML syntax error - help the user fix it instead of failing silently.
            # problem_mark is a yaml.Mark with a 0-based line attribute.
            mark = getattr(e, 'problem_mark', None)
            location = f"line {mark.line + 1}" if mark else "an unknown location"
            error_msg = f"⚠️ Config file has a YAML syntax error at {location}: {e}"
            logger.error(error_msg)
            print(f"\n{error_msg}")
            print(f"Config file: {self.config_path}")
            print("💡 Check YAML syntax (indentation, quotes, colons)")
            print("💡 Or delete the config file to reset to defaults")
            return RAGConfig()  # Still return defaults, but warn the user

        except Exception as e:
            logger.error(f"Failed to load config from {self.config_path}: {e}")
            logger.info("Using default configuration")
            return RAGConfig()

    def save_config(self, config: RAGConfig):
        """Save configuration to YAML file with comments."""
        try:
            self.rag_dir.mkdir(exist_ok=True)

            # Convert to dict for YAML serialization
            config_dict = asdict(config)

            # Create YAML content with comments
            yaml_content = self._create_yaml_with_comments(config_dict)

            # Write with basic file locking to prevent corruption
            with open(self.config_path, 'w') as f:
                try:
                    import fcntl
                    fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)  # Non-blocking exclusive lock
                    f.write(yaml_content)
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)  # Unlock
                except (OSError, ImportError):
                    # Fallback for Windows or if fcntl is unavailable
                    f.write(yaml_content)

            logger.info(f"Configuration saved to {self.config_path}")

        except Exception as e:
            logger.error(f"Failed to save config to {self.config_path}: {e}")

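    # Portability note: fcntl is POSIX-only, so on Windows the write above falls
    # back to an unlocked write. A hypothetical cross-platform variant could use
    # the stdlib msvcrt module instead (a sketch, not part of this codebase):
    #
    #   import msvcrt
    #   msvcrt.locking(f.fileno(), msvcrt.LK_NBLCK, 1)  # non-blocking lock on the first byte
    #   f.write(yaml_content)
    #   msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1)  # release the lock
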
    def _create_yaml_with_comments(self, config_dict: Dict[str, Any]) -> str:
        """Create YAML content with helpful comments."""
        yaml_lines = [
            "# FSS-Mini-RAG Configuration",
            "# Edit this file to customize indexing and search behavior",
            "# See docs/GETTING_STARTED.md for detailed explanations",
            "",
            "# Text chunking settings",
            "chunking:",
            f"  max_size: {config_dict['chunking']['max_size']}  # Maximum characters per chunk",
            f"  min_size: {config_dict['chunking']['min_size']}  # Minimum characters per chunk",
            f"  strategy: {config_dict['chunking']['strategy']}  # 'semantic' (language-aware) or 'fixed'",
            "",
            "# Large file streaming settings",
            "streaming:",
            f"  enabled: {str(config_dict['streaming']['enabled']).lower()}",
            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Files larger than this use streaming (1MB)",
            "",
            "# File processing settings",
            "files:",
            f"  min_file_size: {config_dict['files']['min_file_size']}  # Skip files smaller than this",
            "  exclude_patterns:",
        ]

        for pattern in config_dict['files']['exclude_patterns']:
            yaml_lines.append(f'    - "{pattern}"')

        yaml_lines.extend([
            "  include_patterns:",
            '    - "**/*"  # Include all files by default',
            "",
            "# Embedding generation settings",
            "embedding:",
            f"  preferred_method: {config_dict['embedding']['preferred_method']}  # 'ollama', 'ml', 'hash', or 'auto'",
            f"  ollama_model: {config_dict['embedding']['ollama_model']}",
            f"  ollama_host: {config_dict['embedding']['ollama_host']}",
            f"  ml_model: {config_dict['embedding']['ml_model']}",
            f"  batch_size: {config_dict['embedding']['batch_size']}  # Embeddings processed per batch",
            "",
            "# Search behavior settings",
            "search:",
            f"  default_top_k: {config_dict['search']['default_top_k']}  # Default number of top results",
            f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}  # Enable keyword-matching boost",
            f"  similarity_threshold: {config_dict['search']['similarity_threshold']}  # Minimum similarity score",
            f"  expand_queries: {str(config_dict['search']['expand_queries']).lower()}  # Enable automatic query expansion",
            "",
            "# LLM synthesis and query expansion settings",
            "llm:",
            f"  ollama_host: {config_dict['llm']['ollama_host']}",
            f"  synthesis_model: {config_dict['llm']['synthesis_model']}  # 'auto', 'qwen3:1.7b', etc.",
            f"  expansion_model: {config_dict['llm']['expansion_model']}  # Usually the same as synthesis_model",
            f"  max_expansion_terms: {config_dict['llm']['max_expansion_terms']}  # Maximum terms to add to queries",
            f"  enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()}  # Enable synthesis by default",
            f"  synthesis_temperature: {config_dict['llm']['synthesis_temperature']}  # LLM temperature for analysis",
            "",
            "  # Context window configuration (critical for RAG performance)",
            "  # 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users,",
            "  #    32K=large codebases, 64K+=power users only",
            "  # ⚠️ Larger contexts use far more CPU/memory - only increase if needed",
            "  # 🔧 Hitting context limits? Try a smaller top_k, better search terms, or archiving noise",
            f"  context_window: {config_dict['llm']['context_window']}  # Context size in tokens",
            f"  auto_context: {str(config_dict['llm']['auto_context']).lower()}  # Auto-adjust context to model capabilities",
            "",
            "  model_rankings:  # Preferred model order (edit to change priority)",
        ])

        # Add the model rankings list
        if config_dict['llm'].get('model_rankings'):
            for model in config_dict['llm']['model_rankings'][:10]:  # Show first 10
                yaml_lines.append(f'    - "{model}"')
            if len(config_dict['llm']['model_rankings']) > 10:
                yaml_lines.append("    # ... (edit config to see all options)")

        # Add update settings
        yaml_lines.extend([
            "",
            "# Auto-update system settings",
            "updates:",
            f"  auto_check: {str(config_dict['updates']['auto_check']).lower()}  # Check for updates automatically",
            f"  check_frequency_hours: {config_dict['updates']['check_frequency_hours']}  # Hours between update checks",
            f"  auto_install: {str(config_dict['updates']['auto_install']).lower()}  # Auto-install updates (not recommended)",
            f"  backup_before_update: {str(config_dict['updates']['backup_before_update']).lower()}  # Create backup before updating",
            f"  notify_beta_releases: {str(config_dict['updates']['notify_beta_releases']).lower()}  # Include beta releases in checks",
        ])

        return '\n'.join(yaml_lines)

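    # With default settings, the generated file begins roughly like this
    # (values track whatever config is passed in):
    #
    #   # FSS-Mini-RAG Configuration
    #   # Edit this file to customize indexing and search behavior
    #   # See docs/GETTING_STARTED.md for detailed explanations
    #
    #   # Text chunking settings
    #   chunking:
    #     max_size: 2000  # Maximum characters per chunk
    #     min_size: 150  # Minimum characters per chunk
    #     strategy: semantic  # 'semantic' (language-aware) or 'fixed'
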
    def update_config(self, **kwargs) -> RAGConfig:
        """Update specific configuration values."""
        config = self.load_config()

        for key, value in kwargs.items():
            if hasattr(config, key):
                setattr(config, key, value)
            else:
                logger.warning(f"Unknown config key: {key}")

        self.save_config(config)
        return config
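

# Typical usage (a minimal sketch; the project path is illustrative):
#
#   from pathlib import Path
#
#   manager = ConfigManager(Path("~/projects/my-repo").expanduser())
#   config = manager.load_config()  # creates .mini-rag/config.yaml on first run
#   config.search.default_top_k = 5
#   manager.save_config(config)
#
# Note that update_config only sets top-level RAGConfig attributes, so whole
# sections are passed as dataclass instances, e.g.:
#
#   manager.update_config(search=SearchConfig(default_top_k=5))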