This comprehensive update enhances the user experience with several key improvements:

## Enhanced Streaming & Thinking Display

- Implement real-time streaming with gray thinking tokens that collapse after completion
- Fix thinking-token redisplay bug with proper content filtering
- Add clear "AI Response:" headers to separate thinking from responses
- Enable streaming by default for better user engagement
- Keep thinking visible for exploration; collapse only for suggested questions

## Natural Conversation Responses

- Convert clunky JSON exploration responses to a natural, conversational format
- Improve exploration prompts for friendly, colleague-style interactions
- Update summary generation with better context handling
- Eliminate double response display issues

## Model Reference Updates

- Remove all llama3.2 references in favor of qwen3 models
- Fix non-existent qwen3:3b references, replacing them with proper model names
- Update model rankings to prioritize working qwen models across all components (see the config sketch below)
- Ensure consistent model recommendations in docs and examples

## Cross-Platform Icon Integration

- Add desktop icon setup to the Linux installer with a .desktop entry
- Add Windows shortcuts for desktop and Start Menu integration
- Improve installer user experience with visual branding

## Configuration & Navigation Fixes

- Fix the "0" option in the configuration menu to properly go back
- Make the configuration menu more user-friendly
- Update troubleshooting guides with correct model suggestions

These changes significantly improve the beginner experience while maintaining technical accuracy and system reliability.
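For reference, the model defaults now written to `.mini-rag/config.yaml` look like this (a sketch matching the defaults defined in the config module below; comments are abbreviated):

```yaml
llm:
  synthesis_model: auto   # 'auto', 'qwen3:1.7b', etc.
  expansion_model: auto   # Usually same as synthesis_model
  model_rankings:         # Preferred model order (edit to change priority)
    - "qwen3:1.7b"        # testing model, prioritized for the current phase
    - "qwen3:0.6b"        # ultra-efficient, suited to CPU-only systems
    - "qwen3:4b"          # recommended quality pick, but larger
    - "qwen2.5:1.5b"      # common fallback
    - "qwen2.5:3b"        # common fallback
```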
"""
|
|
Configuration management for FSS-Mini-RAG.
|
|
Handles loading, saving, and validation of YAML config files.
|
|
"""
|
|
|
|
import yaml
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional
|
|
from dataclasses import dataclass, asdict
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class ChunkingConfig:
    """Configuration for text chunking."""
    max_size: int = 2000
    min_size: int = 150
    strategy: str = "semantic"  # "semantic" or "fixed"


@dataclass
class StreamingConfig:
    """Configuration for large file streaming."""
    enabled: bool = True
    threshold_bytes: int = 1048576  # 1MB


@dataclass
class FilesConfig:
    """Configuration for file processing."""
    min_file_size: int = 50
    exclude_patterns: Optional[list] = None
    include_patterns: Optional[list] = None

    def __post_init__(self):
        if self.exclude_patterns is None:
            self.exclude_patterns = [
                "node_modules/**",
                ".git/**",
                "__pycache__/**",
                "*.pyc",
                ".venv/**",
                "venv/**",
                "build/**",
                "dist/**",
            ]
        if self.include_patterns is None:
            self.include_patterns = ["**/*"]  # Include everything by default


@dataclass
class EmbeddingConfig:
    """Configuration for embedding generation."""
    preferred_method: str = "ollama"  # "ollama", "ml", "hash", or "auto"
    ollama_model: str = "nomic-embed-text"
    ollama_host: str = "localhost:11434"
    ml_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    batch_size: int = 32


@dataclass
class SearchConfig:
    """Configuration for search behavior."""
    default_top_k: int = 10
    enable_bm25: bool = True
    similarity_threshold: float = 0.1
    expand_queries: bool = False  # Enable automatic query expansion


@dataclass
class LLMConfig:
    """Configuration for LLM synthesis and query expansion."""
    # Core settings
    synthesis_model: str = "auto"  # "auto", "qwen3:1.7b", "qwen2.5:1.5b", etc.
    expansion_model: str = "auto"  # Usually same as synthesis_model
    max_expansion_terms: int = 8  # Maximum additional terms to add
    enable_synthesis: bool = False  # Enable synthesis by default (otherwise opt in with --synthesize)
    synthesis_temperature: float = 0.3
    enable_thinking: bool = True  # Enable thinking mode for Qwen3 models
    cpu_optimized: bool = True  # Prefer lightweight models

    # Model preference rankings (configurable); populated in __post_init__
    model_rankings: Optional[list] = None

    # Provider-specific settings (for different LLM providers)
    provider: str = "ollama"  # "ollama", "openai", "anthropic"
    ollama_host: str = "localhost:11434"  # Ollama connection
    api_key: Optional[str] = None  # API key for cloud providers
    api_base: Optional[str] = None  # Base URL for API (e.g., OpenRouter)
    timeout: int = 20  # Request timeout in seconds

    def __post_init__(self):
        if self.model_rankings is None:
            # Default model preference rankings (can be overridden in the config file)
            self.model_rankings = [
                # Testing model (prioritized for the current testing phase)
                "qwen3:1.7b",
                # Ultra-efficient model (well suited to CPU-only systems)
                "qwen3:0.6b",
                # Recommended model (excellent quality, but larger)
                "qwen3:4b",
                # Common fallbacks (prioritizing Qwen models)
                "qwen2.5:1.5b",
                "qwen2.5:3b",
            ]


@dataclass
class RAGConfig:
    """Main RAG system configuration."""
    chunking: Optional[ChunkingConfig] = None
    streaming: Optional[StreamingConfig] = None
    files: Optional[FilesConfig] = None
    embedding: Optional[EmbeddingConfig] = None
    search: Optional[SearchConfig] = None
    llm: Optional[LLMConfig] = None

    def __post_init__(self):
        if self.chunking is None:
            self.chunking = ChunkingConfig()
        if self.streaming is None:
            self.streaming = StreamingConfig()
        if self.files is None:
            self.files = FilesConfig()
        if self.embedding is None:
            self.embedding = EmbeddingConfig()
        if self.search is None:
            self.search = SearchConfig()
        if self.llm is None:
            self.llm = LLMConfig()


class ConfigManager:
    """Manages configuration loading, saving, and validation."""

    def __init__(self, project_path: Path):
        self.project_path = Path(project_path)
        self.rag_dir = self.project_path / '.mini-rag'
        self.config_path = self.rag_dir / 'config.yaml'

    def load_config(self) -> RAGConfig:
        """Load configuration from the YAML file, or create a default one."""
        if not self.config_path.exists():
            logger.info(f"No config found at {self.config_path}, creating default")
            config = RAGConfig()
            self.save_config(config)
            return config

        try:
            with open(self.config_path, 'r') as f:
                data = yaml.safe_load(f)

            if not data:
                logger.warning("Empty config file, using defaults")
                return RAGConfig()

            # Convert nested dicts back to dataclass instances
            config = RAGConfig()

            if 'chunking' in data:
                config.chunking = ChunkingConfig(**data['chunking'])
            if 'streaming' in data:
                config.streaming = StreamingConfig(**data['streaming'])
            if 'files' in data:
                config.files = FilesConfig(**data['files'])
            if 'embedding' in data:
                config.embedding = EmbeddingConfig(**data['embedding'])
            if 'search' in data:
                config.search = SearchConfig(**data['search'])
            if 'llm' in data:
                config.llm = LLMConfig(**data['llm'])

            return config

        except Exception as e:
            logger.error(f"Failed to load config from {self.config_path}: {e}")
            logger.info("Using default configuration")
            return RAGConfig()

    def save_config(self, config: RAGConfig):
        """Save configuration to a YAML file with explanatory comments."""
        try:
            self.rag_dir.mkdir(parents=True, exist_ok=True)

            # Convert to dict for YAML serialization
            config_dict = asdict(config)

            # Create YAML content with comments
            yaml_content = self._create_yaml_with_comments(config_dict)

            with open(self.config_path, 'w') as f:
                f.write(yaml_content)

            logger.info(f"Configuration saved to {self.config_path}")

        except Exception as e:
            logger.error(f"Failed to save config to {self.config_path}: {e}")

    def _create_yaml_with_comments(self, config_dict: Dict[str, Any]) -> str:
        """Create YAML content with helpful comments."""
        yaml_lines = [
            "# FSS-Mini-RAG Configuration",
            "# Edit this file to customize indexing and search behavior",
            "# See docs/GETTING_STARTED.md for detailed explanations",
            "",
            "# Text chunking settings",
            "chunking:",
            f"  max_size: {config_dict['chunking']['max_size']}  # Maximum characters per chunk",
            f"  min_size: {config_dict['chunking']['min_size']}  # Minimum characters per chunk",
            f"  strategy: {config_dict['chunking']['strategy']}  # 'semantic' (language-aware) or 'fixed'",
            "",
            "# Large file streaming settings",
            "streaming:",
            f"  enabled: {str(config_dict['streaming']['enabled']).lower()}",
            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Files larger than this use streaming (1MB)",
            "",
            "# File processing settings",
            "files:",
            f"  min_file_size: {config_dict['files']['min_file_size']}  # Skip files smaller than this",
            "  exclude_patterns:",
        ]

        for pattern in config_dict['files']['exclude_patterns']:
            yaml_lines.append(f"    - \"{pattern}\"")

        # Serialize the actual include patterns rather than a hardcoded default,
        # so customized values survive a save/load round trip.
        yaml_lines.append("  include_patterns:")
        for pattern in config_dict['files']['include_patterns']:
            yaml_lines.append(f"    - \"{pattern}\"")

        yaml_lines.extend([
            "",
            "# Embedding generation settings",
            "embedding:",
            f"  preferred_method: {config_dict['embedding']['preferred_method']}  # 'ollama', 'ml', 'hash', or 'auto'",
            f"  ollama_model: {config_dict['embedding']['ollama_model']}",
            f"  ollama_host: {config_dict['embedding']['ollama_host']}",
            f"  ml_model: {config_dict['embedding']['ml_model']}",
            f"  batch_size: {config_dict['embedding']['batch_size']}  # Embeddings processed per batch",
            "",
            "# Search behavior settings",
            "search:",
            f"  default_top_k: {config_dict['search']['default_top_k']}  # Default number of top results",
            f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}  # Enable keyword matching boost",
            f"  similarity_threshold: {config_dict['search']['similarity_threshold']}  # Minimum similarity score",
            f"  expand_queries: {str(config_dict['search']['expand_queries']).lower()}  # Enable automatic query expansion",
            "",
            "# LLM synthesis and query expansion settings",
            "llm:",
            f"  ollama_host: {config_dict['llm']['ollama_host']}",
            f"  synthesis_model: {config_dict['llm']['synthesis_model']}  # 'auto', 'qwen3:1.7b', etc.",
            f"  expansion_model: {config_dict['llm']['expansion_model']}  # Usually same as synthesis_model",
            f"  max_expansion_terms: {config_dict['llm']['max_expansion_terms']}  # Maximum terms to add to queries",
            f"  enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()}  # Enable synthesis by default",
            f"  synthesis_temperature: {config_dict['llm']['synthesis_temperature']}  # LLM temperature for analysis",
            "  model_rankings:  # Preferred model order (edit to change priority)",
        ])

        # Add model rankings list
        if config_dict['llm'].get('model_rankings'):
            for model in config_dict['llm']['model_rankings'][:10]:  # Show first 10
                yaml_lines.append(f"    - \"{model}\"")
            if len(config_dict['llm']['model_rankings']) > 10:
                yaml_lines.append("    # ... (edit config to see all options)")

        return '\n'.join(yaml_lines)

    def update_config(self, **kwargs) -> RAGConfig:
        """Update specific configuration values."""
        config = self.load_config()

        for key, value in kwargs.items():
            if hasattr(config, key):
                setattr(config, key, value)
            else:
                logger.warning(f"Unknown config key: {key}")

        self.save_config(config)
        return config
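

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the module's public surface).
# Shows the typical round trip: load (or auto-create) a project config, read a
# few values, then persist an updated section. The project path here is an
# assumption for demonstration purposes.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    manager = ConfigManager(Path("."))

    config = manager.load_config()  # writes .mini-rag/config.yaml on first run
    print(f"Preferred models: {config.llm.model_rankings}")
    print(f"Synthesis enabled: {config.llm.enable_synthesis}")

    # update_config replaces whole top-level sections by keyword;
    # unknown keys are logged and ignored.
    config = manager.update_config(search=SearchConfig(default_top_k=5))
    print(f"Top-k after update: {config.search.default_top_k}")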