fss-mini-rag-github/mini_rag/config.py

"""
Configuration management for FSS-Mini-RAG.
Handles loading, saving, and validation of YAML config files.
"""

import logging
import re
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional

import yaml
import requests

logger = logging.getLogger(__name__)


@dataclass
class ChunkingConfig:
    """Configuration for text chunking."""

    max_size: int = 2000
    min_size: int = 150
    strategy: str = "semantic"  # "semantic" or "fixed"


@dataclass
class StreamingConfig:
    """Configuration for large file streaming."""

    enabled: bool = True
    threshold_bytes: int = 1048576  # 1MB


@dataclass
class FilesConfig:
    """Configuration for file processing."""

    min_file_size: int = 50
    exclude_patterns: list = None
    include_patterns: list = None

    def __post_init__(self):
        if self.exclude_patterns is None:
            self.exclude_patterns = [
                "node_modules/**",
                ".git/**",
                "__pycache__/**",
                "*.pyc",
                ".venv/**",
                "venv/**",
                "build/**",
                "dist/**",
            ]
        if self.include_patterns is None:
            self.include_patterns = ["**/*"]  # Include everything by default


@dataclass
class EmbeddingConfig:
    """Configuration for embedding generation."""

    preferred_method: str = "ollama"  # "ollama", "ml", "hash", "auto"
    ollama_model: str = "nomic-embed-text"
    ollama_host: str = "localhost:11434"
    ml_model: str = "sentence-transformers/all-MiniLM-L6-v2"
    batch_size: int = 32


@dataclass
class SearchConfig:
    """Configuration for search behavior."""

    default_top_k: int = 10
    enable_bm25: bool = True
    similarity_threshold: float = 0.1
    expand_queries: bool = False  # Enable automatic query expansion


@dataclass
class LLMConfig:
    """Configuration for LLM synthesis and query expansion."""

    # Core settings
    synthesis_model: str = "auto"  # "auto", "qwen3:1.7b", "qwen2.5:1.5b", etc.
    expansion_model: str = "auto"  # Usually same as synthesis_model
    max_expansion_terms: int = 8  # Maximum additional terms to add
    enable_synthesis: bool = False  # Enable by default when --synthesize used
    synthesis_temperature: float = 0.3
    enable_thinking: bool = True  # Enable thinking mode for Qwen3 models
    cpu_optimized: bool = True  # Prefer lightweight models

    # Context window configuration (critical for RAG performance)
    context_window: int = 16384  # Context window size in tokens (16K recommended)
    auto_context: bool = True  # Auto-adjust context based on model capabilities

    # Model preference rankings (configurable)
    model_rankings: list = None  # Will be set in __post_init__

    # Provider-specific settings (for different LLM providers)
    provider: str = "ollama"  # "ollama", "openai", "anthropic"
    ollama_host: str = "localhost:11434"  # Ollama connection
    api_key: Optional[str] = None  # API key for cloud providers
    api_base: Optional[str] = None  # Base URL for API (e.g., OpenRouter)
    timeout: int = 20  # Request timeout in seconds

    def __post_init__(self):
        if self.model_rankings is None:
            # Default model preference rankings (can be overridden in config file)
            self.model_rankings = [
                # Testing model (prioritized for current testing phase)
                "qwen3:1.7b",
                # Ultra-efficient models (perfect for CPU-only systems)
                "qwen3:0.6b",
                # Recommended model (excellent quality but larger)
                "qwen3:4b",
                # Common fallbacks (prioritize Qwen models)
                "qwen2.5:1.5b",
                "qwen2.5:3b",
            ]


@dataclass
class UpdateConfig:
    """Configuration for auto-update system."""

    auto_check: bool = True  # Check for updates automatically
    check_frequency_hours: int = 24  # How often to check (hours)
    auto_install: bool = False  # Auto-install without asking (not recommended)
    backup_before_update: bool = True  # Create backup before updating
    notify_beta_releases: bool = False  # Include beta/pre-releases


@dataclass
class RAGConfig:
    """Main RAG system configuration."""

    chunking: ChunkingConfig = None
    streaming: StreamingConfig = None
    files: FilesConfig = None
    embedding: EmbeddingConfig = None
    search: SearchConfig = None
    llm: LLMConfig = None
    updates: UpdateConfig = None

    def __post_init__(self):
        if self.chunking is None:
            self.chunking = ChunkingConfig()
        if self.streaming is None:
            self.streaming = StreamingConfig()
        if self.files is None:
            self.files = FilesConfig()
        if self.embedding is None:
            self.embedding = EmbeddingConfig()
        if self.search is None:
            self.search = SearchConfig()
        if self.llm is None:
            self.llm = LLMConfig()
        if self.updates is None:
            self.updates = UpdateConfig()


class ConfigManager:
    """Manages configuration loading, saving, and validation."""

    def __init__(self, project_path: Path):
        self.project_path = Path(project_path)
        self.rag_dir = self.project_path / ".mini-rag"
        self.config_path = self.rag_dir / "config.yaml"

    def get_available_ollama_models(self, ollama_host: str = "localhost:11434") -> List[str]:
        """Get list of available Ollama models for validation with secure connection handling."""
        import time

        # Retry logic with exponential backoff
        max_retries = 3
        for attempt in range(max_retries):
            try:
                # Use explicit timeout and SSL verification for security
                response = requests.get(
                    f"http://{ollama_host}/api/tags",
                    timeout=(5, 10),  # (connect_timeout, read_timeout)
                    verify=True,  # Explicit SSL verification
                    allow_redirects=False  # Prevent redirect attacks
                )
                if response.status_code == 200:
                    data = response.json()
                    models = [model["name"] for model in data.get("models", [])]
                    logger.debug(f"Successfully fetched {len(models)} Ollama models")
                    return models
                else:
                    logger.debug(f"Ollama API returned status {response.status_code}")

            except requests.exceptions.SSLError as e:
                logger.debug(f"SSL verification failed for Ollama connection: {e}")
                # For local Ollama, SSL might not be configured - this is expected
                if "localhost" in ollama_host or "127.0.0.1" in ollama_host:
                    logger.debug("Retrying with local connection (SSL not required for localhost)")
                    # Local connections don't need SSL verification
                    try:
                        response = requests.get(f"http://{ollama_host}/api/tags", timeout=(5, 10))
                        if response.status_code == 200:
                            data = response.json()
                            return [model["name"] for model in data.get("models", [])]
                    except Exception as local_e:
                        logger.debug(f"Local Ollama connection also failed: {local_e}")
                break  # Don't retry SSL errors for remote hosts

            except requests.exceptions.Timeout as e:
                logger.debug(f"Ollama connection timeout (attempt {attempt + 1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    sleep_time = (2 ** attempt)  # Exponential backoff
                    time.sleep(sleep_time)
                    continue

            except requests.exceptions.ConnectionError as e:
                logger.debug(f"Ollama connection error (attempt {attempt + 1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(1)
                    continue

            except Exception as e:
                logger.debug(f"Unexpected error fetching Ollama models: {e}")
                break

        return []

    def _sanitize_model_name(self, model_name: str) -> str:
        """Sanitize model name to prevent injection attacks."""
        if not model_name:
            return ""

        # Allow only alphanumeric, dots, colons, hyphens, underscores
        # This covers legitimate model names like qwen3:1.7b-q8_0
        sanitized = re.sub(r'[^a-zA-Z0-9\.\:\-\_]', '', model_name)

        # Limit length to prevent DoS
        if len(sanitized) > 128:
            logger.warning(f"Model name too long, truncating: {sanitized[:20]}...")
            sanitized = sanitized[:128]

        return sanitized

    def resolve_model_name(self, configured_model: str, available_models: List[str]) -> Optional[str]:
        """Resolve configured model name to actual available model with input sanitization."""
        if not available_models or not configured_model:
            return None

        # Sanitize input to prevent injection
        configured_model = self._sanitize_model_name(configured_model)
        if not configured_model:
            logger.warning("Model name was empty after sanitization")
            return None

        # Handle special 'auto' directive
        if configured_model.lower() == 'auto':
            return available_models[0] if available_models else None

        # Direct exact match first (case-insensitive)
        for available_model in available_models:
            if configured_model.lower() == available_model.lower():
                return available_model

        # Fuzzy matching for common patterns
        model_patterns = self._get_model_patterns(configured_model)

        for pattern in model_patterns:
            for available_model in available_models:
                if pattern.lower() in available_model.lower():
                    # Additional validation: ensure it's not a partial match of something else
                    if self._validate_model_match(pattern, available_model):
                        return available_model

        return None  # Model not available

    def _get_model_patterns(self, configured_model: str) -> List[str]:
        """Generate fuzzy match patterns for common model naming conventions."""
        patterns = [configured_model]  # Start with exact name

        # Common quantization patterns for different models
        quantization_patterns = {
            'qwen3:1.7b': ['qwen3:1.7b-q8_0', 'qwen3:1.7b-q4_0', 'qwen3:1.7b-q6_k'],
            'qwen3:0.6b': ['qwen3:0.6b-q8_0', 'qwen3:0.6b-q4_0', 'qwen3:0.6b-q6_k'],
            'qwen3:4b': ['qwen3:4b-q8_0', 'qwen3:4b-q4_0', 'qwen3:4b-q6_k'],
            'qwen3:8b': ['qwen3:8b-q8_0', 'qwen3:8b-q4_0', 'qwen3:8b-q6_k'],
            'qwen2.5:1.5b': ['qwen2.5:1.5b-q8_0', 'qwen2.5:1.5b-q4_0'],
            'qwen2.5:3b': ['qwen2.5:3b-q8_0', 'qwen2.5:3b-q4_0'],
            'qwen2.5-coder:1.5b': ['qwen2.5-coder:1.5b-q8_0', 'qwen2.5-coder:1.5b-q4_0'],
            'qwen2.5-coder:3b': ['qwen2.5-coder:3b-q8_0', 'qwen2.5-coder:3b-q4_0'],
            'qwen2.5-coder:7b': ['qwen2.5-coder:7b-q8_0', 'qwen2.5-coder:7b-q4_0'],
        }

        # Add specific patterns for the configured model
        if configured_model.lower() in quantization_patterns:
            patterns.extend(quantization_patterns[configured_model.lower()])

        # Generic pattern generation for unknown models
        if ':' in configured_model:
            base_name, version = configured_model.split(':', 1)

            # Add common quantization suffixes
            common_suffixes = ['-q8_0', '-q4_0', '-q6_k', '-q4_k_m', '-instruct', '-base']
            for suffix in common_suffixes:
                patterns.append(f"{base_name}:{version}{suffix}")

            # Also try with instruct variants
            if 'instruct' not in version.lower():
                patterns.append(f"{base_name}:{version}-instruct")
                patterns.append(f"{base_name}:{version}-instruct-q8_0")
                patterns.append(f"{base_name}:{version}-instruct-q4_0")

        return patterns

    def _validate_model_match(self, pattern: str, available_model: str) -> bool:
        """Validate that a fuzzy match is actually correct and not a false positive."""
        # Convert to lowercase for comparison
        pattern_lower = pattern.lower()
        available_lower = available_model.lower()

        # Ensure the base model name matches
        if ':' in pattern_lower and ':' in available_lower:
            pattern_base = pattern_lower.split(':')[0]
            available_base = available_lower.split(':')[0]

            # Base names must match exactly
            if pattern_base != available_base:
                return False

            # Version part should be contained or closely related
            pattern_version = pattern_lower.split(':', 1)[1]
            available_version = available_lower.split(':', 1)[1]

            # The pattern version should be a prefix of the available version
            # e.g., "1.7b" should match "1.7b-q8_0" but not "11.7b"
            if not available_version.startswith(pattern_version.split('-')[0]):
                return False

        return True

    def validate_and_resolve_models(self, config: RAGConfig) -> RAGConfig:
        """Validate and resolve model names in configuration."""
        try:
            available_models = self.get_available_ollama_models(config.llm.ollama_host)

            if not available_models:
                logger.debug("No Ollama models available for validation")
                return config

            # Resolve synthesis model
            if config.llm.synthesis_model != "auto":
                resolved = self.resolve_model_name(config.llm.synthesis_model, available_models)
                if resolved and resolved != config.llm.synthesis_model:
                    logger.info(f"Resolved synthesis model: {config.llm.synthesis_model} -> {resolved}")
                    config.llm.synthesis_model = resolved
                elif not resolved:
                    logger.warning(f"Synthesis model '{config.llm.synthesis_model}' not found, keeping original")

            # Resolve expansion model (if different from synthesis)
            if (config.llm.expansion_model != "auto" and
                config.llm.expansion_model != config.llm.synthesis_model):
                resolved = self.resolve_model_name(config.llm.expansion_model, available_models)
                if resolved and resolved != config.llm.expansion_model:
                    logger.info(f"Resolved expansion model: {config.llm.expansion_model} -> {resolved}")
                    config.llm.expansion_model = resolved
                elif not resolved:
                    logger.warning(f"Expansion model '{config.llm.expansion_model}' not found, keeping original")

            # Update model rankings with resolved names
            if config.llm.model_rankings:
                updated_rankings = []
                for model in config.llm.model_rankings:
                    resolved = self.resolve_model_name(model, available_models)
                    if resolved:
                        updated_rankings.append(resolved)
                        if resolved != model:
                            logger.debug(f"Updated model ranking: {model} -> {resolved}")
                    else:
                        updated_rankings.append(model)  # Keep original if not resolved
                config.llm.model_rankings = updated_rankings

        except Exception as e:
            logger.debug(f"Model validation failed: {e}")

        return config

    def load_config(self) -> RAGConfig:
        """Load configuration from YAML file or create default."""
        if not self.config_path.exists():
            logger.info(f"No config found at {self.config_path}, creating default")
            config = RAGConfig()
            self.save_config(config)
            return config

        try:
            with open(self.config_path, "r") as f:
                data = yaml.safe_load(f)

            if not data:
                logger.warning("Empty config file, using defaults")
                return RAGConfig()

            # Convert nested dicts back to dataclass instances
            config = RAGConfig()

            if "chunking" in data:
                config.chunking = ChunkingConfig(**data["chunking"])
            if "streaming" in data:
                config.streaming = StreamingConfig(**data["streaming"])
            if "files" in data:
                config.files = FilesConfig(**data["files"])
            if "embedding" in data:
                config.embedding = EmbeddingConfig(**data["embedding"])
            if "search" in data:
                config.search = SearchConfig(**data["search"])
            if "llm" in data:
                config.llm = LLMConfig(**data["llm"])

            # Validate and resolve model names if Ollama is available
            config = self.validate_and_resolve_models(config)

            return config

        except yaml.YAMLError as e:
            # YAML syntax error - help user fix it instead of silent fallback
            error_msg = (
                f"⚠️ Config file has YAML syntax error at line "
                f"{getattr(e, 'problem_mark', 'unknown')}: {e}"
            )
            logger.error(error_msg)
            print(f"\n{error_msg}")
            print(f"Config file: {self.config_path}")
            print("💡 Check YAML syntax (indentation, quotes, colons)")
            print("💡 Or delete config file to reset to defaults")
            return RAGConfig()  # Still return defaults but warn user

        except Exception as e:
            logger.error(f"Failed to load config from {self.config_path}: {e}")
            logger.info("Using default configuration")
            return RAGConfig()

    def save_config(self, config: RAGConfig):
        """Save configuration to YAML file with comments."""
        try:
            self.rag_dir.mkdir(exist_ok=True)

            # Convert to dict for YAML serialization
            config_dict = asdict(config)

            # Create YAML content with comments
            yaml_content = self._create_yaml_with_comments(config_dict)

            # Write with basic file locking to prevent corruption
            with open(self.config_path, "w") as f:
                try:
                    import fcntl

                    fcntl.flock(
                        f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
                    )  # Non-blocking exclusive lock
                    f.write(yaml_content)
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)  # Unlock
                except (OSError, ImportError):
                    # Fallback for Windows or if fcntl unavailable
                    f.write(yaml_content)

            logger.info(f"Configuration saved to {self.config_path}")

        except Exception as e:
            logger.error(f"Failed to save config to {self.config_path}: {e}")

    def _create_yaml_with_comments(self, config_dict: Dict[str, Any]) -> str:
        """Create YAML content with helpful comments."""
        yaml_lines = [
            "# FSS-Mini-RAG Configuration",
            "# Edit this file to customize indexing and search behavior",
            "# See docs/GETTING_STARTED.md for detailed explanations",
            "",
            "# Text chunking settings",
            "chunking:",
            f"  max_size: {config_dict['chunking']['max_size']}  # Max chars per chunk",
            f"  min_size: {config_dict['chunking']['min_size']}  # Min chars per chunk",
            f"  strategy: {config_dict['chunking']['strategy']}  # 'semantic' or 'fixed'",
            "",
            "# Large file streaming settings",
            "streaming:",
            f"  enabled: {str(config_dict['streaming']['enabled']).lower()}",
            f"  threshold_bytes: {config_dict['streaming']['threshold_bytes']}  # Stream files >1MB",
            "",
            "# File processing settings",
            "files:",
            f"  min_file_size: {config_dict['files']['min_file_size']}  # Skip small files",
            "  exclude_patterns:",
        ]

        for pattern in config_dict["files"]["exclude_patterns"]:
            yaml_lines.append(f'    - "{pattern}"')

        yaml_lines.extend(
            [
                "  include_patterns:",
                '    - "**/*"                  # Include all files by default',
                "",
                "# Embedding generation settings",
                "embedding:",
                f"  preferred_method: {config_dict['embedding']['preferred_method']}  # Method",
                f"  ollama_model: {config_dict['embedding']['ollama_model']}",
                f"  ollama_host: {config_dict['embedding']['ollama_host']}",
                f"  ml_model: {config_dict['embedding']['ml_model']}",
                f"  batch_size: {config_dict['embedding']['batch_size']}  # Per batch",
                "",
                "# Search behavior settings",
                "search:",
                f"  default_top_k: {config_dict['search']['default_top_k']}  # Top results",
                f"  enable_bm25: {str(config_dict['search']['enable_bm25']).lower()}  # Keyword boost",
                f"  similarity_threshold: {config_dict['search']['similarity_threshold']}  # Min score",
                f"  expand_queries: {str(config_dict['search']['expand_queries']).lower()}  # Auto expand",
                "",
                "# LLM synthesis and query expansion settings",
                "llm:",
                f"  ollama_host: {config_dict['llm']['ollama_host']}",
                f"  synthesis_model: {config_dict['llm']['synthesis_model']}  # Model name",
                f"  expansion_model: {config_dict['llm']['expansion_model']}  # Model name",
                f"  max_expansion_terms: {config_dict['llm']['max_expansion_terms']}  # Max terms",
                f"  enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()}       # Enable synthesis by default",
                f"  synthesis_temperature: {config_dict['llm']['synthesis_temperature']}      # LLM temperature for analysis",
                "",
                "  # Context window configuration (critical for RAG performance)",
                "  # 💡 Sizing guide: 2K=1 question, 4K=1-2 questions, 8K=manageable, 16K=most users",
                "  #               32K=large codebases, 64K+=power users only",
                "  # ⚠️  Larger contexts use exponentially more CPU/memory - only increase if needed",
                "  # 🔧 Low context limits? Try smaller topk, better search terms, or archive noise",
                f"  context_window: {config_dict['llm']['context_window']}           # Context size in tokens",
                f"  auto_context: {str(config_dict['llm']['auto_context']).lower()}            # Auto-adjust context based on model capabilities",
                "",
                "  model_rankings:          # Preferred model order (edit to change priority)",
            ]
        )

        # Add model rankings list
        if "model_rankings" in config_dict["llm"] and config_dict["llm"]["model_rankings"]:
            for model in config_dict["llm"]["model_rankings"][:10]:  # Show first 10
                yaml_lines.append(f'    - "{model}"')
            if len(config_dict["llm"]["model_rankings"]) > 10:
                yaml_lines.append("    # ... (edit config to see all options)")

        # Add update settings
        yaml_lines.extend(
            [
                "",
                "# Auto-update system settings",
                "updates:",
                f"  auto_check: {str(config_dict['updates']['auto_check']).lower()}            # Check for updates automatically",
                f"  check_frequency_hours: {config_dict['updates']['check_frequency_hours']}    # Hours between update checks",
                f"  auto_install: {str(config_dict['updates']['auto_install']).lower()}          # Auto-install updates (not recommended)",
                f"  backup_before_update: {str(config_dict['updates']['backup_before_update']).lower()}   # Create backup before updating",
                f"  notify_beta_releases: {str(config_dict['updates']['notify_beta_releases']).lower()}   # Include beta releases in checks",
            ]
        )

        return "\n".join(yaml_lines)

    def update_config(self, **kwargs) -> RAGConfig:
        """Update specific configuration values."""
        config = self.load_config()

        for key, value in kwargs.items():
            if hasattr(config, key):
                setattr(config, key, value)
            else:
                logger.warning(f"Unknown config key: {key}")

        self.save_config(config)
        return config