Enhance model resolution system and improve user experience

Key improvements:
- Implement relaxed model matching to handle modern naming conventions (e.g., qwen3:4b-instruct-2507-q4_K_M)
- Add smart auto-selection prioritizing Qwen3 series over older models
- Replace rigid pattern matching with flexible base+size matching
- Add comprehensive logging for model resolution transparency
- Introduce new 'models' command for detailed model status reporting
- Improve pip installation feedback with progress indication
- Fix Python syntax warning in GitHub template script

The enhanced system now provides clear visibility into model selection decisions and gracefully handles various model naming patterns without requiring complex configuration.
2025-09-03 00:09:39 +10:00 · 2025-09-03 00:09:39 +10:00 · f4115e83bd
commit f4115e83bd
parent b6b64ecb52
4 changed files with 177 additions and 82 deletions
--- a/bin/rag-mini.py
+++ b/bin/rag-mini.py
@ -401,6 +401,78 @@ def status_check(project_path: Path):
        sys.exit(1)


+def show_model_status(project_path: Path):
+    """Print a report of the configured, available and resolved LLM models.
+
+    Loads the project configuration, builds an ``LLMSynthesizer`` for the
+    configured synthesis model, lists the models it reports as available
+    (grouped by series, with the resolved model marked) and then prints how
+    the configured name was resolved.
+
+    Args:
+        project_path: Project directory handed to ``ConfigManager.load_config``.
+
+    Any exception raised along the way is reported on stdout and the process
+    exits with status 1.
+    """
+    from mini_rag.config import ConfigManager
+
+    print("🤖 Model Status Report")
+    print("=" * 50)
+
+    try:
+        # Load config
+        config_manager = ConfigManager()
+        config = config_manager.load_config(project_path)
+        configured_model = config.llm.synthesis_model
+
+        # Create LLM synthesizer to check models
+        synthesizer = LLMSynthesizer(model=configured_model, config=config)
+
+        # Show configured model
+        print(f"📋 Configured model: {configured_model}")
+
+        available_models = synthesizer.available_models
+
+        # Resolve exactly once: the answer does not depend on which model is
+        # being listed, and every call logs its decision again.
+        resolved_model = synthesizer._resolve_model_name(configured_model)
+
+        # Show available models
+        if available_models:
+            print(f"\n📦 Available models ({len(available_models)}):")
+
+            # Group models by series
+            qwen3_models = [m for m in available_models if m.startswith('qwen3:')]
+            qwen25_models = [m for m in available_models if m.startswith('qwen2.5')]
+            other_models = [
+                m for m in available_models
+                if not m.startswith(('qwen3:', 'qwen2.5'))
+            ]
+
+            sections = (
+                ("   🟢 Qwen3 series (recommended):", qwen3_models),
+                ("   🟡 Qwen2.5 series:", qwen25_models),
+                ("   🔵 Other models:", other_models[:10]),  # Limit to first 10
+            )
+            for heading, models in sections:
+                if not models:
+                    continue
+                print(heading)
+                for model in models:
+                    marker = "  ✅" if model == resolved_model else "    "
+                    print(f"{marker} {model}")
+        else:
+            print("\n❌ No models available from Ollama")
+            print("   Make sure Ollama is running: ollama serve")
+            print("   Install models with: ollama pull qwen3:4b")
+
+        # Show resolution result
+        if resolved_model:
+            if resolved_model != configured_model:
+                print(f"\n🔄 Model resolution: {configured_model} -> {resolved_model}")
+            else:
+                print(f"\n✅ Using exact model match: {resolved_model}")
+        else:
+            print(f"\n❌ Model '{configured_model}' not found!")
+            print("   Consider changing your model in the config file")
+
+        print(f"\n📄 Config file: {config_manager.config_path}")
+        print("   Edit this file to change your model preference")
+
+    except Exception as e:
+        print(f"❌ Model status check failed: {e}")
+        sys.exit(1)
+
+
 def explore_interactive(project_path: Path):
    """Interactive exploration mode with thinking and context memory for any documents."""
    try:
@ -681,12 +753,13 @@ Examples:
  rag-mini search /path/to/project "query" -s  # Search with LLM synthesis
  rag-mini explore /path/to/project            # Interactive exploration mode
  rag-mini status /path/to/project             # Show status
+  rag-mini models /path/to/project             # Show model status and selection
        """,
    )

    parser.add_argument(
        "command",
-        choices=["index", "search", "explore", "status", "update", "check-update"],
+        choices=["index", "search", "explore", "status", "models", "update", "check-update"],
        help="Command to execute",
    )
    parser.add_argument(
@ -756,6 +829,8 @@ Examples:
        explore_interactive(args.project_path)
    elif args.command == "status":
        status_check(args.project_path)
+    elif args.command == "models":
+        show_model_status(args.project_path)


 if __name__ == "__main__":
--- a/mini_rag/llm_synthesizer.py
+++ b/mini_rag/llm_synthesizer.py
@ -113,7 +113,7 @@ class LLMSynthesizer:
                "qwen2.5-coder:1.5b",
            ]

-        # Find first available model from our ranked list using robust name resolution
+        # Find first available model from our ranked list using relaxed name resolution
        for preferred_model in model_rankings:
            resolved_model = self._resolve_model_name(preferred_model)
            if resolved_model:
@ -130,96 +130,115 @@ class LLMSynthesizer:
        
        This handles common patterns like:
        - qwen3:1.7b -> qwen3:1.7b-q8_0
-        - qwen3:0.6b -> qwen3:0.6b-q4_0 
-        - auto -> first available model
+        - qwen3:4b -> qwen3:4b-instruct-2507-q4_K_M
+        - auto -> first available model from ranked preference
        """
+        logger.debug(f"Resolving model: {configured_model}")
+        
        if not self.available_models:
+            logger.warning("No available models for resolution")
            return None
            
-        # Handle special 'auto' directive
+        # Handle special 'auto' directive - use smart selection
        if configured_model.lower() == 'auto':
-            return self.available_models[0] if self.available_models else None
+            logger.info("Using AUTO selection...")
+            return self._select_best_available_model()
            
        # Direct exact match first (case-insensitive)
        for available_model in self.available_models:
            if configured_model.lower() == available_model.lower():
+                logger.info(f"✅ EXACT MATCH: {available_model}")
                return available_model
        
-        # Fuzzy matching for common patterns
-        model_patterns = self._get_model_patterns(configured_model)
+        # Relaxed matching - extract base model and size, then find closest match
+        logger.info(f"No exact match for '{configured_model}', trying relaxed matching...")
+        match = self._find_closest_model_match(configured_model)
+        if match:
+            logger.info(f"✅ FUZZY MATCH: {configured_model} -> {match}")
+        else:
+            logger.warning(f"❌ NO MATCH: {configured_model} not found in available models")
+        return match
+    
+    def _select_best_available_model(self) -> str:
+        """Pick the most preferred installed model for the 'auto' setting.
+
+        Selection order:
+
+        1. The first entry of the ranked ``priority_patterns`` list for which
+           an installed model has the same base name and the same size (the
+           part of the tag before the first ``-``). Matching is strict so the
+           ranking is honoured: ``qwen3:8b`` must not be satisfied by an
+           installed ``qwen3:0.6b`` while ``qwen3:4b`` is also installed.
+        2. Otherwise the first installed model belonging to a ranked series
+           (qwen3 before qwen2.5), whatever its size.
+        3. Otherwise the first installed model.
+
+        Returns:
+            The chosen model name, or ``"qwen2.5:1.5b"`` when
+            ``self.available_models`` is empty.
+        """
+        if not self.available_models:
+            logger.warning("No models available from Ollama - using fallback")
+            return "qwen2.5:1.5b"  # fallback
+
+        logger.info(f"Available models: {self.available_models}")
+
+        # Priority order for auto selection - prefer newer and larger models
+        priority_patterns = [
+            # Qwen3 series (newest)
+            "qwen3:8b", "qwen3:4b", "qwen3:1.7b", "qwen3:0.6b",
+            # Qwen2.5 series
+            "qwen2.5:3b", "qwen2.5:1.5b", "qwen2.5:0.5b",
+        ]
+
+        # Pre-parse installed names into (base, size, full name); names
+        # without a ':' carry no tag and can only be used as the last resort.
+        installed = []
+        for name in self.available_models:
+            base, sep, tag = name.partition(':')
+            if sep:
+                installed.append((base.lower(), tag.split('-')[0].lower(), name))
+
+        # Pass 1: exact base + size, in ranked order
+        logger.info("Searching for best model match...")
+        for pattern in priority_patterns:
+            wanted_base, _, wanted_size = pattern.partition(':')
+            for base, size, name in installed:
+                if base == wanted_base and size == wanted_size:
+                    logger.info(f"✅ AUTO SELECTED: {name} (matched pattern: {pattern})")
+                    return name
+            logger.debug(f"No match found for pattern: {pattern}")
+
+        # Pass 2: no ranked size is installed - still prefer a ranked series.
+        # dict.fromkeys de-duplicates the series names while keeping order.
+        ranked_series = dict.fromkeys(p.partition(':')[0] for p in priority_patterns)
+        for series in ranked_series:
+            for base, _size, name in installed:
+                if base == series:
+                    logger.info(f"✅ AUTO SELECTED: {name} (matched pattern: {series})")
+                    return name
+            logger.debug(f"No match found for pattern: {series}")
+
+        # If nothing matches, just use first available
+        fallback = self.available_models[0]
+        logger.warning(f"⚠️  Using first available model as fallback: {fallback}")
+        return fallback
+    
+    def _find_closest_model_match(self, configured_model: str) -> Optional[str]:
+        """Find the installed model that best matches a configured name.
+
+        The configured name is split into a base (before ``:``) and a size
+        (the tag up to its first ``-``), e.g. ``"qwen3:4b-q8_0"`` ->
+        ``("qwen3", "4b")``. Installed models with the same base
+        (case-insensitive) are candidates; among them the first whose own tag
+        starts with exactly that size segment wins. The size is compared
+        against the candidate's leading tag segment rather than searched for
+        anywhere in the name, so ``4b`` does not match ``qwen3:14b`` and
+        ``7b`` does not match ``qwen3:1.7b``.
+
+        Args:
+            configured_model: Model name as configured, with or without a tag.
+
+        Returns:
+            The matching installed model name; the first same-base candidate
+            when no size was given or no candidate has that size; ``None``
+            when no installed model shares the base name.
+        """
+        if not self.available_models:
+            logger.debug(f"No available models to match against for: {configured_model}")
+            return None
+
+        # Extract base model and size from configured model
+        # e.g., "qwen3:4b" -> ("qwen3", "4b"); a missing or empty tag -> None
+        base_model, _, size_part = configured_model.partition(':')
+        size = size_part.split('-')[0].lower() or None
+
+        logger.debug(f"Looking for base model: '{base_model}', size: '{size}'")
+
+        # Collect (full name, leading tag segment) for models of the same base
+        wanted_base = base_model.lower()
+        candidates = []
+        for available_model in self.available_models:
+            avail_base, sep, avail_tag = available_model.partition(':')
+            if not sep:
+                continue  # untagged names cannot be compared by base/size
+            if avail_base.lower() == wanted_base:
+                candidates.append((available_model, avail_tag.split('-')[0].lower()))
+                logger.debug(f"Found candidate: {available_model}")
+
+        if not candidates:
+            logger.debug(f"No candidates found for base model: {base_model}")
+            return None
+
+        # If we have a size preference, try to match it
+        if size:
+            for candidate, candidate_size in candidates:
+                if candidate_size == size:
+                    logger.debug(f"Size match found: {candidate} contains '{size}'")
+                    return candidate
+            logger.debug(f"No size match found for '{size}', using first candidate")
+
+        # If no size match or no size specified, return first candidate
+        selected = candidates[0][0]
+        logger.debug(f"Returning first candidate: {selected}")
+        return selected

-    def _get_model_patterns(self, configured_model: str) -> List[str]:
-        """Generate fuzzy match patterns for common model naming conventions."""
-        patterns = [configured_model]  # Start with exact name
-        
-        # Common quantization patterns for different models
-        quantization_patterns = {
-            'qwen3:1.7b': ['qwen3:1.7b-q8_0', 'qwen3:1.7b-q4_0', 'qwen3:1.7b-q6_k'],
-            'qwen3:0.6b': ['qwen3:0.6b-q8_0', 'qwen3:0.6b-q4_0', 'qwen3:0.6b-q6_k'],
-            'qwen3:4b': ['qwen3:4b-q8_0', 'qwen3:4b-q4_0', 'qwen3:4b-q6_k'],
-            'qwen3:8b': ['qwen3:8b-q8_0', 'qwen3:8b-q4_0', 'qwen3:8b-q6_k'],
-            'qwen2.5:1.5b': ['qwen2.5:1.5b-q8_0', 'qwen2.5:1.5b-q4_0'],
-            'qwen2.5:3b': ['qwen2.5:3b-q8_0', 'qwen2.5:3b-q4_0'],
-            'qwen2.5-coder:1.5b': ['qwen2.5-coder:1.5b-q8_0', 'qwen2.5-coder:1.5b-q4_0'],
-            'qwen2.5-coder:3b': ['qwen2.5-coder:3b-q8_0', 'qwen2.5-coder:3b-q4_0'],
-            'qwen2.5-coder:7b': ['qwen2.5-coder:7b-q8_0', 'qwen2.5-coder:7b-q4_0'],
-        }
-        
-        # Add specific patterns for the configured model
-        if configured_model.lower() in quantization_patterns:
-            patterns.extend(quantization_patterns[configured_model.lower()])
-        
-        # Generic pattern generation for unknown models
-        if ':' in configured_model:
-            base_name, version = configured_model.split(':', 1)
-            
-            # Add common quantization suffixes
-            common_suffixes = ['-q8_0', '-q4_0', '-q6_k', '-q4_k_m', '-instruct', '-base']
-            for suffix in common_suffixes:
-                patterns.append(f"{base_name}:{version}{suffix}")
-                
-            # Also try with instruct variants
-            if 'instruct' not in version.lower():
-                patterns.append(f"{base_name}:{version}-instruct")
-                patterns.append(f"{base_name}:{version}-instruct-q8_0")
-                patterns.append(f"{base_name}:{version}-instruct-q4_0")
-        
-        return patterns
-
-    def _validate_model_match(self, pattern: str, available_model: str) -> bool:
-        """Validate that a fuzzy match is actually correct and not a false positive."""
-        # Convert to lowercase for comparison
-        pattern_lower = pattern.lower()
-        available_lower = available_model.lower()
-        
-        # Ensure the base model name matches
-        if ':' in pattern_lower and ':' in available_lower:
-            pattern_base = pattern_lower.split(':')[0]
-            available_base = available_lower.split(':')[0]
-            
-            # Base names must match exactly
-            if pattern_base != available_base:
-                return False
-                
-            # Version part should be contained or closely related
-            pattern_version = pattern_lower.split(':', 1)[1]
-            available_version = available_lower.split(':', 1)[1]
-            
-            # The pattern version should be a prefix of the available version
-            # e.g., "1.7b" should match "1.7b-q8_0" but not "11.7b"
-            if not available_version.startswith(pattern_version.split('-')[0]):
-                return False
-                
-        return True
+    # Old pattern matching methods removed - using simpler approach now

    def _ensure_initialized(self):
        """Lazy initialization with LLM warmup."""
--- a/1
+++ b/1
@ -60,6 +60,7 @@ attempt_auto_setup() {
    echo -e "${GREEN}✅ Created virtual environment${NC}" >&2
    
    # Step 2: Install dependencies
+    echo -e "${YELLOW}📦 Installing dependencies (this may take 1-2 minutes)...${NC}" >&2
    if ! "$SCRIPT_DIR/.venv/bin/pip" install -r "$SCRIPT_DIR/requirements.txt" >/dev/null 2>&1; then
        return 1  # Dependency installation failed
    fi
--- a/scripts/setup-github-template.py
+++ b/scripts/setup-github-template.py
@ -157,16 +157,16 @@ jobs:

        ### 📥 Installation
        Download and install the latest version:
-        \`\`\`bash
+        ```bash
        curl -sSL https://github.com/{repo_owner}/{repo_name}/releases/latest/download/install.sh | bash
-        \`\`\`
+        ```

        ### 🔄 Auto-Update
        If you have auto-update support:
-        \`\`\`bash
+        ```bash
        ./{repo_name} check-update
        ./{repo_name} update
-        \`\`\`
+        ```
        EOF

    - name: Create GitHub Release