def _print_model_group(heading, models, selected_model):
    """Print one series heading followed by its models, flagging the selected one."""
    print(heading)
    for model in models:
        marker = " āœ…" if model == selected_model else " "
        print(f"{marker} {model}")


def show_model_status(project_path: Path):
    """Print a report of the configured, available and resolved LLM models.

    Loads the project configuration, builds an ``LLMSynthesizer`` for the
    configured synthesis model and prints:

    * the configured model name,
    * the available models grouped by series (Qwen3, Qwen2.5, other), with
      the model the configuration resolves to flagged,
    * the outcome of resolving the configured name, and
    * the path of the config file to edit.

    Args:
        project_path: Project directory whose configuration is loaded.

    Any exception raised while building the report is printed and the
    process exits with status 1.
    """
    from mini_rag.config import ConfigManager

    # Cap on how many non-Qwen models are listed, to keep the report short.
    max_other_models = 10

    print("šŸ¤– Model Status Report")
    print("=" * 50)

    try:
        # Load config
        config_manager = ConfigManager()
        config = config_manager.load_config(project_path)
        configured_model = config.llm.synthesis_model

        # Create LLM synthesizer to check models
        synthesizer = LLMSynthesizer(model=configured_model, config=config)

        # Show configured model
        print(f"šŸ“‹ Configured model: {configured_model}")

        # Resolve once: the result does not change between models, and every
        # call rescans the model list and writes log output.
        resolved_model = synthesizer._resolve_model_name(configured_model)

        # Show available models
        available_models = synthesizer.available_models
        if available_models:
            print(f"\nšŸ“¦ Available models ({len(available_models)}):")

            # Group models by series
            qwen_prefixes = ('qwen3:', 'qwen2.5')
            qwen3_models = [m for m in available_models if m.startswith('qwen3:')]
            qwen25_models = [m for m in available_models if m.startswith('qwen2.5')]
            other_models = [m for m in available_models if not m.startswith(qwen_prefixes)]

            if qwen3_models:
                _print_model_group(" 🟢 Qwen3 series (recommended):", qwen3_models, resolved_model)

            if qwen25_models:
                _print_model_group(" 🟔 Qwen2.5 series:", qwen25_models, resolved_model)

            if other_models:
                _print_model_group(
                    " šŸ”µ Other models:", other_models[:max_other_models], resolved_model
                )
                # Say so when the list was cut, instead of silently hiding models.
                hidden = len(other_models) - max_other_models
                if hidden > 0:
                    print(f" ... and {hidden} more")
        else:
            print("\nāŒ No models available from Ollama")
            print(" Make sure Ollama is running: ollama serve")
            print(" Install models with: ollama pull qwen3:4b")

        # Show resolution result
        if resolved_model:
            if resolved_model != configured_model:
                print(f"\nšŸ”„ Model resolution: {configured_model} -> {resolved_model}")
            else:
                print(f"\nāœ… Using exact model match: {resolved_model}")
        else:
            print(f"\nāŒ Model '{configured_model}' not found!")
            print(" Consider changing your model in the config file")

        print(f"\nšŸ“„ Config file: {config_manager.config_path}")
        print(" Edit this file to change your model preference")

    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero.
        print(f"āŒ Model status check failed: {e}")
        sys.exit(1)
def _select_best_available_model(self) -> str:
    """Select the best installed model for the ``auto`` directive.

    Walks a fixed preference list (Qwen3 before Qwen2.5, larger sizes
    before smaller ones) in two passes:

    1. a strict pass that only accepts a model whose size matches the
       preferred entry, so the size ordering of the list is honoured;
    2. a relaxed pass that accepts any installed model of the same base
       name, so e.g. an installed ``qwen3:14b`` is still preferred over
       unrelated models.

    Returns:
        An entry of ``self.available_models``. When nothing in the
        preference list matches, the first available model. When no models
        are available at all, the literal ``"qwen2.5:1.5b"``.
    """
    if not self.available_models:
        logger.warning("No models available from Ollama - using fallback")
        return "qwen2.5:1.5b"  # fallback

    logger.info(f"Available models: {self.available_models}")

    # Priority order for auto selection - prefer newer and larger models
    priority_patterns = [
        # Qwen3 series (newest)
        "qwen3:8b", "qwen3:4b", "qwen3:1.7b", "qwen3:0.6b",
        # Qwen2.5 series
        "qwen2.5:3b", "qwen2.5:1.5b", "qwen2.5:0.5b",
    ]

    logger.info("Searching for best model match...")
    # Strict pass first: without it the very first pattern of a series would
    # match *any* installed model of that series and the size ranking would
    # never take effect.
    for require_size in (True, False):
        for pattern in priority_patterns:
            match = self._find_closest_model_match(pattern, require_size=require_size)
            if match:
                logger.info(f"āœ… AUTO SELECTED: {match} (matched pattern: {pattern})")
                return match
            logger.debug(f"No match found for pattern: {pattern}")

    # If nothing matches, just use first available
    fallback = self.available_models[0]
    logger.warning(f"āš ļø Using first available model as fallback: {fallback}")
    return fallback


def _find_closest_model_match(
    self, configured_model: str, *, require_size: bool = False
) -> Optional[str]:
    """Find the installed model that most closely matches ``configured_model``.

    The configured name is split into a base (``"qwen3"``) and a size
    (``"4b"``, taken from the tag up to the first ``-``). Only installed
    models of the form ``base:tag`` with the same base (case-insensitive)
    are considered. Among those, the first whose tag contains the size as a
    whole ``-``-separated token wins, so ``4b`` matches
    ``4b-instruct-2507-q4_K_M`` but not ``14b``, and ``7b`` does not match
    ``1.7b``.

    Args:
        configured_model: Requested model name, with or without a ``:tag``.
        require_size: When true and a size was requested, return ``None``
            if no candidate has that size instead of falling back to the
            first candidate of the same base. Ignored when the requested
            name carries no size.

    Returns:
        The matching entry of ``self.available_models``, or ``None`` when
        there is no acceptable candidate.
    """
    if not self.available_models:
        logger.debug(f"No available models to match against for: {configured_model}")
        return None

    # e.g., "qwen3:4b-q8_0" -> ("qwen3", "4b"); "qwen3" -> ("qwen3", None)
    base_model, _, size_part = configured_model.partition(':')
    size = size_part.split('-')[0] or None

    logger.debug(f"Looking for base model: '{base_model}', size: '{size}'")

    # Find all models that match the base model
    base_lower = base_model.lower()
    candidates = []
    for available_model in self.available_models:
        avail_base, separator, _ = available_model.partition(':')
        if separator and avail_base.lower() == base_lower:
            candidates.append(available_model)
            logger.debug(f"Found candidate: {available_model}")

    if not candidates:
        logger.debug(f"No candidates found for base model: {base_model}")
        return None

    # If we have a size preference, try to match it
    if size:
        wanted = size.lower()
        for candidate in candidates:
            # Compare whole tag tokens; a plain substring test would let
            # "4b" match "14b" and "7b" match "1.7b".
            tag_tokens = candidate.split(':', 1)[1].lower().split('-')
            if wanted in tag_tokens:
                logger.debug(f"Size match found: {candidate} contains '{size}'")
                return candidate
        if require_size:
            logger.debug(f"No size match found for '{size}' and size is required")
            return None
        logger.debug(f"No size match found for '{size}', using first candidate")

    # If no size match or no size specified, return first candidate
    selected = candidates[0]
    logger.debug(f"Returning first candidate: {selected}")
    return selected
version = configured_model.split(':', 1) - - # Add common quantization suffixes - common_suffixes = ['-q8_0', '-q4_0', '-q6_k', '-q4_k_m', '-instruct', '-base'] - for suffix in common_suffixes: - patterns.append(f"{base_name}:{version}{suffix}") - - # Also try with instruct variants - if 'instruct' not in version.lower(): - patterns.append(f"{base_name}:{version}-instruct") - patterns.append(f"{base_name}:{version}-instruct-q8_0") - patterns.append(f"{base_name}:{version}-instruct-q4_0") - - return patterns - - def _validate_model_match(self, pattern: str, available_model: str) -> bool: - """Validate that a fuzzy match is actually correct and not a false positive.""" - # Convert to lowercase for comparison - pattern_lower = pattern.lower() - available_lower = available_model.lower() - - # Ensure the base model name matches - if ':' in pattern_lower and ':' in available_lower: - pattern_base = pattern_lower.split(':')[0] - available_base = available_lower.split(':')[0] - - # Base names must match exactly - if pattern_base != available_base: - return False - - # Version part should be contained or closely related - pattern_version = pattern_lower.split(':', 1)[1] - available_version = available_lower.split(':', 1)[1] - - # The pattern version should be a prefix of the available version - # e.g., "1.7b" should match "1.7b-q8_0" but not "11.7b" - if not available_version.startswith(pattern_version.split('-')[0]): - return False - - return True + # Old pattern matching methods removed - using simpler approach now def _ensure_initialized(self): """Lazy initialization with LLM warmup.""" diff --git a/rag-mini b/rag-mini index 897c434..4f642dc 100755 --- a/rag-mini +++ b/rag-mini @@ -60,6 +60,7 @@ attempt_auto_setup() { echo -e "${GREEN}āœ… Created virtual environment${NC}" >&2 # Step 2: Install dependencies + echo -e "${YELLOW}šŸ“¦ Installing dependencies (this may take 1-2 minutes)...${NC}" >&2 if ! 
"$SCRIPT_DIR/.venv/bin/pip" install -r "$SCRIPT_DIR/requirements.txt" >/dev/null 2>&1; then return 1 # Dependency installation failed fi diff --git a/scripts/setup-github-template.py b/scripts/setup-github-template.py index d786d92..5bf427d 100755 --- a/scripts/setup-github-template.py +++ b/scripts/setup-github-template.py @@ -157,16 +157,16 @@ jobs: ### šŸ“„ Installation Download and install the latest version: - \`\`\`bash + ```bash curl -sSL https://github.com/{repo_owner}/{repo_name}/releases/latest/download/install.sh | bash - \`\`\` + ``` ### šŸ”„ Auto-Update If you have auto-update support: - \`\`\`bash + ```bash ./{repo_name} check-update ./{repo_name} update - \`\`\` + ``` EOF - name: Create GitHub Release