Enhance model resolution system and improve user experience

Key improvements:
- Implement relaxed model matching to handle modern naming conventions (e.g., qwen3:4b-instruct-2507-q4_K_M)
- Add smart auto-selection prioritizing Qwen3 series over older models
- Replace rigid pattern matching with flexible base+size matching
- Add comprehensive logging for model resolution transparency
- Introduce new 'models' command for detailed model status reporting
- Improve pip installation feedback with progress indication
- Fix Python syntax warning in GitHub template script

The enhanced system now provides clear visibility into model selection
decisions and gracefully handles various model naming patterns without
requiring complex configuration.
This commit is contained in:
FSSCoding 2025-09-03 00:09:39 +10:00
parent b6b64ecb52
commit f4115e83bd
4 changed files with 177 additions and 82 deletions

View File

@ -401,6 +401,78 @@ def status_check(project_path: Path):
sys.exit(1) sys.exit(1)
def show_model_status(project_path: Path):
    """Show detailed model status and selection information.

    Loads the project configuration, asks the LLM synthesizer which models
    Ollama currently serves, and prints a grouped report (Qwen3 first, then
    Qwen2.5, then up to 10 others) marking the model that the configured
    name resolves to.

    Args:
        project_path: Root of the project whose config should be inspected.

    Exits:
        Calls ``sys.exit(1)`` if the report cannot be produced.
    """
    from mini_rag.config import ConfigManager

    print("🤖 Model Status Report")
    print("=" * 50)
    try:
        # Load config
        config_manager = ConfigManager()
        config = config_manager.load_config(project_path)

        # Create LLM synthesizer to check models
        synthesizer = LLMSynthesizer(model=config.llm.synthesis_model, config=config)

        # Show configured model
        configured = config.llm.synthesis_model
        print(f"📋 Configured model: {configured}")

        # Resolve once and reuse: resolution may run fuzzy matching and emit
        # log lines, so calling it per listed model is wasteful and noisy.
        resolved_model = synthesizer._resolve_model_name(configured)

        def _print_group(header: str, models):
            # Print one series group, marking the resolved/selected model.
            print(header)
            for model in models:
                marker = "" if resolved_model == model else " "
                print(f"{marker} {model}")

        # Show available models
        available_models = synthesizer.available_models
        if available_models:
            print(f"\n📦 Available models ({len(available_models)}):")

            # Group models by series
            qwen3_models = [m for m in available_models if m.startswith('qwen3:')]
            qwen25_models = [m for m in available_models if m.startswith('qwen2.5')]
            other_models = [
                m for m in available_models
                if not (m.startswith('qwen3:') or m.startswith('qwen2.5'))
            ]

            if qwen3_models:
                _print_group("  🟢 Qwen3 series (recommended):", qwen3_models)
            if qwen25_models:
                _print_group("  🟡 Qwen2.5 series:", qwen25_models)
            if other_models:
                # Limit to first 10 to keep the report readable.
                _print_group("  🔵 Other models:", other_models[:10])
        else:
            print("\n❌ No models available from Ollama")
            print("   Make sure Ollama is running: ollama serve")
            print("   Install models with: ollama pull qwen3:4b")

        # Show resolution result
        if resolved_model:
            if resolved_model != configured:
                print(f"\n🔄 Model resolution: {configured} -> {resolved_model}")
            else:
                print(f"\n✅ Using exact model match: {resolved_model}")
        else:
            print(f"\n❌ Model '{configured}' not found!")
            print("   Consider changing your model in the config file")

        print(f"\n📄 Config file: {config_manager.config_path}")
        print("   Edit this file to change your model preference")
    except Exception as e:
        # Broad catch is deliberate for a CLI status command: any failure
        # should yield a readable message and a non-zero exit code.
        print(f"❌ Model status check failed: {e}")
        sys.exit(1)
def explore_interactive(project_path: Path): def explore_interactive(project_path: Path):
"""Interactive exploration mode with thinking and context memory for any documents.""" """Interactive exploration mode with thinking and context memory for any documents."""
try: try:
@ -681,12 +753,13 @@ Examples:
rag-mini search /path/to/project "query" -s # Search with LLM synthesis rag-mini search /path/to/project "query" -s # Search with LLM synthesis
rag-mini explore /path/to/project # Interactive exploration mode rag-mini explore /path/to/project # Interactive exploration mode
rag-mini status /path/to/project # Show status rag-mini status /path/to/project # Show status
rag-mini models /path/to/project # Show model status and selection
""", """,
) )
parser.add_argument( parser.add_argument(
"command", "command",
choices=["index", "search", "explore", "status", "update", "check-update"], choices=["index", "search", "explore", "status", "models", "update", "check-update"],
help="Command to execute", help="Command to execute",
) )
parser.add_argument( parser.add_argument(
@ -756,6 +829,8 @@ Examples:
explore_interactive(args.project_path) explore_interactive(args.project_path)
elif args.command == "status": elif args.command == "status":
status_check(args.project_path) status_check(args.project_path)
elif args.command == "models":
show_model_status(args.project_path)
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -113,7 +113,7 @@ class LLMSynthesizer:
"qwen2.5-coder:1.5b", "qwen2.5-coder:1.5b",
] ]
# Find first available model from our ranked list using robust name resolution # Find first available model from our ranked list using relaxed name resolution
for preferred_model in model_rankings: for preferred_model in model_rankings:
resolved_model = self._resolve_model_name(preferred_model) resolved_model = self._resolve_model_name(preferred_model)
if resolved_model: if resolved_model:
@ -130,96 +130,115 @@ class LLMSynthesizer:
This handles common patterns like: This handles common patterns like:
- qwen3:1.7b -> qwen3:1.7b-q8_0 - qwen3:1.7b -> qwen3:1.7b-q8_0
- qwen3:0.6b -> qwen3:0.6b-q4_0 - qwen3:4b -> qwen3:4b-instruct-2507-q4_K_M
- auto -> first available model - auto -> first available model from ranked preference
""" """
logger.debug(f"Resolving model: {configured_model}")
if not self.available_models: if not self.available_models:
logger.warning("No available models for resolution")
return None return None
# Handle special 'auto' directive # Handle special 'auto' directive - use smart selection
if configured_model.lower() == 'auto': if configured_model.lower() == 'auto':
return self.available_models[0] if self.available_models else None logger.info("Using AUTO selection...")
return self._select_best_available_model()
# Direct exact match first (case-insensitive) # Direct exact match first (case-insensitive)
for available_model in self.available_models: for available_model in self.available_models:
if configured_model.lower() == available_model.lower(): if configured_model.lower() == available_model.lower():
logger.info(f"✅ EXACT MATCH: {available_model}")
return available_model return available_model
# Fuzzy matching for common patterns # Relaxed matching - extract base model and size, then find closest match
model_patterns = self._get_model_patterns(configured_model) logger.info(f"No exact match for '{configured_model}', trying relaxed matching...")
match = self._find_closest_model_match(configured_model)
if match:
logger.info(f"✅ FUZZY MATCH: {configured_model} -> {match}")
else:
logger.warning(f"❌ NO MATCH: {configured_model} not found in available models")
return match
for pattern in model_patterns: def _select_best_available_model(self) -> str:
for available_model in self.available_models: """Select the best available model from what's actually installed."""
if pattern.lower() in available_model.lower(): if not self.available_models:
# Additional validation: ensure it's not a partial match of something else logger.warning("No models available from Ollama - using fallback")
if self._validate_model_match(pattern, available_model): return "qwen2.5:1.5b" # fallback
return available_model
return None # Model not available logger.info(f"Available models: {self.available_models}")
def _get_model_patterns(self, configured_model: str) -> List[str]: # Priority order for auto selection - prefer newer and larger models
"""Generate fuzzy match patterns for common model naming conventions.""" priority_patterns = [
patterns = [configured_model] # Start with exact name # Qwen3 series (newest)
"qwen3:8b", "qwen3:4b", "qwen3:1.7b", "qwen3:0.6b",
# Qwen2.5 series
"qwen2.5:3b", "qwen2.5:1.5b", "qwen2.5:0.5b",
# Any other model as fallback
]
# Common quantization patterns for different models # Find first match from priority list
quantization_patterns = { logger.info("Searching for best model match...")
'qwen3:1.7b': ['qwen3:1.7b-q8_0', 'qwen3:1.7b-q4_0', 'qwen3:1.7b-q6_k'], for pattern in priority_patterns:
'qwen3:0.6b': ['qwen3:0.6b-q8_0', 'qwen3:0.6b-q4_0', 'qwen3:0.6b-q6_k'], match = self._find_closest_model_match(pattern)
'qwen3:4b': ['qwen3:4b-q8_0', 'qwen3:4b-q4_0', 'qwen3:4b-q6_k'], if match:
'qwen3:8b': ['qwen3:8b-q8_0', 'qwen3:8b-q4_0', 'qwen3:8b-q6_k'], logger.info(f"✅ AUTO SELECTED: {match} (matched pattern: {pattern})")
'qwen2.5:1.5b': ['qwen2.5:1.5b-q8_0', 'qwen2.5:1.5b-q4_0'], return match
'qwen2.5:3b': ['qwen2.5:3b-q8_0', 'qwen2.5:3b-q4_0'], else:
'qwen2.5-coder:1.5b': ['qwen2.5-coder:1.5b-q8_0', 'qwen2.5-coder:1.5b-q4_0'], logger.debug(f"No match found for pattern: {pattern}")
'qwen2.5-coder:3b': ['qwen2.5-coder:3b-q8_0', 'qwen2.5-coder:3b-q4_0'],
'qwen2.5-coder:7b': ['qwen2.5-coder:7b-q8_0', 'qwen2.5-coder:7b-q4_0'],
}
# Add specific patterns for the configured model # If nothing matches, just use first available
if configured_model.lower() in quantization_patterns: fallback = self.available_models[0]
patterns.extend(quantization_patterns[configured_model.lower()]) logger.warning(f"⚠️ Using first available model as fallback: {fallback}")
return fallback
# Generic pattern generation for unknown models def _find_closest_model_match(self, configured_model: str) -> Optional[str]:
if ':' in configured_model: """Find the closest matching model using relaxed criteria."""
base_name, version = configured_model.split(':', 1) if not self.available_models:
logger.debug(f"No available models to match against for: {configured_model}")
return None
# Add common quantization suffixes # Extract base model and size from configured model
common_suffixes = ['-q8_0', '-q4_0', '-q6_k', '-q4_k_m', '-instruct', '-base'] # e.g., "qwen3:4b" -> ("qwen3", "4b")
for suffix in common_suffixes: if ':' not in configured_model:
patterns.append(f"{base_name}:{version}{suffix}") base_model = configured_model
size = None
else:
base_model, size_part = configured_model.split(':', 1)
# Extract just the size (remove any suffixes like -q8_0)
size = size_part.split('-')[0] if '-' in size_part else size_part
# Also try with instruct variants logger.debug(f"Looking for base model: '{base_model}', size: '{size}'")
if 'instruct' not in version.lower():
patterns.append(f"{base_name}:{version}-instruct")
patterns.append(f"{base_name}:{version}-instruct-q8_0")
patterns.append(f"{base_name}:{version}-instruct-q4_0")
return patterns # Find all models that match the base model
candidates = []
for available_model in self.available_models:
if ':' not in available_model:
continue
def _validate_model_match(self, pattern: str, available_model: str) -> bool: avail_base, avail_full = available_model.split(':', 1)
"""Validate that a fuzzy match is actually correct and not a false positive.""" if avail_base.lower() == base_model.lower():
# Convert to lowercase for comparison candidates.append(available_model)
pattern_lower = pattern.lower() logger.debug(f"Found candidate: {available_model}")
available_lower = available_model.lower()
# Ensure the base model name matches if not candidates:
if ':' in pattern_lower and ':' in available_lower: logger.debug(f"No candidates found for base model: {base_model}")
pattern_base = pattern_lower.split(':')[0] return None
available_base = available_lower.split(':')[0]
# Base names must match exactly # If we have a size preference, try to match it
if pattern_base != available_base: if size:
return False for candidate in candidates:
# Check if size appears in the model name
if size.lower() in candidate.lower():
logger.debug(f"Size match found: {candidate} contains '{size}'")
return candidate
logger.debug(f"No size match found for '{size}', using first candidate")
# Version part should be contained or closely related # If no size match or no size specified, return first candidate
pattern_version = pattern_lower.split(':', 1)[1] selected = candidates[0]
available_version = available_lower.split(':', 1)[1] logger.debug(f"Returning first candidate: {selected}")
return selected
# The pattern version should be a prefix of the available version # Old pattern matching methods removed - using simpler approach now
# e.g., "1.7b" should match "1.7b-q8_0" but not "11.7b"
if not available_version.startswith(pattern_version.split('-')[0]):
return False
return True
def _ensure_initialized(self): def _ensure_initialized(self):
"""Lazy initialization with LLM warmup.""" """Lazy initialization with LLM warmup."""

View File

@ -60,6 +60,7 @@ attempt_auto_setup() {
echo -e "${GREEN}✅ Created virtual environment${NC}" >&2 echo -e "${GREEN}✅ Created virtual environment${NC}" >&2
# Step 2: Install dependencies # Step 2: Install dependencies
echo -e "${YELLOW}📦 Installing dependencies (this may take 1-2 minutes)...${NC}" >&2
if ! "$SCRIPT_DIR/.venv/bin/pip" install -r "$SCRIPT_DIR/requirements.txt" >/dev/null 2>&1; then if ! "$SCRIPT_DIR/.venv/bin/pip" install -r "$SCRIPT_DIR/requirements.txt" >/dev/null 2>&1; then
return 1 # Dependency installation failed return 1 # Dependency installation failed
fi fi

View File

@ -157,16 +157,16 @@ jobs:
### 📥 Installation ### 📥 Installation
Download and install the latest version: Download and install the latest version:
\`\`\`bash ```bash
curl -sSL https://github.com/{repo_owner}/{repo_name}/releases/latest/download/install.sh | bash curl -sSL https://github.com/{repo_owner}/{repo_name}/releases/latest/download/install.sh | bash
\`\`\` ```
### 🔄 Auto-Update ### 🔄 Auto-Update
If you have auto-update support: If you have auto-update support:
\`\`\`bash ```bash
./{repo_name} check-update ./{repo_name} check-update
./{repo_name} update ./{repo_name} update
\`\`\` ```
EOF EOF
- name: Create GitHub Release - name: Create GitHub Release