Implement clean model state management with user confirmation
- Add user confirmation before stopping models for optimal mode switching
- Clean separation: synthesis mode never uses thinking, exploration always does
- Add intelligent restart detection based on response quality heuristics
- Include helpful guidance messages suggesting exploration mode for deep analysis
- Default synthesis mode to no-thinking for consistent fast responses
- Handle graceful fallbacks when model stop fails or user declines
- Provide clear explanations for why model restart improves thinking quality
commit bebb0016d0 (parent a7e3e6f474)
@@ -68,6 +68,13 @@ class CodeExplorer:
    def start_exploration_session(self) -> bool:
        """Start a new exploration session."""

        # Check if we should restart the model for optimal thinking
        model_restart_needed = self._check_model_restart_needed()
        if model_restart_needed:
            if not self._handle_model_restart():
                print("⚠️ Continuing with current model (quality may be reduced)")

        if not self.synthesizer.is_available():
            print("❌ LLM service unavailable. Please check Ollama is running.")
            return False
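The "clean separation" described in the commit message implies that exploration mode constructs its synthesizer with thinking switched on, while plain synthesis relies on the new enable_thinking=False default shown in the LLMSynthesizer hunk further down. The helper below is a minimal sketch of that wiring, not code from this commit; the module path llm_synthesizer and the helper name are assumptions.

# Sketch only (assumed module path and helper name); illustrates the
# synthesis-vs-exploration thinking split, not code from this diff.
from llm_synthesizer import LLMSynthesizer

def make_exploration_synthesizer(model: str = None) -> LLMSynthesizer:
    """Exploration mode opts in to thinking; synthesis keeps the False default."""
    return LLMSynthesizer(model=model, enable_thinking=True)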
@@ -347,6 +354,75 @@ Think carefully about the relationships between code components and how they ans
        return summary + "\n\n✅ Exploration session ended."

    def _check_model_restart_needed(self) -> bool:
        """Check if model restart would improve thinking quality."""
        try:
            # Simple heuristic: if we can detect the model was recently used
            # with <no_think>, suggest restart for better thinking quality

            # Test with a simple thinking prompt to see response quality
            test_response = self.synthesizer._call_ollama(
                "Think briefly: what is 2+2?",
                temperature=0.1,
                disable_thinking=False
            )

            if test_response:
                # If response is suspiciously short or shows signs of no-think behavior
                if len(test_response.strip()) < 10 or test_response.strip() == "4":
                    return True

        except Exception:
            pass

        return False

    def _handle_model_restart(self) -> bool:
        """Handle user confirmation and model restart."""
        try:
            print("\n🤔 To ensure best thinking quality, exploration mode works best with a fresh model.")
            print(f" Currently running: {self.synthesizer.model}")
            print("\n💡 Stop current model and restart for optimal exploration? (y/N): ", end="", flush=True)

            response = input().strip().lower()

            if response in ['y', 'yes']:
                print("\n🔄 Stopping current model...")

                # Use ollama stop command for clean model restart
                import subprocess
                try:
                    subprocess.run([
                        "ollama", "stop", self.synthesizer.model
                    ], timeout=10, capture_output=True)

                    print("✅ Model stopped successfully.")
                    print("🚀 Exploration mode will restart the model with thinking enabled...")

                    # Reset synthesizer initialization to force fresh start
                    self.synthesizer._initialized = False
                    return True

                except subprocess.TimeoutExpired:
                    print("⚠️ Model stop timed out, continuing anyway...")
                    return False
                except FileNotFoundError:
                    print("⚠️ 'ollama' command not found, continuing with current model...")
                    return False
                except Exception as e:
                    print(f"⚠️ Error stopping model: {e}")
                    return False
            else:
                print("📝 Continuing with current model...")
                return False

        except KeyboardInterrupt:
            print("\n📝 Continuing with current model...")
            return False
        except EOFError:
            print("\n📝 Continuing with current model...")
            return False
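As a rough illustration of the graceful-fallback behaviour, the decline path above can be exercised non-interactively. This is a hypothetical pytest sketch; the make_explorer fixture (a CodeExplorer wired to a stub synthesizer) is assumed rather than taken from this commit.

import builtins

def test_restart_declined_keeps_current_model(monkeypatch, make_explorer):
    explorer = make_explorer()  # assumed fixture returning a CodeExplorer
    # Simulate the user pressing Enter at the "(y/N)" prompt.
    monkeypatch.setattr(builtins, "input", lambda: "")
    assert explorer._handle_model_restart() is False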
# Quick test function
def test_explorer():
    """Test the code explorer."""
@@ -27,11 +27,11 @@ class SynthesisResult:
class LLMSynthesizer:
    """Synthesizes RAG search results using Ollama LLMs."""

-    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = True):
+    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = False):
        self.ollama_url = ollama_url.rstrip('/')
        self.available_models = []
        self.model = model
-        self.enable_thinking = enable_thinking
+        self.enable_thinking = enable_thinking  # Default False for synthesis mode
        self._initialized = False

    def _get_available_models(self) -> List[str]:
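The diff does not show _call_ollama itself, but _check_model_restart_needed passes it a disable_thinking flag and its comment refers to a <no_think> tag, so one plausible reading is that the flag injects that tag into the prompt before calling Ollama's standard /api/generate endpoint. The standalone sketch below reflects that assumption; the real method's tag handling and payload may differ.

import requests

def call_ollama_sketch(ollama_url: str, model: str, prompt: str,
                       temperature: float = 0.3, disable_thinking: bool = True) -> str:
    """Hypothetical stand-in for LLMSynthesizer._call_ollama; not code from this commit."""
    if disable_thinking:
        # Tag name taken from the comment in _check_model_restart_needed.
        prompt = f"<no_think>\n{prompt}"
    resp = requests.post(
        f"{ollama_url.rstrip('/')}/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,  # single JSON response instead of a token stream
            "options": {"temperature": temperature},
        },
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json().get("response", "")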
@@ -134,6 +134,12 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
        synthesis = synthesizer.synthesize_search_results(query, results, project_path)
        print()
        print(synthesizer.format_synthesis_output(synthesis, query))

        # Add guidance for deeper analysis
        if synthesis.confidence < 0.7 or any(word in query.lower() for word in ['why', 'how', 'explain', 'debug']):
            print("\n💡 Want deeper analysis with reasoning?")
            print(f" Try: rag-mini explore {project_path}")
            print(" Exploration mode enables thinking and remembers conversation context.")
    else:
        print("❌ LLM synthesis unavailable")
        print(" • Ensure Ollama is running: ollama serve")