Implement clean model state management with user confirmation
- Add user confirmation before stopping models for optimal mode switching
- Clean separation: synthesis mode never uses thinking, exploration always does
- Add intelligent restart detection based on response quality heuristics
- Include helpful guidance messages suggesting exploration mode for deep analysis
- Default synthesis mode to no-thinking for consistent fast responses
- Handle graceful fallbacks when model stop fails or user declines
- Provide clear explanations for why model restart improves thinking quality
commit bebb0016d0 (parent a7e3e6f474)
@@ -68,6 +68,13 @@ class CodeExplorer:
    def start_exploration_session(self) -> bool:
        """Start a new exploration session."""

        # Check if we should restart the model for optimal thinking
        model_restart_needed = self._check_model_restart_needed()
        if model_restart_needed:
            if not self._handle_model_restart():
                print("⚠️ Continuing with current model (quality may be reduced)")

        if not self.synthesizer.is_available():
            print("❌ LLM service unavailable. Please check Ollama is running.")
            return False
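The "clean separation" described in the commit message implies that exploration mode constructs its synthesizer with thinking switched on, while plain synthesis relies on the new enable_thinking=False default shown in the LLMSynthesizer hunk further down. The helper below is a minimal sketch of that wiring, not code from this commit; the module path llm_synthesizer and the helper name are assumptions.

# Sketch only (assumed module path and helper name); illustrates the
# synthesis-vs-exploration thinking split, not code from this diff.
from llm_synthesizer import LLMSynthesizer

def make_exploration_synthesizer(model: str = None) -> LLMSynthesizer:
    """Exploration mode opts in to thinking; synthesis keeps the False default."""
    return LLMSynthesizer(model=model, enable_thinking=True)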
@@ -347,6 +354,75 @@ Think carefully about the relationships between code components and how they ans
        return summary + "\n\n✅ Exploration session ended."

    def _check_model_restart_needed(self) -> bool:
        """Check if model restart would improve thinking quality."""
        try:
            # Simple heuristic: if we can detect the model was recently used
            # with <no_think>, suggest restart for better thinking quality

            # Test with a simple thinking prompt to see response quality
            test_response = self.synthesizer._call_ollama(
                "Think briefly: what is 2+2?",
                temperature=0.1,
                disable_thinking=False
            )

            if test_response:
                # If response is suspiciously short or shows signs of no-think behavior
                if len(test_response.strip()) < 10 or test_response.strip() == "4":
                    return True

        except Exception:
            pass

        return False

    def _handle_model_restart(self) -> bool:
        """Handle user confirmation and model restart."""
        try:
            print("\n🤔 To ensure best thinking quality, exploration mode works best with a fresh model.")
            print(f" Currently running: {self.synthesizer.model}")
            print("\n💡 Stop current model and restart for optimal exploration? (y/N): ", end="", flush=True)

            response = input().strip().lower()

            if response in ['y', 'yes']:
                print("\n🔄 Stopping current model...")

                # Use ollama stop command for clean model restart
                import subprocess
                try:
                    subprocess.run([
                        "ollama", "stop", self.synthesizer.model
                    ], timeout=10, capture_output=True)

                    print("✅ Model stopped successfully.")
                    print("🚀 Exploration mode will restart the model with thinking enabled...")

                    # Reset synthesizer initialization to force fresh start
                    self.synthesizer._initialized = False
                    return True

                except subprocess.TimeoutExpired:
                    print("⚠️ Model stop timed out, continuing anyway...")
                    return False
                except FileNotFoundError:
                    print("⚠️ 'ollama' command not found, continuing with current model...")
                    return False
                except Exception as e:
                    print(f"⚠️ Error stopping model: {e}")
                    return False
            else:
                print("📝 Continuing with current model...")
                return False

        except KeyboardInterrupt:
            print("\n📝 Continuing with current model...")
            return False
        except EOFError:
            print("\n📝 Continuing with current model...")
            return False
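As a rough illustration of the graceful-fallback behaviour, the decline path above can be exercised non-interactively. This is a hypothetical pytest sketch; the make_explorer fixture (a CodeExplorer wired to a stub synthesizer) is assumed rather than taken from this commit.

import builtins

def test_restart_declined_keeps_current_model(monkeypatch, make_explorer):
    explorer = make_explorer()  # assumed fixture returning a CodeExplorer
    # Simulate the user pressing Enter at the "(y/N)" prompt.
    monkeypatch.setattr(builtins, "input", lambda: "")
    assert explorer._handle_model_restart() is False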
# Quick test function
def test_explorer():
    """Test the code explorer."""
@@ -27,11 +27,11 @@ class SynthesisResult:
class LLMSynthesizer:
    """Synthesizes RAG search results using Ollama LLMs."""

-    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = True):
+    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = False):
        self.ollama_url = ollama_url.rstrip('/')
        self.available_models = []
        self.model = model
-        self.enable_thinking = enable_thinking
+        self.enable_thinking = enable_thinking  # Default False for synthesis mode
        self._initialized = False

    def _get_available_models(self) -> List[str]:
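The diff does not show _call_ollama itself, but _check_model_restart_needed passes it a disable_thinking flag and its comment refers to a <no_think> tag, so one plausible reading is that the flag injects that tag into the prompt before calling Ollama's standard /api/generate endpoint. The standalone sketch below reflects that assumption; the real method's tag handling and payload may differ.

import requests

def call_ollama_sketch(ollama_url: str, model: str, prompt: str,
                       temperature: float = 0.3, disable_thinking: bool = True) -> str:
    """Hypothetical stand-in for LLMSynthesizer._call_ollama; not code from this commit."""
    if disable_thinking:
        # Tag name taken from the comment in _check_model_restart_needed.
        prompt = f"<no_think>\n{prompt}"
    resp = requests.post(
        f"{ollama_url.rstrip('/')}/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,  # single JSON response instead of a token stream
            "options": {"temperature": temperature},
        },
        timeout=120,
    )
    resp.raise_for_status()
    return resp.json().get("response", "")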
@@ -134,6 +134,12 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
        synthesis = synthesizer.synthesize_search_results(query, results, project_path)
        print()
        print(synthesizer.format_synthesis_output(synthesis, query))

        # Add guidance for deeper analysis
        if synthesis.confidence < 0.7 or any(word in query.lower() for word in ['why', 'how', 'explain', 'debug']):
            print("\n💡 Want deeper analysis with reasoning?")
            print(f" Try: rag-mini explore {project_path}")
            print(" Exploration mode enables thinking and remembers conversation context.")
    else:
        print("❌ LLM synthesis unavailable")
        print(" • Ensure Ollama is running: ollama serve")