diff --git a/claude_rag/explorer.py b/claude_rag/explorer.py
index ac28728..b1f5fad 100644
--- a/claude_rag/explorer.py
+++ b/claude_rag/explorer.py
@@ -68,6 +68,13 @@ class CodeExplorer:
 
     def start_exploration_session(self) -> bool:
         """Start a new exploration session."""
+
+        # Check whether a model restart would improve thinking quality
+        model_restart_needed = self._check_model_restart_needed()
+        if model_restart_needed:
+            if not self._handle_model_restart():
+                print("āš ļø Continuing with current model (quality may be reduced)")
+
         if not self.synthesizer.is_available():
             print("āŒ LLM service unavailable. Please check Ollama is running.")
             return False
@@ -346,6 +353,72 @@ Think carefully about the relationships between code components and how they ans
 
         self.current_session = None
         return summary + "\n\nāœ… Exploration session ended."
+
+    def _check_model_restart_needed(self) -> bool:
+        """Check whether a model restart would improve thinking quality."""
+        try:
+            # Simple heuristic: if the model was recently used with thinking
+            # disabled, suggest a restart for better thinking quality.
+
+            # Probe with a trivial thinking prompt and inspect the response
+            test_response = self.synthesizer._call_ollama(
+                "Think briefly: what is 2+2?",
+                temperature=0.1,
+                disable_thinking=False
+            )
+
+            if test_response:
+                # A bare or very short reply (e.g. just "4") suggests no-think behavior
+                if len(test_response.strip()) < 10:
+                    return True
+
+        except Exception:
+            pass
+
+        return False
+
+    def _handle_model_restart(self) -> bool:
+        """Confirm with the user, then stop the model for a clean restart."""
+        try:
+            print("\nšŸ¤” Exploration mode gives the best thinking quality with a freshly started model.")
+            print(f"   Currently running: {self.synthesizer.model}")
+            print("\nšŸ’” Stop the current model and restart for optimal exploration? (y/N): ", end="", flush=True)
+
+            response = input().strip().lower()
+
+            if response in ['y', 'yes']:
+                print("\nšŸ”„ Stopping current model...")
+
+                # Use `ollama stop` for a clean model restart
+                import subprocess
+                try:
+                    subprocess.run([
+                        "ollama", "stop", self.synthesizer.model
+                    ], timeout=10, capture_output=True)
+
+                    print("āœ… Model stopped successfully.")
+                    print("šŸš€ Exploration mode will restart the model with thinking enabled...")
+
+                    # Reset synthesizer initialization to force a fresh start
+                    self.synthesizer._initialized = False
+                    return True
+
+                except subprocess.TimeoutExpired:
+                    print("āš ļø Model stop timed out, continuing anyway...")
+                    return False
+                except FileNotFoundError:
+                    print("āš ļø 'ollama' command not found, continuing with current model...")
+                    return False
+                except Exception as e:
+                    print(f"āš ļø Error stopping model: {e}")
+                    return False
+            else:
+                print("šŸ“ Continuing with current model...")
+                return False
+
+        except (KeyboardInterrupt, EOFError):
+            print("\nšŸ“ Continuing with current model...")
+            return False
 
 # Quick test function
 def test_explorer():
diff --git a/claude_rag/llm_synthesizer.py b/claude_rag/llm_synthesizer.py
index 0f4ca36..538b851 100644
--- a/claude_rag/llm_synthesizer.py
+++ b/claude_rag/llm_synthesizer.py
@@ -27,11 +27,11 @@ class SynthesisResult:
 class LLMSynthesizer:
     """Synthesizes RAG search results using Ollama LLMs."""
 
-    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = True):
+    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = False):
         self.ollama_url = ollama_url.rstrip('/')
         self.available_models = []
         self.model = model
-        self.enable_thinking = enable_thinking
+        self.enable_thinking = enable_thinking  # Default False for synthesis mode; exploration enables thinking
         self._initialized = False
 
     def _get_available_models(self) -> List[str]:
diff --git a/rag-mini.py b/rag-mini.py
index 5fc8ddb..7ec27ec 100644
--- a/rag-mini.py
+++ b/rag-mini.py
@@ -134,6 +134,12 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
         synthesis = synthesizer.synthesize_search_results(query, results, project_path)
         print()
         print(synthesizer.format_synthesis_output(synthesis, query))
+
+        # Suggest exploration mode for queries that call for deeper analysis
+        if synthesis.confidence < 0.7 or any(word in query.lower() for word in ['why', 'how', 'explain', 'debug']):
+            print("\nšŸ’” Want deeper analysis with reasoning?")
+            print(f"   Try: rag-mini explore {project_path}")
+            print("   Exploration mode enables thinking and remembers conversation context.")
     else:
         print("āŒ LLM synthesis unavailable")
         print("   • Ensure Ollama is running: ollama serve")