#!/usr/bin/env python3
"""
Interactive Code Explorer with Thinking Mode

Provides multi-turn conversations with context memory for debugging and
learning. Perfect for exploring codebases with detailed reasoning and
follow-up questions.
"""

import json
import logging
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional

try:
    from .config import RAGConfig
    from .llm_synthesizer import LLMSynthesizer, SynthesisResult
    from .search import CodeSearcher
    from .system_context import get_system_context
except ImportError:
    # For direct testing
    from config import RAGConfig
    from llm_synthesizer import LLMSynthesizer, SynthesisResult
    from search import CodeSearcher

    def get_system_context(x=None):
        return ""

logger = logging.getLogger(__name__)


@dataclass
class ExplorationSession:
    """Track an exploration session with context history."""

    project_path: Path
    conversation_history: List[Dict[str, Any]]
    session_id: str
    started_at: float

    def add_exchange(
        self, question: str, search_results: List[Any], response: SynthesisResult
    ):
        """Add a question/response exchange to the conversation history."""
        self.conversation_history.append(
            {
                "timestamp": time.time(),
                "question": question,
                "search_results_count": len(search_results),
                "response": {
                    "summary": response.summary,
                    "key_points": response.key_points,
                    "code_examples": response.code_examples,
                    "suggested_actions": response.suggested_actions,
                    "confidence": response.confidence,
                },
            }
        )


class CodeExplorer:
    """Interactive code exploration with thinking and context memory."""

    def __init__(self, project_path: Path, config: Optional[RAGConfig] = None):
        self.project_path = project_path
        self.config = config or RAGConfig()

        # Initialize components with thinking enabled
        self.searcher = CodeSearcher(project_path)
        self.synthesizer = LLMSynthesizer(
            ollama_url=f"http://{self.config.llm.ollama_host}",
            model=self.config.llm.synthesis_model,
            enable_thinking=True,  # Always enable thinking in explore mode
            config=self.config,  # Pass config for model rankings
        )

        # Session management
        self.current_session: Optional[ExplorationSession] = None

    def start_exploration_session(self) -> bool:
        """Start a new exploration session."""
        # Simple availability check - don't do complex model restart logic
        if not self.synthesizer.is_available():
            print("āŒ LLM service unavailable. Please check Ollama is running.")
            return False

        session_id = f"explore_{int(time.time())}"
        self.current_session = ExplorationSession(
            project_path=self.project_path,
            conversation_history=[],
            session_id=session_id,
            started_at=time.time(),
        )

        print("🧠 Exploration Mode Started")
        print(f"Project: {self.project_path.name}")
        return True

    def explore_question(self, question: str, context_limit: int = 10) -> Optional[str]:
        """Explore a question with full thinking and context."""
        if not self.current_session:
            return "āŒ No exploration session active. Start one first."
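
        # Hybrid retrieval: semantic_weight / bm25_weight below blend
        # embedding similarity with BM25 keyword scores, so 0.7/0.3 favors
        # meaning over exact token matches. (Weight semantics assumed from
        # CodeSearcher's parameter names.)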
        # Search for relevant information
        search_start = time.time()
        results = self.searcher.search(
            question,
            top_k=context_limit,
            include_context=True,
            semantic_weight=0.7,
            bm25_weight=0.3,
        )
        search_time = time.time() - search_start

        # Build enhanced prompt with conversation context
        synthesis_prompt = self._build_contextual_prompt(question, results)

        # Get thinking-enabled analysis
        synthesis_start = time.time()
        synthesis = self._synthesize_with_context(synthesis_prompt, results)
        synthesis_time = time.time() - synthesis_start

        # Add to conversation history
        self.current_session.add_exchange(question, results, synthesis)

        # Streaming already displayed the response,
        # so just return a minimal status line for the caller
        session_duration = time.time() - self.current_session.started_at
        exchange_count = len(self.current_session.conversation_history)
        status = (
            f"\nšŸ“Š Session: {session_duration/60:.1f}m | Question #{exchange_count} | "
            f"Results: {len(results)} | Time: {search_time+synthesis_time:.1f}s"
        )
        return status

    def _build_contextual_prompt(self, question: str, results: List[Any]) -> str:
        """Build a prompt that includes conversation context."""
        # Get recent conversation context (last 3 exchanges)
        context_parts = []
        if self.current_session.conversation_history:
            recent_exchanges = self.current_session.conversation_history[-3:]
            for i, exchange in enumerate(recent_exchanges, 1):
                prev_q = exchange["question"]
                prev_summary = exchange["response"]["summary"]
                context_parts.append(f"Previous Q{i}: {prev_q}")
                context_parts.append(f"Previous A{i}: {prev_summary}")
        context_summary = "\n".join(context_parts)

        # Build search results context
        results_context = []
        for i, result in enumerate(results[:8], 1):
            file_path = result.file_path if hasattr(result, "file_path") else "unknown"
            content = result.content if hasattr(result, "content") else str(result)
            score = result.score if hasattr(result, "score") else 0.0
            results_context.append(
                f"""
Result {i} (Score: {score:.3f}):
File: {file_path}
Content: {content[:800]}{'...' if len(content) > 800 else ''}
"""
            )
        results_text = "\n".join(results_context)

        # Get system context for better responses
        system_context = get_system_context(self.project_path)

        # Create comprehensive exploration prompt with thinking
        prompt = f"""
The user asked: "{question}"

System context: {system_context}

Let me analyze what they're asking and look at the information I have available.

From the search results, I can see relevant information about:
{results_text[:500]}...

I should think about:
1. What the user is trying to understand or accomplish
2. What information from the search results is most relevant
3. How to explain this in a clear, educational way
4. What practical next steps would be helpful

Based on our conversation so far:
{context_summary}

Let me create a helpful response that breaks this down clearly and gives them actionable guidance.

You're a helpful assistant exploring a project with someone. You're good at breaking down complex topics into understandable pieces and explaining things clearly.

PROJECT: {self.project_path.name}

PREVIOUS CONVERSATION:
{context_summary}

CURRENT QUESTION: "{question}"

RELEVANT INFORMATION FOUND:
{results_text}

Please provide a helpful, natural explanation that answers their question. Write as if you're having a friendly conversation with a colleague who's exploring this project.

Structure your response to include:
1. A clear explanation of what you found and how it answers their question
2. The most important insights from the information you discovered
3. Relevant examples or code patterns when helpful
4. Practical next steps they could take

Guidelines:
- Write in a conversational, friendly tone
- Be educational but not condescending
- Reference specific files and information when helpful
- Give practical, actionable suggestions
- Connect everything back to their original question
- Use natural language, not structured formats
- Break complex topics into understandable pieces
"""
        return prompt
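
    # _synthesize_with_context (below) reuses the synthesizer's private
    # _call_ollama hook: temperature 0.2 keeps the narrative answer focused,
    # and streaming keeps time-to-first-token low while thinking stays visible.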
    def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult:
        """Synthesize results with full context and thinking."""
        try:
            # Use streaming with thinking visible (don't collapse)
            response = self.synthesizer._call_ollama(
                prompt,
                temperature=0.2,
                disable_thinking=False,
                use_streaming=True,
                collapse_thinking=False,
            )

            # Streaming already shows thinking and response,
            # so no additional indicators are needed
            if not response:
                return SynthesisResult(
                    summary="Analysis unavailable (LLM service error)",
                    key_points=[],
                    code_examples=[],
                    suggested_actions=["Check LLM service status"],
                    confidence=0.0,
                )

            # Use natural language response directly
            return SynthesisResult(
                summary=response.strip(),
                key_points=[],  # Not used with natural language responses
                code_examples=[],  # Not used with natural language responses
                suggested_actions=[],  # Not used with natural language responses
                confidence=0.85,  # High confidence for natural responses
            )

        except Exception as e:
            logger.error(f"Context synthesis failed: {e}")
            return SynthesisResult(
                summary="Analysis failed due to service error",
                key_points=[],
                code_examples=[],
                suggested_actions=["Check system status and try again"],
                confidence=0.0,
            )

    def _format_exploration_response(
        self,
        question: str,
        synthesis: SynthesisResult,
        result_count: int,
        search_time: float,
        synthesis_time: float,
    ) -> str:
        """Format exploration response with context indicators."""
        output = []

        # Header with session context
        session_duration = time.time() - self.current_session.started_at
        exchange_count = len(self.current_session.conversation_history)

        output.append(f"🧠 EXPLORATION ANALYSIS (Question #{exchange_count})")
        output.append(
            f"Session: {session_duration/60:.1f}m | Results: {result_count} | "
            f"Time: {search_time+synthesis_time:.1f}s"
        )
        output.append("=" * 60)
        output.append("")

        # Response was already displayed via streaming;
        # just show completion status
        output.append("āœ… Analysis complete")
        output.append("")
        output.append("")

        # Confidence and context indicator
        confidence_emoji = (
            "🟢"
            if synthesis.confidence > 0.7
            else "🟔" if synthesis.confidence > 0.4 else "šŸ”“"
        )
        context_indicator = (
            f" | Context: {exchange_count-1} previous questions"
            if exchange_count > 1
            else ""
        )
        output.append(
            f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}{context_indicator}"
        )

        return "\n".join(output)
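
    # For reference, get_session_summary below renders a plain-text report:
    # a header block (project, session ID, duration, question count) followed
    # by one "šŸ“‹ Topics explored" entry per question with its confidence.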
    def get_session_summary(self) -> str:
        """Get a summary of the current exploration session."""
        if not self.current_session:
            return "No active exploration session."

        duration = time.time() - self.current_session.started_at
        exchange_count = len(self.current_session.conversation_history)

        summary = [
            "🧠 EXPLORATION SESSION SUMMARY",
            "=" * 40,
            f"Project: {self.project_path.name}",
            f"Session ID: {self.current_session.session_id}",
            f"Duration: {duration/60:.1f} minutes",
            f"Questions explored: {exchange_count}",
            "",
        ]

        if exchange_count > 0:
            summary.append("šŸ“‹ Topics explored:")
            for i, exchange in enumerate(self.current_session.conversation_history, 1):
                question = (
                    exchange["question"][:50] + "..."
                    if len(exchange["question"]) > 50
                    else exchange["question"]
                )
                confidence = exchange["response"]["confidence"]
                summary.append(f"  {i}. {question} (confidence: {confidence:.1%})")

        return "\n".join(summary)

    def end_session(self) -> str:
        """End the current exploration session."""
        if not self.current_session:
            return "No active session to end."

        summary = self.get_session_summary()
        self.current_session = None
        return summary + "\n\nāœ… Exploration session ended."

    def _check_model_restart_needed(self) -> bool:
        """Check if a model restart would improve thinking quality."""
        try:
            # Simple heuristic: if we can detect that the model was recently
            # run with thinking disabled, suggest a restart for better
            # thinking quality.

            # Test with a simple thinking prompt to gauge response quality
            test_response = self.synthesizer._call_ollama(
                "Think briefly: what is 2+2?", temperature=0.1, disable_thinking=False
            )

            if test_response:
                # A suspiciously short response (or a bare answer) suggests
                # no-think behavior
                if len(test_response.strip()) < 10 or test_response.strip() == "4":
                    return True

        except Exception:
            pass

        return False

    def _handle_model_restart(self) -> bool:
        """Handle user confirmation and model restart."""
        try:
            print(
                "\nšŸ¤” To ensure best thinking quality, exploration mode works "
                "best with a fresh model."
            )
            print(f"   Currently running: {self.synthesizer.model}")
            print(
                "\nšŸ’” Stop current model and restart for optimal exploration? (y/N): ",
                end="",
                flush=True,
            )

            response = input().strip().lower()
            if response in ["y", "yes"]:
                print("\nšŸ”„ Stopping current model...")

                # Use the `ollama stop` command for a clean model restart
                import subprocess

                try:
                    subprocess.run(
                        ["ollama", "stop", self.synthesizer.model],
                        timeout=10,
                        capture_output=True,
                    )
                    print("āœ… Model stopped successfully.")
                    print(
                        "šŸš€ Exploration mode will restart the model with "
                        "thinking enabled..."
                    )

                    # Reset synthesizer initialization to force a fresh start
                    self.synthesizer._initialized = False
                    return True

                except subprocess.TimeoutExpired:
                    print("āš ļø Model stop timed out, continuing anyway...")
                    return False
                except FileNotFoundError:
                    print("āš ļø 'ollama' command not found, continuing with current model...")
                    return False
                except Exception as e:
                    print(f"āš ļø Error stopping model: {e}")
                    return False
            else:
                print("šŸ“ Continuing with current model...")
                return False

        except (KeyboardInterrupt, EOFError):
            print("\nšŸ“ Continuing with current model...")
            return False
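
    # Ollama's /api/generate endpoint streams newline-delimited JSON: each
    # line carries a partial {"response": "...", "done": false} record and
    # the stream ends with a {"done": true} record. The method below
    # accumulates those chunks and mirrors any <think> content to the
    # terminal as it arrives.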
    def _call_ollama_with_thinking(self, prompt: str, temperature: float = 0.3) -> tuple:
        """Call Ollama with streaming for fast time-to-first-token."""
        import requests

        try:
            # Use the synthesizer's model and connection
            model_to_use = self.synthesizer.model
            if self.synthesizer.model not in self.synthesizer.available_models:
                if self.synthesizer.available_models:
                    model_to_use = self.synthesizer.available_models[0]
                else:
                    return None, None

            # Enable thinking by NOT adding any no-think directive to the prompt
            final_prompt = prompt

            # Get optimal parameters for this model
            from .llm_optimization import get_optimal_ollama_parameters

            optimal_params = get_optimal_ollama_parameters(model_to_use)

            payload = {
                "model": model_to_use,
                "prompt": final_prompt,
                "stream": True,  # Enable streaming for fast response
                "options": {
                    "temperature": temperature,
                    "top_p": optimal_params.get("top_p", 0.9),
                    "top_k": optimal_params.get("top_k", 40),
                    "num_ctx": self.synthesizer._get_optimal_context_size(model_to_use),
                    "num_predict": optimal_params.get("num_predict", 2000),
                    "repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
                    "presence_penalty": optimal_params.get("presence_penalty", 1.0),
                },
            }

            response = requests.post(
                f"{self.synthesizer.ollama_url}/api/generate",
                json=payload,
                stream=True,
                timeout=65,
            )

            if response.status_code == 200:
                # Collect the streaming response
                raw_response = ""
                thinking_displayed = False

                for line in response.iter_lines():
                    if line:
                        try:
                            chunk_data = json.loads(line.decode("utf-8"))
                            chunk_text = chunk_data.get("response", "")
                            if chunk_text:
                                raw_response += chunk_text

                                # Display the thinking stream as it comes in
                                if not thinking_displayed and "<think>" in raw_response:
                                    # Start displaying thinking
                                    self._start_thinking_display()
                                    thinking_displayed = True

                                if thinking_displayed:
                                    self._stream_thinking_chunk(chunk_text)

                            if chunk_data.get("done", False):
                                break
                        except json.JSONDecodeError:
                            continue

                # Finish the thinking display if it was shown
                if thinking_displayed:
                    self._end_thinking_display()

                # Extract thinking stream and final response
                thinking_stream, final_response = self._extract_thinking(raw_response)
                return final_response, thinking_stream
            else:
                return None, None

        except Exception as e:
            logger.error(f"Thinking-enabled Ollama call failed: {e}")
            return None, None

    def _extract_thinking(self, raw_response: str) -> tuple:
        """Extract thinking content from a response."""
        thinking_stream = ""
        final_response = raw_response

        # Look for <think>...</think> patterns
        if "<think>" in raw_response and "</think>" in raw_response:
            # Extract thinking content between the tags
            start_tag = raw_response.find("<think>")
            end_tag = raw_response.find("</think>") + len("</think>")

            if start_tag != -1 and end_tag != -1:
                # Slice off the tags themselves
                thinking_content = raw_response[start_tag + 7 : end_tag - 8]
                thinking_stream = thinking_content.strip()

                # Remove thinking from the final response
                final_response = (raw_response[:start_tag] + raw_response[end_tag:]).strip()
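
        # For example, "<think>inspect auth flow</think>Login lives in auth.py"
        # splits into thinking_stream "inspect auth flow" and final_response
        # "Login lives in auth.py".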
        # Alternative patterns for models that use different thinking formats
        elif "Let me think" in raw_response or "I need to analyze" in raw_response:
            # Simple heuristic: the first paragraph might be thinking
            lines = raw_response.split("\n")
            potential_thinking = []
            final_lines = []

            thinking_indicators = [
                "Let me think",
                "I need to",
                "First, I'll",
                "Looking at",
                "Analyzing",
            ]

            in_thinking = False
            for line in lines:
                if any(indicator in line for indicator in thinking_indicators):
                    in_thinking = True
                    potential_thinking.append(line)
                elif in_thinking and (
                    line.startswith("{") or line.startswith("**") or line.startswith("#")
                ):
                    # Likely end of thinking, start of structured response
                    in_thinking = False
                    final_lines.append(line)
                elif in_thinking:
                    potential_thinking.append(line)
                else:
                    final_lines.append(line)

            if potential_thinking:
                thinking_stream = "\n".join(potential_thinking).strip()
                final_response = "\n".join(final_lines).strip()

        return thinking_stream, final_response

    def _start_thinking_display(self):
        """Start the thinking stream display."""
        print("\n\033[2m\033[3mšŸ’­ AI Thinking:\033[0m")
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        self._thinking_buffer = ""
        self._in_thinking_tags = False

    def _stream_thinking_chunk(self, chunk: str):
        """Stream a chunk of thinking as it arrives."""
        self._thinking_buffer += chunk

        # Check whether we are inside <think> tags
        if "<think>" in self._thinking_buffer and not self._in_thinking_tags:
            self._in_thinking_tags = True
            # Display everything after <think>
            start_idx = self._thinking_buffer.find("<think>") + 7
            thinking_content = self._thinking_buffer[start_idx:]
            if thinking_content:
                print(f"\033[2m\033[3m{thinking_content}\033[0m", end="", flush=True)
        elif self._in_thinking_tags and "</think>" not in chunk:
            # We're in thinking mode, display the chunk
            print(f"\033[2m\033[3m{chunk}\033[0m", end="", flush=True)
        elif "</think>" in self._thinking_buffer:
            # End of thinking
            self._in_thinking_tags = False

    def _end_thinking_display(self):
        """End the thinking stream display."""
        print("\n\033[2m\033[3m" + "─" * 40 + "\033[0m")
        print()

    def _display_thinking_stream(self, thinking_stream: str):
        """Display the thinking stream in light gray italics (fallback for non-streaming)."""
        if not thinking_stream:
            return

        print("\n\033[2m\033[3mšŸ’­ AI Thinking:\033[0m")
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")

        # Split into paragraphs and display with proper formatting
        paragraphs = thinking_stream.split("\n\n")
        for para in paragraphs:
            if para.strip():
                # Wrap long lines nicely
                lines = para.strip().split("\n")
                for line in lines:
                    if line.strip():
                        # Light gray and italic
                        print(f"\033[2m\033[3m{line}\033[0m")
                print()  # Paragraph spacing

        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        print()


# Quick test function
def test_explorer():
    """Test the code explorer."""
    explorer = CodeExplorer(Path("."))

    if not explorer.start_exploration_session():
        print("āŒ Could not start exploration session")
        return

    # Test question
    response = explorer.explore_question("How does authentication work in this codebase?")
    if response:
        print(response)

    print("\n" + explorer.end_session())


if __name__ == "__main__":
    test_explorer()
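

# Minimal interactive loop, sketched for manual testing. This helper is
# illustrative (not part of the package's CLI) and only uses the CodeExplorer
# API defined above.
def interactive_explorer_demo():
    """Feed questions to CodeExplorer in a loop until the user quits."""
    explorer = CodeExplorer(Path("."))
    if not explorer.start_exploration_session():
        return
    try:
        while True:
            question = input("\nexplore> ").strip()
            if not question or question.lower() in {"exit", "quit"}:
                break
            status = explorer.explore_question(question)
            if status:
                print(status)
    except (KeyboardInterrupt, EOFError):
        pass
    print("\n" + explorer.end_session())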