From a7e3e6f474f26e7c656617bd9887264b2b554695 Mon Sep 17 00:00:00 2001
From: BobAi
Date: Tue, 12 Aug 2025 18:06:08 +1000
Subject: [PATCH] Add interactive exploration mode with thinking and context memory

- Create separate explore mode with thinking enabled for debugging/learning
- Add lazy loading with LLM warmup using 'testing, just say "hi"'
- Implement context-aware conversation memory across questions
- Add interactive CLI with help, summary, and session management
- Enable Qwen3 thinking mode toggle for experimentation
- Support multi-turn conversations for better debugging workflow
- Clean separation between fast synthesis and deep exploration modes
---
 claude_rag/config.py          |   1 +
 claude_rag/explorer.py        | 367 ++++++++++++++++++++++++++++++++++
 claude_rag/llm_synthesizer.py |  39 +++-
 claude_rag/query_expander.py  |  29 +++
 examples/config.yaml          |   3 +-
 rag-mini.py                   |  77 ++++++-
 6 files changed, 509 insertions(+), 7 deletions(-)
 create mode 100644 claude_rag/explorer.py

diff --git a/claude_rag/config.py b/claude_rag/config.py
index 268512a..5ae7f66 100644
--- a/claude_rag/config.py
+++ b/claude_rag/config.py
@@ -78,6 +78,7 @@ class LLMConfig:
     max_expansion_terms: int = 8      # Maximum additional terms to add
     enable_synthesis: bool = False    # Enable by default when --synthesize used
     synthesis_temperature: float = 0.3
+    enable_thinking: bool = True      # Enable thinking mode for Qwen3 models (production: True, testing: toggle)
 
 
 @dataclass
diff --git a/claude_rag/explorer.py b/claude_rag/explorer.py
new file mode 100644
index 0000000..ac28728
--- /dev/null
+++ b/claude_rag/explorer.py
@@ -0,0 +1,367 @@
+#!/usr/bin/env python3
+"""
+Interactive Code Explorer with Thinking Mode
+
+Provides multi-turn conversations with context memory for debugging and learning.
+Perfect for exploring codebases with detailed reasoning and follow-up questions.
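+
+A minimal usage sketch (the path is illustrative; assumes the project is
+already indexed and Ollama is reachable):
+
+    explorer = CodeExplorer(Path("/path/to/project"))
+    if explorer.start_exploration_session():
+        print(explorer.explore_question("How does authentication work?"))
+        print(explorer.end_session())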
+""" + +import json +import logging +import time +from typing import List, Dict, Any, Optional +from pathlib import Path +from dataclasses import dataclass + +try: + from .llm_synthesizer import LLMSynthesizer, SynthesisResult + from .search import CodeSearcher + from .config import RAGConfig +except ImportError: + # For direct testing + from llm_synthesizer import LLMSynthesizer, SynthesisResult + from search import CodeSearcher + from config import RAGConfig + +logger = logging.getLogger(__name__) + +@dataclass +class ExplorationSession: + """Track an exploration session with context history.""" + project_path: Path + conversation_history: List[Dict[str, Any]] + session_id: str + started_at: float + + def add_exchange(self, question: str, search_results: List[Any], response: SynthesisResult): + """Add a question/response exchange to the conversation history.""" + self.conversation_history.append({ + "timestamp": time.time(), + "question": question, + "search_results_count": len(search_results), + "response": { + "summary": response.summary, + "key_points": response.key_points, + "code_examples": response.code_examples, + "suggested_actions": response.suggested_actions, + "confidence": response.confidence + } + }) + +class CodeExplorer: + """Interactive code exploration with thinking and context memory.""" + + def __init__(self, project_path: Path, config: RAGConfig = None): + self.project_path = project_path + self.config = config or RAGConfig() + + # Initialize components with thinking enabled + self.searcher = CodeSearcher(project_path) + self.synthesizer = LLMSynthesizer( + ollama_url=f"http://{self.config.llm.ollama_host}", + model=self.config.llm.synthesis_model, + enable_thinking=True # Always enable thinking in explore mode + ) + + # Session management + self.current_session: Optional[ExplorationSession] = None + + def start_exploration_session(self) -> bool: + """Start a new exploration session.""" + if not self.synthesizer.is_available(): + print("āŒ LLM service unavailable. Please check Ollama is running.") + return False + + session_id = f"explore_{int(time.time())}" + self.current_session = ExplorationSession( + project_path=self.project_path, + conversation_history=[], + session_id=session_id, + started_at=time.time() + ) + + print("🧠 EXPLORATION MODE STARTED") + print("=" * 50) + print(f"Project: {self.project_path.name}") + print(f"Session: {session_id}") + print("\nšŸŽÆ This mode uses thinking and remembers context.") + print(" Perfect for debugging, learning, and deep exploration.") + print("\nšŸ’” Tips:") + print(" • Ask follow-up questions - I'll remember our conversation") + print(" • Use 'why', 'how', 'explain' for detailed reasoning") + print(" • Type 'quit' or 'exit' to end session") + print("\n" + "=" * 50) + + return True + + def explore_question(self, question: str, context_limit: int = 10) -> Optional[str]: + """Explore a question with full thinking and context.""" + if not self.current_session: + return "āŒ No exploration session active. Start one first." 
+        
+        # Search for relevant information
+        search_start = time.time()
+        results = self.searcher.search(
+            question,
+            limit=context_limit,
+            include_context=True,
+            semantic_weight=0.7,
+            bm25_weight=0.3
+        )
+        search_time = time.time() - search_start
+        
+        # Build enhanced prompt with conversation context
+        synthesis_prompt = self._build_contextual_prompt(question, results)
+        
+        # Get thinking-enabled analysis
+        synthesis_start = time.time()
+        synthesis = self._synthesize_with_context(synthesis_prompt, results)
+        synthesis_time = time.time() - synthesis_start
+        
+        # Add to conversation history
+        self.current_session.add_exchange(question, results, synthesis)
+        
+        # Format response with exploration context
+        response = self._format_exploration_response(
+            question, synthesis, len(results), search_time, synthesis_time
+        )
+        
+        return response
+    
+    def _build_contextual_prompt(self, question: str, results: List[Any]) -> str:
+        """Build a prompt that includes conversation context."""
+        # Get recent conversation context (last 3 exchanges)
+        context_summary = ""
+        if self.current_session.conversation_history:
+            recent_exchanges = self.current_session.conversation_history[-3:]
+            context_parts = []
+            
+            for i, exchange in enumerate(recent_exchanges, 1):
+                prev_q = exchange["question"]
+                prev_summary = exchange["response"]["summary"]
+                context_parts.append(f"Previous Q{i}: {prev_q}")
+                context_parts.append(f"Previous A{i}: {prev_summary}")
+            
+            context_summary = "\n".join(context_parts)
+        
+        # Build search results context
+        results_context = []
+        for i, result in enumerate(results[:8], 1):
+            file_path = result.file_path if hasattr(result, 'file_path') else 'unknown'
+            content = result.content if hasattr(result, 'content') else str(result)
+            score = result.score if hasattr(result, 'score') else 0.0
+            
+            results_context.append(f"""
+Result {i} (Score: {score:.3f}):
+File: {file_path}
+Content: {content[:800]}{'...' if len(content) > 800 else ''}
+""")
+        
+        results_text = "\n".join(results_context)
+        
+        # Create comprehensive exploration prompt
+        prompt = f"""You are a senior software engineer helping explore and debug code. You have access to thinking mode and conversation context.
+
+PROJECT: {self.project_path.name}
+
+CONVERSATION CONTEXT:
+{context_summary}
+
+CURRENT QUESTION: "{question}"
+
+SEARCH RESULTS:
+{results_text}
+
+Please provide a detailed analysis in JSON format. Think through the problem carefully and consider the conversation context:
+
+{{
+    "summary": "2-3 sentences explaining what you found and how it relates to the question",
+    "key_points": [
+        "Important insight 1 (reference specific code/files)",
+        "Important insight 2 (explain relationships)",
+        "Important insight 3 (consider conversation context)"
+    ],
+    "code_examples": [
+        "Relevant code snippet or pattern with explanation",
+        "Another important code example with context"
+    ],
+    "suggested_actions": [
+        "Specific next step the developer should take",
+        "Follow-up investigation or debugging approach",
+        "Potential improvements or fixes"
+    ],
+    "confidence": 0.85
+}}
+
+Focus on:
+- Deep technical analysis with reasoning
+- How this connects to previous questions in our conversation
+- Practical debugging/learning insights
+- Specific code references and explanations
+- Clear next steps for the developer
+
+Think carefully about the relationships between code components and how they answer the question in context."""
+        
+        return prompt
+    
+    def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult:
+        """Synthesize results with full context and thinking."""
+        try:
+            # Use thinking-enabled synthesis with lower temperature for exploration
+            response = self.synthesizer._call_ollama(prompt, temperature=0.2)
+            
+            if not response:
+                return SynthesisResult(
+                    summary="Analysis unavailable (LLM service error)",
+                    key_points=[],
+                    code_examples=[],
+                    suggested_actions=["Check LLM service status"],
+                    confidence=0.0
+                )
+            
+            # Parse the structured response
+            try:
+                # Extract JSON from response
+                start_idx = response.find('{')
+                end_idx = response.rfind('}') + 1
+                if start_idx >= 0 and end_idx > start_idx:
+                    json_str = response[start_idx:end_idx]
+                    data = json.loads(json_str)
+                    
+                    return SynthesisResult(
+                        summary=data.get('summary', 'Analysis completed'),
+                        key_points=data.get('key_points', []),
+                        code_examples=data.get('code_examples', []),
+                        suggested_actions=data.get('suggested_actions', []),
+                        confidence=float(data.get('confidence', 0.7))
+                    )
+                else:
+                    # Fallback: use raw response as summary
+                    return SynthesisResult(
+                        summary=response[:400] + '...' if len(response) > 400 else response,
+                        key_points=[],
+                        code_examples=[],
+                        suggested_actions=[],
+                        confidence=0.5
+                    )
+                    
+            except json.JSONDecodeError:
+                return SynthesisResult(
+                    summary="Analysis completed but format parsing failed",
+                    key_points=[],
+                    code_examples=[],
+                    suggested_actions=["Try rephrasing your question"],
+                    confidence=0.3
+                )
+                
+        except Exception as e:
+            logger.error(f"Context synthesis failed: {e}")
+            return SynthesisResult(
+                summary="Analysis failed due to service error",
+                key_points=[],
+                code_examples=[],
+                suggested_actions=["Check system status and try again"],
+                confidence=0.0
+            )
+    
+    def _format_exploration_response(self, question: str, synthesis: SynthesisResult,
+                                     result_count: int, search_time: float, synthesis_time: float) -> str:
+        """Format exploration response with context indicators."""
+        
+        output = []
+        
+        # Header with session context
+        session_duration = time.time() - self.current_session.started_at
+        exchange_count = len(self.current_session.conversation_history)
+        
+        output.append(f"🧠 EXPLORATION ANALYSIS (Question #{exchange_count})")
+        output.append(f"Session: {session_duration/60:.1f}m | Results: {result_count} | "
+                      f"Time: {search_time+synthesis_time:.1f}s")
+        output.append("=" * 60)
+        output.append("")
+        
+        # Main analysis
+        output.append("šŸ“ Analysis:")
+        output.append(f"   {synthesis.summary}")
+        output.append("")
+        
+        if synthesis.key_points:
+            output.append("šŸ” Key Insights:")
+            for point in synthesis.key_points:
+                output.append(f"   • {point}")
+            output.append("")
+        
+        if synthesis.code_examples:
+            output.append("šŸ’” Code Examples:")
+            for example in synthesis.code_examples:
+                output.append(f"   {example}")
+            output.append("")
+        
+        if synthesis.suggested_actions:
+            output.append("šŸŽÆ Next Steps:")
+            for action in synthesis.suggested_actions:
+                output.append(f"   • {action}")
+            output.append("")
+        
+        # Confidence and context indicator
+        confidence_emoji = "🟢" if synthesis.confidence > 0.7 else "🟔" if synthesis.confidence > 0.4 else "šŸ”“"
+        context_indicator = f" | Context: {exchange_count-1} previous questions" if exchange_count > 1 else ""
+        output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}{context_indicator}")
+        
+        return "\n".join(output)
+    
+    def get_session_summary(self) -> str:
+        """Get a summary of the current exploration session."""
+        if not self.current_session:
+            return "No active exploration session."
+        
+        duration = time.time() - self.current_session.started_at
+        exchange_count = len(self.current_session.conversation_history)
+        
+        summary = [
+            "🧠 EXPLORATION SESSION SUMMARY",
+            "=" * 40,
+            f"Project: {self.project_path.name}",
+            f"Session ID: {self.current_session.session_id}",
+            f"Duration: {duration/60:.1f} minutes",
+            f"Questions explored: {exchange_count}",
+            "",
+        ]
+        
+        if exchange_count > 0:
+            summary.append("šŸ“‹ Topics explored:")
+            for i, exchange in enumerate(self.current_session.conversation_history, 1):
+                question = exchange["question"][:50] + "..." if len(exchange["question"]) > 50 else exchange["question"]
+                confidence = exchange["response"]["confidence"]
+                summary.append(f"   {i}. {question} (confidence: {confidence:.1%})")
+        
+        return "\n".join(summary)
+    
+    def end_session(self) -> str:
+        """End the current exploration session."""
+        if not self.current_session:
+            return "No active session to end."
+        
+        summary = self.get_session_summary()
+        self.current_session = None
+        
+        return summary + "\n\nāœ… Exploration session ended."
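+
+# Sketch: persisting a session for later review. This helper is hypothetical
+# (not wired into the CLI) and only uses the json/Path imports above.
+def save_session_history(session: ExplorationSession, out_path: Path) -> None:
+    """Dump a session's conversation history to a JSON file."""
+    out_path.write_text(json.dumps({
+        "session_id": session.session_id,
+        "project": str(session.project_path),
+        "started_at": session.started_at,
+        "exchanges": session.conversation_history,
+    }, indent=2))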
+
+# Quick test function
+def test_explorer():
+    """Test the code explorer."""
+    explorer = CodeExplorer(Path("."))
+    
+    if not explorer.start_exploration_session():
+        print("āŒ Could not start exploration session")
+        return
+    
+    # Test question
+    response = explorer.explore_question("How does authentication work in this codebase?")
+    if response:
+        print(response)
+    
+    print("\n" + explorer.end_session())
+
+if __name__ == "__main__":
+    test_explorer()
\ No newline at end of file
diff --git a/claude_rag/llm_synthesizer.py b/claude_rag/llm_synthesizer.py
index 741a91a..0f4ca36 100644
--- a/claude_rag/llm_synthesizer.py
+++ b/claude_rag/llm_synthesizer.py
@@ -27,10 +27,12 @@ class SynthesisResult:
 
 class LLMSynthesizer:
     """Synthesizes RAG search results using Ollama LLMs."""
     
-    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None):
+    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = True):
         self.ollama_url = ollama_url.rstrip('/')
-        self.available_models = self._get_available_models()
-        self.model = model or self._select_best_model()
+        self.available_models = []
+        self.model = model
+        self.enable_thinking = enable_thinking
+        self._initialized = False
     
     def _get_available_models(self) -> List[str]:
         """Get list of available Ollama models."""
@@ -102,11 +104,31 @@ class LLMSynthesizer:
             logger.warning(f"Using fallback model: {fallback}")
             return fallback
     
+    def _ensure_initialized(self):
+        """Lazy initialization with LLM warmup."""
+        if self._initialized:
+            return
+        
+        # Load available models
+        self.available_models = self._get_available_models()
+        if not self.model:
+            self.model = self._select_best_model()
+        
+        # Warm up LLM with minimal request (response is ignored)
+        if self.available_models:
+            try:
+                self._call_ollama("testing, just say 'hi'", temperature=0.1, disable_thinking=True)
+            except Exception:
+                pass  # Warmup failure is non-critical
+        
+        self._initialized = True
+    
     def is_available(self) -> bool:
         """Check if Ollama is available and has models."""
+        self._ensure_initialized()
        return len(self.available_models) > 0
     
-    def _call_ollama(self, prompt: str, temperature: float = 0.3) -> Optional[str]:
+    def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False) -> Optional[str]:
         """Make a call to Ollama API."""
         try:
             # Use the best available model
@@ -119,9 +141,15 @@ class LLMSynthesizer:
                 logger.error("No Ollama models available")
                 return None
             
+            # Handle Qwen3 thinking mode: append the "/no_think" soft switch when disabled
+            final_prompt = prompt
+            if not self.enable_thinking or disable_thinking:
+                if not final_prompt.endswith("/no_think"):
+                    final_prompt += " /no_think"
+            
             payload = {
                 "model": model_to_use,
-                "prompt": prompt,
+                "prompt": final_prompt,
                 "stream": False,
                 "options": {
                     "temperature": temperature,
@@ -150,6 +178,7 @@ class LLMSynthesizer:
     def synthesize_search_results(self, query: str, results: List[Any], 
                                   project_path: Path) -> SynthesisResult:
         """Synthesize search results into a coherent summary."""
+        self._ensure_initialized()
         if not self.is_available():
             return SynthesisResult(
                 summary="LLM synthesis unavailable (Ollama not running or no models)",
diff --git a/claude_rag/query_expander.py b/claude_rag/query_expander.py
index 2bf784d..c59b708 100644
--- a/claude_rag/query_expander.py
+++ b/claude_rag/query_expander.py
@@ -47,15 +47,43 @@ class QueryExpander:
         self.model = config.llm.expansion_model
         self.max_terms = config.llm.max_expansion_terms
         self.enabled = config.search.expand_queries
+        self._initialized = False
         
         # Cache for expanded queries to avoid repeated API calls
         self._cache = {}
     
+    def _ensure_initialized(self):
+        """Lazy initialization with LLM warmup."""
+        if self._initialized:
+            return
+        
+        # Warm up LLM if enabled and available
+        if self.enabled:
+            try:
+                model = self._select_expansion_model()
+                if model:
+                    requests.post(
+                        f"{self.ollama_url}/api/generate",
+                        json={
+                            "model": model,
+                            "prompt": "testing, just say 'hi' /no_think",
+                            "stream": False,
+                            "options": {"temperature": 0.1, "num_predict": 5}
+                        },
+                        timeout=5
+                    )
+            except Exception:
+                pass  # Warmup failure is non-critical
+        
+        self._initialized = True
+    
     def expand_query(self, query: str) -> str:
         """Expand a search query with related terms."""
         if not self.enabled or not query.strip():
             return query
         
+        self._ensure_initialized()
+        
         # Check cache first
         if query in self._cache:
             return self._cache[query]
@@ -207,6 +235,7 @@ Expanded query:"""
         if not self.enabled:
             return False
         
+        self._ensure_initialized()
         try:
             response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
             return response.status_code == 200
diff --git a/examples/config.yaml b/examples/config.yaml
index 837c455..9fe59fd 100644
--- a/examples/config.yaml
+++ b/examples/config.yaml
@@ -51,4 +51,5 @@ llm:
   max_expansion_terms: 8      # Maximum terms to add to queries
   enable_synthesis: false     # Enable synthesis by default
   synthesis_temperature: 0.3  # LLM temperature for analysis
-  cpu_optimized: true         # Prefer ultra-lightweight models for CPU-only systems
\ No newline at end of file
+  cpu_optimized: true         # Prefer ultra-lightweight models for CPU-only systems
+  enable_thinking: true       # Enable thinking mode for Qwen3 models (production: true, testing: false)
\ No newline at end of file
diff --git a/rag-mini.py b/rag-mini.py
index bca838e..5fc8ddb 100644
--- a/rag-mini.py
+++ b/rag-mini.py
@@ -19,6 +19,7 @@ from claude_rag.indexer import ProjectIndexer
 from claude_rag.search import CodeSearcher
 from claude_rag.ollama_embeddings import OllamaEmbedder
 from claude_rag.llm_synthesizer import LLMSynthesizer
+from claude_rag.explorer import CodeExplorer
 
 # Configure logging for user-friendly output
 logging.basicConfig(
@@ -231,6 +232,77 @@ def status_check(project_path: Path):
         print(f"āŒ Status check failed: {e}")
         sys.exit(1)
 
+def explore_interactive(project_path: Path):
+    """Interactive exploration mode with thinking and context memory."""
+    try:
+        explorer = CodeExplorer(project_path)
+        
+        if not explorer.start_exploration_session():
+            sys.exit(1)
+        
+        print("\nšŸ¤” Ask your first question about the codebase:")
+        
+        while True:
+            try:
+                # Get user input
+                question = input("\n> ").strip()
+                
+                # Handle exit commands
+                if question.lower() in ['quit', 'exit', 'q']:
+                    print("\n" + explorer.end_session())
+                    break
+                
+                # Handle empty input
+                if not question:
+                    print("Please enter a question or 'quit' to exit.")
+                    continue
+                
+                # Special commands
+                if question.lower() in ['help', 'h']:
+                    print("""
+🧠 EXPLORATION MODE HELP:
+   • Ask any question about the codebase
+   • I remember our conversation for follow-up questions
+   • Use 'why', 'how', 'explain' for detailed reasoning
+   • Type 'summary' to see session overview
+   • Type 'quit' or 'exit' to end session
+
+šŸ’” Example questions:
+   • "How does authentication work?"
+   • "Why is this function slow?"
+   • "Explain the database connection logic"
+   • "What are the security concerns here?"
+""") + continue + + if question.lower() == 'summary': + print("\n" + explorer.get_session_summary()) + continue + + # Process the question + print("\nšŸ” Analyzing...") + response = explorer.explore_question(question) + + if response: + print(f"\n{response}") + else: + print("āŒ Sorry, I couldn't process that question. Please try again.") + + except KeyboardInterrupt: + print(f"\n\n{explorer.end_session()}") + break + except EOFError: + print(f"\n\n{explorer.end_session()}") + break + except Exception as e: + print(f"āŒ Error processing question: {e}") + print("Please try again or type 'quit' to exit.") + + except Exception as e: + print(f"āŒ Failed to start exploration mode: {e}") + print("Make sure the project is indexed first: rag-mini index ") + sys.exit(1) + def main(): """Main CLI interface.""" parser = argparse.ArgumentParser( @@ -241,11 +313,12 @@ Examples: rag-mini index /path/to/project # Index a project rag-mini search /path/to/project "query" # Search indexed project rag-mini search /path/to/project "query" -s # Search with LLM synthesis + rag-mini explore /path/to/project # Interactive exploration mode rag-mini status /path/to/project # Show status """ ) - parser.add_argument('command', choices=['index', 'search', 'status'], + parser.add_argument('command', choices=['index', 'search', 'explore', 'status'], help='Command to execute') parser.add_argument('project_path', type=Path, help='Path to project directory (REQUIRED)') @@ -283,6 +356,8 @@ Examples: print("āŒ Search query required") sys.exit(1) search_project(args.project_path, args.query, args.limit, args.synthesize) + elif args.command == 'explore': + explore_interactive(args.project_path) elif args.command == 'status': status_check(args.project_path)