From a7e3e6f474f26e7c656617bd9887264b2b554695 Mon Sep 17 00:00:00 2001
From: BobAi
Date: Tue, 12 Aug 2025 18:06:08 +1000
Subject: [PATCH] Add interactive exploration mode with thinking and context memory

- Create separate explore mode with thinking enabled for debugging/learning
- Add lazy loading with LLM warmup using 'testing, just say "hi"'
- Implement context-aware conversation memory across questions
- Add interactive CLI with help, summary, and session management
- Enable Qwen3 thinking mode toggle for experimentation
- Support multi-turn conversations for better debugging workflow
- Clean separation between fast synthesis and deep exploration modes
---
 claude_rag/config.py          |   1 +
 claude_rag/explorer.py        | 367 ++++++++++++++++++++++++++++++++++
 claude_rag/llm_synthesizer.py |  39 +++-
 claude_rag/query_expander.py  |  29 +++
 examples/config.yaml          |   3 +-
 rag-mini.py                   |  77 ++++++-
 6 files changed, 509 insertions(+), 7 deletions(-)
 create mode 100644 claude_rag/explorer.py

diff --git a/claude_rag/config.py b/claude_rag/config.py
index 268512a..5ae7f66 100644
--- a/claude_rag/config.py
+++ b/claude_rag/config.py
@@ -78,6 +78,7 @@ class LLMConfig:
     max_expansion_terms: int = 8      # Maximum additional terms to add
     enable_synthesis: bool = False    # Enable by default when --synthesize used
     synthesis_temperature: float = 0.3
+    enable_thinking: bool = True      # Enable thinking mode for Qwen3 models (production: True, testing: toggle)
 
 
 @dataclass
diff --git a/claude_rag/explorer.py b/claude_rag/explorer.py
new file mode 100644
index 0000000..ac28728
--- /dev/null
+++ b/claude_rag/explorer.py
@@ -0,0 +1,367 @@
+#!/usr/bin/env python3
+"""
+Interactive Code Explorer with Thinking Mode
+
+Provides multi-turn conversations with context memory for debugging and learning.
+Perfect for exploring codebases with detailed reasoning and follow-up questions.
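+
+A minimal usage sketch (the path is illustrative; assumes the project is
+already indexed and Ollama is reachable):
+
+    explorer = CodeExplorer(Path("/path/to/project"))
+    if explorer.start_exploration_session():
+        print(explorer.explore_question("How does authentication work?"))
+        print(explorer.end_session())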
+""" + +import json +import logging +import time +from typing import List, Dict, Any, Optional +from pathlib import Path +from dataclasses import dataclass + +try: + from .llm_synthesizer import LLMSynthesizer, SynthesisResult + from .search import CodeSearcher + from .config import RAGConfig +except ImportError: + # For direct testing + from llm_synthesizer import LLMSynthesizer, SynthesisResult + from search import CodeSearcher + from config import RAGConfig + +logger = logging.getLogger(__name__) + +@dataclass +class ExplorationSession: + """Track an exploration session with context history.""" + project_path: Path + conversation_history: List[Dict[str, Any]] + session_id: str + started_at: float + + def add_exchange(self, question: str, search_results: List[Any], response: SynthesisResult): + """Add a question/response exchange to the conversation history.""" + self.conversation_history.append({ + "timestamp": time.time(), + "question": question, + "search_results_count": len(search_results), + "response": { + "summary": response.summary, + "key_points": response.key_points, + "code_examples": response.code_examples, + "suggested_actions": response.suggested_actions, + "confidence": response.confidence + } + }) + +class CodeExplorer: + """Interactive code exploration with thinking and context memory.""" + + def __init__(self, project_path: Path, config: RAGConfig = None): + self.project_path = project_path + self.config = config or RAGConfig() + + # Initialize components with thinking enabled + self.searcher = CodeSearcher(project_path) + self.synthesizer = LLMSynthesizer( + ollama_url=f"http://{self.config.llm.ollama_host}", + model=self.config.llm.synthesis_model, + enable_thinking=True # Always enable thinking in explore mode + ) + + # Session management + self.current_session: Optional[ExplorationSession] = None + + def start_exploration_session(self) -> bool: + """Start a new exploration session.""" + if not self.synthesizer.is_available(): + print("āŒ LLM service unavailable. Please check Ollama is running.") + return False + + session_id = f"explore_{int(time.time())}" + self.current_session = ExplorationSession( + project_path=self.project_path, + conversation_history=[], + session_id=session_id, + started_at=time.time() + ) + + print("🧠 EXPLORATION MODE STARTED") + print("=" * 50) + print(f"Project: {self.project_path.name}") + print(f"Session: {session_id}") + print("\nšŸŽÆ This mode uses thinking and remembers context.") + print(" Perfect for debugging, learning, and deep exploration.") + print("\nšŸ’” Tips:") + print(" • Ask follow-up questions - I'll remember our conversation") + print(" • Use 'why', 'how', 'explain' for detailed reasoning") + print(" • Type 'quit' or 'exit' to end session") + print("\n" + "=" * 50) + + return True + + def explore_question(self, question: str, context_limit: int = 10) -> Optional[str]: + """Explore a question with full thinking and context.""" + if not self.current_session: + return "āŒ No exploration session active. Start one first." 
+        
+        # Search for relevant information
+        search_start = time.time()
+        results = self.searcher.search(
+            question,
+            limit=context_limit,
+            include_context=True,
+            semantic_weight=0.7,
+            bm25_weight=0.3
+        )
+        search_time = time.time() - search_start
+        
+        # Build enhanced prompt with conversation context
+        synthesis_prompt = self._build_contextual_prompt(question, results)
+        
+        # Get thinking-enabled analysis
+        synthesis_start = time.time()
+        synthesis = self._synthesize_with_context(synthesis_prompt, results)
+        synthesis_time = time.time() - synthesis_start
+        
+        # Add to conversation history
+        self.current_session.add_exchange(question, results, synthesis)
+        
+        # Format response with exploration context
+        response = self._format_exploration_response(
+            question, synthesis, len(results), search_time, synthesis_time
+        )
+        
+        return response
+    
+    def _build_contextual_prompt(self, question: str, results: List[Any]) -> str:
+        """Build a prompt that includes conversation context."""
+        # Get recent conversation context (last 3 exchanges)
+        context_summary = ""
+        if self.current_session.conversation_history:
+            recent_exchanges = self.current_session.conversation_history[-3:]
+            context_parts = []
+            
+            for i, exchange in enumerate(recent_exchanges, 1):
+                prev_q = exchange["question"]
+                prev_summary = exchange["response"]["summary"]
+                context_parts.append(f"Previous Q{i}: {prev_q}")
+                context_parts.append(f"Previous A{i}: {prev_summary}")
+            
+            context_summary = "\n".join(context_parts)
+        
+        # Build search results context
+        results_context = []
+        for i, result in enumerate(results[:8], 1):
+            file_path = result.file_path if hasattr(result, 'file_path') else 'unknown'
+            content = result.content if hasattr(result, 'content') else str(result)
+            score = result.score if hasattr(result, 'score') else 0.0
+            
+            results_context.append(f"""
+Result {i} (Score: {score:.3f}):
+File: {file_path}
+Content: {content[:800]}{'...' if len(content) > 800 else ''}
+""")
+        
+        results_text = "\n".join(results_context)
+        
+        # Create comprehensive exploration prompt
+        prompt = f"""You are a senior software engineer helping explore and debug code. You have access to thinking mode and conversation context.
+
+PROJECT: {self.project_path.name}
+
+CONVERSATION CONTEXT:
+{context_summary}
+
+CURRENT QUESTION: "{question}"
+
+SEARCH RESULTS:
+{results_text}
+
+Please provide a detailed analysis in JSON format. Think through the problem carefully and consider the conversation context:
+
+{{
+    "summary": "2-3 sentences explaining what you found and how it relates to the question",
+    "key_points": [
+        "Important insight 1 (reference specific code/files)",
+        "Important insight 2 (explain relationships)",
+        "Important insight 3 (consider conversation context)"
+    ],
+    "code_examples": [
+        "Relevant code snippet or pattern with explanation",
+        "Another important code example with context"
+    ],
+    "suggested_actions": [
+        "Specific next step the developer should take",
+        "Follow-up investigation or debugging approach",
+        "Potential improvements or fixes"
+    ],
+    "confidence": 0.85
+}}
+
+Focus on:
+- Deep technical analysis with reasoning
+- How this connects to previous questions in our conversation
+- Practical debugging/learning insights
+- Specific code references and explanations
+- Clear next steps for the developer
+
+Think carefully about the relationships between code components and how they answer the question in context."""
+        
+        return prompt
+    
+    def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult:
+        """Synthesize results with full context and thinking."""
+        try:
+            # Use thinking-enabled synthesis with lower temperature for exploration
+            response = self.synthesizer._call_ollama(prompt, temperature=0.2)
+            
+            if not response:
+                return SynthesisResult(
+                    summary="Analysis unavailable (LLM service error)",
+                    key_points=[],
+                    code_examples=[],
+                    suggested_actions=["Check LLM service status"],
+                    confidence=0.0
+                )
+            
+            # Parse the structured response
+            try:
+                # Extract JSON from response
+                start_idx = response.find('{')
+                end_idx = response.rfind('}') + 1
+                if start_idx >= 0 and end_idx > start_idx:
+                    json_str = response[start_idx:end_idx]
+                    data = json.loads(json_str)
+                    
+                    return SynthesisResult(
+                        summary=data.get('summary', 'Analysis completed'),
+                        key_points=data.get('key_points', []),
+                        code_examples=data.get('code_examples', []),
+                        suggested_actions=data.get('suggested_actions', []),
+                        confidence=float(data.get('confidence', 0.7))
+                    )
+                else:
+                    # Fallback: use raw response as summary
+                    return SynthesisResult(
+                        summary=response[:400] + '...' if len(response) > 400 else response,
+                        key_points=[],
+                        code_examples=[],
+                        suggested_actions=[],
+                        confidence=0.5
+                    )
+                    
+            except json.JSONDecodeError:
+                return SynthesisResult(
+                    summary="Analysis completed but format parsing failed",
+                    key_points=[],
+                    code_examples=[],
+                    suggested_actions=["Try rephrasing your question"],
+                    confidence=0.3
+                )
+                
+        except Exception as e:
+            logger.error(f"Context synthesis failed: {e}")
+            return SynthesisResult(
+                summary="Analysis failed due to service error",
+                key_points=[],
+                code_examples=[],
+                suggested_actions=["Check system status and try again"],
+                confidence=0.0
+            )
+    
+    def _format_exploration_response(self, question: str, synthesis: SynthesisResult,
+                                     result_count: int, search_time: float, synthesis_time: float) -> str:
+        """Format exploration response with context indicators."""
+        
+        output = []
+        
+        # Header with session context
+        session_duration = time.time() - self.current_session.started_at
+        exchange_count = len(self.current_session.conversation_history)
+        
+        output.append(f"🧠 EXPLORATION ANALYSIS (Question #{exchange_count})")
+        output.append(f"Session: {session_duration/60:.1f}m | Results: {result_count} | "
+                      f"Time: {search_time+synthesis_time:.1f}s")
+        output.append("=" * 60)
+        output.append("")
+        
+        # Main analysis
+        output.append("šŸ“ Analysis:")
+        output.append(f"   {synthesis.summary}")
+        output.append("")
+        
+        if synthesis.key_points:
+            output.append("šŸ” Key Insights:")
+            for point in synthesis.key_points:
+                output.append(f"   • {point}")
+            output.append("")
+        
+        if synthesis.code_examples:
+            output.append("šŸ’” Code Examples:")
+            for example in synthesis.code_examples:
+                output.append(f"   {example}")
+            output.append("")
+        
+        if synthesis.suggested_actions:
+            output.append("šŸŽÆ Next Steps:")
+            for action in synthesis.suggested_actions:
+                output.append(f"   • {action}")
+            output.append("")
+        
+        # Confidence and context indicator
+        confidence_emoji = "🟢" if synthesis.confidence > 0.7 else "🟔" if synthesis.confidence > 0.4 else "šŸ”“"
+        context_indicator = f" | Context: {exchange_count-1} previous questions" if exchange_count > 1 else ""
+        output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}{context_indicator}")
+        
+        return "\n".join(output)
+    
+    def get_session_summary(self) -> str:
+        """Get a summary of the current exploration session."""
+        if not self.current_session:
+            return "No active exploration session."
+        
+        duration = time.time() - self.current_session.started_at
+        exchange_count = len(self.current_session.conversation_history)
+        
+        summary = [
+            "🧠 EXPLORATION SESSION SUMMARY",
+            "=" * 40,
+            f"Project: {self.project_path.name}",
+            f"Session ID: {self.current_session.session_id}",
+            f"Duration: {duration/60:.1f} minutes",
+            f"Questions explored: {exchange_count}",
+            "",
+        ]
+        
+        if exchange_count > 0:
+            summary.append("šŸ“‹ Topics explored:")
+            for i, exchange in enumerate(self.current_session.conversation_history, 1):
+                question = exchange["question"][:50] + "..." if len(exchange["question"]) > 50 else exchange["question"]
+                confidence = exchange["response"]["confidence"]
+                summary.append(f"   {i}. {question} (confidence: {confidence:.1%})")
+        
+        return "\n".join(summary)
+    
+    def end_session(self) -> str:
+        """End the current exploration session."""
+        if not self.current_session:
+            return "No active session to end."
+        
+        summary = self.get_session_summary()
+        self.current_session = None
+        
+        return summary + "\n\nāœ… Exploration session ended."
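+
+# Sketch: persisting a session for later review. This helper is hypothetical
+# (not wired into the CLI) and only uses the json/Path imports above.
+def save_session_history(session: ExplorationSession, out_path: Path) -> None:
+    """Dump a session's conversation history to a JSON file."""
+    out_path.write_text(json.dumps({
+        "session_id": session.session_id,
+        "project": str(session.project_path),
+        "started_at": session.started_at,
+        "exchanges": session.conversation_history,
+    }, indent=2))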
+
+# Quick test function
+def test_explorer():
+    """Test the code explorer."""
+    explorer = CodeExplorer(Path("."))
+    
+    if not explorer.start_exploration_session():
+        print("āŒ Could not start exploration session")
+        return
+    
+    # Test question
+    response = explorer.explore_question("How does authentication work in this codebase?")
+    if response:
+        print(response)
+    
+    print("\n" + explorer.end_session())
+
+if __name__ == "__main__":
+    test_explorer()
\ No newline at end of file
diff --git a/claude_rag/llm_synthesizer.py b/claude_rag/llm_synthesizer.py
index 741a91a..0f4ca36 100644
--- a/claude_rag/llm_synthesizer.py
+++ b/claude_rag/llm_synthesizer.py
@@ -27,10 +27,12 @@ class SynthesisResult:
 
 class LLMSynthesizer:
     """Synthesizes RAG search results using Ollama LLMs."""
     
-    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None):
+    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = True):
         self.ollama_url = ollama_url.rstrip('/')
-        self.available_models = self._get_available_models()
-        self.model = model or self._select_best_model()
+        self.available_models = []
+        self.model = model
+        self.enable_thinking = enable_thinking
+        self._initialized = False
     
     def _get_available_models(self) -> List[str]:
         """Get list of available Ollama models."""
@@ -102,11 +104,31 @@ class LLMSynthesizer:
             logger.warning(f"Using fallback model: {fallback}")
             return fallback
     
+    def _ensure_initialized(self):
+        """Lazy initialization with LLM warmup."""
+        if self._initialized:
+            return
+        
+        # Load available models
+        self.available_models = self._get_available_models()
+        if not self.model:
+            self.model = self._select_best_model()
+        
+        # Warm up LLM with minimal request (response is ignored)
+        if self.available_models:
+            try:
+                self._call_ollama("testing, just say 'hi'", temperature=0.1, disable_thinking=True)
+            except Exception:
+                pass  # Warmup failure is non-critical
+        
+        self._initialized = True
+    
     def is_available(self) -> bool:
         """Check if Ollama is available and has models."""
+        self._ensure_initialized()
        return len(self.available_models) > 0
     
-    def _call_ollama(self, prompt: str, temperature: float = 0.3) -> Optional[str]:
+    def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False) -> Optional[str]:
         """Make a call to Ollama API."""
         try:
             # Use the best available model
@@ -119,9 +141,15 @@ class LLMSynthesizer:
                 logger.error("No Ollama models available")
                 return None
             
+            # Handle Qwen3 thinking mode: append the "/no_think" soft switch when disabled
+            final_prompt = prompt
+            if not self.enable_thinking or disable_thinking:
+                if not final_prompt.endswith("/no_think"):
+                    final_prompt += " /no_think"
+            
             payload = {
                 "model": model_to_use,
-                "prompt": prompt,
+                "prompt": final_prompt,
                 "stream": False,
                 "options": {
                     "temperature": temperature,
@@ -150,6 +178,7 @@ class LLMSynthesizer:
     def synthesize_search_results(self, query: str, results: List[Any], 
                                   project_path: Path) -> SynthesisResult:
         """Synthesize search results into a coherent summary."""
+        self._ensure_initialized()
         if not self.is_available():
             return SynthesisResult(
                 summary="LLM synthesis unavailable (Ollama not running or no models)",
diff --git a/claude_rag/query_expander.py b/claude_rag/query_expander.py
index 2bf784d..c59b708 100644
--- a/claude_rag/query_expander.py
+++ b/claude_rag/query_expander.py
@@ -47,15 +47,43 @@ class QueryExpander:
         self.model = config.llm.expansion_model
         self.max_terms = config.llm.max_expansion_terms
         self.enabled = config.search.expand_queries
+        self._initialized = False
         
         # Cache for expanded queries to avoid repeated API calls
         self._cache = {}
     
+    def _ensure_initialized(self):
+        """Lazy initialization with LLM warmup."""
+        if self._initialized:
+            return
+        
+        # Warm up LLM if enabled and available
+        if self.enabled:
+            try:
+                model = self._select_expansion_model()
+                if model:
+                    requests.post(
+                        f"{self.ollama_url}/api/generate",
+                        json={
+                            "model": model,
+                            "prompt": "testing, just say 'hi' /no_think",
+                            "stream": False,
+                            "options": {"temperature": 0.1, "num_predict": 5}
+                        },
+                        timeout=5
+                    )
+            except Exception:
+                pass  # Warmup failure is non-critical
+        
+        self._initialized = True
+    
     def expand_query(self, query: str) -> str:
         """Expand a search query with related terms."""
         if not self.enabled or not query.strip():
             return query
         
+        self._ensure_initialized()
+        
         # Check cache first
         if query in self._cache:
             return self._cache[query]
@@ -207,6 +235,7 @@ Expanded query:"""
         if not self.enabled:
             return False
         
+        self._ensure_initialized()
         try:
             response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
             return response.status_code == 200
diff --git a/examples/config.yaml b/examples/config.yaml
index 837c455..9fe59fd 100644
--- a/examples/config.yaml
+++ b/examples/config.yaml
@@ -51,4 +51,5 @@ llm:
   max_expansion_terms: 8      # Maximum terms to add to queries
   enable_synthesis: false     # Enable synthesis by default
   synthesis_temperature: 0.3  # LLM temperature for analysis
-  cpu_optimized: true         # Prefer ultra-lightweight models for CPU-only systems
\ No newline at end of file
+  cpu_optimized: true         # Prefer ultra-lightweight models for CPU-only systems
+  enable_thinking: true       # Enable thinking mode for Qwen3 models (production: true, testing: false)
\ No newline at end of file
diff --git a/rag-mini.py b/rag-mini.py
index bca838e..5fc8ddb 100644
--- a/rag-mini.py
+++ b/rag-mini.py
@@ -19,6 +19,7 @@ from claude_rag.indexer import ProjectIndexer
 from claude_rag.search import CodeSearcher
 from claude_rag.ollama_embeddings import OllamaEmbedder
 from claude_rag.llm_synthesizer import LLMSynthesizer
+from claude_rag.explorer import CodeExplorer
 
 # Configure logging for user-friendly output
 logging.basicConfig(
@@ -231,6 +232,77 @@ def status_check(project_path: Path):
         print(f"āŒ Status check failed: {e}")
         sys.exit(1)
 
+def explore_interactive(project_path: Path):
+    """Interactive exploration mode with thinking and context memory."""
+    try:
+        explorer = CodeExplorer(project_path)
+        
+        if not explorer.start_exploration_session():
+            sys.exit(1)
+        
+        print("\nšŸ¤” Ask your first question about the codebase:")
+        
+        while True:
+            try:
+                # Get user input
+                question = input("\n> ").strip()
+                
+                # Handle exit commands
+                if question.lower() in ['quit', 'exit', 'q']:
+                    print("\n" + explorer.end_session())
+                    break
+                
+                # Handle empty input
+                if not question:
+                    print("Please enter a question or 'quit' to exit.")
+                    continue
+                
+                # Special commands
+                if question.lower() in ['help', 'h']:
+                    print("""
+🧠 EXPLORATION MODE HELP:
+   • Ask any question about the codebase
+   • I remember our conversation for follow-up questions
+   • Use 'why', 'how', 'explain' for detailed reasoning
+   • Type 'summary' to see session overview
+   • Type 'quit' or 'exit' to end session
+
+šŸ’” Example questions:
+   • "How does authentication work?"
+   • "Why is this function slow?"
+   • "Explain the database connection logic"
+   • "What are the security concerns here?"
+""") + continue + + if question.lower() == 'summary': + print("\n" + explorer.get_session_summary()) + continue + + # Process the question + print("\nšŸ” Analyzing...") + response = explorer.explore_question(question) + + if response: + print(f"\n{response}") + else: + print("āŒ Sorry, I couldn't process that question. Please try again.") + + except KeyboardInterrupt: + print(f"\n\n{explorer.end_session()}") + break + except EOFError: + print(f"\n\n{explorer.end_session()}") + break + except Exception as e: + print(f"āŒ Error processing question: {e}") + print("Please try again or type 'quit' to exit.") + + except Exception as e: + print(f"āŒ Failed to start exploration mode: {e}") + print("Make sure the project is indexed first: rag-mini index ") + sys.exit(1) + def main(): """Main CLI interface.""" parser = argparse.ArgumentParser( @@ -241,11 +313,12 @@ Examples: rag-mini index /path/to/project # Index a project rag-mini search /path/to/project "query" # Search indexed project rag-mini search /path/to/project "query" -s # Search with LLM synthesis + rag-mini explore /path/to/project # Interactive exploration mode rag-mini status /path/to/project # Show status """ ) - parser.add_argument('command', choices=['index', 'search', 'status'], + parser.add_argument('command', choices=['index', 'search', 'explore', 'status'], help='Command to execute') parser.add_argument('project_path', type=Path, help='Path to project directory (REQUIRED)') @@ -283,6 +356,8 @@ Examples: print("āŒ Search query required") sys.exit(1) search_project(args.project_path, args.query, args.limit, args.synthesize) + elif args.command == 'explore': + explore_interactive(args.project_path) elif args.command == 'status': status_check(args.project_path)