Compare commits

..

2 Commits

Author SHA1 Message Date
55500a2977 Integrate LLM synthesis across all interfaces and update demo
🔧 Integration Updates
- Added --synthesize flag to main rag-mini CLI
- Updated README with synthesis examples and the new 10-result default
- Enhanced demo script with 8 complete results (was cutting off at 5)
- Updated rag-tui default from 5 to 10 results
- Updated rag-mini-enhanced script defaults

📈 User Experience Improvements
- All components now consistently default to 10 results
- Demo shows complete 8-result workflow with multi-line previews
- Documentation reflects new AI analysis capabilities
- Seamless integration preserves existing workflows

Users get more comprehensive results by default and can optionally
add intelligent AI analysis with a simple --synthesize flag!
2025-08-12 17:13:21 +10:00
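As a rough, self-contained sketch of the CLI surface this commit describes (the --synthesize/-s flag plus the 10-result default), the snippet below shows how the options line up in argparse. The real wiring lives in rag-mini.py in the diff further down; the helper name build_parser here is illustrative, not the shipped code.

```
import argparse

def build_parser() -> argparse.ArgumentParser:
    # Mirrors the flags described above: 10 results by default,
    # plus an opt-in --synthesize/-s switch for LLM analysis.
    parser = argparse.ArgumentParser(prog="rag-mini")
    parser.add_argument("command", choices=["index", "search", "status"])
    parser.add_argument("project_path")
    parser.add_argument("query", nargs="?", help="Search query (for search command)")
    parser.add_argument("--limit", type=int, default=10,
                        help="Maximum number of search results")
    parser.add_argument("--synthesize", "-s", action="store_true",
                        help="Generate LLM synthesis of search results (requires Ollama)")
    return parser

if __name__ == "__main__":
    args = build_parser().parse_args(["search", "~/my-project", "auth logic", "-s"])
    print(args.limit, args.synthesize)  # -> 10 True
```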
ba28246178 Add LLM synthesis feature with smart model selection and increase default results to 10
🧠 NEW: LLM Synthesis Feature
- Intelligent analysis of RAG search results using Ollama LLMs
- Smart model selection: Qwen3 → Qwen2.5 → Mistral → Llama3.2
- Prioritizes efficient models (1.5B-3B parameters) for best performance
- Structured output: summary, key findings, code patterns, suggested actions
- Confidence scoring for result reliability
- Graceful fallback with setup instructions if Ollama unavailable

📊 Enhanced Search Experience
- Increased default search results from 5 to 10 across all components
- Updated demo script to show all 8 results with richer previews
- Better user experience with more comprehensive result sets

🎯 New CLI Options
- Added --synthesize/-s flag: rag-mini search project "query" --synthesize
- Zero-configuration setup - automatically detects best available model
- Never downloads models - only uses what's already installed

🧪 Tested with qwen3:1.7b
- Confirmed excellent performance with 1.7B parameter model
- Professional-grade analysis including security recommendations
- Fast response times with quality RAG context

Perfect for users who already have Ollama - transforms FSS-Mini-RAG
from a search tool into an AI-powered code assistant!
2025-08-12 17:12:51 +10:00
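The selection order named above (Qwen3 → Qwen2.5 → Mistral → Llama3.2, smaller variants first, never downloading anything) boils down to a ranked scan over whatever is already installed. Below is a simplified sketch of that idea, fed a hypothetical list of local Ollama tags; pick_model and the constants are illustrative, and the full ranking with size filtering is in llm_synthesizer.py in the diff below.

```
from typing import List, Optional

# Preference order from the commit message: Qwen3 > Qwen2.5 > Mistral > Llama3.2.
PREFERRED_FAMILIES = ["qwen3", "qwen2.5", "mistral", "llama3.2"]
SMALL_SIZES = ("0.6b", "1b", "1.5b", "1.7b", "2b", "3b")  # efficient 1.5B-3B class first

def pick_model(installed: List[str]) -> Optional[str]:
    """Pick the best already-installed model; never triggers a download."""
    for family in PREFERRED_FAMILIES:
        candidates = [m for m in installed if m.lower().startswith(family)]
        small = [m for m in candidates if any(s in m.lower() for s in SMALL_SIZES)]
        if small:          # prefer the efficient variants of a preferred family
            return small[0]
        if candidates:     # otherwise any variant of that family
            return candidates[0]
    return installed[0] if installed else None  # last resort: whatever exists

# Hypothetical local install: the preferred Qwen2.5 family wins over the larger Llama.
print(pick_model(["llama3.2:3b", "qwen2.5-coder:1.5b"]))  # -> qwen2.5-coder:1.5b
```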
6 changed files with 375 additions and 11 deletions

View File

@@ -39,7 +39,8 @@ FSS-Mini-RAG is a distilled, lightweight implementation of a production-quality
./rag-tui # Friendly interface for beginners
# OR
./rag-mini index ~/my-project # Direct CLI for developers
./rag-mini search ~/my-project "authentication logic"
./rag-mini search ~/my-project "authentication logic" # 10 results
./rag-mini search ~/my-project "error handling" --synthesize # AI analysis
```
That's it. No external dependencies, no configuration required, no PhD in computer science needed.

View File

@@ -0,0 +1,326 @@
#!/usr/bin/env python3
"""
LLM Synthesizer for RAG Results
Provides intelligent synthesis of search results using Ollama LLMs.
Takes raw search results and generates coherent, contextual summaries.
"""
import json
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import requests
from pathlib import Path
logger = logging.getLogger(__name__)
@dataclass
class SynthesisResult:
"""Result of LLM synthesis."""
summary: str
key_points: List[str]
code_examples: List[str]
suggested_actions: List[str]
confidence: float
class LLMSynthesizer:
"""Synthesizes RAG search results using Ollama LLMs."""
def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None):
self.ollama_url = ollama_url.rstrip('/')
self.available_models = self._get_available_models()
self.model = model or self._select_best_model()
def _get_available_models(self) -> List[str]:
"""Get list of available Ollama models."""
try:
response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
if response.status_code == 200:
data = response.json()
return [model['name'] for model in data.get('models', [])]
except Exception as e:
logger.warning(f"Could not fetch Ollama models: {e}")
return []
def _select_best_model(self) -> str:
"""Select the best available model based on modern performance rankings."""
if not self.available_models:
return "qwen2.5:1.5b" # Fallback preference
# Modern model preference ranking (best to acceptable)
# Prioritize: Qwen3 > Qwen2.5 > Mistral > Llama3.2 > Others
model_rankings = [
# Qwen3 models (newest, most efficient) - prefer standard versions
"qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "qwen3:8b",
# Qwen2.5 models (excellent performance/size ratio)
"qwen2.5-coder:1.5b", "qwen2.5:1.5b", "qwen2.5:3b", "qwen2.5-coder:3b",
"qwen2.5:7b", "qwen2.5-coder:7b",
# Qwen2 models (older but still good)
"qwen2:1.5b", "qwen2:3b", "qwen2:7b",
# Mistral models (good quality, reasonable size)
"mistral:7b", "mistral-nemo", "mistral-small",
# Llama3.2 models (decent but larger)
"llama3.2:1b", "llama3.2:3b", "llama3.2", "llama3.2:8b",
# Fallback to other Llama models
"llama3.1:8b", "llama3:8b", "llama3",
# Other decent models
"gemma2:2b", "gemma2:9b", "phi3:3.8b", "phi3.5",
]
# Find first available model from our ranked list
for preferred_model in model_rankings:
for available_model in self.available_models:
# Match model names (handle version tags)
available_base = available_model.split(':')[0].lower()
preferred_base = preferred_model.split(':')[0].lower()
if preferred_base in available_base or available_base in preferred_base:
# Additional size filtering - prefer smaller models
if any(size in available_model.lower() for size in ['1b', '1.5b', '2b', '3b']):
logger.info(f"Selected efficient model: {available_model}")
return available_model
elif any(size in available_model.lower() for size in ['7b', '8b']):
# Only use larger models if no smaller ones available
logger.info(f"Selected larger model: {available_model}")
return available_model
elif ':' not in available_model:
# Handle models without explicit size tags
return available_model
# If no preferred models found, use first available
fallback = self.available_models[0]
logger.warning(f"Using fallback model: {fallback}")
return fallback
def is_available(self) -> bool:
"""Check if Ollama is available and has models."""
return len(self.available_models) > 0
def _call_ollama(self, prompt: str, temperature: float = 0.3) -> Optional[str]:
"""Make a call to Ollama API."""
try:
# Use the best available model
model_to_use = self.model
if self.model not in self.available_models:
# Fallback to first available model
if self.available_models:
model_to_use = self.available_models[0]
else:
logger.error("No Ollama models available")
return None
payload = {
"model": model_to_use,
"prompt": prompt,
"stream": False,
"options": {
"temperature": temperature,
"top_p": 0.9,
"top_k": 40
}
}
response = requests.post(
f"{self.ollama_url}/api/generate",
json=payload,
timeout=30
)
if response.status_code == 200:
result = response.json()
return result.get('response', '').strip()
else:
logger.error(f"Ollama API error: {response.status_code}")
return None
except Exception as e:
logger.error(f"Ollama call failed: {e}")
return None
def synthesize_search_results(self, query: str, results: List[Any], project_path: Path) -> SynthesisResult:
"""Synthesize search results into a coherent summary."""
if not self.is_available():
return SynthesisResult(
summary="LLM synthesis unavailable (Ollama not running or no models)",
key_points=[],
code_examples=[],
suggested_actions=["Install and run Ollama with a model"],
confidence=0.0
)
# Prepare context from search results
context_parts = []
for i, result in enumerate(results[:8], 1): # Limit to top 8 results
file_path = result.file_path if hasattr(result, 'file_path') else 'unknown'
content = result.content if hasattr(result, 'content') else str(result)
score = result.score if hasattr(result, 'score') else 0.0
context_parts.append(f"""
Result {i} (Score: {score:.3f}):
File: {file_path}
Content: {content[:500]}{'...' if len(content) > 500 else ''}
""")
context = "\n".join(context_parts)
# Create synthesis prompt
prompt = f"""You are a senior software engineer analyzing code search results. Your task is to synthesize the search results into a helpful, actionable summary.
SEARCH QUERY: "{query}"
PROJECT: {project_path.name}
SEARCH RESULTS:
{context}
Please provide a synthesis in the following JSON format:
{{
"summary": "A 2-3 sentence overview of what the search results show",
"key_points": [
"Important finding 1",
"Important finding 2",
"Important finding 3"
],
"code_examples": [
"Relevant code snippet or pattern from the results",
"Another important code example"
],
"suggested_actions": [
"What the developer should do next",
"Additional recommendations"
],
"confidence": 0.85
}}
Focus on:
- What the code does and how it works
- Patterns and relationships between the results
- Practical next steps for the developer
- Code quality observations
Respond with ONLY the JSON, no other text."""
# Get LLM response
response = self._call_ollama(prompt, temperature=0.2)
if not response:
return SynthesisResult(
summary="LLM synthesis failed (API error)",
key_points=[],
code_examples=[],
suggested_actions=["Check Ollama status and try again"],
confidence=0.0
)
# Parse JSON response
try:
# Extract JSON from response (in case there's extra text)
start_idx = response.find('{')
end_idx = response.rfind('}') + 1
if start_idx >= 0 and end_idx > start_idx:
json_str = response[start_idx:end_idx]
data = json.loads(json_str)
return SynthesisResult(
summary=data.get('summary', 'No summary generated'),
key_points=data.get('key_points', []),
code_examples=data.get('code_examples', []),
suggested_actions=data.get('suggested_actions', []),
confidence=float(data.get('confidence', 0.5))
)
else:
# Fallback: use the raw response as summary
return SynthesisResult(
summary=response[:300] + '...' if len(response) > 300 else response,
key_points=[],
code_examples=[],
suggested_actions=[],
confidence=0.3
)
except Exception as e:
logger.error(f"Failed to parse LLM response: {e}")
return SynthesisResult(
summary="LLM synthesis failed (JSON parsing error)",
key_points=[],
code_examples=[],
suggested_actions=["Try the search again or check LLM output"],
confidence=0.0
)
def format_synthesis_output(self, synthesis: SynthesisResult, query: str) -> str:
"""Format synthesis result for display."""
output = []
output.append("🧠 LLM SYNTHESIS")
output.append("=" * 50)
output.append("")
output.append(f"📝 Summary:")
output.append(f" {synthesis.summary}")
output.append("")
if synthesis.key_points:
output.append("🔍 Key Findings:")
for point in synthesis.key_points:
output.append(f"{point}")
output.append("")
if synthesis.code_examples:
output.append("💡 Code Patterns:")
for example in synthesis.code_examples:
output.append(f" {example}")
output.append("")
if synthesis.suggested_actions:
output.append("🎯 Suggested Actions:")
for action in synthesis.suggested_actions:
output.append(f"{action}")
output.append("")
confidence_emoji = "🟢" if synthesis.confidence > 0.7 else "🟡" if synthesis.confidence > 0.4 else "🔴"
output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}")
output.append("")
return "\n".join(output)
# Quick test function
def test_synthesizer():
"""Test the synthesizer with sample data."""
from dataclasses import dataclass
@dataclass
class MockResult:
file_path: str
content: str
score: float
synthesizer = LLMSynthesizer()
if not synthesizer.is_available():
print("❌ Ollama not available for testing")
return
# Mock search results
results = [
MockResult("auth.py", "def authenticate_user(username, password):\n return verify_credentials(username, password)", 0.95),
MockResult("models.py", "class User:\n def login(self):\n return authenticate_user(self.username, self.password)", 0.87)
]
synthesis = synthesizer.synthesize_search_results(
"user authentication",
results,
Path("/test/project")
)
print(synthesizer.format_synthesis_output(synthesis, "user authentication"))
if __name__ == "__main__":
test_synthesizer()

View File

@@ -156,6 +156,24 @@ class DemoSimulator:
"function": "User.authenticate()",
"preview": "User model authentication method.\nQueries database for user credentials\nand handles account status checks.",
"score": "0.82"
},
{
"file": "auth/tokens.py",
"function": "generate_jwt_token()",
"preview": "Generate JWT authentication tokens.\nIncludes expiration, claims, and signature.\nSupports refresh and access token types.",
"score": "0.79"
},
{
"file": "utils/security.py",
"function": "hash_password()",
"preview": "Secure password hashing utility.\nUses bcrypt with configurable rounds.\nProvides salt generation and validation.",
"score": "0.76"
},
{
"file": "config/auth_settings.py",
"function": "load_auth_config()",
"preview": "Load authentication configuration.\nHandles JWT secrets, token expiration,\nand authentication provider settings.",
"score": "0.73"
}
]

View File

@@ -60,7 +60,7 @@ similar_search() {
echo "🔄 Finding similar patterns to: '$query'"
# Use semantic search with pattern-focused terms
pattern_query="similar to $query OR like $query OR pattern $query"
"$PYTHON" "$SCRIPT_DIR/rag-mini.py" search "$project_path" "$pattern_query" --limit 5
"$PYTHON" "$SCRIPT_DIR/rag-mini.py" search "$project_path" "$pattern_query" --limit 10
}
# Smart indexing with optimizations

View File

@@ -18,6 +18,7 @@ sys.path.insert(0, str(Path(__file__).parent))
from claude_rag.indexer import ProjectIndexer
from claude_rag.search import CodeSearcher
from claude_rag.ollama_embeddings import OllamaEmbedder
from claude_rag.llm_synthesizer import LLMSynthesizer
# Configure logging for user-friendly output
logging.basicConfig(
@@ -71,7 +72,7 @@ def index_project(project_path: Path, force: bool = False):
print(f" Use --verbose for details")
sys.exit(1)
def search_project(project_path: Path, query: str, limit: int = 5):
def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
"""Search a project directory."""
try:
# Check if indexed first
@@ -123,6 +124,21 @@ def search_project(project_path: Path, query: str, limit: int = 5):
print()
# LLM Synthesis if requested
if synthesize:
print("🧠 Generating LLM synthesis...")
synthesizer = LLMSynthesizer()
if synthesizer.is_available():
synthesis = synthesizer.synthesize_search_results(query, results, project_path)
print()
print(synthesizer.format_synthesis_output(synthesis, query))
else:
print("❌ LLM synthesis unavailable")
print(" • Ensure Ollama is running: ollama serve")
print(" • Install a model: ollama pull llama3.2")
print(" • Check connection to http://localhost:11434")
# Save last search for potential enhancements
try:
(rag_dir / 'last_search').write_text(query)
@@ -224,6 +240,7 @@ def main():
Examples:
rag-mini index /path/to/project # Index a project
rag-mini search /path/to/project "query" # Search indexed project
rag-mini search /path/to/project "query" -s # Search with LLM synthesis
rag-mini status /path/to/project # Show status
"""
)
@@ -236,10 +253,12 @@ Examples:
help='Search query (for search command)')
parser.add_argument('--force', action='store_true',
help='Force reindex all files')
parser.add_argument('--limit', type=int, default=5,
parser.add_argument('--limit', type=int, default=10,
help='Maximum number of search results')
parser.add_argument('--verbose', '-v', action='store_true',
help='Enable verbose logging')
parser.add_argument('--synthesize', '-s', action='store_true',
help='Generate LLM synthesis of search results (requires Ollama)')
args = parser.parse_args()
@@ -263,7 +282,7 @@ Examples:
if not args.query:
print("❌ Search query required")
sys.exit(1)
search_project(args.project_path, args.query, args.limit)
search_project(args.project_path, args.query, args.limit, args.synthesize)
elif args.command == 'status':
status_check(args.project_path)

View File

@@ -285,14 +285,14 @@ class SimpleTUI:
# Get result limit
try:
limit = int(self.get_input("Number of results", "5"))
limit = int(self.get_input("Number of results", "10"))
limit = max(1, min(20, limit)) # Clamp between 1-20
except ValueError:
limit = 5
limit = 10
# Show CLI command
cli_cmd = f"./rag-mini search {self.project_path} \"{query}\""
if limit != 5:
if limit != 10:
cli_cmd += f" --limit {limit}"
self.print_cli_command(cli_cmd, "Search for semantic matches")