Compare commits

..

2 Commits

Author SHA1 Message Date
55500a2977 Integrate LLM synthesis across all interfaces and update demo
🔧 Integration Updates
- Added --synthesize flag to main rag-mini CLI
- Updated README with synthesis examples and the new 10-result default
- Enhanced demo script with 8 complete results (was cutting off at 5)
- Updated rag-tui default from 5 to 10 results
- Updated rag-mini-enhanced script defaults

📈 User Experience Improvements
- All components now consistently default to 10 results
- Demo shows complete 8-result workflow with multi-line previews
- Documentation reflects new AI analysis capabilities
- Seamless integration preserves existing workflows

Users get more comprehensive results by default and can optionally
add intelligent AI analysis with a simple --synthesize flag!
2025-08-12 17:13:21 +10:00
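As a rough, self-contained sketch of the CLI surface this commit describes (the --synthesize/-s flag plus the 10-result default), the snippet below shows how the options line up in argparse. The real wiring lives in rag-mini.py in the diff further down; the helper name build_parser here is illustrative, not the shipped code.

```
import argparse

def build_parser() -> argparse.ArgumentParser:
    # Mirrors the flags described above: 10 results by default,
    # plus an opt-in --synthesize/-s switch for LLM analysis.
    parser = argparse.ArgumentParser(prog="rag-mini")
    parser.add_argument("command", choices=["index", "search", "status"])
    parser.add_argument("project_path")
    parser.add_argument("query", nargs="?", help="Search query (for search command)")
    parser.add_argument("--limit", type=int, default=10,
                        help="Maximum number of search results")
    parser.add_argument("--synthesize", "-s", action="store_true",
                        help="Generate LLM synthesis of search results (requires Ollama)")
    return parser

if __name__ == "__main__":
    args = build_parser().parse_args(["search", "~/my-project", "auth logic", "-s"])
    print(args.limit, args.synthesize)  # -> 10 True
```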
ba28246178 Add LLM synthesis feature with smart model selection and increase default results to 10
🧠 NEW: LLM Synthesis Feature
- Intelligent analysis of RAG search results using Ollama LLMs
- Smart model selection: Qwen3 → Qwen2.5 → Mistral → Llama3.2
- Prioritizes efficient models (1.5B-3B parameters) for best performance
- Structured output: summary, key findings, code patterns, suggested actions
- Confidence scoring for result reliability
- Graceful fallback with setup instructions if Ollama unavailable

📊 Enhanced Search Experience
- Increased default search results from 5 to 10 across all components
- Updated demo script to show all 8 results with richer previews
- Better user experience with more comprehensive result sets

🎯 New CLI Options
- Added --synthesize/-s flag: rag-mini search project "query" --synthesize
- Zero-configuration setup - automatically detects best available model
- Never downloads models - only uses what's already installed

🧪 Tested with qwen3:1.7b
- Confirmed excellent performance with 1.7B parameter model
- Professional-grade analysis including security recommendations
- Fast response times with quality RAG context

Perfect for users who already have Ollama - transforms FSS-Mini-RAG
from a search tool into an AI-powered code assistant!
2025-08-12 17:12:51 +10:00
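The selection order named above (Qwen3 → Qwen2.5 → Mistral → Llama3.2, smaller variants first, never downloading anything) boils down to a ranked scan over whatever is already installed. Below is a simplified sketch of that idea, fed a hypothetical list of local Ollama tags; pick_model and the constants are illustrative, and the full ranking with size filtering is in llm_synthesizer.py in the diff below.

```
from typing import List, Optional

# Preference order from the commit message: Qwen3 > Qwen2.5 > Mistral > Llama3.2.
PREFERRED_FAMILIES = ["qwen3", "qwen2.5", "mistral", "llama3.2"]
SMALL_SIZES = ("0.6b", "1b", "1.5b", "1.7b", "2b", "3b")  # efficient 1.5B-3B class first

def pick_model(installed: List[str]) -> Optional[str]:
    """Pick the best already-installed model; never triggers a download."""
    for family in PREFERRED_FAMILIES:
        candidates = [m for m in installed if m.lower().startswith(family)]
        small = [m for m in candidates if any(s in m.lower() for s in SMALL_SIZES)]
        if small:          # prefer the efficient variants of a preferred family
            return small[0]
        if candidates:     # otherwise any variant of that family
            return candidates[0]
    return installed[0] if installed else None  # last resort: whatever exists

# Hypothetical local install: the preferred Qwen2.5 family wins over the larger Llama.
print(pick_model(["llama3.2:3b", "qwen2.5-coder:1.5b"]))  # -> qwen2.5-coder:1.5b
```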
6 changed files with 375 additions and 11 deletions

View File

@@ -39,7 +39,8 @@ FSS-Mini-RAG is a distilled, lightweight implementation of a production-quality
./rag-tui # Friendly interface for beginners
# OR
./rag-mini index ~/my-project # Direct CLI for developers
./rag-mini search ~/my-project "authentication logic"
./rag-mini search ~/my-project "authentication logic" # 10 results
./rag-mini search ~/my-project "error handling" --synthesize # AI analysis
```
That's it. No external dependencies, no configuration required, no PhD in computer science needed.

View File

@@ -0,0 +1,326 @@
#!/usr/bin/env python3
"""
LLM Synthesizer for RAG Results
Provides intelligent synthesis of search results using Ollama LLMs.
Takes raw search results and generates coherent, contextual summaries.
"""
import json
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import requests
from pathlib import Path
logger = logging.getLogger(__name__)
@dataclass
class SynthesisResult:
"""Result of LLM synthesis."""
summary: str
key_points: List[str]
code_examples: List[str]
suggested_actions: List[str]
confidence: float
class LLMSynthesizer:
"""Synthesizes RAG search results using Ollama LLMs."""
def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None):
self.ollama_url = ollama_url.rstrip('/')
self.available_models = self._get_available_models()
self.model = model or self._select_best_model()
def _get_available_models(self) -> List[str]:
"""Get list of available Ollama models."""
try:
response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
if response.status_code == 200:
data = response.json()
return [model['name'] for model in data.get('models', [])]
except Exception as e:
logger.warning(f"Could not fetch Ollama models: {e}")
return []
def _select_best_model(self) -> str:
"""Select the best available model based on modern performance rankings."""
if not self.available_models:
return "qwen2.5:1.5b" # Fallback preference
# Modern model preference ranking (best to acceptable)
# Prioritize: Qwen3 > Qwen2.5 > Mistral > Llama3.2 > Others
model_rankings = [
# Qwen3 models (newest, most efficient) - prefer standard versions
"qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "qwen3:8b",
# Qwen2.5 models (excellent performance/size ratio)
"qwen2.5-coder:1.5b", "qwen2.5:1.5b", "qwen2.5:3b", "qwen2.5-coder:3b",
"qwen2.5:7b", "qwen2.5-coder:7b",
# Qwen2 models (older but still good)
"qwen2:1.5b", "qwen2:3b", "qwen2:7b",
# Mistral models (good quality, reasonable size)
"mistral:7b", "mistral-nemo", "mistral-small",
# Llama3.2 models (decent but larger)
"llama3.2:1b", "llama3.2:3b", "llama3.2", "llama3.2:8b",
# Fallback to other Llama models
"llama3.1:8b", "llama3:8b", "llama3",
# Other decent models
"gemma2:2b", "gemma2:9b", "phi3:3.8b", "phi3.5",
]
# Find first available model from our ranked list
for preferred_model in model_rankings:
for available_model in self.available_models:
# Match model names (handle version tags)
available_base = available_model.split(':')[0].lower()
preferred_base = preferred_model.split(':')[0].lower()
if preferred_base in available_base or available_base in preferred_base:
# Additional size filtering - prefer smaller models
if any(size in available_model.lower() for size in ['1b', '1.5b', '2b', '3b']):
logger.info(f"Selected efficient model: {available_model}")
return available_model
elif any(size in available_model.lower() for size in ['7b', '8b']):
# Only use larger models if no smaller ones available
logger.info(f"Selected larger model: {available_model}")
return available_model
elif ':' not in available_model:
# Handle models without explicit size tags
return available_model
# If no preferred models found, use first available
fallback = self.available_models[0]
logger.warning(f"Using fallback model: {fallback}")
return fallback
def is_available(self) -> bool:
"""Check if Ollama is available and has models."""
return len(self.available_models) > 0
def _call_ollama(self, prompt: str, temperature: float = 0.3) -> Optional[str]:
"""Make a call to Ollama API."""
try:
# Use the best available model
model_to_use = self.model
if self.model not in self.available_models:
# Fallback to first available model
if self.available_models:
model_to_use = self.available_models[0]
else:
logger.error("No Ollama models available")
return None
payload = {
"model": model_to_use,
"prompt": prompt,
"stream": False,
"options": {
"temperature": temperature,
"top_p": 0.9,
"top_k": 40
}
}
response = requests.post(
f"{self.ollama_url}/api/generate",
json=payload,
timeout=30
)
if response.status_code == 200:
result = response.json()
return result.get('response', '').strip()
else:
logger.error(f"Ollama API error: {response.status_code}")
return None
except Exception as e:
logger.error(f"Ollama call failed: {e}")
return None
def synthesize_search_results(self, query: str, results: List[Any], project_path: Path) -> SynthesisResult:
"""Synthesize search results into a coherent summary."""
if not self.is_available():
return SynthesisResult(
summary="LLM synthesis unavailable (Ollama not running or no models)",
key_points=[],
code_examples=[],
suggested_actions=["Install and run Ollama with a model"],
confidence=0.0
)
# Prepare context from search results
context_parts = []
for i, result in enumerate(results[:8], 1): # Limit to top 8 results
file_path = result.file_path if hasattr(result, 'file_path') else 'unknown'
content = result.content if hasattr(result, 'content') else str(result)
score = result.score if hasattr(result, 'score') else 0.0
context_parts.append(f"""
Result {i} (Score: {score:.3f}):
File: {file_path}
Content: {content[:500]}{'...' if len(content) > 500 else ''}
""")
context = "\n".join(context_parts)
# Create synthesis prompt
prompt = f"""You are a senior software engineer analyzing code search results. Your task is to synthesize the search results into a helpful, actionable summary.
SEARCH QUERY: "{query}"
PROJECT: {project_path.name}
SEARCH RESULTS:
{context}
Please provide a synthesis in the following JSON format:
{{
"summary": "A 2-3 sentence overview of what the search results show",
"key_points": [
"Important finding 1",
"Important finding 2",
"Important finding 3"
],
"code_examples": [
"Relevant code snippet or pattern from the results",
"Another important code example"
],
"suggested_actions": [
"What the developer should do next",
"Additional recommendations"
],
"confidence": 0.85
}}
Focus on:
- What the code does and how it works
- Patterns and relationships between the results
- Practical next steps for the developer
- Code quality observations
Respond with ONLY the JSON, no other text."""
# Get LLM response
response = self._call_ollama(prompt, temperature=0.2)
if not response:
return SynthesisResult(
summary="LLM synthesis failed (API error)",
key_points=[],
code_examples=[],
suggested_actions=["Check Ollama status and try again"],
confidence=0.0
)
# Parse JSON response
try:
# Extract JSON from response (in case there's extra text)
start_idx = response.find('{')
end_idx = response.rfind('}') + 1
if start_idx >= 0 and end_idx > start_idx:
json_str = response[start_idx:end_idx]
data = json.loads(json_str)
return SynthesisResult(
summary=data.get('summary', 'No summary generated'),
key_points=data.get('key_points', []),
code_examples=data.get('code_examples', []),
suggested_actions=data.get('suggested_actions', []),
confidence=float(data.get('confidence', 0.5))
)
else:
# Fallback: use the raw response as summary
return SynthesisResult(
summary=response[:300] + '...' if len(response) > 300 else response,
key_points=[],
code_examples=[],
suggested_actions=[],
confidence=0.3
)
except Exception as e:
logger.error(f"Failed to parse LLM response: {e}")
return SynthesisResult(
summary="LLM synthesis failed (JSON parsing error)",
key_points=[],
code_examples=[],
suggested_actions=["Try the search again or check LLM output"],
confidence=0.0
)
def format_synthesis_output(self, synthesis: SynthesisResult, query: str) -> str:
"""Format synthesis result for display."""
output = []
output.append("🧠 LLM SYNTHESIS")
output.append("=" * 50)
output.append("")
output.append(f"📝 Summary:")
output.append(f" {synthesis.summary}")
output.append("")
if synthesis.key_points:
output.append("🔍 Key Findings:")
for point in synthesis.key_points:
output.append(f"{point}")
output.append("")
if synthesis.code_examples:
output.append("💡 Code Patterns:")
for example in synthesis.code_examples:
output.append(f" {example}")
output.append("")
if synthesis.suggested_actions:
output.append("🎯 Suggested Actions:")
for action in synthesis.suggested_actions:
output.append(f"{action}")
output.append("")
confidence_emoji = "🟢" if synthesis.confidence > 0.7 else "🟡" if synthesis.confidence > 0.4 else "🔴"
output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}")
output.append("")
return "\n".join(output)
# Quick test function
def test_synthesizer():
"""Test the synthesizer with sample data."""
from dataclasses import dataclass
@dataclass
class MockResult:
file_path: str
content: str
score: float
synthesizer = LLMSynthesizer()
if not synthesizer.is_available():
print("❌ Ollama not available for testing")
return
# Mock search results
results = [
MockResult("auth.py", "def authenticate_user(username, password):\n return verify_credentials(username, password)", 0.95),
MockResult("models.py", "class User:\n def login(self):\n return authenticate_user(self.username, self.password)", 0.87)
]
synthesis = synthesizer.synthesize_search_results(
"user authentication",
results,
Path("/test/project")
)
print(synthesizer.format_synthesis_output(synthesis, "user authentication"))
if __name__ == "__main__":
test_synthesizer()

View File

@@ -156,6 +156,24 @@ class DemoSimulator:
"function": "User.authenticate()",
"preview": "User model authentication method.\nQueries database for user credentials\nand handles account status checks.",
"score": "0.82"
},
{
"file": "auth/tokens.py",
"function": "generate_jwt_token()",
"preview": "Generate JWT authentication tokens.\nIncludes expiration, claims, and signature.\nSupports refresh and access token types.",
"score": "0.79"
},
{
"file": "utils/security.py",
"function": "hash_password()",
"preview": "Secure password hashing utility.\nUses bcrypt with configurable rounds.\nProvides salt generation and validation.",
"score": "0.76"
},
{
"file": "config/auth_settings.py",
"function": "load_auth_config()",
"preview": "Load authentication configuration.\nHandles JWT secrets, token expiration,\nand authentication provider settings.",
"score": "0.73"
}
]

View File

@@ -60,7 +60,7 @@ similar_search() {
echo "🔄 Finding similar patterns to: '$query'"
# Use semantic search with pattern-focused terms
pattern_query="similar to $query OR like $query OR pattern $query"
"$PYTHON" "$SCRIPT_DIR/rag-mini.py" search "$project_path" "$pattern_query" --limit 5
"$PYTHON" "$SCRIPT_DIR/rag-mini.py" search "$project_path" "$pattern_query" --limit 10
}
# Smart indexing with optimizations

View File

@@ -18,6 +18,7 @@ sys.path.insert(0, str(Path(__file__).parent))
from claude_rag.indexer import ProjectIndexer
from claude_rag.search import CodeSearcher
from claude_rag.ollama_embeddings import OllamaEmbedder
from claude_rag.llm_synthesizer import LLMSynthesizer
# Configure logging for user-friendly output
logging.basicConfig(
@@ -71,7 +72,7 @@ def index_project(project_path: Path, force: bool = False):
print(f" Use --verbose for details")
sys.exit(1)
def search_project(project_path: Path, query: str, limit: int = 5):
def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
"""Search a project directory."""
try:
# Check if indexed first
@@ -123,6 +124,21 @@ def search_project(project_path: Path, query: str, limit: int = 5):
print()
# LLM Synthesis if requested
if synthesize:
print("🧠 Generating LLM synthesis...")
synthesizer = LLMSynthesizer()
if synthesizer.is_available():
synthesis = synthesizer.synthesize_search_results(query, results, project_path)
print()
print(synthesizer.format_synthesis_output(synthesis, query))
else:
print("❌ LLM synthesis unavailable")
print(" • Ensure Ollama is running: ollama serve")
print(" • Install a model: ollama pull llama3.2")
print(" • Check connection to http://localhost:11434")
# Save last search for potential enhancements
try:
(rag_dir / 'last_search').write_text(query)
@@ -224,6 +240,7 @@ def main():
Examples:
rag-mini index /path/to/project # Index a project
rag-mini search /path/to/project "query" # Search indexed project
rag-mini search /path/to/project "query" -s # Search with LLM synthesis
rag-mini status /path/to/project # Show status
"""
)
@@ -236,10 +253,12 @@ Examples:
help='Search query (for search command)')
parser.add_argument('--force', action='store_true',
help='Force reindex all files')
parser.add_argument('--limit', type=int, default=5,
parser.add_argument('--limit', type=int, default=10,
help='Maximum number of search results')
parser.add_argument('--verbose', '-v', action='store_true',
help='Enable verbose logging')
parser.add_argument('--synthesize', '-s', action='store_true',
help='Generate LLM synthesis of search results (requires Ollama)')
args = parser.parse_args()
@@ -263,7 +282,7 @@ Examples:
if not args.query:
print("❌ Search query required")
sys.exit(1)
search_project(args.project_path, args.query, args.limit)
search_project(args.project_path, args.query, args.limit, args.synthesize)
elif args.command == 'status':
status_check(args.project_path)

View File

@@ -285,14 +285,14 @@ class SimpleTUI:
# Get result limit
try:
limit = int(self.get_input("Number of results", "5"))
limit = int(self.get_input("Number of results", "10"))
limit = max(1, min(20, limit)) # Clamp between 1-20
except ValueError:
limit = 5
limit = 10
# Show CLI command
cli_cmd = f"./rag-mini search {self.project_path} \"{query}\""
if limit != 5:
if limit != 10:
cli_cmd += f" --limit {limit}"
self.print_cli_command(cli_cmd, "Search for semantic matches")