Compare commits
2 Commits
4166d0a362
...
55500a2977
| Author | SHA1 | Date | |
|---|---|---|---|
| 55500a2977 | |||
| ba28246178 |
@ -39,7 +39,8 @@ FSS-Mini-RAG is a distilled, lightweight implementation of a production-quality
|
||||
./rag-tui # Friendly interface for beginners
|
||||
# OR
|
||||
./rag-mini index ~/my-project # Direct CLI for developers
|
||||
./rag-mini search ~/my-project "authentication logic"
|
||||
./rag-mini search ~/my-project "authentication logic" # 10 results
|
||||
./rag-mini search ~/my-project "error handling" --synthesize # AI analysis
|
||||
```
|
||||
|
||||
That's it. No external dependencies, no configuration required, no PhD in computer science needed.
|
||||
|
||||
326
claude_rag/llm_synthesizer.py
Normal file
326
claude_rag/llm_synthesizer.py
Normal file
@ -0,0 +1,326 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
LLM Synthesizer for RAG Results
|
||||
|
||||
Provides intelligent synthesis of search results using Ollama LLMs.
|
||||
Takes raw search results and generates coherent, contextual summaries.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
from dataclasses import dataclass
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
class SynthesisResult:
    """Structured outcome of one LLM synthesis pass over search results."""

    # Short overview text; on failure paths this carries the error description.
    summary: str
    # Individual findings extracted from the search results.
    key_points: List[str]
    # Code snippets or patterns the model considered relevant.
    code_examples: List[str]
    # Follow-up steps suggested to the developer.
    suggested_actions: List[str]
    # Confidence score for the synthesis; the failure results built in this
    # module use 0.0.
    confidence: float
|
||||
|
||||
class LLMSynthesizer:
    """Synthesizes RAG search results using Ollama LLMs.

    On construction the instance asks the Ollama server which models are
    installed and, unless a model is passed explicitly, selects one from
    ``MODEL_RANKINGS``.
    """

    # Name returned by ``_select_best_model`` when the server lists no models.
    DEFAULT_MODEL = "qwen2.5:1.5b"

    # Modern model preference ranking (best to acceptable).
    # Prioritize: Qwen3 > Qwen2.5 > Qwen2 > Mistral > Llama3.2 > Others
    MODEL_RANKINGS = (
        # Qwen3 models (newest, most efficient) - prefer standard versions
        "qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "qwen3:8b",
        # Qwen2.5 models (excellent performance/size ratio)
        "qwen2.5-coder:1.5b", "qwen2.5:1.5b", "qwen2.5:3b", "qwen2.5-coder:3b",
        "qwen2.5:7b", "qwen2.5-coder:7b",
        # Qwen2 models (older but still good)
        "qwen2:1.5b", "qwen2:3b", "qwen2:7b",
        # Mistral models (good quality, reasonable size)
        "mistral:7b", "mistral-nemo", "mistral-small",
        # Llama3.2 models (decent but larger)
        "llama3.2:1b", "llama3.2:3b", "llama3.2", "llama3.2:8b",
        # Fallback to other Llama models
        "llama3.1:8b", "llama3:8b", "llama3",
        # Other decent models
        "gemma2:2b", "gemma2:9b", "phi3:3.8b", "phi3.5",
    )

    # Largest model (billions of parameters) the family fallback may pick.
    MAX_AUTO_MODEL_SIZE_B = 8.0
    # Models at or below this size are logged as "efficient".
    SMALL_MODEL_SIZE_B = 4.0
    # How many search results are included in the prompt.
    MAX_CONTEXT_RESULTS = 8
    # How many characters of each result's content are included in the prompt.
    MAX_SNIPPET_CHARS = 500

    def __init__(self, ollama_url: str = "http://localhost:11434", model: Optional[str] = None):
        """Connect to Ollama and decide which model to use.

        Args:
            ollama_url: Base URL of the Ollama server; a trailing slash is removed.
            model: Model name to use. When omitted (or empty) a model is
                chosen by ``_select_best_model``.
        """
        self.ollama_url = ollama_url.rstrip('/')
        self.available_models = self._get_available_models()
        self.model = model or self._select_best_model()

    def _get_available_models(self) -> List[str]:
        """Get list of available Ollama models.

        Returns:
            The ``name`` of every entry under ``models`` in the ``/api/tags``
            response, or an empty list when the request fails, returns a
            non-200 status, or the payload cannot be read.
        """
        try:
            response = requests.get(f"{self.ollama_url}/api/tags", timeout=5)
            if response.status_code == 200:
                data = response.json()
                return [model['name'] for model in data.get('models', [])]
        except Exception as e:
            # Best effort: an unreachable server simply means "no models".
            logger.warning(f"Could not fetch Ollama models: {e}")
        return []

    @staticmethod
    def _parameter_size(model_name: str) -> Optional[float]:
        """Return the parameter count (in billions) encoded in a model tag.

        The tag is the text after the first ``:``; it is split on ``-`` and the
        first token shaped like ``<number>b`` wins, so ``"qwen3:1.7b"`` gives
        ``1.7`` and ``"gemma2:2b-instruct-q4_0"`` gives ``2.0``. Returns
        ``None`` when no such token exists (untagged names, ``:latest``).
        """
        _, _, tag = model_name.lower().partition(':')
        for token in tag.split('-'):
            if len(token) > 1 and token.endswith('b') and token[0].isdigit():
                try:
                    return float(token[:-1])
                except ValueError:
                    continue
        return None

    def _select_best_model(self) -> str:
        """Select the best available model based on modern performance rankings.

        Selection happens in three steps:

        1. The first entry of ``MODEL_RANKINGS`` that is installed under that
           exact name (case-insensitive) wins. Untagged ranking entries also
           match ``<name>:latest``.
        2. Otherwise, families are tried in ranking order. A model belongs to
           a family when either base name (text before ``:``) contains the
           other. Within a family the smallest model whose parsed size is at
           most ``MAX_AUTO_MODEL_SIZE_B`` is chosen; failing that, the first
           model with no parseable size.
        3. Otherwise the first installed model is used.

        Sizes are parsed numerically rather than by substring so that e.g.
        ``32b`` is not mistaken for ``2b`` and ``1.7b`` is not mistaken for
        ``7b``.

        Returns:
            The chosen model name as reported by the server, or
            ``DEFAULT_MODEL`` when no models are installed.
        """
        if not self.available_models:
            return self.DEFAULT_MODEL  # Fallback preference

        # Step 1: exact ranked names, honouring the ranking order.
        installed = {}
        for name in self.available_models:
            installed.setdefault(name.lower(), name)
        for preferred in self.MODEL_RANKINGS:
            aliases = [preferred] if ':' in preferred else [preferred, f"{preferred}:latest"]
            for alias in aliases:
                if alias in installed:
                    logger.info("Selected ranked model: %s", installed[alias])
                    return installed[alias]

        # Step 2: any variant of a ranked family, smallest first.
        seen_families = set()
        for preferred in self.MODEL_RANKINGS:
            family = preferred.split(':')[0]
            if family in seen_families:
                continue
            seen_families.add(family)

            sized = []
            unsized = []
            for name in self.available_models:
                base = name.split(':')[0].lower()
                if not base or not (family in base or base in family):
                    continue
                size = self._parameter_size(name)
                if size is None:
                    unsized.append(name)
                elif size <= self.MAX_AUTO_MODEL_SIZE_B:
                    sized.append((size, name))

            if sized:
                size, chosen = min(sized, key=lambda item: item[0])
                if size <= self.SMALL_MODEL_SIZE_B:
                    logger.info("Selected efficient model: %s", chosen)
                else:
                    # Only use larger models if no smaller ones available
                    logger.info("Selected larger model: %s", chosen)
                return chosen
            if unsized:
                # Handle models without explicit size tags
                return unsized[0]

        # Step 3: if no preferred models found, use first available
        fallback = self.available_models[0]
        logger.warning(f"Using fallback model: {fallback}")
        return fallback

    def is_available(self) -> bool:
        """Check if Ollama is available and has models."""
        return len(self.available_models) > 0

    def _is_installed(self, name: str) -> bool:
        """Return True when ``name`` is listed, treating a missing tag as ``:latest``."""
        if name in self.available_models:
            return True
        return ':' not in name and f"{name}:latest" in self.available_models

    def _call_ollama(self, prompt: str, temperature: float = 0.3) -> Optional[str]:
        """Make a call to Ollama API.

        Args:
            prompt: Full prompt text sent to ``/api/generate``.
            temperature: Sampling temperature forwarded in ``options``.

        Returns:
            The stripped ``response`` text, or ``None`` when no model is
            installed, the server answers with a non-200 status, or the
            request raises.
        """
        try:
            model_to_use = self.model
            if not self._is_installed(self.model):
                if not self.available_models:
                    logger.error("No Ollama models available")
                    return None
                # Fallback to first available model
                model_to_use = self.available_models[0]
                logger.warning(
                    "Model %s is not installed; using %s instead",
                    self.model, model_to_use,
                )

            payload = {
                "model": model_to_use,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": temperature,
                    "top_p": 0.9,
                    "top_k": 40,
                },
            }

            response = requests.post(
                f"{self.ollama_url}/api/generate",
                json=payload,
                timeout=30,
            )

            if response.status_code == 200:
                result = response.json()
                return result.get('response', '').strip()

            logger.error(f"Ollama API error: {response.status_code}")
            return None

        except Exception as e:
            # Deliberate best-effort boundary: callers only distinguish
            # "got text" from "got nothing".
            logger.error(f"Ollama call failed: {e}")
            return None

    @staticmethod
    def _coerce_float(value: Any, default: float) -> float:
        """Convert ``value`` to float, returning ``default`` when that is impossible."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _as_str_list(value: Any) -> List[str]:
        """Normalise a JSON field to a list of strings.

        ``None`` becomes ``[]``, a list/tuple is converted element-wise, and any
        other single value (e.g. a bare string) becomes a one-element list so
        it is never iterated character by character.
        """
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    @staticmethod
    def _failed(summary: str, action: str) -> "SynthesisResult":
        """Build the zero-confidence result used for every failure path."""
        return SynthesisResult(
            summary=summary,
            key_points=[],
            code_examples=[],
            suggested_actions=[action],
            confidence=0.0,
        )

    def _build_context(self, results: List[Any]) -> str:
        """Render the top search results as the context section of the prompt.

        Each result contributes its ``file_path``, ``content`` and ``score``
        attributes when present; a result without ``content`` is rendered via
        ``str(result)``. Content is truncated to ``MAX_SNIPPET_CHARS``.
        """
        limit = self.MAX_SNIPPET_CHARS
        context_parts = []
        for i, result in enumerate((results or [])[:self.MAX_CONTEXT_RESULTS], 1):
            file_path = getattr(result, 'file_path', 'unknown')
            raw_content = getattr(result, 'content', result)
            content = '' if raw_content is None else str(raw_content)
            score = self._coerce_float(getattr(result, 'score', 0.0), 0.0)
            ellipsis = '...' if len(content) > limit else ''
            context_parts.append(
                f"\nResult {i} (Score: {score:.3f}):\n"
                f"File: {file_path}\n"
                f"Content: {content[:limit]}{ellipsis}\n"
            )
        return "\n".join(context_parts)

    def _build_prompt(self, query: str, context: str, project_path: Path) -> str:
        """Assemble the synthesis prompt, including the JSON answer template."""
        lines = [
            "You are a senior software engineer analyzing code search results. "
            "Your task is to synthesize the search results into a helpful, actionable summary.",
            "",
            f'SEARCH QUERY: "{query}"',
            f"PROJECT: {Path(project_path).name}",
            "",
            "SEARCH RESULTS:",
            context,
            "",
            "Please provide a synthesis in the following JSON format:",
            "{",
            '"summary": "A 2-3 sentence overview of what the search results show",',
            '"key_points": [',
            '"Important finding 1",',
            '"Important finding 2",',
            '"Important finding 3"',
            "],",
            '"code_examples": [',
            '"Relevant code snippet or pattern from the results",',
            '"Another important code example"',
            "],",
            '"suggested_actions": [',
            '"What the developer should do next",',
            '"Additional recommendations"',
            "],",
            '"confidence": 0.85',
            "}",
            "",
            "Focus on:",
            "- What the code does and how it works",
            "- Patterns and relationships between the results",
            "- Practical next steps for the developer",
            "- Code quality observations",
            "",
            "Respond with ONLY the JSON, no other text.",
        ]
        return "\n".join(lines)

    def _parse_synthesis(self, response: str) -> "SynthesisResult":
        """Turn the raw LLM reply into a ``SynthesisResult``.

        The JSON object is taken from the first ``{`` to the last ``}`` so
        surrounding chatter is ignored. Without any braces the reply itself
        (truncated to 300 characters) becomes the summary with confidence 0.3.
        Malformed JSON yields the zero-confidence parsing-error result.
        Confidence is coerced to float (0.5 when missing or non-numeric) and
        clamped to the 0..1 range.
        """
        start_idx = response.find('{')
        end_idx = response.rfind('}') + 1
        if start_idx < 0 or end_idx <= start_idx:
            # Fallback: use the raw response as summary
            return SynthesisResult(
                summary=response[:300] + '...' if len(response) > 300 else response,
                key_points=[],
                code_examples=[],
                suggested_actions=[],
                confidence=0.3,
            )

        try:
            data = json.loads(response[start_idx:end_idx])
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            logger.error(f"Failed to parse LLM response: {e}")
            data = None

        if not isinstance(data, dict):
            return self._failed(
                "LLM synthesis failed (JSON parsing error)",
                "Try the search again or check LLM output",
            )

        confidence = self._coerce_float(data.get('confidence'), 0.5)
        return SynthesisResult(
            summary=str(data.get('summary') or 'No summary generated'),
            key_points=self._as_str_list(data.get('key_points')),
            code_examples=self._as_str_list(data.get('code_examples')),
            suggested_actions=self._as_str_list(data.get('suggested_actions')),
            confidence=min(1.0, max(0.0, confidence)),
        )

    def synthesize_search_results(self, query: str, results: List[Any], project_path: Path) -> "SynthesisResult":
        """Synthesize search results into a coherent summary.

        Args:
            query: The search query the results answer.
            results: Search hits; only the first ``MAX_CONTEXT_RESULTS`` are used.
            project_path: Project location; only its final path component is
                put into the prompt.

        Returns:
            A ``SynthesisResult``. Failures (no models, API error, malformed
            JSON) are reported through a result with ``confidence`` 0.0 and an
            explanatory summary rather than by raising.
        """
        if not self.is_available():
            return self._failed(
                "LLM synthesis unavailable (Ollama not running or no models)",
                "Install and run Ollama with a model",
            )

        # Prepare context from search results and create the synthesis prompt
        prompt = self._build_prompt(query, self._build_context(results), project_path)

        # Get LLM response
        response = self._call_ollama(prompt, temperature=0.2)
        if not response:
            return self._failed(
                "LLM synthesis failed (API error)",
                "Check Ollama status and try again",
            )

        # Parse JSON response
        return self._parse_synthesis(response)

    def format_synthesis_output(self, synthesis: "SynthesisResult", query: str) -> str:
        """Format synthesis result for display.

        Args:
            synthesis: The result to render.
            query: Accepted for interface compatibility; not used in the output.

        Returns:
            A newline-joined block with the summary, each non-empty section,
            and a confidence line whose marker is green above 0.7, yellow
            above 0.4 and red otherwise.
        """
        output = [
            "🧠 LLM SYNTHESIS",
            "=" * 50,
            "",
            "📝 Summary:",
            f" {synthesis.summary}",
            "",
        ]

        sections = (
            ("🔍 Key Findings:", synthesis.key_points, " • "),
            ("💡 Code Patterns:", synthesis.code_examples, " "),
            ("🎯 Suggested Actions:", synthesis.suggested_actions, " • "),
        )
        for heading, items, prefix in sections:
            if items:
                output.append(heading)
                output.extend(f"{prefix}{item}" for item in items)
                output.append("")

        if synthesis.confidence > 0.7:
            confidence_emoji = "🟢"
        elif synthesis.confidence > 0.4:
            confidence_emoji = "🟡"
        else:
            confidence_emoji = "🔴"
        output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}")
        output.append("")

        return "\n".join(output)
|
||||
|
||||
# Quick test function
|
||||
def test_synthesizer():
    """Test the synthesizer with sample data.

    Builds two in-memory search hits, runs them through a default
    ``LLMSynthesizer`` and prints the formatted synthesis. Prints a notice
    and returns early when the synthesizer reports that it is not available.
    """
    from dataclasses import dataclass

    @dataclass
    class MockResult:
        # Carries the three attributes the synthesizer reads from a search hit.
        file_path: str
        content: str
        score: float

    synthesizer = LLMSynthesizer()

    if not synthesizer.is_available():
        print("❌ Ollama not available for testing")
        return

    query = "user authentication"

    # Mock search results
    results = [
        MockResult(
            "auth.py",
            "def authenticate_user(username, password):\n return verify_credentials(username, password)",
            0.95,
        ),
        MockResult(
            "models.py",
            "class User:\n def login(self):\n return authenticate_user(self.username, self.password)",
            0.87,
        ),
    ]

    synthesis = synthesizer.synthesize_search_results(
        query,
        results,
        Path("/test/project"),
    )

    print(synthesizer.format_synthesis_output(synthesis, query))
|
||||
|
||||
# Run the manual smoke test when this module is executed as a script.
if __name__ == "__main__":
    test_synthesizer()
|
||||
@ -156,6 +156,24 @@ class DemoSimulator:
|
||||
"function": "User.authenticate()",
|
||||
"preview": "User model authentication method.\nQueries database for user credentials\nand handles account status checks.",
|
||||
"score": "0.82"
|
||||
},
|
||||
{
|
||||
"file": "auth/tokens.py",
|
||||
"function": "generate_jwt_token()",
|
||||
"preview": "Generate JWT authentication tokens.\nIncludes expiration, claims, and signature.\nSupports refresh and access token types.",
|
||||
"score": "0.79"
|
||||
},
|
||||
{
|
||||
"file": "utils/security.py",
|
||||
"function": "hash_password()",
|
||||
"preview": "Secure password hashing utility.\nUses bcrypt with configurable rounds.\nProvides salt generation and validation.",
|
||||
"score": "0.76"
|
||||
},
|
||||
{
|
||||
"file": "config/auth_settings.py",
|
||||
"function": "load_auth_config()",
|
||||
"preview": "Load authentication configuration.\nHandles JWT secrets, token expiration,\nand authentication provider settings.",
|
||||
"score": "0.73"
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@ -60,7 +60,7 @@ similar_search() {
|
||||
echo "🔄 Finding similar patterns to: '$query'"
|
||||
# Use semantic search with pattern-focused terms
|
||||
pattern_query="similar to $query OR like $query OR pattern $query"
|
||||
"$PYTHON" "$SCRIPT_DIR/rag-mini.py" search "$project_path" "$pattern_query" --limit 5
|
||||
"$PYTHON" "$SCRIPT_DIR/rag-mini.py" search "$project_path" "$pattern_query" --limit 10
|
||||
}
|
||||
|
||||
# Smart indexing with optimizations
|
||||
|
||||
25
rag-mini.py
25
rag-mini.py
@ -18,6 +18,7 @@ sys.path.insert(0, str(Path(__file__).parent))
|
||||
from claude_rag.indexer import ProjectIndexer
|
||||
from claude_rag.search import CodeSearcher
|
||||
from claude_rag.ollama_embeddings import OllamaEmbedder
|
||||
from claude_rag.llm_synthesizer import LLMSynthesizer
|
||||
|
||||
# Configure logging for user-friendly output
|
||||
logging.basicConfig(
|
||||
@ -71,7 +72,7 @@ def index_project(project_path: Path, force: bool = False):
|
||||
print(f" Use --verbose for details")
|
||||
sys.exit(1)
|
||||
|
||||
def search_project(project_path: Path, query: str, limit: int = 5):
|
||||
def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
|
||||
"""Search a project directory."""
|
||||
try:
|
||||
# Check if indexed first
|
||||
@ -123,6 +124,21 @@ def search_project(project_path: Path, query: str, limit: int = 5):
|
||||
|
||||
print()
|
||||
|
||||
# LLM Synthesis if requested
|
||||
if synthesize:
|
||||
print("🧠 Generating LLM synthesis...")
|
||||
synthesizer = LLMSynthesizer()
|
||||
|
||||
if synthesizer.is_available():
|
||||
synthesis = synthesizer.synthesize_search_results(query, results, project_path)
|
||||
print()
|
||||
print(synthesizer.format_synthesis_output(synthesis, query))
|
||||
else:
|
||||
print("❌ LLM synthesis unavailable")
|
||||
print(" • Ensure Ollama is running: ollama serve")
|
||||
print(" • Install a model: ollama pull llama3.2")
|
||||
print(" • Check connection to http://localhost:11434")
|
||||
|
||||
# Save last search for potential enhancements
|
||||
try:
|
||||
(rag_dir / 'last_search').write_text(query)
|
||||
@ -224,6 +240,7 @@ def main():
|
||||
Examples:
|
||||
rag-mini index /path/to/project # Index a project
|
||||
rag-mini search /path/to/project "query" # Search indexed project
|
||||
rag-mini search /path/to/project "query" -s # Search with LLM synthesis
|
||||
rag-mini status /path/to/project # Show status
|
||||
"""
|
||||
)
|
||||
@ -236,10 +253,12 @@ Examples:
|
||||
help='Search query (for search command)')
|
||||
parser.add_argument('--force', action='store_true',
|
||||
help='Force reindex all files')
|
||||
parser.add_argument('--limit', type=int, default=5,
|
||||
parser.add_argument('--limit', type=int, default=10,
|
||||
help='Maximum number of search results')
|
||||
parser.add_argument('--verbose', '-v', action='store_true',
|
||||
help='Enable verbose logging')
|
||||
parser.add_argument('--synthesize', '-s', action='store_true',
|
||||
help='Generate LLM synthesis of search results (requires Ollama)')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@ -263,7 +282,7 @@ Examples:
|
||||
if not args.query:
|
||||
print("❌ Search query required")
|
||||
sys.exit(1)
|
||||
search_project(args.project_path, args.query, args.limit)
|
||||
search_project(args.project_path, args.query, args.limit, args.synthesize)
|
||||
elif args.command == 'status':
|
||||
status_check(args.project_path)
|
||||
|
||||
|
||||
@ -285,14 +285,14 @@ class SimpleTUI:
|
||||
|
||||
# Get result limit
|
||||
try:
|
||||
limit = int(self.get_input("Number of results", "5"))
|
||||
limit = int(self.get_input("Number of results", "10"))
|
||||
limit = max(1, min(20, limit)) # Clamp between 1-20
|
||||
except ValueError:
|
||||
limit = 5
|
||||
limit = 10
|
||||
|
||||
# Show CLI command
|
||||
cli_cmd = f"./rag-mini search {self.project_path} \"{query}\""
|
||||
if limit != 5:
|
||||
if limit != 10:
|
||||
cli_cmd += f" --limit {limit}"
|
||||
|
||||
self.print_cli_command(cli_cmd, "Search for semantic matches")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user