🛡️ Add comprehensive LLM safeguards and dual-mode demo scripts
🛡️ SMART MODEL SAFEGUARDS: - Implement runaway prevention with pattern detection (repetition, thinking loops, rambling) - Add context length management with optimal parameters per model size - Quality validation prevents problematic responses before reaching users - Helpful explanations when issues occur with recovery suggestions - Model-specific parameter optimization (qwen3:0.6b vs 1.7b vs 3b+) - Timeout protection and graceful degradation ⚡ OPTIMAL PERFORMANCE SETTINGS: - Context window: 32k tokens for good balance - Repeat penalty: 1.15 for 0.6b, 1.1 for 1.7b, 1.05 for larger models - Presence penalty: 1.5 for quantized models to prevent repetition - Smart output limits: 1500 tokens for 0.6b, 2000+ for larger models - Top-p/top-k tuning based on research best practices 🎬 DUAL-MODE DEMO SCRIPTS: - create_synthesis_demo.py: Shows fast search with AI synthesis workflow - create_exploration_demo.py: Interactive thinking mode with conversation memory - Realistic typing simulation and response timing for quality GIFs - Clear demonstration of when to use each mode Perfect for creating compelling demo videos showing both RAG experiences!
This commit is contained in:
parent
3363171820
commit
5f42751e9a
320
claude_rag/llm_safeguards.py
Normal file
320
claude_rag/llm_safeguards.py
Normal file
@ -0,0 +1,320 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
LLM Safeguards for Small Model Management
|
||||||
|
|
||||||
|
Provides runaway prevention, context management, and intelligent detection
|
||||||
|
of problematic model behaviors to ensure reliable user experience.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
import re
import time
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class SafeguardConfig:
|
||||||
|
"""Configuration for LLM safeguards."""
|
||||||
|
max_output_tokens: int = 2000 # Prevent excessive generation
|
||||||
|
max_repetition_ratio: float = 0.3 # Max ratio of repeated content
|
||||||
|
max_response_time: int = 60 # Max seconds for response
|
||||||
|
min_useful_length: int = 20 # Minimum useful response length
|
||||||
|
context_window: int = 32768 # Ollama context window
|
||||||
|
enable_thinking_detection: bool = True # Detect thinking patterns
|
||||||
|
|
||||||
|
class ModelRunawayDetector:
    """Detects and prevents model runaway behaviors.

    Validates raw LLM output for common small-model failure modes
    (repetition, thinking loops, rambling, corrupted JSON) and produces
    user-facing explanations plus recovery suggestions when a response
    is rejected.
    """

    def __init__(self, config: Optional["SafeguardConfig"] = None):
        """Create a detector; falls back to default SafeguardConfig when none given."""
        self.config = config or SafeguardConfig()
        # Compiled once up front so per-response checks are cheap.
        self.response_patterns = self._compile_patterns()

    def _compile_patterns(self) -> Dict[str, re.Pattern]:
        """Compile regex patterns for runaway detection."""
        return {
            # Excessive repetition patterns
            'word_repetition': re.compile(r'\b(\w+)\b(?:\s+\1\b){3,}', re.IGNORECASE),
            'phrase_repetition': re.compile(r'(.{10,50}?)\1{2,}', re.DOTALL),

            # Thinking loop patterns (small models get stuck)
            'thinking_loop': re.compile(r'(let me think|i think|thinking|consider|actually|wait|hmm|well)\s*[.,:]*\s*\1', re.IGNORECASE),

            # Rambling patterns.
            # FIX: the previous pattern matched ANY connective word ("and",
            # "so", "then", "but", ...) followed by five or more words, which
            # flagged virtually every normal sentence as rambling.  Now we
            # require at least three genuine filler phrases within a single
            # sentence before calling it filler.
            'excessive_filler': re.compile(
                r'\b(um|uh|you know|i mean|basically|kind of|sort of)\b'
                r'(?:[^.!?]*?\b(?:um|uh|you know|i mean|basically|kind of|sort of)\b){2,}',
                re.IGNORECASE),

            # JSON corruption patterns
            'broken_json': re.compile(r'\{[^}]*\{[^}]*\{'),  # Nested broken JSON
            'json_repetition': re.compile(r'("[\w_]+"\s*:\s*"[^"]*",?\s*){4,}'),  # Repeated JSON fields
        }

    def check_response_quality(self, response: str, query: str, start_time: float) -> Tuple[bool, Optional[str], Optional[str]]:
        """
        Check response quality and detect runaway behaviors.

        Args:
            response: Raw model output to validate.
            query: Original user query (currently unused; kept for interface
                stability and future context-aware checks).
            start_time: time.time() captured when generation started.

        Returns:
            (is_valid, issue_type, user_explanation)
        """
        # Empty or trivially short output is never useful.
        if not response or len(response.strip()) < self.config.min_useful_length:
            return False, "too_short", self._explain_too_short()

        # Check response time
        elapsed = time.time() - start_time
        if elapsed > self.config.max_response_time:
            return False, "timeout", self._explain_timeout()

        # Check for repetition issues
        repetition_issue = self._check_repetition(response)
        if repetition_issue:
            return False, repetition_issue, self._explain_repetition(repetition_issue)

        # Check for thinking loops
        if self.config.enable_thinking_detection:
            thinking_issue = self._check_thinking_loops(response)
            if thinking_issue:
                return False, thinking_issue, self._explain_thinking_loop()

        # Check for rambling
        rambling_issue = self._check_rambling(response)
        if rambling_issue:
            return False, rambling_issue, self._explain_rambling()

        # Check JSON corruption (only for responses that look structured)
        if '{' in response and '}' in response:
            json_issue = self._check_json_corruption(response)
            if json_issue:
                return False, json_issue, self._explain_json_corruption()

        return True, None, None

    def _check_repetition(self, response: str) -> Optional[str]:
        """Check for excessive repetition; returns an issue tag or None."""
        # Word repetition (same word 4+ times in a row)
        if self.response_patterns['word_repetition'].search(response):
            return "word_repetition"

        # Phrase repetition (10-50 char span repeated 3+ times back-to-back)
        if self.response_patterns['phrase_repetition'].search(response):
            return "phrase_repetition"

        # Calculate repetition ratio: fraction of words that are duplicates.
        words = response.split()
        if len(words) > 10:
            unique_words = set(words)
            repetition_ratio = 1 - (len(unique_words) / len(words))
            if repetition_ratio > self.config.max_repetition_ratio:
                return "high_repetition_ratio"

        return None

    def _check_thinking_loops(self, response: str) -> Optional[str]:
        """Check for thinking loops (common in small models)."""
        if self.response_patterns['thinking_loop'].search(response):
            return "thinking_loop"

        # Check for excessive meta-commentary.  Substring counting is
        # intentional so variants like "thinking"/"thinks" also count.
        thinking_words = ['think', 'considering', 'actually', 'wait', 'hmm', 'let me']
        thinking_count = sum(response.lower().count(word) for word in thinking_words)

        # Many thinking markers in a short response = the model is spinning.
        if thinking_count > 5 and len(response.split()) < 200:
            return "excessive_thinking"

        return None

    def _check_rambling(self, response: str) -> Optional[str]:
        """Check for rambling or excessive filler."""
        if self.response_patterns['excessive_filler'].search(response):
            return "excessive_filler"

        # Check for extremely long sentences (sign of rambling)
        sentences = re.split(r'[.!?]+', response)
        long_sentences = [s for s in sentences if len(s.split()) > 50]

        if len(long_sentences) > 2:
            return "excessive_rambling"

        return None

    def _check_json_corruption(self, response: str) -> Optional[str]:
        """Check for JSON corruption in structured responses."""
        if self.response_patterns['broken_json'].search(response):
            return "broken_json"

        if self.response_patterns['json_repetition'].search(response):
            return "json_repetition"

        return None

    def _explain_too_short(self) -> str:
        """User-facing explanation for a too-short response."""
        return """🤔 The AI response was too short to be helpful.

**Why this happens:**
• The model might be confused by the query
• Context might be insufficient
• Model might be overloaded

**What to try:**
• Rephrase your question more specifically
• Try a broader search term first
• Use exploration mode for complex questions: `rag-mini explore`"""

    def _explain_timeout(self) -> str:
        """User-facing explanation for a timed-out response."""
        return """⏱️ The AI took too long to respond (over 60 seconds).

**Why this happens:**
• Small models sometimes get "stuck" thinking
• Complex queries can overwhelm smaller models
• System might be under load

**What to try:**
• Try a simpler, more direct question
• Use synthesis mode for faster responses: `--synthesize`
• Consider using a larger model if available"""

    def _explain_repetition(self, issue_type: str) -> str:
        """User-facing explanation for a repetitive response."""
        return f"""🔄 The AI got stuck in repetition loops ({issue_type}).

**Why this happens:**
• Small models sometimes repeat when uncertain
• Query might be too complex for the model size
• Context window might be exceeded

**What to try:**
• Try a more specific question
• Break complex questions into smaller parts
• Use exploration mode which handles context better: `rag-mini explore`
• Consider: A larger model (qwen3:1.7b or qwen3:3b) would help"""

    def _explain_thinking_loop(self) -> str:
        """User-facing explanation for a thinking-loop response."""
        return """🧠 The AI got caught in a "thinking loop" - overthinking the response.

**Why this happens:**
• Small models sometimes over-analyze simple questions
• Thinking mode can cause loops in smaller models
• Query complexity exceeds model capabilities

**What to try:**
• Ask more direct, specific questions
• Use synthesis mode (no thinking) for faster results
• Try: "What does this code do?" instead of "Explain how this works"
• Larger models (qwen3:1.7b+) handle thinking better"""

    def _explain_rambling(self) -> str:
        """User-facing explanation for a rambling response."""
        return """💭 The AI started rambling instead of giving focused answers.

**Why this happens:**
• Small models sometimes lose focus on complex topics
• Query might be too broad or vague
• Model trying to cover too much at once

**What to try:**
• Ask more specific questions
• Break broad questions into focused parts
• Example: "How is data validated?" instead of "Explain the whole system"
• Exploration mode helps maintain focus across questions"""

    def _explain_json_corruption(self) -> str:
        """User-facing explanation for a corrupted structured response."""
        return """🔧 The AI response format got corrupted.

**Why this happens:**
• Small models sometimes struggle with structured output
• Context limits can cause format errors
• Complex analysis might overwhelm formatting

**What to try:**
• Try the question again (often resolves itself)
• Use simpler questions for better formatting
• Synthesis mode sometimes gives cleaner output
• This is less common with larger models"""

    def get_recovery_suggestions(self, issue_type: str, query: str) -> List[str]:
        """Get specific recovery suggestions based on the issue.

        Args:
            issue_type: Tag returned by check_response_quality.
            query: Original user query, embedded in example commands.

        Returns:
            Ordered list of actionable suggestion strings; always ends with
            the universal suggestions.
        """
        suggestions = []

        if issue_type in ['thinking_loop', 'excessive_thinking']:
            suggestions.extend([
                f"Try synthesis mode: `rag-mini search . \"{query}\" --synthesize`",
                "Ask more direct questions without 'why' or 'how'",
                "Break complex questions into smaller parts"
            ])

        elif issue_type in ['word_repetition', 'phrase_repetition', 'high_repetition_ratio']:
            suggestions.extend([
                "Try rephrasing your question completely",
                "Use more specific technical terms",
                "Try exploration mode: `rag-mini explore .`"
            ])

        elif issue_type == 'timeout':
            suggestions.extend([
                "Try a simpler version of your question",
                "Use synthesis mode for faster responses",
                "Check if Ollama is under heavy load"
            ])

        # Universal suggestions
        suggestions.extend([
            "Consider using a larger model if available (qwen3:1.7b or qwen3:3b)",
            "Check model status: `ollama list`"
        ])

        return suggestions
|
||||||
|
|
||||||
|
def get_optimal_ollama_parameters(model_name: str) -> Dict[str, Any]:
    """Get optimal parameters for different Ollama models.

    Args:
        model_name: Ollama model tag, e.g. "qwen3:0.6b" or "llama3:8b".

    Returns:
        An Ollama "options" dict tuned for the model's size; unknown models
        receive conservative base defaults.

    Note:
        The return annotation previously used the builtin ``any`` function
        instead of ``typing.Any``; fixed here.
    """
    name = model_name.lower()  # Normalize once; compared against several markers below.

    base_params = {
        "num_ctx": 32768,     # Good context window for most uses
        "num_predict": 2000,  # Reasonable response length
        "temperature": 0.3,   # Balanced creativity/consistency
    }

    # Model-specific optimizations
    if "qwen3:0.6b" in name:
        return {
            **base_params,
            "repeat_penalty": 1.15,   # Prevent repetition in small model
            "presence_penalty": 1.5,  # Suppress repetitive outputs
            "top_p": 0.8,             # Focused sampling
            "top_k": 20,              # Limit choices
            "num_predict": 1500,      # Shorter responses for reliability
        }

    elif "qwen3:1.7b" in name:
        return {
            **base_params,
            "repeat_penalty": 1.1,    # Less aggressive for larger model
            "presence_penalty": 1.0,  # Balanced
            "top_p": 0.9,             # More creative
            "top_k": 40,              # More choices
        }

    elif any(size in name for size in ["3b", "7b", "8b"]):
        return {
            **base_params,
            "repeat_penalty": 1.05,   # Minimal for larger models
            "presence_penalty": 0.5,  # Light touch
            "top_p": 0.95,            # High creativity
            "top_k": 50,              # Many choices
            "num_predict": 3000,      # Longer responses OK
        }

    return base_params
|
||||||
|
|
||||||
|
# Quick test
|
||||||
|
def test_safeguards():
    """Smoke-test the safeguard system against a known-bad response."""
    runaway_detector = ModelRunawayDetector()

    # A response repeating the same sentence three times should be rejected.
    repetitive_output = (
        "The user authentication system works by checking user credentials. "
        "The user authentication system works by checking user credentials. "
        "The user authentication system works by checking user credentials."
    )

    valid, detected_issue, user_message = runaway_detector.check_response_quality(
        repetitive_output, "auth", time.time()
    )

    print(f"Repetition test: Valid={valid}, Issue={detected_issue}")
    if user_message:
        print(user_message)


if __name__ == "__main__":
    test_safeguards()
|
||||||
@ -8,11 +8,20 @@ Takes raw search results and generates coherent, contextual summaries.
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import requests
|
import requests
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
try:
    from .llm_safeguards import ModelRunawayDetector, SafeguardConfig, get_optimal_ollama_parameters
except ImportError:
    # Graceful fallback if safeguards not available: callers can still run,
    # they just get no runaway detection and no model-specific tuning.
    ModelRunawayDetector = None
    SafeguardConfig = None

    # PEP 8 (E731): use a def instead of assigning a lambda to a name, so
    # tracebacks and introspection show a real function name.
    def get_optimal_ollama_parameters(model_name):
        """Fallback: no model-specific tuning when safeguards are unavailable."""
        return {}
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -34,6 +43,12 @@ class LLMSynthesizer:
|
|||||||
self.enable_thinking = enable_thinking # Default False for synthesis mode
|
self.enable_thinking = enable_thinking # Default False for synthesis mode
|
||||||
self._initialized = False
|
self._initialized = False
|
||||||
|
|
||||||
|
# Initialize safeguards
|
||||||
|
if ModelRunawayDetector:
|
||||||
|
self.safeguard_detector = ModelRunawayDetector(SafeguardConfig())
|
||||||
|
else:
|
||||||
|
self.safeguard_detector = None
|
||||||
|
|
||||||
def _get_available_models(self) -> List[str]:
|
def _get_available_models(self) -> List[str]:
|
||||||
"""Get list of available Ollama models."""
|
"""Get list of available Ollama models."""
|
||||||
try:
|
try:
|
||||||
@ -129,7 +144,9 @@ class LLMSynthesizer:
|
|||||||
return len(self.available_models) > 0
|
return len(self.available_models) > 0
|
||||||
|
|
||||||
def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False) -> Optional[str]:
|
def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False) -> Optional[str]:
|
||||||
"""Make a call to Ollama API."""
|
"""Make a call to Ollama API with safeguards."""
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Use the best available model
|
# Use the best available model
|
||||||
model_to_use = self.model
|
model_to_use = self.model
|
||||||
@ -147,26 +164,46 @@ class LLMSynthesizer:
|
|||||||
if not final_prompt.endswith(" <no_think>"):
|
if not final_prompt.endswith(" <no_think>"):
|
||||||
final_prompt += " <no_think>"
|
final_prompt += " <no_think>"
|
||||||
|
|
||||||
|
# Get optimal parameters for this model
|
||||||
|
optimal_params = get_optimal_ollama_parameters(model_to_use)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model_to_use,
|
"model": model_to_use,
|
||||||
"prompt": final_prompt,
|
"prompt": final_prompt,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"options": {
|
"options": {
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
"top_p": 0.9,
|
"top_p": optimal_params.get("top_p", 0.9),
|
||||||
"top_k": 40
|
"top_k": optimal_params.get("top_k", 40),
|
||||||
|
"num_ctx": optimal_params.get("num_ctx", 32768),
|
||||||
|
"num_predict": optimal_params.get("num_predict", 2000),
|
||||||
|
"repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
|
||||||
|
"presence_penalty": optimal_params.get("presence_penalty", 1.0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
f"{self.ollama_url}/api/generate",
|
f"{self.ollama_url}/api/generate",
|
||||||
json=payload,
|
json=payload,
|
||||||
timeout=30
|
timeout=65 # Slightly longer than safeguard timeout
|
||||||
)
|
)
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
result = response.json()
|
result = response.json()
|
||||||
return result.get('response', '').strip()
|
raw_response = result.get('response', '').strip()
|
||||||
|
|
||||||
|
# Apply safeguards to check response quality
|
||||||
|
if self.safeguard_detector and raw_response:
|
||||||
|
is_valid, issue_type, explanation = self.safeguard_detector.check_response_quality(
|
||||||
|
raw_response, prompt[:100], start_time # First 100 chars of prompt for context
|
||||||
|
)
|
||||||
|
|
||||||
|
if not is_valid:
|
||||||
|
logger.warning(f"Safeguard triggered: {issue_type}")
|
||||||
|
# Return a safe explanation instead of the problematic response
|
||||||
|
return self._create_safeguard_response(issue_type, explanation, prompt)
|
||||||
|
|
||||||
|
return raw_response
|
||||||
else:
|
else:
|
||||||
logger.error(f"Ollama API error: {response.status_code}")
|
logger.error(f"Ollama API error: {response.status_code}")
|
||||||
return None
|
return None
|
||||||
@ -175,6 +212,24 @@ class LLMSynthesizer:
|
|||||||
logger.error(f"Ollama call failed: {e}")
|
logger.error(f"Ollama call failed: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _create_safeguard_response(self, issue_type: str, explanation: str, original_prompt: str) -> str:
|
||||||
|
"""Create a helpful response when safeguards are triggered."""
|
||||||
|
return f"""⚠️ Model Response Issue Detected
|
||||||
|
|
||||||
|
{explanation}
|
||||||
|
|
||||||
|
**Original query context:** {original_prompt[:200]}{'...' if len(original_prompt) > 200 else ''}
|
||||||
|
|
||||||
|
**What happened:** The AI model encountered a common issue with small language models and was prevented from giving a problematic response.
|
||||||
|
|
||||||
|
**Your options:**
|
||||||
|
1. **Try again**: Ask the same question (often resolves itself)
|
||||||
|
2. **Rephrase**: Make your question more specific or break it into parts
|
||||||
|
3. **Use exploration mode**: `rag-mini explore` for complex questions
|
||||||
|
4. **Different approach**: Try synthesis mode: `--synthesize` for simpler responses
|
||||||
|
|
||||||
|
This is normal with smaller AI models and helps ensure you get quality responses."""
|
||||||
|
|
||||||
def synthesize_search_results(self, query: str, results: List[Any], project_path: Path) -> SynthesisResult:
|
def synthesize_search_results(self, query: str, results: List[Any], project_path: Path) -> SynthesisResult:
|
||||||
"""Synthesize search results into a coherent summary."""
|
"""Synthesize search results into a coherent summary."""
|
||||||
|
|
||||||
|
|||||||
270
create_exploration_demo.py
Normal file
270
create_exploration_demo.py
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Create demo GIF for Exploration Mode - Deep Thinking & Interactive Learning
|
||||||
|
Shows the conversational workflow for understanding and debugging codebases.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
class ExplorationDemoSimulator:
    """Scripted terminal playback of an exploration-mode session.

    Prints a pre-scripted conversation with realistic typing/response
    timing so it can be recorded (e.g. with asciinema) and turned into a
    demo GIF.  All output is hard-coded; nothing contacts a real model.
    """

    def __init__(self):
        # Intended terminal dimensions for the recording; not referenced
        # elsewhere in this class — kept for recording-setup documentation.
        self.width = 100
        self.height = 35

    def clear_screen(self):
        """Clear the terminal using ANSI home + erase-display codes."""
        print("\033[H\033[2J", end="")

    def _type_text(self, prefix: str, text: str, delay: float):
        """Shared typing simulation: print `prefix`, then `text` one character
        at a time with `delay` seconds between keystrokes.

        Factored out of type_command/type_question, which previously
        duplicated this loop and differed only in the prompt prefix.
        """
        print(prefix, end="", flush=True)
        for char in text:
            print(char, end="", flush=True)
            time.sleep(delay)
        print()
        time.sleep(0.5)

    def type_command(self, command: str, delay: float = 0.05):
        """Simulate typing a command."""
        self._type_text("$ ", command, delay)

    def type_question(self, question: str, delay: float = 0.04):
        """Simulate typing a question in exploration mode."""
        self._type_text("> ", question, delay)

    def show_thinking(self, duration: float = 2.0):
        """Show thinking animation."""
        print("🔍 Analyzing...", end="", flush=True)
        for _ in range(3):
            time.sleep(duration / 3)
            print(".", end="", flush=True)
        print()
        time.sleep(0.5)

    def show_response(self, lines: list, delay: float = 0.4):
        """Show AI response with realistic timing."""
        for line in lines:
            print(line)
            time.sleep(delay)
        time.sleep(1.5)

    def run_exploration_demo(self):
        """Run the exploration mode demonstration."""
        self.clear_screen()

        # Title
        print("🧠 FSS-Mini-RAG: Exploration Mode Demo")
        print("=" * 55)
        print("Deep thinking & interactive learning for complex codebases")
        print()
        time.sleep(2)

        # Step 1: Start exploration
        print("Step 1: Start interactive exploration session")
        print("-" * 45)
        self.type_command("rag-mini explore ./webapp-project")

        self.show_response([
            "🧠 EXPLORATION MODE STARTED",
            "=" * 50,
            "Project: webapp-project",
            "Session: explore_1702845632",
            "",
            "🎯 This mode uses thinking and remembers context.",
            " Perfect for debugging, learning, and deep exploration.",
            "",
            "💡 Tips:",
            " • Ask follow-up questions - I'll remember our conversation",
            " • Use 'why', 'how', 'explain' for detailed reasoning",
            " • Type 'quit' or 'exit' to end session",
            "",
            "=" * 50,
            "",
            "🤔 Ask your first question about the codebase:"
        ])

        # Step 2: First question - How authentication works
        print("Step 2: Understanding authentication system")
        print("-" * 42)
        self.type_question("How does authentication work in this codebase?")

        self.show_thinking()

        self.show_response([
            "",
            "🧠 EXPLORATION ANALYSIS (Question #1)",
            "Session: 0.2m | Results: 8 | Time: 3.4s",
            "=" * 60,
            "",
            "📝 Analysis:",
            " This webapp implements a JWT-based authentication system with",
            " session management, role-based permissions, and OAuth2 integration.",
            "",
            "🔍 Key Insights:",
            " • JWT tokens generated in auth/jwt_handler.py with 24h expiry",
            " • User sessions stored in Redis for fast lookups and invalidation",
            " • Role-based access control through middleware/permissions.py",
            " • OAuth2 integration supports Google, GitHub, and Microsoft providers",
            " • Password hashing uses bcrypt with 12 rounds for security",
            "",
            "💡 Code Examples:",
            " JWT generation: jwt.encode(payload, SECRET_KEY, algorithm='HS256')",
            " Session storage: redis.setex(f'session:{user_id}', 86400, token)",
            " Permission check: @require_permission('admin') decorator",
            "",
            "🎯 Next Steps:",
            " • Review JWT secret rotation policy",
            " • Consider implementing refresh tokens",
            " • Audit OAuth2 scope permissions",
            "",
            "🟢 Confidence: 91%"
        ])

        # Step 3: Follow-up question - Security concerns
        print("Step 3: Follow-up question (context remembered)")
        print("-" * 48)
        self.type_question("What security concerns should I be aware of?")

        self.show_thinking(1.8)

        self.show_response([
            "",
            "🧠 EXPLORATION ANALYSIS (Question #2)",
            "Session: 1.4m | Results: 6 | Time: 2.1s",
            "=" * 60,
            "",
            "📝 Analysis:",
            " Building on the JWT authentication system we just discussed,",
            " I've identified several security considerations that need attention.",
            "",
            "🔍 Key Insights:",
            " • JWT secret is hardcoded in config.py (CRITICAL RISK)",
            " • No rate limiting on login endpoints (brute force vulnerable)",
            " • Session invalidation only happens on logout, not on password change",
            " • OAuth2 state parameter validation could be strengthened",
            " • CORS settings allow all origins in development config",
            "",
            "💡 Code Examples:",
            " Vulnerable: SECRET_KEY = 'hardcoded-secret-123'",
            " Missing: @rate_limit('5 per minute') on /api/login",
            " Risk: CORS_ALLOW_ALL_ORIGINS = True",
            "",
            "🎯 Next Steps:",
            " • Move JWT secret to environment variables immediately",
            " • Implement rate limiting with Redis or in-memory store",
            " • Add session invalidation on password/email changes",
            " • Review and restrict CORS origins for production",
            " • Add security headers middleware",
            "",
            "🔴 Confidence: 94% | Context: 1 previous questions"
        ])

        # Step 4: Third question - Performance issues
        print("Step 4: Exploring performance (conversation continues)")
        print("-" * 54)
        self.type_question("Why might the login process be slow?")

        self.show_thinking(2.2)

        self.show_response([
            "",
            "🧠 EXPLORATION ANALYSIS (Question #3)",
            "Session: 2.1m | Results: 7 | Time: 2.8s",
            "=" * 60,
            "",
            "📝 Analysis:",
            " Considering our previous discussion about JWT auth and security,",
            " the login slowness likely stems from several performance bottlenecks.",
            "",
            "🔍 Key Insights:",
            " • bcrypt rounds set to 15 (recommended: 10-12) causing CPU overhead",
            " • Database queries for user lookup aren't indexed on email field",
            " • Redis connection pool not configured, creating new connections",
            " • OAuth2 provider calls lack timeout settings (can hang)",
            " • Session cleanup runs synchronously during login flow",
            "",
            "💡 Code Examples:",
            " Slow: bcrypt.hashpw(password, bcrypt.gensalt(rounds=15))",
            " Missing: CREATE INDEX idx_users_email ON users(email)",
            " Blocking: cleanup_expired_sessions() # Should be async",
            "",
            "🎯 Next Steps:",
            " • Reduce bcrypt rounds to 12 for better balance",
            " • Add database index on users.email column",
            " • Configure Redis connection pooling",
            " • Move session cleanup to background task",
            " • Add OAuth2 request timeouts (5-10 seconds)",
            "",
            "🟡 Confidence: 89% | Context: 2 previous questions"
        ])

        # Step 5: Session summary
        print("Step 5: Session summary and conclusion")
        print("-" * 38)
        self.type_question("summary")

        self.show_response([
            "",
            "🧠 EXPLORATION SESSION SUMMARY",
            "=" * 40,
            "Project: webapp-project",
            "Session ID: explore_1702845632",
            "Duration: 2.5 minutes",
            "Questions explored: 3",
            "",
            "📋 Topics explored:",
            " 1. How does authentication work in this codebase? (confidence: 91%)",
            " 2. What security concerns should I be aware of? (confidence: 94%)",
            " 3. Why might the login process be slow? (confidence: 89%)",
            "",
            "🎯 Key Discoveries:",
            " • JWT-based auth with session management",
            " • Critical security issues (hardcoded secrets, no rate limiting)",
            " • Performance bottlenecks (bcrypt settings, missing indexes)",
            "",
            "💡 Action Items Generated:",
            " • Immediate: Fix hardcoded JWT secret",
            " • High Priority: Add rate limiting and database indexes",
            " • Monitor: Review OAuth2 configurations"
        ])

        # Step 6: Exit
        self.type_question("quit")

        self.show_response([
            "",
            "✅ Exploration session ended.",
            "",
            "🎬 This was Exploration Mode - perfect for learning and debugging!"
        ])

        # Final summary
        print()
        print("💡 Exploration Mode Benefits:")
        print(" 🧠 Thinking-enabled AI for detailed reasoning")
        print(" 💭 Conversation memory across questions")
        print(" 🔍 Perfect for debugging and understanding")
        print(" 📚 Educational - learn how code really works")
        print(" 🎯 Context-aware follow-up responses")
        print()
        time.sleep(3)
|
||||||
|
|
||||||
|
def main():
    """Entry point: wait for a keypress, then play the exploration demo."""
    simulator = ExplorationDemoSimulator()

    # Give the operator a moment to start the asciinema recording first.
    for notice in (
        "Starting FSS-Mini-RAG Exploration Mode Demo...",
        "Record with: asciinema rec exploration_demo.cast",
        "Press Enter to start...",
    ):
        print(notice)
    input()

    simulator.run_exploration_demo()

    print("\n🎯 To create GIF:")
    print("agg exploration_demo.cast exploration_demo.gif")
|
||||||
|
|
||||||
|
# Allow running this demo script directly from the command line.
if __name__ == "__main__":
    main()
|
||||||
178
create_synthesis_demo.py
Normal file
178
create_synthesis_demo.py
Normal file
@ -0,0 +1,178 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Create demo GIF for Synthesis Mode - Fast & Consistent RAG Search
|
||||||
|
Shows the streamlined workflow for quick answers and code discovery.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
class SynthesisDemoSimulator:
    """Drives a scripted terminal walkthrough of Synthesis Mode.

    Replays a pre-recorded command/response sequence with human-like
    pacing so the session can be captured as a GIF (e.g. via asciinema).
    """

    def __init__(self):
        # Nominal terminal geometry for the recording (informational only —
        # nothing in the class reads these back; TODO confirm external use).
        self.width = 100
        self.height = 30

    def clear_screen(self):
        """Reset the terminal via ANSI cursor-home + clear-screen codes."""
        print("\033[H\033[2J", end="")

    def type_command(self, command: str, delay: float = 0.05):
        """Simulate typing a command."""
        # Shell-style prompt, then one keystroke at a time.
        print("$ ", end="", flush=True)
        for ch in command:
            print(ch, end="", flush=True)
            time.sleep(delay)
        print()
        # Brief pause before the "command" produces output.
        time.sleep(0.5)

    def show_output(self, lines: list, delay: float = 0.3):
        """Show command output with realistic timing."""
        for text in lines:
            print(text)
            time.sleep(delay)
        # Let the viewer absorb the output before moving on.
        time.sleep(1.0)

    def run_synthesis_demo(self):
        """Play the complete scripted Synthesis Mode demonstration."""
        self.clear_screen()

        # Title banner
        for banner_line in (
            "🚀 FSS-Mini-RAG: Synthesis Mode Demo",
            "=" * 50,
            "Fast & consistent RAG search for quick answers",
            "",
        ):
            print(banner_line)
        time.sleep(2)

        # Step 1: Index a project
        print("Step 1: Index a sample project")
        print("-" * 30)
        self.type_command("rag-mini index ./sample-project")
        self.show_output([
            "📁 Indexing project: sample-project",
            "🔍 Found 12 files to process",
            "✂️ Creating semantic chunks...",
            "🧠 Generating embeddings...",
            "💾 Building vector index...",
            "✅ Indexed 89 chunks from 12 files in 3.2s",
            "",
            '💡 Try: rag-mini search ./sample-project "your search here"',
        ])

        # Step 2: Quick search
        print("Step 2: Quick semantic search")
        print("-" * 30)
        self.type_command('rag-mini search ./sample-project "user authentication"')
        self.show_output([
            '🔍 Searching "user authentication" in sample-project',
            "✅ Found 5 results:",
            "",
            "1. auth/models.py",
            " Score: 0.923",
            " Lines: 45-62",
            " Context: User class",
            " Content:",
            " class User:",
            " def authenticate(self, password):",
            " return bcrypt.checkpw(password, self.password_hash)",
            "",
            "2. auth/views.py",
            " Score: 0.887",
            " Lines: 23-41",
            " Context: login_view function",
            " Content:",
            " def login_view(request):",
            " user = authenticate(username, password)",
            " if user:",
            " login(request, user)",
            "",
            "3. middleware/auth.py",
            " Score: 0.845",
            " Content: Authentication middleware checking...",
        ])

        # Step 3: Search with AI synthesis
        print("Step 3: Add AI synthesis for deeper understanding")
        print("-" * 50)
        self.type_command('rag-mini search ./sample-project "error handling" --synthesize')
        self.show_output([
            '🔍 Searching "error handling" in sample-project',
            "🧠 Generating LLM synthesis...",
            "✅ Found 4 results:",
            "",
            "1. utils/exceptions.py",
            " Score: 0.934",
            " Content: Custom exception classes for API errors...",
            "",
            "2. api/handlers.py",
            " Score: 0.889",
            " Content: Global exception handler with logging...",
            "",
            "🧠 LLM SYNTHESIS",
            "=" * 50,
            "",
            "📝 Summary:",
            " This codebase implements a robust error handling system with",
            " custom exceptions, global handlers, and structured logging.",
            "",
            "🔍 Key Findings:",
            " • Custom exception hierarchy in utils/exceptions.py",
            " • Global error handler catches all API exceptions",
            " • Logging integrated with error tracking service",
            "",
            "💡 Code Patterns:",
            " try/except blocks with specific exception types",
            " Centralized error response formatting",
            "",
            "🎯 Suggested Actions:",
            " • Review exception hierarchy for completeness",
            " • Consider adding error recovery mechanisms",
            "",
            "🟢 Confidence: 87%",
        ])

        # Step 4: Show performance
        print("Step 4: Performance characteristics")
        print("-" * 35)
        for benefit_line in (
            "⚡ Synthesis Mode Benefits:",
            " • Lightning fast responses (no thinking overhead)",
            " • Consistent, reliable results",
            " • Perfect for code discovery and quick answers",
            " • Works great with ultra-efficient models (qwen3:0.6b)",
            "",
        ):
            print(benefit_line)
        time.sleep(3)

        # Step 5: When to use
        for usage_line in (
            "💡 When to use Synthesis Mode:",
            " ✅ Quick code lookups",
            " ✅ Finding specific functions or classes",
            " ✅ Understanding code structure",
            " ✅ Fast documentation searches",
            " ✅ Batch processing multiple queries",
            "",
        ):
            print(usage_line)

        print("🧠 For deeper analysis, try: rag-mini explore ./project")
        print()
        time.sleep(3)

        print("🎬 Demo complete! This was Synthesis Mode - optimized for speed.")
|
||||||
|
|
||||||
|
def main():
    """Entry point: wait for a keypress, then play the synthesis demo."""
    simulator = SynthesisDemoSimulator()

    # Give the operator a moment to start the asciinema recording first.
    for notice in (
        "Starting FSS-Mini-RAG Synthesis Mode Demo...",
        "Record with: asciinema rec synthesis_demo.cast",
        "Press Enter to start...",
    ):
        print(notice)
    input()

    simulator.run_synthesis_demo()

    print("\n🎯 To create GIF:")
    print("agg synthesis_demo.cast synthesis_demo.gif")
|
||||||
|
|
||||||
|
# Allow running this demo script directly from the command line.
if __name__ == "__main__":
    main()
|
||||||
Loading…
x
Reference in New Issue
Block a user