From 5f42751e9a82f883f9676fad94e112a80aac275c Mon Sep 17 00:00:00 2001
From: BobAi
Date: Tue, 12 Aug 2025 19:07:48 +1000
Subject: [PATCH] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Add=20comprehensive=20L?=
 =?UTF-8?q?LM=20safeguards=20and=20dual-mode=20demo=20scripts?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

šŸ›”ļø SMART MODEL SAFEGUARDS:
- Implement runaway prevention with pattern detection (repetition, thinking loops, rambling)
- Add context length management with optimal parameters per model size
- Quality validation blocks problematic responses before they reach users
- Helpful explanations with recovery suggestions when issues occur
- Model-specific parameter optimization (qwen3:0.6b vs 1.7b vs 3b+)
- Timeout protection and graceful degradation

⚔ OPTIMAL PERFORMANCE SETTINGS:
- Context window: 32k tokens as a good balance for most uses
- Repeat penalty: 1.15 for 0.6b, 1.1 for 1.7b, 1.05 for larger models
- Presence penalty: 1.5 for quantized models to prevent repetition
- Smart output limits: 1500 tokens for 0.6b, 2000+ for larger models
- Top-p/top-k tuning matched to model size

šŸŽ¬ DUAL-MODE DEMO SCRIPTS:
- create_synthesis_demo.py: Shows fast search with AI synthesis workflow
- create_exploration_demo.py: Interactive thinking mode with conversation memory
- Realistic typing simulation and response timing for quality GIFs
- Clear demonstration of when to use each mode

Perfect for creating compelling demo videos showing both RAG experiences!
---
 claude_rag/llm_safeguards.py  | 320 ++++++++++++++++++++++++++++++++++
 claude_rag/llm_synthesizer.py |  65 ++++++-
 create_exploration_demo.py    | 270 ++++++++++++++++++++++++++++
 create_synthesis_demo.py      | 178 +++++++++++++++++++
 4 files changed, 828 insertions(+), 5 deletions(-)
 create mode 100644 claude_rag/llm_safeguards.py
 create mode 100644 create_exploration_demo.py
 create mode 100644 create_synthesis_demo.py

diff --git a/claude_rag/llm_safeguards.py b/claude_rag/llm_safeguards.py
new file mode 100644
index 0000000..f6fa474
--- /dev/null
+++ b/claude_rag/llm_safeguards.py
@@ -0,0 +1,320 @@
+#!/usr/bin/env python3
+"""
+LLM Safeguards for Small Model Management
+
+Provides runaway prevention, context management, and intelligent detection
+of problematic model behaviors to ensure a reliable user experience.
+""" + +import re +import time +import logging +from typing import Optional, Dict, List, Tuple +from dataclasses import dataclass + +logger = logging.getLogger(__name__) + +@dataclass +class SafeguardConfig: + """Configuration for LLM safeguards.""" + max_output_tokens: int = 2000 # Prevent excessive generation + max_repetition_ratio: float = 0.3 # Max ratio of repeated content + max_response_time: int = 60 # Max seconds for response + min_useful_length: int = 20 # Minimum useful response length + context_window: int = 32768 # Ollama context window + enable_thinking_detection: bool = True # Detect thinking patterns + +class ModelRunawayDetector: + """Detects and prevents model runaway behaviors.""" + + def __init__(self, config: SafeguardConfig = None): + self.config = config or SafeguardConfig() + self.response_patterns = self._compile_patterns() + + def _compile_patterns(self) -> Dict[str, re.Pattern]: + """Compile regex patterns for runaway detection.""" + return { + # Excessive repetition patterns + 'word_repetition': re.compile(r'\b(\w+)\b(?:\s+\1\b){3,}', re.IGNORECASE), + 'phrase_repetition': re.compile(r'(.{10,50}?)\1{2,}', re.DOTALL), + + # Thinking loop patterns (small models get stuck) + 'thinking_loop': re.compile(r'(let me think|i think|thinking|consider|actually|wait|hmm|well)\s*[.,:]*\s*\1', re.IGNORECASE), + + # Rambling patterns + 'excessive_filler': re.compile(r'\b(um|uh|well|you know|like|basically|actually|so|then|and|but|however)\b(?:\s+[^.!?]*){5,}', re.IGNORECASE), + + # JSON corruption patterns + 'broken_json': re.compile(r'\{[^}]*\{[^}]*\{'), # Nested broken JSON + 'json_repetition': re.compile(r'("[\w_]+"\s*:\s*"[^"]*",?\s*){4,}'), # Repeated JSON fields + } + + def check_response_quality(self, response: str, query: str, start_time: float) -> Tuple[bool, Optional[str], Optional[str]]: + """ + Check response quality and detect runaway behaviors. 
+ + Returns: + (is_valid, issue_type, user_explanation) + """ + if not response or len(response.strip()) < self.config.min_useful_length: + return False, "too_short", self._explain_too_short() + + # Check response time + elapsed = time.time() - start_time + if elapsed > self.config.max_response_time: + return False, "timeout", self._explain_timeout() + + # Check for repetition issues + repetition_issue = self._check_repetition(response) + if repetition_issue: + return False, repetition_issue, self._explain_repetition(repetition_issue) + + # Check for thinking loops + if self.config.enable_thinking_detection: + thinking_issue = self._check_thinking_loops(response) + if thinking_issue: + return False, thinking_issue, self._explain_thinking_loop() + + # Check for rambling + rambling_issue = self._check_rambling(response) + if rambling_issue: + return False, rambling_issue, self._explain_rambling() + + # Check JSON corruption (for structured responses) + if '{' in response and '}' in response: + json_issue = self._check_json_corruption(response) + if json_issue: + return False, json_issue, self._explain_json_corruption() + + return True, None, None + + def _check_repetition(self, response: str) -> Optional[str]: + """Check for excessive repetition.""" + # Word repetition + if self.response_patterns['word_repetition'].search(response): + return "word_repetition" + + # Phrase repetition + if self.response_patterns['phrase_repetition'].search(response): + return "phrase_repetition" + + # Calculate repetition ratio + words = response.split() + if len(words) > 10: + unique_words = set(words) + repetition_ratio = 1 - (len(unique_words) / len(words)) + if repetition_ratio > self.config.max_repetition_ratio: + return "high_repetition_ratio" + + return None + + def _check_thinking_loops(self, response: str) -> Optional[str]: + """Check for thinking loops (common in small models).""" + if self.response_patterns['thinking_loop'].search(response): + return "thinking_loop" + + # Check for excessive meta-commentary + thinking_words = ['think', 'considering', 'actually', 'wait', 'hmm', 'let me'] + thinking_count = sum(response.lower().count(word) for word in thinking_words) + + if thinking_count > 5 and len(response.split()) < 200: + return "excessive_thinking" + + return None + + def _check_rambling(self, response: str) -> Optional[str]: + """Check for rambling or excessive filler.""" + if self.response_patterns['excessive_filler'].search(response): + return "excessive_filler" + + # Check for extremely long sentences (sign of rambling) + sentences = re.split(r'[.!?]+', response) + long_sentences = [s for s in sentences if len(s.split()) > 50] + + if len(long_sentences) > 2: + return "excessive_rambling" + + return None + + def _check_json_corruption(self, response: str) -> Optional[str]: + """Check for JSON corruption in structured responses.""" + if self.response_patterns['broken_json'].search(response): + return "broken_json" + + if self.response_patterns['json_repetition'].search(response): + return "json_repetition" + + return None + + def _explain_too_short(self) -> str: + return """šŸ¤” The AI response was too short to be helpful. 
+ +**Why this happens:** +• The model might be confused by the query +• Context might be insufficient +• Model might be overloaded + +**What to try:** +• Rephrase your question more specifically +• Try a broader search term first +• Use exploration mode for complex questions: `rag-mini explore`""" + + def _explain_timeout(self) -> str: + return """ā±ļø The AI took too long to respond (over 60 seconds). + +**Why this happens:** +• Small models sometimes get "stuck" thinking +• Complex queries can overwhelm smaller models +• System might be under load + +**What to try:** +• Try a simpler, more direct question +• Use synthesis mode for faster responses: `--synthesize` +• Consider using a larger model if available""" + + def _explain_repetition(self, issue_type: str) -> str: + return f"""šŸ”„ The AI got stuck in repetition loops ({issue_type}). + +**Why this happens:** +• Small models sometimes repeat when uncertain +• Query might be too complex for the model size +• Context window might be exceeded + +**What to try:** +• Try a more specific question +• Break complex questions into smaller parts +• Use exploration mode which handles context better: `rag-mini explore` +• Consider: A larger model (qwen3:1.7b or qwen3:3b) would help""" + + def _explain_thinking_loop(self) -> str: + return """🧠 The AI got caught in a "thinking loop" - overthinking the response. + +**Why this happens:** +• Small models sometimes over-analyze simple questions +• Thinking mode can cause loops in smaller models +• Query complexity exceeds model capabilities + +**What to try:** +• Ask more direct, specific questions +• Use synthesis mode (no thinking) for faster results +• Try: "What does this code do?" instead of "Explain how this works" +• Larger models (qwen3:1.7b+) handle thinking better""" + + def _explain_rambling(self) -> str: + return """šŸ’­ The AI started rambling instead of giving focused answers. + +**Why this happens:** +• Small models sometimes lose focus on complex topics +• Query might be too broad or vague +• Model trying to cover too much at once + +**What to try:** +• Ask more specific questions +• Break broad questions into focused parts +• Example: "How is data validated?" instead of "Explain the whole system" +• Exploration mode helps maintain focus across questions""" + + def _explain_json_corruption(self) -> str: + return """šŸ”§ The AI response format got corrupted. + +**Why this happens:** +• Small models sometimes struggle with structured output +• Context limits can cause format errors +• Complex analysis might overwhelm formatting + +**What to try:** +• Try the question again (often resolves itself) +• Use simpler questions for better formatting +• Synthesis mode sometimes gives cleaner output +• This is less common with larger models""" + + def get_recovery_suggestions(self, issue_type: str, query: str) -> List[str]: + """Get specific recovery suggestions based on the issue.""" + suggestions = [] + + if issue_type in ['thinking_loop', 'excessive_thinking']: + suggestions.extend([ + f"Try synthesis mode: `rag-mini search . 
\"{query}\" --synthesize`", + "Ask more direct questions without 'why' or 'how'", + "Break complex questions into smaller parts" + ]) + + elif issue_type in ['word_repetition', 'phrase_repetition', 'high_repetition_ratio']: + suggestions.extend([ + "Try rephrasing your question completely", + "Use more specific technical terms", + f"Try exploration mode: `rag-mini explore .`" + ]) + + elif issue_type == 'timeout': + suggestions.extend([ + "Try a simpler version of your question", + "Use synthesis mode for faster responses", + "Check if Ollama is under heavy load" + ]) + + # Universal suggestions + suggestions.extend([ + "Consider using a larger model if available (qwen3:1.7b or qwen3:3b)", + "Check model status: `ollama list`" + ]) + + return suggestions + +def get_optimal_ollama_parameters(model_name: str) -> Dict[str, any]: + """Get optimal parameters for different Ollama models.""" + + base_params = { + "num_ctx": 32768, # Good context window for most uses + "num_predict": 2000, # Reasonable response length + "temperature": 0.3, # Balanced creativity/consistency + } + + # Model-specific optimizations + if "qwen3:0.6b" in model_name.lower(): + return { + **base_params, + "repeat_penalty": 1.15, # Prevent repetition in small model + "presence_penalty": 1.5, # Suppress repetitive outputs + "top_p": 0.8, # Focused sampling + "top_k": 20, # Limit choices + "num_predict": 1500, # Shorter responses for reliability + } + + elif "qwen3:1.7b" in model_name.lower(): + return { + **base_params, + "repeat_penalty": 1.1, # Less aggressive for larger model + "presence_penalty": 1.0, # Balanced + "top_p": 0.9, # More creative + "top_k": 40, # More choices + } + + elif any(size in model_name.lower() for size in ["3b", "7b", "8b"]): + return { + **base_params, + "repeat_penalty": 1.05, # Minimal for larger models + "presence_penalty": 0.5, # Light touch + "top_p": 0.95, # High creativity + "top_k": 50, # Many choices + "num_predict": 3000, # Longer responses OK + } + + return base_params + +# Quick test +def test_safeguards(): + """Test the safeguard system.""" + detector = ModelRunawayDetector() + + # Test repetition detection + bad_response = "The user authentication system works by checking user credentials. The user authentication system works by checking user credentials. The user authentication system works by checking user credentials." + + is_valid, issue, explanation = detector.check_response_quality(bad_response, "auth", time.time()) + + print(f"Repetition test: Valid={is_valid}, Issue={issue}") + if explanation: + print(explanation) + +if __name__ == "__main__": + test_safeguards() \ No newline at end of file diff --git a/claude_rag/llm_synthesizer.py b/claude_rag/llm_synthesizer.py index 538b851..0bf8503 100644 --- a/claude_rag/llm_synthesizer.py +++ b/claude_rag/llm_synthesizer.py @@ -8,11 +8,20 @@ Takes raw search results and generates coherent, contextual summaries. 
import json import logging +import time from typing import List, Dict, Any, Optional from dataclasses import dataclass import requests from pathlib import Path +try: + from .llm_safeguards import ModelRunawayDetector, SafeguardConfig, get_optimal_ollama_parameters +except ImportError: + # Graceful fallback if safeguards not available + ModelRunawayDetector = None + SafeguardConfig = None + get_optimal_ollama_parameters = lambda x: {} + logger = logging.getLogger(__name__) @dataclass @@ -34,6 +43,12 @@ class LLMSynthesizer: self.enable_thinking = enable_thinking # Default False for synthesis mode self._initialized = False + # Initialize safeguards + if ModelRunawayDetector: + self.safeguard_detector = ModelRunawayDetector(SafeguardConfig()) + else: + self.safeguard_detector = None + def _get_available_models(self) -> List[str]: """Get list of available Ollama models.""" try: @@ -129,7 +144,9 @@ class LLMSynthesizer: return len(self.available_models) > 0 def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False) -> Optional[str]: - """Make a call to Ollama API.""" + """Make a call to Ollama API with safeguards.""" + start_time = time.time() + try: # Use the best available model model_to_use = self.model @@ -147,26 +164,46 @@ class LLMSynthesizer: if not final_prompt.endswith(" "): final_prompt += " " + # Get optimal parameters for this model + optimal_params = get_optimal_ollama_parameters(model_to_use) + payload = { "model": model_to_use, "prompt": final_prompt, "stream": False, "options": { "temperature": temperature, - "top_p": 0.9, - "top_k": 40 + "top_p": optimal_params.get("top_p", 0.9), + "top_k": optimal_params.get("top_k", 40), + "num_ctx": optimal_params.get("num_ctx", 32768), + "num_predict": optimal_params.get("num_predict", 2000), + "repeat_penalty": optimal_params.get("repeat_penalty", 1.1), + "presence_penalty": optimal_params.get("presence_penalty", 1.0) } } response = requests.post( f"{self.ollama_url}/api/generate", json=payload, - timeout=30 + timeout=65 # Slightly longer than safeguard timeout ) if response.status_code == 200: result = response.json() - return result.get('response', '').strip() + raw_response = result.get('response', '').strip() + + # Apply safeguards to check response quality + if self.safeguard_detector and raw_response: + is_valid, issue_type, explanation = self.safeguard_detector.check_response_quality( + raw_response, prompt[:100], start_time # First 100 chars of prompt for context + ) + + if not is_valid: + logger.warning(f"Safeguard triggered: {issue_type}") + # Return a safe explanation instead of the problematic response + return self._create_safeguard_response(issue_type, explanation, prompt) + + return raw_response else: logger.error(f"Ollama API error: {response.status_code}") return None @@ -175,6 +212,24 @@ class LLMSynthesizer: logger.error(f"Ollama call failed: {e}") return None + def _create_safeguard_response(self, issue_type: str, explanation: str, original_prompt: str) -> str: + """Create a helpful response when safeguards are triggered.""" + return f"""āš ļø Model Response Issue Detected + +{explanation} + +**Original query context:** {original_prompt[:200]}{'...' if len(original_prompt) > 200 else ''} + +**What happened:** The AI model encountered a common issue with small language models and was prevented from giving a problematic response. + +**Your options:** +1. **Try again**: Ask the same question (often resolves itself) +2. 
**Rephrase**: Make your question more specific or break it into parts +3. **Use exploration mode**: `rag-mini explore` for complex questions +4. **Different approach**: Try synthesis mode: `--synthesize` for simpler responses + +This is normal with smaller AI models and helps ensure you get quality responses.""" + def synthesize_search_results(self, query: str, results: List[Any], project_path: Path) -> SynthesisResult: """Synthesize search results into a coherent summary.""" diff --git a/create_exploration_demo.py b/create_exploration_demo.py new file mode 100644 index 0000000..67f264d --- /dev/null +++ b/create_exploration_demo.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +""" +Create demo GIF for Exploration Mode - Deep Thinking & Interactive Learning +Shows the conversational workflow for understanding and debugging codebases. +""" + +import time +import sys +import os +from pathlib import Path + +class ExplorationDemoSimulator: + def __init__(self): + self.width = 100 + self.height = 35 + + def clear_screen(self): + print("\033[H\033[2J", end="") + + def type_command(self, command: str, delay: float = 0.05): + """Simulate typing a command.""" + print("$ ", end="", flush=True) + for char in command: + print(char, end="", flush=True) + time.sleep(delay) + print() + time.sleep(0.5) + + def type_question(self, question: str, delay: float = 0.04): + """Simulate typing a question in exploration mode.""" + print("> ", end="", flush=True) + for char in question: + print(char, end="", flush=True) + time.sleep(delay) + print() + time.sleep(0.5) + + def show_thinking(self, duration: float = 2.0): + """Show thinking animation.""" + print("šŸ” Analyzing...", end="", flush=True) + for _ in range(3): + time.sleep(duration / 3) + print(".", end="", flush=True) + print() + time.sleep(0.5) + + def show_response(self, lines: list, delay: float = 0.4): + """Show AI response with realistic timing.""" + for line in lines: + print(line) + time.sleep(delay) + time.sleep(1.5) + + def run_exploration_demo(self): + """Run the exploration mode demonstration.""" + self.clear_screen() + + # Title + print("🧠 FSS-Mini-RAG: Exploration Mode Demo") + print("=" * 55) + print("Deep thinking & interactive learning for complex codebases") + print() + time.sleep(2) + + # Step 1: Start exploration + print("Step 1: Start interactive exploration session") + print("-" * 45) + self.type_command("rag-mini explore ./webapp-project") + + self.show_response([ + "🧠 EXPLORATION MODE STARTED", + "=" * 50, + "Project: webapp-project", + "Session: explore_1702845632", + "", + "šŸŽÆ This mode uses thinking and remembers context.", + " Perfect for debugging, learning, and deep exploration.", + "", + "šŸ’” Tips:", + " • Ask follow-up questions - I'll remember our conversation", + " • Use 'why', 'how', 'explain' for detailed reasoning", + " • Type 'quit' or 'exit' to end session", + "", + "=" * 50, + "", + "šŸ¤” Ask your first question about the codebase:" + ]) + + # Step 2: First question - How authentication works + print("Step 2: Understanding authentication system") + print("-" * 42) + self.type_question("How does authentication work in this codebase?") + + self.show_thinking() + + self.show_response([ + "", + "🧠 EXPLORATION ANALYSIS (Question #1)", + "Session: 0.2m | Results: 8 | Time: 3.4s", + "=" * 60, + "", + "šŸ“ Analysis:", + " This webapp implements a JWT-based authentication system with", + " session management, role-based permissions, and OAuth2 integration.", + "", + "šŸ” Key Insights:", + " • JWT tokens generated in 
auth/jwt_handler.py with 24h expiry",
+            "  • User sessions stored in Redis for fast lookups and invalidation",
+            "  • Role-based access control through middleware/permissions.py",
+            "  • OAuth2 integration supports Google, GitHub, and Microsoft providers",
+            "  • Password hashing uses bcrypt with 12 rounds for security",
+            "",
+            "šŸ’” Code Examples:",
+            "   JWT generation: jwt.encode(payload, SECRET_KEY, algorithm='HS256')",
+            "   Session storage: redis.setex(f'session:{user_id}', 86400, token)",
+            "   Permission check: @require_permission('admin') decorator",
+            "",
+            "šŸŽÆ Next Steps:",
+            "  • Review JWT secret rotation policy",
+            "  • Consider implementing refresh tokens",
+            "  • Audit OAuth2 scope permissions",
+            "",
+            "🟢 Confidence: 91%"
+        ])
+
+        # Step 3: Follow-up question - Security concerns
+        print("Step 3: Follow-up question (context remembered)")
+        print("-" * 48)
+        self.type_question("What security concerns should I be aware of?")
+
+        self.show_thinking(1.8)
+
+        self.show_response([
+            "",
+            "🧠 EXPLORATION ANALYSIS (Question #2)",
+            "Session: 1.4m | Results: 6 | Time: 2.1s",
+            "=" * 60,
+            "",
+            "šŸ“ Analysis:",
+            "   Building on the JWT authentication system we just discussed,",
+            "   I've identified several security considerations that need attention.",
+            "",
+            "šŸ” Key Insights:",
+            "  • JWT secret is hardcoded in config.py (CRITICAL RISK)",
+            "  • No rate limiting on login endpoints (brute force vulnerable)",
+            "  • Session invalidation only happens on logout, not on password change",
+            "  • OAuth2 state parameter validation could be strengthened",
+            "  • CORS settings allow all origins in development config",
+            "",
+            "šŸ’” Code Examples:",
+            "   Vulnerable: SECRET_KEY = 'hardcoded-secret-123'",
+            "   Missing: @rate_limit('5 per minute') on /api/login",
+            "   Risk: CORS_ALLOW_ALL_ORIGINS = True",
+            "",
+            "šŸŽÆ Next Steps:",
+            "  • Move JWT secret to environment variables immediately",
+            "  • Implement rate limiting with Redis or in-memory store",
+            "  • Add session invalidation on password/email changes",
+            "  • Review and restrict CORS origins for production",
+            "  • Add security headers middleware",
+            "",
+            "🟢 Confidence: 94% | Context: 1 previous question"
+        ])
+
+        # Step 4: Third question - Performance issues
+        print("Step 4: Exploring performance (conversation continues)")
+        print("-" * 54)
+        self.type_question("Why might the login process be slow?")
+
+        self.show_thinking(2.2)
+
+        self.show_response([
+            "",
+            "🧠 EXPLORATION ANALYSIS (Question #3)",
+            "Session: 2.1m | Results: 7 | Time: 2.8s",
+            "=" * 60,
+            "",
+            "šŸ“ Analysis:",
+            "   Considering our previous discussion about JWT auth and security,",
+            "   the login slowness likely stems from several performance bottlenecks.",
+            "",
+            "šŸ” Key Insights:",
+            "  • bcrypt rounds set to 15 (recommended: 10-12) causing CPU overhead",
+            "  • Database queries for user lookup aren't indexed on email field",
+            "  • Redis connection pool not configured, creating new connections",
+            "  • OAuth2 provider calls lack timeout settings (can hang)",
+            "  • Session cleanup runs synchronously during login flow",
+            "",
+            "šŸ’” Code Examples:",
+            "   Slow: bcrypt.hashpw(password, bcrypt.gensalt(rounds=15))",
+            "   Missing: CREATE INDEX idx_users_email ON users(email)",
+            "   Blocking: cleanup_expired_sessions()  # Should be async",
+            "",
+            "šŸŽÆ Next Steps:",
+            "  • Reduce bcrypt rounds to 12 for better balance",
+            "  • Add database index on users.email column",
+            "  • Configure Redis connection pooling",
+            "  • Move session cleanup to background task",
+            "  • Add OAuth2 request 
timeouts (5-10 seconds)", + "", + "🟔 Confidence: 89% | Context: 2 previous questions" + ]) + + # Step 5: Session summary + print("Step 5: Session summary and conclusion") + print("-" * 38) + self.type_question("summary") + + self.show_response([ + "", + "🧠 EXPLORATION SESSION SUMMARY", + "=" * 40, + "Project: webapp-project", + "Session ID: explore_1702845632", + "Duration: 2.5 minutes", + "Questions explored: 3", + "", + "šŸ“‹ Topics explored:", + " 1. How does authentication work in this codebase? (confidence: 91%)", + " 2. What security concerns should I be aware of? (confidence: 94%)", + " 3. Why might the login process be slow? (confidence: 89%)", + "", + "šŸŽÆ Key Discoveries:", + " • JWT-based auth with session management", + " • Critical security issues (hardcoded secrets, no rate limiting)", + " • Performance bottlenecks (bcrypt settings, missing indexes)", + "", + "šŸ’” Action Items Generated:", + " • Immediate: Fix hardcoded JWT secret", + " • High Priority: Add rate limiting and database indexes", + " • Monitor: Review OAuth2 configurations" + ]) + + # Step 6: Exit + self.type_question("quit") + + self.show_response([ + "", + "āœ… Exploration session ended.", + "", + "šŸŽ¬ This was Exploration Mode - perfect for learning and debugging!" + ]) + + # Final summary + print() + print("šŸ’” Exploration Mode Benefits:") + print(" 🧠 Thinking-enabled AI for detailed reasoning") + print(" šŸ’­ Conversation memory across questions") + print(" šŸ” Perfect for debugging and understanding") + print(" šŸ“š Educational - learn how code really works") + print(" šŸŽÆ Context-aware follow-up responses") + print() + time.sleep(3) + +def main(): + """Run the exploration mode demo.""" + demo = ExplorationDemoSimulator() + + print("Starting FSS-Mini-RAG Exploration Mode Demo...") + print("Record with: asciinema rec exploration_demo.cast") + print("Press Enter to start...") + input() + + demo.run_exploration_demo() + + print("\nšŸŽÆ To create GIF:") + print("agg exploration_demo.cast exploration_demo.gif") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/create_synthesis_demo.py b/create_synthesis_demo.py new file mode 100644 index 0000000..f8e65bc --- /dev/null +++ b/create_synthesis_demo.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +""" +Create demo GIF for Synthesis Mode - Fast & Consistent RAG Search +Shows the streamlined workflow for quick answers and code discovery. 
+""" + +import time +import sys +import os +from pathlib import Path + +class SynthesisDemoSimulator: + def __init__(self): + self.width = 100 + self.height = 30 + + def clear_screen(self): + print("\033[H\033[2J", end="") + + def type_command(self, command: str, delay: float = 0.05): + """Simulate typing a command.""" + print("$ ", end="", flush=True) + for char in command: + print(char, end="", flush=True) + time.sleep(delay) + print() + time.sleep(0.5) + + def show_output(self, lines: list, delay: float = 0.3): + """Show command output with realistic timing.""" + for line in lines: + print(line) + time.sleep(delay) + time.sleep(1.0) + + def run_synthesis_demo(self): + """Run the synthesis mode demonstration.""" + self.clear_screen() + + # Title + print("šŸš€ FSS-Mini-RAG: Synthesis Mode Demo") + print("=" * 50) + print("Fast & consistent RAG search for quick answers") + print() + time.sleep(2) + + # Step 1: Index a project + print("Step 1: Index a sample project") + print("-" * 30) + self.type_command("rag-mini index ./sample-project") + + self.show_output([ + "šŸ“ Indexing project: sample-project", + "šŸ” Found 12 files to process", + "āœ‚ļø Creating semantic chunks...", + "🧠 Generating embeddings...", + "šŸ’¾ Building vector index...", + "āœ… Indexed 89 chunks from 12 files in 3.2s", + "", + "šŸ’” Try: rag-mini search ./sample-project \"your search here\"" + ]) + + # Step 2: Quick search + print("Step 2: Quick semantic search") + print("-" * 30) + self.type_command("rag-mini search ./sample-project \"user authentication\"") + + self.show_output([ + "šŸ” Searching \"user authentication\" in sample-project", + "āœ… Found 5 results:", + "", + "1. auth/models.py", + " Score: 0.923", + " Lines: 45-62", + " Context: User class", + " Content:", + " class User:", + " def authenticate(self, password):", + " return bcrypt.checkpw(password, self.password_hash)", + "", + "2. auth/views.py", + " Score: 0.887", + " Lines: 23-41", + " Context: login_view function", + " Content:", + " def login_view(request):", + " user = authenticate(username, password)", + " if user:", + " login(request, user)", + "", + "3. middleware/auth.py", + " Score: 0.845", + " Content: Authentication middleware checking..." + ]) + + # Step 3: Search with AI synthesis + print("Step 3: Add AI synthesis for deeper understanding") + print("-" * 50) + self.type_command("rag-mini search ./sample-project \"error handling\" --synthesize") + + self.show_output([ + "šŸ” Searching \"error handling\" in sample-project", + "🧠 Generating LLM synthesis...", + "āœ… Found 4 results:", + "", + "1. utils/exceptions.py", + " Score: 0.934", + " Content: Custom exception classes for API errors...", + "", + "2. 
api/handlers.py", + " Score: 0.889", + " Content: Global exception handler with logging...", + "", + "🧠 LLM SYNTHESIS", + "=" * 50, + "", + "šŸ“ Summary:", + " This codebase implements a robust error handling system with", + " custom exceptions, global handlers, and structured logging.", + "", + "šŸ” Key Findings:", + " • Custom exception hierarchy in utils/exceptions.py", + " • Global error handler catches all API exceptions", + " • Logging integrated with error tracking service", + "", + "šŸ’” Code Patterns:", + " try/except blocks with specific exception types", + " Centralized error response formatting", + "", + "šŸŽÆ Suggested Actions:", + " • Review exception hierarchy for completeness", + " • Consider adding error recovery mechanisms", + "", + "🟢 Confidence: 87%" + ]) + + # Step 4: Show performance + print("Step 4: Performance characteristics") + print("-" * 35) + print("⚔ Synthesis Mode Benefits:") + print(" • Lightning fast responses (no thinking overhead)") + print(" • Consistent, reliable results") + print(" • Perfect for code discovery and quick answers") + print(" • Works great with ultra-efficient models (qwen3:0.6b)") + print() + time.sleep(3) + + # Step 5: When to use + print("šŸ’” When to use Synthesis Mode:") + print(" āœ… Quick code lookups") + print(" āœ… Finding specific functions or classes") + print(" āœ… Understanding code structure") + print(" āœ… Fast documentation searches") + print(" āœ… Batch processing multiple queries") + print() + + print("🧠 For deeper analysis, try: rag-mini explore ./project") + print() + time.sleep(3) + + print("šŸŽ¬ Demo complete! This was Synthesis Mode - optimized for speed.") + +def main(): + """Run the synthesis mode demo.""" + demo = SynthesisDemoSimulator() + + print("Starting FSS-Mini-RAG Synthesis Mode Demo...") + print("Record with: asciinema rec synthesis_demo.cast") + print("Press Enter to start...") + input() + + demo.run_synthesis_demo() + + print("\nšŸŽÆ To create GIF:") + print("agg synthesis_demo.cast synthesis_demo.gif") + +if __name__ == "__main__": + main() \ No newline at end of file
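
For callers outside LLMSynthesizer that want the same protection, here is a
minimal sketch (not part of the patch) of wiring the new safeguards around a
raw Ollama call. It assumes the module path added above; the URL is Ollama's
default endpoint and the model tag is just an example:

    import time
    import requests

    from claude_rag.llm_safeguards import (
        ModelRunawayDetector,
        SafeguardConfig,
        get_optimal_ollama_parameters,
    )

    model = "qwen3:0.6b"  # example tag; any local Ollama model works
    prompt = "How does authentication work in this project?"

    detector = ModelRunawayDetector(SafeguardConfig())
    params = get_optimal_ollama_parameters(model)  # per-model penalties, limits

    start = time.time()
    resp = requests.post(
        "http://localhost:11434/api/generate",  # Ollama default endpoint
        json={
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {
                "temperature": 0.3,
                "top_p": params.get("top_p", 0.9),
                "top_k": params.get("top_k", 40),
                "num_ctx": params.get("num_ctx", 32768),
                "num_predict": params.get("num_predict", 2000),
                "repeat_penalty": params.get("repeat_penalty", 1.1),
                "presence_penalty": params.get("presence_penalty", 1.0),
            },
        },
        timeout=65,  # a little above SafeguardConfig.max_response_time, as in the patch
    )
    text = resp.json().get("response", "").strip()

    # Validate before the response reaches the user; on failure, surface the
    # explanation plus concrete recovery suggestions instead.
    ok, issue, explanation = detector.check_response_quality(text, prompt, start)
    if ok:
        print(text)
    else:
        print(explanation)
        for tip in detector.get_recovery_suggestions(issue, prompt):
            print(f"  - {tip}")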
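
And a quick way to sanity-check that the per-size tiers in the commit message
match what get_optimal_ollama_parameters actually returns (model names here
are illustrative):

    from claude_rag.llm_safeguards import get_optimal_ollama_parameters

    for name in ("qwen3:0.6b", "qwen3:1.7b", "qwen3:8b"):
        p = get_optimal_ollama_parameters(name)
        print(name, p["repeat_penalty"], p["presence_penalty"], p.get("num_predict"))

    # Expected, per this patch:
    #   qwen3:0.6b -> 1.15 / 1.5 / 1500
    #   qwen3:1.7b -> 1.1  / 1.0 / 2000
    #   qwen3:8b   -> 1.05 / 0.5 / 3000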