Fss-Rag-Mini/tests/02_search_examples.py
BobAi 4166d0a362 Initial release: FSS-Mini-RAG - Lightweight semantic code search system
🎯 Complete transformation from 5.9GB bloated system to 70MB optimized solution

Key Features:
- Hybrid embedding system (Ollama + ML fallback + hash backup)
- Intelligent chunking with language-aware parsing
- Semantic + BM25 hybrid search with rich context
- Zero-config portable design with graceful degradation
- Beautiful TUI for beginners + powerful CLI for experts
- Comprehensive documentation with 8+ Mermaid diagrams
- Professional animated demo (183KB optimized GIF)

🏗️ Architecture Highlights:
- LanceDB vector storage with streaming indexing
- Smart file tracking (size/mtime) to avoid expensive rehashing
- Progressive chunking: Markdown headers → Python functions → fixed-size
- Quality filtering: 200+ chars, 20+ words, 30% alphanumeric content
- Concurrent batch processing with error recovery

📦 Package Contents:
- Core engine: claude_rag/ (11 modules, 2,847 lines)
- Entry points: rag-mini (unified), rag-tui (beginner interface)
- Documentation: README + 6 guides with visual diagrams
- Assets: 3D icon, optimized demo GIF, recording tools
- Tests: 8 comprehensive integration and validation tests
- Examples: Usage patterns, config templates, dependency analysis

🎥 Demo System:
- Scripted demonstration showing 12 files → 58 chunks indexing
- Semantic search with multi-line result previews
- Complete workflow from TUI startup to CLI mastery
- Professional recording pipeline with asciinema + GIF conversion

🛡️ Security & Quality:
- Complete .gitignore with personal data protection
- Dependency optimization (removed python-dotenv)
- Code quality validation and educational test suite
- Agent-reviewed architecture and documentation

Ready for production use - copy folder, run ./rag-mini, start searching!
2025-08-12 16:38:28 +10:00

135 lines
4.5 KiB
Python

#!/usr/bin/env python3
"""
Simple demo of the hybrid search system showing real results.
"""
import sys
from pathlib import Path

from rich.console import Console, Group
from rich.panel import Panel
from rich.syntax import Syntax
from rich.table import Table
from rich.text import Text

from claude_rag.search import CodeSearcher
# Shared Rich console; demo_search() and main() print all of their output through it.
console = Console()
def _build_preview(content: str) -> str:
    """Return a short preview of *content*.

    Content longer than 10 lines is cut down to its first 8 lines, a ``...``
    marker, and its last 2 lines; shorter content is kept whole.
    """
    lines = content.splitlines()
    if len(lines) > 10:
        lines = lines[:8] + ['...'] + lines[-2:]
    return '\n'.join(lines)


def _render_result(index: int, result) -> Panel:
    """Build the panel shown for one search hit.

    Args:
        index: 1-based position of the hit in the result list.
        result: Search hit exposing ``file_path``, ``start_line``, ``end_line``,
            ``content``, ``score``, ``chunk_type``, ``name`` and ``language``.
    """
    info = Table.grid(padding=0)
    info.add_column(style="cyan", width=12)
    info.add_column(style="white")
    info.add_row("Score:", f"{result.score:.3f}")
    info.add_row("Type:", result.chunk_type)
    info.add_row("Name:", result.name or "N/A")
    info.add_row("Language:", result.language)

    # The table must be passed as a renderable: interpolating it into a string
    # would only show its repr.  The preview and the title are wrapped in Text
    # so that square brackets in source code or file paths are shown literally
    # instead of being parsed as Rich markup.
    body = Group(
        info,
        Text(""),
        Text(_build_preview(result.content), style="dim"),
    )
    header = Text(f"#{index} {result.file_path}:{result.start_line}-{result.end_line}")
    return Panel(
        body,
        title=header,
        title_align="left",
        border_style="blue",
    )


def demo_search(project_path: Path):
    """Run demo searches showing the hybrid system in action.

    Creates a ``CodeSearcher`` for *project_path*, prints the index statistics
    (unless the statistics report an error), then runs a fixed set of example
    queries and prints every hit as a panel.

    Args:
        project_path: Project directory handed to ``CodeSearcher``.
    """
    # Weights passed to every search; the summary line is derived from them so
    # the printed percentages cannot drift from the values actually used.
    semantic_weight = 0.7
    bm25_weight = 0.3

    console.print("\n[bold cyan]Claude RAG Hybrid Search Demo[/bold cyan]\n")

    # Initialize searcher
    console.print("Initializing search system...")
    searcher = CodeSearcher(project_path)

    # Get index stats
    stats = searcher.get_statistics()
    if 'error' not in stats:
        console.print(f"\n[green] Index ready:[/green] {stats['total_chunks']} chunks from {stats['unique_files']} files")
        console.print(f"[dim]Languages: {', '.join(stats['languages'])}[/dim]")
        console.print(f"[dim]Chunk types: {', '.join(stats['chunk_types'])}[/dim]\n")

    # Demo queries
    demos = [
        {
            'title': 'Keyword-Heavy Search',
            'query': 'BM25Okapi rank_bm25 search scoring',
            'description': 'This query has specific technical keywords that BM25 excels at finding',
            'limit': 5,
        },
        {
            'title': 'Natural Language Query',
            'query': 'how to build search index from database chunks',
            'description': 'This semantic query benefits from transformer embeddings understanding intent',
            'limit': 5,
        },
        {
            'title': 'Mixed Technical Query',
            'query': 'vector embeddings for semantic code search with transformers',
            'description': 'This hybrid query combines technical terms with conceptual understanding',
            'limit': 5,
        },
        {
            'title': 'Function Search',
            'query': 'search method implementation with filters',
            'description': 'Looking for specific function implementations',
            'limit': 5,
        },
    ]

    for demo in demos:
        console.rule(f"\n[bold yellow]{demo['title']}[/bold yellow]")
        console.print(f"[dim]{demo['description']}[/dim]")
        console.print(f"\n[cyan]Query:[/cyan] '{demo['query']}'")

        # Run search with hybrid mode
        results = searcher.search(
            query=demo['query'],
            limit=demo['limit'],
            semantic_weight=semantic_weight,
            bm25_weight=bm25_weight,
        )
        if not results:
            console.print("[red]No results found![/red]")
            continue

        console.print(f"\n[green]Found {len(results)} results:[/green]\n")

        # Show each result
        for i, result in enumerate(results, 1):
            console.print(_render_result(i, result))

        # Show scoring breakdown for top result (results is non-empty here)
        console.print(
            f"\n[dim]Top result hybrid score: {results[0].score:.3f} "
            f"({semantic_weight:.0%} semantic + {bm25_weight:.0%} BM25)[/dim]"
        )
def main():
    """Choose the project directory and run the demo against it.

    The directory is the first command-line argument when one is given,
    otherwise the directory containing this script.  When that directory has
    no ``.claude-rag`` entry, an error is printed and the demo is not run.
    """
    # NOTE(review): the fallback is this script's own directory; if the script
    # lives in a subdirectory of the project, confirm that is the intended root.
    project_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path(__file__).parent

    index_dir = project_path / '.claude-rag'
    if index_dir.exists():
        demo_search(project_path)
        return

    console.print("[red]Error: No RAG index found. Run 'claude-rag index' first.[/red]")
    console.print(f"[dim]Looked in: {index_dir}[/dim]")
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()