Fss-Rag-Mini/tests/02_search_examples.py
BobAi c201b3badd Fix critical deployment issues and improve system reliability
Major fixes:
- Fix model selection to prioritize qwen3:1.7b instead of qwen3:4b for testing
- Correct context length from 80,000 to 32,000 tokens (proper Qwen3 limit)
- Implement content-preserving safeguards instead of dropping responses
- Fix all test imports from claude_rag to mini_rag module naming
- Add virtual environment warnings to all test entry points
- Fix TUI EOF crash handling with proper error handling
- Remove warmup delays that were causing startup lag and unwanted model calls
- Fix command mappings between bash wrapper and Python script
- Update documentation to reflect qwen3:1.7b as primary recommendation
- Improve TUI box alignment and formatting
- Make language generic for any documents, not just codebases
- Add proper folder names in user feedback instead of generic terms

Technical improvements:
- Unified model rankings across all components
- Better error handling for missing dependencies
- Comprehensive testing and validation of all fixes
- All tests now pass and system is deployment-ready

All major crashes and deployment issues resolved.
2025-08-15 09:47:15 +10:00

135 lines
4.5 KiB
Python

#!/usr/bin/env python3
"""
Simple demo of the hybrid search system showing real results.
"""
import sys
from pathlib import Path
from rich.console import Console
from rich.syntax import Syntax
from rich.panel import Panel
from rich.table import Table
from mini_rag.search import CodeSearcher
console = Console()
def demo_search(project_path: Path):
"""Run demo searches showing the hybrid system in action."""
console.print("\n[bold cyan]Mini RAG Hybrid Search Demo[/bold cyan]\n")
# Initialize searcher
console.print("Initializing search system...")
searcher = CodeSearcher(project_path)
# Get index stats
stats = searcher.get_statistics()
if 'error' not in stats:
console.print(f"\n[green] Index ready:[/green] {stats['total_chunks']} chunks from {stats['unique_files']} files")
console.print(f"[dim]Languages: {', '.join(stats['languages'].keys())}[/dim]")
console.print(f"[dim]Chunk types: {', '.join(stats['chunk_types'].keys())}[/dim]\n")
# Demo queries
demos = [
{
'title': 'Keyword-Heavy Search',
'query': 'BM25Okapi rank_bm25 search scoring',
'description': 'This query has specific technical keywords that BM25 excels at finding',
'top_k': 5
},
{
'title': 'Natural Language Query',
'query': 'how to build search index from database chunks',
'description': 'This semantic query benefits from transformer embeddings understanding intent',
'top_k': 5
},
{
'title': 'Mixed Technical Query',
'query': 'vector embeddings for semantic code search with transformers',
'description': 'This hybrid query combines technical terms with conceptual understanding',
'top_k': 5
},
{
'title': 'Function Search',
'query': 'search method implementation with filters',
'description': 'Looking for specific function implementations',
'top_k': 5
}
]
for demo in demos:
console.rule(f"\n[bold yellow]{demo['title']}[/bold yellow]")
console.print(f"[dim]{demo['description']}[/dim]")
console.print(f"\n[cyan]Query:[/cyan] '{demo['query']}'")
# Run search with hybrid mode
results = searcher.search(
query=demo['query'],
top_k=demo['top_k'],
semantic_weight=0.7,
bm25_weight=0.3
)
if not results:
console.print("[red]No results found![/red]")
continue
console.print(f"\n[green]Found {len(results)} results:[/green]\n")
# Show each result
for i, result in enumerate(results, 1):
# Create result panel
header = f"#{i} {result.file_path}:{result.start_line}-{result.end_line}"
# Get code preview
lines = result.content.splitlines()
if len(lines) > 10:
preview_lines = lines[:8] + ['...'] + lines[-2:]
else:
preview_lines = lines
preview = '\n'.join(preview_lines)
# Create info table
info = Table.grid(padding=0)
info.add_column(style="cyan", width=12)
info.add_column(style="white")
info.add_row("Score:", f"{result.score:.3f}")
info.add_row("Type:", result.chunk_type)
info.add_row("Name:", result.name or "N/A")
info.add_row("Language:", result.language)
# Display result
console.print(Panel(
f"{info}\n\n[dim]{preview}[/dim]",
title=header,
title_align="left",
border_style="blue"
))
# Show scoring breakdown for top result
if results:
console.print("\n[dim]Top result hybrid score: {:.3f} (70% semantic + 30% BM25)[/dim]".format(results[0].score))
def main():
"""Run the demo."""
if len(sys.argv) > 1:
project_path = Path(sys.argv[1])
else:
# Use the RAG system itself as the demo project
project_path = Path(__file__).parent
if not (project_path / '.mini-rag').exists():
console.print("[red]Error: No RAG index found. Run 'mini-rag index' first.[/red]")
console.print(f"[dim]Looked in: {project_path / '.mini-rag'}[/dim]")
return
demo_search(project_path)
if __name__ == "__main__":
main()