Complete rebrand to eliminate any Claude/Anthropic references: Directory Changes: - claude_rag/ → mini_rag/ (preserving git history) Content Changes: - Replaced 930+ Claude references across 40+ files - Updated all imports: from claude_rag → from mini_rag - Updated all file paths: .claude-rag → .mini-rag - Updated documentation and comments - Updated configuration files and examples Testing Changes: - All tests updated to use mini_rag imports - Integration tests verify new module structure This ensures complete independence from Claude/Anthropic branding while maintaining all functionality and git history.
401 lines
16 KiB
Python
401 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
rag-mini - FSS-Mini-RAG Command Line Interface
|
|
|
|
A lightweight, portable RAG system for semantic code search.
|
|
Usage: rag-mini <command> <project_path> [options]
|
|
"""
|
|
|
|
import sys
|
|
import argparse
|
|
from pathlib import Path
|
|
import json
|
|
import logging
|
|
|
|
# Add the RAG system to the path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from mini_rag.indexer import ProjectIndexer
|
|
from mini_rag.search import CodeSearcher
|
|
from mini_rag.ollama_embeddings import OllamaEmbedder
|
|
from mini_rag.llm_synthesizer import LLMSynthesizer
|
|
from mini_rag.explorer import CodeExplorer
|
|
|
|
# Keep console output quiet by default: only WARNING and above are shown,
# formatted as "<LEVEL>: <message>".
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.WARNING,
)
logger = logging.getLogger(__name__)
|
|
|
|
def index_project(project_path: Path, force: bool = False):
    """Build or refresh the search index for a project and report the outcome.

    Prints a banner, runs ``ProjectIndexer(project_path).index_project`` and
    summarises the statistics dictionary it returns (``files_indexed``,
    ``chunks_created``, ``time_taken``, ``files_failed``).  If anything raises,
    the error is printed together with troubleshooting hints and the process
    exits with status 1.

    Args:
        project_path: Root directory of the project to index.
        force: Passed through as ``force_reindex``; also switches the banner
            wording to "Re-indexing".
    """
    try:
        verb = "Re-indexing" if force else "Indexing"
        print(f"🚀 {verb} {project_path.name}")

        # An existing index directory without --force means an incremental run.
        index_dir = project_path / '.mini-rag'
        if not force and index_dir.exists():
            print(" Checking for changes...")

        stats = ProjectIndexer(project_path).index_project(force_reindex=force)
        indexed = stats.get('files_indexed', 0)
        chunks = stats.get('chunks_created', 0)
        elapsed = stats.get('time_taken', 0)

        if indexed != 0:
            print(f"✅ Indexed {indexed} files in {elapsed:.1f}s")
            print(f" Created {chunks} chunks")
            # Throughput is only meaningful when a duration was reported.
            if elapsed > 0:
                rate = indexed / elapsed
                print(f" Speed: {rate:.1f} files/sec")
        else:
            print("✅ Index up to date - no changes detected")

        failures = stats.get('files_failed', 0)
        if failures > 0:
            print(f"⚠️ {failures} files failed (check logs with --verbose)")

        # No recorded search yet: nudge the user towards the search command.
        if not (index_dir / 'last_search').exists():
            print(f"\n💡 Try: rag-mini search {project_path} \"your search here\"")

    except Exception as e:
        failure_help = (
            f"❌ Indexing failed: {e}",
            "",
            "🔧 Common solutions:",
            " • Check if path exists and you have read permissions",
            " • Ensure Python dependencies are installed: pip install -r requirements.txt",
            " • Try with smaller project first to test setup",
            " • Check available disk space for index files",
            "",
            "📚 For detailed help:",
            f" ./rag-mini index {project_path} --verbose",
            " Or see: docs/TROUBLESHOOTING.md",
        )
        for line in failure_help:
            print(line)
        sys.exit(1)
|
|
|
|
def _search_display_path(file_path, project_path: Path):
    """Return ``file_path`` relative to the project root when that is possible.

    Relative paths are returned unchanged.  Absolute paths are made relative to
    ``project_path`` as given, then to its resolved form (covers a project path
    passed as e.g. ``.``); if neither works the absolute path is shown as-is
    instead of letting ``ValueError`` abort the whole search.
    """
    if not file_path.is_absolute():
        return file_path
    try:
        return file_path.relative_to(project_path)
    except ValueError:
        pass
    try:
        return file_path.relative_to(project_path.resolve())
    except (ValueError, OSError, RuntimeError):
        return file_path


def _print_no_results_help(project_path: Path, query: str) -> None:
    """Print the suggestions shown when a search returns no results."""
    print("❌ No results found")
    print()
    print("🔧 Quick fixes to try:")
    print(" • Use broader terms: \"login\" instead of \"authenticate_user_session\"")
    print(" • Try concepts: \"database query\" instead of specific function names")
    print(" • Check spelling and try simpler words")
    print(" • Search for file types: \"python class\" or \"javascript function\"")
    print()
    print("⚙️ Configuration adjustments:")
    # NOTE(review): the argument parser in this file's main() registers no
    # --threshold option, so this suggested command looks like it would be
    # rejected - confirm, then either add the option or drop the hint.
    print(f" • Lower threshold: ./rag-mini search {project_path} \"{query}\" --threshold 0.05")
    print(" • More results: add --limit 20")
    print()
    print("📚 Need help? See: docs/TROUBLESHOOTING.md")


def _print_search_result(position: int, result, project_path: Path, preview_lines: int = 10) -> None:
    """Print one search hit: path, score, optional line range/name, and a preview.

    At most ``preview_lines`` lines of ``result.content`` are shown; the number
    of omitted lines is reported afterwards.
    """
    print(f"{position}. {_search_display_path(result.file_path, project_path)}")
    print(f" Score: {result.score:.3f}")

    # start_line / name are optional on a result and may be absent or empty.
    if getattr(result, 'start_line', None):
        print(f" Lines: {result.start_line}-{result.end_line}")
    if getattr(result, 'name', None):
        print(f" Context: {result.name}")

    print(" Content:")
    content_lines = result.content.strip().split('\n')
    for line in content_lines[:preview_lines]:
        print(f" {line}")

    hidden = len(content_lines) - preview_lines
    if hidden > 0:
        print(f" ... ({hidden} more lines)")
        print(" Use --verbose or rag-mini-enhanced for full context")

    print()


def _print_llm_synthesis(project_path: Path, query: str, results) -> None:
    """Run ``LLMSynthesizer`` over the results and print its formatted output.

    Prints setup hints instead when the synthesizer reports itself unavailable.
    """
    print("🧠 Generating LLM synthesis...")
    synthesizer = LLMSynthesizer()

    if not synthesizer.is_available():
        print("❌ LLM synthesis unavailable")
        print(" • Ensure Ollama is running: ollama serve")
        print(" • Install a model: ollama pull llama3.2")
        print(" • Check connection to http://localhost:11434")
        return

    synthesis = synthesizer.synthesize_search_results(query, results, project_path)
    print()
    print(synthesizer.format_synthesis_output(synthesis, query))

    # Point to exploration mode for low-confidence answers or "reasoning" queries.
    lowered = query.lower()
    wants_reasoning = any(word in lowered for word in ('why', 'how', 'explain', 'debug'))
    if synthesis.confidence < 0.7 or wants_reasoning:
        print("\n💡 Want deeper analysis with reasoning?")
        print(f" Try: rag-mini explore {project_path}")
        print(" Exploration mode enables thinking and remembers conversation context.")


def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False) -> None:
    """Search an indexed project and print the matching chunks.

    Exits with status 1 when the project has no ``.mini-rag`` directory or when
    the search raises.  When nothing matches, prints suggestions and returns
    without recording the query.  After a successful search the query is
    written to ``.mini-rag/last_search`` on a best-effort basis.

    Args:
        project_path: Root directory of the indexed project.
        query: Search text, passed to ``CodeSearcher.search``.
        limit: Maximum number of results requested (``top_k``).
        synthesize: When true, also print an LLM synthesis of the results.
    """
    try:
        rag_dir = project_path / '.mini-rag'
        if not rag_dir.exists():
            print(f"❌ Project not indexed: {project_path.name}")
            print(f" Run: rag-mini index {project_path}")
            sys.exit(1)

        print(f"🔍 Searching \"{query}\" in {project_path.name}")
        searcher = CodeSearcher(project_path)
        results = searcher.search(query, top_k=limit)

        if not results:
            _print_no_results_help(project_path, query)
            return

        print(f"✅ Found {len(results)} results:")
        print()
        for position, result in enumerate(results, 1):
            _print_search_result(position, result, project_path)

        if synthesize:
            _print_llm_synthesis(project_path, query, results)

        # Best effort only: failing to record the query must not fail the
        # search.  Catch just I/O and encoding errors so that Ctrl-C and
        # SystemExit are no longer swallowed as they were by a bare except.
        try:
            (rag_dir / 'last_search').write_text(query)
        except (OSError, ValueError):
            pass

    except Exception as e:
        print(f"❌ Search failed: {e}")
        print()

        if "not indexed" in str(e).lower():
            print("🔧 Solution:")
            print(f" ./rag-mini index {project_path}")
            print()
        else:
            print("🔧 Common solutions:")
            print(" • Check project path exists and is readable")
            print(" • Verify index isn't corrupted: delete .mini-rag/ and re-index")
            print(" • Try with a different project to test setup")
            print(" • Check available memory and disk space")
            print()
            print("📚 Get detailed error info:")
            print(f" ./rag-mini search {project_path} \"{query}\" --verbose")
            print(" Or see: docs/TROUBLESHOOTING.md")
            print()
        sys.exit(1)
|
|
|
|
def _print_index_summary(rag_dir: Path, project_path: Path) -> None:
    """Print whether the project is indexed, using ``manifest.json`` if present."""
    if not rag_dir.exists():
        print("❌ Project not indexed")
        print(f" Run: rag-mini index {project_path}")
        print()
        return

    manifest = rag_dir / 'manifest.json'
    if not manifest.exists():
        print("⚠️ Index directory exists but incomplete")
        print(f" Try: rag-mini index {project_path} --force")
        print()
        return

    # Read and validate everything before printing, so a malformed manifest
    # never produces a half-printed "indexed" report followed by a warning.
    try:
        with open(manifest) as f:
            data = json.load(f)
        file_count = data.get('file_count', 0)
        chunk_count = data.get('chunk_count', 0)
        indexed_at = data.get('indexed_at', 'Never')
        avg_chunks = chunk_count / file_count if file_count > 0 else None
    except (OSError, ValueError, AttributeError, TypeError):
        # Unreadable file, invalid JSON, or JSON of an unexpected shape.
        print("⚠️ Index exists but manifest unreadable")
        print()
        return

    print("✅ Project indexed")
    print(f" Files: {file_count}")
    print(f" Chunks: {chunk_count}")
    print(f" Last update: {indexed_at}")
    if avg_chunks is not None:
        print(f" Avg chunks/file: {avg_chunks:.1f}")
    print()


def _print_embedding_status() -> None:
    """Print which embedding method ``OllamaEmbedder`` reports."""
    print("🧠 Embedding System:")
    try:
        emb_info = OllamaEmbedder().get_embedding_info()
        method = emb_info.get('method', 'unknown')

        if method == 'ollama':
            print(" ✅ Ollama (high quality)")
        elif method == 'ml':
            print(" ✅ ML fallback (good quality)")
        elif method == 'hash':
            print(" ⚠️ Hash fallback (basic quality)")
        else:
            print(f" ❓ Unknown method: {method}")

        if 'model' in emb_info:
            print(f" Model: {emb_info['model']}")

    except Exception as e:
        # Diagnostic boundary: report any embedder failure and keep going so
        # the rest of the status output is still shown.
        print(f" ❌ Status check failed: {e}")


def status_check(project_path: Path) -> None:
    """Print index, embedding-system and last-search status for a project.

    Exits with status 1 only if an unexpected exception escapes; problems with
    the manifest, the embedder or the ``last_search`` file are reported (or
    skipped) without aborting.

    Args:
        project_path: Root directory of the project to inspect.
    """
    try:
        print(f"📊 Status for {project_path.name}")
        print()

        rag_dir = project_path / '.mini-rag'
        _print_index_summary(rag_dir, project_path)
        _print_embedding_status()

        last_search_file = rag_dir / 'last_search'
        if last_search_file.exists():
            # Purely informational: skip it on read/decode/encode problems,
            # but do not swallow Ctrl-C or SystemExit as a bare except would.
            try:
                last_query = last_search_file.read_text().strip()
                print(f"\n🔍 Last search: \"{last_query}\"")
            except (OSError, ValueError):
                pass

    except Exception as e:
        print(f"❌ Status check failed: {e}")
        sys.exit(1)
|
|
|
|
def explore_interactive(project_path: Path):
    """Run a question/answer loop against the project via ``CodeExplorer``.

    Starts an exploration session, then repeatedly reads a line from stdin.
    ``quit``/``exit``/``q`` end the session, ``help``/``h`` print usage,
    ``summary`` prints the session summary, and anything else is passed to
    ``explore_question``.  Ctrl-C or end-of-input also end the session.  If the
    session cannot be started, or setup raises, the process exits with
    status 1.

    Args:
        project_path: Root directory of the project to explore.
    """
    help_text = "\n".join((
        "",
        "🧠 EXPLORATION MODE HELP:",
        "• Ask any question about the codebase",
        "• I remember our conversation for follow-up questions",
        "• Use 'why', 'how', 'explain' for detailed reasoning",
        "• Type 'summary' to see session overview",
        "• Type 'quit' or 'exit' to end session",
        "",
        "💡 Example questions:",
        "• \"How does authentication work?\"",
        "• \"Why is this function slow?\"",
        "• \"Explain the database connection logic\"",
        "• \"What are the security concerns here?\"",
        "",
    ))

    try:
        explorer = CodeExplorer(project_path)
        started = explorer.start_exploration_session()
        if not started:
            sys.exit(1)

        print("\n🤔 Ask your first question about the codebase:")

        while True:
            try:
                question = input("\n> ").strip()
                command = question.lower()

                if command in ('quit', 'exit', 'q'):
                    print("\n" + explorer.end_session())
                    break

                if not question:
                    print("Please enter a question or 'quit' to exit.")
                    continue

                if command in ('help', 'h'):
                    print(help_text)
                    continue

                if command == 'summary':
                    print("\n" + explorer.get_session_summary())
                    continue

                # Anything else is treated as a question for the explorer.
                print("\n🔍 Analyzing...")
                answer = explorer.explore_question(question)
                if not answer:
                    print("❌ Sorry, I couldn't process that question. Please try again.")
                else:
                    print(f"\n{answer}")

            except (KeyboardInterrupt, EOFError):
                # Ctrl-C and end-of-input both close the session cleanly.
                print(f"\n\n{explorer.end_session()}")
                break
            except Exception as e:
                # A failed question should not end the session.
                print(f"❌ Error processing question: {e}")
                print("Please try again or type 'quit' to exit.")

    except Exception as e:
        print(f"❌ Failed to start exploration mode: {e}")
        print("Make sure the project is indexed first: rag-mini index <project>")
        sys.exit(1)
|
|
|
|
def main():
    """Parse the command line and dispatch to the selected sub-command.

    Positional arguments are the command (``index``, ``search``, ``explore``
    or ``status``), the project path, and an optional query.  Exits with
    status 1 when the project path does not exist or is not a directory, or
    when ``search`` is given without a query.
    """
    usage_examples = "\n".join((
        "",
        "Examples:",
        "rag-mini index /path/to/project # Index a project",
        "rag-mini search /path/to/project \"query\" # Search indexed project",
        "rag-mini search /path/to/project \"query\" -s # Search with LLM synthesis",
        "rag-mini explore /path/to/project # Interactive exploration mode",
        "rag-mini status /path/to/project # Show status",
        "",
    ))

    cli = argparse.ArgumentParser(
        description="FSS-Mini-RAG - Lightweight semantic code search",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # Positionals first (order matters), then the optional flags.
    cli.add_argument(
        'command',
        choices=['index', 'search', 'explore', 'status'],
        help='Command to execute',
    )
    cli.add_argument(
        'project_path',
        type=Path,
        help='Path to project directory (REQUIRED)',
    )
    cli.add_argument(
        'query',
        nargs='?',
        help='Search query (for search command)',
    )
    cli.add_argument(
        '--force',
        action='store_true',
        help='Force reindex all files',
    )
    cli.add_argument(
        '--limit',
        type=int,
        default=10,
        help='Maximum number of search results',
    )
    cli.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging',
    )
    cli.add_argument(
        '--synthesize', '-s',
        action='store_true',
        help='Generate LLM synthesis of search results (requires Ollama)',
    )

    options = cli.parse_args()

    # --verbose raises the root logger from the default level to INFO.
    if options.verbose:
        logging.getLogger().setLevel(logging.INFO)

    target = options.project_path
    if not target.exists():
        print(f"❌ Project path does not exist: {target}")
        sys.exit(1)
    if not target.is_dir():
        print(f"❌ Project path is not a directory: {target}")
        sys.exit(1)

    command = options.command
    if command == 'search':
        # The query positional is optional for argparse but mandatory here.
        if not options.query:
            print("❌ Search query required")
            sys.exit(1)
        search_project(target, options.query, options.limit, options.synthesize)
    elif command == 'index':
        index_project(target, options.force)
    elif command == 'status':
        status_check(target)
    elif command == 'explore':
        explore_interactive(target)
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()