diff --git a/.mini-rag/config.yaml b/.mini-rag/config.yaml
new file mode 100644
index 0000000..4f552fe
--- /dev/null
+++ b/.mini-rag/config.yaml
@@ -0,0 +1,53 @@
+# FSS-Mini-RAG Configuration
+# Edit this file to customize indexing and search behavior
+# See docs/GETTING_STARTED.md for detailed explanations
+
+# Text chunking settings
+chunking:
+ max_size: 2000 # Maximum characters per chunk
+ min_size: 150 # Minimum characters per chunk
+ strategy: semantic # 'semantic' (language-aware) or 'fixed'
+
+# Large file streaming settings
+streaming:
+ enabled: true
+ threshold_bytes: 1048576 # Files larger than this use streaming (1MB)
+
+# File processing settings
+files:
+ min_file_size: 50 # Skip files smaller than this
+ exclude_patterns:
+ - "node_modules/**"
+ - ".git/**"
+ - "__pycache__/**"
+ - "*.pyc"
+ - ".venv/**"
+ - "venv/**"
+ - "build/**"
+ - "dist/**"
+ include_patterns:
+ - "**/*" # Include all files by default
+
+# Embedding generation settings
+embedding:
+ preferred_method: ollama # 'ollama', 'ml', 'hash', or 'auto'
+ ollama_model: nomic-embed-text
+ ollama_host: localhost:11434
+ ml_model: sentence-transformers/all-MiniLM-L6-v2
+ batch_size: 32 # Embeddings processed per batch
+
+# Search behavior settings
+search:
+ default_top_k: 10 # Default number of top results
+ enable_bm25: true # Enable keyword matching boost
+ similarity_threshold: 0.1 # Minimum similarity score
+ expand_queries: false # Enable automatic query expansion
+
+# LLM synthesis and query expansion settings
+llm:
+ ollama_host: localhost:11434
+ synthesis_model: auto # 'auto', 'qwen3:1.7b', etc.
+ expansion_model: auto # Usually same as synthesis_model
+ max_expansion_terms: 8 # Maximum terms to add to queries
+ enable_synthesis: false # Enable synthesis by default
+ synthesis_temperature: 0.3 # LLM temperature for analysis
\ No newline at end of file
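For reference, a minimal sketch of reading this new config file from Python. This assumes PyYAML is available; the project's real loader is `ConfigManager` in `mini_rag/config.py`.

```python
# Hedged sketch: read .mini-rag/config.yaml directly (illustrative only;
# the project's own loader is ConfigManager in mini_rag/config.py).
from pathlib import Path
import yaml  # assumes PyYAML is installed

cfg = yaml.safe_load(Path(".mini-rag/config.yaml").read_text())
print(cfg["search"]["default_top_k"])   # -> 10
print(cfg["llm"]["synthesis_model"])    # -> "auto"
```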
diff --git a/.mini-rag/last_search b/.mini-rag/last_search
new file mode 100644
index 0000000..30d74d2
--- /dev/null
+++ b/.mini-rag/last_search
@@ -0,0 +1 @@
+test
\ No newline at end of file
diff --git a/docs/CPU_DEPLOYMENT.md b/docs/CPU_DEPLOYMENT.md
index cd3da53..48458be 100644
--- a/docs/CPU_DEPLOYMENT.md
+++ b/docs/CPU_DEPLOYMENT.md
@@ -67,7 +67,7 @@ llm:
# Aggressive caching for CPU systems
search:
expand_queries: false # Enable only in TUI
- default_limit: 8 # Slightly fewer results for speed
+ default_top_k: 8 # Slightly fewer results for speed
```
## System Requirements
diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md
index 38b93be..63af487 100644
--- a/docs/GETTING_STARTED.md
+++ b/docs/GETTING_STARTED.md
@@ -125,7 +125,7 @@ print(f"Indexed {result['files_processed']} files, {result['chunks_created']} ch
# Search
print("\nSearching for authentication code...")
-results = searcher.search("user authentication logic", limit=5)
+results = searcher.search("user authentication logic", top_k=5)
for i, result in enumerate(results, 1):
print(f"\n{i}. {result.file_path}")
diff --git a/docs/TECHNICAL_GUIDE.md b/docs/TECHNICAL_GUIDE.md
index a92410f..16d73c3 100644
--- a/docs/TECHNICAL_GUIDE.md
+++ b/docs/TECHNICAL_GUIDE.md
@@ -421,7 +421,7 @@ def _create_vector_table(self, chunks: List[CodeChunk], embeddings: np.ndarray):
return table
-def vector_search(self, query_embedding: np.ndarray, limit: int) -> List[SearchResult]:
+def vector_search(self, query_embedding: np.ndarray, top_k: int) -> List[SearchResult]:
"""Fast vector similarity search."""
table = self.db.open_table("chunks")
@@ -794,12 +794,12 @@ def repair_index(self, project_path: Path) -> bool:
FSS-Mini-RAG works well with various LLM sizes because our rich context and guided prompts help small models perform excellently:
**Recommended (Best Balance):**
-- **qwen3:4b** - Excellent quality, good performance
-- **qwen3:4b:q8_0** - High-precision quantized version for production
+- **qwen3:1.7b** - Excellent quality with fast performance (default priority)
+- **qwen3:0.6b** - Surprisingly good for CPU-only systems (522MB)
-**Still Excellent (Faster/CPU-friendly):**
-- **qwen3:1.7b** - Very good results, faster responses
-- **qwen3:0.6b** - Surprisingly good considering size (522MB)
+**Still Excellent (Slower but highest quality):**
+- **qwen3:4b** - Highest quality, slower responses
+- **qwen3:4b:q8_0** - High-precision quantized version for production
### Why Small Models Work Well Here
@@ -813,7 +813,7 @@ Without good context, small models tend to get lost and produce erratic output.
### Quantization Benefits
-For production deployments, consider quantized models like `qwen3:4b:q8_0`:
+For production deployments, consider quantized models like `qwen3:1.7b:q8_0` or `qwen3:4b:q8_0`:
- **Q8_0**: 8-bit quantization with minimal quality loss
- **Smaller memory footprint**: ~50% reduction vs full precision
- **Better CPU performance**: Faster inference on CPU-only systems
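A small hedged sketch of how a deployment script might check which quantized variants are already pulled, using Ollama's standard `/api/tags` endpoint; the host and the exact tag spelling of quantized builds (often `-q8_0`) are assumptions.

```python
# Hedged sketch: list locally pulled Ollama models and prefer a q8_0 variant if present.
import requests

resp = requests.get("http://localhost:11434/api/tags", timeout=5)
names = [m["name"] for m in resp.json().get("models", [])]
quantized = [n for n in names if "q8_0" in n]
print(quantized or names)  # e.g. ['qwen3:4b-q8_0'] if a quantized build is available
```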
diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md
index d3e5d7a..6ab3416 100644
--- a/docs/TROUBLESHOOTING.md
+++ b/docs/TROUBLESHOOTING.md
@@ -110,7 +110,7 @@ python3 -c "import mini_rag; print('✅ Installation successful')"
2. **Reduce result limit:**
```yaml
search:
- default_limit: 5 # Instead of 10
+ default_top_k: 5 # Instead of 10
```
3. **Use faster embedding method:**
@@ -165,9 +165,9 @@ python3 -c "import mini_rag; print('✅ Installation successful')"
2. **Try different model:**
```bash
- ollama pull qwen3:4b # Recommended: excellent quality
- ollama pull qwen3:1.7b # Still very good, faster
- ollama pull qwen3:0.6b # Surprisingly good for CPU-only
+ ollama pull qwen3:1.7b # Recommended: excellent quality (default priority)
+ ollama pull qwen3:0.6b # Surprisingly good for CPU-only
+ ollama pull qwen3:4b # Highest quality, slower
```
3. **Use synthesis mode instead of exploration:**
diff --git a/docs/TUI_GUIDE.md b/docs/TUI_GUIDE.md
index 4c46131..96cf2a4 100644
--- a/docs/TUI_GUIDE.md
+++ b/docs/TUI_GUIDE.md
@@ -154,7 +154,7 @@ That's it! The TUI will guide you through everything.
- **chunking.strategy** - Smart (semantic) vs simple (fixed size)
- **files.exclude_patterns** - Skip certain files/directories
- **embedding.preferred_method** - AI model preference
-- **search.default_limit** - How many results to show
+- **search.default_top_k** - How many results to show
**Interactive Options**:
- **[V]iew config** - See full configuration file
diff --git a/examples/basic_usage.py b/examples/basic_usage.py
index ecac475..1d9d05d 100644
--- a/examples/basic_usage.py
+++ b/examples/basic_usage.py
@@ -50,7 +50,7 @@ def main():
print("\n4. Example searches:")
for query in queries:
print(f"\n Query: '{query}'")
- results = searcher.search(query, limit=3)
+ results = searcher.search(query, top_k=3)
if results:
for i, result in enumerate(results, 1):
diff --git a/examples/config-beginner.yaml b/examples/config-beginner.yaml
index 538713a..6a0d174 100644
--- a/examples/config-beginner.yaml
+++ b/examples/config-beginner.yaml
@@ -41,7 +41,7 @@ embedding:
# š Search behavior
search:
- default_limit: 10 # Show 10 results (good starting point)
+ default_top_k: 10 # Show 10 results (good starting point)
enable_bm25: true # Find exact word matches too
similarity_threshold: 0.1 # Pretty permissive (shows more results)
expand_queries: false # Keep it simple for now
diff --git a/examples/config-fast.yaml b/examples/config-fast.yaml
index eec71ef..a7e00a9 100644
--- a/examples/config-fast.yaml
+++ b/examples/config-fast.yaml
@@ -62,7 +62,7 @@ embedding:
# š Search optimized for speed
search:
- default_limit: 5 # Fewer results = faster display
+ default_top_k: 5 # Fewer results = faster display
enable_bm25: false # Skip keyword matching for speed
similarity_threshold: 0.2 # Higher threshold = fewer results to process
expand_queries: false # No query expansion (much faster)
diff --git a/examples/config-llm-providers.yaml b/examples/config-llm-providers.yaml
index 5f3b6b4..7d8dbfc 100644
--- a/examples/config-llm-providers.yaml
+++ b/examples/config-llm-providers.yaml
@@ -53,7 +53,7 @@ embedding:
batch_size: 32
search:
- default_limit: 10
+ default_top_k: 10
enable_bm25: true
similarity_threshold: 0.1
expand_queries: false
diff --git a/examples/config-quality.yaml b/examples/config-quality.yaml
index 99b6979..844f121 100644
--- a/examples/config-quality.yaml
+++ b/examples/config-quality.yaml
@@ -44,7 +44,7 @@ embedding:
# š Search optimized for comprehensive results
search:
- default_limit: 15 # More results to choose from
+ default_top_k: 15 # More results to choose from
enable_bm25: true # Use both semantic and keyword matching
similarity_threshold: 0.05 # Very permissive (show more possibilities)
expand_queries: true # Automatic query expansion for better recall
diff --git a/examples/config.yaml b/examples/config.yaml
index 50ddca8..1c80d79 100644
--- a/examples/config.yaml
+++ b/examples/config.yaml
@@ -86,7 +86,7 @@ embedding:
#āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
search:
- default_limit: 10 # How many search results to show by default
+ default_top_k: 10 # How many search results to show by default
# š” MORE RESULTS: 15-20 | FASTER SEARCH: 5-8
enable_bm25: true # Also use keyword matching (like Google search)
diff --git a/install_mini_rag.sh b/install_mini_rag.sh
index 7dbf2cd..b6a3ad5 100755
--- a/install_mini_rag.sh
+++ b/install_mini_rag.sh
@@ -653,66 +653,28 @@ show_completion() {
fi
}
-# Create sample project for testing
-create_sample_project() {
- local sample_dir="$SCRIPT_DIR/.sample_test"
- rm -rf "$sample_dir"
- mkdir -p "$sample_dir"
-
- # Create a few small sample files
- cat > "$sample_dir/README.md" << 'EOF'
-# Sample Project
-
-This is a sample project for testing FSS-Mini-RAG search capabilities.
-
-## Features
-
-- User authentication system
-- Document processing
-- Search functionality
-- Email integration
-EOF
-
- cat > "$sample_dir/auth.py" << 'EOF'
-# Authentication module
-def login_user(username, password):
- """Handle user login with password validation"""
- if validate_credentials(username, password):
- create_session(username)
- return True
- return False
-
-def validate_credentials(username, password):
- """Check username and password against database"""
- # Database validation logic here
- return check_password_hash(username, password)
-EOF
-
- cat > "$sample_dir/search.py" << 'EOF'
-# Search functionality
-def semantic_search(query, documents):
- """Perform semantic search across document collection"""
- embeddings = generate_embeddings(query)
- results = find_similar_documents(embeddings, documents)
- return rank_results(results)
-
-def generate_embeddings(text):
- """Generate vector embeddings for text"""
- # Embedding generation logic
- return process_with_model(text)
-EOF
-
- echo "$sample_dir"
-}
+# Note: Sample project creation removed - now indexing real codebase/docs
# Run quick test with sample data
run_quick_test() {
print_header "Quick Test"
- print_info "Creating small sample project for testing..."
- local sample_dir=$(create_sample_project)
- echo "ā
Sample project created: $sample_dir"
+ # Ask what to index: code vs docs
+ echo -e "${CYAN}What would you like to explore with FSS-Mini-RAG?${NC}"
echo ""
+ echo -e "${GREEN}1) Code${NC} - Index the FSS-Mini-RAG codebase (~50 files)"
+ echo -e "${BLUE}2) Docs${NC} - Index the documentation (~10 files)"
+ echo ""
+ echo -n "Choose [1/2] or Enter for code: "
+ read -r index_choice
+
+ # Determine what to index
+ local target_dir="$SCRIPT_DIR"
+ local target_name="FSS-Mini-RAG codebase"
+ if [[ "$index_choice" == "2" ]]; then
+ target_dir="$SCRIPT_DIR/docs"
+ target_name="FSS-Mini-RAG documentation"
+ fi
# Ensure we're in the right directory and have the right permissions
if [[ ! -f "./rag-mini" ]]; then
@@ -726,32 +688,31 @@ run_quick_test() {
chmod +x ./rag-mini
fi
- # Test with explicit error handling and timeout
- print_info "Indexing sample project (should complete in ~5 seconds)..."
- echo -e "${CYAN}Command: ./rag-mini index \"$sample_dir\" --quiet${NC}"
+ # Index the chosen target
+ print_info "Indexing $target_name..."
+ echo -e "${CYAN}This will take 10-30 seconds depending on your system${NC}"
+ echo ""
- if timeout 30 ./rag-mini index "$sample_dir" --quiet; then
- print_success "ā
Indexing completed successfully"
+ if ./rag-mini index "$target_dir"; then
+ print_success "ā
Indexing completed successfully!"
echo ""
- print_info "Testing search functionality..."
- echo -e "${CYAN}Command: ./rag-mini search \"$sample_dir\" \"user authentication\" --limit 2${NC}"
+ print_info "šÆ Launching Interactive Tutorial..."
+ echo -e "${CYAN}The TUI has 6 sample questions to get you started.${NC}"
+ echo -e "${CYAN}Try the suggested queries or enter your own!${NC}"
+ echo ""
+ echo -n "Press Enter to start interactive tutorial: "
+ read -r
- if timeout 15 ./rag-mini search "$sample_dir" "user authentication" --limit 2; then
- echo ""
- print_success "š Test completed successfully!"
- echo -e "${CYAN}FSS-Mini-RAG is working perfectly!${NC}"
- else
- print_error "Search test failed or timed out"
- echo "Indexing worked but search had issues."
- fi
+ # Launch the TUI which has the existing interactive tutorial system
+ ./rag-tui.py "$target_dir"
- # Clean up sample
- print_info "Cleaning up test files..."
- rm -rf "$sample_dir"
+ echo ""
+ print_success "š Tutorial completed!"
+ echo -e "${CYAN}FSS-Mini-RAG is working perfectly!${NC}"
else
- print_error "ā Indexing test failed or timed out"
+ print_error "ā Indexing failed"
echo ""
echo -e "${YELLOW}Possible causes:${NC}"
echo "⢠Virtual environment not properly activated"
@@ -759,8 +720,6 @@ run_quick_test() {
echo "⢠Path issues (ensure script runs from project directory)"
echo "⢠Ollama connection issues (if using Ollama)"
echo ""
- print_info "Cleaning up and continuing..."
- rm -rf "$sample_dir"
return 1
fi
}
diff --git a/mini_rag/cli.py b/mini_rag/cli.py
index 6fe4a3b..cc4b353 100644
--- a/mini_rag/cli.py
+++ b/mini_rag/cli.py
@@ -52,6 +52,10 @@ def cli(verbose: bool, quiet: bool):
A local RAG system for improving the development environment's grounding capabilities.
Indexes your codebase and enables lightning-fast semantic search.
"""
+ # Check virtual environment
+ from .venv_checker import check_and_warn_venv
+ check_and_warn_venv("rag-mini", force_exit=False)
+
if verbose:
logging.getLogger().setLevel(logging.DEBUG)
elif quiet:
@@ -350,7 +354,12 @@ def debug_schema(path: str):
return
# Connect to database
- import lancedb
+ try:
+ import lancedb
+ except ImportError:
+ console.print("[red]LanceDB not available. Install with: pip install lancedb pyarrow[/red]")
+ return
+
db = lancedb.connect(rag_dir)
if "code_vectors" not in db.table_names():
diff --git a/mini_rag/config.py b/mini_rag/config.py
index 85104ef..81926ad 100644
--- a/mini_rag/config.py
+++ b/mini_rag/config.py
@@ -63,7 +63,7 @@ class EmbeddingConfig:
@dataclass
class SearchConfig:
"""Configuration for search behavior."""
- default_limit: int = 10
+ default_top_k: int = 10
enable_bm25: bool = True
similarity_threshold: float = 0.1
expand_queries: bool = False # Enable automatic query expansion
@@ -81,12 +81,33 @@ class LLMConfig:
enable_thinking: bool = True # Enable thinking mode for Qwen3 models
cpu_optimized: bool = True # Prefer lightweight models
+ # Model preference rankings (configurable)
+ model_rankings: list = None # Will be set in __post_init__
+
# Provider-specific settings (for different LLM providers)
provider: str = "ollama" # "ollama", "openai", "anthropic"
ollama_host: str = "localhost:11434" # Ollama connection
api_key: Optional[str] = None # API key for cloud providers
api_base: Optional[str] = None # Base URL for API (e.g., OpenRouter)
timeout: int = 20 # Request timeout in seconds
+
+ def __post_init__(self):
+ if self.model_rankings is None:
+ # Default model preference rankings (can be overridden in config file)
+ self.model_rankings = [
+ # Testing model (prioritized for current testing phase)
+ "qwen3:1.7b",
+
+ # Ultra-efficient models (perfect for CPU-only systems)
+ "qwen3:0.6b",
+
+ # Recommended model (excellent quality but larger)
+ "qwen3:4b",
+
+ # Common fallbacks (only include models we know exist)
+ "llama3.2:1b",
+ "qwen2.5:1.5b",
+ ]
@dataclass
@@ -151,6 +172,8 @@ class ConfigManager:
config.embedding = EmbeddingConfig(**data['embedding'])
if 'search' in data:
config.search = SearchConfig(**data['search'])
+ if 'llm' in data:
+ config.llm = LLMConfig(**data['llm'])
return config
@@ -219,7 +242,7 @@ class ConfigManager:
"",
"# Search behavior settings",
"search:",
- f" default_limit: {config_dict['search']['default_limit']} # Default number of results",
+ f" default_top_k: {config_dict['search']['default_top_k']} # Default number of top results",
f" enable_bm25: {str(config_dict['search']['enable_bm25']).lower()} # Enable keyword matching boost",
f" similarity_threshold: {config_dict['search']['similarity_threshold']} # Minimum similarity score",
f" expand_queries: {str(config_dict['search']['expand_queries']).lower()} # Enable automatic query expansion",
@@ -232,8 +255,16 @@ class ConfigManager:
f" max_expansion_terms: {config_dict['llm']['max_expansion_terms']} # Maximum terms to add to queries",
f" enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()} # Enable synthesis by default",
f" synthesis_temperature: {config_dict['llm']['synthesis_temperature']} # LLM temperature for analysis",
+ " model_rankings: # Preferred model order (edit to change priority)",
])
+ # Add model rankings list
+ if 'model_rankings' in config_dict['llm'] and config_dict['llm']['model_rankings']:
+ for model in config_dict['llm']['model_rankings'][:10]: # Show first 10
+ yaml_lines.append(f" - \"{model}\"")
+ if len(config_dict['llm']['model_rankings']) > 10:
+ yaml_lines.append(" # ... (edit config to see all options)")
+
return '\n'.join(yaml_lines)
def update_config(self, **kwargs) -> RAGConfig:
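A hedged illustration of the new `model_rankings` override added above, assuming the remaining `LLMConfig` fields keep their defaults; field names follow this diff, not an official API.

```python
# Hedged sketch: override the model preference order programmatically.
from mini_rag.config import LLMConfig

llm_cfg = LLMConfig(model_rankings=["qwen3:1.7b", "qwen3:0.6b", "llama3.2:1b"])
print(llm_cfg.model_rankings[0])  # the synthesizer tries "qwen3:1.7b" first
```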
diff --git a/mini_rag/explorer.py b/mini_rag/explorer.py
index b1f5fad..9e4c379 100644
--- a/mini_rag/explorer.py
+++ b/mini_rag/explorer.py
@@ -60,7 +60,8 @@ class CodeExplorer:
self.synthesizer = LLMSynthesizer(
ollama_url=f"http://{self.config.llm.ollama_host}",
model=self.config.llm.synthesis_model,
- enable_thinking=True # Always enable thinking in explore mode
+ enable_thinking=True, # Always enable thinking in explore mode
+ config=self.config # Pass config for model rankings
)
# Session management
@@ -69,12 +70,7 @@ class CodeExplorer:
def start_exploration_session(self) -> bool:
"""Start a new exploration session."""
- # Check if we should restart the model for optimal thinking
- model_restart_needed = self._check_model_restart_needed()
- if model_restart_needed:
- if not self._handle_model_restart():
- print("ā ļø Continuing with current model (quality may be reduced)")
-
+ # Simple availability check - don't do complex model restart logic
if not self.synthesizer.is_available():
print("ā LLM service unavailable. Please check Ollama is running.")
return False
@@ -87,17 +83,8 @@ class CodeExplorer:
started_at=time.time()
)
- print("š§ EXPLORATION MODE STARTED")
- print("=" * 50)
+ print("š§ Exploration Mode Started")
print(f"Project: {self.project_path.name}")
- print(f"Session: {session_id}")
- print("\nšÆ This mode uses thinking and remembers context.")
- print(" Perfect for debugging, learning, and deep exploration.")
- print("\nš” Tips:")
- print(" ⢠Ask follow-up questions - I'll remember our conversation")
- print(" ⢠Use 'why', 'how', 'explain' for detailed reasoning")
- print(" ⢠Type 'quit' or 'exit' to end session")
- print("\n" + "=" * 50)
return True
@@ -110,7 +97,7 @@ class CodeExplorer:
search_start = time.time()
results = self.searcher.search(
question,
- limit=context_limit,
+ top_k=context_limit,
include_context=True,
semantic_weight=0.7,
bm25_weight=0.3
@@ -166,56 +153,82 @@ Content: {content[:800]}{'...' if len(content) > 800 else ''}
results_text = "\n".join(results_context)
- # Create comprehensive exploration prompt
- prompt = f"""You are a senior software engineer helping explore and debug code. You have access to thinking mode and conversation context.
+ # Create comprehensive exploration prompt with thinking
+ prompt = f"""
+The user asked: "{question}"
+
+Let me analyze what they're asking and look at the information I have available.
+
+From the search results, I can see relevant information about:
+{results_text[:500]}...
+
+I should think about:
+1. What the user is trying to understand or accomplish
+2. What information from the search results is most relevant
+3. How to explain this in a clear, educational way
+4. What practical next steps would be helpful
+
+Based on our conversation so far: {context_summary}
+
+Let me create a helpful response that breaks this down clearly and gives them actionable guidance.
+
+
+You're a helpful assistant exploring a project with someone. You're good at breaking down complex topics into understandable pieces and explaining things clearly.
PROJECT: {self.project_path.name}
-CONVERSATION CONTEXT:
+PREVIOUS CONVERSATION:
{context_summary}
CURRENT QUESTION: "{question}"
-SEARCH RESULTS:
+RELEVANT INFORMATION FOUND:
{results_text}
-Please provide a detailed analysis in JSON format. Think through the problem carefully and consider the conversation context:
+Please provide a helpful analysis in JSON format:
{{
- "summary": "2-3 sentences explaining what you found and how it relates to the question",
+ "summary": "Clear explanation of what you found and how it answers their question",
"key_points": [
- "Important insight 1 (reference specific code/files)",
- "Important insight 2 (explain relationships)",
- "Important insight 3 (consider conversation context)"
+ "Most important insight from the information",
+ "Secondary important point or relationship",
+ "Third key point or practical consideration"
],
"code_examples": [
- "Relevant code snippet or pattern with explanation",
- "Another important code example with context"
+ "Relevant example or pattern from the information",
+ "Another useful example or demonstration"
],
"suggested_actions": [
- "Specific next step the developer should take",
- "Follow-up investigation or debugging approach",
- "Potential improvements or fixes"
+ "Specific next step they could take",
+ "Additional exploration or investigation suggestion",
+ "Practical way to apply this information"
],
"confidence": 0.85
}}
-Focus on:
-- Deep technical analysis with reasoning
-- How this connects to previous questions in our conversation
-- Practical debugging/learning insights
-- Specific code references and explanations
-- Clear next steps for the developer
-
-Think carefully about the relationships between code components and how they answer the question in context."""
-
+Guidelines:
+- Be educational and break things down clearly
+- Reference specific files and information when helpful
+- Give practical, actionable suggestions
+- Keep explanations beginner-friendly but not condescending
+- Connect information to their question directly
+"""
+
return prompt
def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult:
"""Synthesize results with full context and thinking."""
try:
- # Use thinking-enabled synthesis with lower temperature for exploration
- response = self.synthesizer._call_ollama(prompt, temperature=0.2)
+ # TEMPORARILY: Use simple non-streaming call to avoid flow issues
+ # TODO: Re-enable streaming once flow is stable
+ response = self.synthesizer._call_ollama(prompt, temperature=0.2, disable_thinking=False)
+ thinking_stream = ""
+
+ # Display simple thinking indicator
+ if response and len(response) > 200:
+ print("\nš Analysis in progress...")
+
+ # Don't display thinking stream again - keeping it simple for now
if not response:
return SynthesisResult(
@@ -422,6 +435,196 @@ Think carefully about the relationships between code components and how they ans
except EOFError:
print("\nš Continuing with current model...")
return False
+
+ def _call_ollama_with_thinking(self, prompt: str, temperature: float = 0.3) -> tuple:
+ """Call Ollama with streaming for fast time-to-first-token."""
+ import requests
+ import json
+
+ try:
+ # Use the synthesizer's model and connection
+ model_to_use = self.synthesizer.model
+ if self.synthesizer.model not in self.synthesizer.available_models:
+ if self.synthesizer.available_models:
+ model_to_use = self.synthesizer.available_models[0]
+ else:
+ return None, None
+
+ # Enable thinking by NOT appending a no-think tag to the prompt
+ final_prompt = prompt
+
+ # Get optimal parameters for this model
+ from .llm_optimization import get_optimal_ollama_parameters
+ optimal_params = get_optimal_ollama_parameters(model_to_use)
+
+ payload = {
+ "model": model_to_use,
+ "prompt": final_prompt,
+ "stream": True, # Enable streaming for fast response
+ "options": {
+ "temperature": temperature,
+ "top_p": optimal_params.get("top_p", 0.9),
+ "top_k": optimal_params.get("top_k", 40),
+ "num_ctx": optimal_params.get("num_ctx", 32768),
+ "num_predict": optimal_params.get("num_predict", 2000),
+ "repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
+ "presence_penalty": optimal_params.get("presence_penalty", 1.0)
+ }
+ }
+
+ response = requests.post(
+ f"{self.synthesizer.ollama_url}/api/generate",
+ json=payload,
+ stream=True,
+ timeout=65
+ )
+
+ if response.status_code == 200:
+ # Collect streaming response
+ raw_response = ""
+ thinking_displayed = False
+
+ for line in response.iter_lines():
+ if line:
+ try:
+ chunk_data = json.loads(line.decode('utf-8'))
+ chunk_text = chunk_data.get('response', '')
+
+ if chunk_text:
+ raw_response += chunk_text
+
+ # Display thinking stream as it comes in
+ if not thinking_displayed and '<think>' in raw_response:
+ # Start displaying thinking
+ self._start_thinking_display()
+ thinking_displayed = True
+
+ if thinking_displayed:
+ self._stream_thinking_chunk(chunk_text)
+
+ if chunk_data.get('done', False):
+ break
+
+ except json.JSONDecodeError:
+ continue
+
+ # Finish thinking display if it was shown
+ if thinking_displayed:
+ self._end_thinking_display()
+
+ # Extract thinking stream and final response
+ thinking_stream, final_response = self._extract_thinking(raw_response)
+
+ return final_response, thinking_stream
+ else:
+ return None, None
+
+ except Exception as e:
+ logger.error(f"Thinking-enabled Ollama call failed: {e}")
+ return None, None
+
+ def _extract_thinking(self, raw_response: str) -> tuple:
+ """Extract thinking content from response."""
+ thinking_stream = ""
+ final_response = raw_response
+
+ # Look for thinking patterns
+ if "" in raw_response and "" in raw_response:
+ # Extract thinking content between tags
+ start_tag = raw_response.find("")
+ end_tag = raw_response.find("") + len("")
+
+ if start_tag != -1 and end_tag != -1:
+ thinking_content = raw_response[start_tag + 7:end_tag - 8] # Remove tags
+ thinking_stream = thinking_content.strip()
+
+ # Remove thinking from final response
+ final_response = (raw_response[:start_tag] + raw_response[end_tag:]).strip()
+
+ # Alternative patterns for models that use different thinking formats
+ elif "Let me think" in raw_response or "I need to analyze" in raw_response:
+ # Simple heuristic: first paragraph might be thinking
+ lines = raw_response.split('\n')
+ potential_thinking = []
+ final_lines = []
+
+ thinking_indicators = ["Let me think", "I need to", "First, I'll", "Looking at", "Analyzing"]
+ in_thinking = False
+
+ for line in lines:
+ if any(indicator in line for indicator in thinking_indicators):
+ in_thinking = True
+ potential_thinking.append(line)
+ elif in_thinking and (line.startswith('{') or line.startswith('**') or line.startswith('#')):
+ # Likely end of thinking, start of structured response
+ in_thinking = False
+ final_lines.append(line)
+ elif in_thinking:
+ potential_thinking.append(line)
+ else:
+ final_lines.append(line)
+
+ if potential_thinking:
+ thinking_stream = '\n'.join(potential_thinking).strip()
+ final_response = '\n'.join(final_lines).strip()
+
+ return thinking_stream, final_response
+
+ def _start_thinking_display(self):
+ """Start the thinking stream display."""
+ print("\n\033[2m\033[3mš AI Thinking:\033[0m")
+ print("\033[2m\033[3m" + "ā" * 40 + "\033[0m")
+ self._thinking_buffer = ""
+ self._in_thinking_tags = False
+
+ def _stream_thinking_chunk(self, chunk: str):
+ """Stream a chunk of thinking as it arrives."""
+ import sys
+
+ self._thinking_buffer += chunk
+
+ # Check if we're in thinking tags
+ if '<think>' in self._thinking_buffer and not self._in_thinking_tags:
+ self._in_thinking_tags = True
+ # Display everything after <think>
+ start_idx = self._thinking_buffer.find('<think>') + 7
+ thinking_content = self._thinking_buffer[start_idx:]
+ if thinking_content:
+ print(f"\033[2m\033[3m{thinking_content}\033[0m", end='', flush=True)
+ elif self._in_thinking_tags and '</think>' not in chunk:
+ # We're in thinking mode, display the chunk
+ print(f"\033[2m\033[3m{chunk}\033[0m", end='', flush=True)
+ elif '</think>' in self._thinking_buffer:
+ # End of thinking
+ self._in_thinking_tags = False
+
+ def _end_thinking_display(self):
+ """End the thinking stream display."""
+ print(f"\n\033[2m\033[3m" + "ā" * 40 + "\033[0m")
+ print()
+
+ def _display_thinking_stream(self, thinking_stream: str):
+ """Display thinking stream in light gray and italic (fallback for non-streaming)."""
+ if not thinking_stream:
+ return
+
+ print("\n\033[2m\033[3mš AI Thinking:\033[0m")
+ print("\033[2m\033[3m" + "ā" * 40 + "\033[0m")
+
+ # Split into paragraphs and display with proper formatting
+ paragraphs = thinking_stream.split('\n\n')
+ for para in paragraphs:
+ if para.strip():
+ # Wrap long lines nicely
+ lines = para.strip().split('\n')
+ for line in lines:
+ if line.strip():
+ # Light gray and italic
+ print(f"\033[2m\033[3m{line}\033[0m")
+ print() # Paragraph spacing
+
+ print("\033[2m\033[3m" + "ā" * 40 + "\033[0m")
+ print()
# Quick test function
def test_explorer():
diff --git a/mini_rag/fast_server.py b/mini_rag/fast_server.py
index b637250..940e9df 100644
--- a/mini_rag/fast_server.py
+++ b/mini_rag/fast_server.py
@@ -218,6 +218,11 @@ class FastRAGServer:
# Quick file count check
try:
import lancedb
+ except ImportError:
+ # If LanceDB not available, assume index is empty and needs creation
+ return True
+
+ try:
db = lancedb.connect(rag_dir)
if 'code_vectors' not in db.table_names():
return True
diff --git a/mini_rag/indexer.py b/mini_rag/indexer.py
index 4462aed..8cfa580 100644
--- a/mini_rag/indexer.py
+++ b/mini_rag/indexer.py
@@ -12,12 +12,20 @@ from typing import List, Dict, Any, Optional, Set, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
import numpy as np
-import lancedb
import pandas as pd
-import pyarrow as pa
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn
from rich.console import Console
+# Optional LanceDB import
+try:
+ import lancedb
+ import pyarrow as pa
+ LANCEDB_AVAILABLE = True
+except ImportError:
+ lancedb = None
+ pa = None
+ LANCEDB_AVAILABLE = False
+
from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
from .chunker import CodeChunker, CodeChunk
from .path_handler import normalize_path, normalize_relative_path
@@ -163,7 +171,7 @@ class ProjectIndexer:
"skip_binary": True
},
"search": {
- "default_limit": 10,
+ "default_top_k": 10,
"similarity_threshold": 0.7,
"hybrid_search": True,
"bm25_weight": 0.3
@@ -526,6 +534,11 @@ class ProjectIndexer:
def _init_database(self):
"""Initialize LanceDB connection and table."""
+ if not LANCEDB_AVAILABLE:
+ logger.error("LanceDB is not available. Please install LanceDB for full indexing functionality.")
+ logger.info("For Ollama-only mode, consider using hash-based embeddings instead.")
+ raise ImportError("LanceDB dependency is required for indexing. Install with: pip install lancedb pyarrow")
+
try:
self.db = lancedb.connect(self.rag_dir)
diff --git a/mini_rag/llm_safeguards.py b/mini_rag/llm_safeguards.py
index f6fa474..eb0f8f2 100644
--- a/mini_rag/llm_safeguards.py
+++ b/mini_rag/llm_safeguards.py
@@ -16,12 +16,12 @@ logger = logging.getLogger(__name__)
@dataclass
class SafeguardConfig:
- """Configuration for LLM safeguards."""
- max_output_tokens: int = 2000 # Prevent excessive generation
- max_repetition_ratio: float = 0.3 # Max ratio of repeated content
- max_response_time: int = 60 # Max seconds for response
- min_useful_length: int = 20 # Minimum useful response length
- context_window: int = 32768 # Ollama context window
+ """Configuration for LLM safeguards - gentle and educational."""
+ max_output_tokens: int = 4000 # Allow longer responses for learning
+ max_repetition_ratio: float = 0.7 # Be very permissive - only catch extreme repetition
+ max_response_time: int = 120 # Allow 2 minutes for complex thinking
+ min_useful_length: int = 10 # Lower threshold - short answers can be useful
+ context_window: int = 32000 # Match Qwen3 context length (32K token limit)
enable_thinking_detection: bool = True # Detect thinking patterns
class ModelRunawayDetector:
@@ -98,8 +98,19 @@ class ModelRunawayDetector:
if self.response_patterns['phrase_repetition'].search(response):
return "phrase_repetition"
- # Calculate repetition ratio
- words = response.split()
+ # Calculate repetition ratio (excluding Qwen3 thinking blocks)
+ analysis_text = response
+ if "" in response and "" in response:
+ # Extract only the actual response (after thinking) for repetition analysis
+ thinking_end = response.find("")
+ if thinking_end != -1:
+ analysis_text = response[thinking_end + 8:].strip()
+
+ # If the actual response (excluding thinking) is short, don't penalize
+ if len(analysis_text.split()) < 20:
+ return None
+
+ words = analysis_text.split()
if len(words) > 10:
unique_words = set(words)
repetition_ratio = 1 - (len(unique_words) / len(words))
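The gist of the adjusted check, as a self-contained hedged sketch: the thinking block is excluded first, and only the visible answer is scored for repetition (function name and sample text here are illustrative, not the project's actual code).

```python
# Hedged sketch of the repetition check: drop the <think>...</think> block, then
# measure how many of the remaining words are repeats.
def repetition_ratio(response: str) -> float:
    text = response
    if "</think>" in response:
        text = response.split("</think>", 1)[1]
    words = text.split()
    if len(words) <= 10:          # too short to judge fairly
        return 0.0
    return 1 - len(set(words)) / len(words)

sample = "<think>plan plan plan</think> the indexer stores one embedding per chunk"
print(repetition_ratio(sample))   # 0.0 here: the visible answer is too short to penalize
```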
diff --git a/mini_rag/llm_synthesizer.py b/mini_rag/llm_synthesizer.py
index f0f1c39..0dcda93 100644
--- a/mini_rag/llm_synthesizer.py
+++ b/mini_rag/llm_synthesizer.py
@@ -36,12 +36,13 @@ class SynthesisResult:
class LLMSynthesizer:
"""Synthesizes RAG search results using Ollama LLMs."""
- def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = False):
+ def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = False, config=None):
self.ollama_url = ollama_url.rstrip('/')
self.available_models = []
self.model = model
self.enable_thinking = enable_thinking # Default False for synthesis mode
self._initialized = False
+ self.config = config # For accessing model rankings
# Initialize safeguards
if ModelRunawayDetector:
@@ -61,60 +62,36 @@ class LLMSynthesizer:
return []
def _select_best_model(self) -> str:
- """Select the best available model based on modern performance rankings."""
+ """Select the best available model based on configuration rankings."""
if not self.available_models:
return "qwen2.5:1.5b" # Fallback preference
- # Modern model preference ranking (CPU-friendly first)
- # Prioritize: Ultra-efficient > Standard efficient > Larger models
- model_rankings = [
- # Recommended model (excellent quality)
- "qwen3:4b",
-
- # Ultra-efficient models (perfect for CPU-only systems)
- "qwen3:0.6b", "qwen3:1.7b", "llama3.2:1b",
-
- # Standard efficient models
- "qwen2.5:1.5b", "qwen3:3b",
-
- # Qwen2.5 models (excellent performance/size ratio)
- "qwen2.5-coder:1.5b", "qwen2.5:1.5b", "qwen2.5:3b", "qwen2.5-coder:3b",
- "qwen2.5:7b", "qwen2.5-coder:7b",
-
- # Qwen2 models (older but still good)
- "qwen2:1.5b", "qwen2:3b", "qwen2:7b",
-
- # Mistral models (good quality, reasonable size)
- "mistral:7b", "mistral-nemo", "mistral-small",
-
- # Llama3.2 models (decent but larger)
- "llama3.2:1b", "llama3.2:3b", "llama3.2", "llama3.2:8b",
-
- # Fallback to other Llama models
- "llama3.1:8b", "llama3:8b", "llama3",
-
- # Other decent models
- "gemma2:2b", "gemma2:9b", "phi3:3.8b", "phi3.5",
- ]
+ # Get model rankings from config or use defaults
+ if self.config and hasattr(self.config, 'llm') and hasattr(self.config.llm, 'model_rankings'):
+ model_rankings = self.config.llm.model_rankings
+ else:
+ # Fallback rankings if no config
+ model_rankings = [
+ "qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "llama3.2:1b",
+ "qwen2.5:1.5b", "qwen3:3b", "qwen2.5-coder:1.5b"
+ ]
- # Find first available model from our ranked list
+ # Find first available model from our ranked list (exact matches first)
for preferred_model in model_rankings:
for available_model in self.available_models:
- # Match model names (handle version tags)
- available_base = available_model.split(':')[0].lower()
- preferred_base = preferred_model.split(':')[0].lower()
+ # Exact match first (e.g., "qwen3:1.7b" matches "qwen3:1.7b")
+ if preferred_model.lower() == available_model.lower():
+ logger.info(f"Selected exact match model: {available_model}")
+ return available_model
- if preferred_base in available_base or available_base in preferred_base:
- # Additional size filtering - prefer smaller models
- if any(size in available_model.lower() for size in ['1b', '1.5b', '2b', '3b']):
- logger.info(f"Selected efficient model: {available_model}")
- return available_model
- elif any(size in available_model.lower() for size in ['7b', '8b']):
- # Only use larger models if no smaller ones available
- logger.info(f"Selected larger model: {available_model}")
- return available_model
- elif ':' not in available_model:
- # Handle models without explicit size tags
+ # Partial match with version handling (e.g., "qwen3:1.7b" matches "qwen3:1.7b-q8_0")
+ preferred_parts = preferred_model.lower().split(':')
+ available_parts = available_model.lower().split(':')
+
+ if len(preferred_parts) >= 2 and len(available_parts) >= 2:
+ if (preferred_parts[0] == available_parts[0] and
+ preferred_parts[1] in available_parts[1]):
+ logger.info(f"Selected version match model: {available_model}")
return available_model
# If no preferred models found, use first available
@@ -132,12 +109,8 @@ class LLMSynthesizer:
if not self.model:
self.model = self._select_best_model()
- # Warm up LLM with minimal request (ignores response)
- if self.available_models:
- try:
- self._call_ollama("testing, just say 'hi'", temperature=0.1, disable_thinking=True)
- except:
- pass # Warmup failure is non-critical
+ # Skip warmup - models are fast enough and warmup causes delays
+ # Warmup removed to eliminate startup delays and unwanted model calls
self._initialized = True
@@ -146,7 +119,7 @@ class LLMSynthesizer:
self._ensure_initialized()
return len(self.available_models) > 0
- def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False) -> Optional[str]:
+ def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False, use_streaming: bool = False) -> Optional[str]:
"""Make a call to Ollama API with safeguards."""
start_time = time.time()
@@ -163,28 +136,55 @@ class LLMSynthesizer:
# Handle thinking mode for Qwen3 models
final_prompt = prompt
- if not self.enable_thinking or disable_thinking:
+ use_thinking = self.enable_thinking and not disable_thinking
+
+ # For non-thinking mode, append the no-think tag for Qwen3
+ if not use_thinking and "qwen3" in model_to_use.lower():
if not final_prompt.endswith(" "):
final_prompt += " "
- # Get optimal parameters for this model
+ # Get optimal parameters for this model
optimal_params = get_optimal_ollama_parameters(model_to_use)
+ # Qwen3-specific optimal parameters based on research
+ if "qwen3" in model_to_use.lower():
+ if use_thinking:
+ # Thinking mode: Temperature=0.6, TopP=0.95, TopK=20, PresencePenalty=1.5
+ qwen3_temp = 0.6
+ qwen3_top_p = 0.95
+ qwen3_top_k = 20
+ qwen3_presence = 1.5
+ else:
+ # Non-thinking mode: Temperature=0.7, TopP=0.8, TopK=20, PresencePenalty=1.5
+ qwen3_temp = 0.7
+ qwen3_top_p = 0.8
+ qwen3_top_k = 20
+ qwen3_presence = 1.5
+ else:
+ qwen3_temp = temperature
+ qwen3_top_p = optimal_params.get("top_p", 0.9)
+ qwen3_top_k = optimal_params.get("top_k", 40)
+ qwen3_presence = optimal_params.get("presence_penalty", 1.0)
+
payload = {
"model": model_to_use,
"prompt": final_prompt,
- "stream": False,
+ "stream": use_streaming,
"options": {
- "temperature": temperature,
- "top_p": optimal_params.get("top_p", 0.9),
- "top_k": optimal_params.get("top_k", 40),
- "num_ctx": optimal_params.get("num_ctx", 32768),
+ "temperature": qwen3_temp,
+ "top_p": qwen3_top_p,
+ "top_k": qwen3_top_k,
+ "num_ctx": 32000, # Critical: Qwen3 context length (32K token limit)
"num_predict": optimal_params.get("num_predict", 2000),
"repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
- "presence_penalty": optimal_params.get("presence_penalty", 1.0)
+ "presence_penalty": qwen3_presence
}
}
+ # Handle streaming with early stopping
+ if use_streaming:
+ return self._handle_streaming_with_early_stop(payload, model_to_use, use_thinking, start_time)
+
response = requests.post(
f"{self.ollama_url}/api/generate",
json=payload,
@@ -193,8 +193,19 @@ class LLMSynthesizer:
if response.status_code == 200:
result = response.json()
+
+ # All models use standard response format
+ # Qwen3 thinking tokens are embedded in the response content itself as <think>...</think>
raw_response = result.get('response', '').strip()
+ # Log thinking content for Qwen3 debugging
+ if "qwen3" in model_to_use.lower() and use_thinking and "" in raw_response:
+ thinking_start = raw_response.find("")
+ thinking_end = raw_response.find("")
+ if thinking_start != -1 and thinking_end != -1:
+ thinking_content = raw_response[thinking_start+7:thinking_end]
+ logger.info(f"Qwen3 thinking: {thinking_content[:100]}...")
+
# Apply safeguards to check response quality
if self.safeguard_detector and raw_response:
is_valid, issue_type, explanation = self.safeguard_detector.check_response_quality(
@@ -203,8 +214,8 @@ class LLMSynthesizer:
if not is_valid:
logger.warning(f"Safeguard triggered: {issue_type}")
- # Return a safe explanation instead of the problematic response
- return self._create_safeguard_response(issue_type, explanation, prompt)
+ # Preserve original response but add safeguard warning
+ return self._create_safeguard_response_with_content(issue_type, explanation, raw_response)
return raw_response
else:
@@ -232,6 +243,119 @@ class LLMSynthesizer:
4. **Different approach**: Try synthesis mode: `--synthesize` for simpler responses
This is normal with smaller AI models and helps ensure you get quality responses."""
+
+ def _create_safeguard_response_with_content(self, issue_type: str, explanation: str, original_response: str) -> str:
+ """Create a response that preserves the original content but adds a safeguard warning."""
+
+ # For Qwen3, extract the actual response (after thinking)
+ actual_response = original_response
+ if "" in original_response and "" in original_response:
+ thinking_end = original_response.find("")
+ if thinking_end != -1:
+ actual_response = original_response[thinking_end + 8:].strip()
+
+ # If we have useful content, preserve it with a warning
+ if len(actual_response.strip()) > 20:
+ return f"""ā ļø **Response Quality Warning** ({issue_type})
+
+{explanation}
+
+---
+
+**AI Response (use with caution):**
+
+{actual_response}
+
+---
+
+š” **Note**: This response may have quality issues. Consider rephrasing your question or trying exploration mode for better results."""
+ else:
+ # If content is too short or problematic, use the original safeguard response
+ return f"""ā ļø Model Response Issue Detected
+
+{explanation}
+
+**What happened:** The AI model encountered a common issue with small language models.
+
+**Your options:**
+1. **Try again**: Ask the same question (often resolves itself)
+2. **Rephrase**: Make your question more specific or break it into parts
+3. **Use exploration mode**: `rag-mini explore` for complex questions
+
+This is normal with smaller AI models and helps ensure you get quality responses."""
+
+ def _handle_streaming_with_early_stop(self, payload: dict, model_name: str, use_thinking: bool, start_time: float) -> Optional[str]:
+ """Handle streaming response with intelligent early stopping."""
+ import json
+
+ try:
+ response = requests.post(
+ f"{self.ollama_url}/api/generate",
+ json=payload,
+ stream=True,
+ timeout=65
+ )
+
+ if response.status_code != 200:
+ logger.error(f"Ollama API error: {response.status_code}")
+ return None
+
+ full_response = ""
+ word_buffer = []
+ repetition_window = 30 # Check last 30 words for repetition (more context)
+ stop_threshold = 0.8 # Stop only if 80% of recent words are repetitive (very permissive)
+ min_response_length = 100 # Don't early stop until we have at least 100 chars
+
+ for line in response.iter_lines():
+ if line:
+ try:
+ chunk_data = json.loads(line.decode('utf-8'))
+ chunk_text = chunk_data.get('response', '')
+
+ if chunk_text:
+ full_response += chunk_text
+
+ # Add words to buffer for repetition detection
+ new_words = chunk_text.split()
+ word_buffer.extend(new_words)
+
+ # Keep only recent words in buffer
+ if len(word_buffer) > repetition_window:
+ word_buffer = word_buffer[-repetition_window:]
+
+ # Check for repetition patterns after we have enough words AND content
+ if len(word_buffer) >= repetition_window and len(full_response) >= min_response_length:
+ unique_words = set(word_buffer)
+ repetition_ratio = 1 - (len(unique_words) / len(word_buffer))
+
+ # Early stop only if repetition is EXTREMELY high (80%+)
+ if repetition_ratio > stop_threshold:
+ logger.info(f"Early stopping due to repetition: {repetition_ratio:.2f}")
+
+ # Add a gentle completion to the response
+ if not full_response.strip().endswith(('.', '!', '?')):
+ full_response += "..."
+
+ # Send stop signal to model (attempt to gracefully stop)
+ try:
+ stop_payload = {"model": model_name, "stop": True}
+ requests.post(f"{self.ollama_url}/api/generate", json=stop_payload, timeout=2)
+ except:
+ pass # If stop fails, we already have partial response
+
+ break
+
+ if chunk_data.get('done', False):
+ break
+
+ except json.JSONDecodeError:
+ continue
+
+ return full_response.strip()
+
+ except Exception as e:
+ logger.error(f"Streaming with early stop failed: {e}")
+ return None
def synthesize_search_results(self, query: str, results: List[Any], project_path: Path) -> SynthesisResult:
"""Synthesize search results into a coherent summary."""
diff --git a/mini_rag/query_expander.py b/mini_rag/query_expander.py
index b092bba..c2a8e44 100644
--- a/mini_rag/query_expander.py
+++ b/mini_rag/query_expander.py
@@ -59,23 +59,8 @@ class QueryExpander:
if self._initialized:
return
- # Warm up LLM if enabled and available
- if self.enabled:
- try:
- model = self._select_expansion_model()
- if model:
- requests.post(
- f"{self.ollama_url}/api/generate",
- json={
- "model": model,
- "prompt": "testing, just say 'hi' ",
- "stream": False,
- "options": {"temperature": 0.1, "max_tokens": 5}
- },
- timeout=5
- )
- except:
- pass # Warmup failure is non-critical
+ # Skip warmup - causes startup delays and unwanted model calls
+ # Query expansion works fine on first use without warmup
self._initialized = True
@@ -183,10 +168,10 @@ Expanded query:"""
data = response.json()
available = [model['name'] for model in data.get('models', [])]
- # Prefer ultra-fast, efficient models for query expansion (CPU-friendly)
+ # Use same model rankings as main synthesizer for consistency
expansion_preferences = [
- "qwen3:0.6b", "qwen3:1.7b", "qwen2.5:1.5b",
- "llama3.2:1b", "gemma2:2b", "llama3.2:3b"
+ "qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "llama3.2:1b",
+ "qwen2.5:1.5b", "qwen3:3b", "qwen2.5-coder:1.5b"
]
for preferred in expansion_preferences:
diff --git a/mini_rag/search.py b/mini_rag/search.py
index 0144aca..1823fab 100644
--- a/mini_rag/search.py
+++ b/mini_rag/search.py
@@ -8,13 +8,20 @@ from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import numpy as np
import pandas as pd
-import lancedb
from rich.console import Console
from rich.table import Table
from rich.syntax import Syntax
from rank_bm25 import BM25Okapi
from collections import defaultdict
+# Optional LanceDB import
+try:
+ import lancedb
+ LANCEDB_AVAILABLE = True
+except ImportError:
+ lancedb = None
+ LANCEDB_AVAILABLE = False
+
from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
from .path_handler import display_path
from .query_expander import QueryExpander
@@ -115,6 +122,14 @@ class CodeSearcher:
def _connect(self):
"""Connect to the LanceDB database."""
+ if not LANCEDB_AVAILABLE:
+ print("ā LanceDB Not Available")
+ print(" LanceDB is required for search functionality")
+ print(" Install it with: pip install lancedb pyarrow")
+ print(" For basic Ollama functionality, use hash-based search instead")
+ print()
+ raise ImportError("LanceDB dependency is required for search. Install with: pip install lancedb pyarrow")
+
try:
if not self.rag_dir.exists():
print("šļø No Search Index Found")
diff --git a/mini_rag/venv_checker.py b/mini_rag/venv_checker.py
new file mode 100644
index 0000000..492303d
--- /dev/null
+++ b/mini_rag/venv_checker.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""
+Virtual Environment Checker
+Ensures scripts run in proper Python virtual environment for consistency and safety.
+"""
+
+import sys
+import os
+import sysconfig
+from pathlib import Path
+
+def is_in_virtualenv() -> bool:
+ """Check if we're running in a virtual environment."""
+ # Check for virtual environment indicators
+ return (
+ hasattr(sys, 'real_prefix') or # virtualenv
+ (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix) or # venv/pyvenv
+ os.environ.get('VIRTUAL_ENV') is not None # Environment variable
+ )
+
+def get_expected_venv_path() -> Path:
+ """Get the expected virtual environment path for this project."""
+ # Assume .venv in the same directory as the script
+ script_dir = Path(__file__).parent.parent
+ return script_dir / '.venv'
+
+def check_correct_venv() -> tuple[bool, str]:
+ """
+ Check if we're in the correct virtual environment.
+
+ Returns:
+ (is_correct, message)
+ """
+ if not is_in_virtualenv():
+ return False, "not in virtual environment"
+
+ expected_venv = get_expected_venv_path()
+ if not expected_venv.exists():
+ return False, "expected virtual environment not found"
+
+ current_venv = os.environ.get('VIRTUAL_ENV')
+ if current_venv:
+ current_venv_path = Path(current_venv).resolve()
+ expected_venv_path = expected_venv.resolve()
+
+ if current_venv_path != expected_venv_path:
+ return False, f"wrong virtual environment (using {current_venv_path}, expected {expected_venv_path})"
+
+ return True, "correct virtual environment"
+
+def show_venv_warning(script_name: str = "script") -> None:
+ """Show virtual environment warning with helpful instructions."""
+ expected_venv = get_expected_venv_path()
+
+ print("ā ļø VIRTUAL ENVIRONMENT WARNING")
+ print("=" * 50)
+ print()
+ print(f"This {script_name} should be run in a Python virtual environment for:")
+ print(" ⢠Consistent dependencies")
+ print(" ⢠Isolated package versions")
+ print(" ⢠Proper security isolation")
+ print(" ⢠Reliable functionality")
+ print()
+
+ if expected_venv.exists():
+ print("ā
Virtual environment found!")
+ print(f" Location: {expected_venv}")
+ print()
+ print("š To activate it:")
+ print(f" source {expected_venv}/bin/activate")
+ print(f" {script_name}")
+ print()
+ print("š Or run with activation:")
+ print(f" source {expected_venv}/bin/activate && {script_name}")
+ else:
+ print("ā No virtual environment found!")
+ print()
+ print("š ļø Create one first:")
+ print(" ./install_mini_rag.sh")
+ print()
+ print("š Or manually:")
+ print(f" python3 -m venv {expected_venv}")
+ print(f" source {expected_venv}/bin/activate")
+ print(" pip install -r requirements.txt")
+
+ print()
+ print("š” Why this matters:")
+ print(" Without a virtual environment, you may experience:")
+ print(" ⢠Import errors from missing packages")
+ print(" ⢠Version conflicts with system Python")
+ print(" ⢠Inconsistent behavior across systems")
+ print(" ⢠Potential system-wide package pollution")
+ print()
+
+def check_and_warn_venv(script_name: str = "script", force_exit: bool = False) -> bool:
+ """
+ Check virtual environment and warn if needed.
+
+ Args:
+ script_name: Name of the script for user-friendly messages
+ force_exit: Whether to exit if not in correct venv
+
+ Returns:
+ True if in correct venv, False otherwise
+ """
+ is_correct, message = check_correct_venv()
+
+ if not is_correct:
+ show_venv_warning(script_name)
+
+ if force_exit:
+ print(f"ā Exiting {script_name} for your safety.")
+ print(" Please activate the virtual environment and try again.")
+ sys.exit(1)
+ else:
+ print(f"ā ļø Continuing anyway, but {script_name} may not work correctly...")
+ print()
+ return False
+
+ return True
+
+def require_venv(script_name: str = "script") -> None:
+ """Require virtual environment or exit."""
+ check_and_warn_venv(script_name, force_exit=True)
+
+# Quick test function
+def main():
+ """Test the virtual environment checker."""
+ print("š§Ŗ Virtual Environment Checker Test")
+ print("=" * 40)
+
+ print(f"In virtual environment: {is_in_virtualenv()}")
+ print(f"Expected venv path: {get_expected_venv_path()}")
+
+ is_correct, message = check_correct_venv()
+ print(f"Correct venv: {is_correct} ({message})")
+
+ if not is_correct:
+ show_venv_warning("test script")
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
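Typical usage of the new checker in an entry-point script, as a hedged sketch; both helpers exist in the module above, while the script name passed in is just a placeholder.

```python
# Hedged usage sketch for mini_rag/venv_checker.py: call early in an entry point.
from mini_rag.venv_checker import check_and_warn_venv, require_venv

check_and_warn_venv("my-script", force_exit=False)  # warn but continue ("my-script" is hypothetical)
# require_venv("my-script")                         # or: exit unless the project venv is active
```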
diff --git a/rag-mini b/rag-mini
index e6862d9..73be737 100755
--- a/rag-mini
+++ b/rag-mini
@@ -112,6 +112,7 @@ show_help() {
echo -e "${BOLD}Main Commands:${NC}"
echo " rag-mini index # Index project for search"
echo " rag-mini search # Search indexed project"
+ echo " rag-mini explore # Interactive exploration with AI"
echo " rag-mini status # Show project status"
echo ""
echo -e "${BOLD}Interfaces:${NC}"
@@ -324,9 +325,9 @@ main() {
"server")
# Start server mode
shift
- exec "$PYTHON" "$SCRIPT_DIR/claude_rag/server.py" "$@"
+ exec "$PYTHON" "$SCRIPT_DIR/mini_rag/fast_server.py" "$@"
;;
- "index"|"search"|"status")
+ "index"|"search"|"explore"|"status")
# Direct CLI commands - call Python script
exec "$PYTHON" "$SCRIPT_DIR/rag-mini.py" "$@"
;;
diff --git a/rag-mini.py b/rag-mini.py
index 04096d2..4d7451e 100644
--- a/rag-mini.py
+++ b/rag-mini.py
@@ -118,7 +118,7 @@ def index_project(project_path: Path, force: bool = False):
print(" Or see: docs/TROUBLESHOOTING.md")
sys.exit(1)
-def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
+def search_project(project_path: Path, query: str, top_k: int = 10, synthesize: bool = False):
"""Search a project directory."""
try:
# Check if indexed first
@@ -130,7 +130,7 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
print(f"š Searching \"{query}\" in {project_path.name}")
searcher = CodeSearcher(project_path)
- results = searcher.search(query, top_k=limit)
+ results = searcher.search(query, top_k=top_k)
if not results:
print("ā No results found")
@@ -143,7 +143,7 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
print()
print("āļø Configuration adjustments:")
print(f" ⢠Lower threshold: ./rag-mini search {project_path} \"{query}\" --threshold 0.05")
- print(" ⢠More results: add --limit 20")
+ print(" ⢠More results: add --top-k 20")
print()
print("š Need help? See: docs/TROUBLESHOOTING.md")
return
@@ -310,14 +310,14 @@ def status_check(project_path: Path):
sys.exit(1)
def explore_interactive(project_path: Path):
- """Interactive exploration mode with thinking and context memory."""
+ """Interactive exploration mode with thinking and context memory for any documents."""
try:
explorer = CodeExplorer(project_path)
if not explorer.start_exploration_session():
sys.exit(1)
- print("\nš¤ Ask your first question about the codebase:")
+ print(f"\nš¤ Ask your first question about {project_path.name}:")
while True:
try:
@@ -357,7 +357,8 @@ def explore_interactive(project_path: Path):
continue
# Process the question
- print("\nš Analyzing...")
+ print(f"\nš Searching {project_path.name}...")
+ print("š§ Thinking with AI model...")
response = explorer.explore_question(question)
if response:
@@ -382,6 +383,13 @@ def explore_interactive(project_path: Path):
def main():
"""Main CLI interface."""
+ # Check virtual environment
+ try:
+ from mini_rag.venv_checker import check_and_warn_venv
+ check_and_warn_venv("rag-mini.py", force_exit=False)
+ except ImportError:
+ pass # If venv checker can't be imported, continue anyway
+
parser = argparse.ArgumentParser(
description="FSS-Mini-RAG - Lightweight semantic code search",
formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -403,8 +411,8 @@ Examples:
help='Search query (for search command)')
parser.add_argument('--force', action='store_true',
help='Force reindex all files')
- parser.add_argument('--limit', type=int, default=10,
- help='Maximum number of search results')
+ parser.add_argument('--top-k', '--limit', type=int, default=10, dest='top_k',
+ help='Maximum number of search results (top-k)')
parser.add_argument('--verbose', '-v', action='store_true',
help='Enable verbose logging')
parser.add_argument('--synthesize', '-s', action='store_true',
@@ -432,7 +440,7 @@ Examples:
if not args.query:
print("ā Search query required")
sys.exit(1)
- search_project(args.project_path, args.query, args.limit, args.synthesize)
+ search_project(args.project_path, args.query, args.top_k, args.synthesize)
elif args.command == 'explore':
explore_interactive(args.project_path)
elif args.command == 'status':
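
For reference, the renamed option maps directly onto the Python API used throughout the tests. A minimal sketch (the project path and query below are illustrative, and it assumes the project has already been indexed):

# CLI equivalent: ./rag-mini search ./my-project "error handling" --top-k 5
from pathlib import Path

from mini_rag.search import CodeSearcher

searcher = CodeSearcher(Path("./my-project"))  # hypothetical path; must already be indexed
results = searcher.search("error handling", top_k=5)  # 'top_k' replaces the old 'limit' keyword
for result in results:
    print(f"{result.file_path}:{result.start_line} (score: {result.score:.3f})")
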
diff --git a/rag-tui.py b/rag-tui.py
index c711b0b..aeba78a 100755
--- a/rag-tui.py
+++ b/rag-tui.py
@@ -16,17 +16,83 @@ class SimpleTUI:
self.project_path: Optional[Path] = None
self.current_config: Dict[str, Any] = {}
self.search_count = 0 # Track searches for sample reminder
+ self.config_dir = Path.home() / '.mini-rag-tui'
+ self.config_file = self.config_dir / 'last_project.json'
+ # Load last project on startup
+ self._load_last_project()
+
+ def _load_last_project(self):
+ """Load the last used project from config file, or auto-detect current directory."""
+ # First check if current directory has .mini-rag folder (auto-detect)
+ current_dir = Path.cwd()
+ if (current_dir / '.mini-rag').exists():
+ self.project_path = current_dir
+ # Save this as the last project too
+ self._save_last_project()
+ return
+
+ # If no auto-detection, try loading from config file
+ try:
+ if hasattr(self, 'config_file') and self.config_file.exists():
+ with open(self.config_file, 'r') as f:
+ data = json.load(f)
+ project_path = Path(data.get('last_project', ''))
+ if project_path.exists() and project_path.is_dir():
+ self.project_path = project_path
+ except Exception:
+ # If loading fails, just continue without last project
+ pass
+
+ def _save_last_project(self):
+ """Save current project as last used."""
+ if not self.project_path:
+ return
+ try:
+ self.config_dir.mkdir(exist_ok=True)
+ data = {'last_project': str(self.project_path)}
+ with open(self.config_file, 'w') as f:
+ json.dump(data, f)
+ except Exception:
+ # If saving fails, just continue
+ pass
+
+ def _get_llm_status(self):
+ """Get LLM status for display in main menu."""
+ try:
+ # Import here to avoid startup delays
+ sys.path.insert(0, str(Path(__file__).parent))
+ from mini_rag.llm_synthesizer import LLMSynthesizer
+ from mini_rag.config import RAGConfig, ConfigManager
+
+ # Load config for model rankings
+ if self.project_path:
+ config_manager = ConfigManager(self.project_path)
+ config = config_manager.load_config()
+ else:
+ config = RAGConfig()
+
+ synthesizer = LLMSynthesizer(config=config)
+ if synthesizer.is_available():
+ # Get the model that would be selected
+ synthesizer._ensure_initialized()
+ model = synthesizer.model
+ return "✅ Ready", model
+ else:
+ return "ā Ollama not running", None
+ except Exception as e:
+ return f"ā Error: {str(e)[:20]}...", None
+
def clear_screen(self):
"""Clear the terminal screen."""
os.system('cls' if os.name == 'nt' else 'clear')
def print_header(self):
"""Print the main header."""
- print("āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā")
- print("ā FSS-Mini-RAG TUI ā")
- print("ā Semantic Code Search Interface ā")
- print("āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā")
+ print("+====================================================+")
+ print("| FSS-Mini-RAG TUI |")
+ print("| Semantic Code Search Interface |")
+ print("+====================================================+")
print()
def print_cli_command(self, command: str, description: str = ""):
@@ -43,10 +109,14 @@ class SimpleTUI:
else:
full_prompt = f"{prompt}: "
- result = input(full_prompt).strip()
- return result if result else default
+ try:
+ result = input(full_prompt).strip()
+ return result if result else default
+ except (KeyboardInterrupt, EOFError):
+ print("\nGoodbye!")
+ sys.exit(0)
- def show_menu(self, title: str, options: List[str], show_cli: bool = True) -> int:
+ def show_menu(self, title: str, options: List[str], show_cli: bool = True, back_option: str = None) -> int:
"""Show a menu and get user selection."""
print(f"šÆ {title}")
print("=" * (len(title) + 3))
@@ -55,6 +125,10 @@ class SimpleTUI:
for i, option in enumerate(options, 1):
print(f"{i}. {option}")
+ # Add back/exit option
+ if back_option:
+ print(f"0. {back_option}")
+
if show_cli:
print()
print("š” All these actions can be done via CLI commands")
@@ -64,13 +138,16 @@ class SimpleTUI:
while True:
try:
choice = int(input("Select option (number): "))
- if 1 <= choice <= len(options):
+ if choice == 0 and back_option:
+ return -1 # Special value for back/exit
+ elif 1 <= choice <= len(options):
return choice - 1
else:
- print(f"Please enter a number between 1 and {len(options)}")
+ valid_range = "0-" + str(len(options)) if back_option else "1-" + str(len(options))
+ print(f"Please enter a number between {valid_range}")
except ValueError:
print("Please enter a valid number")
- except KeyboardInterrupt:
+ except (KeyboardInterrupt, EOFError):
print("\nGoodbye!")
sys.exit(0)
@@ -88,49 +165,90 @@ class SimpleTUI:
print(f"Current project: {self.project_path}")
print()
- options = [
- "Enter project path",
- "Use current directory",
- "Browse recent projects" if self.project_path else "Skip (will ask later)"
- ]
+ print("š” New to FSS-Mini-RAG? Select 'Use current directory' to")
+ print(" explore this RAG system's own codebase as your first demo!")
+ print()
- choice = self.show_menu("Choose project directory", options, show_cli=False)
+ # If we already have a project, show it prominently and offer quick actions
+ if self.project_path:
+ rag_dir = self.project_path / '.mini-rag'
+ is_indexed = rag_dir.exists()
+ status_text = "Ready for search ✅" if is_indexed else "Needs indexing ❌"
+
+ print(f"Current: {self.project_path.name} ({status_text})")
+ print()
+
+ options = [
+ "Keep current project (go back to main menu)",
+ "Use current directory (this folder)",
+ "Enter different project path",
+ "Browse recent projects"
+ ]
+ else:
+ options = [
+ "Use current directory (perfect for beginners - try the RAG codebase!)",
+ "Enter project path (if you have a specific project)",
+ "Browse recent projects"
+ ]
- if choice == 0:
- # Enter path manually
- while True:
- path_str = self.get_input("Enter project directory path",
- str(self.project_path) if self.project_path else "")
-
- if not path_str:
- continue
-
- project_path = Path(path_str).expanduser().resolve()
-
- if project_path.exists() and project_path.is_dir():
- self.project_path = project_path
- print(f"✅ Selected: {self.project_path}")
- break
- else:
- print(f"ā Directory not found: {project_path}")
- retry = input("Try again? (y/N): ").lower()
- if retry != 'y':
- break
+ choice = self.show_menu("Choose project directory", options, show_cli=False, back_option="Back to main menu")
- elif choice == 1:
- # Use current directory
- self.project_path = Path.cwd()
- print(f"✅ Using current directory: {self.project_path}")
+ if choice == -1: # Back to main menu
+ return
- elif choice == 2:
- # Browse recent projects or skip
- if self.project_path:
+ # Handle different choice patterns based on whether we have a project
+ if self.project_path:
+ if choice == 0:
+ # Keep current project - just go back
+ return
+ elif choice == 1:
+ # Use current directory
+ self.project_path = Path.cwd()
+ print(f"✅ Using current directory: {self.project_path}")
+ self._save_last_project()
+ elif choice == 2:
+ # Enter different project path
+ self._enter_project_path()
+ elif choice == 3:
+ # Browse recent projects
+ self.browse_recent_projects()
+ else:
+ if choice == 0:
+ # Use current directory
+ self.project_path = Path.cwd()
+ print(f"✅ Using current directory: {self.project_path}")
+ self._save_last_project()
+ elif choice == 1:
+ # Enter project path
+ self._enter_project_path()
+ elif choice == 2:
+ # Browse recent projects
self.browse_recent_projects()
- else:
- print("No project selected - you can choose one later from the main menu")
input("\nPress Enter to continue...")
+ def _enter_project_path(self):
+ """Helper method to handle manual project path entry."""
+ while True:
+ path_str = self.get_input("Enter project directory path",
+ str(self.project_path) if self.project_path else "")
+
+ if not path_str:
+ continue
+
+ project_path = Path(path_str).expanduser().resolve()
+
+ if project_path.exists() and project_path.is_dir():
+ self.project_path = project_path
+ print(f"✅ Selected: {self.project_path}")
+ self._save_last_project()
+ break
+ else:
+ print(f"ā Directory not found: {project_path}")
+ retry = input("Try again? (y/N): ").lower()
+ if retry != 'y':
+ break
+
def browse_recent_projects(self):
"""Browse recently indexed projects."""
print("š Recent Projects")
@@ -192,6 +310,7 @@ class SimpleTUI:
if 1 <= choice <= len(recent_projects):
self.project_path = recent_projects[choice - 1]
 print(f"✅ Selected: {self.project_path}")
+ self._save_last_project()
except (ValueError, IndexError):
print("Selection cancelled")
@@ -214,9 +333,7 @@ class SimpleTUI:
# Check if already indexed
rag_dir = self.project_path / '.mini-rag'
if rag_dir.exists():
- print("ā ļø Project appears to be already indexed")
- print()
- force = input("Re-index everything? (y/N): ").lower() == 'y'
+ force = self._show_existing_index_info(rag_dir)
else:
force = False
@@ -227,26 +344,157 @@ class SimpleTUI:
self.print_cli_command(cli_cmd, "Index project for semantic search")
- print("Starting indexing...")
+ # Import here to avoid startup delays
+ sys.path.insert(0, str(Path(__file__).parent))
+ from mini_rag.indexer import ProjectIndexer
+
+ # Get file count and show preview before starting
+ print("š Analyzing project structure...")
print("=" * 50)
- # Actually run the indexing
try:
- # Import here to avoid startup delays
- sys.path.insert(0, str(Path(__file__).parent))
- from mini_rag.indexer import ProjectIndexer
-
indexer = ProjectIndexer(self.project_path)
+
+ # Get files that would be indexed
+ files_to_index = indexer._get_files_to_index()
+ total_files = len(files_to_index)
+
+ if total_files == 0:
+ print("✅ All files are already up to date!")
+ print(" No indexing needed.")
+ input("\nPress Enter to continue...")
+ return
+
+ # Show file analysis
+ print(f"š Indexing Analysis:")
+ print(f" Files to process: {total_files}")
+
+ # Analyze file types
+ file_types = {}
+ total_size = 0
+ for file_path in files_to_index:
+ ext = file_path.suffix.lower() or 'no extension'
+ file_types[ext] = file_types.get(ext, 0) + 1
+ try:
+ total_size += file_path.stat().st_size
+ except:
+ pass
+
+ # Show breakdown
+ print(f" Total size: {total_size / (1024*1024):.1f}MB")
+ print(f" File types:")
+ for ext, count in sorted(file_types.items(), key=lambda x: x[1], reverse=True):
+ print(f" ⢠{ext}: {count} files")
+
+ # Conservative time estimate for average hardware
+ estimated_time = self._estimate_processing_time(total_files, total_size)
+ print(f" Estimated time: {estimated_time}")
+
+ print()
+ print("š” What indexing does:")
+ print(" ⢠Reads and analyzes each file's content (READ-ONLY)")
+ print(" ⢠Breaks content into semantic chunks")
+ print(" ⢠Generates embeddings for semantic search")
+ print(" ⢠Stores everything in a separate .mini-rag/ database")
+ print()
+ print("š”ļø SAFETY GUARANTEE:")
+ print(" ⢠Your original files are NEVER modified or touched")
+ print(" ⢠Only reads files to create the search index")
+ print(" ⢠All data stored separately in .mini-rag/ folder")
+ print(" ⢠You can delete the .mini-rag/ folder anytime to remove all traces")
+ print()
+
+ # Confirmation
+ confirm = input("š Proceed with indexing? [Y/n]: ").strip().lower()
+ if confirm and confirm != 'y' and confirm != 'yes':
+ print("Indexing cancelled.")
+ input("Press Enter to continue...")
+ return
+
+ print("\nš Starting indexing...")
+ print("=" * 50)
+
+ # Actually run the indexing
result = indexer.index_project(force_reindex=force)
print()
- print("✅ Indexing completed!")
- print(f" Files processed: {result.get('files_indexed', 0)}")
- print(f" Chunks created: {result.get('chunks_created', 0)}")
- print(f" Time taken: {result.get('time_taken', 0):.1f}s")
+ print("š INDEXING COMPLETE!")
+ print("=" * 50)
- if result.get('files_failed', 0) > 0:
- print(f" ā ļø Files failed: {result['files_failed']}")
+ # Comprehensive performance summary
+ files_processed = result.get('files_indexed', 0)
+ chunks_created = result.get('chunks_created', 0)
+ time_taken = result.get('time_taken', 0)
+ files_failed = result.get('files_failed', 0)
+ files_per_second = result.get('files_per_second', 0)
+
+ print(f"š PROCESSING SUMMARY:")
+ print(f" ✅ Files successfully processed: {files_processed:,}")
+ print(f" š§© Semantic chunks created: {chunks_created:,}")
+ print(f" ā±ļø Total processing time: {time_taken:.2f} seconds")
+ print(f" š Processing speed: {files_per_second:.1f} files/second")
+
+ if files_failed > 0:
+ print(f" ā ļø Files with issues: {files_failed}")
+
+ # Show what we analyzed
+ if chunks_created > 0:
+ avg_chunks_per_file = chunks_created / max(files_processed, 1)
+ print()
+ print(f"š CONTENT ANALYSIS:")
+ print(f" ⢠Average chunks per file: {avg_chunks_per_file:.1f}")
+ print(f" ⢠Semantic boundaries detected and preserved")
+ print(f" ⢠Function and class contexts captured")
+ print(f" ⢠Documentation and code comments indexed")
+
+ # Try to show embedding info
+ try:
+ embedder = indexer.embedder
+ embed_info = embedder.get_embedding_info()
+ print(f" ⢠Embedding method: {embed_info.get('method', 'Unknown')}")
+ print(f" ⢠Vector dimensions: {embedder.get_embedding_dim()}")
+ except:
+ pass
+
+ # Database info
+ print()
+ print(f"š¾ DATABASE CREATED:")
+ print(f" ⢠Location: {self.project_path}/.mini-rag/")
+ print(f" ⢠Vector database with {chunks_created:,} searchable chunks")
+ print(f" ⢠Optimized for fast semantic similarity search")
+ print(f" ⢠Supports natural language queries")
+
+ # Performance metrics
+ if time_taken > 0:
+ print()
+ print(f"ā” PERFORMANCE METRICS:")
+ chunks_per_second = chunks_created / time_taken if time_taken > 0 else 0
+ print(f" ⢠{chunks_per_second:.0f} chunks processed per second")
+
+ # Estimate search performance
+ estimated_search_time = max(0.1, chunks_created / 10000) # Very rough estimate
+ print(f" ⢠Estimated search time: ~{estimated_search_time:.1f}s per query")
+
+ if total_size > 0:
+ mb_per_second = (total_size / (1024*1024)) / time_taken
+ print(f" ⢠Data processing rate: {mb_per_second:.1f} MB/second")
+
+ # What's next
+ print()
+ print(f"šÆ READY FOR SEARCH!")
+ print(f" Your codebase is now fully indexed and searchable.")
+ print(f" Try queries like:")
+ print(f" ⢠'authentication logic'")
+ print(f" ⢠'error handling patterns'")
+ print(f" ⢠'database connection setup'")
+ print(f" ⢠'unit tests for validation'")
+
+ if files_failed > 0:
+ print()
+ print(f"š NOTES:")
+ print(f" ⢠{files_failed} files couldn't be processed (binary files, encoding issues, etc.)")
+ print(f" ⢠This is normal - only text-based files are indexed")
+ print(f" ⢠All processable content has been successfully indexed")
except Exception as e:
print(f"ā Indexing failed: {e}")
@@ -255,6 +503,83 @@ class SimpleTUI:
print()
input("Press Enter to continue...")
+ def _show_existing_index_info(self, rag_dir: Path) -> bool:
+ """Show essential info about existing index and ask about re-indexing."""
+ print("š EXISTING INDEX FOUND")
+ print("=" * 50)
+ print()
+ print("š”ļø Your original files are safe and unmodified.")
+ print()
+
+ try:
+ manifest_path = rag_dir / 'manifest.json'
+ if manifest_path.exists():
+ import json
+ from datetime import datetime
+
+ with open(manifest_path, 'r') as f:
+ manifest = json.load(f)
+
+ file_count = manifest.get('file_count', 0)
+ chunk_count = manifest.get('chunk_count', 0)
+ indexed_at = manifest.get('indexed_at', 'Unknown')
+
+ print(f"⢠Files indexed: {file_count:,}")
+ print(f"⢠Chunks created: {chunk_count:,}")
+
+ # Show when it was last indexed
+ time_ago = None
+ if indexed_at != 'Unknown':
+ try:
+ dt = datetime.fromisoformat(indexed_at.replace('Z', '+00:00'))
+ time_ago = datetime.now() - dt.replace(tzinfo=None)
+
+ if time_ago.days > 0:
+ age_str = f"{time_ago.days} day(s) ago"
+ elif time_ago.seconds > 3600:
+ age_str = f"{time_ago.seconds // 3600} hour(s) ago"
+ else:
+ age_str = f"{time_ago.seconds // 60} minute(s) ago"
+
+ print(f"⢠Last indexed: {age_str}")
+ except:
+ print(f"⢠Last indexed: {indexed_at}")
+ else:
+ print("⢠Last indexed: Unknown")
+
+ # Simple recommendation (only when the index age is known)
+ if time_ago is None:
+ print("\nš” MAYBE: Re-index if you've made changes (index age unknown)")
+ elif time_ago.days >= 7:
+ print(f"\nš” RECOMMEND: Re-index (index is {time_ago.days} days old)")
+ elif time_ago.days >= 1:
+ print(f"\nš” MAYBE: Re-index if you've made changes ({time_ago.days} day(s) old)")
+ else:
+ print(f"\nš” RECOMMEND: Skip (index is recent)")
+
+ estimate = self._estimate_processing_time(file_count, 0)
+ print(f"⢠Re-indexing would take: {estimate}")
+
+ else:
+ print("ā ļø Index corrupted - recommend re-indexing")
+
+ except Exception:
+ print("ā ļø Could not read index info - recommend re-indexing")
+
+ print()
+ choice = input("š Re-index everything? [y/N]: ").strip().lower()
+ return choice in ['y', 'yes']
+
+ def _estimate_processing_time(self, file_count: int, total_size_bytes: int) -> str:
+ """Conservative time estimates for average hardware (not high-end dev machines)."""
+ # Conservative: 2 seconds per file for average hardware (4x buffer from fast machines)
+ estimated_seconds = file_count * 2.0 + 15 # +15s startup overhead
+
+ if estimated_seconds < 60:
+ return "1-2 minutes"
+ elif estimated_seconds < 300: # 5 minutes
+ minutes = int(estimated_seconds / 60)
+ return f"{minutes}-{minutes + 1} minutes"
+ else:
+ minutes = int(estimated_seconds / 60)
+ return f"{minutes}+ minutes"
def search_interactive(self):
"""Interactive search interface."""
if not self.project_path:
@@ -279,51 +604,54 @@ class SimpleTUI:
print(f"Project: {self.project_path.name}")
print()
- # Show sample questions for beginners - relevant to FSS-Mini-RAG
- print("š” Not sure what to search for? Try these questions about FSS-Mini-RAG:")
- print()
- sample_questions = [
- "chunking strategy",
- "ollama integration",
- "indexing performance",
- "why does indexing take long",
- "how to improve search results",
- "embedding generation"
- ]
-
- for i, question in enumerate(sample_questions[:3], 1):
- print(f" {i}. {question}")
- print(" 4. Enter your own question")
+ # More prominent search input
+ print("šÆ ENTER YOUR SEARCH QUERY:")
+ print(" Ask any question about your codebase using natural language")
+ print(" Examples: 'chunking strategy', 'ollama integration', 'embedding generation'")
print()
- # Let user choose a sample or enter their own
- choice_str = self.get_input("Choose a number (1-4) or press Enter for custom", "4")
+ # Primary input - direct query entry
+ query = self.get_input("Search query", "").strip()
- try:
- choice = int(choice_str)
- if 1 <= choice <= 3:
- query = sample_questions[choice - 1]
- print(f"Selected: '{query}'")
- print()
- else:
- query = self.get_input("Enter your search query", "").strip()
- except ValueError:
- query = self.get_input("Enter your search query", "").strip()
+ # If they didn't enter anything, show sample options
+ if not query:
+ print()
+ print("š” Need inspiration? Try one of these sample queries:")
+ print()
+ sample_questions = [
+ "chunking strategy",
+ "ollama integration",
+ "indexing performance",
+ "why does indexing take long",
+ "how to improve search results",
+ "embedding generation"
+ ]
+
+ for i, question in enumerate(sample_questions[:3], 1):
+ print(f" {i}. {question}")
+ print()
+
+ choice_str = self.get_input("Select a sample query (1-3) or press Enter to go back", "")
+
+ if choice_str.isdigit():
+ choice = int(choice_str)
+ if 1 <= choice <= 3:
+ query = sample_questions[choice - 1]
+ print(f"✅ Using: '{query}'")
+ print()
+
+ # If still no query, return to menu
if not query:
return
- # Get result limit
- try:
- limit = int(self.get_input("Number of results", "10"))
- limit = max(1, min(20, limit)) # Clamp between 1-20
- except ValueError:
- limit = 10
+ # Use a sensible default for results to streamline UX
+ top_k = 10 # Good default, advanced users can use CLI for more options
# Show CLI command
cli_cmd = f"./rag-mini search {self.project_path} \"{query}\""
- if limit != 10:
- cli_cmd += f" --limit {limit}"
+ if top_k != 10:
+ cli_cmd += f" --top-k {top_k}"
self.print_cli_command(cli_cmd, "Search for semantic matches")
@@ -338,7 +666,7 @@ class SimpleTUI:
searcher = CodeSearcher(self.project_path)
# Enable query expansion in TUI for better results
searcher.config.search.expand_queries = True
- results = searcher.search(query, top_k=limit)
+ results = searcher.search(query, top_k=top_k)
if not results:
print("ā No results found")
@@ -352,9 +680,18 @@ class SimpleTUI:
print()
for i, result in enumerate(results, 1):
+ # Add divider and whitespace before each result (except first)
+ if i > 1:
+ print()
+ print("-" * 60)
+ print()
+
# Clean up file path
try:
- rel_path = result.file_path.relative_to(self.project_path)
+ if hasattr(result.file_path, 'relative_to'):
+ rel_path = result.file_path.relative_to(self.project_path)
+ else:
+ rel_path = Path(result.file_path).relative_to(self.project_path)
except:
rel_path = result.file_path
@@ -392,6 +729,13 @@ class SimpleTUI:
for i, question in enumerate(follow_up_questions, 1):
print(f" {i}. {question}")
+ # Show additional CLI commands
+ print()
+ print("š» CLI Commands:")
+ print(f" ./rag-mini search {self.project_path} \"{query}\" --top-k 20 # More results")
+ print(f" ./rag-mini explore {self.project_path} # Interactive mode")
+ print(f" ./rag-mini search {self.project_path} \"{query}\" --synthesize # With AI summary")
+
# Ask if they want to run a follow-up search
print()
choice = input("Run a follow-up search? Enter number (1-3) or press Enter to continue: ").strip()
@@ -407,8 +751,17 @@ class SimpleTUI:
 print(f"✅ Found {len(follow_results)} follow-up results:")
print()
for i, result in enumerate(follow_results[:3], 1): # Show top 3
+ # Add divider for follow-up results too
+ if i > 1:
+ print()
+ print("-" * 40)
+ print()
+
try:
- rel_path = result.file_path.relative_to(self.project_path)
+ if hasattr(result.file_path, 'relative_to'):
+ rel_path = result.file_path.relative_to(self.project_path)
+ else:
+ rel_path = Path(result.file_path).relative_to(self.project_path)
except:
rel_path = result.file_path
print(f"{i}. {rel_path} (Score: {result.score:.3f})")
@@ -448,12 +801,19 @@ class SimpleTUI:
print(f"\nSwitching to full project: {parent_dir}")
print("Starting full indexing...")
# Note: This would trigger full indexing in real implementation
- print(f" Or: ./rag-mini-enhanced context {self.project_path} \"{query}\"")
- print()
-
+
except Exception as e:
print(f"ā Search failed: {e}")
- print(" Try running the CLI command directly for more details")
+ print()
+ print("š” Try these CLI commands for more details:")
+ print(f" ./rag-mini search {self.project_path} \"{query}\" --verbose")
+ print(f" ./rag-mini status {self.project_path}")
+ print(" ./rag-mini --help")
+ print()
+ print("š§ Common solutions:")
+ print(" ⢠Make sure the project is indexed first")
+ print(" ⢠Check if Ollama is running: ollama serve")
+ print(" ⢠Try a simpler search query")
print()
input("Press Enter to continue...")
@@ -485,8 +845,15 @@ class SimpleTUI:
if results:
file_extensions = set()
for result in results[:3]: # Check first 3 results
- ext = result.file_path.suffix.lower()
- file_extensions.add(ext)
+ try:
+ # Handle both Path objects and strings
+ if hasattr(result.file_path, 'suffix'):
+ ext = result.file_path.suffix.lower()
+ else:
+ ext = Path(result.file_path).suffix.lower()
+ file_extensions.add(ext)
+ except:
+ continue # Skip if we can't get extension
if '.py' in file_extensions:
follow_ups.append("Python module dependencies")
@@ -549,61 +916,173 @@ class SimpleTUI:
input("Press Enter to continue...")
return
- print("\nš¤ Ask your first question about the codebase:")
- print(" (Type 'help' for commands, 'quit' to return to menu)")
+ print("\nš¤ Ask questions about the codebase:")
+ print(" Quick: 0=quit, 1=summary, 2=history, 3=suggest next question")
while True:
try:
question = input("\n> ").strip()
+ # Handle numbered options
+ if question == '0':
+ print(explorer.end_session())
+ break
+ elif question == '1':
+ print("\n" + explorer.get_session_summary())
+ continue
+ elif question == '2':
+ if hasattr(explorer.current_session, 'conversation_history') and explorer.current_session.conversation_history:
+ print("\nš Recent questions:")
+ for i, exchange in enumerate(explorer.current_session.conversation_history[-3:], 1):
+ q = exchange["question"][:50] + "..." if len(exchange["question"]) > 50 else exchange["question"]
+ print(f" {i}. {q}")
+ else:
+ print("\nš No questions asked yet")
+ continue
+ elif question == '3':
+ # Generate smart suggestion
+ suggested_question = self._generate_smart_suggestion(explorer)
+ if suggested_question:
+ print(f"\nš” Suggested question: {suggested_question}")
+ print(" Press Enter to use this, or type your own question:")
+ next_input = input("> ").strip()
+ if not next_input: # User pressed Enter to use suggestion
+ question = suggested_question
+ else:
+ question = next_input
+ else:
+ print("\nš” No suggestions available yet. Ask a question first!")
+ continue
+
+ # Simple exit handling
if question.lower() in ['quit', 'exit', 'q', 'back']:
- print("\n" + explorer.end_session())
+ print(explorer.end_session())
break
+ # Skip empty input
if not question:
continue
- if question.lower() in ['help', 'h']:
- print("""
-š§ EXPLORATION MODE HELP:
- ⢠Ask any question about the codebase
- ⢠I remember our conversation for follow-up questions
- ⢠Use 'why', 'how', 'explain' for detailed reasoning
- ⢠Type 'summary' to see session overview
- ⢠Type 'quit' to return to main menu
-
-š” Example questions:
- ⢠"How does authentication work?"
- ⢠"Why is this function slow?"
- ⢠"Explain the database connection logic"
- ⢠"What are the security concerns here?"
-""")
+ # Simple help
+ if question.lower() in ['help', 'h', '?']:
+ print("\nš” Just ask any question about the codebase!")
+ print(" Examples: 'how does search work?' or 'explain the indexing'")
+ print(" Quick: 0=quit, 1=summary, 2=history, 3=suggest")
continue
- if question.lower() == 'summary':
- print("\n" + explorer.get_session_summary())
- continue
-
- print("\nš Analyzing...")
+ # Process the question immediately
+ print("š Thinking...")
response = explorer.explore_question(question)
if response:
- print(f"\n{response}")
+ print(f"\n{response}\n")
else:
- print("ā Sorry, I couldn't process that question. Please try again.")
+ print("ā Sorry, I couldn't process that question.\n")
except KeyboardInterrupt:
- print(f"\n\n{explorer.end_session()}")
+ print(f"\n{explorer.end_session()}")
break
except EOFError:
- print(f"\n\n{explorer.end_session()}")
+ print(f"\n{explorer.end_session()}")
break
except Exception as e:
print(f"ā Exploration mode failed: {e}")
print(" Try running the CLI command directly for more details")
+ input("\nPress Enter to continue...")
+ return
- input("\nPress Enter to continue...")
+ # Exploration session completed successfully, return to menu without extra prompt
+
+ def _generate_smart_suggestion(self, explorer):
+ """Generate a smart follow-up question based on conversation context."""
+ if not explorer.current_session or not explorer.current_session.conversation_history:
+ return None
+
+ try:
+ # Get recent conversation context
+ recent_exchanges = explorer.current_session.conversation_history[-2:] # Last 2 exchanges
+ context_summary = ""
+
+ for i, exchange in enumerate(recent_exchanges, 1):
+ q = exchange["question"]
+ summary = exchange["response"]["summary"][:100] + "..." if len(exchange["response"]["summary"]) > 100 else exchange["response"]["summary"]
+ context_summary += f"Q{i}: {q}\nA{i}: {summary}\n\n"
+
+ # Create a very focused prompt that encourages short responses
+ prompt = f"""Based on this recent conversation about a codebase, suggest ONE short follow-up question (under 10 words).
+
+Recent conversation:
+{context_summary.strip()}
+
+Respond with ONLY a single short question that would logically explore deeper or connect to what was discussed. Examples:
+- "Why does this approach work better?"
+- "What could go wrong here?"
+- "How is this tested?"
+- "Where else is this pattern used?"
+
+Your suggested question (under 10 words):"""
+
+ # Use the synthesizer to generate suggestion
+ response = explorer.synthesizer._call_ollama(prompt, temperature=0.3, disable_thinking=True)
+
+ if response:
+ # Clean up the response - extract just the question
+ lines = response.strip().split('\n')
+ for line in lines:
+ line = line.strip()
+ if line and ('?' in line or line.lower().startswith(('what', 'how', 'why', 'where', 'when', 'which', 'who'))):
+ # Remove any prefixes like "Question:" or numbers
+ cleaned = line.split(':', 1)[-1].strip()
+ if len(cleaned) < 80 and ('?' in cleaned or cleaned.lower().startswith(('what', 'how', 'why', 'where', 'when', 'which', 'who'))):
+ return cleaned
+
+ # Fallback: use first non-empty line if it looks like a question
+ first_line = lines[0].strip() if lines else ""
+ if first_line and len(first_line) < 80:
+ return first_line
+
+ # Fallback: pattern-based suggestions if LLM fails
+ return self._get_fallback_suggestion(recent_exchanges)
+
+ except Exception as e:
+ # Silent fail with pattern-based fallback
+ recent_exchanges = explorer.current_session.conversation_history[-2:] if explorer.current_session.conversation_history else []
+ return self._get_fallback_suggestion(recent_exchanges)
+
+ def _get_fallback_suggestion(self, recent_exchanges):
+ """Generate pattern-based suggestions as fallback."""
+ if not recent_exchanges:
+ return None
+
+ last_question = recent_exchanges[-1]["question"].lower()
+
+ # Simple pattern matching for common follow-ups
+ if "how" in last_question and "work" in last_question:
+ return "What could go wrong with this approach?"
+ elif "what" in last_question and ("is" in last_question or "does" in last_question):
+ return "How is this implemented?"
+ elif "implement" in last_question or "code" in last_question:
+ return "How is this tested?"
+ elif "error" in last_question or "bug" in last_question:
+ return "How can this be prevented?"
+ elif "performance" in last_question or "speed" in last_question:
+ return "What are the bottlenecks here?"
+ elif "security" in last_question or "safe" in last_question:
+ return "What other security concerns exist?"
+ elif "test" in last_question:
+ return "What edge cases should be considered?"
+ else:
+ # Generic follow-ups
+ fallbacks = [
+ "How is this used elsewhere?",
+ "What are the alternatives?",
+ "Why was this approach chosen?",
+ "What happens when this fails?",
+ "How can this be improved?"
+ ]
+ import random
+ return random.choice(fallbacks)
def show_status(self):
"""Show project and system status."""
@@ -735,7 +1214,7 @@ class SimpleTUI:
print(" ⢠chunking.strategy - 'semantic' (smart) vs 'fixed' (simple)")
print(" ⢠files.exclude_patterns - Skip files matching these patterns")
print(" ⢠embedding.preferred_method - 'ollama', 'ml', 'hash', or 'auto'")
- print(" ⢠search.default_limit - Default number of search results")
+ print(" ⢠search.default_top_k - Default number of search results (top-k)")
print()
print("š References:")
@@ -796,7 +1275,7 @@ class SimpleTUI:
print("āļø Options:")
print(" --force # Force complete re-index")
- print(" --limit N # Limit search results")
+ print(" --top-k N # Number of top results to return")
print(" --verbose # Show detailed output")
print()
@@ -812,11 +1291,44 @@ class SimpleTUI:
self.clear_screen()
self.print_header()
- # Show current project status
+ # Show current project status prominently
if self.project_path:
rag_dir = self.project_path / '.mini-rag'
- status = "✅ Indexed" if rag_dir.exists() else "❌ Not indexed"
- print(f"š Current project: {self.project_path.name} ({status})")
+ is_indexed = rag_dir.exists()
+ status_icon = "✅" if is_indexed else "❌"
+ status_text = "Ready for search" if is_indexed else "Needs indexing"
+
+ # Check LLM status
+ llm_status, llm_model = self._get_llm_status()
+
+ print("āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā")
+ # Calculate exact spacing for 50-char content width
+ project_line = f" Current Project: {self.project_path.name}"
+ print(f"ā{project_line:<50}ā")
+
+ status_line = f" Index Status: {status_icon} {status_text}"
+ print(f"ā{status_line:<50}ā")
+
+ llm_line = f" LLM Status: {llm_status}"
+ print(f"ā{llm_line:<50}ā")
+
+ if llm_model:
+ model_line = f" Model: {llm_model}"
+ print(f"ā{model_line:<50}ā")
+
+ if is_indexed:
+ # Show quick stats if indexed
+ try:
+ manifest = rag_dir / 'manifest.json'
+ if manifest.exists():
+ with open(manifest) as f:
+ data = json.load(f)
+ file_count = data.get('file_count', 0)
+ files_line = f" Files indexed: {file_count}"
+ print(f"ā{files_line:<50}ā")
+ except:
+ pass
+ print("āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā")
print()
else:
# Show beginner tips when no project selected
@@ -825,20 +1337,50 @@ class SimpleTUI:
print(" Start by selecting a project directory below.")
print()
- options = [
- "Select project directory",
- "Index project for search",
- "Search project (Fast synthesis)",
- "Explore project (Deep thinking)",
- "View status",
- "Configuration",
- "CLI command reference",
- "Exit"
- ]
+ # Create options with visual cues based on project status
+ if self.project_path:
+ rag_dir = self.project_path / '.mini-rag'
+ is_indexed = rag_dir.exists()
+
+ if is_indexed:
+ options = [
+ "Select project directory",
+ "\033[2mIndex project for search (already indexed)\033[0m",
+ "Search project (Fast synthesis)",
+ "Explore project (Deep thinking)",
+ "View status",
+ "Configuration",
+ "CLI command reference"
+ ]
+ else:
+ options = [
+ "Select project directory",
+ "Index project for search",
+ "\033[2mSearch project (needs indexing first)\033[0m",
+ "\033[2mExplore project (needs indexing first)\033[0m",
+ "View status",
+ "Configuration",
+ "CLI command reference"
+ ]
+ else:
+ # No project selected - gray out project-dependent options
+ options = [
+ "Select project directory",
+ "\033[2mIndex project for search (select project first)\033[0m",
+ "\033[2mSearch project (select project first)\033[0m",
+ "\033[2mExplore project (select project first)\033[0m",
+ "\033[2mView status (select project first)\033[0m",
+ "Configuration",
+ "CLI command reference"
+ ]
- choice = self.show_menu("Main Menu", options)
+ choice = self.show_menu("Main Menu", options, back_option="Exit")
- if choice == 0:
+ if choice == -1: # Exit (0 option)
+ print("\nThanks for using FSS-Mini-RAG! š")
+ print("Try the CLI commands for even more power!")
+ break
+ elif choice == 0:
self.select_project()
elif choice == 1:
self.index_project_interactive()
@@ -852,17 +1394,35 @@ class SimpleTUI:
self.show_configuration()
elif choice == 6:
self.show_cli_reference()
- elif choice == 7:
- print("\nThanks for using FSS-Mini-RAG! š")
- print("Try the CLI commands for even more power!")
- break
def main():
"""Main entry point."""
try:
+ # Check if we can import dependencies
+ try:
+ sys.path.insert(0, str(Path(__file__).parent))
+ from mini_rag.venv_checker import check_and_warn_venv
+ check_and_warn_venv("rag-tui", force_exit=False)
+ except ImportError as e:
+ # Dependencies missing - show helpful message
+ script_dir = Path(__file__).parent
+ print("ā FSS-Mini-RAG dependencies not found!")
+ print("")
+ print("š§ To fix this:")
+ print(f" 1. Run the installer: {script_dir}/install_mini_rag.sh")
+ print(f" 2. Or use the wrapper script: {script_dir}/rag-tui")
+ print(" 3. Or activate the virtual environment first:")
+ print(f" cd {script_dir}")
+ print(" source .venv/bin/activate")
+ print(f" python3 {script_dir}/rag-tui.py")
+ print("")
+ print(f"š” Dependencies missing: {e}")
+ input("\nPress Enter to exit...")
+ return
+
tui = SimpleTUI()
tui.main_menu()
- except KeyboardInterrupt:
+ except (KeyboardInterrupt, EOFError):
print("\n\nGoodbye! š")
except Exception as e:
print(f"\nUnexpected error: {e}")
diff --git a/test_fixes.py b/test_fixes.py
new file mode 100644
index 0000000..cdcbc3f
--- /dev/null
+++ b/test_fixes.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+"""
+Quick test script to verify our key fixes without heavy dependencies.
+
+ā ļø IMPORTANT: This test requires the virtual environment to be activated:
+ source .venv/bin/activate
+ python test_fixes.py
+
+Or run directly with venv:
+ source .venv/bin/activate && python test_fixes.py
+"""
+
+import sys
+import os
+import tempfile
+from pathlib import Path
+
+# Check if virtual environment is activated
+def check_venv():
+ if 'VIRTUAL_ENV' not in os.environ:
+ print("ā ļø WARNING: Virtual environment not detected!")
+ print(" This test requires the virtual environment to be activated.")
+ print(" Run: source .venv/bin/activate && python test_fixes.py")
+ print(" Continuing anyway...\n")
+
+check_venv()
+
+# Add current directory to Python path
+sys.path.insert(0, '.')
+
+def test_config_model_rankings():
+ """Test that model rankings are properly configured."""
+ print("=" * 60)
+ print("TESTING CONFIG AND MODEL RANKINGS")
+ print("=" * 60)
+
+ try:
+ # Test config loading without heavy dependencies
+ from mini_rag.config import ConfigManager, LLMConfig
+
+ # Create a temporary directory for testing
+ with tempfile.TemporaryDirectory() as tmpdir:
+ config_manager = ConfigManager(tmpdir)
+ config = config_manager.load_config()
+
+ print("ā Config loads successfully")
+
+ # Check LLM config and model rankings
+ if hasattr(config, 'llm'):
+ llm_config = config.llm
+ print(f"ā LLM config found: {type(llm_config)}")
+
+ if hasattr(llm_config, 'model_rankings'):
+ rankings = llm_config.model_rankings
+ print(f"ā Model rankings: {rankings}")
+
+ if rankings and rankings[0] == "qwen3:1.7b":
+ print("ā qwen3:1.7b is FIRST priority - CORRECT!")
+ return True
+ else:
+ print(f"ā WRONG: First model is {rankings[0] if rankings else 'None'}, should be qwen3:1.7b")
+ return False
+ else:
+ print("ā Model rankings not found in LLM config")
+ return False
+ else:
+ print("ā LLM config not found")
+ return False
+
+ except ImportError as e:
+ print(f"ā Import error: {e}")
+ return False
+ except Exception as e:
+ print(f"ā Error: {e}")
+ return False
+
+def test_context_length_fix():
+ """Test that context length is correctly set to 32K."""
+ print("\n" + "=" * 60)
+ print("TESTING CONTEXT LENGTH FIXES")
+ print("=" * 60)
+
+ try:
+ # Read the synthesizer file and check for 32000
+ with open('mini_rag/llm_synthesizer.py', 'r') as f:
+ synthesizer_content = f.read()
+
+ if '"num_ctx": 32000' in synthesizer_content:
+ print("ā LLM Synthesizer: num_ctx is correctly set to 32000")
+ elif '"num_ctx": 80000' in synthesizer_content:
+ print("ā LLM Synthesizer: num_ctx is still 80000 - NEEDS FIX")
+ return False
+ else:
+ print("? LLM Synthesizer: num_ctx setting not found clearly")
+
+ # Read the safeguards file and check for 32000
+ with open('mini_rag/llm_safeguards.py', 'r') as f:
+ safeguards_content = f.read()
+
+ if 'context_window: int = 32000' in safeguards_content:
+ print("ā Safeguards: context_window is correctly set to 32000")
+ return True
+ elif 'context_window: int = 80000' in safeguards_content:
+ print("ā Safeguards: context_window is still 80000 - NEEDS FIX")
+ return False
+ else:
+ print("? Safeguards: context_window setting not found clearly")
+ return False
+
+ except Exception as e:
+ print(f"ā Error checking context length: {e}")
+ return False
+
+def test_safeguard_preservation():
+ """Test that safeguards preserve content instead of dropping it."""
+ print("\n" + "=" * 60)
+ print("TESTING SAFEGUARD CONTENT PRESERVATION")
+ print("=" * 60)
+
+ try:
+ # Read the synthesizer file and check for the preservation method
+ with open('mini_rag/llm_synthesizer.py', 'r') as f:
+ synthesizer_content = f.read()
+
+ if '_create_safeguard_response_with_content' in synthesizer_content:
+ print("ā Safeguard content preservation method exists")
+ else:
+ print("ā Safeguard content preservation method missing")
+ return False
+
+ # Check for the specific preservation logic
+ if 'AI Response (use with caution):' in synthesizer_content:
+ print("ā Content preservation warning format found")
+ else:
+ print("ā Content preservation warning format missing")
+ return False
+
+ # Check that it's being called instead of dropping content
+ if 'return self._create_safeguard_response_with_content(issue_type, explanation, raw_response)' in synthesizer_content:
+ print("ā Preservation method is called when safeguards trigger")
+ return True
+ else:
+ print("ā Preservation method not called properly")
+ return False
+
+ except Exception as e:
+ print(f"ā Error checking safeguard preservation: {e}")
+ return False
+
+def test_import_fixes():
+ """Test that import statements are fixed from claude_rag to mini_rag."""
+ print("\n" + "=" * 60)
+ print("TESTING IMPORT STATEMENT FIXES")
+ print("=" * 60)
+
+ test_files = [
+ 'tests/test_rag_integration.py',
+ 'tests/01_basic_integration_test.py',
+ 'tests/test_hybrid_search.py',
+ 'tests/test_context_retrieval.py'
+ ]
+
+ all_good = True
+
+ for test_file in test_files:
+ if Path(test_file).exists():
+ try:
+ with open(test_file, 'r') as f:
+ content = f.read()
+
+ if 'claude_rag' in content:
+ print(f"ā {test_file}: Still contains 'claude_rag' imports")
+ all_good = False
+ elif 'mini_rag' in content:
+ print(f"ā {test_file}: Uses correct 'mini_rag' imports")
+ else:
+ print(f"? {test_file}: No rag imports found")
+
+ except Exception as e:
+ print(f"ā Error reading {test_file}: {e}")
+ all_good = False
+ else:
+ print(f"? {test_file}: File not found")
+
+ return all_good
+
+def main():
+ """Run all tests."""
+ print("FSS-Mini-RAG Fix Verification Tests")
+ print("Testing all the critical fixes...")
+
+ tests = [
+ ("Model Rankings", test_config_model_rankings),
+ ("Context Length", test_context_length_fix),
+ ("Safeguard Preservation", test_safeguard_preservation),
+ ("Import Fixes", test_import_fixes)
+ ]
+
+ results = {}
+
+ for test_name, test_func in tests:
+ try:
+ results[test_name] = test_func()
+ except Exception as e:
+ print(f"ā {test_name} test crashed: {e}")
+ results[test_name] = False
+
+ # Summary
+ print("\n" + "=" * 60)
+ print("TEST SUMMARY")
+ print("=" * 60)
+
+ passed = sum(1 for result in results.values() if result)
+ total = len(results)
+
+ for test_name, result in results.items():
+ status = "ā PASS" if result else "ā FAIL"
+ print(f"{status} {test_name}")
+
+ print(f"\nOverall: {passed}/{total} tests passed")
+
+ if passed == total:
+ print("š ALL TESTS PASSED - System should be working properly!")
+ return 0
+ else:
+ print("ā SOME TESTS FAILED - System needs more fixes!")
+ return 1
+
+if __name__ == "__main__":
+ sys.exit(main())
\ No newline at end of file
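
Both test_fixes.py and the entry points guard against a missing virtual environment: a soft check on VIRTUAL_ENV and, where importable, mini_rag.venv_checker. A sketch of the latter as the diff uses it (call copied from rag-mini.py above; the script name is illustrative):

try:
    from mini_rag.venv_checker import check_and_warn_venv
    check_and_warn_venv("my_script.py", force_exit=False)  # warn but keep running
except ImportError:
    pass  # checker not importable (e.g. venv missing) - continue anyway
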
diff --git a/tests/01_basic_integration_test.py b/tests/01_basic_integration_test.py
index 281322a..4fec7a7 100644
--- a/tests/01_basic_integration_test.py
+++ b/tests/01_basic_integration_test.py
@@ -1,5 +1,12 @@
"""
Comprehensive demo of the RAG system showing all integrated features.
+
+ā ļø IMPORTANT: This test requires the virtual environment to be activated:
+ source .venv/bin/activate
+ PYTHONPATH=. python tests/01_basic_integration_test.py
+
+Or run directly with venv:
+ source .venv/bin/activate && PYTHONPATH=. python tests/01_basic_integration_test.py
"""
import os
@@ -7,6 +14,16 @@ import sys
import tempfile
from pathlib import Path
+# Check if virtual environment is activated
+def check_venv():
+ if 'VIRTUAL_ENV' not in os.environ:
+ print("ā ļø WARNING: Virtual environment not detected!")
+ print(" This test requires the virtual environment to be activated.")
+ print(" Run: source .venv/bin/activate && PYTHONPATH=. python tests/01_basic_integration_test.py")
+ print(" Continuing anyway...\n")
+
+check_venv()
+
# Fix Windows encoding
if sys.platform == 'win32':
os.environ['PYTHONUTF8'] = '1'
@@ -15,7 +32,7 @@ if sys.platform == 'win32':
from mini_rag.chunker import CodeChunker
from mini_rag.indexer import ProjectIndexer
from mini_rag.search import CodeSearcher
-from mini_rag.embeddings import CodeEmbedder
+from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder
def main():
print("=" * 60)
@@ -189,17 +206,17 @@ if __name__ == "__main__":
# Test different search types
print("\n a) Semantic search for 'calculate average':")
- results = searcher.search("calculate average", limit=3)
+ results = searcher.search("calculate average", top_k=3)
for i, result in enumerate(results, 1):
print(f" {i}. {result.chunk_type} '{result.name}' in {result.file_path} (score: {result.score:.3f})")
print("\n b) BM25-weighted search for 'divide zero':")
- results = searcher.search("divide zero", limit=3, semantic_weight=0.2, bm25_weight=0.8)
+ results = searcher.search("divide zero", top_k=3, semantic_weight=0.2, bm25_weight=0.8)
for i, result in enumerate(results, 1):
print(f" {i}. {result.chunk_type} '{result.name}' in {result.file_path} (score: {result.score:.3f})")
print("\n c) Search with context for 'test addition':")
- results = searcher.search("test addition", limit=2, include_context=True)
+ results = searcher.search("test addition", top_k=2, include_context=True)
for i, result in enumerate(results, 1):
print(f" {i}. {result.chunk_type} '{result.name}'")
if result.parent_chunk:
diff --git a/tests/02_search_examples.py b/tests/02_search_examples.py
index b478d97..271c1ab 100644
--- a/tests/02_search_examples.py
+++ b/tests/02_search_examples.py
@@ -37,25 +37,25 @@ def demo_search(project_path: Path):
'title': 'Keyword-Heavy Search',
'query': 'BM25Okapi rank_bm25 search scoring',
'description': 'This query has specific technical keywords that BM25 excels at finding',
- 'limit': 5
+ 'top_k': 5
},
{
'title': 'Natural Language Query',
'query': 'how to build search index from database chunks',
'description': 'This semantic query benefits from transformer embeddings understanding intent',
- 'limit': 5
+ 'top_k': 5
},
{
'title': 'Mixed Technical Query',
'query': 'vector embeddings for semantic code search with transformers',
'description': 'This hybrid query combines technical terms with conceptual understanding',
- 'limit': 5
+ 'top_k': 5
},
{
'title': 'Function Search',
'query': 'search method implementation with filters',
'description': 'Looking for specific function implementations',
- 'limit': 5
+ 'top_k': 5
}
]
@@ -67,7 +67,7 @@ def demo_search(project_path: Path):
# Run search with hybrid mode
results = searcher.search(
query=demo['query'],
- limit=demo['limit'],
+ top_k=demo['top_k'],
semantic_weight=0.7,
bm25_weight=0.3
)
diff --git a/tests/03_system_validation.py b/tests/03_system_validation.py
index 6293c6f..ea47134 100644
--- a/tests/03_system_validation.py
+++ b/tests/03_system_validation.py
@@ -244,7 +244,7 @@ def compute_median(numbers):
searcher = CodeSearcher(project_path)
# Test BM25 integration
- results = searcher.search("multiply numbers", limit=5,
+ results = searcher.search("multiply numbers", top_k=5,
semantic_weight=0.3, bm25_weight=0.7)
if results:
@@ -283,7 +283,7 @@ def compute_median(numbers):
print(f" - No parent chunk")
# Test include_context in search
- results_with_context = searcher.search("add", include_context=True, limit=2)
+ results_with_context = searcher.search("add", include_context=True, top_k=2)
if results_with_context:
print(f" Found {len(results_with_context)} results with context")
for r in results_with_context:
diff --git a/tests/test_context_retrieval.py b/tests/test_context_retrieval.py
index 2db8d77..5c1a6cd 100644
--- a/tests/test_context_retrieval.py
+++ b/tests/test_context_retrieval.py
@@ -1,11 +1,29 @@
#!/usr/bin/env python3
"""
Test script for adjacent chunk retrieval functionality.
+
+ā ļø IMPORTANT: This test requires the virtual environment to be activated:
+ source .venv/bin/activate
+ PYTHONPATH=. python tests/test_context_retrieval.py
+
+Or run directly with venv:
+ source .venv/bin/activate && PYTHONPATH=. python tests/test_context_retrieval.py
"""
+import os
from pathlib import Path
from mini_rag.search import CodeSearcher
-from mini_rag.embeddings import CodeEmbedder
+from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder
+
+# Check if virtual environment is activated
+def check_venv():
+ if 'VIRTUAL_ENV' not in os.environ:
+ print("ā ļø WARNING: Virtual environment not detected!")
+ print(" This test requires the virtual environment to be activated.")
+ print(" Run: source .venv/bin/activate && PYTHONPATH=. python tests/test_context_retrieval.py")
+ print(" Continuing anyway...\n")
+
+check_venv()
def test_context_retrieval():
"""Test the new context retrieval functionality."""
@@ -20,7 +38,7 @@ def test_context_retrieval():
# Test 1: Search without context
print("\n1. Search WITHOUT context:")
- results = searcher.search("chunk metadata", limit=3, include_context=False)
+ results = searcher.search("chunk metadata", top_k=3, include_context=False)
for i, result in enumerate(results, 1):
print(f" Result {i}: {result.file_path}:{result.start_line}-{result.end_line}")
print(f" Type: {result.chunk_type}, Name: {result.name}")
@@ -30,7 +48,7 @@ def test_context_retrieval():
# Test 2: Search with context
print("\n2. Search WITH context:")
- results = searcher.search("chunk metadata", limit=3, include_context=True)
+ results = searcher.search("chunk metadata", top_k=3, include_context=True)
for i, result in enumerate(results, 1):
print(f" Result {i}: {result.file_path}:{result.start_line}-{result.end_line}")
print(f" Type: {result.chunk_type}, Name: {result.name}")
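
As the test above exercises, adjacent-chunk context comes from the same search() call with include_context=True. A minimal sketch against an already-indexed project (query and path are illustrative):

from pathlib import Path

from mini_rag.search import CodeSearcher

searcher = CodeSearcher(Path("."))  # assumes an index already exists under ./.mini-rag/
results = searcher.search("chunk metadata", top_k=3, include_context=True)
for result in results:
    print(f"{result.file_path}:{result.start_line}-{result.end_line} ({result.chunk_type})")
    if result.parent_chunk:  # parent context is attached when include_context=True finds one
        print("  parent context available")
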
diff --git a/tests/test_hybrid_search.py b/tests/test_hybrid_search.py
index 0d3f0fe..c3c526a 100644
--- a/tests/test_hybrid_search.py
+++ b/tests/test_hybrid_search.py
@@ -2,6 +2,13 @@
"""
Test and benchmark the hybrid BM25 + semantic search system.
Shows performance metrics and search quality comparisons.
+
+ā ļø IMPORTANT: This test requires the virtual environment to be activated:
+ source .venv/bin/activate
+ PYTHONPATH=. python tests/test_hybrid_search.py
+
+Or run directly with venv:
+ source .venv/bin/activate && PYTHONPATH=. python tests/test_hybrid_search.py
"""
import time
@@ -16,7 +23,7 @@ from rich.syntax import Syntax
from rich.progress import track
from mini_rag.search import CodeSearcher, SearchResult
-from mini_rag.embeddings import CodeEmbedder
+from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder
console = Console()
@@ -40,7 +47,7 @@ class SearchTester:
if 'error' not in stats:
console.print(f"[dim]Index contains {stats['total_chunks']} chunks from {stats['unique_files']} files[/dim]\n")
- def run_query(self, query: str, limit: int = 10,
+ def run_query(self, query: str, top_k: int = 10,
semantic_only: bool = False,
bm25_only: bool = False) -> Dict[str, Any]:
"""Run a single query and return metrics."""
@@ -60,7 +67,7 @@ class SearchTester:
start = time.time()
results = self.searcher.search(
query=query,
- limit=limit,
+ top_k=top_k,
semantic_weight=semantic_weight,
bm25_weight=bm25_weight
)
@@ -76,10 +83,10 @@ class SearchTester:
'avg_score': sum(r.score for r in results) / len(results) if results else 0,
}
- def compare_search_modes(self, query: str, limit: int = 5):
+ def compare_search_modes(self, query: str, top_k: int = 5):
"""Compare results across different search modes."""
console.print(f"\n[bold cyan]Query:[/bold cyan] '{query}'")
- console.print(f"[dim]Top {limit} results per mode[/dim]\n")
+ console.print(f"[dim]Top {top_k} results per mode[/dim]\n")
# Run searches in all modes
modes = [
@@ -90,7 +97,7 @@ class SearchTester:
all_results = {}
for mode_name, semantic_only, bm25_only in modes:
- result = self.run_query(query, limit, semantic_only, bm25_only)
+ result = self.run_query(query, top_k, semantic_only, bm25_only)
all_results[mode_name] = result
# Create comparison table
@@ -191,7 +198,7 @@ class SearchTester:
for test_case in test_queries:
console.rule(f"\n[cyan]{test_case['description']}[/cyan]")
console.print(f"[dim]{test_case['expected']}[/dim]")
- self.compare_search_modes(test_case['query'], limit=3)
+ self.compare_search_modes(test_case['query'], top_k=3)
time.sleep(0.5) # Brief pause between tests
def benchmark_performance(self, num_queries: int = 50):
@@ -268,7 +275,7 @@ class SearchTester:
# Query that might return many results from same files
query = "function implementation code search"
- results = self.searcher.search(query, limit=20)
+ results = self.searcher.search(query, top_k=20)
# Analyze diversity
file_counts = {}
diff --git a/tests/test_ollama_integration.py b/tests/test_ollama_integration.py
index 4466d3a..65673bf 100755
--- a/tests/test_ollama_integration.py
+++ b/tests/test_ollama_integration.py
@@ -403,9 +403,9 @@ class TestOllamaIntegration(unittest.TestCase):
# Check search config
self.assertIsNotNone(self.config.search)
- self.assertGreater(self.config.search.default_limit, 0)
+ self.assertGreater(self.config.search.default_top_k, 0)
 print(f" ✅ Search config valid")
- print(f" Default limit: {self.config.search.default_limit}")
+ print(f" Default top-k: {self.config.search.default_top_k}")
print(f" Query expansion: {self.config.search.expand_queries}")
diff --git a/tests/test_rag_integration.py b/tests/test_rag_integration.py
index 7dae3d5..00313e8 100644
--- a/tests/test_rag_integration.py
+++ b/tests/test_rag_integration.py
@@ -1,12 +1,32 @@
#!/usr/bin/env python3
-"""Test RAG system integration with smart chunking."""
+"""
+Test RAG system integration with smart chunking.
+
+ā ļø IMPORTANT: This test requires the virtual environment to be activated:
+ source .venv/bin/activate
+ PYTHONPATH=. python tests/test_rag_integration.py
+
+Or run directly with venv:
+ source .venv/bin/activate && PYTHONPATH=. python tests/test_rag_integration.py
+"""
import tempfile
import shutil
+import os
from pathlib import Path
from mini_rag.indexer import ProjectIndexer
from mini_rag.search import CodeSearcher
+# Check if virtual environment is activated
+def check_venv():
+ if 'VIRTUAL_ENV' not in os.environ:
+ print("ā ļø WARNING: Virtual environment not detected!")
+ print(" This test requires the virtual environment to be activated.")
+ print(" Run: source .venv/bin/activate && PYTHONPATH=. python tests/test_rag_integration.py")
+ print(" Continuing anyway...\n")
+
+check_venv()
+
# Sample Python file with proper structure
sample_code = '''"""
Sample module for testing RAG system.
@@ -179,8 +199,8 @@ def test_integration():
stats = indexer.index_project()
print(f" - Files indexed: {stats['files_indexed']}")
- print(f" - Total chunks: {stats['total_chunks']}")
- print(f" - Indexing time: {stats['indexing_time']:.2f}s")
+ print(f" - Total chunks: {stats['chunks_created']}")
+ print(f" - Indexing time: {stats['time_taken']:.2f}s")
# Verify chunks were created properly
print("\n2. Verifying chunk metadata...")
@@ -195,10 +215,10 @@ def test_integration():
results = searcher.search("data processor class unified interface", top_k=3)
print(f"\n Test 1 - Class search:")
for i, result in enumerate(results[:1]):
- print(f" - Match {i+1}: {result['file_path']}")
- print(f" Chunk type: {result['chunk_type']}")
- print(f" Score: {result['score']:.3f}")
- if 'This class handles' in result['content']:
+ print(f" - Match {i+1}: {result.file_path}")
+ print(f" Chunk type: {result.chunk_type}")
+ print(f" Score: {result.score:.3f}")
+ if 'This class handles' in result.content:
print(" [OK] Docstring included with class")
else:
print(" [FAIL] Docstring not found")
@@ -207,10 +227,10 @@ def test_integration():
results = searcher.search("process list of data items", top_k=3)
print(f"\n Test 2 - Method search:")
for i, result in enumerate(results[:1]):
- print(f" - Match {i+1}: {result['file_path']}")
- print(f" Chunk type: {result['chunk_type']}")
- print(f" Parent class: {result.get('parent_class', 'N/A')}")
- if 'Args:' in result['content'] and 'Returns:' in result['content']:
+ print(f" - Match {i+1}: {result.file_path}")
+ print(f" Chunk type: {result.chunk_type}")
+ print(f" Parent class: {getattr(result, 'parent_class', 'N/A')}")
+ if 'Args:' in result.content and 'Returns:' in result.content:
print(" [OK] Docstring included with method")
else:
print(" [FAIL] Method docstring not complete")
@@ -219,19 +239,19 @@ def test_integration():
results = searcher.search("smart chunking capabilities markdown", top_k=3)
print(f"\n Test 3 - Markdown search:")
for i, result in enumerate(results[:1]):
- print(f" - Match {i+1}: {result['file_path']}")
- print(f" Chunk type: {result['chunk_type']}")
- print(f" Lines: {result['start_line']}-{result['end_line']}")
+ print(f" - Match {i+1}: {result.file_path}")
+ print(f" Chunk type: {result.chunk_type}")
+ print(f" Lines: {result.start_line}-{result.end_line}")
# Test 4: Verify chunk navigation
print(f"\n Test 4 - Chunk navigation:")
all_results = searcher.search("", top_k=100) # Get all chunks
- py_chunks = [r for r in all_results if r['file_path'].endswith('.py')]
+ py_chunks = [r for r in all_results if r.file_path.endswith('.py')]
if py_chunks:
first_chunk = py_chunks[0]
- print(f" - First chunk: index={first_chunk.get('chunk_index', 'N/A')}")
- print(f" Next chunk ID: {first_chunk.get('next_chunk_id', 'N/A')}")
+ print(f" - First chunk: index={getattr(first_chunk, 'chunk_index', 'N/A')}")
+ print(f" Next chunk ID: {getattr(first_chunk, 'next_chunk_id', 'N/A')}")
# Verify chain
valid_chain = True
@@ -239,7 +259,7 @@ def test_integration():
curr = py_chunks[i]
next_chunk = py_chunks[i + 1]
expected_next = f"processor_{i+1}"
- if curr.get('next_chunk_id') != expected_next:
+ if getattr(curr, 'next_chunk_id', None) != expected_next:
valid_chain = False
break