Compare commits: 11639c8237 ... c201b3badd
2 Commits

| SHA1 |
|---|
| c201b3badd |
| 597c810034 |
.mini-rag/config.yaml (new normal file, 53 lines)
@@ -0,0 +1,53 @@
+# FSS-Mini-RAG Configuration
+# Edit this file to customize indexing and search behavior
+# See docs/GETTING_STARTED.md for detailed explanations
+
+# Text chunking settings
+chunking:
+  max_size: 2000 # Maximum characters per chunk
+  min_size: 150 # Minimum characters per chunk
+  strategy: semantic # 'semantic' (language-aware) or 'fixed'
+
+# Large file streaming settings
+streaming:
+  enabled: true
+  threshold_bytes: 1048576 # Files larger than this use streaming (1MB)
+
+# File processing settings
+files:
+  min_file_size: 50 # Skip files smaller than this
+  exclude_patterns:
+    - "node_modules/**"
+    - ".git/**"
+    - "__pycache__/**"
+    - "*.pyc"
+    - ".venv/**"
+    - "venv/**"
+    - "build/**"
+    - "dist/**"
+  include_patterns:
+    - "**/*" # Include all files by default
+
+# Embedding generation settings
+embedding:
+  preferred_method: ollama # 'ollama', 'ml', 'hash', or 'auto'
+  ollama_model: nomic-embed-text
+  ollama_host: localhost:11434
+  ml_model: sentence-transformers/all-MiniLM-L6-v2
+  batch_size: 32 # Embeddings processed per batch
+
+# Search behavior settings
+search:
+  default_top_k: 10 # Default number of top results
+  enable_bm25: true # Enable keyword matching boost
+  similarity_threshold: 0.1 # Minimum similarity score
+  expand_queries: false # Enable automatic query expansion
+
+# LLM synthesis and query expansion settings
+llm:
+  ollama_host: localhost:11434
+  synthesis_model: auto # 'auto', 'qwen3:1.7b', etc.
+  expansion_model: auto # Usually same as synthesis_model
+  max_expansion_terms: 8 # Maximum terms to add to queries
+  enable_synthesis: false # Enable synthesis by default
+  synthesis_temperature: 0.3 # LLM temperature for analysis
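A quick way to sanity-check these defaults from Python is sketched below. The helper is illustrative only (it is not part of this comparison); it assumes PyYAML is installed and that the file lives at `.mini-rag/config.yaml` as shown above.

```python
from pathlib import Path

import yaml  # PyYAML; assumed available in the project's virtualenv


def load_mini_rag_config(project_root: str) -> dict:
    """Hypothetical helper: read .mini-rag/config.yaml into a plain dict."""
    config_path = Path(project_root) / ".mini-rag" / "config.yaml"
    with config_path.open("r", encoding="utf-8") as fh:
        return yaml.safe_load(fh) or {}


# With the defaults above, cfg["search"]["default_top_k"] would be 10.
cfg = load_mini_rag_config(".")
```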
.mini-rag/last_search (new normal file, 1 line)
@@ -0,0 +1 @@
+test
@@ -67,7 +67,7 @@ llm:
   # Aggressive caching for CPU systems

 search:
   expand_queries: false # Enable only in TUI
-  default_limit: 8 # Slightly fewer results for speed
+  default_top_k: 8 # Slightly fewer results for speed
 ```

 ## System Requirements
@@ -125,7 +125,7 @@ print(f"Indexed {result['files_processed']} files, {result['chunks_created']} ch

 # Search
 print("\nSearching for authentication code...")
-results = searcher.search("user authentication logic", limit=5)
+results = searcher.search("user authentication logic", top_k=5)

 for i, result in enumerate(results, 1):
     print(f"\n{i}. {result.file_path}")
@@ -421,7 +421,7 @@ def _create_vector_table(self, chunks: List[CodeChunk], embeddings: np.ndarray):

         return table

-    def vector_search(self, query_embedding: np.ndarray, limit: int) -> List[SearchResult]:
+    def vector_search(self, query_embedding: np.ndarray, top_k: int) -> List[SearchResult]:
         """Fast vector similarity search."""
         table = self.db.open_table("chunks")

@@ -794,12 +794,12 @@ def repair_index(self, project_path: Path) -> bool:
 FSS-Mini-RAG works well with various LLM sizes because our rich context and guided prompts help small models perform excellently:

 **Recommended (Best Balance):**
-- **qwen3:4b** - Excellent quality, good performance
-- **qwen3:4b:q8_0** - High-precision quantized version for production
+- **qwen3:1.7b** - Excellent quality with fast performance (default priority)
+- **qwen3:0.6b** - Surprisingly good for CPU-only systems (522MB)

-**Still Excellent (Faster/CPU-friendly):**
-- **qwen3:1.7b** - Very good results, faster responses
-- **qwen3:0.6b** - Surprisingly good considering size (522MB)
+**Still Excellent (Slower but highest quality):**
+- **qwen3:4b** - Highest quality, slower responses
+- **qwen3:4b:q8_0** - High-precision quantized version for production

 ### Why Small Models Work Well Here

@@ -813,7 +813,7 @@ Without good context, small models tend to get lost and produce erratic output.

 ### Quantization Benefits

-For production deployments, consider quantized models like `qwen3:4b:q8_0`:
+For production deployments, consider quantized models like `qwen3:1.7b:q8_0` or `qwen3:4b:q8_0`:
 - **Q8_0**: 8-bit quantization with minimal quality loss
 - **Smaller memory footprint**: ~50% reduction vs full precision
 - **Better CPU performance**: Faster inference on CPU-only systems
@@ -110,7 +110,7 @@ python3 -c "import mini_rag; print('✅ Installation successful')"
 2. **Reduce result limit:**
    ```yaml
    search:
-     default_limit: 5 # Instead of 10
+     default_top_k: 5 # Instead of 10
    ```

 3. **Use faster embedding method:**
@@ -165,9 +165,9 @@ python3 -c "import mini_rag; print('✅ Installation successful')"

 2. **Try different model:**
    ```bash
-   ollama pull qwen3:4b # Recommended: excellent quality
-   ollama pull qwen3:1.7b # Still very good, faster
+   ollama pull qwen3:1.7b # Recommended: excellent quality (default priority)
    ollama pull qwen3:0.6b # Surprisingly good for CPU-only
+   ollama pull qwen3:4b # Highest quality, slower
    ```

 3. **Use synthesis mode instead of exploration:**
@@ -154,7 +154,7 @@ That's it! The TUI will guide you through everything.
 - **chunking.strategy** - Smart (semantic) vs simple (fixed size)
 - **files.exclude_patterns** - Skip certain files/directories
 - **embedding.preferred_method** - AI model preference
-- **search.default_limit** - How many results to show
+- **search.default_top_k** - How many results to show

 **Interactive Options**:
 - **[V]iew config** - See full configuration file
@@ -50,7 +50,7 @@ def main():
     print("\n4. Example searches:")
     for query in queries:
         print(f"\n Query: '{query}'")
-        results = searcher.search(query, limit=3)
+        results = searcher.search(query, top_k=3)

         if results:
             for i, result in enumerate(results, 1):
@@ -41,7 +41,7 @@ embedding:

 # 🔍 Search behavior
 search:
-  default_limit: 10 # Show 10 results (good starting point)
+  default_top_k: 10 # Show 10 results (good starting point)
   enable_bm25: true # Find exact word matches too
   similarity_threshold: 0.1 # Pretty permissive (shows more results)
   expand_queries: false # Keep it simple for now
@@ -62,7 +62,7 @@ embedding:

 # 🔍 Search optimized for speed
 search:
-  default_limit: 5 # Fewer results = faster display
+  default_top_k: 5 # Fewer results = faster display
   enable_bm25: false # Skip keyword matching for speed
   similarity_threshold: 0.2 # Higher threshold = fewer results to process
   expand_queries: false # No query expansion (much faster)
@@ -53,7 +53,7 @@ embedding:
   batch_size: 32

 search:
-  default_limit: 10
+  default_top_k: 10
   enable_bm25: true
   similarity_threshold: 0.1
   expand_queries: false
@@ -44,7 +44,7 @@ embedding:

 # 🔍 Search optimized for comprehensive results
 search:
-  default_limit: 15 # More results to choose from
+  default_top_k: 15 # More results to choose from
   enable_bm25: true # Use both semantic and keyword matching
   similarity_threshold: 0.05 # Very permissive (show more possibilities)
   expand_queries: true # Automatic query expansion for better recall
@@ -86,7 +86,7 @@ embedding:
 #═════════════════════════════════════════════════════════════════════════════════

 search:
-  default_limit: 10 # How many search results to show by default
+  default_top_k: 10 # How many search results to show by default
   # 💡 MORE RESULTS: 15-20 | FASTER SEARCH: 5-8

   enable_bm25: true # Also use keyword matching (like Google search)
@@ -188,12 +188,13 @@ check_ollama() {
     echo ""
     echo -e "${CYAN}💡 Pro tip: Download an LLM for AI-powered search synthesis!${NC}"
-    echo -e " Lightweight: ${GREEN}ollama pull qwen3:0.6b${NC} (~400MB, very fast)"
-    echo -e " Balanced: ${GREEN}ollama pull qwen3:1.7b${NC} (~1GB, good quality)"
-    echo -e " Excellent: ${GREEN}ollama pull qwen3:3b${NC} (~2GB, great for this project)"
-    echo -e " Premium: ${GREEN}ollama pull qwen3:8b${NC} (~5GB, amazing results)"
+    echo -e " Lightweight: ${GREEN}ollama pull qwen3:0.6b${NC} (~500MB, very fast)"
+    echo -e " Balanced: ${GREEN}ollama pull qwen3:1.7b${NC} (~1.4GB, good quality)"
+    echo -e " Excellent: ${GREEN}ollama pull qwen3:4b${NC} (~2.5GB, sweet spot for most users)"
+    echo -e " Maximum: ${GREEN}ollama pull qwen3:8b${NC} (~5GB, slower but top quality)"
     echo ""
-    echo -e "${BLUE}Creative possibilities: Try mistral for storytelling, or qwen3-coder for development!${NC}"
+    echo -e "${BLUE}🧠 RAG works great with smaller models! 4B is usually perfect.${NC}"
+    echo -e "${BLUE}Creative possibilities: Try mistral for storytelling, qwen2.5-coder for development!${NC}"
     echo ""

     return 0
@@ -558,7 +559,36 @@ print(f'✅ Embedding system: {info[\"method\"]}')
     " 2>/dev/null; then
         print_success "Embedding system working"
     else
-        print_warning "Embedding test failed, but system should still work"
+        echo ""
+        echo -e "${YELLOW}⚠️ System Check${NC}"
+
+        # Smart diagnosis - check what's actually available
+        if command_exists ollama && curl -s http://localhost:11434/api/version >/dev/null 2>&1; then
+            # Ollama is running, check for models
+            local available_models=$(ollama list 2>/dev/null | grep -E "(qwen3|llama|mistral|gemma)" | head -5)
+            local embedding_models=$(ollama list 2>/dev/null | grep -E "(embed|bge)" | head -2)
+
+            if [[ -n "$available_models" ]]; then
+                echo -e "${GREEN}✅ Ollama is running with available models${NC}"
+                echo -e "${CYAN}Your setup will work great! The system will auto-select the best models.${NC}"
+                echo ""
+                echo -e "${BLUE}💡 RAG Performance Tip:${NC} Smaller models often work better with RAG!"
+                echo -e " With context provided, even 0.6B models give good results"
+                echo -e " 4B models = excellent, 8B+ = overkill (slower responses)"
+            else
+                echo -e "${BLUE}Ollama is running but no chat models found.${NC}"
+                echo -e "Download a lightweight model: ${GREEN}ollama pull qwen3:0.6b${NC} (fast)"
+                echo -e "Or balanced option: ${GREEN}ollama pull qwen3:4b${NC} (excellent quality)"
+            fi
+        else
+            echo -e "${BLUE}Ollama not running or not installed.${NC}"
+            echo -e "Start Ollama: ${GREEN}ollama serve${NC}"
+            echo -e "Or install from: https://ollama.com/download"
+        fi
+
+        echo ""
+        echo -e "${CYAN}✅ FSS-Mini-RAG will auto-detect and use the best available method.${NC}"
+        echo ""
     fi

     return 0
@@ -595,103 +625,102 @@ show_completion() {
     fi

     # Ask if they want to run a test
-    echo -n "Would you like to run a quick test now? (Y/n): "
-    read -r run_test
-    if [[ ! $run_test =~ ^[Nn]$ ]]; then
-        run_quick_test
-        echo ""
-        show_beginner_guidance
+    echo ""
+    echo -e "${BOLD}🧪 Quick Test Available${NC}"
+    echo -e "${CYAN}Test FSS-Mini-RAG with a small sample project (takes ~10 seconds)${NC}"
+    echo ""
+
+    # Ensure output is flushed and we're ready for input
+    printf "Run quick test now? [Y/n]: "
+
+    # More robust input handling
+    if read -r run_test < /dev/tty 2>/dev/null; then
+        echo "User chose: '$run_test'" # Debug output
+        if [[ ! $run_test =~ ^[Nn]$ ]]; then
+            run_quick_test
+            echo ""
+            show_beginner_guidance
+        else
+            echo -e "${BLUE}Skipping test - you can run it later with: ./rag-tui${NC}"
+            show_beginner_guidance
+        fi
     else
+        # Fallback if interactive input fails
+        echo ""
+        echo -e "${YELLOW}⚠️ Interactive input not available - skipping test prompt${NC}"
+        echo -e "${BLUE}You can test FSS-Mini-RAG anytime with: ./rag-tui${NC}"
         show_beginner_guidance
     fi
 }

-# Create sample project for testing
-create_sample_project() {
-    local sample_dir="$SCRIPT_DIR/.sample_test"
-    rm -rf "$sample_dir"
-    mkdir -p "$sample_dir"
-
-    # Create a few small sample files
-    cat > "$sample_dir/README.md" << 'EOF'
-# Sample Project
-
-This is a sample project for testing FSS-Mini-RAG search capabilities.
-
-## Features
-
-- User authentication system
-- Document processing
-- Search functionality
-- Email integration
-EOF
-
-    cat > "$sample_dir/auth.py" << 'EOF'
-# Authentication module
-def login_user(username, password):
-    """Handle user login with password validation"""
-    if validate_credentials(username, password):
-        create_session(username)
-        return True
-    return False
-
-def validate_credentials(username, password):
-    """Check username and password against database"""
-    # Database validation logic here
-    return check_password_hash(username, password)
-EOF
-
-    cat > "$sample_dir/search.py" << 'EOF'
-# Search functionality
-def semantic_search(query, documents):
-    """Perform semantic search across document collection"""
-    embeddings = generate_embeddings(query)
-    results = find_similar_documents(embeddings, documents)
-    return rank_results(results)
-
-def generate_embeddings(text):
-    """Generate vector embeddings for text"""
-    # Embedding generation logic
-    return process_with_model(text)
-EOF
-
-    echo "$sample_dir"
-}
+# Note: Sample project creation removed - now indexing real codebase/docs

 # Run quick test with sample data
 run_quick_test() {
     print_header "Quick Test"

-    print_info "Creating small sample project for testing..."
-    local sample_dir=$(create_sample_project)
-    echo "Sample project created with 3 files for fast testing."
+    # Ask what to index: code vs docs
+    echo -e "${CYAN}What would you like to explore with FSS-Mini-RAG?${NC}"
+    echo ""
+    echo -e "${GREEN}1) Code${NC} - Index the FSS-Mini-RAG codebase (~50 files)"
+    echo -e "${BLUE}2) Docs${NC} - Index the documentation (~10 files)"
+    echo ""
+    echo -n "Choose [1/2] or Enter for code: "
+    read -r index_choice
+
+    # Determine what to index
+    local target_dir="$SCRIPT_DIR"
+    local target_name="FSS-Mini-RAG codebase"
+    if [[ "$index_choice" == "2" ]]; then
+        target_dir="$SCRIPT_DIR/docs"
+        target_name="FSS-Mini-RAG documentation"
+    fi
+
+    # Ensure we're in the right directory and have the right permissions
+    if [[ ! -f "./rag-mini" ]]; then
+        print_error "rag-mini script not found in current directory: $(pwd)"
+        print_info "This might be a path issue. The installer should run from the project directory."
+        return 1
+    fi
+
+    if [[ ! -x "./rag-mini" ]]; then
+        print_info "Making rag-mini executable..."
+        chmod +x ./rag-mini
+    fi
+
+    # Index the chosen target
+    print_info "Indexing $target_name..."
+    echo -e "${CYAN}This will take 10-30 seconds depending on your system${NC}"
     echo ""

-    # Index the sample project (much faster)
-    print_info "Indexing sample project (this should be fast)..."
-    if ./rag-mini index "$sample_dir" --quiet; then
-        print_success "Sample project indexed successfully"
+    if ./rag-mini index "$target_dir"; then
+        print_success "✅ Indexing completed successfully!"

         echo ""
-        print_info "Testing search with sample queries..."
-        echo -e "${BLUE}Running search: 'user authentication'${NC}"
-        ./rag-mini search "$sample_dir" "user authentication" --limit 2
+        print_info "🎯 Launching Interactive Tutorial..."
+        echo -e "${CYAN}The TUI has 6 sample questions to get you started.${NC}"
+        echo -e "${CYAN}Try the suggested queries or enter your own!${NC}"
+        echo ""
+        echo -n "Press Enter to start interactive tutorial: "
+        read -r
+
+        # Launch the TUI which has the existing interactive tutorial system
+        ./rag-tui.py "$target_dir"

         echo ""
-        print_success "Test completed successfully!"
-        echo -e "${CYAN}Ready to use FSS-Mini-RAG on your own projects!${NC}"
-
-        # Offer beginner guidance
-        echo ""
-        echo -e "${YELLOW}💡 Beginner Tip:${NC} Try the interactive mode with pre-made questions"
-        echo " Run: ./rag-tui for guided experience"
-
-        # Clean up sample
-        rm -rf "$sample_dir"
+        print_success "🎉 Tutorial completed!"
+        echo -e "${CYAN}FSS-Mini-RAG is working perfectly!${NC}"
     else
-        print_error "Sample test failed"
-        echo "This might indicate an issue with the installation."
-        rm -rf "$sample_dir"
+        print_error "❌ Indexing failed"
+        echo ""
+        echo -e "${YELLOW}Possible causes:${NC}"
+        echo "• Virtual environment not properly activated"
+        echo "• Missing dependencies (try: pip install -r requirements.txt)"
+        echo "• Path issues (ensure script runs from project directory)"
+        echo "• Ollama connection issues (if using Ollama)"
+        echo ""
+        return 1
     fi
 }

@@ -52,6 +52,10 @@ def cli(verbose: bool, quiet: bool):
     A local RAG system for improving the development environment's grounding capabilities.
     Indexes your codebase and enables lightning-fast semantic search.
     """
+    # Check virtual environment
+    from .venv_checker import check_and_warn_venv
+    check_and_warn_venv("rag-mini", force_exit=False)
+
     if verbose:
         logging.getLogger().setLevel(logging.DEBUG)
     elif quiet:
@@ -350,7 +354,12 @@ def debug_schema(path: str):
         return

     # Connect to database
-    import lancedb
+    try:
+        import lancedb
+    except ImportError:
+        console.print("[red]LanceDB not available. Install with: pip install lancedb pyarrow[/red]")
+        return
+
     db = lancedb.connect(rag_dir)

     if "code_vectors" not in db.table_names():
@@ -63,7 +63,7 @@ class EmbeddingConfig:
 @dataclass
 class SearchConfig:
     """Configuration for search behavior."""
-    default_limit: int = 10
+    default_top_k: int = 10
     enable_bm25: bool = True
     similarity_threshold: float = 0.1
     expand_queries: bool = False # Enable automatic query expansion
@@ -81,6 +81,9 @@ class LLMConfig:
     enable_thinking: bool = True # Enable thinking mode for Qwen3 models
     cpu_optimized: bool = True # Prefer lightweight models

+    # Model preference rankings (configurable)
+    model_rankings: list = None # Will be set in __post_init__
+
     # Provider-specific settings (for different LLM providers)
     provider: str = "ollama" # "ollama", "openai", "anthropic"
     ollama_host: str = "localhost:11434" # Ollama connection
@@ -88,6 +91,24 @@ class LLMConfig:
     api_base: Optional[str] = None # Base URL for API (e.g., OpenRouter)
     timeout: int = 20 # Request timeout in seconds

+    def __post_init__(self):
+        if self.model_rankings is None:
+            # Default model preference rankings (can be overridden in config file)
+            self.model_rankings = [
+                # Testing model (prioritized for current testing phase)
+                "qwen3:1.7b",
+
+                # Ultra-efficient models (perfect for CPU-only systems)
+                "qwen3:0.6b",
+
+                # Recommended model (excellent quality but larger)
+                "qwen3:4b",
+
+                # Common fallbacks (only include models we know exist)
+                "llama3.2:1b",
+                "qwen2.5:1.5b",
+            ]
+

 @dataclass
 class RAGConfig:
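As a usage illustration (not part of this comparison), the dataclass above fills in the default rankings only when none are supplied, so a caller can still set an explicit priority list. The import path and the assumption that the remaining LLMConfig fields keep their defaults are both guesses.

```python
from mini_rag.config import LLMConfig  # module path assumed for illustration

default_cfg = LLMConfig()  # __post_init__ supplies the default rankings
custom_cfg = LLMConfig(model_rankings=["qwen3:4b", "llama3.2:1b"])  # explicit order wins

assert default_cfg.model_rankings[0] == "qwen3:1.7b"
assert custom_cfg.model_rankings[0] == "qwen3:4b"
```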
@@ -151,6 +172,8 @@ class ConfigManager:
                 config.embedding = EmbeddingConfig(**data['embedding'])
             if 'search' in data:
                 config.search = SearchConfig(**data['search'])
+            if 'llm' in data:
+                config.llm = LLMConfig(**data['llm'])

             return config

@@ -219,7 +242,7 @@ class ConfigManager:
             "",
             "# Search behavior settings",
             "search:",
-            f" default_limit: {config_dict['search']['default_limit']} # Default number of results",
+            f" default_top_k: {config_dict['search']['default_top_k']} # Default number of top results",
             f" enable_bm25: {str(config_dict['search']['enable_bm25']).lower()} # Enable keyword matching boost",
             f" similarity_threshold: {config_dict['search']['similarity_threshold']} # Minimum similarity score",
             f" expand_queries: {str(config_dict['search']['expand_queries']).lower()} # Enable automatic query expansion",
@@ -232,8 +255,16 @@ class ConfigManager:
             f" max_expansion_terms: {config_dict['llm']['max_expansion_terms']} # Maximum terms to add to queries",
             f" enable_synthesis: {str(config_dict['llm']['enable_synthesis']).lower()} # Enable synthesis by default",
             f" synthesis_temperature: {config_dict['llm']['synthesis_temperature']} # LLM temperature for analysis",
+            " model_rankings: # Preferred model order (edit to change priority)",
         ])

+        # Add model rankings list
+        if 'model_rankings' in config_dict['llm'] and config_dict['llm']['model_rankings']:
+            for model in config_dict['llm']['model_rankings'][:10]: # Show first 10
+                yaml_lines.append(f" - \"{model}\"")
+            if len(config_dict['llm']['model_rankings']) > 10:
+                yaml_lines.append(" # ... (edit config to see all options)")
+
         return '\n'.join(yaml_lines)

     def update_config(self, **kwargs) -> RAGConfig:
@@ -60,7 +60,8 @@ class CodeExplorer:
         self.synthesizer = LLMSynthesizer(
             ollama_url=f"http://{self.config.llm.ollama_host}",
             model=self.config.llm.synthesis_model,
-            enable_thinking=True # Always enable thinking in explore mode
+            enable_thinking=True, # Always enable thinking in explore mode
+            config=self.config # Pass config for model rankings
         )

         # Session management
@@ -69,12 +70,7 @@ class CodeExplorer:
     def start_exploration_session(self) -> bool:
         """Start a new exploration session."""

-        # Check if we should restart the model for optimal thinking
-        model_restart_needed = self._check_model_restart_needed()
-        if model_restart_needed:
-            if not self._handle_model_restart():
-                print("⚠️ Continuing with current model (quality may be reduced)")
-
+        # Simple availability check - don't do complex model restart logic
         if not self.synthesizer.is_available():
             print("❌ LLM service unavailable. Please check Ollama is running.")
             return False
@@ -87,17 +83,8 @@ class CodeExplorer:
             started_at=time.time()
         )

-        print("🧠 EXPLORATION MODE STARTED")
-        print("=" * 50)
+        print("🧠 Exploration Mode Started")
         print(f"Project: {self.project_path.name}")
-        print(f"Session: {session_id}")
-        print("\n🎯 This mode uses thinking and remembers context.")
-        print(" Perfect for debugging, learning, and deep exploration.")
-        print("\n💡 Tips:")
-        print(" • Ask follow-up questions - I'll remember our conversation")
-        print(" • Use 'why', 'how', 'explain' for detailed reasoning")
-        print(" • Type 'quit' or 'exit' to end session")
-        print("\n" + "=" * 50)

         return True

@@ -110,7 +97,7 @@ class CodeExplorer:
         search_start = time.time()
         results = self.searcher.search(
             question,
-            limit=context_limit,
+            top_k=context_limit,
             include_context=True,
             semantic_weight=0.7,
             bm25_weight=0.3
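For orientation, the semantic_weight/bm25_weight pair above suggests a weighted blend of the vector-similarity and keyword scores. The snippet below is only a guessed illustration of how such weights are commonly combined; the searcher's real scoring code is not shown in this comparison.

```python
def hybrid_score(semantic_sim: float, bm25_score: float,
                 semantic_weight: float = 0.7, bm25_weight: float = 0.3) -> float:
    """Assumed combination rule, for illustration only."""
    return semantic_weight * semantic_sim + bm25_weight * bm25_score


# Example: 0.62 vector similarity and 0.40 keyword score blend to
# 0.7 * 0.62 + 0.3 * 0.40 = 0.554 under these assumed weights.
print(hybrid_score(0.62, 0.40))
```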
@@ -166,56 +153,82 @@ Content: {content[:800]}{'...' if len(content) > 800 else ''}

         results_text = "\n".join(results_context)

-        # Create comprehensive exploration prompt
-        prompt = f"""You are a senior software engineer helping explore and debug code. You have access to thinking mode and conversation context.
+        # Create comprehensive exploration prompt with thinking
+        prompt = f"""<think>
+The user asked: "{question}"
+
+Let me analyze what they're asking and look at the information I have available.
+
+From the search results, I can see relevant information about:
+{results_text[:500]}...
+
+I should think about:
+1. What the user is trying to understand or accomplish
+2. What information from the search results is most relevant
+3. How to explain this in a clear, educational way
+4. What practical next steps would be helpful
+
+Based on our conversation so far: {context_summary}
+
+Let me create a helpful response that breaks this down clearly and gives them actionable guidance.
+</think>
+
+You're a helpful assistant exploring a project with someone. You're good at breaking down complex topics into understandable pieces and explaining things clearly.

 PROJECT: {self.project_path.name}

-CONVERSATION CONTEXT:
+PREVIOUS CONVERSATION:
 {context_summary}

 CURRENT QUESTION: "{question}"

-SEARCH RESULTS:
+RELEVANT INFORMATION FOUND:
 {results_text}

-Please provide a detailed analysis in JSON format. Think through the problem carefully and consider the conversation context:
+Please provide a helpful analysis in JSON format:

 {{
-    "summary": "2-3 sentences explaining what you found and how it relates to the question",
+    "summary": "Clear explanation of what you found and how it answers their question",
     "key_points": [
-        "Important insight 1 (reference specific code/files)",
-        "Important insight 2 (explain relationships)",
-        "Important insight 3 (consider conversation context)"
+        "Most important insight from the information",
+        "Secondary important point or relationship",
+        "Third key point or practical consideration"
     ],
     "code_examples": [
-        "Relevant code snippet or pattern with explanation",
-        "Another important code example with context"
+        "Relevant example or pattern from the information",
+        "Another useful example or demonstration"
     ],
     "suggested_actions": [
-        "Specific next step the developer should take",
-        "Follow-up investigation or debugging approach",
-        "Potential improvements or fixes"
+        "Specific next step they could take",
+        "Additional exploration or investigation suggestion",
+        "Practical way to apply this information"
     ],
     "confidence": 0.85
 }}

-Focus on:
-- Deep technical analysis with reasoning
-- How this connects to previous questions in our conversation
-- Practical debugging/learning insights
-- Specific code references and explanations
-- Clear next steps for the developer
-
-Think carefully about the relationships between code components and how they answer the question in context."""
+Guidelines:
+- Be educational and break things down clearly
+- Reference specific files and information when helpful
+- Give practical, actionable suggestions
+- Keep explanations beginner-friendly but not condescending
+- Connect information to their question directly
+"""

         return prompt

     def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult:
         """Synthesize results with full context and thinking."""
         try:
-            # Use thinking-enabled synthesis with lower temperature for exploration
-            response = self.synthesizer._call_ollama(prompt, temperature=0.2)
+            # TEMPORARILY: Use simple non-streaming call to avoid flow issues
+            # TODO: Re-enable streaming once flow is stable
+            response = self.synthesizer._call_ollama(prompt, temperature=0.2, disable_thinking=False)
+            thinking_stream = ""
+
+            # Display simple thinking indicator
+            if response and len(response) > 200:
+                print("\n💭 Analysis in progress...")
+
+            # Don't display thinking stream again - keeping it simple for now
+
             if not response:
                 return SynthesisResult(
@ -423,6 +436,196 @@ Think carefully about the relationships between code components and how they ans
|
|||||||
print("\n📝 Continuing with current model...")
|
print("\n📝 Continuing with current model...")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _call_ollama_with_thinking(self, prompt: str, temperature: float = 0.3) -> tuple:
|
||||||
|
"""Call Ollama with streaming for fast time-to-first-token."""
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Use the synthesizer's model and connection
|
||||||
|
model_to_use = self.synthesizer.model
|
||||||
|
if self.synthesizer.model not in self.synthesizer.available_models:
|
||||||
|
if self.synthesizer.available_models:
|
||||||
|
model_to_use = self.synthesizer.available_models[0]
|
||||||
|
else:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# Enable thinking by NOT adding <no_think>
|
||||||
|
final_prompt = prompt
|
||||||
|
|
||||||
|
# Get optimal parameters for this model
|
||||||
|
from .llm_optimization import get_optimal_ollama_parameters
|
||||||
|
optimal_params = get_optimal_ollama_parameters(model_to_use)
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": model_to_use,
|
||||||
|
"prompt": final_prompt,
|
||||||
|
"stream": True, # Enable streaming for fast response
|
||||||
|
"options": {
|
||||||
|
"temperature": temperature,
|
||||||
|
"top_p": optimal_params.get("top_p", 0.9),
|
||||||
|
"top_k": optimal_params.get("top_k", 40),
|
||||||
|
"num_ctx": optimal_params.get("num_ctx", 32768),
|
||||||
|
"num_predict": optimal_params.get("num_predict", 2000),
|
||||||
|
"repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
|
||||||
|
"presence_penalty": optimal_params.get("presence_penalty", 1.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
response = requests.post(
|
||||||
|
f"{self.synthesizer.ollama_url}/api/generate",
|
||||||
|
json=payload,
|
||||||
|
stream=True,
|
||||||
|
timeout=65
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
# Collect streaming response
|
||||||
|
raw_response = ""
|
||||||
|
thinking_displayed = False
|
||||||
|
|
||||||
|
for line in response.iter_lines():
|
||||||
|
if line:
|
||||||
|
try:
|
||||||
|
chunk_data = json.loads(line.decode('utf-8'))
|
||||||
|
chunk_text = chunk_data.get('response', '')
|
||||||
|
|
||||||
|
if chunk_text:
|
||||||
|
raw_response += chunk_text
|
||||||
|
|
||||||
|
# Display thinking stream as it comes in
|
||||||
|
if not thinking_displayed and '<think>' in raw_response:
|
||||||
|
# Start displaying thinking
|
||||||
|
self._start_thinking_display()
|
||||||
|
thinking_displayed = True
|
||||||
|
|
||||||
|
if thinking_displayed:
|
||||||
|
self._stream_thinking_chunk(chunk_text)
|
||||||
|
|
||||||
|
if chunk_data.get('done', False):
|
||||||
|
break
|
||||||
|
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Finish thinking display if it was shown
|
||||||
|
if thinking_displayed:
|
||||||
|
self._end_thinking_display()
|
||||||
|
|
||||||
|
# Extract thinking stream and final response
|
||||||
|
thinking_stream, final_response = self._extract_thinking(raw_response)
|
||||||
|
|
||||||
|
return final_response, thinking_stream
|
||||||
|
else:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Thinking-enabled Ollama call failed: {e}")
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
def _extract_thinking(self, raw_response: str) -> tuple:
|
||||||
|
"""Extract thinking content from response."""
|
||||||
|
thinking_stream = ""
|
||||||
|
final_response = raw_response
|
||||||
|
|
||||||
|
# Look for thinking patterns
|
||||||
|
if "<think>" in raw_response and "</think>" in raw_response:
|
||||||
|
# Extract thinking content between tags
|
||||||
|
start_tag = raw_response.find("<think>")
|
||||||
|
end_tag = raw_response.find("</think>") + len("</think>")
|
||||||
|
|
||||||
|
if start_tag != -1 and end_tag != -1:
|
||||||
|
thinking_content = raw_response[start_tag + 7:end_tag - 8] # Remove tags
|
||||||
|
thinking_stream = thinking_content.strip()
|
||||||
|
|
||||||
|
# Remove thinking from final response
|
||||||
|
final_response = (raw_response[:start_tag] + raw_response[end_tag:]).strip()
|
||||||
|
|
||||||
|
# Alternative patterns for models that use different thinking formats
|
||||||
|
elif "Let me think" in raw_response or "I need to analyze" in raw_response:
|
||||||
|
# Simple heuristic: first paragraph might be thinking
|
||||||
|
lines = raw_response.split('\n')
|
||||||
|
potential_thinking = []
|
||||||
|
final_lines = []
|
||||||
|
|
||||||
|
thinking_indicators = ["Let me think", "I need to", "First, I'll", "Looking at", "Analyzing"]
|
||||||
|
in_thinking = False
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
if any(indicator in line for indicator in thinking_indicators):
|
||||||
|
in_thinking = True
|
||||||
|
potential_thinking.append(line)
|
||||||
|
elif in_thinking and (line.startswith('{') or line.startswith('**') or line.startswith('#')):
|
||||||
|
# Likely end of thinking, start of structured response
|
||||||
|
in_thinking = False
|
||||||
|
final_lines.append(line)
|
||||||
|
elif in_thinking:
|
||||||
|
potential_thinking.append(line)
|
||||||
|
else:
|
||||||
|
final_lines.append(line)
|
||||||
|
|
||||||
|
if potential_thinking:
|
||||||
|
thinking_stream = '\n'.join(potential_thinking).strip()
|
||||||
|
final_response = '\n'.join(final_lines).strip()
|
||||||
|
|
||||||
|
return thinking_stream, final_response
|
||||||
|
|
||||||
|
def _start_thinking_display(self):
|
||||||
|
"""Start the thinking stream display."""
|
||||||
|
print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
|
||||||
|
print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
|
||||||
|
self._thinking_buffer = ""
|
||||||
|
self._in_thinking_tags = False
|
||||||
|
|
||||||
|
def _stream_thinking_chunk(self, chunk: str):
|
||||||
|
"""Stream a chunk of thinking as it arrives."""
|
||||||
|
import sys
|
||||||
|
|
||||||
|
self._thinking_buffer += chunk
|
||||||
|
|
||||||
|
# Check if we're in thinking tags
|
||||||
|
if '<think>' in self._thinking_buffer and not self._in_thinking_tags:
|
||||||
|
self._in_thinking_tags = True
|
||||||
|
# Display everything after <think>
|
||||||
|
start_idx = self._thinking_buffer.find('<think>') + 7
|
||||||
|
thinking_content = self._thinking_buffer[start_idx:]
|
||||||
|
if thinking_content:
|
||||||
|
print(f"\033[2m\033[3m{thinking_content}\033[0m", end='', flush=True)
|
||||||
|
elif self._in_thinking_tags and '</think>' not in chunk:
|
||||||
|
# We're in thinking mode, display the chunk
|
||||||
|
print(f"\033[2m\033[3m{chunk}\033[0m", end='', flush=True)
|
||||||
|
elif '</think>' in self._thinking_buffer:
|
||||||
|
# End of thinking
|
||||||
|
self._in_thinking_tags = False
|
||||||
|
|
||||||
|
def _end_thinking_display(self):
|
||||||
|
"""End the thinking stream display."""
|
||||||
|
print(f"\n\033[2m\033[3m" + "─" * 40 + "\033[0m")
|
||||||
|
print()
|
||||||
|
|
||||||
|
def _display_thinking_stream(self, thinking_stream: str):
|
||||||
|
"""Display thinking stream in light gray and italic (fallback for non-streaming)."""
|
||||||
|
if not thinking_stream:
|
||||||
|
return
|
||||||
|
|
||||||
|
print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
|
||||||
|
print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
|
||||||
|
|
||||||
|
# Split into paragraphs and display with proper formatting
|
||||||
|
paragraphs = thinking_stream.split('\n\n')
|
||||||
|
for para in paragraphs:
|
||||||
|
if para.strip():
|
||||||
|
# Wrap long lines nicely
|
||||||
|
lines = para.strip().split('\n')
|
||||||
|
for line in lines:
|
||||||
|
if line.strip():
|
||||||
|
# Light gray and italic
|
||||||
|
print(f"\033[2m\033[3m{line}\033[0m")
|
||||||
|
print() # Paragraph spacing
|
||||||
|
|
||||||
|
print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
|
||||||
|
print()
|
||||||
|
|
||||||
# Quick test function
|
# Quick test function
|
||||||
def test_explorer():
|
def test_explorer():
|
||||||
"""Test the code explorer."""
|
"""Test the code explorer."""
|
||||||
|
|||||||
@ -218,6 +218,11 @@ class FastRAGServer:
|
|||||||
# Quick file count check
|
# Quick file count check
|
||||||
try:
|
try:
|
||||||
import lancedb
|
import lancedb
|
||||||
|
except ImportError:
|
||||||
|
# If LanceDB not available, assume index is empty and needs creation
|
||||||
|
return True
|
||||||
|
|
||||||
|
try:
|
||||||
db = lancedb.connect(rag_dir)
|
db = lancedb.connect(rag_dir)
|
||||||
if 'code_vectors' not in db.table_names():
|
if 'code_vectors' not in db.table_names():
|
||||||
return True
|
return True
|
||||||
|
|||||||
@@ -12,12 +12,20 @@ from typing import List, Dict, Any, Optional, Set, Tuple
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 import numpy as np
-import lancedb
 import pandas as pd
-import pyarrow as pa
 from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn
 from rich.console import Console

+# Optional LanceDB import
+try:
+    import lancedb
+    import pyarrow as pa
+    LANCEDB_AVAILABLE = True
+except ImportError:
+    lancedb = None
+    pa = None
+    LANCEDB_AVAILABLE = False
+
 from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
 from .chunker import CodeChunker, CodeChunk
 from .path_handler import normalize_path, normalize_relative_path
@@ -163,7 +171,7 @@ class ProjectIndexer:
                 "skip_binary": True
             },
             "search": {
-                "default_limit": 10,
+                "default_top_k": 10,
                 "similarity_threshold": 0.7,
                 "hybrid_search": True,
                 "bm25_weight": 0.3
@@ -526,6 +534,11 @@ class ProjectIndexer:

     def _init_database(self):
         """Initialize LanceDB connection and table."""
+        if not LANCEDB_AVAILABLE:
+            logger.error("LanceDB is not available. Please install LanceDB for full indexing functionality.")
+            logger.info("For Ollama-only mode, consider using hash-based embeddings instead.")
+            raise ImportError("LanceDB dependency is required for indexing. Install with: pip install lancedb pyarrow")
+
         try:
             self.db = lancedb.connect(self.rag_dir)

@@ -16,12 +16,12 @@ logger = logging.getLogger(__name__)

 @dataclass
 class SafeguardConfig:
-    """Configuration for LLM safeguards."""
-    max_output_tokens: int = 2000 # Prevent excessive generation
-    max_repetition_ratio: float = 0.3 # Max ratio of repeated content
-    max_response_time: int = 60 # Max seconds for response
-    min_useful_length: int = 20 # Minimum useful response length
-    context_window: int = 32768 # Ollama context window
+    """Configuration for LLM safeguards - gentle and educational."""
+    max_output_tokens: int = 4000 # Allow longer responses for learning
+    max_repetition_ratio: float = 0.7 # Be very permissive - only catch extreme repetition
+    max_response_time: int = 120 # Allow 2 minutes for complex thinking
+    min_useful_length: int = 10 # Lower threshold - short answers can be useful
+    context_window: int = 32000 # Match Qwen3 context length (32K token limit)
     enable_thinking_detection: bool = True # Detect thinking patterns

 class ModelRunawayDetector:
|
|||||||
if self.response_patterns['phrase_repetition'].search(response):
|
if self.response_patterns['phrase_repetition'].search(response):
|
||||||
return "phrase_repetition"
|
return "phrase_repetition"
|
||||||
|
|
||||||
# Calculate repetition ratio
|
# Calculate repetition ratio (excluding Qwen3 thinking blocks)
|
||||||
words = response.split()
|
analysis_text = response
|
||||||
|
if "<think>" in response and "</think>" in response:
|
||||||
|
# Extract only the actual response (after thinking) for repetition analysis
|
||||||
|
thinking_end = response.find("</think>")
|
||||||
|
if thinking_end != -1:
|
||||||
|
analysis_text = response[thinking_end + 8:].strip()
|
||||||
|
|
||||||
|
# If the actual response (excluding thinking) is short, don't penalize
|
||||||
|
if len(analysis_text.split()) < 20:
|
||||||
|
return None
|
||||||
|
|
||||||
|
words = analysis_text.split()
|
||||||
if len(words) > 10:
|
if len(words) > 10:
|
||||||
unique_words = set(words)
|
unique_words = set(words)
|
||||||
repetition_ratio = 1 - (len(unique_words) / len(words))
|
repetition_ratio = 1 - (len(unique_words) / len(words))
|
||||||
|
|||||||
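Read on its own, the new check above boils down to the standalone restatement below; this is illustrative only and simply mirrors the logic in the added lines.

```python
def repetition_ratio(response: str) -> float:
    """Skip a leading <think>...</think> block, then measure word repetition (0.0 = no repeats)."""
    analysis_text = response
    if "<think>" in response and "</think>" in response:
        thinking_end = response.find("</think>")
        analysis_text = response[thinking_end + len("</think>"):].strip()
        if len(analysis_text.split()) < 20:
            return 0.0  # short post-thinking answers are not penalized
    words = analysis_text.split()
    if len(words) <= 10:
        return 0.0
    return 1 - (len(set(words)) / len(words))
```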
@@ -36,12 +36,13 @@ class SynthesisResult:
 class LLMSynthesizer:
     """Synthesizes RAG search results using Ollama LLMs."""

-    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = False):
+    def __init__(self, ollama_url: str = "http://localhost:11434", model: str = None, enable_thinking: bool = False, config=None):
         self.ollama_url = ollama_url.rstrip('/')
         self.available_models = []
         self.model = model
         self.enable_thinking = enable_thinking # Default False for synthesis mode
         self._initialized = False
+        self.config = config # For accessing model rankings

         # Initialize safeguards
         if ModelRunawayDetector:
@@ -61,60 +62,36 @@ class LLMSynthesizer:
             return []

     def _select_best_model(self) -> str:
-        """Select the best available model based on modern performance rankings."""
+        """Select the best available model based on configuration rankings."""
         if not self.available_models:
             return "qwen2.5:1.5b" # Fallback preference

-        # Modern model preference ranking (CPU-friendly first)
-        # Prioritize: Ultra-efficient > Standard efficient > Larger models
-        model_rankings = [
-            # Recommended model (excellent quality)
-            "qwen3:4b",
-
-            # Ultra-efficient models (perfect for CPU-only systems)
-            "qwen3:0.6b", "qwen3:1.7b", "llama3.2:1b",
-
-            # Standard efficient models
-            "qwen2.5:1.5b", "qwen3:3b",
-
-            # Qwen2.5 models (excellent performance/size ratio)
-            "qwen2.5-coder:1.5b", "qwen2.5:1.5b", "qwen2.5:3b", "qwen2.5-coder:3b",
-            "qwen2.5:7b", "qwen2.5-coder:7b",
-
-            # Qwen2 models (older but still good)
-            "qwen2:1.5b", "qwen2:3b", "qwen2:7b",
-
-            # Mistral models (good quality, reasonable size)
-            "mistral:7b", "mistral-nemo", "mistral-small",
-
-            # Llama3.2 models (decent but larger)
-            "llama3.2:1b", "llama3.2:3b", "llama3.2", "llama3.2:8b",
-
-            # Fallback to other Llama models
-            "llama3.1:8b", "llama3:8b", "llama3",
-
-            # Other decent models
-            "gemma2:2b", "gemma2:9b", "phi3:3.8b", "phi3.5",
-        ]
-
-        # Find first available model from our ranked list
+        # Get model rankings from config or use defaults
+        if self.config and hasattr(self.config, 'llm') and hasattr(self.config.llm, 'model_rankings'):
+            model_rankings = self.config.llm.model_rankings
+        else:
+            # Fallback rankings if no config
+            model_rankings = [
+                "qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "llama3.2:1b",
+                "qwen2.5:1.5b", "qwen3:3b", "qwen2.5-coder:1.5b"
+            ]
+
+        # Find first available model from our ranked list (exact matches first)
         for preferred_model in model_rankings:
             for available_model in self.available_models:
-                # Match model names (handle version tags)
-                available_base = available_model.split(':')[0].lower()
-                preferred_base = preferred_model.split(':')[0].lower()
-
-                if preferred_base in available_base or available_base in preferred_base:
-                    # Additional size filtering - prefer smaller models
-                    if any(size in available_model.lower() for size in ['1b', '1.5b', '2b', '3b']):
-                        logger.info(f"Selected efficient model: {available_model}")
-                        return available_model
-                    elif any(size in available_model.lower() for size in ['7b', '8b']):
-                        # Only use larger models if no smaller ones available
-                        logger.info(f"Selected larger model: {available_model}")
-                        return available_model
-                    elif ':' not in available_model:
-                        # Handle models without explicit size tags
-                        return available_model
+                # Exact match first (e.g., "qwen3:1.7b" matches "qwen3:1.7b")
+                if preferred_model.lower() == available_model.lower():
+                    logger.info(f"Selected exact match model: {available_model}")
+                    return available_model
+
+                # Partial match with version handling (e.g., "qwen3:1.7b" matches "qwen3:1.7b-q8_0")
+                preferred_parts = preferred_model.lower().split(':')
+                available_parts = available_model.lower().split(':')
+
+                if len(preferred_parts) >= 2 and len(available_parts) >= 2:
+                    if (preferred_parts[0] == available_parts[0] and
+                        preferred_parts[1] in available_parts[1]):
+                        logger.info(f"Selected version match model: {available_model}")
+                        return available_model

         # If no preferred models found, use first available
@@ -132,12 +109,8 @@ class LLMSynthesizer:
         if not self.model:
             self.model = self._select_best_model()

-        # Warm up LLM with minimal request (ignores response)
-        if self.available_models:
-            try:
-                self._call_ollama("testing, just say 'hi'", temperature=0.1, disable_thinking=True)
-            except:
-                pass # Warmup failure is non-critical
+        # Skip warmup - models are fast enough and warmup causes delays
+        # Warmup removed to eliminate startup delays and unwanted model calls

         self._initialized = True

@@ -146,7 +119,7 @@ class LLMSynthesizer:
         self._ensure_initialized()
         return len(self.available_models) > 0

-    def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False) -> Optional[str]:
+    def _call_ollama(self, prompt: str, temperature: float = 0.3, disable_thinking: bool = False, use_streaming: bool = False) -> Optional[str]:
         """Make a call to Ollama API with safeguards."""
         start_time = time.time()

@@ -163,28 +136,55 @@ class LLMSynthesizer:

        # Handle thinking mode for Qwen3 models
        final_prompt = prompt
-        if not self.enable_thinking or disable_thinking:
+        use_thinking = self.enable_thinking and not disable_thinking
+
+        # For non-thinking mode, add <no_think> tag for Qwen3
+        if not use_thinking and "qwen3" in model_to_use.lower():
            if not final_prompt.endswith(" <no_think>"):
                final_prompt += " <no_think>"

        # Get optimal parameters for this model
        optimal_params = get_optimal_ollama_parameters(model_to_use)

+        # Qwen3-specific optimal parameters based on research
+        if "qwen3" in model_to_use.lower():
+            if use_thinking:
+                # Thinking mode: Temperature=0.6, TopP=0.95, TopK=20, PresencePenalty=1.5
+                qwen3_temp = 0.6
+                qwen3_top_p = 0.95
+                qwen3_top_k = 20
+                qwen3_presence = 1.5
+            else:
+                # Non-thinking mode: Temperature=0.7, TopP=0.8, TopK=20, PresencePenalty=1.5
+                qwen3_temp = 0.7
+                qwen3_top_p = 0.8
+                qwen3_top_k = 20
+                qwen3_presence = 1.5
+        else:
+            qwen3_temp = temperature
+            qwen3_top_p = optimal_params.get("top_p", 0.9)
+            qwen3_top_k = optimal_params.get("top_k", 40)
+            qwen3_presence = optimal_params.get("presence_penalty", 1.0)
+
        payload = {
            "model": model_to_use,
            "prompt": final_prompt,
-            "stream": False,
+            "stream": use_streaming,
            "options": {
-                "temperature": temperature,
-                "top_p": optimal_params.get("top_p", 0.9),
-                "top_k": optimal_params.get("top_k", 40),
-                "num_ctx": optimal_params.get("num_ctx", 32768),
+                "temperature": qwen3_temp,
+                "top_p": qwen3_top_p,
+                "top_k": qwen3_top_k,
+                "num_ctx": 32000,  # Critical: Qwen3 context length (32K token limit)
                "num_predict": optimal_params.get("num_predict", 2000),
                "repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
-                "presence_penalty": optimal_params.get("presence_penalty", 1.0)
+                "presence_penalty": qwen3_presence
            }
        }

+        # Handle streaming with early stopping
+        if use_streaming:
+            return self._handle_streaming_with_early_stop(payload, model_to_use, use_thinking, start_time)
+
        response = requests.post(
            f"{self.ollama_url}/api/generate",
            json=payload,
@@ -193,8 +193,19 @@ class LLMSynthesizer:

        if response.status_code == 200:
            result = response.json()

+            # All models use standard response format
+            # Qwen3 thinking tokens are embedded in the response content itself as <think>...</think>
            raw_response = result.get('response', '').strip()

+            # Log thinking content for Qwen3 debugging
+            if "qwen3" in model_to_use.lower() and use_thinking and "<think>" in raw_response:
+                thinking_start = raw_response.find("<think>")
+                thinking_end = raw_response.find("</think>")
+                if thinking_start != -1 and thinking_end != -1:
+                    thinking_content = raw_response[thinking_start+7:thinking_end]
+                    logger.info(f"Qwen3 thinking: {thinking_content[:100]}...")
+
            # Apply safeguards to check response quality
            if self.safeguard_detector and raw_response:
                is_valid, issue_type, explanation = self.safeguard_detector.check_response_quality(
@@ -203,8 +214,8 @@ class LLMSynthesizer:

                if not is_valid:
                    logger.warning(f"Safeguard triggered: {issue_type}")
-                    # Return a safe explanation instead of the problematic response
-                    return self._create_safeguard_response(issue_type, explanation, prompt)
+                    # Preserve original response but add safeguard warning
+                    return self._create_safeguard_response_with_content(issue_type, explanation, raw_response)

            return raw_response
        else:
@@ -233,6 +244,119 @@ class LLMSynthesizer:

This is normal with smaller AI models and helps ensure you get quality responses."""

+    def _create_safeguard_response_with_content(self, issue_type: str, explanation: str, original_response: str) -> str:
+        """Create a response that preserves the original content but adds a safeguard warning."""
+
+        # For Qwen3, extract the actual response (after thinking)
+        actual_response = original_response
+        if "<think>" in original_response and "</think>" in original_response:
+            thinking_end = original_response.find("</think>")
+            if thinking_end != -1:
+                actual_response = original_response[thinking_end + 8:].strip()
+
+        # If we have useful content, preserve it with a warning
+        if len(actual_response.strip()) > 20:
+            return f"""⚠️ **Response Quality Warning** ({issue_type})
+
+{explanation}
+
+---
+
+**AI Response (use with caution):**
+
+{actual_response}
+
+---
+
+💡 **Note**: This response may have quality issues. Consider rephrasing your question or trying exploration mode for better results."""
+        else:
+            # If content is too short or problematic, use the original safeguard response
+            return f"""⚠️ Model Response Issue Detected
+
+{explanation}
+
+**What happened:** The AI model encountered a common issue with small language models.
+
+**Your options:**
+1. **Try again**: Ask the same question (often resolves itself)
+2. **Rephrase**: Make your question more specific or break it into parts
+3. **Use exploration mode**: `rag-mini explore` for complex questions
+
+This is normal with smaller AI models and helps ensure you get quality responses."""
+
+    def _handle_streaming_with_early_stop(self, payload: dict, model_name: str, use_thinking: bool, start_time: float) -> Optional[str]:
+        """Handle streaming response with intelligent early stopping."""
+        import json
+
+        try:
+            response = requests.post(
+                f"{self.ollama_url}/api/generate",
+                json=payload,
+                stream=True,
+                timeout=65
+            )
+
+            if response.status_code != 200:
+                logger.error(f"Ollama API error: {response.status_code}")
+                return None
+
+            full_response = ""
+            word_buffer = []
+            repetition_window = 30  # Check last 30 words for repetition (more context)
+            stop_threshold = 0.8  # Stop only if 80% of recent words are repetitive (very permissive)
+            min_response_length = 100  # Don't early stop until we have at least 100 chars
+
+            for line in response.iter_lines():
+                if line:
+                    try:
+                        chunk_data = json.loads(line.decode('utf-8'))
+                        chunk_text = chunk_data.get('response', '')
+
+                        if chunk_text:
+                            full_response += chunk_text
+
+                            # Add words to buffer for repetition detection
+                            new_words = chunk_text.split()
+                            word_buffer.extend(new_words)
+
+                            # Keep only recent words in buffer
+                            if len(word_buffer) > repetition_window:
+                                word_buffer = word_buffer[-repetition_window:]
+
+                            # Check for repetition patterns after we have enough words AND content
+                            if len(word_buffer) >= repetition_window and len(full_response) >= min_response_length:
+                                unique_words = set(word_buffer)
+                                repetition_ratio = 1 - (len(unique_words) / len(word_buffer))
+
+                                # Early stop only if repetition is EXTREMELY high (80%+)
+                                if repetition_ratio > stop_threshold:
+                                    logger.info(f"Early stopping due to repetition: {repetition_ratio:.2f}")
+
+                                    # Add a gentle completion to the response
+                                    if not full_response.strip().endswith(('.', '!', '?')):
+                                        full_response += "..."
+
+                                    # Send stop signal to model (attempt to gracefully stop)
+                                    try:
+                                        stop_payload = {"model": model_name, "stop": True}
+                                        requests.post(f"{self.ollama_url}/api/generate", json=stop_payload, timeout=2)
+                                    except:
+                                        pass  # If stop fails, we already have partial response
+
+                                    break
+
+                        if chunk_data.get('done', False):
+                            break
+
+                    except json.JSONDecodeError:
+                        continue
+
+            return full_response.strip()
+
+        except Exception as e:
+            logger.error(f"Streaming with early stop failed: {e}")
+            return None
+
    def synthesize_search_results(self, query: str, results: List[Any], project_path: Path) -> SynthesisResult:
        """Synthesize search results into a coherent summary."""

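The early-stop rule above only needs the trailing window of words and a uniqueness ratio; a toy calculation with the same 30-word window and 0.8 threshold looks like this (the sample text is made up).

```python
# Toy check of the repetition ratio used for early stopping (sample text is made up).
word_buffer = ("the answer is the answer is " * 5).split()[-30:]
repetition_ratio = 1 - (len(set(word_buffer)) / len(word_buffer))
print(len(word_buffer), repetition_ratio)   # 30 words, ratio 0.9
print(repetition_ratio > 0.8)               # True -> streaming would stop here
```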
@@ -469,6 +469,31 @@ class OllamaEmbedder:
            "ollama_url": self.base_url if self.mode == "ollama" else None
        }

+    def get_embedding_info(self) -> Dict[str, str]:
+        """Get human-readable embedding system information for installer."""
+        status = self.get_status()
+
+        if status["mode"] == "ollama":
+            return {
+                "method": f"Ollama ({status['ollama_model']})",
+                "status": "working"
+            }
+        elif status["mode"] == "ml":
+            return {
+                "method": f"ML Fallback ({status['fallback_model']})",
+                "status": "working"
+            }
+        elif status["mode"] == "hash":
+            return {
+                "method": "Hash-based (basic similarity)",
+                "status": "working"
+            }
+        else:
+            return {
+                "method": "Unknown",
+                "status": "error"
+            }
+
    def warmup(self):
        """Warm up the embedding system with a dummy request."""
        dummy_code = "def hello(): pass"
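A hypothetical installer-side call to the new helper might look like the following; it assumes the OllamaEmbedder constructor works without arguments, which this diff does not show.

```python
# Hypothetical usage of get_embedding_info(); assumes a no-argument constructor.
from mini_rag.ollama_embeddings import OllamaEmbedder

embedder = OllamaEmbedder()
info = embedder.get_embedding_info()
print(f"Embedding method: {info['method']} (status: {info['status']})")
```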
@@ -59,23 +59,8 @@ class QueryExpander:
        if self._initialized:
            return

-        # Warm up LLM if enabled and available
-        if self.enabled:
-            try:
-                model = self._select_expansion_model()
-                if model:
-                    requests.post(
-                        f"{self.ollama_url}/api/generate",
-                        json={
-                            "model": model,
-                            "prompt": "testing, just say 'hi' <no_think>",
-                            "stream": False,
-                            "options": {"temperature": 0.1, "max_tokens": 5}
-                        },
-                        timeout=5
-                    )
-            except:
-                pass  # Warmup failure is non-critical
+        # Skip warmup - causes startup delays and unwanted model calls
+        # Query expansion works fine on first use without warmup

        self._initialized = True

|||||||
data = response.json()
|
data = response.json()
|
||||||
available = [model['name'] for model in data.get('models', [])]
|
available = [model['name'] for model in data.get('models', [])]
|
||||||
|
|
||||||
# Prefer ultra-fast, efficient models for query expansion (CPU-friendly)
|
# Use same model rankings as main synthesizer for consistency
|
||||||
expansion_preferences = [
|
expansion_preferences = [
|
||||||
"qwen3:0.6b", "qwen3:1.7b", "qwen2.5:1.5b",
|
"qwen3:1.7b", "qwen3:0.6b", "qwen3:4b", "llama3.2:1b",
|
||||||
"llama3.2:1b", "gemma2:2b", "llama3.2:3b"
|
"qwen2.5:1.5b", "qwen3:3b", "qwen2.5-coder:1.5b"
|
||||||
]
|
]
|
||||||
|
|
||||||
for preferred in expansion_preferences:
|
for preferred in expansion_preferences:
|
||||||
|
|||||||
@@ -8,13 +8,20 @@ from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import numpy as np
import pandas as pd
-import lancedb
from rich.console import Console
from rich.table import Table
from rich.syntax import Syntax
from rank_bm25 import BM25Okapi
from collections import defaultdict

+# Optional LanceDB import
+try:
+    import lancedb
+    LANCEDB_AVAILABLE = True
+except ImportError:
+    lancedb = None
+    LANCEDB_AVAILABLE = False
+
from .ollama_embeddings import OllamaEmbedder as CodeEmbedder
from .path_handler import display_path
from .query_expander import QueryExpander
@@ -115,6 +122,14 @@ class CodeSearcher:

    def _connect(self):
        """Connect to the LanceDB database."""
+        if not LANCEDB_AVAILABLE:
+            print("❌ LanceDB Not Available")
+            print("   LanceDB is required for search functionality")
+            print("   Install it with: pip install lancedb pyarrow")
+            print("   For basic Ollama functionality, use hash-based search instead")
+            print()
+            raise ImportError("LanceDB dependency is required for search. Install with: pip install lancedb pyarrow")
+
        try:
            if not self.rag_dir.exists():
                print("🗃️ No Search Index Found")
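Callers can key off the new LANCEDB_AVAILABLE flag before constructing a searcher; the sketch below is illustrative, and only the flag name and the install hint come from this diff.

```python
# Sketch of guarding on the optional LanceDB dependency; handling is illustrative.
from pathlib import Path
from mini_rag import search as rag_search

project_path = Path(".")
if rag_search.LANCEDB_AVAILABLE:
    searcher = rag_search.CodeSearcher(project_path)
else:
    print("Semantic search unavailable - install with: pip install lancedb pyarrow")
```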
142
mini_rag/venv_checker.py
Normal file
@@ -0,0 +1,142 @@
#!/usr/bin/env python3
"""
Virtual Environment Checker
Ensures scripts run in proper Python virtual environment for consistency and safety.
"""

import sys
import os
import sysconfig
from pathlib import Path

def is_in_virtualenv() -> bool:
    """Check if we're running in a virtual environment."""
    # Check for virtual environment indicators
    return (
        hasattr(sys, 'real_prefix') or  # virtualenv
        (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix) or  # venv/pyvenv
        os.environ.get('VIRTUAL_ENV') is not None  # Environment variable
    )

def get_expected_venv_path() -> Path:
    """Get the expected virtual environment path for this project."""
    # Assume .venv in the same directory as the script
    script_dir = Path(__file__).parent.parent
    return script_dir / '.venv'

def check_correct_venv() -> tuple[bool, str]:
    """
    Check if we're in the correct virtual environment.

    Returns:
        (is_correct, message)
    """
    if not is_in_virtualenv():
        return False, "not in virtual environment"

    expected_venv = get_expected_venv_path()
    if not expected_venv.exists():
        return False, "expected virtual environment not found"

    current_venv = os.environ.get('VIRTUAL_ENV')
    if current_venv:
        current_venv_path = Path(current_venv).resolve()
        expected_venv_path = expected_venv.resolve()

        if current_venv_path != expected_venv_path:
            return False, f"wrong virtual environment (using {current_venv_path}, expected {expected_venv_path})"

    return True, "correct virtual environment"

def show_venv_warning(script_name: str = "script") -> None:
    """Show virtual environment warning with helpful instructions."""
    expected_venv = get_expected_venv_path()

    print("⚠️ VIRTUAL ENVIRONMENT WARNING")
    print("=" * 50)
    print()
    print(f"This {script_name} should be run in a Python virtual environment for:")
    print("   • Consistent dependencies")
    print("   • Isolated package versions")
    print("   • Proper security isolation")
    print("   • Reliable functionality")
    print()

    if expected_venv.exists():
        print("✅ Virtual environment found!")
        print(f"   Location: {expected_venv}")
        print()
        print("🚀 To activate it:")
        print(f"   source {expected_venv}/bin/activate")
        print(f"   {script_name}")
        print()
        print("🔄 Or run with activation:")
        print(f"   source {expected_venv}/bin/activate && {script_name}")
    else:
        print("❌ No virtual environment found!")
        print()
        print("🛠️ Create one first:")
        print("   ./install_mini_rag.sh")
        print()
        print("📚 Or manually:")
        print(f"   python3 -m venv {expected_venv}")
        print(f"   source {expected_venv}/bin/activate")
        print("   pip install -r requirements.txt")

    print()
    print("💡 Why this matters:")
    print("   Without a virtual environment, you may experience:")
    print("   • Import errors from missing packages")
    print("   • Version conflicts with system Python")
    print("   • Inconsistent behavior across systems")
    print("   • Potential system-wide package pollution")
    print()

def check_and_warn_venv(script_name: str = "script", force_exit: bool = False) -> bool:
    """
    Check virtual environment and warn if needed.

    Args:
        script_name: Name of the script for user-friendly messages
        force_exit: Whether to exit if not in correct venv

    Returns:
        True if in correct venv, False otherwise
    """
    is_correct, message = check_correct_venv()

    if not is_correct:
        show_venv_warning(script_name)

        if force_exit:
            print(f"⛔ Exiting {script_name} for your safety.")
            print("   Please activate the virtual environment and try again.")
            sys.exit(1)
        else:
            print(f"⚠️ Continuing anyway, but {script_name} may not work correctly...")
            print()
            return False

    return True

def require_venv(script_name: str = "script") -> None:
    """Require virtual environment or exit."""
    check_and_warn_venv(script_name, force_exit=True)

# Quick test function
def main():
    """Test the virtual environment checker."""
    print("🧪 Virtual Environment Checker Test")
    print("=" * 40)

    print(f"In virtual environment: {is_in_virtualenv()}")
    print(f"Expected venv path: {get_expected_venv_path()}")

    is_correct, message = check_correct_venv()
    print(f"Correct venv: {is_correct} ({message})")

    if not is_correct:
        show_venv_warning("test script")

if __name__ == "__main__":
    main()
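A script can hook this checker in at its entry point the same way rag-mini.py does later in this diff; a minimal sketch with a hypothetical script name:

```python
# Minimal sketch: warn (but keep running) when the project's .venv is not active.
from mini_rag.venv_checker import check_and_warn_venv

if __name__ == "__main__":
    check_and_warn_venv("my_script.py", force_exit=False)  # hypothetical caller name
    # ... the rest of the script continues, with a warning printed if outside the venv
```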
9
rag-mini
@@ -112,6 +112,7 @@ show_help() {
    echo -e "${BOLD}Main Commands:${NC}"
    echo "  rag-mini index <project_path>            # Index project for search"
    echo "  rag-mini search <project_path> <query>   # Search indexed project"
+   echo "  rag-mini explore <project_path>          # Interactive exploration with AI"
    echo "  rag-mini status <project_path>           # Show project status"
    echo ""
    echo -e "${BOLD}Interfaces:${NC}"
@@ -324,11 +325,11 @@ main() {
        "server")
            # Start server mode
            shift
-            exec "$PYTHON" "$SCRIPT_DIR/claude_rag/server.py" "$@"
+            exec "$PYTHON" "$SCRIPT_DIR/mini_rag/fast_server.py" "$@"
            ;;
-        "index"|"search"|"status")
-            # Direct CLI commands
-            exec "$SCRIPT_DIR/rag-mini" "$@"
+        "index"|"search"|"explore"|"status")
+            # Direct CLI commands - call Python script
+            exec "$PYTHON" "$SCRIPT_DIR/rag-mini.py" "$@"
            ;;
        *)
            # Unknown command - show help
26
rag-mini.py
@@ -118,7 +118,7 @@ def index_project(project_path: Path, force: bool = False):
        print("   Or see: docs/TROUBLESHOOTING.md")
        sys.exit(1)

-def search_project(project_path: Path, query: str, limit: int = 10, synthesize: bool = False):
+def search_project(project_path: Path, query: str, top_k: int = 10, synthesize: bool = False):
    """Search a project directory."""
    try:
        # Check if indexed first
@@ -130,7 +130,7 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:

        print(f"🔍 Searching \"{query}\" in {project_path.name}")
        searcher = CodeSearcher(project_path)
-        results = searcher.search(query, top_k=limit)
+        results = searcher.search(query, top_k=top_k)

        if not results:
            print("❌ No results found")
@@ -143,7 +143,7 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
            print()
            print("⚙️ Configuration adjustments:")
            print(f"   • Lower threshold: ./rag-mini search {project_path} \"{query}\" --threshold 0.05")
-            print("   • More results: add --limit 20")
+            print("   • More results: add --top-k 20")
            print()
            print("📚 Need help? See: docs/TROUBLESHOOTING.md")
            return
@@ -310,14 +310,14 @@ def status_check(project_path: Path):
        sys.exit(1)

def explore_interactive(project_path: Path):
-    """Interactive exploration mode with thinking and context memory."""
+    """Interactive exploration mode with thinking and context memory for any documents."""
    try:
        explorer = CodeExplorer(project_path)

        if not explorer.start_exploration_session():
            sys.exit(1)

-        print("\n🤔 Ask your first question about the codebase:")
+        print(f"\n🤔 Ask your first question about {project_path.name}:")

        while True:
            try:
@@ -357,7 +357,8 @@ def explore_interactive(project_path: Path):
                    continue

                # Process the question
-                print("\n🔍 Analyzing...")
+                print(f"\n🔍 Searching {project_path.name}...")
+                print("🧠 Thinking with AI model...")
                response = explorer.explore_question(question)

                if response:
@@ -382,6 +383,13 @@ def explore_interactive(project_path: Path):

def main():
    """Main CLI interface."""
+    # Check virtual environment
+    try:
+        from mini_rag.venv_checker import check_and_warn_venv
+        check_and_warn_venv("rag-mini.py", force_exit=False)
+    except ImportError:
+        pass  # If venv checker can't be imported, continue anyway
+
    parser = argparse.ArgumentParser(
        description="FSS-Mini-RAG - Lightweight semantic code search",
        formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -403,8 +411,8 @@ Examples:
                        help='Search query (for search command)')
    parser.add_argument('--force', action='store_true',
                        help='Force reindex all files')
-    parser.add_argument('--limit', type=int, default=10,
-                        help='Maximum number of search results')
+    parser.add_argument('--top-k', '--limit', type=int, default=10, dest='top_k',
+                        help='Maximum number of search results (top-k)')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')
    parser.add_argument('--synthesize', '-s', action='store_true',
@@ -432,7 +440,7 @@ Examples:
        if not args.query:
            print("❌ Search query required")
            sys.exit(1)
-        search_project(args.project_path, args.query, args.limit, args.synthesize)
+        search_project(args.project_path, args.query, args.top_k, args.synthesize)
    elif args.command == 'explore':
        explore_interactive(args.project_path)
    elif args.command == 'status':
862
rag-tui.py
File diff suppressed because it is too large
230
test_fixes.py
Normal file
@@ -0,0 +1,230 @@
#!/usr/bin/env python3
"""
Quick test script to verify our key fixes without heavy dependencies.

⚠️ IMPORTANT: This test requires the virtual environment to be activated:
    source .venv/bin/activate
    python test_fixes.py

Or run directly with venv:
    source .venv/bin/activate && python test_fixes.py
"""

import sys
import os
import tempfile
from pathlib import Path

# Check if virtual environment is activated
def check_venv():
    if 'VIRTUAL_ENV' not in os.environ:
        print("⚠️ WARNING: Virtual environment not detected!")
        print("   This test requires the virtual environment to be activated.")
        print("   Run: source .venv/bin/activate && python test_fixes.py")
        print("   Continuing anyway...\n")

check_venv()

# Add current directory to Python path
sys.path.insert(0, '.')

def test_config_model_rankings():
    """Test that model rankings are properly configured."""
    print("=" * 60)
    print("TESTING CONFIG AND MODEL RANKINGS")
    print("=" * 60)

    try:
        # Test config loading without heavy dependencies
        from mini_rag.config import ConfigManager, LLMConfig

        # Create a temporary directory for testing
        with tempfile.TemporaryDirectory() as tmpdir:
            config_manager = ConfigManager(tmpdir)
            config = config_manager.load_config()

            print("✓ Config loads successfully")

            # Check LLM config and model rankings
            if hasattr(config, 'llm'):
                llm_config = config.llm
                print(f"✓ LLM config found: {type(llm_config)}")

                if hasattr(llm_config, 'model_rankings'):
                    rankings = llm_config.model_rankings
                    print(f"✓ Model rankings: {rankings}")

                    if rankings and rankings[0] == "qwen3:1.7b":
                        print("✓ qwen3:1.7b is FIRST priority - CORRECT!")
                        return True
                    else:
                        print(f"✗ WRONG: First model is {rankings[0] if rankings else 'None'}, should be qwen3:1.7b")
                        return False
                else:
                    print("✗ Model rankings not found in LLM config")
                    return False
            else:
                print("✗ LLM config not found")
                return False

    except ImportError as e:
        print(f"✗ Import error: {e}")
        return False
    except Exception as e:
        print(f"✗ Error: {e}")
        return False

def test_context_length_fix():
    """Test that context length is correctly set to 32K."""
    print("\n" + "=" * 60)
    print("TESTING CONTEXT LENGTH FIXES")
    print("=" * 60)

    try:
        # Read the synthesizer file and check for 32000
        with open('mini_rag/llm_synthesizer.py', 'r') as f:
            synthesizer_content = f.read()

        if '"num_ctx": 32000' in synthesizer_content:
            print("✓ LLM Synthesizer: num_ctx is correctly set to 32000")
        elif '"num_ctx": 80000' in synthesizer_content:
            print("✗ LLM Synthesizer: num_ctx is still 80000 - NEEDS FIX")
            return False
        else:
            print("? LLM Synthesizer: num_ctx setting not found clearly")

        # Read the safeguards file and check for 32000
        with open('mini_rag/llm_safeguards.py', 'r') as f:
            safeguards_content = f.read()

        if 'context_window: int = 32000' in safeguards_content:
            print("✓ Safeguards: context_window is correctly set to 32000")
            return True
        elif 'context_window: int = 80000' in safeguards_content:
            print("✗ Safeguards: context_window is still 80000 - NEEDS FIX")
            return False
        else:
            print("? Safeguards: context_window setting not found clearly")
            return False

    except Exception as e:
        print(f"✗ Error checking context length: {e}")
        return False

def test_safeguard_preservation():
    """Test that safeguards preserve content instead of dropping it."""
    print("\n" + "=" * 60)
    print("TESTING SAFEGUARD CONTENT PRESERVATION")
    print("=" * 60)

    try:
        # Read the synthesizer file and check for the preservation method
        with open('mini_rag/llm_synthesizer.py', 'r') as f:
            synthesizer_content = f.read()

        if '_create_safeguard_response_with_content' in synthesizer_content:
            print("✓ Safeguard content preservation method exists")
        else:
            print("✗ Safeguard content preservation method missing")
            return False

        # Check for the specific preservation logic
        if 'AI Response (use with caution):' in synthesizer_content:
            print("✓ Content preservation warning format found")
        else:
            print("✗ Content preservation warning format missing")
            return False

        # Check that it's being called instead of dropping content
        if 'return self._create_safeguard_response_with_content(issue_type, explanation, raw_response)' in synthesizer_content:
            print("✓ Preservation method is called when safeguards trigger")
            return True
        else:
            print("✗ Preservation method not called properly")
            return False

    except Exception as e:
        print(f"✗ Error checking safeguard preservation: {e}")
        return False

def test_import_fixes():
    """Test that import statements are fixed from claude_rag to mini_rag."""
    print("\n" + "=" * 60)
    print("TESTING IMPORT STATEMENT FIXES")
    print("=" * 60)

    test_files = [
        'tests/test_rag_integration.py',
        'tests/01_basic_integration_test.py',
        'tests/test_hybrid_search.py',
        'tests/test_context_retrieval.py'
    ]

    all_good = True

    for test_file in test_files:
        if Path(test_file).exists():
            try:
                with open(test_file, 'r') as f:
                    content = f.read()

                if 'claude_rag' in content:
                    print(f"✗ {test_file}: Still contains 'claude_rag' imports")
                    all_good = False
                elif 'mini_rag' in content:
                    print(f"✓ {test_file}: Uses correct 'mini_rag' imports")
                else:
                    print(f"? {test_file}: No rag imports found")

            except Exception as e:
                print(f"✗ Error reading {test_file}: {e}")
                all_good = False
        else:
            print(f"? {test_file}: File not found")

    return all_good

def main():
    """Run all tests."""
    print("FSS-Mini-RAG Fix Verification Tests")
    print("Testing all the critical fixes...")

    tests = [
        ("Model Rankings", test_config_model_rankings),
        ("Context Length", test_context_length_fix),
        ("Safeguard Preservation", test_safeguard_preservation),
        ("Import Fixes", test_import_fixes)
    ]

    results = {}

    for test_name, test_func in tests:
        try:
            results[test_name] = test_func()
        except Exception as e:
            print(f"✗ {test_name} test crashed: {e}")
            results[test_name] = False

    # Summary
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)

    passed = sum(1 for result in results.values() if result)
    total = len(results)

    for test_name, result in results.items():
        status = "✓ PASS" if result else "✗ FAIL"
        print(f"{status} {test_name}")

    print(f"\nOverall: {passed}/{total} tests passed")

    if passed == total:
        print("🎉 ALL TESTS PASSED - System should be working properly!")
        return 0
    else:
        print("❌ SOME TESTS FAILED - System needs more fixes!")
        return 1

if __name__ == "__main__":
    sys.exit(main())
@@ -1,5 +1,12 @@
"""
Comprehensive demo of the RAG system showing all integrated features.
+
+⚠️ IMPORTANT: This test requires the virtual environment to be activated:
+    source .venv/bin/activate
+    PYTHONPATH=. python tests/01_basic_integration_test.py
+
+Or run directly with venv:
+    source .venv/bin/activate && PYTHONPATH=. python tests/01_basic_integration_test.py
"""

import os
@@ -7,6 +14,16 @@ import sys
import tempfile
from pathlib import Path

+# Check if virtual environment is activated
+def check_venv():
+    if 'VIRTUAL_ENV' not in os.environ:
+        print("⚠️ WARNING: Virtual environment not detected!")
+        print("   This test requires the virtual environment to be activated.")
+        print("   Run: source .venv/bin/activate && PYTHONPATH=. python tests/01_basic_integration_test.py")
+        print("   Continuing anyway...\n")
+
+check_venv()
+
# Fix Windows encoding
if sys.platform == 'win32':
    os.environ['PYTHONUTF8'] = '1'
@@ -15,7 +32,7 @@ if sys.platform == 'win32':
from mini_rag.chunker import CodeChunker
from mini_rag.indexer import ProjectIndexer
from mini_rag.search import CodeSearcher
-from mini_rag.embeddings import CodeEmbedder
+from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder

def main():
    print("=" * 60)
@@ -189,17 +206,17 @@ if __name__ == "__main__":

    # Test different search types
    print("\n  a) Semantic search for 'calculate average':")
-    results = searcher.search("calculate average", limit=3)
+    results = searcher.search("calculate average", top_k=3)
    for i, result in enumerate(results, 1):
        print(f"    {i}. {result.chunk_type} '{result.name}' in {result.file_path} (score: {result.score:.3f})")

    print("\n  b) BM25-weighted search for 'divide zero':")
-    results = searcher.search("divide zero", limit=3, semantic_weight=0.2, bm25_weight=0.8)
+    results = searcher.search("divide zero", top_k=3, semantic_weight=0.2, bm25_weight=0.8)
    for i, result in enumerate(results, 1):
        print(f"    {i}. {result.chunk_type} '{result.name}' in {result.file_path} (score: {result.score:.3f})")

    print("\n  c) Search with context for 'test addition':")
-    results = searcher.search("test addition", limit=2, include_context=True)
+    results = searcher.search("test addition", top_k=2, include_context=True)
    for i, result in enumerate(results, 1):
        print(f"    {i}. {result.chunk_type} '{result.name}'")
        if result.parent_chunk:
@@ -37,25 +37,25 @@ def demo_search(project_path: Path):
            'title': 'Keyword-Heavy Search',
            'query': 'BM25Okapi rank_bm25 search scoring',
            'description': 'This query has specific technical keywords that BM25 excels at finding',
-            'limit': 5
+            'top_k': 5
        },
        {
            'title': 'Natural Language Query',
            'query': 'how to build search index from database chunks',
            'description': 'This semantic query benefits from transformer embeddings understanding intent',
-            'limit': 5
+            'top_k': 5
        },
        {
            'title': 'Mixed Technical Query',
            'query': 'vector embeddings for semantic code search with transformers',
            'description': 'This hybrid query combines technical terms with conceptual understanding',
-            'limit': 5
+            'top_k': 5
        },
        {
            'title': 'Function Search',
            'query': 'search method implementation with filters',
            'description': 'Looking for specific function implementations',
-            'limit': 5
+            'top_k': 5
        }
    ]

@@ -67,7 +67,7 @@ def demo_search(project_path: Path):
        # Run search with hybrid mode
        results = searcher.search(
            query=demo['query'],
-            limit=demo['limit'],
+            top_k=demo['top_k'],
            semantic_weight=0.7,
            bm25_weight=0.3
        )
@@ -244,7 +244,7 @@ def compute_median(numbers):
    searcher = CodeSearcher(project_path)

    # Test BM25 integration
-    results = searcher.search("multiply numbers", limit=5,
+    results = searcher.search("multiply numbers", top_k=5,
                              semantic_weight=0.3, bm25_weight=0.7)

    if results:
@@ -283,7 +283,7 @@ def compute_median(numbers):
            print(f"    - No parent chunk")

    # Test include_context in search
-    results_with_context = searcher.search("add", include_context=True, limit=2)
+    results_with_context = searcher.search("add", include_context=True, top_k=2)
    if results_with_context:
        print(f"    Found {len(results_with_context)} results with context")
        for r in results_with_context:
@@ -1,11 +1,29 @@
#!/usr/bin/env python3
"""
Test script for adjacent chunk retrieval functionality.
+
+⚠️ IMPORTANT: This test requires the virtual environment to be activated:
+    source .venv/bin/activate
+    PYTHONPATH=. python tests/test_context_retrieval.py
+
+Or run directly with venv:
+    source .venv/bin/activate && PYTHONPATH=. python tests/test_context_retrieval.py
"""

+import os
from pathlib import Path
from mini_rag.search import CodeSearcher
-from mini_rag.embeddings import CodeEmbedder
+from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder

+# Check if virtual environment is activated
+def check_venv():
+    if 'VIRTUAL_ENV' not in os.environ:
+        print("⚠️ WARNING: Virtual environment not detected!")
+        print("   This test requires the virtual environment to be activated.")
+        print("   Run: source .venv/bin/activate && PYTHONPATH=. python tests/test_context_retrieval.py")
+        print("   Continuing anyway...\n")
+
+check_venv()
+
def test_context_retrieval():
    """Test the new context retrieval functionality."""
@@ -20,7 +38,7 @@ def test_context_retrieval():

    # Test 1: Search without context
    print("\n1. Search WITHOUT context:")
-    results = searcher.search("chunk metadata", limit=3, include_context=False)
+    results = searcher.search("chunk metadata", top_k=3, include_context=False)
    for i, result in enumerate(results, 1):
        print(f"   Result {i}: {result.file_path}:{result.start_line}-{result.end_line}")
        print(f"   Type: {result.chunk_type}, Name: {result.name}")
@@ -30,7 +48,7 @@ def test_context_retrieval():

    # Test 2: Search with context
    print("\n2. Search WITH context:")
-    results = searcher.search("chunk metadata", limit=3, include_context=True)
+    results = searcher.search("chunk metadata", top_k=3, include_context=True)
    for i, result in enumerate(results, 1):
        print(f"   Result {i}: {result.file_path}:{result.start_line}-{result.end_line}")
        print(f"   Type: {result.chunk_type}, Name: {result.name}")
@@ -2,6 +2,13 @@
"""
Test and benchmark the hybrid BM25 + semantic search system.
Shows performance metrics and search quality comparisons.
+
+⚠️ IMPORTANT: This test requires the virtual environment to be activated:
+    source .venv/bin/activate
+    PYTHONPATH=. python tests/test_hybrid_search.py
+
+Or run directly with venv:
+    source .venv/bin/activate && PYTHONPATH=. python tests/test_hybrid_search.py
"""

import time
@@ -16,7 +23,7 @@ from rich.syntax import Syntax
from rich.progress import track

from mini_rag.search import CodeSearcher, SearchResult
-from mini_rag.embeddings import CodeEmbedder
+from mini_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder

console = Console()

@@ -40,7 +47,7 @@ class SearchTester:
        if 'error' not in stats:
            console.print(f"[dim]Index contains {stats['total_chunks']} chunks from {stats['unique_files']} files[/dim]\n")

-    def run_query(self, query: str, limit: int = 10,
+    def run_query(self, query: str, top_k: int = 10,
                  semantic_only: bool = False,
                  bm25_only: bool = False) -> Dict[str, Any]:
        """Run a single query and return metrics."""
@@ -60,7 +67,7 @@ class SearchTester:
        start = time.time()
        results = self.searcher.search(
            query=query,
-            limit=limit,
+            top_k=top_k,
            semantic_weight=semantic_weight,
            bm25_weight=bm25_weight
        )
@@ -76,10 +83,10 @@ class SearchTester:
            'avg_score': sum(r.score for r in results) / len(results) if results else 0,
        }

-    def compare_search_modes(self, query: str, limit: int = 5):
+    def compare_search_modes(self, query: str, top_k: int = 5):
        """Compare results across different search modes."""
        console.print(f"\n[bold cyan]Query:[/bold cyan] '{query}'")
-        console.print(f"[dim]Top {limit} results per mode[/dim]\n")
+        console.print(f"[dim]Top {top_k} results per mode[/dim]\n")

        # Run searches in all modes
        modes = [
@@ -90,7 +97,7 @@ class SearchTester:

        all_results = {}
        for mode_name, semantic_only, bm25_only in modes:
-            result = self.run_query(query, limit, semantic_only, bm25_only)
+            result = self.run_query(query, top_k, semantic_only, bm25_only)
            all_results[mode_name] = result

        # Create comparison table
@@ -191,7 +198,7 @@ class SearchTester:
        for test_case in test_queries:
            console.rule(f"\n[cyan]{test_case['description']}[/cyan]")
            console.print(f"[dim]{test_case['expected']}[/dim]")
-            self.compare_search_modes(test_case['query'], limit=3)
+            self.compare_search_modes(test_case['query'], top_k=3)
            time.sleep(0.5)  # Brief pause between tests

    def benchmark_performance(self, num_queries: int = 50):
@@ -268,7 +275,7 @@ class SearchTester:

        # Query that might return many results from same files
        query = "function implementation code search"
-        results = self.searcher.search(query, limit=20)
+        results = self.searcher.search(query, top_k=20)

        # Analyze diversity
        file_counts = {}
@@ -403,9 +403,9 @@ class TestOllamaIntegration(unittest.TestCase):

        # Check search config
        self.assertIsNotNone(self.config.search)
-        self.assertGreater(self.config.search.default_limit, 0)
+        self.assertGreater(self.config.search.default_top_k, 0)
        print(f"   ✅ Search config valid")
-        print(f"      Default limit: {self.config.search.default_limit}")
+        print(f"      Default top-k: {self.config.search.default_top_k}")
        print(f"      Query expansion: {self.config.search.expand_queries}")

@@ -1,12 +1,32 @@
#!/usr/bin/env python3
-"""Test RAG system integration with smart chunking."""
+"""
+Test RAG system integration with smart chunking.
+
+⚠️ IMPORTANT: This test requires the virtual environment to be activated:
+    source .venv/bin/activate
+    PYTHONPATH=. python tests/test_rag_integration.py
+
+Or run directly with venv:
+    source .venv/bin/activate && PYTHONPATH=. python tests/test_rag_integration.py
+"""

import tempfile
import shutil
+import os
from pathlib import Path
from mini_rag.indexer import ProjectIndexer
from mini_rag.search import CodeSearcher

+# Check if virtual environment is activated
+def check_venv():
+    if 'VIRTUAL_ENV' not in os.environ:
+        print("⚠️ WARNING: Virtual environment not detected!")
+        print("   This test requires the virtual environment to be activated.")
+        print("   Run: source .venv/bin/activate && PYTHONPATH=. python tests/test_rag_integration.py")
+        print("   Continuing anyway...\n")
+
+check_venv()
+
# Sample Python file with proper structure
sample_code = '''"""
Sample module for testing RAG system.
@@ -179,8 +199,8 @@ def test_integration():
    stats = indexer.index_project()

    print(f"   - Files indexed: {stats['files_indexed']}")
-    print(f"   - Total chunks: {stats['total_chunks']}")
-    print(f"   - Indexing time: {stats['indexing_time']:.2f}s")
+    print(f"   - Total chunks: {stats['chunks_created']}")
+    print(f"   - Indexing time: {stats['time_taken']:.2f}s")

    # Verify chunks were created properly
    print("\n2. Verifying chunk metadata...")
@@ -195,10 +215,10 @@ def test_integration():
    results = searcher.search("data processor class unified interface", top_k=3)
    print(f"\n   Test 1 - Class search:")
    for i, result in enumerate(results[:1]):
-        print(f"   - Match {i+1}: {result['file_path']}")
-        print(f"     Chunk type: {result['chunk_type']}")
-        print(f"     Score: {result['score']:.3f}")
-        if 'This class handles' in result['content']:
+        print(f"   - Match {i+1}: {result.file_path}")
+        print(f"     Chunk type: {result.chunk_type}")
+        print(f"     Score: {result.score:.3f}")
+        if 'This class handles' in result.content:
            print("     [OK] Docstring included with class")
        else:
            print("     [FAIL] Docstring not found")
@@ -207,10 +227,10 @@ def test_integration():
    results = searcher.search("process list of data items", top_k=3)
    print(f"\n   Test 2 - Method search:")
    for i, result in enumerate(results[:1]):
-        print(f"   - Match {i+1}: {result['file_path']}")
-        print(f"     Chunk type: {result['chunk_type']}")
-        print(f"     Parent class: {result.get('parent_class', 'N/A')}")
-        if 'Args:' in result['content'] and 'Returns:' in result['content']:
+        print(f"   - Match {i+1}: {result.file_path}")
+        print(f"     Chunk type: {result.chunk_type}")
+        print(f"     Parent class: {getattr(result, 'parent_class', 'N/A')}")
+        if 'Args:' in result.content and 'Returns:' in result.content:
            print("     [OK] Docstring included with method")
        else:
            print("     [FAIL] Method docstring not complete")
@@ -219,19 +239,19 @@ def test_integration():
    results = searcher.search("smart chunking capabilities markdown", top_k=3)
    print(f"\n   Test 3 - Markdown search:")
    for i, result in enumerate(results[:1]):
-        print(f"   - Match {i+1}: {result['file_path']}")
-        print(f"     Chunk type: {result['chunk_type']}")
-        print(f"     Lines: {result['start_line']}-{result['end_line']}")
+        print(f"   - Match {i+1}: {result.file_path}")
+        print(f"     Chunk type: {result.chunk_type}")
+        print(f"     Lines: {result.start_line}-{result.end_line}")

    # Test 4: Verify chunk navigation
    print(f"\n   Test 4 - Chunk navigation:")
    all_results = searcher.search("", top_k=100)  # Get all chunks
-    py_chunks = [r for r in all_results if r['file_path'].endswith('.py')]
+    py_chunks = [r for r in all_results if r.file_path.endswith('.py')]

    if py_chunks:
        first_chunk = py_chunks[0]
-        print(f"   - First chunk: index={first_chunk.get('chunk_index', 'N/A')}")
-        print(f"     Next chunk ID: {first_chunk.get('next_chunk_id', 'N/A')}")
+        print(f"   - First chunk: index={getattr(first_chunk, 'chunk_index', 'N/A')}")
+        print(f"     Next chunk ID: {getattr(first_chunk, 'next_chunk_id', 'N/A')}")

        # Verify chain
        valid_chain = True
@@ -239,7 +259,7 @@ def test_integration():
        curr = py_chunks[i]
        next_chunk = py_chunks[i + 1]
        expected_next = f"processor_{i+1}"
-        if curr.get('next_chunk_id') != expected_next:
+        if getattr(curr, 'next_chunk_id', None) != expected_next:
            valid_chain = False
            break
