Fss-Rag-Mini/install_mini_rag.sh
BobAi 4166d0a362 Initial release: FSS-Mini-RAG - Lightweight semantic code search system
🎯 Complete transformation from 5.9GB bloated system to 70MB optimized solution

Key Features:
- Hybrid embedding system (Ollama + ML fallback + hash backup)
- Intelligent chunking with language-aware parsing
- Semantic + BM25 hybrid search with rich context
- Zero-config portable design with graceful degradation
- Beautiful TUI for beginners + powerful CLI for experts
- Comprehensive documentation with 8+ Mermaid diagrams
- Professional animated demo (183KB optimized GIF)

🏗️ Architecture Highlights:
- LanceDB vector storage with streaming indexing
- Smart file tracking (size/mtime) to avoid expensive rehashing (see the sketch after this list)
- Progressive chunking: Markdown headers → Python functions → fixed-size
- Quality filtering: 200+ chars, 20+ words, 30% alphanumeric content
- Concurrent batch processing with error recovery
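
A rough sketch of the size/mtime tracking idea (illustrative only: reindex and last_seen are hypothetical placeholders, and stat -c assumes GNU coreutils):

    # Skip re-embedding when neither size nor mtime has changed
    current=$(stat -c '%s %Y' "$file")
    if [ "$current" != "$last_seen" ]; then
        reindex "$file"
    fi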

📦 Package Contents:
- Core engine: claude_rag/ (11 modules, 2,847 lines)
- Entry points: rag-mini (unified), rag-tui (beginner interface)
- Documentation: README + 6 guides with visual diagrams
- Assets: 3D icon, optimized demo GIF, recording tools
- Tests: 8 comprehensive integration and validation tests
- Examples: Usage patterns, config templates, dependency analysis

🎥 Demo System:
- Scripted demonstration showing 12 files → 58 chunks indexing
- Semantic search with multi-line result previews
- Complete workflow from TUI startup to CLI mastery
- Professional recording pipeline with asciinema + GIF conversion

🛡️ Security & Quality:
- Complete .gitignore with personal data protection
- Dependency optimization (removed python-dotenv)
- Code quality validation and educational test suite
- Agent-reviewed architecture and documentation

Ready for production use - copy folder, run ./rag-mini, start searching!
2025-08-12 16:38:28 +10:00

#!/bin/bash
# FSS-Mini-RAG Installation Script
# Interactive installer that sets up Python environment and dependencies

set -e  # Exit on any error

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'  # No Color

# Get script directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Print colored output
print_header() {
    echo -e "\n${CYAN}${BOLD}=== $1 ===${NC}"
}

print_success() {
    echo -e "${GREEN}✅ $1${NC}"
}

print_warning() {
    echo -e "${YELLOW}⚠️ $1${NC}"
}

print_error() {
    echo -e "${RED}❌ $1${NC}"
}

print_info() {
    echo -e "${BLUE}ℹ️ $1${NC}"
}

# Check if command exists
command_exists() {
    command -v "$1" >/dev/null 2>&1
}

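# Example: command_exists gates optional tooling without noisy output, e.g.
#   command_exists ollama && echo "ollama available"
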
# Check Python version
check_python() {
    print_header "Checking Python Installation"

    # Check for python3 first, then python
    local python_cmd=""
    if command_exists python3; then
        python_cmd="python3"
    elif command_exists python; then
        python_cmd="python"
    else
        print_error "Python not found!"
        echo -e "${YELLOW}Please install Python 3.8+ from:${NC}"
        echo " • https://python.org/downloads"
        echo " • Or use your system package manager:"
        echo " - Ubuntu/Debian: sudo apt install python3 python3-pip python3-venv"
        echo " - macOS: brew install python"
        echo " - Windows: Download from python.org"
        echo ""
        echo -e "${CYAN}After installing Python, run this script again.${NC}"
        exit 1
    fi

    # Check Python version
    local python_version=$($python_cmd -c "import sys; print('.'.join(map(str, sys.version_info[:2])))")
    local major=$(echo "$python_version" | cut -d. -f1)
    local minor=$(echo "$python_version" | cut -d. -f2)

    if [ "$major" -lt 3 ] || ([ "$major" -eq 3 ] && [ "$minor" -lt 8 ]); then
        print_error "Python $python_version found, but 3.8+ required"
        echo "Please upgrade Python to 3.8 or higher."
        exit 1
    fi

    print_success "Found Python $python_version ($python_cmd)"
    export PYTHON_CMD="$python_cmd"
}

# Check if virtual environment exists
check_venv() {
    if [ -d "$SCRIPT_DIR/.venv" ]; then
        print_info "Virtual environment already exists at $SCRIPT_DIR/.venv"
        echo -n "Recreate it? (y/N): "
        read -r recreate
        if [[ $recreate =~ ^[Yy]$ ]]; then
            print_info "Removing existing virtual environment..."
            rm -rf "$SCRIPT_DIR/.venv"
            return 1  # Needs creation
        else
            return 0  # Use existing
        fi
    else
        return 1  # Needs creation
    fi
}

# Create virtual environment
create_venv() {
    print_header "Creating Python Virtual Environment"

    if ! check_venv; then
        print_info "Creating virtual environment at $SCRIPT_DIR/.venv"
        # Test the command directly: with `set -e`, a `$?` check after the
        # command would never run, because a failure aborts the script first
        if ! $PYTHON_CMD -m venv "$SCRIPT_DIR/.venv"; then
            print_error "Failed to create virtual environment"
            echo "This might be because python3-venv is not installed."
            echo "Try: sudo apt install python3-venv (Ubuntu/Debian)"
            exit 1
        fi
        print_success "Virtual environment created"
    else
        print_success "Using existing virtual environment"
    fi

    # Activate virtual environment
    source "$SCRIPT_DIR/.venv/bin/activate"
    print_success "Virtual environment activated"

    # Upgrade pip
    print_info "Upgrading pip..."
    pip install --upgrade pip >/dev/null 2>&1
}

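# Note: the activation above only affects this script's own process. To use the
# environment in a fresh shell later, run: source .venv/bin/activate
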
# Check Ollama installation
check_ollama() {
    print_header "Checking Ollama (AI Model Server)"

    if command_exists ollama; then
        print_success "Ollama is installed"

        # Check if Ollama is running
        if curl -s http://localhost:11434/api/version >/dev/null 2>&1; then
            print_success "Ollama server is running"
            return 0
        else
            print_warning "Ollama is installed but not running"
            echo -n "Start Ollama now? (Y/n): "
            read -r start_ollama
            if [[ ! $start_ollama =~ ^[Nn]$ ]]; then
                print_info "Starting Ollama server..."
                ollama serve &
                sleep 3
                if curl -s http://localhost:11434/api/version >/dev/null 2>&1; then
                    print_success "Ollama server started"
                    return 0
                else
                    print_warning "Failed to start Ollama automatically"
                    echo "Please start Ollama manually: ollama serve"
                    return 1
                fi
            else
                return 1
            fi
        fi
    else
        print_warning "Ollama not found"
        echo ""
        echo -e "${CYAN}Ollama provides the best embedding quality and performance.${NC}"
        echo -e "${YELLOW}To install Ollama:${NC}"
        echo " 1. Visit: https://ollama.ai/download"
        echo " 2. Download and install for your system"
        echo " 3. Run: ollama serve"
        echo " 4. Re-run this installer"
        echo ""
        echo -e "${BLUE}Alternative: Use ML fallback (requires more disk space)${NC}"
        echo ""
        echo -n "Continue without Ollama? (y/N): "
        read -r continue_without
        if [[ $continue_without =~ ^[Yy]$ ]]; then
            return 1
        else
            print_info "Install Ollama first, then re-run this script"
            exit 0
        fi
    fi
}

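# Note: Ollama serves its HTTP API on localhost:11434 by default, and
# /api/version is a cheap liveness probe - hence the curl checks above.
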
# Setup Ollama model based on configuration
setup_ollama_model() {
    # Skip if custom config says to skip
    if [ "$CUSTOM_OLLAMA_MODEL" = "skip" ]; then
        print_info "Skipping Ollama model setup (custom configuration)"
        return 1
    fi

    print_header "Ollama Model Setup"
    print_info "Checking available Ollama models..."

    # Get list of installed models
    local available_models=$(ollama list 2>/dev/null | grep -v "NAME" | awk '{print $1}' | grep -v "^$")

    if echo "$available_models" | grep -q "nomic-embed-text"; then
        print_success "nomic-embed-text model already installed"
        local model_info=$(ollama list | grep "nomic-embed-text")
        echo -e "${BLUE}$model_info${NC}"
        return 0
    fi

    if [ -n "$available_models" ]; then
        print_info "Other Ollama models found:"
        echo "$available_models" | sed 's/^/ • /'
        echo ""
    fi

    # For custom installations, we already asked. For auto installations, ask now
    local should_download="$CUSTOM_OLLAMA_MODEL"
    if [ -z "$should_download" ] || [ "$should_download" = "auto" ]; then
        echo -e "${CYAN}Model: nomic-embed-text (~270MB)${NC}"
        echo " • Purpose: High-quality semantic embeddings"
        echo " • Alternative: System will use ML/hash fallbacks"
        echo ""
        echo -n "Download model? [y/N]: "
        read -r download_model
        # Accept "y" or "Y"
        should_download=$([[ $download_model =~ ^[Yy]$ ]] && echo "download" || echo "skip")
    fi

    if [ "$should_download" != "download" ]; then
        print_info "Skipping model download"
        echo " Install later: ollama pull nomic-embed-text"
        return 1
    fi

    # Test connectivity and download
    print_info "Testing Ollama connection..."
    if ! curl -s --connect-timeout 5 http://localhost:11434/api/version >/dev/null; then
        print_error "Cannot connect to Ollama server"
        echo " Ensure Ollama is running: ollama serve"
        echo " Then install manually: ollama pull nomic-embed-text"
        return 1
    fi

    print_info "Downloading nomic-embed-text..."
    echo -e "${BLUE} Press Ctrl+C to cancel if needed${NC}"
    if ollama pull nomic-embed-text; then
        print_success "Model ready"
        return 0
    else
        print_warning "Download failed - will use fallback embeddings"
        return 1
    fi
}

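# Manual equivalents, if the interactive download was skipped:
#   ollama pull nomic-embed-text   # fetch the embedding model (~270MB)
#   ollama list                    # confirm it is installed
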
# Get installation preferences with smart defaults
get_installation_preferences() {
    print_header "Installation Configuration"

    echo -e "${CYAN}FSS-Mini-RAG can run with different embedding backends:${NC}"
    echo ""
    echo -e "${GREEN}• Ollama${NC} (recommended) - Best quality, local AI server"
    echo -e "${YELLOW}• ML Fallback${NC} - Offline transformers, larger but always works"
    echo -e "${BLUE}• Hash-based${NC} - Lightweight fallback, basic similarity"
    echo ""

    # Smart recommendation based on detected setup
    local recommended=""
    if [ "$ollama_available" = true ]; then
        recommended="light (Ollama detected)"
        echo -e "${GREEN}✓ Ollama detected - light installation recommended${NC}"
    else
        recommended="full (no Ollama)"
        echo -e "${YELLOW}⚠ No Ollama - full installation recommended for better quality${NC}"
    fi

    echo ""
    echo -e "${BOLD}Installation options:${NC}"
    echo -e "${GREEN}L) Light${NC} - Ollama + basic deps (~50MB)"
    echo -e "${YELLOW}F) Full${NC} - Light + ML fallback (~2-3GB)"
    echo -e "${BLUE}C) Custom${NC} - Configure individual components"
    echo ""

    while true; do
        echo -n "Choose [L/F/C] or Enter for recommended ($recommended): "
        read -r choice

        # Default to recommendation if empty
        if [ -z "$choice" ]; then
            if [ "$ollama_available" = true ]; then
                choice="L"
            else
                choice="F"
            fi
        fi

        # Uppercase portably (bash 3.2 on macOS lacks the ${var^^} expansion)
        choice=$(printf '%s' "$choice" | tr '[:lower:]' '[:upper:]')

        case "$choice" in
            L)
                export INSTALL_TYPE="light"
                echo -e "${GREEN}Selected: Light installation${NC}"
                break
                ;;
            F)
                export INSTALL_TYPE="full"
                echo -e "${YELLOW}Selected: Full installation${NC}"
                break
                ;;
            C)
                configure_custom_installation
                break
                ;;
            *)
                print_warning "Please choose L, F, C, or press Enter for default"
                ;;
        esac
    done
}

# Custom installation configuration
configure_custom_installation() {
    print_header "Custom Installation Configuration"

    echo -e "${CYAN}Configure each component individually:${NC}"
    echo ""

    # Base dependencies (always required)
    echo -e "${GREEN}✓ Base dependencies${NC} (lancedb, pandas, numpy, etc.) - Required"

    # Ollama model
    local ollama_model="skip"
    if [ "$ollama_available" = true ]; then
        echo ""
        echo -e "${BOLD}Ollama embedding model:${NC}"
        echo " • nomic-embed-text (~270MB) - Best quality embeddings"
        echo -n "Download Ollama model? [y/N]: "
        read -r download_ollama
        if [[ $download_ollama =~ ^[Yy]$ ]]; then
            ollama_model="download"
        fi
    fi

    # ML dependencies
    echo ""
    echo -e "${BOLD}ML fallback system:${NC}"
    echo " • PyTorch + transformers (~2-3GB) - Works without Ollama"
    echo " • Useful for: Offline use, server deployments, CI/CD"
    echo -n "Include ML dependencies? [y/N]: "
    read -r include_ml

    # Pre-download models
    local predownload_ml="skip"
    if [[ $include_ml =~ ^[Yy]$ ]]; then
        echo ""
        echo -e "${BOLD}Pre-download ML models:${NC}"
        echo " • sentence-transformers model (~80MB)"
        echo " • Skip: Models download automatically when first used"
        echo -n "Pre-download now? [y/N]: "
        read -r predownload
        if [[ $predownload =~ ^[Yy]$ ]]; then
            predownload_ml="download"
        fi
    fi

    # Set configuration
    if [[ $include_ml =~ ^[Yy]$ ]]; then
        export INSTALL_TYPE="full"
    else
        export INSTALL_TYPE="light"
    fi
    export CUSTOM_OLLAMA_MODEL="$ollama_model"
    export CUSTOM_ML_PREDOWNLOAD="$predownload_ml"

    echo ""
    echo -e "${GREEN}Custom configuration set:${NC}"
    echo " • Base deps: ✓"
    echo " • Ollama model: $ollama_model"
    echo " • ML deps: $([ "$INSTALL_TYPE" = "full" ] && echo "✓" || echo "skip")"
    echo " • ML predownload: $predownload_ml"
}

# Install dependencies with progress
install_dependencies() {
    print_header "Installing Python Dependencies"

    if [ "$INSTALL_TYPE" = "light" ]; then
        print_info "Installing core dependencies (~50MB)..."
        echo -e "${BLUE} Installing: lancedb, pandas, numpy, PyYAML, etc.${NC}"
        if pip install -r "$SCRIPT_DIR/requirements.txt" --quiet; then
            print_success "Dependencies installed"
        else
            print_error "Failed to install dependencies"
            echo "Try: pip install -r requirements.txt"
            exit 1
        fi
    else
        print_info "Installing full dependencies (~2-3GB)..."
        echo -e "${YELLOW} This includes PyTorch and transformers - will take several minutes${NC}"
        echo -e "${BLUE} Progress will be shown...${NC}"
        if pip install -r "$SCRIPT_DIR/requirements-full.txt"; then
            print_success "All dependencies installed"
        else
            print_error "Failed to install dependencies"
            echo "Try: pip install -r requirements-full.txt"
            exit 1
        fi
    fi

    print_info "Verifying installation..."
    if python3 -c "import lancedb, pandas, numpy" 2>/dev/null; then
        print_success "Core packages verified"
    else
        print_error "Package verification failed"
        exit 1
    fi
}

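# If verification fails, a manual retry inside the venv usually helps:
#   source .venv/bin/activate && pip install -r requirements.txt
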
# Setup ML models based on configuration
setup_ml_models() {
    if [ "$INSTALL_TYPE" != "full" ]; then
        return 0
    fi

    # Check if we should pre-download
    local should_predownload="$CUSTOM_ML_PREDOWNLOAD"
    if [ -z "$should_predownload" ] || [ "$should_predownload" = "auto" ]; then
        print_header "ML Model Pre-download"
        echo -e "${CYAN}Pre-download ML models for offline use?${NC}"
        echo ""
        echo -e "${BLUE}Model: sentence-transformers/all-MiniLM-L6-v2 (~80MB)${NC}"
        echo " • Purpose: Offline fallback when Ollama unavailable"
        echo " • If skipped: Auto-downloads when first needed"
        echo ""
        echo -n "Pre-download now? [y/N]: "
        read -r download_ml
        # Accept "y" or "Y"
        should_predownload=$([[ $download_ml =~ ^[Yy]$ ]] && echo "download" || echo "skip")
    fi

    if [ "$should_predownload" != "download" ]; then
        print_info "Skipping ML model pre-download"
        echo " Models will download automatically when first used"
        return 0
    fi

    print_info "Pre-downloading ML model..."
    echo -e "${BLUE} This ensures offline availability${NC}"

    # Download with a simple progress spinner. The embedded Python is kept
    # flush-left because its indentation is significant, and the whole call
    # sits in the `if` condition so a failure doesn't trip `set -e`.
    if python3 -c "
import sys
import threading
import time

# Progress spinner
def spinner():
    chars = '⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏'
    while not spinner.stop:
        for char in chars:
            if spinner.stop:
                break
            sys.stdout.write(f'\r {char} Downloading model...')
            sys.stdout.flush()
            time.sleep(0.1)

try:
    spinner.stop = False
    spinner_thread = threading.Thread(target=spinner)
    spinner_thread.start()

    from sentence_transformers import SentenceTransformer
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    spinner.stop = True
    spinner_thread.join()
    print('\r✅ ML model ready for offline use ')
except Exception as e:
    spinner.stop = True
    spinner_thread.join()
    print(f'\r❌ Download failed: {e} ')
    sys.exit(1)
" 2>/dev/null; then
        print_success "ML models ready"
    else
        print_warning "Pre-download failed"
        echo " Models will auto-download when first needed"
    fi
}

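# Non-interactive equivalent of the pre-download (assumes a full install, i.e.
# sentence-transformers is already present in the venv):
#   python3 -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
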
# Test installation
test_installation() {
    print_header "Testing Installation"
    print_info "Testing basic functionality..."

    # Test import
    if python3 -c "from claude_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('✅ Import successful')" 2>/dev/null; then
        print_success "Python imports working"
    else
        print_error "Import test failed"
        return 1
    fi

    # Test embedding system (Python kept flush-left: indentation is significant)
    if python3 -c "
from claude_rag import CodeEmbedder
embedder = CodeEmbedder()
info = embedder.get_embedding_info()
print(f'✅ Embedding system: {info[\"method\"]}')
" 2>/dev/null; then
        print_success "Embedding system working"
    else
        print_warning "Embedding test failed, but system should still work"
    fi

    return 0
}

# Show completion message
show_completion() {
    print_header "Installation Complete!"

    echo -e "${GREEN}${BOLD}FSS-Mini-RAG is now installed!${NC}"
    echo ""
    echo -e "${CYAN}Quick Start Options:${NC}"
    echo ""
    echo -e "${GREEN}🎯 TUI (Beginner-Friendly):${NC}"
    echo " ./rag-tui"
    echo " # Interactive interface with guided setup"
    echo ""
    echo -e "${BLUE}💻 CLI (Advanced):${NC}"
    echo " ./rag-mini index /path/to/project"
    echo " ./rag-mini search /path/to/project \"query\""
    echo " ./rag-mini status /path/to/project"
    echo ""
    echo -e "${CYAN}Documentation:${NC}"
    echo " • README.md - Complete technical documentation"
    echo " • docs/GETTING_STARTED.md - Step-by-step guide"
    echo " • examples/ - Usage examples and sample configs"
    echo ""

    if [ "$INSTALL_TYPE" = "light" ] && ! command_exists ollama; then
        echo -e "${YELLOW}Note: You chose the light installation but Ollama isn't installed.${NC}"
        echo "The system will use hash-based embeddings (lower quality)."
        echo "For best results, install Ollama from https://ollama.ai/download"
        echo ""
    fi

    # Ask if they want to run a test
    echo -n "Would you like to run a quick test now? (Y/n): "
    read -r run_test
    if [[ ! $run_test =~ ^[Nn]$ ]]; then
        run_quick_test
    fi
}

# Run quick test
run_quick_test() {
    print_header "Quick Test"
    print_info "Testing on this project directory..."
    echo "This will index the FSS-Mini-RAG system itself as a test."
    echo ""

    # Index this project
    if ./rag-mini index "$SCRIPT_DIR"; then
        print_success "Indexing completed"

        # Try a search
        echo ""
        print_info "Testing search functionality..."
        ./rag-mini search "$SCRIPT_DIR" "embedding system" --limit 3

        echo ""
        print_success "Test completed successfully!"
        echo -e "${CYAN}You can now use FSS-Mini-RAG on your own projects.${NC}"
    else
        print_error "Test failed"
        echo "Check the error messages above for troubleshooting."
    fi
}

# Main installation flow
main() {
    echo -e "${CYAN}${BOLD}"
    echo "╔══════════════════════════════════════╗"
    echo "║        FSS-Mini-RAG Installer        ║"
    echo "║    Fast Semantic Search for Code     ║"
    echo "╚══════════════════════════════════════╝"
    echo -e "${NC}"
    echo -e "${BLUE}Adaptive installation process:${NC}"
    echo " • Python environment setup"
    echo " • Smart configuration based on your system"
    echo " • Optional AI model downloads (with consent)"
    echo " • Testing and verification"
    echo ""
    echo -e "${CYAN}Note: You'll be asked before downloading any models${NC}"
    echo ""
    echo -n "Begin installation? [Y/n]: "
    read -r continue_install
    if [[ $continue_install =~ ^[Nn]$ ]]; then
        echo "Installation cancelled."
        exit 0
    fi

    # Run installation steps
    check_python
    create_venv

    # Check Ollama availability (global flag read by later steps)
    ollama_available=false
    if check_ollama; then
        ollama_available=true
    fi

    # Get installation preferences with smart recommendations
    get_installation_preferences

    # Install dependencies
    install_dependencies

    # Setup models based on configuration. A non-zero return here just means
    # the download was skipped, so don't let `set -e` abort the installer.
    if [ "$ollama_available" = true ]; then
        setup_ollama_model || true
    fi
    setup_ml_models

    if test_installation; then
        show_completion
    else
        print_error "Installation test failed"
        echo "Please check error messages and try again."
        exit 1
    fi
}

# Run main function
main "$@"