#!/bin/bash
# Train final production model with 10k emails and 0.55 thresholds.
#
# What this script does, in order:
#   1. Copies any existing model file to a timestamped ".backup-*" file.
#   2. Deletes the previous results directory and training log.
#   3. Activates ./venv unless a virtualenv is already active.
#   4. Runs `python -m src.cli run` on the Enron source, limited to 10000
#      emails, mirroring all output into the training log.
#   5. Copies the resulting model file to a timestamped
#      ".backup-trained-*" file.
#
# All paths are relative, so run this from the directory that contains
# src/ and venv/.
#
# NOTE(review): the threshold (0.55) and the calibration/validation sample
# counts printed in the banner are not passed on the command line below;
# presumably they come from the project's own configuration -- confirm.

set -e
# Without pipefail the status of `python ... | tee` is tee's status, so a
# failed training run would go unnoticed and the script would back up a
# stale model and report success.
set -o pipefail

readonly MODEL_FILE="src/models/calibrated/classifier.pkl"
readonly OUTPUT_DIR="results_final/"
readonly LOG_FILE="final_training.log"

echo "=========================================="
echo "TRAINING FINAL MODEL"
echo "=========================================="
echo ""
echo "Config: 0.55 thresholds across all categories"
echo "Training set: 10,000 Enron emails"
echo "Calibration: 300 samples (3%)"
echo "Validation: 100 samples (1%)"
echo ""

# Backup existing model if it exists
if [[ -f "$MODEL_FILE" ]]; then
  BACKUP_FILE="${MODEL_FILE}.backup-$(date +%Y%m%d-%H%M%S)"
  cp -- "$MODEL_FILE" "$BACKUP_FILE"
  echo "Backed up existing model to: $BACKUP_FILE"
fi

# Clean old results
rm -rf -- "$OUTPUT_DIR" "$LOG_FILE"

# Activate venv (skipped when the caller already has one active)
if [[ -z "${VIRTUAL_ENV:-}" ]]; then
  # shellcheck source=/dev/null
  source venv/bin/activate
fi

# Train model. stderr is merged into stdout so the log captures both.
train_status=0
python -m src.cli run \
  --source enron \
  --limit 10000 \
  --output "$OUTPUT_DIR" \
  2>&1 | tee "$LOG_FILE" || train_status=$?

if ((train_status != 0)); then
  echo "ERROR: training failed (exit status $train_status); see $LOG_FILE" >&2
  exit "$train_status"
fi

# Create timestamped backup of trained model
TRAINED_BACKUP=""
if [[ -f "$MODEL_FILE" ]]; then
  TRAINED_BACKUP="${MODEL_FILE}.backup-trained-$(date +%Y%m%d-%H%M%S)"
  cp -- "$MODEL_FILE" "$TRAINED_BACKUP"
  echo "Created backup of trained model: $TRAINED_BACKUP"
fi

echo ""
echo "=========================================="
echo "Training complete!"
# Only claim a saved model / backup when the file is really there.
if [[ -n "$TRAINED_BACKUP" ]]; then
  echo "Model saved to: $MODEL_FILE"
  echo "Backup created with timestamp"
else
  echo "WARNING: no model file found at $MODEL_FILE after training" >&2
fi
echo "Log: $LOG_FILE"
echo "=========================================="