email-sorter/config/default_config.yaml

version: "1.0.0"

calibration:
  sample_size: 1500
  sample_strategy: "stratified"
  validation_size: 300
  min_confidence: 0.6

processing:
  batch_size: 100
  llm_queue_size: 100
  parallel_workers: 4
  checkpoint_interval: 1000
  checkpoint_dir: "checkpoints"

classification:
  default_threshold: 0.75
  min_threshold: 0.60
  max_threshold: 0.90
  adjustment_step: 0.05
  adjustment_frequency: 1000
  category_thresholds:
    junk: 0.85
    auth: 0.90
    transactional: 0.80
    newsletters: 0.75
    conversational: 0.65

llm:
  provider: "ollama"
  fallback_enabled: true

  ollama:
    base_url: "http://localhost:11434"
    calibration_model: "qwen3:1.7b"
    consolidation_model: "qwen3:8b-q4_K_M"  # Larger model needed for JSON consolidation
    classification_model: "qwen3:1.7b"
    temperature: 0.1
    max_tokens: 2000
    timeout: 30
    retry_attempts: 3

  openai:
    base_url: "https://api.openai.com/v1"
    api_key: "${OPENAI_API_KEY}"
    calibration_model: "gpt-4o-mini"
    classification_model: "gpt-4o-mini"
    temperature: 0.1
    max_tokens: 500

email_providers:
  gmail:
    batch_size: 100
  microsoft:
    batch_size: 100
  imap:
    timeout: 30
    batch_size: 50

features:
  text_features:
    max_vocab_size: 10000
    ngram_range: [1, 2]
    min_df: 2
    max_df: 0.95
  embedding_model: "all-MiniLM-L6-v2"
  embedding_batch_size: 32

export:
  format: "json"
  include_confidence: true
  create_report: true
  output_dir: "results"

logging:
  level: "INFO"
  file: "logs/email-sorter.log"

cleanup:
  delete_temp_files: true
  delete_repo_after: false
  temp_dir: ".email-sorter-tmp"