# email-sorter/config/default_config.yaml

# Version identifier for this configuration, stored as a quoted string.
# NOTE(review): unclear from this file whether it tracks the config schema
# or the application release — confirm with the code that reads it.
version: "1.0.0"

# Calibration settings.
# NOTE(review): the per-key notes below are inferred from the key names only;
# confirm them against the code that consumes this section.
calibration:
  # Presumably the number of emails sampled for calibration — TODO confirm.
  sample_size: 250
  # Name of the sampling strategy; "stratified" is the only value shown here.
  sample_strategy: "stratified"
  # Presumably the number of items held out for validation; unclear whether
  # they are taken from sample_size or in addition to it — TODO confirm.
  validation_size: 50
  # Presumably a minimum confidence on a 0-1 scale — TODO confirm where it
  # is applied.
  min_confidence: 0.6

# Bulk-processing settings.
# NOTE(review): semantics below are inferred from key names — confirm with
# the consumer.
processing:
  # Presumably the number of emails handled per batch.
  batch_size: 100
  # Presumably the capacity of the queue feeding the LLM — TODO confirm
  # whether this is a hard limit or a flush trigger.
  llm_queue_size: 100
  # Presumably the number of concurrent workers.
  parallel_workers: 4
  # Presumably a checkpoint is written every N processed items; the unit
  # (emails vs. batches) is not visible here — TODO confirm.
  checkpoint_interval: 1000
  # Relative path; the base directory it is resolved against is not defined
  # in this file.
  checkpoint_dir: "checkpoints"

# Classification threshold settings.
# As written, default_threshold (0.55) lies inside the
# [min_threshold, max_threshold] range of 0.50-0.70.
# NOTE(review): how thresholds are adjusted at runtime is not visible in
# this file; the notes below are inferred from key names.
classification:
  default_threshold: 0.55
  # Presumably the bounds that an adjusted threshold is clamped to.
  min_threshold: 0.50
  max_threshold: 0.70
  # Presumably the amount a threshold moves per adjustment.
  adjustment_step: 0.05
  # Presumably "adjust every N items"; the unit is not visible here —
  # TODO confirm.
  adjustment_frequency: 1000
  # Per-category values. Every category listed here currently equals
  # default_threshold.
  category_thresholds:
    junk: 0.55
    auth: 0.55
    transactional: 0.55
    newsletters: 0.55
    conversational: 0.55

# LLM backend settings. Two backend sections are defined below ("ollama"
# and "openai"); `provider` currently names "openai".
llm:
  provider: "openai"
  # NOTE(review): presumably enables falling back when the selected provider
  # fails; what it falls back to is not visible here — TODO confirm.
  fallback_enabled: true

  # Settings for the "ollama" backend; base_url points at localhost.
  ollama:
    base_url: "http://localhost:11434"
    # All three model keys are set to the same model.
    calibration_model: "qwen3:4b-instruct-2507-q8_0"
    consolidation_model: "qwen3:4b-instruct-2507-q8_0"
    classification_model: "qwen3:4b-instruct-2507-q8_0"
    temperature: 0.1
    max_tokens: 2000
    # Presumably seconds — TODO confirm the unit.
    timeout: 30
    retry_attempts: 3

  # Settings for the "openai" backend. base_url points at localhost and
  # api_key is a placeholder string, so this appears to target a local
  # OpenAI-compatible server rather than a hosted API — confirm.
  # NOTE(review): unlike the ollama section, no timeout or retry_attempts
  # keys are set here; verify that the consumer supplies defaults.
  openai:
    base_url: "http://localhost:11433/v1"
    # Placeholder value. Do not commit a real key here; load real secrets
    # from the environment or a secret store.
    api_key: "not-needed"
    # All three model keys are set to the same model.
    calibration_model: "qwen3-coder-30b"
    consolidation_model: "qwen3-coder-30b"
    classification_model: "qwen3-coder-30b"
    temperature: 0.1
    # Lower than the ollama section's max_tokens (2000).
    max_tokens: 500

# Per-provider settings for email sources.
# NOTE(review): batch_size is presumably the number of messages fetched per
# request — confirm against each provider client.
email_providers:
  gmail:
    batch_size: 100
  microsoft:
    batch_size: 100
  # imap is the only provider here with a timeout, and it uses a smaller
  # batch_size than the other two.
  imap:
    # Presumably seconds — TODO confirm the unit.
    timeout: 30
    batch_size: 50

# Feature-extraction settings.
# NOTE(review): the key names resemble text-vectorizer and sentence-embedding
# options, but the consuming code is not visible here — confirm before
# relying on the notes below.
features:
  text_features:
    # Presumably an upper bound on vocabulary size.
    max_vocab_size: 10000
    # Two-element list; presumably (min_n, max_n), i.e. unigrams and bigrams.
    ngram_range: [1, 2]
    # min_df is an integer and max_df a fraction; presumably a minimum
    # document count and a maximum document proportion — TODO confirm.
    min_df: 2
    max_df: 0.95
  # Name of the embedding model to load; how it is resolved (local path vs.
  # download) is not defined in this file.
  embedding_model: "all-MiniLM-L6-v2"
  # Presumably the number of texts embedded per call.
  embedding_batch_size: 32

# Result-export settings.
export:
  # Output format name; "json" is the only value shown in this file.
  format: "json"
  # Presumably includes per-item confidence values in the export — confirm.
  include_confidence: true
  # Presumably also writes a summary report — confirm its format/location.
  create_report: true
  # Relative path; the base directory it is resolved against is not defined
  # in this file.
  output_dir: "results"

# Logging settings.
logging:
  # Log level name, stored as a string.
  level: "INFO"
  # Relative log-file path; the base directory and whether "logs/" is
  # created automatically are not defined in this file — TODO confirm.
  file: "logs/email-sorter.log"

# Post-run cleanup settings.
cleanup:
  # Presumably removes files under temp_dir when finished — confirm.
  delete_temp_files: true
  # NOTE(review): the name suggests deleting a repository after the run;
  # it is disabled here. What "repo" refers to is not visible in this
  # file — confirm before enabling.
  delete_repo_after: false
  # Relative path of the temporary directory.
  temp_dir: ".email-sorter-tmp"