email-sorter/config/default_config.yaml
FSSCoding 10862583ad Add batch LLM classifier tool with prompt caching optimization
- Created standalone batch_llm_classifier.py for custom email queries
- Optimized all LLM prompts for caching (static instructions first, variables last)
- Configured rtx3090 vLLM endpoint (qwen3-coder-30b)
- Tested batch_size=4 optimal (100% success, 4.65 req/sec)
- Added comprehensive documentation (tools/README.md, BATCH_LLM_QUICKSTART.md)

Tool is completely separate from main ML pipeline - no interference.
Prerequisite: vLLM server must be running at rtx3090.bobai.com.au
2025-11-14 16:01:57 +11:00

83 lines
1.6 KiB
YAML

# Top-level version string (quoted, so it is always read as a string).
version: "1.0.0"
# Calibration settings. The four keys below are children of `calibration`;
# without the indentation the parent would parse as null and the children
# would become unrelated top-level keys.
calibration:
  sample_size: 250
  sample_strategy: "stratified"
  validation_size: 50
  min_confidence: 0.6
# Processing settings. `batch_size` here is scoped to `processing`; it is a
# different key from the per-provider `batch_size` values that appear under
# `email_providers`, so it must stay nested to avoid a duplicate-key clash.
processing:
  batch_size: 100
  llm_queue_size: 100
  parallel_workers: 4
  checkpoint_interval: 1000
  checkpoint_dir: "checkpoints"
# Classification thresholds. `min_threshold`/`max_threshold` span 0.50-0.70
# and `adjustment_step` is 0.05.
classification:
  default_threshold: 0.55
  min_threshold: 0.50
  max_threshold: 0.70
  adjustment_step: 0.05
  adjustment_frequency: 1000
  # One entry per category; every value currently equals `default_threshold`.
  category_thresholds:
    junk: 0.55
    auth: 0.55
    transactional: 0.55
    newsletters: 0.55
    conversational: 0.55
# LLM settings. `provider` names one of the sub-sections below (`ollama` or
# `openai`). Each sub-section carries its own `base_url`, model names,
# `temperature` and `max_tokens`, so they must be nested: flattened, the
# second section's keys silently overwrite the first's in most parsers.
llm:
  provider: "ollama"
  fallback_enabled: true
  ollama:
    base_url: "http://localhost:11434"
    calibration_model: "qwen3:4b-instruct-2507-q8_0"
    consolidation_model: "qwen3:4b-instruct-2507-q8_0"
    classification_model: "qwen3:4b-instruct-2507-q8_0"
    temperature: 0.1
    max_tokens: 2000
    # NOTE(review): `timeout` and `retry_attempts` are placed under `ollama`
    # because they directly follow its keys; confirm against the config
    # loader that they are not meant to sit at the `llm` level.
    timeout: 30
    retry_attempts: 3
  openai:
    base_url: "https://rtx3090.bobai.com.au/v1"
    # SECURITY NOTE(review): this looks like a live credential committed in
    # plain text. Rotate it and supply it from an environment variable or a
    # secret store instead. The value is left unchanged here only so that
    # existing setups keep working until the loader supports that.
    api_key: "rtx3090_foxadmin_10_8034ecb47841f45ba1d5f3f5d875c092"
    calibration_model: "qwen3-coder-30b"
    classification_model: "qwen3-coder-30b"
    temperature: 0.1
    # Lower than the `ollama` section's 2000.
    max_tokens: 500
# Per-provider settings. Each provider has its own `batch_size`, so every
# provider must be its own nested mapping; flattened, the three `batch_size`
# keys are duplicates and only one value survives parsing.
email_providers:
  gmail:
    batch_size: 100
  microsoft:
    batch_size: 100
  imap:
    timeout: 30
    batch_size: 50
# Feature-extraction settings.
features:
  text_features:
    max_vocab_size: 10000
    ngram_range: [1, 2]
    min_df: 2
    max_df: 0.95
  # NOTE(review): the two `embedding_*` keys are placed directly under
  # `features` (siblings of `text_features`); the original nesting is not
  # recoverable from the flattened text, so confirm against the loader.
  embedding_model: "all-MiniLM-L6-v2"
  embedding_batch_size: 32
# Export settings; the keys below are children of `export`.
export:
  format: "json"
  include_confidence: true
  create_report: true
  output_dir: "results"
# Logging settings; the keys below are children of `logging`.
logging:
  level: "INFO"
  file: "logs/email-sorter.log"
# Cleanup settings; the keys below are children of `cleanup`.
cleanup:
  delete_temp_files: true
  delete_repo_after: false
  temp_dir: ".email-sorter-tmp"