"""Integration tests for email-sorter.""" import pytest from src.email_providers.base import MockProvider, Email from src.classification.feature_extractor import FeatureExtractor from src.classification.ml_classifier import MLClassifier from src.classification.adaptive_classifier import AdaptiveClassifier from src.utils.config import load_config, load_categories from datetime import datetime def test_end_to_end_mock_classification(sample_emails, config, categories): """Test end-to-end classification with mock provider.""" # Setup mock provider provider = MockProvider() provider.connect({}) # Add sample emails for email in sample_emails: provider.add_mock_email(email) # Fetch emails emails = provider.fetch_emails() assert len(emails) == len(sample_emails) # Setup classifiers feature_extractor = FeatureExtractor() ml_classifier = MLClassifier() classifier = AdaptiveClassifier( feature_extractor, ml_classifier, None, categories, config.dict() ) # Classify results = classifier.classify_batch(emails) assert len(results) == len(emails) assert all(r.email_id is not None for r in results) assert all(r.category in categories for r in results) # Check stats stats = classifier.get_stats() assert stats.total_emails == len(emails) assert stats.rule_matched + stats.ml_classified + stats.needs_review > 0 def test_mock_provider_integration(): """Test mock provider""" provider = MockProvider() assert not provider.is_connected() provider.connect({}) assert provider.is_connected() email = Email( id='test-1', subject='Test email', sender='test@example.com', body='Test body' ) provider.add_mock_email(email) emails = provider.fetch_emails() assert len(emails) == 1 assert emails[0].id == 'test-1' provider.disconnect() assert not provider.is_connected() def test_classification_pipeline_with_auth_email(config, categories): """Test full classification of authentication email.""" from src.email_providers.base import Email auth_email = Email( id='auth-1', subject='Verify your account - Action Required', sender='noreply@service.com', body='Your verification code is 654321. Do not share this code.', body_snippet='Your verification code is 654321' ) feature_extractor = FeatureExtractor() ml_classifier = MLClassifier() classifier = AdaptiveClassifier( feature_extractor, ml_classifier, None, categories, config.dict() ) result = classifier.classify(auth_email) assert result.email_id == 'auth-1' assert result.category == 'auth' assert result.method == 'rule' # Should match hard rule def test_classification_pipeline_with_invoice_email(config, categories): """Test full classification of invoice email.""" from src.email_providers.base import Email, Attachment invoice_email = Email( id='invoice-1', subject='Invoice #INV-2024-9999 - October Services', sender='billing@vendor.com', body='Please see attached invoice for services rendered.', body_snippet='See attached invoice', has_attachments=True, attachments=[ Attachment('invoice.pdf', 'application/pdf', 100000) ] ) feature_extractor = FeatureExtractor() ml_classifier = MLClassifier() classifier = AdaptiveClassifier( feature_extractor, ml_classifier, None, categories, config.dict() ) result = classifier.classify(invoice_email) assert result.email_id == 'invoice-1' assert result.category == 'transactional' def test_batch_classification(sample_emails, config, categories): """Test batch classification.""" feature_extractor = FeatureExtractor() ml_classifier = MLClassifier() classifier = AdaptiveClassifier( feature_extractor, ml_classifier, None, categories, config.dict() ) results = classifier.classify_batch(sample_emails) assert len(results) == len(sample_emails) for result in results: assert result.category in list(categories.keys()) + ['unknown'] assert 0 <= result.confidence <= 1