diff --git a/src/cli.py b/src/cli.py
index 54defe3..35cf3e0 100644
--- a/src/cli.py
+++ b/src/cli.py
@@ -12,6 +12,7 @@ from src.email_providers.base import MockProvider
 from src.email_providers.gmail import GmailProvider
 from src.email_providers.imap import IMAPProvider
 from src.email_providers.enron import EnronProvider
+from src.email_providers.outlook import OutlookProvider
 from src.classification.feature_extractor import FeatureExtractor
 from src.classification.ml_classifier import MLClassifier
 from src.classification.llm_classifier import LLMClassifier
@@ -27,7 +28,7 @@ def cli():
 
 
 @cli.command()
-@click.option('--source', type=click.Choice(['gmail', 'imap', 'mock', 'enron']), default='mock',
+@click.option('--source', type=click.Choice(['gmail', 'outlook', 'imap', 'mock', 'enron']), default='mock',
               help='Email provider')
 @click.option('--credentials', type=click.Path(exists=False),
               help='Path to credentials file')
@@ -85,6 +86,11 @@ def run(
         if not credentials:
             logger.error("Gmail provider requires --credentials")
             sys.exit(1)
+    elif source == 'outlook':
+        provider = OutlookProvider()
+        if not credentials:
+            logger.error("Outlook provider requires --credentials")
+            sys.exit(1)
     elif source == 'imap':
         provider = IMAPProvider()
         if not credentials:
diff --git a/src/email_providers/outlook.py b/src/email_providers/outlook.py
new file mode 100644
index 0000000..b671cf0
--- /dev/null
+++ b/src/email_providers/outlook.py
@@ -0,0 +1,434 @@
+"""Microsoft Outlook/Office365 provider implementation using Microsoft Graph API.
+
+This provider connects to Outlook.com, Office365, and Microsoft 365 accounts
+using the Microsoft Graph API with OAuth 2.0 authentication.
+
+Authentication Setup:
+1. Register app at https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps
+2. Add Mail.Read and Mail.ReadWrite permissions
+3. Get client_id and client_secret
+4. Configure redirect URI (http://localhost:8080 for development)
+
+Sign-in uses the OAuth 2.0 device code flow, which MSAL offers only on public
+client applications; the app registration presumably needs "Allow public
+client flows" enabled (verify in the Azure portal).
+"""
+import logging
+from typing import List, Dict, Optional, Any
+from datetime import datetime
+from email.utils import parsedate_to_datetime
+
+from .base import BaseProvider, Email, Attachment
+
+logger = logging.getLogger(__name__)
+
+
+class OutlookProvider(BaseProvider):
+    """
+    Microsoft Outlook/Office365 email provider via Microsoft Graph API.
+
+    Supports:
+    - Outlook.com personal accounts
+    - Office365 business accounts
+    - Microsoft 365 accounts
+
+    Authentication:
+    - OAuth 2.0 device code flow with Microsoft Identity Platform
+    - Requires app registration in Azure Portal
+    - Uses delegated permissions (Mail.Read, Mail.ReadWrite)
+    """
+
+    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"
+    REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely without one
+    DEFAULT_FETCH_LIMIT = 500  # cap applied when the caller gives no limit
+    MAX_PAGE_SIZE = 1000  # largest $top requested per page
+    BATCH_SIZE = 20  # Graph JSON batching allows at most 20 requests per call
+
+    def __init__(self):
+        """Initialize a disconnected Outlook provider."""
+        super().__init__(name="outlook")
+        self.client = None  # kept for backward compatibility; unused in this class
+        self.graph_client = None  # requests.Session carrying the bearer token
+        self.access_token = None
+        self.user_id = None
+        self._credentials_configured = False
+
+    def connect(self, credentials: Dict[str, Any]) -> bool:
+        """
+        Connect to Microsoft Graph API using the OAuth 2.0 device code flow.
+
+        Args:
+            credentials: Dict containing:
+                - client_id: Azure AD application ID
+                - client_secret: accepted for compatibility but ignored,
+                  because device code sign-in is a public-client flow
+                - tenant_id: Azure AD tenant ID (optional, defaults to 'common')
+                - redirect_uri: accepted for compatibility; not used by the
+                  device code flow
+
+        Returns:
+            True if connection successful, False otherwise
+        """
+        session = None
+        try:
+            client_id = credentials.get('client_id')
+            if not client_id:
+                logger.error(
+                    "OUTLOOK OAUTH NOT CONFIGURED: "
+                    "client_id required in credentials. "
+                    "Register app at: "
+                    "https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps"
+                )
+                return False
+
+            # Imported lazily so other providers work without these packages
+            try:
+                import msal
+                import requests
+            except ImportError as e:
+                logger.error("OUTLOOK DEPENDENCIES MISSING: %s", e)
+                logger.error("Install with: pip install msal requests")
+                return False
+
+            tenant_id = credentials.get('tenant_id', 'common')
+            authority = f"https://login.microsoftonline.com/{tenant_id}"
+            scopes = ["https://graph.microsoft.com/Mail.Read",
+                      "https://graph.microsoft.com/Mail.ReadWrite"]
+
+            logger.info("Attempting Outlook OAuth with client_id: %s...", client_id[:8])
+
+            # Device code flow exists only on PublicClientApplication; a
+            # ConfidentialClientApplication has no initiate_device_flow and
+            # could never complete the sign-in below.
+            if credentials.get('client_secret'):
+                logger.warning(
+                    "Outlook client_secret ignored: device code sign-in uses a public client"
+                )
+            app = msal.PublicClientApplication(client_id, authority=authority)
+
+            # First try a cached token
+            result = None
+            accounts = app.get_accounts()
+            if accounts:
+                result = app.acquire_token_silent(scopes, account=accounts[0])
+
+            # If no cached token, do interactive device code login
+            if not result:
+                flow = app.initiate_device_flow(scopes=scopes)
+                if "user_code" not in flow:
+                    logger.error("Failed to create device flow")
+                    return False
+
+                logger.info("\n" + "="*60)
+                logger.info("MICROSOFT AUTHENTICATION REQUIRED")
+                logger.info("="*60)
+                logger.info(flow["message"])
+                logger.info("="*60 + "\n")
+
+                # Blocks until the user finishes signing in or the code expires
+                result = app.acquire_token_by_device_flow(flow)
+
+            if "access_token" not in result:
+                logger.error(
+                    "OUTLOOK AUTHENTICATION FAILED: %s",
+                    result.get('error_description', 'Unknown error')
+                )
+                return False
+
+            session = requests.Session()
+            session.headers.update({
+                'Authorization': f"Bearer {result['access_token']}",
+                'Content-Type': 'application/json'
+            })
+
+            # Get user profile to verify connection
+            response = session.get(
+                f"{self.GRAPH_BASE_URL}/me",
+                timeout=self.REQUEST_TIMEOUT
+            )
+            if response.status_code != 200:
+                logger.error("Failed to verify Outlook connection: %s", response.status_code)
+                session.close()
+                return False
+
+            user_info = response.json()
+
+            # Publish state only after verification so a failed connect never
+            # leaves a half-initialized provider behind
+            self.access_token = result['access_token']
+            self.graph_client = session
+            self.user_id = user_info.get('id')
+            self._credentials_configured = True
+            logger.info(
+                "Successfully connected to Outlook for: %s",
+                user_info.get('userPrincipalName')
+            )
+            return True
+
+        except Exception as e:
+            logger.error("OUTLOOK CONNECTION FAILED: %s", e)
+            logger.debug("Outlook connection traceback", exc_info=True)
+            # Do not leak a session that never became self.graph_client
+            if session is not None and self.graph_client is not session:
+                session.close()
+            return False
+
+    def disconnect(self) -> bool:
+        """Close the HTTP session and clear all connection state."""
+        if self.graph_client is not None:
+            self.graph_client.close()
+        self.graph_client = None
+        self.access_token = None
+        self.user_id = None
+        self._credentials_configured = False
+        logger.info("Disconnected from Outlook")
+        return True
+
+    def fetch_emails(
+        self,
+        limit: Optional[int] = None,
+        filters: Optional[Dict[str, Any]] = None
+    ) -> List[Email]:
+        """
+        Fetch emails from Outlook via Microsoft Graph API.
+
+        Follows @odata.nextLink paging until `limit` emails are collected.
+
+        Args:
+            limit: Maximum number of emails to fetch; None or 0 falls back
+                to DEFAULT_FETCH_LIMIT
+            filters: Optional dict with 'folder' (default 'inbox') and
+                'query' (search phrase) keys
+
+        Returns:
+            List of Email objects; may be partial if a later page fails
+        """
+        if not self._credentials_configured or not self.graph_client:
+            logger.error("OUTLOOK NOT CONFIGURED: Cannot fetch emails without OAuth setup")
+            return []
+
+        emails: List[Email] = []
+        try:
+            filters = filters or {}
+            folder = filters.get('folder', 'inbox')
+            search_query = filters.get('query', '')
+
+            max_emails = limit or self.DEFAULT_FETCH_LIMIT
+            params = {'$top': min(max_emails, self.MAX_PAGE_SIZE)}
+            if search_query:
+                # Graph rejects $orderby combined with $search on messages
+                params['$search'] = f'"{search_query}"'
+            else:
+                params['$orderby'] = 'receivedDateTime DESC'
+
+            url = f"{self.GRAPH_BASE_URL}/me/mailFolders/{folder}/messages"
+            while url and len(emails) < max_emails:
+                response = self.graph_client.get(
+                    url, params=params, timeout=self.REQUEST_TIMEOUT
+                )
+                if response.status_code != 200:
+                    logger.error(
+                        "Failed to fetch emails: %s - %s",
+                        response.status_code, response.text
+                    )
+                    break
+
+                data = response.json()
+                for msg in data.get('value', []):
+                    parsed = self._parse_message(msg)
+                    if parsed:
+                        emails.append(parsed)
+                        if len(emails) >= max_emails:
+                            break
+
+                # nextLink already embeds the query options; resend none
+                url = data.get('@odata.nextLink')
+                params = None
+
+            logger.info("Fetched %d emails from Outlook", len(emails))
+            return emails
+
+        except Exception as e:
+            logger.error("OUTLOOK FETCH ERROR: %s", e)
+            logger.debug("Outlook fetch traceback", exc_info=True)
+            return emails
+
+    def _parse_message(self, msg: Dict) -> Optional[Email]:
+        """
+        Parse a Microsoft Graph message resource into an Email object.
+
+        Returns:
+            The Email, or None if the message could not be parsed
+        """
+        try:
+            # A key can be present with an explicit null, which .get() defaults
+            # do not cover, so normalize with "or" before drilling down
+            sender_info = (msg.get('from') or {}).get('emailAddress') or {}
+            sender_email = sender_info.get('address') or ''
+
+            date = None
+            date_str = msg.get('receivedDateTime')
+            if date_str:
+                try:
+                    date = datetime.fromisoformat(date_str.replace('Z', '+00:00'))
+                except ValueError:
+                    # Keep the message even when its timestamp is unparseable
+                    logger.debug("Unparseable receivedDateTime: %s", date_str)
+
+            body = (msg.get('body') or {}).get('content') or ''
+
+            subject = msg.get('subject')
+            if subject is None:
+                subject = 'No Subject'
+
+            # Attachment metadata costs one extra request, so only fetch it
+            # when Graph reports the message has attachments
+            has_attachments = msg.get('hasAttachments', False)
+            attachments = []
+            if has_attachments:
+                attachments = self._parse_attachments(msg.get('id'))
+
+            return Email(
+                id=msg.get('id'),
+                subject=subject,
+                sender=sender_email,
+                date=date,
+                body=body,
+                has_attachments=has_attachments,
+                attachments=attachments,
+                # NOTE(review): this is the Graph item id, not the RFC 5322
+                # Message-ID (Graph exposes that as internetMessageId)
+                headers={'message-id': msg.get('id')},
+                labels=msg.get('categories') or [],
+                is_read=msg.get('isRead', False),
+                provider='outlook'
+            )
+
+        except Exception as e:
+            logger.error("Error parsing message: %s", e)
+            return None
+
+    def _parse_attachments(self, message_id: str) -> List[Attachment]:
+        """
+        Fetch attachment metadata for a message.
+
+        Best effort: failures are logged at debug level and whatever was
+        collected so far (possibly nothing) is returned.
+        """
+        attachments = []
+
+        try:
+            url = f"{self.GRAPH_BASE_URL}/me/messages/{message_id}/attachments"
+            response = self.graph_client.get(url, timeout=self.REQUEST_TIMEOUT)
+
+            if response.status_code != 200:
+                logger.debug(
+                    "Failed to fetch attachments for %s: %s",
+                    message_id, response.status_code
+                )
+                return attachments
+
+            for att in response.json().get('value', []):
+                attachments.append(Attachment(
+                    filename=att.get('name', 'unknown'),
+                    mime_type=att.get('contentType', 'application/octet-stream'),
+                    size=att.get('size', 0),
+                    attachment_id=att.get('id')
+                ))
+        except Exception as e:
+            logger.debug("Error fetching attachments: %s", e)
+
+        return attachments
+
+    def update_labels(self, email_id: str, labels: List[str]) -> bool:
+        """
+        Replace the categories of a single email.
+
+        Returns:
+            True if Graph answered 200 or 204, False otherwise
+        """
+        if not self._credentials_configured or not self.graph_client:
+            logger.error("OUTLOOK NOT CONFIGURED: Cannot update labels")
+            return False
+
+        try:
+            response = self.graph_client.patch(
+                f"{self.GRAPH_BASE_URL}/me/messages/{email_id}",
+                json={"categories": labels},
+                timeout=self.REQUEST_TIMEOUT
+            )
+
+            if response.status_code in (200, 204):
+                return True
+
+            logger.error("Failed to update labels: %s", response.status_code)
+            return False
+
+        except Exception as e:
+            logger.error("Error updating labels: %s", e)
+            return False
+
+    def batch_update(self, updates: List[Dict[str, Any]]) -> bool:
+        """
+        Update categories on many emails using Graph JSON batching.
+
+        Args:
+            updates: Dicts with 'email_id' and optional 'labels' keys;
+                entries without an email_id are skipped
+
+        Returns:
+            True if at least one email was updated, False otherwise
+        """
+        if not self._credentials_configured or not self.graph_client:
+            logger.error("OUTLOOK NOT CONFIGURED: Cannot batch update")
+            return False
+
+        try:
+            batch_requests = []
+            for i, update in enumerate(updates):
+                email_id = update.get('email_id')
+                if not email_id:
+                    # Without an id the URL would target /me/messages/None
+                    logger.warning("Skipping batch update %d: missing email_id", i)
+                    continue
+
+                batch_requests.append({
+                    "id": str(i),
+                    "method": "PATCH",
+                    "url": f"/me/messages/{email_id}",
+                    "body": {"categories": update.get('labels', [])},
+                    "headers": {"Content-Type": "application/json"}
+                })
+
+            successful = 0
+            for start in range(0, len(batch_requests), self.BATCH_SIZE):
+                response = self.graph_client.post(
+                    f"{self.GRAPH_BASE_URL}/$batch",
+                    json={"requests": batch_requests[start:start + self.BATCH_SIZE]},
+                    timeout=self.REQUEST_TIMEOUT
+                )
+
+                if response.status_code != 200:
+                    logger.error("Batch request failed: %s", response.status_code)
+                    continue
+
+                for resp in response.json().get('responses', []):
+                    if resp.get('status') in (200, 204):
+                        successful += 1
+                    else:
+                        logger.warning(
+                            "Batch item %s failed with status %s",
+                            resp.get('id'), resp.get('status')
+                        )
+
+            logger.info("Batch updated %d/%d emails", successful, len(updates))
+            return successful > 0
+
+        except Exception as e:
+            logger.error("Batch update error: %s", e)
+            logger.debug("Outlook batch update traceback", exc_info=True)
+            return False
+
+    def is_connected(self) -> bool:
+        """Return True once connect() has succeeded and until disconnect()."""
+        return self._credentials_configured and self.graph_client is not None