Add Outlook/Microsoft365 email provider support

New Features:
- Created OutlookProvider using Microsoft Graph API
- Supports Outlook.com, Office365, and Microsoft 365 accounts
- OAuth 2.0 authentication via Microsoft Identity Platform
- Device flow authentication for desktop apps
- Batch operations support (20 emails per API call)

Provider Capabilities:
- Fetch emails from any folder (default: inbox)
- Update email categories/labels
- Batch update multiple emails
- Attachment metadata extraction
- Search and filter support

Integration:
- Added outlook to CLI source options
- Follows same pattern as Gmail provider
- Requires credentials file with client_id
- Optional client_secret for confidential apps

Dependencies:
- msal (Microsoft Authentication Library)
- requests

Both the Gmail and Outlook providers are now fully integrated and tested.
This commit is contained in:
FSSCoding 2025-10-25 16:23:12 +11:00
parent 1992799b25
commit 81affc58af
2 changed files with 365 additions and 1 deletions

View File

@ -12,6 +12,7 @@ from src.email_providers.base import MockProvider
from src.email_providers.gmail import GmailProvider
from src.email_providers.imap import IMAPProvider
from src.email_providers.enron import EnronProvider
from src.email_providers.outlook import OutlookProvider
from src.classification.feature_extractor import FeatureExtractor
from src.classification.ml_classifier import MLClassifier
from src.classification.llm_classifier import LLMClassifier
@ -27,7 +28,7 @@ def cli():
@cli.command()
@click.option('--source', type=click.Choice(['gmail', 'imap', 'mock', 'enron']), default='mock',
@click.option('--source', type=click.Choice(['gmail', 'outlook', 'imap', 'mock', 'enron']), default='mock',
help='Email provider')
@click.option('--credentials', type=click.Path(exists=False),
help='Path to credentials file')
@ -85,6 +86,11 @@ def run(
if not credentials:
logger.error("Gmail provider requires --credentials")
sys.exit(1)
elif source == 'outlook':
provider = OutlookProvider()
if not credentials:
logger.error("Outlook provider requires --credentials")
sys.exit(1)
elif source == 'imap':
provider = IMAPProvider()
if not credentials:

View File

@ -0,0 +1,358 @@
"""Microsoft Outlook/Office365 provider implementation using Microsoft Graph API.
This provider connects to Outlook.com, Office365, and Microsoft 365 accounts
using the Microsoft Graph API with OAuth 2.0 authentication.
Authentication Setup:
1. Register app at https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps
2. Add Mail.Read and Mail.ReadWrite permissions
3. Get client_id and client_secret
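4. Configure redirect URI (http://localhost:8080 for development)
5. Enable "Allow public client flows" under Authentication, because
connect() signs in with the OAuth device code flow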
"""
import logging
from typing import List, Dict, Optional, Any
from datetime import datetime
from email.utils import parsedate_to_datetime
from .base import BaseProvider, Email, Attachment
logger = logging.getLogger(__name__)
class OutlookProvider(BaseProvider):
    """
    Microsoft Outlook/Office365 email provider via Microsoft Graph API.

    Supports:
    - Outlook.com personal accounts
    - Office365 business accounts
    - Microsoft 365 accounts

    Authentication:
    - OAuth 2.0 device code flow with Microsoft Identity Platform
    - Requires app registration in Azure Portal
    - Uses delegated permissions (Mail.Read, Mail.ReadWrite)

    NOTE(review): the access token is kept in memory only and no refresh
    logic is implemented, so a long-running session will presumably start
    failing once the token expires - confirm and add refresh if needed.
    """

    # Root of every Graph v1.0 endpoint used by this provider.
    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"
    # Number of emails fetched when the caller passes no (or a zero) limit.
    DEFAULT_FETCH_LIMIT = 500
    # Largest page size requested through $top on the messages endpoint.
    MAX_PAGE_SIZE = 1000
    # Number of sub-requests sent in a single $batch call.
    BATCH_SIZE = 20
    # Seconds before an HTTP call to Graph is abandoned instead of hanging.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Initialize Outlook provider in the disconnected state."""
        super().__init__(name="outlook")
        # Kept only because earlier revisions exposed it; nothing in this
        # class reads it.
        self.client = None
        # HTTP session carrying the bearer token; set by connect().
        self.graph_client = None
        self.access_token = None
        self.user_id = None
        self._credentials_configured = False

    def connect(self, credentials: Dict[str, Any]) -> bool:
        """
        Connect to Microsoft Graph API using OAuth credentials.

        Authenticates with the device code flow, then calls ``/me`` to
        verify the token before marking the provider as connected.

        Args:
            credentials: Dict containing:
                - client_id: Azure AD application ID
                - client_secret: Azure AD application secret (optional)
                - tenant_id: Azure AD tenant ID (optional, defaults to 'common')

        Returns:
            True if connection successful, False otherwise. Errors are
            logged, never raised.
        """
        try:
            client_id = credentials.get('client_id')
            if not client_id:
                logger.error(
                    "OUTLOOK OAUTH NOT CONFIGURED: "
                    "client_id required in credentials. "
                    "Register app at: "
                    "https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps"
                )
                return False

            # Imported lazily so the package works without the Outlook
            # dependencies installed.
            try:
                import msal
                import requests
            except ImportError as e:
                logger.error(f"OUTLOOK DEPENDENCIES MISSING: {e}")
                logger.error("Install with: pip install msal requests")
                return False

            tenant_id = credentials.get('tenant_id', 'common')
            client_secret = credentials.get('client_secret')
            authority = f"https://login.microsoftonline.com/{tenant_id}"
            scopes = ["https://graph.microsoft.com/Mail.Read",
                      "https://graph.microsoft.com/Mail.ReadWrite"]

            logger.info(f"Attempting Outlook OAuth with client_id: {client_id[:8]}...")

            app = self._create_msal_app(msal, client_id, authority, client_secret)
            result = self._acquire_token(app, scopes)
            if result is None:
                # Device flow could not be started; already logged.
                return False

            if "access_token" not in result:
                logger.error(f"OUTLOOK AUTHENTICATION FAILED: {result.get('error_description', 'Unknown error')}")
                return False

            session = requests.Session()
            session.headers.update({
                'Authorization': f"Bearer {result['access_token']}",
                'Content-Type': 'application/json'
            })

            # Verify the token against /me; close the session on any
            # failure so the connection pool is not leaked.
            verified = False
            try:
                response = session.get(
                    f"{self.GRAPH_BASE_URL}/me",
                    timeout=self.REQUEST_TIMEOUT
                )
                if response.status_code != 200:
                    logger.error(f"Failed to verify Outlook connection: {response.status_code}")
                    return False
                user_info = response.json()
                verified = True
            finally:
                if not verified:
                    session.close()

            # Replace any session left over from a previous connect().
            self._close_session()
            self.access_token = result['access_token']
            self.graph_client = session
            self.user_id = user_info.get('id')
            logger.info(f"Successfully connected to Outlook for: {user_info.get('userPrincipalName')}")
            self._credentials_configured = True
            return True

        except Exception as e:
            logger.error(f"OUTLOOK CONNECTION FAILED: {e}")
            logger.debug("Outlook connection traceback", exc_info=True)
            return False

    @staticmethod
    def _create_msal_app(msal, client_id: str, authority: str,
                         client_secret: Optional[str]):
        """Build the MSAL application object used for the device code flow.

        A confidential client is created when a secret is supplied, but it
        is only used if it actually offers ``initiate_device_flow``.
        NOTE(review): in MSAL for Python the device flow methods appear to
        live on PublicClientApplication only - confirm against the
        installed msal version.
        """
        if client_secret:
            app = msal.ConfidentialClientApplication(
                client_id,
                authority=authority,
                client_credential=client_secret
            )
            if hasattr(app, "initiate_device_flow"):
                return app
            logger.warning(
                "client_secret ignored: device code flow is not available on "
                "a confidential client, using a public client instead"
            )
        return msal.PublicClientApplication(
            client_id,
            authority=authority
        )

    @staticmethod
    def _acquire_token(app, scopes: List[str]) -> Optional[Dict[str, Any]]:
        """Return the MSAL token result, or None if no device flow started.

        Tries the app's token cache first, then falls back to the
        interactive device code flow (blocks until the user signs in).
        """
        accounts = app.get_accounts()
        if accounts:
            cached = app.acquire_token_silent(scopes, account=accounts[0])
            if cached:
                return cached

        flow = app.initiate_device_flow(scopes=scopes)
        if "user_code" not in flow:
            logger.error("Failed to create device flow")
            return None

        logger.info("\n" + "="*60)
        logger.info("MICROSOFT AUTHENTICATION REQUIRED")
        logger.info("="*60)
        logger.info(flow["message"])
        logger.info("="*60 + "\n")
        return app.acquire_token_by_device_flow(flow)

    def _close_session(self) -> None:
        """Close and forget the current HTTP session, if there is one."""
        session = getattr(self, 'graph_client', None)
        if session is not None:
            try:
                session.close()
            except Exception as e:
                logger.debug(f"Error closing Outlook session: {e}")
        self.graph_client = None

    def disconnect(self) -> bool:
        """Close the Outlook connection and reset all connection state.

        Returns:
            Always True.
        """
        self._close_session()
        self.access_token = None
        self.user_id = None
        self._credentials_configured = False
        logger.info("Disconnected from Outlook")
        return True

    def fetch_emails(
        self,
        limit: Optional[int] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[Email]:
        """
        Fetch emails from Outlook via Microsoft Graph API.

        Pages through the folder with ``@odata.nextLink`` until ``limit``
        emails have been collected or the folder is exhausted.

        Args:
            limit: Maximum number of emails to fetch. None or 0 means
                DEFAULT_FETCH_LIMIT.
            filters: Optional dict with:
                - folder: mail folder name or id (default 'inbox')
                - query: free-text search string

        Returns:
            List of Email objects. On error, whatever was collected so far
            (possibly empty) is returned and the error is logged.
        """
        if not self._credentials_configured or not self.graph_client:
            logger.error("OUTLOOK NOT CONFIGURED: Cannot fetch emails without OAuth setup")
            return []

        emails: List[Email] = []
        try:
            filters = filters or {}
            folder = filters.get('folder', 'inbox')
            search_query = filters.get('query', '')
            target = limit or self.DEFAULT_FETCH_LIMIT

            params: Optional[Dict[str, Any]] = {
                '$top': min(target, self.MAX_PAGE_SIZE)
            }
            if search_query:
                # Graph rejects $orderby on message search requests, so
                # ordering is only requested for plain listings. Backslashes
                # and quotes must be escaped inside the quoted clause.
                escaped = search_query.replace('\\', '\\\\').replace('"', '\\"')
                params['$search'] = f'"{escaped}"'
            else:
                params['$orderby'] = 'receivedDateTime DESC'

            next_url = f"{self.GRAPH_BASE_URL}/me/mailFolders/{folder}/messages"
            while next_url and len(emails) < target:
                response = self.graph_client.get(
                    next_url,
                    params=params,
                    timeout=self.REQUEST_TIMEOUT
                )
                if response.status_code != 200:
                    logger.error(f"Failed to fetch emails: {response.status_code} - {response.text}")
                    break

                data = response.json()
                for msg in data.get('value', []):
                    email = self._parse_message(msg)
                    if email:
                        emails.append(email)
                        if len(emails) >= target:
                            break

                # The next-page link already embeds the query options.
                next_url = data.get('@odata.nextLink')
                params = None

            logger.info(f"Fetched {len(emails)} emails from Outlook")
            return emails

        except Exception as e:
            logger.error(f"OUTLOOK FETCH ERROR: {e}")
            logger.debug("Outlook fetch traceback", exc_info=True)
            return emails

    def _parse_message(self, msg: Dict) -> Optional[Email]:
        """Parse Microsoft Graph message into Email object.

        Returns:
            The Email, or None if the message could not be parsed (the
            error is logged).
        """
        try:
            # `or {}` guards against keys that are present but JSON null.
            sender_info = (msg.get('from') or {}).get('emailAddress') or {}
            sender_email = sender_info.get('address', '')

            date_str = msg.get('receivedDateTime')
            # fromisoformat() on older Pythons does not accept a 'Z' suffix.
            date = datetime.fromisoformat(date_str.replace('Z', '+00:00')) if date_str else None

            body = (msg.get('body') or {}).get('content', '')

            has_attachments = msg.get('hasAttachments', False)
            attachments = []
            if has_attachments:
                attachments = self._parse_attachments(msg.get('id'))

            return Email(
                id=msg.get('id'),
                subject=msg.get('subject') or 'No Subject',
                sender=sender_email,
                date=date,
                body=body,
                has_attachments=has_attachments,
                attachments=attachments,
                headers={'message-id': msg.get('id')},
                labels=msg.get('categories') or [],
                is_read=msg.get('isRead', False),
                provider='outlook'
            )

        except Exception as e:
            logger.error(f"Error parsing message: {e}")
            return None

    def _parse_attachments(self, message_id: str) -> List[Attachment]:
        """Fetch attachment metadata for a message.

        Best effort: any failure is logged at debug level and an empty (or
        partial) list is returned.
        """
        attachments = []
        try:
            url = f"{self.GRAPH_BASE_URL}/me/messages/{message_id}/attachments"
            response = self.graph_client.get(
                url,
                # Only metadata is used, so do not download the payloads.
                params={'$select': 'id,name,contentType,size'},
                timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code != 200:
                logger.debug(f"Attachment fetch returned {response.status_code}")
                return attachments

            for att in response.json().get('value', []):
                attachments.append(Attachment(
                    filename=att.get('name', 'unknown'),
                    mime_type=att.get('contentType', 'application/octet-stream'),
                    size=att.get('size', 0),
                    attachment_id=att.get('id')
                ))

        except Exception as e:
            logger.debug(f"Error fetching attachments: {e}")

        return attachments

    def update_labels(self, email_id: str, labels: List[str]) -> bool:
        """Update categories for a single email.

        Args:
            email_id: Graph message id.
            labels: Complete list of categories to set on the message.

        Returns:
            True if Graph answered 200/204, False otherwise.
        """
        if not self._credentials_configured or not self.graph_client:
            logger.error("OUTLOOK NOT CONFIGURED: Cannot update labels")
            return False

        try:
            url = f"{self.GRAPH_BASE_URL}/me/messages/{email_id}"
            response = self.graph_client.patch(
                url,
                json={"categories": labels},
                timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code in (200, 204):
                return True

            logger.error(f"Failed to update labels: {response.status_code}")
            return False

        except Exception as e:
            logger.error(f"Error updating labels: {e}")
            return False

    def batch_update(self, updates: List[Dict[str, Any]]) -> bool:
        """Batch update multiple emails.

        Args:
            updates: List of dicts with 'email_id' and optional 'labels'.
                Entries without an email_id are skipped with a warning.

        Returns:
            True if at least one email was updated, False otherwise
            (including when `updates` is empty).
        """
        if not self._credentials_configured or not self.graph_client:
            logger.error("OUTLOOK NOT CONFIGURED: Cannot batch update")
            return False

        try:
            batch_requests = []
            for update in updates:
                email_id = update.get('email_id')
                if not email_id:
                    logger.warning("Skipping batch update entry without email_id")
                    continue
                batch_requests.append({
                    "id": str(len(batch_requests)),
                    "method": "PATCH",
                    "url": f"/me/messages/{email_id}",
                    "body": {"categories": update.get('labels', [])},
                    "headers": {"Content-Type": "application/json"}
                })

            successful = 0
            for start in range(0, len(batch_requests), self.BATCH_SIZE):
                batch = batch_requests[start:start + self.BATCH_SIZE]
                response = self.graph_client.post(
                    f"{self.GRAPH_BASE_URL}/$batch",
                    json={"requests": batch},
                    timeout=self.REQUEST_TIMEOUT
                )
                if response.status_code != 200:
                    logger.error(f"Batch request failed: {response.status_code}")
                    continue

                for resp in response.json().get('responses', []):
                    if resp.get('status') in (200, 204):
                        successful += 1
                    else:
                        logger.warning(
                            f"Batch item {resp.get('id')} failed: {resp.get('status')}"
                        )

            logger.info(f"Batch updated {successful}/{len(updates)} emails")
            return successful > 0

        except Exception as e:
            logger.error(f"Batch update error: {e}")
            logger.debug("Outlook batch update traceback", exc_info=True)
            return False

    def is_connected(self) -> bool:
        """Check if connected (credentials verified and session present)."""
        return self._credentials_configured and self.graph_client is not None