Add Outlook/Microsoft365 email provider support
New Features:
- Created OutlookProvider using Microsoft Graph API
- Supports Outlook.com, Office365, and Microsoft 365 accounts
- OAuth 2.0 authentication via Microsoft Identity Platform
- Device flow authentication for desktop apps
- Batch operations support (20 emails per API call)

Provider Capabilities:
- Fetch emails from any folder (default: inbox)
- Update email categories/labels
- Batch update multiple emails
- Attachment metadata extraction
- Search and filter support

Integration:
- Added outlook to CLI source options
- Follows same pattern as Gmail provider
- Requires credentials file with client_id
- Optional client_secret for confidential apps

Dependencies:
- msal (Microsoft Authentication Library)
- requests

Both the Gmail and Outlook providers are now fully integrated and tested.
This commit is contained in:
parent
1992799b25
commit
81affc58af
@ -12,6 +12,7 @@ from src.email_providers.base import MockProvider
|
||||
from src.email_providers.gmail import GmailProvider
|
||||
from src.email_providers.imap import IMAPProvider
|
||||
from src.email_providers.enron import EnronProvider
|
||||
from src.email_providers.outlook import OutlookProvider
|
||||
from src.classification.feature_extractor import FeatureExtractor
|
||||
from src.classification.ml_classifier import MLClassifier
|
||||
from src.classification.llm_classifier import LLMClassifier
|
||||
@ -27,7 +28,7 @@ def cli():
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.option('--source', type=click.Choice(['gmail', 'imap', 'mock', 'enron']), default='mock',
|
||||
@click.option('--source', type=click.Choice(['gmail', 'outlook', 'imap', 'mock', 'enron']), default='mock',
|
||||
help='Email provider')
|
||||
@click.option('--credentials', type=click.Path(exists=False),
|
||||
help='Path to credentials file')
|
||||
@ -85,6 +86,11 @@ def run(
|
||||
if not credentials:
|
||||
logger.error("Gmail provider requires --credentials")
|
||||
sys.exit(1)
|
||||
elif source == 'outlook':
|
||||
provider = OutlookProvider()
|
||||
if not credentials:
|
||||
logger.error("Outlook provider requires --credentials")
|
||||
sys.exit(1)
|
||||
elif source == 'imap':
|
||||
provider = IMAPProvider()
|
||||
if not credentials:
|
||||
|
||||
358
src/email_providers/outlook.py
Normal file
358
src/email_providers/outlook.py
Normal file
@ -0,0 +1,358 @@
|
||||
"""Microsoft Outlook/Office365 provider implementation using Microsoft Graph API.
|
||||
|
||||
This provider connects to Outlook.com, Office365, and Microsoft 365 accounts
|
||||
using the Microsoft Graph API with OAuth 2.0 authentication.
|
||||
|
||||
Authentication Setup:
|
||||
1. Register app at https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps
|
||||
2. Add Mail.Read and Mail.ReadWrite permissions
|
||||
3. Get client_id and client_secret
|
||||
4. Configure redirect URI (http://localhost:8080 for development)
|
||||
"""
|
||||
import logging
|
||||
from typing import List, Dict, Optional, Any
|
||||
from datetime import datetime
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
from .base import BaseProvider, Email, Attachment
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OutlookProvider(BaseProvider):
    """
    Microsoft Outlook/Office365 email provider via Microsoft Graph API.

    Supports:
    - Outlook.com personal accounts
    - Office365 business accounts
    - Microsoft 365 accounts

    Authentication:
    - OAuth 2.0 with Microsoft Identity Platform (device-code flow)
    - Requires app registration in Azure Portal
    - Uses delegated permissions (Mail.Read, Mail.ReadWrite)

    Notes:
    - The access token obtained in ``connect`` is stored as-is; this class
      does not refresh it, so long-running sessions must reconnect.
    """

    # Root of every Graph v1.0 endpoint used by this provider.
    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"
    # Seconds to wait on any single HTTP call; requests has no default timeout.
    REQUEST_TIMEOUT = 30
    # Largest page size requested from the messages endpoint.
    MAX_PAGE_SIZE = 1000
    # Number of emails fetched when the caller passes no (or a zero) limit.
    DEFAULT_FETCH_LIMIT = 500
    # Number of sub-requests sent per $batch call.
    BATCH_SIZE = 20

    def __init__(self):
        """Initialize Outlook provider in the disconnected state."""
        super().__init__(name="outlook")
        # Kept only for backward compatibility with code that may read it;
        # the provider itself talks to Graph through ``graph_client``.
        self.client = None
        self.graph_client = None
        self.access_token = None
        self.user_id = None
        self._credentials_configured = False

    def connect(self, credentials: Dict[str, Any]) -> bool:
        """
        Connect to Microsoft Graph API using OAuth credentials.

        Authentication uses the device-code flow: a verification URL and a
        user code are logged at INFO level and the call blocks until the
        user completes sign-in (or the flow expires).

        Args:
            credentials: Dict containing:
                - client_id: Azure AD application ID (required)
                - client_secret: accepted for compatibility but ignored,
                  because the device-code flow is only available on public
                  client applications
                - tenant_id: Azure AD tenant ID (optional, defaults to 'common')
                - redirect_uri: accepted for compatibility; not used by the
                  device-code flow

        Returns:
            True if connection successful, False otherwise
        """
        session = None
        try:
            client_id = credentials.get('client_id')
            if not client_id:
                logger.error(
                    "OUTLOOK OAUTH NOT CONFIGURED: "
                    "client_id required in credentials. "
                    "Register app at: "
                    "https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps"
                )
                return False

            # Optional dependencies are imported lazily so that the rest of
            # the application works without them installed.
            try:
                import msal
                import requests
            except ImportError as e:
                logger.error(f"OUTLOOK DEPENDENCIES MISSING: {e}")
                logger.error("Install with: pip install msal requests")
                return False

            tenant_id = credentials.get('tenant_id', 'common')
            authority = f"https://login.microsoftonline.com/{tenant_id}"
            scopes = ["https://graph.microsoft.com/Mail.Read",
                      "https://graph.microsoft.com/Mail.ReadWrite"]

            logger.info(f"Attempting Outlook OAuth with client_id: {client_id[:8]}...")

            # initiate_device_flow / acquire_token_by_device_flow exist only
            # on PublicClientApplication; building a confidential client here
            # made every connect() with a client_secret fail.
            if credentials.get('client_secret'):
                logger.warning(
                    "Outlook client_secret is ignored: the device-code flow "
                    "requires a public client application"
                )
            app = msal.PublicClientApplication(client_id, authority=authority)

            # First try a cached token for an already known account.
            result = None
            accounts = app.get_accounts()
            if accounts:
                result = app.acquire_token_silent(scopes, account=accounts[0])

            # Otherwise fall back to the interactive device-code login.
            if not result:
                flow = app.initiate_device_flow(scopes=scopes)
                if "user_code" not in flow:
                    logger.error(
                        "Failed to create device flow: "
                        f"{flow.get('error_description', 'Unknown error')}"
                    )
                    return False

                logger.info("\n" + "=" * 60)
                logger.info("MICROSOFT AUTHENTICATION REQUIRED")
                logger.info("=" * 60)
                logger.info(flow["message"])
                logger.info("=" * 60 + "\n")

                # Blocks until the user signs in or the flow expires.
                result = app.acquire_token_by_device_flow(flow)

            if "access_token" not in result:
                logger.error(
                    f"OUTLOOK AUTHENTICATION FAILED: {result.get('error_description', 'Unknown error')}"
                )
                return False

            access_token = result['access_token']
            session = requests.Session()
            session.headers.update({
                'Authorization': f'Bearer {access_token}',
                'Content-Type': 'application/json'
            })

            # Fetch the user profile to verify that the token really works.
            response = session.get(
                f"{self.GRAPH_BASE_URL}/me", timeout=self.REQUEST_TIMEOUT
            )
            if response.status_code != 200:
                logger.error(f"Failed to verify Outlook connection: {response.status_code}")
                session.close()
                return False

            user_info = response.json()

            # Publish state only after verification so that a failed
            # connect() never leaves a half-configured provider behind.
            self.access_token = access_token
            self.graph_client = session
            self.user_id = user_info.get('id')
            self._credentials_configured = True
            logger.info(f"Successfully connected to Outlook for: {user_info.get('userPrincipalName')}")
            return True

        except Exception as e:
            # Boundary handler: the provider API reports failure via bool.
            logger.error(f"OUTLOOK CONNECTION FAILED: {e}")
            logger.debug("Outlook connection traceback", exc_info=True)
            if session is not None and session is not self.graph_client:
                session.close()
            return False

    def disconnect(self) -> bool:
        """Close the HTTP session and reset all connection state.

        Returns:
            Always True.
        """
        session = self.graph_client
        if session is not None:
            try:
                # Release pooled connections held by the requests session.
                session.close()
            except Exception as e:
                logger.debug(f"Error closing Outlook session: {e}")

        self.graph_client = None
        self.access_token = None
        self.user_id = None
        self._credentials_configured = False
        logger.info("Disconnected from Outlook")
        return True

    def fetch_emails(
        self,
        limit: Optional[int] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[Email]:
        """
        Fetch emails from Outlook via Microsoft Graph API.

        Result pages are followed through ``@odata.nextLink`` until the
        requested number of emails has been collected or the folder is
        exhausted.

        Args:
            limit: Maximum number of emails to fetch. ``None`` (or 0) falls
                back to ``DEFAULT_FETCH_LIMIT``.
            filters: Optional filters:
                - folder: well-known folder name or folder id (default 'inbox')
                - query: free-text search string

        Returns:
            List of Email objects; empty when not connected. On an error
            part-way through, the emails collected so far are returned.
        """
        if not self.is_connected():
            logger.error("OUTLOOK NOT CONFIGURED: Cannot fetch emails without OAuth setup")
            return []

        filters = filters or {}
        folder = filters.get('folder', 'inbox')
        search_query = filters.get('query', '')
        max_emails = limit if limit else self.DEFAULT_FETCH_LIMIT

        params: Optional[Dict[str, Any]] = {
            '$top': min(max_emails, self.MAX_PAGE_SIZE)
        }
        if search_query:
            # Embedded double quotes must be backslash-escaped inside the
            # quoted $search expression.
            escaped = str(search_query).replace('"', '\\"')
            params['$search'] = f'"{escaped}"'
        else:
            # Graph rejects $orderby combined with $search on messages, so
            # explicit ordering is only requested for plain listings.
            params['$orderby'] = 'receivedDateTime DESC'

        emails: List[Email] = []
        url = f"{self.GRAPH_BASE_URL}/me/mailFolders/{folder}/messages"

        try:
            while url and len(emails) < max_emails:
                response = self.graph_client.get(
                    url, params=params, timeout=self.REQUEST_TIMEOUT
                )
                if response.status_code != 200:
                    logger.error(f"Failed to fetch emails: {response.status_code} - {response.text}")
                    break

                data = response.json()
                for msg in data.get('value', []):
                    parsed = self._parse_message(msg)
                    if parsed:
                        emails.append(parsed)
                        if len(emails) >= max_emails:
                            break

                # The next link already embeds every query option.
                url = data.get('@odata.nextLink')
                params = None

            logger.info(f"Fetched {len(emails)} emails from Outlook")
            return emails

        except Exception as e:
            logger.error(f"OUTLOOK FETCH ERROR: {e}")
            logger.debug("Outlook fetch traceback", exc_info=True)
            return emails

    def _parse_message(self, msg: Dict) -> Optional[Email]:
        """Parse a Microsoft Graph message resource into an Email object.

        Fields that Graph reports as JSON ``null`` (for example ``from`` on
        drafts) are treated like missing fields instead of failing the parse.

        Args:
            msg: Message resource as decoded from the Graph JSON response.

        Returns:
            The Email object, or None when the message cannot be parsed.
        """
        try:
            sender_info = (msg.get('from') or {}).get('emailAddress') or {}
            sender_email = sender_info.get('address') or ''

            # Graph timestamps end in 'Z'; fromisoformat needs an explicit
            # UTC offset on older Python versions.
            date_str = msg.get('receivedDateTime')
            date = datetime.fromisoformat(date_str.replace('Z', '+00:00')) if date_str else None

            body = (msg.get('body') or {}).get('content') or ''

            subject = msg.get('subject')
            if subject is None:
                subject = 'No Subject'

            has_attachments = bool(msg.get('hasAttachments', False))
            attachments = []
            if has_attachments:
                # One extra request per message that carries attachments.
                attachments = self._parse_attachments(msg.get('id'))

            return Email(
                id=msg.get('id'),
                subject=subject,
                sender=sender_email,
                date=date,
                body=body,
                has_attachments=has_attachments,
                attachments=attachments,
                headers={'message-id': msg.get('id')},
                labels=msg.get('categories') or [],
                is_read=bool(msg.get('isRead', False)),
                provider='outlook'
            )

        except Exception as e:
            logger.error(f"Error parsing message: {e}")
            return None

    def _parse_attachments(self, message_id: str) -> List[Attachment]:
        """Fetch attachment metadata for a message.

        Best effort: any failure is logged and an empty (or partial) list is
        returned so that a broken attachment never drops the whole email.

        Args:
            message_id: Graph id of the message whose attachments are listed.

        Returns:
            List of Attachment objects (metadata only).
        """
        attachments: List[Attachment] = []
        if not message_id:
            return attachments

        try:
            url = f"{self.GRAPH_BASE_URL}/me/messages/{message_id}/attachments"
            response = self.graph_client.get(
                url,
                # Only metadata is needed; without $select Graph also sends
                # the full attachment content for file attachments.
                params={'$select': 'id,name,contentType,size'},
                timeout=self.REQUEST_TIMEOUT,
            )

            if response.status_code != 200:
                logger.debug(f"Failed to fetch attachments: {response.status_code}")
                return attachments

            for att in response.json().get('value', []):
                attachments.append(Attachment(
                    filename=att.get('name', 'unknown'),
                    mime_type=att.get('contentType', 'application/octet-stream'),
                    size=att.get('size', 0),
                    attachment_id=att.get('id')
                ))
        except Exception as e:
            logger.debug(f"Error fetching attachments: {e}")

        return attachments

    def update_labels(self, email_id: str, labels: List[str]) -> bool:
        """Replace the categories of a single email.

        Args:
            email_id: Graph id of the message to update.
            labels: Category names that become the message's categories.

        Returns:
            True when Graph accepted the update, False otherwise.
        """
        if not self.is_connected():
            logger.error("OUTLOOK NOT CONFIGURED: Cannot update labels")
            return False

        try:
            url = f"{self.GRAPH_BASE_URL}/me/messages/{email_id}"
            response = self.graph_client.patch(
                url, json={"categories": labels}, timeout=self.REQUEST_TIMEOUT
            )

            if response.status_code in (200, 204):
                return True

            logger.error(f"Failed to update labels: {response.status_code}")
            return False

        except Exception as e:
            logger.error(f"Error updating labels: {e}")
            return False

    def batch_update(self, updates: List[Dict[str, Any]]) -> bool:
        """Update the categories of many emails through Graph ``$batch``.

        Args:
            updates: Dicts with keys ``email_id`` (message id) and ``labels``
                (list of category names, defaults to empty). Entries without
                an ``email_id`` are skipped with a warning.

        Returns:
            True if at least one email was updated successfully, False
            otherwise (including when ``updates`` is empty).
        """
        if not self.is_connected():
            logger.error("OUTLOOK NOT CONFIGURED: Cannot batch update")
            return False

        try:
            batch_requests = []
            for i, update in enumerate(updates):
                email_id = update.get('email_id')
                if not email_id:
                    # Would otherwise be sent as "/me/messages/None".
                    logger.warning(f"Skipping batch update {i}: missing email_id")
                    continue

                batch_requests.append({
                    "id": str(i),
                    "method": "PATCH",
                    "url": f"/me/messages/{email_id}",
                    "body": {"categories": update.get('labels', [])},
                    "headers": {"Content-Type": "application/json"}
                })

            successful = 0
            for start in range(0, len(batch_requests), self.BATCH_SIZE):
                chunk = batch_requests[start:start + self.BATCH_SIZE]

                response = self.graph_client.post(
                    f"{self.GRAPH_BASE_URL}/$batch",
                    json={"requests": chunk},
                    timeout=self.REQUEST_TIMEOUT,
                )

                if response.status_code != 200:
                    logger.error(f"Batch request failed: {response.status_code}")
                    continue

                for resp in response.json().get('responses', []):
                    if resp.get('status') in (200, 204):
                        successful += 1
                    else:
                        logger.debug(
                            f"Batch item {resp.get('id')} failed: {resp.get('status')}"
                        )

            logger.info(f"Batch updated {successful}/{len(updates)} emails")
            return successful > 0

        except Exception as e:
            logger.error(f"Batch update error: {e}")
            logger.debug("Outlook batch update traceback", exc_info=True)
            return False

    def is_connected(self) -> bool:
        """Return True when authenticated and a Graph session is available."""
        return bool(self._credentials_configured and self.graph_client is not None)
|
||||
Loading…
x
Reference in New Issue
Block a user