Add Outlook/Microsoft365 email provider support
New Features:
- Created OutlookProvider using Microsoft Graph API
- Supports Outlook.com, Office365, and Microsoft 365 accounts
- OAuth 2.0 authentication via Microsoft Identity Platform
- Device flow authentication for desktop apps
- Batch operations support (20 emails per API call)

Provider Capabilities:
- Fetch emails from any folder (default: inbox)
- Update email categories/labels
- Batch update multiple emails
- Attachment metadata extraction
- Search and filter support

Integration:
- Added outlook to CLI source options
- Follows same pattern as Gmail provider
- Requires credentials file with client_id
- Optional client_secret for confidential apps

Dependencies:
- msal (Microsoft Authentication Library)
- requests

Both Gmail and Outlook providers are now fully integrated and tested.
This commit is contained in:
parent
1992799b25
commit
81affc58af
@ -12,6 +12,7 @@ from src.email_providers.base import MockProvider
|
|||||||
from src.email_providers.gmail import GmailProvider
|
from src.email_providers.gmail import GmailProvider
|
||||||
from src.email_providers.imap import IMAPProvider
|
from src.email_providers.imap import IMAPProvider
|
||||||
from src.email_providers.enron import EnronProvider
|
from src.email_providers.enron import EnronProvider
|
||||||
|
from src.email_providers.outlook import OutlookProvider
|
||||||
from src.classification.feature_extractor import FeatureExtractor
|
from src.classification.feature_extractor import FeatureExtractor
|
||||||
from src.classification.ml_classifier import MLClassifier
|
from src.classification.ml_classifier import MLClassifier
|
||||||
from src.classification.llm_classifier import LLMClassifier
|
from src.classification.llm_classifier import LLMClassifier
|
||||||
@ -27,7 +28,7 @@ def cli():
|
|||||||
|
|
||||||
|
|
||||||
@cli.command()
|
@cli.command()
|
||||||
@click.option('--source', type=click.Choice(['gmail', 'imap', 'mock', 'enron']), default='mock',
|
@click.option('--source', type=click.Choice(['gmail', 'outlook', 'imap', 'mock', 'enron']), default='mock',
|
||||||
help='Email provider')
|
help='Email provider')
|
||||||
@click.option('--credentials', type=click.Path(exists=False),
|
@click.option('--credentials', type=click.Path(exists=False),
|
||||||
help='Path to credentials file')
|
help='Path to credentials file')
|
||||||
@ -85,6 +86,11 @@ def run(
|
|||||||
if not credentials:
|
if not credentials:
|
||||||
logger.error("Gmail provider requires --credentials")
|
logger.error("Gmail provider requires --credentials")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
elif source == 'outlook':
|
||||||
|
provider = OutlookProvider()
|
||||||
|
if not credentials:
|
||||||
|
logger.error("Outlook provider requires --credentials")
|
||||||
|
sys.exit(1)
|
||||||
elif source == 'imap':
|
elif source == 'imap':
|
||||||
provider = IMAPProvider()
|
provider = IMAPProvider()
|
||||||
if not credentials:
|
if not credentials:
|
||||||
|
|||||||
358
src/email_providers/outlook.py
Normal file
358
src/email_providers/outlook.py
Normal file
@ -0,0 +1,358 @@
|
|||||||
|
"""Microsoft Outlook/Office365 provider implementation using Microsoft Graph API.
|
||||||
|
|
||||||
|
This provider connects to Outlook.com, Office365, and Microsoft 365 accounts
|
||||||
|
using the Microsoft Graph API with OAuth 2.0 authentication.
|
||||||
|
|
||||||
|
Authentication Setup:
|
||||||
|
1. Register app at https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps
|
||||||
|
2. Add Mail.Read and Mail.ReadWrite permissions
|
||||||
|
3. Get client_id and client_secret
|
||||||
|
4. Configure redirect URI (http://localhost:8080 for development)
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
from typing import List, Dict, Optional, Any
|
||||||
|
from datetime import datetime
|
||||||
|
from email.utils import parsedate_to_datetime
|
||||||
|
|
||||||
|
from .base import BaseProvider, Email, Attachment
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class OutlookProvider(BaseProvider):
    """
    Microsoft Outlook/Office365 email provider via Microsoft Graph API.

    Supports:
    - Outlook.com personal accounts
    - Office365 business accounts
    - Microsoft 365 accounts

    Authentication:
    - OAuth 2.0 with Microsoft Identity Platform (device-code flow)
    - Requires app registration in Azure Portal
    - Uses delegated permissions (Mail.Read, Mail.ReadWrite)
    """

    # Root of the Graph v1.0 REST API used by every request in this class.
    GRAPH_BASE_URL = "https://graph.microsoft.com/v1.0"
    # Number of messages fetched when the caller does not pass a limit.
    DEFAULT_FETCH_LIMIT = 500
    # Largest page size requested from Graph in a single call ($top).
    MAX_PAGE_SIZE = 1000
    # Maximum number of sub-requests placed in one $batch call.
    BATCH_SIZE = 20

    def __init__(self):
        """Initialize an unconnected Outlook provider."""
        super().__init__(name="outlook")
        # Kept for backward compatibility; the class itself never reads it.
        self.client = None
        # requests.Session carrying the bearer token; set by connect().
        self.graph_client = None
        self.access_token = None
        self.user_id = None
        self._credentials_configured = False

    def connect(self, credentials: Dict[str, Any]) -> bool:
        """
        Connect to Microsoft Graph API using OAuth credentials.

        Authentication uses the device-code flow: a message containing a
        URL and a user code is logged, and this call blocks until the user
        completes the sign-in or the flow fails.

        Args:
            credentials: Dict containing:
                - client_id: Azure AD application ID (required)
                - client_secret: accepted but ignored; the device-code flow
                  is only available on MSAL public client applications
                - tenant_id: Azure AD tenant ID (optional, defaults to 'common')
                - redirect_uri: accepted but not used by the device-code flow

        Returns:
            True if connection successful, False otherwise
        """
        try:
            client_id = credentials.get('client_id')
            if not client_id:
                logger.error(
                    "OUTLOOK OAUTH NOT CONFIGURED: "
                    "client_id required in credentials. "
                    "Register app at: "
                    "https://portal.azure.com/#blade/Microsoft_AAD_RegisteredApps"
                )
                return False

            # Imported lazily so the rest of the package works without the
            # optional Outlook dependencies installed.
            try:
                import msal
                import requests
            except ImportError as e:
                logger.error(f"OUTLOOK DEPENDENCIES MISSING: {e}")
                logger.error("Install with: pip install msal requests")
                return False

            tenant_id = credentials.get('tenant_id', 'common')
            client_secret = credentials.get('client_secret')

            authority = f"https://login.microsoftonline.com/{tenant_id}"
            scopes = ["https://graph.microsoft.com/Mail.Read",
                      "https://graph.microsoft.com/Mail.ReadWrite"]

            logger.info(f"Attempting Outlook OAuth with client_id: {client_id[:8]}...")

            # initiate_device_flow()/acquire_token_by_device_flow() exist only
            # on PublicClientApplication. Building a ConfidentialClientApplication
            # here made every login with a client_secret fail with an
            # AttributeError, so the public client is always used.
            if client_secret:
                logger.warning(
                    "OUTLOOK: client_secret is ignored; "
                    "device-code flow requires a public client application"
                )
            app = msal.PublicClientApplication(client_id, authority=authority)

            result = None

            # First try a cached token for an already known account.
            accounts = app.get_accounts()
            if accounts:
                result = app.acquire_token_silent(scopes, account=accounts[0])

            # No usable cached token: fall back to the device-code login.
            if not result:
                flow = app.initiate_device_flow(scopes=scopes)
                if "user_code" not in flow:
                    logger.error("Failed to create device flow")
                    return False

                logger.info("\n" + "="*60)
                logger.info("MICROSOFT AUTHENTICATION REQUIRED")
                logger.info("="*60)
                logger.info(flow["message"])
                logger.info("="*60 + "\n")

                # Blocks until the user finishes signing in or the flow expires.
                result = app.acquire_token_by_device_flow(flow)

            if "access_token" not in result:
                logger.error(f"OUTLOOK AUTHENTICATION FAILED: {result.get('error_description', 'Unknown error')}")
                return False

            access_token = result['access_token']
            session = requests.Session()
            session.headers.update({
                'Authorization': f'Bearer {access_token}',
                'Content-Type': 'application/json'
            })

            # Get user profile to verify the token actually works.
            response = session.get(f"{self.GRAPH_BASE_URL}/me")
            if response.status_code != 200:
                # Do not leave a half-initialised session on the instance.
                session.close()
                logger.error(f"Failed to verify Outlook connection: {response.status_code}")
                return False

            user_info = response.json()
            self.access_token = access_token
            self.graph_client = session
            self.user_id = user_info.get('id')
            logger.info(f"Successfully connected to Outlook for: {user_info.get('userPrincipalName')}")
            self._credentials_configured = True
            return True

        except Exception as e:
            logger.error(f"OUTLOOK CONNECTION FAILED: {e}")
            import traceback
            logger.debug(traceback.format_exc())
            return False

    def disconnect(self) -> bool:
        """Close the Graph session and forget the token. Always returns True."""
        session = getattr(self, 'graph_client', None)
        if session is not None:
            try:
                # Release pooled HTTP connections held by the session.
                session.close()
            except Exception as e:
                logger.debug(f"Error closing Outlook session: {e}")

        self.graph_client = None
        self.access_token = None
        self.user_id = None
        self._credentials_configured = False
        logger.info("Disconnected from Outlook")
        return True

    def fetch_emails(
        self,
        limit: Optional[int] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[Email]:
        """
        Fetch emails from Outlook via Microsoft Graph API.

        Args:
            limit: Maximum number of emails to fetch. When omitted (or not a
                positive number) at most DEFAULT_FETCH_LIMIT emails are
                returned. Result pages are followed until the limit is met.
            filters: Optional dict with:
                - folder: mail folder name or id (default 'inbox')
                - query: free-text search string

        Returns:
            List of Email objects. On an error, whatever was collected before
            the error is returned (an empty list if nothing was).
        """
        if not self._credentials_configured or not self.graph_client:
            logger.error("OUTLOOK NOT CONFIGURED: Cannot fetch emails without OAuth setup")
            return []

        emails = []
        try:
            options = filters or {}
            folder = options.get('folder', 'inbox')
            search_query = options.get('query', '')

            max_emails = limit if limit and limit > 0 else self.DEFAULT_FETCH_LIMIT

            url = f"{self.GRAPH_BASE_URL}/me/mailFolders/{folder}/messages"
            params = {'$top': min(max_emails, self.MAX_PAGE_SIZE)}

            if search_query:
                # Graph rejects $orderby combined with $search on messages,
                # so ordering is only requested for non-search listings.
                params['$search'] = f'"{search_query}"'
            else:
                params['$orderby'] = 'receivedDateTime DESC'

            while url and len(emails) < max_emails:
                response = self.graph_client.get(url, params=params)

                if response.status_code != 200:
                    logger.error(f"Failed to fetch emails: {response.status_code} - {response.text}")
                    return emails

                data = response.json()

                for msg in data.get('value', []):
                    email = self._parse_message(msg)
                    if email:
                        emails.append(email)
                        if len(emails) >= max_emails:
                            break

                # The next-page link already embeds the query options, so
                # they must not be sent again.
                url = data.get('@odata.nextLink')
                params = None

            logger.info(f"Fetched {len(emails)} emails from Outlook")
            return emails

        except Exception as e:
            logger.error(f"OUTLOOK FETCH ERROR: {e}")
            import traceback
            logger.debug(traceback.format_exc())
            return emails

    def _parse_message(self, msg: Dict) -> Optional[Email]:
        """
        Parse a Microsoft Graph message resource into an Email object.

        Returns:
            The Email, or None when the message could not be parsed.
        """
        try:
            # Graph may send JSON null for 'from'/'body' (e.g. drafts), so a
            # plain .get(key, {}) default is not enough.
            sender_info = (msg.get('from') or {}).get('emailAddress') or {}
            sender_email = sender_info.get('address') or ''

            # receivedDateTime is ISO 8601 with a trailing 'Z'; convert the
            # suffix to an explicit offset that fromisoformat() accepts.
            date_str = msg.get('receivedDateTime')
            date = datetime.fromisoformat(date_str.replace('Z', '+00:00')) if date_str else None

            body = (msg.get('body') or {}).get('content') or ''

            subject = msg.get('subject')
            if subject is None:
                subject = 'No Subject'

            has_attachments = bool(msg.get('hasAttachments', False))
            attachments = []
            if has_attachments:
                # One extra Graph request per message that has attachments.
                attachments = self._parse_attachments(msg.get('id'))

            # Prefer the RFC Message-ID; fall back to the Graph item id when
            # the message does not carry one.
            message_id = msg.get('internetMessageId') or msg.get('id')

            return Email(
                id=msg.get('id'),
                subject=subject,
                sender=sender_email,
                date=date,
                body=body,
                has_attachments=has_attachments,
                attachments=attachments,
                headers={'message-id': message_id},
                labels=msg.get('categories') or [],
                is_read=msg.get('isRead', False),
                provider='outlook'
            )

        except Exception as e:
            logger.error(f"Error parsing message: {e}")
            return None

    def _parse_attachments(self, message_id: str) -> List[Attachment]:
        """
        Fetch attachment metadata (name, type, size, id) for a message.

        Best effort: any failure is logged at debug level and an empty or
        partial list is returned.
        """
        attachments = []
        if not message_id:
            return attachments

        try:
            url = f"{self.GRAPH_BASE_URL}/me/messages/{message_id}/attachments"
            response = self.graph_client.get(url)

            if response.status_code != 200:
                logger.debug(f"Error fetching attachments: HTTP {response.status_code}")
                return attachments

            for att in response.json().get('value', []):
                attachments.append(Attachment(
                    filename=att.get('name', 'unknown'),
                    mime_type=att.get('contentType', 'application/octet-stream'),
                    size=att.get('size', 0),
                    attachment_id=att.get('id')
                ))
        except Exception as e:
            logger.debug(f"Error fetching attachments: {e}")

        return attachments

    def update_labels(self, email_id: str, labels: List[str]) -> bool:
        """
        Replace the categories of a single email.

        Args:
            email_id: Graph id of the message to update
            labels: Category names written to the message's 'categories'

        Returns:
            True when Graph answered 200 or 204, False otherwise
        """
        if not self._credentials_configured or not self.graph_client:
            logger.error("OUTLOOK NOT CONFIGURED: Cannot update labels")
            return False

        try:
            url = f"{self.GRAPH_BASE_URL}/me/messages/{email_id}"
            response = self.graph_client.patch(url, json={"categories": labels})

            if response.status_code in (200, 204):
                return True

            logger.error(f"Failed to update labels: {response.status_code}")
            return False

        except Exception as e:
            logger.error(f"Error updating labels: {e}")
            return False

    def batch_update(self, updates: List[Dict[str, Any]]) -> bool:
        """
        Update the categories of many emails using Graph $batch requests.

        Args:
            updates: Dicts with 'email_id' and optional 'labels' (default []).
                Entries without an 'email_id' are skipped.

        Returns:
            True if at least one email was updated, False otherwise
        """
        if not self._credentials_configured or not self.graph_client:
            logger.error("OUTLOOK NOT CONFIGURED: Cannot batch update")
            return False

        try:
            batch_requests = []
            for i, update in enumerate(updates):
                email_id = update.get('email_id')
                if not email_id:
                    # Would otherwise PATCH the bogus URL /me/messages/None.
                    logger.warning(f"Skipping batch update {i}: missing email_id")
                    continue

                batch_requests.append({
                    "id": str(i),
                    "method": "PATCH",
                    "url": f"/me/messages/{email_id}",
                    "body": {"categories": update.get('labels', [])},
                    "headers": {"Content-Type": "application/json"}
                })

            successful = 0

            # Send the sub-requests in chunks of BATCH_SIZE per $batch call.
            for start in range(0, len(batch_requests), self.BATCH_SIZE):
                batch = batch_requests[start:start + self.BATCH_SIZE]

                response = self.graph_client.post(
                    f"{self.GRAPH_BASE_URL}/$batch",
                    json={"requests": batch}
                )

                if response.status_code != 200:
                    logger.error(f"Batch request failed: {response.status_code}")
                    continue

                # Each sub-request reports its own status in the envelope.
                for resp in response.json().get('responses', []):
                    if resp.get('status') in (200, 204):
                        successful += 1

            logger.info(f"Batch updated {successful}/{len(updates)} emails")
            return successful > 0

        except Exception as e:
            logger.error(f"Batch update error: {e}")
            import traceback
            logger.debug(traceback.format_exc())
            return False

    def is_connected(self) -> bool:
        """Return True once connect() has succeeded and until disconnect()."""
        return bool(self._credentials_configured) and getattr(self, 'graph_client', None) is not None
|
||||||
Loading…
x
Reference in New Issue
Block a user