#!/usr/bin/env python3
"""
Generate interactive HTML report from email classification results.
Usage:
python tools/generate_html_report.py --input results.json --output report.html
"""
import argparse
import json
from pathlib import Path
from datetime import datetime
from collections import Counter, defaultdict
from html import escape
def load_results(input_path: str) -> dict:
    """Load classification results from a JSON file.

    Args:
        input_path: Path to the JSON results file.

    Returns:
        The decoded JSON document (expected to be a dict).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON interchange text is UTF-8; do not depend on the platform's locale
    # encoding, which would mis-decode non-ASCII subjects/senders (e.g. on
    # Windows with a legacy code page).
    with open(input_path, encoding="utf-8") as f:
        return json.load(f)
def extract_domain(sender: str) -> str:
    """Extract the lower-cased domain from a sender string.

    Accepts both bare addresses (``user@example.com``) and display-name
    forms (``Name <user@example.com>``).

    Args:
        sender: Sender value taken from a classification record; may be
            empty or ``None``.

    Returns:
        ``"unknown"`` when ``sender`` is empty, the text after the last
        ``"@"`` (lower-cased, without a trailing ``">"`` or surrounding
        whitespace) when an ``"@"`` is present, otherwise the whole sender
        lower-cased.
    """
    if not sender:
        return "unknown"
    if "@" in sender:
        domain = sender.rsplit("@", 1)[-1]
        # "Name <user@example.com>" would otherwise yield "example.com>",
        # splitting one domain into several buckets in the statistics.
        return domain.strip().rstrip(">").strip().lower()
    return sender.lower()
def format_date(date_str: str) -> str:
    """Format an ISO 8601 date string for display.

    Args:
        date_str: Timestamp string such as ``"2024-01-15T10:30:00Z"``; may
            be empty or ``None``.

    Returns:
        ``"N/A"`` when ``date_str`` is empty, ``"YYYY-MM-DD HH:MM"`` when it
        parses as ISO 8601, otherwise the first 16 characters of the
        original string as a best-effort fallback.
    """
    if not date_str:
        return "N/A"
    try:
        # A trailing "Z" is rewritten as an explicit UTC offset before
        # parsing; older fromisoformat() implementations reject the "Z" form.
        dt = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
    except ValueError:
        # Not ISO 8601: show a bounded prefix rather than failing the report.
        # Only ValueError is caught so KeyboardInterrupt/SystemExit and real
        # programming errors are no longer silently swallowed.
        return date_str[:16]
    return dt.strftime("%Y-%m-%d %H:%M")
def truncate(text: str, max_len: int = 60) -> str:
    """Truncate text to at most ``max_len`` characters, adding an ellipsis.

    Args:
        text: Text to shorten; may be empty or ``None``.
        max_len: Maximum length of the returned string, including the
            three-character ellipsis.

    Returns:
        ``""`` for empty input, ``text`` unchanged when it already fits,
        otherwise a prefix of ``text`` followed by ``"..."``. When
        ``max_len`` is too small to hold the ellipsis, the text is simply
        cut to ``max_len`` characters.
    """
    if not text:
        return ""
    if len(text) <= max_len:
        return text
    if max_len < 3:
        # No room for "...": a negative slice index here would return a
        # result longer than max_len (e.g. "hell..." for max_len=2).
        return text[:max(max_len, 0)]
    return text[:max_len - 3] + "..."
def generate_html_report(results: dict, output_path: str):
"""Generate interactive HTML report."""
metadata = results.get("metadata", {})
classifications = results.get("classifications", [])
# Calculate statistics
total = len(classifications)
categories = Counter(c["category"] for c in classifications)
methods = Counter(c["method"] for c in classifications)
# Group by category
by_category = defaultdict(list)
for c in classifications:
by_category[c["category"]].append(c)
# Sort categories by count
sorted_categories = sorted(categories.keys(), key=lambda x: categories[x], reverse=True)
# Sender statistics
sender_domains = Counter(extract_domain(c.get("sender", "")) for c in classifications)
top_senders = Counter(c.get("sender", "unknown") for c in classifications).most_common(20)
# Confidence distribution
high_conf = sum(1 for c in classifications if c.get("confidence", 0) >= 0.7)
med_conf = sum(1 for c in classifications if 0.5 <= c.get("confidence", 0) < 0.7)
low_conf = sum(1 for c in classifications if c.get("confidence", 0) < 0.5)
# Generate HTML
html = f'''