- Rewrote CLAUDE.md with comprehensive development guide - Archived 20 old docs to docs/archive/ - Added PROJECT_ROADMAP_2025.md with research learnings - Added CLASSIFICATION_METHODS_COMPARISON.md - Added SESSION_HANDOVER_20251128.md - Added tools for analysis (brett_gmail/microsoft analyzers) - Updated .gitignore for archive folders - Config changes for local vLLM endpoint
643 lines
20 KiB
Python
643 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Generate interactive HTML report from email classification results.
|
|
|
|
Usage:
|
|
python tools/generate_html_report.py --input results.json --output report.html
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from collections import Counter, defaultdict
|
|
from html import escape
|
|
|
|
|
|
def load_results(input_path: str) -> dict:
    """Load classification results from a JSON file.

    Args:
        input_path: Path to the JSON results file.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly: the report is written as UTF-8, and relying on
    # the locale default breaks non-ASCII subjects/senders on some platforms.
    with open(input_path, encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
|
|
def extract_domain(sender: str) -> str:
    """Extract the lower-cased domain from an email address.

    Accepts both bare addresses (``user@example.com``) and display-name
    forms (``Name <user@example.com>``).

    Args:
        sender: Sender string; may be empty or ``None``.

    Returns:
        The domain part in lower case, the lower-cased sender itself when it
        contains no ``@``, or ``"unknown"`` when nothing usable is present.
    """
    if not sender:
        return "unknown"

    address = sender.strip()
    if not address:
        return "unknown"

    if "@" in address:
        domain = address.rsplit("@", 1)[1]
        # Drop the closing bracket (and anything after it) of "Name <a@b>".
        domain = domain.split(">", 1)[0].strip().lower()
        return domain or "unknown"

    return address.lower()
|
|
|
|
|
|
def format_date(date_str: str) -> str:
    """Format an ISO date string for display.

    Args:
        date_str: ISO 8601 timestamp (a trailing ``Z`` is accepted); may be
            empty or ``None``.

    Returns:
        ``"YYYY-MM-DD HH:MM"`` when the value parses, ``"N/A"`` when it is
        empty, otherwise the first 16 characters of the raw value.
    """
    if not date_str:
        return "N/A"

    # Coerce so a non-string value falls back to its text form instead of
    # raising from inside the error path.
    raw = str(date_str)
    try:
        # Older ``fromisoformat`` versions reject the "Z" suffix, so spell the
        # UTC offset out explicitly.
        parsed = datetime.fromisoformat(raw.replace("Z", "+00:00"))
    except ValueError:
        return raw[:16]
    return parsed.strftime("%Y-%m-%d %H:%M")
|
|
|
|
|
|
def truncate(text: str, max_len: int = 60) -> str:
    """Truncate text to at most ``max_len`` characters, adding an ellipsis.

    Args:
        text: Text to shorten; may be empty or ``None``.
        max_len: Maximum length of the returned string.

    Returns:
        ``""`` for empty input, ``text`` unchanged when it already fits,
        otherwise a shortened copy ending in ``"..."``. When ``max_len`` is
        too small to hold the ellipsis the text is simply cut.
    """
    if not text:
        return ""
    if len(text) <= max_len:
        return text
    if max_len < 3:
        # No room for "..."; a negative slice bound here would return a
        # string longer than max_len.
        return text[:max(max_len, 0)]
    return text[:max_len - 3] + "..."
|
|
|
|
|
|
# Stylesheet of the generated report. Kept as a plain (non-f) string so the
# CSS braces do not have to be doubled.
_REPORT_CSS = """\
        :root {
            --bg-primary: #1a1a2e;
            --bg-secondary: #16213e;
            --bg-card: #0f3460;
            --text-primary: #eee;
            --text-secondary: #aaa;
            --accent: #e94560;
            --accent-hover: #ff6b6b;
            --success: #00d9a5;
            --warning: #ffc107;
            --border: #2a2a4a;
        }

        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
            background: var(--bg-primary);
            color: var(--text-primary);
            line-height: 1.6;
        }

        .container {
            max-width: 1400px;
            margin: 0 auto;
            padding: 20px;
        }

        header {
            background: var(--bg-secondary);
            padding: 30px;
            border-radius: 12px;
            margin-bottom: 30px;
            border: 1px solid var(--border);
        }

        header h1 {
            font-size: 2rem;
            margin-bottom: 10px;
            color: var(--accent);
        }

        .meta-info {
            display: flex;
            flex-wrap: wrap;
            gap: 20px;
            margin-top: 15px;
            color: var(--text-secondary);
            font-size: 0.9rem;
        }

        .meta-info span {
            background: var(--bg-card);
            padding: 5px 12px;
            border-radius: 20px;
        }

        .stats-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
            margin-bottom: 30px;
        }

        .stat-card {
            background: var(--bg-secondary);
            padding: 20px;
            border-radius: 12px;
            border: 1px solid var(--border);
            text-align: center;
        }

        .stat-card .value {
            font-size: 2.5rem;
            font-weight: bold;
            color: var(--accent);
        }

        .stat-card .label {
            color: var(--text-secondary);
            font-size: 0.9rem;
            margin-top: 5px;
        }

        .tabs {
            display: flex;
            flex-wrap: wrap;
            gap: 10px;
            margin-bottom: 20px;
            border-bottom: 2px solid var(--border);
            padding-bottom: 10px;
        }

        .tab {
            padding: 10px 20px;
            background: var(--bg-secondary);
            border: 1px solid var(--border);
            border-radius: 8px 8px 0 0;
            cursor: pointer;
            transition: all 0.2s;
            color: var(--text-secondary);
        }

        .tab:hover {
            background: var(--bg-card);
            color: var(--text-primary);
        }

        .tab.active {
            background: var(--accent);
            color: white;
            border-color: var(--accent);
        }

        .tab .count {
            background: rgba(255,255,255,0.2);
            padding: 2px 8px;
            border-radius: 10px;
            font-size: 0.8rem;
            margin-left: 8px;
        }

        .tab-content {
            display: none;
        }

        .tab-content.active {
            display: block;
        }

        .email-table {
            width: 100%;
            border-collapse: collapse;
            background: var(--bg-secondary);
            border-radius: 12px;
            overflow: hidden;
        }

        .email-table th {
            background: var(--bg-card);
            padding: 15px;
            text-align: left;
            font-weight: 600;
            color: var(--text-primary);
            position: sticky;
            top: 0;
        }

        .email-table td {
            padding: 12px 15px;
            border-bottom: 1px solid var(--border);
            color: var(--text-secondary);
        }

        .email-table tr:hover td {
            background: var(--bg-card);
            color: var(--text-primary);
        }

        .email-table .subject {
            max-width: 400px;
            color: var(--text-primary);
        }

        .email-table .sender {
            max-width: 250px;
        }

        .confidence {
            display: inline-block;
            padding: 3px 10px;
            border-radius: 12px;
            font-size: 0.85rem;
            font-weight: 500;
        }

        .confidence.high {
            background: rgba(0, 217, 165, 0.2);
            color: var(--success);
        }

        .confidence.medium {
            background: rgba(255, 193, 7, 0.2);
            color: var(--warning);
        }

        .confidence.low {
            background: rgba(233, 69, 96, 0.2);
            color: var(--accent);
        }

        .method-badge {
            display: inline-block;
            padding: 3px 8px;
            border-radius: 4px;
            font-size: 0.75rem;
            text-transform: uppercase;
        }

        .method-ml {
            background: rgba(0, 217, 165, 0.2);
            color: var(--success);
        }

        .method-rule {
            background: rgba(100, 149, 237, 0.2);
            color: cornflowerblue;
        }

        .method-llm {
            background: rgba(255, 193, 7, 0.2);
            color: var(--warning);
        }

        .section {
            background: var(--bg-secondary);
            padding: 25px;
            border-radius: 12px;
            margin-bottom: 30px;
            border: 1px solid var(--border);
        }

        .section h2 {
            margin-bottom: 20px;
            color: var(--accent);
            font-size: 1.3rem;
        }

        .chart-bar {
            display: flex;
            align-items: center;
            margin-bottom: 10px;
        }

        .chart-bar .label {
            width: 150px;
            font-size: 0.9rem;
            color: var(--text-secondary);
        }

        .chart-bar .bar-container {
            flex: 1;
            height: 24px;
            background: var(--bg-card);
            border-radius: 4px;
            overflow: hidden;
            margin: 0 15px;
        }

        .chart-bar .bar {
            height: 100%;
            background: linear-gradient(90deg, var(--accent), var(--accent-hover));
            transition: width 0.5s ease;
        }

        .chart-bar .value {
            width: 80px;
            text-align: right;
            font-size: 0.9rem;
        }

        .sender-list {
            display: grid;
            grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
            gap: 10px;
        }

        .sender-item {
            display: flex;
            justify-content: space-between;
            padding: 10px 15px;
            background: var(--bg-card);
            border-radius: 8px;
            font-size: 0.9rem;
        }

        .sender-item .email {
            color: var(--text-secondary);
            overflow: hidden;
            text-overflow: ellipsis;
            white-space: nowrap;
            max-width: 220px;
        }

        .sender-item .count {
            color: var(--accent);
            font-weight: bold;
        }

        .search-box {
            width: 100%;
            padding: 12px 20px;
            background: var(--bg-card);
            border: 1px solid var(--border);
            border-radius: 8px;
            color: var(--text-primary);
            font-size: 1rem;
            margin-bottom: 20px;
        }

        .search-box:focus {
            outline: none;
            border-color: var(--accent);
        }

        .table-container {
            max-height: 600px;
            overflow-y: auto;
            border-radius: 12px;
        }

        .attachment-icon {
            color: var(--warning);
        }

        footer {
            text-align: center;
            padding: 20px;
            color: var(--text-secondary);
            font-size: 0.85rem;
        }
"""

# Client-side behaviour of the generated report. ``showTab`` receives the
# clicked tab element and reads the target panel from its ``data-tab``
# attribute, so category names never end up inside a JavaScript literal.
_REPORT_JS = """\
        function showTab(tab) {
            // Hide all tabs
            document.querySelectorAll('.tab-content').forEach(el => el.classList.remove('active'));
            document.querySelectorAll('.tab').forEach(el => el.classList.remove('active'));

            // Show selected tab
            document.getElementById('tab-' + tab.dataset.tab).classList.add('active');
            tab.classList.add('active');

            // Re-apply the current search to the newly visible table
            filterTable(document.querySelector('.search-box').value);
        }

        function filterTable(query) {
            query = query.toLowerCase();
            document.querySelectorAll('.tab-content.active tbody tr').forEach(row => {
                const text = row.textContent.toLowerCase();
                row.style.display = text.includes(query) ? '' : 'none';
            });
        }
"""


def _percent(count: int, total: int) -> float:
    """Return ``count`` as a percentage of ``total`` (0.0 when ``total`` is 0)."""
    return count / total * 100 if total else 0.0


def _chart_bar(label: str, count: int, total: int, bar_style: str = "") -> str:
    """Render one horizontal distribution bar; ``label`` is escaped here."""
    pct = _percent(count, total)
    return f'''
            <div class="chart-bar">
                <div class="label">{escape(label)}</div>
                <div class="bar-container">
                    <div class="bar" style="width: {pct:.1f}%{bar_style}"></div>
                </div>
                <div class="value">{count:,} ({pct:.1f}%)</div>
            </div>
'''


def _email_table(emails: list, show_category: bool = True, table_id: str = "") -> str:
    """Render a scrollable table of emails, newest first."""
    id_attr = f' id="{table_id}"' if table_id else ""
    category_header = "<th>Category</th>" if show_category else ""
    newest_first = sorted(emails, key=lambda e: e.get("date") or "", reverse=True)
    rows = "".join(
        generate_email_row(e, show_category=show_category) for e in newest_first
    )
    return f'''
                <div class="table-container">
                    <table class="email-table"{id_attr}>
                        <thead>
                            <tr>
                                <th>Date</th>
                                <th>Subject</th>
                                <th>Sender</th>
                                {category_header}
                                <th>Confidence</th>
                                <th>Method</th>
                            </tr>
                        </thead>
                        <tbody>
                            {rows}
                        </tbody>
                    </table>
                </div>
'''


def generate_html_report(results: dict, output_path: str):
    """Generate an interactive HTML report and write it to ``output_path``.

    The report contains summary cards, category / method / confidence
    distribution bars, the 20 most frequent senders, and one searchable
    email table per category plus an "All" table.

    Args:
        results: Decoded results document. Reads the ``"metadata"`` mapping
            (only its ``"source"`` entry) and the ``"classifications"`` list
            of per-email dicts. Entries without ``"category"`` or ``"method"``
            are counted as ``"unknown"``; a missing ``"confidence"`` counts
            as 0.
        output_path: File the HTML is written to (UTF-8).

    Side effects:
        Writes the report file and prints a short summary to stdout.
    """
    metadata = results.get("metadata") or {}
    classifications = results.get("classifications") or []

    # Calculate statistics
    total = len(classifications)
    categories = Counter(c.get("category") or "unknown" for c in classifications)
    methods = Counter(c.get("method") or "unknown" for c in classifications)

    # Group by category
    by_category = defaultdict(list)
    for c in classifications:
        by_category[c.get("category") or "unknown"].append(c)

    # Sort categories by count
    sorted_categories = sorted(categories, key=lambda name: categories[name], reverse=True)

    # Sender statistics
    senders = [c.get("sender") or "" for c in classifications]
    sender_domains = Counter(extract_domain(s) for s in senders)
    top_senders = Counter(s or "unknown" for s in senders).most_common(20)

    # Confidence distribution
    confidences = [c.get("confidence") or 0 for c in classifications]
    high_conf = sum(1 for v in confidences if v >= 0.7)
    med_conf = sum(1 for v in confidences if 0.5 <= v < 0.7)
    low_conf = sum(1 for v in confidences if v < 0.5)

    # One timestamp so header and footer can never disagree.
    generated_at = datetime.now()
    generated_short = generated_at.strftime("%Y-%m-%d %H:%M")
    generated_full = generated_at.strftime("%Y-%m-%d %H:%M:%S")
    source = escape(str(metadata.get("source", "unknown")))

    # Each section is built separately: nesting f-strings that reuse the
    # enclosing quote style only parses on Python 3.12 and newer.
    category_bars = "".join(
        _chart_bar(cat, categories[cat], total) for cat in sorted_categories
    )
    method_bars = "".join(
        _chart_bar(method.upper(), methods[method], total) for method in sorted(methods)
    )
    confidence_bars = "".join([
        _chart_bar("High (≥70%)", high_conf, total,
                   "; background: linear-gradient(90deg, #00d9a5, #00ffcc);"),
        _chart_bar("Medium (50-70%)", med_conf, total,
                   "; background: linear-gradient(90deg, #ffc107, #ffdb58);"),
        _chart_bar("Low (<50%)", low_conf, total,
                   "; background: linear-gradient(90deg, #e94560, #ff6b6b);"),
    ])
    sender_items = "".join(
        f'''
                <div class="sender-item">
                    <span class="email" title="{escape(sender)}">{escape(truncate(sender, 35))}</span>
                    <span class="count">{count}</span>
                </div>
'''
        for sender, count in top_senders
    )
    category_tabs = "".join(
        f'<div class="tab" data-tab="{escape(cat)}" onclick="showTab(this)">'
        f'{escape(cat)}<span class="count">{categories[cat]}</span></div>'
        for cat in sorted_categories
    )
    all_table = _email_table(classifications, show_category=True, table_id="email-table-all")
    category_panels = "".join(
        f'''
            <div id="tab-{escape(cat)}" class="tab-content">
{_email_table(by_category[cat], show_category=False)}
            </div>
'''
        for cat in sorted_categories
    )

    # Generate HTML
    html = f'''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Email Classification Report</title>
    <style>
{_REPORT_CSS}
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>Email Classification Report</h1>
            <p>Automated analysis of email inbox</p>
            <div class="meta-info">
                <span>Generated: {generated_short}</span>
                <span>Source: {source}</span>
                <span>Total Emails: {total:,}</span>
            </div>
        </header>

        <div class="stats-grid">
            <div class="stat-card">
                <div class="value">{total:,}</div>
                <div class="label">Total Emails</div>
            </div>
            <div class="stat-card">
                <div class="value">{len(categories)}</div>
                <div class="label">Categories</div>
            </div>
            <div class="stat-card">
                <div class="value">{high_conf}</div>
                <div class="label">High Confidence (≥70%)</div>
            </div>
            <div class="stat-card">
                <div class="value">{len(sender_domains)}</div>
                <div class="label">Unique Domains</div>
            </div>
        </div>

        <div class="section">
            <h2>Category Distribution</h2>
            {category_bars}
        </div>

        <div class="section">
            <h2>Classification Methods</h2>
            {method_bars}
        </div>

        <div class="section">
            <h2>Confidence Distribution</h2>
            {confidence_bars}
        </div>

        <div class="section">
            <h2>Top Senders</h2>
            <div class="sender-list">
                {sender_items}
            </div>
        </div>

        <div class="section">
            <h2>Emails by Category</h2>

            <div class="tabs">
                <div class="tab active" data-tab="all" onclick="showTab(this)">All<span class="count">{total}</span></div>
                {category_tabs}
            </div>

            <input type="text" class="search-box" placeholder="Search by subject, sender..." onkeyup="filterTable(this.value)">

            <div id="tab-all" class="tab-content active">
{all_table}
            </div>

            {category_panels}
        </div>

        <footer>
            Generated by Email Sorter | {generated_full}
        </footer>
    </div>

    <script>
{_REPORT_JS}
    </script>
</body>
</html>
'''

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(html)

    print(f"Report generated: {output_path}")
    print(f" Total emails: {total:,}")
    print(f" Categories: {len(categories)}")
    # An empty result set has no top category to report.
    if sorted_categories:
        top_category = sorted_categories[0]
        print(f" Top category: {top_category} ({categories[top_category]:,})")
|
|
|
|
|
|
def generate_email_row(c: dict, show_category: bool = True) -> str:
    """Generate the HTML table row for one classified email.

    Args:
        c: Classification record. Reads ``confidence``, ``method``,
            ``has_attachments``, ``category``, ``subject``, ``sender``,
            ``sender_name`` and ``date``; all are optional.
        show_category: Whether to include the category cell (the per-category
            tables omit it).

    Returns:
        A ``<tr>`` element as a string, with every record value HTML-escaped.
    """
    # ``or`` rather than a .get() default so explicit nulls are handled too.
    conf = c.get("confidence") or 0
    conf_class = "high" if conf >= 0.7 else "medium" if conf >= 0.5 else "low"

    # The method ends up in both a class attribute and the cell text, so it
    # must be escaped like every other record value.
    method = escape(str(c.get("method") or "unknown"))
    method_class = f"method-{method}"

    attachment_icon = (
        '<span class="attachment-icon" title="Has attachments">📎</span> '
        if c.get("has_attachments")
        else ""
    )

    category_col = (
        f'<td>{escape(str(c.get("category") or "unknown"))}</td>' if show_category else ""
    )

    subject = c.get("subject") or ""
    sender = c.get("sender") or ""
    shown_subject = escape(truncate(subject or "No subject", 70))
    shown_sender = escape(truncate(c.get("sender_name") or sender, 35))
    # format_date can return the raw input when it is not a valid ISO
    # timestamp, so its result is escaped as well.
    shown_date = escape(format_date(c.get("date")))

    return f'''
        <tr data-search="{escape(subject)} {escape(sender)}">
            <td>{shown_date}</td>
            <td class="subject">{attachment_icon}{shown_subject}</td>
            <td class="sender" title="{escape(sender)}">{shown_sender}</td>
            {category_col}
            <td><span class="confidence {conf_class}">{conf*100:.0f}%</span></td>
            <td><span class="method-badge {method_class}">{method}</span></td>
        </tr>
'''
|
|
|
|
|
|
def main():
    """Command-line entry point: parse arguments and build the report.

    Returns:
        0 when the report was generated, 1 when the input file is missing.
    """
    cli = argparse.ArgumentParser(description="Generate HTML report from classification results")
    cli.add_argument("--input", "-i", required=True, help="Path to results.json")
    cli.add_argument("--output", "-o", default=None, help="Output HTML file path")
    options = cli.parse_args()

    source = Path(options.input)
    if source.exists():
        # Without --output the report is placed next to the input file.
        if options.output:
            destination = options.output
        else:
            destination = str(source.parent / "report.html")
        generate_html_report(load_results(options.input), destination)
        return 0

    print(f"Error: Input file not found: {source}")
    return 1
|
|
|
|
|
|
if __name__ == "__main__":
    # The ``exit`` builtin is installed by the ``site`` module and is missing
    # under ``python -S`` or in frozen builds; raising SystemExit always works
    # and passes main()'s return value on as the process exit status.
    raise SystemExit(main())
|