Add LLM-driven cache evolution - selective category persistence
The LLM now decides which new categories should be added to the persistent cache for future mailbox runs and which should remain temporary (run-only) categories.

ENHANCED LLM REVIEW:
- New field: "cache_worthy" (true/false) for each "new" category
- LLM judges: "Is this category useful across different mailboxes?"
- Examples:
  - "Customer Support" → cache_worthy: true (universal)
  - "Project X Updates" → cache_worthy: false (mailbox-specific)

CACHE EVOLUTION:
- cache_worthy=true → Added to persistent cache for future runs
- cache_worthy=false → Used for current run only, not cached
- First run (empty cache) → All categories treated as cache-worthy
- LLM reasoning logged for transparency

INTELLIGENT GROWTH:
- Cache grows organically with high-quality, reusable categories
- Prevents pollution with mailbox-specific categories
- Maintains cross-mailbox consistency while allowing natural evolution
- LLM balances: consistency (snap existing) vs expansion (add worthy)

SINGLE LLM CALL EFFICIENCY:
- Same ~4 second LLM call now handles:
  1. Snap vs new decision
  2. Cache persistence decision
  3. Reasoning for both
- No additional overhead for cache evolution

Result: Cache evolves intelligently over time, collecting universally useful categories while filtering out temporary/specific ones.
This commit is contained in:
parent
eab378409e
commit
fa09d14e52
@ -74,7 +74,7 @@ class CategoryCache:
|
||||
allow_new: bool = True,
|
||||
max_new: int = 3,
|
||||
llm_review_threshold: float = 0.5
|
||||
) -> Tuple[Dict[str, str], Dict[str, str]]:
|
||||
) -> Tuple[Dict[str, str], Dict[str, str], List[Tuple[str, str]]]:
|
||||
"""
|
||||
Snap discovered categories to cached ones using multi-stage matching.
|
||||
|
||||
@ -93,13 +93,15 @@ class CategoryCache:
|
||||
llm_review_threshold: Min score to trigger LLM review (default: 0.5)
|
||||
|
||||
Returns:
|
||||
(snapped_categories, mapping) where:
|
||||
(snapped_categories, mapping, cache_worthy) where:
|
||||
- snapped_categories: Final category set (from cache + new)
|
||||
- mapping: {discovered_name: final_name} for all discovered
|
||||
- cache_worthy: List of (name, desc) for categories to add to persistent cache
|
||||
"""
|
||||
if not self.cache:
|
||||
logger.info("Empty cache, using all discovered categories")
|
||||
return discovered, {name: name for name in discovered}
|
||||
# First run - all categories can be cache-worthy
|
||||
return discovered, {name: name for name in discovered}, list(discovered.items())
|
||||
|
||||
snapped = {}
|
||||
mapping = {}
|
||||
@ -139,6 +141,8 @@ class CategoryCache:
|
||||
logger.warning(f"Force review: '{disc_name}' (max_new exceeded, score: {best_score:.2f})")
|
||||
|
||||
# LLM Review for ambiguous cases
|
||||
cache_worthy_additions = [] # Track categories to add to persistent cache
|
||||
|
||||
if ambiguous_cases and self.llm_provider:
|
||||
logger.info(f"Requesting LLM review for {len(ambiguous_cases)} ambiguous cases...")
|
||||
llm_decisions = self._llm_review_ambiguous(ambiguous_cases, cached_cats, allow_new, len(new_categories), max_new)
|
||||
@ -149,14 +153,25 @@ class CategoryCache:
|
||||
mapping[disc_name] = target
|
||||
if target not in snapped:
|
||||
snapped[target] = cached_cats[target]
|
||||
logger.info(f"LLM snap: '{disc_name}' → '{target}'")
|
||||
reasoning = decision.get('reasoning', 'similar to existing')
|
||||
logger.info(f"LLM snap: '{disc_name}' → '{target}' ({reasoning})")
|
||||
|
||||
elif decision['action'] == 'new':
|
||||
# Find original description
|
||||
disc_desc = next(desc for name, desc, _, _ in ambiguous_cases if name == disc_name)
|
||||
new_categories.append((disc_name, disc_desc))
|
||||
mapping[disc_name] = disc_name
|
||||
snapped[disc_name] = disc_desc
|
||||
logger.info(f"LLM approved new: '{disc_name}'")
|
||||
|
||||
# Check if LLM recommends adding to cache
|
||||
is_cache_worthy = decision.get('cache_worthy', False)
|
||||
reasoning = decision.get('reasoning', 'new category')
|
||||
|
||||
if is_cache_worthy:
|
||||
cache_worthy_additions.append((disc_name, disc_desc))
|
||||
logger.info(f"LLM approved new + CACHE: '{disc_name}' ({reasoning})")
|
||||
else:
|
||||
logger.info(f"LLM approved new (run-only): '{disc_name}' ({reasoning})")
|
||||
|
||||
elif ambiguous_cases:
|
||||
# No LLM available → use heuristic fallback
|
||||
@ -175,8 +190,8 @@ class CategoryCache:
|
||||
if best_match not in snapped:
|
||||
snapped[best_match] = cached_cats[best_match]
|
||||
|
||||
logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new)")
|
||||
return snapped, mapping
|
||||
logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new, {len(cache_worthy_additions)} cache-worthy)")
|
||||
return snapped, mapping, cache_worthy_additions
|
||||
|
||||
def _find_best_match(
|
||||
self,
|
||||
@ -310,23 +325,34 @@ CONTEXT:
|
||||
|
||||
TASK:
|
||||
For each ambiguous case, decide:
|
||||
1. "snap" - If semantically similar enough to cached category (even if not perfect match)
|
||||
2. "new" - If genuinely distinct and worth adding (only if slots available)
|
||||
1. "snap" - If semantically similar enough to cached category
|
||||
2. "new" - If genuinely distinct and worth adding for THIS RUN
|
||||
|
||||
ADDITIONALLY:
|
||||
For "new" categories, decide if they should be added to the CACHE for future mailboxes:
|
||||
- "cache_worthy": true - High-quality, reusable category (e.g., "Customer Support", "Sales")
|
||||
- "cache_worthy": false - Mailbox-specific, not broadly useful (e.g., "Project X Updates")
|
||||
|
||||
GUIDELINES:
|
||||
- PREFER snapping to maintain consistency across mailboxes
|
||||
- Only approve "new" if category serves a clearly distinct purpose
|
||||
- Consider: Will users benefit from separating this vs merging with existing?
|
||||
- Be conservative with "new" - consolidation is better than fragmentation
|
||||
- Be VERY selective with cache_worthy - only approve universally useful categories
|
||||
- Consider: Would this category be useful across different users' mailboxes?
|
||||
- Mailbox-specific categories can be "new" without being cache_worthy
|
||||
|
||||
Return JSON:
|
||||
{{
|
||||
"CategoryName": {{"action": "snap"|"new", "target": "CachedCategoryName"}},
|
||||
"CategoryName": {{
|
||||
"action": "snap"|"new",
|
||||
"target": "CachedCategoryName",
|
||||
"cache_worthy": true|false,
|
||||
"reasoning": "brief explanation"
|
||||
}},
|
||||
...
|
||||
}}
|
||||
|
||||
For "snap": target = cached category to snap to
|
||||
For "new": target = same as CategoryName (keeps original)
|
||||
For "snap": target = cached category to snap to, cache_worthy = false (not applicable)
|
||||
For "new": target = same as CategoryName, cache_worthy = true/false based on reusability
|
||||
|
||||
JSON:
|
||||
"""
|
||||
|
||||
@ -105,7 +105,7 @@ class CalibrationAnalyzer:
|
||||
max_new = self.config.get('cache_max_new', 3)
|
||||
|
||||
logger.info(f"Snapping to cached categories (threshold={similarity_threshold}, allow_new={allow_new}, max_new={max_new})")
|
||||
final_categories, snap_mapping = self.category_cache.snap_to_cache(
|
||||
final_categories, snap_mapping, cache_worthy = self.category_cache.snap_to_cache(
|
||||
discovered_categories,
|
||||
similarity_threshold=similarity_threshold,
|
||||
allow_new=allow_new,
|
||||
@ -120,11 +120,19 @@ class CalibrationAnalyzer:
|
||||
logger.info(f"After cache snap: {len(final_categories)} categories")
|
||||
discovered_categories = final_categories
|
||||
|
||||
# Update cache with usage counts
|
||||
# Update cache with usage counts AND add cache-worthy new categories
|
||||
category_counts = {}
|
||||
for _, cat in email_labels:
|
||||
category_counts[cat] = category_counts.get(cat, 0) + 1
|
||||
self.category_cache.update_cache(discovered_categories, category_counts)
|
||||
|
||||
# Add cache-worthy categories to persistent cache
|
||||
if cache_worthy:
|
||||
cache_additions = {name: desc for name, desc in cache_worthy}
|
||||
logger.info(f"Adding {len(cache_worthy)} LLM-approved categories to persistent cache: {list(cache_additions.keys())}")
|
||||
self.category_cache.update_cache(cache_additions, category_counts)
|
||||
else:
|
||||
# Just update usage counts for existing categories
|
||||
self.category_cache.update_cache(discovered_categories, category_counts)
|
||||
|
||||
return discovered_categories, email_labels
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user