Add LLM-driven cache evolution - selective category persistence

LLM now decides which new categories should be added to persistent cache
for future mailbox runs vs temporary (run-only) categories.

ENHANCED LLM REVIEW:
- New field: "cache_worthy" (true/false) for each "new" category
- LLM judges: "Is this category useful across different mailboxes?"
- Examples:
  - "Customer Support" → cache_worthy: true (universal)
  - "Project X Updates" → cache_worthy: false (mailbox-specific)

CACHE EVOLUTION:
- cache_worthy=true → Added to persistent cache for future runs
- cache_worthy=false → Used for current run only, not cached
- First run (empty cache) → All categories treated as cache-worthy
- LLM reasoning logged for transparency

INTELLIGENT GROWTH:
- Cache grows organically with high-quality, reusable categories
- Prevents pollution with mailbox-specific categories
- Maintains cross-mailbox consistency while allowing natural evolution
- LLM balances: consistency (snap existing) vs expansion (add worthy)

SINGLE LLM CALL EFFICIENCY:
- Same ~4 second LLM call now handles:
  1. Snap vs new decision
  2. Cache persistence decision
  3. Reasoning for both
- No additional overhead for cache evolution

Result: Cache evolves intelligently over time, collecting universally
useful categories while filtering out temporary/specific ones.
This commit is contained in:
FSSCoding 2025-10-23 15:36:51 +11:00
parent eab378409e
commit fa09d14e52
2 changed files with 51 additions and 17 deletions

View File

@@ -74,7 +74,7 @@ class CategoryCache:
allow_new: bool = True, allow_new: bool = True,
max_new: int = 3, max_new: int = 3,
llm_review_threshold: float = 0.5 llm_review_threshold: float = 0.5
) -> Tuple[Dict[str, str], Dict[str, str]]: ) -> Tuple[Dict[str, str], Dict[str, str], List[Tuple[str, str]]]:
""" """
Snap discovered categories to cached ones using multi-stage matching. Snap discovered categories to cached ones using multi-stage matching.
@@ -93,13 +93,15 @@
llm_review_threshold: Min score to trigger LLM review (default: 0.5) llm_review_threshold: Min score to trigger LLM review (default: 0.5)
Returns: Returns:
(snapped_categories, mapping) where: (snapped_categories, mapping, cache_worthy) where:
- snapped_categories: Final category set (from cache + new) - snapped_categories: Final category set (from cache + new)
- mapping: {discovered_name: final_name} for all discovered - mapping: {discovered_name: final_name} for all discovered
- cache_worthy: List of (name, desc) for categories to add to persistent cache
""" """
if not self.cache: if not self.cache:
logger.info("Empty cache, using all discovered categories") logger.info("Empty cache, using all discovered categories")
return discovered, {name: name for name in discovered} # First run - all categories can be cache-worthy
return discovered, {name: name for name in discovered}, list(discovered.items())
snapped = {} snapped = {}
mapping = {} mapping = {}
@@ -139,6 +141,8 @@
logger.warning(f"Force review: '{disc_name}' (max_new exceeded, score: {best_score:.2f})") logger.warning(f"Force review: '{disc_name}' (max_new exceeded, score: {best_score:.2f})")
# LLM Review for ambiguous cases # LLM Review for ambiguous cases
cache_worthy_additions = [] # Track categories to add to persistent cache
if ambiguous_cases and self.llm_provider: if ambiguous_cases and self.llm_provider:
logger.info(f"Requesting LLM review for {len(ambiguous_cases)} ambiguous cases...") logger.info(f"Requesting LLM review for {len(ambiguous_cases)} ambiguous cases...")
llm_decisions = self._llm_review_ambiguous(ambiguous_cases, cached_cats, allow_new, len(new_categories), max_new) llm_decisions = self._llm_review_ambiguous(ambiguous_cases, cached_cats, allow_new, len(new_categories), max_new)
@@ -149,14 +153,25 @@
mapping[disc_name] = target mapping[disc_name] = target
if target not in snapped: if target not in snapped:
snapped[target] = cached_cats[target] snapped[target] = cached_cats[target]
logger.info(f"LLM snap: '{disc_name}' → '{target}'") reasoning = decision.get('reasoning', 'similar to existing')
logger.info(f"LLM snap: '{disc_name}' → '{target}' ({reasoning})")
elif decision['action'] == 'new': elif decision['action'] == 'new':
# Find original description # Find original description
disc_desc = next(desc for name, desc, _, _ in ambiguous_cases if name == disc_name) disc_desc = next(desc for name, desc, _, _ in ambiguous_cases if name == disc_name)
new_categories.append((disc_name, disc_desc)) new_categories.append((disc_name, disc_desc))
mapping[disc_name] = disc_name mapping[disc_name] = disc_name
snapped[disc_name] = disc_desc snapped[disc_name] = disc_desc
logger.info(f"LLM approved new: '{disc_name}'")
# Check if LLM recommends adding to cache
is_cache_worthy = decision.get('cache_worthy', False)
reasoning = decision.get('reasoning', 'new category')
if is_cache_worthy:
cache_worthy_additions.append((disc_name, disc_desc))
logger.info(f"LLM approved new + CACHE: '{disc_name}' ({reasoning})")
else:
logger.info(f"LLM approved new (run-only): '{disc_name}' ({reasoning})")
elif ambiguous_cases: elif ambiguous_cases:
# No LLM available → use heuristic fallback # No LLM available → use heuristic fallback
@@ -175,8 +190,8 @@
if best_match not in snapped: if best_match not in snapped:
snapped[best_match] = cached_cats[best_match] snapped[best_match] = cached_cats[best_match]
logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new)") logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new, {len(cache_worthy_additions)} cache-worthy)")
return snapped, mapping return snapped, mapping, cache_worthy_additions
def _find_best_match( def _find_best_match(
self, self,
@@ -310,23 +325,34 @@ CONTEXT:
TASK: TASK:
For each ambiguous case, decide: For each ambiguous case, decide:
1. "snap" - If semantically similar enough to cached category (even if not perfect match) 1. "snap" - If semantically similar enough to cached category
2. "new" - If genuinely distinct and worth adding (only if slots available) 2. "new" - If genuinely distinct and worth adding for THIS RUN
ADDITIONALLY:
For "new" categories, decide if they should be added to the CACHE for future mailboxes:
- "cache_worthy": true - High-quality, reusable category (e.g., "Customer Support", "Sales")
- "cache_worthy": false - Mailbox-specific, not broadly useful (e.g., "Project X Updates")
GUIDELINES: GUIDELINES:
- PREFER snapping to maintain consistency across mailboxes - PREFER snapping to maintain consistency across mailboxes
- Only approve "new" if category serves a clearly distinct purpose - Only approve "new" if category serves a clearly distinct purpose
- Consider: Will users benefit from separating this vs merging with existing? - Be VERY selective with cache_worthy - only approve universally useful categories
- Be conservative with "new" - consolidation is better than fragmentation - Consider: Would this category be useful across different users' mailboxes?
- Mailbox-specific categories can be "new" without being cache_worthy
Return JSON: Return JSON:
{{ {{
"CategoryName": {{"action": "snap"|"new", "target": "CachedCategoryName"}}, "CategoryName": {{
"action": "snap"|"new",
"target": "CachedCategoryName",
"cache_worthy": true|false,
"reasoning": "brief explanation"
}},
... ...
}} }}
For "snap": target = cached category to snap to For "snap": target = cached category to snap to, cache_worthy = false (not applicable)
For "new": target = same as CategoryName (keeps original) For "new": target = same as CategoryName, cache_worthy = true/false based on reusability
JSON: JSON:
""" """

View File

@@ -105,7 +105,7 @@ class CalibrationAnalyzer:
max_new = self.config.get('cache_max_new', 3) max_new = self.config.get('cache_max_new', 3)
logger.info(f"Snapping to cached categories (threshold={similarity_threshold}, allow_new={allow_new}, max_new={max_new})") logger.info(f"Snapping to cached categories (threshold={similarity_threshold}, allow_new={allow_new}, max_new={max_new})")
final_categories, snap_mapping = self.category_cache.snap_to_cache( final_categories, snap_mapping, cache_worthy = self.category_cache.snap_to_cache(
discovered_categories, discovered_categories,
similarity_threshold=similarity_threshold, similarity_threshold=similarity_threshold,
allow_new=allow_new, allow_new=allow_new,
@@ -120,11 +120,19 @@
logger.info(f"After cache snap: {len(final_categories)} categories") logger.info(f"After cache snap: {len(final_categories)} categories")
discovered_categories = final_categories discovered_categories = final_categories
# Update cache with usage counts # Update cache with usage counts AND add cache-worthy new categories
category_counts = {} category_counts = {}
for _, cat in email_labels: for _, cat in email_labels:
category_counts[cat] = category_counts.get(cat, 0) + 1 category_counts[cat] = category_counts.get(cat, 0) + 1
self.category_cache.update_cache(discovered_categories, category_counts)
# Add cache-worthy categories to persistent cache
if cache_worthy:
cache_additions = {name: desc for name, desc in cache_worthy}
logger.info(f"Adding {len(cache_worthy)} LLM-approved categories to persistent cache: {list(cache_additions.keys())}")
self.category_cache.update_cache(cache_additions, category_counts)
else:
# Just update usage counts for existing categories
self.category_cache.update_cache(discovered_categories, category_counts)
return discovered_categories, email_labels return discovered_categories, email_labels