Add LLM-driven cache evolution - selective category persistence
The LLM now decides which new categories are added to the persistent cache for future mailbox runs and which stay temporary (run-only).

ENHANCED LLM REVIEW:
- New field: "cache_worthy" (true/false) for each "new" category
- LLM judges: "Is this category useful across different mailboxes?"
- Examples:
  - "Customer Support" → cache_worthy: true (universal)
  - "Project X Updates" → cache_worthy: false (mailbox-specific)

CACHE EVOLUTION:
- cache_worthy=true → Added to persistent cache for future runs
- cache_worthy=false → Used for current run only, not cached
- First run (empty cache) → All categories treated as cache-worthy
- LLM reasoning logged for transparency

INTELLIGENT GROWTH:
- Cache grows organically with high-quality, reusable categories
- Prevents pollution with mailbox-specific categories
- Maintains cross-mailbox consistency while allowing natural evolution
- LLM balances: consistency (snap existing) vs expansion (add worthy)

SINGLE LLM CALL EFFICIENCY:
- Same ~4 second LLM call now handles:
  1. Snap vs new decision
  2. Cache persistence decision
  3. Reasoning for both
- No additional overhead for cache evolution

Result: Cache evolves intelligently over time, collecting universally useful categories while filtering out temporary/specific ones.
This commit is contained in:
parent
eab378409e
commit
fa09d14e52
@ -74,7 +74,7 @@ class CategoryCache:
|
|||||||
allow_new: bool = True,
|
allow_new: bool = True,
|
||||||
max_new: int = 3,
|
max_new: int = 3,
|
||||||
llm_review_threshold: float = 0.5
|
llm_review_threshold: float = 0.5
|
||||||
) -> Tuple[Dict[str, str], Dict[str, str]]:
|
) -> Tuple[Dict[str, str], Dict[str, str], List[Tuple[str, str]]]:
|
||||||
"""
|
"""
|
||||||
Snap discovered categories to cached ones using multi-stage matching.
|
Snap discovered categories to cached ones using multi-stage matching.
|
||||||
|
|
||||||
@ -93,13 +93,15 @@ class CategoryCache:
|
|||||||
llm_review_threshold: Min score to trigger LLM review (default: 0.5)
|
llm_review_threshold: Min score to trigger LLM review (default: 0.5)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(snapped_categories, mapping) where:
|
(snapped_categories, mapping, cache_worthy) where:
|
||||||
- snapped_categories: Final category set (from cache + new)
|
- snapped_categories: Final category set (from cache + new)
|
||||||
- mapping: {discovered_name: final_name} for all discovered
|
- mapping: {discovered_name: final_name} for all discovered
|
||||||
|
- cache_worthy: List of (name, desc) for categories to add to persistent cache
|
||||||
"""
|
"""
|
||||||
if not self.cache:
|
if not self.cache:
|
||||||
logger.info("Empty cache, using all discovered categories")
|
logger.info("Empty cache, using all discovered categories")
|
||||||
return discovered, {name: name for name in discovered}
|
# First run - all categories can be cache-worthy
|
||||||
|
return discovered, {name: name for name in discovered}, list(discovered.items())
|
||||||
|
|
||||||
snapped = {}
|
snapped = {}
|
||||||
mapping = {}
|
mapping = {}
|
||||||
@ -139,6 +141,8 @@ class CategoryCache:
|
|||||||
logger.warning(f"Force review: '{disc_name}' (max_new exceeded, score: {best_score:.2f})")
|
logger.warning(f"Force review: '{disc_name}' (max_new exceeded, score: {best_score:.2f})")
|
||||||
|
|
||||||
# LLM Review for ambiguous cases
|
# LLM Review for ambiguous cases
|
||||||
|
cache_worthy_additions = [] # Track categories to add to persistent cache
|
||||||
|
|
||||||
if ambiguous_cases and self.llm_provider:
|
if ambiguous_cases and self.llm_provider:
|
||||||
logger.info(f"Requesting LLM review for {len(ambiguous_cases)} ambiguous cases...")
|
logger.info(f"Requesting LLM review for {len(ambiguous_cases)} ambiguous cases...")
|
||||||
llm_decisions = self._llm_review_ambiguous(ambiguous_cases, cached_cats, allow_new, len(new_categories), max_new)
|
llm_decisions = self._llm_review_ambiguous(ambiguous_cases, cached_cats, allow_new, len(new_categories), max_new)
|
||||||
@ -149,14 +153,25 @@ class CategoryCache:
|
|||||||
mapping[disc_name] = target
|
mapping[disc_name] = target
|
||||||
if target not in snapped:
|
if target not in snapped:
|
||||||
snapped[target] = cached_cats[target]
|
snapped[target] = cached_cats[target]
|
||||||
logger.info(f"LLM snap: '{disc_name}' → '{target}'")
|
reasoning = decision.get('reasoning', 'similar to existing')
|
||||||
|
logger.info(f"LLM snap: '{disc_name}' → '{target}' ({reasoning})")
|
||||||
|
|
||||||
elif decision['action'] == 'new':
|
elif decision['action'] == 'new':
|
||||||
# Find original description
|
# Find original description
|
||||||
disc_desc = next(desc for name, desc, _, _ in ambiguous_cases if name == disc_name)
|
disc_desc = next(desc for name, desc, _, _ in ambiguous_cases if name == disc_name)
|
||||||
new_categories.append((disc_name, disc_desc))
|
new_categories.append((disc_name, disc_desc))
|
||||||
mapping[disc_name] = disc_name
|
mapping[disc_name] = disc_name
|
||||||
snapped[disc_name] = disc_desc
|
snapped[disc_name] = disc_desc
|
||||||
logger.info(f"LLM approved new: '{disc_name}'")
|
|
||||||
|
# Check if LLM recommends adding to cache
|
||||||
|
is_cache_worthy = decision.get('cache_worthy', False)
|
||||||
|
reasoning = decision.get('reasoning', 'new category')
|
||||||
|
|
||||||
|
if is_cache_worthy:
|
||||||
|
cache_worthy_additions.append((disc_name, disc_desc))
|
||||||
|
logger.info(f"LLM approved new + CACHE: '{disc_name}' ({reasoning})")
|
||||||
|
else:
|
||||||
|
logger.info(f"LLM approved new (run-only): '{disc_name}' ({reasoning})")
|
||||||
|
|
||||||
elif ambiguous_cases:
|
elif ambiguous_cases:
|
||||||
# No LLM available → use heuristic fallback
|
# No LLM available → use heuristic fallback
|
||||||
@ -175,8 +190,8 @@ class CategoryCache:
|
|||||||
if best_match not in snapped:
|
if best_match not in snapped:
|
||||||
snapped[best_match] = cached_cats[best_match]
|
snapped[best_match] = cached_cats[best_match]
|
||||||
|
|
||||||
logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new)")
|
logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new, {len(cache_worthy_additions)} cache-worthy)")
|
||||||
return snapped, mapping
|
return snapped, mapping, cache_worthy_additions
|
||||||
|
|
||||||
def _find_best_match(
|
def _find_best_match(
|
||||||
self,
|
self,
|
||||||
@ -310,23 +325,34 @@ CONTEXT:
|
|||||||
|
|
||||||
TASK:
|
TASK:
|
||||||
For each ambiguous case, decide:
|
For each ambiguous case, decide:
|
||||||
1. "snap" - If semantically similar enough to cached category (even if not perfect match)
|
1. "snap" - If semantically similar enough to cached category
|
||||||
2. "new" - If genuinely distinct and worth adding (only if slots available)
|
2. "new" - If genuinely distinct and worth adding for THIS RUN
|
||||||
|
|
||||||
|
ADDITIONALLY:
|
||||||
|
For "new" categories, decide if they should be added to the CACHE for future mailboxes:
|
||||||
|
- "cache_worthy": true - High-quality, reusable category (e.g., "Customer Support", "Sales")
|
||||||
|
- "cache_worthy": false - Mailbox-specific, not broadly useful (e.g., "Project X Updates")
|
||||||
|
|
||||||
GUIDELINES:
|
GUIDELINES:
|
||||||
- PREFER snapping to maintain consistency across mailboxes
|
- PREFER snapping to maintain consistency across mailboxes
|
||||||
- Only approve "new" if category serves a clearly distinct purpose
|
- Only approve "new" if category serves a clearly distinct purpose
|
||||||
- Consider: Will users benefit from separating this vs merging with existing?
|
- Be VERY selective with cache_worthy - only approve universally useful categories
|
||||||
- Be conservative with "new" - consolidation is better than fragmentation
|
- Consider: Would this category be useful across different users' mailboxes?
|
||||||
|
- Mailbox-specific categories can be "new" without being cache_worthy
|
||||||
|
|
||||||
Return JSON:
|
Return JSON:
|
||||||
{{
|
{{
|
||||||
"CategoryName": {{"action": "snap"|"new", "target": "CachedCategoryName"}},
|
"CategoryName": {{
|
||||||
|
"action": "snap"|"new",
|
||||||
|
"target": "CachedCategoryName",
|
||||||
|
"cache_worthy": true|false,
|
||||||
|
"reasoning": "brief explanation"
|
||||||
|
}},
|
||||||
...
|
...
|
||||||
}}
|
}}
|
||||||
|
|
||||||
For "snap": target = cached category to snap to
|
For "snap": target = cached category to snap to, cache_worthy = false (not applicable)
|
||||||
For "new": target = same as CategoryName (keeps original)
|
For "new": target = same as CategoryName, cache_worthy = true/false based on reusability
|
||||||
|
|
||||||
JSON:
|
JSON:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -105,7 +105,7 @@ class CalibrationAnalyzer:
|
|||||||
max_new = self.config.get('cache_max_new', 3)
|
max_new = self.config.get('cache_max_new', 3)
|
||||||
|
|
||||||
logger.info(f"Snapping to cached categories (threshold={similarity_threshold}, allow_new={allow_new}, max_new={max_new})")
|
logger.info(f"Snapping to cached categories (threshold={similarity_threshold}, allow_new={allow_new}, max_new={max_new})")
|
||||||
final_categories, snap_mapping = self.category_cache.snap_to_cache(
|
final_categories, snap_mapping, cache_worthy = self.category_cache.snap_to_cache(
|
||||||
discovered_categories,
|
discovered_categories,
|
||||||
similarity_threshold=similarity_threshold,
|
similarity_threshold=similarity_threshold,
|
||||||
allow_new=allow_new,
|
allow_new=allow_new,
|
||||||
@ -120,11 +120,19 @@ class CalibrationAnalyzer:
|
|||||||
logger.info(f"After cache snap: {len(final_categories)} categories")
|
logger.info(f"After cache snap: {len(final_categories)} categories")
|
||||||
discovered_categories = final_categories
|
discovered_categories = final_categories
|
||||||
|
|
||||||
# Update cache with usage counts
|
# Update cache with usage counts AND add cache-worthy new categories
|
||||||
category_counts = {}
|
category_counts = {}
|
||||||
for _, cat in email_labels:
|
for _, cat in email_labels:
|
||||||
category_counts[cat] = category_counts.get(cat, 0) + 1
|
category_counts[cat] = category_counts.get(cat, 0) + 1
|
||||||
self.category_cache.update_cache(discovered_categories, category_counts)
|
|
||||||
|
# Add cache-worthy categories to persistent cache
|
||||||
|
if cache_worthy:
|
||||||
|
cache_additions = {name: desc for name, desc in cache_worthy}
|
||||||
|
logger.info(f"Adding {len(cache_worthy)} LLM-approved categories to persistent cache: {list(cache_additions.keys())}")
|
||||||
|
self.category_cache.update_cache(cache_additions, category_counts)
|
||||||
|
else:
|
||||||
|
# Just update usage counts for existing categories
|
||||||
|
self.category_cache.update_cache(discovered_categories, category_counts)
|
||||||
|
|
||||||
return discovered_categories, email_labels
|
return discovered_categories, email_labels
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user