diff --git a/src/calibration/category_cache.py b/src/calibration/category_cache.py index c738e8c..241068d 100644 --- a/src/calibration/category_cache.py +++ b/src/calibration/category_cache.py @@ -74,7 +74,7 @@ class CategoryCache: allow_new: bool = True, max_new: int = 3, llm_review_threshold: float = 0.5 - ) -> Tuple[Dict[str, str], Dict[str, str]]: + ) -> Tuple[Dict[str, str], Dict[str, str], List[Tuple[str, str]]]: """ Snap discovered categories to cached ones using multi-stage matching. @@ -93,13 +93,15 @@ class CategoryCache: llm_review_threshold: Min score to trigger LLM review (default: 0.5) Returns: - (snapped_categories, mapping) where: + (snapped_categories, mapping, cache_worthy) where: - snapped_categories: Final category set (from cache + new) - mapping: {discovered_name: final_name} for all discovered + - cache_worthy: List of (name, desc) for categories to add to persistent cache """ if not self.cache: logger.info("Empty cache, using all discovered categories") - return discovered, {name: name for name in discovered} + # First run - all categories can be cache-worthy + return discovered, {name: name for name in discovered}, list(discovered.items()) snapped = {} mapping = {} @@ -139,6 +141,8 @@ class CategoryCache: logger.warning(f"Force review: '{disc_name}' (max_new exceeded, score: {best_score:.2f})") # LLM Review for ambiguous cases + cache_worthy_additions = [] # Track categories to add to persistent cache + if ambiguous_cases and self.llm_provider: logger.info(f"Requesting LLM review for {len(ambiguous_cases)} ambiguous cases...") llm_decisions = self._llm_review_ambiguous(ambiguous_cases, cached_cats, allow_new, len(new_categories), max_new) @@ -149,14 +153,25 @@ class CategoryCache: mapping[disc_name] = target if target not in snapped: snapped[target] = cached_cats[target] - logger.info(f"LLM snap: '{disc_name}' → '{target}'") + reasoning = decision.get('reasoning', 'similar to existing') + logger.info(f"LLM snap: '{disc_name}' → '{target}' ({reasoning})") + elif decision['action'] == 'new': # Find original description disc_desc = next(desc for name, desc, _, _ in ambiguous_cases if name == disc_name) new_categories.append((disc_name, disc_desc)) mapping[disc_name] = disc_name snapped[disc_name] = disc_desc - logger.info(f"LLM approved new: '{disc_name}'") + + # Check if LLM recommends adding to cache + is_cache_worthy = decision.get('cache_worthy', False) + reasoning = decision.get('reasoning', 'new category') + + if is_cache_worthy: + cache_worthy_additions.append((disc_name, disc_desc)) + logger.info(f"LLM approved new + CACHE: '{disc_name}' ({reasoning})") + else: + logger.info(f"LLM approved new (run-only): '{disc_name}' ({reasoning})") elif ambiguous_cases: # No LLM available → use heuristic fallback @@ -175,8 +190,8 @@ class CategoryCache: if best_match not in snapped: snapped[best_match] = cached_cats[best_match] - logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new)") - return snapped, mapping + logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new, {len(cache_worthy_additions)} cache-worthy)") + return snapped, mapping, cache_worthy_additions def _find_best_match( self, @@ -310,23 +325,34 @@ CONTEXT: TASK: For each ambiguous case, decide: -1. "snap" - If semantically similar enough to cached category (even if not perfect match) -2. "new" - If genuinely distinct and worth adding (only if slots available) +1. "snap" - If semantically similar enough to cached category +2. "new" - If genuinely distinct and worth adding for THIS RUN + +ADDITIONALLY: +For "new" categories, decide if they should be added to the CACHE for future mailboxes: +- "cache_worthy": true - High-quality, reusable category (e.g., "Customer Support", "Sales") +- "cache_worthy": false - Mailbox-specific, not broadly useful (e.g., "Project X Updates") GUIDELINES: - PREFER snapping to maintain consistency across mailboxes - Only approve "new" if category serves a clearly distinct purpose -- Consider: Will users benefit from separating this vs merging with existing? -- Be conservative with "new" - consolidation is better than fragmentation +- Be VERY selective with cache_worthy - only approve universally useful categories +- Consider: Would this category be useful across different users' mailboxes? +- Mailbox-specific categories can be "new" without being cache_worthy Return JSON: {{ - "CategoryName": {{"action": "snap"|"new", "target": "CachedCategoryName"}}, + "CategoryName": {{ + "action": "snap"|"new", + "target": "CachedCategoryName", + "cache_worthy": true|false, + "reasoning": "brief explanation" + }}, ... }} -For "snap": target = cached category to snap to -For "new": target = same as CategoryName (keeps original) +For "snap": target = cached category to snap to, cache_worthy = false (not applicable) +For "new": target = same as CategoryName, cache_worthy = true/false based on reusability JSON: """ diff --git a/src/calibration/llm_analyzer.py b/src/calibration/llm_analyzer.py index e7bcd38..dc03ff4 100644 --- a/src/calibration/llm_analyzer.py +++ b/src/calibration/llm_analyzer.py @@ -105,7 +105,7 @@ class CalibrationAnalyzer: max_new = self.config.get('cache_max_new', 3) logger.info(f"Snapping to cached categories (threshold={similarity_threshold}, allow_new={allow_new}, max_new={max_new})") - final_categories, snap_mapping = self.category_cache.snap_to_cache( + final_categories, snap_mapping, cache_worthy = self.category_cache.snap_to_cache( discovered_categories, similarity_threshold=similarity_threshold, allow_new=allow_new, @@ -120,11 +120,19 @@ class CalibrationAnalyzer: logger.info(f"After cache snap: {len(final_categories)} categories") discovered_categories = final_categories - # Update cache with usage counts + # Update cache with usage counts AND add cache-worthy new categories category_counts = {} for _, cat in email_labels: category_counts[cat] = category_counts.get(cat, 0) + 1 - self.category_cache.update_cache(discovered_categories, category_counts) + + # Add cache-worthy categories to persistent cache + if cache_worthy: + cache_additions = {name: desc for name, desc in cache_worthy} + logger.info(f"Adding {len(cache_worthy)} LLM-approved categories to persistent cache: {list(cache_additions.keys())}") + self.category_cache.update_cache(cache_additions, category_counts) + else: + # Just update usage counts for existing categories + self.category_cache.update_cache(discovered_categories, category_counts) return discovered_categories, email_labels