diff --git a/src/calibration/category_cache.py b/src/calibration/category_cache.py
index c738e8c..241068d 100644
--- a/src/calibration/category_cache.py
+++ b/src/calibration/category_cache.py
@@ -74,7 +74,7 @@ class CategoryCache:
         allow_new: bool = True,
         max_new: int = 3,
         llm_review_threshold: float = 0.5
-    ) -> Tuple[Dict[str, str], Dict[str, str]]:
+    ) -> Tuple[Dict[str, str], Dict[str, str], List[Tuple[str, str]]]:
         """
         Snap discovered categories to cached ones using multi-stage matching.
 
@@ -93,13 +93,15 @@ class CategoryCache:
             llm_review_threshold: Min score to trigger LLM review (default: 0.5)
 
         Returns:
-            (snapped_categories, mapping) where:
+            (snapped_categories, mapping, cache_worthy) where:
             - snapped_categories: Final category set (from cache + new)
             - mapping: {discovered_name: final_name} for all discovered
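+            - cache_worthy: List of (name, desc) to add to the persistent cache: every discovered category when the cache is empty, otherwise only new categories the LLM flagged as cache_worthy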
         """
         if not self.cache:
             logger.info("Empty cache, using all discovered categories")
-            return discovered, {name: name for name in discovered}
+            # First run (empty cache): nothing to snap to, so every discovered category is returned as cache-worthy
+            return discovered, {name: name for name in discovered}, list(discovered.items())
 
         snapped = {}
         mapping = {}
@@ -139,6 +141,8 @@ class CategoryCache:
                     logger.warning(f"Force review: '{disc_name}' (max_new exceeded, score: {best_score:.2f})")
 
         # LLM Review for ambiguous cases
+        cache_worthy_additions = []  # Track categories to add to persistent cache
+
         if ambiguous_cases and self.llm_provider:
             logger.info(f"Requesting LLM review for {len(ambiguous_cases)} ambiguous cases...")
             llm_decisions = self._llm_review_ambiguous(ambiguous_cases, cached_cats, allow_new, len(new_categories), max_new)
@@ -149,14 +153,25 @@ class CategoryCache:
                     mapping[disc_name] = target
                     if target not in snapped:
                         snapped[target] = cached_cats[target]
-                    logger.info(f"LLM snap: '{disc_name}' → '{target}'")
+                    reasoning = decision.get('reasoning', 'similar to existing')
+                    logger.info(f"LLM snap: '{disc_name}' → '{target}' ({reasoning})")
+
                 elif decision['action'] == 'new':
                     # Find original description
                     disc_desc = next(desc for name, desc, _, _ in ambiguous_cases if name == disc_name)
                     new_categories.append((disc_name, disc_desc))
                     mapping[disc_name] = disc_name
                     snapped[disc_name] = disc_desc
-                    logger.info(f"LLM approved new: '{disc_name}'")
+
+                    # Check if LLM recommends adding to cache
+                    is_cache_worthy = decision.get('cache_worthy', False)
+                    reasoning = decision.get('reasoning', 'new category')
+
+                    if is_cache_worthy:
+                        cache_worthy_additions.append((disc_name, disc_desc))
+                        logger.info(f"LLM approved new + CACHE: '{disc_name}' ({reasoning})")
+                    else:
+                        logger.info(f"LLM approved new (run-only): '{disc_name}' ({reasoning})")
 
         elif ambiguous_cases:
             # No LLM available → use heuristic fallback
@@ -175,8 +190,8 @@ class CategoryCache:
                     if best_match not in snapped:
                         snapped[best_match] = cached_cats[best_match]
 
-        logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new)")
-        return snapped, mapping
+        logger.info(f"Snapping result: {len(snapped)} final categories ({len(new_categories)} new, {len(cache_worthy_additions)} cache-worthy)")
+        return snapped, mapping, cache_worthy_additions
 
     def _find_best_match(
         self,
@@ -310,23 +325,34 @@ CONTEXT:
 
 TASK:
 For each ambiguous case, decide:
-1. "snap" - If semantically similar enough to cached category (even if not perfect match)
-2. "new" - If genuinely distinct and worth adding (only if slots available)
+1. "snap" - If semantically similar enough to cached category
+2. "new" - If genuinely distinct and worth adding for THIS RUN
+
+ADDITIONALLY:
+For "new" categories, decide if they should be added to the CACHE for future mailboxes:
+- "cache_worthy": true - High-quality, reusable category (e.g., "Customer Support", "Sales")
+- "cache_worthy": false - Mailbox-specific, not broadly useful (e.g., "Project X Updates")
 
 GUIDELINES:
 - PREFER snapping to maintain consistency across mailboxes
 - Only approve "new" if category serves a clearly distinct purpose
-- Consider: Will users benefit from separating this vs merging with existing?
-- Be conservative with "new" - consolidation is better than fragmentation
+- Be VERY selective with cache_worthy - only approve universally useful categories
+- Consider: Would this category be useful across different users' mailboxes?
+- Mailbox-specific categories can be "new" without being cache_worthy
 
 Return JSON:
 {{
-  "CategoryName": {{"action": "snap"|"new", "target": "CachedCategoryName"}},
+  "CategoryName": {{
+    "action": "snap"|"new",
+    "target": "CachedCategoryName",
+    "cache_worthy": true|false,
+    "reasoning": "brief explanation"
+  }},
   ...
 }}
 
-For "snap": target = cached category to snap to
-For "new": target = same as CategoryName (keeps original)
+For "snap": target = cached category to snap to, cache_worthy = false (not applicable)
+For "new": target = same as CategoryName, cache_worthy = true/false based on reusability
 
 JSON:
 """
diff --git a/src/calibration/llm_analyzer.py b/src/calibration/llm_analyzer.py
index e7bcd38..dc03ff4 100644
--- a/src/calibration/llm_analyzer.py
+++ b/src/calibration/llm_analyzer.py
@@ -105,7 +105,7 @@ class CalibrationAnalyzer:
             max_new = self.config.get('cache_max_new', 3)
 
             logger.info(f"Snapping to cached categories (threshold={similarity_threshold}, allow_new={allow_new}, max_new={max_new})")
-            final_categories, snap_mapping = self.category_cache.snap_to_cache(
+            final_categories, snap_mapping, cache_worthy = self.category_cache.snap_to_cache(
                 discovered_categories,
                 similarity_threshold=similarity_threshold,
                 allow_new=allow_new,
@@ -120,11 +120,19 @@ class CalibrationAnalyzer:
             logger.info(f"After cache snap: {len(final_categories)} categories")
             discovered_categories = final_categories
 
-            # Update cache with usage counts
+            # Update cache with usage counts AND add cache-worthy new categories
             category_counts = {}
             for _, cat in email_labels:
                 category_counts[cat] = category_counts.get(cat, 0) + 1
-            self.category_cache.update_cache(discovered_categories, category_counts)
+
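+            # Add cache-worthy categories to persistent cache (NOTE(review): only the additions are passed here - confirm usage counts for already-cached categories are still updated)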
+            if cache_worthy:
+                cache_additions = {name: desc for name, desc in cache_worthy}
+                logger.info(f"Adding {len(cache_worthy)} LLM-approved categories to persistent cache: {list(cache_additions.keys())}")
+                self.category_cache.update_cache(cache_additions, category_counts)
+            else:
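+                # No cache-worthy additions: pass the full post-snap category set (NOTE(review): this set can include run-only new categories - confirm update_cache does not persist them)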
+                self.category_cache.update_cache(discovered_categories, category_counts)
 
         return discovered_categories, email_labels