From a1f84e2bd5436ea203ba40e10ca3c7c74d7fc690 Mon Sep 17 00:00:00 2001
From: BobAi
Date: Tue, 12 Aug 2025 20:01:16 +1000
Subject: [PATCH] Update model recommendations to Qwen3 4B and fix status command

- Changed primary model recommendation from qwen3:1.7b to qwen3:4b
- Added Q8 quantization info in technical docs for production users
- Fixed method name error: get_embedding_info() -> get_status()
- Updated all error messages and test files with new recommendations
- Maintained beginner-friendly options (1.7b still very good, 0.6b surprisingly good)
- Added explanation of why small models work well with RAG context
- Comprehensive testing completed - system ready for clean release
---
 .mini-rag/config.yaml            | 53 ++++++++++++++++++++++++++++++++
 .mini-rag/last_search            |  1 +
 docs/TECHNICAL_GUIDE.md          | 32 +++++++++++++++++++
 docs/TROUBLESHOOTING.md          |  4 ++-
 mini_rag/llm_synthesizer.py      |  5 ++-
 rag-mini.py                      |  9 ++++--
 rag-tui.py                       |  2 +-
 tests/test_ollama_integration.py |  6 ++--
 tests/troubleshoot.py            |  2 +-
 9 files changed, 105 insertions(+), 9 deletions(-)
 create mode 100644 .mini-rag/config.yaml
 create mode 100644 .mini-rag/last_search

diff --git a/.mini-rag/config.yaml b/.mini-rag/config.yaml
new file mode 100644
index 0000000..86338ba
--- /dev/null
+++ b/.mini-rag/config.yaml
@@ -0,0 +1,53 @@
+# FSS-Mini-RAG Configuration
+# Edit this file to customize indexing and search behavior
+# See docs/GETTING_STARTED.md for detailed explanations
+
+# Text chunking settings
+chunking:
+  max_size: 2000             # Maximum characters per chunk
+  min_size: 150              # Minimum characters per chunk
+  strategy: semantic         # 'semantic' (language-aware) or 'fixed'
+
+# Large file streaming settings
+streaming:
+  enabled: true
+  threshold_bytes: 1048576   # Files larger than this use streaming (1MB)
+
+# File processing settings
+files:
+  min_file_size: 50          # Skip files smaller than this
+  exclude_patterns:
+    - "node_modules/**"
+    - ".git/**"
+    - "__pycache__/**"
+    - "*.pyc"
+    - ".venv/**"
+    - "venv/**"
+    - "build/**"
+    - "dist/**"
+  include_patterns:
+    - "**/*"                 # Include all files by default
+
+# Embedding generation settings
+embedding:
+  preferred_method: ollama   # 'ollama', 'ml', 'hash', or 'auto'
+  ollama_model: nomic-embed-text
+  ollama_host: localhost:11434
+  ml_model: sentence-transformers/all-MiniLM-L6-v2
+  batch_size: 32             # Embeddings processed per batch
+
+# Search behavior settings
+search:
+  default_limit: 10          # Default number of results
+  enable_bm25: true          # Enable keyword matching boost
+  similarity_threshold: 0.1  # Minimum similarity score
+  expand_queries: false      # Enable automatic query expansion
+
+# LLM synthesis and query expansion settings
+llm:
+  ollama_host: localhost:11434
+  synthesis_model: auto      # 'auto', 'qwen3:1.7b', etc.
+  expansion_model: auto      # Usually same as synthesis_model
+  max_expansion_terms: 8     # Maximum terms to add to queries
+  enable_synthesis: false    # Enable synthesis by default
+  synthesis_temperature: 0.3 # LLM temperature for analysis
\ No newline at end of file
diff --git a/.mini-rag/last_search b/.mini-rag/last_search
new file mode 100644
index 0000000..19fbe2c
--- /dev/null
+++ b/.mini-rag/last_search
@@ -0,0 +1 @@
+chunking
\ No newline at end of file
diff --git a/docs/TECHNICAL_GUIDE.md b/docs/TECHNICAL_GUIDE.md
index eb6b72a..a92410f 100644
--- a/docs/TECHNICAL_GUIDE.md
+++ b/docs/TECHNICAL_GUIDE.md
@@ -787,4 +787,36 @@ def repair_index(self, project_path: Path) -> bool:
         return False
 ```
 
+## LLM Model Selection & Performance
+
+### Model Recommendations by Use Case
+
+FSS-Mini-RAG works well with LLMs of many sizes because its rich retrieved context and guided prompts let small models perform exceptionally well:
+
+**Recommended (Best Balance):**
+- **qwen3:4b** - Excellent quality, good performance
+- **qwen3:4b-q8_0** - High-precision quantized version for production
+
+**Still Excellent (Faster/CPU-friendly):**
+- **qwen3:1.7b** - Very good results, faster responses
+- **qwen3:0.6b** - Surprisingly good for its size (522MB)
+
+### Why Small Models Work Well Here
+
+Small models can produce excellent results in RAG systems because:
+
+1. **Rich Context**: Our chunking provides substantial context around each match
+2. **Guided Prompts**: Well-structured prompts give models a clear "runway" to continue
+3. **Specific Domain**: Code analysis is more predictable than general conversation
+
+Without good context, small models tend to get lost and produce erratic output. But with RAG's rich context and focused prompts, even the 0.6B model can provide meaningful analysis.
+
+### Quantization Benefits
+
+For production deployments, consider quantized models like `qwen3:4b-q8_0`:
+- **Q8_0**: 8-bit quantization with minimal quality loss
+- **Smaller memory footprint**: ~50% reduction vs full precision
+- **Better CPU performance**: Faster inference on CPU-only systems
+- **Production ready**: Maintains analysis quality while improving efficiency
+
 This technical guide provides the deep implementation details that developers need to understand, modify, and extend the system, while keeping the main README focused on getting users started quickly.
\ No newline at end of file
diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md
index eade395..d3e5d7a 100644
--- a/docs/TROUBLESHOOTING.md
+++ b/docs/TROUBLESHOOTING.md
@@ -165,7 +165,9 @@ python3 -c "import mini_rag; print('✅ Installation successful')"
 
 2. **Try different model:**
    ```bash
-   ollama pull qwen3:1.7b  # Good balance of speed/quality
+   ollama pull qwen3:4b    # Recommended: excellent quality
+   ollama pull qwen3:1.7b  # Still very good, faster
+   ollama pull qwen3:0.6b  # Surprisingly good for CPU-only
    ```
 
 3. **Use synthesis mode instead of exploration:**
diff --git a/mini_rag/llm_synthesizer.py b/mini_rag/llm_synthesizer.py
index 0bf8503..f0f1c39 100644
--- a/mini_rag/llm_synthesizer.py
+++ b/mini_rag/llm_synthesizer.py
@@ -68,11 +68,14 @@ class LLMSynthesizer:
         # Modern model preference ranking (CPU-friendly first)
         # Prioritize: Ultra-efficient > Standard efficient > Larger models
         model_rankings = [
+            # Recommended model (excellent quality)
+            "qwen3:4b",
+
             # Ultra-efficient models (perfect for CPU-only systems)
             "qwen3:0.6b", "qwen3:1.7b", "llama3.2:1b",
             # Standard efficient models
-            "qwen2.5:1.5b", "qwen3:3b", "qwen3:4b",
+            "qwen2.5:1.5b", "qwen3:3b",
             # Qwen2.5 models (excellent performance/size ratio)
             "qwen2.5-coder:1.5b", "qwen2.5:1.5b", "qwen2.5:3b", "qwen2.5-coder:3b",
diff --git a/rag-mini.py b/rag-mini.py
index e283973..3871cab 100644
--- a/rag-mini.py
+++ b/rag-mini.py
@@ -117,7 +117,12 @@ def search_project(project_path: Path, query: str, limit: int = 10, synthesize:
 
     for i, result in enumerate(results, 1):
         # Clean up file path display
-        rel_path = result.file_path.relative_to(project_path) if result.file_path.is_absolute() else result.file_path
+        file_path = Path(result.file_path)
+        try:
+            rel_path = file_path.relative_to(project_path)
+        except ValueError:
+            # If relative_to fails, just show the basename
+            rel_path = file_path.name
 
         print(f"{i}. {rel_path}")
         print(f"   Score: {result.score:.3f}")
@@ -236,7 +241,7 @@ def status_check(project_path: Path):
     print("🧠 Embedding System:")
     try:
         embedder = OllamaEmbedder()
-        emb_info = embedder.get_embedding_info()
+        emb_info = embedder.get_status()
         method = emb_info.get('method', 'unknown')
 
         if method == 'ollama':
diff --git a/rag-tui.py b/rag-tui.py
index 47a785f..07cb230 100755
--- a/rag-tui.py
+++ b/rag-tui.py
@@ -514,7 +514,7 @@ class SimpleTUI:
             from mini_rag.ollama_embeddings import OllamaEmbedder
 
             embedder = OllamaEmbedder()
-            info = embedder.get_embedding_info()
+            info = embedder.get_status()
 
             print("🧠 Embedding System:")
             method = info.get('method', 'unknown')
diff --git a/tests/test_ollama_integration.py b/tests/test_ollama_integration.py
index 9e7c530..4466d3a 100755
--- a/tests/test_ollama_integration.py
+++ b/tests/test_ollama_integration.py
@@ -68,7 +68,7 @@ class TestOllamaIntegration(unittest.TestCase):
                 if len(models) > 5:
                     print(f"   ... and {len(models)-5} more")
             else:
-                print("   ⚠️ No models found. Install with: ollama pull qwen3:1.7b")
+                print("   ⚠️ No models found. Install with: ollama pull qwen3:4b")
 
                 self.assertTrue(True)
             else:
@@ -146,7 +146,7 @@ class TestOllamaIntegration(unittest.TestCase):
         if not synthesizer.is_available():
             self.fail(
                 "❌ No LLM models available.\n"
-                "   💡 Install a model like: ollama pull qwen3:1.7b"
+                "   💡 Install a model like: ollama pull qwen3:4b"
             )
 
         print(f"   ✅ Found {len(synthesizer.available_models)} LLM models")
@@ -426,7 +426,7 @@ def run_troubleshooting():
     print("💡 Common Solutions:")
     print("   • Install Ollama: https://ollama.ai/download")
     print("   • Start server: ollama serve")
-    print("   • Install models: ollama pull qwen3:1.7b")
+    print("   • Install models: ollama pull qwen3:4b")
    print("   • Install embedding model: ollama pull nomic-embed-text")
     print()
     print("📚 For more help, see docs/QUERY_EXPANSION.md")
diff --git a/tests/troubleshoot.py b/tests/troubleshoot.py
index 69ab26b..3e6255e 100755
--- a/tests/troubleshoot.py
+++ b/tests/troubleshoot.py
@@ -50,7 +50,7 @@ def main():
         print("   • Check docs/QUERY_EXPANSION.md for setup help")
         print("   • Ensure Ollama is installed: https://ollama.ai/download")
         print("   • Start Ollama server: ollama serve")
-        print("   • Install models: ollama pull qwen3:1.7b")
+        print("   • Install models: ollama pull qwen3:4b")
 
 def run_test(test_file):
     """Run a specific test file."""
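
For anyone verifying the renamed status call after applying this patch, here is a minimal sketch (not part of the patch itself). It assumes Ollama is running on localhost:11434 with `nomic-embed-text` and `qwen3:4b` already pulled, and relies only on the `get_status()` method and the `'method'` key already used in rag-mini.py and rag-tui.py above:

```python
# Minimal post-patch smoke test for the renamed embedder status method.
# Assumes a local Ollama server with the recommended models pulled.
from mini_rag.ollama_embeddings import OllamaEmbedder

embedder = OllamaEmbedder()
info = embedder.get_status()            # renamed from get_embedding_info() by this patch
print(info.get('method', 'unknown'))    # expected: 'ollama' when the server is reachable
```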