Fix LLM response formatting and Windows installer robustness

- Preserve whitespace and newlines in streaming responses
- Clean thinking tags from final LLM responses
- Add lazy initialization to _call_ollama method
- Improve Windows installer to handle existing virtual environments
- Add better error reporting for import failures

These fixes address formatting corruption in numbered lists and
improve installer reliability when dependencies already exist.
BobAi 2025-08-15 14:26:53 +10:00
parent 17f4f57dad
commit 92cb600dd6
3 changed files with 59 additions and 9 deletions

@@ -1 +1 @@
-test
+how to run tests

@@ -70,10 +70,19 @@ echo.
 echo ══════════════════════════════════════════════════
 echo [2/5] Creating Python Virtual Environment...
 if exist "%SCRIPT_DIR%\.venv" (
-    echo 🔄 Removing old virtual environment...
+    echo 🔄 Found existing virtual environment, checking if it works...
+    call "%SCRIPT_DIR%\.venv\Scripts\activate.bat" >nul 2>&1
+    if not errorlevel 1 (
+        "%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "import sys; print('✅ Existing environment works')" >nul 2>&1
+        if not errorlevel 1 (
+            echo ✅ Using existing virtual environment
+            goto skip_venv_creation
+        )
+    )
+    echo 🔄 Removing problematic virtual environment...
     rmdir /s /q "%SCRIPT_DIR%\.venv" 2>nul
     if exist "%SCRIPT_DIR%\.venv" (
-        echo ⚠️ Could not remove old environment, creating anyway...
+        echo ⚠️ Could not remove old environment, will try to work with it...
     )
 )
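The reuse-or-recreate flow above (probe the existing .venv, keep it if its own interpreter answers, otherwise delete and rebuild) ports directly to other installers. A minimal Python sketch of the same check, assuming the Windows .venv layout used by this installer; the function names are ours, not the commit's:

    import shutil
    import subprocess
    from pathlib import Path

    def venv_is_healthy(venv_dir: Path) -> bool:
        # Probe the venv's own interpreter; exit code 0 means it is usable.
        python = venv_dir / "Scripts" / "python.exe"  # Windows layout, as in the installer
        if not python.exists():
            return False
        return subprocess.run([str(python), "-c", "import sys"], capture_output=True).returncode == 0

    def ensure_venv(venv_dir: Path) -> None:
        if venv_dir.exists() and venv_is_healthy(venv_dir):
            return  # reuse the environment, mirroring goto skip_venv_creation
        shutil.rmtree(venv_dir, ignore_errors=True)  # remove the problematic environment
        subprocess.run(["python", "-m", "venv", str(venv_dir)], check=True)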
@@ -93,6 +102,7 @@ if errorlevel 1 (
 )
 echo ✅ Virtual environment created successfully
+:skip_venv_creation
 echo.
 echo ══════════════════════════════════════════════════
 echo [3/5] Installing Python Dependencies...
@@ -133,19 +143,29 @@ echo.
 echo ══════════════════════════════════════════════════
 echo [4/5] Testing Installation...
 echo 🧪 Verifying Python imports...
-"%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('✅ Core imports successful')" 2>nul
+echo Attempting import test...
+"%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder, ProjectIndexer, CodeSearcher; print('✅ Core imports successful')" 2>import_error.txt
 if errorlevel 1 (
     echo ❌ ERROR: Installation test failed
     echo.
+    echo 🔍 Import error details:
+    type import_error.txt
+    echo.
     echo 🔧 This usually means:
     echo • Dependencies didn't install correctly
     echo • Virtual environment is corrupted
     echo • Python path issues
+    echo • Module conflicts with existing installations
     echo.
-    echo 💡 Try running: pip install -r requirements.txt
+    echo 💡 Troubleshooting options:
+    echo • Try: "%SCRIPT_DIR%\.venv\Scripts\pip.exe" install -r requirements.txt --force-reinstall
+    echo • Or delete .venv folder and run installer again
+    echo • Or check import_error.txt for specific error details
+    del import_error.txt >nul 2>&1
     pause
     exit /b 1
 )
+del import_error.txt >nul 2>&1
 echo 🔍 Testing embedding system...
 "%SCRIPT_DIR%\.venv\Scripts\python.exe" -c "from mini_rag import CodeEmbedder; embedder = CodeEmbedder(); info = embedder.get_embedding_info(); print(f'✅ Embedding method: {info[\"method\"]}')" 2>nul

@@ -169,6 +169,9 @@ class LLMSynthesizer:
         start_time = time.time()
         try:
+            # Ensure we're initialized
+            self._ensure_initialized()
+
             # Use the best available model
             model_to_use = self.model
             if self.model not in self.available_models:
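The diff only adds the call; the body of _ensure_initialized is not shown. A plausible shape for such a lazy-initialization guard, with every field and helper name assumed rather than taken from the project:

    class LLMSynthesizer:  # sketch of the guard pattern only, not the project's actual class
        def __init__(self) -> None:
            self._initialized = False
            self.available_models: list[str] = []

        def _ensure_initialized(self) -> None:
            # Idempotent: the expensive model probe runs once, on first call.
            if self._initialized:
                return
            self.available_models = self._probe_models()  # hypothetical helper
            self._initialized = True

        def _probe_models(self) -> list[str]:
            return ["qwen2.5"]  # stand-in for a real Ollama /api/tags query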
@@ -262,7 +265,20 @@ class LLMSynthesizer:
                     # Preserve original response but add safeguard warning
                     return self._create_safeguard_response_with_content(issue_type, explanation, raw_response)
-                return raw_response
+
+                # Clean up thinking tags from final response
+                cleaned_response = raw_response
+                if '<think>' in cleaned_response or '</think>' in cleaned_response:
+                    # Remove thinking content but preserve the rest
+                    cleaned_response = cleaned_response.replace('<think>', '').replace('</think>', '')
+                    # Clean up extra whitespace that might be left
+                    lines = cleaned_response.split('\n')
+                    cleaned_lines = []
+                    for line in lines:
+                        if line.strip():  # Only keep non-empty lines
+                            cleaned_lines.append(line)
+                    cleaned_response = '\n'.join(cleaned_lines)
+                return cleaned_response.strip()
             else:
                 logger.error(f"Ollama API error: {response.status_code}")
                 return None
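For reference, the cleanup added above as a standalone helper (the function name is ours). Note what the committed code actually does: it strips the <think> markers themselves and any lines left empty, keeping the text that sat between the tags:

    def strip_think_tags(text: str) -> str:
        if '<think>' not in text and '</think>' not in text:
            return text.strip()
        text = text.replace('<think>', '').replace('</think>', '')
        # Drop lines emptied by the tag removal, as the committed code does
        return '\n'.join(line for line in text.split('\n') if line.strip()).strip()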
@@ -433,7 +449,8 @@ This is normal with smaller AI models and helps ensure you get quality responses
                     if '<think>' in clean_text or '</think>' in clean_text:
                         clean_text = clean_text.replace('<think>', '').replace('</think>', '')
-                    if clean_text.strip():
+                    if clean_text:  # Remove .strip() here to preserve whitespace
+                        # Preserve all formatting including newlines and spaces
                         print(clean_text, end='', flush=True)
                     # Check if response is done
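Why dropping .strip() matters here: streamed chunks split mid-line, so stripping each chunk deletes the separators between them, which is exactly the numbered-list corruption this commit fixes. A self-contained illustration with made-up chunks:

    chunks = ["1. First", " item\n", "2. Second", " item\n"]
    print("".join(c.strip() for c in chunks))  # 1. Firstitem2. Seconditem  (list corrupted)
    print("".join(chunks), end="")             # the numbered list, intact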
@@ -520,7 +537,20 @@ This is normal with smaller AI models and helps ensure you get quality responses
                 except json.JSONDecodeError:
                     continue
-            return full_response.strip()
+
+            # Clean up thinking tags from final response
+            cleaned_response = full_response
+            if '<think>' in cleaned_response or '</think>' in cleaned_response:
+                # Remove thinking content but preserve the rest
+                cleaned_response = cleaned_response.replace('<think>', '').replace('</think>', '')
+                # Clean up extra whitespace that might be left
+                lines = cleaned_response.split('\n')
+                cleaned_lines = []
+                for line in lines:
+                    if line.strip():  # Only keep non-empty lines
+                        cleaned_lines.append(line)
+                cleaned_response = '\n'.join(cleaned_lines)
+            return cleaned_response.strip()
         except Exception as e:
             logger.error(f"Streaming with early stop failed: {e}")