BobAi a84ff94fba Improve UX with streaming tokens, fix model references, and add icon integration
This comprehensive update enhances user experience with several key improvements:

## Enhanced Streaming & Thinking Display
- Implement real-time streaming with gray thinking tokens that collapse after completion (see the rendering sketch below)
- Fix thinking token redisplay bug with proper content filtering
- Add clear "AI Response:" headers to separate thinking from responses
- Enable streaming by default for better user engagement
- Keep thinking visible for exploration, collapse only for suggested questions
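
The terminal rendering behind this is plain ANSI styling plus a cursor rewind. Below is a minimal, self-contained sketch of the idea; the chunk list and the end-of-thinking marker are placeholders standing in for a real streamed LLM response, not the explorer's actual API:

```python
import sys

DIM_ITALIC = "\033[2m\033[3m"   # dim + italic: renders thinking as light gray
RESET = "\033[0m"
END_OF_THINKING = "<answer>"    # placeholder marker for when the model starts its answer

def stream_with_collapsible_thinking(chunks):
    """Print thinking chunks in gray italics, then erase them once the answer begins."""
    printed = "💭 AI Thinking:\n"
    thinking = True
    sys.stdout.write(f"{DIM_ITALIC}{printed}{RESET}")
    for chunk in chunks:
        if chunk == END_OF_THINKING:
            sys.stdout.write("\n")                    # return to column 0
            lines = printed.count("\n") + 1
            sys.stdout.write(f"\033[{lines}A\033[J")  # rewind over the thinking block and clear it
            print("AI Response:")
            thinking = False
            continue
        if thinking:
            printed += chunk
            sys.stdout.write(f"{DIM_ITALIC}{chunk}{RESET}")
        else:
            sys.stdout.write(chunk)                   # answer text in normal style
        sys.stdout.flush()
    print()

# Canned chunks standing in for a streamed response
stream_with_collapsible_thinking(
    ["Let me check the search results...\n", END_OF_THINKING, "Authentication is handled in auth.py."]
)
```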

## Natural Conversation Responses
- Convert clunky JSON exploration responses to natural, conversational format
- Improve exploration prompts for friendly, colleague-style interactions
- Update summary generation with better context handling
- Eliminate double response display issues

## Model Reference Updates
- Remove all llama3.2 references in favor of qwen3 models
- Fix non-existent qwen3:3b references, replace with proper model names
- Update model rankings to prioritize working qwen models across all components
- Ensure consistent model recommendations in docs and examples

## Cross-Platform Icon Integration
- Add desktop icon setup to Linux installer with a .desktop entry (sketched below)
- Add Windows shortcuts for desktop and Start Menu integration
- Improve installer user experience with visual branding
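
For reference, a hedged sketch of what the Linux side of this can look like: a small helper that writes a freedesktop `.desktop` entry into the user's application directory. The application name, command, and icon path below are illustrative placeholders, not the installer's actual values:

```python
from pathlib import Path

def install_desktop_entry(app_name: str = "Code Explorer",
                          exec_cmd: str = "code-explorer",
                          icon_path: str = "~/.local/share/icons/code-explorer.png") -> Path:
    """Write a freedesktop .desktop entry so the app appears in Linux menus."""
    entry = "\n".join([
        "[Desktop Entry]",
        "Type=Application",
        f"Name={app_name}",
        f"Exec={exec_cmd}",
        f"Icon={Path(icon_path).expanduser()}",
        "Terminal=true",
        "Categories=Development;Utility;",
    ]) + "\n"
    target = Path.home() / ".local/share/applications" / "code-explorer.desktop"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(entry)
    return target
```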

## Configuration & Navigation Fixes
- Fix "0" option in configuration menu to properly go back (see the sketch below)
- Improve configuration menu user-friendliness
- Update troubleshooting guides with correct model suggestions
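
A minimal sketch of the intended menu behavior (option labels are placeholders; the real menu lives in the configuration code): choosing "0" now returns to the previous screen instead of being treated as an invalid option:

```python
def configuration_menu() -> None:
    """Loop over configuration options until the user picks "0" to go back."""
    options = {"1": "Change synthesis model", "2": "Toggle streaming"}
    while True:
        print("\nConfiguration")
        for key in sorted(options):
            print(f"  {key}. {options[key]}")
        print("  0. Back")
        choice = input("Select an option: ").strip()
        if choice == "0":               # "0" exits the menu and returns to the caller
            return
        if choice in options:
            print(f"(would open: {options[choice]})")
        else:
            print("Invalid choice, please try again.")
```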

These changes significantly improve the beginner experience while maintaining
technical accuracy and system reliability.
2025-08-15 12:20:06 +10:00


#!/usr/bin/env python3
"""
Interactive Code Explorer with Thinking Mode
Provides multi-turn conversations with context memory for debugging and learning.
Perfect for exploring codebases with detailed reasoning and follow-up questions.
"""
import json
import logging
import time
from typing import List, Dict, Any, Optional
from pathlib import Path
from dataclasses import dataclass

try:
    from .llm_synthesizer import LLMSynthesizer, SynthesisResult
    from .search import CodeSearcher
    from .config import RAGConfig
except ImportError:
    # For direct testing
    from llm_synthesizer import LLMSynthesizer, SynthesisResult
    from search import CodeSearcher
    from config import RAGConfig

logger = logging.getLogger(__name__)


@dataclass
class ExplorationSession:
    """Track an exploration session with context history."""
    project_path: Path
    conversation_history: List[Dict[str, Any]]
    session_id: str
    started_at: float

    def add_exchange(self, question: str, search_results: List[Any], response: SynthesisResult):
        """Add a question/response exchange to the conversation history."""
        self.conversation_history.append({
            "timestamp": time.time(),
            "question": question,
            "search_results_count": len(search_results),
            "response": {
                "summary": response.summary,
                "key_points": response.key_points,
                "code_examples": response.code_examples,
                "suggested_actions": response.suggested_actions,
                "confidence": response.confidence
            }
        })


class CodeExplorer:
    """Interactive code exploration with thinking and context memory."""

    def __init__(self, project_path: Path, config: Optional[RAGConfig] = None):
        self.project_path = project_path
        self.config = config or RAGConfig()
        # Initialize components with thinking enabled
        self.searcher = CodeSearcher(project_path)
        self.synthesizer = LLMSynthesizer(
            ollama_url=f"http://{self.config.llm.ollama_host}",
            model=self.config.llm.synthesis_model,
            enable_thinking=True,  # Always enable thinking in explore mode
            config=self.config  # Pass config for model rankings
        )
        # Session management
        self.current_session: Optional[ExplorationSession] = None

    def start_exploration_session(self) -> bool:
        """Start a new exploration session."""
        # Simple availability check - don't do complex model restart logic
        if not self.synthesizer.is_available():
            print("❌ LLM service unavailable. Please check Ollama is running.")
            return False
        session_id = f"explore_{int(time.time())}"
        self.current_session = ExplorationSession(
            project_path=self.project_path,
            conversation_history=[],
            session_id=session_id,
            started_at=time.time()
        )
        print("🧠 Exploration Mode Started")
        print(f"Project: {self.project_path.name}")
        return True

    def explore_question(self, question: str, context_limit: int = 10) -> Optional[str]:
        """Explore a question with full thinking and context."""
        if not self.current_session:
            return "❌ No exploration session active. Start one first."
        # Search for relevant information
        search_start = time.time()
        results = self.searcher.search(
            question,
            top_k=context_limit,
            include_context=True,
            semantic_weight=0.7,
            bm25_weight=0.3
        )
        search_time = time.time() - search_start
        # Build enhanced prompt with conversation context
        synthesis_prompt = self._build_contextual_prompt(question, results)
        # Get thinking-enabled analysis
        synthesis_start = time.time()
        synthesis = self._synthesize_with_context(synthesis_prompt, results)
        synthesis_time = time.time() - synthesis_start
        # Add to conversation history
        self.current_session.add_exchange(question, results, synthesis)
        # Streaming already displayed the response; just return a minimal status line for the caller
        session_duration = time.time() - self.current_session.started_at
        exchange_count = len(self.current_session.conversation_history)
        status = (
            f"\n📊 Session: {session_duration/60:.1f}m | Question #{exchange_count} | "
            f"Results: {len(results)} | Time: {search_time+synthesis_time:.1f}s"
        )
        return status

    def _build_contextual_prompt(self, question: str, results: List[Any]) -> str:
        """Build a prompt that includes conversation context."""
        # Get recent conversation context (last 3 exchanges)
        context_summary = ""
        if self.current_session.conversation_history:
            recent_exchanges = self.current_session.conversation_history[-3:]
            context_parts = []
            for i, exchange in enumerate(recent_exchanges, 1):
                prev_q = exchange["question"]
                prev_summary = exchange["response"]["summary"]
                context_parts.append(f"Previous Q{i}: {prev_q}")
                context_parts.append(f"Previous A{i}: {prev_summary}")
            context_summary = "\n".join(context_parts)
        # Build search results context
        results_context = []
        for i, result in enumerate(results[:8], 1):
            file_path = result.file_path if hasattr(result, 'file_path') else 'unknown'
            content = result.content if hasattr(result, 'content') else str(result)
            score = result.score if hasattr(result, 'score') else 0.0
            results_context.append(f"""
Result {i} (Score: {score:.3f}):
File: {file_path}
Content: {content[:800]}{'...' if len(content) > 800 else ''}
""")
        results_text = "\n".join(results_context)
        # Create comprehensive exploration prompt with thinking
        prompt = f"""<think>
The user asked: "{question}"
Let me analyze what they're asking and look at the information I have available.
From the search results, I can see relevant information about:
{results_text[:500]}...
I should think about:
1. What the user is trying to understand or accomplish
2. What information from the search results is most relevant
3. How to explain this in a clear, educational way
4. What practical next steps would be helpful
Based on our conversation so far: {context_summary}
Let me create a helpful response that breaks this down clearly and gives them actionable guidance.
</think>
You're a helpful assistant exploring a project with someone. You're good at breaking down complex topics into understandable pieces and explaining things clearly.
PROJECT: {self.project_path.name}
PREVIOUS CONVERSATION:
{context_summary}
CURRENT QUESTION: "{question}"
RELEVANT INFORMATION FOUND:
{results_text}
Please provide a helpful, natural explanation that answers their question. Write as if you're having a friendly conversation with a colleague who's exploring this project.
Structure your response to include:
1. A clear explanation of what you found and how it answers their question
2. The most important insights from the information you discovered
3. Relevant examples or code patterns when helpful
4. Practical next steps they could take
Guidelines:
- Write in a conversational, friendly tone
- Be educational but not condescending
- Reference specific files and information when helpful
- Give practical, actionable suggestions
- Connect everything back to their original question
- Use natural language, not structured formats
- Break complex topics into understandable pieces
"""
        return prompt

    def _synthesize_with_context(self, prompt: str, results: List[Any]) -> SynthesisResult:
        """Synthesize results with full context and thinking."""
        try:
            # Use streaming with thinking visible (don't collapse)
            response = self.synthesizer._call_ollama(
                prompt,
                temperature=0.2,
                disable_thinking=False,
                use_streaming=True,
                collapse_thinking=False
            )
            # Streaming already shows thinking and response, so no additional indicators are needed
            if not response:
                return SynthesisResult(
                    summary="Analysis unavailable (LLM service error)",
                    key_points=[],
                    code_examples=[],
                    suggested_actions=["Check LLM service status"],
                    confidence=0.0
                )
            # Use natural language response directly
            return SynthesisResult(
                summary=response.strip(),
                key_points=[],  # Not used with natural language responses
                code_examples=[],  # Not used with natural language responses
                suggested_actions=[],  # Not used with natural language responses
                confidence=0.85  # High confidence for natural responses
            )
        except Exception as e:
            logger.error(f"Context synthesis failed: {e}")
            return SynthesisResult(
                summary="Analysis failed due to service error",
                key_points=[],
                code_examples=[],
                suggested_actions=["Check system status and try again"],
                confidence=0.0
            )

    def _format_exploration_response(self, question: str, synthesis: SynthesisResult,
                                     result_count: int, search_time: float, synthesis_time: float) -> str:
        """Format exploration response with context indicators."""
        output = []
        # Header with session context
        session_duration = time.time() - self.current_session.started_at
        exchange_count = len(self.current_session.conversation_history)
        output.append(f"🧠 EXPLORATION ANALYSIS (Question #{exchange_count})")
        output.append(f"Session: {session_duration/60:.1f}m | Results: {result_count} | "
                      f"Time: {search_time+synthesis_time:.1f}s")
        output.append("=" * 60)
        output.append("")
        # Response was already displayed via streaming; just show completion status
        output.append("✅ Analysis complete")
        output.append("")
        output.append("")
        # Confidence and context indicator
        confidence_emoji = "🟢" if synthesis.confidence > 0.7 else "🟡" if synthesis.confidence > 0.4 else "🔴"
        context_indicator = f" | Context: {exchange_count-1} previous questions" if exchange_count > 1 else ""
        output.append(f"{confidence_emoji} Confidence: {synthesis.confidence:.1%}{context_indicator}")
        return "\n".join(output)

    def get_session_summary(self) -> str:
        """Get a summary of the current exploration session."""
        if not self.current_session:
            return "No active exploration session."
        duration = time.time() - self.current_session.started_at
        exchange_count = len(self.current_session.conversation_history)
        summary = [
            "🧠 EXPLORATION SESSION SUMMARY",
            "=" * 40,
            f"Project: {self.project_path.name}",
            f"Session ID: {self.current_session.session_id}",
            f"Duration: {duration/60:.1f} minutes",
            f"Questions explored: {exchange_count}",
            "",
        ]
        if exchange_count > 0:
            summary.append("📋 Topics explored:")
            for i, exchange in enumerate(self.current_session.conversation_history, 1):
                question = exchange["question"][:50] + "..." if len(exchange["question"]) > 50 else exchange["question"]
                confidence = exchange["response"]["confidence"]
                summary.append(f" {i}. {question} (confidence: {confidence:.1%})")
        return "\n".join(summary)

    def end_session(self) -> str:
        """End the current exploration session."""
        if not self.current_session:
            return "No active session to end."
        summary = self.get_session_summary()
        self.current_session = None
        return summary + "\n\n✅ Exploration session ended."

    def _check_model_restart_needed(self) -> bool:
        """Check if a model restart would improve thinking quality."""
        try:
            # Simple heuristic: if we can detect the model was recently used
            # with <no_think>, suggest a restart for better thinking quality.
            # Test with a simple thinking prompt to see response quality.
            test_response = self.synthesizer._call_ollama(
                "Think briefly: what is 2+2?",
                temperature=0.1,
                disable_thinking=False
            )
            if test_response:
                # If response is suspiciously short or shows signs of no-think behavior
                if len(test_response.strip()) < 10 or test_response.strip() == "4":
                    return True
        except Exception:
            pass
        return False

    def _handle_model_restart(self) -> bool:
        """Handle user confirmation and model restart."""
        try:
            print("\n🤔 To ensure best thinking quality, exploration mode works best with a fresh model.")
            print(f" Currently running: {self.synthesizer.model}")
            print("\n💡 Stop current model and restart for optimal exploration? (y/N): ", end="", flush=True)
            response = input().strip().lower()
            if response in ['y', 'yes']:
                print("\n🔄 Stopping current model...")
                # Use ollama stop command for clean model restart
                import subprocess
                try:
                    subprocess.run([
                        "ollama", "stop", self.synthesizer.model
                    ], timeout=10, capture_output=True)
                    print("✅ Model stopped successfully.")
                    print("🚀 Exploration mode will restart the model with thinking enabled...")
                    # Reset synthesizer initialization to force fresh start
                    self.synthesizer._initialized = False
                    return True
                except subprocess.TimeoutExpired:
                    print("⚠️ Model stop timed out, continuing anyway...")
                    return False
                except FileNotFoundError:
                    print("⚠️ 'ollama' command not found, continuing with current model...")
                    return False
                except Exception as e:
                    print(f"⚠️ Error stopping model: {e}")
                    return False
            else:
                print("📝 Continuing with current model...")
                return False
        except (KeyboardInterrupt, EOFError):
            print("\n📝 Continuing with current model...")
            return False

    def _call_ollama_with_thinking(self, prompt: str, temperature: float = 0.3) -> tuple:
        """Call Ollama with streaming for fast time-to-first-token."""
        import requests
        import json
        try:
            # Use the synthesizer's model and connection
            model_to_use = self.synthesizer.model
            if self.synthesizer.model not in self.synthesizer.available_models:
                if self.synthesizer.available_models:
                    model_to_use = self.synthesizer.available_models[0]
                else:
                    return None, None
            # Enable thinking by NOT adding <no_think>
            final_prompt = prompt
            # Get optimal parameters for this model (absolute import as fallback for direct testing)
            try:
                from .llm_optimization import get_optimal_ollama_parameters
            except ImportError:
                from llm_optimization import get_optimal_ollama_parameters
            optimal_params = get_optimal_ollama_parameters(model_to_use)
            payload = {
                "model": model_to_use,
                "prompt": final_prompt,
                "stream": True,  # Enable streaming for fast response
                "options": {
                    "temperature": temperature,
                    "top_p": optimal_params.get("top_p", 0.9),
                    "top_k": optimal_params.get("top_k", 40),
                    "num_ctx": optimal_params.get("num_ctx", 32768),
                    "num_predict": optimal_params.get("num_predict", 2000),
                    "repeat_penalty": optimal_params.get("repeat_penalty", 1.1),
                    "presence_penalty": optimal_params.get("presence_penalty", 1.0)
                }
            }
            response = requests.post(
                f"{self.synthesizer.ollama_url}/api/generate",
                json=payload,
                stream=True,
                timeout=65
            )
            if response.status_code == 200:
                # Collect streaming response
                raw_response = ""
                thinking_displayed = False
                for line in response.iter_lines():
                    if line:
                        try:
                            chunk_data = json.loads(line.decode('utf-8'))
                            chunk_text = chunk_data.get('response', '')
                            if chunk_text:
                                raw_response += chunk_text
                                # Display thinking stream as it comes in
                                if not thinking_displayed and '<think>' in raw_response:
                                    # Start displaying thinking
                                    self._start_thinking_display()
                                    thinking_displayed = True
                                if thinking_displayed:
                                    self._stream_thinking_chunk(chunk_text)
                            if chunk_data.get('done', False):
                                break
                        except json.JSONDecodeError:
                            continue
                # Finish thinking display if it was shown
                if thinking_displayed:
                    self._end_thinking_display()
                # Extract thinking stream and final response
                thinking_stream, final_response = self._extract_thinking(raw_response)
                return final_response, thinking_stream
            else:
                return None, None
        except Exception as e:
            logger.error(f"Thinking-enabled Ollama call failed: {e}")
            return None, None

    def _extract_thinking(self, raw_response: str) -> tuple:
        """Extract thinking content from the response."""
        thinking_stream = ""
        final_response = raw_response
        # Look for thinking patterns
        if "<think>" in raw_response and "</think>" in raw_response:
            # Extract thinking content between tags
            start_tag = raw_response.find("<think>")
            end_tag = raw_response.find("</think>") + len("</think>")
            if start_tag != -1 and end_tag != -1:
                thinking_content = raw_response[start_tag + 7:end_tag - 8]  # Strip the <think>/</think> tags
                thinking_stream = thinking_content.strip()
                # Remove thinking from final response
                final_response = (raw_response[:start_tag] + raw_response[end_tag:]).strip()
        # Alternative patterns for models that use different thinking formats
        elif "Let me think" in raw_response or "I need to analyze" in raw_response:
            # Simple heuristic: first paragraph might be thinking
            lines = raw_response.split('\n')
            potential_thinking = []
            final_lines = []
            thinking_indicators = ["Let me think", "I need to", "First, I'll", "Looking at", "Analyzing"]
            in_thinking = False
            for line in lines:
                if any(indicator in line for indicator in thinking_indicators):
                    in_thinking = True
                    potential_thinking.append(line)
                elif in_thinking and (line.startswith('{') or line.startswith('**') or line.startswith('#')):
                    # Likely end of thinking, start of structured response
                    in_thinking = False
                    final_lines.append(line)
                elif in_thinking:
                    potential_thinking.append(line)
                else:
                    final_lines.append(line)
            if potential_thinking:
                thinking_stream = '\n'.join(potential_thinking).strip()
                final_response = '\n'.join(final_lines).strip()
        return thinking_stream, final_response

    def _start_thinking_display(self):
        """Start the thinking stream display."""
        print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        self._thinking_buffer = ""
        self._in_thinking_tags = False
        self._thinking_done = False

    def _stream_thinking_chunk(self, chunk: str):
        """Stream a chunk of thinking as it arrives."""
        self._thinking_buffer += chunk
        if self._thinking_done:
            # Thinking already finished; filter out later content so it isn't redisplayed
            return
        # Check if we're in thinking tags
        if '<think>' in self._thinking_buffer and not self._in_thinking_tags:
            self._in_thinking_tags = True
            # Display everything after <think>
            start_idx = self._thinking_buffer.find('<think>') + 7
            thinking_content = self._thinking_buffer[start_idx:]
            if thinking_content:
                print(f"\033[2m\033[3m{thinking_content}\033[0m", end='', flush=True)
        elif self._in_thinking_tags and '</think>' not in chunk:
            # We're in thinking mode, display the chunk
            print(f"\033[2m\033[3m{chunk}\033[0m", end='', flush=True)
        if '</think>' in self._thinking_buffer:
            # End of thinking
            self._in_thinking_tags = False
            self._thinking_done = True

    def _end_thinking_display(self):
        """End the thinking stream display."""
        print("\n\033[2m\033[3m" + "─" * 40 + "\033[0m")
        print()

    def _display_thinking_stream(self, thinking_stream: str):
        """Display the thinking stream in light gray italics (fallback for non-streaming)."""
        if not thinking_stream:
            return
        print("\n\033[2m\033[3m💭 AI Thinking:\033[0m")
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        # Split into paragraphs and display with proper formatting
        paragraphs = thinking_stream.split('\n\n')
        for para in paragraphs:
            if para.strip():
                # Wrap long lines nicely
                lines = para.strip().split('\n')
                for line in lines:
                    if line.strip():
                        # Light gray and italic
                        print(f"\033[2m\033[3m{line}\033[0m")
                print()  # Paragraph spacing
        print("\033[2m\033[3m" + "─" * 40 + "\033[0m")
        print()


# Quick test function
def test_explorer():
    """Test the code explorer."""
    explorer = CodeExplorer(Path("."))
    if not explorer.start_exploration_session():
        print("❌ Could not start exploration session")
        return
    # Test question
    response = explorer.explore_question("How does authentication work in this codebase?")
    if response:
        print(response)
    print("\n" + explorer.end_session())


if __name__ == "__main__":
    test_explorer()