fss-mini-rag-github/tests/03_system_validation.py
BobAi 4925f6d4e4 Add comprehensive testing suite and documentation for new features
📚 DOCUMENTATION
- docs/QUERY_EXPANSION.md: Complete beginner guide with examples and troubleshooting
- Updated config.yaml with proper LLM settings and comments
- Clear explanations of when features are enabled/disabled

🧪 NEW TESTING INFRASTRUCTURE
- test_ollama_integration.py: 6 comprehensive tests with helpful error messages
- test_smart_ranking.py: 6 tests verifying ranking quality improvements
- troubleshoot.py: Interactive tool for diagnosing setup issues
- Enhanced system validation with new features coverage

⚙️ SMART DEFAULTS
- Query expansion disabled by default (CLI speed)
- TUI enables expansion automatically (exploration mode)
- Clear user feedback about which features are active
- Graceful degradation when Ollama unavailable

🎯 BEGINNER-FRIENDLY APPROACH
- Tests explain what they're checking and why
- Clear solutions provided for common problems
- Educational output showing system status
- Offline testing with gentle mocking

Run 'python3 tests/troubleshoot.py' to verify your setup!
2025-08-12 17:36:32 +10:00

415 lines
14 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Integration test to verify all three agents' work integrates properly.
"""
import sys
import os
import tempfile
from pathlib import Path
# Fix Windows encoding so the status output printed by the checks below
# (which includes emoji) can be written to the console.
if sys.platform == 'win32':
    # PYTHONUTF8 is read when an interpreter starts, so assigning it here
    # only influences child processes launched later; the stream of the
    # running interpreter is switched explicitly below.
    os.environ['PYTHONUTF8'] = '1'
    # stdout may have been replaced by an object that lacks reconfigure()
    # (for example a plain capture buffer), so probe before calling it.
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(encoding='utf-8')
from claude_rag.chunker import CodeChunker
from claude_rag.indexer import ProjectIndexer
from claude_rag.search import CodeSearcher
from claude_rag.ollama_embeddings import OllamaEmbedder as CodeEmbedder
from claude_rag.query_expander import QueryExpander
from claude_rag.config import RAGConfig
def test_chunker():
    """Check that the chunker emits chunks carrying all required metadata.

    Chunks a small in-memory Python module and verifies, for every chunk,
    that ``chunk_index``, ``total_chunks`` and ``file_lines`` are set, that
    every chunk except the first has a ``prev_chunk_id`` and every chunk
    except the last has a ``next_chunk_id``, and that chunks whose type is
    ``'method'`` name their ``parent_class``.

    Returns:
        bool: True when at least one chunk was produced and no metadata
        problem was recorded, False otherwise.
    """
    print("1. Testing Chunker...")
    # Sample module with two classes and two standalone functions. The text
    # must be valid, indented Python so class/method structure is present.
    test_code = '''"""Test module for integration testing the chunker."""
import os
import sys
class TestClass:
    """A test class with multiple methods."""
    def __init__(self):
        """Initialize the test class."""
        self.value = 42
        self.name = "test"
    def method_one(self):
        """First method with some logic."""
        result = self.value * 2
        return result
    def method_two(self, x):
        """Second method that takes a parameter."""
        if x > 0:
            return self.value + x
        else:
            return self.value - x
    def method_three(self):
        """Third method for testing."""
        data = []
        for i in range(10):
            data.append(i * self.value)
        return data
class AnotherClass:
    """Another test class."""
    def __init__(self, name):
        """Initialize with name."""
        self.name = name
    def process(self):
        """Process something."""
        return f"Processing {self.name}"
def standalone_function(arg1, arg2):
    """A standalone function that does something."""
    result = arg1 + arg2
    return result * 2
def another_function():
    """Another standalone function."""
    data = {"key": "value", "number": 123}
    return data
'''
    chunker = CodeChunker(min_chunk_size=1)  # Use small chunk size for testing
    chunks = chunker.chunk_file(Path("test.py"), test_code)
    print(f" Created {len(chunks)} chunks")

    # Debug: show what chunks were created
    print(" Chunks created:")
    for chunk in chunks:
        print(f" - Type: {chunk.chunk_type}, Name: {chunk.name}, Lines: {chunk.start_line}-{chunk.end_line}")

    issues = []
    if not chunks:
        # Without this guard the per-chunk loop below never runs and the
        # check would report success without having inspected anything.
        issues.append("Chunker produced no chunks")

    last_index = len(chunks) - 1
    for i, chunk in enumerate(chunks):
        # Positional metadata must be present on every chunk.
        if chunk.chunk_index is None:
            issues.append(f"Chunk {i} missing chunk_index")
        if chunk.total_chunks is None:
            issues.append(f"Chunk {i} missing total_chunks")
        if chunk.file_lines is None:
            issues.append(f"Chunk {i} missing file_lines")
        # Neighbour links: the first chunk has no predecessor and the last
        # has no successor, so those two ends are exempt.
        if i > 0 and chunk.prev_chunk_id is None:
            issues.append(f"Chunk {i} missing prev_chunk_id")
        if i < last_index and chunk.next_chunk_id is None:
            issues.append(f"Chunk {i} missing next_chunk_id")
        # Methods must record the class they belong to.
        if chunk.chunk_type == 'method' and chunk.parent_class is None:
            issues.append(f"Method chunk {chunk.name} missing parent_class")
        print(f" - Chunk {i}: {chunk.chunk_type} '{chunk.name}' "
              f"[{chunk.chunk_index}/{chunk.total_chunks}] "
              f"prev={chunk.prev_chunk_id} next={chunk.next_chunk_id}")

    if issues:
        print(" Issues found:")
        for issue in issues:
            print(f" - {issue}")
    else:
        print(" All metadata present")
    return not issues
def test_indexer_storage():
    """Check that the indexer persists the chunk-linking metadata columns.

    Writes a one-class Python file into a temporary directory, indexes it
    with a ``ProjectIndexer`` and inspects ``indexer.table`` (converted via
    ``to_pandas()``) for the columns ``chunk_id``, ``prev_chunk_id``,
    ``next_chunk_id`` and ``parent_class``. When rows exist, the values of
    the first row are printed for inspection.

    Returns:
        bool: True when a table is available and contains every required
        column; False when the table is missing/falsy or a column is absent.
    """
    print("\n2. Testing Indexer Storage...")
    required_fields = ['chunk_id', 'prev_chunk_id', 'next_chunk_id', 'parent_class']

    with tempfile.TemporaryDirectory() as tmpdir:
        project_path = Path(tmpdir)

        # Create test file
        test_file = project_path / "test.py"
        test_file.write_text('''
class MyClass:
    def my_method(self):
        return 42
''')

        # Index the project with small chunk size for testing
        chunker = CodeChunker(min_chunk_size=1)
        indexer = ProjectIndexer(project_path, chunker=chunker)
        stats = indexer.index_project()
        print(f" Indexed {stats['chunks_created']} chunks")

        # Previously a falsy table skipped the whole check and the final
        # return referenced an unbound name; report it as a failure instead.
        # NOTE(review): truthiness is kept from the original; if the table
        # type defines __len__, an empty table also lands here - confirm.
        if not indexer.table:
            print(" No table available after indexing")
            return False

        df = indexer.table.to_pandas()
        columns = df.columns.tolist()
        missing_fields = [f for f in required_fields if f not in columns]
        if missing_fields:
            print(f" Missing fields in database: {missing_fields}")
            print(f" Current fields: {columns}")
            return False

        print(" All required fields in database schema")

        # Show the first stored row, if any, so the values can be eyeballed.
        if len(df) > 0:
            sample = df.iloc[0]
            print(f" Sample chunk_id: {sample.get('chunk_id', 'MISSING')}")
            print(f" Sample prev_chunk_id: {sample.get('prev_chunk_id', 'MISSING')}")
            print(f" Sample next_chunk_id: {sample.get('next_chunk_id', 'MISSING')}")
            print(f" Sample parent_class: {sample.get('parent_class', 'MISSING')}")
        return True
def test_search_integration():
    """Check that search returns results and can attach chunk context.

    Indexes a sample ``math_utils.py`` in a temporary directory, then:

    1. runs a weighted search (``semantic_weight=0.3``, ``bm25_weight=0.7``)
       and requires at least one result;
    2. picks the first stored chunk whose ``chunk_type`` is ``'method'`` and
       prints what ``get_chunk_context`` returns for it;
    3. runs a search with ``include_context=True`` and requires that at
       least one result carries ``context_before``, ``context_after`` or
       ``parent_chunk``.

    Returns:
        bool: True only when all three stages succeed. A missing/falsy
        table, no method chunks, or no results all yield False.
    """
    print("\n3. Testing Search Integration...")
    with tempfile.TemporaryDirectory() as tmpdir:
        project_path = Path(tmpdir)

        # Create test files with proper content that will create multiple chunks
        (project_path / "math_utils.py").write_text('''"""Math utilities module."""
import math
class Calculator:
    """A simple calculator class."""
    def __init__(self):
        """Initialize calculator."""
        self.result = 0
    def add(self, a, b):
        """Add two numbers."""
        self.result = a + b
        return self.result
    def multiply(self, a, b):
        """Multiply two numbers."""
        self.result = a * b
        return self.result
    def divide(self, a, b):
        """Divide two numbers."""
        if b == 0:
            raise ValueError("Cannot divide by zero")
        self.result = a / b
        return self.result
class AdvancedCalculator(Calculator):
    """Advanced calculator with more operations."""
    def power(self, a, b):
        """Raise a to power b."""
        self.result = a ** b
        return self.result
    def sqrt(self, a):
        """Calculate square root."""
        self.result = math.sqrt(a)
        return self.result
def compute_average(numbers):
    """Compute average of a list."""
    if not numbers:
        return 0
    return sum(numbers) / len(numbers)
def compute_median(numbers):
    """Compute median of a list."""
    if not numbers:
        return 0
    sorted_nums = sorted(numbers)
    n = len(sorted_nums)
    if n % 2 == 0:
        return (sorted_nums[n//2-1] + sorted_nums[n//2]) / 2
    return sorted_nums[n//2]
''')

        # Index with small chunk size for testing
        chunker = CodeChunker(min_chunk_size=1)
        indexer = ProjectIndexer(project_path, chunker=chunker)
        indexer.index_project()

        # Search
        searcher = CodeSearcher(project_path)

        # Test BM25 integration
        results = searcher.search("multiply numbers", limit=5,
                                  semantic_weight=0.3, bm25_weight=0.7)
        if not results:
            print(" No search results returned")
            return False
        print(f" BM25 + semantic search returned {len(results)} results")
        for r in results[:2]:
            print(f" - {r.chunk_type} '{r.name}' score={r.score:.3f}")

        # Test context retrieval
        print("\n Testing context retrieval...")
        if not searcher.table:
            # Previously this case fell through to a passing result even
            # though nothing about context retrieval had been exercised.
            print(" No table available for context retrieval")
            return False

        df = searcher.table.to_pandas()
        print(f" Total chunks in DB: {len(df)}")

        # Find a method chunk to test parent context
        method_chunks = df[df['chunk_type'] == 'method']
        if len(method_chunks) == 0:
            print(" No method chunks found in database")
            return False

        method_chunk_id = method_chunks.iloc[0]['chunk_id']
        context = searcher.get_chunk_context(method_chunk_id)
        # Informational only: missing neighbours are reported, not failed.
        if context['chunk']:
            print(f" Got main chunk: {context['chunk'].name}")
        if context['prev']:
            print(f" Got previous chunk: {context['prev'].name}")
        else:
            print(" - No previous chunk (might be first)")
        if context['next']:
            print(f" Got next chunk: {context['next'].name}")
        else:
            print(" - No next chunk (might be last)")
        if context['parent']:
            print(f" Got parent chunk: {context['parent'].name}")
        else:
            print(" - No parent chunk")

        # Test include_context in search
        results_with_context = searcher.search("add", include_context=True, limit=2)
        if not results_with_context:
            print(" No search results returned")
            return False

        print(f" Found {len(results_with_context)} results with context")
        for r in results_with_context:
            print(f" - {r.name}: context_before={bool(r.context_before)}, "
                  f"context_after={bool(r.context_after)}, parent={bool(r.parent_chunk)}")

        # Check if at least one result has some context
        if any(r.context_before or r.context_after or r.parent_chunk
               for r in results_with_context):
            print(" Search with context working")
            return True
        print(" Search returned results but no context attached")
        return False
def test_server():
    """Smoke-test that the RAG server class can be imported and constructed.

    Nothing is started or queried; the check only imports ``RAGServer`` and
    builds one instance for the current directory on port 7778.

    Returns:
        bool: True if the import and construction complete without raising,
        False (after printing the error) otherwise.
    """
    print("\n4. Testing Server...")
    try:
        # The import sits inside the try so that an import failure is
        # reported as a failed check rather than propagating.
        from claude_rag.server import RAGServer
        rag_server = RAGServer(Path("."), port=7778)
    except Exception as err:
        print(f" Server error: {err}")
        return False
    else:
        print(" Server can be instantiated")
        return True
def test_new_features():
    """Check configuration loading, query expansion and smart ranking.

    Steps performed:

    1. Build a ``RAGConfig`` and print its expansion-related settings.
    2. Build a ``QueryExpander``. If ``is_available()`` is true, print the
       expanded query; otherwise require that ``expand_query`` returns the
       query unchanged.
    3. Build a ``CodeSearcher`` over a temporary directory containing only a
       README and require that it has a ``_smart_rerank`` attribute.

    Returns:
        bool: True when every step succeeds; False when the offline
        expansion alters the query, ``_smart_rerank`` is missing, or any
        step raises (the error is printed).
    """
    print("\n5. Testing New Features (Query Expansion & Smart Ranking)...")
    try:
        # Test configuration loading
        config = RAGConfig()
        print(" ✅ Configuration loaded successfully")
        print(f" Query expansion enabled: {config.search.expand_queries}")
        print(f" Max expansion terms: {config.llm.max_expansion_terms}")

        # Test query expander
        expander = QueryExpander(config)
        test_query = "authentication"
        online = expander.is_available()
        if not online:
            print(" ⚠️ Query expansion offline (Ollama not available)")
        expanded = expander.expand_query(test_query)
        if online:
            # The separator between the two quoted values was missing, which
            # printed them run together as 'query''expansion'.
            print(f" ✅ Query expansion working: '{test_query}' → '{expanded}'")
        elif expanded == test_query:
            # Offline, the expander must hand back the query untouched.
            print(" ✅ Graceful degradation working: returns original query")
        else:
            print(" ❌ Error: should return original query when offline")
            return False

        # Test smart ranking: only verifies that the method exists.
        print(" 🧮 Testing smart ranking...")
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            # Create a simple test project
            test_file = temp_path / "README.md"
            test_file.write_text("# Test Project\nThis is a test README file.")
            try:
                searcher = CodeSearcher(temp_path)
                if hasattr(searcher, '_smart_rerank'):
                    print(" ✅ Smart ranking method available")
                    return True
                print(" ❌ Smart ranking method not found")
                return False
            except Exception as e:
                print(f" ❌ Smart ranking test failed: {e}")
                return False
    except Exception as e:
        print(f" ❌ New features test failed: {e}")
        return False
def main():
    """Run every integration check in order and print a pass/fail summary.

    All checks are executed even if an earlier one fails, so the summary
    always lists each component.

    Returns:
        bool: True only if every check returned a truthy result.
    """
    rule = "=" * 50
    print(rule)
    print("RAG System Integration Check")
    print(rule)

    checks = (
        ("Chunker", test_chunker),
        ("Indexer", test_indexer_storage),
        ("Search", test_search_integration),
        ("Server", test_server),
        ("New Features", test_new_features),
    )
    # Evaluated eagerly and in declaration order, one call per component.
    outcomes = {label: check() for label, check in checks}

    print("\n" + rule)
    print("SUMMARY:")
    print(rule)
    for label, ok in outcomes.items():
        verdict = " PASS" if ok else " FAIL"
        print(f"{label}: {verdict}")

    everything_ok = all(outcomes.values())
    if everything_ok:
        print("\n All integration tests passed!")
    else:
        print("\n Some tests failed - fixes needed!")
    return everything_ok
if __name__ == "__main__":
    # Exit status 0 means every check passed; 1 means at least one failed.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)