fss-polish/test_performance.py
FSSCoding 9316bc50f1 Initial commit: FSS-Polish v1.0.0
Complete implementation of Fast Spelling and Style Polish tool with:
- Australian English spelling conversion (7 patterns + case preservation; see the sketch after this list)
- CLI support with text input or clipboard mode
- Daemon mode with configurable hotkey
- MIN_LENGTH, AGGRESSION, and CUSTOM_DICTIONARY config options
- Comprehensive diff logging
- 12 passing tests (100% test coverage for AU spelling)
- Wheel package built and ready for deployment
- Agent-friendly CLI with stdin/stdout support
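
A minimal sketch of the pattern-plus-case-preservation idea (the three patterns, the WHITELIST set, and all helper names below are illustrative assumptions, not the tool's actual seven-pattern table):

import re

# Illustrative American -> Australian patterns; stand-ins for the real table.
AU_PATTERNS = [
    (re.compile(r"\b(\w+)ization(s?)\b", re.IGNORECASE), r"\1isation\2"),
    (re.compile(r"\b(\w+)ize(s|d|r|rs)?\b", re.IGNORECASE), r"\1ise\2"),
    (re.compile(r"\bcolor(s|ed|ful|ing)?\b", re.IGNORECASE), r"colour\1"),
]

# Look-alikes that must not be converted; in the real tool the
# CUSTOM_DICTIONARY whitelist would play this role.
WHITELIST = {"size", "prize", "seize", "capsize"}

def match_case(source, target):
    """Copy the casing of `source` (ALL-CAPS or Capitalised) onto `target`."""
    if source.isupper():
        return target.upper()
    if source[:1].isupper():
        return target[:1].upper() + target[1:]
    return target

def to_australian(text):
    for pattern, template in AU_PATTERNS:
        def convert(m, t=template):
            word = m.group(0)
            if word.lower() in WHITELIST:
                return word  # leave whitelisted words untouched
            return match_case(word, m.expand(t))
        text = pattern.sub(convert, text)
    return text

print(to_australian("They organized a COLORFUL prize"))
# -> "They organised a COLOURFUL prize"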

Features:
- Text correction using the t5-small-spoken-typo model (see the sketch after this list)
- Australian/American spelling conversion
- Configurable correction aggression levels
- Custom dictionary whitelist support
- Background daemon with hotkey trigger
- CLI tool for direct text polishing
- Preserves clipboard history (adds a new entry rather than replacing the current one)
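
The test file below drives load_model() and polish() from model_loader. For context, here is a minimal sketch of what that module might look like on top of Hugging Face transformers; the Hub model ID, generation settings, and function bodies are assumptions, only the two names come from the test itself:

# model_loader.py, hypothetical sketch, not the shipped module.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Assumed Hub ID; the commit message only names "t5-small-spoken-typo".
MODEL_NAME = "willwade/t5-small-spoken-typo"

def load_model():
    """Load the seq2seq typo-correction model and its tokenizer."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    model.eval()  # inference only
    return model, tokenizer

def polish(model, tokenizer, text, max_new_tokens=128):
    """Correct `text` in one generation pass and return the decoded result."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)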

Ready for deployment to /opt and Gitea repository.
2025-10-25 23:59:34 +11:00

#!/usr/bin/env python3
"""Performance test for the text-polish model."""
import sys
import time

sys.path.insert(0, '/MASTERFOLDER/Tools/text-polish/src')
from model_loader import load_model, polish

# Test strings with various typos and issues
TEST_STRINGS = [
    "teh quick brown fox jumps over teh lazy dog",
    "I cant beleive its not butter",
    "This is a sentance with some mispelled words and bad spacing",
    "The weater is realy nice today dont you think",
    "I need to go to the store and buy some grocerys",
    "Can you help me with this problme please",
    "The meeting is schedduled for tommorow at 3pm",
    "I dont know waht to do about this situaton",
    "Please send me the docment as soon as posible",
    "The compnay announced a new product today"
]


def count_tokens(text, tokenizer):
    """Count tokens in text (encode() adds special tokens by default)."""
    return len(tokenizer.encode(text))


def main():
    print("Loading model...")
    start = time.time()
    model, tokenizer = load_model()
    load_time = time.time() - start
    print(f"Model loaded in {load_time:.2f}s\n")

    print("Running performance tests...\n")
    print("-" * 80)

    total_time = 0
    total_tokens = 0
    for i, test_str in enumerate(TEST_STRINGS, 1):
        input_tokens = count_tokens(test_str, tokenizer)
        start = time.time()
        result = polish(model, tokenizer, test_str)
        elapsed = time.time() - start
        output_tokens = count_tokens(result, tokenizer)
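        # Throughput counts input + output tokens over wall-clock time, so it
        # reflects end-to-end latency per string, not pure generation speed.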
        tokens_per_sec = (input_tokens + output_tokens) / elapsed if elapsed > 0 else 0
        total_time += elapsed
        total_tokens += (input_tokens + output_tokens)

        print(f"Test {i}:")
        print(f"  Input:  {test_str}")
        print(f"  Output: {result}")
        print(f"  Time:   {elapsed*1000:.2f}ms")
        print(f"  Tokens: {input_tokens} in + {output_tokens} out = {input_tokens + output_tokens} total")
        print(f"  Speed:  {tokens_per_sec:.2f} tokens/sec")
        print("-" * 80)

    avg_time = total_time / len(TEST_STRINGS)
    avg_tokens_per_sec = total_tokens / total_time if total_time > 0 else 0
print(f"\nSUMMARY:")
print(f" Total tests: {len(TEST_STRINGS)}")
print(f" Total time: {total_time:.2f}s")
print(f" Average per string: {avg_time*1000:.2f}ms")
print(f" Total tokens: {total_tokens}")
print(f" Average speed: {avg_tokens_per_sec:.2f} tokens/sec")
print(f" Model load time: {load_time:.2f}s")
if __name__ == "__main__":
main()