Complete implementation of the Fast Spelling and Style Polish tool with:

- Australian English spelling conversion (7 patterns + case preservation; see the sketch below)
- CLI support with text input or clipboard mode
- Daemon mode with configurable hotkey
- MIN_LENGTH, AGGRESSION, and CUSTOM_DICTIONARY config options
- Comprehensive diff logging
- 12 passing tests (100% test coverage for AU spelling)
- Wheel package built and ready for deployment
- Agent-friendly CLI with stdin/stdout support

Features:

- Text correction using the t5-small-spoken-typo model
- Australian/American spelling conversion
- Configurable correction aggression levels
- Custom dictionary whitelist support
- Background daemon with hotkey trigger
- CLI tool for direct text polishing
- Preserves clipboard history (adds a new item rather than replacing)

Ready for deployment to /opt and the Gitea repository.
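The conversion logic itself isn't part of this file, so as a rough sketch of how a pattern-based US-to-AU converter with case preservation and a CUSTOM_DICTIONARY-style whitelist might work (the three patterns and helper names below are illustrative assumptions, not the tool's actual seven patterns):

    import re

    # Illustrative US -> AU replacement patterns. The real tool ships seven;
    # these three are assumptions for demonstration only.
    PATTERNS = [
        (re.compile(r"\b(col|flav|hon|neighb)or(s?)\b", re.IGNORECASE), r"\1our\2"),
        (re.compile(r"\b(organi|recogni|reali)z(e[sdr]?|ing)\b", re.IGNORECASE), r"\1s\2"),
        (re.compile(r"\b(centim|kilom|m)eter(s?)\b", re.IGNORECASE), r"\1etre\2"),
    ]

    def preserve_case(source: str, replacement: str) -> str:
        """Copy the casing of the matched word onto its replacement."""
        if source.isupper():
            return replacement.upper()
        if source[:1].isupper():
            return replacement[:1].upper() + replacement[1:]
        return replacement

    def to_australian(text: str, whitelist: frozenset = frozenset()) -> str:
        """Apply each pattern, skipping words protected by the whitelist."""
        for pattern, template in PATTERNS:
            def replace(match):
                word = match.group(0)
                if word.lower() in whitelist:  # CUSTOM_DICTIONARY-style opt-out
                    return word
                return preserve_case(word, match.expand(template))
            text = pattern.sub(replace, text)
        return text

With case-insensitive matching plus preserve_case, "color", "Color", and "COLOR" map to "colour", "Colour", and "COLOUR" respectively, while whitelisted words pass through untouched.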
75 lines · 2.5 KiB · Python
#!/usr/bin/env python3
"""Performance test for text-polish model"""

import sys

sys.path.insert(0, '/MASTERFOLDER/Tools/text-polish/src')

import time

from model_loader import load_model, polish

# Test strings with various typos and issues
TEST_STRINGS = [
    "teh quick brown fox jumps over teh lazy dog",
    "I cant beleive its not butter",
    "This is a sentance with some mispelled words and bad spacing",
    "The weater is realy nice today dont you think",
    "I need to go to the store and buy some grocerys",
    "Can you help me with this problme please",
    "The meeting is schedduled for tommorow at 3pm",
    "I dont know waht to do about this situaton",
    "Please send me the docment as soon as posible",
    "The compnay announced a new product today",
]


def count_tokens(text, tokenizer):
    """Count tokens in text"""
    return len(tokenizer.encode(text))


def main():
    print("Loading model...")
    # Time the one-off model load separately from per-string latency
    start = time.time()
    model, tokenizer = load_model()
    load_time = time.time() - start
    print(f"Model loaded in {load_time:.2f}s\n")

    print("Running performance tests...\n")
    print("-" * 80)

    total_time = 0
    total_tokens = 0

    for i, test_str in enumerate(TEST_STRINGS, 1):
        input_tokens = count_tokens(test_str, tokenizer)

        start = time.time()
        result = polish(model, tokenizer, test_str)
        elapsed = time.time() - start

        output_tokens = count_tokens(result, tokenizer)
        # Throughput counts both input and output tokens
        tokens_per_sec = (input_tokens + output_tokens) / elapsed if elapsed > 0 else 0

        total_time += elapsed
        total_tokens += input_tokens + output_tokens

        print(f"Test {i}:")
        print(f"  Input:  {test_str}")
        print(f"  Output: {result}")
        print(f"  Time: {elapsed*1000:.2f}ms")
        print(f"  Tokens: {input_tokens} in + {output_tokens} out = {input_tokens + output_tokens} total")
        print(f"  Speed: {tokens_per_sec:.2f} tokens/sec")
        print("-" * 80)

    avg_time = total_time / len(TEST_STRINGS)
    avg_tokens_per_sec = total_tokens / total_time if total_time > 0 else 0

    print("\nSUMMARY:")
    print(f"  Total tests: {len(TEST_STRINGS)}")
    print(f"  Total time: {total_time:.2f}s")
    print(f"  Average per string: {avg_time*1000:.2f}ms")
    print(f"  Total tokens: {total_tokens}")
    print(f"  Average speed: {avg_tokens_per_sec:.2f} tokens/sec")
    print(f"  Model load time: {load_time:.2f}s")


if __name__ == "__main__":
    main()
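For context, the model_loader module imported above lives elsewhere in the repository and isn't shown here. A minimal sketch of what load_model and polish might look like, assuming the t5-small-spoken-typo model named in the commit message is loaded through Hugging Face transformers (the hub id, max_new_tokens value, and every other detail below are assumptions, not the repository's actual code):

    # model_loader.py (sketch) -- assumed implementation, not the repo's file
    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    # Assumed hub id; the commit only names "t5-small-spoken-typo"
    MODEL_ID = "willwade/t5-small-spoken-typo"

    def load_model():
        """Load the tokenizer and seq2seq model once, for reuse across calls."""
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
        model.eval()  # inference mode; no gradients needed
        return model, tokenizer

    def polish(model, tokenizer, text: str) -> str:
        """Run one correction pass and return the decoded string."""
        inputs = tokenizer(text, return_tensors="pt")
        output_ids = model.generate(**inputs, max_new_tokens=64)
        return tokenizer.decode(output_ids[0], skip_special_tokens=True)

Loading the model and tokenizer once and passing the pair into polish is what makes the script's per-string timings meaningful: the one-off load cost is reported separately from per-call latency.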