#!/usr/bin/env python3
"""Performance test for text-polish model"""
import sys

# Must run before the model_loader import below so that import can be
# resolved from this directory.
sys.path.insert(0, '/MASTERFOLDER/Tools/text-polish/src')

import time

from model_loader import load_model, polish

# Test strings with various typos and issues
TEST_STRINGS = [
    "teh quick brown fox jumps over teh lazy dog",
    "I cant beleive its not butter",
    "This is a sentance with some mispelled words and bad spacing",
    "The weater is realy nice today dont you think",
    "I need to go to the store and buy some grocerys",
    "Can you help me with this problme please",
    "The meeting is schedduled for tommorow at 3pm",
    "I dont know waht to do about this situaton",
    "Please send me the docment as soon as posible",
    "The compnay announced a new product today",
]


def count_tokens(text, tokenizer):
    """Return the number of tokens ``tokenizer.encode`` produces for ``text``.

    Args:
        text: The string to tokenize.
        tokenizer: Any object exposing ``encode(text)`` whose result
            supports ``len()``.

    Returns:
        The length of the encoded sequence.
    """
    return len(tokenizer.encode(text))


def _tokens_per_second(token_count, seconds):
    """Return ``token_count / seconds``, or 0 when no time was measured."""
    return token_count / seconds if seconds > 0 else 0


def main():
    """Load the model, polish every test string, and print timing statistics.

    For each entry in ``TEST_STRINGS`` this prints the input, the polished
    output, the elapsed time, and the combined (input + output) token count
    and rate, followed by a summary over all entries.
    """
    print("Loading model...")
    # perf_counter is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the intervals.
    start = time.perf_counter()
    model, tokenizer = load_model()
    load_time = time.perf_counter() - start
    print(f"Model loaded in {load_time:.2f}s\n")

    print("Running performance tests...\n")
    print("-" * 80)

    total_time = 0
    total_tokens = 0

    for i, test_str in enumerate(TEST_STRINGS, 1):
        # Token counting stays outside the timed region so that only the
        # polish() call itself is measured.
        input_tokens = count_tokens(test_str, tokenizer)

        start = time.perf_counter()
        result = polish(model, tokenizer, test_str)
        elapsed = time.perf_counter() - start

        output_tokens = count_tokens(result, tokenizer)
        combined_tokens = input_tokens + output_tokens
        tokens_per_sec = _tokens_per_second(combined_tokens, elapsed)

        total_time += elapsed
        total_tokens += combined_tokens

        print(f"Test {i}:")
        print(f" Input: {test_str}")
        print(f" Output: {result}")
        print(f" Time: {elapsed*1000:.2f}ms")
        print(f" Tokens: {input_tokens} in + {output_tokens} out = {combined_tokens} total")
        print(f" Speed: {tokens_per_sec:.2f} tokens/sec")
        print("-" * 80)

    # Guarded like the throughput divisions so an empty TEST_STRINGS list
    # yields a zero average instead of ZeroDivisionError.
    avg_time = total_time / len(TEST_STRINGS) if TEST_STRINGS else 0
    avg_tokens_per_sec = _tokens_per_second(total_tokens, total_time)

    print("\nSUMMARY:")
    print(f" Total tests: {len(TEST_STRINGS)}")
    print(f" Total time: {total_time:.2f}s")
    print(f" Average per string: {avg_time*1000:.2f}ms")
    print(f" Total tokens: {total_tokens}")
    print(f" Average speed: {avg_tokens_per_sec:.2f} tokens/sec")
    print(f" Model load time: {load_time:.2f}s")


if __name__ == "__main__":
    main()