Update test-tokenizer-random.py

This commit is contained in:
Robert 2024-11-12 22:24:03 -08:00 committed by GitHub
parent 60fd27b68d
commit db26ba5b5c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Test libllama tokenizer against AutoTokenizer using brute force random words/text generation. # Test libllama tokenizer == AutoTokenizer.
# Brute force random words/text generation.
Sample usage: Sample usage:
@@ -584,4 +585,4 @@ if __name__ == "__main__":
logger.info(f"TOKENIZER: '{tokenizer}'") logger.info(f"TOKENIZER: '{tokenizer}'")
vocab_file = Path(path_vocab_format % tokenizer) vocab_file = Path(path_vocab_format % tokenizer)
dir_tokenizer = path_tokenizers / tokenizer dir_tokenizer = path_tokenizers / tokenizer
main([str(vocab_file), str(dir_tokenizer), "--verbose"]) main([str(vocab_file), str(dir_tokenizer), "--verbose"])