tests : add tokenizer tests for numbers

This commit is contained in:
Georgi Gerganov 2024-04-26 13:21:28 +03:00
parent c56e19db4b
commit 7a44e44342
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
4 changed files with 46 additions and 1 deletion

View file

@ -36,9 +36,19 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
{ " Hello" , { 258, 23090, }, },
{ " Hello" , { 466, 23090, }, },
{ " Hello\n Hello" , { 466, 23090, 742, 23090, }, },
{ " (" , { 204, 19, }, },
{ "\n =" , { 1212, 40, }, },
{ "' era" , { 18, 4932, }, },
{ "Hello, y'all! How are you 😁 ?我想在apple工作1314151天", { 9856, 23, 291, 18, 436, 12, 1265, 362, 299, 8196, 207, 204, 42, 50087, 123, 2727, 20300, 32022, 133, 234, 17419, 30137, 28, 7858, 181, 133, 236, }, },
{ "3" , { 30, }, },
{ "33" , { 3138, }, },
{ "333" , { 22287, }, },
{ "3333" , { 22287, 30, }, },
{ "33333" , { 22287, 3138, }, },
{ "333333" , { 22287, 22287, }, },
{ "3333333" , { 22287, 22287, 30, }, },
{ "33333333" , { 22287, 22287, 3138, }, },
{ "333333333" , { 22287, 22287, 22287, }, },
};
return _k_tests;

View file

@ -39,9 +39,19 @@ tests = [
" Hello",
" Hello",
" Hello\n Hello",
" (",
"\n =",
"' era",
"Hello, y'all! How are you 😁 ?我想在apple工作1314151天",
"3",
"33",
"333",
"3333",
"33333",
"333333",
"3333333",
"33333333",
"333333333",
]
for text in tests:

View file

@ -37,6 +37,18 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
{ " Hello" , { 268, 15043, }, },
{ " Hello\n Hello" , { 268, 15043, 13, 1678, 15043, }, },
{ " (" , { 29871, 313, }, },
{ "\n =" , { 29871, 13, 353, }, },
{ "' era" , { 525, 3152, }, },
{ "Hello, y'all! How are you 😁 ?我想在apple工作1314151天", { 15043, 29892, 343, 29915, 497, 29991, 1128, 526, 366, 29871, 243, 162, 155, 132, 1577, 30672, 31522, 30505, 11548, 31041, 30732, 29896, 29941, 29896, 29946, 29896, 29945, 29896, 30408, 30739, }, },
{ "3" , { 29871, 29941, }, },
{ "33" , { 29871, 29941, 29941, }, },
{ "333" , { 29871, 29941, 29941, 29941, }, },
{ "3333" , { 29871, 29941, 29941, 29941, 29941, }, },
{ "33333" , { 29871, 29941, 29941, 29941, 29941, 29941, }, },
{ "333333" , { 29871, 29941, 29941, 29941, 29941, 29941, 29941, }, },
{ "3333333" , { 29871, 29941, 29941, 29941, 29941, 29941, 29941, 29941, }, },
{ "33333333" , { 29871, 29941, 29941, 29941, 29941, 29941, 29941, 29941, 29941, }, },
{ "333333333" , { 29871, 29941, 29941, 29941, 29941, 29941, 29941, 29941, 29941, 29941, }, },
};
return _k_tests;

View file

@ -39,6 +39,19 @@ tests = [
" Hello",
" Hello",
" Hello\n Hello",
" (",
"\n =",
"' era",
"Hello, y'all! How are you 😁 ?我想在apple工作1314151天",
"3",
"33",
"333",
"3333",
"33333",
"333333",
"3333333",
"33333333",
"333333333",
]