llama : use new pre-tokenizer type
commit 43e12ce8e5 (parent 9b4d63ae53)
12 changed files with 87 additions and 44 deletions
@@ -41,13 +41,12 @@ llama_test(test-quantize-perf.cpp)
 llama_test(test-sampling.cpp)
 llama_test(test-chat-template.cpp)
 
-# TODO: tmp disabled LLaMA v3 and Deepseek tests
 llama_test(test-tokenizer-0-llama.cpp NAME test-tokenizer-0-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
-#llama_test(test-tokenizer-0-llama-v3.cpp NAME test-tokenizer-0-llama-v3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-v3.gguf)
+llama_test(test-tokenizer-0-llama-v3.cpp NAME test-tokenizer-0-llama-v3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-v3.gguf)
 llama_test(test-tokenizer-0-falcon.cpp NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
 
-#llama_test(test-tokenizer-0-deepseek-coder.cpp NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
-#llama_test(test-tokenizer-0-deepseek-llm.cpp NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
+llama_test(test-tokenizer-0-deepseek-coder.cpp NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
+llama_test(test-tokenizer-0-deepseek-llm.cpp NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
 
 llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
@@ -27,6 +27,8 @@ tests = [
         "   ",
         "\t",
         "\n",
+        "\n\n",
+        "\n\n\n",
         "\t\n",
         "Hello world",
         " Hello world",
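The Python hunks in this commit extend a list of tokenizer test strings. As a rough illustration of how expected token IDs for such strings are usually produced, the sketch below runs them through a Hugging Face reference tokenizer. It assumes the transformers package and a locally downloaded tokenizer; the directory name is a hypothetical placeholder, not a path from this commit.

# Sketch only: produce reference token IDs for the test strings above with a
# Hugging Face tokenizer. The directory name is a hypothetical placeholder.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("models/tokenizers/llama-bpe")

tests = [
    "   ",
    "\t",
    "\n",
    "\n\n",
    "\n\n\n",
    "\t\n",
    "Hello world",
    " Hello world",
]

for text in tests:
    ids = tokenizer.encode(text, add_special_tokens=False)
    # Print roughly in the shape of the C++ k_tests() tables that follow.
    print(f'{{ {text!r:<16}, {{ {", ".join(map(str, ids))}, }}, }},')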
@@ -17,6 +17,8 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
         { "   "         , {     466, }, },
         { "\t"          , {     192, }, },
         { "\n"          , {     193, }, },
+        { "\n\n"        , {    1001, }, },
+        { "\n\n\n"      , {   11331, }, },
         { "\t\n"        , {   19125, }, },
         { "Hello world" , {    9856,   1079, }, },
         { " Hello world", {   23090,   1079, }, },
@@ -17,6 +17,8 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
         { "   "         , {     262, }, },
         { "\t"          , {     197, }, },
         { "\n"          , {     198, }, },
+        { "\n\n"        , {     271, }, },
+        { "\n\n\n"      , {    1432, }, },
         { "\t\n"        , {    1602, }, },
         { "Hello world" , {    9906,   1917, }, },
         { " Hello world", {   22691,   1917, }, },
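The table just above holds IDs from a byte-level BPE vocabulary, where runs of whitespace merge into single tokens, which is why "\n\n" and "\n\n\n" each map to a single ID. The sketch below illustrates that merge behaviour with tiktoken's cl100k_base encoding; it is only a stand-in with similar rules, not the ggml-vocab file the test loads, so its IDs are not guaranteed to match the table.

# Illustration only: a byte-level BPE merges newline runs into single tokens.
# cl100k_base is a stand-in encoding, not the vocab used by the test above.
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

for text in ["\n", "\n\n", "\n\n\n", "\t\n", "Hello world", " Hello world"]:
    print(repr(text), enc.encode(text))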
@@ -17,6 +17,8 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
         { "   "         , {     268, }, },
         { "\t"          , {   29871,     12, }, },
         { "\n"          , {   29871,     13, }, },
+        { "\n\n"        , {   29871,     13,     13, }, },
+        { "\n\n\n"      , {   29871,     13,     13,     13, }, },
         { "\t\n"        , {   29871,     12,     13, }, },
         { "Hello world" , {   15043,   3186, }, },
         { " Hello world", {   29871,  15043,   3186, }, },
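The SentencePiece (SPM) table above behaves differently from the BPE ones: the LLaMA vocabulary prepends the whitespace marker piece "▁" (29871) and represents each newline with the byte-fallback piece for 0x0A (13), so "\n\n\n" becomes four tokens rather than one. A minimal sketch of inspecting this with the sentencepiece package follows; the model path is a hypothetical placeholder, not a file from this commit.

# Sketch only: tokenize the same whitespace cases with a SentencePiece model.
# "tokenizer.model" is a hypothetical path, not part of this commit.
import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file="tokenizer.model")

for text in ["\t", "\n", "\n\n", "\n\n\n", "\t\n", " Hello world"]:
    print(repr(text), sp.encode(text, out_type=int))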
@@ -27,6 +27,8 @@ tests = [
         "   ",
         "\t",
         "\n",
+        "\n\n",
+        "\n\n\n",
         "\t\n",
         "Hello world",
         " Hello world",