tests : add more vocabs and tests

ggml-ci
Georgi Gerganov 2024-04-29 11:07:25 +03:00
parent 43708d22c3
commit c68d2596ea
21 changed files with 965 additions and 124 deletions

tests/CMakeLists.txt

@@ -66,7 +66,7 @@ function(llama_target_and_test source)
 endfunction()
 
 # build test-tokenizer-0 target once and add many tests
-add_executable(test-tokenizer-0 test-tokenizer-0.cpp get-model.cpp)
+add_executable(test-tokenizer-0 test-tokenizer-0.cpp)
 target_link_libraries(test-tokenizer-0 PRIVATE common)
 install(TARGETS test-tokenizer-0 RUNTIME)
@@ -75,9 +75,10 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${CMAKE
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-bert-bge ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bert-bge.gguf)
 
 # build test-tokenizer-1-bpe target once and add many tests
-add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp get-model.cpp)
+add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
 target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
 install(TARGETS test-tokenizer-1-bpe RUNTIME)
@@ -92,12 +93,12 @@ llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt2 ARGS ${CMAKE_CUR
 #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-bloom ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG
 
 # build test-tokenizer-1-spm target once and add many tests
-add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp get-model.cpp)
+add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
 target_link_libraries(test-tokenizer-1-spm PRIVATE common)
 install(TARGETS test-tokenizer-1-spm RUNTIME)
 
-llama_target_and_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
-llama_target_and_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
+llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
+llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
 
 # llama_target_and_test(test-double-float.cpp) # SLOW
 llama_target_and_test(test-quantize-fns.cpp)

View file

@@ -12,7 +12,7 @@
 #include <thread>
 #include <vector>
 
-int main(int argc, char **argv) {
+int main(int argc, char ** argv) {
     if (argc < 2) {
         fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
         return 1;
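
The per-vocab tests registered above all follow the same pattern: load only the vocabulary from the given ggml-vocab-*.gguf file and run tokenize/detokenize checks against it, so no model weights are needed. As a rough illustration only (not the contents of any file in this commit), a minimal round-trip check could look like the sketch below; it assumes the llama.cpp C API and the common.h helpers llama_tokenize() and llama_token_to_piece() roughly as they existed around this revision, so treat the exact signatures as assumptions.

// Illustrative sketch only - not the actual test source in this commit.
// Assumes the llama.cpp C API and common.h helpers roughly as of this revision.
#include "llama.h"
#include "common.h"

#include <cstdio>
#include <string>
#include <vector>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
        return 1;
    }

    const std::string fname = argv[1];

    llama_backend_init();

    // load only the vocabulary - tokenizer tests do not need any weights
    llama_model_params mparams = llama_model_default_params();
    mparams.vocab_only = true;

    llama_model * model = llama_load_model_from_file(fname.c_str(), mparams);
    if (model == NULL) {
        fprintf(stderr, "%s: failed to load vocab '%s'\n", __func__, fname.c_str());
        return 1;
    }

    llama_context * ctx = llama_new_context_with_model(model, llama_context_default_params());

    const int n_vocab = llama_n_vocab(model);

    int n_fail = 0;

    // round trip every token id: detokenize to a piece, re-tokenize, compare
    for (llama_token id = 0; id < n_vocab; ++id) {
        const std::string piece = llama_token_to_piece(ctx, id);
        const std::vector<llama_token> toks = llama_tokenize(ctx, piece, false);

        if (toks.size() != 1 || toks[0] != id) {
            // byte/special tokens legitimately fail this naive check;
            // the real tests handle them separately
            ++n_fail;
        }
    }

    printf("%s: %d / %d tokens did not round-trip exactly\n", __func__, n_fail, n_vocab);

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();

    return 0;
}

Loading with vocab_only keeps the checks cheap, which is why the small vocab-only GGUF files under models/ are enough to drive all of the test registrations added here.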