diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 71ae1513e..dde4fa9c8 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -239,6 +239,10 @@ class Model: self.gguf_writer.add_expert_used_count(n_experts_used) logger.info(f"gguf: experts used count = {n_experts_used}") + if (head_dim := self.hparams.get("head_dim")) is not None: + self.gguf_writer.add_key_length(head_dim) + self.gguf_writer.add_value_length(head_dim) + self.gguf_writer.add_file_type(self.ftype) logger.info(f"gguf: file type = {self.ftype}") @@ -596,6 +600,9 @@ class Model: if chkhsh == "63b97e4253352e6f357cc59ea5b583e3a680eaeaf2632188c2b952de2588485e": # ref: https://huggingface.co/mistralai/Mistral-Nemo-Base-2407 res = "tekken" + if chkhsh == "855059429035d75a914d1eda9f10a876752e281a054a7a3d421ef0533e5b6249": + # ref: https://huggingface.co/HuggingFaceTB/SmolLM-135M + res = "smollm" if res is None: logger.warning("\n") @@ -736,7 +743,7 @@ class Model: added_tokens_json = json.load(f) for key in added_tokens_json: token_id = added_tokens_json[key] - if (token_id >= vocab_size): + if token_id >= vocab_size: logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}') continue @@ -1484,7 +1491,12 @@ class LlamaModel(Model): super().set_gguf_parameters() hparams = self.hparams self.gguf_writer.add_vocab_size(hparams["vocab_size"]) - self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"]) + + if "head_dim" in hparams: + rope_dim = hparams["head_dim"] + else: + rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"] + self.gguf_writer.add_rope_dimension_count(rope_dim) if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]: if self.hparams["rope_scaling"].get("type") == "linear": @@ -1999,7 +2011,7 @@ class Phi3MiniModel(Model): for key in added_tokens_json: token_id = added_tokens_json[key] - if (token_id >= vocab_size): + if token_id >= vocab_size: logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}') continue @@ -2075,7 +2087,7 @@ class Phi3MiniModel(Model): # write rope scaling for long context (128k) model rope_scaling = self.find_hparam(['rope_scaling'], True) - if (rope_scaling is None): + if rope_scaling is None: return scale = max_pos_embds / orig_max_pos_embds @@ -2722,7 +2734,7 @@ class JinaBertV2Model(BertModel): yield name, data - def set_vocab(self, *args, **kwargs): + def set_vocab(self): tokenizer_class = 'BertTokenizer' with open(self.dir_model / "tokenizer_config.json", "r", encoding="utf-8") as f: tokenizer_class = json.load(f)['tokenizer_class'] @@ -2870,7 +2882,7 @@ class ArcticModel(Model): added_tokens_decoder = tokenizer_config_json["added_tokens_decoder"] for token_id, token_json in added_tokens_decoder.items(): token_id = int(token_id) - if (token_id >= vocab_size): + if token_id >= vocab_size: logger.debug(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}') continue @@ -3119,7 +3131,7 @@ class T5Model(Model): added_tokens_json = json.load(f) for key in added_tokens_json: token_id = added_tokens_json[key] - if (token_id >= vocab_size): + if token_id >= vocab_size: logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}') continue diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index a92379666..d5a2d925e 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -50,7 +50,7 @@ class TOKENIZER_TYPE(IntEnum): # TODO: this string has to exercise 
as much pre-tokenizer functionality as possible # will be updated with time - contributions welcome -chktxt = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL' +CHK_TXT = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL' if len(sys.argv) == 2: token = sys.argv[1] @@ -93,6 +93,7 @@ models = [ {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", }, {"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B", }, {"name": "tekken", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistralai/Mistral-Nemo-Base-2407", }, + {"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", }, ] @@ -101,8 +102,8 @@ def download_file_with_auth(url, token, save_path): response = sess.get(url, headers=headers) response.raise_for_status() os.makedirs(os.path.dirname(save_path), exist_ok=True) - with open(save_path, 'wb') as f: - f.write(response.content) + with open(save_path, 'wb') as downloaded_file: + downloaded_file.write(response.content) logger.info(f"File {save_path} downloaded successfully") @@ -161,7 +162,7 @@ for model in models: logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. 
Error: {e}") continue # Skip to the next model if the tokenizer can't be loaded - chktok = tokenizer.encode(chktxt) + chktok = tokenizer.encode(CHK_TXT) chkhsh = sha256(str(chktok).encode()).hexdigest() logger.info(f"model: {name}") @@ -193,7 +194,7 @@ src_func = f""" # we will use this unique identifier to write a "tokenizer.ggml.pre" entry in the GGUF file which we can # use in llama.cpp to implement the same pre-tokenizer - chktxt = {repr(chktxt)} + chktxt = {repr(CHK_TXT)} chktok = tokenizer.encode(chktxt) chkhsh = sha256(str(chktok).encode()).hexdigest() @@ -289,7 +290,7 @@ tests = [ "333333333", "Cửa Việt", # llama-bpe fails on this " discards", - chktxt, + CHK_TXT, ] # write the tests to ./models/ggml-vocab-{name}.gguf.inp diff --git a/convert_llama_ggml_to_gguf.py b/convert_llama_ggml_to_gguf.py index 95ea831a5..7b00b4398 100755 --- a/convert_llama_ggml_to_gguf.py +++ b/convert_llama_ggml_to_gguf.py @@ -132,6 +132,10 @@ class Tensor: class GGMLModel: + + file_format: GGMLFormat + format_version: int + def __init__(self): self.hyperparameters = None self.vocab = None @@ -290,7 +294,7 @@ class GGMLToGGUF: if self.vocab_override is not None: vo = self.vocab_override logger.info('* Adding vocab item(s)') - for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()): + for (_, (vbytes, score, ttype)) in enumerate(vo.all_tokens()): tokens.append(vbytes) scores.append(score) toktypes.append(ttype) diff --git a/examples/llama.android/llama/src/main/cpp/llama-android.cpp b/examples/llama.android/llama/src/main/cpp/llama-android.cpp index 92a6b16b1..2aafe2316 100644 --- a/examples/llama.android/llama/src/main/cpp/llama-android.cpp +++ b/examples/llama.android/llama/src/main/cpp/llama-android.cpp @@ -409,7 +409,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop( const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value); if (llama_token_is_eog(model, new_token_id) || n_cur == n_len) { - return env->NewStringUTF(""); + return nullptr; } auto new_token_chars = llama_token_to_piece(context, new_token_id); diff --git a/examples/server/README.md b/examples/server/README.md index e477d1501..ff4074517 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -444,7 +444,7 @@ node index.js `n_predict`: Set the maximum number of tokens to predict when generating text. **Note:** May exceed the set limit slightly if the last token is a partial multibyte character. When 0, no tokens will be generated but the prompt is evaluated into the cache. Default: `-1`, where `-1` is infinity. - `n_keep`: Specify the number of tokens from the prompt to retain when the context size is exceeded and tokens need to be discarded. + `n_keep`: Specify the number of tokens from the prompt to retain when the context size is exceeded and tokens need to be discarded. The number excludes the BOS token. By default, this value is set to `0`, meaning no tokens are kept. Use `-1` to retain all tokens from the prompt. `stream`: It allows receiving each predicted token in real-time instead of waiting for the completion to finish. To enable this, set to `true`. 
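For illustration of the `n_keep` behaviour documented above: a minimal client-side sketch (not part of this patch) that passes `n_keep` to the server's `/completion` endpoint. It assumes a llama-server instance listening on localhost:8080; the prompt text and token counts are placeholders.

import requests  # assumed available in the client environment

resp = requests.post(
    "http://localhost:8080/completion",
    json={
        "prompt": "You are a helpful assistant.\nUser: hello\nAssistant:",
        "n_predict": 128,  # generate at most 128 tokens
        "n_keep": 16,      # keep the first 16 prompt tokens (BOS excluded) when the context overflows
    },
)
print(resp.json()["content"])

With `"n_keep": -1` the whole prompt would be retained instead of only the first 16 tokens.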
diff --git a/flake.lock b/flake.lock index 3e5125dde..940cda6a4 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1720768451, - "narHash": "sha256-EYekUHJE2gxeo2pM/zM9Wlqw1Uw2XTJXOSAO79ksc4Y=", + "lastModified": 1721379653, + "narHash": "sha256-8MUgifkJ7lkZs3u99UDZMB4kbOxvMEXQZ31FO3SopZ0=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "7e7c39ea35c5cdd002cd4588b03a3fb9ece6fad9", + "rev": "1d9c2c9b3e71b9ee663d11c5d298727dace8d374", "type": "github" }, "original": { diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index 1defed3ca..47418597c 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -4748,7 +4748,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r int sumi = __riscv_vmv_x_s_i32m1_i32(vs2); - sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d)) * sumi; + sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi; } #elif defined(__POWER9_VECTOR__) diff --git a/include/llama.h b/include/llama.h index 2dc4e6d30..bf2761467 100644 --- a/include/llama.h +++ b/include/llama.h @@ -92,8 +92,9 @@ extern "C" { LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17, LLAMA_VOCAB_PRE_TYPE_VIKING = 18, LLAMA_VOCAB_PRE_TYPE_JAIS = 19, - LLAMA_VOCAB_PRE_TYPE_CODESHELL = 20, - LLAMA_VOCAB_PRE_TYPE_TEKKEN = 21, + LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20, + LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21, + LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22, }; // note: these values should be synchronized with ggml_rope diff --git a/models/ggml-vocab-gpt2.gguf b/models/ggml-vocab-gpt2.gguf deleted file mode 100644 index 1fbc72c1e..000000000 Binary files a/models/ggml-vocab-gpt2.gguf and /dev/null differ diff --git a/models/ggml-vocab-stablelm.gguf b/models/ggml-vocab-stablelm.gguf deleted file mode 100644 index ebb0cdb7d..000000000 Binary files a/models/ggml-vocab-stablelm.gguf and /dev/null differ diff --git a/src/llama.cpp b/src/llama.cpp index 34b4191d6..6df81da4b 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -3707,7 +3707,7 @@ struct llama_model_loader { } if (param_overrides_p != nullptr) { - for (const struct llama_model_kv_override *p = param_overrides_p; p->key[0] != 0; p++) { + for (const struct llama_model_kv_override * p = param_overrides_p; p->key[0] != 0; p++) { kv_overrides.insert({std::string(p->key), *p}); } } @@ -3875,7 +3875,7 @@ struct llama_model_loader { ftype = (llama_ftype) (ftype | LLAMA_FTYPE_GUESSED); { - const int kid = gguf_find_key(meta, "general.file_type"); + const int kid = gguf_find_key(meta, "general.file_type"); // TODO: use LLM_KV if (kid >= 0) { ftype = (llama_ftype) gguf_get_val_u32(meta, kid); } @@ -5369,6 +5369,7 @@ static void llm_load_vocab( if (merges_keyidx == -1) { throw std::runtime_error("cannot find tokenizer merges in model file\n"); } + const int n_merges = gguf_get_arr_n(ctx, merges_keyidx); for (int i = 0; i < n_merges; i++) { const std::string word = gguf_get_arr_str(ctx, merges_keyidx, i); @@ -5407,16 +5408,6 @@ static void llm_load_vocab( vocab.special_cls_id = -1; vocab.special_mask_id = -1; - const int add_space_prefix_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_ADD_PREFIX).c_str()); - if (add_space_prefix_keyidx != -1) { - vocab.tokenizer_add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx); - } // The default value of add_space_prefix is true. 
- - const int remove_extra_whitespaces_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_REMOVE_EXTRA_WS).c_str()); - if (remove_extra_whitespaces_keyidx != -1) { - vocab.tokenizer_remove_extra_whitespaces = gguf_get_val_bool(ctx, remove_extra_whitespaces_keyidx); - } // The default value of remove_extra_whitespaces is false. - const int precompiled_charsmap_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str()); if (precompiled_charsmap_keyidx != -1) { size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx); @@ -5533,6 +5524,10 @@ static void llm_load_vocab( vocab.tokenizer_clean_spaces = false; vocab.tokenizer_ignore_merges = true; vocab.tokenizer_add_bos = true; + } else if ( + tokenizer_pre == "smollm") { + vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMOLLM; + vocab.tokenizer_clean_spaces = false; } else { throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str())); } @@ -5556,10 +5551,8 @@ static void llm_load_vocab( vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT; } - const int add_space_prefix_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_ADD_PREFIX).c_str()); - if (add_space_prefix_keyidx != -1) { - vocab.tokenizer_add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx); - } + ml.get_key(LLM_KV_TOKENIZER_ADD_PREFIX, vocab.tokenizer_add_space_prefix, false); + ml.get_key(LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, vocab.tokenizer_remove_extra_whitespaces, false); } const int token_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_LIST).c_str()); @@ -6140,10 +6133,10 @@ static bool llm_load_tensors( layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}); - layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}); - layer.wk = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}); - layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}); - layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}); + layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}); + layer.wk = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}); + layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}); + layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}); // optional bias tensors layer.bq = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED); @@ -15558,6 +15551,7 @@ struct llm_tokenizer_bpe { case LLAMA_VOCAB_PRE_TYPE_REFACT: case LLAMA_VOCAB_PRE_TYPE_COMMAND_R: case LLAMA_VOCAB_PRE_TYPE_CODESHELL: + case LLAMA_VOCAB_PRE_TYPE_SMOLLM: regex_exprs = { "\\p{N}", "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)", @@ -18292,8 +18286,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s // copy the KV pairs from the input file gguf_set_kv (ctx_out, ml.meta); - gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION); - gguf_set_val_u32(ctx_out, "general.file_type", ftype); + gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION); // TODO: use LLM_KV + gguf_set_val_u32(ctx_out, "general.file_type", ftype); // TODO: use LLM_KV + // Remove split metadata gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_NO).c_str()); 
gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str());
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index cfa707315..0207e3a59 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -70,21 +70,19 @@ add_executable(test-tokenizer-0 test-tokenizer-0.cpp)
target_link_libraries(test-tokenizer-0 PRIVATE common)
install(TARGETS test-tokenizer-0 RUNTIME)
-llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
-llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
-llama_test(test-tokenizer-0 NAME test-tokenizer-0-phi-3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-phi-3.gguf)
-llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-bert-bge ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bert-bge.gguf)
-# TODO: enable when fixed
-# https://github.com/ggerganov/llama.cpp/pull/7036
-#llama_test(test-tokenizer-0 NAME test-tokenizer-0-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
-#llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
-#llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
-llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
-llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
-llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-command-r.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-phi-3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-phi-3.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-qwen2.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
+llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
# build test-tokenizer-1-bpe target once and add many tests
add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
@@ -92,16 +90,14 @@ target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
install(TARGETS test-tokenizer-1-bpe RUNTIME)
# TODO: disabled due to slowness
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-stablelm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm.gguf)
+#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
+#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
+#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf --ignore-merges)
+#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
-#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-bloom ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf)
# build test-tokenizer-1-spm target once and add many tests
add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
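The tokenizer registration in this patch hinges on a fingerprint of the pre-tokenizer: convert_hf_to_gguf_update.py encodes the fixed check string (CHK_TXT) with each reference tokenizer and hashes the resulting token ids, convert_hf_to_gguf.py maps the hash 855059429035d75a914d1eda9f10a876752e281a054a7a3d421ef0533e5b6249 to res = "smollm", and llama.cpp resolves that string to LLAMA_VOCAB_PRE_TYPE_SMOLLM when loading the GGUF. As a rough sketch of the fingerprinting step (requires the transformers package and network access; the short check string passed below is a placeholder, so its hash will not match the registered value):

from hashlib import sha256

from transformers import AutoTokenizer  # assumed available

def pretokenizer_fingerprint(model_id: str, chk_txt: str) -> str:
    # Encode the check string and hash the token id sequence,
    # mirroring what convert_hf_to_gguf_update.py does per model.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    chktok = tokenizer.encode(chk_txt)
    return sha256(str(chktok).encode()).hexdigest()

print(pretokenizer_fingerprint("HuggingFaceTB/SmolLM-135M", "placeholder check string"))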