llama : update API names to use correct prefix

ggml-ci
2025-01-10 14:40:29 +02:00 · 2025-01-10 14:40:29 +02:00 · 1586ed5061
commit 1586ed5061
parent aeeb9420a3
37 changed files with 59 additions and 59 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@ -857,7 +857,7 @@ struct common_init_result common_init_from_params(common_params & params) {
        return iparams;
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    if (params.reranking) {
        bool ok = true;
@ -1563,7 +1563,7 @@ std::vector<llama_token> common_tokenize(
                        bool   add_special,
                        bool   parse_special) {
    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);
    return common_tokenize(vocab, text, add_special, parse_special);
 }

@ -1588,7 +1588,7 @@ std::vector<llama_token> common_tokenize(

 std::string common_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);
    return common_token_to_piece(vocab, token, special);
 }

@ -1610,7 +1610,7 @@ std::string common_token_to_piece(const struct llama_vocab * vocab, llama_token

 std::string common_detokenize(const struct llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);
    return common_detokenize(vocab, tokens, special);
 }

--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@ -114,7 +114,7 @@ struct common_sampler {
        const auto * logits = llama_get_logits_ith(ctx, idx);

        const llama_model * model = llama_get_model(ctx);
-        const llama_vocab * vocab = llama_get_vocab(model);
+        const llama_vocab * vocab = llama_model_get_vocab(model);

        const int n_vocab = llama_n_vocab(vocab);

@ -145,7 +145,7 @@ std::string common_params_sampling::print() const {
 }

 struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params) {
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    llama_sampler_chain_params lparams = llama_sampler_chain_default_params();

--- a/common/speculative.cpp
+++ b/common/speculative.cpp
@ -79,8 +79,8 @@ bool common_speculative_are_compatible(
    const struct llama_model * model_tgt = llama_get_model(ctx_tgt);
    const struct llama_model * model_dft = llama_get_model(ctx_dft);

-    const struct llama_vocab * vocab_tgt = llama_get_vocab(model_tgt);
-    const struct llama_vocab * vocab_dft = llama_get_vocab(model_dft);
+    const struct llama_vocab * vocab_tgt = llama_model_get_vocab(model_tgt);
+    const struct llama_vocab * vocab_dft = llama_model_get_vocab(model_dft);

    const bool vocab_type_tgt = llama_vocab_type(vocab_tgt);
    LOG_DBG("%s: vocab_type tgt: %d\n", __func__, vocab_type_tgt);
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@ -48,7 +48,7 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    // tokenize the prompt

--- a/examples/cvector-generator/cvector-generator.cpp
+++ b/examples/cvector-generator/cvector-generator.cpp
@ -274,7 +274,7 @@ struct tokenized_prompt {

    tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
        const llama_model * model = llama_get_model(ctx);
-        const llama_vocab * vocab = llama_get_vocab(model);
+        const llama_vocab * vocab = llama_model_get_vocab(model);
        const bool add_bos = llama_add_bos_token(vocab);
        tokens_pos = common_tokenize(ctx, pos, add_bos, true);
        tokens_neg = common_tokenize(ctx, neg, add_bos, true);
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@ -105,7 +105,7 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    const int n_ctx_train = llama_n_ctx_train(model);
    const int n_ctx = llama_n_ctx(ctx);
--- a/examples/eval-callback/eval-callback.cpp
+++ b/examples/eval-callback/eval-callback.cpp
@ -128,7 +128,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {

 static bool run(llama_context * ctx, const common_params & params) {
    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    const bool add_bos = llama_add_bos_token(vocab);

--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@ -8,7 +8,6 @@
 #include <map>
 #include <vector>
 #include <string>
-#include <thread>
 #include <fstream>

 static bool g_verbose = false;
--- a/examples/gritlm/gritlm.cpp
+++ b/examples/gritlm/gritlm.cpp
@ -11,7 +11,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
    std::vector<std::vector<float>> result;

    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    llama_batch batch = llama_batch_init(llama_n_batch(ctx), 0, 1);

@ -98,7 +98,7 @@ static std::string generate(llama_context * ctx, llama_sampler * smpl, const std
    std::string result;

    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    llama_token eos_token = llama_token_eos(vocab);

--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@ -430,7 +430,7 @@ static void process_logits(

 static bool compute_imatrix(llama_context * ctx, const common_params & params) {
    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    const bool add_bos = llama_add_bos_token(vocab);
    const int n_ctx = llama_n_ctx(ctx);
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@ -139,7 +139,7 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    const int n_ctx_train = llama_n_ctx_train(model);
    const int n_ctx = llama_n_ctx(ctx);
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@ -1401,7 +1401,7 @@ static void test_prompt(llama_context * ctx, int n_prompt, int n_batch, int n_th
    llama_set_n_threads(ctx, n_threads, n_threads);

    const llama_model * model   = llama_get_model(ctx);
-    const llama_vocab * vocab   = llama_get_vocab(model);
+    const llama_vocab * vocab   = llama_model_get_vocab(model);
    const int32_t       n_vocab = llama_n_vocab(vocab);

    std::vector<llama_token> tokens(n_batch);
@ -1425,7 +1425,7 @@ static void test_gen(llama_context * ctx, int n_gen, int n_threads) {
    llama_set_n_threads(ctx, n_threads, n_threads);

    const llama_model * model   = llama_get_model(ctx);
-    const llama_vocab * vocab   = llama_get_vocab(model);
+    const llama_vocab * vocab   = llama_model_get_vocab(model);
    const int32_t       n_vocab = llama_n_vocab(vocab);

    llama_token token = llama_add_bos_token(vocab) ? llama_token_bos(vocab) : std::rand() % n_vocab;
--- a/examples/llama.android/llama/src/main/cpp/llama-android.cpp
+++ b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
@ -405,7 +405,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(
    const auto batch   = reinterpret_cast<llama_batch   *>(batch_pointer);
    const auto sampler = reinterpret_cast<llama_sampler *>(sampler_pointer);
    const auto model = llama_get_model(context);
-    const auto vocab = llama_get_vocab(model);
+    const auto vocab = llama_model_get_vocab(model);

    if (!la_int_var) la_int_var = env->GetObjectClass(intvar_ncur);
    if (!la_int_var_value) la_int_var_value = env->GetMethodID(la_int_var, "getValue", "()I");
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@ -49,7 +49,7 @@ static const char * sample(struct common_sampler * smpl,
    common_sampler_accept(smpl, id, true);

    const llama_model * model = llama_get_model(ctx_llama);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    static std::string ret;
    if (llama_token_is_eog(vocab, id)) {
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@ -169,7 +169,7 @@ static const char * sample(struct common_sampler * smpl,
    common_sampler_accept(smpl, id, true);

    const llama_model * model = llama_get_model(ctx_llama);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    static std::string ret;
    if (llama_token_is_eog(vocab, id)) {
--- a/examples/llava/qwen2vl-cli.cpp
+++ b/examples/llava/qwen2vl-cli.cpp
@ -134,7 +134,7 @@ static const char * sample(struct common_sampler * smpl,
    common_sampler_accept(smpl, id, true);

    const llama_model * model = llama_get_model(ctx_llama);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    static std::string ret;
    if (llama_token_is_eog(vocab, id)) {
--- a/examples/lookahead/lookahead.cpp
+++ b/examples/lookahead/lookahead.cpp
@ -61,7 +61,7 @@ int main(int argc, char ** argv) {
    llama_model * model = llama_init.model.get();
    llama_context * ctx = llama_init.context.get();

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    // Tokenize the prompt
    std::vector<llama_token> inp;
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@ -36,7 +36,7 @@ int main(int argc, char ** argv){
    llama_model * model = llama_init.model.get();
    llama_context * ctx = llama_init.context.get();

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    // tokenize the prompt
    std::vector<llama_token> inp;
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -163,7 +163,7 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    LOG_INF("%s: llama threadpool init, n_threads = %d\n", __func__, (int) params.cpuparams.n_threads);

--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@ -135,7 +135,7 @@ int main(int argc, char ** argv) {
    llama_model * model = llama_init.model.get();
    llama_context * ctx = llama_init.context.get();

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    // load the prompts from an external file if there are any
    if (params.prompt.empty()) {
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@ -70,7 +70,7 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    // initialize the context

--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@ -297,7 +297,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const common_params
    // BOS tokens will be added for each chunk before eval

    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    const bool add_bos = llama_add_bos_token(vocab);
    GGML_ASSERT(!llama_add_eos_token(vocab));
@ -448,7 +448,7 @@ static results_perplexity perplexity(llama_context * ctx, const common_params &
    // BOS tokens will be added for each chunk before eval

    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab *   vocab   = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    const bool add_bos = llama_add_bos_token(vocab);
    GGML_ASSERT(!llama_add_eos_token(vocab));
@ -739,7 +739,7 @@ static void compute_logprobs(const float * batch_logits, int n_vocab, std::vecto

 static void hellaswag_score(llama_context * ctx, const common_params & params) {
    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    // Calculates hellaswag score (acc_norm) from prompt
    //
@ -1082,7 +1082,7 @@ static std::vector<winogrande_entry> load_winogrande_from_csv(const std::string
 */
 static void winogrande_score(llama_context * ctx, const common_params & params) {
    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    constexpr int k_min_trailing_ctx = 3;

@ -1386,7 +1386,7 @@ static bool multiple_choice_prepare_one_task(llama_context * ctx, multiple_choic
 //
 static void multiple_choice_score(llama_context * ctx, const common_params & params) {
    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    std::istringstream strstream(params.prompt);
    uint32_t n_task;
@ -1669,7 +1669,7 @@ static void multiple_choice_score(llama_context * ctx, const common_params & par

 static void kl_divergence(llama_context * ctx, const common_params & params) {
    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    if (params.logits_file.empty()) {
        LOG_ERR("%s: you must provide a name of a file containing the log probabilities of the base model\n", __func__);
--- a/examples/retrieval/retrieval.cpp
+++ b/examples/retrieval/retrieval.cpp
@ -159,7 +159,7 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    const int n_ctx_train = llama_n_ctx_train(model);
    const int n_ctx = llama_n_ctx(ctx);
--- a/examples/run/run.cpp
+++ b/examples/run/run.cpp
@ -773,7 +773,7 @@ static void print_word_and_concatenate_to_response(const std::string & piece, st

 // helper function to evaluate a prompt and generate a response
 static int generate(LlamaData & llama_data, const std::string & prompt, std::string & response) {
-    const llama_vocab * vocab = llama_get_vocab(llama_data.model.get());
+    const llama_vocab * vocab = llama_model_get_vocab(llama_data.model.get());

    std::vector<llama_token> tokens;
    if (tokenize_prompt(vocab, prompt, tokens) < 0) {
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -207,7 +207,7 @@ struct server_task {
            const common_params & params_base,
            const json & data) {
        const llama_model * model = llama_get_model(ctx);
-        const llama_vocab * vocab = llama_get_vocab(model);
+        const llama_vocab * vocab = llama_model_get_vocab(model);

        slot_params params;

@ -1694,7 +1694,7 @@ struct server_context {
            return false;
        }

-        vocab = llama_get_vocab(model);
+        vocab = llama_model_get_vocab(model);

        n_ctx = llama_n_ctx(ctx);

--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@ -774,7 +774,7 @@ static std::vector<llama_token_data> get_token_probabilities(llama_context * ctx
    const auto * logits = llama_get_logits_ith(ctx, idx);

    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    const int n_vocab = llama_n_vocab(vocab);

--- a/examples/simple-chat/simple-chat.cpp
+++ b/examples/simple-chat/simple-chat.cpp
@ -75,7 +75,7 @@ int main(int argc, char ** argv) {
        return 1;
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    // initialize the context
    llama_context_params ctx_params = llama_context_default_params();
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@ -84,7 +84,7 @@ int main(int argc, char ** argv) {
    model_params.n_gpu_layers = ngl;

    llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    if (model == NULL) {
        fprintf(stderr , "%s: error: unable to load model\n" , __func__);
--- a/examples/speculative-simple/speculative-simple.cpp
+++ b/examples/speculative-simple/speculative-simple.cpp
@ -45,7 +45,7 @@ int main(int argc, char ** argv) {
    model_tgt = llama_init_tgt.model.get();
    ctx_tgt   = llama_init_tgt.context.get();

-    const llama_vocab * vocab = llama_get_vocab(model_tgt);
+    const llama_vocab * vocab = llama_model_get_vocab(model_tgt);

    // load the draft model
    params.devices      = params.speculative.devices;
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@ -90,8 +90,8 @@ int main(int argc, char ** argv) {
    model_dft = llama_init_dft.model.get();
    ctx_dft   = llama_init_dft.context.get();

-    const llama_vocab * vocab_tgt = llama_get_vocab(model_tgt);
-    const llama_vocab * vocab_dft = llama_get_vocab(model_dft);
+    const llama_vocab * vocab_tgt = llama_model_get_vocab(model_tgt);
+    const llama_vocab * vocab_dft = llama_model_get_vocab(model_dft);

    const bool vocab_type_tgt = llama_vocab_type(vocab_tgt);
    LOG_DBG("vocab_type tgt: %d\n", vocab_type_tgt);
--- a/examples/tokenize/tokenize.cpp
+++ b/examples/tokenize/tokenize.cpp
@ -344,7 +344,7 @@ int main(int raw_argc, char ** raw_argv) {
        return 1;
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    llama_context_params ctx_params = llama_context_default_params();
    llama_context * ctx = llama_new_context_with_model(model, ctx_params);
--- a/examples/tts/tts.cpp
+++ b/examples/tts/tts.cpp
@ -462,7 +462,7 @@ int main(int argc, char ** argv) {
    model_ttc = llama_init_ttc.model.get();
    ctx_ttc   = llama_init_ttc.context.get();

-    const llama_vocab * vocab = llama_get_vocab(model_ttc);
+    const llama_vocab * vocab = llama_model_get_vocab(model_ttc);

    // TODO: refactor in a common struct
    params.model     = params.vocoder.model;
--- a/include/llama.h
+++ b/include/llama.h
@ -456,15 +456,16 @@ extern "C" {

    LLAMA_API int32_t llama_n_vocab    (const struct llama_vocab * vocab);

-    LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
-    LLAMA_API const struct llama_vocab * llama_get_vocab(const struct llama_model * model);
+    LLAMA_API const struct llama_model * llama_get_model   (const struct llama_context * ctx);
+    LLAMA_API enum llama_pooling_type    llama_pooling_type(const struct llama_context * ctx);

-    LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
-    LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_vocab * vocab);
-    LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);
+    LLAMA_API const struct llama_vocab * llama_model_get_vocab(const struct llama_model * model);
+    LLAMA_API enum llama_rope_type       llama_model_rope_type(const struct llama_model * model);

    // Get the model's RoPE frequency scaling factor
-    LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
+    LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
+
+    LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_vocab * vocab);

    // Functions to access the model's GGUF metadata scalar values
    // - The functions return the length of the string on success, or -1 on failure
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@ -1244,7 +1244,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
        hparams.use_alibi = true;
    }

-    hparams.rope_type = llama_rope_type(this);
+    hparams.rope_type = llama_model_rope_type(this);
 }

 void llama_model::load_vocab(llama_model_loader & ml) {
@ -3735,7 +3735,7 @@ struct llama_model_params llama_model_default_params() {
    return result;
 }

-const struct llama_vocab * llama_get_vocab(const struct llama_model * model) {
+const struct llama_vocab * llama_model_get_vocab(const struct llama_model * model) {
    return &model->vocab;
 }

@ -3763,7 +3763,7 @@ int32_t llama_n_head(const struct llama_model * model) {
    return model->hparams.n_head();
 }

-enum llama_rope_type llama_rope_type(const struct llama_model * model) {
+enum llama_rope_type llama_model_rope_type(const struct llama_model * model) {
    switch (model->arch) {
        // these models do not use RoPE
        case LLM_ARCH_GPT2:
@ -3841,7 +3841,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
    return LLAMA_ROPE_TYPE_NONE;
 }

-float llama_rope_freq_scale_train(const struct llama_model * model) {
+float llama_model_rope_freq_scale_train(const struct llama_model * model) {
    return model->hparams.rope_freq_scale_train;
 }

--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@ -372,7 +372,7 @@ llama_token llama_sampler_sample(struct llama_sampler * smpl, struct llama_conte
    const auto * logits = llama_get_logits_ith(ctx, idx);

    const llama_model * model = llama_get_model(ctx);
-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    const int n_vocab = llama_n_vocab(vocab);

--- a/tests/test-tokenizer-1-bpe.cpp
+++ b/tests/test-tokenizer-1-bpe.cpp
@ -64,7 +64,7 @@ int main(int argc, char **argv) {
        }
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    //GGML_ASSERT(llama_vocab_type(vocab) == LLAMA_VOCAB_TYPE_BPE);
    if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_BPE) {
--- a/tests/test-tokenizer-1-spm.cpp
+++ b/tests/test-tokenizer-1-spm.cpp
@ -52,7 +52,7 @@ int main(int argc, char ** argv) {
        }
    }

-    const llama_vocab * vocab = llama_get_vocab(model);
+    const llama_vocab * vocab = llama_model_get_vocab(model);

    //GGML_ASSERT(llama_vocab_type(vocab) == LLAMA_VOCAB_TYPE_SPM);
    if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_SPM) {