llama : disambiguate API

ggml-ci
Georgi Gerganov 2024-09-02 10:06:42 +03:00
parent 8f1d81a0b6
commit 086e7f6ebc
No known key found for this signature in database
GPG key ID: BF970631944C16B7
9 changed files with 22 additions and 18 deletions
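
The renames in this commit (llama_pooling_type -> llama_get_pooling_type, llama_vocab_type -> llama_get_vocab_type) resolve a collision between the getter functions and the enum types they return, which until now shared the same identifiers. A minimal sketch of the ambiguity, using simplified stand-ins rather than the real llama.h declarations:

    // Stand-in declarations; the real enum has more members.
    enum llama_pooling_type { LLAMA_POOLING_TYPE_NONE = 0 };
    struct llama_context;

    // Before: the function shares its identifier with the enum type. This is
    // legal, but in C++ the function hides the type name, so every use of the
    // type has to be spelled with the elaborated form "enum llama_pooling_type"
    // (visible in the call sites below).
    enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);

    // After: a distinct getter name leaves the type name unambiguous.
    enum llama_pooling_type llama_get_pooling_type(const struct llama_context * ctx);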

@@ -31,7 +31,7 @@ static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & toke
 }
 
 static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd, int embd_norm) {
-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    const enum llama_pooling_type pooling_type = llama_get_pooling_type(ctx);
     const struct llama_model * model = llama_get_model(ctx);
 
     // clear previous kv_cache values (irrelevant for embeddings)
@@ -114,7 +114,7 @@ int main(int argc, char ** argv) {
     const int n_ctx_train = llama_n_ctx_train(model);
     const int n_ctx = llama_n_ctx(ctx);
 
-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    const enum llama_pooling_type pooling_type = llama_get_pooling_type(ctx);
 
     if (llama_model_has_encoder(model) && llama_model_has_decoder(model)) {
         fprintf(stderr, "%s: error: computing embeddings in encoder-decoder models is not supported\n", __func__);

@@ -796,7 +796,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
     size_t hs_task_count = prompt_lines.size()/6;
     fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, hs_task_count);
 
-    const bool is_spm = llama_vocab_type(llama_get_model(ctx)) == LLAMA_VOCAB_TYPE_SPM;
+    const bool is_spm = llama_get_vocab_type(llama_get_model(ctx)) == LLAMA_VOCAB_TYPE_SPM;
     fprintf(stderr, "================================= is_spm = %d\n", is_spm);
 
     // The tasks should be randomized so the score stabilizes quickly.

@@ -162,7 +162,7 @@ int main(int argc, char ** argv) {
     const int n_ctx_train = llama_n_ctx_train(model);
     const int n_ctx = llama_n_ctx(ctx);
 
-    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    const enum llama_pooling_type pooling_type = llama_get_pooling_type(ctx);
     if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
         fprintf(stderr, "%s: error: pooling type NONE not supported\n", __func__);
         return 1;
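
For callers, the migration is mechanical: the getters gain a "get" while the enum type names stay as they were. As a usage sketch, the renamed pooling getter typically decides which embedding accessor applies; the declarations below are hypothetical stand-ins for their llama.h counterparts:

    // Stand-ins for the llama.h declarations this sketch assumes.
    struct llama_context;
    enum llama_pooling_type { LLAMA_POOLING_TYPE_NONE = 0, LLAMA_POOLING_TYPE_MEAN = 1, LLAMA_POOLING_TYPE_CLS = 2 };
    enum llama_pooling_type llama_get_pooling_type(const struct llama_context * ctx);
    float * llama_get_embeddings    (struct llama_context * ctx);             // per-token embeddings
    float * llama_get_embeddings_seq(struct llama_context * ctx, int seq_id); // pooled, per-sequence

    // Pick the accessor based on the pooling mode, mirroring the checks in
    // the embedding/retrieval hunks above: NONE exposes per-token vectors only.
    static float * embeddings_for_seq(struct llama_context * ctx, int seq_id) {
        if (llama_get_pooling_type(ctx) == LLAMA_POOLING_TYPE_NONE) {
            return llama_get_embeddings(ctx); // caller indexes per token
        }
        return llama_get_embeddings_seq(ctx, seq_id); // one pooled vector per sequence
    }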

@@ -2450,7 +2450,7 @@ struct server_context {
 
     json model_meta() const {
         return json {
-            {"vocab_type",  llama_vocab_type    (model)},
+            {"vocab_type",  llama_get_vocab_type(model)},
             {"n_vocab",     llama_n_vocab       (model)},
             {"n_ctx_train", llama_n_ctx_train   (model)},
             {"n_embd",      llama_n_embd        (model)},

@@ -82,10 +82,10 @@ int main(int argc, char ** argv) {
     model_dft = llama_init_dft.model;
     ctx_dft = llama_init_dft.context;
 
-    const bool vocab_type_tgt = llama_vocab_type(model_tgt);
+    const bool vocab_type_tgt = llama_get_vocab_type(model_tgt);
     LOG("vocab_type tgt: %d\n", vocab_type_tgt);
 
-    const bool vocab_type_dft = llama_vocab_type(model_dft);
+    const bool vocab_type_dft = llama_get_vocab_type(model_dft);
     LOG("vocab_type dft: %d\n", vocab_type_dft);
 
     if (vocab_type_tgt != vocab_type_dft) {
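
One thing worth flagging in the hunk above, unchanged by this commit: llama_get_vocab_type returns an enum llama_vocab_type, but both results are stored in bools, which collapses every non-NONE vocab type to true and can let the mismatch check pass for two different vocabularies. A sketch of the enum-typed comparison, again with hypothetical stand-ins for the llama.h declarations:

    // Stand-ins for the llama.h declarations this sketch assumes.
    struct llama_model;
    enum llama_vocab_type { LLAMA_VOCAB_TYPE_NONE = 0, LLAMA_VOCAB_TYPE_SPM = 1, LLAMA_VOCAB_TYPE_BPE = 2 };
    enum llama_vocab_type llama_get_vocab_type(const struct llama_model * model);

    // Keeping the enum makes the comparison meaningful: SPM (1) and BPE (2)
    // compare unequal here, but both collapse to true when stored in a bool.
    static bool vocab_types_match(const struct llama_model * tgt, const struct llama_model * dft) {
        return llama_get_vocab_type(tgt) == llama_get_vocab_type(dft);
    }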