common : remove duplicate function llama_should_add_bos_token (#8778)

2024-08-15 15:23:23 +08:00 · 2024-08-15 15:23:23 +08:00 · 4af8420afb
commit 4af8420afb
parent 6bda7ce6c3
14 changed files with 26 additions and 40 deletions
--- a/examples/cvector-generator/cvector-generator.cpp
+++ b/examples/cvector-generator/cvector-generator.cpp
@ -271,7 +271,7 @@ struct tokenized_prompt {
    size_t max_seq_len;

    tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
-        const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+        const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
        tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
        tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
        max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
--- a/examples/eval-callback/eval-callback.cpp
+++ b/examples/eval-callback/eval-callback.cpp
@ -127,7 +127,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
 }

 static bool run(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));

    std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);

--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@ -433,8 +433,8 @@ static void process_logits(
 }

 static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));
    const int n_ctx = llama_n_ctx(ctx);

    auto tim1 = std::chrono::high_resolution_clock::now();
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@ -203,8 +203,8 @@ int main(int argc, char ** argv) {
        LOG_TEE("\n");
        LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
    }
-    const bool add_bos = llama_should_add_bos_token(model);
-    GGML_ASSERT(llama_add_eos_token(model) != 1);
+    const bool add_bos = llama_add_bos_token(model);
+    GGML_ASSERT(!llama_add_eos_token(model));
    LOG("add_bos: %d\n", add_bos);

    std::vector<llama_token> embd_inp;
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -267,9 +267,9 @@ int main(int argc, char ** argv) {
        }
    }

-    const bool add_bos = llama_should_add_bos_token(model);
+    const bool add_bos = llama_add_bos_token(model);
    if (!llama_model_has_encoder(model)) {
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+        GGML_ASSERT(!llama_add_eos_token(model));
    }
    LOG("add_bos: %d\n", add_bos);

--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@ -340,8 +340,8 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
    // Output: `perplexity: 13.5106 [114/114]`
    // BOS tokens will be added for each chunk before eval

-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));

    fprintf(stderr, "%s: tokenizing the input ..\n", __func__);

@ -480,8 +480,8 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
    // Output: `perplexity: 13.5106 [114/114]`
    // BOS tokens will be added for each chunk before eval

-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));

    std::ofstream logits_stream;
    if (!params.logits_file.empty()) {
@ -1733,8 +1733,8 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
    const int n_batch = params.n_batch;
    const int num_batches = (n_ctx + n_batch - 1)/n_batch;
    const int nv = 2*((n_vocab + 1)/2) + 4;
-    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-    GGML_ASSERT(llama_add_eos_token(llama_get_model(ctx)) != 1);
+    const bool add_bos = llama_add_bos_token(llama_get_model(ctx));
+    GGML_ASSERT(!llama_add_eos_token(llama_get_model(ctx)));

    std::vector<uint16_t> log_probs_uint16(size_t(n_ctx - 1 - n_ctx/2) * nv);
    std::vector<float>    kld_values(size_t(n_ctx - 1 - n_ctx/2)*n_chunk);
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -693,9 +693,8 @@ struct server_context {

        n_ctx = llama_n_ctx(ctx);

-        add_bos_token = llama_should_add_bos_token(model);
-        has_eos_token = llama_add_eos_token(model) != 1;
-
+        add_bos_token = llama_add_bos_token(model);
+        has_eos_token = !llama_add_eos_token(model);
        return true;
    }

@ -2038,7 +2037,7 @@ struct server_context {
                        slot.t_start_generation = 0;

                        if (slot.infill) {
-                            const bool add_bos = llama_should_add_bos_token(model);
+                            const bool add_bos = llama_add_bos_token(model);
                            bool suff_rm_leading_spc = true;
                            if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
                                params.input_suffix.erase(0, 1);
--- a/examples/tokenize/tokenize.cpp
+++ b/examples/tokenize/tokenize.cpp
@ -362,7 +362,7 @@ int main(int raw_argc, char ** raw_argv) {
        prompt = stdin_buffer.str();
    }

-    const bool model_wants_add_bos = llama_should_add_bos_token(model);
+    const bool model_wants_add_bos = llama_add_bos_token(model);
    const bool add_bos = model_wants_add_bos && !no_bos;
    const bool parse_special = !no_parse_special;