trying to fix ggjt v3

2023-05-21 00:29:50 +08:00 · 2023-05-21 00:29:50 +08:00 · 5032e0fd64
commit 5032e0fd64
parent c048bcfec4
2 changed files with 10 additions and 6 deletions
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -318,7 +318,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in

        //determine mem per token
        const std::vector<int> tmp = {1, 2, 3, 4};
-        llama_eval(llama_ctx_v3, tmp.data(), tmp.size(), 0, params.n_threads);
+        auto er = llama_eval(llama_ctx_v3, tmp.data(), tmp.size(), 0, params.n_threads);
+        if(er!=0)
+        {
+            printf("\nLLAMA EVAL returned nonzero!\n");
+        }
        return ModelLoadResult::SUCCESS;
    }
    else if (file_format == FileFormat::RWKV_1)
--- a/llama.cpp
+++ b/llama.cpp
@@ -1137,11 +1137,11 @@ static bool llama_eval_internal(
            const int   n_past,
            const int   n_threads) {

-    // enforce that the first token is BOS
-    if (n_past == 0 && tokens[0] != llama_token_bos()) {
-        fprintf(stderr, "%s: first token must be BOS\n", __func__);
-        return false;
-    }
+    // // enforce that the first token is BOS
+    // if (n_past == 0 && tokens[0] != llama_token_bos()) {
+    //     fprintf(stderr, "%s: first token must be BOS\n", __func__);
+    //     return false;
+    // }

    const int64_t t_start_us = ggml_time_us();