diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index ee5658b38..ac11ad767 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -318,7 +318,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
 
             //determine mem per token
             const std::vector<int> tmp = {1, 2, 3, 4};
-            llama_eval(llama_ctx_v3, tmp.data(), tmp.size(), 0, params.n_threads);
+            auto er = llama_eval(llama_ctx_v3, tmp.data(), tmp.size(), 0, params.n_threads);
+            if(er!=0)
+            {
+                printf("\nLLAMA EVAL returned nonzero!\n");
+            }
             return ModelLoadResult::SUCCESS;
         }
         else if (file_format == FileFormat::RWKV_1)
diff --git a/llama.cpp b/llama.cpp
index 745b95823..37bd155c8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1137,11 +1137,11 @@ static bool llama_eval_internal(
         const int   n_past,
         const int   n_threads) {
 
-    // enforce that the first token is BOS
-    if (n_past == 0 && tokens[0] != llama_token_bos()) {
-        fprintf(stderr, "%s: first token must be BOS\n", __func__);
-        return false;
-    }
+    // // enforce that the first token is BOS
+    // if (n_past == 0 && tokens[0] != llama_token_bos()) {
+    //     fprintf(stderr, "%s: first token must be BOS\n", __func__);
+    //     return false;
+    // }
 
     const int64_t t_start_us = ggml_time_us();