diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 7eb272f5a..7841de0a4 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -1,8 +1,10 @@
 #include "ggml.h"
-#include <vector>
+
 #include <cassert>
-#include <random>
+#include <cstdlib>
 #include <cstring>
+#include <random>
+#include <vector>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -457,7 +459,7 @@ static void randomize_model_lora(
     }
 }
 
-static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
+static void init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
     const auto & hparams = model->hparams;
 
     const uint32_t n_ctx = hparams.n_ctx;
@@ -483,14 +485,12 @@ static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * mod
 
         if (!cache->ctx) {
             fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
-            return false;
+            exit(1);
         }
     }
 
     cache->k = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
     cache->v = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
-
-    return true;
 }
 
 static bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) {