diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index 76f2f87da..955d4e9c2 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -1529,7 +1529,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
                                        &queue, &ev_sgemm);
     if (status != clblast::StatusCode::kSuccess) {
-        printf("\nF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+        printf("\nF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
         GGML_ASSERT(false);
     }
@@ -1634,7 +1634,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
                                        &queue, &ev_sgemm);
     if (status != clblast::StatusCode::kSuccess) {
-        printf("\nF16 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+        printf("\nF16 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
         GGML_ASSERT(false);
     }
@@ -1754,7 +1754,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                                        &queue, events.data() + ev_idx++);
         if (status != clblast::StatusCode::kSuccess) {
-            printf("\nQF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+            printf("\nQF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
             GGML_ASSERT(false);
         }
     }
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 2f28516a5..69603004f 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -691,7 +691,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     const struct rwkv_file_header & header = rwkv_ctx_v3->instance->model.header;
     const size_t n_vocab = header.n_vocab;
-    printf("\nDetected Vocab: %d",n_vocab);
+    printf("\nDetected Vocab: %zu",n_vocab);
     if(n_vocab>60000)
     {
         printf("\nUsing WORLD TOKENIZER");
@@ -729,7 +729,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     auto statebufsiz = rwkv_v2_get_state_buffer_element_count(rwkv_ctx_v2) * sizeof(float) + padding;
     auto logitbufsiz = rwkv_v2_get_logits_buffer_element_count(rwkv_ctx_v2) * sizeof(float) + padding;
-    printf("\nRWKV old Init: State Buffer:%u, Logit Buffer:%u\n", statebufsiz, logitbufsiz);
+    printf("\nRWKV old Init: State Buffer:%lu, Logit Buffer:%lu\n", statebufsiz, logitbufsiz);
     rwkv_ctx_v2->state_out = (float *)malloc(statebufsiz);
     rwkv_ctx_v2->logits_out = (float *)malloc(logitbufsiz);
     rwkv_ctx_v2->state_in = nullptr;
@@ -757,7 +757,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
    auto statebufsiz = rwkv_get_state_buffer_element_count(rwkv_ctx_v3) * sizeof(float) + padding;
    auto logitbufsiz = rwkv_get_logits_buffer_element_count(rwkv_ctx_v3) * sizeof(float) + padding;
-    printf("\nRWKV Init: State Buffer:%u, Logit Buffer:%u\n", statebufsiz, logitbufsiz);
+    printf("\nRWKV Init: State Buffer:%lu, Logit Buffer:%lu\n", statebufsiz, logitbufsiz);
     rwkv_ctx_v3->state_out = (float *)malloc(statebufsiz);
     rwkv_ctx_v3->logits_out = (float *)malloc(logitbufsiz);
     rwkv_ctx_v3->state_in = nullptr;
@@ -1284,7 +1284,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     //prepare banned tokens
     if(banned_token_ids.size()==0 && banned_tokens.size()>0)
     {
-        printf("\n[First Run] Banning %d token sequences...",banned_tokens.size());
+        printf("\n[First Run] Banning %zu token sequences...",banned_tokens.size());
         for(int v=0;v<
+        printf("\nF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
         GGML_V2_ASSERT(false);
     }
@@ -672,7 +672,7 @@ static void ggml_v2_cl_mul_mat_f16(const ggml_v2_tensor * src0, const ggml_v2_te
                                        &queue, &ev_sgemm);
     if (status != clblast::StatusCode::kSuccess) {
-        printf("\nF16 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+        printf("\nF16 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
         GGML_V2_ASSERT(false);
     }
@@ -780,7 +780,7 @@ static void ggml_v2_cl_mul_mat_q_f32(const ggml_v2_tensor * src0, const ggml_v2_
                                        &queue, &ev_sgemm);
     if (status != clblast::StatusCode::kSuccess) {
-        printf("\nQF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+        printf("\nQF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
         GGML_V2_ASSERT(false);
     }
 }
diff --git a/otherarch/ggml_v2.c b/otherarch/ggml_v2.c
index 6b18fe723..a3f593ee6 100644
--- a/otherarch/ggml_v2.c
+++ b/otherarch/ggml_v2.c
@@ -1,6 +1,3 @@
-// Defines CLOCK_MONOTONIC on Linux
-#define _GNU_SOURCE
-
 #include "ggml_v2.h"
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
diff --git a/otherarch/gpt2_v2.cpp b/otherarch/gpt2_v2.cpp
index 9e023a9d6..33ca85e11 100644
--- a/otherarch/gpt2_v2.cpp
+++ b/otherarch/gpt2_v2.cpp
@@ -150,7 +150,7 @@ ModelLoadResult gpt2_v2_model_load(const std::string & fname, gpt2_v2_model & mo
     params.mem_size = ctx_size;
     params.mem_buffer = NULL;
     params.no_alloc = false;
-    
+
     model.ctx = ggml_v2_init(params);
     if (!model.ctx) {
@@ -237,7 +237,7 @@ ModelLoadResult gpt2_v2_model_load(const std::string & fname, gpt2_v2_model & mo
     const int n_mem = n_layer*n_ctx;
     const int n_elements = n_embd*n_mem;
-    
+
     model.memory_k = ggml_v2_new_tensor_1d(ctx, memory_type, n_elements*1.5);
     model.memory_v = ggml_v2_new_tensor_1d(ctx, memory_type, n_elements*1.5);
@@ -287,7 +287,7 @@ ModelLoadResult gpt2_v2_model_load(const std::string & fname, gpt2_v2_model & mo
     }
     if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-        fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%lld, %lld], expected [%lld, %lld]\n",
+        fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%ld, %ld], expected [%d, %d]\n",
                 __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
         return ModelLoadResult::FAIL;
     }
@@ -379,7 +379,7 @@ bool gpt2_v2_eval(
     params.mem_size = buf_size;
     params.mem_buffer = buf;
     params.no_alloc = false;
-    
+
     struct ggml_v2_context * ctx0 = ggml_v2_init(params);
     struct ggml_v2_cgraph gf = {};
diff --git a/otherarch/gptj_v2.cpp b/otherarch/gptj_v2.cpp
index 100ca1ace..97e885016 100644
--- a/otherarch/gptj_v2.cpp
+++ b/otherarch/gptj_v2.cpp
@@ -150,7 +150,7 @@ ModelLoadResult gptj_v2_model_load(const std::string & fname, gptj_v2_model & mo
     params.mem_size = ctx_size;
     params.mem_buffer = NULL;
     params.no_alloc = false;
-    
+
     model.ctx = ggml_v2_init(params);
     if (!model.ctx) {
@@ -281,7 +281,7 @@ ModelLoadResult gptj_v2_model_load(const std::string & fname, gptj_v2_model & mo
         fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
         return ModelLoadResult::FAIL;
     }
-    
+
     if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
@@ -294,11 +294,11 @@ ModelLoadResult gptj_v2_model_load(const std::string & fname, gptj_v2_model & mo
     }
     else
     {
-        fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+        fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%ld, %ld], expected [%d, %d]\n",
                 __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
         return ModelLoadResult::FAIL;
     }
-    
+
 }
 
 // for debugging
@@ -387,7 +387,7 @@ bool gptj_v2_eval(
     params.mem_size = buf_size;
     params.mem_buffer = buf;
     params.no_alloc = false;
-    
+
     struct ggml_v2_context * ctx0 = ggml_v2_init(params);
     struct ggml_v2_cgraph gf = {};
diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp
index 42512e190..1001e9b84 100644
--- a/otherarch/gptj_v3.cpp
+++ b/otherarch/gptj_v3.cpp
@@ -304,7 +304,7 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
     }
     else
     {
-        fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+        fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%ld, %ld], expected [%d, %d]\n",
                 __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
         return ModelLoadResult::FAIL;
     }
diff --git a/otherarch/llama_v3.h b/otherarch/llama_v3.h
index 289f2d796..2cc4b4707 100644
--- a/otherarch/llama_v3.h
+++ b/otherarch/llama_v3.h
@@ -243,10 +243,9 @@ extern "C" {
     // Various functions for loading a ggml llama model.
     // Allocate (almost) all memory needed for the model.
     // Return NULL on failure
-    LLAMA_V3_API DEPRECATED(struct llama_v3_context * llama_v3_init_from_file(
+    LLAMA_V3_API struct llama_v3_context * llama_v3_init_from_file(
             const char * path_model,
-            struct llama_v3_context_params params),
-            "please use llama_v3_load_model_from_file combined with llama_v3_new_context_with_model instead");
+            struct llama_v3_context_params params);
 
     // Frees all allocated memory
     LLAMA_V3_API void llama_v3_free(struct llama_v3_context * ctx);
@@ -263,12 +262,11 @@ extern "C" {
     // The model needs to be reloaded before applying a new adapter, otherwise the adapter
     // will be applied on top of the previous one
     // Returns 0 on success
-    LLAMA_V3_API DEPRECATED(int llama_v3_apply_lora_from_file(
+    LLAMA_V3_API int llama_v3_apply_lora_from_file(
             struct llama_v3_context * ctx,
             const char * path_lora,
             const char * path_base_model,
-            int n_threads),
-            "please use llama_v3_model_apply_lora_from_file instead");
+            int n_threads);
 
     LLAMA_V3_API int llama_v3_model_apply_lora_from_file(
             const struct llama_v3_model * model,
diff --git a/otherarch/rwkv_v2.cpp b/otherarch/rwkv_v2.cpp
index d627a13f0..7d2065eaa 100644
--- a/otherarch/rwkv_v2.cpp
+++ b/otherarch/rwkv_v2.cpp
@@ -367,8 +367,8 @@ struct rwkv_v2_context * rwkv_v2_init_from_file(const char * file_path, uint32_t
     // Verify order of dimensions
     struct ggml_v2_tensor * emb = model->emb;
     RWKV_V2_ASSERT_NULL(emb->n_dims == 2, "Unexpected dimension count of embedding matrix %d", emb->n_dims);
-    RWKV_V2_ASSERT_NULL(emb->ne[0] == model->n_embed, "Unexpected dimension of embedding matrix %lld", emb->ne[0]);
-    RWKV_V2_ASSERT_NULL(emb->ne[1] == model->n_vocab, "Unexpected dimension of embedding matrix %lld", emb->ne[1]);
+    RWKV_V2_ASSERT_NULL(emb->ne[0] == model->n_embed, "Unexpected dimension of embedding matrix %ld", emb->ne[0]);
+    RWKV_V2_ASSERT_NULL(emb->ne[1] == model->n_vocab, "Unexpected dimension of embedding matrix %ld", emb->ne[1]);
 
     int32_t n_embed = model->n_embed;
     int32_t n_layer = model->n_layer;
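
The hunks above are all format-specifier and cast fixes: `clblast::StatusCode` is an `enum class` and cannot be passed to `printf` through `%d` without an explicit cast, `size_t` arguments want `%zu`, and the 64-bit tensor dimensions need a matching length modifier. The sketch below illustrates the same pattern in isolation; it is not code from the patch, and the `StatusCode` enum and variable values are invented stand-ins for the example.

```cpp
// Standalone illustration of matching printf specifiers to argument types.
#include <cinttypes>  // PRId64
#include <cstdint>
#include <cstdio>

// Stand-in for clblast::StatusCode (an enum class in the real library).
enum class StatusCode { kSuccess = 0, kError = -1 };

int main() {
    StatusCode status = StatusCode::kError;
    size_t  n_vocab = 65536;            // size_t
    int64_t ne00 = 4096, ne01 = 32000;  // 64-bit dimensions

    // An enum class does not convert implicitly, so cast it to match %d.
    printf("status: %d\n", static_cast<int>(status));

    // %zu is the portable conversion for size_t.
    printf("Detected Vocab: %zu\n", n_vocab);

    // %ld matches int64_t only where long is 64-bit (LP64); PRId64 is portable everywhere.
    printf("dims: %" PRId64 ",%" PRId64 "\n", ne00, ne01);
    return 0;
}
```

On LP64 targets (64-bit Linux/macOS) `%ld` and `%lu` line up with `int64_t` and `size_t`, which is why the patch silences the warnings there; the `PRId64`/`%zu` forms shown above are the variant that also holds on LLP64 (Windows).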