Reduce warnings. (#439)

parent 8d90072a2a
commit f6ba36dff6

10 changed files with 28 additions and 36 deletions
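Most of the changes below fall into three groups: printf/fprintf format specifiers corrected to match their argument types (%zu for size_t, %ld for 64-bit tensor extents, and an explicit cast of clblast's scoped StatusCode enum before printing it with %d), removal of a redundant in-file _GNU_SOURCE define, and unwrapping two llama_v3 declarations from their DEPRECATED() macro. A minimal standalone sketch of the format-specifier pattern (hypothetical types and values, not code from this repo):

    #include <cstdio>
    #include <cstddef>

    // Stand-in for clblast::StatusCode, which is a scoped enum (enum class).
    enum class StatusCode { kSuccess = 0, kTempBufferAllocFailure = 4 };

    int main() {
        size_t n_vocab = 50277;   // size_t takes %zu, not %d
        long   ne00    = 4096;    // long takes %ld
        StatusCode status = StatusCode::kTempBufferAllocFailure;

        // A scoped enum does not convert implicitly to int, so it is cast
        // explicitly before being passed through printf's varargs.
        printf("Detected Vocab: %zu\n", n_vocab);
        printf("Matmul Failed (%d): [dim: %ld]\n", static_cast<int>(status), ne00);
        return 0;
    }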
@@ -1529,7 +1529,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
                                             &queue, &ev_sgemm);

     if (status != clblast::StatusCode::kSuccess) {
-        printf("\nF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+        printf("\nF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
         GGML_ASSERT(false);
     }

@@ -1634,7 +1634,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
                                             &queue, &ev_sgemm);

     if (status != clblast::StatusCode::kSuccess) {
-        printf("\nF16 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+        printf("\nF16 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
         GGML_ASSERT(false);
     }

@@ -1754,7 +1754,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                                                 &queue, events.data() + ev_idx++);

         if (status != clblast::StatusCode::kSuccess) {
-            printf("\nQF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+            printf("\nQF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
             GGML_ASSERT(false);
         }
     }

@@ -691,7 +691,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in

     const struct rwkv_file_header & header = rwkv_ctx_v3->instance->model.header;
     const size_t n_vocab = header.n_vocab;
-    printf("\nDetected Vocab: %d",n_vocab);
+    printf("\nDetected Vocab: %zu",n_vocab);
     if(n_vocab>60000)
     {
         printf("\nUsing WORLD TOKENIZER");
@@ -729,7 +729,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     auto statebufsiz = rwkv_v2_get_state_buffer_element_count(rwkv_ctx_v2) * sizeof(float) + padding;
     auto logitbufsiz = rwkv_v2_get_logits_buffer_element_count(rwkv_ctx_v2) * sizeof(float) + padding;

-    printf("\nRWKV old Init: State Buffer:%u, Logit Buffer:%u\n", statebufsiz, logitbufsiz);
+    printf("\nRWKV old Init: State Buffer:%lu, Logit Buffer:%lu\n", statebufsiz, logitbufsiz);
     rwkv_ctx_v2->state_out = (float *)malloc(statebufsiz);
     rwkv_ctx_v2->logits_out = (float *)malloc(logitbufsiz);
     rwkv_ctx_v2->state_in = nullptr;
@@ -757,7 +757,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     auto statebufsiz = rwkv_get_state_buffer_element_count(rwkv_ctx_v3) * sizeof(float) + padding;
     auto logitbufsiz = rwkv_get_logits_buffer_element_count(rwkv_ctx_v3) * sizeof(float) + padding;

-    printf("\nRWKV Init: State Buffer:%u, Logit Buffer:%u\n", statebufsiz, logitbufsiz);
+    printf("\nRWKV Init: State Buffer:%lu, Logit Buffer:%lu\n", statebufsiz, logitbufsiz);
     rwkv_ctx_v3->state_out = (float *)malloc(statebufsiz);
     rwkv_ctx_v3->logits_out = (float *)malloc(logitbufsiz);
     rwkv_ctx_v3->state_in = nullptr;
@@ -1284,7 +1284,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     //prepare banned tokens
     if(banned_token_ids.size()==0 && banned_tokens.size()>0)
     {
-        printf("\n[First Run] Banning %d token sequences...",banned_tokens.size());
+        printf("\n[First Run] Banning %zu token sequences...",banned_tokens.size());
         for(int v=0;v<n_vocab;++v)
         {
             std::string word = FileFormatTokenizeID(v,file_format);
@@ -1297,7 +1297,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                 }
             }
         }
-        printf("\nBanned a total of %d tokens.\n",banned_token_ids.size());
+        printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
     }

     if(debugmode!=-1)
@@ -1337,7 +1337,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     //print progress
     if (!startedsampling && debugmode!=-1)
     {
-        printf("\rProcessing Prompt%s (%d / %d tokens)", (blasmode ? " [BLAS]" : ""), input_consumed, embd_inp.size());
+        printf("\rProcessing Prompt%s (%d / %zu tokens)", (blasmode ? " [BLAS]" : ""), input_consumed, embd_inp.size());
     }
     fflush(stdout);

@@ -1,6 +1,3 @@
-// Defines CLOCK_MONOTONIC and asprintf on Linux
-#define _GNU_SOURCE
-
 #include "ggml_v1.h"

 #if defined(_MSC_VER) || defined(__MINGW32__)
@@ -573,7 +573,7 @@ static void ggml_v2_cl_mul_mat_f32(const ggml_v2_tensor * src0, const ggml_v2_te
                                             &queue, &ev_sgemm);

     if (status != clblast::StatusCode::kSuccess) {
-        printf("\nF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+        printf("\nF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
         GGML_V2_ASSERT(false);
     }

@@ -672,7 +672,7 @@ static void ggml_v2_cl_mul_mat_f16(const ggml_v2_tensor * src0, const ggml_v2_te
                                             &queue, &ev_sgemm);

     if (status != clblast::StatusCode::kSuccess) {
-        printf("\nF16 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+        printf("\nF16 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
         GGML_V2_ASSERT(false);
     }

@@ -780,7 +780,7 @@ static void ggml_v2_cl_mul_mat_q_f32(const ggml_v2_tensor * src0, const ggml_v2_
                                                 &queue, &ev_sgemm);

         if (status != clblast::StatusCode::kSuccess) {
-            printf("\nQF32 Matmul Failed (%d): [dims: %lld,%lld,%lld,%lld] You may be out of VRAM. Please check if you have enough.\n",status,ne00,ne01,ne10,ne11);
+            printf("\nQF32 Matmul Failed (%d): [dims: %ld,%ld,%ld,%ld] You may be out of VRAM. Please check if you have enough.\n",static_cast<int>(status),ne00,ne01,ne10,ne11);
             GGML_V2_ASSERT(false);
         }
     }
@@ -1,6 +1,3 @@
-// Defines CLOCK_MONOTONIC on Linux
-#define _GNU_SOURCE
-
 #include "ggml_v2.h"

 #if defined(_MSC_VER) || defined(__MINGW32__)
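Dropping the in-file #define _GNU_SOURCE (here and in the ggml_v1 file above) presumably avoids a macro-redefinition warning on builds that already pass -D_GNU_SOURCE on the compiler command line. Had the define needed to stay in the source, the conventional guard would be:

    /* Avoids a "_GNU_SOURCE redefined" warning when the build system
       already defines it. */
    #ifndef _GNU_SOURCE
    #define _GNU_SOURCE
    #endif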
@@ -150,7 +150,7 @@ ModelLoadResult gpt2_v2_model_load(const std::string & fname, gpt2_v2_model & mo
     params.mem_size = ctx_size;
     params.mem_buffer = NULL;
     params.no_alloc = false;


     model.ctx = ggml_v2_init(params);
     if (!model.ctx) {
@@ -237,7 +237,7 @@ ModelLoadResult gpt2_v2_model_load(const std::string & fname, gpt2_v2_model & mo

     const int n_mem = n_layer*n_ctx;
     const int n_elements = n_embd*n_mem;

     model.memory_k = ggml_v2_new_tensor_1d(ctx, memory_type, n_elements*1.5);
     model.memory_v = ggml_v2_new_tensor_1d(ctx, memory_type, n_elements*1.5);

@@ -287,7 +287,7 @@ ModelLoadResult gpt2_v2_model_load(const std::string & fname, gpt2_v2_model & mo
         }

         if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%lld, %lld], expected [%lld, %lld]\n",
+            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%ld, %ld], expected [%d, %d]\n",
                 __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
             return ModelLoadResult::FAIL;
         }
@@ -379,7 +379,7 @@ bool gpt2_v2_eval(
     params.mem_size = buf_size;
     params.mem_buffer = buf;
     params.no_alloc = false;


     struct ggml_v2_context * ctx0 = ggml_v2_init(params);
     struct ggml_v2_cgraph gf = {};
@@ -150,7 +150,7 @@ ModelLoadResult gptj_v2_model_load(const std::string & fname, gptj_v2_model & mo
     params.mem_size = ctx_size;
     params.mem_buffer = NULL;
     params.no_alloc = false;


     model.ctx = ggml_v2_init(params);
     if (!model.ctx) {
@@ -281,7 +281,7 @@ ModelLoadResult gptj_v2_model_load(const std::string & fname, gptj_v2_model & mo
             fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data());
             return ModelLoadResult::FAIL;
         }


         if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {

@@ -294,11 +294,11 @@ ModelLoadResult gptj_v2_model_load(const std::string & fname, gptj_v2_model & mo
             }
             else
             {
-                fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+                fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%ld, %ld], expected [%d, %d]\n",
                     __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
                 return ModelLoadResult::FAIL;
             }

         }

         // for debugging
@@ -387,7 +387,7 @@ bool gptj_v2_eval(
     params.mem_size = buf_size;
     params.mem_buffer = buf;
     params.no_alloc = false;


     struct ggml_v2_context * ctx0 = ggml_v2_init(params);
     struct ggml_v2_cgraph gf = {};
@@ -304,7 +304,7 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
             }
             else
             {
-                fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+                fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%ld, %ld], expected [%d, %d]\n",
                     __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
                 return ModelLoadResult::FAIL;
             }
@ -243,10 +243,9 @@ extern "C" {
|
||||||
// Various functions for loading a ggml llama model.
|
// Various functions for loading a ggml llama model.
|
||||||
// Allocate (almost) all memory needed for the model.
|
// Allocate (almost) all memory needed for the model.
|
||||||
// Return NULL on failure
|
// Return NULL on failure
|
||||||
LLAMA_V3_API DEPRECATED(struct llama_v3_context * llama_v3_init_from_file(
|
LLAMA_V3_API struct llama_v3_context * llama_v3_init_from_file(
|
||||||
const char * path_model,
|
const char * path_model,
|
||||||
struct llama_v3_context_params params),
|
struct llama_v3_context_params params);
|
||||||
"please use llama_v3_load_model_from_file combined with llama_v3_new_context_with_model instead");
|
|
||||||
|
|
||||||
// Frees all allocated memory
|
// Frees all allocated memory
|
||||||
LLAMA_V3_API void llama_v3_free(struct llama_v3_context * ctx);
|
LLAMA_V3_API void llama_v3_free(struct llama_v3_context * ctx);
|
||||||
|
@ -263,12 +262,11 @@ extern "C" {
|
||||||
// The model needs to be reloaded before applying a new adapter, otherwise the adapter
|
// The model needs to be reloaded before applying a new adapter, otherwise the adapter
|
||||||
// will be applied on top of the previous one
|
// will be applied on top of the previous one
|
||||||
// Returns 0 on success
|
// Returns 0 on success
|
||||||
LLAMA_V3_API DEPRECATED(int llama_v3_apply_lora_from_file(
|
LLAMA_V3_API int llama_v3_apply_lora_from_file(
|
||||||
struct llama_v3_context * ctx,
|
struct llama_v3_context * ctx,
|
||||||
const char * path_lora,
|
const char * path_lora,
|
||||||
const char * path_base_model,
|
const char * path_base_model,
|
||||||
int n_threads),
|
int n_threads);
|
||||||
"please use llama_v3_model_apply_lora_from_file instead");
|
|
||||||
|
|
||||||
LLAMA_V3_API int llama_v3_model_apply_lora_from_file(
|
LLAMA_V3_API int llama_v3_model_apply_lora_from_file(
|
||||||
const struct llama_v3_model * model,
|
const struct llama_v3_model * model,
|
||||||
|
|
|
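Unwrapping these two declarations removes the deprecation attribute, so remaining internal callers of llama_v3_init_from_file and llama_v3_apply_lora_from_file presumably no longer trigger -Wdeprecated-declarations. For reference, a DEPRECATED(func, hint) macro of this kind is typically defined along these lines (an illustration, not the actual header):

    #ifdef __GNUC__
    #    define DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
    #elif defined(_MSC_VER)
    #    define DEPRECATED(func, hint) __declspec(deprecated(hint)) func
    #else
    #    define DEPRECATED(func, hint) func
    #endif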
@@ -367,8 +367,8 @@ struct rwkv_v2_context * rwkv_v2_init_from_file(const char * file_path, uint32_t
     // Verify order of dimensions
     struct ggml_v2_tensor * emb = model->emb;
     RWKV_V2_ASSERT_NULL(emb->n_dims == 2, "Unexpected dimension count of embedding matrix %d", emb->n_dims);
-    RWKV_V2_ASSERT_NULL(emb->ne[0] == model->n_embed, "Unexpected dimension of embedding matrix %lld", emb->ne[0]);
-    RWKV_V2_ASSERT_NULL(emb->ne[1] == model->n_vocab, "Unexpected dimension of embedding matrix %lld", emb->ne[1]);
+    RWKV_V2_ASSERT_NULL(emb->ne[0] == model->n_embed, "Unexpected dimension of embedding matrix %ld", emb->ne[0]);
+    RWKV_V2_ASSERT_NULL(emb->ne[1] == model->n_vocab, "Unexpected dimension of embedding matrix %ld", emb->ne[1]);

     int32_t n_embed = model->n_embed;
     int32_t n_layer = model->n_layer;
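One caveat on the %lld to %ld substitutions above: tensor extents such as ne[0] are 64-bit in upstream ggml, and %ld only matches a 64-bit integer on LP64 targets (64-bit Linux/macOS). The fully portable specifier comes from <cinttypes>; a minimal sketch with hypothetical values:

    #include <cinttypes>
    #include <cstdio>

    int main() {
        int64_t ne0 = 4096, ne1 = 11008;
        // PRId64 expands to the correct specifier on every platform,
        // including 64-bit Windows, where long is only 32 bits.
        printf("dims: %" PRId64 ", %" PRId64 "\n", ne0, ne1);
        return 0;
    }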