From 6a2a13b51cab7e650c4d3b7566f22cb894e878bc Mon Sep 17 00:00:00 2001
From: manikbhandari
Date: Tue, 26 Dec 2023 07:28:22 -0500
Subject: [PATCH] adapt to recent changes

Adapt the GPT-2 patch to recent upstream changes: drop the manual
per-tensor VRAM accounting (vram_weights) from the GPT-2 branch of
llm_load_tensors, remove the KQ_scale tensor argument from the
llm_build_kqv call to match its updated signature, and list GPT-2
among the supported models in the README.
---
 README.md | 1 +
 llama.cpp | 20 +-------------------
 2 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 3b202a336..48dcd6464 100644
--- a/README.md
+++ b/README.md
@@ -103,6 +103,7 @@ as the main playground for developing new features for the [ggml](https://github
 - [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen)
 - [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
 - [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557)
+- [x] [GPT-2](https://huggingface.co/gpt2)
 
 **Multimodal models:**
 
diff --git a/llama.cpp b/llama.cpp
index ced77dfe2..0af96109f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3753,14 +3753,6 @@ static bool llm_load_tensors(
                     model.output_norm   = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, backend_norm);
                     model.output_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, backend_norm);
                     model.output        = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, backend_output);
-
-                    if (backend_norm == GGML_BACKEND_GPU) {
-                        vram_weights += ggml_nbytes(model.output_norm);
-                        vram_weights += ggml_nbytes(model.output_norm_b);
-                    }
-                    if (backend_output == GGML_BACKEND_GPU_SPLIT) {
-                        vram_weights += ggml_nbytes(model.output);
-                    }
                 }
 
                 const uint32_t n_ff = hparams.n_ff;
@@ -3792,16 +3784,6 @@ static bool llm_load_tensors(
 
                         layer.ffn_up   = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, backend_split);
                         layer.ffn_up_b = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP, "bias", i),   {n_ff},         backend);
-
-                        if (backend == GGML_BACKEND_GPU) {
-                            vram_weights +=
-                                ggml_nbytes(layer.attn_norm) + ggml_nbytes(layer.attn_norm_b) +
-                                ggml_nbytes(layer.wqkv)      + ggml_nbytes(layer.bqkv)        +
-                                ggml_nbytes(layer.wo)        + ggml_nbytes(layer.bo)          +
-                                ggml_nbytes(layer.ffn_norm)  + ggml_nbytes(layer.ffn_norm_b)  +
-                                ggml_nbytes(layer.ffn_down)  + ggml_nbytes(layer.ffn_down_b)  +
-                                ggml_nbytes(layer.ffn_up)    + ggml_nbytes(layer.ffn_up_b);
-                        }
                     }
                 } break;
             default:
@@ -5890,7 +5872,7 @@ struct llm_build_context {
 
                 cur = llm_build_kqv(ctx0, model, hparams, kv_self,
                         model.layers[il].wo, model.layers[il].bo,
-                        Qcur, KQ_scale, KQ_mask, n_ctx, n_tokens, n_kv, -1.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il);
+                        Qcur, KQ_mask, n_ctx, n_tokens, n_kv, -1.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il);
                 cb(cur, "kqv_out", il);
             }
 
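
Reviewer note, not part of the patch: the deleted hunks were manual VRAM
bookkeeping -- the size of each GPU-offloaded weight, as reported by
ggml_nbytes(), summed into vram_weights -- which this patch drops,
presumably because upstream no longer does that accounting at these call
sites. The llm_build_kqv hunk likewise drops the KQ_scale tensor argument
to match the function's updated signature; the float scale
1/sqrt(n_embd_head) is still passed inline. Below is a minimal standalone
sketch of both ideas. Tensor and tensor_nbytes are hypothetical stand-ins
for ggml_tensor and ggml_nbytes (so the sketch compiles without ggml),
and the GPT-2-small dimensions (n_embd = 768, n_head = 12) are
assumptions for illustration.

// Standalone sketch (illustration only, not llama.cpp code).
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for ggml_tensor, carrying just enough to
// answer a byte-size query.
struct Tensor {
    size_t n_elements; // total element count
    size_t type_size;  // bytes per element (4 for f32; quantized types differ)
};

// Rough analogue of ggml_nbytes() for the stand-in type.
static size_t tensor_nbytes(const Tensor & t) {
    return t.n_elements * t.type_size;
}

int main() {
    // Assumed GPT-2 (small) shapes, f32 weights for simplicity.
    const size_t n_embd = 768;
    const size_t n_ff   = 4 * n_embd;

    const std::vector<Tensor> layer_weights = {
        { n_embd * 3 * n_embd, 4 }, // wqkv: fused Q/K/V projection
        { n_embd * n_embd,     4 }, // wo:   attention output projection
        { n_embd * n_ff,       4 }, // ffn_up
        { n_ff * n_embd,       4 }, // ffn_down
    };

    // The deleted hunks accumulated per-tensor sizes exactly like this.
    size_t vram_weights = 0;
    for (const Tensor & t : layer_weights) {
        vram_weights += tensor_nbytes(t);
    }
    std::printf("approx. weight bytes per layer: %zu\n", vram_weights);

    // The kqv hunk: the attention scale is passed inline as a float.
    const int   n_head      = 12;
    const int   n_embd_head = (int) (n_embd / n_head); // 64
    const float kq_scale    = 1.0f / std::sqrt((float) n_embd_head);
    std::printf("kq_scale (n_embd_head = %d): %f\n", n_embd_head, kq_scale);

    return 0;
}

With these assumed shapes the sum comes to about 28 MB per layer at f32;
the sketch omits the small norm weights and biases that the real code
also counted.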