diff --git a/ggml.c b/ggml.c
index 072078806..3dda7547f 100644
--- a/ggml.c
+++ b/ggml.c
@@ -4345,50 +4345,6 @@ void ggml_print_objects(const struct ggml_context * ctx) {
     GGML_PRINT("%s: --- end ---\n", __func__);
 }

-static void ggml_print_tensor(const struct ggml_tensor * tensor) {
-    GGML_PRINT("Tensor (null): %s | rank %d | shape (", ggml_type_name(tensor->type), tensor->n_dims);
-    for (int i=0; i<tensor->n_dims; ++i) {
-        GGML_PRINT("%lld ", tensor->ne[i]);
-    }
-    GGML_PRINT(") | strides (");
-    for (int i=0; i<tensor->n_dims; ++i) {
-        GGML_PRINT("%lld ", tensor->nb[i]);
-    }
-    GGML_PRINT(")\n");
-}
-
-static void ggml_print_tensor_values(const struct ggml_tensor * tensor, int starts[], int dim, int nelts) {
-    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
-    GGML_PRINT("Printing values for tensor %s[", tensor->name);
-    for (int i=0; i<tensor->n_dims; ++i) {
-        GGML_ASSERT(starts[i] >= 0);
-        if (i == dim) {
-            if (starts[i] > 0) {
-                GGML_PRINT("%d:%d", starts[i], starts[i]+nelts);
-            } else {
-                GGML_PRINT(":%d", starts[i]+nelts);
-            }
-        } else {
-            GGML_PRINT("%d", starts[i]);
-        }
-        if (i < tensor->n_dims-1) {
-            GGML_PRINT(",");
-        }
-    }
-    GGML_PRINT("]\n");
-    float *data_ptr = (float *) tensor->data;
-    int offset = 0;
-    for (int j = 0; j < tensor->n_dims; j++) {
-        offset += (starts[j] * tensor->nb[j]) / ggml_type_size(GGML_TYPE_F32);
-    }
-    data_ptr += offset;
-    for (int i = 0; i < nelts; i++) {
-        GGML_PRINT("%f ", *data_ptr);
-        data_ptr += tensor->nb[dim] / ggml_type_size(GGML_TYPE_F32);
-    }
-    GGML_PRINT("\n");
-}
-
 int64_t ggml_nelements(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
@@ -6442,7 +6398,6 @@ struct ggml_tensor * ggml_mul_mat(
     const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(a->n_dims, b->n_dims), ne);
-    //GGML_PRINT("ggml_mul_mat result shape : (%lld, %lld, %lld, %lld)\n", ne[0], ne[1], ne[2], ne[3]);

     result->op = GGML_OP_MUL_MAT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -11205,7 +11160,6 @@ static void ggml_compute_forward_norm_f32(
     }

     GGML_ASSERT(src0->nb[0] == sizeof(float));
-    // If the name starts with "layer_inputs", and we are on thread 0, print the tensor

     const int ith = params->ith;
     const int nth = params->nth;
@@ -12322,16 +12276,8 @@ static void ggml_compute_forward_view(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0) {
     // NOP
-    if (strncmp(src0->name, "cache_k", 7) == 0 && params->ith == 0) {
-        /*
-        GGML_PRINT("\noutputs of cache_k for view%s\n", src0->name);
-        ggml_print_tensor(src0);
-        int starts[] = {4096 * };
-        ggml_print_tensor_values(src0, starts, 0, 10);
-        */
-    }
-    //UNUSED(params);
-    //UNUSED(src0);
+    UNUSED(params);
+    UNUSED(src0);
 }

 // ggml_compute_forward_permute
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 6bb139c9d..882b96bc6 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -234,7 +234,7 @@ class TensorNameMap:
             "transformer.word_embeddings",              # falcon
             "model.embed_tokens",                       # llama-hf
             "tok_embeddings",                           # llama-pth
-            "language_model.embedding.word_embeddings", # adept
+            "language_model.embedding.word_embeddings", # persimmon
         ),

         # Position embeddings
@@ -247,7 +247,7 @@ class TensorNameMap:
             "embed_out",                # gptneox
             "lm_head",                  # gpt2 mpt falcon llama-hf baichuan
             "output",                   # llama-pth
-            "word_embeddings_for_head", # adept
+            "word_embeddings_for_head", # persimmon
         ),

         # Output norm
@@ -256,7 +256,7 @@ class TensorNameMap:
             "transformer.ln_f",                       # gpt2 falcon
             "model.norm",                             # llama-hf baichuan
             "norm",                                   # llama-pth
-            "language_model.encoder.final_layernorm", # adept
+            "language_model.encoder.final_layernorm", # persimmon
         ),

         # Rope frequencies
@@ -275,7 +275,7 @@ class TensorNameMap:
             "transformer.h.{bid}.ln_mlp",                           # falcon40b
             "model.layers.{bid}.input_layernorm",                   # llama-hf
             "layers.{bid}.attention_norm",                          # llama-pth
-            "language_model.encoder.layers.{bid}.input_layernorm",  # adept
+            "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
         ),

         # Attention norm 2
@@ -289,7 +289,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.c_attn",                                     # gpt2
             "transformer.blocks.{bid}.attn.Wqkv",                                  # mpt
             "transformer.h.{bid}.self_attention.query_key_value",                  # falcon
-            "language_model.encoder.layers.{bid}.self_attention.query_key_value",  # adept
+            "language_model.encoder.layers.{bid}.self_attention.query_key_value",  # persimmon
         ),

         # Attention query
@@ -318,7 +318,7 @@ class TensorNameMap:
             "transformer.h.{bid}.self_attention.dense",             # falcon
             "model.layers.{bid}.self_attn.o_proj",                  # llama-hf
             "layers.{bid}.attention.wo",                            # llama-pth
-            "language_model.encoder.layers.{bid}.self_attention.dense" # adept
+            "language_model.encoder.layers.{bid}.self_attention.dense" # persimmon
         ),

         # Rotary embeddings
@@ -334,7 +334,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.norm_2",                               # mpt
             "model.layers.{bid}.post_attention_layernorm",                   # llama-hf
             "layers.{bid}.ffn_norm",                                         # llama-pth
-            "language_model.encoder.layers.{bid}.post_attention_layernorm",  # adept
+            "language_model.encoder.layers.{bid}.post_attention_layernorm",  # persimmon
         ),

         # Feed-forward up
@@ -345,7 +345,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.dense_h_to_4h",                  # falcon
             "model.layers.{bid}.mlp.up_proj",                         # llama-hf
             "layers.{bid}.feed_forward.w3",                           # llama-pth
-            "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h",  # adept
+            "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h",  # persimmon
         ),

         # Feed-forward gate
@@ -362,7 +362,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.dense_4h_to_h",                  # falcon
"model.layers.{bid}.mlp.down_proj", # llama-hf "layers.{bid}.feed_forward.w2", # llama-pth - "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # adept + "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon ), MODEL_TENSOR.ATTN_Q_NORM: ( @@ -374,7 +374,7 @@ class TensorNameMap: ), MODEL_TENSOR.ROPE_FREQS: ( - "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # adept + "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon ) } diff --git a/llama.cpp b/llama.cpp index 20feae50d..cebb5b6ca 100644 --- a/llama.cpp +++ b/llama.cpp @@ -7088,7 +7088,6 @@ struct llama_context * llama_new_context_with_model( llama_free(ctx); return nullptr; } - LLAMA_LOG_INFO("Kv self cache: %7.2f MB\n", ggml_nbytes(ctx->kv_self.k) / 1024.0 / 1024.0); { const size_t memory_size = ggml_nbytes(ctx->kv_self.k) + ggml_nbytes(ctx->kv_self.v);