trivial cleanups
parent 7473773c0b
commit d904aff040

3 changed files with 12 additions and 67 deletions
ggml.c (58 lines changed)
@@ -4345,50 +4345,6 @@ void ggml_print_objects(const struct ggml_context * ctx) {
     GGML_PRINT("%s: --- end ---\n", __func__);
 }
 
-static void ggml_print_tensor(const struct ggml_tensor * tensor) {
-    GGML_PRINT("Tensor (null): %s | rank %d | shape (", ggml_type_name(tensor->type), tensor->n_dims);
-    for (int i=0; i<tensor->n_dims; ++i) {
-        GGML_PRINT("%lld ", tensor->ne[i]);
-    }
-    GGML_PRINT(") | strides (");
-    for (int i=0; i<tensor->n_dims; ++i) {
-        GGML_PRINT("%lld ", tensor->nb[i]);
-    }
-    GGML_PRINT(")\n");
-}
-
-static void ggml_print_tensor_values(const struct ggml_tensor * tensor, int starts[], int dim, int nelts) {
-    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
-    GGML_PRINT("Printing values for tensor %s[", tensor->name);
-    for (int i=0; i<tensor->n_dims; ++i) {
-        GGML_ASSERT(starts[i] >= 0);
-        if (i == dim) {
-            if (starts[i] > 0) {
-                GGML_PRINT("%d:%d", starts[i], starts[i]+nelts);
-            } else {
-                GGML_PRINT(":%d", starts[i]+nelts);
-            }
-        } else {
-            GGML_PRINT("%d", starts[i]);
-        }
-        if (i<tensor->n_dims-1) {
-            GGML_PRINT(",");
-        }
-    }
-    GGML_PRINT("]\n");
-    float *data_ptr = (float *) tensor->data;
-    int offset = 0;
-    for (int j = 0; j < tensor->n_dims; j++) {
-        offset += (starts[j] * tensor->nb[j]) / ggml_type_size(GGML_TYPE_F32);
-    }
-    data_ptr += offset;
-    for (int i = 0; i < nelts; i++) {
-        GGML_PRINT("%f ", *data_ptr);
-        data_ptr += tensor->nb[dim] / ggml_type_size(GGML_TYPE_F32);
-    }
-    GGML_PRINT("\n");
-}
-
 int64_t ggml_nelements(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
@@ -6442,7 +6398,6 @@ struct ggml_tensor * ggml_mul_mat(
 
     const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MAX(a->n_dims, b->n_dims), ne);
-    //GGML_PRINT("ggml_mul_mat result shape : (%lld, %lld, %lld, %lld)\n", ne[0], ne[1], ne[2], ne[3]);
 
     result->op   = GGML_OP_MUL_MAT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -11205,7 +11160,6 @@ static void ggml_compute_forward_norm_f32(
     }
 
     GGML_ASSERT(src0->nb[0] == sizeof(float));
-    // If the name starts with "layer_inputs", and we are on thread 0, print the tensor
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -12322,16 +12276,8 @@ static void ggml_compute_forward_view(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0) {
     // NOP
-    if (strncmp(src0->name, "cache_k", 7) == 0 && params->ith == 0) {
-        /*
-        GGML_PRINT("\noutputs of cache_k for view%s\n", src0->name);
-        ggml_print_tensor(src0);
-        int starts[] = {4096 * };
-        ggml_print_tensor_values(src0, starts, 0, 10);
-        */
-    }
-    //UNUSED(params);
-    //UNUSED(src0);
+    UNUSED(params);
+    UNUSED(src0);
 }
 
 // ggml_compute_forward_permute

@@ -234,7 +234,7 @@ class TensorNameMap:
             "transformer.word_embeddings", # falcon
             "model.embed_tokens", # llama-hf
             "tok_embeddings", # llama-pth
-            "language_model.embedding.word_embeddings", # adept
+            "language_model.embedding.word_embeddings", # persimmon
         ),
 
         # Position embeddings
@@ -247,7 +247,7 @@ class TensorNameMap:
             "embed_out", # gptneox
             "lm_head", # gpt2 mpt falcon llama-hf baichuan
             "output", # llama-pth
-            "word_embeddings_for_head", # adept
+            "word_embeddings_for_head", # persimmon
         ),
 
         # Output norm
@@ -256,7 +256,7 @@ class TensorNameMap:
             "transformer.ln_f", # gpt2 falcon
             "model.norm", # llama-hf baichuan
             "norm", # llama-pth
-            "language_model.encoder.final_layernorm", # adept
+            "language_model.encoder.final_layernorm", # persimmon
         ),
 
         # Rope frequencies
@@ -275,7 +275,7 @@ class TensorNameMap:
             "transformer.h.{bid}.ln_mlp", # falcon40b
             "model.layers.{bid}.input_layernorm", # llama-hf
             "layers.{bid}.attention_norm", # llama-pth
-            "language_model.encoder.layers.{bid}.input_layernorm", # adept
+            "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
         ),
 
         # Attention norm 2
@@ -289,7 +289,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.c_attn", # gpt2
             "transformer.blocks.{bid}.attn.Wqkv", # mpt
             "transformer.h.{bid}.self_attention.query_key_value", # falcon
-            "language_model.encoder.layers.{bid}.self_attention.query_key_value", # adept
+            "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
         ),
 
         # Attention query
@@ -318,7 +318,7 @@ class TensorNameMap:
             "transformer.h.{bid}.self_attention.dense", # falcon
             "model.layers.{bid}.self_attn.o_proj", # llama-hf
             "layers.{bid}.attention.wo", # llama-pth
-            "language_model.encoder.layers.{bid}.self_attention.dense" # adept
+            "language_model.encoder.layers.{bid}.self_attention.dense" # persimmon
         ),
 
         # Rotary embeddings
@@ -334,7 +334,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.norm_2", # mpt
             "model.layers.{bid}.post_attention_layernorm", # llama-hf
             "layers.{bid}.ffn_norm", # llama-pth
-            "language_model.encoder.layers.{bid}.post_attention_layernorm", # adept
+            "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
         ),
 
         # Feed-forward up
@@ -345,7 +345,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
             "model.layers.{bid}.mlp.up_proj", # llama-hf
             "layers.{bid}.feed_forward.w3", # llama-pth
-            "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # adept
+            "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
         ),
 
         # Feed-forward gate
@@ -362,7 +362,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
             "model.layers.{bid}.mlp.down_proj", # llama-hf
             "layers.{bid}.feed_forward.w2", # llama-pth
-            "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # adept
+            "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
         ),
 
         MODEL_TENSOR.ATTN_Q_NORM: (
@@ -374,7 +374,7 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
-            "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # adept
+            "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq", # persimmon
         )
     }

@@ -7088,7 +7088,6 @@ struct llama_context * llama_new_context_with_model(
             llama_free(ctx);
             return nullptr;
         }
-        LLAMA_LOG_INFO("Kv self cache: %7.2f MB\n", ggml_nbytes(ctx->kv_self.k) / 1024.0 / 1024.0);
 
         {
             const size_t memory_size = ggml_nbytes(ctx->kv_self.k) + ggml_nbytes(ctx->kv_self.v);