convert : read/write n_head_kv

commit 92711138f9
parent e9acbce624

2 changed files with 4 additions and 3 deletions
@@ -1787,6 +1787,7 @@ class Phi3MiniModel(Model):
         n_embd = self.find_hparam(["hidden_size", "n_embd"])
         n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        n_head_kv = self.find_hparam(["num_key_value_heads", "n_head_kv"])
         rms_eps = self.find_hparam(["rms_norm_eps"])
         max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
         orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
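For readers skimming the first hunk: `find_hparam` takes a list of candidate keys because Hugging Face configs name the same hyperparameter differently across models (`num_key_value_heads` vs `n_head_kv`, etc.). Below is a minimal sketch of that fallback lookup, assuming the helper simply returns the first candidate present in the checkpoint's config dict; the actual method in the converter may differ in details such as an `optional` flag.

    from typing import Any

    def find_hparam(hparams: dict[str, Any], keys: list[str], optional: bool = False) -> Any:
        # Return the value of the first candidate key present in the config.
        for key in keys:
            if key in hparams:
                return hparams[key]
        if optional:
            return None
        raise KeyError(f"could not find any of: {keys}")

    # Hypothetical config snippet, values for illustration only.
    hparams = {"hidden_size": 3072, "num_attention_heads": 32, "num_key_value_heads": 8}
    n_head_kv = find_hparam(hparams, ["num_key_value_heads", "n_head_kv"])  # -> 8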
@@ -1799,7 +1800,7 @@ class Phi3MiniModel(Model):
         self.gguf_writer.add_feed_forward_length(self.find_hparam(["intermediate_size"]))
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_head_count(n_head)
-        self.gguf_writer.add_head_count_kv(n_head)
+        self.gguf_writer.add_head_count_kv(n_head_kv)
         self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
         self.gguf_writer.add_rope_dimension_count(rope_dims)
         self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
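The changed line in this hunk (`add_head_count_kv(n_head)` → `add_head_count_kv(n_head_kv)`) is the actual fix: the converter previously wrote the query head count as the KV head count, so any checkpoint using grouped-query attention (fewer key/value heads than query heads) ended up with an inflated `head_count_kv` in its GGUF metadata. A rough illustration of what that value controls, using made-up GQA numbers rather than a real Phi-3 config:

    # Hypothetical GQA configuration (illustrative numbers only).
    n_embd    = 3072   # hidden size
    n_head    = 32     # query heads
    n_head_kv = 8      # key/value heads

    head_dim   = n_embd // n_head      # 96
    n_embd_gqa = head_dim * n_head_kv  # 768: width of each of the K and V projections

    # With the old behaviour (head_count_kv written as n_head), a loader would
    # expect K/V projections of width 3072 instead of 768.
    print(head_dim, n_embd_gqa)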
@@ -5652,12 +5652,12 @@ static bool llm_load_tensors(
                 ggml_context* ctx_layer = ctx_for_layer(i);
                 ggml_context* ctx_split = ctx_for_layer_split(i);

-                auto& layer = model.layers[i];
+                auto & layer = model.layers[i];

                 layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), { n_embd });

                 layer.wqkv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_QKV, "weight", i), { n_embd, n_embd + 2 * n_embd_gqa }, false);
                 layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd, n_embd });

                 layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), { n_embd });
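On the loader side, the same head counts determine the shape of the fused `wqkv` tensor seen in the hunk above: Q projects to `n_embd` features while K and V each project to `n_embd_gqa`, a value llama.cpp derives from the `head_count` / `head_count_kv` metadata read back from the GGUF file. A small Python mirror of that shape arithmetic (a sketch only, with hypothetical numbers; the C++ code computes `n_embd_gqa` from the model hparams elsewhere):

    def qkv_out_dim(n_embd: int, n_head: int, n_head_kv: int) -> int:
        # Q contributes n_embd outputs; K and V contribute n_embd_gqa each.
        n_embd_gqa = (n_embd // n_head) * n_head_kv
        return n_embd + 2 * n_embd_gqa

    # With the made-up numbers from above: 3072 + 2 * 768 = 4608,
    # i.e. the second dimension of { n_embd, n_embd + 2 * n_embd_gqa }.
    assert qkv_out_dim(3072, 32, 8) == 4608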