update format

parent bd72ba0445
commit d1940a3646

2 changed files with 7 additions and 27 deletions
|
@ -120,6 +120,8 @@ if "max_sequence_length" in hparams:
|
|||
ctx_length = hparams["max_sequence_length"]
|
||||
elif "max_position_embeddings" in hparams:
|
||||
ctx_length = hparams["max_position_embeddings"]
|
||||
elif "model_max_length" in hparams:
|
||||
ctx_length = hparams["model_max_length"]
|
||||
else:
|
||||
print("gguf: can not find ctx length parameter.")
|
||||
|
||||
|
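Note: the hunk above adds a third fallback to the context-length probe. Read as a standalone helper, the chain looks like this; a minimal sketch, assuming hparams is the dict parsed from the model's config.json (find_ctx_length is a hypothetical name, the script writes the chain inline):

def find_ctx_length(hparams):
    # Probe the config keys in the same order as the script:
    # Baichuan-style, then standard HF config, then the tokenizer-style field.
    for key in ("max_sequence_length", "max_position_embeddings", "model_max_length"):
        if key in hparams:
            return hparams[key]
    print("gguf: can not find ctx length parameter.")
    return None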
@@ -231,12 +233,7 @@ for part_name in part_names:
 
     tmp=model_part
     for i in itertools.count():
-        if f"model.layers.{i}.self_attn.q_proj.weight" in model_part:
-            print(f"Permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.q_proj.weight"], head_count, head_count)
-            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.k_proj.weight"], head_count, head_count_kv)
-            #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
-        elif f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
+        if f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
             print(f"Unpacking and permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],0,head_count,head_count)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],1,head_count,head_count_kv)
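Note: with this change the converter relies entirely on unpacking the fused W_pack tensor instead of permuting pre-split q_proj/k_proj weights. The two helpers it calls are not shown in this hunk; a NumPy sketch of the usual llama.cpp convert-script pattern they follow (reverse_hf_permute and reverse_hf_permute_part are the script's own names; the real script operates on torch tensors):

import numpy as np

def reverse_hf_permute(weights, n_head, n_head_kv=None):
    # Undo the rotary-embedding permutation that the HF LLaMA export applies
    # along the output dimension of Q/K weight matrices.
    if n_head_kv is not None and n_head != n_head_kv:
        n_head //= n_head_kv
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                   .swapaxes(1, 2)
                   .reshape(weights.shape))

def reverse_hf_permute_part(weights, n_part, n_head, n_head_kv=None):
    # W_pack stacks Q, K, V along dim 0; slice out the n_part-th third, then permute.
    r = weights.shape[0] // 3
    return reverse_hf_permute(weights[r * n_part : r * (n_part + 1), ...], n_head, n_head_kv)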
@@ -259,14 +256,6 @@ for part_name in part_names:
 
         data = data.squeeze().numpy()
 
-        # reverse permute these
-        # if name.endswith(".q_proj.weight"):
-        #     data = reverse_hf_permute(data, head_count)
-        # if name.endswith(".k_proj.weight"):
-        #     data = reverse_hf_permute(data, head_count, head_count_kv)
-
-
-
         # map tensor names
         new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
         if new_name is None:
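Note: the commented-out per-tensor permutes removed here became dead code once the W_pack loop above took over permutation. The surviving tensor_map.get_name call maps HF tensor names onto GGUF names; a usage sketch, assuming the gguf-py API of this period (block_count and the example name are illustrative):

import gguf

block_count = 32  # hypothetical: Baichuan-7B layer count, read from hparams in the script
tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.BAICHUAN, block_count)

new_name = tensor_map.get_name("model.layers.0.self_attn.q_proj.weight",
                               try_suffixes=(".weight", ".bias"))
print(new_name)  # expected: "blk.0.attn_q.weight"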
@@ -289,8 +278,6 @@ for part_name in part_names:
             data = data.astype(np.float16)
 
         print(name + " -> " + new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
-
-
 
         gguf_writer.add_tensor(new_name, data)
 
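Note: the astype cast shown in context is part of the standard dtype policy shared by the GGUF convert scripts. A condensed sketch of that policy as a function, assuming their usual convention for ftype (0 = keep f32, 1 = write f16):

import numpy as np

def convert_dtype(data: np.ndarray, name: str, ftype: int) -> np.ndarray:
    n_dims = len(data.shape)
    if data.dtype not in (np.float16, np.float32):
        data = data.astype(np.float32)   # anything exotic goes to f32
    if data.dtype == np.float16 and (n_dims == 1 or not name.endswith(".weight")):
        data = data.astype(np.float32)   # 1-d and non-weight tensors stay f32
    if ftype == 1 and name.endswith(".weight") and n_dims == 2:
        data = data.astype(np.float16)   # large 2-d weight matrices go to f16
    return data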
llama.cpp (15 changes)
@@ -1948,7 +1948,6 @@ static void llm_load_tensors(
     const int64_t n_vocab = hparams.n_vocab;
 
     const auto tn = LLM_TN(model.arch);
-
     switch (model.arch) {
         case LLM_ARCH_LLAMA:
             {
@@ -2777,13 +2776,11 @@ static struct ggml_cgraph * llm_build_baichaun(
 
         struct ggml_tensor * Kcur;
         struct ggml_tensor * Qcur;
-        switch (model.type)
-        {
+        switch (model.type) {
             case MODEL_7B:
                 Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
-                Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
+                Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
                 break;
-
             case MODEL_13B:
                 Kcur = ggml_reshape_3d(ctx0, tmpk, n_embd/n_head, n_head, N);
                 Qcur = ggml_reshape_3d(ctx0, tmpq, n_embd/n_head, n_head, N);
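Note: the switch exists because Baichuan-7B uses rotary position embeddings while Baichuan-13B uses ALiBi, so the MODEL_13B branch only reshapes Q/K and defers positional information to the bias added at attention time (see the last hunk below). A simplified NumPy sketch of the rotation the MODEL_7B branch applies via ggml_rope_custom_inplace (interleaved-pair layout, no frequency scaling):

import numpy as np

def rope(x: np.ndarray, n_past: int, freq_base: float = 10000.0) -> np.ndarray:
    # x: float array of shape (seq, n_head, head_dim); rotates consecutive
    # pairs of the head dimension by a position-dependent angle.
    seq, _, head_dim = x.shape
    pos = np.arange(n_past, n_past + seq, dtype=np.float32)
    inv_freq = freq_base ** (-np.arange(0, head_dim, 2, dtype=np.float32) / head_dim)
    theta = pos[:, None] * inv_freq[None, :]              # (seq, head_dim/2)
    cos, sin = np.cos(theta)[:, None, :], np.sin(theta)[:, None, :]
    x0, x1 = x[..., 0::2], x[..., 1::2]
    out = np.empty_like(x)
    out[..., 0::2] = x0 * cos - x1 * sin
    out[..., 1::2] = x0 * sin + x1 * cos
    return out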
@@ -2797,8 +2794,6 @@ static struct ggml_cgraph * llm_build_baichaun(
 
         offload_func_kq(Qcur);
         ggml_set_name(Qcur, "Qcur");
-
-
 
         // store key and value to memory
         {
@@ -2853,13 +2848,11 @@ static struct ggml_cgraph * llm_build_baichaun(
 
         struct ggml_tensor * KQ_masked;
         struct ggml_tensor * KQ_scaled_alibi;
-        // if model.type == MODEL_13B,here add kq_scaled_alibi
-        switch (model.type)
-        {
+        switch (model.type) {
             case MODEL_7B:
                 KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past);
                 break;
 
             case MODEL_13B:
                 KQ_scaled_alibi =ggml_alibi(ctx0, KQ_scaled, n_past, n_head, 8);
                 ggml_set_name(KQ_scaled_alibi, "KQ_scaled_alibi");
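Note: on the MODEL_13B path, ggml_alibi adds a per-head linear bias to KQ_scaled instead of rotating Q/K; the trailing 8 is max_bias. A simplified NumPy sketch of that bias, assuming a power-of-two head count (ggml handles the remainder with a second slope series):

import numpy as np

def alibi_bias(n_head: int, n_key: int, max_bias: float = 8.0) -> np.ndarray:
    # Per-head slopes: m_h = 2^(-max_bias * (h+1) / n_head) for power-of-two n_head.
    slopes = 2.0 ** (-max_bias * np.arange(1, n_head + 1) / n_head)
    # Bias grows linearly with key position; shaped (n_head, 1, n_key) so it
    # broadcasts against scores of shape (n_head, n_query, n_key).
    return slopes[:, None, None] * np.arange(n_key)[None, None, :]

Adding this bias to the scaled scores before the causal mask reproduces what the MODEL_13B branch feeds into KQ_masked.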