update format

parent bd72ba0445
commit d1940a3646

2 changed files with 7 additions and 27 deletions
|
@ -120,6 +120,8 @@ if "max_sequence_length" in hparams:
|
|||
ctx_length = hparams["max_sequence_length"]
|
||||
elif "max_position_embeddings" in hparams:
|
||||
ctx_length = hparams["max_position_embeddings"]
|
||||
elif "model_max_length" in hparams:
|
||||
ctx_length = hparams["model_max_length"]
|
||||
else:
|
||||
print("gguf: can not find ctx length parameter.")
|
||||
|
||||
|
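Note: the hunk above adds a third fallback to the context-length probe. Read as a standalone helper, the chain looks like this; a minimal sketch, assuming hparams is the dict parsed from the model's config.json (find_ctx_length is a hypothetical name, the script writes the chain inline):

def find_ctx_length(hparams):
    # Probe the config keys in the same order as the script:
    # Baichuan-style, then standard HF config, then the tokenizer-style field.
    for key in ("max_sequence_length", "max_position_embeddings", "model_max_length"):
        if key in hparams:
            return hparams[key]
    print("gguf: can not find ctx length parameter.")
    return None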
@@ -231,12 +233,7 @@ for part_name in part_names:
 
     tmp=model_part
     for i in itertools.count():
-        if f"model.layers.{i}.self_attn.q_proj.weight" in model_part:
-            print(f"Permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.q_proj.weight"], head_count, head_count)
-            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = reverse_hf_permute(model_part[f"model.layers.{i}.self_attn.k_proj.weight"], head_count, head_count_kv)
-            #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
-        elif f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
+        if f"model.layers.{i}.self_attn.W_pack.weight" in model_part:
             print(f"Unpacking and permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],0,head_count,head_count)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"]=reverse_hf_permute_part(model_part[f"model.layers.{i}.self_attn.W_pack.weight"],1,head_count,head_count_kv)
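Note: with this change the converter relies entirely on unpacking the fused W_pack tensor instead of permuting pre-split q_proj/k_proj weights. The two helpers it calls are not shown in this hunk; a NumPy sketch of the usual llama.cpp convert-script pattern they follow (reverse_hf_permute and reverse_hf_permute_part are the script's own names; the real script operates on torch tensors):

import numpy as np

def reverse_hf_permute(weights, n_head, n_head_kv=None):
    # Undo the rotary-embedding permutation that the HF LLaMA export applies
    # along the output dimension of Q/K weight matrices.
    if n_head_kv is not None and n_head != n_head_kv:
        n_head //= n_head_kv
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                   .swapaxes(1, 2)
                   .reshape(weights.shape))

def reverse_hf_permute_part(weights, n_part, n_head, n_head_kv=None):
    # W_pack stacks Q, K, V along dim 0; slice out the n_part-th third, then permute.
    r = weights.shape[0] // 3
    return reverse_hf_permute(weights[r * n_part : r * (n_part + 1), ...], n_head, n_head_kv)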
@@ -259,14 +256,6 @@ for part_name in part_names:
 
         data = data.squeeze().numpy()
 
-        # reverse permute these
-        # if name.endswith(".q_proj.weight"):
-        #     data = reverse_hf_permute(data, head_count)
-        # if name.endswith(".k_proj.weight"):
-        #     data = reverse_hf_permute(data, head_count, head_count_kv)
-
-
-
         # map tensor names
         new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
         if new_name is None:
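Note: the commented-out per-tensor permutes removed here became dead code once the W_pack loop above took over permutation. The surviving tensor_map.get_name call maps HF tensor names onto GGUF names; a usage sketch, assuming the gguf-py API of this period (block_count and the example name are illustrative):

import gguf

block_count = 32  # hypothetical: Baichuan-7B layer count, read from hparams in the script
tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.BAICHUAN, block_count)

new_name = tensor_map.get_name("model.layers.0.self_attn.q_proj.weight",
                               try_suffixes=(".weight", ".bias"))
print(new_name)  # expected: "blk.0.attn_q.weight"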
@@ -289,8 +278,6 @@ for part_name in part_names:
             data = data.astype(np.float16)
 
         print(name + " -> " + new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
-
-
 
         gguf_writer.add_tensor(new_name, data)
 
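Note: the astype cast shown in context is part of the standard dtype policy shared by the GGUF convert scripts. A condensed sketch of that policy as a function, assuming their usual convention for ftype (0 = keep f32, 1 = write f16):

import numpy as np

def convert_dtype(data: np.ndarray, name: str, ftype: int) -> np.ndarray:
    n_dims = len(data.shape)
    if data.dtype not in (np.float16, np.float32):
        data = data.astype(np.float32)   # anything exotic goes to f32
    if data.dtype == np.float16 and (n_dims == 1 or not name.endswith(".weight")):
        data = data.astype(np.float32)   # 1-d and non-weight tensors stay f32
    if ftype == 1 and name.endswith(".weight") and n_dims == 2:
        data = data.astype(np.float16)   # large 2-d weight matrices go to f16
    return data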
llama.cpp (15 changes)
@@ -1948,7 +1948,6 @@ static void llm_load_tensors(
     const int64_t n_vocab = hparams.n_vocab;
 
     const auto tn = LLM_TN(model.arch);
-
     switch (model.arch) {
         case LLM_ARCH_LLAMA:
             {
@@ -2777,13 +2776,11 @@ static struct ggml_cgraph * llm_build_baichaun(
 
         struct ggml_tensor * Kcur;
         struct ggml_tensor * Qcur;
-        switch (model.type)
-        {
+        switch (model.type) {
             case MODEL_7B:
                 Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
-                Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
+                Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), n_past, n_embd_head, 0, 0, freq_base, freq_scale);
                 break;
-
             case MODEL_13B:
                 Kcur = ggml_reshape_3d(ctx0, tmpk, n_embd/n_head, n_head, N);
                 Qcur = ggml_reshape_3d(ctx0, tmpq, n_embd/n_head, n_head, N);
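Note: the switch exists because Baichuan-7B uses rotary position embeddings while Baichuan-13B uses ALiBi, so the MODEL_13B branch only reshapes Q/K and defers positional information to the bias added at attention time (see the last hunk below). A simplified NumPy sketch of the rotation the MODEL_7B branch applies via ggml_rope_custom_inplace (interleaved-pair layout, no frequency scaling):

import numpy as np

def rope(x: np.ndarray, n_past: int, freq_base: float = 10000.0) -> np.ndarray:
    # x: float array of shape (seq, n_head, head_dim); rotates consecutive
    # pairs of the head dimension by a position-dependent angle.
    seq, _, head_dim = x.shape
    pos = np.arange(n_past, n_past + seq, dtype=np.float32)
    inv_freq = freq_base ** (-np.arange(0, head_dim, 2, dtype=np.float32) / head_dim)
    theta = pos[:, None] * inv_freq[None, :]              # (seq, head_dim/2)
    cos, sin = np.cos(theta)[:, None, :], np.sin(theta)[:, None, :]
    x0, x1 = x[..., 0::2], x[..., 1::2]
    out = np.empty_like(x)
    out[..., 0::2] = x0 * cos - x1 * sin
    out[..., 1::2] = x0 * sin + x1 * cos
    return out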
@@ -2797,8 +2794,6 @@ static struct ggml_cgraph * llm_build_baichaun(
 
         offload_func_kq(Qcur);
         ggml_set_name(Qcur, "Qcur");
-
-
 
         // store key and value to memory
         {
@@ -2853,13 +2848,11 @@ static struct ggml_cgraph * llm_build_baichaun(
 
         struct ggml_tensor * KQ_masked;
         struct ggml_tensor * KQ_scaled_alibi;
-        // if model.type == MODEL_13B,here add kq_scaled_alibi
-        switch (model.type)
-        {
+        switch (model.type) {
             case MODEL_7B:
                 KQ_masked = ggml_diag_mask_inf_inplace(ctx0, KQ_scaled, n_past);
                 break;
 
             case MODEL_13B:
                 KQ_scaled_alibi =ggml_alibi(ctx0, KQ_scaled, n_past, n_head, 8);
                 ggml_set_name(KQ_scaled_alibi, "KQ_scaled_alibi");
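Note: on the MODEL_13B path, ggml_alibi adds a per-head linear bias to KQ_scaled instead of rotating Q/K; the trailing 8 is max_bias. A simplified NumPy sketch of that bias, assuming a power-of-two head count (ggml handles the remainder with a second slope series):

import numpy as np

def alibi_bias(n_head: int, n_key: int, max_bias: float = 8.0) -> np.ndarray:
    # Per-head slopes: m_h = 2^(-max_bias * (h+1) / n_head) for power-of-two n_head.
    slopes = 2.0 ** (-max_bias * np.arange(1, n_head + 1) / n_head)
    # Bias grows linearly with key position; shaped (n_head, 1, n_key) so it
    # broadcasts against scores of shape (n_head, n_query, n_key).
    return slopes[:, None, None] * np.arange(n_key)[None, None, :]

Adding this bias to the scaled scores before the causal mask reproduces what the MODEL_13B branch feeds into KQ_masked.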