From a02a190756aa8e1a1f3db4a2e1d45ab0d3caad67 Mon Sep 17 00:00:00 2001
From: HimariO
Date: Fri, 13 Dec 2024 21:31:51 +0800
Subject: [PATCH] minor updates

---
 README.md                          |  1 +
 examples/llava/qwen2_vl_surgery.py |  9 +++++++--
 ggml/src/ggml-cuda/rope.cu         |  1 -
 ggml/src/ggml.c                    |  2 +-
 src/llama.cpp                      | 11 ++++++-----
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 6fdd8d9ee..ddb932ffc 100644
--- a/README.md
+++ b/README.md
@@ -110,6 +110,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [Mini CPM](https://huggingface.co/models?search=MiniCPM)
 - [x] [Moondream](https://huggingface.co/vikhyatk/moondream2)
 - [x] [Bunny](https://github.com/BAAI-DCAI/Bunny)
+- [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d)
diff --git a/examples/llava/qwen2_vl_surgery.py b/examples/llava/qwen2_vl_surgery.py
index 56d933fde..464ab80d3 100644
--- a/examples/llava/qwen2_vl_surgery.py
+++ b/examples/llava/qwen2_vl_surgery.py
@@ -50,7 +50,6 @@ def find_vision_tensors(qwen2vl, dtype) -> Dict[str, np.ndarray]:
             tensor_map[to_gguf_name(f"vision_model.{name}").replace("qkv", "q")] = wq
             tensor_map[to_gguf_name(f"vision_model.{name}").replace("qkv", "k")] = wk
             tensor_map[to_gguf_name(f"vision_model.{name}").replace("qkv", "v")] = wv
-            # breakpoint()
         elif 'merger' in name:
             if name.endswith("ln_q.weight"):
                 tensor_map['v.post_ln.weight'] = ten
@@ -97,7 +96,12 @@ def main(args):
     cfg: Qwen2VLConfig = qwen2vl.config  # type: ignore[reportAssignmentType]
     vcfg = cfg.vision_config
 
-    fname_out = "qwen2vl-vision.gguf"
+    if os.path.isdir(model_name):
+        if model_name.endswith(os.sep):
+            model_name = model_name[:-1]
+        model_name = os.path.basename(model_name)
+    fname_out = f"{model_name.replace('/', '-').lower()}-vision.gguf"
+
     fout = GGUFWriter(path=fname_out, arch="clip")
 
     fout.add_description("image encoder for Qwen2VL")
@@ -143,6 +147,7 @@ def main(args):
     fout.write_kv_data_to_file()
     fout.write_tensors_to_file()
     fout.close()
+    print("save model as: ", fname_out)
 
 
 if __name__ == "__main__":
diff --git a/ggml/src/ggml-cuda/rope.cu b/ggml/src/ggml-cuda/rope.cu
index fc3cabfb2..2c84778d2 100644
--- a/ggml/src/ggml-cuda/rope.cu
+++ b/ggml/src/ggml-cuda/rope.cu
@@ -397,7 +397,6 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const int mode = ((int32_t *) dst->op_params)[2];
     //const int n_ctx = ((int32_t *) dst->op_params)[3];
     const int n_ctx_orig = ((int32_t *) dst->op_params)[4];
-    // int sections[4];
     mrope_sections sections;
 
     // RoPE alteration for extended context
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index aa4c952c9..51cc85662 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -3528,7 +3528,7 @@ static struct ggml_tensor * ggml_rope_impl(
     memcpy(params + 8, &attn_factor, sizeof(float));
     memcpy(params + 9, &beta_fast, sizeof(float));
     memcpy(params + 10, &beta_slow, sizeof(float));
-    memcpy(params + 11, &sections, sizeof(int) * 4);
+    memcpy(params + 11, &sections, sizeof(int)*4);
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ROPE;
diff --git a/src/llama.cpp b/src/llama.cpp
index 6349b8019..cb61d4802 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2558,6 +2558,9 @@ struct llama_hparams {
         if (this->rope_finetuned != other.rope_finetuned) return true;
         if (this->n_ctx_orig_yarn != other.n_ctx_orig_yarn) return true;
         if (this->rope_sections != other.rope_sections) return true;
+        if (!std::equal(std::begin(this->rope_sections),
+                        std::end(this->rope_sections),
+                        std::begin(other.rope_sections))) return true;
 
         if (this->ssm_d_conv != other.ssm_d_conv) return true;
         if (this->ssm_d_inner != other.ssm_d_inner) return true;
@@ -3421,8 +3424,6 @@ struct llama_context {
     struct ggml_tensor * inp_tokens;      // I32 [n_batch]
     struct ggml_tensor * inp_embd;        // F32 [n_embd, n_batch]
    struct ggml_tensor * inp_pos;         // I32 [n_batch]
-    struct ggml_tensor * inp_pos_w;       // I32 [n_batch] second-dimension of m-rope position index
-    struct ggml_tensor * inp_pos_h;       // I32 [n_batch] third-dimension of m-rope position index
     struct ggml_tensor * inp_out_ids;     // I32 [n_outputs]
     struct ggml_tensor * inp_KQ_mask;     // F32 [kv_size, n_batch]
     struct ggml_tensor * inp_KQ_mask_swa; // F32 [kv_size, n_batch]
@@ -12606,7 +12607,6 @@ struct llm_build_context {
         inpL = llm_build_inp_embd(ctx0, lctx, hparams, ubatch, model.tok_embd, cb);
 
         // inp_pos - contains the positions
-        // struct ggml_tensor * inp_pos = build_inp_pos();
         lctx.inp_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens * 4);
         cb(lctx.inp_pos, "inp_pos", -1);
         ggml_set_input(lctx.inp_pos);
@@ -12646,14 +12646,15 @@ struct llm_build_context {
 
                 Qcur = ggml_rope_multi(
                     ctx0,
-                    ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, nullptr, 
+                    ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, nullptr,
                     n_rot, sections, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow
                 );
                 cb(Qcur, "Qcur", il);
 
                 Kcur = ggml_rope_multi(
-                    ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, nullptr,
+                    ctx0,
+                    ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, nullptr,
                     n_rot, sections, rope_type, n_ctx_orig, freq_base, freq_scale,
                     ext_factor, attn_factor, beta_fast, beta_slow
                 );
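
Reviewer note (not part of the patch): the hunks above route a 4-entry sections array into ggml_rope_multi and allocate inp_pos with n_tokens * 4 I32 slots, i.e. four position streams per token for the multimodal rotary embedding (M-RoPE). The standalone C++ sketch below illustrates one plausible reading of that layout, assuming positions are stored stream-major (all temporal positions, then height, then width, then a spare stream) and that rotary dimension pairs are assigned to the four sections contiguously. The helper name section_of_dim_pair and the example section split are illustrative assumptions, not ggml API.

// mrope_layout_sketch.cpp - standalone illustration, not ggml code.
// Assumes stream-major storage of the n_tokens*4 position buffer and a
// contiguous split of rotary dimension pairs across the four sections
// in the order {temporal, height, width, extra}.
#include <cstdio>

// Return which section (0..3) a rotary dimension pair falls into,
// given the per-section pair counts.
static int section_of_dim_pair(int pair, const int sections[4]) {
    int bound = 0;
    for (int s = 0; s < 4; ++s) {
        bound += sections[s];
        if (pair < bound) {
            return s;
        }
    }
    return 3; // pairs past the declared sections reuse the last stream
}

int main() {
    const int sections[4] = {16, 24, 24, 0}; // hypothetical split of 64 pairs
    const int n_tokens    = 2;

    // Mirrors the patch's inp_pos of n_tokens*4 I32 values:
    // stream 0 = temporal, 1 = height, 2 = width, 3 = unused here.
    const int inp_pos[4 * 2] = {
        0, 1, // temporal positions of tokens 0 and 1
        0, 0, // height positions
        0, 1, // width positions
        0, 0, // extra stream
    };

    const int probes[3] = {0, 20, 50}; // a few dimension pairs to inspect
    for (int tok = 0; tok < n_tokens; ++tok) {
        for (int pair : probes) {
            const int s = section_of_dim_pair(pair, sections);
            printf("token %d, dim pair %2d -> section %d, position %d\n",
                   tok, pair, s, inp_pos[s * n_tokens + tok]);
        }
    }
    return 0;
}

The example split of {16, 24, 24, 0} is chosen to match the [16, 24, 24] mrope_section entry published in Qwen2-VL's Hugging Face config (64 pairs for a head dimension of 128), which would leave the fourth stream unused in practice.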