From ac2089c3780cfcfbe8d3f91bd83537d84f33e6a2 Mon Sep 17 00:00:00 2001
From: HimariO
Date: Sun, 8 Dec 2024 00:47:48 +0800
Subject: [PATCH] add mrope unit test, fix a few compiler warnings

---
 examples/llava/clip.cpp        |  1 -
 examples/llava/qwen2vl-cli.cpp | 21 +++++-----
 ggml/src/ggml.c                |  1 -
 src/llama.cpp                  |  2 +
 tests/test-rope.cpp            | 81 +++++++++++++++++++++++++---------
 5 files changed, 74 insertions(+), 32 deletions(-)

diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 050b04ce2..863d86ea4 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -2488,7 +2488,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
     const int patch_size    = hparams.patch_size;
     const int num_patches   = ((image_size_width / patch_size) * (image_size_height / patch_size));
     const int num_positions = num_patches + (ctx->has_class_embedding ? 1 : 0);
-    const int num_position_ids = ctx->has_qwen2vl_merger ? num_positions * 3 : num_positions;
     if (ctx->load_image_size == nullptr) {
         ctx->load_image_size = clip_image_size_init();
     }
diff --git a/examples/llava/qwen2vl-cli.cpp b/examples/llava/qwen2vl-cli.cpp
index cb4ce7f0e..99394a980 100644
--- a/examples/llava/qwen2vl-cli.cpp
+++ b/examples/llava/qwen2vl-cli.cpp
@@ -24,7 +24,9 @@ static bool qwen2vl_eval_image_embed(llama_context * ctx_llama, const struct lla
     const int ph = image_size->height / patch_size + (image_size->height % patch_size > 0);
     const int pw = image_size->width / patch_size + (image_size->width % patch_size > 0);
     auto img_tokens = image_embed->n_image_pos;
-    llama_pos mrope_pos[img_tokens * 4];
+    // llama_pos mrope_pos[img_tokens * 4];
+    std::vector<llama_pos> mrope_pos;
+    mrope_pos.resize(img_tokens * 4);

     for (int y = 0; y < ph; y++) {
@@ -350,7 +352,7 @@ static void llava_free(struct llava_context * ctx_llava) {

 #ifndef NDEBUG

-static void tmp_test_rope(struct llava_context * ctx_llava, common_params * params) {
+static void tmp_test_rope() {
     int n_threads = 1;

     static size_t buf_size = 512u*1024*1024;
@@ -415,13 +417,13 @@
     }
 }

-static void tmp_dump_img_embed(struct llava_context * ctx_llava, common_params * params) {
-    // auto * image_embed = load_image(ctx_llava, params, "/home/ron/Downloads/gguf/dog.jpeg");
+static void tmp_dump_img_embed(struct llava_context * ctx_llava) {
     int n_embd = llama_n_embd(llama_get_model(ctx_llava->ctx_llama));
-    // int ne = n_embd * image_embed->n_image_pos;
     int ne = n_embd * 4;
     float vals[56 * 56 * 3];
-    float embd[ne];
+    // float embd[ne];
+    std::vector<float> embd;
+    embd.resize(ne);

     for (int i = 0; i < 56*56; i++) {
@@ -429,12 +431,11 @@ static void tmp_dump_img_embed(struct llava_context * ctx_llava, common_params *
         for (int c = 0; c < 3; c++)
             vals[i * 3 + c] = (float)(i % (56 * 56)) / (56*56);
     }

-    // auto param = &ctx_llava->ctx_clip->vision_model.hparams;
-    clip_encode_float_image(ctx_llava->ctx_clip, 16, vals, 56, 56, embd);
+    clip_encode_float_image(ctx_llava->ctx_clip, 16, vals, 56, 56, embd.data());

     std::ofstream outFile("img_embed.bin", std::ios::binary);
     if (outFile.is_open()) {
-        outFile.write(reinterpret_cast<const char *>(embd), ne * sizeof(float));
+        outFile.write(reinterpret_cast<const char *>(embd.data()), ne * sizeof(float));

         outFile.close();
         std::cout << "Data successfully written to img_embed.bin" << std::endl;
@@ -484,7 +485,7 @@ int main(int argc, char ** argv) {
     } else if (params.image[0].empty()) {
         auto ctx_llava = llava_init_context(&params, model);

-        tmp_dump_img_embed(ctx_llava, &params);
+        tmp_dump_img_embed(ctx_llava);

        llama_perf_context_print(ctx_llava->ctx_llama);
        ctx_llava->model = NULL;
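Note: the qwen2vl-cli.cpp changes above swap C-style variable-length arrays (a GCC/Clang extension that is not standard C++ and does not compile under MSVC) for std::vector, which is the source of the compiler warnings mentioned in the subject. For context, the replaced mrope_pos buffer holds four position streams per image token. A minimal sketch of how such a buffer can be filled for a ph x pw patch grid follows; the helper name fill_mrope_pos and the start position st_pos_id are illustrative, and the stream-major layout is assumed from the surrounding loop, which the hunk truncates:

    #include <cstdint>
    #include <vector>

    typedef int32_t llama_pos;

    // Sketch: fill the 4 M-RoPE position streams for an image of ph x pw
    // patches. Layout is assumed stream-major: [temporal | height | width |
    // unused], each stream of length img_tokens, like mrope_pos above.
    static std::vector<llama_pos> fill_mrope_pos(int ph, int pw, llama_pos st_pos_id) {
        const int img_tokens = ph * pw;
        std::vector<llama_pos> pos(img_tokens * 4);
        for (int y = 0; y < ph; y++) {
            for (int x = 0; x < pw; x++) {
                const int i = y * pw + x;
                pos[i                 ] = st_pos_id;     // temporal: constant within one image
                pos[i +     img_tokens] = st_pos_id + y; // height coordinate of the patch
                pos[i + 2 * img_tokens] = st_pos_id + x; // width coordinate of the patch
                pos[i + 3 * img_tokens] = 0;             // fourth stream assumed unused here
            }
        }
        return pos;
    }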
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index c3726163b..008022441 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -3585,7 +3585,6 @@ struct ggml_tensor * ggml_mrope_ext(
     memcpy(params +  9, &beta_fast, sizeof(float));
     memcpy(params + 10, &beta_slow, sizeof(float));
     memcpy(&params[11], sections, sizeof(int)*4);
-    // memcpy(params + 11, sections, sizeof(int)*3);

     ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_ROPE;
diff --git a/src/llama.cpp b/src/llama.cpp
index 15052006b..d7deaffe0 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3337,6 +3337,7 @@ struct llama_context {
     // whether we are computing encoder output or decoder output
     bool is_encoding = false;

+    // TODO: find a better way to accommodate multi-dimensional position encoding methods
     // number of position ids each token gets; 1 per token in most cases.
     // when using m-rope, there are 3 position ids per token, representing a 3-dimensional coordinate.
     int n_pos_per_token = 1;
@@ -5719,6 +5720,7 @@ static void llm_load_hparams(
                 std::fill(hparams.rope_mrope_sections.begin(), hparams.rope_mrope_sections.end(), 0);
                 ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_SECTIONS, hparams.rope_mrope_sections, 4, true);
             }
+            // fall through
         case LLM_ARCH_QWEN2:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
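Note: the llama.cpp hunks are bookkeeping for multi-dimensional positions: n_pos_per_token becomes 3 for m-rope models, so the position buffer handed to the decoder is conceptually a [n_pos_per_token][n_tokens] array rather than a flat [n_tokens]. The added "// fall through" comment documents that LLM_ARCH_QWEN2VL intentionally continues into the LLM_ARCH_QWEN2 case to reuse its hparams loading, and quiets implicit-fallthrough warnings. A minimal sketch of the indexing this layout implies; mrope_pos_at is an illustrative helper, not an API added by the patch:

    #include <cstdint>

    typedef int32_t llama_pos;

    // Sketch: read the d-th coordinate (0 = temporal, 1 = height, 2 = width)
    // of token i from a stream-major position buffer with n_tokens entries
    // per stream, as implied by n_pos_per_token == 3.
    static llama_pos mrope_pos_at(const llama_pos * pos, int n_tokens, int i, int d) {
        return pos[i + n_tokens * d];
    }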
diff --git a/tests/test-rope.cpp b/tests/test-rope.cpp
index 4656b30f0..b138ffb25 100644
--- a/tests/test-rope.cpp
+++ b/tests/test-rope.cpp
@@ -138,7 +138,7 @@ int main(int /*argc*/, const char ** /*argv*/) {
     struct ggml_tensor * x;

     // rope f32
-    for (int m = 0; m < 3; ++m) {
+    for (int m = 0; m < 5; ++m) {
         const int ndims = 4;

         const int64_t n_rot = 128;
@@ -147,28 +147,69 @@ int main(int /*argc*/, const char ** /*argv*/) {
         const int n_past_0 = 100;
         const int n_past_2 = 33;

-        struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
-        struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
-        struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
-
-        for (int i = 0; i < ne[2]; ++i) {
-            ((int32_t *) p0->data)[i] = n_past_0 + i;
-            ((int32_t *) p1->data)[i] = n_past_2 - n_past_0;
-            ((int32_t *) p2->data)[i] = n_past_2 + i;
-        }
-
-        // test mode 0, 2, 4 (standard, GPT-NeoX, GLM)
-        const int mode = m == 0 ? 0 : m == 1 ? 2 : 4;
-
+        struct ggml_tensor * r0;
+        struct ggml_tensor * r1;
+        struct ggml_tensor * r2;
         x = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+        int mode = -1;

-        // 100, 101, 102, ..., 172
-        struct ggml_tensor * r0 = ggml_rope(ctx0, x, p0, n_rot, mode);
-        // -67, -67, -67, ..., -67
-        struct ggml_tensor * r1 = ggml_rope(ctx0, r0, p1, n_rot, mode); // "context swap", i.e. forget n_past_0 - n_past_2 tokens
+        if (m < 3) {
+            struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
+            struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);
+            struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2]);

-        // 33, 34, 35, ..., 105
-        struct ggml_tensor * r2 = ggml_rope(ctx0, x, p2, n_rot, mode);
+            for (int i = 0; i < ne[2]; ++i) {
+                ((int32_t *) p0->data)[i] = n_past_0 + i;
+                ((int32_t *) p1->data)[i] = n_past_2 - n_past_0;
+                ((int32_t *) p2->data)[i] = n_past_2 + i;
+            }
+            // test mode 0, 2, 4 (standard, GPT-NeoX, GLM)
+            mode = m == 0 ? 0 : m == 1 ? 2 : 4;
+
+            // 100, 101, 102, ..., 172
+            r0 = ggml_rope(ctx0, x, p0, n_rot, mode);
+            // -67, -67, -67, ..., -67
+            r1 = ggml_rope(ctx0, r0, p1, n_rot, mode); // "context swap", i.e. forget n_past_0 - n_past_2 tokens
+
+            // 33, 34, 35, ..., 105
+            r2 = ggml_rope(ctx0, x, p2, n_rot, mode);
+        } else {
+            // testing multi-dimensional rope position embedding modes
+            struct ggml_tensor * p0 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
+            struct ggml_tensor * p1 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
+            struct ggml_tensor * p2 = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne[2] * 4);
+
+            int sections[4] = {16, 24, 24, 0};
+            mode = (m == 3) ? GGML_ROPE_TYPE_MROPE : GGML_ROPE_TYPE_VISION;
+
+            for (int i = 0; i < ne[2]; ++i) {
+                for (int j = 0; j < 4; ++j) {
+                    ((int32_t *) p0->data)[i + ne[2] * j] = n_past_0 + i + j;
+                    ((int32_t *) p1->data)[i + ne[2] * j] = n_past_2 - n_past_0;
+                    ((int32_t *) p2->data)[i + ne[2] * j] = n_past_2 + i + j;
+                }
+            }
+
+            // [[100, 101, 102, ..., 172],
+            //  [101, 102, 103, ..., 173],
+            //  [102, 103, 104, ..., 174]]
+            r0 = ggml_mrope_ext(
+                ctx0, x, p0, nullptr,
+                n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
+            // [[-67, -67, -67, ..., -67],
+            //  [-67, -67, -67, ..., -67],
+            //  [-67, -67, -67, ..., -67]]
+            r1 = ggml_mrope_ext(
+                ctx0, r0, p1, nullptr,
+                n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
+
+            // [[33, 34, 35, ..., 105],
+            //  [34, 35, 36, ..., 106],
+            //  [35, 36, 37, ..., 107]]
+            r2 = ggml_mrope_ext(
+                ctx0, x, p2, nullptr,
+                n_rot, sections, mode, 32768, 1000000, 1, 0, 1, 32, 1);
+        }

         ggml_cgraph * gf = ggml_new_graph(ctx0);
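Note: the extended test reuses the invariant from the existing 1-D RoPE cases: rotations compose additively, so rotating by p0 and then by the constant offset p1 = n_past_2 - n_past_0 (= -67) must equal a single rotation by p2, independently in each position stream and each section. That is why r1 is expected to match r2 element-wise for the m-rope and vision modes as well. A quick standalone check of the position bookkeeping, assuming the same constants as the test (ne2 is a stand-in for the test's ne[2]):

    #include <cassert>

    int main() {
        const int n_past_0 = 100;
        const int n_past_2 = 33;
        const int ne2 = 73; // stand-in for the test's sequence length ne[2]

        // For every token i and every position stream j, p0 + p1 == p2,
        // which is why rope(rope(x, p0), p1) must equal rope(x, p2).
        for (int i = 0; i < ne2; ++i) {
            for (int j = 0; j < 4; ++j) {
                assert((n_past_0 + i + j) + (n_past_2 - n_past_0) == n_past_2 + i + j);
            }
        }
        return 0;
    }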