From f8e9140cb46eebaa867e1184a9946e4840eec772 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Wed, 31 Jan 2024 11:44:19 +0100 Subject: [PATCH 1/9] Vulkan Fixes (#5223) * Fix Vulkan F16 models * Fix Vulkan context shift crash * Add Vulkan to common.cpp dump_non_result_info_yaml function * Fix bug in Vulkan CPY op * Fix small matrix multiplication errors in AMD GPUs on Windows or with amdvlk Co-authored-by: Engininja2 <139037756+Engininja2@users.noreply.github.com> --------- Co-authored-by: Engininja2 <139037756+Engininja2@users.noreply.github.com> --- common/common.cpp | 1 + ggml-vulkan-shaders.hpp | 1952 +++++++++++++---------------------- ggml-vulkan.cpp | 14 +- ggml_vk_generate_shaders.py | 4 +- 4 files changed, 704 insertions(+), 1267 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 0dd1c50cf..9d976c7c8 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1520,6 +1520,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false"); fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false"); fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false"); + fprintf(stream, "cpu_has_vulkan: %s\n", ggml_cpu_has_vulkan() ? "true" : "false"); fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false"); fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false"); fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false"); diff --git a/ggml-vulkan-shaders.hpp b/ggml-vulkan-shaders.hpp index 321e36383..e2e9be22c 100644 --- a/ggml-vulkan-shaders.hpp +++ b/ggml-vulkan-shaders.hpp @@ -890,7 +890,7 @@ const uint64_t cpy_f32_f32_len = 2472; unsigned char dequant_f16_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x87,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x81,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, 0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, 0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, @@ -898,7 +898,7 @@ unsigned char dequant_f16_data[] = { 0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x09,0x00, 0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, 0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x51,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x10,0x00,0x06,0x00, +0x4f,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x10,0x00,0x06,0x00, 0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x01,0x00,0x00, 0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, @@ -910,23 +910,23 @@ unsigned char dequant_f16_data[] = { 0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00,0x03,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x47,0x00,0x03,0x00, 0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x4f,0x00,0x00,0x00, +0x4c,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x48,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x4d,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x4f,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x51,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x4d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00, 0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x5c,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x5d,0x00,0x00,0x00, +0x5a,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x48,0x00,0x04,0x00,0x5b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x5b,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x5d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5f,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5f,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x5b,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x5d,0x00,0x00,0x00, 0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x80,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x7e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, 0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, 0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, 0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, @@ -945,330 +945,109 @@ unsigned char dequant_f16_data[] = { 0x16,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, 0x06,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x20,0x00,0x04,0x00,0x18,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x14,0x00,0x02,0x00, -0x24,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x4a,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x4e,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x4f,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x50,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x5c,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x5d,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x5e,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x5e,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x00,0x01,0x00,0x00, -0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00,0x80,0x00,0x00,0x00, -0x7f,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x79,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x6c,0x02,0x00,0x00, -0x11,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x6d,0x02,0x00,0x00,0x12,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x6e,0x02,0x00,0x00,0x13,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x6f,0x02,0x00,0x00, -0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x70,0x02,0x00,0x00,0x14,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x71,0x02,0x00,0x00,0x05,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x72,0x02,0x00,0x00, -0x15,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x73,0x02,0x00,0x00,0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x74,0x02,0x00,0x00,0x16,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x75,0x02,0x00,0x00, -0x07,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x76,0x02,0x00,0x00,0x17,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x77,0x02,0x00,0x00,0x08,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x78,0x02,0x00,0x00, -0x18,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x79,0x02,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x7a,0x02,0x00,0x00,0x19,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x7b,0x02,0x00,0x00, -0x0a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x7c,0x02,0x00,0x00,0x1a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x7d,0x02,0x00,0x00,0x0b,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x7e,0x02,0x00,0x00, -0x1b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x7f,0x02,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x80,0x02,0x00,0x00,0x1c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x81,0x02,0x00,0x00, -0x0d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x82,0x02,0x00,0x00,0x1d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x83,0x02,0x00,0x00,0x0e,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x84,0x02,0x00,0x00, -0x1e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x85,0x02,0x00,0x00,0x0f,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x86,0x02,0x00,0x00,0x1f,0x00,0x00,0x00, -0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x81,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0d,0x00,0x00,0x00, -0x82,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x82,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, -0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0xa8,0x00,0x04,0x00, -0x24,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x29,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, -0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2b,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x2f,0x00,0x00,0x00, -0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2c,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x24,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x34,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x32,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x34,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x33,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x81,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x34,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x39,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x3e,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x58,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x54,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x51,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x64,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x6e,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x67,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x6e,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x78,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x7c,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x92,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x95,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x9c,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x9f,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x9c,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x9f,0x00,0x00,0x00, -0x92,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xa6,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x6c,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0xa8,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xa6,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xa8,0x00,0x00,0x00,0x95,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xb2,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xb5,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xbc,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x37,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0xbf,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xbc,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xbf,0x00,0x00,0x00, -0xb2,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xc6,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x6d,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0xc8,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xc6,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xc8,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xd2,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xd5,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xdc,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0xdf,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xdc,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xdf,0x00,0x00,0x00, -0xd2,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xe6,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x6e,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0xe8,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xe6,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xe8,0x00,0x00,0x00,0xd5,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xf2,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xf5,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xfc,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x6f,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0xff,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xfc,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xff,0x00,0x00,0x00, -0xf2,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x06,0x01,0x00,0x00,0x67,0x00,0x00,0x00,0x70,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x08,0x01,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x06,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x08,0x01,0x00,0x00,0xf5,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x12,0x01,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x15,0x01,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x1c,0x01,0x00,0x00,0x67,0x00,0x00,0x00, -0x71,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x1f,0x01,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x1c,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x1f,0x01,0x00,0x00, -0x12,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x26,0x01,0x00,0x00,0x67,0x00,0x00,0x00,0x72,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x28,0x01,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x26,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x28,0x01,0x00,0x00,0x15,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x32,0x01,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x35,0x01,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3c,0x01,0x00,0x00,0x67,0x00,0x00,0x00, -0x73,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x3f,0x01,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3c,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x3f,0x01,0x00,0x00, -0x32,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x46,0x01,0x00,0x00,0x67,0x00,0x00,0x00,0x74,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x48,0x01,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x46,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x48,0x01,0x00,0x00,0x35,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x52,0x01,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x55,0x01,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5c,0x01,0x00,0x00,0x67,0x00,0x00,0x00, -0x75,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x5f,0x01,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x5c,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x5f,0x01,0x00,0x00, -0x52,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x66,0x01,0x00,0x00,0x67,0x00,0x00,0x00,0x76,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x68,0x01,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x66,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x68,0x01,0x00,0x00,0x55,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x72,0x01,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x75,0x01,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7c,0x01,0x00,0x00,0x67,0x00,0x00,0x00, -0x77,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x7f,0x01,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x7c,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x7f,0x01,0x00,0x00, -0x72,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x86,0x01,0x00,0x00,0x67,0x00,0x00,0x00,0x78,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x88,0x01,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x86,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x88,0x01,0x00,0x00,0x75,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x92,0x01,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x95,0x01,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x9c,0x01,0x00,0x00,0x67,0x00,0x00,0x00, -0x79,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x9f,0x01,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x9c,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x9f,0x01,0x00,0x00, -0x92,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xa6,0x01,0x00,0x00,0x67,0x00,0x00,0x00,0x7a,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0xa8,0x01,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xa6,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xa8,0x01,0x00,0x00,0x95,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xb2,0x01,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xb5,0x01,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xbc,0x01,0x00,0x00,0x67,0x00,0x00,0x00, -0x7b,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0xbf,0x01,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xbc,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xbf,0x01,0x00,0x00, -0xb2,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xc6,0x01,0x00,0x00,0x67,0x00,0x00,0x00,0x7c,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0xc8,0x01,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xc6,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xc8,0x01,0x00,0x00,0xb5,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xd2,0x01,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xd5,0x01,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xdc,0x01,0x00,0x00,0x67,0x00,0x00,0x00, -0x7d,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0xdf,0x01,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xdc,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xdf,0x01,0x00,0x00, -0xd2,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xe6,0x01,0x00,0x00,0x67,0x00,0x00,0x00,0x7e,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0xe8,0x01,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xe6,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xe8,0x01,0x00,0x00,0xd5,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xf2,0x01,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xf5,0x01,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xfc,0x01,0x00,0x00,0x67,0x00,0x00,0x00, -0x7f,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0xff,0x01,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xfc,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xff,0x01,0x00,0x00, -0xf2,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x06,0x02,0x00,0x00,0x67,0x00,0x00,0x00,0x80,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x08,0x02,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x06,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x08,0x02,0x00,0x00,0xf5,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x12,0x02,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x15,0x02,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x1c,0x02,0x00,0x00,0x67,0x00,0x00,0x00, -0x81,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x1f,0x02,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x1c,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x1f,0x02,0x00,0x00, -0x12,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x26,0x02,0x00,0x00,0x67,0x00,0x00,0x00,0x82,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x28,0x02,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x26,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x28,0x02,0x00,0x00,0x15,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x32,0x02,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x35,0x02,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3c,0x02,0x00,0x00,0x67,0x00,0x00,0x00, -0x83,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x3f,0x02,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3c,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x3f,0x02,0x00,0x00, -0x32,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x46,0x02,0x00,0x00,0x67,0x00,0x00,0x00,0x84,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x48,0x02,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x46,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x48,0x02,0x00,0x00,0x35,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x52,0x02,0x00,0x00, -0x55,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x55,0x02,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5c,0x02,0x00,0x00,0x67,0x00,0x00,0x00, -0x85,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x5f,0x02,0x00,0x00,0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x5c,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x5f,0x02,0x00,0x00, -0x52,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x66,0x02,0x00,0x00,0x67,0x00,0x00,0x00,0x86,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x68,0x02,0x00,0x00, -0x5f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x66,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x68,0x02,0x00,0x00,0x55,0x02,0x00,0x00, -0xf9,0x00,0x02,0x00,0x81,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x81,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, - +0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x23,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x36,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, +0x48,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x4c,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x4d,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x4e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x52,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x5a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x5b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x5c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x5c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x5f,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x09,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, +0x00,0x01,0x00,0x00,0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00, +0x7e,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x77,0x00,0x00,0x00, +0x77,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, +0x7f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, +0x0d,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x80,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00, +0x0f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x0f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x11,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x18,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x87,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, +0x17,0x00,0x00,0x00,0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, +0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x11,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0xaf,0x00,0x05,0x00, +0x23,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, +0x1a,0x00,0x00,0x00,0xa8,0x00,0x04,0x00,0x23,0x00,0x00,0x00, +0x29,0x00,0x00,0x00,0x28,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, +0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, +0x29,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x2b,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x2a,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x18,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x2f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xaf,0x00,0x05,0x00, +0x23,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x2f,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x2b,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x2b,0x00,0x00,0x00,0xf5,0x00,0x07,0x00, +0x23,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x28,0x00,0x00,0x00, +0x80,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, +0xf7,0x00,0x03,0x00,0x33,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfa,0x00,0x04,0x00,0x31,0x00,0x00,0x00,0x32,0x00,0x00,0x00, +0x33,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x32,0x00,0x00,0x00, +0xf9,0x00,0x02,0x00,0x7f,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x33,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, +0x37,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00, +0x37,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x39,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x17,0x00,0x00,0x00, +0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x52,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x3f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x48,0x00,0x00,0x00, +0x54,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, +0x17,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x52,0x00,0x00,0x00, +0x57,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x56,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x48,0x00,0x00,0x00, +0x58,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x18,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x5f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x61,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x84,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x61,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x52,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, +0x5d,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x65,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0x6c,0x00,0x00,0x00,0x54,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x52,0x00,0x00,0x00,0x7a,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0x7a,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, +0x7f,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x7f,0x00,0x00,0x00, +0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t dequant_f16_len = 4392; +const uint64_t dequant_f16_len = 1748; unsigned char dequant_f16_fp32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xc8,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x86,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, 0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, 0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x0f,0x00,0x09,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x62,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x60,0x00,0x00,0x00, 0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, 0x00,0x01,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, @@ -1280,23 +1059,23 @@ unsigned char dequant_f16_fp32_data[] = { 0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x14,0x00,0x00,0x00, 0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 0x47,0x00,0x03,0x00,0x14,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x50,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x50,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x50,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x4e,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x50,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x52,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x5f,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x60,0x00,0x00,0x00, +0x50,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x5d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x5e,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x60,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x60,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x62,0x00,0x00,0x00, +0x5e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x5e,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x60,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x62,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x85,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x60,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x83,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, 0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, @@ -1315,405 +1094,105 @@ unsigned char dequant_f16_fp32_data[] = { 0x15,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x09,0x00,0x00,0x00, 0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x17,0x00,0x00,0x00, 0x01,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x18,0x00,0x00,0x00, -0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x4a,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x16,0x00,0x03,0x00, -0x4e,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x4f,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x50,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x51,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x52,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, -0x5f,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, -0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x61,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x61,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x7d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x84,0x00,0x00,0x00, -0x00,0x01,0x00,0x00,0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00, -0x85,0x00,0x00,0x00,0x84,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0x7d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xad,0x02,0x00,0x00,0x11,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xae,0x02,0x00,0x00,0x12,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xaf,0x02,0x00,0x00, -0x13,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xb0,0x02,0x00,0x00,0x04,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb1,0x02,0x00,0x00,0x14,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xb2,0x02,0x00,0x00, -0x05,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xb3,0x02,0x00,0x00,0x15,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb4,0x02,0x00,0x00,0x06,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xb5,0x02,0x00,0x00, -0x16,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xb6,0x02,0x00,0x00,0x07,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xb7,0x02,0x00,0x00,0x17,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xb8,0x02,0x00,0x00, -0x08,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xb9,0x02,0x00,0x00,0x18,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xba,0x02,0x00,0x00,0x09,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xbb,0x02,0x00,0x00, -0x19,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xbc,0x02,0x00,0x00,0x0a,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xbd,0x02,0x00,0x00,0x1a,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xbe,0x02,0x00,0x00, -0x0b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xbf,0x02,0x00,0x00,0x1b,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xc0,0x02,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xc1,0x02,0x00,0x00, -0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc2,0x02,0x00,0x00,0x0d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xc3,0x02,0x00,0x00,0x1d,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xc4,0x02,0x00,0x00, -0x0e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xc5,0x02,0x00,0x00,0x1e,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0xc6,0x02,0x00,0x00,0x0f,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0xc7,0x02,0x00,0x00, -0x1f,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x86,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x0d,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x87,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x0d,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x0f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x18,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0xa8,0x00,0x04,0x00,0x24,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x29,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x2c,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x2a,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x2b,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00, -0x2f,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x30,0x00,0x00,0x00, -0x2f,0x00,0x00,0x00,0xaf,0x00,0x05,0x00,0x24,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x30,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x2c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x2c,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x24,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x2b,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x32,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x34,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x33,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x86,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x34,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x09,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00, +0x23,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x36,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x16,0x00,0x03,0x00,0x48,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x16,0x00,0x03,0x00,0x4c,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x1d,0x00,0x03,0x00,0x4d,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, +0x1e,0x00,0x03,0x00,0x4e,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x4f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x4f,0x00,0x00,0x00, +0x50,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x53,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, +0x1d,0x00,0x03,0x00,0x5d,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, +0x1e,0x00,0x03,0x00,0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x5f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x5e,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x5f,0x00,0x00,0x00, +0x60,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x7b,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, +0x82,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x2c,0x00,0x06,0x00, +0x0a,0x00,0x00,0x00,0x83,0x00,0x00,0x00,0x82,0x00,0x00,0x00, +0x7b,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x36,0x00,0x05,0x00, +0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, +0xf7,0x00,0x03,0x00,0x84,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfb,0x00,0x03,0x00,0x0d,0x00,0x00,0x00,0x85,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x85,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x0e,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x0d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x09,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, +0x1a,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x8b,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x11,0x00,0x00,0x00, +0x1b,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, +0xaf,0x00,0x05,0x00,0x23,0x00,0x00,0x00,0x28,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0xa8,0x00,0x04,0x00, +0x23,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x28,0x00,0x00,0x00, +0xf7,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfa,0x00,0x04,0x00,0x29,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, +0x2b,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2a,0x00,0x00,0x00, +0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x2f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0xaf,0x00,0x05,0x00,0x23,0x00,0x00,0x00,0x30,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x2f,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, +0x2b,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x2b,0x00,0x00,0x00, +0xf5,0x00,0x07,0x00,0x23,0x00,0x00,0x00,0x31,0x00,0x00,0x00, +0x28,0x00,0x00,0x00,0x85,0x00,0x00,0x00,0x30,0x00,0x00,0x00, +0x2a,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x33,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x31,0x00,0x00,0x00, +0x32,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x32,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x84,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x33,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x18,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x36,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x38,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x87,0x00,0x05,0x00, 0x06,0x00,0x00,0x00,0x39,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x39,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3e,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x3e,0x00,0x00,0x00,0x1d,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x52,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x58,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x18,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x64,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x26,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x71,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x72,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x7c,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x7c,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x81,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x98,0x00,0x00,0x00, -0x97,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x9b,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x9c,0x00,0x00,0x00,0x9b,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa3,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0xa6,0x00,0x00,0x00,0x98,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0xa7,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xa3,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xa7,0x00,0x00,0x00,0xa6,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xae,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0xad,0x02,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0x9c,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0xb1,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xae,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0xb1,0x00,0x00,0x00,0xb0,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0xbb,0x00,0x00,0x00, -0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xbc,0x00,0x00,0x00,0xbb,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0xbf,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xc0,0x00,0x00,0x00, -0xbf,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xc7,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0xca,0x00,0x00,0x00, -0xbc,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xcb,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xc7,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xcb,0x00,0x00,0x00, -0xca,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xd2,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0xae,0x02,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0xd4,0x00,0x00,0x00, -0xc0,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xd5,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xd2,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xd5,0x00,0x00,0x00, -0xd4,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0xdf,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0xdf,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0xe3,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xe4,0x00,0x00,0x00,0xe3,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xeb,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0xee,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xef,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xeb,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xef,0x00,0x00,0x00,0xee,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xf6,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0xaf,0x02,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0xf8,0x00,0x00,0x00,0xe4,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xf9,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xf6,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0xf9,0x00,0x00,0x00,0xf8,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x03,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x04,0x01,0x00,0x00, -0x03,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x07,0x01,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x08,0x01,0x00,0x00,0x07,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x0f,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0xb0,0x02,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x12,0x01,0x00,0x00,0x04,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0x13,0x01,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x0f,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x13,0x01,0x00,0x00,0x12,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x1a,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0xb1,0x02,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x1c,0x01,0x00,0x00,0x08,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0x1d,0x01,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x1a,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x1d,0x01,0x00,0x00,0x1c,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x27,0x01,0x00,0x00, -0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x28,0x01,0x00,0x00,0x27,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x2b,0x01,0x00,0x00,0x5b,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x2c,0x01,0x00,0x00, -0x2b,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x33,0x01,0x00,0x00,0x6a,0x00,0x00,0x00,0xb2,0x02,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x36,0x01,0x00,0x00, -0x28,0x01,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x37,0x01,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x33,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x37,0x01,0x00,0x00, -0x36,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3e,0x01,0x00,0x00,0x6a,0x00,0x00,0x00,0xb3,0x02,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x40,0x01,0x00,0x00, -0x2c,0x01,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x41,0x01,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x3e,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0x41,0x01,0x00,0x00, -0x40,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x4b,0x01,0x00,0x00,0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x4c,0x01,0x00,0x00,0x4b,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x4f,0x01,0x00,0x00, -0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x50,0x01,0x00,0x00,0x4f,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x57,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0xb4,0x02,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x5a,0x01,0x00,0x00,0x4c,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x5b,0x01,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x57,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x5b,0x01,0x00,0x00,0x5a,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x62,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0xb5,0x02,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x64,0x01,0x00,0x00,0x50,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x65,0x01,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x62,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0x65,0x01,0x00,0x00,0x64,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x6f,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x70,0x01,0x00,0x00, -0x6f,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x73,0x01,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x74,0x01,0x00,0x00,0x73,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x7b,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0xb6,0x02,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x7e,0x01,0x00,0x00,0x70,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0x7f,0x01,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x7b,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x7f,0x01,0x00,0x00,0x7e,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x86,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0xb7,0x02,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x88,0x01,0x00,0x00,0x74,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0x89,0x01,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x86,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0x89,0x01,0x00,0x00,0x88,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x93,0x01,0x00,0x00, -0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x94,0x01,0x00,0x00,0x93,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x97,0x01,0x00,0x00,0x5b,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x98,0x01,0x00,0x00, -0x97,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x9f,0x01,0x00,0x00,0x6a,0x00,0x00,0x00,0xb8,0x02,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0xa2,0x01,0x00,0x00, -0x94,0x01,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xa3,0x01,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x9f,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xa3,0x01,0x00,0x00, -0xa2,0x01,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0xaa,0x01,0x00,0x00,0x6a,0x00,0x00,0x00,0xb9,0x02,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0xac,0x01,0x00,0x00, -0x98,0x01,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0xad,0x01,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0xaa,0x01,0x00,0x00,0x3e,0x00,0x03,0x00,0xad,0x01,0x00,0x00, -0xac,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0xb7,0x01,0x00,0x00,0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xb8,0x01,0x00,0x00,0xb7,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0xbb,0x01,0x00,0x00, -0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0xbc,0x01,0x00,0x00,0xbb,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xc3,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0xba,0x02,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0xc6,0x01,0x00,0x00,0xb8,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xc7,0x01,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xc3,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0xc7,0x01,0x00,0x00,0xc6,0x01,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xce,0x01,0x00,0x00,0x6a,0x00,0x00,0x00, -0xbb,0x02,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0xd0,0x01,0x00,0x00,0xbc,0x01,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xd1,0x01,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xce,0x01,0x00,0x00,0x3e,0x00,0x03,0x00, -0xd1,0x01,0x00,0x00,0xd0,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0xdb,0x01,0x00,0x00,0x56,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0xdc,0x01,0x00,0x00, -0xdb,0x01,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0xdf,0x01,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0xe0,0x01,0x00,0x00,0xdf,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xe7,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0xbc,0x02,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0xea,0x01,0x00,0x00,0xdc,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0xeb,0x01,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xe7,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xeb,0x01,0x00,0x00,0xea,0x01,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xf2,0x01,0x00,0x00, -0x6a,0x00,0x00,0x00,0xbd,0x02,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0xf4,0x01,0x00,0x00,0xe0,0x01,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0xf5,0x01,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0xf2,0x01,0x00,0x00, -0x3e,0x00,0x03,0x00,0xf5,0x01,0x00,0x00,0xf4,0x01,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0xff,0x01,0x00,0x00, -0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0xff,0x01,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x03,0x02,0x00,0x00,0x5b,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x04,0x02,0x00,0x00, -0x03,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x0b,0x02,0x00,0x00,0x6a,0x00,0x00,0x00,0xbe,0x02,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x0e,0x02,0x00,0x00, -0x00,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x0f,0x02,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x0b,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x0f,0x02,0x00,0x00, -0x0e,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x16,0x02,0x00,0x00,0x6a,0x00,0x00,0x00,0xbf,0x02,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x18,0x02,0x00,0x00, -0x04,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x19,0x02,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x16,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x19,0x02,0x00,0x00, -0x18,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x23,0x02,0x00,0x00,0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x24,0x02,0x00,0x00,0x23,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x27,0x02,0x00,0x00, -0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x28,0x02,0x00,0x00,0x27,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x2f,0x02,0x00,0x00,0x6a,0x00,0x00,0x00, -0xc0,0x02,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x32,0x02,0x00,0x00,0x24,0x02,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x33,0x02,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x2f,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x33,0x02,0x00,0x00,0x32,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x02,0x00,0x00,0x6a,0x00,0x00,0x00, -0xc1,0x02,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x3c,0x02,0x00,0x00,0x28,0x02,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x3d,0x02,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x3a,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x3d,0x02,0x00,0x00,0x3c,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x47,0x02,0x00,0x00,0x56,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x48,0x02,0x00,0x00, -0x47,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x4b,0x02,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x4c,0x02,0x00,0x00,0x4b,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x53,0x02,0x00,0x00, -0x6a,0x00,0x00,0x00,0xc2,0x02,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x56,0x02,0x00,0x00,0x48,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0x57,0x02,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x53,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x57,0x02,0x00,0x00,0x56,0x02,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x5e,0x02,0x00,0x00, -0x6a,0x00,0x00,0x00,0xc3,0x02,0x00,0x00,0x73,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x60,0x02,0x00,0x00,0x4c,0x02,0x00,0x00, -0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00,0x61,0x02,0x00,0x00, -0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5e,0x02,0x00,0x00, -0x3e,0x00,0x03,0x00,0x61,0x02,0x00,0x00,0x60,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x6b,0x02,0x00,0x00, -0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x6c,0x02,0x00,0x00,0x6b,0x02,0x00,0x00,0x3d,0x00,0x04,0x00, -0x4e,0x00,0x00,0x00,0x6f,0x02,0x00,0x00,0x5b,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00,0x70,0x02,0x00,0x00, -0x6f,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x77,0x02,0x00,0x00,0x6a,0x00,0x00,0x00,0xc4,0x02,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x7a,0x02,0x00,0x00, -0x6c,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x7b,0x02,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x77,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x7b,0x02,0x00,0x00, -0x7a,0x02,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x82,0x02,0x00,0x00,0x6a,0x00,0x00,0x00,0xc5,0x02,0x00,0x00, -0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x84,0x02,0x00,0x00, -0x70,0x02,0x00,0x00,0x41,0x00,0x06,0x00,0x55,0x00,0x00,0x00, -0x85,0x02,0x00,0x00,0x62,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x82,0x02,0x00,0x00,0x3e,0x00,0x03,0x00,0x85,0x02,0x00,0x00, -0x84,0x02,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x8f,0x02,0x00,0x00,0x56,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x4a,0x00,0x00,0x00,0x90,0x02,0x00,0x00,0x8f,0x02,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x93,0x02,0x00,0x00, -0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x4a,0x00,0x00,0x00, -0x94,0x02,0x00,0x00,0x93,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x9b,0x02,0x00,0x00,0x6a,0x00,0x00,0x00, -0xc6,0x02,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0x9e,0x02,0x00,0x00,0x90,0x02,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0x9f,0x02,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x9b,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0x9f,0x02,0x00,0x00,0x9e,0x02,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xa6,0x02,0x00,0x00,0x6a,0x00,0x00,0x00, -0xc7,0x02,0x00,0x00,0x73,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, -0xa8,0x02,0x00,0x00,0x94,0x02,0x00,0x00,0x41,0x00,0x06,0x00, -0x55,0x00,0x00,0x00,0xa9,0x02,0x00,0x00,0x62,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0xa6,0x02,0x00,0x00,0x3e,0x00,0x03,0x00, -0xa9,0x02,0x00,0x00,0xa8,0x02,0x00,0x00,0xf9,0x00,0x02,0x00, -0x86,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x86,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, +0x17,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x3d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x39,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, +0x3d,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x53,0x00,0x00,0x00,0x54,0x00,0x00,0x00,0x50,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x4c,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x54,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x48,0x00,0x00,0x00,0x56,0x00,0x00,0x00, +0x55,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x58,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x17,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x53,0x00,0x00,0x00,0x59,0x00,0x00,0x00, +0x50,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x58,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, +0x59,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x48,0x00,0x00,0x00, +0x5b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x18,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x62,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x64,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x84,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x64,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x68,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, +0x56,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x53,0x00,0x00,0x00, +0x70,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x68,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x70,0x00,0x00,0x00, +0x6f,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x7a,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x17,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x4c,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, +0x5b,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x53,0x00,0x00,0x00, +0x7f,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x7a,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x7f,0x00,0x00,0x00, +0x7e,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x84,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x84,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, +0x38,0x00,0x01,0x00, }; -const uint64_t dequant_f16_fp32_len = 5420; +const uint64_t dequant_f16_fp32_len = 1816; unsigned char dequant_q2_K_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, @@ -15313,7 +14792,7 @@ const uint64_t gelu_f32_len = 1408; unsigned char get_rows_f16_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x7a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x77,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, 0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, 0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, @@ -15321,7 +14800,7 @@ unsigned char get_rows_f16_data[] = { 0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, 0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, 0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x65,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x63,0x00,0x00,0x00, 0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, 0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, @@ -15341,22 +14820,184 @@ unsigned char get_rows_f16_data[] = { 0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x55,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x53,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x55,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x55,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x53,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x62,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x63,0x00,0x00,0x00, +0x55,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x60,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x61,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x63,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x65,0x00,0x00,0x00, +0x61,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x61,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x63,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x65,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x63,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x74,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, +0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, +0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, +0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, +0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, +0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, +0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x4e,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x52,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x53,0x00,0x00,0x00, +0x52,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x54,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x60,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x61,0x00,0x00,0x00, +0x60,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x62,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x62,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x73,0x00,0x00,0x00, +0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00, +0x74,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, +0x75,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, +0x0c,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x76,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, +0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, +0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00, +0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00, +0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, +0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00, +0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, +0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00, +0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00, +0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00, +0xf9,0x00,0x02,0x00,0x75,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00, +0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00, +0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00, +0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00, +0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00, +0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00, +0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x47,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, +0x47,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x41,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x4d,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00,0x59,0x00,0x00,0x00, +0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, +0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x5c,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, +0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, +0x5d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x66,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x48,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, +0x63,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x66,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0x6b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, +0x66,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x58,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x63,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0x72,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, +0x75,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x75,0x00,0x00,0x00, +0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, +}; +const uint64_t get_rows_f16_len = 1892; + +unsigned char get_rows_f16_f32_data[] = { +0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, +0x7a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, +0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, +0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, +0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00, +0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, +0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, +0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, +0x2d,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x63,0x00,0x00,0x00, +0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, +0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x2b,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x53,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x53,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x55,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x60,0x00,0x00,0x00,0x06,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x61,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, +0x61,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x61,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x63,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x63,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x77,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, 0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, @@ -15388,198 +15029,28 @@ unsigned char get_rows_f16_data[] = { 0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, 0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x50,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x54,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x55,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x62,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x64,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00, -0x0c,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x79,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00, -0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x10,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x0d,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x7c,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x21,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x1f,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0xae,0x00,0x05,0x00,0x24,0x00,0x00,0x00, -0x25,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00, -0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00, -0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x32,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x33,0x00,0x00,0x00,0x32,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x33,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x38,0x00,0x00,0x00, -0x14,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x48,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x6d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x72,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x5a,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x75,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x78,0x00,0x00,0x00, -0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, -}; -const uint64_t get_rows_f16_len = 1940; - -unsigned char get_rows_f16_f32_data[] = { -0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x7d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, -0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x09,0x00,0x00,0x00, -0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00,0x0b,0x00,0x06,0x00, -0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c,0x2e,0x73,0x74,0x64, -0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00,0x0e,0x00,0x03,0x00, -0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x0f,0x00,0x0a,0x00, -0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x6d,0x61,0x69,0x6e, -0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x23,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x1d,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x1d,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x2a,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x2b,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x2b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x2b,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x55,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x55,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x55,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x62,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00,0x63,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00, -0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00,0x63,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x65,0x00,0x00,0x00, -0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x65,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x7a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00, -0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x17,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0a,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x3b,0x00,0x04,0x00,0x0a,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x0d,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x15,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00, -0x12,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x1c,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x1e,0x00,0x06,0x00,0x1d,0x00,0x00,0x00,0x06,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x1e,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x1d,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1e,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x21,0x00,0x00,0x00,0x09,0x00,0x00,0x00, -0x06,0x00,0x00,0x00,0x14,0x00,0x02,0x00,0x24,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x2a,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x2b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x2c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x2c,0x00,0x00,0x00, -0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x16,0x00,0x03,0x00,0x50,0x00,0x00,0x00,0x10,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x54,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x55,0x00,0x00,0x00,0x54,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x56,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x50,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x62,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x63,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x64,0x00,0x00,0x00, -0x65,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x6e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x73,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, -0x09,0x00,0x00,0x00,0x7a,0x00,0x00,0x00,0x79,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x4e,0x00,0x00,0x00, +0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x52,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x53,0x00,0x00,0x00, +0x52,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x54,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x58,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x4e,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x60,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x61,0x00,0x00,0x00, +0x60,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x62,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0x62,0x00,0x00,0x00,0x63,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x20,0x00,0x04,0x00,0x6c,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, +0x09,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x76,0x00,0x00,0x00, 0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00, 0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x7c,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x7c,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0xf7,0x00,0x03,0x00,0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x79,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x79,0x00,0x00,0x00,0x41,0x00,0x05,0x00, 0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, 0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, @@ -15600,7 +15071,7 @@ unsigned char get_rows_f16_f32_data[] = { 0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, 0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x7b,0x00,0x00,0x00, +0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x78,0x00,0x00,0x00, 0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, 0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, 0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, @@ -15613,51 +15084,51 @@ unsigned char get_rows_f16_f32_data[] = { 0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, 0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x5e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x50,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x6d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x6e,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x6f,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x68,0x00,0x00,0x00, -0x73,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x77,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x6e,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x65,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x78,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x7b,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x7b,0x00,0x00,0x00, +0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x48,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, +0x41,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x82,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x41,0x00,0x00,0x00, +0x4c,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00, +0x59,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x44,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, +0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00, +0x5d,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x5c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4e,0x00,0x00,0x00, +0x5e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, +0x48,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x6b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x6c,0x00,0x00,0x00,0x6d,0x00,0x00,0x00,0x63,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0x6d,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x66,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x74,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x6c,0x00,0x00,0x00,0x75,0x00,0x00,0x00,0x63,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x71,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0x75,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, +0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x78,0x00,0x00,0x00, 0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_f16_f32_len = 1988; +const uint64_t get_rows_f16_f32_len = 1940; unsigned char get_rows_f16_f32_fp32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x7d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x7a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, 0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, 0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x67,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, +0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, 0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, 0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, @@ -15676,23 +15147,23 @@ unsigned char get_rows_f16_f32_fp32_data[] = { 0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00, 0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x55,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x55,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x64,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x55,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x62,0x00,0x00,0x00, 0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x65,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x65,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x65,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x67,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x67,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x7a,0x00,0x00,0x00, +0x63,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x65,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x65,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x77,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, 0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, 0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, @@ -15723,32 +15194,28 @@ unsigned char get_rows_f16_f32_fp32_data[] = { 0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x53,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x54,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x55,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x56,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x64,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x65,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x66,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x66,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x6f,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x74,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x79,0x00,0x00,0x00,0x00,0x02,0x00,0x00, -0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00, +0x51,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x52,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x53,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x54,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x58,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x62,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x64,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x63,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x65,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x6d,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0x76,0x00,0x00,0x00,0x00,0x02,0x00,0x00, +0x2c,0x00,0x06,0x00,0x09,0x00,0x00,0x00,0x77,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00, 0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x7b,0x00,0x00,0x00, +0x05,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x78,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00, -0x7c,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x7c,0x00,0x00,0x00, +0x79,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x79,0x00,0x00,0x00, 0x41,0x00,0x05,0x00,0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, 0x06,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00, @@ -15770,7 +15237,7 @@ unsigned char get_rows_f16_f32_fp32_data[] = { 0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, 0x25,0x00,0x00,0x00,0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00, 0xf8,0x00,0x02,0x00,0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x7b,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00, +0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00, 0x41,0x00,0x06,0x00,0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00, 0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, 0x3d,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x32,0x00,0x00,0x00, @@ -15783,51 +15250,51 @@ unsigned char get_rows_f16_f32_fp32_data[] = { 0x1a,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00, 0x06,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, 0x14,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00, -0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x5a,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x5f,0x00,0x00,0x00,0x45,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x61,0x00,0x00,0x00, -0x60,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x6f,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x70,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x6f,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x67,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x75,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x78,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x7b,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x7b,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, +0x44,0x00,0x00,0x00,0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x47,0x00,0x00,0x00, +0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x86,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x47,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x4c,0x00,0x00,0x00,0x41,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, +0x41,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x58,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x51,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00, +0x5a,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x5d,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, +0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, +0x5e,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00, +0x60,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x4d,0x00,0x00,0x00, +0x48,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x6d,0x00,0x00,0x00, +0x6e,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x68,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x6e,0x00,0x00,0x00, +0x5b,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x72,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x41,0x00,0x06,0x00,0x6d,0x00,0x00,0x00,0x75,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x72,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0x75,0x00,0x00,0x00,0x60,0x00,0x00,0x00, +0xf9,0x00,0x02,0x00,0x78,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, +0x78,0x00,0x00,0x00,0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_f16_f32_fp32_len = 1980; +const uint64_t get_rows_f16_f32_fp32_len = 1932; unsigned char get_rows_f16_fp32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0x7e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, 0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, 0x0e,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x0f,0x00,0x0a,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x67,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, +0x1f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x65,0x00,0x00,0x00,0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00, 0x11,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x01,0x00,0x00,0x00, 0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x0b,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x1c,0x00,0x00,0x00,0x48,0x00,0x05,0x00, @@ -15846,23 +15313,23 @@ unsigned char get_rows_f16_fp32_data[] = { 0x2b,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, 0x2d,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x2d,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x54,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x52,0x00,0x00,0x00, 0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x55,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x55,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x53,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x55,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x57,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x57,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x64,0x00,0x00,0x00, +0x53,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x55,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x55,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x62,0x00,0x00,0x00, 0x06,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x48,0x00,0x04,0x00, -0x65,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, -0x48,0x00,0x05,0x00,0x65,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0x48,0x00,0x05,0x00,0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x03,0x00, -0x65,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x67,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x67,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x7b,0x00,0x00,0x00, +0x63,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, +0x65,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x65,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x78,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x00,0x02,0x00, 0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00,0x03,0x00,0x00,0x00, 0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00,0x06,0x00,0x00,0x00, @@ -15893,31 +15360,27 @@ unsigned char get_rows_f16_fp32_data[] = { 0x2c,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 0x2b,0x00,0x04,0x00,0x10,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x30,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x20,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x53,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x54,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x55,0x00,0x00,0x00, -0x54,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x56,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x56,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x5a,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x1d,0x00,0x03,0x00,0x64,0x00,0x00,0x00, -0x53,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0x65,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x66,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0x66,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x7a,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, -0x09,0x00,0x00,0x00,0x7b,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x16,0x00,0x03,0x00, +0x51,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x52,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x53,0x00,0x00,0x00,0x52,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x54,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x53,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x58,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x62,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x64,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x63,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x65,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, +0x77,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x2c,0x00,0x06,0x00, +0x09,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x77,0x00,0x00,0x00, 0x16,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x36,0x00,0x05,0x00, 0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x05,0x00,0x00,0x00, -0xf7,0x00,0x03,0x00,0x7c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x7d,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x7d,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0xf7,0x00,0x03,0x00,0x79,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfb,0x00,0x03,0x00,0x0c,0x00,0x00,0x00,0x7a,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x7a,0x00,0x00,0x00,0x41,0x00,0x05,0x00, 0x0d,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 0x0c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, 0x0f,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x7c,0x00,0x04,0x00, @@ -15938,7 +15401,7 @@ unsigned char get_rows_f16_fp32_data[] = { 0x23,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x27,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x25,0x00,0x00,0x00, 0x26,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x7c,0x00,0x00,0x00, +0x26,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x79,0x00,0x00,0x00, 0xf8,0x00,0x02,0x00,0x27,0x00,0x00,0x00,0x41,0x00,0x06,0x00, 0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, 0x2e,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, @@ -15951,42 +15414,42 @@ unsigned char get_rows_f16_fp32_data[] = { 0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x1a,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, 0x41,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x14,0x00,0x00,0x00, -0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x45,0x00,0x00,0x00, -0x3a,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x89,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x4a,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x49,0x00,0x00,0x00, -0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, -0x41,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x82,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x41,0x00,0x00,0x00, -0x4e,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x53,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x1c,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x5f,0x00,0x00,0x00, -0x45,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x41,0x00,0x06,0x00, -0x5a,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x53,0x00,0x00,0x00,0x61,0x00,0x00,0x00,0x60,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x62,0x00,0x00,0x00, -0x61,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x4f,0x00,0x00,0x00,0x4a,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x6f,0x00,0x00,0x00, -0x5d,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x6a,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x70,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x6a,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x73,0x00,0x04,0x00,0x53,0x00,0x00,0x00,0x78,0x00,0x00,0x00, -0x62,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x5a,0x00,0x00,0x00, -0x79,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x75,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x79,0x00,0x00,0x00, -0x78,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x7c,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x7c,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, +0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x3a,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x89,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x3a,0x00,0x00,0x00, +0x16,0x00,0x00,0x00,0x86,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x48,0x00,0x00,0x00,0x47,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x89,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x4c,0x00,0x00,0x00, +0x41,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x82,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x41,0x00,0x00,0x00, +0x4c,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00, +0x59,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x44,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x51,0x00,0x00,0x00, +0x5a,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x73,0x00,0x04,0x00, +0x1c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, +0x44,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x58,0x00,0x00,0x00,0x5e,0x00,0x00,0x00,0x55,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x51,0x00,0x00,0x00,0x5f,0x00,0x00,0x00,0x5e,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x1c,0x00,0x00,0x00,0x60,0x00,0x00,0x00, +0x5f,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x68,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x48,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x6d,0x00,0x00,0x00, +0x5b,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00, +0x6e,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x68,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x6e,0x00,0x00,0x00, +0x6d,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x72,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x16,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x75,0x00,0x00,0x00, +0x60,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x58,0x00,0x00,0x00, +0x76,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x72,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0x76,0x00,0x00,0x00, +0x75,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x79,0x00,0x00,0x00, +0xf8,0x00,0x02,0x00,0x79,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, 0x38,0x00,0x01,0x00, }; -const uint64_t get_rows_f16_fp32_len = 1996; +const uint64_t get_rows_f16_fp32_len = 1948; unsigned char get_rows_q4_0_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, @@ -52701,7 +52164,7 @@ const uint64_t mul_f32_len = 1456; unsigned char mul_mat_vec_f16_f32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, -0xba,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, +0xb6,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x02,0x00, 0x01,0x00,0x00,0x00,0x11,0x00,0x02,0x00,0x51,0x11,0x00,0x00, 0x0b,0x00,0x06,0x00,0x01,0x00,0x00,0x00,0x47,0x4c,0x53,0x4c, 0x2e,0x73,0x74,0x64,0x2e,0x34,0x35,0x30,0x00,0x00,0x00,0x00, @@ -52709,9 +52172,9 @@ unsigned char mul_mat_vec_f16_f32_data[] = { 0x0f,0x00,0x0c,0x00,0x05,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 0x6d,0x61,0x69,0x6e,0x00,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, 0x13,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x51,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0xad,0x00,0x00,0x00, +0x51,0x00,0x00,0x00,0x65,0x00,0x00,0x00,0xaa,0x00,0x00,0x00, 0x10,0x00,0x06,0x00,0x04,0x00,0x00,0x00,0x11,0x00,0x00,0x00, -0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, +0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0x47,0x00,0x04,0x00,0x0c,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, 0x1a,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x13,0x00,0x00,0x00, 0x0b,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x48,0x00,0x05,0x00, @@ -52729,23 +52192,23 @@ unsigned char mul_mat_vec_f16_f32_data[] = { 0x47,0x00,0x04,0x00,0x51,0x00,0x00,0x00,0x22,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x51,0x00,0x00,0x00, 0x21,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0x63,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x64,0x00,0x00,0x00, +0x62,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, +0x48,0x00,0x04,0x00,0x63,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x18,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0x63,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0x64,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0x66,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x66,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0x63,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0x65,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0x65,0x00,0x00,0x00, 0x21,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xaa,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x48,0x00,0x04,0x00,0xab,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0xab,0x00,0x00,0x00, +0xa7,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00, +0x48,0x00,0x04,0x00,0xa8,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x19,0x00,0x00,0x00,0x48,0x00,0x05,0x00,0xa8,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x47,0x00,0x03,0x00,0xab,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x47,0x00,0x04,0x00,0xad,0x00,0x00,0x00,0x22,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xad,0x00,0x00,0x00, +0x47,0x00,0x03,0x00,0xa8,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x47,0x00,0x04,0x00,0xaa,0x00,0x00,0x00,0x22,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x47,0x00,0x04,0x00,0xaa,0x00,0x00,0x00, 0x21,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x47,0x00,0x04,0x00, -0xb5,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, +0xb2,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x19,0x00,0x00,0x00, 0x13,0x00,0x02,0x00,0x02,0x00,0x00,0x00,0x21,0x00,0x03,0x00, 0x03,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x15,0x00,0x04,0x00, 0x06,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x01,0x00,0x00,0x00, @@ -52760,7 +52223,7 @@ unsigned char mul_mat_vec_f16_f32_data[] = { 0x3b,0x00,0x04,0x00,0x0b,0x00,0x00,0x00,0x13,0x00,0x00,0x00, 0x01,0x00,0x00,0x00,0x16,0x00,0x03,0x00,0x17,0x00,0x00,0x00, 0x20,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, +0x18,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x1c,0x00,0x04,0x00, 0x19,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x18,0x00,0x00,0x00, 0x20,0x00,0x04,0x00,0x1a,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 0x19,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x1a,0x00,0x00,0x00, @@ -52775,7 +52238,7 @@ unsigned char mul_mat_vec_f16_f32_data[] = { 0x29,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x09,0x00,0x00,0x00, 0x20,0x00,0x04,0x00,0x2b,0x00,0x00,0x00,0x09,0x00,0x00,0x00, 0x06,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x2e,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x14,0x00,0x02,0x00, +0x2e,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x14,0x00,0x02,0x00, 0x30,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, 0x35,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x16,0x00,0x03,0x00, 0x4d,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, @@ -52784,26 +52247,22 @@ unsigned char mul_mat_vec_f16_f32_data[] = { 0x50,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x4f,0x00,0x00,0x00, 0x3b,0x00,0x04,0x00,0x50,0x00,0x00,0x00,0x51,0x00,0x00,0x00, 0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x54,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x01,0x00,0x00,0x00, -0x1d,0x00,0x03,0x00,0x63,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x1e,0x00,0x03,0x00,0x64,0x00,0x00,0x00,0x63,0x00,0x00,0x00, -0x20,0x00,0x04,0x00,0x65,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x64,0x00,0x00,0x00,0x3b,0x00,0x04,0x00,0x65,0x00,0x00,0x00, -0x66,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00, -0x6e,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x17,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x77,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x2b,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, -0x09,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0x02,0x00,0x00,0x00, -0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0x08,0x01,0x00,0x00,0x1d,0x00,0x03,0x00,0xaa,0x00,0x00,0x00, -0x17,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0xab,0x00,0x00,0x00, -0xaa,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0xac,0x00,0x00,0x00, -0x0c,0x00,0x00,0x00,0xab,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, -0xac,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, -0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, -0x18,0x00,0x00,0x00,0x77,0x00,0x00,0x00,0x77,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x4d,0x00,0x00,0x00,0x1d,0x00,0x03,0x00, +0x62,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x1e,0x00,0x03,0x00, +0x63,0x00,0x00,0x00,0x62,0x00,0x00,0x00,0x20,0x00,0x04,0x00, +0x64,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x63,0x00,0x00,0x00, +0x3b,0x00,0x04,0x00,0x64,0x00,0x00,0x00,0x65,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0x6d,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0x17,0x00,0x00,0x00,0x2b,0x00,0x04,0x00, +0x09,0x00,0x00,0x00,0x88,0x00,0x00,0x00,0x02,0x00,0x00,0x00, +0x2b,0x00,0x04,0x00,0x09,0x00,0x00,0x00,0x89,0x00,0x00,0x00, +0x08,0x01,0x00,0x00,0x1d,0x00,0x03,0x00,0xa7,0x00,0x00,0x00, +0x17,0x00,0x00,0x00,0x1e,0x00,0x03,0x00,0xa8,0x00,0x00,0x00, +0xa7,0x00,0x00,0x00,0x20,0x00,0x04,0x00,0xa9,0x00,0x00,0x00, +0x0c,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0x3b,0x00,0x04,0x00, +0xa9,0x00,0x00,0x00,0xaa,0x00,0x00,0x00,0x0c,0x00,0x00,0x00, +0x2c,0x00,0x06,0x00,0x0a,0x00,0x00,0x00,0xb2,0x00,0x00,0x00, +0x18,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x18,0x00,0x00,0x00, 0x36,0x00,0x05,0x00,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, 0x05,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x0e,0x00,0x00,0x00, @@ -52819,122 +52278,91 @@ unsigned char mul_mat_vec_f16_f32_data[] = { 0x1b,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, 0x1f,0x00,0x00,0x00,0x1d,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, 0x22,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x22,0x00,0x00,0x00, -0xf5,0x00,0x07,0x00,0x06,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x8a,0x00,0x00,0x00, +0xf5,0x00,0x07,0x00,0x06,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x87,0x00,0x00,0x00, 0x23,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x2b,0x00,0x00,0x00, 0x2c,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x21,0x00,0x00,0x00, 0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, 0x2c,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, 0x2f,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, 0xb1,0x00,0x05,0x00,0x30,0x00,0x00,0x00,0x31,0x00,0x00,0x00, -0xb8,0x00,0x00,0x00,0x2f,0x00,0x00,0x00,0xf6,0x00,0x04,0x00, +0xb5,0x00,0x00,0x00,0x2f,0x00,0x00,0x00,0xf6,0x00,0x04,0x00, 0x24,0x00,0x00,0x00,0x23,0x00,0x00,0x00,0x01,0x00,0x00,0x00, 0xfa,0x00,0x04,0x00,0x31,0x00,0x00,0x00,0x23,0x00,0x00,0x00, 0x24,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x23,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x34,0x00,0x00,0x00, -0xb8,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x84,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x37,0x00,0x00,0x00,0x35,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x34,0x00,0x00,0x00,0x37,0x00,0x00,0x00, -0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x2d,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x3d,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x40,0x00,0x00,0x00,0x3f,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, -0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x43,0x00,0x00,0x00, -0x38,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x87,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x43,0x00,0x00,0x00, -0x35,0x00,0x00,0x00,0x82,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x49,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x43,0x00,0x00,0x00, -0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00,0x55,0x00,0x00,0x00, -0x51,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00,0x56,0x00,0x00,0x00, -0x55,0x00,0x00,0x00,0x73,0x00,0x04,0x00,0x17,0x00,0x00,0x00, -0x57,0x00,0x00,0x00,0x56,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x40,0x00,0x00,0x00, -0x59,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, -0x5b,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x5a,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, -0x5c,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x73,0x00,0x04,0x00, -0x17,0x00,0x00,0x00,0x5d,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x2a,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x06,0x00,0x00,0x00,0x68,0x00,0x00,0x00,0x67,0x00,0x00,0x00, -0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x68,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x6c,0x00,0x00,0x00,0x6a,0x00,0x00,0x00, -0x44,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x6e,0x00,0x00,0x00, -0x6f,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x6c,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x17,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x17,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x0c,0x00,0x08,0x00,0x17,0x00,0x00,0x00,0x74,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x57,0x00,0x00,0x00, -0x70,0x00,0x00,0x00,0x73,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x1f,0x00,0x00,0x00,0x74,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x81,0x00,0x00,0x00,0x6c,0x00,0x00,0x00, -0x80,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x6e,0x00,0x00,0x00, -0x82,0x00,0x00,0x00,0x66,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x81,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x17,0x00,0x00,0x00, -0x83,0x00,0x00,0x00,0x82,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x17,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x1f,0x00,0x00,0x00, -0x0c,0x00,0x08,0x00,0x17,0x00,0x00,0x00,0x87,0x00,0x00,0x00, -0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x5d,0x00,0x00,0x00, -0x83,0x00,0x00,0x00,0x86,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, -0x1f,0x00,0x00,0x00,0x87,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0x8a,0x00,0x00,0x00,0xb8,0x00,0x00,0x00, -0x35,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0x22,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x24,0x00,0x00,0x00,0xe0,0x00,0x04,0x00, -0x8b,0x00,0x00,0x00,0x8b,0x00,0x00,0x00,0x8c,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x8e,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x8e,0x00,0x00,0x00,0xf5,0x00,0x07,0x00,0x06,0x00,0x00,0x00, -0xb9,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x24,0x00,0x00,0x00, -0xa5,0x00,0x00,0x00,0x91,0x00,0x00,0x00,0xad,0x00,0x05,0x00, -0x30,0x00,0x00,0x00,0x94,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, -0x21,0x00,0x00,0x00,0xf6,0x00,0x04,0x00,0x90,0x00,0x00,0x00, -0x91,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0x94,0x00,0x00,0x00,0x8f,0x00,0x00,0x00,0x90,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0x8f,0x00,0x00,0x00,0xb1,0x00,0x05,0x00, -0x30,0x00,0x00,0x00,0x97,0x00,0x00,0x00,0x16,0x00,0x00,0x00, -0xb9,0x00,0x00,0x00,0xf7,0x00,0x03,0x00,0x99,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00,0x97,0x00,0x00,0x00, -0x98,0x00,0x00,0x00,0x99,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x98,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, -0x9d,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0xb9,0x00,0x00,0x00, -0x41,0x00,0x05,0x00,0x1e,0x00,0x00,0x00,0x9e,0x00,0x00,0x00, -0x1b,0x00,0x00,0x00,0x9d,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, -0x17,0x00,0x00,0x00,0x9f,0x00,0x00,0x00,0x9e,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x17,0x00,0x00,0x00,0xa1,0x00,0x00,0x00, -0x1f,0x00,0x00,0x00,0x81,0x00,0x05,0x00,0x17,0x00,0x00,0x00, -0xa2,0x00,0x00,0x00,0xa1,0x00,0x00,0x00,0x9f,0x00,0x00,0x00, -0x3e,0x00,0x03,0x00,0x1f,0x00,0x00,0x00,0xa2,0x00,0x00,0x00, -0xf9,0x00,0x02,0x00,0x99,0x00,0x00,0x00,0xf8,0x00,0x02,0x00, -0x99,0x00,0x00,0x00,0xe0,0x00,0x04,0x00,0x8b,0x00,0x00,0x00, -0x8b,0x00,0x00,0x00,0x8c,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x91,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x91,0x00,0x00,0x00, -0xc3,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, -0xb9,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, -0x8e,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x90,0x00,0x00,0x00, -0xaa,0x00,0x05,0x00,0x30,0x00,0x00,0x00,0xa7,0x00,0x00,0x00, -0x16,0x00,0x00,0x00,0x21,0x00,0x00,0x00,0xf7,0x00,0x03,0x00, -0xa9,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xfa,0x00,0x04,0x00, -0xa7,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0xa9,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xa8,0x00,0x00,0x00,0x41,0x00,0x05,0x00, -0x2b,0x00,0x00,0x00,0xae,0x00,0x00,0x00,0x2a,0x00,0x00,0x00, -0x35,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00, -0xaf,0x00,0x00,0x00,0xae,0x00,0x00,0x00,0x80,0x00,0x05,0x00, -0x06,0x00,0x00,0x00,0xb1,0x00,0x00,0x00,0xaf,0x00,0x00,0x00, -0x11,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x1e,0x00,0x00,0x00, -0xb2,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0x3d,0x00,0x04,0x00,0x17,0x00,0x00,0x00,0xb3,0x00,0x00,0x00, -0xb2,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x6e,0x00,0x00,0x00, -0xb4,0x00,0x00,0x00,0xad,0x00,0x00,0x00,0x21,0x00,0x00,0x00, -0xb1,0x00,0x00,0x00,0x3e,0x00,0x03,0x00,0xb4,0x00,0x00,0x00, -0xb3,0x00,0x00,0x00,0xf9,0x00,0x02,0x00,0xa9,0x00,0x00,0x00, -0xf8,0x00,0x02,0x00,0xa9,0x00,0x00,0x00,0xfd,0x00,0x01,0x00, -0x38,0x00,0x01,0x00, +0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x37,0x00,0x00,0x00, +0x35,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x80,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0xb5,0x00,0x00,0x00, +0x37,0x00,0x00,0x00,0x84,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x3d,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x2d,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, +0x3d,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x87,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x3f,0x00,0x00,0x00, +0x2e,0x00,0x00,0x00,0x8b,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x43,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x87,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x44,0x00,0x00,0x00, +0x43,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x82,0x00,0x05,0x00, +0x06,0x00,0x00,0x00,0x49,0x00,0x00,0x00,0x38,0x00,0x00,0x00, +0x43,0x00,0x00,0x00,0x41,0x00,0x06,0x00,0x54,0x00,0x00,0x00, +0x55,0x00,0x00,0x00,0x51,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0x40,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x4d,0x00,0x00,0x00, +0x56,0x00,0x00,0x00,0x55,0x00,0x00,0x00,0x73,0x00,0x04,0x00, +0x17,0x00,0x00,0x00,0x57,0x00,0x00,0x00,0x56,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x59,0x00,0x00,0x00, +0x40,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x54,0x00,0x00,0x00,0x5a,0x00,0x00,0x00,0x51,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x59,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x4d,0x00,0x00,0x00,0x5b,0x00,0x00,0x00,0x5a,0x00,0x00,0x00, +0x73,0x00,0x04,0x00,0x17,0x00,0x00,0x00,0x5c,0x00,0x00,0x00, +0x5b,0x00,0x00,0x00,0x41,0x00,0x05,0x00,0x2b,0x00,0x00,0x00, +0x66,0x00,0x00,0x00,0x2a,0x00,0x00,0x00,0x2e,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x06,0x00,0x00,0x00,0x67,0x00,0x00,0x00, +0x66,0x00,0x00,0x00,0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00, +0x69,0x00,0x00,0x00,0x67,0x00,0x00,0x00,0x49,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x6b,0x00,0x00,0x00, +0x69,0x00,0x00,0x00,0x44,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x6d,0x00,0x00,0x00,0x6e,0x00,0x00,0x00,0x65,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x6b,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x17,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x6e,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x17,0x00,0x00,0x00,0x72,0x00,0x00,0x00, +0x1f,0x00,0x00,0x00,0x0c,0x00,0x08,0x00,0x17,0x00,0x00,0x00, +0x73,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, +0x57,0x00,0x00,0x00,0x6f,0x00,0x00,0x00,0x72,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0x1f,0x00,0x00,0x00,0x73,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x7e,0x00,0x00,0x00, +0x6b,0x00,0x00,0x00,0x2e,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x6d,0x00,0x00,0x00,0x7f,0x00,0x00,0x00,0x65,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x17,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x7f,0x00,0x00,0x00, +0x3d,0x00,0x04,0x00,0x17,0x00,0x00,0x00,0x83,0x00,0x00,0x00, +0x1f,0x00,0x00,0x00,0x0c,0x00,0x08,0x00,0x17,0x00,0x00,0x00, +0x84,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x32,0x00,0x00,0x00, +0x5c,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x83,0x00,0x00,0x00, +0x3e,0x00,0x03,0x00,0x1f,0x00,0x00,0x00,0x84,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0x87,0x00,0x00,0x00, +0xb5,0x00,0x00,0x00,0x35,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, +0x22,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0x24,0x00,0x00,0x00, +0xe0,0x00,0x04,0x00,0x88,0x00,0x00,0x00,0x88,0x00,0x00,0x00, +0x89,0x00,0x00,0x00,0xaa,0x00,0x05,0x00,0x30,0x00,0x00,0x00, +0xa4,0x00,0x00,0x00,0x16,0x00,0x00,0x00,0x21,0x00,0x00,0x00, +0xf7,0x00,0x03,0x00,0xa6,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xfa,0x00,0x04,0x00,0xa4,0x00,0x00,0x00,0xa5,0x00,0x00,0x00, +0xa6,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xa5,0x00,0x00,0x00, +0x41,0x00,0x05,0x00,0x2b,0x00,0x00,0x00,0xab,0x00,0x00,0x00, +0x2a,0x00,0x00,0x00,0x35,0x00,0x00,0x00,0x3d,0x00,0x04,0x00, +0x06,0x00,0x00,0x00,0xac,0x00,0x00,0x00,0xab,0x00,0x00,0x00, +0x80,0x00,0x05,0x00,0x06,0x00,0x00,0x00,0xae,0x00,0x00,0x00, +0xac,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x41,0x00,0x05,0x00, +0x1e,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0x1b,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0x3d,0x00,0x04,0x00,0x17,0x00,0x00,0x00, +0xb0,0x00,0x00,0x00,0xaf,0x00,0x00,0x00,0x41,0x00,0x06,0x00, +0x6d,0x00,0x00,0x00,0xb1,0x00,0x00,0x00,0xaa,0x00,0x00,0x00, +0x21,0x00,0x00,0x00,0xae,0x00,0x00,0x00,0x3e,0x00,0x03,0x00, +0xb1,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0xf9,0x00,0x02,0x00, +0xa6,0x00,0x00,0x00,0xf8,0x00,0x02,0x00,0xa6,0x00,0x00,0x00, +0xfd,0x00,0x01,0x00,0x38,0x00,0x01,0x00, }; -const uint64_t mul_mat_vec_f16_f32_len = 2788; +const uint64_t mul_mat_vec_f16_f32_len = 2372; unsigned char mul_mat_vec_nc_f16_f32_data[] = { 0x03,0x02,0x23,0x07,0x00,0x05,0x01,0x00,0x0b,0x00,0x0d,0x00, diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 1d93ec6bb..bccc40bf5 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -817,7 +817,7 @@ static void ggml_vk_load_shaders() { // mulmat std::initializer_list warptile_l = { 128, 128, 128, 16, vk_device.subgroup_size * 2, 64, 2, 4, 4, vk_device.subgroup_size }; std::initializer_list warptile_m = { 128, 64, 64, 16, vk_device.subgroup_size, 32, 2, 4, 2, vk_device.subgroup_size }; - std::initializer_list warptile_s = { vk_device.subgroup_size, 32, 32, 8, 32, 32, 2, 2, 2, vk_device.subgroup_size }; + std::initializer_list warptile_s = { vk_device.subgroup_size, 32, 32, 16, 32, 32, 2, 2, 2, vk_device.subgroup_size }; std::array l_wg_denoms = {128, 128, 1 }; std::array m_wg_denoms = { 64, 64, 1 }; @@ -2873,7 +2873,8 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm if (op == GGML_OP_CPY) { GGML_ASSERT(!transfer_src0); GGML_ASSERT(!transfer_src1); - d_sz = dst->ne[1] * dst->nb[1]; + x_sz = ggml_nbytes(src0); + d_sz = ggml_nbytes(dst); if (extra->offset + d_sz >= d_D->size) { d_sz = VK_WHOLE_SIZE; @@ -4556,8 +4557,15 @@ GGML_CALL static bool ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml } ggml_vk_preallocate_buffers(); + int last_node = cgraph->n_nodes - 1; + + // If the last op in the cgraph isn't backend GPU, the command buffer doesn't get closed properly + while (last_node > 0 && cgraph->nodes[last_node]->backend != GGML_BACKEND_GPU) { + last_node -= 1; + } + for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_vk_build_graph(cgraph->nodes[i], i == cgraph->n_nodes - 1); + ggml_vk_build_graph(cgraph->nodes[i], i == last_node); } ggml_compute_params params = {}; diff --git a/ggml_vk_generate_shaders.py b/ggml_vk_generate_shaders.py index d0861fde4..6b1b82bf3 100644 --- a/ggml_vk_generate_shaders.py +++ b/ggml_vk_generate_shaders.py @@ -19,8 +19,8 @@ shader_int8_ext = """ # Type-specific defines shader_f16_defines = """ -#define QUANT_K 32 -#define QUANT_R 2 +#define QUANT_K 1 +#define QUANT_R 1 #define A_TYPE float16_t """ From dabcc5b471348e4ae03ddacc41e19ad75fb2f041 Mon Sep 17 00:00:00 2001 From: slaren Date: Wed, 31 Jan 2024 13:43:03 +0100 Subject: [PATCH 2/9] ggml : limit n_threads to the max n_tasks (#5238) --- ggml.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ggml.c b/ggml.c index b2c8baaa8..afd9c6c61 100644 --- a/ggml.c +++ b/ggml.c @@ -16985,12 +16985,16 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa struct ggml_cplan cplan; memset(&cplan, 0, sizeof(struct ggml_cplan)); + int max_tasks = 1; + // thread scheduling for the different operations + work buffer size estimation for (int i = 0; i < cgraph->n_nodes; i++) { struct ggml_tensor * node = cgraph->nodes[i]; const int n_tasks = ggml_get_n_tasks(node, n_threads); + max_tasks = MAX(max_tasks, n_tasks); + size_t cur = 0; switch (node->op) { @@ -17157,7 +17161,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa work_size += CACHE_LINE_SIZE*(n_threads - 1); } - cplan.n_threads = n_threads; + cplan.n_threads = MIN(max_tasks, n_threads); cplan.work_size = work_size; cplan.work_data = NULL; From b2b9f025e7821e78bd501d75d01838c26de07a57 Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Wed, 31 Jan 2024 21:04:46 +0800 Subject: [PATCH 3/9] format license text, restore apache license by legal suggestion (#5233) --- examples/sycl/ls-sycl-device.cpp | 10 ++++++---- ggml-sycl.cpp | 15 +++++++++++---- ggml-sycl.h | 9 +++++---- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/examples/sycl/ls-sycl-device.cpp b/examples/sycl/ls-sycl-device.cpp index 42847154a..52442e4ca 100644 --- a/examples/sycl/ls-sycl-device.cpp +++ b/examples/sycl/ls-sycl-device.cpp @@ -1,7 +1,9 @@ -/*MIT license - Copyright (C) 2024 Intel Corporation - SPDX-License-Identifier: MIT -*/ +// +// MIT license +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: MIT +// + #include "ggml-sycl.h" diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 3fc346975..1cc55ef52 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -1,7 +1,14 @@ -/*MIT license - Copyright (C) 2024 Intel Corporation - SPDX-License-Identifier: MIT -*/ +// +// MIT license +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: MIT +// + +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// #include #include diff --git a/ggml-sycl.h b/ggml-sycl.h index 0eabb53cc..ba0c61473 100644 --- a/ggml-sycl.h +++ b/ggml-sycl.h @@ -1,7 +1,8 @@ -/*MIT license - Copyright (C) 2024 Intel Corporation - SPDX-License-Identifier: MIT -*/ +// +// MIT license +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: MIT +// #pragma once From 15606309a05ccf7fadbaad5538cb7c32acb1e06b Mon Sep 17 00:00:00 2001 From: JidongZhang-THU <1119708529@qq.com> Date: Wed, 31 Jan 2024 21:10:15 +0800 Subject: [PATCH 4/9] llava : add MobileVLM support (#5132) * New Feature: 1. Sum_Rows: fix cuda kernel overflow fix block shape error when nrows too big 2. Im2Col: Support Batch in cuda Support f32 to f32 both in cpu && cuda 3. DepthWiseConv: Support by Im2Col && MulMat 4. Pool_2d: Supoort avg pooling in cuda 5. HardSigmoid: Imp in cuda 6. HardSwish: Imp in cuda * fix tabs instead of spaces * code clean * CUDA POOL2D * ADD POOL2D test case in test-backend-ops.cpp * code clean * fix pool2d_kernel nits * fix bug in pool2d kernel * fix avg pooling, count_include_pad nits * test-backend-ops : add more pool_2d tests * cuda : fix warnings and formatting * ggml : check types in release builds too in pool_2d * test-backend-ops : remove f16 pool_2d tests * cuda : more style fixes * Add assert in ggml_cuda_op_pool2d * pool2d float padding fallback * test-backend-ops : add dst_type to im2col --------- Co-authored-by: slaren --- examples/llava/MobileVLM-README.md | 58 +++++++- ggml-cuda.cu | 209 ++++++++++++++++++++++++++--- ggml.c | 118 +++++++++++++--- ggml.h | 3 +- tests/test-backend-ops.cpp | 74 +++++++++- 5 files changed, 421 insertions(+), 41 deletions(-) diff --git a/examples/llava/MobileVLM-README.md b/examples/llava/MobileVLM-README.md index c6258eba6..9eba791da 100644 --- a/examples/llava/MobileVLM-README.md +++ b/examples/llava/MobileVLM-README.md @@ -111,17 +111,71 @@ llama_print_timings: eval time = 1279.03 ms / 18 runs ( 71.06 m llama_print_timings: total time = 34570.79 ms ``` +## Orin compile and run +### compile +```sh +make LLAMA_CUBLAS=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32 +``` + +### run on Orin +### case 1 +**input** +```sh +./llava-cli \ + -m /data/local/tmp/ggml-model-q4_k.gguf \ + --mmproj /data/local/tmp/mmproj-model-f16.gguf \ + --image /data/local/tmp/demo.jpeg \ + -p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: \nWho is the author of this book? \nAnswer the question using a single word or phrase. ASSISTANT:" \ + --n-gpu-layers 999 +``` +**output** +```sh + +encode_image_with_clip: image encoded in 296.62 ms by CLIP ( 2.06 ms per image patch) + + Susan Wise Bauer + +llama_print_timings: load time = 1067.64 ms +llama_print_timings: sample time = 1.53 ms / 6 runs ( 0.25 ms per token, 3934.43 tokens per second) +llama_print_timings: prompt eval time = 306.84 ms / 246 tokens ( 1.25 ms per token, 801.72 tokens per second) +llama_print_timings: eval time = 91.50 ms / 6 runs ( 15.25 ms per token, 65.58 tokens per second) +llama_print_timings: total time = 1352.63 ms / 252 tokens +``` + +### case 2 +**input** +```sh +./llava-cli \ + -m /data/local/tmp/ggml-model-q4_k.gguf \ + --mmproj /data/local/tmp/mmproj-model-f16.gguf \ + -p "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: \nWhat is in the image? ASSISTANT:" \ + --n-gpu-layers 999 + +``` +**output** +```sh +encode_image_with_clip: image encoded in 302.15 ms by CLIP ( 2.10 ms per image patch) + + The image features a cat lying in the grass. + +llama_print_timings: load time = 1057.07 ms +llama_print_timings: sample time = 3.27 ms / 11 runs ( 0.30 ms per token, 3360.83 tokens per second) +llama_print_timings: prompt eval time = 213.60 ms / 232 tokens ( 0.92 ms per token, 1086.14 tokens per second) +llama_print_timings: eval time = 166.65 ms / 11 runs ( 15.15 ms per token, 66.01 tokens per second) +llama_print_timings: total time = 1365.47 ms / 243 tokens +``` + ## Minor shortcomings The `n_patch` of output in `ldp` is 1/4 of the input. In order to implement quickly, we uniformly modified `clip_n_patches` function to a quarter. when counting the time consumption, the calculated time will be 4 times bigger than the real cost. ## TODO -- [ ] Support non-CPU backend for the new operators, such as `depthwise`, `hardswish`, `hardsigmoid` +- [x] Support non-CPU backend for the new operators, such as `depthwise`, `hardswish`, `hardsigmoid` - [ ] Optimize LDP projector performance - Optimize the structure definition to avoid unnecessary memory rearrangements, to reduce the use of `ggml_permute_cpy`; - Optimize operator implementation (ARM CPU/NVIDIA GPU): such as depthwise conv, hardswish, hardsigmoid, etc. -- [ ] run MobileVLM on `Jetson Orin` +- [x] run MobileVLM on `Jetson Orin` - [ ] Support more model variants, such as `MobileVLM-3B`. diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 949bc8a1c..e56595742 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -524,6 +524,8 @@ static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong #define CUDA_SILU_BLOCK_SIZE 256 #define CUDA_TANH_BLOCK_SIZE 256 #define CUDA_RELU_BLOCK_SIZE 256 +#define CUDA_HARDSIGMOID_BLOCK_SIZE 256 +#define CUDA_HARDSWISH_BLOCK_SIZE 256 #define CUDA_SQR_BLOCK_SIZE 256 #define CUDA_CPY_BLOCK_SIZE 32 #define CUDA_SCALE_BLOCK_SIZE 256 @@ -540,6 +542,7 @@ static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong #define CUDA_PAD_BLOCK_SIZE 256 #define CUDA_ACC_BLOCK_SIZE 256 #define CUDA_IM2COL_BLOCK_SIZE 256 +#define CUDA_POOL2D_BLOCK_SIZE 256 #define CUDA_Q8_0_NE_ALIGN 2048 @@ -823,6 +826,24 @@ static __global__ void relu_f32(const float * x, float * dst, const int k) { dst[i] = fmaxf(x[i], 0); } +static __global__ void hardsigmoid_f32(const float * x, float * dst, const int k) { + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + if (i >= k) { + return; + } + dst[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); +} + +static __global__ void hardswish_f32(const float * x, float * dst, const int k) { + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + if (i >= k) { + return; + } + dst[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); +} + static __global__ void leaky_relu_f32(const float * x, float * dst, const int k, const float negative_slope) { const int i = blockDim.x*blockIdx.x + threadIdx.x; if (i >= k) { @@ -5823,7 +5844,7 @@ static __global__ void alibi_f32(const float * x, float * dst, const int ncols, } static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) { - const int row = blockIdx.y; + const int row = blockIdx.x; const int col = threadIdx.x; float sum = 0.0f; @@ -6145,9 +6166,10 @@ static __global__ void clamp_f32(const float * x, float * dst, const float min, dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]); } -static __global__ void im2col_f32_f16( - const float * x, half * dst, - int offset_delta, int IW, int IH, int OW, int KW, int KH, int pelements, int CHW, +template +static __global__ void im2col_kernel( + const float * x, T * dst, int batch_offset, + int offset_delta, int IC, int IW, int IH, int OH, int OW, int KW, int KH, int pelements, int CHW, int s0, int s1, int p0, int p1, int d0, int d1) { const int i = threadIdx.x + blockIdx.x * blockDim.x; if (i >= pelements) { @@ -6160,21 +6182,73 @@ static __global__ void im2col_f32_f16( const int ky = (i - kd) / OW; const int ix = i % OW; + const int oh = blockIdx.y; + const int batch = blockIdx.z / IC; + const int ic = blockIdx.z % IC; + const int64_t iiw = ix * s0 + kx * d0 - p0; - const int64_t iih = blockIdx.y * s1 + ky * d1 - p1; + const int64_t iih = oh * s1 + ky * d1 - p1; const int64_t offset_dst = - (blockIdx.y * OW + ix) * CHW + - (blockIdx.z * (KW * KH) + ky * KW + kx); + ((batch * OH + oh) * OW + ix) * CHW + + (ic * (KW * KH) + ky * KW + kx); if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) { - dst[offset_dst] = __float2half(0.0f); + dst[offset_dst] = 0.0f; } else { - const int64_t offset_src = blockIdx.z * offset_delta; - dst[offset_dst] = __float2half(x[offset_src + iih * IW + iiw]); + const int64_t offset_src = ic * offset_delta + batch * batch_offset; + dst[offset_dst] = x[offset_src + iih * IW + iiw]; } } +template +static __global__ void pool2d_nchw_kernel( + const int ih, const int iw, const int oh, const int ow, + const int kh, const int kw, const int sh, const int sw, + const int ph, const int pw, const int parallel_elements, + const Ti* src, To* dst, const enum ggml_op_pool op) { + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx >= parallel_elements) { + return; + } + + const int I_HW = ih * iw; + const int O_HW = oh * ow; + const int nc = idx / O_HW; + const int cur_oh = idx % O_HW / ow; + const int cur_ow = idx % O_HW % ow; + const Ti* i_ptr = src + nc * I_HW; + To* o_ptr = dst + nc * O_HW; + const int start_h = cur_oh * sh - ph; + const int bh = max(0, start_h); + const int eh = min(ih, start_h + kh); + const int start_w = cur_ow * sw - pw; + const int bw = max(0, start_w); + const int ew = min(iw, start_w + kw); + const To scale = 1. / (kh * kw); + To res = 0; + + switch (op) { + case GGML_OP_POOL_AVG: res = 0; break; + case GGML_OP_POOL_MAX: res = -FLT_MAX; break; + } + + for (int i = bh; i < eh; i += 1) { + for (int j = bw; j < ew; j += 1) { + #if __CUDA_ARCH__ >= 350 + Ti cur = __ldg(i_ptr + i * iw + j); + #else + Ti cur = i_ptr[i * iw + j]; + #endif + switch (op) { + case GGML_OP_POOL_AVG: res += cur * scale; break; + case GGML_OP_POOL_MAX: res = max(res, (To)cur); break; + } + } + } + o_ptr[cur_oh * ow + cur_ow] = res; +} + template static void get_rows_cuda(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const void * src0_dd, const int32_t * src1_dd, float * dst_dd, cudaStream_t stream) { @@ -6388,6 +6462,16 @@ static void relu_f32_cuda(const float * x, float * dst, const int k, cudaStream_ relu_f32<<>>(x, dst, k); } +static void hardsigmoid_f32_cuda(const float * x, float * dst, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_HARDSIGMOID_BLOCK_SIZE - 1) / CUDA_HARDSIGMOID_BLOCK_SIZE; + hardsigmoid_f32<<>>(x, dst, k); +} + +static void hardswish_f32_cuda(const float * x, float * dst, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_HARDSWISH_BLOCK_SIZE - 1) / CUDA_HARDSWISH_BLOCK_SIZE; + hardswish_f32<<>>(x, dst, k); +} + static void leaky_relu_f32_cuda(const float * x, float * dst, const int k, const float negative_slope, cudaStream_t stream) { const int num_blocks = (k + CUDA_RELU_BLOCK_SIZE - 1) / CUDA_RELU_BLOCK_SIZE; leaky_relu_f32<<>>(x, dst, k, negative_slope); @@ -7475,7 +7559,7 @@ static void alibi_f32_cuda(const float * x, float * dst, const int ncols, const static void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) { const dim3 block_dims(WARP_SIZE, 1, 1); - const dim3 block_nums(1, nrows, 1); + const dim3 block_nums(nrows, 1, 1); k_sum_rows_f32<<>>(x, dst, ncols); } @@ -7587,14 +7671,15 @@ static void soft_max_f32_cuda(const float * x, const float * y, float * dst, con } } -static void im2col_f32_f16_cuda(const float* x, half* dst, +template +static void im2col_cuda(const float* x, T* dst, int IW, int IH, int OW, int OH, int KW, int KH, int IC, - int offset_delta, + int batch, int batch_offset, int offset_delta, int s0,int s1,int p0,int p1,int d0,int d1, cudaStream_t stream) { const int parallel_elements = OW * KW * KH; const int num_blocks = (parallel_elements + CUDA_IM2COL_BLOCK_SIZE - 1) / CUDA_IM2COL_BLOCK_SIZE; - dim3 block_nums(num_blocks, OH, IC); - im2col_f32_f16<<>>(x, dst, offset_delta, IW, IH, OW, KW, KH, parallel_elements, (IC * KH * KW), s0, s1, p0, p1, d0, d1); + dim3 block_nums(num_blocks, OH, batch * IC); + im2col_kernel<<>>(x, dst, batch_offset, offset_delta, IC, IW, IH, OH, OW, KW, KH, parallel_elements, (IC * KH * KW), s0, s1, p0, p1, d0, d1); } // buffer pool for cuda @@ -8179,6 +8264,34 @@ static void ggml_cuda_op_relu( (void) src1_dd; } +static void ggml_cuda_op_hardsigmoid( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, + const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream) { + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + hardsigmoid_f32_cuda(src0_dd, dst_dd, ggml_nelements(src0), main_stream); + + (void) src1; + (void) dst; + (void) src1_dd; +} + +static void ggml_cuda_op_hardswish( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, + const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream) { + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + hardswish_f32_cuda(src0_dd, dst_dd, ggml_nelements(src0), main_stream); + + (void) src1; + (void) dst; + (void) src1_dd; +} + static void ggml_cuda_op_leaky_relu( const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream) { @@ -8810,13 +8923,46 @@ static void ggml_cuda_op_alibi( (void) src1_dd; } +static void ggml_cuda_op_pool2d( + const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, + const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream) { + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + const int32_t * opts = (const int32_t *)dst->op_params; + enum ggml_op_pool op = static_cast(opts[0]); + const int k0 = opts[1]; + const int k1 = opts[2]; + const int s0 = opts[3]; + const int s1 = opts[4]; + const int p0 = opts[5]; + const int p1 = opts[6]; + + const int64_t IH = src0->ne[1]; + const int64_t IW = src0->ne[0]; + + const int64_t N = dst->ne[3]; + const int64_t OC = dst->ne[2]; + const int64_t OH = dst->ne[1]; + const int64_t OW = dst->ne[0]; + + const int parallel_elements = N * OC * OH * OW; + const int num_blocks = (parallel_elements + CUDA_POOL2D_BLOCK_SIZE - 1) / CUDA_POOL2D_BLOCK_SIZE; + dim3 block_nums(num_blocks); + pool2d_nchw_kernel<<>>(IH, IW, OH, OW, k1, k0, s1, s0, p1, p0, parallel_elements, src0_dd, dst_dd, op); + + (void) src1; + (void) src1_dd; +} + static void ggml_cuda_op_im2col( const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream) { GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F16); + GGML_ASSERT( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32); const int32_t s0 = ((const int32_t*)(dst->op_params))[0]; const int32_t s1 = ((const int32_t*)(dst->op_params))[1]; @@ -8838,8 +8984,14 @@ static void ggml_cuda_op_im2col( const int64_t OW = dst->ne[1]; const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32 + const int64_t batch = src1->ne[3]; + const size_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32 - im2col_f32_f16_cuda(src1_dd, (half*) dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream); + if(dst->type == GGML_TYPE_F16) { + im2col_cuda(src1_dd, (half*) dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream); + } else { + im2col_cuda(src1_dd, (float*) dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream); + } (void) src0; (void) src0_dd; @@ -9435,6 +9587,13 @@ static void ggml_cuda_relu(const ggml_tensor * src0, const ggml_tensor * src1, g ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_relu); } +static void ggml_cuda_hardsigmoid(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_hardsigmoid); +} + +static void ggml_cuda_hardswish(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_hardswish); +} static void ggml_cuda_leaky_relu(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_leaky_relu); } @@ -10220,6 +10379,10 @@ static void ggml_cuda_alibi(const ggml_tensor * src0, const ggml_tensor * src1, ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_alibi); } +static void ggml_cuda_pool2d(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_pool2d); +} + static void ggml_cuda_im2col(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { ggml_cuda_op_flatten(src0, src1, dst, ggml_cuda_op_im2col); } @@ -10321,6 +10484,12 @@ GGML_CALL bool ggml_cuda_compute_forward(struct ggml_compute_params * params, st case GGML_UNARY_OP_RELU: func = ggml_cuda_relu; break; + case GGML_UNARY_OP_HARDSIGMOID: + func = ggml_cuda_hardsigmoid; + break; + case GGML_UNARY_OP_HARDSWISH: + func = ggml_cuda_hardswish; + break; default: return false; } @@ -10395,6 +10564,9 @@ GGML_CALL bool ggml_cuda_compute_forward(struct ggml_compute_params * params, st case GGML_OP_IM2COL: func = ggml_cuda_im2col; break; + case GGML_OP_POOL_2D: + func = ggml_cuda_pool2d; + break; case GGML_OP_SUM_ROWS: func = ggml_cuda_sum_rows; break; @@ -11123,6 +11295,8 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_SILU: case GGML_UNARY_OP_RELU: + case GGML_UNARY_OP_HARDSIGMOID: + case GGML_UNARY_OP_HARDSWISH: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_TANH: return true; @@ -11221,6 +11395,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons case GGML_OP_ROPE: case GGML_OP_ALIBI: case GGML_OP_IM2COL: + case GGML_OP_POOL_2D: case GGML_OP_SUM_ROWS: case GGML_OP_ARGSORT: case GGML_OP_ACC: diff --git a/ggml.c b/ggml.c index afd9c6c61..ee994c875 100644 --- a/ggml.c +++ b/ggml.c @@ -5349,7 +5349,7 @@ GGML_API struct ggml_tensor * ggml_conv_1d( int s0, int p0, int d0) { - struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false); // [N, OL, IC * K] + struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K] struct ggml_tensor * result = ggml_mul_mat(ctx, @@ -5427,16 +5427,15 @@ struct ggml_tensor * ggml_conv_depthwise_2d( int p1, int d0, int d1) { + struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]); struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]), - s0, s1, p0, p1, d0, d1, true); // [N * IC, OH, OW, KH * KW] - - struct ggml_tensor * result = - ggml_mul_mat(ctx, - ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1), // [OC,1, KH, KW] => [1, OC, 1, KH * KW] - ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3])); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW] + s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW] + struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW] + new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW] + struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b); result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW] return result; @@ -5457,7 +5456,8 @@ struct ggml_tensor * ggml_im2col( int p1, int d0, int d1, - bool is_2D) { + bool is_2D, + enum ggml_type dst_type) { if(is_2D) { GGML_ASSERT(a->ne[2] == b->ne[2]); @@ -5481,7 +5481,7 @@ struct ggml_tensor * ggml_im2col( is_2D ? b->ne[3] : 1, }; - struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne); + struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne); int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) }; ggml_set_op_params(result, params, sizeof(params)); @@ -5506,7 +5506,7 @@ struct ggml_tensor * ggml_conv_2d( int p1, int d0, int d1) { - struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true); // [N, OH, OW, IC * KH * KW] + struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW] struct ggml_tensor * result = ggml_mul_mat(ctx, @@ -5632,12 +5632,13 @@ struct ggml_tensor * ggml_pool_2d( is_node = true; } + struct ggml_tensor * result; const int64_t ne[3] = { ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), ggml_calc_pool_output_size(a->ne[1], k1, s1, p1), a->ne[2], }; - struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); + result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); int32_t params[] = { op, k0, k1, s0, s1, p0, p1 }; ggml_set_op_params(result, params, sizeof(params)); @@ -5645,7 +5646,6 @@ struct ggml_tensor * ggml_pool_2d( result->op = GGML_OP_POOL_2D; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->src[0] = a; - return result; } @@ -12493,6 +12493,92 @@ static void ggml_compute_forward_conv_transpose_1d( } } +// src0: kernel [OC, IC, KH, KW] +// src1: image [N, IC, IH, IW] +// dst: result [N, OH, OW, IC*KH*KW] +static void ggml_compute_forward_im2col_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + + int64_t t0 = ggml_perf_time_us(); + UNUSED(t0); + + GGML_TENSOR_BINARY_OP_LOCALS; + + const int32_t s0 = ((const int32_t *)(dst->op_params))[0]; + const int32_t s1 = ((const int32_t *)(dst->op_params))[1]; + const int32_t p0 = ((const int32_t *)(dst->op_params))[2]; + const int32_t p1 = ((const int32_t *)(dst->op_params))[3]; + const int32_t d0 = ((const int32_t *)(dst->op_params))[4]; + const int32_t d1 = ((const int32_t *)(dst->op_params))[5]; + const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1; + + const int ith = params->ith; + const int nth = params->nth; + + const int64_t N = is_2D ? ne13 : ne12; + const int64_t IC = is_2D ? ne12 : ne11; + const int64_t IH = is_2D ? ne11 : 1; + const int64_t IW = ne10; + + const int64_t KH = is_2D ? ne01 : 1; + const int64_t KW = ne00; + + const int64_t OH = is_2D ? ne2 : 1; + const int64_t OW = ne1; + + int ofs0 = is_2D ? nb13 : nb12; + int ofs1 = is_2D ? nb12 : nb11; + + GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); + GGML_ASSERT(nb10 == sizeof(float)); + + if (params->type == GGML_TASK_INIT) { + return; + } + + if (params->type == GGML_TASK_FINALIZE) { + return; + } + + // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW] + { + float * const wdata = (float *) dst->data; + + for (int64_t in = 0; in < N; in++) { + for (int64_t ioh = 0; ioh < OH; ioh++) { // 1 + for (int64_t iow = 0; iow < OW; iow++) { + for (int64_t iic = ith; iic < IC; iic += nth) { + + // micro kernel + float * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW] + const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW] + + for (int64_t ikh = 0; ikh < KH; ikh++) { // 1 + for (int64_t ikw = 0; ikw < KW; ikw++) { + const int64_t iiw = iow*s0 + ikw*d0 - p0; + const int64_t iih = ioh*s1 + ikh*d1 - p1; + + if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) { + dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0; + } else { + dst_data[iic*(KH*KW) + ikh*KW + ikw] = (src_data[iih*IW + iiw]); + } + } + } + } + } + } + } + } +} + + // src0: kernel [OC, IC, KH, KW] // src1: image [N, IC, IH, IW] // dst: result [N, OH, OW, IC*KH*KW] @@ -12583,14 +12669,14 @@ static void ggml_compute_forward_im2col( const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { - switch (src0->type) { + switch (dst->type) { case GGML_TYPE_F16: { ggml_compute_forward_im2col_f16(params, src0, src1, dst); } break; case GGML_TYPE_F32: { - GGML_ASSERT(false); + ggml_compute_forward_im2col_f32(params, src0, src1, dst); } break; default: { @@ -12781,8 +12867,8 @@ static void ggml_compute_forward_pool_2d( const struct ggml_compute_params * params, const struct ggml_tensor * src, struct ggml_tensor * dst) { - assert(src->type == GGML_TYPE_F32); - assert(params->ith == 0); + GGML_ASSERT(src->type == GGML_TYPE_F32); + GGML_ASSERT(params->ith == 0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; diff --git a/ggml.h b/ggml.h index afc87b843..e0a4799f3 100644 --- a/ggml.h +++ b/ggml.h @@ -1495,7 +1495,8 @@ extern "C" { int p1, int d0, int d1, - bool is_2D); + bool is_2D, + enum ggml_type dst_type); GGML_API struct ggml_tensor * ggml_conv_depthwise_2d( struct ggml_context * ctx, diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 1d29070b6..eb06123d2 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -227,6 +227,14 @@ static std::string var_to_str(ggml_type type) { return ggml_type_name(type); } +static std::string var_to_str(ggml_op_pool pool) { + switch (pool) { + case GGML_OP_POOL_AVG: return "avg"; + case GGML_OP_POOL_MAX: return "max"; + default: return std::to_string(pool); + } +} + #define VARS_TO_STR1(a) VAR_TO_STR(a) #define VARS_TO_STR2(a, b) VAR_TO_STR(a) + "," + VAR_TO_STR(b) #define VARS_TO_STR3(a, b, c) VAR_TO_STR(a) + "," + VARS_TO_STR2(b, c) @@ -238,6 +246,7 @@ static std::string var_to_str(ggml_type type) { #define VARS_TO_STR9(a, b, c, d, e, f, g, h, i) VAR_TO_STR(a) + "," + VARS_TO_STR8(b, c, d, e, f, g, h, i) #define VARS_TO_STR10(a, b, c, d, e, f, g, h, i, j) VAR_TO_STR(a) + "," + VARS_TO_STR9(b, c, d, e, f, g, h, i, j) #define VARS_TO_STR11(a, b, c, d, e, f, g, h, i, j, k) VAR_TO_STR(a) + "," + VARS_TO_STR10(b, c, d, e, f, g, h, i, j, k) +#define VARS_TO_STR12(a, b, c, d, e, f, g, h, i, j, k, l) VAR_TO_STR(a) + "," + VARS_TO_STR11(b, c, d, e, f, g, h, i, j, k, l) #ifdef GGML_USE_SYCL static bool inline _isinf(float f) { @@ -1162,10 +1171,45 @@ struct test_alibi : public test_case { } }; +// GGML_OP_POOL2D +struct test_pool2d : public test_case { + enum ggml_op_pool pool_type; + const ggml_type type_input; + const std::array ne_input; + // kernel size + const int k0; + const int k1; + // stride + const int s0; + const int s1; + // padding + const int p0; + const int p1; + + std::string vars() override { + return VARS_TO_STR9(pool_type, type_input, ne_input, k0, k1, s0, s1, p0, p1); + } + + test_pool2d(ggml_op_pool pool_type = GGML_OP_POOL_AVG, + ggml_type type_input = GGML_TYPE_F32, + std::array ne_input = {10, 10, 3, 1}, // [input_width, input_height, input_channels, 1] + int k0 = 3, int k1 = 3, + int s0 = 1, int s1 = 1, + int p0 = 1, int p1 = 1) + : pool_type(pool_type), type_input(type_input), ne_input(ne_input), k0(k0), k1(k1), s0(s0), s1(s1), p0(p0), p1(p1) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data()); + ggml_tensor * out = ggml_pool_2d(ctx, input, pool_type, k0, k1, s0, s1, p0, p1); + return out; + } +}; + // GGML_OP_IM2COL struct test_im2col : public test_case { const ggml_type type_input; const ggml_type type_kernel; + const ggml_type dst_type; const std::array ne_input; const std::array ne_kernel; // stride @@ -1181,22 +1225,22 @@ struct test_im2col : public test_case { const bool is_2D; std::string vars() override { - return VARS_TO_STR11(type_input, type_kernel, ne_input, ne_kernel, s0, s1, p0, p1, d0, d1, is_2D); + return VARS_TO_STR12(type_input, type_kernel, dst_type, ne_input, ne_kernel, s0, s1, p0, p1, d0, d1, is_2D); } - test_im2col(ggml_type type_input = GGML_TYPE_F32, ggml_type type_kernel = GGML_TYPE_F16, + test_im2col(ggml_type type_input = GGML_TYPE_F32, ggml_type type_kernel = GGML_TYPE_F16, ggml_type dst_type = GGML_TYPE_F32, std::array ne_input = {10, 10, 3, 1}, // [input_width, input_height, input_channels, 1] std::array ne_kernel = {3, 3, 3, 1}, // [kernel_width, kernel_height, input_channels, 1] int s0 = 1, int s1 = 1, int p0 = 1, int p1 = 1, int d0 = 1, int d1 = 1, bool is_2D = true) - : type_input(type_input), type_kernel(type_kernel), ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), s1(s1), p0(p0), p1(p1), d0(d0), d1(d1), is_2D(is_2D) {} + : type_input(type_input), type_kernel(type_kernel), dst_type(dst_type), ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), s1(s1), p0(p0), p1(p1), d0(d0), d1(d1), is_2D(is_2D) {} ggml_tensor * build_graph(ggml_context * ctx) override { ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data()); ggml_tensor * kernel = ggml_new_tensor(ctx, type_kernel, 4, ne_kernel.data()); - ggml_tensor * out = ggml_im2col(ctx, kernel, input, s0, s1, p0, p1, d0, d1, is_2D); + ggml_tensor * out = ggml_im2col(ctx, kernel, input, s0, s1, p0, p1, d0, d1, is_2D, dst_type); return out; } }; @@ -1912,6 +1956,27 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op } } + for (ggml_type type_input : {GGML_TYPE_F32}) { + for (ggml_op_pool pool_type : {GGML_OP_POOL_AVG, GGML_OP_POOL_MAX}) { + for (int k0 : {1, 3}) { + for (int k1 : {1, 3}) { + for (int s0 : {1, 2}) { + for (int s1 : {1, 2}) { + for (int p0 : {0, 1}) { + for (int p1 : {0, 1}) { + test_cases.emplace_back(new test_pool2d(pool_type, type_input, {10, 10, 3, 1}, k0, k1, s0, s1, p0, p1)); + } + } + } + } + } + } + } + } + + test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32)); + test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16)); + test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 1, 1, 1})); test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {2, 1, 1, 1})); test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 10, 10, 10}, {1, 2, 1, 1})); @@ -2049,7 +2114,6 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op } test_cases.emplace_back(new test_alibi()); - test_cases.emplace_back(new test_im2col()); test_cases.emplace_back(new test_concat(GGML_TYPE_F32)); test_cases.emplace_back(new test_concat(GGML_TYPE_I32)); From efb7bdbbd061d087c788598b97992c653f992ddd Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 31 Jan 2024 15:35:41 +0200 Subject: [PATCH 5/9] metal : add im2col F32 dst support (#5132) --- ggml-metal.m | 13 ++++++++++--- ggml-metal.metal | 33 +++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/ggml-metal.m b/ggml-metal.m index f87859552..5260ed827 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -135,6 +135,7 @@ enum ggml_metal_kernel_type { GGML_METAL_KERNEL_TYPE_ROPE_F16, GGML_METAL_KERNEL_TYPE_ALIBI_F32, GGML_METAL_KERNEL_TYPE_IM2COL_F16, + GGML_METAL_KERNEL_TYPE_IM2COL_F32, GGML_METAL_KERNEL_TYPE_UPSCALE_F32, GGML_METAL_KERNEL_TYPE_PAD_F32, GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC, @@ -506,6 +507,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ROPE_F16, rope_f16, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ALIBI_F32, alibi_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_IM2COL_F16, im2col_f16, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_IM2COL_F32, im2col_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_UPSCALE_F32, upscale_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_PAD_F32, pad_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC, argsort_f32_i32_asc, true); @@ -630,6 +632,10 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const case GGML_OP_ALIBI: case GGML_OP_ROPE: case GGML_OP_IM2COL: + return true; + case GGML_OP_POOL_1D: + case GGML_OP_POOL_2D: + return false; case GGML_OP_UPSCALE: case GGML_OP_PAD: case GGML_OP_ARGSORT: @@ -2015,7 +2021,7 @@ static bool ggml_metal_graph_compute( { GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F16); + GGML_ASSERT( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32); const int32_t s0 = ((const int32_t *)(dst->op_params))[0]; const int32_t s1 = ((const int32_t *)(dst->op_params))[1]; @@ -2023,6 +2029,7 @@ static bool ggml_metal_graph_compute( const int32_t p1 = ((const int32_t *)(dst->op_params))[3]; const int32_t d0 = ((const int32_t *)(dst->op_params))[4]; const int32_t d1 = ((const int32_t *)(dst->op_params))[5]; + const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1; const int32_t N = src1->ne[is_2D ? 3 : 2]; @@ -2043,8 +2050,8 @@ static bool ggml_metal_graph_compute( id pipeline = nil; - switch (src0->type) { - case GGML_TYPE_F32: GGML_ASSERT(false && "not implemented"); break; + switch (dst->type) { + case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F32].pipeline; break; case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_IM2COL_F16].pipeline; break; default: GGML_ASSERT(false); }; diff --git a/ggml-metal.metal b/ggml-metal.metal index 2614d82e8..efed6ad46 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -1775,9 +1775,29 @@ kernel void kernel_rope( template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope; template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope; -kernel void kernel_im2col_f16( +typedef void (im2col_t)( device const float * x, - device half * dst, + device char * dst, + constant int32_t & ofs0, + constant int32_t & ofs1, + constant int32_t & IW, + constant int32_t & IH, + constant int32_t & CHW, + constant int32_t & s0, + constant int32_t & s1, + constant int32_t & p0, + constant int32_t & p1, + constant int32_t & d0, + constant int32_t & d1, + uint3 tgpig[[threadgroup_position_in_grid]], + uint3 tgpg[[threadgroups_per_grid]], + uint3 tpitg[[thread_position_in_threadgroup]], + uint3 ntg[[threads_per_threadgroup]]); + +template +kernel void kernel_im2col( + device const float * x, + device char * dst, constant int32_t & ofs0, constant int32_t & ofs1, constant int32_t & IW, @@ -1800,14 +1820,19 @@ kernel void kernel_im2col_f16( (tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW + (tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]); + device T * pdst = (device T *) (dst); + if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) { - dst[offset_dst] = 0.0f; + pdst[offset_dst] = 0.0f; } else { const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1; - dst[offset_dst] = x[offset_src + iih * IW + iiw]; + pdst[offset_dst] = x[offset_src + iih * IW + iiw]; } } +template [[host_name("kernel_im2col_f32")]] kernel im2col_t kernel_im2col; +template [[host_name("kernel_im2col_f16")]] kernel im2col_t kernel_im2col; + kernel void kernel_upscale_f32( device const char * src0, device char * dst, From 5cb04dbc16d1da38c8fdcc0111b40e67d00dd1c3 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 31 Jan 2024 17:30:17 +0200 Subject: [PATCH 6/9] llama : remove LLAMA_MAX_DEVICES and LLAMA_SUPPORTS_GPU_OFFLOAD (#5240) * llama : remove LLAMA_MAX_DEVICES from llama.h ggml-ci * Update llama.cpp Co-authored-by: slaren * server : remove LLAMA_MAX_DEVICES ggml-ci * llama : remove LLAMA_SUPPORTS_GPU_OFFLOAD ggml-ci * train : remove LLAMA_SUPPORTS_GPU_OFFLOAD * readme : add deprecation notice * readme : change deprecation notice to "remove" and fix url * llama : remove gpu includes from llama.h ggml-ci --------- Co-authored-by: slaren --- README.md | 3 +- common/common.cpp | 56 ++++++++++---------- common/common.h | 66 ++++++++++++------------ common/train.cpp | 12 ++--- examples/batched-bench/batched-bench.cpp | 2 +- examples/llama-bench/llama-bench.cpp | 16 +++--- examples/server/server.cpp | 44 ++++++++-------- llama.cpp | 39 +++++++++++--- llama.h | 29 ++++------- 9 files changed, 143 insertions(+), 124 deletions(-) diff --git a/README.md b/README.md index 7746cb510..e6ed1d429 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,8 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ ### Hot topics -- ⚠️ Incoming backends: https://github.com/ggerganov/llama.cpp/discussions/5138 +- Remove LLAMA_MAX_DEVICES and LLAMA_SUPPORTS_GPU_OFFLOAD: https://github.com/ggerganov/llama.cpp/pull/5240 +- Incoming backends: https://github.com/ggerganov/llama.cpp/discussions/5138 - [SYCL backend](README-sycl.md) is ready (1/28/2024), support Linux/Windows in Intel GPUs (iGPU, Arc/Flex/Max series) - New SOTA quantized models, including pure 2-bits: https://huggingface.co/ikawrakow - Collecting Apple Silicon performance stats: diff --git a/common/common.cpp b/common/common.cpp index 9d976c7c8..ce739b15c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -583,20 +583,20 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { break; } params.n_gpu_layers = std::stoi(argv[i]); -#ifndef LLAMA_SUPPORTS_GPU_OFFLOAD - fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); - fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); -#endif + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + } } else if (arg == "--gpu-layers-draft" || arg == "-ngld" || arg == "--n-gpu-layers-draft") { if (++i >= argc) { invalid_param = true; break; } params.n_gpu_layers_draft = std::stoi(argv[i]); -#ifndef LLAMA_SUPPORTS_GPU_OFFLOAD - fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers-draft option will be ignored\n"); - fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); -#endif + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers-draft option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + } } else if (arg == "--main-gpu" || arg == "-mg") { if (++i >= argc) { invalid_param = true; @@ -637,11 +637,11 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { const std::regex regex{R"([,/]+)"}; std::sregex_token_iterator it{arg_next.begin(), arg_next.end(), regex, -1}; std::vector split_arg{it, {}}; - if (split_arg.size() >= LLAMA_MAX_DEVICES) { + if (split_arg.size() >= llama_max_devices()) { invalid_param = true; break; } - for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) { + for (size_t i = 0; i < llama_max_devices(); ++i) { if (i < split_arg.size()) { params.tensor_split[i] = std::stof(split_arg[i]); } else { @@ -989,30 +989,30 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n"); printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA. see examples/llava/README.md\n"); printf(" --image IMAGE_FILE path to an image file. use with multimodal models\n"); - if (llama_mlock_supported()) { + if (llama_supports_mlock()) { printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n"); } - if (llama_mmap_supported()) { + if (llama_supports_mmap()) { printf(" --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n"); } printf(" --numa attempt optimizations that help on some NUMA systems\n"); printf(" if run without this previously, it is recommended to drop the system page cache before using this\n"); printf(" see https://github.com/ggerganov/llama.cpp/issues/1437\n"); -#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD - printf(" -ngl N, --n-gpu-layers N\n"); - printf(" number of layers to store in VRAM\n"); - printf(" -ngld N, --n-gpu-layers-draft N\n"); - printf(" number of layers to store in VRAM for the draft model\n"); - printf(" -sm SPLIT_MODE, --split-mode SPLIT_MODE\n"); - printf(" how to split the model across multiple GPUs, one of:\n"); - printf(" - none: use one GPU only\n"); - printf(" - layer (default): split layers and KV across GPUs\n"); - printf(" - row: split rows across GPUs\n"); - printf(" -ts SPLIT, --tensor-split SPLIT\n"); - printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n"); - printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n"); - printf(" or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu); -#endif // LLAMA_SUPPORTS_GPU_OFFLOAD + if (llama_supports_gpu_offload()) { + printf(" -ngl N, --n-gpu-layers N\n"); + printf(" number of layers to store in VRAM\n"); + printf(" -ngld N, --n-gpu-layers-draft N\n"); + printf(" number of layers to store in VRAM for the draft model\n"); + printf(" -sm SPLIT_MODE, --split-mode SPLIT_MODE\n"); + printf(" how to split the model across multiple GPUs, one of:\n"); + printf(" - none: use one GPU only\n"); + printf(" - layer (default): split layers and KV across GPUs\n"); + printf(" - row: split rows across GPUs\n"); + printf(" -ts SPLIT, --tensor-split SPLIT\n"); + printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n"); + printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n"); + printf(" or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu); + } printf(" --verbose-prompt print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false"); printf(" --no-display-prompt don't print prompt at generation (default: %s)\n", !params.display_prompt ? "true" : "false"); printf(" -gan N, --grp-attn-n N\n"); @@ -1651,7 +1651,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l fprintf(stream, "cont_batching: %s # default: false\n", params.cont_batching ? "true" : "false"); fprintf(stream, "temp: %f # default: 0.8\n", sparams.temp); - const std::vector tensor_split_vector(params.tensor_split, params.tensor_split + LLAMA_MAX_DEVICES); + const std::vector tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices()); dump_vector_float_yaml(stream, "tensor_split", tensor_split_vector); fprintf(stream, "tfs: %f # default: 1.0\n", sparams.tfs_z); diff --git a/common/common.h b/common/common.h index 214a379b5..24a99d728 100644 --- a/common/common.h +++ b/common/common.h @@ -43,40 +43,40 @@ extern char const *LLAMA_BUILD_TARGET; int32_t get_num_physical_cores(); struct gpt_params { - uint32_t seed = -1; // RNG seed + uint32_t seed = -1; // RNG seed - int32_t n_threads = get_num_physical_cores(); - int32_t n_threads_draft = -1; - int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads) - int32_t n_threads_batch_draft = -1; - int32_t n_predict = -1; // new tokens to predict - int32_t n_ctx = 512; // context size - int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) - int32_t n_keep = 0; // number of tokens to keep from initial prompt - int32_t n_draft = 8; // number of tokens to draft during speculative decoding - int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) - int32_t n_parallel = 1; // number of parallel sequences to decode - int32_t n_sequences = 1; // number of sequences to decode - float p_accept = 0.5f; // speculative decoding accept probability - float p_split = 0.1f; // speculative decoding split probability - int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) - int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default) - llama_split_mode split_mode = LLAMA_SPLIT_LAYER; // how to split the model across GPUs - int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors - float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs - int32_t n_beams = 0; // if non-zero then use beam search of given width. - int32_t grp_attn_n = 1; // group-attention factor - int32_t grp_attn_w = 512; // group-attention width - int32_t n_print = -1; // print token count every n tokens (-1 = disabled) - float rope_freq_base = 0.0f; // RoPE base frequency - float rope_freq_scale = 0.0f; // RoPE frequency scaling factor - float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor - float yarn_attn_factor = 1.0f; // YaRN magnitude scaling factor - float yarn_beta_fast = 32.0f; // YaRN low correction dim - float yarn_beta_slow = 1.0f; // YaRN high correction dim - int32_t yarn_orig_ctx = 0; // YaRN original context length - int8_t rope_scaling_type = LLAMA_ROPE_SCALING_UNSPECIFIED; // TODO: better to be int32_t for alignment - // pinging @cebtenzzre + int32_t n_threads = get_num_physical_cores(); + int32_t n_threads_draft = -1; + int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads) + int32_t n_threads_batch_draft = -1; + int32_t n_predict = -1; // new tokens to predict + int32_t n_ctx = 512; // context size + int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_keep = 0; // number of tokens to keep from initial prompt + int32_t n_draft = 8; // number of tokens to draft during speculative decoding + int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) + int32_t n_parallel = 1; // number of parallel sequences to decode + int32_t n_sequences = 1; // number of sequences to decode + float p_accept = 0.5f; // speculative decoding accept probability + float p_split = 0.1f; // speculative decoding split probability + int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) + int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default) + llama_split_mode split_mode = LLAMA_SPLIT_LAYER; // how to split the model across GPUs + int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors + float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs + int32_t n_beams = 0; // if non-zero then use beam search of given width. + int32_t grp_attn_n = 1; // group-attention factor + int32_t grp_attn_w = 512; // group-attention width + int32_t n_print = -1; // print token count every n tokens (-1 = disabled) + float rope_freq_base = 0.0f; // RoPE base frequency + float rope_freq_scale = 0.0f; // RoPE frequency scaling factor + float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor + float yarn_attn_factor = 1.0f; // YaRN magnitude scaling factor + float yarn_beta_fast = 32.0f; // YaRN low correction dim + float yarn_beta_slow = 1.0f; // YaRN high correction dim + int32_t yarn_orig_ctx = 0; // YaRN original context length + int8_t rope_scaling_type = LLAMA_ROPE_SCALING_UNSPECIFIED; // TODO: better to be int32_t for alignment + // pinging @cebtenzzre // // sampling parameters struct llama_sampling_params sparams; diff --git a/common/train.cpp b/common/train.cpp index e6f2f7a2f..e4c3d5df6 100644 --- a/common/train.cpp +++ b/common/train.cpp @@ -1363,12 +1363,12 @@ bool consume_common_train_arg( *invalid_param = true; return true; } -#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD - params->n_gpu_layers = std::stoi(argv[i]); -#else - fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); - fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); -#endif + if (llama_supports_gpu_offload()) { + params->n_gpu_layers = std::stoi(argv[i]); + } else { + fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + } } else if (arg == "-h" || arg == "--help") { params->print_usage = true; return true; diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index 7924db267..b52d68457 100644 --- a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -88,7 +88,7 @@ int main(int argc, char ** argv) { llama_model_params model_params = llama_model_default_params(); - const std::vector t_split (LLAMA_MAX_DEVICES, 0.0f); + const std::vector t_split(llama_max_devices(), 0.0f); model_params.n_gpu_layers = n_gpu_layers; model_params.tensor_split = t_split.data(); diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 542cc7bb8..c5a6f744e 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -160,7 +160,7 @@ struct cmd_params { std::vector main_gpu; std::vector no_kv_offload; std::vector mul_mat_q; - std::vector> tensor_split; + std::vector> tensor_split; int reps; bool verbose; output_formats output_format; @@ -179,7 +179,7 @@ static const cmd_params cmd_params_defaults = { /* main_gpu */ {0}, /* no_kv_offload */ {false}, /* mul_mat_q */ {true}, - /* tensor_split */ {{}}, + /* tensor_split */ {std::vector(llama_max_devices(), 0.0f)}, /* reps */ 5, /* verbose */ false, /* output_format */ MARKDOWN @@ -380,10 +380,10 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { const std::regex regex{R"([;/]+)"}; std::sregex_token_iterator it{ts.begin(), ts.end(), regex, -1}; std::vector split_arg{it, {}}; - GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES); + GGML_ASSERT(split_arg.size() <= llama_max_devices()); - std::array tensor_split; - for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) { + std::vector tensor_split(llama_max_devices()); + for (size_t i = 0; i < llama_max_devices(); ++i) { if (i < split_arg.size()) { tensor_split[i] = std::stof(split_arg[i]); } else { @@ -459,7 +459,7 @@ struct cmd_params_instance { int main_gpu; bool no_kv_offload; bool mul_mat_q; - std::array tensor_split; + std::vector tensor_split; llama_model_params to_llama_mparams() const { llama_model_params mparams = llama_model_default_params(); @@ -582,7 +582,7 @@ struct test { int main_gpu; bool no_kv_offload; bool mul_mat_q; - std::array tensor_split; + std::vector tensor_split; int n_prompt; int n_gen; std::string test_time; @@ -704,7 +704,7 @@ struct test { std::vector get_values() const { std::string tensor_split_str; int max_nonzero = 0; - for (int i = 0; i < LLAMA_MAX_DEVICES; i++) { + for (size_t i = 0; i < llama_max_devices(); i++) { if (tensor_split[i] > 0) { max_nonzero = i; } diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 21bdce8ed..ea77125ea 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1789,28 +1789,28 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms, printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n"); printf(" not recommended: doubles context memory required and no measurable increase in quality\n"); - if (llama_mlock_supported()) + if (llama_supports_mlock()) { printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n"); } - if (llama_mmap_supported()) + if (llama_supports_mmap()) { printf(" --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n"); } printf(" --numa attempt optimizations that help on some NUMA systems\n"); -#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD - printf(" -ngl N, --n-gpu-layers N\n"); - printf(" number of layers to store in VRAM\n"); - printf(" -sm SPLIT_MODE, --split-mode SPLIT_MODE\n"); - printf(" how to split the model across multiple GPUs, one of:\n"); - printf(" - none: use one GPU only\n"); - printf(" - layer (default): split layers and KV across GPUs\n"); - printf(" - row: split rows across GPUs\n"); - printf(" -ts SPLIT --tensor-split SPLIT\n"); - printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n"); - printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n"); - printf(" or for intermediate results and KV (with split-mode = row)\n"); -#endif + if (llama_supports_gpu_offload()) { + printf(" -ngl N, --n-gpu-layers N\n"); + printf(" number of layers to store in VRAM\n"); + printf(" -sm SPLIT_MODE, --split-mode SPLIT_MODE\n"); + printf(" how to split the model across multiple GPUs, one of:\n"); + printf(" - none: use one GPU only\n"); + printf(" - layer (default): split layers and KV across GPUs\n"); + printf(" - row: split rows across GPUs\n"); + printf(" -ts SPLIT --tensor-split SPLIT\n"); + printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n"); + printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n"); + printf(" or for intermediate results and KV (with split-mode = row)\n"); + } printf(" -m FNAME, --model FNAME\n"); printf(" model path (default: %s)\n", params.model.c_str()); printf(" -a ALIAS, --alias ALIAS\n"); @@ -2066,13 +2066,13 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, invalid_param = true; break; } -#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD - params.n_gpu_layers = std::stoi(argv[i]); -#else - LOG_WARNING("Not compiled with GPU offload support, --n-gpu-layers option will be ignored. " + if (llama_supports_gpu_offload()) { + params.n_gpu_layers = std::stoi(argv[i]); + } else { + LOG_WARNING("Not compiled with GPU offload support, --n-gpu-layers option will be ignored. " "See main README.md for information on enabling GPU BLAS support", {{"n_gpu_layers", params.n_gpu_layers}}); -#endif + } } else if (arg == "--split-mode" || arg == "-sm") { @@ -2115,9 +2115,9 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, const std::regex regex{R"([,/]+)"}; std::sregex_token_iterator it{arg_next.begin(), arg_next.end(), regex, -1}; std::vector split_arg{it, {}}; - GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES); + GGML_ASSERT(split_arg.size() <= llama_max_devices()); - for (size_t i_device = 0; i_device < LLAMA_MAX_DEVICES; ++i_device) + for (size_t i_device = 0; i_device < llama_max_devices(); ++i_device) { if (i_device < split_arg.size()) { diff --git a/llama.cpp b/llama.cpp index bb23689fa..9b249ba9c 100644 --- a/llama.cpp +++ b/llama.cpp @@ -10090,18 +10090,45 @@ struct llama_model_quantize_params llama_model_quantize_default_params() { return result; } -int32_t llama_max_devices(void) { - return LLAMA_MAX_DEVICES; +size_t llama_max_devices(void) { +#if defined(GGML_USE_METAL) + return 1; +#elif defined(GGML_USE_CUBLAS) + return GGML_CUDA_MAX_DEVICES; +#elif defined(GGML_USE_SYCL) + return GGML_SYCL_MAX_DEVICES; +#else + return 1; +#endif } -bool llama_mmap_supported(void) { +bool llama_supports_mmap(void) { return llama_mmap::SUPPORTED; } -bool llama_mlock_supported(void) { +bool llama_supports_mlock(void) { return llama_mlock::SUPPORTED; } +bool llama_supports_gpu_offload(void) { +#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \ + defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) + // Defined when llama.cpp is compiled with support for offloading model layers to GPU. + return true; +#else + return false; +#endif +} + +// deprecated: +bool llama_mmap_supported(void) { + return llama_supports_mmap(); +} + +bool llama_mlock_supported(void) { + return llama_supports_mlock(); +} + void llama_backend_init(bool numa) { ggml_time_init(); @@ -10133,8 +10160,8 @@ int64_t llama_time_us(void) { } struct llama_model * llama_load_model_from_file( - const char * path_model, - struct llama_model_params params) { + const char * path_model, + struct llama_model_params params) { ggml_time_init(); llama_model * model = new llama_model; diff --git a/llama.h b/llama.h index 17d43d039..9a60e9bfb 100644 --- a/llama.h +++ b/llama.h @@ -3,15 +3,7 @@ #include "ggml.h" #include "ggml-backend.h" -#ifdef GGML_USE_CUBLAS -#include "ggml-cuda.h" -#define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES -#elif defined(GGML_USE_SYCL) -#include "ggml-sycl.h" -#define LLAMA_MAX_DEVICES GGML_SYCL_MAX_DEVICES -#else -#define LLAMA_MAX_DEVICES 1 -#endif // GGML_USE_CUBLAS + #include #include #include @@ -49,12 +41,6 @@ #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN #define LLAMA_SESSION_VERSION 4 -#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \ - defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) -// Defined when llama.cpp is compiled with support for offloading model layers to GPU. -#define LLAMA_SUPPORTS_GPU_OFFLOAD -#endif - #ifdef __cplusplus extern "C" { #endif @@ -201,7 +187,7 @@ extern "C" { // LLAMA_SPLIT_LAYER: ignored int32_t main_gpu; - // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES + // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices() const float * tensor_split; // Called with a progress value between 0.0 and 1.0. Pass NULL to disable. @@ -338,9 +324,14 @@ extern "C" { LLAMA_API int64_t llama_time_us(void); - LLAMA_API int32_t llama_max_devices(void); - LLAMA_API bool llama_mmap_supported (void); - LLAMA_API bool llama_mlock_supported(void); + LLAMA_API size_t llama_max_devices(void); + + LLAMA_API bool llama_supports_mmap (void); + LLAMA_API bool llama_supports_mlock (void); + LLAMA_API bool llama_supports_gpu_offload(void); + + LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead"); + LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead"); LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx); From d3bac7d58408c602ec1f1e423695f1df8410bb03 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 31 Jan 2024 18:47:10 +0200 Subject: [PATCH 7/9] llama : reorder build_orion() at correct place (#5118) --- llama.cpp | 239 +++++++++++++++++++++++++++--------------------------- 1 file changed, 119 insertions(+), 120 deletions(-) diff --git a/llama.cpp b/llama.cpp index 9b249ba9c..02b0a485a 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4666,126 +4666,6 @@ struct llm_build_context { ctx0 = nullptr; } } - struct ggml_cgraph * build_orion() { - struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); - - const int64_t n_embd_head = hparams.n_embd_head_v; - GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); - GGML_ASSERT(n_embd_head == hparams.n_rot); - - struct ggml_tensor * cur; - struct ggml_tensor * inpL; - - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); - - // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); - - // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); - - // shift the entire K-cache if needed - if (do_rope_shift) { - llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb); - } - - for (int il = 0; il < n_layer; ++il) { - struct ggml_tensor * inpSA = inpL; - - // norm - cur = llm_build_norm(ctx0, inpL, hparams, - model.layers[il].attn_norm, model.layers[il].attn_norm_b, - LLM_NORM, cb, il); - cb(cur, "attn_norm", il); - - // self-attention - { - // compute Q and K and RoPE them - struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur); - cb(Qcur, "Qcur", il); - // if (model.layers[il].bq) { - // Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); - // cb(Qcur, "Qcur", il); - // } - - struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur); - cb(Kcur, "Kcur", il); - // if (model.layers[il].bk) { - // Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); - // cb(Kcur, "Kcur", il); - // } - - struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur); - cb(Vcur, "Vcur", il); - // if (model.layers[il].bv) { - // Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); - // cb(Vcur, "Vcur", il); - // } - - Qcur = ggml_rope_custom( - ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, - hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); - cb(Qcur, "Qcur", il); - - Kcur = ggml_rope_custom( - ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, - hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); - cb(Kcur, "Kcur", il); - - cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, - model.layers[il].wo, NULL, - Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, -1.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); - } - - struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); - cb(ffn_inp, "ffn_inp", il); - - // feed-forward network - cur = llm_build_norm(ctx0, ffn_inp, hparams, - model.layers[il].ffn_norm, model.layers[il].ffn_norm_b, - LLM_NORM, cb, il); - cb(cur, "ffn_norm", il); - - cur = llm_build_ffn(ctx0, cur, - model.layers[il].ffn_up, NULL, - model.layers[il].ffn_gate, NULL, - model.layers[il].ffn_down, NULL, - NULL, - LLM_FFN_SILU, LLM_FFN_PAR, cb, il); - cb(cur, "ffn_out", il); - - cur = ggml_add(ctx0, cur, ffn_inp); - cb(cur, "l_out", il); - - // input for next layer - inpL = cur; - } - - cur = inpL; - - cur = llm_build_norm(ctx0, cur, hparams, - model.output_norm, model.output_norm_b, - LLM_NORM, cb, -1); - cb(cur, "result_norm", -1); - - // lm_head - cur = ggml_mul_mat(ctx0, model.output, cur); - cb(cur, "result_output", -1); - - ggml_build_forward_expand(gf, cur); - - return gf; - } - - struct ggml_cgraph * build_llama() { struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); @@ -6589,6 +6469,125 @@ struct llm_build_context { return gf; } + + struct ggml_cgraph * build_orion() { + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); + + const int64_t n_embd_head = hparams.n_embd_head_v; + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + struct ggml_tensor * cur; + struct ggml_tensor * inpL; + + inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); + cb(inpL, "inp_embd", -1); + + // inp_pos - contains the positions + struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); + cb(inp_pos, "inp_pos", -1); + + // KQ_mask (mask for 1 head, it will be broadcasted to all heads) + struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); + cb(KQ_mask, "KQ_mask", -1); + + // shift the entire K-cache if needed + if (do_rope_shift) { + llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb); + } + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * inpSA = inpL; + + // norm + cur = llm_build_norm(ctx0, inpL, hparams, + model.layers[il].attn_norm, model.layers[il].attn_norm_b, + LLM_NORM, cb, il); + cb(cur, "attn_norm", il); + + // self-attention + { + // compute Q and K and RoPE them + struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + // if (model.layers[il].bq) { + // Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + // cb(Qcur, "Qcur", il); + // } + + struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + // if (model.layers[il].bk) { + // Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + // cb(Kcur, "Kcur", il); + // } + + struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + // if (model.layers[il].bv) { + // Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + // cb(Vcur, "Vcur", il); + // } + + Qcur = ggml_rope_custom( + ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, + hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(Qcur, "Qcur", il); + + Kcur = ggml_rope_custom( + ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, + hparams.n_rot, 2, 0, n_orig_ctx, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(Kcur, "Kcur", il); + + cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, + model.layers[il].wo, NULL, + Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, -1.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il); + cb(cur, "kqv_out", il); + } + + struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + cur = llm_build_norm(ctx0, ffn_inp, hparams, + model.layers[il].ffn_norm, model.layers[il].ffn_norm_b, + LLM_NORM, cb, il); + cb(cur, "ffn_norm", il); + + cur = llm_build_ffn(ctx0, cur, + model.layers[il].ffn_up, NULL, + model.layers[il].ffn_gate, NULL, + model.layers[il].ffn_down, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, cb, il); + cb(cur, "ffn_out", il); + + cur = ggml_add(ctx0, cur, ffn_inp); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = llm_build_norm(ctx0, cur, hparams, + model.output_norm, model.output_norm_b, + LLM_NORM, cb, -1); + cb(cur, "result_norm", -1); + + // lm_head + cur = ggml_mul_mat(ctx0, model.output, cur); + cb(cur, "result_output", -1); + + ggml_build_forward_expand(gf, cur); + + return gf; + } }; static struct ggml_cgraph * llama_build_graph( From 1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 Mon Sep 17 00:00:00 2001 From: Eve <139727413+netrunnereve@users.noreply.github.com> Date: Wed, 31 Jan 2024 19:21:55 +0000 Subject: [PATCH 8/9] Fix broken Vulkan Cmake (properly) (#5230) * build vulkan as object * vulkan ci --- .github/workflows/build.yml | 6 ++++-- CMakeLists.txt | 8 ++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c6db1666e..f4c374ce5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -356,6 +356,8 @@ jobs: defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' - build: 'kompute' defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON' + - build: 'vulkan' + defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON' steps: - name: Clone @@ -406,7 +408,7 @@ jobs: - name: Install Vulkan SDK id: get_vulkan - if: ${{ matrix.build == 'kompute' }} + if: ${{ matrix.build == 'kompute' || matrix.build == 'vulkan' }} run: | curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install @@ -451,7 +453,7 @@ jobs: - name: Test id: cmake_test # not all machines have native AVX-512 - if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} + if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} run: | cd build ctest -L main -C Release --verbose --timeout 900 diff --git a/CMakeLists.txt b/CMakeLists.txt index 15a1101aa..1ee455b3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -423,10 +423,7 @@ if (LLAMA_VULKAN) if (Vulkan_FOUND) message(STATUS "Vulkan found") - set(GGML_HEADERS_VULKAN ggml-vulkan.h) - set(GGML_SOURCES_VULKAN ggml-vulkan.cpp) - - add_library(ggml-vulkan STATIC ggml-vulkan.cpp ggml-vulkan.h) + add_library(ggml-vulkan OBJECT ggml-vulkan.cpp ggml-vulkan.h) if (BUILD_SHARED_LIBS) set_target_properties(ggml-vulkan PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() @@ -1012,7 +1009,6 @@ add_library(ggml OBJECT ggml-quants.h ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA} ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL} - ${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN} ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL} ${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI} ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA} @@ -1094,7 +1090,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama) set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h" - "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}" "${GGML_HEADERS_VULKAN}" + "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}" "${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}") set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") From ce32060198b7e2d6a13a9b8e1e1369e3c295ae2a Mon Sep 17 00:00:00 2001 From: Guoteng <32697156+SolenoidWGT@users.noreply.github.com> Date: Thu, 1 Feb 2024 17:19:51 +0800 Subject: [PATCH 9/9] llama : support InternLM2 (#5184) * support InternLM2 inference * add add_space_prefix KV pair --- convert-hf-to-gguf.py | 152 ++++++++++++++++++++++++ gguf-py/gguf/constants.py | 18 +++ gguf-py/gguf/gguf_writer.py | 3 + gguf-py/gguf/tensor_mapping.py | 14 ++- llama.cpp | 205 ++++++++++++++++++++++++++++++++- 5 files changed, 387 insertions(+), 5 deletions(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 6ab7f486e..4ebab07b3 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -203,6 +203,8 @@ class Model: return CodeShellModel if model_architecture == "OrionForCausalLM": return OrionModel + if model_architecture == "InternLM2ForCausalLM": + return InternLM2Model return Model def _is_model_safetensors(self) -> bool: @@ -254,6 +256,8 @@ class Model: return gguf.MODEL_ARCH.CODESHELL if arch == "OrionForCausalLM": return gguf.MODEL_ARCH.ORION + if arch == "InternLM2ForCausalLM": + return gguf.MODEL_ARCH.INTERNLM2 raise NotImplementedError(f'Architecture "{arch}" not supported!') @@ -1344,6 +1348,154 @@ class CodeShellModel(Model): self.gguf_writer.add_tensor("output.weight", data) print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}") + +class InternLM2Model(Model): + def set_vocab(self): + # (TODO): Is there a better way? + # Copy from _set_vocab_sentencepiece, The only difference is that we will treat the character + # \x00 specially and convert it into an emoji character to prevent it from being mistakenly + # recognized as an empty string in C++. + from sentencepiece import SentencePieceProcessor + from sentencepiece import sentencepiece_model_pb2 as model + + tokenizer_path = self.dir_model / 'tokenizer.model' + + tokens: list[bytes] = [] + scores: list[float] = [] + toktypes: list[int] = [] + + if not tokenizer_path.is_file(): + print(f'Error: Missing {tokenizer_path}', file=sys.stderr) + sys.exit(1) + + sentencepiece_model = model.ModelProto() + sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read()) + add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix + + tokenizer = SentencePieceProcessor(str(tokenizer_path)) + vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size()) + + for token_id in range(vocab_size): + piece = tokenizer.id_to_piece(token_id) + text = piece.encode("utf-8") + score = tokenizer.get_score(token_id) + if text == b"\x00": + # (TODO): fixme + # Hack here and replace the \x00 characters. + print(f"InternLM2 convert token '{text}' to '🐉'!") + text = "🐉" + + toktype = SentencePieceTokenTypes.NORMAL + if tokenizer.is_unknown(token_id): + toktype = SentencePieceTokenTypes.UNKNOWN + elif tokenizer.is_control(token_id): + toktype = SentencePieceTokenTypes.CONTROL + elif tokenizer.is_unused(token_id): + toktype = SentencePieceTokenTypes.UNUSED + elif tokenizer.is_byte(token_id): + toktype = SentencePieceTokenTypes.BYTE + + tokens.append(text) + scores.append(score) + toktypes.append(toktype) + + added_tokens_file = self.dir_model / 'added_tokens.json' + if added_tokens_file.is_file(): + with open(added_tokens_file, "r", encoding="utf-8") as f: + added_tokens_json = json.load(f) + + for key in added_tokens_json: + tokens.append(key.encode("utf-8")) + scores.append(-1000.0) + toktypes.append(SentencePieceTokenTypes.USER_DEFINED) + + self.gguf_writer.add_tokenizer_model("llama") + self.gguf_writer.add_token_list(tokens) + self.gguf_writer.add_token_scores(scores) + self.gguf_writer.add_token_types(toktypes) + self.gguf_writer.add_add_space_prefix(add_prefix) + + special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) + special_vocab.add_to_gguf(self.gguf_writer) + + def set_gguf_parameters(self): + self.gguf_writer.add_name("InternLM2") + self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) + self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"]) + self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) + self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"]) + self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"]) + self.gguf_writer.add_head_count(self.hparams["num_attention_heads"]) + self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"]) + self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"]) + + def post_write_tensors(self, tensor_map, name, data_torch): + old_dtype = data_torch.dtype + + # convert any unsupported data types to float32 + if data_torch.dtype not in (torch.float16, torch.float32): + data_torch = data_torch.to(torch.float32) + + data = data_torch.squeeze().numpy() + + # map tensor names + new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) + if new_name is None: + print(f"Can not map tensor {name!r}") + sys.exit() + + n_dims = len(data.shape) + data_dtype = data.dtype + + # if f32 desired, convert any float16 to float32 + if self.ftype == 0 and data_dtype == np.float16: + data = data.astype(np.float32) + + # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32 + if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1: + data = data.astype(np.float32) + + # if f16 desired, convert any float32 2-dim weight tensors to float16 + if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: + data = data.astype(np.float16) + + print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + self.gguf_writer.add_tensor(new_name, data) + + def write_tensors(self): + from einops import rearrange + + num_heads = self.hparams.get("num_attention_heads") + num_kv_heads = self.hparams.get("num_key_value_heads") + hidden_size = self.hparams.get("hidden_size") + q_per_kv = num_heads // num_kv_heads + head_dim = hidden_size // num_heads + num_groups = num_heads // q_per_kv + + block_count = self.hparams["num_hidden_layers"] + model_kv = dict(self.get_tensors()) + tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count) + qkv_pattern = r"model\.layers\.(\d+)\.attention\.wqkv" + for name, data_torch in model_kv.items(): + # we don't need these + if name.endswith(".rotary_emb.inv_freq"): + continue + + if re.match(qkv_pattern, name): + bid = re.findall(qkv_pattern, name)[0] + qkv = data_torch + qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim) + q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[..., q_per_kv + 1: q_per_kv + 2, :] + q = rearrange(q, " o g n i -> o (g n i)").T + k = rearrange(k, " o g n i -> o (g n i)").T + v = rearrange(v, " o g n i -> o (g n i)").T + self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wq.weight", q) + self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wk.weight", k) + self.post_write_tensors(tensor_map, f"model.layers.{bid}.attention.wv.weight", v) + else: + self.post_write_tensors(tensor_map, name, data_torch) + + ###### CONVERSION LOGIC ###### diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index f5c933a41..ed8e26f83 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -72,6 +72,7 @@ class Keys: PAD_ID = "tokenizer.ggml.padding_token_id" ADD_BOS = "tokenizer.ggml.add_bos_token" ADD_EOS = "tokenizer.ggml.add_eos_token" + ADD_PREFIX = "tokenizer.ggml.add_space_prefix" HF_JSON = "tokenizer.huggingface.json" RWKV = "tokenizer.rwkv.world" CHAT_TEMPLATE = "tokenizer.chat_template" @@ -102,6 +103,7 @@ class MODEL_ARCH(IntEnum): PLAMO = auto() CODESHELL = auto() ORION = auto() + INTERNLM2 = auto() class MODEL_TENSOR(IntEnum): @@ -153,6 +155,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { MODEL_ARCH.PLAMO: "plamo", MODEL_ARCH.CODESHELL: "codeshell", MODEL_ARCH.ORION: "orion", + MODEL_ARCH.INTERNLM2: "internlm2", } TENSOR_NAMES: dict[MODEL_TENSOR, str] = { @@ -446,6 +449,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, ], + MODEL_ARCH.INTERNLM2: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.ATTN_ROT_EMBD, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + ], # TODO } diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index d93aaa877..16808196e 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -411,6 +411,9 @@ class GGUFWriter: def add_add_eos_token(self, value: bool) -> None: self.add_bool(Keys.Tokenizer.ADD_EOS, value) + def add_add_space_prefix(self, value: bool) -> None: + self.add_bool(Keys.Tokenizer.ADD_PREFIX, value) + def add_chat_template(self, value: str) -> None: self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value) diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index de177af13..4f16d8504 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -19,6 +19,7 @@ class TensorNameMap: "language_model.embedding.word_embeddings", # persimmon "wte", # gpt2 "transformer.embd.wte", # phi2 + "model.tok_embeddings", # internlm2 ), # Token type embeddings @@ -42,7 +43,7 @@ class TensorNameMap: MODEL_TENSOR.OUTPUT: ( "embed_out", # gptneox "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen - "output", # llama-pth bloom + "output", # llama-pth bloom internlm2 "word_embeddings_for_head", # persimmon "lm_head.linear", # phi2 ), @@ -51,7 +52,7 @@ class TensorNameMap: MODEL_TENSOR.OUTPUT_NORM: ( "gpt_neox.final_layer_norm", # gptneox "transformer.ln_f", # gpt2 gpt-j falcon - "model.norm", # llama-hf baichuan + "model.norm", # llama-hf baichuan internlm2 "norm", # llama-pth "embeddings.LayerNorm", # bert "transformer.norm_f", # mpt @@ -84,6 +85,7 @@ class TensorNameMap: "h.{bid}.ln_1", # gpt2 "transformer.h.{bid}.ln", # phi2 "model.layers.layers.{bid}.norm", # plamo + "model.layers.{bid}.attention_norm", # internlm2 ), # Attention norm 2 @@ -111,6 +113,7 @@ class TensorNameMap: "encoder.layer.{bid}.attention.self.query", # bert "transformer.h.{bid}.attn.q_proj", # gpt-j "model.layers.layers.{bid}.self_attn.q_proj", # plamo + "model.layers.{bid}.attention.wq" # internlm2 ), # Attention key @@ -120,6 +123,7 @@ class TensorNameMap: "encoder.layer.{bid}.attention.self.key", # bert "transformer.h.{bid}.attn.k_proj", # gpt-j "model.layers.layers.{bid}.self_attn.k_proj", # plamo + "model.layers.{bid}.attention.wk" # internlm2 ), # Attention value @@ -129,6 +133,7 @@ class TensorNameMap: "encoder.layer.{bid}.attention.self.value", # bert "transformer.h.{bid}.attn.v_proj", # gpt-j "model.layers.layers.{bid}.self_attn.v_proj", # plamo + "model.layers.{bid}.attention.wv" # internlm2 ), # Attention output @@ -147,6 +152,7 @@ class TensorNameMap: "h.{bid}.attn.c_proj", # gpt2 "transformer.h.{bid}.mixer.out_proj", # phi2 "model.layers.layers.{bid}.self_attn.o_proj", # plamo + "model.layers.{bid}.attention.wo", # internlm2 ), # Rotary embeddings @@ -169,6 +175,7 @@ class TensorNameMap: "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon "model.layers.{bid}.ln2", # yi "h.{bid}.ln_2", # gpt2 + "model.layers.{bid}.ffn_norm", # internlm2 ), MODEL_TENSOR.FFN_GATE_INP: ( @@ -194,6 +201,7 @@ class TensorNameMap: "transformer.h.{bid}.mlp.fc1", # phi2 "model.layers.{bid}.mlp.fc1", # phi2 "model.layers.layers.{bid}.mlp.up_proj", # plamo + "model.layers.{bid}.feed_forward.w3", # internlm2 ), MODEL_TENSOR.FFN_UP_EXP: ( @@ -212,6 +220,7 @@ class TensorNameMap: "layers.{bid}.feed_forward.w1", # llama-pth "transformer.h.{bid}.mlp.w2", # qwen "model.layers.layers.{bid}.mlp.gate_proj", # plamo + "model.layers.{bid}.feed_forward.w1", # internlm2 ), MODEL_TENSOR.FFN_GATE_EXP: ( @@ -236,6 +245,7 @@ class TensorNameMap: "transformer.h.{bid}.mlp.fc2", # phi2 "model.layers.{bid}.mlp.fc2", # phi2 "model.layers.layers.{bid}.mlp.down_proj", # plamo + "model.layers.{bid}.feed_forward.w2", # internlm2 ), MODEL_TENSOR.FFN_DOWN_EXP: ( diff --git a/llama.cpp b/llama.cpp index 02b0a485a..e8f44c2cb 100644 --- a/llama.cpp +++ b/llama.cpp @@ -204,6 +204,7 @@ enum llm_arch { LLM_ARCH_PLAMO, LLM_ARCH_CODESHELL, LLM_ARCH_ORION, + LLM_ARCH_INTERNLM2, LLM_ARCH_UNKNOWN, }; @@ -226,6 +227,7 @@ static std::map LLM_ARCH_NAMES = { { LLM_ARCH_PLAMO, "plamo" }, { LLM_ARCH_CODESHELL, "codeshell" }, { LLM_ARCH_ORION, "orion" }, + { LLM_ARCH_INTERNLM2, "internlm2" }, }; enum llm_kv { @@ -278,6 +280,7 @@ enum llm_kv { LLM_KV_TOKENIZER_PAD_ID, LLM_KV_TOKENIZER_ADD_BOS, LLM_KV_TOKENIZER_ADD_EOS, + LLM_KV_TOKENIZER_ADD_PREFIX, LLM_KV_TOKENIZER_HF_JSON, LLM_KV_TOKENIZER_RWKV, }; @@ -332,6 +335,7 @@ static std::map LLM_KV_NAMES = { { LLM_KV_TOKENIZER_PAD_ID, "tokenizer.ggml.padding_token_id" }, { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" }, { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" }, + { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" }, { LLM_KV_TOKENIZER_HF_JSON, "tokenizer.huggingface.json" }, { LLM_KV_TOKENIZER_RWKV, "tokenizer.rwkv.world" }, }; @@ -669,7 +673,23 @@ static std::map> LLM_TENSOR_NAMES = { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, }, }, - + { + LLM_ARCH_INTERNLM2, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + }, + }, { LLM_ARCH_UNKNOWN, { @@ -1377,6 +1397,7 @@ enum e_model { MODEL_13B, MODEL_14B, MODEL_15B, + MODEL_20B, MODEL_30B, MODEL_34B, MODEL_40B, @@ -1618,6 +1639,8 @@ struct llama_vocab { id special_suffix_id = 32008; id special_eot_id = 32010; + bool add_space_prefix = true; + int find_bpe_rank(const std::string & token_left, const std::string & token_right) const { GGML_ASSERT(token_left.find(' ') == std::string::npos); GGML_ASSERT(token_left.find('\n') == std::string::npos); @@ -2731,6 +2754,7 @@ static const char * llama_model_type_name(e_model type) { case MODEL_13B: return "13B"; case MODEL_14B: return "14B"; case MODEL_15B: return "15B"; + case MODEL_20B: return "20B"; case MODEL_30B: return "30B"; case MODEL_34B: return "34B"; case MODEL_40B: return "40B"; @@ -2743,6 +2767,14 @@ static const char * llama_model_type_name(e_model type) { default: return "?B"; } } +static const char * llama_model_vocab_type_name(enum llama_vocab_type type){ + switch (type) { + case LLAMA_VOCAB_TYPE_SPM: return "SPM"; + case LLAMA_VOCAB_TYPE_BPE: return "BPE"; + default: return "unknown"; + } +} + static void llm_load_arch(llama_model_loader & ml, llama_model & model) { model.arch = ml.get_arch(); @@ -3006,6 +3038,15 @@ static void llm_load_hparams( default: model.type = e_model::MODEL_UNKNOWN; } } break; + case LLM_ARCH_INTERNLM2: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + switch (hparams.n_layer) { + case 32: model.type = e_model::MODEL_7B; break; + case 48: model.type = e_model::MODEL_20B; break; + default: model.type = e_model::MODEL_UNKNOWN; + } + } break; default: (void)0; } @@ -3057,6 +3098,11 @@ static void llm_load_vocab( vocab.special_unk_id = 0; vocab.special_sep_id = -1; vocab.special_pad_id = -1; + + const int add_space_prefix_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_ADD_PREFIX).c_str()); + if (add_space_prefix_keyidx != -1) { + vocab.add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx); + } // The default value of add_space_prefix is true. } else if (tokenizer_name == "gpt2") { vocab.type = LLAMA_VOCAB_TYPE_BPE; @@ -3269,7 +3315,7 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) { // hparams LLAMA_LOG_INFO("%s: format = %s\n", __func__, llama_file_version_name(ml.fver)); LLAMA_LOG_INFO("%s: arch = %s\n", __func__, LLM_ARCH_NAMES.at(model.arch).c_str()); - LLAMA_LOG_INFO("%s: vocab type = %s\n", __func__, vocab.type == LLAMA_VOCAB_TYPE_SPM ? "SPM" : "BPE"); // TODO: fix + LLAMA_LOG_INFO("%s: vocab type = %s\n", __func__, llama_model_vocab_type_name(vocab.type)); LLAMA_LOG_INFO("%s: n_vocab = %u\n", __func__, hparams.n_vocab); LLAMA_LOG_INFO("%s: n_merges = %u\n", __func__, (int) vocab.bpe_ranks.size()); LLAMA_LOG_INFO("%s: n_ctx_train = %u\n", __func__, hparams.n_ctx_train); @@ -4018,8 +4064,35 @@ static bool llm_load_tensors( layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}); } } break; + case LLM_ARCH_INTERNLM2: + { + model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); + // output + { + model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}); + model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}); + } + for (int i = 0; i < n_layer; ++i) { + ggml_context * ctx_layer = ctx_for_layer(i); + ggml_context * ctx_split = ctx_for_layer_split(i); + + auto & layer = model.layers[i]; + + layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}); + // layer.wqkv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}); + layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}); + layer.wk = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}); + layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}); + + layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}); + layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}); + layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}); + layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}); + layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}); + } + } break; default: throw std::runtime_error("unknown architecture"); } @@ -6588,6 +6661,126 @@ struct llm_build_context { return gf; } + + struct ggml_cgraph * build_internlm2() { + struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); + + const int64_t n_embd_head = hparams.n_embd_head_v; + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + struct ggml_tensor * cur; + struct ggml_tensor * inpL; + + inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); + cb(inpL, "inp_embd", -1); + + // inp_pos - contains the positions + struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); + cb(inp_pos, "inp_pos", -1); + + // KQ_mask (mask for 1 head, it will be broadcasted to all heads) + struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); + cb(KQ_mask, "KQ_mask", -1); + + // shift the entire K-cache if needed + if (do_rope_shift) { + llm_build_k_shift(ctx0, hparams, cparams, kv_self, gf, lctx.inp_K_shift, LLM_ROPE, n_ctx, freq_base, freq_scale, cb); + } + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * inpSA = inpL; + + // norm + cur = llm_build_norm(ctx0, inpL, hparams, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, cb, il); + cb(cur, "attn_norm", il); + + // self-attention + { + // compute Q and K and RoPE them + struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } + + struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } + + struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } + + Qcur = ggml_rope_custom( + ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos, + hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(Qcur, "Qcur", il); + + Kcur = ggml_rope_custom( + ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos, + hparams.n_rot, 0, 0, n_orig_ctx, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(Kcur, "Kcur", il); + + cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, + model.layers[il].wo, model.layers[il].bo, + Kcur, Vcur, Qcur, KQ_mask, n_ctx, n_tokens, kv_head, n_kv, -1.0f, 1.0f/sqrtf(float(n_embd_head)), cb, il); + cb(cur, "kqv_out", il); + } + + struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + cur = llm_build_norm(ctx0, ffn_inp, hparams, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, cb, il); + cb(cur, "ffn_norm", il); + + cur = llm_build_ffn(ctx0, cur, + model.layers[il].ffn_up, NULL, + model.layers[il].ffn_gate, NULL, + model.layers[il].ffn_down, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, cb, il); + cb(cur, "ffn_out", il); + + cur = ggml_add(ctx0, cur, ffn_inp); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = llm_build_norm(ctx0, cur, hparams, + model.output_norm, NULL, + LLM_NORM_RMS, cb, -1); + cb(cur, "result_norm", -1); + + // lm_head + cur = ggml_mul_mat(ctx0, model.output, cur); + cb(cur, "result_output", -1); + + ggml_build_forward_expand(gf, cur); + + return gf; + } + }; static struct ggml_cgraph * llama_build_graph( @@ -6746,6 +6939,10 @@ static struct ggml_cgraph * llama_build_graph( { result = llm.build_orion(); } break; + case LLM_ARCH_INTERNLM2: + { + result = llm.build_internlm2(); + } break; default: GGML_ASSERT(false); } @@ -7688,7 +7885,9 @@ static std::vector llama_tokenize_internal(const llama_vocab & // auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length); if (&fragment == &fragment_buffer.front()) { - raw_text = " " + raw_text; // prefix with space if the first token is not special + if (vocab.add_space_prefix) { + raw_text = " " + raw_text; // prefix with space if the first token is not special + } } #ifdef PRETOKENIZERDEBUG