Implement byteswap for tq1_0 and tq2_0

2025-01-09 14:50:32 +01:00 · 2025-01-09 14:50:32 +01:00 · 0682209c66
commit 0682209c66
parent a8757fec66
1 changed files with 18 additions and 0 deletions
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -625,6 +625,8 @@ static void ggml_byteswap_q8_k    (void * restrict buffer, size_t elements);
 static void ggml_byteswap_q4_0_4x4(void * restrict buffer, size_t elements);
 static void ggml_byteswap_q4_0_4x8(void * restrict buffer, size_t elements);
 static void ggml_byteswap_q4_0_8x8(void * restrict buffer, size_t elements);
+static void ggml_byteswap_tq1_0   (void * restrict buffer, size_t elements);
+static void ggml_byteswap_tq2_0   (void * restrict buffer, size_t elements);

 static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
    [GGML_TYPE_I8] = {
@@ -911,6 +913,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
+        .byteswap                 = ggml_byteswap_tq1_0,
    },
    [GGML_TYPE_TQ2_0] = {
        .type_name                = "tq2_0",
@@ -919,6 +922,7 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
        .is_quantized             = true,
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
+        .byteswap                 = ggml_byteswap_tq2_0,
    },
    [36] = { // GGML_TYPE_IQ4_NL_4_4
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
@@ -6792,3 +6796,17 @@ static void ggml_byteswap_q4_0_8x8(void * restrict buffer, size_t elements) {
    UNUSED(buffer);
    UNUSED(elements);
 }
+
+static void ggml_byteswap_tq1_0(void * restrict buffer, size_t elements) { // byteswap hook for tq1_0: rewrites the `d` field of each block in `buffer`, in place
+    block_tq1_0 *data_ptr = (block_tq1_0*) buffer; // the untyped buffer is viewed as an array of block_tq1_0
+    for (size_t i = 0; i < elements; ++i) { // `elements` is used as a block count: one iteration per block_tq1_0
+        convert_from_le16(&(data_ptr[i].d)); // only `d` is passed to the helper; NOTE(review): presumably little-endian -> host 16-bit conversion, confirm in convert_from_le16
+    }
+}
+
+static void ggml_byteswap_tq2_0(void * restrict buffer, size_t elements) { // byteswap hook for tq2_0: rewrites the `d` field of each block in `buffer`, in place
+    block_tq2_0 *data_ptr = (block_tq2_0*) buffer; // the untyped buffer is viewed as an array of block_tq2_0
+    for (size_t i = 0; i < elements; ++i) { // `elements` is used as a block count: one iteration per block_tq2_0
+        convert_from_le16(&(data_ptr[i].d)); // only `d` is passed to the helper; NOTE(review): presumably little-endian -> host 16-bit conversion, confirm in convert_from_le16
+    }
+}