diff --git a/ggml-quants.c b/ggml-quants.c
index 607d50925..42e6c6692 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -4227,6 +4227,9 @@ void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y,
 
 void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y, int k) {
     assert(k % QK_K == 0);
+#if QK_K == 64
+    dequantize_row_iq4_nl((const block_iq4_nl *)x, y, k);
+#else
     const int nb = k / QK_K;
 
     for (int i = 0; i < nb; i++) {
@@ -4246,6 +4249,7 @@ void dequantize_row_iq4_xs(const block_iq4_xs * restrict x, float * restrict y,
             qs += 16;
         }
     }
+#endif
 }
 
 //===================================== Q8_K ==============================================
@@ -10455,6 +10459,9 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * restrict s, size_t bs, const void *
     UNUSED(by);
     UNUSED(bs);
     assert(n % QK_K == 0);
+#if QK_K == 64
+    ggml_vec_dot_iq4_nl_q8_0(n, s, bs, vx, bx, vy, by, nrc);
+#else
 
     const block_iq4_xs * restrict x = vx;
     const block_q8_K   * restrict y = vy;
@@ -10574,6 +10581,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * restrict s, size_t bs, const void *
     }
     *s = sumf;
 #endif
+#endif
 }
 
 // ================================ IQ2 quantization =============================================
@@ -10921,7 +10929,7 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
 
     const int kMaxQ = 3;
 
-    const int nbl = n/256;
+    const int nbl = n/QK_K;
 
     block_iq2_xxs * y = vy;
 
@@ -11094,7 +11102,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
 
     const int kMaxQ = 3;
 
-    const int nbl = n/256;
+    const int nbl = n/QK_K;
 
     block_iq2_xs * y = vy;
 
@@ -12037,7 +12045,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
     GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
     GGML_ASSERT(n%QK_K == 0);
 
-    const int nbl = n/256;
+    const int nbl = n/QK_K;
 
     block_iq1_s * y = vy;
 
@@ -12315,6 +12323,9 @@ void quantize_row_iq4_nl_reference(const float * restrict x, block_iq4_nl * rest
 }
 
 size_t quantize_iq4_xs(const float * src, void * dst, int nrow, int n_per_row, int64_t * hist, const float * quant_weights) {
+#if QK_K == 64
+    return quantize_iq4_nl(src, dst, nrow, n_per_row, hist, quant_weights);
+#else
     (void)hist;
     GGML_ASSERT(n_per_row%QK_K == 0);
     int nblock = n_per_row/QK_K;
@@ -12333,6 +12344,7 @@ size_t quantize_iq4_xs(const float * src, void * dst, int nrow, int n_per_row, i
         qrow += nblock*sizeof(block_iq4_xs);
     }
     return nrow * nblock * sizeof(block_iq4_xs);
+#endif
 }
 
 void quantize_row_iq4_xs(const float * restrict x, void * restrict vy, int k) {
@@ -12363,7 +12375,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
 
     const int kMaxQ = 3;
 
-    const int nbl = n/256;
+    const int nbl = n/QK_K;
 
     block_iq2_s * y = vy;
 
diff --git a/ggml-quants.h b/ggml-quants.h
index 2c61134c4..316e35687 100644
--- a/ggml-quants.h
+++ b/ggml-quants.h
@@ -230,6 +230,10 @@ typedef struct {
 } block_iq4_nl;
 static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding");
 
+#if QK_K == 64
+#define block_iq4_xs block_iq4_nl
+//typedef struct block_iq4_nl block_iq4_xs;
+#else
 typedef struct {
     ggml_fp16_t d;
     uint16_t scales_h;
@@ -237,6 +241,7 @@ typedef struct {
     uint8_t  qs[QK_K/2];
 } block_iq4_xs;
 static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
+#endif
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/ggml.c b/ggml.c
index d66db3352..4591644ad 100644
--- a/ggml.c
+++ b/ggml.c
@@ -728,14 +728,22 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
     },
     [GGML_TYPE_IQ4_XS] = {
         .type_name                = "iq4_xs",
+#if QK_K == 64
+        .blck_size                = QK4_NL,
+#else
         .blck_size                = QK_K,
+#endif
         .type_size                = sizeof(block_iq4_xs),
         .is_quantized             = true,
         .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
         .from_float               = quantize_row_iq4_xs,
         .from_float_reference     = (ggml_from_float_t)quantize_row_iq4_xs_reference,
         .vec_dot                  = ggml_vec_dot_iq4_xs_q8_K,
+#if QK_K == 64
+        .vec_dot_type             = GGML_TYPE_Q8_0,
+#else
         .vec_dot_type             = GGML_TYPE_Q8_K,
+#endif
         .nrows                    = 1,
     },
     [GGML_TYPE_Q8_K] = {
@@ -19830,6 +19838,9 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
                 GGML_ASSERT(result == row_size * nrows);
             } break;
         case GGML_TYPE_IQ4_NL:
+#if QK_K == 64
+        case GGML_TYPE_IQ4_XS:
+#endif
             {
                 GGML_ASSERT(start % QK4_NL == 0);
                 GGML_ASSERT(start % n_per_row == 0);
@@ -19838,15 +19849,17 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
                 result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
                 GGML_ASSERT(result == row_size * nrows);
             } break;
+#if QK_K != 64
         case GGML_TYPE_IQ4_XS:
             {
-                GGML_ASSERT(start % QK4_NL == 0);
+                GGML_ASSERT(start % QK_K == 0);
                 GGML_ASSERT(start % n_per_row == 0);
                 size_t start_row = start / n_per_row;
                 size_t row_size = ggml_row_size(type, n_per_row);
                 result = quantize_iq4_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
                 GGML_ASSERT(result == row_size * nrows);
             } break;
+#endif
         case GGML_TYPE_F16:
             {
                 size_t elemsize = sizeof(ggml_fp16_t);