diff --git a/ggml-opencl.c b/ggml-opencl.c
index 31ab13b25..598b9a1e9 100644
--- a/ggml-opencl.c
+++ b/ggml-opencl.c
@@ -20,7 +20,7 @@ constant uint QK4_0 = 32;
 struct block_q4_0
 {
     float d;
-    uint8_t qs[QK4_0 / 2];
+    uint8_t qs[16];
 };
 
 constant uint QK4_1 = 32;
@@ -28,7 +28,7 @@ struct block_q4_1
 {
     float d;
     float m;
-    uint8_t qs[QK4_1 / 2];
+    uint8_t qs[16];
 };
 
 constant uint QK5_0 = 32;
@@ -36,7 +36,7 @@ struct __attribute__ ((packed)) block_q5_0
 {
     half d;
     uint32_t qh;
-    uint8_t qs[QK5_0 / 2];
+    uint8_t qs[16];
 };
 
 constant uint QK5_1 = 32;
@@ -45,14 +45,14 @@ struct block_q5_1
     half d;
     half m;
     uint32_t qh;
-    uint8_t qs[QK5_1 / 2];
+    uint8_t qs[16];
 };
 
 constant uint QK8_0 = 32;
 struct block_q8_0
 {
     float d;
-    uint8_t qs[QK8_0];
+    uint8_t qs[32];
 };
 
 
diff --git a/ggml.c b/ggml.c
index c5d820fec..49fe226d0 100644
--- a/ggml.c
+++ b/ggml.c
@@ -3102,11 +3102,12 @@ static const int GGML_BLCK_SIZE[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q5_1] = QK5_1,
     [GGML_TYPE_Q8_0] = QK8_0,
     [GGML_TYPE_Q8_1] = QK8_1,
+    [GGML_TYPE_Q8_1B] = QK8_1,
     [GGML_TYPE_I8]   = 1,
     [GGML_TYPE_I16]  = 1,
     [GGML_TYPE_I32]  = 1,
 };
-static_assert(GGML_TYPE_COUNT == 13, "GGML_BLCK_SIZE is outdated");
+static_assert(GGML_TYPE_COUNT == 14, "GGML_BLCK_SIZE is outdated");
 
 static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F32]  = sizeof(float),
@@ -3119,11 +3120,12 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q5_1] = sizeof(block_q5_1),
     [GGML_TYPE_Q8_0] = sizeof(block_q8_0),
     [GGML_TYPE_Q8_1] = sizeof(block_q8_1),
+    [GGML_TYPE_Q8_1B] = sizeof(block_q8_1_v2),
     [GGML_TYPE_I8]   = sizeof(int8_t),
     [GGML_TYPE_I16]  = sizeof(int16_t),
     [GGML_TYPE_I32]  = sizeof(int32_t),
 };
-static_assert(GGML_TYPE_COUNT == 13, "GGML_TYPE_SIZE is outdated");
+static_assert(GGML_TYPE_COUNT == 14, "GGML_TYPE_SIZE is outdated");
 
 
 static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
@@ -3137,11 +3139,12 @@ static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q5_1] = "q5_1",
     [GGML_TYPE_Q8_0] = "q8_0",
     [GGML_TYPE_Q8_1] = "q8_1",
+    [GGML_TYPE_Q8_1B] = "q8_1b",
     [GGML_TYPE_I8]   = "i8",
     [GGML_TYPE_I16]  = "i16",
     [GGML_TYPE_I32]  = "i32",
 };
-static_assert(GGML_TYPE_COUNT == 13, "GGML_TYPE_NAME is outdated");
+static_assert(GGML_TYPE_COUNT == 14, "GGML_TYPE_NAME is outdated");
 
 static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F32]  = false,
@@ -3154,11 +3157,12 @@ static bool GGML_IS_QUANTIZED[GGML_TYPE_COUNT] = {
     [GGML_TYPE_Q5_1] = true,
     [GGML_TYPE_Q8_0] = true,
     [GGML_TYPE_Q8_1] = true,
+    [GGML_TYPE_Q8_1B] = true,
     [GGML_TYPE_I8]   = false,
     [GGML_TYPE_I16]  = false,
     [GGML_TYPE_I32]  = false,
 };
-static_assert(GGML_TYPE_COUNT == 13, "GGML_IS_QUANTIZED is outdated");
+static_assert(GGML_TYPE_COUNT == 14, "GGML_IS_QUANTIZED is outdated");
 
 static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
     "NONE",
@@ -8041,6 +8045,7 @@ static void ggml_compute_forward_mul_mat(
         case GGML_TYPE_Q5_1:
         case GGML_TYPE_Q8_0:
         case GGML_TYPE_Q8_1:
+        case GGML_TYPE_Q8_1B:
             {
                 ggml_compute_forward_mul_mat_q_f32(params, src0, src1, dst);
             } break;
@@ -8273,6 +8278,7 @@ static void ggml_compute_forward_get_rows(
         case GGML_TYPE_Q5_1:
         case GGML_TYPE_Q8_0:
         case GGML_TYPE_Q8_1:
+        case GGML_TYPE_Q8_1B:
             {
                 ggml_compute_forward_get_rows_q(params, src0, src1, dst);
             } break;
@@ -8599,6 +8605,7 @@ static void ggml_compute_forward_alibi(
         case GGML_TYPE_Q5_1:
         case GGML_TYPE_Q8_0:
         case GGML_TYPE_Q8_1:
+        case GGML_TYPE_Q8_1B:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
         case GGML_TYPE_I32:
diff --git a/ggml.h b/ggml.h
index 35bd45e8a..d6cc480c7 100644
--- a/ggml.h
+++ b/ggml.h
@@ -240,6 +240,7 @@ extern "C" {
         GGML_TYPE_I8,
         GGML_TYPE_I16,
         GGML_TYPE_I32,
+        GGML_TYPE_Q8_1B = 13, // legacy q8_1 layout (block_q8_1_v2), used by the ggml_v2 code paths
         GGML_TYPE_COUNT,
     };
 
diff --git a/ggml_v2.c b/ggml_v2.c
index 38bf9108d..ffc6ab552 100644
--- a/ggml_v2.c
+++ b/ggml_v2.c
@@ -1592,7 +1592,7 @@ static const quantize_fns_t quantize_fns_v2[GGML_TYPE_COUNT] = {
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference_v2,
         .quantize_row_q_dot       = quantize_row_q8_1_v2,
         .vec_dot_q                = ggml_vec_dot_q4_1_q8_1_v2,
-        .vec_dot_type             = GGML_TYPE_Q8_1,
+        .vec_dot_type             = GGML_TYPE_Q8_1B,
     },
     [GGML_TYPE_Q4_2] = {
         .dequantize_row_q         = dequantize_row_q4_2_v2,
@@ -1608,7 +1608,7 @@ static const quantize_fns_t quantize_fns_v2[GGML_TYPE_COUNT] = {
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_3_reference_v2,
         .quantize_row_q_dot       = quantize_row_q8_1_v2,
         .vec_dot_q                = ggml_vec_dot_q4_3_q8_1_v2,
-        .vec_dot_type             = GGML_TYPE_Q8_1,
+        .vec_dot_type             = GGML_TYPE_Q8_1B,
     },
     [GGML_TYPE_Q5_0] = {
         .dequantize_row_q         = dequantize_row_q5_0_v2,
@@ -1624,7 +1624,7 @@ static const quantize_fns_t quantize_fns_v2[GGML_TYPE_COUNT] = {
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q5_1_reference_v2,
         .quantize_row_q_dot       = quantize_row_q8_1_v2,
         .vec_dot_q                = ggml_vec_dot_q5_1_q8_1_v2,
-        .vec_dot_type             = GGML_TYPE_Q8_1,
+        .vec_dot_type             = GGML_TYPE_Q8_1B,
     },
     [GGML_TYPE_Q8_0] = {
         .dequantize_row_q         = dequantize_row_q8_0_v2,
@@ -1634,13 +1634,13 @@ static const quantize_fns_t quantize_fns_v2[GGML_TYPE_COUNT] = {
         .vec_dot_q                = ggml_vec_dot_q8_0_q8_0_v2,
         .vec_dot_type             = GGML_TYPE_Q8_0,
     },
-    [GGML_TYPE_Q8_1] = {
+    [GGML_TYPE_Q8_1B] = {
         .dequantize_row_q         = NULL,   // TODO
         .quantize_row_q           = quantize_row_q8_1_v2,
         .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q8_1_reference_v2,
         .quantize_row_q_dot       = quantize_row_q8_1_v2,
         .vec_dot_q                = NULL,   // TODO
-        .vec_dot_type             = GGML_TYPE_Q8_1,
+        .vec_dot_type             = GGML_TYPE_Q8_1B,
     },
 };
 
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 92a42a2fa..08b60b3ef 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -224,7 +224,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     neox_ctx.hparams.n_ctx = gptj_ctx_v1.hparams.n_ctx = gptj_ctx_v2.hparams.n_ctx = gpt2_ctx_v1.hparams.n_ctx = gpt2_ctx_v2.hparams.n_ctx = params.n_ctx;
 
     printf("System Info: %s\n", llama_print_system_info());
-
+    SetQuantsUnshuffled(false);   
     if(file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2)
     {
         //newer format has bit unshuffling
diff --git a/koboldcpp.py b/koboldcpp.py
index 71f1f8b6c..7b8e766ca 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -198,7 +198,7 @@ maxctx = 2048
 maxlen = 128
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.21.1"
+KcppVersion = "1.21.2"
 
 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
     sys_version = ""
diff --git a/otherarch/tools/common-ggml.cpp b/otherarch/tools/common-ggml.cpp
index 350e95039..3c0bfe286 100644
--- a/otherarch/tools/common-ggml.cpp
+++ b/otherarch/tools/common-ggml.cpp
@@ -198,6 +198,7 @@ bool ggml_common_quantize_0(
                 case GGML_TYPE_I16:
                 case GGML_TYPE_I32:
                 case GGML_TYPE_Q8_1:
+                case GGML_TYPE_Q8_1B:
                 case GGML_TYPE_COUNT:
                     {
                         fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));