diff --git a/ggml-quants.c b/ggml-quants.c
index c2628badf..c0724e947 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1325,16 +1325,16 @@ static void quantize_q_k_1(const float * x, int bits, int scale_bits, int block_
     // If all the weight are positive we can invert the sign of min.
     // Otherwise blocks with all positive weights need to be quantized with zero
     // min, because min scale is unsigned.
-    int all_positive = 1;
+    bool all_positive = true;
     for (int j = 0; j < QK_K; j++) {
         if (x[j] < 0.0f) {
-            all_positive = 0;
+            all_positive = false;
             break;
         }
     }
 
-    float scales[QK_K];
-    float mins[QK_K];
+    float scales[QK_K/block_size];
+    float mins[QK_K/block_size];
 
     for (int j = 0; j < QK_K/block_size; j++) {
         uint8_t q[QK_K/block_size];
@@ -1343,7 +1343,7 @@ static void quantize_q_k_1(const float * x, int bits, int scale_bits, int block_
         // Flip the sign because quantize_1 assumes that min is added, but min
         // is subtracted in k-quants.
         mins[j] = -mins[j];
-        if (!all_positive && mins[j] < 0) {
+        if ((!all_positive && mins[j] < 0) || (all_positive && mins[j] > 0)) {
            // All weights are positive in this block, but some blocks have
            // negative weights. Find new least squares scale with zero min.
            mins[j] = 0.0f;
@@ -1366,18 +1366,18 @@ static void quantize_q_k_1(const float * x, int bits, int scale_bits, int block_
     // Increasing passes would decrease RMS error by miniscule amount with
     // drawback of taking more time.
     for(int pass = 0; pass < 2; pass++) {
-        float inv_scale = max_scale == 0.0f ? 0.0f : max_group/max_scale;
-        float inv_min = max_min == 0.0f ? 0.0f : max_group/max_min;
+        float inv_scale = max_scale == 0.0f ? 0.0f : max_group/max_scale;
+        float inv_min = max_min == 0.0f ? 0.0f : max_group/max_min;
+        float block_d = max_scale/max_group;
+        float block_dmin = max_min/max_group;
         for (int j = 0; j < QK_K/block_size; ++j) {
-            uint8_t ls = nearest_int(inv_scale*scales[j]);
-            uint8_t lm = nearest_int(inv_min*mins[j]);
-            uint8_t best_lm = lm;
-            uint8_t best_ls = ls;
+            uint8_t ls = MAX(0, nearest_int(inv_scale*scales[j]));
+            uint8_t lm = MAX(0, nearest_int(inv_min*mins[j]));
             ls = MIN(max_group, ls);
             lm = MIN(max_group, lm);
+            uint8_t best_lm = lm;
+            uint8_t best_ls = ls;
             float best_rms = FLT_MAX;
-            const float d1 = max_scale / max_group;
-            const float dmin1 = max_min / max_group;
             int limit = 1;
             // Increase limit for minor RMS error decrease while increasing the
             // quantization run time.
@@ -1390,14 +1390,14 @@ static void quantize_q_k_1(const float * x, int bits, int scale_bits, int block_
                 for (int lmt = MAX(0, lm-limit); lmt <= MIN(max_group, lm+limit); lmt++) {
                     float rms = 0.0f;
                     for (int ii = 0; ii < block_size; ii++) {
-                        const float d = d1 * lst;
-                        const float dm1 = dmin1 * lmt;
+                        const float d = block_d * lst;
+                        const float dm = block_dmin * lmt;
                         int l1 = 0;
                         if (d) {
-                            l1 = nearest_int((x[block_size*j + ii] + dm1)/d);
+                            l1 = nearest_int((x[block_size*j + ii] + dm)/d);
                             l1 = MAX(0, MIN((1 << bits) - 1, l1));
                         }
-                        const float e = (d*l1 - dm1) - x[block_size*j + ii];
+                        const float e = (d*l1 - dm) - x[block_size*j + ii];
                         rms += e*e;
                     }
                     if (rms < best_rms) {
@@ -1411,8 +1411,6 @@ static void quantize_q_k_1(const float * x, int bits, int scale_bits, int block_
             block_mins[j] = best_lm;
         }
 
-        float block_d = max_scale/max_group;
-        float block_dmin = max_min/max_group;
 
         float q_fit[QK_K];
         float q_m[QK_K/block_size];
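
For reference, below is a minimal standalone sketch of the neighborhood search the hunks above adjust: for one sub-block it scans (ls, lm) candidates within +/- limit, reconstructs each weight as d*q - dm, and keeps the pair with the lowest squared error. This is not the patch's code; BLOCK_SIZE, MAX_GROUP, QBITS, clampi, nearest_int_sketch and the toy data are illustrative assumptions, not the constants or helpers used by quantize_q_k_1 in ggml-quants.c.

/* Standalone sketch, not part of the patch: brute-force (ls, lm) neighborhood
 * search for one sub-block, as performed inside the pass/j/lst/lmt loops above.
 * All names and values here are assumed stand-ins for illustration only. */
#include <float.h>
#include <math.h>
#include <stdio.h>

#define BLOCK_SIZE 16   /* weights per sub-block (assumed) */
#define MAX_GROUP  63   /* largest scale/min index (assumed) */
#define QBITS       2   /* bits per quantized weight (assumed) */

static int clampi(int v, int lo, int hi) { return v < lo ? lo : (v > hi ? hi : v); }
static int nearest_int_sketch(float v)   { return (int)floorf(v + 0.5f); }

/* Squared reconstruction error of one sub-block for scale d and min dm,
 * where each weight is rebuilt as d*q - dm with q in [0, 2^bits - 1]. */
static float block_err(const float *x, int n, float d, float dm, int bits) {
    float err = 0.0f;
    for (int i = 0; i < n; i++) {
        int q = 0;
        if (d) {
            q = clampi(nearest_int_sketch((x[i] + dm) / d), 0, (1 << bits) - 1);
        }
        const float e = (d * q - dm) - x[i];
        err += e * e;
    }
    return err;
}

int main(void) {
    float x[BLOCK_SIZE];
    for (int i = 0; i < BLOCK_SIZE; i++) x[i] = 0.1f * i - 0.3f;  /* toy data */

    const float block_d    = 0.02f;  /* stand-in for max_scale/max_group */
    const float block_dmin = 0.01f;  /* stand-in for max_min/max_group   */
    const int   limit      = 1;      /* same +/-1 window as the patch    */
    int ls = 40, lm = 25;            /* initial rounded, clamped indices */

    int best_ls = ls, best_lm = lm;
    float best_err = FLT_MAX;
    for (int lst = clampi(ls - limit, 0, MAX_GROUP); lst <= clampi(ls + limit, 0, MAX_GROUP); lst++) {
        for (int lmt = clampi(lm - limit, 0, MAX_GROUP); lmt <= clampi(lm + limit, 0, MAX_GROUP); lmt++) {
            const float err = block_err(x, BLOCK_SIZE, block_d * lst, block_dmin * lmt, QBITS);
            if (err < best_err) { best_err = err; best_ls = lst; best_lm = lmt; }
        }
    }
    printf("best ls=%d lm=%d err=%g\n", best_ls, best_lm, best_err);
    return 0;
}

The sketch also shows why the patch clamps ls and lm before saving them as best_ls/best_lm: the search window is centered on the clamped indices, so an unclamped starting point could leave the stored "best" outside the representable [0, max_group] range.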