diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 3b8032569..3df5b142b 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -2374,8 +2374,8 @@ static __global__ void dequantize_block_iq3_xxs(const void * __restrict__ vx, ds // Better (lower PPL), but requires more bit twidling, so slower #define IQ3S_MULTIPLIER 190842953LL #else -//#define IQ3S_MULTIPLIER 898886 -#define IQ3S_MULTIPLIER 842866 +#define IQ3S_MULTIPLIER 898886 +//#define IQ3S_MULTIPLIER 842866 #endif template diff --git a/ggml-quants.c b/ggml-quants.c index e6f8389db..f154d7c21 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -4125,8 +4125,8 @@ void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y // Best PPL #define IQ3S_MULTIPLIER 190842953 #else -//#define IQ3S_MULTIPLIER 898886 -#define IQ3S_MULTIPLIER 842866 +#define IQ3S_MULTIPLIER 898886 +//#define IQ3S_MULTIPLIER 842866 #endif #define IQ3S_BITS 3