From 93d0a0ae7aac92b4ddb93a309a3cbda688fb00e5 Mon Sep 17 00:00:00 2001 From: Julia Longtin Date: Wed, 24 Apr 2024 17:50:12 +0000 Subject: [PATCH] use or, instead of add. bug fix? --- ggml-phi-knc-dot_q5_K_q8_K.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml-phi-knc-dot_q5_K_q8_K.c b/ggml-phi-knc-dot_q5_K_q8_K.c index ffa34a314..755ecd58f 100644 --- a/ggml-phi-knc-dot_q5_K_q8_K.c +++ b/ggml-phi-knc-dot_q5_K_q8_K.c @@ -215,7 +215,7 @@ void GGML_5bit_Unpack_Unaligned (const uint8x16_t * q4, const uint8_t * q1, uint "vloadunpackhd\t\t32(%%r9)%{uint8%},\t%%zmm7\n\t" // load our odd 4 bit sequences. note that it loads two 4 bit sequences into each zmm value. "vprefetch1\t32(%%r9)\n\t" // pull the next set of 4 bit sequences into the L2 cache. "vpandd\t%%zmm0,\t%%zmm7,\t%%zmm8\n\t" // apply a mask, storing the next low four bits of vector zmm1 into zmm5. - "vpaddd\t%%zmm1,%%zmm8,%%zmm8%{%%k2%}\n\t" // turn on bit 5 for all values that passed the prior test. + "vpord\t%%zmm1,%%zmm8,%%zmm8%{%%k2%}\n\t" // turn on bit 5 for all values that passed the prior test. "vmovdqa32\t\t%%zmm8%{uint8%},\t16(%%r8)\n\t" // save our result. "add\t$32,\t%%r8\n\t"