fix an offset error, and get rid of tabs.
This commit is contained in:
parent
e37b7f8497
commit
4fb1547ba6
1 changed file with 91 additions and 91 deletions
|
@ -208,8 +208,8 @@ void GGML_5bit_Unpack_Unaligned (const uint8x16_t * q4, const uint8_t * q1, uint
|
||||||
"vpord\t%%zmm1,%%zmm6,%%zmm6%{%%k1%}\n\t" // turn on bit 5 for all values that passed the prior test.
|
"vpord\t%%zmm1,%%zmm6,%%zmm6%{%%k1%}\n\t" // turn on bit 5 for all values that passed the prior test.
|
||||||
"vmovdqa32\t\t%%zmm6%{uint8%},\t(%%r8)\n\t" // save our result.
|
"vmovdqa32\t\t%%zmm6%{uint8%},\t(%%r8)\n\t" // save our result.
|
||||||
|
|
||||||
"vloadunpackld\t\t(%%r9)%{uint8%},\t%%zmm7\n\t" // load our odd 4 bit sequences. note that it loads two 4 bit sequences into each zmm value.
|
"vloadunpackld\t\t16(%%r9)%{uint8%},\t%%zmm7\n\t" // load our odd 4 bit sequences. note that it loads two 4 bit sequences into each zmm value.
|
||||||
"vloadunpackhd\t\t16(%%r9)%{uint8%},\t%%zmm7\n\t" // load our odd 4 bit sequences. note that it loads two 4 bit sequences into each zmm value.
|
"vloadunpackhd\t\t32(%%r9)%{uint8%},\t%%zmm7\n\t" // load our odd 4 bit sequences. note that it loads two 4 bit sequences into each zmm value.
|
||||||
"vprefetch1\t32(%%r9)\n\t" // pull the next set of 4 bit sequences into the L2 cache.
|
"vprefetch1\t32(%%r9)\n\t" // pull the next set of 4 bit sequences into the L2 cache.
|
||||||
"vpandd\t%%zmm0,\t%%zmm7,\t%%zmm8\n\t" // apply the mask in zmm0, storing the low four bits of each value in zmm7 into zmm8.
|
"vpandd\t%%zmm0,\t%%zmm7,\t%%zmm8\n\t" // apply the mask in zmm0, storing the low four bits of each value in zmm7 into zmm8.
|
||||||
"vpaddd\t%%zmm1,%%zmm8,%%zmm8%{%%k2%}\n\t" // turn on bit 5 for all values that passed the prior test.
|
"vpaddd\t%%zmm1,%%zmm8,%%zmm8%{%%k2%}\n\t" // turn on bit 5 for all values that passed the prior test.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue