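Make the offset of q4 (the low six bits of its address, i.e. its position within a 64-byte boundary) available to the inline assembly.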

This commit is contained in:
Julia Longtin 2024-05-11 19:39:53 +00:00
parent 9550ca516f
commit efdb4116d1

View file

@ -199,6 +199,7 @@ void GGML_5bit_Unpack_Unaligned (const uint8x16_t * q4, const uint8_t * q1, uint
uint8_t lowmask = 0x0F; uint8_t lowmask = 0x0F;
uint8_t m=1; uint8_t m=1;
uint8_t bit5 = 0x10; uint8_t bit5 = 0x10;
uint64_t q4offset=((uint64_t) q4) & 0x3f;
__asm__ __volatile__ ( __asm__ __volatile__ (
"vprefetch0\t(%[SRC1])\n\t" // Issue our memory requests first thing. "vprefetch0\t(%[SRC1])\n\t" // Issue our memory requests first thing.
@ -267,6 +268,7 @@ void GGML_5bit_Unpack_Unaligned (const uint8x16_t * q4, const uint8_t * q1, uint
"2:" "2:"
: [DST] "+r" (dst) : [DST] "+r" (dst)
: [SRC4] "r" (q4), : [SRC4] "r" (q4),
[OFFSET] "m" (q4offset),
[SRC1] "r" (q1), [SRC1] "r" (q1),
[MASK] "m" (lowmask), [MASK] "m" (lowmask),
[M] "m" (m), [M] "m" (m),