Make the offset of q4 within its 64-byte block (q4 & 0x3f) available to the inline assembly as the [OFFSET] operand.
This commit is contained in:
parent
9550ca516f
commit
efdb4116d1
1 changed file with 3 additions and 1 deletion
|
@ -199,6 +199,7 @@ void GGML_5bit_Unpack_Unaligned (const uint8x16_t * q4, const uint8_t * q1, uint
|
||||||
uint8_t lowmask = 0x0F;
|
uint8_t lowmask = 0x0F;
|
||||||
uint8_t m=1;
|
uint8_t m=1;
|
||||||
uint8_t bit5 = 0x10;
|
uint8_t bit5 = 0x10;
|
||||||
|
uint64_t q4offset=((uint64_t) q4) & 0x3f;
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"vprefetch0\t(%[SRC1])\n\t" // Issue our memory requests first thing.
|
"vprefetch0\t(%[SRC1])\n\t" // Issue our memory requests first thing.
|
||||||
|
@ -267,6 +268,7 @@ void GGML_5bit_Unpack_Unaligned (const uint8x16_t * q4, const uint8_t * q1, uint
|
||||||
"2:"
|
"2:"
|
||||||
: [DST] "+r" (dst)
|
: [DST] "+r" (dst)
|
||||||
: [SRC4] "r" (q4),
|
: [SRC4] "r" (q4),
|
||||||
|
[OFFSET] "m" (q4offset),
|
||||||
[SRC1] "r" (q1),
|
[SRC1] "r" (q1),
|
||||||
[MASK] "m" (lowmask),
|
[MASK] "m" (lowmask),
|
||||||
[M] "m" (m),
|
[M] "m" (m),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue