cuBLAS: update block_q5_1
This commit is contained in:
parent
4cd0a480bf
commit
a79756b210
1 changed file with 3 additions and 2 deletions
|
@ -68,7 +68,7 @@ static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5
|
||||||
typedef struct {
|
typedef struct {
|
||||||
half d; // delta
|
half d; // delta
|
||||||
half m; // min
|
half m; // min
|
||||||
uint32_t qh; // 5-th bit of quants
|
uint8_t qh[4]; // 5-th bit of quants
|
||||||
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
||||||
} block_q5_1;
|
} block_q5_1;
|
||||||
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
||||||
|
@ -189,7 +189,8 @@ static __global__ void dequantize_block_q5_1(const void * vx, float * y) {
|
||||||
|
|
||||||
const uint8_t * pp = x[i].qs;
|
const uint8_t * pp = x[i].qs;
|
||||||
|
|
||||||
const uint32_t qh = x[i].qh;
|
uint32_t qh;
|
||||||
|
memcpy(&qh, x[i].qh, sizeof(qh));
|
||||||
|
|
||||||
for (int l = 0; l < QK5_1; l += 2) {
|
for (int l = 0; l < QK5_1; l += 2) {
|
||||||
const uint8_t vi = pp[l/2];
|
const uint8_t vi = pp[l/2];
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue