Fix Intel dequant issue

Fix validation issue
0cc4m 2024-03-21 18:34:19 +01:00
parent 24e5039f9d
commit 1fceeb9046
3 changed files with 5277 additions and 5246 deletions

File diff suppressed because it is too large

ggml-vulkan.cpp

@@ -2143,7 +2143,6 @@ static void ggml_vk_buffer_write_2d(ggml_backend_vk_context * ctx, vk_buffer& ds
ggml_vk_submit(subctx, ctx->fence);
VK_CHECK(ctx->device->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "vk_buffer_write_2d waitForFences");
ctx->device->device.resetFences({ ctx->fence });
- ggml_vk_queue_cleanup(ctx, ctx->device->transfer_queue);
}
}
@@ -2240,7 +2239,6 @@ static void ggml_vk_buffer_read(ggml_backend_vk_context * ctx, vk_buffer& src, s
for (auto& cpy : subctx->out_memcpys) {
memcpy(cpy.dst, cpy.src, cpy.n);
}
- ggml_vk_queue_cleanup(ctx, ctx->device->transfer_queue);
}
}
@@ -5935,6 +5933,10 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_compute_
return;
}
+ #ifdef GGML_VULKAN_DEBUG
+ std::cerr << "ggml_vk_check_results_0(" << tensor->name << ")" << std::endl;
+ #endif
ggml_tensor * src0 = tensor->src[0];
ggml_tensor * src1 = tensor->src[1];
ggml_tensor * src2 = tensor->src[2];
@@ -6244,6 +6246,10 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_compute_
return;
}
+ #ifdef GGML_VULKAN_DEBUG
+ std::cerr << "ggml_vk_check_results_1(" << tensor->name << ")" << std::endl;
+ #endif
ggml_tensor * src0 = tensor->src[0];
ggml_tensor * src1 = tensor->src[1];

ggml_vk_generate_shaders.py

@@ -451,10 +451,10 @@ mulmat_load_q3_K = """
is < 8 ? (data_a[ib].scales[is-0] & 0xF) | (((data_a[ib].scales[is+4] >> 2) & 3) << 4) :
is < 12 ? (data_a[ib].scales[is-8] >> 4) | (((data_a[ib].scales[is+0] >> 4) & 3) << 4) :
(data_a[ib].scales[is-8] >> 4) | (((data_a[ib].scales[is-4] >> 6) & 3) << 4));
- const FLOAT_TYPE dl = FLOAT_TYPE(data_a[ib].d) * FLOAT_TYPE(us - 32);
+ const float dl = float(data_a[ib].d) * float(us - 32);
- buf_a[buf_idx ] = dl * FLOAT_TYPE(int8_t((data_a[ib].qs[qsi ] >> qsshift) & 3) - (((data_a[ib].hmask[hmi ] & m) != 0) ? 0 : 4));
- buf_a[buf_idx + 1] = dl * FLOAT_TYPE(int8_t((data_a[ib].qs[qsi + 1] >> qsshift) & 3) - (((data_a[ib].hmask[hmi + 1] & m) != 0) ? 0 : 4));"""
+ buf_a[buf_idx ] = FLOAT_TYPE(dl * float(int8_t((data_a[ib].qs[qsi ] >> qsshift) & 3) - (((data_a[ib].hmask[hmi ] & m) != 0) ? 0 : 4)));
+ buf_a[buf_idx + 1] = FLOAT_TYPE(dl * float(int8_t((data_a[ib].qs[qsi + 1] >> qsshift) & 3) - (((data_a[ib].hmask[hmi + 1] & m) != 0) ? 0 : 4)));"""
mulmat_load_q4_K = """
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
@@ -479,11 +479,11 @@ mulmat_load_q4_K = """
sc = uint8_t((data_a[ib].scales[is + 4] & 0xF) | ((data_a[ib].scales[is - 4] >> 6) << 4));
mbyte = uint8_t((data_a[ib].scales[is + 4] >> 4) | ((data_a[ib].scales[is ] >> 6) << 4));
}
- const FLOAT_TYPE d = FLOAT_TYPE(loadd.x) * sc;
- const FLOAT_TYPE m = FLOAT_TYPE(loadd.y) * mbyte;
+ const float d = loadd.x * sc;
+ const float m = loadd.y * mbyte;
- buf_a[buf_idx ] = d * FLOAT_TYPE((data_a[ib].qs[qsi ] >> (b * 4)) & 0xF) - m;
- buf_a[buf_idx + 1] = d * FLOAT_TYPE((data_a[ib].qs[qsi + 1] >> (b * 4)) & 0xF) - m;"""
+ buf_a[buf_idx ] = FLOAT_TYPE(d * float((data_a[ib].qs[qsi ] >> (b * 4)) & 0xF) - m);
+ buf_a[buf_idx + 1] = FLOAT_TYPE(d * float((data_a[ib].qs[qsi + 1] >> (b * 4)) & 0xF) - m);"""
mulmat_load_q5_K = """
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
@@ -511,11 +511,11 @@ mulmat_load_q5_K = """
sc = uint8_t((data_a[ib].scales[is + 4] & 0xF) | ((data_a[ib].scales[is - 4] >> 6) << 4));
mbyte = uint8_t((data_a[ib].scales[is + 4] >> 4) | ((data_a[ib].scales[is ] >> 6) << 4));
}
- const FLOAT_TYPE d = FLOAT_TYPE(loadd.x) * sc;
- const FLOAT_TYPE m = FLOAT_TYPE(loadd.y) * mbyte;
+ const float d = loadd.x * sc;
+ const float m = loadd.y * mbyte;
- buf_a[buf_idx ] = d * FLOAT_TYPE(((data_a[ib].qs[qsi ] >> (b * 4)) & 0xF) + ((data_a[ib].qh[qhi ] & hm) != 0 ? 16 : 0)) - m;
- buf_a[buf_idx + 1] = d * FLOAT_TYPE(((data_a[ib].qs[qsi + 1] >> (b * 4)) & 0xF) + ((data_a[ib].qh[qhi + 1] & hm) != 0 ? 16 : 0)) - m;"""
+ buf_a[buf_idx ] = FLOAT_TYPE(d * (float((data_a[ib].qs[qsi ] >> (b * 4)) & 0xF) + float((data_a[ib].qh[qhi ] & hm) != 0 ? 16 : 0)) - m);
+ buf_a[buf_idx + 1] = FLOAT_TYPE(d * (float((data_a[ib].qs[qsi + 1] >> (b * 4)) & 0xF) + float((data_a[ib].qh[qhi + 1] & hm) != 0 ? 16 : 0)) - m);"""
mulmat_load_q6_K = """
const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
@@ -532,10 +532,10 @@ mulmat_load_q6_K = """
const uint qsi = n * 64 + (iqs % 32) * 2; // 0,2,4..126
const uint qhi = n * 32 + (iqs % 16) * 2; // 0,2,4..62
- const FLOAT_TYPE dscale = FLOAT_TYPE(data_a[ib].d) * FLOAT_TYPE(data_a[ib].scales[is]);
+ const float dscale = float(data_a[ib].d) * float(data_a[ib].scales[is]);
- buf_a[buf_idx ] = dscale * FLOAT_TYPE(int8_t(((data_a[ib].ql[qsi ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi ] >> qhshift) & 3) << 4)) - 32);
- buf_a[buf_idx + 1] = dscale * FLOAT_TYPE(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32);"""
+ buf_a[buf_idx ] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi ] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi ] >> qhshift) & 3) << 4)) - 32));
+ buf_a[buf_idx + 1] = FLOAT_TYPE(dscale * float(int8_t(((data_a[ib].ql[qsi + 1] >> (b * 4)) & 0xF) | (((data_a[ib].qh[qhi + 1] >> qhshift) & 3) << 4)) - 32));"""
mulmat_body2 = """
}
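
Note on the shader changes above (not part of the commit): when FLOAT_TYPE is the 16-bit shader float type, the old code rounded every intermediate of the dequantization (scale multiply, offset subtract) to half precision, which is presumably the source of the Intel dequant issue; the new code keeps the intermediates in 32-bit float and converts only the final value stored into buf_a. A minimal NumPy sketch of that rounding difference, using made-up scale/min values in place of real Q4_K data:

import numpy as np

# Hypothetical per-block scales/mins and 4-bit quant values; NumPy float16
# stands in for the shader's half-precision FLOAT_TYPE.
rng = np.random.default_rng(0)
d = rng.uniform(0.001, 0.05, 10000).astype(np.float16)   # block scale (made up)
m = rng.uniform(0.001, 0.05, 10000).astype(np.float16)   # block min (made up)
q = rng.integers(0, 16, 10000)                            # 4-bit quant values

ref = d.astype(np.float64) * q - m.astype(np.float64)     # high-precision reference

# Old shader path: every intermediate of d * q - m is rounded to fp16.
old = d * q.astype(np.float16) - m

# New shader path: compute in fp32, convert once when storing the result.
new = (d.astype(np.float32) * q.astype(np.float32) - m.astype(np.float32)).astype(np.float16)

print(np.abs(old - ref).mean())   # typically the larger average error
print(np.abs(new - ref).mean())   # typically the smaller average error

On a typical run the fp32-intermediate path lands closer to the reference on average, which matches the direction of the shader change.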