Fix ggml_cpy_f32_f16_sycl call arguments

This commit is contained in:
Abhilash Majumder 2024-02-08 16:42:14 +05:30 committed by GitHub
parent b7b74cef36
commit de69ea86b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -12149,6 +12149,25 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
const dpct::queue_ptr &stream) {
const int64_t ne00 = src0->ne[0];
const int64_t ne01 = src0->ne[1];
const int64_t ne02 = src0->ne[2];
const int64_t nb00 = src0->nb[0];
const int64_t nb01 = src0->nb[1];
const int64_t nb02 = src0->nb[2];
const int64_t nb03 = src0->nb[3];
const int64_t ne10 = src1->ne[0];
const int64_t ne11 = src1->ne[1];
const int64_t ne12 = src1->ne[2];
const int64_t nb10 = src1->nb[0];
const int64_t nb11 = src1->nb[1];
const int64_t nb12 = src1->nb[2];
const int64_t nb13 = src1->nb[3];
const int64_t row_diff = row_high - row_low;
// on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
@@ -12166,9 +12185,13 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
src1_dfloat = (sycl::half *)src1->data + src1_padded_row_size;
} else {
src1_dfloat = src1_dfloat_a.alloc(ne00);
//ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
// ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
// sizeof(sycl::half), 0, 0, stream);
ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
- ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
- sizeof(sycl::half), 0, 0, stream);
+ ne00, ne00, ne01, ne02, nb00, nb01, nb02,
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12,
+ nb13, stream);
}
}
#else #else