cuda : fix LLAMA_CUDA_F16 build

This commit is contained in:
slaren 2024-03-21 13:41:43 +01:00
parent 5b7b0ac8df
commit 4f7e57a23f

View file

@@ -9453,7 +9453,7 @@ static void ggml_cuda_op_dequantize_mul_mat_vec(
     // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
 #ifdef GGML_CUDA_F16
-    cuda_pool_alloc<half> src1_dfloat_a;
+    ggml_cuda_pool_alloc<half> src1_dfloat_a(ctx.pool());
     half * src1_dfloat = nullptr; // dfloat == half
     bool src1_convert_f16 =