Add OpenCL add kernel (#5151)

* Add OpenCL add kernel

* Put the add kernel into a separate string to stay within the MSVC string length limit; disable float16 support due to bad results
This commit is contained in:
0cc4m 2024-01-26 23:07:32 +01:00 committed by GitHub
parent bbe7c56c99
commit a1d6df129b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 96 additions and 3 deletions

11
ggml.c
View file

@ -7207,6 +7207,17 @@ static void ggml_compute_forward_add_f32(
const int ith = params->ith;
const int nth = params->nth;
#ifdef GGML_USE_CLBLAST
if (src1->backend == GGML_BACKEND_GPU) {
// TODO: add full broadcast support to the OpenCL add kernel
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
if (ith == 0) {
ggml_cl_add(src0, src1, dst);
}
return;
}
#endif
const int nr = ggml_nrows(src0);
GGML_TENSOR_BINARY_OP_LOCALS