Add OpenCL add kernel (#5151)
* Add OpenCL add kernel * Put add kernel into different string to stay within MSVC string length limit, disable float16 support due to bad results
This commit is contained in:
parent
bbe7c56c99
commit
a1d6df129b
3 changed files with 96 additions and 3 deletions
11
ggml.c
11
ggml.c
|
@ -7207,6 +7207,17 @@ static void ggml_compute_forward_add_f32(
|
|||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
#ifdef GGML_USE_CLBLAST
|
||||
if (src1->backend == GGML_BACKEND_GPU) {
|
||||
// TODO: OpenCL kernel support full broadcast
|
||||
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
||||
if (ith == 0) {
|
||||
ggml_cl_add(src0, src1, dst);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
const int nr = ggml_nrows(src0);
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue