Add f16 implementation to ggml_compute_forward_add_f16_f32

This commit is contained in:
Andrew Godfrey 2023-10-23 13:21:17 -07:00
parent 9ea91ceaf2
commit 19097c97a8

25
ggml.c
View file

@ -9358,9 +9358,15 @@ static void ggml_compute_forward_add_f16_f32(
GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32); GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(dst->type == GGML_TYPE_F16);
if (dst->type == GGML_TYPE_F32) {
GGML_ASSERT( nb0 == sizeof(float));
}
else {
GGML_ASSERT(dst->type == GGML_TYPE_F16);
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
}
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
// rows per thread // rows per thread
@ -9371,6 +9377,7 @@ static void ggml_compute_forward_add_f16_f32(
const int ir1 = MIN(ir0 + dr, nr); const int ir1 = MIN(ir0 + dr, nr);
if (nb10 == sizeof(float)) { if (nb10 == sizeof(float)) {
if (dst->type == GGML_TYPE_F16) {
for (int ir = ir0; ir < ir1; ++ir) { for (int ir = ir0; ir < ir1; ++ir) {
// src0, src1 and dst are same shape => same indices // src0, src1 and dst are same shape => same indices
const int i3 = ir/(ne2*ne1); const int i3 = ir/(ne2*ne1);
@ -9385,6 +9392,22 @@ static void ggml_compute_forward_add_f16_f32(
dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i]); dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i]);
} }
} }
} else {
for (int ir = ir0; ir < ir1; ++ir) {
// src0, src1 and dst are same shape => same indices
const int i3 = ir/(ne2*ne1);
const int i2 = (ir - i3*ne2*ne1)/ne1;
const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1);
ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11);
for (int i = 0; i < ne0; i++) {
dst_ptr[i] = GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i];
}
}
}
} }
else { else {
// src1 is not contiguous // src1 is not contiguous