implement ggml_compute_forward_repeat_f16

This commit is contained in:
xaedes 2023-08-29 19:59:14 +02:00
parent e28cf7e9ce
commit 794bb7ea42
No known key found for this signature in database
GPG key ID: 30030EDD817EA2B1

50
ggml.c
View file

@ -9950,11 +9950,61 @@ static void ggml_compute_forward_repeat_f32(
}
}
static void ggml_compute_forward_repeat_f16(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_can_repeat(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
GGML_TENSOR_UNARY_OP_LOCALS;
// guaranteed to be an integer due to the check in ggml_can_repeat
const int nr0 = (int)(ne0/ne00);
const int nr1 = (int)(ne1/ne01);
const int nr2 = (int)(ne2/ne02);
const int nr3 = (int)(ne3/ne03);
// TODO: support for transposed / permuted tensors
GGML_ASSERT(nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
// TODO: maybe this is not optimal?
for (int i3 = 0; i3 < nr3; i3++) {
for (int k3 = 0; k3 < ne03; k3++) {
for (int i2 = 0; i2 < nr2; i2++) {
for (int k2 = 0; k2 < ne02; k2++) {
for (int i1 = 0; i1 < nr1; i1++) {
for (int k1 = 0; k1 < ne01; k1++) {
for (int i0 = 0; i0 < nr0; i0++) {
ggml_fp16_t * y = (ggml_fp16_t *) ((char *) dst->data + (i3*ne03 + k3)*nb3 + (i2*ne02 + k2)*nb2 + (i1*ne01 + k1)*nb1 + (i0*ne00)*nb0);
ggml_fp16_t * x = (ggml_fp16_t *) ((char *) src0->data + ( k3)*nb03 + ( k2)*nb02 + ( k1)*nb01);
// ggml_vec_cpy_f16(ne00, y, x)
for (int i = 0; i < ne00; ++i) {
y[i] = x[i];
}
}
}
}
}
}
}
}
}
static void ggml_compute_forward_repeat(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
switch (src0->type) {
case GGML_TYPE_F16:
{
ggml_compute_forward_repeat_f16(params, src0, dst);
} break;
case GGML_TYPE_F32:
{
ggml_compute_forward_repeat_f32(params, src0, dst);