diff --git a/ggml.c b/ggml.c index 52efe0d0f..5e3f4c199 100644 --- a/ggml.c +++ b/ggml.c @@ -5115,6 +5115,44 @@ struct ggml_tensor * ggml_add_inplace( return ggml_add_impl(ctx, a, b, true); } +// ggml_add_cast + +static struct ggml_tensor * ggml_add_cast_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + enum ggml_type type) { + // TODO: support less-strict constraint + // GGML_ASSERT(ggml_can_repeat(b, a)); + GGML_ASSERT(ggml_can_repeat_rows(b, a)); + GGML_ASSERT(ggml_is_quantized(a->type)); // currently only supported for quantized input + + bool is_node = false; + + if (a->grad || b->grad) { + // TODO: support backward pass for broadcasting + GGML_ASSERT(ggml_are_same_shape(a, b)); + is_node = true; + } + + struct ggml_tensor * result = ggml_new_tensor(ctx, type, a->n_dims, a->ne); + + result->op = GGML_OP_ADD; + result->grad = is_node ? ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne) : NULL; + result->src[0] = a; + result->src[1] = b; + + return result; +} + +struct ggml_tensor * ggml_add_cast( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + enum ggml_type type) { + return ggml_add_impl(ctx, a, b, false); +} + // ggml_add1 static struct ggml_tensor * ggml_add1_impl( @@ -8317,8 +8355,9 @@ static void ggml_compute_forward_add_q_f32( const int nth = params->nth; const enum ggml_type type = src0->type; + const enum ggml_type dtype = dst->type; ggml_to_float_t const dequantize_row_q = type_traits[type].to_float; - ggml_from_float_t const quantize_row_q = type_traits[type].from_float; + ggml_from_float_t const quantize_row_q = type_traits[dtype].from_float; // we don't support permuted src0 or src1 GGML_ASSERT(nb00 == GGML_TYPE_SIZE[type]); @@ -8368,7 +8407,11 @@ static void ggml_compute_forward_add_q_f32( // add src1 ggml_vec_acc_f32(ne00, wdata, src1_row); // quantize row to dst - quantize_row_q(wdata, dst_row, ne00); + if (quantize_row_q != NULL) { + quantize_row_q(wdata, dst_row, ne00); + } else { + memcpy(dst_row, wdata, ne0*nb0); + } } } diff --git a/ggml.h b/ggml.h index cc43afdde..02db9ad2b 100644 --- a/ggml.h +++ b/ggml.h @@ -670,6 +670,12 @@ extern "C" { struct ggml_tensor * a, struct ggml_tensor * b); + GGML_API struct ggml_tensor * ggml_add_cast( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + enum ggml_type type); + GGML_API struct ggml_tensor * ggml_add1( struct ggml_context * ctx, struct ggml_tensor * a,