From 7cbf5b282c7fcc3f2500355936bd81f45815b6d8 Mon Sep 17 00:00:00 2001 From: Andrew Godfrey Date: Mon, 23 Oct 2023 18:31:06 -0700 Subject: [PATCH] Add an f16 case to ggml_add_cast_impl and llama_build_lora_finetune_graphs --- examples/finetune/finetune.cpp | 2 +- ggml.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index d158c2327..29354db20 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -652,7 +652,7 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs( GGML_ASSERT(tokens_input->type == GGML_TYPE_I32); auto add_to_f32 = [] (struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { - if (ggml_is_quantized(a->type)) { + if (ggml_is_quantized(a->type) || a->type == GGML_TYPE_F16) { return ggml_add_cast(ctx, a, b, GGML_TYPE_F32); } else if (a->type == GGML_TYPE_F32) { return ggml_add(ctx, a, b); diff --git a/ggml.c b/ggml.c index 1bc77737c..a0501fbdd 100644 --- a/ggml.c +++ b/ggml.c @@ -5636,7 +5636,7 @@ static struct ggml_tensor * ggml_add_cast_impl( // TODO: support less-strict constraint // GGML_ASSERT(ggml_can_repeat(b, a)); GGML_ASSERT(ggml_can_repeat_rows(b, a)); - GGML_ASSERT(ggml_is_quantized(a->type)); // currently only supported for quantized input + GGML_ASSERT(ggml_is_quantized(a->type) || a->type == GGML_TYPE_F16); // currently only supported for quantized input and f16 bool is_node = false;