diff --git a/convert-llama-h5-to-gguf.py b/convert-llama-h5-to-gguf.py
index 56063f375..35f85bb97 100644
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-h5-to-gguf.py
@@ -327,7 +327,7 @@ for part_name in part_names:
         if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
             data = data.astype(np.float16)
 
-        print( name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+        print(name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
         gguf_writer.write_tensor_to_file(data)
 
diff --git a/ggml.c b/ggml.c
index 19fbe6b9e..f7e72ed84 100644
--- a/ggml.c
+++ b/ggml.c
@@ -9140,6 +9140,8 @@ static void ggml_compute_forward_mul(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
+    GGML_ASSERT(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now");
+
     switch (src0->type) {
         case GGML_TYPE_F32:
             {