diff --git a/ggml.c b/ggml.c
index fbc66f65b..c94006e51 100644
--- a/ggml.c
+++ b/ggml.c
@@ -470,6 +470,19 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .type_size                = sizeof(int32_t),
         .is_quantized             = false,
     },
+    [GGML_TYPE_I64] = {
+        .type_name                = "i64",
+        .blck_size                = 1,
+        .type_size                = sizeof(int64_t),
+        .is_quantized             = false,
+    },
+    [GGML_TYPE_F64] = {
+        .type_name                = "f64",
+        .blck_size                = 1,
+        .type_size                = sizeof(double),
+        .is_quantized             = false,
+        .nrows                    = 1,
+    },
     [GGML_TYPE_F32] = {
         .type_name                = "f32",
         .blck_size                = 1,
@@ -12418,6 +12431,8 @@ static void ggml_compute_forward_alibi(
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
         case GGML_TYPE_I32:
+        case GGML_TYPE_I64:
+        case GGML_TYPE_F64:
         case GGML_TYPE_COUNT:
             {
                 GGML_ASSERT(false);
@@ -12504,6 +12519,8 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
         case GGML_TYPE_I32:
+        case GGML_TYPE_I64:
+        case GGML_TYPE_F64:
         case GGML_TYPE_COUNT:
             {
                 GGML_ASSERT(false);
diff --git a/ggml.h b/ggml.h
index ab26c8f59..c937d4a53 100644
--- a/ggml.h
+++ b/ggml.h
@@ -366,6 +366,8 @@ extern "C" {
         GGML_TYPE_I8  = 24,
         GGML_TYPE_I16 = 25,
         GGML_TYPE_I32 = 26,
+        GGML_TYPE_I64 = 27,
+        GGML_TYPE_F64 = 28,
         GGML_TYPE_COUNT,
     };
 
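The enum values and element sizes added above must stay in sync with the gguf-py constants below: 27/28 for the type IDs, and 8 bytes per element to match `sizeof(int64_t)` and `sizeof(double)`. A minimal sanity check for that invariant, as a sketch (this script is not part of the patch; it only assumes the updated `gguf` package is importable):

```python
# Hypothetical cross-check, not part of this patch: verify the Python-side
# constants mirror the C-side enum values and element sizes added in ggml.h.
from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType

assert GGMLQuantizationType.I64 == 27 and GGMLQuantizationType.F64 == 28
# Both are plain (unquantized) scalar types: block size 1, 8 bytes per element,
# matching sizeof(int64_t) and sizeof(double) in the ggml.c type traits.
assert GGML_QUANT_SIZES[GGMLQuantizationType.I64] == (1, 8)
assert GGML_QUANT_SIZES[GGMLQuantizationType.F64] == (1, 8)
```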
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 2d7cf16c1..458a641dc 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -665,6 +665,8 @@ class GGMLQuantizationType(IntEnum):
     I8  = 24
     I16 = 25
     I32 = 26
+    I64 = 27
+    F64 = 28
 
 
 class GGUFEndian(IntEnum):
@@ -734,6 +736,8 @@ GGML_QUANT_SIZES = {
     GGMLQuantizationType.I8:  (1, 1),
     GGMLQuantizationType.I16: (1, 2),
     GGMLQuantizationType.I32: (1, 4),
+    GGMLQuantizationType.I64: (1, 8),
+    GGMLQuantizationType.F64: (1, 8),
 }
 
 
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 1c10f5753..33afac552 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -242,12 +242,15 @@ class GGUFReader:
             n_bytes = n_elems * type_size // block_size
             data_offs = int(start_offs + offset_tensor[0])
             item_type: npt.DTypeLike
-            if ggml_type == GGMLQuantizationType.F32:
-                item_count = n_elems
-                item_type = np.float32
-            elif ggml_type == GGMLQuantizationType.F16:
+            if ggml_type == GGMLQuantizationType.F16:
                 item_count = n_elems
                 item_type = np.float16
+            elif ggml_type == GGMLQuantizationType.F32:
+                item_count = n_elems
+                item_type = np.float32
+            elif ggml_type == GGMLQuantizationType.F64:
+                item_count = n_elems
+                item_type = np.float64
             elif ggml_type == GGMLQuantizationType.I8:
                 item_count = n_elems
                 item_type = np.int8
@@ -257,6 +260,9 @@ class GGUFReader:
             elif ggml_type == GGMLQuantizationType.I32:
                 item_count = n_elems
                 item_type = np.int32
+            elif ggml_type == GGMLQuantizationType.I64:
+                item_count = n_elems
+                item_type = np.int64
             else:
                 item_count = n_bytes
                 item_type = np.uint8
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 81b2eb884..1967b633c 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -204,18 +204,22 @@ class GGUFWriter:
         for i in range(n_dims):
             self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
-            if tensor_dtype == np.float32:
-                dtype = GGMLQuantizationType.F32
-            elif tensor_dtype == np.float16:
+            if tensor_dtype == np.float16:
                 dtype = GGMLQuantizationType.F16
+            elif tensor_dtype == np.float32:
+                dtype = GGMLQuantizationType.F32
+            elif tensor_dtype == np.float64:
+                dtype = GGMLQuantizationType.F64
             elif tensor_dtype == np.int8:
                 dtype = GGMLQuantizationType.I8
             elif tensor_dtype == np.int16:
                 dtype = GGMLQuantizationType.I16
             elif tensor_dtype == np.int32:
                 dtype = GGMLQuantizationType.I32
+            elif tensor_dtype == np.int64:
+                dtype = GGMLQuantizationType.I64
             else:
-                raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now")
+                raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
         else:
             dtype = raw_dtype
         self.ti_data += self._pack("I", dtype)
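For reference, a round-trip of the new dtypes through gguf-py, written as a sketch rather than a test shipped with this patch: it assumes the patch is applied and `gguf` is on `PYTHONPATH`; the file name and tensor names are arbitrary placeholders.

```python
# Sketch: exercise the new writer and reader code paths. Assumes this patch is
# applied; "test-i64-f64.gguf" and the tensor names are arbitrary.
import numpy as np

from gguf import GGUFReader, GGUFWriter

# gguf_writer.py now maps np.float64 -> F64 and np.int64 -> I64 instead of
# raising the "Only F32, F16, I8, I16, I32 tensors are supported" ValueError.
writer = GGUFWriter("test-i64-f64.gguf", arch="llama")
writer.add_tensor("t_f64", np.arange(8, dtype=np.float64).reshape(2, 4))
writer.add_tensor("t_i64", np.arange(8, dtype=np.int64).reshape(2, 4))
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()

# gguf_reader.py now decodes these tensors as np.float64 / np.int64 instead of
# falling through to the raw np.uint8 fallback branch.
reader = GGUFReader("test-i64-f64.gguf")
for t in reader.tensors:
    print(t.name, t.tensor_type.name, t.data.dtype, tuple(t.shape))
```

Note that the reordered F16/F32 branches in the reader and writer are behavior-neutral; the functional change is the new F64 and I64 cases and the widened error message.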