From 7fc0250d1556de0353cfe31bfeea313cd758f57b Mon Sep 17 00:00:00 2001 From: chenqiny Date: Fri, 13 Oct 2023 00:23:16 +0800 Subject: [PATCH] 1. Check in ggml.c if endianness does not match 2. Update GGUF version 3. Change get_pack_prefix to a property 4. Update information log --- convert-baichuan-hf-to-gguf.py | 3 ++- ggml.c | 8 ++++- ggml.h | 3 +- gguf-py/gguf/gguf.py | 54 ++++++++++++++++++---------------- gguf-py/pyproject.toml | 2 +- 5 files changed, 40 insertions(+), 30 deletions(-) diff --git a/convert-baichuan-hf-to-gguf.py b/convert-baichuan-hf-to-gguf.py index c1c080f60..a1783f71f 100755 --- a/convert-baichuan-hf-to-gguf.py +++ b/convert-baichuan-hf-to-gguf.py @@ -90,6 +90,7 @@ if not dir_model.is_dir(): endianess = gguf.GGUFEndian.LITTLE if args.bigendian: endianess = gguf.GGUFEndian.BIG -print(f"gguf: Conversion Endianess {endianess}") +endianess_str = "Big Endian" if args.bigendian else "Little Endian" +print(f"gguf: Conversion Endianness {endianess_str}") # possible tensor data types # ftype == 0 -> float32 diff --git a/ggml.c b/ggml.c index 6d1776ca4..04b88c98a 100644 --- a/ggml.c +++ b/ggml.c @@ -20916,7 +20916,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p gguf_fread_el(file, &magic, sizeof(magic), &offset); if (magic != GGUF_MAGIC) { - fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic); + if (magic == GGUF_WRONG_ENDIAN_MAGIC) + { + fprintf(stderr, "Endianness of the GGUF file and platform do not match. %s: invalid magic number %08x.\n", __func__, magic); + } + else { + fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic); + } fclose(file); return NULL; } diff --git a/ggml.h b/ggml.h index 3eddc44b9..fdd8e31be 100644 --- a/ggml.h +++ b/ggml.h @@ -232,7 +232,8 @@ #define GGML_EXIT_ABORTED 1 #define GGUF_MAGIC 0x46554747 // "GGUF" -#define GGUF_VERSION 2 +#define GGUF_WRONG_ENDIAN_MAGIC 0x47475546 +#define GGUF_VERSION 3 #define GGUF_DEFAULT_ALIGNMENT 32 diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py index c24bbc1ba..748d59343 100644 
--- a/gguf-py/gguf/gguf.py +++ b/gguf-py/gguf/gguf.py @@ -19,7 +19,7 @@ import numpy as np # GGUF_MAGIC = 0x46554747 -GGUF_VERSION = 2 +GGUF_VERSION = 3 GGUF_DEFAULT_ALIGNMENT = 32 @@ -621,7 +621,8 @@ class GGUFWriter: temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None tensors: list[tuple[np.ndarray[Any, Any], int]] - def get_pack_prefix(self): + @property + def pack_prefix(self): if self.endianess==GGUFEndian.LITTLE: return "<" else: @@ -632,28 +633,29 @@ class GGUFWriter: self.arch = arch self.endianess = endianess self._simple_value_packing = { - GGUFValueType.UINT8: f"{self.get_pack_prefix()}B", - GGUFValueType.INT8: f"{self.get_pack_prefix()}b", - GGUFValueType.UINT16: f"{self.get_pack_prefix()}H", - GGUFValueType.INT16: f"{self.get_pack_prefix()}h", - GGUFValueType.UINT32: f"{self.get_pack_prefix()}I", - GGUFValueType.INT32: f"{self.get_pack_prefix()}i", - GGUFValueType.FLOAT32: f"{self.get_pack_prefix()}f", - GGUFValueType.UINT64: f"{self.get_pack_prefix()}Q", - GGUFValueType.INT64: f"{self.get_pack_prefix()}q", - GGUFValueType.FLOAT64: f"{self.get_pack_prefix()}d", + GGUFValueType.UINT8: f"{self.pack_prefix}B", + GGUFValueType.INT8: f"{self.pack_prefix}b", + GGUFValueType.UINT16: f"{self.pack_prefix}H", + GGUFValueType.INT16: f"{self.pack_prefix}h", + GGUFValueType.UINT32: f"{self.pack_prefix}I", + GGUFValueType.INT32: f"{self.pack_prefix}i", + GGUFValueType.FLOAT32: f"{self.pack_prefix}f", + GGUFValueType.UINT64: f"{self.pack_prefix}Q", + GGUFValueType.INT64: f"{self.pack_prefix}q", + GGUFValueType.FLOAT64: f"{self.pack_prefix}d", GGUFValueType.BOOL: "?" 
, } self.add_architecture() self.use_temp_file = use_temp_file self.tensors = [] - print(f"This gguf file is for {self.endianess} only") + endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian" + print(f"This gguf file is for {endianess_str} only") def write_header_to_file(self): - self.fout.write(struct.pack(f"{self.get_pack_prefix()}I", GGUF_MAGIC)) - self.fout.write(struct.pack(f"{self.get_pack_prefix()}I", GGUF_VERSION)) - self.fout.write(struct.pack(f"{self.get_pack_prefix()}Q", self.ti_data_count)) - self.fout.write(struct.pack(f"{self.get_pack_prefix()}Q", self.kv_data_count)) + self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_MAGIC)) + self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION)) + self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count)) + self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count)) self.flush() # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count)) @@ -730,7 +732,7 @@ class GGUFWriter: vtype = GGUFValueType.get_type(val) if add_vtype: - self.kv_data += struct.pack(f"{self.get_pack_prefix()}I", vtype) + self.kv_data += struct.pack(f"{self.pack_prefix}I", vtype) self.kv_data_count += 1 pack_fmt = self._simple_value_packing.get(vtype) @@ -738,14 +740,14 @@ class GGUFWriter: self.kv_data += struct.pack(pack_fmt, val) elif vtype == GGUFValueType.STRING: encoded_val = val.encode("utf8") if isinstance(val, str) else val - self.kv_data += struct.pack(f"{self.get_pack_prefix()}Q", len(encoded_val)) + self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_val)) self.kv_data += encoded_val elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0: ltype = GGUFValueType.get_type(val[0]) if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]): raise ValueError("All items in a GGUF array should be of the same type") - self.kv_data += struct.pack(f"{self.get_pack_prefix()}I", ltype) - self.kv_data += 
struct.pack(f"{self.get_pack_prefix()}Q", len(val)) + self.kv_data += struct.pack(f"{self.pack_prefix}I", ltype) + self.kv_data += struct.pack(f"{self.pack_prefix}Q", len(val)) for item in val: self.add_val(item, add_vtype=False) else: @@ -759,18 +761,18 @@ class GGUFWriter: assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now" encoded_name = name.encode("utf8") - self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", len(encoded_name)) + self.ti_data += struct.pack(f"{self.pack_prefix}Q", len(encoded_name)) self.ti_data += encoded_name n_dims = len(tensor_shape) - self.ti_data += struct.pack(f"{self.get_pack_prefix()}I", n_dims) + self.ti_data += struct.pack(f"{self.pack_prefix}I", n_dims) for i in range(n_dims): - self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", tensor_shape[n_dims - 1 - i]) + self.ti_data += struct.pack(f"{self.pack_prefix}Q", tensor_shape[n_dims - 1 - i]) if raw_dtype is None: dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16 else: dtype = raw_dtype - self.ti_data += struct.pack(f"{self.get_pack_prefix()}I", dtype) - self.ti_data += struct.pack(f"{self.get_pack_prefix()}Q", self.offset_tensor) + self.ti_data += struct.pack(f"{self.pack_prefix}I", dtype) + self.ti_data += struct.pack(f"{self.pack_prefix}Q", self.offset_tensor) self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment) self.ti_data_count += 1 diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml index 07a7ab4dd..f0741a7c2 100644 --- a/gguf-py/pyproject.toml +++ b/gguf-py/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "gguf" -version = "0.4.4" +version = "0.4.5" description = "Write ML models in GGUF for GGML" authors = ["GGML "] packages = [