diff --git a/convert.py b/convert.py
index 8bb6c7e41..20e27aa42 100755
--- a/convert.py
+++ b/convert.py
@@ -947,6 +947,7 @@ class OutputFile:
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
+            ndarray.byteswap(inplace=True)
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
             of.gguf.write_tensor_data(ndarray)
 
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 598cf8e59..2e997f72a 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -22,6 +22,7 @@
 GGUF_MAGIC             = 0x46554747
 GGUF_VERSION           = 2
 GGUF_DEFAULT_ALIGNMENT = 32
+
 # general
 KEY_GENERAL_ARCHITECTURE         = "general.architecture"
 KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
@@ -428,7 +429,6 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15
 
-
 class GGUFValueType(IntEnum):
     UINT8   = 0
     INT8    = 1
@@ -483,10 +483,10 @@ class GGUFWriter:
         self.tensors = []
 
     def write_header_to_file(self):
-        self.fout.write(struct.pack("<I", GGUF_MAGIC))
-        self.fout.write(struct.pack("<I", GGUF_VERSION))
-        self.fout.write(struct.pack("<Q", self.ti_data_count))
-        self.fout.write(struct.pack("<Q", self.kv_data_count))
+        self.fout.write(struct.pack(">I", GGUF_MAGIC))
+        self.fout.write(struct.pack(">I", GGUF_VERSION))
+        self.fout.write(struct.pack(">Q", self.ti_data_count))
+        self.fout.write(struct.pack(">Q", self.kv_data_count))
         self.flush()
 #        print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
@@ -559,16 +559,16 @@ class GGUFWriter:
         self.add_val(val, GGUFValueType.ARRAY)
 
     _simple_value_packing = {
-        GGUFValueType.UINT8:   "<B",
-        GGUFValueType.INT8:    "<b",
-        GGUFValueType.UINT16:  "<H",
-        GGUFValueType.INT16:   "<h",
-        GGUFValueType.UINT32:  "<I",
-        GGUFValueType.INT32:   "<i",
-        GGUFValueType.FLOAT32: "<f",
-        GGUFValueType.UINT64:  "<Q",
-        GGUFValueType.INT64:   "<q",
-        GGUFValueType.FLOAT64: "<d",
+        GGUFValueType.UINT8:   ">B",
+        GGUFValueType.INT8:    ">b",
+        GGUFValueType.UINT16:  ">H",
+        GGUFValueType.INT16:   ">h",
+        GGUFValueType.UINT32:  ">I",
+        GGUFValueType.INT32:   ">i",
+        GGUFValueType.FLOAT32: ">f",
+        GGUFValueType.UINT64:  ">Q",
+        GGUFValueType.INT64:   ">q",
+        GGUFValueType.FLOAT64: ">d",
         GGUFValueType.BOOL:    "?" ,
     }
 
     def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
@@ -576,7 +576,7 @@ class GGUFWriter:
             vtype = GGUFValueType.get_type(val)
 
         if add_vtype:
-            self.kv_data += struct.pack("<I", vtype)
+            self.kv_data += struct.pack(">I", vtype)
             self.kv_data_count += 1
 
         pack_fmt = self._simple_value_packing.get(vtype)
@@ -584,14 +584,14 @@ class GGUFWriter:
             self.kv_data += struct.pack(pack_fmt, val)
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
-            self.kv_data += struct.pack("<Q", len(encoded_val))
+            self.kv_data += struct.pack(">Q", len(encoded_val))
             self.kv_data += encoded_val
         elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
-            self.kv_data += struct.pack("<I", ltype)
-            self.kv_data += struct.pack("<Q", len(val))
+            self.kv_data += struct.pack(">I", ltype)
+            self.kv_data += struct.pack(">Q", len(val))
             for item in val:
                 self.add_val(item, add_vtype=False)
         else:
@@ -605,22 +605,23 @@ class GGUFWriter:
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
 
         encoded_name = name.encode("utf8")
-        self.ti_data += struct.pack("<Q", len(encoded_name))
+        self.ti_data += struct.pack(">Q", len(encoded_name))
         self.ti_data += encoded_name
         n_dims = len(tensor_shape)
-        self.ti_data += struct.pack("<I", n_dims)
+        self.ti_data += struct.pack(">I", n_dims)
         for i in range(n_dims):
-            self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
+            self.ti_data += struct.pack(">Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
             dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         else:
             dtype = raw_dtype
-        self.ti_data += struct.pack("<I", dtype)
-        self.ti_data += struct.pack("<Q", self.offset_tensor)
+        self.ti_data += struct.pack(">I", dtype)
+        self.ti_data += struct.pack(">Q", self.offset_tensor)
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1
 
     def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
+        tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
             fp.seek(0)
diff --git a/tests/test-double-float.cpp b/tests/test-double-float.cpp
index b506f273f..afd7bf77f 100644
--- a/tests/test-double-float.cpp
+++ b/tests/test-double-float.cpp
@@ -4,7 +4,9 @@
 
 #undef NDEBUG
 #include <cassert>
+#if !defined(__riscv) && !defined(__s390__)
 #include <immintrin.h>
+#endif
 #include <cmath>
 #include <cstdint>
 #include <cstring>
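
For reference, a minimal standalone sketch (not part of the patch; names beyond GGUF_MAGIC are illustrative) of the two byte-order mechanisms the diff combines: struct.pack's endianness prefix for the GGUF header/metadata fields, and numpy's in-place byteswap() for the raw tensor payload.

    import struct
    import numpy as np

    GGUF_MAGIC = 0x46554747

    # "<" packs little-endian (GGUF's on-disk default), ">" packs big-endian.
    struct.pack("<I", GGUF_MAGIC)   # b'GGUF'
    struct.pack(">I", GGUF_MAGIC)   # b'FUGG' on disk; native order on s390x

    # byteswap(inplace=True) reverses the bytes of each element in the buffer,
    # so the tensor data written afterwards matches the big-endian metadata.
    tensor = np.arange(4, dtype=np.float32)
    tensor.byteswap(inplace=True)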