support s390x big endian

chenqiny 2023-10-08 11:47:39 +08:00
parent 0613562412
commit fa62c8c73a
3 changed files with 28 additions and 24 deletions

View file

@@ -947,6 +947,7 @@ class OutputFile:
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
+            ndarray.byteswap(inplace=True)
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
             of.gguf.write_tensor_data(ndarray)
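On a little-endian build host, the added ndarray.byteswap(inplace=True) reverses the bytes of every tensor element just before the data is written, so the payload lands on disk big-endian. A minimal standalone sketch of the numpy semantics this relies on (illustration only, not part of the commit):

    import numpy as np

    a = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    print(a.tobytes().hex(" ", 4))  # 0000803f 00000040 00004040 (little-endian buffer)

    # byteswap(inplace=True) reverses the byte order of each element in place,
    # so the same buffer now holds the big-endian encoding of the same values.
    a.byteswap(inplace=True)
    print(a.tobytes().hex(" ", 4))  # 3f800000 40000000 40400000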

View file

@@ -22,6 +22,7 @@ GGUF_MAGIC = 0x46554747
 GGUF_VERSION = 2
 GGUF_DEFAULT_ALIGNMENT = 32
+
 # general
 KEY_GENERAL_ARCHITECTURE = "general.architecture"
 KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
@@ -428,7 +429,6 @@ class GGMLQuantizationType(IntEnum):
     Q6_K = 14
     Q8_K = 15
-

 class GGUFValueType(IntEnum):
     UINT8 = 0
     INT8 = 1
@@ -483,10 +483,10 @@ class GGUFWriter:
         self.tensors = []

     def write_header_to_file(self):
-        self.fout.write(struct.pack("<I", GGUF_MAGIC))
-        self.fout.write(struct.pack("<I", GGUF_VERSION))
-        self.fout.write(struct.pack("<Q", self.ti_data_count))
-        self.fout.write(struct.pack("<Q", self.kv_data_count))
+        self.fout.write(struct.pack(">I", GGUF_MAGIC))
+        self.fout.write(struct.pack(">I", GGUF_VERSION))
+        self.fout.write(struct.pack(">Q", self.ti_data_count))
+        self.fout.write(struct.pack(">Q", self.kv_data_count))
         self.flush()
         # print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
@@ -559,16 +559,16 @@ class GGUFWriter:
         self.add_val(val, GGUFValueType.ARRAY)

     _simple_value_packing = {
-        GGUFValueType.UINT8: "<B",
-        GGUFValueType.INT8: "<b",
-        GGUFValueType.UINT16: "<H",
-        GGUFValueType.INT16: "<h",
-        GGUFValueType.UINT32: "<I",
-        GGUFValueType.INT32: "<i",
-        GGUFValueType.FLOAT32: "<f",
-        GGUFValueType.UINT64: "<Q",
-        GGUFValueType.INT64: "<q",
-        GGUFValueType.FLOAT64: "<d",
+        GGUFValueType.UINT8: ">B",
+        GGUFValueType.INT8: ">b",
+        GGUFValueType.UINT16: ">H",
+        GGUFValueType.INT16: ">h",
+        GGUFValueType.UINT32: ">I",
+        GGUFValueType.INT32: ">i",
+        GGUFValueType.FLOAT32: ">f",
+        GGUFValueType.UINT64: ">Q",
+        GGUFValueType.INT64: ">q",
+        GGUFValueType.FLOAT64: ">d",
         GGUFValueType.BOOL: "?",
     }

     def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
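Every multi-byte format in the table gets the ">" prefix, while BOOL keeps the bare "?" because a single byte has no byte order. A small standalone check (illustration only):

    import struct

    assert struct.pack("<H", 0x1234) == b"\x34\x12"  # little-endian
    assert struct.pack(">H", 0x1234) == b"\x12\x34"  # big-endian
    assert struct.pack("?", True) == b"\x01"         # one byte either way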
@@ -576,7 +576,7 @@ class GGUFWriter:
             vtype = GGUFValueType.get_type(val)

         if add_vtype:
-            self.kv_data += struct.pack("<I", vtype)
+            self.kv_data += struct.pack(">I", vtype)
             self.kv_data_count += 1

         pack_fmt = self._simple_value_packing.get(vtype)
@@ -584,14 +584,14 @@ class GGUFWriter:
             self.kv_data += struct.pack(pack_fmt, val)
         elif vtype == GGUFValueType.STRING:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
-            self.kv_data += struct.pack("<Q", len(encoded_val))
+            self.kv_data += struct.pack(">Q", len(encoded_val))
             self.kv_data += encoded_val
         elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
             ltype = GGUFValueType.get_type(val[0])
             if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
                 raise ValueError("All items in a GGUF array should be of the same type")
-            self.kv_data += struct.pack("<I", ltype)
-            self.kv_data += struct.pack("<Q", len(val))
+            self.kv_data += struct.pack(">I", ltype)
+            self.kv_data += struct.pack(">Q", len(val))
             for item in val:
                 self.add_val(item, add_vtype=False)
         else:
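With these changes every piece of a KV record (the value-type id, the length fields, and all multi-byte payloads) is emitted big-endian. A standalone sketch of the resulting layout for a STRING value, assuming STRING is id 8 in the GGUFValueType enum:

    import struct

    GGUF_TYPE_STRING = 8  # GGUFValueType.STRING

    encoded = "llama".encode("utf8")
    blob = struct.pack(">I", GGUF_TYPE_STRING) + struct.pack(">Q", len(encoded)) + encoded
    assert blob.hex() == "00000008" + "0000000000000005" + b"llama".hex()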
@@ -605,22 +605,23 @@ class GGUFWriter:
         assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"

         encoded_name = name.encode("utf8")
-        self.ti_data += struct.pack("<Q", len(encoded_name))
+        self.ti_data += struct.pack(">Q", len(encoded_name))
         self.ti_data += encoded_name
         n_dims = len(tensor_shape)
-        self.ti_data += struct.pack("<I", n_dims)
+        self.ti_data += struct.pack(">I", n_dims)
         for i in range(n_dims):
-            self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
+            self.ti_data += struct.pack(">Q", tensor_shape[n_dims - 1 - i])
         if raw_dtype is None:
             dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
         else:
             dtype = raw_dtype
-        self.ti_data += struct.pack("<I", dtype)
-        self.ti_data += struct.pack("<Q", self.offset_tensor)
+        self.ti_data += struct.pack(">I", dtype)
+        self.ti_data += struct.pack(">Q", self.offset_tensor)
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1

     def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
+        tensor.byteswap(inplace=True)
         if self.use_temp_file and self.temp_file is None:
             fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
             fp.seek(0)
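Note that doing the swap inside add_tensor mutates the caller's array: after the call, tensor holds byte-swapped data that no longer reads as valid native-endian values. A sketch of the visible effect, assuming a little-endian host (illustration only):

    import numpy as np

    t = np.arange(4, dtype=np.float32)
    before = t.tobytes()
    t.byteswap(inplace=True)       # what add_tensor now does to its argument
    assert t.tobytes() != before   # the caller's buffer was rewritten in place
    t.byteswap(inplace=True)       # swap back if the array is still needed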

View file

@@ -4,7 +4,9 @@
 #undef NDEBUG
 #include <cassert>
+#if !defined(__riscv) && !defined(__s390__)
 #include <immintrin.h>
+#endif
 #include <cmath>
 #include <cstdint>
 #include <cstring>
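immintrin.h provides the x86 SIMD intrinsics and is not shipped by s390x or RISC-V toolchains, so the include is guarded with the compilers' predefined __s390__ and __riscv target macros to let the file compile on those platforms.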