Support s390x big-endian
This commit is contained in:
parent
0613562412
commit
fa62c8c73a
3 changed files with 28 additions and 24 deletions
|
@ -947,6 +947,7 @@ class OutputFile:
|
||||||
elapsed = time.time() - start
|
elapsed = time.time() - start
|
||||||
size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
|
size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
|
||||||
padi = len(str(len(model)))
|
padi = len(str(len(model)))
|
||||||
|
ndarray.byteswap(inplace=True)
|
||||||
print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
|
print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}")
|
||||||
of.gguf.write_tensor_data(ndarray)
|
of.gguf.write_tensor_data(ndarray)
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ GGUF_MAGIC = 0x46554747
|
||||||
GGUF_VERSION = 2
|
GGUF_VERSION = 2
|
||||||
GGUF_DEFAULT_ALIGNMENT = 32
|
GGUF_DEFAULT_ALIGNMENT = 32
|
||||||
|
|
||||||
|
|
||||||
# general
|
# general
|
||||||
KEY_GENERAL_ARCHITECTURE = "general.architecture"
|
KEY_GENERAL_ARCHITECTURE = "general.architecture"
|
||||||
KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
|
KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version"
|
||||||
|
@ -428,7 +429,6 @@ class GGMLQuantizationType(IntEnum):
|
||||||
Q6_K = 14
|
Q6_K = 14
|
||||||
Q8_K = 15
|
Q8_K = 15
|
||||||
|
|
||||||
|
|
||||||
class GGUFValueType(IntEnum):
|
class GGUFValueType(IntEnum):
|
||||||
UINT8 = 0
|
UINT8 = 0
|
||||||
INT8 = 1
|
INT8 = 1
|
||||||
|
@ -483,10 +483,10 @@ class GGUFWriter:
|
||||||
self.tensors = []
|
self.tensors = []
|
||||||
|
|
||||||
def write_header_to_file(self):
|
def write_header_to_file(self):
|
||||||
self.fout.write(struct.pack("<I", GGUF_MAGIC))
|
self.fout.write(struct.pack(">I", GGUF_MAGIC))
|
||||||
self.fout.write(struct.pack("<I", GGUF_VERSION))
|
self.fout.write(struct.pack(">I", GGUF_VERSION))
|
||||||
self.fout.write(struct.pack("<Q", self.ti_data_count))
|
self.fout.write(struct.pack(">Q", self.ti_data_count))
|
||||||
self.fout.write(struct.pack("<Q", self.kv_data_count))
|
self.fout.write(struct.pack(">Q", self.kv_data_count))
|
||||||
self.flush()
|
self.flush()
|
||||||
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
|
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
|
||||||
|
|
||||||
|
@ -559,16 +559,16 @@ class GGUFWriter:
|
||||||
self.add_val(val, GGUFValueType.ARRAY)
|
self.add_val(val, GGUFValueType.ARRAY)
|
||||||
|
|
||||||
_simple_value_packing = {
|
_simple_value_packing = {
|
||||||
GGUFValueType.UINT8: "<B",
|
GGUFValueType.UINT8: f"{GGUF_ENDIANESS}B",
|
||||||
GGUFValueType.INT8: "<b",
|
GGUFValueType.INT8: f"{GGUF_ENDIANESS.}b",
|
||||||
GGUFValueType.UINT16: "<H",
|
GGUFValueType.UINT16: f"{GGUF_ENDIANESS.get}H",
|
||||||
GGUFValueType.INT16: "<h",
|
GGUFValueType.INT16: ">h",
|
||||||
GGUFValueType.UINT32: "<I",
|
GGUFValueType.UINT32: ">I",
|
||||||
GGUFValueType.INT32: "<i",
|
GGUFValueType.INT32: ">i",
|
||||||
GGUFValueType.FLOAT32: "<f",
|
GGUFValueType.FLOAT32: ">f",
|
||||||
GGUFValueType.UINT64: "<Q",
|
GGUFValueType.UINT64: ">Q",
|
||||||
GGUFValueType.INT64: "<q",
|
GGUFValueType.INT64: ">q",
|
||||||
GGUFValueType.FLOAT64: "<d",
|
GGUFValueType.FLOAT64: ">d",
|
||||||
GGUFValueType.BOOL: "?" ,
|
GGUFValueType.BOOL: "?" ,
|
||||||
}
|
}
|
||||||
def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
|
def add_val(self, val: Any, vtype: GGUFValueType | None = None, add_vtype: bool = True):
|
||||||
|
@ -576,7 +576,7 @@ class GGUFWriter:
|
||||||
vtype = GGUFValueType.get_type(val)
|
vtype = GGUFValueType.get_type(val)
|
||||||
|
|
||||||
if add_vtype:
|
if add_vtype:
|
||||||
self.kv_data += struct.pack("<I", vtype)
|
self.kv_data += struct.pack(">I", vtype)
|
||||||
self.kv_data_count += 1
|
self.kv_data_count += 1
|
||||||
|
|
||||||
pack_fmt = self._simple_value_packing.get(vtype)
|
pack_fmt = self._simple_value_packing.get(vtype)
|
||||||
|
@ -584,14 +584,14 @@ class GGUFWriter:
|
||||||
self.kv_data += struct.pack(pack_fmt, val)
|
self.kv_data += struct.pack(pack_fmt, val)
|
||||||
elif vtype == GGUFValueType.STRING:
|
elif vtype == GGUFValueType.STRING:
|
||||||
encoded_val = val.encode("utf8") if isinstance(val, str) else val
|
encoded_val = val.encode("utf8") if isinstance(val, str) else val
|
||||||
self.kv_data += struct.pack("<Q", len(encoded_val))
|
self.kv_data += struct.pack(">Q", len(encoded_val))
|
||||||
self.kv_data += encoded_val
|
self.kv_data += encoded_val
|
||||||
elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
|
elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
|
||||||
ltype = GGUFValueType.get_type(val[0])
|
ltype = GGUFValueType.get_type(val[0])
|
||||||
if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
|
if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
|
||||||
raise ValueError("All items in a GGUF array should be of the same type")
|
raise ValueError("All items in a GGUF array should be of the same type")
|
||||||
self.kv_data += struct.pack("<I", ltype)
|
self.kv_data += struct.pack(">I", ltype)
|
||||||
self.kv_data += struct.pack("<Q", len(val))
|
self.kv_data += struct.pack(">Q", len(val))
|
||||||
for item in val:
|
for item in val:
|
||||||
self.add_val(item, add_vtype=False)
|
self.add_val(item, add_vtype=False)
|
||||||
else:
|
else:
|
||||||
|
@ -605,22 +605,23 @@ class GGUFWriter:
|
||||||
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
|
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
|
||||||
|
|
||||||
encoded_name = name.encode("utf8")
|
encoded_name = name.encode("utf8")
|
||||||
self.ti_data += struct.pack("<Q", len(encoded_name))
|
self.ti_data += struct.pack(">Q", len(encoded_name))
|
||||||
self.ti_data += encoded_name
|
self.ti_data += encoded_name
|
||||||
n_dims = len(tensor_shape)
|
n_dims = len(tensor_shape)
|
||||||
self.ti_data += struct.pack("<I", n_dims)
|
self.ti_data += struct.pack(">I", n_dims)
|
||||||
for i in range(n_dims):
|
for i in range(n_dims):
|
||||||
self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
|
self.ti_data += struct.pack(">Q", tensor_shape[n_dims - 1 - i])
|
||||||
if raw_dtype is None:
|
if raw_dtype is None:
|
||||||
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
|
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
|
||||||
else:
|
else:
|
||||||
dtype = raw_dtype
|
dtype = raw_dtype
|
||||||
self.ti_data += struct.pack("<I", dtype)
|
self.ti_data += struct.pack(">I", dtype)
|
||||||
self.ti_data += struct.pack("<Q", self.offset_tensor)
|
self.ti_data += struct.pack(">Q", self.offset_tensor)
|
||||||
self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
|
self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
|
||||||
self.ti_data_count += 1
|
self.ti_data_count += 1
|
||||||
|
|
||||||
def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
|
def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None, raw_dtype: GGMLQuantizationType | None = None):
|
||||||
|
tensor.byteswap(inplace=True)
|
||||||
if self.use_temp_file and self.temp_file is None:
|
if self.use_temp_file and self.temp_file is None:
|
||||||
fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
|
fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256*1024*1024)
|
||||||
fp.seek(0)
|
fp.seek(0)
|
||||||
|
|
|
@ -4,7 +4,9 @@
|
||||||
|
|
||||||
#undef NDEBUG
|
#undef NDEBUG
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#if !defined(__riscv) && !defined(__s390__)
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
#endif
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue