finish f16 hf bitnet e2e
This commit is contained in:
parent
1f2e0ee012
commit
5e59660173
10 changed files with 440 additions and 11 deletions
|
@ -925,6 +925,7 @@ class GGMLQuantizationType(IntEnum):
|
|||
F64 = 28
|
||||
IQ1_M = 29
|
||||
BF16 = 30
|
||||
I2 = 31
|
||||
|
||||
|
||||
# TODO: add GGMLFileType from ggml_ftype in ggml.h
|
||||
|
@ -966,6 +967,7 @@ class LlamaFileType(IntEnum):
|
|||
MOSTLY_IQ4_XS = 30 # except 1d tensors
|
||||
MOSTLY_IQ1_M = 31 # except 1d tensors
|
||||
MOSTLY_BF16 = 32 # except 1d tensors
|
||||
MOSTLY_I2 = 33 # except 1d tensors
|
||||
|
||||
GUESSED = 1024 # not specified in the model file
|
||||
|
||||
|
@ -1032,6 +1034,7 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
|
|||
GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
|
||||
GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
|
||||
GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
|
||||
GGMLQuantizationType.I2: (1, 1),
|
||||
GGMLQuantizationType.I8: (1, 1),
|
||||
GGMLQuantizationType.I16: (1, 2),
|
||||
GGMLQuantizationType.I32: (1, 4),
|
||||
|
|
|
@ -225,8 +225,10 @@ class GGUFWriter:
|
|||
dtype = GGMLQuantizationType.I32
|
||||
elif tensor_dtype == np.int64:
|
||||
dtype = GGMLQuantizationType.I64
|
||||
elif tensor_dtype == np.uint8:
|
||||
dtype = GGMLQuantizationType.I2
|
||||
else:
|
||||
raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
|
||||
raise ValueError("Only F16, F32, F64, I8, I16, I32, I64, I2 tensors are supported for now")
|
||||
else:
|
||||
dtype = raw_dtype
|
||||
if tensor_dtype == np.uint8:
|
||||
|
@ -237,7 +239,10 @@ class GGUFWriter:
|
|||
self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
|
||||
self.ti_data += self._pack("I", dtype)
|
||||
self.ti_data += self._pack("Q", self.offset_tensor)
|
||||
self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
|
||||
if dtype == GGMLQuantizationType.I2:
|
||||
self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment) + self.data_alignment
|
||||
else:
|
||||
self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
|
||||
self.ti_data_count += 1
|
||||
|
||||
def add_tensor(
|
||||
|
@ -252,7 +257,9 @@ class GGUFWriter:
|
|||
self.temp_file = fp
|
||||
|
||||
shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
|
||||
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
|
||||
|
||||
if (raw_dtype != GGMLQuantizationType.F32 or not name.endswith("scale")):
|
||||
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
|
||||
|
||||
if self.temp_file is None:
|
||||
self.tensors.append(tensor)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue