simplify even further and standardize with GGUFWriter
parent f6fd3ea4e9
commit bb5ee02096
2 changed files with 12 additions and 31 deletions
@@ -331,7 +331,7 @@ class Model:
         self.write_tensors()
         self.gguf_writer.write_header_to_file()
         self.gguf_writer.write_kv_data_to_file()
-        self.gguf_writer.write_ti_data_to_file()
+        self.gguf_writer.write_tensors_to_file()
         self.gguf_writer.close()
 
     def write_vocab(self):
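For context: the rename matters because the plain GGUFWriter already exposes write_tensors_to_file(), so once GGUFManager uses the same name, Model.write() can drive either writer through one interface. A minimal sketch of the resulting call sequence (illustrative only, not from this commit):

def write_model(writer):
    # same sequence whether writer is a plain GGUFWriter or a GGUFManager
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()  # previously write_ti_data_to_file() on the manager
    writer.close()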
@@ -73,33 +73,24 @@ class SplitStrategy(deque):
                 self.append((shard, model[start:stop], GGUFWriter(shard, arch, use_temp_file=use_temp_file, endianess=endianess)))
 
         elif split_arguments.split_style == SplitStyle.SIZE:
-            shards = []
+            shards = [[model[0]]]
 
             # we have to determine the shards first to determine how many shards there will be in total - two passes
-            for i, shard in enumerate(model):
-                if i == 0:
-                    shards.append([shard])
-                    continue
+            for i, shard in enumerate(model[1:]):
                 if SplitStrategy.get_tensor_size(shard[1]) + sum(SplitStrategy.get_tensor_size(t[1]) for t in shards[-1]) > split_arguments.split_max_size:
                     shards.append([shard])
                 else:
                     shards[-1].append(shard)
 
-            total_shards = len(shards) + split_arguments.small_first_shard
-            shard_offset = 1
-
             if split_arguments.small_first_shard:
-                outname = fname_out.with_name(SHARD_NAME_FORMAT.format(fname_out.stem, shard_offset, total_shards))
-                self.append((outname, None, GGUFWriter(outname, arch, use_temp_file=use_temp_file, endianess=endianess)))
-                shard_offset += 1
+                shards.insert(0, None)
 
             for i, shard in enumerate(shards):
-                outname = fname_out.with_name(SHARD_NAME_FORMAT.format(fname_out.stem, i + shard_offset, total_shards))
+                outname = fname_out.with_name(SHARD_NAME_FORMAT.format(fname_out.stem, i + 1, len(shards)))
                 self.append((outname, shard, GGUFWriter(outname, arch, use_temp_file=use_temp_file, endianess=endianess)))
 
     @staticmethod
     def get_tensor_size(tensor) -> int:
-        # we don't have the LazyTensor class here from convert.py but we can try
         try:
             return tensor.data_type.elements_to_bytes(np.prod(tensor.shape))
         except AttributeError:  # numpy ndarray[Any, Any]
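The size-based packing above, condensed into a standalone sketch. Treating model as a list of (name, numpy array) pairs and the SHARD_NAME_FORMAT literal are assumptions for illustration; the real code measures sizes through get_tensor_size() and GGUF data types:

from pathlib import Path

SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"  # assumed shard-name pattern

def plan_shards(model, split_max_size, small_first_shard, fname_out: Path):
    # First pass: greedy packing, seeded with the first tensor so shards[-1]
    # is never empty when the size check runs.
    shards = [[model[0]]]
    for name, tensor in model[1:]:
        if tensor.nbytes + sum(t.nbytes for _, t in shards[-1]) > split_max_size:
            shards.append([(name, tensor)])
        else:
            shards[-1].append((name, tensor))

    # Optional metadata-only first shard, kept as a None placeholder.
    if small_first_shard:
        shards.insert(0, None)

    # Second pass: the shard total is only known after packing, hence "two passes".
    return [(fname_out.with_name(SHARD_NAME_FORMAT.format(fname_out.stem, i + 1, len(shards))), shard)
            for i, shard in enumerate(shards)]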
@@ -213,7 +204,7 @@ class GGUFManager(GGUFWriter):
 
         self.state = WriterState.KV_DATA
 
-    def write_ti_data_to_file(self) -> None:
+    def write_tensors_to_file(self) -> None:
         if self.split_arguments.dry_run:
             return
 
@@ -222,25 +213,17 @@ class GGUFManager(GGUFWriter):
 
         running_total = self.total_tensors
         for ct in range(self.total_shards):
-            try:
-                (_, tensors, writer) = self.split_strategy.popleft()
-                tensors = deque(tensors) if tensors else None
-            except IndexError:
-                break
+            (_, tensors, writer) = self.split_strategy.popleft()
+            tensors = deque(tensors) if tensors else None
 
             shard_num_tensors = len(tensors) if tensors else 0
-
-            if tensors:
-                while True:
-                    try:
-                        (name, tensor, dtype) = tensors.popleft()
-                    except IndexError:
-                        break
-                    writer.add_tensor(name, tensor, raw_dtype=dtype)
-
             print(f"Writing to shard {ct}/{self.total_shards} with {shard_num_tensors}/{running_total} remaining tensors (of {self.total_tensors} total)")
             running_total -= shard_num_tensors
 
+            for _ in range(shard_num_tensors):
+                (name, tensor, dtype) = tensors.popleft()
+                writer.add_tensor(name, tensor, raw_dtype=dtype)
+
             # need to write everything down here
             writer.write_header_to_file()
             writer.write_kv_data_to_file()
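The control-flow change above in write_tensors_to_file(): instead of calling popleft() inside try/except IndexError, the deque's length is read once and exactly that many items are popped. As a standalone sketch:

from collections import deque

def drain_shard(tensors):
    # Take len() once, before the deque shrinks, then pop a known count
    # rather than catching IndexError to detect the end.
    items = deque(tensors) if tensors else None
    shard_num_tensors = len(items) if items else 0
    for _ in range(shard_num_tensors):
        yield items.popleft()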
@@ -268,8 +251,6 @@ class GGUFManager(GGUFWriter):
         self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
         raw_dtype: GGMLQuantizationType | None = None,
     ) -> None:
-        if self.endianess == GGUFEndian.BIG:
-            tensor.byteswap(inplace=True)
         self.tensors.append((name, tensor, raw_dtype))
 
     def close(self) -> None:
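Dropping the byteswap in the manager's add_tensor() fits the "standardize with GGUFWriter" theme: assuming the per-shard GGUFWriter already swaps big-endian tensors when they are added (as gguf-py's writer does), swapping in the manager as well would swap twice, which restores native byte order. A quick check of that cancellation:

import numpy as np

a = np.arange(4, dtype=np.int32)
b = a.copy()
b.byteswap(inplace=True)  # one swap: bytes now in the opposite order
b.byteswap(inplace=True)  # a second swap undoes the first
assert (a == b).all()     # double-swapped data is back to native order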