convert-hf : save memory with lazy evaluation (#7075)
* convert-hf : begin refactoring write_tensor * convert : upgrade to sentencepiece v0.2.0 * convert-hf : remove unused n_dims in extra_*_tensors * convert-hf : simplify MoE weights stacking * convert-hf : flake8 linter doesn't like semicolons * convert-hf : allow unusual model part names For example, loading `model-00001-of-00001.safetensors` now works. * convert-hf : fix stacking MoE expert tensors `torch.stack` and `torch.cat` don't do the same thing. * convert-hf : fix Mamba conversion Tested to work even with a SentencePiece-based tokenizer. * convert : use a string for the SentencePiece tokenizer path * convert-hf : display tensor shape * convert-hf : convert norms to f32 by default * convert-hf : sort model part names `os.listdir` is said to list files in arbitrary order. Sorting the file names should let "model-00009-of-00042.safetensors" be loaded before "model-00010-of-00042.safetensors". * convert-hf : use an ABC for Model again It seems Protocol can't be used as a statically type-checked ABC, because its subclasses also can't be instantiated. (why did it seem to work?) At least there's still a way to throw an error when forgetting to define the `model_arch` property of any registered Model subclasses. * convert-hf : use a plain class for Model, and forbid direct instantiation There are no abstract methods used anyway, so using ABC isn't really necessary. * convert-hf : more consistent formatting of cmdline args * convert-hf : align the message logged for converted tensors * convert-hf : fix Refact conversion * convert-hf : save memory with lazy evaluation * convert-hf : flake8 doesn't like lowercase L as a variable name * convert-hf : remove einops requirement for InternLM2 * convert-hf : faster model parts loading Instead of pre-loading them all into a dict, iterate on the tensors in the model parts progressively as needed in Model.write_tensors Conversion for some architectures relies on checking for the presence of specific tensor names, so for multi-part models, the weight map is read from the relevant json file to quickly get these names up-front. * convert-hf : minor changes for consistency * gguf-py : add tqdm as a dependency It's small, and used for a progress bar in GGUFWriter.write_tensors_to_file
This commit is contained in:
parent
bc4bba364f
commit
f98eb31c51
14 changed files with 873 additions and 1285 deletions
|
@ -7,7 +7,7 @@ import json
|
|||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from typing import Any, Mapping, Sequence
|
||||
from typing import Any, Sequence
|
||||
|
||||
# Necessary to load the local gguf package
|
||||
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
|
||||
|
@ -34,7 +34,7 @@ def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
|
|||
return host_endian
|
||||
|
||||
|
||||
def decode_field(field: gguf.ReaderField) -> Any:
|
||||
def decode_field(field: gguf.ReaderField | None) -> Any:
|
||||
if field and field.types:
|
||||
main_type = field.types[0]
|
||||
|
||||
|
@ -42,11 +42,11 @@ def decode_field(field: gguf.ReaderField) -> Any:
|
|||
sub_type = field.types[-1]
|
||||
|
||||
if sub_type == gguf.GGUFValueType.STRING:
|
||||
return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
|
||||
return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data]
|
||||
else:
|
||||
return [pv for idx in field.data for pv in field.parts[idx].tolist()]
|
||||
if main_type == gguf.GGUFValueType.STRING:
|
||||
return str(bytes(field.parts[-1]), encoding='utf8')
|
||||
return str(bytes(field.parts[-1]), encoding='utf-8')
|
||||
else:
|
||||
return field.parts[-1][0]
|
||||
|
||||
|
@ -59,7 +59,7 @@ def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
|
|||
return decode_field(field)
|
||||
|
||||
|
||||
def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str], remove_metadata: Sequence[str]) -> None:
|
||||
def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: dict[str, str], remove_metadata: Sequence[str]) -> None:
|
||||
for field in reader.fields.values():
|
||||
# Suppress virtual fields and fields written by GGUFWriter
|
||||
if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
|
||||
|
@ -101,7 +101,7 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
|
|||
|
||||
for tensor in reader.tensors:
|
||||
# Dimensions are written in reverse order, so flip them first
|
||||
shape = np.flipud(tensor.shape)
|
||||
shape = np.flipud(tensor.shape).tolist()
|
||||
writer.add_tensor_info(tensor.name, shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)
|
||||
|
||||
writer.write_header_to_file()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue