convert-new.py : convert script now works

parent 7eaa315631
commit 673ae1a17e

4 changed files with 44 additions and 36 deletions
@@ -298,7 +298,7 @@ for part_name in part_names:
 
     print( name + ", shape " + str(len(data.shape)) + ", " + str(old_dtype) + " --> " + str(data.dtype))
 
-    gguf_writer.write_tensor_to_file(data)
+    gguf_writer.write_tensor_data(data)
 
 gguf_writer.close()
@@ -669,7 +669,6 @@ def lazy_load_file(path: Path) -> ModelPlus:
 In = TypeVar('In')
 Out = TypeVar('Out')
-
 
 def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], concurrency: int) -> Iterable[Out]:
     '''Parallel map, but with backpressure. If the caller doesn't call `next`
     fast enough, this will stop calling `func` at some point rather than
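The docstring kept in this hunk describes the backpressure behaviour: new items are only handed to `func` while the caller keeps consuming results, so at most `concurrency` results are ever in flight. A minimal sketch of that idea, assuming a thread-pool implementation (the body of the script's actual function is not shown in this hunk):

    import itertools
    import concurrent.futures
    from typing import Callable, Iterable, Iterator, TypeVar

    In = TypeVar('In')
    Out = TypeVar('Out')

    def bounded_parallel_map_sketch(func: Callable[[In], Out], iterable: Iterable[In], concurrency: int) -> Iterator[Out]:
        # Keep at most `concurrency` items in flight; new work is submitted only
        # after the caller consumes a result, which is what provides backpressure.
        with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
            it = iter(iterable)
            futures = [executor.submit(func, item) for item in itertools.islice(it, concurrency)]
            while futures:
                yield futures.pop(0).result()
                for item in itertools.islice(it, 1):
                    futures.append(executor.submit(func, item))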
@@ -734,19 +733,35 @@ class OutputFile:
 
     # TODO: added / special tokens
 
+    def add_tensor_info(self, name: str, tensor: LazyTensor) -> None:
+        n_elements = 1
+        for dim in tensor.shape:
+            n_elements *= dim
+        data_type = DATA_TYPE_TO_NUMPY[tensor.data_type]
+        data_nbytes = n_elements * data_type.itemsize
+        self.gguf.add_tensor_info(name, tensor.shape, data_type, data_nbytes)
+
     def write_meta(self) -> None:
         self.gguf.write_header_to_file()
         self.gguf.write_kv_data_to_file()
+
+    def write_tensor_info(self) -> None:
+        self.gguf.write_ti_data_to_file()
+
     def close(self) -> None:
         self.gguf.close()
 
     @staticmethod
     def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab) -> None:
+        check_vocab_size(params, vocab)
+
         of = OutputFile(fname_out)
+
+        # meta data
         of.add_meta_arch(params)
         of.add_meta_vocab(vocab)
         of.write_meta()
 
         of.close()
 
     @staticmethod
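The new add_tensor_info records the byte size of each tensor up front, so the GGUF tensor-info section can be written before any tensor data. Purely illustrative, with a hypothetical shape and dtype, this mirrors that size arithmetic:

    import numpy as np

    shape = (4096, 4096)                  # hypothetical tensor shape
    data_type = np.dtype(np.float16)      # stand-in for DATA_TYPE_TO_NUMPY[tensor.data_type]

    n_elements = 1
    for dim in shape:
        n_elements *= dim                 # 16777216 elements

    data_nbytes = n_elements * data_type.itemsize
    print(n_elements, data_nbytes)        # 16777216 33554432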
@@ -754,22 +769,31 @@ class OutputFile:
         check_vocab_size(params, vocab)
 
         of = OutputFile(fname_out)
 
+        # meta data
         of.add_meta_arch(params)
         of.add_meta_vocab(vocab)
+
+        # tensor info
+        for name, lazy_tensor in model.items():
+            of.add_tensor_info(name, lazy_tensor)
+
+        of.write_meta()
+        of.write_tensor_info()
 
         def do_item(item: Tuple[str, LazyTensor]) -> NDArray:
             name, lazy_tensor = item
             return lazy_tensor.load().to_ggml().ndarray
 
+        # tensor data
         ndarrays = bounded_parallel_map(do_item, model.items(), concurrency=8)
         for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}")
-            #of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
-            ndarray.tofile(of.fout)
-        of.fout.close()
+            of.gguf.write_tensor_data(ndarray)
+
+        of.close()
 
 def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFileType:
     wq_type = model[NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type
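The progress line kept above uses a nested format spec, {i+1:{padi}d}, so the running index is right-aligned to the width of the total tensor count. With hypothetical values:

    shape = (4096, 32000)                               # hypothetical tensor shape
    size = ' x '.join(f"{dim:6d}" for dim in shape)     # "  4096 x  32000"
    padi = len(str(291))                                # 3, for a hypothetical 291-tensor model
    print(f"[{3:{padi}d}/291] Writing tensor ... | size {size:16}")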
@@ -783,6 +807,9 @@ def pick_output_type(model: LazyModel, output_type_str: Optional[str]) -> GGMLFi
 
     raise Exception(f"Unexpected combination of types: {name_to_type}")
 
+def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
+    return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
+            for (name, tensor) in model.items()}
 
 def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
     tmap = gguf.get_tensor_name_map(ARCH, params.n_layer)
@@ -808,12 +835,6 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
 
     return out
 
-
-def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
-    return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
-            for (name, tensor) in model.items()}
-
-
 def nth_multifile_path(path: Path, n: int) -> Optional[Path]:
     '''Given any path belonging to a multi-file model (e.g. foo.bin.1), return
     the nth path in the model.

gguf.py (33 changed lines)

@@ -70,23 +70,6 @@ KEY_TOKENIZER_RWKV = "tokenizer.rwkv.world"
 # recommended mapping of model tensor names for storage in gguf
 #
-#LLAMA_TOKEN_EMBD = "token_embd"
-#LLAMA_OUTPUT_NORM = "output_norm"
-#LLAMA_OUTPUT = "output"
-#LLAMA_ATTN_NORM = "blk.{bid}.attn_norm"
-#LLAMA_ATTN_Q = "blk.{bid}.attn_q"
-#LLAMA_ATTN_K = "blk.{bid}.attn_k"
-#LLAMA_ATTN_V = "blk.{bid}.attn_v"
-#LLAMA_ATTN_OUTPUT = "blk.{bid}.attn_output"
-#LLAMA_FFN_NORM = "blk.{bid}.ffn_norm"
-#LLAMA_FFN_GATE = "blk.{bid}.ffn_gate"
-#LLAMA_FFN_DOWN = "blk.{bid}.ffn_down"
-#LLAMA_FFN_UP = "blk.{bid}.ffn_up"
-#
-#GPT_POS_EMBD = "pos_embd"
-#
-#FALCON_ATTN_NORM_2 = "blk.{bid}.attn_norm_2"
-
 
 class MODEL_ARCH(IntEnum):
     LLAMA = auto()
     FALCON = auto()
@@ -117,15 +100,15 @@ class MODEL_TENSOR(IntEnum):
 MODEL_ARCH_NAMES = {
     MODEL_ARCH.LLAMA : "llama",
     MODEL_ARCH.FALCON : "falcon",
-    MODEL_ARCH.GPT2 : "gpt-2",
-    MODEL_ARCH.GPTJ : "gpt-j",
-    MODEL_ARCH.GPTNEOX : "gpt-neox",
+    MODEL_ARCH.GPT2 : "gpt2",
+    MODEL_ARCH.GPTJ : "gptj",
+    MODEL_ARCH.GPTNEOX : "gptneox",
     MODEL_ARCH.MPT : "mpt",
 }
 
 MODEL_TENSOR_NAMES = {
     MODEL_ARCH.LLAMA : {
-        MODEL_TENSOR.TOKEN_EMBD : "tok_embd",
+        MODEL_TENSOR.TOKEN_EMBD : "token_embd",
         MODEL_TENSOR.OUTPUT_NORM : "output_norm",
         MODEL_TENSOR.OUTPUT : "output",
         MODEL_TENSOR.ROPE_FREQS : "rope_freqs",
@@ -141,7 +124,7 @@ MODEL_TENSOR_NAMES = {
         MODEL_TENSOR.FFN_UP : "blk.{bid}.ffn_up",
     },
     MODEL_ARCH.FALCON : {
-        MODEL_TENSOR.TOKEN_EMBD : "tok_embd",
+        MODEL_TENSOR.TOKEN_EMBD : "token_embd",
         MODEL_TENSOR.OUTPUT_NORM : "output_norm",
         MODEL_TENSOR.OUTPUT : "output",
         MODEL_TENSOR.ATTN_NORM : "blk.{bid}.attn_norm",
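The per-block entries above are templates rather than final names; the convert script expands them with str.format, as in the NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0) + ".weight" lookup in pick_output_type. A tiny illustration with the template inlined:

    template = "blk.{bid}.attn_q"
    print(template.format(bid=0) + ".weight")   # -> blk.0.attn_q.weight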
@@ -495,7 +478,7 @@ class GGUFWriter:
         self.offset_tensor += GGUFWriter.ggml_pad(tensor_nbytes, self.data_alignment)
         self.ti_data_count += 1
 
-    def write_tensor_to_file(self, tensor: np.ndarray):
+    def write_tensor_data(self, tensor: np.ndarray):
         pad = GGUFWriter.ggml_pad(self.fout.tell(), self.data_alignment) - self.fout.tell()
         if pad != 0:
             self.fout.write(bytes([0] * pad))
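The renamed write_tensor_data pads the output file up to the next alignment boundary before writing a tensor. A sketch of that padding arithmetic, assuming GGUFWriter.ggml_pad rounds an offset up to a multiple of the alignment (values hypothetical):

    def ggml_pad(x: int, n: int) -> int:
        # assumed behaviour: round x up to the next multiple of n
        return ((x + n - 1) // n) * n

    offset = 1000                             # hypothetical current file position (fout.tell())
    alignment = 32                            # hypothetical data_alignment
    pad = ggml_pad(offset, alignment) - offset
    print(pad)                                # 24 zero bytes written before the tensor bytes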
@@ -650,7 +633,7 @@ if __name__ == "__main__":
     gguf_writer.write_header_to_file()
     gguf_writer.write_kv_data_to_file()
     gguf_writer.write_ti_data_to_file()
-    gguf_writer.write_tensor_to_file(tensor1)
-    gguf_writer.write_tensor_to_file(tensor2)
+    gguf_writer.write_tensor_data(tensor1)
+    gguf_writer.write_tensor_data(tensor2)
 
     gguf_writer.close()
@@ -1115,6 +1115,10 @@ struct llama_model_loader {
     struct ggml_tensor * create_tensor(struct ggml_context * ctx, const std::string & name, const std::vector<uint32_t> & ne, ggml_backend backend) {
         struct ggml_tensor * cur = ggml_get_tensor(ctx_meta, name.c_str());
 
+        if (cur == NULL) {
+            throw std::runtime_error(format("%s: tensor '%s' not found", __func__, name.c_str()));
+        }
+
         {
             bool is_ok = true;
             for (size_t i = 0; i < ne.size(); ++i) {