convert-new.py : vocab-only option should work now

commit 0ba5d488e5 (parent f9db574bbf)
2 changed files with 27 additions and 20 deletions
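In short: OutputFile now routes all metadata through gguf.GGUFWriter (new add_meta_arch / add_meta_vocab / write_meta / close helpers), the old raw struct-based tensor header writer is commented out, and main() finally passes the params argument that write_vocab_only expects. As a minimal sketch of the resulting write order for a vocab-only file (method names are the ones introduced below; params, vocab, and fname_out are assumed to be built as elsewhere in convert-new.py):

    of = OutputFile(fname_out)                               # opens gguf.GGUFWriter
    of.add_meta_arch(params, file_type=GGMLFileType.AllF32)  # architecture + hparam KVs
    of.add_meta_vocab(vocab)                                 # token texts and scores
    of.write_meta()                                          # header + KV data
    of.close()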
convert-new.py

@@ -750,7 +750,7 @@ class OutputFile:
     def __init__(self, fname_out: Path) -> None:
         self.gguf = gguf.GGUFWriter.open(fname_out)

-    def write_file_header(self, params: Params, file_type: GGMLFileType) -> None:
+    def add_meta_arch(self, params: Params, file_type: GGMLFileType) -> None:
         llm_arch = "llama"

         self.gguf.add_architecture (llm_arch)
@@ -763,14 +763,14 @@ class OutputFile:
         self.gguf.add_head_count_kv (llm_arch, params.n_head_kv)
         self.gguf.add_layer_norm_rms_eps (llm_arch, params.f_norm_eps)

-    def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None:
-        sname = name.encode('utf-8')
-        self.fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[data_type]))
-        self.fout.write(struct.pack("i" * len(shape), *shape[::-1]))
-        self.fout.write(sname)
-        self.fout.seek((self.fout.tell() + 31) & -32)
+    #def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None:
+    #    sname = name.encode('utf-8')
+    #    self.fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[data_type]))
+    #    self.fout.write(struct.pack("i" * len(shape), *shape[::-1]))
+    #    self.fout.write(sname)
+    #    self.fout.seek((self.fout.tell() + 31) & -32)

-    def write_vocab(self, vocab: Vocab) -> None:
+    def add_meta_vocab(self, vocab: Vocab) -> None:
         tokens = []
         scores = []
         for text, score in vocab.all_tokens():
@@ -784,21 +784,28 @@ class OutputFile:

         # TODO: added / special tokens

+    def write_meta(self) -> None:
+        self.gguf.write_header_to_file()
+        self.gguf.write_kv_data_to_file()
+
+    def close(self) -> None:
+        self.gguf.close()
+
     @staticmethod
     def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab) -> None:
         of = OutputFile(fname_out)
-        of = OutputFile(fname_out)
-        of.write_file_header(params, file_type=GGMLFileType.AllF32)
-        of.write_vocab(vocab)
-        of.fout.close()
+        of.add_meta_arch(params, file_type=GGMLFileType.AllF32)
+        of.add_meta_vocab(vocab)
+        of.write_meta()
+        of.close()

     @staticmethod
     def write_all(fname_out: Path, params: Params, file_type: GGMLFileType, model: LazyModel, vocab: Vocab) -> None:
         check_vocab_size(params, vocab)

         of = OutputFile(fname_out)
-        of.write_file_header(params, file_type)
-        print("Writing vocab...")
-        of.write_vocab(vocab)
+        of.add_meta_arch(params, file_type)
+        of.add_meta_vocab(vocab)

         def do_item(item: Tuple[str, LazyTensor]) -> NDArray:
             name, lazy_tensor = item
@@ -809,7 +816,7 @@ class OutputFile:
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}")
-            of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
+            #of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
             ndarray.tofile(of.fout)
         of.fout.close()

@@ -997,7 +1004,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
         vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
         assert args.outfile, "need --outfile if using --vocab-only"
         outfile = args.outfile
-        OutputFile.write_vocab_only(outfile, vocab)
+        OutputFile.write_vocab_only(outfile, params, vocab)
         print(f"Wrote {outfile}")
     else:
         if args.dump:
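The bug behind the commit title is in the final convert-new.py hunk: write_vocab_only takes (fname_out, params, vocab), but main() was calling it as write_vocab_only(outfile, vocab), so the --vocab-only path would raise a TypeError before writing anything. A minimal sketch of the fixed call path, assuming params is already built the way main() builds it for a full conversion; load_vocab's signature comes from the diff above, while the concrete paths and the "spm" vocab type are made-up examples:

    from pathlib import Path

    # Hypothetical driver for the vocab-only path; everything except
    # load_vocab(...) and OutputFile.write_vocab_only(...) is assumed.
    vocab   = load_vocab(Path("models/llama"), "spm")
    outfile = Path("models/ggml-vocab-llama.gguf")
    OutputFile.write_vocab_only(outfile, params, vocab)
    print(f"Wrote {outfile}")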
tests/CMakeLists.txt

@@ -26,10 +26,10 @@ llama_build_and_test_executable(test-quantize-fns.cpp)
 llama_build_and_test_executable(test-quantize-perf.cpp)
 llama_build_and_test_executable(test-sampling.cpp)
 llama_build_executable(test-tokenizer-0.cpp)
-llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin)
+llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 llama_build_executable(test-tokenizer-1.cpp)
-llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin)
-llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.bin)
+llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+#llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
 llama_build_and_test_executable(test-grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp)
 llama_build_and_test_executable(test-grad0.cpp) # SLOW
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
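The test changes only track the fixture rename from ggml-vocab-llama.bin to ggml-vocab-llama.gguf; the aquila tokenizer test stays commented out until a GGUF vocab exists for it. Presumably the new fixture is produced with the option this commit fixes, along the lines of `python convert-new.py <model-dir> --vocab-only --outfile models/ggml-vocab-llama.gguf` (hypothetical invocation; only the --outfile and --vocab-only flags are confirmed by the diff).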