From 0ba5d488e50e6716b91a3c199d7a82e5a5cb38a0 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 17 Aug 2023 12:00:13 +0300 Subject: [PATCH] convert-new.py : vocab-only option should work now --- convert-new.py | 41 ++++++++++++++++++++++++----------------- tests/CMakeLists.txt | 6 +++--- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/convert-new.py b/convert-new.py index 1e959f7bd..b05696f3f 100755 --- a/convert-new.py +++ b/convert-new.py @@ -750,7 +750,7 @@ class OutputFile: def __init__(self, fname_out: Path) -> None: self.gguf = gguf.GGUFWriter.open(fname_out) - def write_file_header(self, params: Params, file_type: GGMLFileType) -> None: + def add_meta_arch(self, params: Params, file_type: GGMLFileType) -> None: llm_arch = "llama" self.gguf.add_architecture (llm_arch) @@ -763,14 +763,14 @@ class OutputFile: self.gguf.add_head_count_kv (llm_arch, params.n_head_kv) self.gguf.add_layer_norm_rms_eps (llm_arch, params.f_norm_eps) - def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None: - sname = name.encode('utf-8') - self.fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[data_type])) - self.fout.write(struct.pack("i" * len(shape), *shape[::-1])) - self.fout.write(sname) - self.fout.seek((self.fout.tell() + 31) & -32) + #def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None: + # sname = name.encode('utf-8') + # self.fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[data_type])) + # self.fout.write(struct.pack("i" * len(shape), *shape[::-1])) + # self.fout.write(sname) + # self.fout.seek((self.fout.tell() + 31) & -32) - def write_vocab(self, vocab: Vocab) -> None: + def add_meta_vocab(self, vocab: Vocab) -> None: tokens = [] scores = [] for text, score in vocab.all_tokens(): @@ -784,21 +784,28 @@ class OutputFile: # TODO: added / special tokens + def write_meta(self) -> None: + self.gguf.write_header_to_file() + self.gguf.write_kv_data_to_file() + + def close(self) -> None: + self.gguf.close() + @staticmethod def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab) -> None: of = OutputFile(fname_out) - of = OutputFile(fname_out) - of.write_file_header(params, file_type=GGMLFileType.AllF32) - of.write_vocab(vocab) - of.fout.close() + of.add_meta_arch(params, file_type=GGMLFileType.AllF32) + of.add_meta_vocab(vocab) + of.write_meta() + of.close() @staticmethod def write_all(fname_out: Path, params: Params, file_type: GGMLFileType, model: LazyModel, vocab: Vocab) -> None: check_vocab_size(params, vocab) + of = OutputFile(fname_out) - of.write_file_header(params, file_type) - print("Writing vocab...") - of.write_vocab(vocab) + of.add_meta_arch(params, file_type) + of.add_meta_vocab(vocab) def do_item(item: Tuple[str, LazyTensor]) -> NDArray: name, lazy_tensor = item @@ -809,7 +816,7 @@ class OutputFile: size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape) padi = len(str(len(model))) print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}") - of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type) + #of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type) ndarray.tofile(of.fout) of.fout.close() @@ -997,7 +1004,7 @@ def main(args_in: Optional[List[str]] = None) -> None: vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype) assert args.outfile, "need --outfile if using --vocab-only" outfile = args.outfile - OutputFile.write_vocab_only(outfile, vocab) + OutputFile.write_vocab_only(outfile, params, vocab) print(f"Wrote {outfile}") else: if args.dump: diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9831b13d4..bc2100efb 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -26,10 +26,10 @@ llama_build_and_test_executable(test-quantize-fns.cpp) llama_build_and_test_executable(test-quantize-perf.cpp) llama_build_and_test_executable(test-sampling.cpp) llama_build_executable(test-tokenizer-0.cpp) -llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin) +llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf) llama_build_executable(test-tokenizer-1.cpp) -llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin) -llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.bin) +llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf) +#llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf) llama_build_and_test_executable(test-grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp) llama_build_and_test_executable(test-grad0.cpp) # SLOW # llama_build_and_test_executable(test-opt.cpp) # SLOW