From 0ba5d488e50e6716b91a3c199d7a82e5a5cb38a0 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Thu, 17 Aug 2023 12:00:13 +0300
Subject: [PATCH] convert-new.py : vocab-only option should work now

---
 convert-new.py       | 41 ++++++++++++++++++++++++-----------------
 tests/CMakeLists.txt |  6 +++---
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/convert-new.py b/convert-new.py
index 1e959f7bd..b05696f3f 100755
--- a/convert-new.py
+++ b/convert-new.py
@@ -750,7 +750,7 @@ class OutputFile:
     def __init__(self, fname_out: Path) -> None:
         self.gguf = gguf.GGUFWriter.open(fname_out)
 
-    def write_file_header(self, params: Params, file_type: GGMLFileType) -> None:
+    def add_meta_arch(self, params: Params, file_type: GGMLFileType) -> None:
         llm_arch = "llama"
 
         self.gguf.add_architecture        (llm_arch)
@@ -763,14 +763,14 @@ class OutputFile:
         self.gguf.add_head_count_kv       (llm_arch, params.n_head_kv)
         self.gguf.add_layer_norm_rms_eps  (llm_arch, params.f_norm_eps)
 
-    def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None:
-        sname = name.encode('utf-8')
-        self.fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[data_type]))
-        self.fout.write(struct.pack("i" * len(shape), *shape[::-1]))
-        self.fout.write(sname)
-        self.fout.seek((self.fout.tell() + 31) & -32)
+    #def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None:
+    #    sname = name.encode('utf-8')
+    #    self.fout.write(struct.pack("iii", len(shape), len(sname), DATA_TYPE_TO_FTYPE[data_type]))
+    #    self.fout.write(struct.pack("i" * len(shape), *shape[::-1]))
+    #    self.fout.write(sname)
+    #    self.fout.seek((self.fout.tell() + 31) & -32)
 
-    def write_vocab(self, vocab: Vocab) -> None:
+    def add_meta_vocab(self, vocab: Vocab) -> None:
         tokens = []
         scores = []
         for text, score in vocab.all_tokens():
@@ -784,21 +784,28 @@ class OutputFile:
 
         # TODO: added / special tokens
 
+    def write_meta(self) -> None:
+        self.gguf.write_header_to_file()
+        self.gguf.write_kv_data_to_file()
+
+    def close(self) -> None:
+        self.gguf.close()
+
     @staticmethod
     def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab) -> None:
         of = OutputFile(fname_out)
-        of = OutputFile(fname_out)
-        of.write_file_header(params, file_type=GGMLFileType.AllF32)
-        of.write_vocab(vocab)
-        of.fout.close()
+        of.add_meta_arch(params, file_type=GGMLFileType.AllF32)
+        of.add_meta_vocab(vocab)
+        of.write_meta()
+        of.close()
 
     @staticmethod
     def write_all(fname_out: Path, params: Params, file_type: GGMLFileType, model: LazyModel, vocab: Vocab) -> None:
         check_vocab_size(params, vocab)
+
         of = OutputFile(fname_out)
-        of.write_file_header(params, file_type)
-        print("Writing vocab...")
-        of.write_vocab(vocab)
+        of.add_meta_arch(params, file_type)
+        of.add_meta_vocab(vocab)
 
         def do_item(item: Tuple[str, LazyTensor]) -> NDArray:
             name, lazy_tensor = item
@@ -809,7 +816,7 @@ class OutputFile:
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
             print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}")
-            of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
+            #of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
             ndarray.tofile(of.fout)
         of.fout.close()
 
@@ -997,7 +1004,7 @@ def main(args_in: Optional[List[str]] = None) -> None:
         vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
         assert args.outfile, "need --outfile if using --vocab-only"
         outfile = args.outfile
-        OutputFile.write_vocab_only(outfile, vocab)
+        OutputFile.write_vocab_only(outfile, params, vocab)
         print(f"Wrote {outfile}")
     else:
         if args.dump:
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 9831b13d4..bc2100efb 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -26,10 +26,10 @@ llama_build_and_test_executable(test-quantize-fns.cpp)
 llama_build_and_test_executable(test-quantize-perf.cpp)
 llama_build_and_test_executable(test-sampling.cpp)
 llama_build_executable(test-tokenizer-0.cpp)
-llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin)
+llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 llama_build_executable(test-tokenizer-1.cpp)
-llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin)
-llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.bin)
+llama_test_executable(test-tokenizer-1.llama  test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+#llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
 llama_build_and_test_executable(test-grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp)
 llama_build_and_test_executable(test-grad0.cpp) # SLOW
 # llama_build_and_test_executable(test-opt.cpp) # SLOW