From 39362f3485d90c364fe6ebe19b0428e87b3f5991 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 17 Aug 2023 17:02:01 +0300
Subject: [PATCH] gguf.py : pick some of the refactoring from #2644

---
 convert-new.py |  20 ++++-----
 gguf.py        | 111 ++++++++++++++++++++++++-------------------------
 2 files changed, 63 insertions(+), 68 deletions(-)

diff --git a/convert-new.py b/convert-new.py
index 4aaaa60d4..972a6c609 100755
--- a/convert-new.py
+++ b/convert-new.py
@@ -705,19 +705,17 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:
 
 class OutputFile:
     def __init__(self, fname_out: Path) -> None:
-        self.gguf = gguf.GGUFWriter.open(fname_out)
+        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
     def add_meta_arch(self, params: Params) -> None:
-        arch = gguf.MODEL_ARCH_NAMES[ARCH]
-        self.gguf.add_architecture        (arch)
-        self.gguf.add_context_length      (arch, params.n_ctx)
-        self.gguf.add_embedding_length    (arch, params.n_embd)
-        self.gguf.add_block_count         (arch, params.n_layer)
-        self.gguf.add_feed_forward_length (arch, params.n_ff)
-        self.gguf.add_rope_dimension_count(arch, params.n_embd // params.n_head)
-        self.gguf.add_head_count          (arch, params.n_head)
-        self.gguf.add_head_count_kv       (arch, params.n_head_kv)
-        self.gguf.add_layer_norm_rms_eps  (arch, params.f_norm_eps)
+        self.gguf.add_context_length      (params.n_ctx)
+        self.gguf.add_embedding_length    (params.n_embd)
+        self.gguf.add_block_count         (params.n_layer)
+        self.gguf.add_feed_forward_length (params.n_ff)
+        self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
+        self.gguf.add_head_count          (params.n_head)
+        self.gguf.add_head_count_kv       (params.n_head_kv)
+        self.gguf.add_layer_norm_rms_eps  (params.f_norm_eps)
 
     def add_meta_vocab(self, vocab: Vocab) -> None:
         tokens = []
diff --git a/gguf.py b/gguf.py
index 55f45716a..a4dd10872 100644
--- a/gguf.py
+++ b/gguf.py
@@ -33,24 +33,24 @@ KEY_GENERAL_SOURCE_URL = "general.source.url"
 KEY_GENERAL_SOURCE_HF_REPO = "general.source.hugginface.repository"
 
 # LLM
-KEY_LLM_CONTEXT_LENGTH        = "{llm}.context_length"
-KEY_LLM_EMBEDDING_LENGTH      = "{llm}.embedding_length"
-KEY_LLM_BLOCK_COUNT           = "{llm}.block_count"
-KEY_LLM_FEED_FORWARD_LENGTH   = "{llm}.feed_forward_length"
-KEY_LLM_USE_PARALLEL_RESIDUAL = "{llm}.use_parallel_residual"
-KEY_LLM_TENSOR_DATA_LAYOUT    = "{llm}.tensor_data_layout"
+KEY_LLM_CONTEXT_LENGTH        = "{arch}.context_length"
+KEY_LLM_EMBEDDING_LENGTH      = "{arch}.embedding_length"
+KEY_LLM_BLOCK_COUNT           = "{arch}.block_count"
+KEY_LLM_FEED_FORWARD_LENGTH   = "{arch}.feed_forward_length"
+KEY_LLM_USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
+KEY_LLM_TENSOR_DATA_LAYOUT    = "{arch}.tensor_data_layout"
 
 # attention
-KEY_ATTENTION_HEAD_COUNT        = "{llm}.attention.head_count"
-KEY_ATTENTION_HEAD_COUNT_KV     = "{llm}.attention.head_count_kv"
-KEY_ATTENTION_MAX_ALIBI_BIAS    = "{llm}.attention.max_alibi_bias"
-KEY_ATTENTION_CLAMP_KQV         = "{llm}.attention.clamp_kqv"
-KEY_ATTENTION_LAYERNORM_EPS     = "{llm}.attention.layer_norm_epsilon"
-KEY_ATTENTION_LAYERNORM_RMS_EPS = "{llm}.attention.layer_norm_rms_epsilon"
+KEY_ATTENTION_HEAD_COUNT        = "{arch}.attention.head_count"
+KEY_ATTENTION_HEAD_COUNT_KV     = "{arch}.attention.head_count_kv"
+KEY_ATTENTION_MAX_ALIBI_BIAS    = "{arch}.attention.max_alibi_bias"
+KEY_ATTENTION_CLAMP_KQV         = "{arch}.attention.clamp_kqv"
+KEY_ATTENTION_LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
+KEY_ATTENTION_LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
 
 # RoPE
-KEY_ROPE_DIMENSION_COUNT = "{llm}.rope.dimension_count"
-KEY_ROPE_SCALE           = "{llm}.rope.scale"
+KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
+KEY_ROPE_SCALE           = "{arch}.rope.scale"
 
 # tokenization
 KEY_TOKENIZER_MODEL = "tokenizer.ggml.model"
@@ -343,14 +343,16 @@ class GGUFValueType(IntEnum):
 
 
 class GGUFWriter:
-    def __init__(self, fout: IO):
-        self.fout = fout
+    def __init__(self, path: str, arch: str):
+        self.fout = open(path, "wb")
+        self.arch = arch
         self.offset_tensor = 0
         self.data_alignment = GGUF_DEFAULT_ALIGNMENT
         self.kv_data = b""
         self.kv_data_count = 0
         self.ti_data = b""
         self.ti_data_count = 0
+        self.add_architecture()
 
     def write_header_to_file(self):
         self.fout.write(struct.pack("<I", GGUF_MAGIC))
[...]
-    @classmethod
-    def open(cls, path: str) -> "GGUFWriter":
-        f = open(path, "wb")
-        return cls(f)
-
     def add_key(self, key: str):
         self.add_val(key, GGUFValueType.STRING, add_vtype=False)
 
@@ -409,7 +406,8 @@ class GGUFWriter:
         self.add_val(val, GGUFValueType.BOOL)
 
     def add_string(self, key: str, val: str):
-        if len(val) == 0: return
+        if len(val) == 0:
+            return
         self.add_key(key)
         self.add_val(val, GGUFValueType.STRING)
 
@@ -463,6 +461,8 @@ class GGUFWriter:
         return ((x + n - 1) // n) * n
 
     def add_tensor_info(self, name: str, tensor_shape: np.ndarray, tensor_dtype: np.dtype, tensor_nbytes: int):
+        assert tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
+
         encoded_name = name.encode("utf8")
         self.ti_data += struct.pack("
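
For context, a minimal usage sketch of the writer API after this patch, using only calls visible in the hunks above. The output path "model.gguf", the "llama" architecture string, and the numeric values are illustrative assumptions; in convert-new.py the architecture comes from gguf.MODEL_ARCH_NAMES[ARCH] and the values from Params. This is not a complete conversion, just the new call shape.

    import gguf

    # Illustrative values only -- not part of the patch.
    # The constructor now opens the file and records the architecture,
    # calling add_architecture() itself.
    gw = gguf.GGUFWriter("model.gguf", "llama")

    # Per-architecture keys no longer take an explicit arch argument; the
    # renamed "{arch}" keys are filled in from the arch given above.
    gw.add_context_length(2048)
    gw.add_embedding_length(4096)
    gw.add_block_count(32)
    gw.add_head_count(32)
    gw.add_head_count_kv(32)

    gw.write_header_to_file()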