WIP: Initial setup for GGUF writer configuration
- Created the `initialize_writer` function to set up the GGUF writer with model metadata (see the usage sketch after the file list below)
- Included validation for file type and architecture
- Default hyperparameter values sourced from MixFormerSequentialConfig
- Function annotations and documentation added for clarity
- Prepared groundwork for MixFormer architecture integration
parent 629f917cd6
commit 41a98c618e
2 changed files with 305 additions and 0 deletions
300 convert-phi-1-to-gguf.py Executable file
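
As a rough illustration of the flow this commit sets up (a sketch only; the paths and output name are hypothetical, and tensor/tokenizer export is still TODO):

    from pathlib import Path

    model_dir = Path("./phi-1")  # hypothetical local checkout
    hparams = load_hyper_params(model_dir, "MixFormerSequentialForCausalLM")
    writer = initialize_writer(
        model_dir / "ggml-model-f16.gguf", "PHI_1", "f16", hparams
    )
    writer.close()  # metadata only; tensors are not written yet in this WIP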
@@ -0,0 +1,300 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import logging
import os
import struct
import sys
from json import JSONDecodeError
from pathlib import Path
from typing import Any, Dict, List

import numpy as np
import torch
from transformers import AutoTokenizer

if "NO_LOCAL_GGUF" not in os.environ:
    sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
import gguf

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")


def check_required_files(directory: Path, required_files: List[str]) -> None:
    missing_files = [
        file_name
        for file_name in required_files
        if not (directory / file_name).exists()
    ]
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")


def get_json_map(file_path: Path) -> dict[str, Any]:
    with open(file_path, "r") as source_file:
        try:
            return json.load(source_file)
        except JSONDecodeError as e:
            raise ValueError(f"Failed to decode {file_path}") from e


def load_hyper_params(directory: Path, architecture: str) -> dict:
    config_path = directory / "config.json"
    hparams = get_json_map(config_path)

    # Ensure the expected architecture is present
    if hparams["architectures"][0] != architecture:
        raise ValueError(
            f"Model architecture not supported: {hparams['architectures'][0]}"
        )

    return hparams
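
# For reference, the config.json consumed above is expected to look roughly
# like the following; the values shown are the fallback defaults used by
# initialize_writer below (per MixFormerSequentialConfig), and an actual
# Phi-1 checkpoint may differ:
#
# {
#     "architectures": ["MixFormerSequentialForCausalLM"],
#     "n_positions": 2048,
#     "n_embd": 1024,
#     "n_layer": 20,
#     "n_head": 16,
#     "layer_norm_epsilon": 1e-05
# }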


def initialize_writer(
    fname_out: str | Path, architecture: str, ftype: str, hparams: Dict[str, Any]
) -> gguf.GGUFWriter:
    """
    Initializes the GGUF writer with the model metadata.

    :param fname_out: The filename for the output model.
    :param architecture: The model architecture enum name (e.g., 'PHI_1').
    :param ftype: The data type for the model file ('f32' or 'f16', any case).
    :param hparams: The hyperparameters loaded from the model's config file.
    :return: An initialized GGUF writer object.
    """
    # Validate the architecture name against the GGUF enum
    if not hasattr(gguf.MODEL_ARCH, architecture):
        raise ValueError(f"Unsupported architecture: {architecture}")
    ARCH = getattr(gguf.MODEL_ARCH, architecture)

    # Validate the file type; parse_args() supplies lowercase 'f32'/'f16',
    # so normalize the case before comparing
    ftype = ftype.upper()
    if ftype not in ("F32", "F16"):
        raise ValueError(f"Unsupported file type: {ftype}")

    # Initialize the GGUF writer
    gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])

    # Set the writer with the hyperparameters from MixFormerSequentialConfig
    gguf_writer.add_name(gguf.MODEL_ARCH_NAMES[ARCH])
    gguf_writer.add_context_length(hparams.get("n_positions", 2048))
    gguf_writer.add_embedding_length(hparams.get("n_embd", 1024))
    # n_inner may be missing or explicitly null in config.json;
    # fall back to 4 * n_embd in both cases
    n_inner = hparams.get("n_inner") or 4 * hparams.get("n_embd", 1024)
    gguf_writer.add_feed_forward_length(n_inner)
    gguf_writer.add_block_count(hparams.get("n_layer", 20))
    gguf_writer.add_head_count(hparams.get("n_head", 16))
    n_head_kv = hparams.get("n_head_kv", hparams.get("n_head", 16))
    gguf_writer.add_head_count_kv(n_head_kv)  # NOTE: arxiv:2203.11082
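    # When n_head_kv < n_head the checkpoint uses multi-/grouped-query
    # attention (fewer key/value heads than query heads, per the paper cited
    # above); defaulting to n_head keeps plain multi-head attention when the
    # config does not say otherwise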
    gguf_writer.add_layer_norm_eps(hparams.get("layer_norm_epsilon", 1e-5))

    # Add the file type
    # TODO: gguf-py's GGUFWriter.add_file_type() expects the numeric file-type
    # value rather than a string; the string is kept provisionally in this WIP
    gguf_writer.add_file_type(ftype)

    return gguf_writer
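
# For orientation only: the initialize_writer() calls above populate GGUF
# metadata keys along the lines of general.architecture, general.name,
# phi-1.context_length, phi-1.embedding_length, phi-1.feed_forward_length,
# phi-1.block_count, phi-1.attention.head_count, phi-1.attention.head_count_kv,
# phi-1.attention.layer_norm_epsilon and general.file_type (names as mapped by
# the gguf-py helpers; listed here from the GGUF conventions, not verified
# against this checkout)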


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description="Convert a Phi-1 model to a GGML compatible file"
    )
    parser.add_argument(
        "--vocab-only", action="store_true", help="extract only the vocab"
    )
    parser.add_argument(
        "--outfile", type=Path, help="path to write to; default: based on input"
    )
    parser.add_argument(
        "model",
        type=Path,
        help="directory containing model file, or model file itself (*.bin)",
    )
    parser.add_argument(
        "--ftype",
        type=str,
        choices=["f32", "f16"],
        default="f16",  # NOTE: Phi-1 is dtype float16.
        help="output format - use 'f32' for 32-bit tensors, 'f16' for 16-bit tensors",
    )
    return parser.parse_args()
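
# Example invocations (hypothetical local paths):
#
#   python3 convert-phi-1-to-gguf.py ./phi-1 --ftype f16
#   python3 convert-phi-1-to-gguf.py ./phi-1 --outfile ./phi-1/custom-name.gguf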


def main():
    try:
        args = parse_args()

        ftype = args.ftype
        directory = args.model

        if not directory.is_dir():
            raise NotADirectoryError(f"{directory} is not a directory.")

        required_files = ["pytorch_model.bin", "config.json", "tokenizer.json"]
        check_required_files(directory, required_files)

        # Reference the actual model file (its existence was already verified
        # by check_required_files above); the tensor export will load from it
        model = directory / "pytorch_model.bin"

        hparams = load_hyper_params(directory, "MixFormerSequentialForCausalLM")
        # Map the Hugging Face architecture name to the GGUF enum name added in
        # this commit; passing hparams["architectures"][0] directly would fail
        # the hasattr(gguf.MODEL_ARCH, ...) check in initialize_writer
        architecture = "PHI_1"

        if args.outfile is not None:
            fname_out = args.outfile
        else:
            fname_out = directory / f"ggml-model-{ftype}.gguf"

        if not fname_out.parent.exists():
            logging.warning(f"Output directory {fname_out.parent} does not exist.")

        gguf_writer = initialize_writer(fname_out, architecture, ftype, hparams)

        # Proceed with the model processing using the 'model' path
        # TODO: tokenizer and tensor export; a commented-out draft follows below

    except Exception as e:
        logging.error(e)
        sys.exit(1)


if __name__ == "__main__":
    main()
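
# NOTE: The commented-out draft below (tokenizer + tensor export) appears to be
# adapted from the other GGUF converter scripts and still references names that
# are not defined in this file yet (dir_model, num_parts, block_count, ARCH,
# and the numeric ftype); it is kept as a reference for the follow-up work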

# # TOKENIZATION

# print("gguf: get tokenizer metadata")

# tokens: list[bytearray] = []
# scores: list[float] = []
# toktypes: list[int] = []

# # gpt2 tokenizer
# gguf_writer.add_tokenizer_model("gpt2")

# print("gguf: get gpt2 tokenizer vocab")

# # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
# tokenizer = AutoTokenizer.from_pretrained(dir_model)

# # The number of tokens in tokenizer.json can differ from the expected vocab size.
# # This causes downstream issues with mismatched tensor sizes when running inference.
# vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
# assert max(tokenizer.vocab.values()) < vocab_size

# added_vocab = tokenizer.get_added_vocab()
# reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}

# for i in range(vocab_size):
#     if i not in reverse_vocab:
#         tokens.append(f"[PAD{i}]")
#         toktypes.append(gguf.TokenType.USER_DEFINED)
#     elif reverse_vocab[i] in added_vocab:
#         tokens.append(reverse_vocab[i])
#         if tokenizer.added_tokens_decoder[i].special:
#             toktypes.append(gguf.TokenType.CONTROL)
#         else:
#             toktypes.append(gguf.TokenType.USER_DEFINED)
#     else:
#         tokens.append(reverse_vocab[i])
#         toktypes.append(gguf.TokenType.NORMAL)

# gguf_writer.add_token_list(tokens)
# gguf_writer.add_token_types(toktypes)
# special_vocab = gguf.SpecialVocab(dir_model, load_merges=True, n_vocab=len(tokens))
# special_vocab.add_to_gguf(gguf_writer)

# # TENSORS

# tensor_map = gguf.get_tensor_name_map(ARCH, block_count)

# # params for qkv transform
# n_head = hparams["n_head"]
# n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1

# head_dim = hparams["n_embd"] // n_head

# # tensor info
# print("gguf: get tensor metadata")

# if num_parts == 0:
#     part_names = iter(("pytorch_model.bin",))
# else:
#     part_names = (
#         f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
#     )

# for part_name in part_names:
#     if args.vocab_only:
#         break
#     print("gguf: loading model part '" + part_name + "'")
#     model_part = torch.load(dir_model / part_name, map_location="cpu")

#     for name in model_part.keys():
#         data = model_part[name]

#         old_dtype = data.dtype

#         # convert any unsupported data types to float32
#         if data.dtype != torch.float16 and data.dtype != torch.float32:
#             data = data.to(torch.float32)

#         data = data.squeeze().numpy()

#         # map tensor names
#         new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
#         if new_name is None:
#             print("Cannot map tensor '" + name + "'")
#             sys.exit()

#         n_dims = len(data.shape)
#         data_dtype = data.dtype

#         # if f32 desired, convert any float16 to float32
#         if ftype == 0 and data_dtype == np.float16:
#             data = data.astype(np.float32)

#         # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32.
#         if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
#             data = data.astype(np.float32)

#         # if f16 desired, convert any float32 2-dim weight tensors to float16
#         if (
#             ftype == 1
#             and data_dtype == np.float32
#             and name.endswith(".weight")
#             and n_dims == 2
#         ):
#             data = data.astype(np.float16)

#         print(
#             name,
#             "=>",
#             new_name
#             + ", shape = "
#             + str(data.shape)
#             + ", "
#             + str(old_dtype)
#             + " --> "
#             + str(data.dtype),
#         )

#         gguf_writer.add_tensor(new_name, data)


# print("gguf: write header")
# gguf_writer.write_header_to_file()
# print("gguf: write metadata")
# gguf_writer.write_kv_data_to_file()
# if not args.vocab_only:
#     print("gguf: write tensors")
#     gguf_writer.write_tensors_to_file()

# gguf_writer.close()

# print(f"gguf: model successfully exported to '{fname_out}'")
# print("")
@@ -93,6 +93,7 @@ class MODEL_ARCH(IntEnum):
    REFACT : int = auto()
    BERT   : int = auto()
    BLOOM  : int = auto()
    PHI_1  : int = auto()


class MODEL_TENSOR(IntEnum):

@@ -132,6 +133,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.REFACT: "refact",
    MODEL_ARCH.BERT: "bert",
    MODEL_ARCH.BLOOM: "bloom",
    MODEL_ARCH.PHI_1: "phi-1",
}

TENSOR_NAMES: dict[MODEL_TENSOR, str] = {

@@ -302,6 +304,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.PHI_1: [
        # TODO
    ],
    MODEL_ARCH.GPT2: [
        # TODO
    ],