WIP: Initial setup for GGUF writer configuration

- Created the `initialize_writer` function to set up the GGUF writer with model metadata (see the usage sketch below)
- Added validation for the file type and architecture
- Sourced default hyperparameter values from MixFormerSequentialConfig
- Added function annotations and documentation for clarity
- Prepared groundwork for MixFormer architecture integration
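
A minimal sketch of how the new helpers are meant to compose, for reference only; the `phi-1` directory and output filename are illustrative, and `PHI_1` is the `MODEL_ARCH` enum value added alongside this script:

    from pathlib import Path

    model_dir = Path("phi-1")  # hypothetical local checkout containing config.json
    hparams = load_hyper_params(model_dir, "MixFormerSequentialForCausalLM")
    writer = initialize_writer(
        str(model_dir / "ggml-model-f16.gguf"), "PHI_1", "f16", hparams
    )
    # Write the header and key/value metadata only; tensor export is still TODO in this commit
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()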
teleprint-me 2023-11-02 22:40:28 -04:00
parent 629f917cd6
commit 41a98c618e
2 changed files with 305 additions and 0 deletions

convert-phi-1-to-gguf.py (new executable file, 300 lines)

@@ -0,0 +1,300 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import logging
import os
import struct
import sys
from json import JSONDecodeError
from pathlib import Path
from typing import Any, Dict, List
import numpy as np
import torch
from transformers import AutoTokenizer
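# Prefer the repository-local gguf-py package unless NO_LOCAL_GGUF is set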
if "NO_LOCAL_GGUF" not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
import gguf
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
def check_required_files(directory: Path, required_files: List[str]) -> None:
    missing_files = [
        file_name
        for file_name in required_files
        if not (directory / file_name).exists()
    ]
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")
def get_json_map(file_path: Path) -> dict[str, Any]:
    with open(file_path, "r") as source_file:
        try:
            return json.load(source_file)
        except JSONDecodeError:
            raise ValueError(f"Failed to decode {file_path}")
def load_hyper_params(directory: Path, architecture: str) -> dict:
    config_path = directory / "config.json"
    hparams = get_json_map(config_path)
    # Ensure the expected architecture is present
    expected_architecture = architecture
    if hparams["architectures"][0] != expected_architecture:
        raise ValueError(
            f"Model architecture not supported: {hparams['architectures'][0]}"
        )
    return hparams
def initialize_writer(
    fname_out: str, architecture: str, ftype: str, hparams: Dict[str, Any]
) -> gguf.GGUFWriter:
    """
    Initializes the GGUF writer with the model metadata.

    :param fname_out: The filename for the output model.
    :param architecture: The model architecture enum name (e.g., 'PHI_1').
    :param ftype: The data type for the model file ('f32' or 'f16').
    :param hparams: The hyperparameters loaded from the model's config file.
    :return: An initialized GGUF writer object.
    """
    # Validate the architecture name
    if not hasattr(gguf.MODEL_ARCH, architecture):
        raise ValueError(f"Unsupported architecture: {architecture}")
    ARCH = getattr(gguf.MODEL_ARCH, architecture)
    # Validate the file type (argparse supplies lowercase 'f32'/'f16')
    if ftype not in ("f32", "f16"):
        raise ValueError(f"Unsupported file type: {ftype}")
    # Initialize the GGUF writer
    gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
    # Set the writer with the hyperparameters from MixFormerSequentialConfig
    gguf_writer.add_name(gguf.MODEL_ARCH_NAMES[ARCH])
    gguf_writer.add_context_length(hparams.get("n_positions", 2048))
    gguf_writer.add_embedding_length(hparams.get("n_embd", 1024))
    # n_inner may be absent or null in config.json; fall back to 4 * n_embd
    n_inner = hparams.get("n_inner") or 4 * hparams.get("n_embd", 1024)
    gguf_writer.add_feed_forward_length(n_inner)
    gguf_writer.add_block_count(hparams.get("n_layer", 20))
    gguf_writer.add_head_count(hparams.get("n_head", 16))
    # Fall back to n_head (standard multi-head attention) when n_head_kv is absent
    n_head_kv = hparams.get("n_head_kv", hparams.get("n_head", 16))
    gguf_writer.add_head_count_kv(n_head_kv)  # NOTE: arxiv:2203.11082
    gguf_writer.add_layer_norm_eps(hparams.get("layer_norm_epsilon", 1e-5))
    # Add the file type (stored as an integer: 0 = all F32, 1 = mostly F16)
    gguf_writer.add_file_type({"f32": 0, "f16": 1}[ftype])
    return gguf_writer
def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        description="Convert a Phi-1 model to a GGML compatible file"
    )
    parser.add_argument(
        "--vocab-only", action="store_true", help="extract only the vocab"
    )
    parser.add_argument(
        "--outfile", type=Path, help="path to write to; default: based on input"
    )
    parser.add_argument(
        "model",
        type=Path,
        help="directory containing model file, or model file itself (*.bin)",
    )
    parser.add_argument(
        "--ftype",
        type=str,
        choices=["f32", "f16"],
        default="f16",  # NOTE: Phi-1 is dtype float16.
        help="output format - use 'f32' for 32-bit tensors, 'f16' for 16-bit tensors",
    )
    return parser.parse_args()
def main():
    try:
        args = parse_args()
        ftype = args.ftype
        directory = args.model  # Renamed for clarity
        if not directory.is_dir():
            raise NotADirectoryError(f"{directory} is not a directory.")
        required_files = ["pytorch_model.bin", "config.json", "tokenizer.json"]
        check_required_files(directory, required_files)
        # Reference the actual model file
        model = directory / "pytorch_model.bin"
        if not model.exists():
            raise FileNotFoundError(f"Model file {model} does not exist.")
        hparams = load_hyper_params(directory, "MixFormerSequentialForCausalLM")
        # config.json reports "MixFormerSequentialForCausalLM"; the corresponding
        # GGUF enum name is PHI_1 (added alongside this script).
        architecture = "PHI_1"
        if args.outfile is not None:
            fname_out = args.outfile
        else:
            fname_out = directory / f"ggml-model-{ftype}.gguf"
        if not fname_out.parent.exists():
            logging.warning(f"Output directory {fname_out.parent} does not exist.")
        gguf_writer = initialize_writer(fname_out, architecture, ftype, hparams)
        # Proceed with the model processing using the 'model' path
        # ... remaining conversion steps (tokenizer and tensors) to follow ...
    except Exception as e:
        logging.error(e)
        sys.exit(1)


if __name__ == "__main__":
    main()
# # TOKENIZATION
# print("gguf: get tokenizer metadata")
# tokens: list[bytearray] = []
# scores: list[float] = []
# toktypes: list[int] = []
# # gpt2 tokenizer
# gguf_writer.add_tokenizer_model("gpt2")
# print("gguf: get gpt2 tokenizer vocab")
# # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
# tokenizer = AutoTokenizer.from_pretrained(dir_model)
# # The number of tokens in tokenizer.json can differ from the expected vocab size.
# # This causes downstream issues with mismatched tensor sizes when running inference
# vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
# assert max(tokenizer.vocab.values()) < vocab_size
# added_vocab = tokenizer.get_added_vocab()
# reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
# for i in range(vocab_size):
#     if i not in reverse_vocab:
#         tokens.append(f"[PAD{i}]")
#         toktypes.append(gguf.TokenType.USER_DEFINED)
#     elif reverse_vocab[i] in added_vocab:
#         tokens.append(reverse_vocab[i])
#         if tokenizer.added_tokens_decoder[i].special:
#             toktypes.append(gguf.TokenType.CONTROL)
#         else:
#             toktypes.append(gguf.TokenType.USER_DEFINED)
#     else:
#         tokens.append(reverse_vocab[i])
#         toktypes.append(gguf.TokenType.NORMAL)
# gguf_writer.add_token_list(tokens)
# gguf_writer.add_token_types(toktypes)
# special_vocab = gguf.SpecialVocab(dir_model, load_merges=True, n_vocab=len(tokens))
# special_vocab.add_to_gguf(gguf_writer)
# # TENSORS
# tensor_map = gguf.get_tensor_name_map(ARCH, block_count)
# # params for qkv transform
# n_head = hparams["n_head"]
# n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
# head_dim = hparams["n_embd"] // n_head
# # tensor info
# print("gguf: get tensor metadata")
# if num_parts == 0:
#     part_names = iter(("pytorch_model.bin",))
# else:
#     part_names = (
#         f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
#     )
# for part_name in part_names:
#     if args.vocab_only:
#         break
#     print("gguf: loading model part '" + part_name + "'")
#     model_part = torch.load(dir_model / part_name, map_location="cpu")
#     for name in model_part.keys():
#         data = model_part[name]
#         old_dtype = data.dtype
#         # convert any unsupported data types to float32
#         if data.dtype != torch.float16 and data.dtype != torch.float32:
#             data = data.to(torch.float32)
#         data = data.squeeze().numpy()
#         # map tensor names
#         new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
#         if new_name is None:
#             print("Can not map tensor '" + name + "'")
#             sys.exit()
#         n_dims = len(data.shape)
#         data_dtype = data.dtype
#         # if f32 desired, convert any float16 to float32
#         if ftype == 0 and data_dtype == np.float16:
#             data = data.astype(np.float32)
#         # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
#         if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
#             data = data.astype(np.float32)
#         # if f16 desired, convert any float32 2-dim weight tensors to float16
#         if (
#             ftype == 1
#             and data_dtype == np.float32
#             and name.endswith(".weight")
#             and n_dims == 2
#         ):
#             data = data.astype(np.float16)
#         print(
#             name,
#             "=>",
#             new_name
#             + ", shape = "
#             + str(data.shape)
#             + ", "
#             + str(old_dtype)
#             + " --> "
#             + str(data.dtype),
#         )
#         gguf_writer.add_tensor(new_name, data)
# print("gguf: write header")
# gguf_writer.write_header_to_file()
# print("gguf: write metadata")
# gguf_writer.write_kv_data_to_file()
# if not args.vocab_only:
#     print("gguf: write tensors")
#     gguf_writer.write_tensors_to_file()
# gguf_writer.close()
# print(f"gguf: model successfully exported to '{fname_out}'")
# print("")


@@ -93,6 +93,7 @@ class MODEL_ARCH(IntEnum):
    REFACT : int = auto()
    BERT : int = auto()
    BLOOM : int = auto()
    PHI_1 : int = auto()

class MODEL_TENSOR(IntEnum):
@@ -132,6 +133,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.REFACT: "refact",
    MODEL_ARCH.BERT: "bert",
    MODEL_ARCH.BLOOM: "bloom",
    MODEL_ARCH.PHI_1: "phi-1",
}

TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -302,6 +304,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
    MODEL_ARCH.PHI_1: [
        # TODO
    ],
    MODEL_ARCH.GPT2: [
        # TODO
    ],