Merge 6803aac321 into a192860cfe

2023-08-23 21:37:57 +02:00 · 2023-08-23 21:37:57 +02:00 · 680ab3dcb1
commit 680ab3dcb1
parent a192860cfe 6803aac321
3 changed files with 21 additions and 0 deletions
--- a/convert.py
+++ b/convert.py
@ -16,12 +16,14 @@ import pickle
 import re
 import signal
 import struct
+import subprocess
 import sys
 import zipfile
 import numpy as np

 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
+from datetime import datetime
 from pathlib import Path
 from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, TypeVar, Union)
 from sentencepiece import SentencePieceProcessor  # type: ignore
@ -734,6 +736,8 @@ class OutputFile:

    def add_meta_arch(self, params: Params) -> None:
        self.gguf.add_name                ("LLaMA")
+        self.gguf.add_date                (datetime.today().isoformat())
+        self.gguf.add_commit_hash         (get_git_revision_short_hash())
        self.gguf.add_context_length      (params.n_ctx)
        self.gguf.add_embedding_length    (params.n_embd)
        self.gguf.add_block_count         (params.n_layer)
@ -1000,6 +1004,9 @@ def do_dump_model(model_plus: ModelPlus) -> None:
        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")


+def get_git_revision_short_hash() -> str:
+    return subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).decode('ascii').strip()
+
 def main(args_in: Optional[List[str]] = None) -> None:
    parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
    parser.add_argument("--dump",        action="store_true",    help="don't convert, just show what's in the model")
--- a/gguf.py
+++ b/gguf.py
@ -28,6 +28,8 @@ KEY_GENERAL_LICENSE              = "general.license"
 KEY_GENERAL_SOURCE_URL           = "general.source.url"
 KEY_GENERAL_SOURCE_HF_REPO       = "general.source.hugginface.repository"
 KEY_GENERAL_FILE_TYPE            = "general.file_type"
+KEY_GENERAL_DATE                 = "general.date"
+KEY_GENERAL_COMMIT_HASH          = "general.commit_hash"

 # LLM
 KEY_LLM_CONTEXT_LENGTH        = "{arch}.context_length"
@ -600,6 +602,12 @@ class GGUFWriter:
    def add_file_type(self, ftype: int):
        self.add_uint32(KEY_GENERAL_FILE_TYPE, ftype)

+    def add_date(self, date: str):
+        """Write `date` as a string value under the KEY_GENERAL_DATE metadata key."""
+        self.add_string(KEY_GENERAL_DATE, date)
+
+    def add_commit_hash(self, commit_hash: str):
+        """Write `commit_hash` as a string value under the KEY_GENERAL_COMMIT_HASH metadata key."""
+        self.add_string(KEY_GENERAL_COMMIT_HASH, commit_hash)
+
    def add_name(self, name: str):
        self.add_string(KEY_GENERAL_NAME, name)

--- a/llama.cpp
+++ b/llama.cpp
@ -1279,6 +1279,8 @@ static void llama_model_load_internal(

    std::string general_name = "n/a";
    std::string general_arch = "n/a";
+    std::string general_date = "n/a";
+    std::string general_commit_hash = "n/a";

    // read hparams
    {
@ -1336,6 +1338,8 @@ static void llama_model_load_internal(
        // get general kv
        GGUF_GET(general_name, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.name");
        GGUF_GET(general_arch, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.architecture");
+        GGUF_GET(general_date, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.date");
+        GGUF_GET(general_commit_hash, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.commit_hash");

        // special tokens
        GGUF_GET(vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.bos_token_id");
@ -1445,6 +1449,8 @@ static void llama_model_load_internal(

        // general kv
        LLAMA_LOG_INFO("%s: general.name = %s\n",    __func__, general_name.c_str());
+        LLAMA_LOG_INFO("%s: general.date = %s\n",    __func__, general_date.c_str());
+        LLAMA_LOG_INFO("%s: general.commit_hash = %s\n", __func__, general_commit_hash.c_str());

        // special tokens
        if (vocab.special_bos_id != -1) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].text.c_str() ); }