This commit is contained in:
akawrykow 2023-08-23 21:37:57 +02:00 committed by GitHub
commit 680ab3dcb1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 21 additions and 0 deletions

View file

@ -16,12 +16,14 @@ import pickle
import re import re
import signal import signal
import struct import struct
import subprocess
import sys import sys
import zipfile import zipfile
import numpy as np import numpy as np
from abc import ABCMeta, abstractmethod from abc import ABCMeta, abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, TypeVar, Union) from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Literal, Optional, Sequence, Tuple, TypeVar, Union)
from sentencepiece import SentencePieceProcessor # type: ignore from sentencepiece import SentencePieceProcessor # type: ignore
@ -734,6 +736,8 @@ class OutputFile:
def add_meta_arch(self, params: Params) -> None: def add_meta_arch(self, params: Params) -> None:
self.gguf.add_name ("LLaMA") self.gguf.add_name ("LLaMA")
self.gguf.add_date (datetime.today().isoformat())
self.gguf.add_commit_hash (get_git_revision_short_hash())
self.gguf.add_context_length (params.n_ctx) self.gguf.add_context_length (params.n_ctx)
self.gguf.add_embedding_length (params.n_embd) self.gguf.add_embedding_length (params.n_embd)
self.gguf.add_block_count (params.n_layer) self.gguf.add_block_count (params.n_layer)
@ -1000,6 +1004,9 @@ def do_dump_model(model_plus: ModelPlus) -> None:
print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}") print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
def get_git_revision_short_hash() -> str:
    """Return the abbreviated git commit hash of the checkout holding this script.

    The hash is recorded as optional provenance metadata, so failing to obtain
    it must never abort a conversion. git is executed in the directory that
    contains this file rather than in the caller's working directory, so the
    result describes this script's checkout and not whatever repository the
    user happens to be standing in.

    Returns:
        The output of ``git rev-parse --short HEAD`` with surrounding
        whitespace removed, or the placeholder ``"unknown"`` when git is not
        installed, the script does not live inside a git checkout, or git
        produces no output.
    """
    try:
        script_dir = Path(__file__).resolve().parent
        raw = subprocess.check_output(
            ['git', 'rev-parse', '--short', 'HEAD'],
            cwd=script_dir,
            # Keep git's "fatal: not a git repository" noise off the console.
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git exited non-zero (e.g. not a repository).
        # OSError: git executable missing or the directory is inaccessible.
        return "unknown"
    return raw.decode('ascii').strip() or "unknown"
def main(args_in: Optional[List[str]] = None) -> None: def main(args_in: Optional[List[str]] = None) -> None:
parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file") parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model") parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")

View file

@ -28,6 +28,8 @@ KEY_GENERAL_LICENSE = "general.license"
KEY_GENERAL_SOURCE_URL = "general.source.url" KEY_GENERAL_SOURCE_URL = "general.source.url"
KEY_GENERAL_SOURCE_HF_REPO = "general.source.hugginface.repository" KEY_GENERAL_SOURCE_HF_REPO = "general.source.hugginface.repository"
KEY_GENERAL_FILE_TYPE = "general.file_type" KEY_GENERAL_FILE_TYPE = "general.file_type"
KEY_GENERAL_DATE = "general.date"
KEY_GENERAL_COMMIT_HASH = "general.commit_hash"
# LLM # LLM
KEY_LLM_CONTEXT_LENGTH = "{arch}.context_length" KEY_LLM_CONTEXT_LENGTH = "{arch}.context_length"
@ -600,6 +602,12 @@ class GGUFWriter:
def add_file_type(self, ftype: int): def add_file_type(self, ftype: int):
self.add_uint32(KEY_GENERAL_FILE_TYPE, ftype) self.add_uint32(KEY_GENERAL_FILE_TYPE, ftype)
def add_date(self, date: str):
    """Write ``date`` as a string value under the ``KEY_GENERAL_DATE`` key."""
    key = KEY_GENERAL_DATE
    self.add_string(key, date)
def add_commit_hash(self, commit_hash: str):
    """Write ``commit_hash`` as a string value under the ``KEY_GENERAL_COMMIT_HASH`` key."""
    key = KEY_GENERAL_COMMIT_HASH
    self.add_string(key, commit_hash)
def add_name(self, name: str): def add_name(self, name: str):
self.add_string(KEY_GENERAL_NAME, name) self.add_string(KEY_GENERAL_NAME, name)

View file

@ -1279,6 +1279,8 @@ static void llama_model_load_internal(
std::string general_name = "n/a"; std::string general_name = "n/a";
std::string general_arch = "n/a"; std::string general_arch = "n/a";
std::string general_date = "n/a";
std::string general_commit_hash = "n/a";
// read hparams // read hparams
{ {
@ -1336,6 +1338,8 @@ static void llama_model_load_internal(
// get general kv // get general kv
GGUF_GET(general_name, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.name"); GGUF_GET(general_name, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.name");
GGUF_GET(general_arch, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.architecture"); GGUF_GET(general_arch, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.architecture");
GGUF_GET(general_date, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.date");
GGUF_GET(general_commit_hash, gguf_get_val_str, GGUF_TYPE_STRING, false, "general.commit_hash");
// special tokens // special tokens
GGUF_GET(vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.bos_token_id"); GGUF_GET(vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, "tokenizer.ggml.bos_token_id");
@ -1445,6 +1449,8 @@ static void llama_model_load_internal(
// general kv // general kv
LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, general_name.c_str()); LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, general_name.c_str());
LLAMA_LOG_INFO("%s: general.date = %s\n", __func__, general_date.c_str());
LLAMA_LOG_INFO("%s: general.commit_hash = %s\n", __func__, general_commit_hash.c_str());
// special tokens // special tokens
if (vocab.special_bos_id != -1) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].text.c_str() ); } if (vocab.special_bos_id != -1) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].text.c_str() ); }