refactor: Add prototyped bridge interface for tokenizers and llama.cpp

This commit is contained in:
teleprint-me 2024-05-31 20:35:41 -04:00
parent c2e48979e2
commit 47ef6157a0
No known key found for this signature in database
GPG key ID: B0D11345E65C4D48

View file

@ -13,9 +13,9 @@ GGML_QUANT_VERSION = 2 # GGML_QNT_VERSION from ggml.h
# #
# metadata keys # model metadata keys
# #
class Keys: class GGUFMetadataKeys:
class General: class General:
ARCHITECTURE = "general.architecture" ARCHITECTURE = "general.architecture"
QUANTIZATION_VERSION = "general.quantization_version" QUANTIZATION_VERSION = "general.quantization_version"
@ -27,7 +27,7 @@ class Keys:
DESCRIPTION = "general.description" DESCRIPTION = "general.description"
LICENSE = "general.license" LICENSE = "general.license"
SOURCE_URL = "general.source.url" SOURCE_URL = "general.source.url"
SOURCE_HF_REPO = "general.source.huggingface.repository" SOURCE_REPO = "general.source.repository"
FILE_TYPE = "general.file_type" FILE_TYPE = "general.file_type"
class LLM: class LLM:
@ -77,36 +77,35 @@ class Keys:
TIME_STEP_RANK = "{arch}.ssm.time_step_rank" TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
class Tokenizer: class Tokenizer:
MODEL = "tokenizer.ggml.model" # STRING: e.g. llama MODEL = "tokenizer.model" # STRING: e.g. llama, gpt2, etc...
TYPE = "tokenizer.ggml.type" # STRING: BPE, SPM, WPM, etc. TYPE = "tokenizer.type" # STRING: BPE, SPM, WPM, etc.
NORM = "tokenizer.ggml.norm" # OBJECT {"type": "ByteLevel"} NORM = "tokenizer.norm" # OBJECT {"type": "ByteLevel", ...}
PRE = "tokenizer.ggml.pre" # OBJECT {"type": "ByteLevel"} PRE = "tokenizer.pre" # OBJECT {"type": "ByteLevel", ...}
ADDED = "tokenizer.ggml.added" # ARRAY of OBJECTs {"id": 1} ADDED = "tokenizer.added" # ARRAY of OBJECTs: [{"id": 1, ...}, ...]
LIST = "tokenizer.ggml.tokens" VOCAB = "tokenizer.vocab" # ARRAY of STRINGs: ["[BOS]", ...]
TOKEN_TYPE = "tokenizer.ggml.token_type" MERGES = "tokenizer.merges" # ARRAY of STRINGs: ["▁ t", ...]
TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # BERT token types TOKEN_TYPE = "tokenizer.token_type" # ARRAY of INT [2, ...]
SCORES = "tokenizer.ggml.scores" # Word Piece Only TOKEN_TYPE_COUNT = "tokenizer.token_type_count" # BERT token types
MERGES = "tokenizer.ggml.merges" SCORES = "tokenizer.scores" # WPM only
BOS_ID = "tokenizer.ggml.bos_token_id" BOS_ID = "tokenizer.bos_token_id"
EOS_ID = "tokenizer.ggml.eos_token_id" EOS_ID = "tokenizer.eos_token_id"
UNK_ID = "tokenizer.ggml.unknown_token_id" UNK_ID = "tokenizer.unknown_token_id"
SEP_ID = "tokenizer.ggml.seperator_token_id" SEP_ID = "tokenizer.seperator_token_id"
PAD_ID = "tokenizer.ggml.padding_token_id" PAD_ID = "tokenizer.padding_token_id"
CLS_ID = "tokenizer.ggml.cls_token_id" CLS_ID = "tokenizer.cls_token_id"
MASK_ID = "tokenizer.ggml.mask_token_id" MASK_ID = "tokenizer.mask_token_id"
ADD_BOS = "tokenizer.ggml.add_bos_token" ADD_BOS = "tokenizer.add_bos_token"
ADD_EOS = "tokenizer.ggml.add_eos_token" ADD_EOS = "tokenizer.add_eos_token"
ADD_PREFIX = "tokenizer.ggml.add_space_prefix" ADD_PREFIX = "tokenizer.add_space_prefix"
HF_JSON = "tokenizer.huggingface.json"
RWKV = "tokenizer.rwkv.world" RWKV = "tokenizer.rwkv.world"
CHAT_TEMPLATE = "tokenizer.chat_template" CHAT_TEMPLATE = "tokenizer.chat_template"
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}" CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
CHAT_TEMPLATES = "tokenizer.chat_templates" CHAT_TEMPLATES = "tokenizer.chat_templates"
# FIM/Infill special tokens constants # FIM/Infill special tokens constants
PREFIX_ID = "tokenizer.ggml.prefix_token_id" PREFIX_ID = "tokenizer.prefix_token_id"
SUFFIX_ID = "tokenizer.ggml.suffix_token_id" SUFFIX_ID = "tokenizer.suffix_token_id"
MIDDLE_ID = "tokenizer.ggml.middle_token_id" MIDDLE_ID = "tokenizer.middle_token_id"
EOT_ID = "tokenizer.ggml.eot_token_id" EOT_ID = "tokenizer.eot_token_id"
# #
@ -849,13 +848,13 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
# #
# types # types
# #
class RopeScalingType(Enum): class GGMLRopeScalingType(Enum):
NONE = 'none' NONE = 'none'
LINEAR = 'linear' LINEAR = 'linear'
YARN = 'yarn' YARN = 'yarn'
class PoolingType(IntEnum): class GGMLPoolingType(IntEnum):
NONE = 0 NONE = 0
MEAN = 1 MEAN = 1
CLS = 2 CLS = 2
@ -898,7 +897,7 @@ class GGMLQuantizationType(IntEnum):
# from llama_ftype in llama.h # from llama_ftype in llama.h
# ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE. # ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE.
class LlamaFileType(IntEnum): class GGUFFileType(IntEnum):
ALL_F32 = 0 ALL_F32 = 0
MOSTLY_F16 = 1 # except 1d tensors MOSTLY_F16 = 1 # except 1d tensors
MOSTLY_Q4_0 = 2 # except 1d tensors MOSTLY_Q4_0 = 2 # except 1d tensors
@ -936,47 +935,70 @@ class LlamaFileType(IntEnum):
GUESSED = 1024 # not specified in the model file GUESSED = 1024 # not specified in the model file
# Short name string for each file type listed here.
GGUF_FILE_TYPE_NAMES: dict[GGUFFileType, str] = {
    GGUFFileType.ALL_F32     : "F32",
    GGUFFileType.MOSTLY_F16  : "F16",
    GGUFFileType.MOSTLY_BF16 : "BF16",
    GGUFFileType.MOSTLY_Q8_0 : "Q8_0",
}

# Reverse lookup (name string -> file type). Derived from the table above
# instead of being written out by hand, so the two mappings always stay exact
# inverses of each other; entry order matches GGUF_FILE_TYPE_NAMES.
GGUF_FILE_TYPE_MAP: dict[str, GGUFFileType] = {
    name: ftype for ftype, name in GGUF_FILE_TYPE_NAMES.items()
}
class GGUFEndian(IntEnum): class GGUFEndian(IntEnum):
LITTLE = 0 LITTLE = 0
BIG = 1 BIG = 1
class GGUFValueType(IntEnum):
    """Type tags for GGUF metadata values.

    The integer values are spelled out explicitly instead of being generated
    with ``auto()``.  ``auto()`` starts counting at 1 and follows declaration
    order, so it would silently move every tag away from the values this enum
    has always had (``UINT8 = 0`` ... ``FLOAT64 = 12``).
    NOTE(review): these tags presumably end up in written GGUF files, in which
    case they must never be renumbered -- confirm against the reader/writer.
    """

    UINT8   = 0
    INT8    = 1
    UINT16  = 2
    INT16   = 3
    UINT32  = 4
    INT32   = 5
    FLOAT32 = 6
    BOOL    = 7
    STRING  = 8
    ARRAY   = 9
    UINT64  = 10
    INT64   = 11
    FLOAT64 = 12
    # NOTE(review): OBJECT is new in this prototype; 13 is simply the next
    # unused value -- confirm that whatever consumes these tags agrees.
    OBJECT  = 13

    @staticmethod
    def get_type(val: Any) -> "GGUFValueType":
        """Return the value type tag that matches the Python type of *val*.

        Args:
            val: The Python value to classify.

        Returns:
            STRING for str/bytes/bytearray, BOOL for bool, INT32 for int,
            FLOAT32 for float, ARRAY for list and OBJECT for dict.

        Raises:
            ValueError: If *val* is of none of the types listed above.
        """
        if isinstance(val, (str, bytes, bytearray)):
            return GGUFValueType.STRING
        # bool is a subclass of int, so it has to be tested before int.
        elif isinstance(val, bool):
            return GGUFValueType.BOOL
        elif isinstance(val, int):
            # TODO: Need help with 64-bit types in Python.
            # NOTE: Python ints are arbitrary precision, so the intended width
            # and signedness cannot be inferred from the type alone (numpy
            # dtypes might help here). Every int is reported as INT32.
            return GGUFValueType.INT32
        elif isinstance(val, float):
            # NOTE: Python floats are double precision, yet they are reported
            # as FLOAT32; a FLOAT64 value cannot be detected from the type.
            return GGUFValueType.FLOAT32
        elif isinstance(val, list):
            return GGUFValueType.ARRAY
        elif isinstance(val, dict):
            # NOTE: only dict is recognised here; other mapping types fall
            # through to the error below.
            return GGUFValueType.OBJECT
        else:
            raise ValueError(f"Unknown type: {type(val)}")
@ -1019,7 +1041,7 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
# #
# Tokenizer Types # Tokenizer Types
# #
class TokenType(IntEnum): class GGUFTokenType(IntEnum):
NORMAL = 1 NORMAL = 1
UNKNOWN = 2 UNKNOWN = 2
CONTROL = 3 CONTROL = 3
@ -1028,7 +1050,7 @@ class TokenType(IntEnum):
BYTE = 6 BYTE = 6
class VocabType(Enum): class GGUFTokenizerType(Enum):
SPM = "SPM" # SentencePiece LLaMa tokenizer SPM = "SPM" # SentencePiece LLaMa tokenizer
BPE = "BPE" # BytePair GPT-2 tokenizer BPE = "BPE" # BytePair GPT-2 tokenizer
WPM = "WPM" # WordPiece BERT tokenizer WPM = "WPM" # WordPiece BERT tokenizer
@ -1037,42 +1059,43 @@ class VocabType(Enum):
# #
# Model File Types # Model File Types
# #
class ModelFileExtension(Enum): class GGUFFileExtension(Enum):
PT = ".pt" # torch PT = ".pt" # torch
PTH = ".pth" # torch PTH = ".pth" # torch
BIN = ".bin" # torch BIN = ".bin" # torch
SAFETENSORS = ".safetensors" # safetensors SAFETENSORS = ".safetensors" # safetensors
JSON = ".json" # transformers/tokenizers JSON = ".json" # transformers/tokenizers
MODEL = ".model" # sentencepiece MODEL = ".model" # sentencepiece
GGUF = ".gguf" # ggml/llama.cpp GGUF = ".gguf" # ggml/llama.cpp
# #
# Normalizer Types # Normalizer Types
# #
class NormalizerType(Enum): class GGUFNormalizerType(Enum):
SEQUENCE = "Sequence" SEQUENCE = "Sequence"
NFC = "NFC" NFC = "NFC"
NFD = "NFD" NFD = "NFD"
NFKC = "NFKC" NFKC = "NFKC"
NFKD = "NFKD" NFKD = "NFKD"
# #
# Pre-tokenizer Types # Pre-tokenizer Types
# #
class PreTokenizerType(Enum): class GGUFPreTokenizerType(Enum):
SEQUENCE = "Sequence" WHITESPACE = "Whitespace"
BYTE_LEVEL = "ByteLevel" METASPACE = "Metaspace"
BYTE_LEVEL = "ByteLevel"
BERT_PRE_TOKENIZER = "BertPreTokenizer" BERT_PRE_TOKENIZER = "BertPreTokenizer"
METASPACE = "Metaspace" SEQUENCE = "Sequence"
# #
# HF Vocab Files # HF Vocab Files
# #
HF_TOKENIZER_BPE_FILES = ("config.json", "tokenizer_config.json", "tokenizer.json",) HF_TOKENIZER_BPE_FILES: tuple[str, ...] = ("config.json", "tokenizer_config.json", "tokenizer.json",)
HF_TOKENIZER_SPM_FILES = HF_TOKENIZER_BPE_FILES + ("tokenizer.model",) HF_TOKENIZER_SPM_FILES: tuple[str, ...] = HF_TOKENIZER_BPE_FILES + ("tokenizer.model",)
# #
# Pre-tokenization Regular Expressions # Pre-tokenization Regular Expressions
@ -1089,69 +1112,75 @@ GPT_PRE_TOKENIZER_DEFAULT = ("'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\
# Aliases for backward compatibility. # Aliases for backward compatibility.
# general # general
# Legacy module-level names for the GGUFMetadataKeys.General constants.
KEY_GENERAL_ARCHITECTURE         = GGUFMetadataKeys.General.ARCHITECTURE
KEY_GENERAL_QUANTIZATION_VERSION = GGUFMetadataKeys.General.QUANTIZATION_VERSION
KEY_GENERAL_ALIGNMENT            = GGUFMetadataKeys.General.ALIGNMENT
KEY_GENERAL_NAME                 = GGUFMetadataKeys.General.NAME
KEY_GENERAL_AUTHOR               = GGUFMetadataKeys.General.AUTHOR
KEY_GENERAL_URL                  = GGUFMetadataKeys.General.URL
KEY_GENERAL_DESCRIPTION          = GGUFMetadataKeys.General.DESCRIPTION
KEY_GENERAL_LICENSE              = GGUFMetadataKeys.General.LICENSE
KEY_GENERAL_SOURCE_URL           = GGUFMetadataKeys.General.SOURCE_URL
# General.SOURCE_HF_REPO was renamed to General.SOURCE_REPO; the legacy alias
# keeps its old name but has to read the new attribute, otherwise importing
# this module raises AttributeError.
KEY_GENERAL_SOURCE_HF_REPO       = GGUFMetadataKeys.General.SOURCE_REPO
KEY_GENERAL_FILE_TYPE            = GGUFMetadataKeys.General.FILE_TYPE
# LLM # LLM
KEY_VOCAB_SIZE = Keys.LLM.VOCAB_SIZE KEY_VOCAB_SIZE = GGUFMetadataKeys.LLM.VOCAB_SIZE
KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH KEY_CONTEXT_LENGTH = GGUFMetadataKeys.LLM.CONTEXT_LENGTH
KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH KEY_EMBEDDING_LENGTH = GGUFMetadataKeys.LLM.EMBEDDING_LENGTH
KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT KEY_BLOCK_COUNT = GGUFMetadataKeys.LLM.BLOCK_COUNT
KEY_FEED_FORWARD_LENGTH = Keys.LLM.FEED_FORWARD_LENGTH KEY_FEED_FORWARD_LENGTH = GGUFMetadataKeys.LLM.FEED_FORWARD_LENGTH
KEY_USE_PARALLEL_RESIDUAL = Keys.LLM.USE_PARALLEL_RESIDUAL KEY_USE_PARALLEL_RESIDUAL = GGUFMetadataKeys.LLM.USE_PARALLEL_RESIDUAL
KEY_TENSOR_DATA_LAYOUT = Keys.LLM.TENSOR_DATA_LAYOUT KEY_TENSOR_DATA_LAYOUT = GGUFMetadataKeys.LLM.TENSOR_DATA_LAYOUT
# attention # attention
KEY_ATTENTION_HEAD_COUNT = Keys.Attention.HEAD_COUNT KEY_ATTENTION_HEAD_COUNT = GGUFMetadataKeys.Attention.HEAD_COUNT
KEY_ATTENTION_HEAD_COUNT_KV = Keys.Attention.HEAD_COUNT_KV KEY_ATTENTION_HEAD_COUNT_KV = GGUFMetadataKeys.Attention.HEAD_COUNT_KV
KEY_ATTENTION_MAX_ALIBI_BIAS = Keys.Attention.MAX_ALIBI_BIAS KEY_ATTENTION_MAX_ALIBI_BIAS = GGUFMetadataKeys.Attention.MAX_ALIBI_BIAS
KEY_ATTENTION_CLAMP_KQV = Keys.Attention.CLAMP_KQV KEY_ATTENTION_CLAMP_KQV = GGUFMetadataKeys.Attention.CLAMP_KQV
KEY_ATTENTION_LAYERNORM_EPS = Keys.Attention.LAYERNORM_EPS KEY_ATTENTION_LAYERNORM_EPS = GGUFMetadataKeys.Attention.LAYERNORM_EPS
KEY_ATTENTION_LAYERNORM_RMS_EPS = Keys.Attention.LAYERNORM_RMS_EPS KEY_ATTENTION_LAYERNORM_RMS_EPS = GGUFMetadataKeys.Attention.LAYERNORM_RMS_EPS
# RoPE # RoPE
KEY_ROPE_DIMENSION_COUNT = Keys.Rope.DIMENSION_COUNT KEY_ROPE_DIMENSION_COUNT = GGUFMetadataKeys.Rope.DIMENSION_COUNT
KEY_ROPE_FREQ_BASE = Keys.Rope.FREQ_BASE KEY_ROPE_FREQ_BASE = GGUFMetadataKeys.Rope.FREQ_BASE
KEY_ROPE_SCALING_TYPE = Keys.Rope.SCALING_TYPE KEY_ROPE_SCALING_TYPE = GGUFMetadataKeys.Rope.SCALING_TYPE
KEY_ROPE_SCALING_FACTOR = Keys.Rope.SCALING_FACTOR KEY_ROPE_SCALING_FACTOR = GGUFMetadataKeys.Rope.SCALING_FACTOR
KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN KEY_ROPE_SCALING_ORIG_CTX_LEN = GGUFMetadataKeys.Rope.SCALING_ORIG_CTX_LEN
KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED KEY_ROPE_SCALING_FINETUNED = GGUFMetadataKeys.Rope.SCALING_FINETUNED
# SSM # SSM
KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL KEY_SSM_CONV_KERNEL = GGUFMetadataKeys.SSM.CONV_KERNEL
KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE KEY_SSM_INNER_SIZE = GGUFMetadataKeys.SSM.INNER_SIZE
KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE KEY_SSM_STATE_SIZE = GGUFMetadataKeys.SSM.STATE_SIZE
KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK KEY_SSM_TIME_STEP_RANK = GGUFMetadataKeys.SSM.TIME_STEP_RANK
# tokenization # tokenization
# Legacy module-level names for the GGUFMetadataKeys.Tokenizer constants.
KEY_TOKENIZER_MODEL            = GGUFMetadataKeys.Tokenizer.MODEL
KEY_TOKENIZER_TYPE             = GGUFMetadataKeys.Tokenizer.TYPE
KEY_TOKENIZER_NORM             = GGUFMetadataKeys.Tokenizer.NORM
KEY_TOKENIZER_PRE              = GGUFMetadataKeys.Tokenizer.PRE
KEY_TOKENIZER_ADDED            = GGUFMetadataKeys.Tokenizer.ADDED
KEY_TOKENIZER_VOCAB            = GGUFMetadataKeys.Tokenizer.VOCAB
# Tokenizer.LIST was renamed to Tokenizer.VOCAB; the old alias name is kept so
# that existing `KEY_TOKENIZER_LIST` users keep working.
KEY_TOKENIZER_LIST             = GGUFMetadataKeys.Tokenizer.VOCAB
KEY_TOKENIZER_MERGES           = GGUFMetadataKeys.Tokenizer.MERGES
KEY_TOKENIZER_TOKEN_TYPE       = GGUFMetadataKeys.Tokenizer.TOKEN_TYPE
KEY_TOKENIZER_TOKEN_TYPE_COUNT = GGUFMetadataKeys.Tokenizer.TOKEN_TYPE_COUNT
KEY_TOKENIZER_SCORES           = GGUFMetadataKeys.Tokenizer.SCORES
KEY_TOKENIZER_BOS_ID           = GGUFMetadataKeys.Tokenizer.BOS_ID
KEY_TOKENIZER_EOS_ID           = GGUFMetadataKeys.Tokenizer.EOS_ID
KEY_TOKENIZER_UNK_ID           = GGUFMetadataKeys.Tokenizer.UNK_ID
KEY_TOKENIZER_SEP_ID           = GGUFMetadataKeys.Tokenizer.SEP_ID
KEY_TOKENIZER_PAD_ID           = GGUFMetadataKeys.Tokenizer.PAD_ID
KEY_TOKENIZER_CLS_ID           = GGUFMetadataKeys.Tokenizer.CLS_ID
KEY_TOKENIZER_MASK_ID          = GGUFMetadataKeys.Tokenizer.MASK_ID
KEY_TOKENIZER_ADD_BOS          = GGUFMetadataKeys.Tokenizer.ADD_BOS
KEY_TOKENIZER_ADD_EOS          = GGUFMetadataKeys.Tokenizer.ADD_EOS
KEY_TOKENIZER_ADD_PREFIX       = GGUFMetadataKeys.Tokenizer.ADD_PREFIX
KEY_TOKENIZER_RWKV             = GGUFMetadataKeys.Tokenizer.RWKV
KEY_TOKENIZER_CHAT_TEMPLATE    = GGUFMetadataKeys.Tokenizer.CHAT_TEMPLATE
KEY_TOKENIZER_CHAT_TEMPLATE_N  = GGUFMetadataKeys.Tokenizer.CHAT_TEMPLATE_N
KEY_TOKENIZER_CHAT_TEMPLATES   = GGUFMetadataKeys.Tokenizer.CHAT_TEMPLATES
# "PRIFIX" (sic): the misspelled alias name is kept unchanged so existing
# imports of it do not break.
KEY_TOKENIZER_PRIFIX_ID        = GGUFMetadataKeys.Tokenizer.PREFIX_ID
KEY_TOKENIZER_SUFFIX_ID        = GGUFMetadataKeys.Tokenizer.SUFFIX_ID
KEY_TOKENIZER_MIDDLE_ID        = GGUFMetadataKeys.Tokenizer.MIDDLE_ID
KEY_TOKENIZER_EOT_ID           = GGUFMetadataKeys.Tokenizer.EOT_ID