Merge 072d7c96c0 into 17eb6aa8a9
This commit is contained in: commit 34798e9cd7
2 changed files with 115 additions and 656 deletions
@@ -13,7 +13,7 @@ import sys
 from enum import IntEnum
 from pathlib import Path
 from hashlib import sha256
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast
+from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Sequence, TypeVar, cast

 import math
 import numpy as np

@@ -265,7 +265,7 @@ class Model:
 break

 for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)):
-data: np.ndarray # type hint
+data: np.ndarray = data # type hint
 n_dims = len(data.shape)
 data_dtype = data.dtype
 data_qtype: gguf.GGMLQuantizationType | None = None

@@ -380,7 +380,7 @@ class Model:

 from transformers import AutoTokenizer
 tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
-vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
+vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab)) #self.hparams.get("vocab_size")
 assert max(tokenizer.vocab.values()) < vocab_size

 tokpre = self.get_vocab_base_pre(tokenizer)

@@ -404,7 +404,7 @@ class Model:

 return tokens, toktypes, tokpre

-# NOTE: this function is generated by convert_hf_to_gguf_update.py
+# NOTE: this function is generated by convert-hf-to-gguf-update.py
 # do not modify it manually!
 # ref: https://github.com/ggerganov/llama.cpp/pull/6920
 # Marker: Start get_vocab_base_pre

@ -424,87 +424,18 @@ class Model:
|
||||||
|
|
||||||
res = None
|
res = None
|
||||||
|
|
||||||
# NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script
|
# NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
|
||||||
# or pull the latest version of the model from Huggingface
|
# or pull the latest version of the model from Huggingface
|
||||||
# don't edit the hashes manually!
|
# don't edit the hashes manually!
|
||||||
if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
|
|
||||||
# ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
|
|
||||||
res = "llama-bpe"
|
|
||||||
if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
|
|
||||||
# ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
|
|
||||||
res = "deepseek-llm"
|
|
||||||
if chkhsh == "347715f544604f9118bb75ed199f68779f423cabb20db6de6f31b908d04d7821":
|
|
||||||
# ref: https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base
|
|
||||||
res = "deepseek-coder"
|
|
||||||
if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
|
|
||||||
# ref: https://huggingface.co/tiiuae/falcon-7b
|
|
||||||
res = "falcon"
|
|
||||||
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
|
|
||||||
# ref: https://huggingface.co/BAAI/bge-small-en-v1.5
|
|
||||||
res = "bert-bge"
|
|
||||||
if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
|
|
||||||
# ref: https://huggingface.co/mosaicml/mpt-7b
|
|
||||||
res = "mpt"
|
|
||||||
if chkhsh == "35d91631860c815f952d711435f48d356ebac988362536bed955d43bfa436e34":
|
|
||||||
# ref: https://huggingface.co/bigcode/starcoder2-3b
|
|
||||||
res = "starcoder"
|
|
||||||
if chkhsh == "3ce83efda5659b07b1ad37ca97ca5797ea4285d9b9ab0dc679e4a720c9da7454":
|
|
||||||
# ref: https://huggingface.co/openai-community/gpt2
|
|
||||||
res = "gpt-2"
|
|
||||||
if chkhsh == "32d85c31273f8019248f2559fed492d929ea28b17e51d81d3bb36fff23ca72b3":
|
|
||||||
# ref: https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b
|
|
||||||
res = "stablelm2"
|
|
||||||
if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff":
|
|
||||||
# ref: https://huggingface.co/smallcloudai/Refact-1_6-base
|
|
||||||
res = "refact"
|
|
||||||
if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
|
|
||||||
# ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
|
|
||||||
res = "command-r"
|
|
||||||
if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
|
|
||||||
# ref: https://huggingface.co/Qwen/Qwen1.5-7B
|
|
||||||
res = "qwen2"
|
|
||||||
if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
|
|
||||||
# ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
|
|
||||||
res = "olmo"
|
|
||||||
if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
|
|
||||||
# ref: https://huggingface.co/databricks/dbrx-base
|
|
||||||
res = "dbrx"
|
|
||||||
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
|
|
||||||
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
|
|
||||||
res = "jina-v2-en"
|
|
||||||
if chkhsh == "171aeeedd6fb548d418a7461d053f11b6f1f1fc9b387bd66640d28a4b9f5c643":
|
|
||||||
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-es
|
|
||||||
res = "jina-v2-es"
|
|
||||||
if chkhsh == "27949a2493fc4a9f53f5b9b029c82689cfbe5d3a1929bb25e043089e28466de6":
|
|
||||||
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-de
|
|
||||||
res = "jina-v2-de"
|
|
||||||
if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d":
|
|
||||||
# ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
|
|
||||||
res = "smaug-bpe"
|
|
||||||
if chkhsh == "c7ea5862a53e4272c035c8238367063e2b270d51faa48c0f09e9d5b54746c360":
|
|
||||||
# ref: https://huggingface.co/LumiOpen/Poro-34B-chat
|
|
||||||
res = "poro-chat"
|
|
||||||
if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
|
|
||||||
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
|
|
||||||
res = "jina-v2-code"
|
|
||||||
if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b":
|
|
||||||
# ref: https://huggingface.co/THUDM/glm-4-9b-chat
|
|
||||||
res = "chatglm-bpe"
|
|
||||||
if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
|
|
||||||
# ref: https://huggingface.co/LumiOpen/Viking-7B
|
|
||||||
res = "viking"
|
|
||||||
if chkhsh == "b53802fb28e26d645c3a310b34bfe07da813026ec7c7716883404d5e0f8b1901":
|
|
||||||
# ref: https://huggingface.co/core42/jais-13b
|
|
||||||
res = "jais"
|
|
||||||
|
|
||||||
if res is None:
|
if res is None:
|
||||||
logger.warning("\n")
|
logger.warning("\n")
|
||||||
logger.warning("**************************************************************************************")
|
logger.warning("**************************************************************************************")
|
||||||
logger.warning("** WARNING: The BPE pre-tokenizer was not recognized!")
|
logger.warning("** WARNING: The BPE pre-tokenizer was not recognized!")
|
||||||
logger.warning("** There are 2 possible reasons for this:")
|
logger.warning("** There are 2 possible reasons for this:")
|
||||||
logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet")
|
logger.warning("** - the model has not been added to convert-hf-to-gguf-update.py yet")
|
||||||
logger.warning("** - the pre-tokenization config has changed upstream")
|
logger.warning("** - the pre-tokenization config has changed upstream")
|
||||||
logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.")
|
logger.warning("** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.")
|
||||||
logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
|
logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
|
||||||
logger.warning("**")
|
logger.warning("**")
|
||||||
logger.warning(f"** chkhsh: {chkhsh}")
|
logger.warning(f"** chkhsh: {chkhsh}")
|
||||||
|
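Note: each chkhsh compared in the hunk above is a SHA-256 fingerprint of how a tokenizer encodes a fixed probe string, so two checkpoints that pre-tokenize identically map to the same identifier. A minimal sketch of the idea, assuming the transformers package; the probe text itself is maintained by convert_hf_to_gguf_update.py and the function name here is illustrative:

    from hashlib import sha256
    from transformers import AutoTokenizer

    def pre_tokenizer_fingerprint(model_dir: str, probe_text: str) -> str:
        # Hash the token IDs produced for the probe string; the digest identifies
        # the pre-tokenizer configuration rather than the model weights.
        tokenizer = AutoTokenizer.from_pretrained(model_dir)
        token_ids = tokenizer.encode(probe_text)
        return sha256(str(token_ids).encode()).hexdigest()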
@@ -582,23 +513,15 @@ class Model:
 special_vocab._set_special_token("unk", tokenizer.special_tokens["<|endoftext|>"])
 special_vocab.add_to_gguf(self.gguf_writer)

-def _set_vocab_sentencepiece(self, add_to_gguf=True):
-tokens, scores, toktypes = self._create_vocab_sentencepiece()
-
-self.gguf_writer.add_tokenizer_model("llama")
-self.gguf_writer.add_tokenizer_pre("default")
-self.gguf_writer.add_token_list(tokens)
-self.gguf_writer.add_token_scores(scores)
-self.gguf_writer.add_token_types(toktypes)
-
-special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
-special_vocab.add_to_gguf(self.gguf_writer)
-
-def _create_vocab_sentencepiece(self):
+def _set_vocab_sentencepiece(self):
 from sentencepiece import SentencePieceProcessor

 tokenizer_path = self.dir_model / 'tokenizer.model'

+tokens: list[bytes] = []
+scores: list[float] = []
+toktypes: list[int] = []
+
 if not tokenizer_path.is_file():
 raise FileNotFoundError(f"File not found: {tokenizer_path}")

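The minus side of the hunk above splits SentencePiece handling into two steps: _create_vocab_sentencepiece collects tokens, scores and token types, and _set_vocab_sentencepiece only writes them out. One payoff is that a subclass can adjust the collected data before serializing it; this sketch is modeled on the Gemma-2 handler that appears later in this file, with the constant 108 and the writer calls taken from that deleted hunk, so treat it as illustrative:

    def set_vocab(self):
        tokens, scores, toktypes = self._create_vocab_sentencepiece()
        # Mark <unusedX>, <start_of_turn> and <end_of_turn> as control tokens so
        # chat templates can rely on them.
        for i in range(108):
            toktypes[i] = SentencePieceTokenTypes.CONTROL

        self.gguf_writer.add_tokenizer_model("llama")
        self.gguf_writer.add_tokenizer_pre("default")
        self.gguf_writer.add_token_list(tokens)
        self.gguf_writer.add_token_scores(scores)
        self.gguf_writer.add_token_types(toktypes)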
@@ -652,7 +575,14 @@ class Model:
 scores.append(-1000.0)
 toktypes.append(SentencePieceTokenTypes.UNUSED)

-return tokens, scores, toktypes
+self.gguf_writer.add_tokenizer_model("llama")
+self.gguf_writer.add_tokenizer_pre("default")
+self.gguf_writer.add_token_list(tokens)
+self.gguf_writer.add_token_scores(scores)
+self.gguf_writer.add_token_types(toktypes)
+
+special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+special_vocab.add_to_gguf(self.gguf_writer)

 def _set_vocab_llama_hf(self):
 vocab = gguf.LlamaHfVocab(self.dir_model)

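Both variants above fill the tokens, scores and toktypes lists from a SentencePiece model before handing them to the GGUF writer. A self-contained sketch of how such lists are typically gathered, assuming the sentencepiece package; the numeric token types mirror gguf's SentencePiece token-type enum and are given only for illustration:

    from sentencepiece import SentencePieceProcessor

    def collect_spm_vocab(model_path: str):
        sp = SentencePieceProcessor()
        sp.LoadFromFile(model_path)

        tokens: list[bytes] = []
        scores: list[float] = []
        toktypes: list[int] = []

        for token_id in range(sp.vocab_size()):
            tokens.append(sp.IdToPiece(token_id).encode("utf-8"))
            scores.append(sp.GetScore(token_id))
            if sp.IsUnknown(token_id):
                toktypes.append(2)      # UNKNOWN
            elif sp.IsControl(token_id):
                toktypes.append(3)      # CONTROL
            elif sp.IsByte(token_id):
                toktypes.append(6)      # BYTE
            else:
                toktypes.append(1)      # NORMAL
        return tokens, scores, toktypes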
@ -676,51 +606,6 @@ class Model:
|
||||||
special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
|
special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
|
||||||
special_vocab.add_to_gguf(self.gguf_writer)
|
special_vocab.add_to_gguf(self.gguf_writer)
|
||||||
|
|
||||||
def _set_vocab_builtin(self, model_name: Literal["gpt-neox", "llama-spm"], vocab_size: int):
|
|
||||||
tokenizer_path = Path(sys.path[0]) / "models" / f"ggml-vocab-{model_name}.gguf"
|
|
||||||
logger.warning(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'")
|
|
||||||
vocab_reader = gguf.GGUFReader(tokenizer_path, "r")
|
|
||||||
|
|
||||||
default_pre = "mpt" if model_name == "gpt-neox" else "default"
|
|
||||||
|
|
||||||
field = vocab_reader.get_field(gguf.Keys.Tokenizer.MODEL)
|
|
||||||
assert field # tokenizer model
|
|
||||||
self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]).decode("utf-8"))
|
|
||||||
|
|
||||||
field = vocab_reader.get_field(gguf.Keys.Tokenizer.PRE)
|
|
||||||
self.gguf_writer.add_tokenizer_pre(bytes(field.parts[-1]).decode("utf-8") if field else default_pre)
|
|
||||||
|
|
||||||
field = vocab_reader.get_field(gguf.Keys.Tokenizer.LIST)
|
|
||||||
assert field # token list
|
|
||||||
self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
|
|
||||||
|
|
||||||
if model_name == "llama-spm":
|
|
||||||
field = vocab_reader.get_field(gguf.Keys.Tokenizer.SCORES)
|
|
||||||
assert field # token scores
|
|
||||||
self.gguf_writer.add_token_scores([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
|
|
||||||
|
|
||||||
field = vocab_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
|
|
||||||
assert field # token types
|
|
||||||
self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
|
|
||||||
|
|
||||||
if model_name != "llama-spm":
|
|
||||||
field = vocab_reader.get_field(gguf.Keys.Tokenizer.MERGES)
|
|
||||||
assert field # token merges
|
|
||||||
self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
|
|
||||||
|
|
||||||
if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)) is not None:
|
|
||||||
self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0])
|
|
||||||
if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)) is not None:
|
|
||||||
self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0])
|
|
||||||
if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)) is not None:
|
|
||||||
self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0])
|
|
||||||
if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.PAD_ID)) is not None:
|
|
||||||
self.gguf_writer.add_pad_token_id(field.parts[-1].tolist()[0])
|
|
||||||
if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.ADD_BOS)) is not None:
|
|
||||||
self.gguf_writer.add_add_bos_token(field.parts[-1].tolist()[0])
|
|
||||||
if (field := vocab_reader.get_field(gguf.Keys.Tokenizer.ADD_EOS)) is not None:
|
|
||||||
self.gguf_writer.add_add_eos_token(field.parts[-1].tolist()[0])
|
|
||||||
|
|
||||||
|
|
||||||
@Model.register("GPTNeoXForCausalLM")
|
@Model.register("GPTNeoXForCausalLM")
|
||||||
class GPTNeoXModel(Model):
|
class GPTNeoXModel(Model):
|
||||||
|
@@ -1985,7 +1870,7 @@ class Phi3MiniModel(Model):
 if len(rope_scaling_type) == 0:
 raise KeyError('Missing the required key rope_scaling.type')

-if rope_scaling_type == 'su' or rope_scaling_type == 'longrope':
+if rope_scaling_type == 'su':
 attn_factor = math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds)) if scale > 1.0 else 1.0
 elif rope_scaling_type == 'yarn':
 attn_factor = 0.1 * math.log(scale) + 1.0 if scale > 1.0 else 1.0

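In the Phi-3 hunk above, the 'su'/'longrope' branch rescales attention by a factor derived from how far the context is being extended. The same formula in isolation, with illustrative numbers:

    import math

    def longrope_attn_factor(scale: float, orig_max_pos_embds: int) -> float:
        # sqrt(1 + ln(scale) / ln(original context length)), applied only when extending
        if scale <= 1.0:
            return 1.0
        return math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds))

    # extending a 4096-token context to 131072 tokens gives scale = 32
    print(longrope_attn_factor(131072 / 4096, 4096))  # ~1.19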
@@ -2115,7 +2000,7 @@ class InternLM2Model(Model):
 logger.error(f'Error: Missing {tokenizer_path}')
 sys.exit(1)

-sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue]
+sentencepiece_model = model.ModelProto()
 sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
 add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix

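Both sides of the InternLM2 hunk read tokenizer settings straight out of the serialized SentencePiece model rather than config.json. A standalone sketch of that pattern, assuming the sentencepiece protobuf bindings used above:

    from sentencepiece import sentencepiece_model_pb2 as model

    def read_spm_flags(tokenizer_path: str) -> dict:
        proto = model.ModelProto()  # pyright may not see this attribute; it exists at runtime
        with open(tokenizer_path, "rb") as f:
            proto.ParseFromString(f.read())
        return {
            # whether a dummy leading space is added before tokenizing
            "add_dummy_prefix": proto.normalizer_spec.add_dummy_prefix,
            "remove_extra_whitespaces": proto.normalizer_spec.remove_extra_whitespaces,
            "model_type": proto.trainer_spec.model_type,  # 1 = Unigram, 2 = BPE
        }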
@@ -2395,8 +2280,6 @@ class GemmaModel(Model):
 special_vocab._set_special_token("eot", 107)
 special_vocab.add_to_gguf(self.gguf_writer)

-self.gguf_writer.add_add_space_prefix(False)
-
 def set_gguf_parameters(self):
 hparams = self.hparams
 block_count = hparams["num_hidden_layers"]

@ -2429,71 +2312,6 @@ class GemmaModel(Model):
|
||||||
return [(self.map_tensor_name(name), data_torch)]
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
|
|
||||||
@Model.register("Gemma2ForCausalLM")
|
|
||||||
class Gemma2Model(Model):
|
|
||||||
model_arch = gguf.MODEL_ARCH.GEMMA2
|
|
||||||
|
|
||||||
def set_vocab(self):
|
|
||||||
tokens, scores, toktypes = self._create_vocab_sentencepiece()
|
|
||||||
# hack: This is required so that we can properly use start/end-of-turn for chat template
|
|
||||||
for i in range(108):
|
|
||||||
# including <unusedX>, <start_of_turn>, <end_of_turn>
|
|
||||||
toktypes[i] = SentencePieceTokenTypes.CONTROL
|
|
||||||
self.gguf_writer.add_tokenizer_model("llama")
|
|
||||||
self.gguf_writer.add_tokenizer_pre("default")
|
|
||||||
self.gguf_writer.add_token_list(tokens)
|
|
||||||
self.gguf_writer.add_token_scores(scores)
|
|
||||||
self.gguf_writer.add_token_types(toktypes)
|
|
||||||
|
|
||||||
special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
|
|
||||||
special_vocab.add_to_gguf(self.gguf_writer)
|
|
||||||
|
|
||||||
self.gguf_writer.add_add_space_prefix(False)
|
|
||||||
|
|
||||||
def set_gguf_parameters(self):
|
|
||||||
hparams = self.hparams
|
|
||||||
block_count = hparams["num_hidden_layers"]
|
|
||||||
|
|
||||||
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
|
|
||||||
self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
|
|
||||||
self.gguf_writer.add_embedding_length(hparams["hidden_size"])
|
|
||||||
self.gguf_writer.add_block_count(block_count)
|
|
||||||
self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
|
|
||||||
self.gguf_writer.add_head_count(hparams["num_attention_heads"])
|
|
||||||
self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
|
|
||||||
self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
|
|
||||||
self.gguf_writer.add_key_length(hparams["head_dim"])
|
|
||||||
self.gguf_writer.add_value_length(hparams["head_dim"])
|
|
||||||
self.gguf_writer.add_file_type(self.ftype)
|
|
||||||
self.gguf_writer.add_attn_logit_softcapping(
|
|
||||||
self.hparams["attn_logit_softcapping"]
|
|
||||||
)
|
|
||||||
self.gguf_writer.add_final_logit_softcapping(
|
|
||||||
self.hparams["final_logit_softcapping"]
|
|
||||||
)
|
|
||||||
self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
|
|
||||||
|
|
||||||
# sanity check
|
|
||||||
attn_scalar = self.hparams["query_pre_attn_scalar"]
|
|
||||||
if attn_scalar != hparams["hidden_size"] / hparams["num_attention_heads"]:
|
|
||||||
raise ValueError("query_pre_attn_scalar must be equal to n_embd / n_head")
|
|
||||||
|
|
||||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
||||||
del bid # unused
|
|
||||||
|
|
||||||
# lm_head is not used in llama.cpp, while autoawq will include this tensor in model
|
|
||||||
# To prevent errors, skip loading lm_head.weight.
|
|
||||||
if name == "lm_head.weight":
|
|
||||||
logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
|
|
||||||
return []
|
|
||||||
|
|
||||||
# ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
|
|
||||||
if name.endswith("norm.weight"):
|
|
||||||
data_torch = data_torch + 1
|
|
||||||
|
|
||||||
return [(self.map_tensor_name(name), data_torch)]
|
|
||||||
|
|
||||||
|
|
||||||
@Model.register("Starcoder2ForCausalLM")
|
@Model.register("Starcoder2ForCausalLM")
|
||||||
class StarCoder2Model(Model):
|
class StarCoder2Model(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.STARCODER2
|
model_arch = gguf.MODEL_ARCH.STARCODER2
|
||||||
|
@@ -2518,7 +2336,39 @@ class MambaModel(Model):
 self._set_vocab_sentencepiece()
 else:
 # Use the GPT-NeoX tokenizer when no tokenizer files are present
-self._set_vocab_builtin("gpt-neox", vocab_size)
+tokenizer_path = Path(sys.path[0]) / "models" / "ggml-vocab-gpt-neox.gguf"
+logger.warning(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'")
+neox_reader = gguf.GGUFReader(tokenizer_path, "r")
+
+field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
+self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]).decode("utf-8") if field else "gpt2")
+
+field = neox_reader.get_field(gguf.Keys.Tokenizer.PRE)
+self.gguf_writer.add_tokenizer_pre(bytes(field.parts[-1]).decode("utf-8") if field else "mpt")
+
+field = neox_reader.get_field(gguf.Keys.Tokenizer.LIST)
+assert field
+self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
+
+field = neox_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
+assert field
+self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
+
+field = neox_reader.get_field(gguf.Keys.Tokenizer.MERGES)
+assert field
+self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
+
+field = neox_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)
+self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0] if field else 1)
+
+field = neox_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)
+self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0] if field else 0)
+
+field = neox_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)
+self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0] if field else 0)
+
+field = neox_reader.get_field(gguf.Keys.Tokenizer.PAD_ID)
+self.gguf_writer.add_pad_token_id(field.parts[-1].tolist()[0] if field else 0)

 def set_gguf_parameters(self):
 d_model = self.find_hparam(["hidden_size", "d_model"])

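The plus side of the Mamba hunk borrows a tokenizer from a prebuilt ggml-vocab-gpt-neox.gguf file instead of requiring tokenizer files next to the checkpoint. A sketch of reading such fields with gguf.GGUFReader; the key names and the parts/data access mirror the hunk, but treat the exact API as an assumption:

    import gguf

    def read_builtin_vocab(tokenizer_path: str, vocab_size: int):
        reader = gguf.GGUFReader(tokenizer_path, "r")

        field = reader.get_field(gguf.Keys.Tokenizer.MODEL)
        tokenizer_model = bytes(field.parts[-1]).decode("utf-8") if field else "gpt2"

        field = reader.get_field(gguf.Keys.Tokenizer.LIST)
        assert field is not None  # a vocab file without a token list is unusable
        tokens = [bytes(field.parts[i]) for i in field.data][:vocab_size]

        return tokenizer_model, tokens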
@ -2670,82 +2520,6 @@ class JinaBertV2Model(BertModel):
|
||||||
self.gguf_writer.add_add_eos_token(True)
|
self.gguf_writer.add_add_eos_token(True)
|
||||||
|
|
||||||
|
|
||||||
@Model.register("OpenELMForCausalLM")
|
|
||||||
class OpenELMModel(Model):
|
|
||||||
model_arch = gguf.MODEL_ARCH.OPENELM
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _make_divisible(v: float | int, divisor: int) -> int:
|
|
||||||
# ref: https://huggingface.co/apple/OpenELM-270M-Instruct/blob/eb111ff2e6724348e5b905984063d4064d4bc579/configuration_openelm.py#L34-L38
|
|
||||||
new_v = max(divisor, int(v + divisor / 2) // divisor * divisor)
|
|
||||||
# Make sure that round down does not go down by more than 10%.
|
|
||||||
if new_v < 0.9 * v:
|
|
||||||
new_v += divisor
|
|
||||||
return new_v
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
ffn_multipliers: list[float] = self.hparams["ffn_multipliers"]
|
|
||||||
ffn_dim_divisor: int = self.hparams["ffn_dim_divisor"]
|
|
||||||
self._n_embd: int = self.hparams["model_dim"]
|
|
||||||
self._num_kv_heads: list[int] = self.hparams["num_kv_heads"]
|
|
||||||
self._num_query_heads: list[int] = self.hparams["num_query_heads"]
|
|
||||||
self._ffn_dims: list[int] = [
|
|
||||||
OpenELMModel._make_divisible(multiplier * self._n_embd, ffn_dim_divisor)
|
|
||||||
for multiplier in ffn_multipliers
|
|
||||||
]
|
|
||||||
assert isinstance(self._num_kv_heads, list) and isinstance(self._num_kv_heads[0], int)
|
|
||||||
assert isinstance(self._num_query_heads, list) and isinstance(self._num_query_heads[0], int)
|
|
||||||
|
|
||||||
# Uses the tokenizer from meta-llama/Llama-2-7b-hf
|
|
||||||
def set_vocab(self):
|
|
||||||
try:
|
|
||||||
self._set_vocab_sentencepiece()
|
|
||||||
except FileNotFoundError:
|
|
||||||
self._set_vocab_builtin("llama-spm", self.hparams["vocab_size"])
|
|
||||||
|
|
||||||
def set_gguf_parameters(self):
|
|
||||||
n_embd = self._n_embd
|
|
||||||
head_dim = self.hparams["head_dim"]
|
|
||||||
rot_pct = 1.0
|
|
||||||
assert self.block_count == len(self._num_kv_heads)
|
|
||||||
assert self.block_count == len(self._num_query_heads)
|
|
||||||
assert self.block_count == len(self._ffn_dims)
|
|
||||||
|
|
||||||
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
|
|
||||||
self.gguf_writer.add_block_count(self.block_count)
|
|
||||||
self.gguf_writer.add_context_length(self.hparams["max_context_length"])
|
|
||||||
self.gguf_writer.add_embedding_length(n_embd)
|
|
||||||
self.gguf_writer.add_feed_forward_length(self._ffn_dims)
|
|
||||||
self.gguf_writer.add_head_count(self._num_query_heads)
|
|
||||||
self.gguf_writer.add_head_count_kv(self._num_kv_heads)
|
|
||||||
self.gguf_writer.add_rope_freq_base(self.hparams["rope_freq_constant"])
|
|
||||||
# https://huggingface.co/apple/OpenELM-270M-Instruct/blob/c401df2/modeling_openelm.py#L30
|
|
||||||
self.gguf_writer.add_layer_norm_rms_eps(1e-6)
|
|
||||||
self.gguf_writer.add_rope_dimension_count(int(rot_pct * head_dim))
|
|
||||||
self.gguf_writer.add_key_length(head_dim)
|
|
||||||
self.gguf_writer.add_value_length(head_dim)
|
|
||||||
self.gguf_writer.add_file_type(self.ftype)
|
|
||||||
|
|
||||||
def find_hparam(self, keys: Iterable[str], optional: bool = False) -> Any:
|
|
||||||
if "n_layers" in keys:
|
|
||||||
return self.hparams["num_transformer_layers"]
|
|
||||||
|
|
||||||
return super().find_hparam(keys, optional)
|
|
||||||
|
|
||||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
||||||
|
|
||||||
# split ff
|
|
||||||
if bid is not None and name == f"transformer.layers.{bid}.ffn.proj_1.weight":
|
|
||||||
ff_dim = self._ffn_dims[bid]
|
|
||||||
yield (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), data_torch[:ff_dim])
|
|
||||||
yield (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), data_torch[ff_dim:])
|
|
||||||
return
|
|
||||||
|
|
||||||
yield (self.map_tensor_name(name), data_torch)
|
|
||||||
|
|
||||||
|
|
||||||
@Model.register("ArcticForCausalLM")
|
@Model.register("ArcticForCausalLM")
|
||||||
class ArcticModel(Model):
|
class ArcticModel(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.ARCTIC
|
model_arch = gguf.MODEL_ARCH.ARCTIC
|
||||||
|
@@ -2976,17 +2750,11 @@ class DeepseekV2Model(Model):
 raise ValueError(f"Unprocessed experts: {experts}")


-@Model.register("T5WithLMHeadModel")
 @Model.register("T5ForConditionalGeneration")
-@Model.register("MT5ForConditionalGeneration")
-@Model.register("UMT5ForConditionalGeneration")
+@Model.register("T5WithLMHeadModel")
 class T5Model(Model):
 model_arch = gguf.MODEL_ARCH.T5

-def __init__(self, *args, **kwargs):
-super().__init__(*args, **kwargs)
-self.shared_token_embeddings_found = False
-
 def set_vocab(self):
 # to avoid TypeError: Descriptors cannot be created directly
 # exception when importing sentencepiece_model_pb2

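The @Model.register(...) decorators above are what connect the architectures string in a checkpoint's config.json to a converter class. Roughly, such a registry can be built with a class-level dict; this is a sketch of the pattern, not necessarily the exact implementation in this file:

    class Model:
        _model_classes: dict[str, type] = {}

        @classmethod
        def register(cls, *names: str):
            def wrapper(model_cls: type) -> type:
                # every listed architecture name resolves to the decorated subclass
                for name in names:
                    cls._model_classes[name] = model_cls
                return model_cls
            return wrapper

        @classmethod
        def from_model_architecture(cls, arch: str) -> type:
            try:
                return cls._model_classes[arch]
            except KeyError:
                raise NotImplementedError(f"Architecture {arch!r} not supported") from None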
@@ -2994,29 +2762,17 @@ class T5Model(Model):
 from sentencepiece import SentencePieceProcessor
 from sentencepiece import sentencepiece_model_pb2 as model

-tokenizer_path = self.dir_model / 'tokenizer.model'
-
-# many older models use spiece.model tokenizer model filename
-if not tokenizer_path.is_file():
-tokenizer_path = self.dir_model / 'spiece.model'
+tokenizer_path = self.dir_model / 'spiece.model'

 if not tokenizer_path.is_file():
 raise FileNotFoundError(f"File not found: {tokenizer_path}")

-sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue]
+sentencepiece_model = model.ModelProto()
 sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
-
-# some models like Pile-T5 family use BPE tokenizer instead of Unigram
-if sentencepiece_model.trainer_spec.model_type == 2: # BPE
-# assure the tokenizer model file name is correct
-assert tokenizer_path.name == 'tokenizer.model'
-return self._set_vocab_sentencepiece()
-else:
-assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM
-
 add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
 remove_whitespaces = sentencepiece_model.normalizer_spec.remove_extra_whitespaces
 precompiled_charsmap = sentencepiece_model.normalizer_spec.precompiled_charsmap
+assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM

 tokenizer = SentencePieceProcessor()
 tokenizer.LoadFromFile(str(tokenizer_path))

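The minus side above first looks for tokenizer.model and only then falls back to the older spiece.model filename used by many T5 checkpoints. The same lookup as a compact helper (a sketch; the filenames come from the hunk):

    from pathlib import Path

    def find_spm_model(dir_model: Path) -> Path:
        for candidate in ("tokenizer.model", "spiece.model"):
            path = dir_model / candidate
            if path.is_file():
                return path
        raise FileNotFoundError(f"No SentencePiece model found under {dir_model}")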
@@ -3086,10 +2842,7 @@ class T5Model(Model):

 def set_gguf_parameters(self):
 self.gguf_writer.add_name("T5")
-if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
-logger.warning("Couldn't find context length in config.json, assuming default value of 512")
-n_ctx = 512
-self.gguf_writer.add_context_length(n_ctx)
+self.gguf_writer.add_context_length(self.hparams["n_positions"])
 self.gguf_writer.add_embedding_length(self.hparams["d_model"])
 self.gguf_writer.add_feed_forward_length(self.hparams["d_ff"])
 self.gguf_writer.add_block_count(self.hparams["num_layers"])

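The minus side above tolerates T5 configs that omit n_positions by assuming the usual default of 512; the fallback pattern in isolation:

    def context_length(hparams: dict) -> int:
        # prefer the value from config.json, otherwise assume the T5 default of 512
        if (n_ctx := hparams.get("n_positions")) is None:
            n_ctx = 512
        return n_ctx

    print(context_length({"n_positions": 1024}))  # 1024
    print(context_length({}))                     # 512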
@ -3105,295 +2858,16 @@ class T5Model(Model):
|
||||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
del bid # unused
|
del bid # unused
|
||||||
|
|
||||||
# T5 based models contain shared token embeddings tensors saved randomly as either "encoder.embed_tokens.weight",
|
# Sometimes T5 and Flan-T5 based models contain "encoder.embed_tokens.weight" tensor or
|
||||||
# "decoder.embed_tokens.weight" or "shared.weight" tensor. In some models there are even multiple of them stored
|
# "decoder.embed_tokens.weight" tensors that are duplicates of "shared.weight" tensor
|
||||||
# in the safetensors files. We use the first tensor from these three as the token embeddings for both encoder
|
# To prevent errors caused by an unnecessary unmapped tensor, skip both of them and use only "shared.weight".
|
||||||
# and decoder and ignore the remaining ones.
|
if name == "decoder.embed_tokens.weight" or name == "encoder.embed_tokens.weight":
|
||||||
if name in ["decoder.embed_tokens.weight", "encoder.embed_tokens.weight", "shared.weight"]:
|
logger.debug(f"Skipping tensor {name!r} in safetensors so that convert can end normally.")
|
||||||
if not self.shared_token_embeddings_found:
|
|
||||||
name = "shared.weight"
|
|
||||||
self.shared_token_embeddings_found = True
|
|
||||||
else:
|
|
||||||
logger.debug(f"Skipping shared tensor {name!r} in safetensors so that convert can end normally.")
|
|
||||||
return []
|
|
||||||
|
|
||||||
return [(self.map_tensor_name(name), data_torch)]
|
|
||||||
|
|
||||||
|
|
||||||
@Model.register("JAISLMHeadModel")
|
|
||||||
class JaisModel(Model):
|
|
||||||
model_arch = gguf.MODEL_ARCH.JAIS
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
# SwigLU activation
|
|
||||||
assert self.hparams["activation_function"] == "swiglu"
|
|
||||||
# ALiBi position embedding
|
|
||||||
assert self.hparams["position_embedding_type"] == "alibi"
|
|
||||||
|
|
||||||
# Embeddings scale
|
|
||||||
self.embeddings_scale = 1.0
|
|
||||||
# note: For some JAIS flavors, output is tied to (same as) wte in original model
|
|
||||||
self.output_is_wte = False
|
|
||||||
if 'mup_embeddings_scale' in self.hparams:
|
|
||||||
self.output_is_wte = True # Hack (?)
|
|
||||||
self.embeddings_scale = self.hparams['mup_embeddings_scale']
|
|
||||||
elif 'embeddings_scale' in self.hparams:
|
|
||||||
self.embeddings_scale = self.hparams['embeddings_scale']
|
|
||||||
else:
|
|
||||||
assert False
|
|
||||||
|
|
||||||
self.width_scale = 1.0
|
|
||||||
if 'mup_output_alpha' in self.hparams:
|
|
||||||
assert 'mup_width_scale' in self.hparams
|
|
||||||
self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
|
|
||||||
elif 'width_scale' in self.hparams:
|
|
||||||
self.width_scale = self.hparams['width_scale']
|
|
||||||
else:
|
|
||||||
assert False
|
|
||||||
|
|
||||||
self.max_alibi_bias = 8.0
|
|
||||||
|
|
||||||
def set_vocab(self):
|
|
||||||
self._set_vocab_gpt2()
|
|
||||||
|
|
||||||
def set_gguf_parameters(self):
|
|
||||||
self.gguf_writer.add_name(self.dir_model.name)
|
|
||||||
self.gguf_writer.add_block_count(self.hparams["n_layer"])
|
|
||||||
self.gguf_writer.add_context_length(self.hparams["n_positions"])
|
|
||||||
self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
|
|
||||||
self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
|
|
||||||
self.gguf_writer.add_head_count(self.hparams["n_head"])
|
|
||||||
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
|
|
||||||
self.gguf_writer.add_file_type(self.ftype)
|
|
||||||
|
|
||||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
||||||
del bid # unused
|
|
||||||
|
|
||||||
tensors: list[tuple[str, Tensor]] = []
|
|
||||||
|
|
||||||
# we don't need these
|
|
||||||
if name.endswith((".attn.bias")):
|
|
||||||
return tensors
|
|
||||||
|
|
||||||
if name.endswith(("relative_pe.slopes")):
|
|
||||||
# Calculate max ALiBi bias (this is the inverse of the ALiBi calculation)
|
|
||||||
# Some other models has max_alibi_bias spelled out explicitly in the hyperparams,
|
|
||||||
# but Jais's PyTorch model simply precalculates the slope values and places them
|
|
||||||
# in relative_pes.slopes
|
|
||||||
n_head_closest_log2 = 2 ** math.floor(math.log2(self.hparams["n_head"]))
|
|
||||||
first_val = float(data_torch[0].item())
|
|
||||||
self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
|
|
||||||
|
|
||||||
return tensors
|
|
||||||
|
|
||||||
if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
|
|
||||||
data_torch = data_torch.transpose(1, 0)
|
|
||||||
|
|
||||||
new_name = self.map_tensor_name(name)
|
|
||||||
|
|
||||||
if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
|
|
||||||
tensors.append((new_name, data_torch * self.embeddings_scale))
|
|
||||||
if self.output_is_wte:
|
|
||||||
tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
|
|
||||||
elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
|
|
||||||
assert not self.output_is_wte
|
|
||||||
tensors.append((new_name, data_torch * self.width_scale))
|
|
||||||
else:
|
|
||||||
tensors.append((new_name, data_torch))
|
|
||||||
|
|
||||||
return tensors
|
|
||||||
|
|
||||||
def write_tensors(self):
|
|
||||||
super().write_tensors()
|
|
||||||
self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
|
|
||||||
|
|
||||||
|
|
||||||
@Model.register("ChatGLMModel", "ChatGLMForConditionalGeneration")
|
|
||||||
class ChatGLMModel(Model):
|
|
||||||
model_arch = gguf.MODEL_ARCH.CHATGLM
|
|
||||||
|
|
||||||
def set_vocab_chatglm3(self):
|
|
||||||
dir_model = self.dir_model
|
|
||||||
hparams = self.hparams
|
|
||||||
tokens: list[bytes] = []
|
|
||||||
toktypes: list[int] = []
|
|
||||||
scores: list[float] = []
|
|
||||||
|
|
||||||
from transformers import AutoTokenizer
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
|
|
||||||
vocab_size = hparams.get("padded_vocab_size", len(tokenizer.get_vocab()))
|
|
||||||
assert max(tokenizer.get_vocab().values()) < vocab_size
|
|
||||||
role_special_tokens = ["<|system|>", "<|user|>", "<|assistant|>", "<|observation|>"]
|
|
||||||
special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "sop", "eop"] + role_special_tokens
|
|
||||||
for token_id in range(vocab_size):
|
|
||||||
piece = tokenizer._convert_id_to_token(token_id)
|
|
||||||
if token_id == 0:
|
|
||||||
piece = "<unk>"
|
|
||||||
elif token_id == 1:
|
|
||||||
piece = "<bos>"
|
|
||||||
elif token_id == 2:
|
|
||||||
piece = "<eos>"
|
|
||||||
|
|
||||||
text = piece.encode("utf-8")
|
|
||||||
score = 0.0
|
|
||||||
# Referencing the tokenizer Python implementation(https://huggingface.co/THUDM/chatglm3-6b/blob/main/tokenization_chatglm.py),
|
|
||||||
# it is only valid if it is less than tokenizer.tokenizer.sp_model.vocab_size()
|
|
||||||
if len(piece) != 0 and token_id < tokenizer.tokenizer.sp_model.vocab_size():
|
|
||||||
score = tokenizer.tokenizer.sp_model.get_score(token_id)
|
|
||||||
|
|
||||||
if len(piece) == 0:
|
|
||||||
text = f"[PAD{token_id}]".encode("utf-8")
|
|
||||||
|
|
||||||
if token_id >= tokenizer.tokenizer.sp_model.vocab_size():
|
|
||||||
if piece in special_tokens:
|
|
||||||
# show special tokens in prompt
|
|
||||||
toktype = SentencePieceTokenTypes.USER_DEFINED
|
|
||||||
else:
|
|
||||||
toktype = SentencePieceTokenTypes.UNKNOWN
|
|
||||||
tokens.append(text)
|
|
||||||
scores.append(score)
|
|
||||||
toktypes.append(toktype)
|
|
||||||
continue
|
|
||||||
|
|
||||||
toktype = SentencePieceTokenTypes.NORMAL
|
|
||||||
if tokenizer.tokenizer.sp_model.is_unknown(token_id):
|
|
||||||
toktype = SentencePieceTokenTypes.UNKNOWN
|
|
||||||
elif tokenizer.tokenizer.sp_model.is_control(token_id):
|
|
||||||
toktype = SentencePieceTokenTypes.CONTROL
|
|
||||||
elif tokenizer.tokenizer.sp_model.is_unused(token_id):
|
|
||||||
toktype = SentencePieceTokenTypes.UNUSED
|
|
||||||
elif tokenizer.tokenizer.sp_model.is_byte(token_id):
|
|
||||||
toktype = SentencePieceTokenTypes.BYTE
|
|
||||||
|
|
||||||
tokens.append(text)
|
|
||||||
scores.append(score)
|
|
||||||
toktypes.append(toktype)
|
|
||||||
|
|
||||||
self.gguf_writer.add_tokenizer_model("llama")
|
|
||||||
# glm3 needs prefix and suffix formatted as:
|
|
||||||
# prompt = "[gMASK]sop<|user|>\n" + prompt + "<|assistant|>"
|
|
||||||
self.gguf_writer.add_tokenizer_pre("chatglm-spm")
|
|
||||||
self.gguf_writer.add_token_list(tokens)
|
|
||||||
self.gguf_writer.add_token_scores(scores)
|
|
||||||
self.gguf_writer.add_token_types(toktypes)
|
|
||||||
|
|
||||||
special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
|
|
||||||
special_vocab.add_to_gguf(self.gguf_writer)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def token_bytes_to_string(b):
|
|
||||||
from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
|
|
||||||
byte_encoder = bytes_to_unicode()
|
|
||||||
return ''.join([byte_encoder[ord(char)] for char in b.decode('latin-1')])
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None = None) -> list[bytes]:
|
|
||||||
parts = [bytes([b]) for b in token]
|
|
||||||
while True:
|
|
||||||
min_idx = None
|
|
||||||
min_rank = None
|
|
||||||
for i, pair in enumerate(zip(parts[:-1], parts[1:])):
|
|
||||||
rank = mergeable_ranks.get(pair[0] + pair[1])
|
|
||||||
if rank is not None and (min_rank is None or rank < min_rank):
|
|
||||||
min_idx = i
|
|
||||||
min_rank = rank
|
|
||||||
if min_rank is None or (max_rank is not None and min_rank >= max_rank):
|
|
||||||
break
|
|
||||||
assert min_idx is not None
|
|
||||||
parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2:]
|
|
||||||
return parts
|
|
||||||
|
|
||||||
def set_vocab(self):
|
|
||||||
if "THUDM/chatglm3-6b" in self.hparams.get("_name_or_path", ""):
|
|
||||||
self.set_vocab_chatglm3()
|
|
||||||
return
|
|
||||||
|
|
||||||
dir_model = self.dir_model
|
|
||||||
hparams = self.hparams
|
|
||||||
tokens: list[str] = []
|
|
||||||
toktypes: list[int] = []
|
|
||||||
|
|
||||||
from transformers import AutoTokenizer
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
|
|
||||||
vocab_size = hparams["padded_vocab_size"]
|
|
||||||
assert max(tokenizer.get_vocab().values()) < vocab_size
|
|
||||||
|
|
||||||
tokpre = self.get_vocab_base_pre(tokenizer)
|
|
||||||
|
|
||||||
merges = []
|
|
||||||
vocab = {}
|
|
||||||
mergeable_ranks = tokenizer.mergeable_ranks
|
|
||||||
for token, rank in mergeable_ranks.items():
|
|
||||||
vocab[ChatGLMModel.token_bytes_to_string(token)] = rank
|
|
||||||
if len(token) == 1:
|
|
||||||
continue
|
|
||||||
merged = ChatGLMModel.bpe(mergeable_ranks, token, max_rank=rank)
|
|
||||||
assert len(merged) >= 2 and len(merged) <= 7
|
|
||||||
merges.append(' '.join(map(ChatGLMModel.token_bytes_to_string, merged)))
|
|
||||||
|
|
||||||
# for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined
|
|
||||||
added_vocab = tokenizer.get_added_vocab()
|
|
||||||
reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **added_vocab}.items()}
|
|
||||||
|
|
||||||
for i in range(vocab_size):
|
|
||||||
if i not in reverse_vocab:
|
|
||||||
tokens.append(f"[PAD{i}]")
|
|
||||||
toktypes.append(gguf.TokenType.USER_DEFINED)
|
|
||||||
elif reverse_vocab[i] in added_vocab:
|
|
||||||
tokens.append(reverse_vocab[i])
|
|
||||||
if tokenizer.added_tokens_decoder[i].special:
|
|
||||||
toktypes.append(gguf.TokenType.CONTROL)
|
|
||||||
else:
|
|
||||||
toktypes.append(gguf.TokenType.USER_DEFINED)
|
|
||||||
else:
|
|
||||||
tokens.append(reverse_vocab[i])
|
|
||||||
toktypes.append(gguf.TokenType.NORMAL)
|
|
||||||
|
|
||||||
self.gguf_writer.add_tokenizer_model("gpt2")
|
|
||||||
self.gguf_writer.add_tokenizer_pre(tokpre)
|
|
||||||
self.gguf_writer.add_token_list(tokens)
|
|
||||||
self.gguf_writer.add_token_types(toktypes)
|
|
||||||
|
|
||||||
special_vocab = gguf.SpecialVocab(dir_model, load_merges=False)
|
|
||||||
special_vocab.merges = merges
|
|
||||||
# only add special tokens when they were not already loaded from config.json
|
|
||||||
special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
|
|
||||||
special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"])
|
|
||||||
# this one is usually not in config.json anyway
|
|
||||||
special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<|endoftext|>"])
|
|
||||||
special_vocab.add_to_gguf(self.gguf_writer)
|
|
||||||
|
|
||||||
def set_gguf_parameters(self):
|
|
||||||
self.gguf_writer.add_name(self.hparams["_name_or_path"].split("/")[1]) # THUDM/glm4-9b-chat or THUDM/chatglm3-6b
|
|
||||||
n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
|
|
||||||
n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
|
|
||||||
n_head_kv = self.hparams.get("multi_query_group_num", n_head)
|
|
||||||
self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed))
|
|
||||||
self.gguf_writer.add_embedding_length(n_embed)
|
|
||||||
self.gguf_writer.add_feed_forward_length(self.hparams.get("ffn_hidden_size", 4 * n_embed))
|
|
||||||
self.gguf_writer.add_block_count(self.hparams["num_layers"])
|
|
||||||
self.gguf_writer.add_head_count(n_head)
|
|
||||||
self.gguf_writer.add_head_count_kv(n_head_kv)
|
|
||||||
self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layernorm_epsilon"])
|
|
||||||
self.gguf_writer.add_file_type(self.ftype)
|
|
||||||
self.gguf_writer.add_rope_dimension_count(64)
|
|
||||||
self.gguf_writer.add_add_bos_token(False)
|
|
||||||
rope_freq = 10000
|
|
||||||
if "rope_ratio" in self.hparams:
|
|
||||||
rope_freq = rope_freq * self.hparams["rope_ratio"]
|
|
||||||
self.gguf_writer.add_rope_freq_base(rope_freq)
|
|
||||||
|
|
||||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
|
||||||
del bid # unused
|
|
||||||
|
|
||||||
if name.endswith(".rotary_pos_emb.inv_freq"):
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
name = name.removeprefix("transformer.")
|
|
||||||
return [(self.map_tensor_name(name), data_torch)]
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
|
|
||||||
###### CONVERSION LOGIC ######
|
###### CONVERSION LOGIC ######
|
||||||
|
|
||||||
|
|
||||||
|
@@ -3443,6 +2917,10 @@ def parse_args() -> argparse.Namespace:
 "--vocab-only", action="store_true",
 help="extract only the vocab",
 )
+parser.add_argument(
+"--awq-path", type=Path, default=None,
+help="Path to scale awq cache file",
+)
 parser.add_argument(
 "--outfile", type=Path,
 help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",

@@ -3520,6 +2998,19 @@ def main() -> None:

 dir_model = args.model

+if args.awq_path:
+sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
+from awq.apply_awq import add_scale_weights # type: ignore[import-not-found]
+tmp_model_path = args.model / "weighted_model"
+dir_model = tmp_model_path
+if tmp_model_path.is_dir():
+logger.info(f"{tmp_model_path} exists as a weighted model.")
+else:
+tmp_model_path.mkdir(parents=True, exist_ok=True)
+logger.info("Saving new weighted model ...")
+add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
+logger.info(f"Saved weighted model at {tmp_model_path}.")
+
 if not dir_model.is_dir():
 logger.error(f'Error: {args.model} is not a directory')
 sys.exit(1)

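The plus side above rescales the model with awq-py once and caches the result under weighted_model so later conversions reuse it. A sketch of that cache-or-create step; add_scale_weights is taken as given from the awq-py package referenced in the hunk, so its exact signature is an assumption:

    from pathlib import Path

    def prepare_weighted_model(model_dir: Path, awq_cache: Path, add_scale_weights) -> Path:
        tmp_model_path = model_dir / "weighted_model"
        if not tmp_model_path.is_dir():
            # first run: generate and persist the scaled weights
            tmp_model_path.mkdir(parents=True, exist_ok=True)
            add_scale_weights(str(model_dir), str(awq_cache), str(tmp_model_path))
        return tmp_model_path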
@@ -3532,8 +3023,7 @@ def main() -> None:
 "auto": gguf.LlamaFileType.GUESSED,
 }

-is_split = args.split_max_tensors > 0 or args.split_max_size != "0"
-if args.use_temp_file and is_split:
+if args.use_temp_file and (args.split_max_tensors > 0 or args.split_max_size != "0"):
 logger.error("Error: Cannot use temp file when splitting")
 sys.exit(1)

@@ -3570,12 +3060,11 @@ def main() -> None:
 if args.vocab_only:
 logger.info("Exporting model vocab...")
 model_instance.write_vocab()
-logger.info(f"Model vocab successfully exported to {model_instance.fname_out}")
+logger.info("Model vocab successfully exported.")
 else:
 logger.info("Exporting model...")
 model_instance.write()
-out_path = f"{model_instance.fname_out.parent}{os.sep}" if is_split else model_instance.fname_out
-logger.info(f"Model successfully exported to {out_path}")
+logger.info("Model successfully exported.")


 if __name__ == '__main__':

@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-

 # This script downloads the tokenizer models of the specified models from Huggingface and
-# generates the get_vocab_base_pre() function for convert_hf_to_gguf.py
+# generates the get_vocab_base_pre() function for convert-hf-to-gguf.py
 #
 # This is necessary in order to analyze the type of pre-tokenizer used by the model and
 # provide the necessary information to llama.cpp via the GGUF header in order to implement

@@ -15,9 +15,9 @@
 # - Add a new model to the "models" list
 # - Run the script with your huggingface token:
 #
-# python3 convert_hf_to_gguf_update.py <huggingface_token>
+# python3 convert-hf-to-gguf-update.py <huggingface_token>
 #
-# - Copy-paste the generated get_vocab_base_pre() function into convert_hf_to_gguf.py
+# - Copy-paste the generated get_vocab_base_pre() function into convert-hf-to-gguf.py
 # - Update llama.cpp with the new pre-tokenizer if necessary
 #
 # TODO: generate tokenizer tests for llama.cpp

@@ -27,6 +27,7 @@ import logging
 import os
 import pathlib
 import re
+import time

 import requests
 import sys

@@ -37,15 +38,17 @@ from enum import IntEnum, auto
 from transformers import AutoTokenizer

 logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger("convert_hf_to_gguf_update")
+logger = logging.getLogger("convert-hf-to-gguf-update")
 sess = requests.Session()

+# User input for new model
+new_name = input("Enter the name of the new model: ")
+new_url = input("Enter the URL of the new model: ")
+
 class TOKENIZER_TYPE(IntEnum):
 SPM = auto()
 BPE = auto()
 WPM = auto()
-UGM = auto()


 # TODO: this string has to exercise as much pre-tokenizer functionality as possible

@ -56,42 +59,21 @@ if len(sys.argv) == 2:
|
||||||
token = sys.argv[1]
|
token = sys.argv[1]
|
||||||
if not token.startswith("hf_"):
|
if not token.startswith("hf_"):
|
||||||
logger.info("Huggingface token seems invalid")
|
logger.info("Huggingface token seems invalid")
|
||||||
logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
|
logger.info("Usage: python convert-hf-to-gguf-update.py <huggingface_token>")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
|
logger.info("Usage: python convert-hf-to-gguf-update.py <huggingface_token>")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
 # TODO: add models here, base models preferred
-models = [
-    {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
-    {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
-    {"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
-    {"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
-    {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
-    {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
-    {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
-    {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
-    {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
-    {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
-    {"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
-    {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
-    {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
-    {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
-    {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
-    {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
-    {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
-    {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
-    {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
-    {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
-    {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
-    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
-    {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
-    {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
-    {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
-    {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
-    {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
-]
+models = []
+
+# Construct new entry and add to models list
+new_entry = {"name": new_name, "tokt": TOKENIZER_TYPE.BPE, "repo": new_url}
+models.append(new_entry)
+print('Model added...')
+print(models)
+time.sleep(15)
 
 
 def download_file_with_auth(url, token, save_path):
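The removed lines are the base branch's hard-coded tokenizer list; the added lines build a single entry at runtime from new_name and new_url, which are never defined in this excerpt. A minimal sketch of how those values might be supplied, assuming argparse flags that do not appear anywhere in the diff (the TOKENIZER_TYPE class below is only a stand-in for the enum defined near the top of the script):

import argparse
from enum import IntEnum, auto

class TOKENIZER_TYPE(IntEnum):  # stand-in for the enum defined earlier in the script
    SPM = auto()
    BPE = auto()
    WPM = auto()
    UGM = auto()

# Hypothetical CLI wiring; the diff does not show where new_name/new_url come from.
parser = argparse.ArgumentParser(description="add one tokenizer model to the update list")
parser.add_argument("--name", required=True, help="short tokenizer name, e.g. llama-bpe")
parser.add_argument("--url", required=True, help="Hugging Face repository URL")
args = parser.parse_args()

new_name = args.name
new_url = args.url
models = [{"name": new_name, "tokt": TOKENIZER_TYPE.BPE, "repo": new_url}]

However the values arrive, the rest of the script can keep iterating over models unchanged.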
@@ -112,13 +94,9 @@ def download_model(model):
     os.makedirs(f"models/tokenizers/{name}", exist_ok=True)
 
     files = ["config.json", "tokenizer.json", "tokenizer_config.json"]
 
     if tokt == TOKENIZER_TYPE.SPM:
         files.append("tokenizer.model")
 
-    if tokt == TOKENIZER_TYPE.UGM:
-        files.append("spiece.model")
-
     for file in files:
         save_path = f"models/tokenizers/{name}/{file}"
         if os.path.isfile(save_path):
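download_file_with_auth() is defined outside this hunk, so its body is not visible here. A plausible shape for such a helper, assuming the requests library and a Hugging Face token supplied by the caller (an illustration, not the file's actual implementation):

import os
import requests

def download_file_with_auth(url: str, token: str, save_path: str) -> None:
    # Send the token so gated repositories (e.g. the Meta Llama models) can be fetched.
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = requests.get(url, headers=headers, timeout=60)
    response.raise_for_status()
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, "wb") as f:
        f.write(response.content)

The per-type file list above is what drives it: SPM models additionally need tokenizer.model, and the "-" side also fetches spiece.model for UGM (t5-style) models.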
@@ -134,14 +112,14 @@ for model in models:
         logger.error(f"Failed to download model {model['name']}. Error: {e}")
 
 
-# generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function:
+# generate the source code for the convert-hf-to-gguf.py:get_vocab_base_pre() function:
 
 src_ifs = ""
 for model in models:
     name = model["name"]
     tokt = model["tokt"]
 
-    if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM:
+    if tokt == TOKENIZER_TYPE.SPM:
         continue
 
     # Skip if the tokenizer folder does not exist or there are other download issues previously
@@ -151,10 +129,7 @@ for model in models:
 
     # create the tokenizer
     try:
-        if name == "t5":
-            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
-        else:
-            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
     except OSError as e:
         logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
         continue # Skip to the next model if the tokenizer can't be loaded
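On the "-" side, t5 is loaded with use_fast=False because it is a SentencePiece/Unigram checkpoint whose original spiece.model vocabulary should be read by the slow tokenizer; the "+" side predates t5 support and always takes the default fast path. A small sketch of that dispatch, assuming the same models/tokenizers/ layout used above:

from transformers import AutoTokenizer

def load_test_tokenizer(name: str):
    # Mirrors the "-" side of the hunk above: the UGM (t5-style) model uses the slow
    # tokenizer so the original spiece.model is used as-is; everything else uses the
    # default fast tokenizer.
    if name == "t5":
        return AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
    return AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")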
@@ -201,7 +176,7 @@ src_func = f"""
 
         res = None
 
-        # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script
+        # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
 {src_ifs}
@@ -210,9 +185,9 @@ src_func = f"""
             logger.warning("**************************************************************************************")
             logger.warning("** WARNING: The BPE pre-tokenizer was not recognized!")
             logger.warning("** There are 2 possible reasons for this:")
-            logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet")
+            logger.warning("** - the model has not been added to convert-hf-to-gguf-update.py yet")
             logger.warning("** - the pre-tokenization config has changed upstream")
-            logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.")
+            logger.warning("** Check your model files and convert-hf-to-gguf-update.py and update them accordingly.")
             logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
             logger.warning("**")
             logger.warning(f"** chkhsh: {{chkhsh}}")
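This warning is emitted by the generated get_vocab_base_pre() when the computed checksum does not match any known pre-tokenizer. Roughly, the checksum comes from tokenizing a fixed multilingual test string and hashing the resulting token ids; a sketch of that step, with chktxt standing in for the long test string defined in the script:

from hashlib import sha256
from transformers import AutoTokenizer

chktxt = "..."  # placeholder for the mixed-script test string used by the update script

tokenizer = AutoTokenizer.from_pretrained("models/tokenizers/llama-bpe")
chktok = tokenizer.encode(chktxt)
chkhsh = sha256(str(chktok).encode()).hexdigest()
# chkhsh is then matched against the table of known hashes inside get_vocab_base_pre()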
@@ -226,7 +201,7 @@ src_func = f"""
         return res
 """
 
-convert_py_pth = pathlib.Path("convert_hf_to_gguf.py")
+convert_py_pth = pathlib.Path("convert-hf-to-gguf.py")
 convert_py = convert_py_pth.read_text(encoding="utf-8")
 convert_py = re.sub(
     r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
@@ -237,7 +212,7 @@ convert_py = re.sub(
 
 convert_py_pth.write_text(convert_py, encoding="utf-8")
 
-logger.info("+++ convert_hf_to_gguf.py was updated")
+logger.info("+++ convert-hf-to-gguf.py was updated")
 
 # generate tests for each tokenizer model
 
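The re.sub() call shown in the previous two hunks splices the regenerated function into the converter between the two marker comments; the replacement argument itself falls outside these hunks. One way the splice can be written, keeping the marker lines and swapping only the body (the lambda and the flags are assumptions, not lines from this diff; the file name follows the "-" side):

import pathlib
import re

src_func = "..."  # the regenerated get_vocab_base_pre() body built earlier in the script

convert_py_pth = pathlib.Path("convert_hf_to_gguf.py")
convert_py = convert_py_pth.read_text(encoding="utf-8")
convert_py = re.sub(
    r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
    lambda m: m.group(1) + src_func + m.group(3),  # keep the markers, replace the body
    convert_py,
    flags=re.DOTALL | re.MULTILINE,
)
convert_py_pth.write_text(convert_py, encoding="utf-8")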
@@ -275,7 +250,6 @@ tests = [
     "\n =",
     "' era",
     "Hello, y'all! How are you 😁 ?我想在apple工作1314151天~",
-    "!!!!!!",
     "3",
     "33",
     "333",
@@ -285,8 +259,7 @@ tests = [
     "3333333",
     "33333333",
     "333333333",
-    "Cửa Việt", # llama-bpe fails on this
-    " discards",
+    # "Cửa Việt", # llama-bpe fails on this
     chktxt,
 ]
 
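These strings are tokenized with each model's tokenizer to produce the vocab test fixtures. The writing step is outside the hunks shown here; a sketch of what it typically looks like, where the ggml-vocab-{name}.gguf.inp/.out naming and the __ggml_vocab_test__ separator are assumptions taken from llama.cpp's vocab tests rather than from this diff:

from transformers import AutoTokenizer

name = "llama-bpe"
tests = ["3", "33", "Hello, y'all! How are you 😁 ?我想在apple工作1314151天~"]  # abbreviated
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")

# one test string per block in the .inp file
with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
    for text in tests:
        f.write(f"{text}")
        f.write("\n__ggml_vocab_test__\n")

# the matching token ids, one line per test string, in the .out file
with open(f"models/ggml-vocab-{name}.gguf.out", "w", encoding="utf-8") as f:
    for text in tests:
        ids = tokenizer.encode(text, add_special_tokens=False)
        for tok_id in ids:
            f.write(f" {tok_id}")
        f.write("\n")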
@@ -314,10 +287,7 @@ for model in models:
 
     # create the tokenizer
     try:
-        if name == "t5":
-            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
-        else:
-            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}",)
     except OSError as e:
         logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
         continue # Skip this model and continue with the next one in the loop
@@ -343,6 +313,6 @@ logger.info("\nRun the following commands to generate the vocab files for testin
 for model in models:
     name = model["name"]
 
-    print(f"python3 convert_hf_to_gguf.py models/tokenizers/{name}/ --outfile models/ggml-vocab-{name}.gguf --vocab-only") # noqa: NP100
+    print(f"python3 convert-hf-to-gguf.py models/tokenizers/{name}/ --outfile models/ggml-vocab-{name}.gguf --vocab-only") # noqa: NP100
 
 logger.info("\n")