BERT tokenizer fixes (#6498)
Key changes:
* BERT conversion: fix abuse of LlamaHfVocab, do not set BOS or EOS
* Nomic Embed conversion: pad vocab instead of slicing embedding tensor
* llama_tokenize: handle added special tokens like HF does
This commit is contained in:
parent
c4a3a4ff47
commit
1b67731e18
20 changed files with 221 additions and 194 deletions
21
convert.py
21
convert.py
|
@@ -33,7 +33,7 @@ if 'NO_LOCAL_GGUF' not in os.environ:
|
|||
import gguf
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import TypeAlias
|
||||
from typing_extensions import Self, TypeAlias
|
||||
|
||||
if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
|
||||
faulthandler.register(signal.SIGUSR1)
|
||||
|
@@ -517,7 +517,7 @@ class LlamaHfVocab(Vocab):
|
|||
tokenizer_model = "llama"
|
||||
name = "hfft"
|
||||
|
||||
def __init__(self, base_path: Path, ignore_nonllama: bool = False):
|
||||
def __init__(self, base_path: Path):
|
||||
fname_tokenizer = base_path / FAST_TOKENIZER_FILE
|
||||
# if this fails, FileNotFoundError propagates to caller
|
||||
with open(fname_tokenizer, encoding='utf-8') as f:
|
||||
|
@@ -525,9 +525,7 @@ class LlamaHfVocab(Vocab):
|
|||
|
||||
# pre-check so we know if we need transformers
|
||||
tokenizer_model: dict[str, Any] = tokenizer_json['model']
|
||||
if ignore_nonllama:
|
||||
pass # workaround incorrect use of this class for WordPiece
|
||||
elif (
|
||||
if (
|
||||
tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
|
||||
or tokenizer_json['decoder']['type'] != 'Sequence'
|
||||
):
|
||||
|
@@ -647,16 +645,17 @@ def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
|
|||
|
||||
|
||||
class Tensor(ABC):
|
||||
ndarray: NDArray
|
||||
data_type: DataType
|
||||
|
||||
@abstractmethod
|
||||
def astype(self, data_type: DataType) -> Tensor: ...
|
||||
def astype(self, data_type: DataType) -> Self: ...
|
||||
@abstractmethod
|
||||
def permute(self, n_head: int, n_head_kv: int) -> Tensor: ...
|
||||
def permute(self, n_head: int, n_head_kv: int) -> Self: ...
|
||||
@abstractmethod
|
||||
def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> UnquantizedTensor: ...
|
||||
def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Self: ...
|
||||
@abstractmethod
|
||||
def part(self, n_part: int) -> UnquantizedTensor: ...
|
||||
def part(self, n_part: int) -> Self: ...
|
||||
@abstractmethod
|
||||
def to_ggml(self) -> GGMLCompatibleTensor: ...
|
||||
|
||||
|
@@ -673,13 +672,13 @@ class UnquantizedTensor(Tensor):
|
|||
self.ndarray = ndarray
|
||||
self.data_type = NUMPY_TYPE_TO_DATA_TYPE[ndarray.dtype]
|
||||
|
||||
def astype(self, data_type: DataType) -> Tensor:
|
||||
def astype(self, data_type: DataType) -> UnquantizedTensor:
|
||||
dtype = data_type.dtype
|
||||
if self.data_type == DT_BF16:
|
||||
self.ndarray = bf16_to_fp32(self.ndarray)
|
||||
return UnquantizedTensor(self.ndarray.astype(dtype))
|
||||
|
||||
def to_ggml(self) -> UnquantizedTensor:
|
||||
def to_ggml(self) -> Self:
|
||||
return self
|
||||
|
||||
def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> UnquantizedTensor:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue