diff --git a/convert-gptneox-hf-to-gguf.py b/convert-gptneox-hf-to-gguf.py
index b81e0468a..f8141560a 100755
--- a/convert-gptneox-hf-to-gguf.py
+++ b/convert-gptneox-hf-to-gguf.py
@@ -34,8 +34,7 @@ def bytes_to_unicode():
         bs.append(b)
         cs.append(2**8+n)
         n += 1
-    cs = [chr(n) for n in cs]
-    return dict(zip(bs, cs))
+    return dict(zip(bs, (chr(n) for n in cs)))
 
 
 def count_model_parts(dir_model: str) -> int:
diff --git a/convert-llama-ggmlv3-to-gguf.py b/convert-llama-ggmlv3-to-gguf.py
index 61e439d51..c8e7f1761 100755
--- a/convert-llama-ggmlv3-to-gguf.py
+++ b/convert-llama-ggmlv3-to-gguf.py
@@ -75,7 +75,7 @@ class Tensor:
         self.dims = ()
         self.dtype = None
         self.start_offset = 0
-        self.len_bytes = 0
+        self.len_bytes = np.int64(0)
 
     def load(self, data, offset):
         orig_offset = offset
diff --git a/convert-lora-to-ggml.py b/convert-lora-to-ggml.py
index 1112532ba..a00339b47 100755
--- a/convert-lora-to-ggml.py
+++ b/convert-lora-to-ggml.py
@@ -60,7 +60,7 @@ def write_file_header(fout: BinaryIO, params: Dict[str, Any]) -> None:
 
 
 def write_tensor_header(
-    self, name: str, shape: Sequence[int], data_type: np.dtype
+    self, name: str, shape: Sequence[int], data_type: np.dtype[Any]
 ) -> None:
     sname = name.encode("utf-8")
     fout.write(
diff --git a/convert.py b/convert.py
index dd5049137..9fd8f43a4 100755
--- a/convert.py
+++ b/convert.py
@@ -25,7 +25,7 @@ import numpy as np
 from abc import ABCMeta, abstractmethod
 from dataclasses import dataclass
 from pathlib import Path
-from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable, List, Literal, Optional, Sequence, Set, Tuple, TypeVar, Union)
+from typing import (IO, TYPE_CHECKING, Any, Callable, Dict, Generator, Iterable, List, Literal, Optional, Sequence, Set, Tuple, Type, TypeVar, Union)
 from sentencepiece import SentencePieceProcessor  # type: ignore
 
 if TYPE_CHECKING:
@@ -447,7 +447,7 @@ class Tensor(metaclass=ABCMeta):
     def to_ggml(self) -> 'GGMLCompatibleTensor': ...
 
 
-def bf16_to_fp32(bf16_arr: np.ndarray) -> NDArray:
+def bf16_to_fp32(bf16_arr: np.ndarray[Any, np.dtype[np.uint16]]) -> NDArray:
     assert bf16_arr.dtype == np.uint16, f"Input array should be of dtype uint16, but got {bf16_arr.dtype}"
     fp32_arr = bf16_arr.astype(np.uint32) << 16
     return fp32_arr.view(np.float32)
@@ -658,7 +658,7 @@ class LazyUnpickler(pickle.Unpickler):
             description = f'storage data_type={data_type} path-in-zip={filename} path={self.zip_file.filename}'
             return LazyStorage(load=load, kind=pid[1], description=description)
 
-    # @staticmethod
+    @staticmethod
     def lazy_rebuild_tensor_v2(storage: Any, storage_offset: Any, size: Any, stride: Any,
                                # pyright: ignore[reportSelfClsParameterName]
                                requires_grad: Any, backward_hooks: Any, metadata: Any = None) -> LazyTensor:
@@ -670,13 +670,15 @@
             description = f'pickled storage_offset={storage_offset} in {storage.description}'
             return LazyTensor(load, list(size), storage.kind.data_type, description)
 
-    # @staticmethod
+    @staticmethod
     def rebuild_from_type_v2(func, new_type, args, state):
         return func(*args)
 
-    CLASSES: Dict[Any, Any] = {
-        ('torch._tensor', '_rebuild_from_type_v2'): rebuild_from_type_v2,
-        ('torch._utils', '_rebuild_tensor_v2'): lazy_rebuild_tensor_v2,
+    CLASSES: Dict[Tuple[str, str], Any] = {
+        # getattr used here as a workaround for mypy not being smart enough to determine
+        # the staticmethods have a __func__ attribute.
+        ('torch._tensor', '_rebuild_from_type_v2'): getattr(rebuild_from_type_v2, '__func__'),
+        ('torch._utils', '_rebuild_tensor_v2'): getattr(lazy_rebuild_tensor_v2, '__func__'),
         ('torch', 'BFloat16Storage'): LazyStorageKind(DT_BF16),
         ('torch', 'HalfStorage'): LazyStorageKind(DT_F16),
         ('torch', 'FloatStorage'): LazyStorageKind(DT_F32),
@@ -752,7 +754,7 @@ def lazy_load_file(path: Path) -> ModelPlus:
 In = TypeVar('In')
 Out = TypeVar('Out')
 
-def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], concurrency: int, max_workers: Optional[int] = None, factory: Callable = ThreadPoolExecutor) -> Iterable[Out]:
+def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], concurrency: int, max_workers: Optional[int] = None, use_processpool_executor: bool = False) -> Iterable[Out]:
     '''Parallel map, but with backpressure. If the caller doesn't call `next`
     fast enough, this will stop calling `func` at some point rather than
     letting results pile up in memory. Specifically, there is a max of one
@@ -761,7 +763,12 @@ def bounded_parallel_map(func: Callable[[In], Out], iterable: Iterable[In], conc
         yield from map(func, iterable)
         # Not reached.
     iterable = iter(iterable)
-    with factory(max_workers = max_workers) as executor:
+    executor_class: Union[Type[ThreadPoolExecutor], Type[ProcessPoolExecutor]]
+    if use_processpool_executor:
+        executor_class = ProcessPoolExecutor
+    else:
+        executor_class = ThreadPoolExecutor
+    with executor_class(max_workers = max_workers) as executor:
         futures: List[concurrent.futures.Future[Out]] = []
         done = False
         for _ in range(concurrency):
@@ -913,7 +920,7 @@ class OutputFile:
         # tensor data
         ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency = concurrency)
         if ftype == GGMLFileType.MostlyQ8_0:
-            ndarrays = bounded_parallel_map(OutputFile.maybe_do_quantize, ndarrays_inner, concurrency = concurrency, max_workers = concurrency, factory = ProcessPoolExecutor)
+            ndarrays = bounded_parallel_map(OutputFile.maybe_do_quantize, ndarrays_inner, concurrency = concurrency, max_workers = concurrency, use_processpool_executor = True)
         else:
             ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner)
 
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 47bdb303a..1f8f7098f 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -8,7 +8,6 @@ import json
 import os
 from pathlib import Path
 
-import collections.abc as collections_abc
 from enum import IntEnum, auto
 from typing import Any, BinaryIO, Callable, IO, Dict, List, Optional, Sequence, Tuple, Union
 
@@ -512,7 +511,7 @@ class GGUFWriter:
         self.add_val(val, GGUFValueType.STRING)
 
     def add_array(self, key: str, val: Sequence[Any]):
-        if not isinstance(val, collections_abc.Sequence):
+        if not isinstance(val, Sequence):
            raise ValueError("Value must be a sequence for array type")
 
         self.add_key(key)
@@ -546,15 +545,16 @@ class GGUFWriter:
             encoded_val = val.encode("utf8") if isinstance(val, str) else val
             self.kv_data += struct.pack("<I", len(encoded_val))
             self.kv_data += encoded_val
-        elif vtype == GGUFValueType.ARRAY:
-            ltype = set([GGUFValueType.get_type(item) for item in val])
-            assert len(ltype) == 1, "All items in a GGUF array should be of the same type"
-            self.kv_data += struct.pack("<I", list(ltype)[0])
+        elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and len(val) > 0:
+            ltype = GGUFValueType.get_type(val[0])
+            if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]):
+                raise ValueError("All items in a GGUF array should be of the same type")
+            self.kv_data += struct.pack("<I", ltype)
             self.kv_data += struct.pack("<I", len(val))
             for item in val:
                 self.add_val(item, add_vtype=False)
-        else:
-            raise ValueError("Invalid GGUF metadata value type")
+        else:
+            raise ValueError("Invalid GGUF metadata value type or value")
 
     @staticmethod
     def ggml_pad(x: int, n: int) -> int:
@@ -614,7 +614,7 @@ class GGUFWriter:
 
         self.write_padding(self.fout, self.fout.tell())
 
-        if not self.use_temp_file:
+        if self.temp_file is None:
             for (currtensor, currpad) in self.tensors:
                 currtensor.tofile(self.fout)
                 if currpad != 0:
@@ -808,14 +808,14 @@ class SpecialVocab:
     def add_to_gguf(self, gw: GGUFWriter):
         # FIXME: Don't always include merges (possibly also don't even load them).
         if len(self.merges) > 0:
-            print(f'SpecialVocab: Adding {len(self.merges)} merge(s).')
+            print(f'gguf: Adding {len(self.merges)} merge(s).')
             gw.add_token_merges(self.merges)
         for typ, tokid in self.special_token_ids.items():
             handler: Optional[Callable[[int], None]] = getattr(gw, f'add_{typ}_token_id', None)
             if handler is None:
-                print(f'SpecialVocab: WARNING: No handler for special token type {typ} with id {tokid} - skipping')
+                print(f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping')
                 continue
-            print(f'SpecialVocab: Setting special token type {typ} to {tokid}')
+            print(f'gguf: Setting special token type {typ} to {tokid}')
             handler(tokid)
 
     def __repr__(self):
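
Note on the getattr(..., '__func__') change in convert.py: below is a minimal, self-contained sketch (mine, not from the patch; Registry and handler are illustrative names) of why the unwrapping is needed. Inside a class body, a name decorated with @staticmethod refers to the staticmethod wrapper object, which is not callable on Python older than 3.10; storing the underlying function via __func__ keeps the CLASSES-style lookup table callable everywhere, and going through getattr sidesteps mypy's complaint that staticmethod has no __func__ attribute.

    # Sketch only: names are illustrative, not from convert.py.
    class Registry:
        @staticmethod
        def handler(x: int) -> int:
            return x + 1

        # Storing `handler` directly would keep the staticmethod wrapper,
        # which raises "TypeError: 'staticmethod' object is not callable"
        # on Python <= 3.9 when invoked via the table.
        TABLE = {
            'handler': getattr(handler, '__func__'),
        }

    print(Registry.TABLE['handler'](41))  # prints 42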
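Likewise, for the reconstructed struct.pack calls in GGUFWriter.add_val: a hedged sketch of the byte layout the ARRAY branch emits, assuming GGUFValueType.INT32 has the enum value 5 as in gguf.py at the time; pack_int32_array is an illustrative helper, not part of gguf-py.

    import struct

    GGUF_TYPE_INT32 = 5  # assumed to match GGUFValueType.INT32

    def pack_int32_array(vals: list) -> bytes:
        buf = struct.pack("<I", GGUF_TYPE_INT32)  # element type id, little-endian uint32
        buf += struct.pack("<I", len(vals))       # element count, little-endian uint32
        for v in vals:
            buf += struct.pack("<i", v)           # one little-endian int32 per element
        return buf

    # 05000000 (type) 02000000 (count) 01000000 02000000 (elements)
    assert pack_int32_array([1, 2]).hex() == "05000000020000000100000002000000"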