convert : fix python 3.8 support, modernize type annotations (#2916)

* convert : fix python 3.8 support

* convert : sort imports

* convert : fix required parameters in convert-llama-ggmlv3-to-gguf

* convert : fix mypy errors in convert-llama-ggmlv3-to-gguf

* convert : use PEP 585 generics and PEP 604 unions

Now that we have `from __future__ import annotations`, we can use this
modern syntax in Python 3.7 instead of restricting support to Python 3.9
or 3.10 respectively.

* gguf.py : a tuple is already a tuple

* add mypy.ini

* convert : add necessary `type: ignore` comments

* gguf-py: bump version
This commit is contained in:
Cebtenzzre 2023-08-31 01:02:23 -04:00 committed by GitHub
parent 8afe228000
commit 92d0b751a7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 193 additions and 168 deletions

View file

@ -3,22 +3,25 @@
# Only models with a single datafile are supported, like 7B
# HF files required in the model dir: config.json tokenizer_config.json tokenizer.json tokenizer.model
import gguf
import os
import sys
import struct
from __future__ import annotations
import argparse
import json
import os
import struct
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Any
import gguf
import numpy as np
import torch
import argparse
from sentencepiece import SentencePieceProcessor # type: ignore[import]
from typing import Any, List, TypeAlias
from pathlib import Path
from sentencepiece import SentencePieceProcessor
if TYPE_CHECKING:
from typing import TypeAlias
#NDArray = np.ndarray[Any, Any]
# compatible with python < 3.9
NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]'
NDArray: TypeAlias = 'np.ndarray[Any, Any]'
def count_model_parts(dir_model: Path) -> int:
@ -129,9 +132,9 @@ if "rope_scaling" in hparams and hparams["rope_scaling"] != None and "factor" in
print("gguf: get tokenizer metadata")
tokens: List[bytes] = []
scores: List[float] = []
toktypes: List[int] = []
tokens: list[bytes] = []
scores: list[float] = []
toktypes: list[int] = []
tokenizer_model_file = dir_model / 'tokenizer.model'
if not tokenizer_model_file.is_file():