Update test-tokenizer-random.py
Added blank lines for the lint test; added the `Sequence` import from `typing`; removed the 'free' call from the object.
This commit is contained in:
parent
5edd022d6a
commit
235a268f96
1 changed file with 3 additions and 7 deletions
|
@ -18,7 +18,7 @@ import subprocess
|
|||
import random
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, cast
|
||||
from typing import Any, Iterator, cast, Sequence
|
||||
from typing_extensions import Buffer
|
||||
#
|
||||
# External Imports
|
||||
|
@ -31,18 +31,13 @@ logger = logging.getLogger("test-tokenizer-random")
|
|||
if shutil.which("gcc") is None:
|
||||
raise EnvironmentError("GCC is not available on this system. Please install GCC or use preprocessed headers.")
|
||||
|
||||
|
||||
class LibLlama:
|
||||
|
||||
DEFAULT_PATH_LLAMA_H = "./include/llama.h"
|
||||
DEFAULT_PATH_INCLUDES = ["./ggml/include/", "./include/"]
|
||||
DEFAULT_PATH_LIBLLAMA = "./build/src/libllama.so" # CMakeLists.txt: BUILD_SHARED_LIBS ON
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
self.free()
|
||||
|
||||
def __init__(self, path_llama_h: str | None = None, path_includes: list[str] = [], path_libllama: str | None = None):
|
||||
path_llama_h = path_llama_h or self.DEFAULT_PATH_LLAMA_H
|
||||
path_includes = path_includes or self.DEFAULT_PATH_INCLUDES
|
||||
|
@ -495,6 +490,7 @@ def compare_tokenizers(tokenizer1: TokenizerGroundtruth, tokenizer2: TokenizerLl
|
|||
except Exception as e:
|
||||
logger.exception(f"An error occurred during tokenizer comparison: {e}")
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None):
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("vocab_file", type=str, help="path to vocab 'gguf' file")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue