Update test-tokenizer-random.py

Added blank lines for lint check;
Added Sequence import from typing;
Removed 'free' call (the __enter__/__exit__ context-manager methods) from LibLlama
Robert authored 2024-11-13 07:49:38 -08:00, committed by GitHub
parent 5edd022d6a
commit 235a268f96


@@ -18,7 +18,7 @@ import subprocess
 import random
 import unicodedata
 from pathlib import Path
-from typing import Any, Iterator, cast
+from typing import Any, Iterator, cast, Sequence
 from typing_extensions import Buffer
 #
 # External Imports
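
Note: `Sequence` is the usual annotation for read-only, indexable parameters (it accepts lists and tuples alike). A minimal sketch of the kind of signature this import enables; the function name and parameters here are hypothetical, not taken from this file:

from typing import Sequence

def first_mismatch(ids1: Sequence[int], ids2: Sequence[int]) -> int:
    # Walk both token-id sequences in lockstep and return the index
    # of the first differing element, or -1 if they are identical.
    for i, (a, b) in enumerate(zip(ids1, ids2)):
        if a != b:
            return i
    # No mismatch in the overlap: differ only if the lengths differ.
    return -1 if len(ids1) == len(ids2) else min(len(ids1), len(ids2))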
@@ -31,18 +31,13 @@ logger = logging.getLogger("test-tokenizer-random")
 if shutil.which("gcc") is None:
     raise EnvironmentError("GCC is not available on this system. Please install GCC or use preprocessed headers.")


 class LibLlama:
     DEFAULT_PATH_LLAMA_H = "./include/llama.h"
     DEFAULT_PATH_INCLUDES = ["./ggml/include/", "./include/"]
     DEFAULT_PATH_LIBLLAMA = "./build/src/libllama.so"  # CMakeLists.txt: BUILD_SHARED_LIBS ON
-    def __enter__(self):
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.free()

     def __init__(self, path_llama_h: str | None = None, path_includes: list[str] = [], path_libllama: str | None = None):
         path_llama_h = path_llama_h or self.DEFAULT_PATH_LLAMA_H
         path_includes = path_includes or self.DEFAULT_PATH_INCLUDES
         path_libllama = path_libllama or self.DEFAULT_PATH_LIBLLAMA
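
Note: with `__enter__`/`__exit__` removed, `LibLlama` is no longer a context manager, so native resources are not released automatically when a `with` block exits. A minimal sketch of the calling pattern before and after this change, assuming `free()` is the class's existing cleanup method (constructor arguments omitted for brevity):

# Before: __exit__ called self.free() on scope exit.
# with LibLlama() as lib:
#     ...  # use lib; freed automatically

# After: the caller owns cleanup and must free explicitly.
lib = LibLlama()
try:
    ...  # use lib
finally:
    lib.free()  # release the loaded libllama resources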
@@ -495,6 +490,7 @@ def compare_tokenizers(tokenizer1: TokenizerGroundtruth, tokenizer2: TokenizerLl
     except Exception as e:
         logger.exception(f"An error occurred during tokenizer comparison: {e}")

+
 def main(argv: list[str] | None = None):
     parser = argparse.ArgumentParser()
     parser.add_argument("vocab_file", type=str, help="path to vocab 'gguf' file")
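
Note: accepting an optional `argv` keeps the entry point testable: `argparse.ArgumentParser.parse_args` falls back to `sys.argv[1:]` when given `None`, so the script runs the same from the command line or when driven programmatically. A minimal sketch of the pattern with only the `vocab_file` argument shown (the example path is hypothetical):

import argparse

def main(argv: list[str] | None = None):
    parser = argparse.ArgumentParser()
    parser.add_argument("vocab_file", type=str, help="path to vocab 'gguf' file")
    args = parser.parse_args(argv)  # argv=None -> parse sys.argv[1:]
    print(args.vocab_file)

if __name__ == "__main__":
    main()  # CLI use; tests could call main(["./models/some-vocab.gguf"]) directly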