diff --git a/tests/test-tokenizer-random.py b/tests/test-tokenizer-random.py index 6bc782b96..2bc14e23f 100644 --- a/tests/test-tokenizer-random.py +++ b/tests/test-tokenizer-random.py @@ -18,7 +18,7 @@ import subprocess import random import unicodedata from pathlib import Path -from typing import Any, Iterator, cast +from typing import Any, Iterator, cast, Sequence from typing_extensions import Buffer # # External Imports @@ -31,18 +31,13 @@ logger = logging.getLogger("test-tokenizer-random") if shutil.which("gcc") is None: raise EnvironmentError("GCC is not available on this system. Please install GCC or use preprocessed headers.") + class LibLlama: DEFAULT_PATH_LLAMA_H = "./include/llama.h" DEFAULT_PATH_INCLUDES = ["./ggml/include/", "./include/"] DEFAULT_PATH_LIBLLAMA = "./build/src/libllama.so" # CMakeLists.txt: BUILD_SHARED_LIBS ON - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.free() - def __init__(self, path_llama_h: str | None = None, path_includes: list[str] = [], path_libllama: str | None = None): path_llama_h = path_llama_h or self.DEFAULT_PATH_LLAMA_H path_includes = path_includes or self.DEFAULT_PATH_INCLUDES @@ -495,6 +490,7 @@ def compare_tokenizers(tokenizer1: TokenizerGroundtruth, tokenizer2: TokenizerLl except Exception as e: logger.exception(f"An error occurred during tokenizer comparison: {e}") + def main(argv: list[str] | None = None): parser = argparse.ArgumentParser() parser.add_argument("vocab_file", type=str, help="path to vocab 'gguf' file")