Update test-tokenizer-random.py

Added blank lines for the lint test.
Added `Sequence` import from `typing`.
Removed the `free` call from the `LibLlama` object.
This commit is contained in:
Robert 2024-11-13 07:49:38 -08:00 committed by GitHub
parent 5edd022d6a
commit 235a268f96
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -18,7 +18,7 @@ import subprocess
import random
import unicodedata
from pathlib import Path
from typing import Any, Iterator, cast
from typing import Any, Iterator, cast, Sequence
from typing_extensions import Buffer
#
# External Imports
@ -31,18 +31,13 @@ logger = logging.getLogger("test-tokenizer-random")
# Abort early when no `gcc` executable can be located via `shutil.which`.
# The error message points at the two remedies: install GCC, or use
# preprocessed headers. `EnvironmentError` is an alias of `OSError` in Python 3.
# NOTE(review): indentation was lost in this rendering; the `raise` belongs
# inside the `if`.
if shutil.which("gcc") is None:
raise EnvironmentError("GCC is not available on this system. Please install GCC or use preprocessed headers.")
class LibLlama:
DEFAULT_PATH_LLAMA_H = "./include/llama.h"
DEFAULT_PATH_INCLUDES = ["./ggml/include/", "./include/"]
DEFAULT_PATH_LIBLLAMA = "./build/src/libllama.so" # CMakeLists.txt: BUILD_SHARED_LIBS ON
# Context-manager entry: returns this same object, so
# `with LibLlama(...) as lib:` binds `lib` to the instance itself.
def __enter__(self):
return self
# Context-manager exit: calls `self.free()` and ignores the exception details
# it is given. It returns None implicitly, so an exception raised inside the
# `with` body is not suppressed.
# NOTE(review): no `free` method is defined in the visible part of this
# class; confirm it exists, otherwise this raises AttributeError on exit.
def __exit__(self, exc_type, exc_value, traceback):
self.free()
def __init__(self, path_llama_h: str | None = None, path_includes: list[str] = [], path_libllama: str | None = None):
path_llama_h = path_llama_h or self.DEFAULT_PATH_LLAMA_H
path_includes = path_includes or self.DEFAULT_PATH_INCLUDES
@ -495,6 +490,7 @@ def compare_tokenizers(tokenizer1: TokenizerGroundtruth, tokenizer2: TokenizerLl
except Exception as e:
logger.exception(f"An error occurred during tokenizer comparison: {e}")
def main(argv: list[str] | None = None):
parser = argparse.ArgumentParser()
parser.add_argument("vocab_file", type=str, help="path to vocab 'gguf' file")