Change function name: rename vocab_check_and_append_path to find_vocab_file_path

This commit is contained in:
wonjun Jang 2023-11-22 19:54:04 +09:00 committed by GitHub
parent 2e263ca200
commit 5ac1949fff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -338,7 +338,7 @@ class VocabLoader:
self.fname_tokenizer = fname_tokenizer self.fname_tokenizer = fname_tokenizer
vocab_file = "tokenizer.model" vocab_file = "tokenizer.model"
path_candidate = vocab_check_and_append_path(self.fname_tokenizer, vocab_file) path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
if path_candidate is not None: if path_candidate is not None:
self.spm = SentencePieceProcessor(str(path_candidate)) self.spm = SentencePieceProcessor(str(path_candidate))
print(self.spm.vocab_size(), self.vocab_size_base) print(self.spm.vocab_size(), self.vocab_size_base)
@@ -407,19 +407,19 @@ class VocabLoader:
def get_vocab_type(self) -> str: def get_vocab_type(self) -> str:
path_candidates = [] path_candidates = []
vocab_file = "tokenizer.model" vocab_file = "tokenizer.model"
path_candidate = vocab_check_and_append_path(self.fname_tokenizer, vocab_file) path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
if path_candidate is not None: if path_candidate is not None:
return "llama" return "llama"
path_candidates.append(path_candidate) path_candidates.append(path_candidate)
vocab_file = "vocab.json" vocab_file = "vocab.json"
path_candidate = vocab_check_and_append_path(self.fname_tokenizer, vocab_file) path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
if path_candidate is not None: if path_candidate is not None:
return "gpt2" return "gpt2"
path_candidates.append(path_candidate) path_candidates.append(path_candidate)
vocab_file = "tokenizer.json" vocab_file = "tokenizer.json"
path_candidate = vocab_check_and_append_path(self.fname_tokenizer, vocab_file) path_candidate = find_vocab_file_path(self.fname_tokenizer, vocab_file)
if path_candidate: if path_candidate:
if not self.has_newline_token(): if not self.has_newline_token():
return "gpt2" return "gpt2"
@@ -1091,7 +1091,7 @@ def load_some_model(path: Path) -> ModelPlus:
return model_plus return model_plus
def vocab_check_and_append_path(path: Path, vocab_file: str) -> bool: def find_vocab_file_path(path: Path, vocab_file: str) -> Optional[Path]:
path2 = path / vocab_file path2 = path / vocab_file
# Use `.parent` instead of /.. to handle the symlink case better. # Use `.parent` instead of /.. to handle the symlink case better.
path3 = path.parent / vocab_file path3 = path.parent / vocab_file