chore: Fix and update comments

This commit is contained in:
teleprint-me 2024-05-20 14:59:40 -04:00
parent 2fa2c7a86c
commit 5978bb007d
No known key found for this signature in database
GPG key ID: B0D11345E65C4D48

View file

@@ -964,6 +964,9 @@ class LLaMaVocabType(IntEnum):
WPM = auto() # WordPiece BERT tokenizer WPM = auto() # WordPiece BERT tokenizer
#
# LLaMa Model Types
#
class LLaMaModelType(IntEnum): class LLaMaModelType(IntEnum):
UNK = auto() # Unsupported file type UNK = auto() # Unsupported file type
PTH = auto() # PyTorch file type PTH = auto() # PyTorch file type
@@ -971,13 +974,14 @@ class LLaMaModelType(IntEnum):
# #
# LLaMa Tokenizer Map # HuggingFace Model Map
# #
# NOTE: # NOTE:
# - Repository paths are required # - Repository paths are required
# - Allow the user to specify the tokenizer model type themselves # - Allow the user to specify the tokenizer model type themselves
# - Use architecture types because they are explicitly defined # - Use architecture types because they are explicitly defined
# - Possible tokenizer model types are: SentencePiece, WordPiece, or BytePair # - Possible algorithms are WordLevel, BPE, WordPiece, or Unigram
# - Possible LLaMa tokenizer model types are: None, SPM, BPE, or WPM
HF_MODEL_MAP = ( HF_MODEL_MAP = (
{"model_arch": MODEL_ARCH.LLAMA, "vocab_type": LLaMaVocabType.SPM, "repo": "meta-llama/Llama-2-7b-hf", }, {"model_arch": MODEL_ARCH.LLAMA, "vocab_type": LLaMaVocabType.SPM, "repo": "meta-llama/Llama-2-7b-hf", },
{"model_arch": MODEL_ARCH.LLAMA, "vocab_type": LLaMaVocabType.BPE, "repo": "meta-llama/Meta-Llama-3-8B", }, {"model_arch": MODEL_ARCH.LLAMA, "vocab_type": LLaMaVocabType.BPE, "repo": "meta-llama/Meta-Llama-3-8B", },