chore: Fix and update comments
This commit is contained in:
parent
2fa2c7a86c
commit
5978bb007d
1 changed file with 6 additions and 2 deletions
|
@@ -964,6 +964,9 @@ class LLaMaVocabType(IntEnum):
|
||||||
WPM = auto() # WordPiece BERT tokenizer
|
WPM = auto() # WordPiece BERT tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# LLaMa Model Types
|
||||||
|
#
|
||||||
class LLaMaModelType(IntEnum):
|
class LLaMaModelType(IntEnum):
|
||||||
UNK = auto() # Unsupported file type
|
UNK = auto() # Unsupported file type
|
||||||
PTH = auto() # PyTorch file type
|
PTH = auto() # PyTorch file type
|
||||||
|
@@ -971,13 +974,14 @@ class LLaMaModelType(IntEnum):
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# LLaMa Tokenizer Map
|
# HuggingFace Model Map
|
||||||
#
|
#
|
||||||
# NOTE:
|
# NOTE:
|
||||||
# - Repository paths are required
|
# - Repository paths are required
|
||||||
# - Allow the user to specify the tokenizer model type themselves
|
# - Allow the user to specify the tokenizer model type themselves
|
||||||
# - Use architecture types because they are explicitly defined
|
# - Use architecture types because they are explicitly defined
|
||||||
# - Possible tokenizer model types are: SentencePiece, WordPiece, or BytePair
|
# - Possible algorithms are WordLevel, BPE, WordPiece, or Unigram
|
||||||
|
# - Possible LLaMa tokenizer model types are: None, SPM, BPE, or WPM
|
||||||
HF_MODEL_MAP = (
|
HF_MODEL_MAP = (
|
||||||
{"model_arch": MODEL_ARCH.LLAMA, "vocab_type": LLaMaVocabType.SPM, "repo": "meta-llama/Llama-2-7b-hf", },
|
{"model_arch": MODEL_ARCH.LLAMA, "vocab_type": LLaMaVocabType.SPM, "repo": "meta-llama/Llama-2-7b-hf", },
|
||||||
{"model_arch": MODEL_ARCH.LLAMA, "vocab_type": LLaMaVocabType.BPE, "repo": "meta-llama/Meta-Llama-3-8B", },
|
{"model_arch": MODEL_ARCH.LLAMA, "vocab_type": LLaMaVocabType.BPE, "repo": "meta-llama/Meta-Llama-3-8B", },
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue