llama : towards llama3 tokenization support (wip)
This commit is contained in:
parent
ed42711b90
commit
4907e41aa7
8 changed files with 298 additions and 121 deletions
2
llama.h
2
llama.h
|
@ -67,8 +67,6 @@ extern "C" {
|
|||
LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
|
||||
LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE
|
||||
LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
|
||||
LLAMA_VOCAB_TYPE_DEEPSEEKCODER = 4, // Deepseek Coder
|
||||
LLAMA_VOCAB_TYPE_DEEPSEEKLLM = 5, // Deepseek LLM
|
||||
};
|
||||
|
||||
// note: these values should be synchronized with ggml_rope
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue