llama : towards llama3 tokenization support (wip)

2024-04-26 14:55:03 +03:00 · 2024-04-26 14:55:03 +03:00 · 4907e41aa7
commit 4907e41aa7
parent ed42711b90
8 changed files with 298 additions and 121 deletions
--- a/llama.h
+++ b/llama.h
@@ -67,8 +67,6 @@ extern "C" {
        LLAMA_VOCAB_TYPE_SPM  = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
        LLAMA_VOCAB_TYPE_BPE  = 2, // GPT-2 tokenizer based on byte-level BPE
        LLAMA_VOCAB_TYPE_WPM  = 3, // BERT tokenizer based on WordPiece
-        LLAMA_VOCAB_TYPE_DEEPSEEKCODER = 4, // Deepseek Coder
-        LLAMA_VOCAB_TYPE_DEEPSEEKLLM   = 5, // Deepseek LLM
    };

    // note: these values should be synchronized with ggml_rope