llama : add pre-tokenizer regexes for BLOOM and gpt3-finnish (#8850)

This commit is contained in:
Esko Toivonen 2024-08-15 10:17:12 +03:00 committed by GitHub
parent d5492f0525
commit 6bda7ce6c3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 19 additions and 1 deletion

View file

@@ -93,6 +93,8 @@ extern "C" {
LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
};
enum llama_rope_type {