From d291c742534fdb6f4626283b9b4c2ad105d5a803 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 16 Dec 2024 21:45:25 +0200
Subject: [PATCH] llama : handle no-vocab detokenization

---
 src/llama-vocab.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index e38e59853..7f2725f94 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1867,6 +1867,10 @@ int32_t llama_detokenize_impl(
                          int32_t   text_len_max,
                             bool   remove_special,
                             bool   unparse_special) {
+    if (vocab.type == LLAMA_VOCAB_TYPE_NONE) {
+        return 0;
+    }
+
     GGML_ASSERT(vocab.tokenizer && "Tokenizer not initialized. Call llama_vocab::init_tokenizer() first.");
 
     int32_t avail = text_len_max;
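
Note (not part of the patch): a minimal caller-side sketch of the new behavior. It assumes
a llama_model * loaded from a vocab-less GGUF, and that the public llama_detokenize() in
llama.h still takes the model pointer at this commit; treat the exact signature as an
assumption rather than a guarantee.

    #include "llama.h"

    #include <cstdio>

    // With this patch, detokenizing against a model whose vocab type is
    // LLAMA_VOCAB_TYPE_NONE returns 0 written bytes instead of tripping the
    // "Tokenizer not initialized" assert inside llama_detokenize_impl().
    static void try_detokenize(const struct llama_model * model) {
        const llama_token tokens[] = { 1, 2, 3 };
        char buf[64];

        const int32_t n = llama_detokenize(model, tokens, 3, buf, (int32_t) sizeof(buf),
                                           /*remove_special=*/false,
                                           /*unparse_special=*/false);

        printf("detokenized %d bytes\n", n); // expect 0 for a no-vocab model
    }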