diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 360f571e4..e576b65a0 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -693,7 +693,10 @@ struct server_context {
         n_ctx = llama_n_ctx(ctx);

         add_bos_token = llama_should_add_bos_token(model);
-        GGML_ASSERT(llama_add_eos_token(model) != 1);
+
+        if (!llama_model_has_encoder(model)) {
+            GGML_ASSERT(llama_add_eos_token(model) != 1);
+        }

         return true;
     }
diff --git a/src/llama.cpp b/src/llama.cpp
index aaf8db496..3ea822652 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17190,6 +17190,7 @@ bool llama_model_has_encoder(const struct llama_model * model) {
     switch (model->arch) {
         case LLM_ARCH_T5: return true;
         case LLM_ARCH_T5ENCODER: return true;
+        case LLM_ARCH_JINA_BERT_V2: return true;
         default: return false;
     }
 }