From ecad2afbddde4b260bc14974fbb70a053ef5a8b3 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 10 Mar 2024 17:55:32 +0200
Subject: [PATCH] llama : minor

---
 llama.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index ed56a694d..249442166 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3940,6 +3940,7 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     LLAMA_LOG_INFO("%s: n_ff             = %u\n",     __func__, hparams.n_ff);
     LLAMA_LOG_INFO("%s: n_expert         = %u\n",     __func__, hparams.n_expert);
     LLAMA_LOG_INFO("%s: n_expert_used    = %u\n",     __func__, hparams.n_expert_used);
+    LLAMA_LOG_INFO("%s: causal attn      = %d\n",     __func__, hparams.causal_attn);
     LLAMA_LOG_INFO("%s: pooling type     = %d\n",     __func__, hparams.pooling_type);
     LLAMA_LOG_INFO("%s: rope type        = %d\n",     __func__, hparams.rope_type);
     LLAMA_LOG_INFO("%s: rope scaling     = %s\n",     __func__, rope_scaling_type);
@@ -8539,7 +8540,6 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
     );

     // NOTE: hparams.causal_attn indicates the model is capable of generation and uses the kv cache.
-    //       But if cparams.embeddings is set, the attention will be non-causal nonetheless.
     if (cparams.causal_attn) {
         const int64_t n_kv     = kv_self.n;
         const int64_t n_tokens = batch.n_tokens;
@@ -12747,6 +12747,7 @@ struct llama_context * llama_new_context_with_model(
     }

     cparams.causal_attn = hparams.causal_attn;
+
     if (cparams.pooling_type == LLAMA_POOLING_TYPE_UNSPECIFIED) {
         if (hparams.pooling_type == LLAMA_POOLING_TYPE_UNSPECIFIED) {
             cparams.pooling_type = LLAMA_POOLING_TYPE_NONE;