diff --git a/Makefile b/Makefile index 178ccb914..50f1b8f39 100644 --- a/Makefile +++ b/Makefile @@ -327,8 +327,8 @@ k_quants.o: k_quants.c k_quants.h endif # LLAMA_NO_K_QUANTS ifndef LLAMA_NO_SKIP_UNUSED_LOGITS - CFLAGS += -DLLAMA_SKIP_UNUSED_LOGITS - CXXFLAGS += -DLLAMA_SKIP_UNUSED_LOGITS + CFLAGS += -DLLAMA_SKIP_UNUSED_LOGITS + CXXFLAGS += -DLLAMA_SKIP_UNUSED_LOGITS endif # diff --git a/llama.cpp b/llama.cpp index 8e4ae9fbb..0de4aaa2f 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2317,8 +2317,7 @@ static struct ggml_cgraph * llm_build_llama( } #ifdef LLAMA_SKIP_UNUSED_LOGITS - if (il == n_layer - 1 && !lctx.logits_all) - { + if (il == n_layer - 1 && !lctx.logits_all) { // From here on, we only care about the last token and its logits. // We do as if N = 1 (from the end), which means we only keep // the last column of cur and inpSA ((n_embd, N) -> (n_embd, 1)).