From f42285f0e5f2a21a46fa0310a30d71a7b8ebbd47 Mon Sep 17 00:00:00 2001
From: fmz <quic_fzaghlou@quic.com>
Date: Fri, 28 Jun 2024 07:10:59 -0700
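Subject: [PATCH] remove llama_set_logits_all hack

Drop the llama_set_logits_all() declaration from include/llama.h and its
definition from src/llama.cpp. The function was marked "// hack" in the
header and did nothing beyond assigning ctx->logits_all.
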
---
 include/llama.h | 5 -----
 src/llama.cpp   | 7 -------
 2 files changed, 12 deletions(-)

diff --git a/include/llama.h b/include/llama.h
index 2591edce9..c5b618292 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -652,11 +652,6 @@ extern "C" {
     // State / sessions
     //
 
-    // hack
-    void llama_set_logits_all(
-        struct llama_context * ctx,
-        bool logits_all);
-
     // Returns the maximum size in bytes of the state (rng, logits, embedding
     // and kv_cache) - will often be smaller after compacting tokens
     LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx);
diff --git a/src/llama.cpp b/src/llama.cpp
index 88ca14db9..307bcef84 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -13106,13 +13106,6 @@ static void llama_graph_compute(
     // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
 }
 
-void llama_set_logits_all(
-    struct llama_context * ctx,
-    bool logits_all
-) {
-    ctx->logits_all = logits_all;
-}
-
 // decode a batch of tokens by evaluating the transformer
 //
 //   - lctx:      llama context