diff --git a/include/llama.h b/include/llama.h
index 2591edce9..c5b618292 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -652,11 +652,6 @@ extern "C" {
     // State / sessions
     //
 
-    // hack
-    void llama_set_logits_all(
-        struct llama_context * ctx,
-        bool logits_all);
-
     // Returns the maximum size in bytes of the state (rng, logits, embedding
     // and kv_cache) - will often be smaller after compacting tokens
     LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx);
diff --git a/src/llama.cpp b/src/llama.cpp
index 88ca14db9..307bcef84 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -13106,13 +13106,6 @@ static void llama_graph_compute(
     // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
 }
 
-void llama_set_logits_all(
-    struct llama_context * ctx,
-    bool logits_all
-) {
-    ctx->logits_all = logits_all;
-}
-
 // decode a batch of tokens by evaluating the transformer
 //
 //   - lctx:      llama context