diff --git a/include/llama.h b/include/llama.h index 2591edce9..c5b618292 100644 --- a/include/llama.h +++ b/include/llama.h @@ -652,11 +652,6 @@ extern "C" { // State / sessions // - // hack - void llama_set_logits_all( - struct llama_context * ctx, - bool logits_all); - // Returns the maximum size in bytes of the state (rng, logits, embedding // and kv_cache) - will often be smaller after compacting tokens LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx); diff --git a/src/llama.cpp b/src/llama.cpp index 88ca14db9..307bcef84 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -13106,13 +13106,6 @@ static void llama_graph_compute( // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched)); } -void llama_set_logits_all( - struct llama_context * ctx, - bool logits_all -) { - ctx->logits_all = logits_all; -} - // decode a batch of tokens by evaluating the transformer // // - lctx: llama context