squash! llama : add getters for n_threads/n_threads_batch

Rename the getters to llama_n_threads and llama_n_threads_batch.

Signed-off-by: Daniel Bevenius <daniel.bevenius@gmail.com>
This commit is contained in:
Daniel Bevenius 2024-05-22 16:17:00 +02:00
parent 43f1d316f5
commit 43bcb50f13
Failed to extract signature
2 changed files with 4 additions and 4 deletions

View file

@ -17183,11 +17183,11 @@ void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_
ctx->cparams.n_threads_batch = n_threads_batch;
}
uint32_t llama_get_n_threads(struct llama_context * ctx) {
uint32_t llama_n_threads(struct llama_context * ctx) {
return ctx->cparams.n_threads;
}
uint32_t llama_get_n_threads_batch(struct llama_context * ctx) {
uint32_t llama_n_threads_batch(struct llama_context * ctx) {
return ctx->cparams.n_threads_batch;
}

View file

@ -760,10 +760,10 @@ extern "C" {
LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch);
// Get the number of threads used for generation of a single token.
LLAMA_API uint32_t llama_get_n_threads(struct llama_context * ctx);
LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
// Get the number of threads used for prompt and batch processing (multiple tokens).
LLAMA_API uint32_t llama_get_n_threads_batch(struct llama_context * ctx);
LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
// Set whether to use causal attention or not
// If set to true, the model will only attend to the past tokens