From 7a45a13e3d0a96f8905f9b3e398e442682272f49 Mon Sep 17 00:00:00 2001
From: mudler
Date: Mon, 19 Jun 2023 18:42:36 +0200
Subject: [PATCH] Move booleans at the bottom of the structure

Signed-off-by: mudler
---
 llama.cpp | 14 +++++++-------
 llama.h   | 16 ++++++++--------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index eea1cd99d..17a4214bf 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -925,13 +925,6 @@ static bool kv_cache_init(
 
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
-        /*.low_vram                    =*/ false,
-        /*.f16_kv                      =*/ true,
-        /*.logits_all                  =*/ false,
-        /*.vocab_only                  =*/ false,
-        /*.use_mmap                    =*/ true,
-        /*.use_mlock                   =*/ false,
-        /*.embedding                   =*/ false,
         /*.seed                        =*/ -1,
         /*.n_ctx                       =*/ 512,
         /*.n_batch                     =*/ 512,
@@ -940,6 +933,13 @@ struct llama_context_params llama_context_default_params() {
         /*.tensor_split                =*/ {0},
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
+        /*.low_vram                    =*/ false,
+        /*.f16_kv                      =*/ true,
+        /*.logits_all                  =*/ false,
+        /*.vocab_only                  =*/ false,
+        /*.use_mmap                    =*/ true,
+        /*.use_mlock                   =*/ false,
+        /*.embedding                   =*/ false,
     };
 
     return result;
diff --git a/llama.h b/llama.h
index c41873ff0..b9ee593ad 100644
--- a/llama.h
+++ b/llama.h
@@ -72,14 +72,6 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
     struct llama_context_params {
-        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
-        bool f16_kv;     // use fp16 for KV cache
-        bool logits_all; // the llama_eval() call computes all logits, not just the last one
-        bool vocab_only; // only load the vocabulary, no weights
-        bool use_mmap;   // use mmap if possible
-        bool use_mlock;  // force system to keep model in RAM
-        bool embedding;  // embedding mode only
-
         int seed;        // RNG seed, -1 for random
         int n_ctx;       // text context
         int n_batch;     // prompt processing batch size
@@ -90,6 +82,14 @@ extern "C" {
         llama_progress_callback progress_callback;
         // context pointer passed to the progress callback
         void * progress_callback_user_data;
+
+        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
+        bool f16_kv;     // use fp16 for KV cache
+        bool logits_all; // the llama_eval() call computes all logits, not just the last one
+        bool vocab_only; // only load the vocabulary, no weights
+        bool use_mmap;   // use mmap if possible
+        bool use_mlock;  // force system to keep model in RAM
+        bool embedding;  // embedding mode only
     };
 
     // model file types
     enum llama_ftype {
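
Not part of the patch: a minimal sketch of the layout concern the reordering appears to address, under the assumption that downstream bindings mirror llama_context_params field by field. If the bool members sit at the top and a binding sizes or pads them differently than the C++ compiler does, every later field shifts; with the bools at the bottom, the ints, floats, and pointers keep their offsets. The two structs below (params_bools_first, params_bools_last) are trimmed, hypothetical stand-ins, not the real header.

// Illustrative stand-ins only; these are not real llama.cpp types.
#include <cstddef>
#include <cstdio>

struct params_bools_first {         // old-style layout: bools ahead of the wider fields
    bool use_mmap;
    bool use_mlock;
    int  seed;
    int  n_ctx;
    void * progress_callback_user_data;
};

struct params_bools_last {          // patched-style layout: bools moved to the bottom
    int  seed;
    int  n_ctx;
    void * progress_callback_user_data;
    bool use_mmap;
    bool use_mlock;
};

int main() {
    // With bools first, seed and n_ctx start only after the bools plus padding,
    // so their offsets depend on how bool is represented; with bools last,
    // seed sits at offset 0 and n_ctx follows it regardless of the bools.
    std::printf("bools first: seed=%zu n_ctx=%zu\n",
                offsetof(params_bools_first, seed),
                offsetof(params_bools_first, n_ctx));
    std::printf("bools last:  seed=%zu n_ctx=%zu\n",
                offsetof(params_bools_last, seed),
                offsetof(params_bools_last, n_ctx));
    return 0;
}

On a typical 64-bit ABI the first layout puts seed at offset 4 and the second at offset 0; the exact numbers are implementation-defined, which is the point of keeping the fixed-width fields in front of the bools.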