Move booleans to the bottom of the structure

Signed-off-by: mudler <mudler@localai.io>
mudler 2023-06-19 18:42:36 +02:00
parent 807d1705db
commit 7a45a13e3d
2 changed files with 15 additions and 15 deletions

llama.cpp (14 changed lines)

@@ -925,13 +925,6 @@ static bool kv_cache_init(
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
-        /*.low_vram                    =*/ false,
-        /*.f16_kv                      =*/ true,
-        /*.logits_all                  =*/ false,
-        /*.vocab_only                  =*/ false,
-        /*.use_mmap                    =*/ true,
-        /*.use_mlock                   =*/ false,
-        /*.embedding                   =*/ false,
         /*.seed                        =*/ -1,
         /*.n_ctx                       =*/ 512,
         /*.n_batch                     =*/ 512,
@@ -940,6 +933,13 @@ struct llama_context_params llama_context_default_params() {
         /*.tensor_split                =*/ {0},
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
+        /*.low_vram                    =*/ false,
+        /*.f16_kv                      =*/ true,
+        /*.logits_all                  =*/ false,
+        /*.vocab_only                  =*/ false,
+        /*.use_mmap                    =*/ true,
+        /*.use_mlock                   =*/ false,
+        /*.embedding                   =*/ false,
     };
     return result;
 }
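
Both hunks above touch llama_context_default_params(), where the /*.name =*/ annotations decorate a plain positional aggregate initializer. The commit message gives no rationale, but a common reason to move bool flags behind the wider members is layout stability: the offsets of the int, float, and pointer fields then stay fixed when a flag is added or removed, which is friendlier to FFI bindings that mirror the struct by hand. A minimal sketch under that assumption, using trimmed-down hypothetical structs (bools_first and bools_last are illustrative names, not from llama.cpp), assuming a typical LP64 ABI:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Old shape: the bool flags lead, so every wider member sits after
 * whatever padding the flags happen to need. */
struct bools_first {
    bool low_vram, f16_kv, logits_all, vocab_only;
    bool use_mmap, use_mlock, embedding;
    int  seed, n_ctx, n_batch;
    void *progress_callback_user_data;
};

/* New shape: wider members first, bool flags trail. Adding or
 * removing a flag now leaves the earlier offsets untouched. */
struct bools_last {
    int  seed, n_ctx, n_batch;
    void *progress_callback_user_data;
    bool low_vram, f16_kv, logits_all, vocab_only;
    bool use_mmap, use_mlock, embedding;
};

int main(void) {
    /* On x86-64 Linux this prints seed at offset 8 for bools_first
     * but offset 0 for bools_last. */
    printf("bools_first: sizeof=%zu, seed at offset %zu\n",
           sizeof(struct bools_first), offsetof(struct bools_first, seed));
    printf("bools_last:  sizeof=%zu, seed at offset %zu\n",
           sizeof(struct bools_last), offsetof(struct bools_last, seed));
    return 0;
}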

llama.h (16 changed lines)

@@ -72,14 +72,6 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
     struct llama_context_params {
-        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
-        bool f16_kv;     // use fp16 for KV cache
-        bool logits_all; // the llama_eval() call computes all logits, not just the last one
-        bool vocab_only; // only load the vocabulary, no weights
-        bool use_mmap;   // use mmap if possible
-        bool use_mlock;  // force system to keep model in RAM
-        bool embedding;  // embedding mode only
         int seed;        // RNG seed, -1 for random
         int n_ctx;       // text context
         int n_batch;     // prompt processing batch size
@@ -90,6 +82,14 @@ extern "C" {
         llama_progress_callback progress_callback;
         // context pointer passed to the progress callback
         void * progress_callback_user_data;
+
+        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
+        bool f16_kv;     // use fp16 for KV cache
+        bool logits_all; // the llama_eval() call computes all logits, not just the last one
+        bool vocab_only; // only load the vocabulary, no weights
+        bool use_mmap;   // use mmap if possible
+        bool use_mlock;  // force system to keep model in RAM
+        bool embedding;  // embedding mode only
     };
     // model file types
     enum llama_ftype {
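
Because the /*.name =*/ comments are only comments, the braces in llama_context_default_params() bind values by position, not by name. That is why llama.h and llama.cpp must be reordered in lockstep, as this commit does. A minimal sketch of what goes wrong otherwise, using a hypothetical two-field params struct rather than the real API:

#include <assert.h>
#include <stdbool.h>

/* Declaration order after a reorder: int first, bool second. */
struct params {
    int  n_ctx;
    bool use_mmap;
};

int main(void) {
    /* Initializer still written for the OLD order (bool first).
     * The name comments rebind nothing: 1 lands in n_ctx, and 512
     * is converted to true in use_mmap, with no diagnostic required. */
    struct params p = {
        /*.use_mmap =*/ 1,
        /*.n_ctx    =*/ 512,
    };
    assert(p.n_ctx == 1);       /* not 512 */
    assert(p.use_mmap == true); /* 512 is nonzero, so true */
    return 0;
}

Designated initializers (.n_ctx = 512) would remove this hazard entirely, but the llama.cpp codebase uses positional initialization with name comments, so the declaration and initializer orders have to be kept in sync by hand.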