Move booleans to the bottom of the structure
Signed-off-by: mudler <mudler@localai.io>
parent 807d1705db
commit 7a45a13e3d
2 changed files with 15 additions and 15 deletions
llama.cpp (14 changed lines: 7 additions, 7 deletions)
@@ -925,13 +925,6 @@ static bool kv_cache_init(
 
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
-        /*.low_vram                    =*/ false,
-        /*.f16_kv                      =*/ true,
-        /*.logits_all                  =*/ false,
-        /*.vocab_only                  =*/ false,
-        /*.use_mmap                    =*/ true,
-        /*.use_mlock                   =*/ false,
-        /*.embedding                   =*/ false,
         /*.seed                        =*/ -1,
         /*.n_ctx                       =*/ 512,
         /*.n_batch                     =*/ 512,
@@ -940,6 +933,13 @@ struct llama_context_params llama_context_default_params() {
         /*.tensor_split                =*/ {0},
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
+        /*.low_vram                    =*/ false,
+        /*.f16_kv                      =*/ true,
+        /*.logits_all                  =*/ false,
+        /*.vocab_only                  =*/ false,
+        /*.use_mmap                    =*/ true,
+        /*.use_mlock                   =*/ false,
+        /*.embedding                   =*/ false,
     };
 
     return result;
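Note that the /*.field =*/ tags in the initializer above are plain comments; C aggregate initialization binds values by position, so the list in llama.cpp must mirror the declaration order in llama.h exactly, which is why both files change together in this commit. A minimal sketch of the pitfall, using a hypothetical two-field struct (params_v1 and params_v2 are illustrative names, not part of llama.cpp):

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical structs: v1 declares the bool first, v2 moves it last. */
    struct params_v1 { bool flag; int seed; };
    struct params_v2 { int seed; bool flag; };

    int main(void) {
        /* The compiler ignores the field-name comments; values bind by
           position, so each initializer must match its struct's order. */
        struct params_v1 a = { /*.flag =*/ false, /*.seed =*/ -1 };
        struct params_v2 b = { /*.seed =*/ -1, /*.flag =*/ false };
        printf("a.seed=%d b.seed=%d\n", a.seed, b.seed); /* both print -1 */
        return 0;
    }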
llama.h (16 changed lines: 8 additions, 8 deletions)
@@ -72,14 +72,6 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
     struct llama_context_params {
-        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
-        bool f16_kv;     // use fp16 for KV cache
-        bool logits_all; // the llama_eval() call computes all logits, not just the last one
-        bool vocab_only; // only load the vocabulary, no weights
-        bool use_mmap;   // use mmap if possible
-        bool use_mlock;  // force system to keep model in RAM
-        bool embedding;  // embedding mode only
-
         int seed;    // RNG seed, -1 for random
         int n_ctx;   // text context
         int n_batch; // prompt processing batch size
@@ -90,6 +82,14 @@ extern "C" {
         llama_progress_callback progress_callback;
         // context pointer passed to the progress callback
         void * progress_callback_user_data;
+
+        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
+        bool f16_kv;     // use fp16 for KV cache
+        bool logits_all; // the llama_eval() call computes all logits, not just the last one
+        bool vocab_only; // only load the vocabulary, no weights
+        bool use_mmap;   // use mmap if possible
+        bool use_mlock;  // force system to keep model in RAM
+        bool embedding;  // embedding mode only
     };
     // model file types
     enum llama_ftype {
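Reordering the declarations does not affect callers that take the defaults and override members by name; only positional initializers, or foreign-language mirrors of the struct layout, have to follow the new order. A small usage sketch, assuming only the public API shown above (the n_ctx value is an arbitrary example):

    #include "llama.h"

    int main(void) {
        /* Named member access is independent of declaration order, so
           code like this is unaffected by the move. */
        struct llama_context_params params = llama_context_default_params();
        params.n_ctx    = 2048; /* larger text context (arbitrary value)     */
        params.use_mmap = true; /* already the default; shown for illustration */
        return 0;
    }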