Move booleans to the bottom of the structure
Signed-off-by: mudler <mudler@localai.io>
parent 807d1705db
commit 7a45a13e3d
2 changed files with 15 additions and 15 deletions
llama.cpp (14 changed lines: 7 additions, 7 deletions)
@@ -925,13 +925,6 @@ static bool kv_cache_init(
 
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
-        /*.low_vram                    =*/ false,
-        /*.f16_kv                      =*/ true,
-        /*.logits_all                  =*/ false,
-        /*.vocab_only                  =*/ false,
-        /*.use_mmap                    =*/ true,
-        /*.use_mlock                   =*/ false,
-        /*.embedding                   =*/ false,
         /*.seed                        =*/ -1,
         /*.n_ctx                       =*/ 512,
         /*.n_batch                     =*/ 512,
@@ -940,6 +933,13 @@ struct llama_context_params llama_context_default_params() {
         /*.tensor_split                =*/ {0},
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
+        /*.low_vram                    =*/ false,
+        /*.f16_kv                      =*/ true,
+        /*.logits_all                  =*/ false,
+        /*.vocab_only                  =*/ false,
+        /*.use_mmap                    =*/ true,
+        /*.use_mlock                   =*/ false,
+        /*.embedding                   =*/ false,
     };
 
     return result;
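Note that the /*.field =*/ tags in the initializer above are plain comments; C aggregate initialization binds values by position, so the list in llama.cpp must mirror the declaration order in llama.h exactly, which is why both files change together in this commit. A minimal sketch of the pitfall, using a hypothetical two-field struct (params_v1 and params_v2 are illustrative names, not part of llama.cpp):

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical structs: v1 declares the bool first, v2 moves it last. */
    struct params_v1 { bool flag; int seed; };
    struct params_v2 { int seed; bool flag; };

    int main(void) {
        /* The compiler ignores the field-name comments; values bind by
           position, so each initializer must match its struct's order. */
        struct params_v1 a = { /*.flag =*/ false, /*.seed =*/ -1 };
        struct params_v2 b = { /*.seed =*/ -1, /*.flag =*/ false };
        printf("a.seed=%d b.seed=%d\n", a.seed, b.seed); /* both print -1 */
        return 0;
    }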
llama.h (16 changed lines: 8 additions, 8 deletions)
@@ -72,14 +72,6 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
     struct llama_context_params {
-        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
-        bool f16_kv;     // use fp16 for KV cache
-        bool logits_all; // the llama_eval() call computes all logits, not just the last one
-        bool vocab_only; // only load the vocabulary, no weights
-        bool use_mmap;   // use mmap if possible
-        bool use_mlock;  // force system to keep model in RAM
-        bool embedding;  // embedding mode only
-
         int seed;    // RNG seed, -1 for random
         int n_ctx;   // text context
         int n_batch; // prompt processing batch size
@@ -90,6 +82,14 @@ extern "C" {
         llama_progress_callback progress_callback;
         // context pointer passed to the progress callback
         void * progress_callback_user_data;
+
+        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
+        bool f16_kv;     // use fp16 for KV cache
+        bool logits_all; // the llama_eval() call computes all logits, not just the last one
+        bool vocab_only; // only load the vocabulary, no weights
+        bool use_mmap;   // use mmap if possible
+        bool use_mlock;  // force system to keep model in RAM
+        bool embedding;  // embedding mode only
     };
     // model file types
     enum llama_ftype {
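Reordering the declarations does not affect callers that take the defaults and override members by name; only positional initializers, or foreign-language mirrors of the struct layout, have to follow the new order. A small usage sketch, assuming only the public API shown above (the n_ctx value is an arbitrary example):

    #include "llama.h"

    int main(void) {
        /* Named member access is independent of declaration order, so
           code like this is unaffected by the move. */
        struct llama_context_params params = llama_context_default_params();
        params.n_ctx    = 2048; /* larger text context (arbitrary value)     */
        params.use_mmap = true; /* already the default; shown for illustration */
        return 0;
    }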