From 7a45a13e3d0a96f8905f9b3e398e442682272f49 Mon Sep 17 00:00:00 2001
From: mudler
Date: Mon, 19 Jun 2023 18:42:36 +0200
Subject: [PATCH] Move booleans at the bottom of the structure

Signed-off-by: mudler
---
 llama.cpp | 14 +++++++-------
 llama.h   | 16 ++++++++--------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index eea1cd99d..17a4214bf 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -925,13 +925,6 @@ static bool kv_cache_init(
 
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
-        /*.low_vram                    =*/ false,
-        /*.f16_kv                      =*/ true,
-        /*.logits_all                  =*/ false,
-        /*.vocab_only                  =*/ false,
-        /*.use_mmap                    =*/ true,
-        /*.use_mlock                   =*/ false,
-        /*.embedding                   =*/ false,
         /*.seed                        =*/ -1,
         /*.n_ctx                       =*/ 512,
         /*.n_batch                     =*/ 512,
@@ -940,6 +933,13 @@ struct llama_context_params llama_context_default_params() {
         /*.tensor_split                =*/ {0},
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
+        /*.low_vram                    =*/ false,
+        /*.f16_kv                      =*/ true,
+        /*.logits_all                  =*/ false,
+        /*.vocab_only                  =*/ false,
+        /*.use_mmap                    =*/ true,
+        /*.use_mlock                   =*/ false,
+        /*.embedding                   =*/ false,
     };
 
     return result;
diff --git a/llama.h b/llama.h
index c41873ff0..b9ee593ad 100644
--- a/llama.h
+++ b/llama.h
@@ -72,14 +72,6 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
     struct llama_context_params {
-        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
-        bool f16_kv;     // use fp16 for KV cache
-        bool logits_all; // the llama_eval() call computes all logits, not just the last one
-        bool vocab_only; // only load the vocabulary, no weights
-        bool use_mmap;   // use mmap if possible
-        bool use_mlock;  // force system to keep model in RAM
-        bool embedding;  // embedding mode only
-
         int seed;        // RNG seed, -1 for random
         int n_ctx;       // text context
         int n_batch;     // prompt processing batch size
@@ -90,6 +82,14 @@ extern "C" {
         llama_progress_callback progress_callback;
         // context pointer passed to the progress callback
         void * progress_callback_user_data;
+
+        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
+        bool f16_kv;     // use fp16 for KV cache
+        bool logits_all; // the llama_eval() call computes all logits, not just the last one
+        bool vocab_only; // only load the vocabulary, no weights
+        bool use_mmap;   // use mmap if possible
+        bool use_mlock;  // force system to keep model in RAM
+        bool embedding;  // embedding mode only
     };
 
     // model file types
     enum llama_ftype {
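
Not part of the patch: a minimal sketch of the layout concern the reordering appears to address, under the assumption that downstream bindings mirror llama_context_params field by field. If the bool members sit at the top and a binding sizes or pads them differently than the C++ compiler does, every later field shifts; with the bools at the bottom, the ints, floats, and pointers keep their offsets. The two structs below (params_bools_first, params_bools_last) are trimmed, hypothetical stand-ins, not the real header.

// Illustrative stand-ins only; these are not real llama.cpp types.
#include <cstddef>
#include <cstdio>

struct params_bools_first {         // old-style layout: bools ahead of the wider fields
    bool use_mmap;
    bool use_mlock;
    int  seed;
    int  n_ctx;
    void * progress_callback_user_data;
};

struct params_bools_last {          // patched-style layout: bools moved to the bottom
    int  seed;
    int  n_ctx;
    void * progress_callback_user_data;
    bool use_mmap;
    bool use_mlock;
};

int main() {
    // With bools first, seed and n_ctx start only after the bools plus padding,
    // so their offsets depend on how bool is represented; with bools last,
    // seed sits at offset 0 and n_ctx follows it regardless of the bools.
    std::printf("bools first: seed=%zu n_ctx=%zu\n",
                offsetof(params_bools_first, seed),
                offsetof(params_bools_first, n_ctx));
    std::printf("bools last:  seed=%zu n_ctx=%zu\n",
                offsetof(params_bools_last, seed),
                offsetof(params_bools_last, n_ctx));
    return 0;
}

On a typical 64-bit ABI the first layout puts seed at offset 4 and the second at offset 0; the exact numbers are implementation-defined, which is the point of keeping the fixed-width fields in front of the bools.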