diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index 2924d8116..e7a83198d 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -140,13 +140,13 @@ int main(int argc, char ** argv) {
             const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
 
             llama_batch batch_view = {
-                n_tokens,
                 batch.token + i,
                 nullptr,
                 batch.pos + i,
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
+                n_tokens,
                 0, 0, 0, // unused
             };
 
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index 9a990bb18..5da101105 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -338,7 +338,7 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_
         if (n_eval > n_batch) {
             n_eval = n_batch;
         }
-        llama_batch batch = {int32_t(n_eval), nullptr, (image_embed->embed+i*n_embd), nullptr, nullptr, nullptr, nullptr, *n_past, 1, 0, };
+        llama_batch batch = {nullptr, (image_embed->embed+i*n_embd), nullptr, nullptr, nullptr, nullptr, int32_t(n_eval), *n_past, 1, 0, };
         if (llama_decode(ctx_llama, batch)) {
             LOG_TEE("%s : failed to eval\n", __func__);
             return false;
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index 7c5595d6e..a91fa8294 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -301,13 +301,13 @@ int main(int argc, char ** argv) {
            const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
 
            llama_batch batch_view = {
-                n_tokens,
                batch.token + i,
                nullptr,
                batch.pos + i,
                batch.n_seq_id + i,
                batch.seq_id + i,
                batch.logits + i,
+                n_tokens,
                0, 0, 0, // unused
            };
 
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index db6e0949d..15bdc725f 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -691,13 +691,13 @@ static bool decode_helper(llama_context * ctx, llama_batch & batch, std::vector<
         const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
 
         llama_batch batch_view = {
-            n_tokens,
             batch.token + i,
             nullptr,
             batch.pos + i,
             batch.n_seq_id + i,
             batch.seq_id + i,
             batch.logits + i,
+            n_tokens,
             0, 0, 0, // unused
         };
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index ceaeb1f76..40b20bf33 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1072,13 +1072,13 @@ struct server_context {
         for (int32_t i = 0; i < batch.n_tokens; i += n_batch) {
             const int32_t n_tokens = std::min(params.n_batch, batch.n_tokens - i);
             llama_batch batch_view = {
-                n_tokens,
                 batch.token + i,
                 nullptr,
                 batch.pos + i,
                 batch.n_seq_id + i,
                 batch.seq_id + i,
                 batch.logits + i,
+                n_tokens,
                 0, 0, 0, // unused
             };
 
@@ -2195,13 +2195,13 @@ struct server_context {
                 }
 
                 llama_batch batch_view = {
-                    n_tokens,
                     batch.token + i,
                     nullptr,
                     batch.pos + i,
                     batch.n_seq_id + i,
                     batch.seq_id + i,
                     batch.logits + i,
+                    n_tokens,
                     0, 0, 0, // unused
                 };
 
diff --git a/ggml-alloc.c b/ggml-alloc.c
index 1fbd376ed..e2998defb 100644
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -334,8 +334,8 @@ struct hash_node {
     int n_children;
     int n_views;
     int buffer_id;
-    size_t offset; // offset within the buffer
     bool allocated;
+    size_t offset; // offset within the buffer
 };
 
 struct tensor_alloc {
diff --git a/ggml.c b/ggml.c
index b96a82a41..1937a6f86 100644
--- a/ggml.c
+++ b/ggml.c
@@ -19149,8 +19149,8 @@ struct ggml_compute_state_shared {
 struct ggml_compute_state {
     ggml_thread_t thrd;
     int ith;
-    struct ggml_compute_state_shared * shared;
     enum ggml_status ec;
+    struct ggml_compute_state_shared * shared;
 };
 
 static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
@@ -21706,8 +21706,8 @@ struct gguf_header {
 struct gguf_tensor_info {
     struct gguf_str name;
 
-    uint32_t n_dims;
     uint64_t ne[GGML_MAX_DIMS];
+    uint32_t n_dims;
 
     enum ggml_type type;
 
diff --git a/ggml.h b/ggml.h
index 3fe95ed57..261792bdf 100644
--- a/ggml.h
+++ b/ggml.h
@@ -2412,9 +2412,9 @@ extern "C" {
 
     typedef struct {
         const char * type_name;
+        bool is_quantized;
        int blck_size;
        size_t type_size;
-        bool is_quantized;
        ggml_to_float_t to_float;
        ggml_from_float_t from_float;
        ggml_from_float_t from_float_reference;
diff --git a/llama.cpp b/llama.cpp
index e91ad7285..e1fa10b5c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11428,13 +11428,13 @@ static int llama_decode_internal(
     for (uint32_t cur_token = 0; cur_token < n_tokens_all; cur_token += n_ubatch) {
         const uint32_t n_tokens = std::min(n_ubatch, n_tokens_all - cur_token);
         llama_batch u_batch = {
-            /* .n_tokens   = */ (int32_t) n_tokens,
             /* .token      = */ batch_all.token    ? batch_all.token    + cur_token        : nullptr,
             /* .embd       = */ batch_all.embd     ? batch_all.embd     + cur_token*n_embd : nullptr,
             /* .pos        = */ batch_all.pos      ? batch_all.pos      + cur_token        : nullptr,
             /* .n_seq_id   = */ batch_all.n_seq_id ? batch_all.n_seq_id + cur_token        : nullptr,
             /* .seq_id     = */ batch_all.seq_id   ? batch_all.seq_id   + cur_token        : nullptr,
             /* .logits     = */ batch_all.logits   ? batch_all.logits   + cur_token        : nullptr,
+            /* .n_tokens   = */ (int32_t) n_tokens,
             /* .all_pos_0  = */ batch_all.all_pos_0 + (llama_pos) cur_token*batch_all.all_pos_1,
             /* .all_pos_1  = */ batch_all.all_pos_1,
             /* .all_seq_id = */ batch_all.all_seq_id,
@@ -15310,13 +15310,13 @@ static int llama_apply_lora_from_file_internal(
 //
 struct llama_model_params llama_model_default_params() {
     struct llama_model_params result = {
-        /*.n_gpu_layers                =*/ 0,
-        /*.split_mode                  =*/ LLAMA_SPLIT_MODE_LAYER,
-        /*.main_gpu                    =*/ 0,
         /*.tensor_split                =*/ nullptr,
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
         /*.kv_overrides                =*/ nullptr,
+        /*.n_gpu_layers                =*/ 0,
+        /*.split_mode                  =*/ LLAMA_SPLIT_MODE_LAYER,
+        /*.main_gpu                    =*/ 0,
        /*.vocab_only                  =*/ false,
        /*.use_mmap                    =*/ true,
        /*.use_mlock                   =*/ false,
@@ -17293,13 +17293,13 @@ struct llama_batch llama_batch_get_one(
        llama_pos pos_0,
        llama_seq_id seq_id) {
     return {
-        /*n_tokens   =*/ n_tokens,
        /*tokens     =*/ tokens,
        /*embd       =*/ nullptr,
        /*pos        =*/ nullptr,
        /*n_seq_id   =*/ nullptr,
        /*seq_id     =*/ nullptr,
        /*logits     =*/ nullptr,
+        /*n_tokens   =*/ n_tokens,
        /*all_pos_0  =*/ pos_0,
        /*all_pos_1  =*/ 1,
        /*all_seq_id =*/ seq_id,
@@ -17307,7 +17307,7 @@
 }
 
 struct llama_batch llama_batch_init(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
-    llama_batch batch = { 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0, };
+    llama_batch batch = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0, 0, };
 
     if (embd) {
         batch.embd = (float *) malloc(sizeof(float) * n_tokens_alloc * embd);
diff --git a/llama.h b/llama.h
index 0b2e708d0..502319eec 100644
--- a/llama.h
+++ b/llama.h
@@ -190,8 +190,6 @@ extern "C" {
     // - logits : if zero, the logits (and/or the embeddings) for the respective token will not be output
     //
     typedef struct llama_batch {
-        int32_t n_tokens;
-
         llama_token * token;
         float * embd;
         llama_pos * pos;
@@ -199,6 +197,7 @@ extern "C" {
         llama_seq_id ** seq_id;
         int8_t * logits; // TODO: rename this to "output"
+        int32_t n_tokens;
 
         // NOTE: helpers for smooth API transition - can be deprecated in the future
         //       for future-proof code, use the above fields instead and ignore everything below
         //
@@ -230,15 +229,6 @@
     };
 
     struct llama_model_params {
-        int32_t n_gpu_layers; // number of layers to store in VRAM
-        enum llama_split_mode split_mode; // how to split the model across multiple GPUs
-
-        // main_gpu interpretation depends on split_mode:
-        // LLAMA_SPLIT_NONE: the GPU that is used for the entire model
-        // LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results
-        // LLAMA_SPLIT_LAYER: ignored
-        int32_t main_gpu;
-
         // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
         const float * tensor_split;
 
@@ -253,6 +243,15 @@
         // override key-value pairs of the model meta data
         const struct llama_model_kv_override * kv_overrides;
 
+        int32_t n_gpu_layers; // number of layers to store in VRAM
+        enum llama_split_mode split_mode; // how to split the model across multiple GPUs
+
+        // main_gpu interpretation depends on split_mode:
+        // LLAMA_SPLIT_NONE: the GPU that is used for the entire model
+        // LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results
+        // LLAMA_SPLIT_LAYER: ignored
+        int32_t main_gpu;
+
         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap; // use mmap if possible
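
Note for downstream code: after this change the llama_batch pointer members come first and n_tokens sits after logits, so any positional aggregate initializer has to be reordered exactly as the call sites above are. The sketch below is illustrative only (make_batch_view is a hypothetical helper, not part of the patch); it assumes the reordered llama_batch from this diff and leaves the legacy all_* fields at zero because pos/seq_id are supplied per token.

    #include "llama.h"

    // Hypothetical helper (not part of this patch): build a view over a
    // sub-range of an existing batch, following the new member order:
    // token, embd, pos, n_seq_id, seq_id, logits, n_tokens, all_pos_0, all_pos_1, all_seq_id.
    static llama_batch make_batch_view(const llama_batch & batch, int32_t i, int32_t n_tokens) {
        return {
            /* .token      = */ batch.token    ? batch.token    + i : nullptr,
            /* .embd       = */ nullptr,       // token-based view, no embeddings
            /* .pos        = */ batch.pos      ? batch.pos      + i : nullptr,
            /* .n_seq_id   = */ batch.n_seq_id ? batch.n_seq_id + i : nullptr,
            /* .seq_id     = */ batch.seq_id   ? batch.seq_id   + i : nullptr,
            /* .logits     = */ batch.logits   ? batch.logits   + i : nullptr,
            /* .n_tokens   = */ n_tokens,      // now comes after the pointer members
            /* .all_pos_0  = */ 0,
            /* .all_pos_1  = */ 0,
            /* .all_seq_id = */ 0,             // unused when pos/seq_id are set
        };
    }

Designated initializers would make such call sites immune to member reordering, but they are a C99/C++20 feature; the comment-annotated positional style above presumably stays to keep the code within its current language standard.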