diff --git a/llama.h b/llama.h
index d387ad77b..2d8ed371b 100644
--- a/llama.h
+++ b/llama.h
@@ -185,12 +185,13 @@ extern "C" {
     struct llama_model_params {
         int32_t n_gpu_layers; // number of layers to store in VRAM
         enum llama_split_mode split_mode; // how to split the model across multiple GPUs
-        
+
         // main_gpu interpretation depends on split_mode:
         // LLAMA_SPLIT_NONE: the GPU that is used for the entire model
         // LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results
         // LLAMA_SPLIT_LAYER: ignored
         int32_t main_gpu;
+
         // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
         const float * tensor_split;