diff --git a/CMakeLists.txt b/CMakeLists.txt
index e4bd57d77..8aa36e4c3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,7 @@ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(LLAMA_STANDALONE ON)
 set(BUILD_SHARED_LIBS_DEFAULT ON)
-set(LLAMA_STATIC ON)
+set(LLAMA_STATIC OFF)
 set(LLAMA_NATIVE OFF)
 set(LLAMA_LTO OFF)
 set(LLAMA_ALL_WARNINGS OFF)
diff --git a/otherarch/ggml_v1.c b/otherarch/ggml_v1.c
index 5720788d6..d6af52c82 100644
--- a/otherarch/ggml_v1.c
+++ b/otherarch/ggml_v1.c
@@ -10053,10 +10053,10 @@ enum ggml_v1_opt_result ggml_v1_opt(
         struct ggml_v1_tensor * f) {
     bool free_ctx = false;
     if (ctx == NULL) {
-        struct ggml_v1_init_params params_ctx = {
-            .mem_size = 16*1024*1024,
-            .mem_buffer = NULL,
-        };
+        struct ggml_v1_init_params params_ctx;
+        params_ctx.mem_size = 16*1024*1024;
+        params_ctx.mem_buffer = NULL;
+
         ctx = ggml_v1_init(params_ctx);
 
         if (ctx == NULL) {
diff --git a/otherarch/gpt2_v1.cpp b/otherarch/gpt2_v1.cpp
index e60084b34..b70f814e9 100644
--- a/otherarch/gpt2_v1.cpp
+++ b/otherarch/gpt2_v1.cpp
@@ -12,7 +12,6 @@
 #include
 #include
 #include
-#include
@@ -137,10 +136,10 @@ ModelLoadResult legacy_gpt2_model_load(const std::string & fname, gpt2_v1_model
 
     // create the ggml context
     {
-        struct ggml_v1_init_params params = {
-            .mem_size = ctx_size,
-            .mem_buffer = NULL,
-        };
+        struct ggml_v1_init_params params;
+        params.mem_size = ctx_size;
+        params.mem_buffer = NULL;
+
         model.ctx = ggml_v1_init(params);
 
         if (!model.ctx) {
@@ -352,10 +351,10 @@ bool legacy_gpt2_eval(
         }
     }
 
-    struct ggml_v1_init_params params = {
-        .mem_size = buf_size,
-        .mem_buffer = buf,
-    };
+    struct ggml_v1_init_params params;
+    params.mem_size = buf_size;
+    params.mem_buffer = buf;
+
     struct ggml_v1_context * ctx0 = ggml_v1_init(params);
     struct ggml_v1_cgraph gf = { .n_threads = n_threads };
 
diff --git a/otherarch/gpt2_v2.cpp b/otherarch/gpt2_v2.cpp
index af252a0cc..199c353b9 100644
--- a/otherarch/gpt2_v2.cpp
+++ b/otherarch/gpt2_v2.cpp
@@ -13,7 +13,6 @@
 #include
 #include
 #include
-#include
 
 #include "model_adapter.h"
 
@@ -143,11 +142,11 @@ ModelLoadResult gpt2_model_load(const std::string & fname, gpt2_model & model, g
 
     // create the ggml context
     {
-        struct ggml_init_params params = {
-            .mem_size = ctx_size,
-            .mem_buffer = NULL,
-            .no_alloc = false,
-        };
+        struct ggml_init_params params;
+        params.mem_size = ctx_size;
+        params.mem_buffer = NULL;
+        params.no_alloc = false;
+
         model.ctx = ggml_init(params);
 
         if (!model.ctx) {
@@ -370,11 +369,11 @@ bool gpt2_eval(
         }
     }
 
-    struct ggml_init_params params = {
-        .mem_size = buf_size,
-        .mem_buffer = buf,
-        .no_alloc = false,
-    };
+    struct ggml_init_params params;
+    params.mem_size = buf_size;
+    params.mem_buffer = buf;
+    params.no_alloc = false;
+
     struct ggml_context * ctx0 = ggml_init(params);
     struct ggml_cgraph gf = { .n_threads = n_threads };
 
diff --git a/otherarch/gptj_v1.cpp b/otherarch/gptj_v1.cpp
index 2f6ae9898..3ec25d2ae 100644
--- a/otherarch/gptj_v1.cpp
+++ b/otherarch/gptj_v1.cpp
@@ -12,7 +12,6 @@
 #include
 #include
 #include
-#include
@@ -148,10 +147,10 @@ ModelLoadResult legacy_gptj_model_load(const std::string & fname, gptj_model_v1
 
     // create the ggml context
     {
-        struct ggml_v1_init_params params = {
-            .mem_size = ctx_size,
-            .mem_buffer = NULL,
-        };
+        struct ggml_v1_init_params params;
+        params.mem_size = ctx_size;
+        params.mem_buffer = NULL;
+
         model.ctx = ggml_v1_init(params);
 
         if (!model.ctx) {
@@ -402,10 +401,10 @@ bool legacy_gptj_eval(
         }
     }
 
-    struct ggml_v1_init_params params = {
-        .mem_size = buf_size,
-        .mem_buffer = buf,
-    };
+    struct ggml_v1_init_params params;
+    params.mem_size = buf_size;
+    params.mem_buffer = buf;
+
     struct ggml_v1_context * ctx0 = ggml_v1_init(params);
     struct ggml_v1_cgraph gf = { .n_threads = n_threads };
 
diff --git a/otherarch/gptj_v2.cpp b/otherarch/gptj_v2.cpp
index 8ea889025..c942cd3d7 100644
--- a/otherarch/gptj_v2.cpp
+++ b/otherarch/gptj_v2.cpp
@@ -13,7 +13,6 @@
 #include
 #include
 #include
-#include
 
 #include "model_adapter.h"
 
@@ -143,11 +142,11 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
 
     // create the ggml context
     {
-        struct ggml_init_params params = {
-            .mem_size = ctx_size,
-            .mem_buffer = NULL,
-            .no_alloc = false,
-        };
+        struct ggml_init_params params;
+        params.mem_size = ctx_size;
+        params.mem_buffer = NULL;
+        params.no_alloc = false;
+
         model.ctx = ggml_init(params);
 
         if (!model.ctx) {
@@ -382,11 +381,11 @@ bool gptj_eval(
         }
     }
 
-    struct ggml_init_params params = {
-        .mem_size = buf_size,
-        .mem_buffer = buf,
-        .no_alloc = false,
-    };
+    struct ggml_init_params params;
+    params.mem_size = buf_size;
+    params.mem_buffer = buf;
+    params.no_alloc = false;
+
     struct ggml_context * ctx0 = ggml_init(params);
     struct ggml_cgraph gf = { .n_threads = n_threads };
 
diff --git a/otherarch/neox.cpp b/otherarch/neox.cpp
index 120ccab45..53a9d4512 100644
--- a/otherarch/neox.cpp
+++ b/otherarch/neox.cpp
@@ -13,7 +13,6 @@
 #include
 #include
 #include
-#include
@@ -135,12 +134,11 @@ ModelLoadResult stablelm_model_load(const std::string & fname, stablelm_model &
 
     // create the ggml context
     {
-        struct ggml_init_params params = {
-            .mem_size = ctx_size,
-            .mem_buffer = NULL,
-            .no_alloc = false,
-        };
-
+        struct ggml_init_params params;
+        params.mem_size = ctx_size;
+        params.mem_buffer = NULL;
+        params.no_alloc = false;
+
         model.ctx = ggml_init(params);
         if (!model.ctx) {
             fprintf(stderr, "%s: ggml_init() failed\n", __func__);
@@ -377,11 +375,11 @@ bool stablelm_eval(
         }
     }
 
-    struct ggml_init_params params = {
-        .mem_size = buf_size,
-        .mem_buffer = buf,
-        .no_alloc = false,
-    };
+    struct ggml_init_params params;
+    params.mem_size = buf_size;
+    params.mem_buffer = buf;
+    params.no_alloc = false;
+
     struct ggml_context * ctx0 = ggml_init(params);
     struct ggml_cgraph gf = { .n_threads = n_threads };
 
diff --git a/otherarch/tools/convert_hf_gpt2.py b/otherarch/tools/convert_hf_gpt2.py
index 70df03f3e..b6e20983a 100644
--- a/otherarch/tools/convert_hf_gpt2.py
+++ b/otherarch/tools/convert_hf_gpt2.py
@@ -10,7 +10,7 @@ import torch
 import numpy as np
 import re
 
-from transformers import GPTJForCausalLM, AutoModelForCausalLM
+from transformers import AutoModelForCausalLM
 
 # ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
 def bytes_to_unicode():
diff --git a/otherarch/tools/convert_hf_neox.py b/otherarch/tools/convert_hf_neox.py
index 4e1f8f01b..fc327b149 100644
--- a/otherarch/tools/convert_hf_neox.py
+++ b/otherarch/tools/convert_hf_neox.py
@@ -1,7 +1,6 @@
 import sys
 import struct
 import json
-import torch
 import numpy as np
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -59,6 +58,7 @@ fout.write(struct.pack("i", hparams["hidden_size"]))
 fout.write(struct.pack("i", hparams["num_attention_heads"]))
 fout.write(struct.pack("i", hparams["num_hidden_layers"]))
 fout.write(struct.pack("i", int(hparams["rotary_pct"]*(hparams["hidden_size"]//hparams["num_attention_heads"]))))
+fout.write(struct.pack("i", hparams["use_parallel_residual"]))
 fout.write(struct.pack("i", ftype))
 
 # TODO: temporary hack to not deal with implementing the tokenizer