diff --git a/koboldcpp.py b/koboldcpp.py
index 2d0e094d5..52f5a6151 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -969,7 +969,7 @@ def show_new_gui():
             basefile = os.path.basename(model_var.get())
             horde_name_var.set(os.path.splitext(basefile)[0])
 
-    usehorde_box = makecheckbox(network_tab, "Configure for Horde", usehorde_var, 4, command=togglehorde)
+    makecheckbox(network_tab, "Configure for Horde", usehorde_var, 4, command=togglehorde)
     togglehorde(1,1,1)
 
     # launch
@@ -1130,6 +1130,7 @@ def show_new_gui():
             if len(dict["hordeconfig"]) > 4:
                 horde_apikey_var.set(dict["hordeconfig"][3])
                 horde_workername_var.set(dict["hordeconfig"][4])
+                usehorde_var.set("1")
 
     def save_config():
         file_type = [("KoboldCpp Settings", "*.kcpps")]
@@ -1343,7 +1344,7 @@ def show_old_gui():
 #A very simple and stripped down embedded horde worker with no dependencies
 def run_horde_worker(args, api_key, worker_name):
     import urllib.request
-    global friendlymodelname, maxhordectx, maxhordelen, exitcounter
+    global friendlymodelname, maxhordectx, maxhordelen, exitcounter, modelbusy
 
     def make_url_request(url, data, method='POST'):
         try:
@@ -1384,6 +1385,12 @@ def run_horde_worker(args, api_key, worker_name):
             break
 
     while exitcounter < 10:
+
+        #first, make sure we are not generating
+        if modelbusy.locked():
+            time.sleep(0.5)
+            continue
+
         #pop new request
         gen_dict = {
             "name": worker_name,
@@ -1406,13 +1413,14 @@ def run_horde_worker(args, api_key, worker_name):
             continue
         current_id = pop['id']
         current_payload = pop['payload']
-        print(f"Job received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
+        print(f"\nJob received from {cluster} for {current_payload.get('max_length',80)} tokens and {current_payload.get('max_context_length',1024)} max context. Starting generation...")
 
         #do gen
         while exitcounter < 10:
-            current_generation = make_url_request(f'http://localhost:{args.port}/api/v1/generate', current_payload)
-            if current_generation:
-                break
+            if not modelbusy.locked():
+                current_generation = make_url_request(f'http://localhost:{args.port}/api/v1/generate', current_payload)
+                if current_generation:
+                    break
             print("Server Busy - Not ready to generate...")
             time.sleep(5)
 
diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp
index 21cf40500..91eb355bb 100644
--- a/otherarch/gptj_v3.cpp
+++ b/otherarch/gptj_v3.cpp
@@ -478,8 +478,8 @@ bool gptj_eval(
 
         // self-attention
         {
-            struct ggml_tensor * Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, freq_base, freq_scale, n_ctx);
-            struct ggml_tensor * Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, freq_base, freq_scale, n_ctx);
+            struct ggml_tensor * Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, n_ctx, freq_base, freq_scale);
+            struct ggml_tensor * Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, n_ctx, freq_base, freq_scale);
 
             // store key and value to memory
             {
diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp
index d9d7225cb..fdcaed9bb 100644
--- a/otherarch/neox_v3.cpp
+++ b/otherarch/neox_v3.cpp
@@ -506,8 +506,8 @@ bool gpt_neox_eval(
             struct ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd/n_head, n_head, N, cur->nb[1]/n_head, cur->nb[1], 2*sizeof(float)*n_embd/n_head));
 
             // using mode = 2 for GPT-NeoX mode
-            Qcur = ggml_rope_custom_inplace(ctx0, Qcur, n_past, n_rot, 2, freq_base, freq_scale, n_ctx);
-            Kcur = ggml_rope_custom_inplace(ctx0, Kcur, n_past, n_rot, 2, freq_base, freq_scale, n_ctx);
+            Qcur = ggml_rope_custom_inplace(ctx0, Qcur, n_past, n_rot, 2, n_ctx, freq_base, freq_scale);
+            Kcur = ggml_rope_custom_inplace(ctx0, Kcur, n_past, n_rot, 2, n_ctx, freq_base, freq_scale);
 
             // store key and value to memory
             {