diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index d08b39a2c..fc7bf520d 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -14,7 +14,7 @@
 
 #include "ggml.h"
 
-#define CL_DMMV_BLOCK_SIZE 32;
+#define CL_DMMV_BLOCK_SIZE 64;
 
 #define MULTILINE_QUOTE(...) #__VA_ARGS__
 static std::string program_source = MULTILINE_QUOTE(
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 3449be635..55a4668e4 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -857,18 +857,15 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
         gpt_vocab::id id = 0;
         // predict
         unsigned int embdsize = embd.size();
+        //print progress
+        if (!startedsampling)
+        {
+            printf("\rProcessing Prompt%s (%d / %d tokens)", (blasmode ? " [BLAS]" : ""), input_consumed, embd_inp.size());
+        }
+        fflush(stdout);
+
         if (embdsize > 0)
         {
-            //print progress
-            if (!startedsampling)
-            {
-                printf("\rProcessing Prompt%s (%d / %d tokens)", (blasmode ? " [BLAS]" : ""), input_consumed, embd_inp.size());
-            }
-            else
-            {
-                printf("\rGenerating (%d / %d tokens)", (1 + params.n_predict - remaining_tokens), params.n_predict);
-            }
-            fflush(stdout);
 
             bool evalres = false;
 
@@ -954,40 +951,35 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
 
                 printf("\n");
             }
 
+            unsigned int eosID = 0;
+            float * logitsPtr;
             if(file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2 || file_format == FileFormat::GGJT_3)
             {
-                float * logits;
                 if(file_format == FileFormat::GGJT_3)
                 {
-                    logits = llama_get_logits(llama_ctx_v3);
+                    logitsPtr = llama_get_logits(llama_ctx_v3);
                 }
                 else
                 {
-                    logits = llama_v2_get_logits(llama_ctx_v2);
+                    logitsPtr = llama_v2_get_logits(llama_ctx_v2);
                 }
+                eosID = llama_token_eos();
+
                 if (!unbanTokens)
                 {
                     // set the logit of the eos token (2) to zero to avoid sampling it
-                    logits[llama_token_eos()] = 0;
-                    //set logits of opening square bracket to zero. (disabled as obsolete)
-                    // logits[518] = 0;
-                    // logits[29961] = 0;
+                    logitsPtr[eosID] = 0;
                 }
-
-
-                id = SampleLogits(logits, nctx, n_vocab, last_n_size, repeat_penalty,
-                top_k, top_p, typical_p, tfs_z, temp, rng,
-                params.mirostat,params.mirostat_tau,params.mirostat_eta);
-
             }
             else
             {
+                logitsPtr = logits.data();
                 if (!unbanTokens)
                 {
                     //gpt2 uses negative logits, so we cant zero it
                     // set the logit of the eos token to minimum to avoid sampling it
-                    if ((file_format == FileFormat::GPT2_1 ||
+                    if (file_format == FileFormat::GPT2_1 ||
                         file_format == FileFormat::GPT2_2 ||
                         file_format == FileFormat::GPT2_3 ||
                         file_format == FileFormat::GPT2_4 ||
@@ -995,11 +987,14 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                         file_format == FileFormat::GPTJ_2 ||
                         file_format == FileFormat::GPTJ_3 ||
                         file_format == FileFormat::GPTJ_4 ||
-                        file_format == FileFormat::GPTJ_5) &&
-                        logits.size() > 50256)
-                    {
-                        int topid = std::min_element(logits.begin(),logits.end())-logits.begin();
-                        logits[50256] = (logits[topid] < 0 ? logits[topid] : 0);
+                        file_format == FileFormat::GPTJ_5)
+                    {
+                        eosID = 50256;
+                        if(logits.size() > eosID)
+                        {
+                            int topid = std::min_element(logits.begin(),logits.end())-logits.begin();
+                            logits[eosID] = (logits[topid] < 0 ? logits[topid] : 0);
+                        }
                     }
 
                     // set the logit of the eos token (0) to minimum to avoid sampling it
@@ -1011,16 +1006,18 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
 
                         file_format == FileFormat::NEOX_6 ||
                         file_format == FileFormat::NEOX_7)
                     {
+                        eosID = 0;
                         int topid = std::min_element(logits.begin(),logits.end())-logits.begin();
-                        logits[0] = (logits[topid] < 0 ? logits[topid] : 0);
+                        logits[eosID] = (logits[topid] < 0 ? logits[topid] : 0);
                     }
-                }
-
-                id = SampleLogits(logits.data(), nctx, n_vocab, last_n_size, repeat_penalty,
-                top_k, top_p, typical_p, tfs_z, temp, rng,
-                params.mirostat,params.mirostat_tau,params.mirostat_eta);
+                }
+            }
-
+
+            id = SampleLogits(logitsPtr, nctx, n_vocab, last_n_size, repeat_penalty,
+            top_k, top_p, typical_p, tfs_z, temp, rng,
+            params.mirostat,params.mirostat_tau,params.mirostat_eta);
+
             last_n_tokens.erase(last_n_tokens.begin());
             last_n_tokens.push_back(id);
             current_context_tokens.push_back(id);
@@ -1031,31 +1028,15 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
 
             // decrement remaining sampling budget
             --remaining_tokens;
-            if (file_format == FileFormat::GGML || file_format == FileFormat::GGHF || file_format == FileFormat::GGJT || file_format == FileFormat::GGJT_2|| file_format == FileFormat::GGJT_3)
+            for (auto id : embd)
             {
-                if(file_format == FileFormat::GGJT_3)
-                {
-                    concat_output += llama_token_to_str(llama_ctx_v3, id);
-                }
-                else
-                {
-                    concat_output += llama_v2_token_to_str(llama_ctx_v2, id);
-                }
-
-                if(unbanTokens && id==llama_token_eos())
-                {
-                    printf("\n(EOS token triggered!)");
-                    remaining_tokens = 0;
-                }
+                concat_output += FileFormatTokenizeID(id,file_format);
             }
-            else
-            {
-                for (auto id : embd)
-                {
-                    concat_output += vocab.id_to_token[id].c_str();
-                }
-            }
-
+
+            if (startedsampling)
+            {
+                printf("\rGenerating (%d / %d tokens)", (params.n_predict - remaining_tokens), params.n_predict);
+            }
             if(debugmode && top_picks.size()>0)
             {
                 printf(" [");
@@ -1074,6 +1055,12 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                 printf("]\n");
             }
 
+            if(unbanTokens && id==eosID)
+            {
+                printf("\n(EOS token triggered!)");
+                remaining_tokens = 0;
+            }
+
             for (const auto &matched : stop_sequence)
             {
                 if (concat_output.find(matched) != std::string::npos)
@@ -1084,6 +1071,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
                     break;
                 }
             }
+            fflush(stdout);
         }
         else
         {
diff --git a/koboldcpp.py b/koboldcpp.py
index 3891ca31a..3fca437fd 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -208,7 +208,7 @@ maxctx = 2048
 maxlen = 128
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.25.1"
+KcppVersion = "1.26"
 
 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
     sys_version = ""
diff --git a/otherarch/ggml_v2-opencl.cpp b/otherarch/ggml_v2-opencl.cpp
index 93f038ef5..6193662ba 100644
--- a/otherarch/ggml_v2-opencl.cpp
+++ b/otherarch/ggml_v2-opencl.cpp
@@ -14,7 +14,7 @@
 
 #include "ggml_v2.h"
 
-#define CL_DMMV_BLOCK_SIZE 32;
+#define CL_DMMV_BLOCK_SIZE 64;
 
 #define MULTILINE_QUOTE(...) #__VA_ARGS__
 static std::string program_source = MULTILINE_QUOTE(