From b91abc3316e76a1a4f86a568dae0eb573f717660 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 9 Apr 2023 15:27:43 +0800
Subject: [PATCH] increase default blas batch size

---
 gpttype_adapter.cpp | 2 +-
 koboldcpp.py        | 1 +
 llama_adapter.cpp   | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 5be8145c1..40a799f66 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -209,7 +209,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     int original_threads = params.n_threads;
     if (blasmode)
     {
-        params.n_batch = 512;
+        params.n_batch = 1024;
         params.n_threads = 1;
     }
 
diff --git a/koboldcpp.py b/koboldcpp.py
index bcc13a957..82f39e10c 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -377,6 +377,7 @@ if __name__ == '__main__':
     portgroup.add_argument("l_port", help="Port to listen on (deprecated)", default=defaultport, nargs="?", type=int, action='store')
     parser.add_argument("--host", help="Host IP to listen on. If empty, all routable interfaces are accepted.", default="")
 
+    #os.environ["OMP_NUM_THREADS"] = '12' # psutil.cpu_count(logical=False)
     physical_core_limit = 1
     if os.cpu_count()!=None and os.cpu_count()>1:
diff --git a/llama_adapter.cpp b/llama_adapter.cpp
index 8e3d77379..995294a02 100644
--- a/llama_adapter.cpp
+++ b/llama_adapter.cpp
@@ -158,7 +158,7 @@ generation_outputs llama_generate(const generation_inputs inputs, generation_out
     int original_threads = params.n_threads;
     if (blasmode)
     {
-        params.n_batch = 512;
+        params.n_batch = 1024;
         params.n_threads = 1;
     }
 
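
---

Note (illustrative, not part of the patch): in blasmode the adapters feed the
prompt to the model in chunks of params.n_batch tokens, so each chunk becomes
one large BLAS matrix multiply, and raising n_batch from 512 to 1024 roughly
halves the number of evaluation calls needed to ingest a long prompt. The
sketch below shows only that chunking arithmetic; model_eval() and the prompt
size are hypothetical stand-ins, not code from this repository.

    // chunking_sketch.cpp -- minimal sketch, assuming a hypothetical model_eval()
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<int> prompt(3000, 0);  // hypothetical 3000-token prompt
        const int n_batch = 1024;          // new blasmode default (was 512)

        int n_past  = 0;  // tokens already evaluated
        int n_calls = 0;  // evaluation calls issued so far
        while (n_past < (int)prompt.size()) {
            int chunk = std::min((int)prompt.size() - n_past, n_batch);
            // model_eval(&prompt[n_past], chunk, n_past);  // one big GEMM per chunk
            n_past += chunk;
            ++n_calls;
        }
        // With n_batch = 1024 this reports 3 calls; the old 512 would need 6.
        std::printf("%d eval calls for %zu tokens\n", n_calls, prompt.size());
        return 0;
    }

The likely trade-off is memory: larger batches need bigger intermediate
buffers during prompt processing, which is presumably why the higher value is
applied only when BLAS acceleration makes the larger multiplies worthwhile.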