From 0cca0726fef36732d640a95ce140da67325c7ce1 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 23 Jul 2023 09:59:34 +0800 Subject: [PATCH] reduce number of retries, fixed maxlength > maxctx bug --- koboldcpp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/koboldcpp.py b/koboldcpp.py index 8143960ff..d704bc653 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -222,6 +222,8 @@ def generate(prompt,max_length=20, max_context_length=512, temperature=0.8, top_ inputs = generation_inputs() outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs)) inputs.prompt = prompt.encode("UTF-8") + if max_length >= max_context_length: + max_length = max_context_length-1 inputs.max_context_length = max_context_length # this will resize the context buffer if changed inputs.max_length = max_length inputs.temperature = temperature @@ -1426,7 +1428,7 @@ def run_horde_worker(args, api_key, worker_name): break else: currentjob_attempts += 1 - if currentjob_attempts>10: + if currentjob_attempts>5: break print("Server Busy - Not ready to generate...") time.sleep(5)