From 0e5f16de53235317a7f0d6d0fd5a91f0b48f2eba Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Mon, 27 Nov 2023 19:08:54 +0800
Subject: [PATCH] reduce max ctx to fit instead of crashing

---
 koboldcpp.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 60000e165..732d65ab7 100755
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -310,9 +310,11 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
         max_length = max_context_length-1
     inputs.max_context_length = max_context_length   # this will resize the context buffer if changed
     global showmaxctxwarning
-    if showmaxctxwarning and max_context_length > maxctx:
-        print(f"\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. Consider launching with increased --contextsize to avoid errors. This message will only show once per session.)")
-        showmaxctxwarning = False
+    if max_context_length > maxctx:
+        if showmaxctxwarning:
+            print(f"\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. It will be reduced to fit. Consider launching with increased --contextsize to avoid errors. This message will only show once per session.)")
+            showmaxctxwarning = False
+        max_context_length = maxctx
     inputs.max_length = max_length
     inputs.temperature = temperature
     inputs.top_k = top_k