Reduce the requested max context length to fit the allocated context size instead of crashing
This commit is contained in:
parent
8acd7be734
commit
0e5f16de53
1 changed file with 5 additions and 3 deletions
|
@ -310,9 +310,11 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu
|
|||
max_length = max_context_length-1
|
||||
inputs.max_context_length = max_context_length # this will resize the context buffer if changed
|
||||
global showmaxctxwarning
|
||||
if showmaxctxwarning and max_context_length > maxctx:
|
||||
print(f"\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. Consider launching with increased --contextsize to avoid errors. This message will only show once per session.)")
|
||||
showmaxctxwarning = False
|
||||
if max_context_length > maxctx:
|
||||
if showmaxctxwarning:
|
||||
print(f"\n(Warning! Request max_context_length={max_context_length} exceeds allocated context size of {maxctx}. It will be reduced to fit. Consider launching with increased --contextsize to avoid errors. This message will only show once per session.)")
|
||||
showmaxctxwarning = False
|
||||
max_context_length = maxctx
|
||||
inputs.max_length = max_length
|
||||
inputs.temperature = temperature
|
||||
inputs.top_k = top_k
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue