Allocate a small amount of extra context for GGUF to deal with KV fragmentation causing issues in some scenarios.
This commit is contained in:
parent
d2ef458b02
commit
ba5c33319b
2 changed files with 5 additions and 4 deletions
|
@@ -883,7 +883,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
{
|
{
|
||||||
llama_model_params model_params = llama_model_default_params();
|
llama_model_params model_params = llama_model_default_params();
|
||||||
llama_context_params llama_ctx_params = llama_context_default_params();
|
llama_context_params llama_ctx_params = llama_context_default_params();
|
||||||
llama_ctx_params.n_ctx = clamped_max_context_length;
|
llama_ctx_params.n_ctx = clamped_max_context_length + 64; //add some extra context to deal with KV fragmentation
|
||||||
//llama_ctx_paran_parts = -1;
|
//llama_ctx_paran_parts = -1;
|
||||||
llama_ctx_params.seed = -1;
|
llama_ctx_params.seed = -1;
|
||||||
llama_ctx_params.f16_kv = inputs.f16_kv;
|
llama_ctx_params.f16_kv = inputs.f16_kv;
|
||||||
|
@@ -1808,7 +1808,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
||||||
|
|
||||||
if (!evalres)
|
if (!evalres)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "\nFailed to predict! Check your context buffer sizes!\n");
|
fprintf(stderr, "\nFailed to predict at %d! Check your context buffer sizes!\n",n_past);
|
||||||
snprintf(output.text, sizeof(output.text), "%s", "");
|
snprintf(output.text, sizeof(output.text), "%s", "");
|
||||||
output.status = 0;
|
output.status = 0;
|
||||||
generation_finished = true;
|
generation_finished = true;
|
||||||
|
|
|
@@ -2164,13 +2164,14 @@ def main(launch_args,start_server=True):
|
||||||
|
|
||||||
if args.port_param!=defaultport:
|
if args.port_param!=defaultport:
|
||||||
args.port = args.port_param
|
args.port = args.port_param
|
||||||
print(f"Starting Kobold HTTP Server on port {args.port} at http://localhost:{args.port}/api")
|
|
||||||
print(f"Starting OpenAI Compatible Endpoint on port {args.port} at http://localhost:{args.port}/v1")
|
|
||||||
epurl = ""
|
epurl = ""
|
||||||
if args.host=="":
|
if args.host=="":
|
||||||
epurl = f"http://localhost:{args.port}"
|
epurl = f"http://localhost:{args.port}"
|
||||||
else:
|
else:
|
||||||
epurl = f"http://{args.host}:{args.port}"
|
epurl = f"http://{args.host}:{args.port}"
|
||||||
|
print(f"Starting Kobold HTTP Server on port {args.port} at {epurl}/api/")
|
||||||
|
print(f"Starting OpenAI Compatible Endpoint on port {args.port} at {epurl}/v1/")
|
||||||
|
|
||||||
if args.launch:
|
if args.launch:
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue