diff --git a/expose.cpp b/expose.cpp index 0c0c33616..cd7f46a67 100644 --- a/expose.cpp +++ b/expose.cpp @@ -161,25 +161,28 @@ extern "C" { std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0); n_past = 0; - //fast forward the past based on identical tokens, stop once a divergence is noted + //fast forward the past based on identical tokens, stop once a divergence is noted + int embd_inp_len = embd_inp.size(); for(int i=0;i=embd_inp_len) { break; } } + + last_n_tokens.erase(last_n_tokens.begin(),last_n_tokens.begin()+n_past); + embd_inp.erase(embd_inp.begin(),embd_inp.begin()+n_past); + current_context_tokens.resize(n_past); int remaining_tokens = params.n_predict; diff --git a/llama_for_kobold.py b/llama_for_kobold.py index 5b4906f6b..fc57f2dcb 100644 --- a/llama_for_kobold.py +++ b/llama_for_kobold.py @@ -289,7 +289,7 @@ if __name__ == '__main__': mdl_nparts += 1 modelname = os.path.abspath(sys.argv[1]) print("Loading model: " + modelname) - loadok = load_model(modelname,16,maxctx,mdl_nparts) + loadok = load_model(modelname,8,maxctx,mdl_nparts) print("Load Model OK: " + str(loadok)) #friendlymodelname = Path(modelname).stem ### this wont work on local kobold api, so we must hardcode a known HF model name diff --git a/llamacpp.dll b/llamacpp.dll index 422696934..91e7b8d61 100644 Binary files a/llamacpp.dll and b/llamacpp.dll differ