Optimizations

2023-03-24 21:33:53 +08:00 · 2023-03-24 21:33:53 +08:00 · c6c60332a4
commit c6c60332a4
parent 3879d84400
3 changed files with 10 additions and 7 deletions
--- a/expose.cpp
+++ b/expose.cpp
@@ -161,25 +161,28 @@ extern "C" {
        std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
        n_past = 0;

-        //fast forward the past based on identical tokens, stop once a divergence is noted        
+        //fast forward the past based on identical tokens, stop once a divergence is noted
+        int embd_inp_len = embd_inp.size(); 
        for(int i=0;i<current_context_tokens.size();++i)
        {
-            if(current_context_tokens[i]==embd_inp[0])
+            if(current_context_tokens[i]==embd_inp[i])
            {
-                n_past += 1;
-                embd_inp.erase(embd_inp.begin());
-                last_n_tokens.erase(last_n_tokens.begin());
+                n_past += 1;                
                last_n_tokens.push_back(current_context_tokens[i]);
            }
            else
            {
                break;
            }
-            if(embd_inp.size()<=1)
+            if((i+2)>=embd_inp_len)
            {
                break;
            }
        }
+       
+        last_n_tokens.erase(last_n_tokens.begin(),last_n_tokens.begin()+n_past);
+        embd_inp.erase(embd_inp.begin(),embd_inp.begin()+n_past);
+        
        current_context_tokens.resize(n_past);
 		
 		int remaining_tokens = params.n_predict;
--- a/llama_for_kobold.py
+++ b/llama_for_kobold.py
@@ -289,7 +289,7 @@ if __name__ == '__main__':
            mdl_nparts += 1
    modelname = os.path.abspath(sys.argv[1])
    print("Loading model: " + modelname)
-    loadok = load_model(modelname,16,maxctx,mdl_nparts)
+    loadok = load_model(modelname,8,maxctx,mdl_nparts)
    print("Load Model OK: " + str(loadok))

    #friendlymodelname = Path(modelname).stem   ### this wont work on local kobold api, so we must hardcode a known HF model name
--- a/llamacpp.dll
+++ b/llamacpp.dll