improved console info, fixed UTF-8 encoding bugs

Concedo 2023-03-31 15:38:38 +08:00
parent 354d4f232f
commit 79f9743347
5 changed files with 20 additions and 8 deletions


@@ -25,7 +25,7 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
- Leave main.cpp UNTOUCHED; we want to be able to update the repo and pull any changes automatically.
- No dynamic memory allocation! Set up structs with FIXED (known) shapes and sizes for ALL output fields. Python will ALWAYS provide the memory; we just write to it.
- No external libraries or dependencies. That means no Flask, Pybind, or anything else. All You Need Is Python.
- Since v1.0.6, requires libopenblas; the prebuilt Windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS.
- Since v1.0.6, requires libopenblas; the prebuilt Windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS. If you want, you can also link your own install of OpenBLAS manually with `LLAMA_OPENBLAS=1`.
## License
- The original GGML library and llama.cpp by ggerganov are licensed under the MIT License
@@ -34,3 +34,4 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
## Notes
- There is a fundamental flaw with llama.cpp, which causes generation delay to scale linearly with original prompt length. If you care, **please contribute to [this discussion](https://github.com/ggerganov/llama.cpp/discussions/229)** which, if resolved, will actually make this viable.
- I have heard of someone claiming a false positive report from their antivirus software. The exe is a simple pyinstaller bundle that includes the necessary python scripts and dlls to run. If this still concerns you, you might wish to rebuild everything from source code using the makefile, and you can rebuild the exe yourself with pyinstaller by using `make_pyinstaller.bat`.
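
The "no dynamic memory allocation" rule in the README bullets above describes the contract between the Python wrapper and the C++ library: Python allocates a struct whose fields have fixed, known sizes, passes it in, and the C++ side only writes into that memory. Below is a minimal ctypes sketch of that idea; the field names and the buffer size are illustrative assumptions, not the repo's actual struct layout.

```python
import ctypes

# Illustrative sketch only: the field names and 16 KB buffer size are
# assumptions, not the actual struct definitions used by the repo.
class GenerationOutput(ctypes.Structure):
    _fields_ = [
        ("status", ctypes.c_int),          # fixed-size scalar field
        ("text", ctypes.c_char * 16384),   # fixed-size output buffer
    ]

outputs = GenerationOutput()               # Python owns this memory
# handle.generate(inputs, outputs)         # the C++ side only fills it in
print(outputs.text.decode("UTF-8", "ignore"))  # decode tolerantly, as in this commit
```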


@@ -218,19 +218,30 @@ extern "C" {
std::string concat_output = "";
bool startedsampling = false;
printf("\nProcessing Prompt (%d tokens%s): ",embd_inp.size(),(blasmode?", BLAS":""));
timer_start();
double time1=0,time2=0;
unsigned int embd_inp_size = embd_inp.size();
printf("\n");
while (remaining_tokens > 0)
{
llama_token id = 0;
// predict
if (embd.size() > 0)
unsigned int embdsize = embd.size();
if (embdsize > 0)
{
printf("|");
//print progress
if(!startedsampling)
{
printf("\rProcessing Prompt%s (%d / %d tokens)",(blasmode?" [BLAS]":""), input_consumed,embd_inp_size);
}
else
{
printf("\rGenerating (%d / %d tokens)",(1+params.n_predict-remaining_tokens),params.n_predict);
}
//printf("\nnp:%d embd:%d txt:%s",n_past,embd.size(),llama_token_to_str(ctx, embd[0]));
if (llama_eval(ctx, embd.data(), embd.size(), n_past, params.n_threads))
if (llama_eval(ctx, embd.data(), embdsize, n_past, params.n_threads))
{
fprintf(stderr, "Failed to predict\n");
snprintf(output.text, sizeof(output.text), "%s", "");
@@ -256,7 +267,7 @@ extern "C" {
params.n_threads = original_threads;
time1 = timer_check();
timer_start();
printf("\nGenerating (%d tokens): ",params.n_predict);
printf("\n");
}
{
@@ -281,7 +292,7 @@ extern "C" {
--remaining_tokens;
//printf("\nid:%d word:%s\n",id,llama_token_to_str(ctx, id));
concat_output += llama_token_to_str(ctx, id);
}
}
else
{
// some user input remains from prompt or interaction, forward it to processing
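
The new progress lines in the loop above lean on the carriage return: printing `\r` moves the cursor back to the start of the line, so each `printf` redraws the token counter in place instead of emitting a stream of `|` characters. A small Python sketch of the same console trick (the token counts are made up for illustration):

```python
import time

total = 50  # illustrative token count, not a value taken from the library
for done in range(1, total + 1):
    # "\r" returns to the start of the line so the counter overwrites itself;
    # end="" and flush=True keep everything on one live-updating line.
    print(f"\rProcessing Prompt ({done} / {total} tokens)", end="", flush=True)
    time.sleep(0.02)
print()  # finish with a newline so later output starts on a fresh line
```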

Binary file not shown.

Binary file not shown.


@@ -73,7 +73,7 @@ def generate(prompt,max_length=20, max_context_length=512,temperature=0.8,top_k=
inputs.seed = seed
ret = handle.generate(inputs,outputs)
if(ret.status==1):
return ret.text.decode("UTF-8")
return ret.text.decode("UTF-8","ignore")
return ""
#################################################################
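
The one-argument change in `generate` above is the UTF encoding fix from the commit message: the C++ side fills a fixed-size byte buffer, and if the returned bytes happen to end partway through a multi-byte UTF-8 character, a strict `decode("UTF-8")` raises `UnicodeDecodeError`. Passing `"ignore"` drops the dangling bytes instead of crashing. A small sketch of the failure mode, using a made-up string:

```python
# A fixed-size buffer can cut a multi-byte character in half; "é" is two
# bytes in UTF-8, so slicing after the first byte leaves an invalid tail.
truncated = "héllo".encode("UTF-8")[:2]     # b'h\xc3', ends mid-character
# truncated.decode("UTF-8")                 # strict decode raises UnicodeDecodeError
print(truncated.decode("UTF-8", "ignore"))  # prints "h"; the bad tail is dropped
```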