diff --git a/README.md b/README.md
index c4837f2ad..81f29eeb4 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
 - Leave main.cpp UNTOUCHED, We want to be able to update the repo and pull any changes automatically.
 - No dynamic memory allocation! Setup structs with FIXED (known) shapes and sizes for ALL output fields. Python will ALWAYS provide the memory, we just write to it.
 - No external libraries or dependencies. That means no Flask, Pybind and whatever. All You Need Is Python.
-- Since v1.0.6, requires libopenblas, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS.
+- Since v1.0.6, requires libopenblas, the prebuilt windows binaries are included in this repo. If not found, it will fall back to a mode without BLAS. If you want you can also link your own install of OpenBLAS manually with `LLAMA_OPENBLAS=1`
 
 ## License
 - The original GGML library and llama.cpp by ggerganov are licensed under the MIT License
@@ -34,3 +34,4 @@ What does it mean? You get llama.cpp with a fancy UI, persistent stories, editin
 
 ## Notes
 - There is a fundamental flaw with llama.cpp, which causes generation delay to scale linearly with original prompt length. If you care, **please contribute to [this discussion](https://github.com/ggerganov/llama.cpp/discussions/229)** which, if resolved, will actually make this viable.
+- I have heard of someone claiming a false AV positive report. The exe is a simple pyinstaller bundle that includes the necessary python scripts and dlls to run. If this still concerns you, you might wish to rebuild everything from source code using the makefile, and you can rebuild the exe yourself with pyinstaller by using `make_pyinstaller.bat`
\ No newline at end of file
diff --git a/expose.cpp b/expose.cpp
index 773e53a47..9fd644a53 100644
--- a/expose.cpp
+++ b/expose.cpp
@@ -218,19 +218,30 @@ extern "C" {
         std::string concat_output = "";
 
         bool startedsampling = false;
-        printf("\nProcessing Prompt (%d tokens%s): ",embd_inp.size(),(blasmode?", BLAS":""));
+
         timer_start();
         double time1=0,time2=0;
+        unsigned int embd_inp_size = embd_inp.size();
+        printf("\n");
 
         while (remaining_tokens > 0)
         {
             llama_token id = 0;
             // predict
-            if (embd.size() > 0)
+            unsigned int embdsize = embd.size();
+            if (embdsize > 0)
             {
-                printf("|");
+                //print progress
+                if(!startedsampling)
+                {
+                    printf("\rProcessing Prompt%s (%d / %d tokens)",(blasmode?" [BLAS]":""), input_consumed,embd_inp_size);
+                }
+                else
+                {
+                    printf("\rGenerating (%d / %d tokens)",(1+params.n_predict-remaining_tokens),params.n_predict);
+                }
                 //printf("\nnp:%d embd:%d txt:%s",n_past,embd.size(),llama_token_to_str(ctx, embd[0]));
-                if (llama_eval(ctx, embd.data(), embd.size(), n_past, params.n_threads))
+                if (llama_eval(ctx, embd.data(), embdsize, n_past, params.n_threads))
                 {
                     fprintf(stderr, "Failed to predict\n");
                     snprintf(output.text, sizeof(output.text), "%s", "");
@@ -256,7 +267,7 @@ extern "C" {
                     params.n_threads = original_threads;
                     time1 = timer_check();
                     timer_start();
-                    printf("\nGenerating (%d tokens): ",params.n_predict);
+                    printf("\n");
                 }
 
                 {
@@ -281,7 +292,7 @@ extern "C" {
                 --remaining_tokens;
                 //printf("\nid:%d word:%s\n",id,llama_token_to_str(ctx, id));
                 concat_output += llama_token_to_str(ctx, id);
-            }
+            }
             else
             {
                 // some user input remains from prompt or interaction, forward it to processing
diff --git a/llamacpp.dll b/llamacpp.dll
index 033c05c5b..bf3946c74 100644
Binary files a/llamacpp.dll and b/llamacpp.dll differ
diff --git a/llamacpp_blas.dll b/llamacpp_blas.dll
index ced649012..f197df7f7 100644
Binary files a/llamacpp_blas.dll and b/llamacpp_blas.dll differ
diff --git a/llamacpp_for_kobold.py b/llamacpp_for_kobold.py
index 564dddea1..c60e9841d 100644
--- a/llamacpp_for_kobold.py
+++ b/llamacpp_for_kobold.py
@@ -73,7 +73,7 @@ def generate(prompt,max_length=20, max_context_length=512,temperature=0.8,top_k=
     inputs.seed = seed
     ret = handle.generate(inputs,outputs)
     if(ret.status==1):
-        return ret.text.decode("UTF-8")
+        return ret.text.decode("UTF-8","ignore")
     return ""
 
 #################################################################