do not show tokenizer warning

2023-05-13 15:48:17 +08:00 · 2023-05-13 15:48:17 +08:00 · b6594ab91e
commit b6594ab91e
parent cee8042793
3 changed files with 8 additions and 5 deletions
--- a/README.md
+++ b/README.md
@ -46,6 +46,7 @@ For more information, be sure to run the program with the --help flag.
  - For Arch Linux: Install `cblas` `openblas` and `clblast`. 
  - For Debian: Install `libclblast-dev` and `libopenblas-dev`.
 - After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
 - Note: Many OSX users have found that using Accelerate is actually faster than OpenBLAS. To try it, you may wish to run with `--noblas` and compare speeds.
 ## Considerations
 - Make ZERO or MINIMAL changes to parent repo files - do not move their function declarations elsewhere! We want to be able to update the repo and pull any changes automatically.
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -106,6 +106,8 @@ def init_library():
        else:
            use_blas = True
            print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas will be required.")
            if sys.platform=="darwin":
                print("Mac OSX note: Some people have found Accelerate actually faster than OpenBLAS. To compare, run Koboldcpp with --noblas instead.")
    if use_noavx2:
        if use_blas:
@ -196,7 +198,7 @@ maxctx = 2048
 maxlen = 128
 modelbusy = False
 defaultport = 5001
-KcppVersion = "1.21"
+KcppVersion = "1.21.1"
 class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
    sys_version = ""
--- a/llama.cpp
+++ b/llama.cpp
@ -1075,11 +1075,11 @@ static bool llama_eval_internal(
            const int   n_past,
            const int   n_threads) {
-    // enforce that the first token is BOS
+    // enforce that the first token is BOS (not needed, messes with my context manip code)
-    if (n_past == 0 && tokens[0] != llama_token_bos()) {
+    //if (n_past == 0 && tokens[0] != llama_token_bos()) {
-        fprintf(stderr, "%s: first token must be BOS\n", __func__);
+        //fprintf(stderr, "%s: first token must be BOS\n", __func__);
        // return false; //never fail. Not even in the face of Armageddon.
-    }
+    //}
    const int64_t t_start_us = ggml_time_us();