do not show tokenizer warning

This commit is contained in:
Concedo 2023-05-13 15:48:17 +08:00
parent cee8042793
commit b6594ab91e
3 changed files with 8 additions and 5 deletions

View file

@ -46,6 +46,7 @@ For more information, be sure to run the program with the --help flag.
- For Arch Linux: Install `cblas` `openblas` and `clblast`.
- For Debian: Install `libclblast-dev` and `libopenblas-dev`.
- After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
- Note: Many OSX users have found that using Accelerate is actually faster than OpenBLAS. To try, you may wish to run with `--noblas` and compare speeds.
## Considerations
- Make ZERO or as MINIMAL changes as possible to parent repo files - do not move their function declarations elsewhere! We want to be able to update the repo and pull any changes automatically.

View file

@ -106,6 +106,8 @@ def init_library():
else:
use_blas = True
print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas will be required.")
if sys.platform=="darwin":
print("Mac OSX note: Some people have found Accelerate actually faster than OpenBLAS. To compare, run Koboldcpp with --noblas instead.")
if use_noavx2:
if use_blas:
@ -196,7 +198,7 @@ maxctx = 2048
maxlen = 128
modelbusy = False
defaultport = 5001
KcppVersion = "1.21"
KcppVersion = "1.21.1"
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
sys_version = ""

View file

@ -1075,11 +1075,11 @@ static bool llama_eval_internal(
const int n_past,
const int n_threads) {
// enforce that the first token is BOS
if (n_past == 0 && tokens[0] != llama_token_bos()) {
fprintf(stderr, "%s: first token must be BOS\n", __func__);
// enforce that the first token is BOS (not needed, messes with my context manip code)
//if (n_past == 0 && tokens[0] != llama_token_bos()) {
//fprintf(stderr, "%s: first token must be BOS\n", __func__);
// return false; //never fail. Not even in the face of Armageddon.
}
//}
const int64_t t_start_us = ggml_time_us();