do not show tokenizer warning
This commit is contained in:
parent
cee8042793
commit
b6594ab91e
3 changed files with 8 additions and 5 deletions
|
@ -46,6 +46,7 @@ For more information, be sure to run the program with the --help flag.
|
|||
- For Arch Linux: Install `cblas`, `openblas` and `clblast`.
|
||||
- For Debian: Install `libclblast-dev` and `libopenblas-dev`.
|
||||
- After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
|
||||
- Note: Many OSX users have found that using Accelerate is actually faster than OpenBLAS. To try it, run with `--noblas` and compare speeds.
|
||||
|
||||
## Considerations
|
||||
- Make ZERO or as MINIMAL changes as possible to parent repo files - do not move their function declarations elsewhere! We want to be able to update the repo and pull any changes automatically.
|
||||
|
|
|
@ -106,6 +106,8 @@ def init_library():
|
|||
else:
|
||||
use_blas = True
|
||||
print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas will be required.")
|
||||
if sys.platform=="darwin":
|
||||
print("Mac OSX note: Some people have found Accelerate actually faster than OpenBLAS. To compare, run Koboldcpp with --noblas instead.")
|
||||
|
||||
if use_noavx2:
|
||||
if use_blas:
|
||||
|
@ -196,7 +198,7 @@ maxctx = 2048
|
|||
maxlen = 128
|
||||
modelbusy = False
|
||||
defaultport = 5001
|
||||
KcppVersion = "1.21"
|
||||
KcppVersion = "1.21.1"
|
||||
|
||||
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||
sys_version = ""
|
||||
|
|
|
@ -1075,11 +1075,11 @@ static bool llama_eval_internal(
|
|||
const int n_past,
|
||||
const int n_threads) {
|
||||
|
||||
// enforce that the first token is BOS
|
||||
if (n_past == 0 && tokens[0] != llama_token_bos()) {
|
||||
fprintf(stderr, "%s: first token must be BOS\n", __func__);
|
||||
// enforce that the first token is BOS (not needed, messes with my context manip code)
|
||||
//if (n_past == 0 && tokens[0] != llama_token_bos()) {
|
||||
//fprintf(stderr, "%s: first token must be BOS\n", __func__);
|
||||
// return false; //never fail. Not even in the face of Armageddon.
|
||||
}
|
||||
//}
|
||||
|
||||
const int64_t t_start_us = ggml_time_us();
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue