do not show tokenizer warning
This commit is contained in:
parent
cee8042793
commit
b6594ab91e
3 changed files with 8 additions and 5 deletions
|
@ -46,6 +46,7 @@ For more information, be sure to run the program with the --help flag.
|
||||||
- For Arch Linux: Install `cblas` `openblas` and `clblast`.
|
- For Arch Linux: Install `cblas` `openblas` and `clblast`.
|
||||||
- For Debian: Install `libclblast-dev` and `libopenblas-dev`.
|
- For Debian: Install `libclblast-dev` and `libopenblas-dev`.
|
||||||
- After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
|
- After all binaries are built, you can run the python script with the command `koboldcpp.py [ggml_model.bin] [port]`
|
||||||
|
- Note: Many OSX users have found that using Accelerate is actually faster than OpenBLAS. To try, you may wish to run with `--noblas` and compare speeds.
|
||||||
|
|
||||||
## Considerations
|
## Considerations
|
||||||
- ZERO or MINIMAL changes as possible to parent repo files - do not move their function declarations elsewhere! We want to be able to update the repo and pull any changes automatically.
|
- ZERO or MINIMAL changes as possible to parent repo files - do not move their function declarations elsewhere! We want to be able to update the repo and pull any changes automatically.
|
||||||
|
|
|
@ -106,6 +106,8 @@ def init_library():
|
||||||
else:
|
else:
|
||||||
use_blas = True
|
use_blas = True
|
||||||
print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas will be required.")
|
print("Attempting to use OpenBLAS library for faster prompt ingestion. A compatible libopenblas will be required.")
|
||||||
|
if sys.platform=="darwin":
|
||||||
|
print("Mac OSX note: Some people have found Accelerate actually faster than OpenBLAS. To compare, run Koboldcpp with --noblas instead.")
|
||||||
|
|
||||||
if use_noavx2:
|
if use_noavx2:
|
||||||
if use_blas:
|
if use_blas:
|
||||||
|
@ -196,7 +198,7 @@ maxctx = 2048
|
||||||
maxlen = 128
|
maxlen = 128
|
||||||
modelbusy = False
|
modelbusy = False
|
||||||
defaultport = 5001
|
defaultport = 5001
|
||||||
KcppVersion = "1.21"
|
KcppVersion = "1.21.1"
|
||||||
|
|
||||||
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
sys_version = ""
|
sys_version = ""
|
||||||
|
|
|
@ -1075,11 +1075,11 @@ static bool llama_eval_internal(
|
||||||
const int n_past,
|
const int n_past,
|
||||||
const int n_threads) {
|
const int n_threads) {
|
||||||
|
|
||||||
// enforce that the first token is BOS
|
// enforce that the first token is BOS (not needed, messes with my context manip code)
|
||||||
if (n_past == 0 && tokens[0] != llama_token_bos()) {
|
//if (n_past == 0 && tokens[0] != llama_token_bos()) {
|
||||||
fprintf(stderr, "%s: first token must be BOS\n", __func__);
|
//fprintf(stderr, "%s: first token must be BOS\n", __func__);
|
||||||
// return false; //never fail. Not even in the face of Armageddon.
|
// return false; //never fail. Not even in the face of Armageddon.
|
||||||
}
|
//}
|
||||||
|
|
||||||
const int64_t t_start_us = ggml_time_us();
|
const int64_t t_start_us = ggml_time_us();
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue