Actually, for this round, do not include deprecated params; I don't want to have to deal with them (+2 squashed commits)
Squashed commits: [df2691c2] show context limit; [7c74f52a] prevent old scripts from crashing
This commit is contained in:
parent
a723466d50
commit
d74eab0e63
3 changed files with 8 additions and 2 deletions
|
@ -33,7 +33,7 @@
|
||||||
"!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\r\n",
|
"!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\r\n",
|
||||||
"!sleep 10\r\n",
|
"!sleep 10\r\n",
|
||||||
"!cat nohup.out\r\n",
|
"!cat nohup.out\r\n",
|
||||||
"!python koboldcpp.py model.ggml --usecublas 0 mmq --gpulayers $Layers --hordeconfig concedo\r\n"
|
"!python koboldcpp.py model.ggml --usecublas 0 mmq --gpulayers $Layers\r\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
|
@ -1768,7 +1768,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
||||||
int realnpredict = params.n_predict-stopper_unused_tokens;
|
int realnpredict = params.n_predict-stopper_unused_tokens;
|
||||||
float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict));
|
float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict));
|
||||||
float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2));
|
float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2));
|
||||||
printf("\nTime Taken - Processing:%.1fs (%.0fms/T), Generation:%.1fs (%.0fms/T), Total:%.1fs (%.1fT/s)", time1, pt1, time2, pt2, (time1 + time2), tokens_per_second);
|
printf("\nContextLimit: %d/%d, Processing:%.1fs (%.0fms/T), Generation:%.1fs (%.0fms/T), Total:%.1fs (%.1fT/s)",current_context_tokens.size(),nctx, time1, pt1, time2, pt2, (time1 + time2), tokens_per_second);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
output.status = 1;
|
output.status = 1;
|
||||||
generation_finished = true;
|
generation_finished = true;
|
||||||
|
|
|
@ -1902,4 +1902,10 @@ if __name__ == '__main__':
|
||||||
parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", action='store_true')
|
parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", action='store_true')
|
||||||
parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')
|
parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')
|
||||||
|
|
||||||
|
# #deprecated hidden args. they do nothing. do not use
|
||||||
|
# parser.add_argument("--psutil_set_threads", action='store_true', help=argparse.SUPPRESS)
|
||||||
|
# parser.add_argument("--stream", action='store_true', help=argparse.SUPPRESS)
|
||||||
|
# parser.add_argument("--unbantokens", action='store_true', help=argparse.SUPPRESS)
|
||||||
|
# parser.add_argument("--usemirostat", action='store_true', help=argparse.SUPPRESS)
|
||||||
|
|
||||||
main(parser.parse_args(),start_server=True)
|
main(parser.parse_args(),start_server=True)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue