Actually, for this round, do not include deprecated params. I don't want to have to deal with them (+2 squashed commits)

Squashed commit:

[df2691c2] show context limit

[7c74f52a] prevent old scripts from crashing
This commit is contained in:
Concedo 2023-10-10 18:51:04 +08:00
parent a723466d50
commit d74eab0e63
3 changed files with 8 additions and 2 deletions

View file

@ -33,7 +33,7 @@
"!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\r\n", "!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\r\n",
"!sleep 10\r\n", "!sleep 10\r\n",
"!cat nohup.out\r\n", "!cat nohup.out\r\n",
"!python koboldcpp.py model.ggml --usecublas 0 mmq --gpulayers $Layers --hordeconfig concedo\r\n" "!python koboldcpp.py model.ggml --usecublas 0 mmq --gpulayers $Layers\r\n"
] ]
} }
], ],

View file

@ -1768,7 +1768,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
int realnpredict = params.n_predict-stopper_unused_tokens; int realnpredict = params.n_predict-stopper_unused_tokens;
float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict)); float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict));
float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2)); float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2));
printf("\nTime Taken - Processing:%.1fs (%.0fms/T), Generation:%.1fs (%.0fms/T), Total:%.1fs (%.1fT/s)", time1, pt1, time2, pt2, (time1 + time2), tokens_per_second); printf("\nContextLimit: %d/%d, Processing:%.1fs (%.0fms/T), Generation:%.1fs (%.0fms/T), Total:%.1fs (%.1fT/s)",current_context_tokens.size(),nctx, time1, pt1, time2, pt2, (time1 + time2), tokens_per_second);
fflush(stdout); fflush(stdout);
output.status = 1; output.status = 1;
generation_finished = true; generation_finished = true;

View file

@ -1902,4 +1902,10 @@ if __name__ == '__main__':
parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", action='store_true') parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", action='store_true')
parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true') parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true')
# #deprecated hidden args. they do nothing. do not use
# parser.add_argument("--psutil_set_threads", action='store_true', help=argparse.SUPPRESS)
# parser.add_argument("--stream", action='store_true', help=argparse.SUPPRESS)
# parser.add_argument("--unbantokens", action='store_true', help=argparse.SUPPRESS)
# parser.add_argument("--usemirostat", action='store_true', help=argparse.SUPPRESS)
main(parser.parse_args(),start_server=True) main(parser.parse_args(),start_server=True)