diff --git a/colab.ipynb b/colab.ipynb index b3742ac04..e1e4b224c 100644 --- a/colab.ipynb +++ b/colab.ipynb @@ -33,7 +33,7 @@ "!nohup ./cloudflared-linux-amd64 tunnel --url http://localhost:5001 &\r\n", "!sleep 10\r\n", "!cat nohup.out\r\n", - "!python koboldcpp.py model.ggml --usecublas 0 mmq --gpulayers $Layers --hordeconfig concedo\r\n" + "!python koboldcpp.py model.ggml --usecublas 0 mmq --gpulayers $Layers\r\n" ] } ], diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 366a64df8..462148cd9 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1768,7 +1768,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o int realnpredict = params.n_predict-stopper_unused_tokens; float pt2 = (time2*1000.0/(realnpredict==0?1:realnpredict)); float tokens_per_second = (realnpredict == 0 ? 0 : realnpredict / (time1 + time2)); - printf("\nTime Taken - Processing:%.1fs (%.0fms/T), Generation:%.1fs (%.0fms/T), Total:%.1fs (%.1fT/s)", time1, pt1, time2, pt2, (time1 + time2), tokens_per_second); + printf("\nContextLimit: %d/%d, Processing:%.1fs (%.0fms/T), Generation:%.1fs (%.0fms/T), Total:%.1fs (%.1fT/s)",(int)current_context_tokens.size(),(int)nctx, time1, pt1, time2, pt2, (time1 + time2), tokens_per_second); fflush(stdout); output.status = 1; generation_finished = true; diff --git a/koboldcpp.py b/koboldcpp.py index 099bcdbd0..0bb5da6e5 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1902,4 +1902,10 @@ if __name__ == '__main__': parser.add_argument("--multiuser", help="Runs in multiuser mode, which queues incoming requests instead of blocking them.", action='store_true') parser.add_argument("--foreground", help="Windows only. Sends the terminal to the foreground every time a new prompt is generated. This helps avoid some idle slowdown issues.", action='store_true') + # #deprecated hidden args. they do nothing. 
do not use + # parser.add_argument("--psutil_set_threads", action='store_true', help=argparse.SUPPRESS) + # parser.add_argument("--stream", action='store_true', help=argparse.SUPPRESS) + # parser.add_argument("--unbantokens", action='store_true', help=argparse.SUPPRESS) + # parser.add_argument("--usemirostat", action='store_true', help=argparse.SUPPRESS) + main(parser.parse_args(),start_server=True)