Only one CUDA stream per device for async compute (#1898)

This commit is contained in:
Johannes Gäßler 2023-06-17 19:15:02 +02:00 committed by GitHub
parent 051e1b0e6a
commit 2c9380dd2f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 38 deletions

View file

@@ -106,9 +106,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
}
if (arg == "-s" || arg == "--seed") {
#if defined(GGML_USE_CUBLAS)
fprintf(stderr, "WARNING: when using cuBLAS generation results are NOT guaranteed to be reproducible.\n");
#endif
if (++i >= argc) {
invalid_param = true;
break;