diff --git a/examples/common.cpp b/examples/common.cpp index 97eded6ec..f1c3bae13 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -100,6 +100,9 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { arg = argv[i]; if (arg == "-s" || arg == "--seed") { +#if defined(GGML_USE_CUBLAS) + fprintf(stderr, "WARNING: when using cuBLAS generation results are NOT guaranteed to be reproducible.\n"); +#endif if (++i >= argc) { invalid_param = true; break; diff --git a/ggml-cuda.cu b/ggml-cuda.cu index e8a1e77cb..127b352a0 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -348,7 +348,7 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) { CUDA_CHECK(cudaFree(ptr)); } -#define GGML_CUDA_MAX_STREAMS 8 +#define GGML_CUDA_MAX_STREAMS 8 // Set this to 1 for reproducible matrix multiplication. #define GGML_CUDA_MAX_EVENTS 64 static cublasHandle_t g_cublasH = nullptr; static cudaStream_t g_cudaStreams[GGML_CUDA_MAX_STREAMS] = { nullptr };