cuda : tweak mm stride to double perf on P40 + GTX 970

This commit is contained in:
Jared Van Bortel 2023-11-26 22:20:18 -05:00
parent 3e73d31d9c
commit 12fb1c58ec

View file

@@ -467,7 +467,7 @@ static_assert(K_QUANTS_PER_ITERATION == 1 || K_QUANTS_PER_ITERATION == 2, "K_QUA
#define GGML_CUDA_PEER_MAX_BATCH_SIZE 128
#endif // GGML_CUDA_PEER_MAX_BATCH_SIZE
-#define MUL_MAT_SRC1_COL_STRIDE 128
+#define MUL_MAT_SRC1_COL_STRIDE 4096
#define MAX_STREAMS 8
static cudaStream_t g_cudaStreams[GGML_CUDA_MAX_DEVICES][MAX_STREAMS] = { { nullptr } };