restrict fp16 mat mul to volta and up
This commit is contained in:
parent
32ada53c8e
commit
7d5674dd2d
1 changed file with 3 additions and 2 deletions
|
@ -6059,8 +6059,9 @@ inline void ggml_cuda_op_mul_mat_cublas(
|
||||||
// ldc == nrows of the matrix that cuBLAS writes into
|
// ldc == nrows of the matrix that cuBLAS writes into
|
||||||
int ldc = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? ne0 : row_diff;
|
int ldc = dst->backend == GGML_BACKEND_GPU && id == g_main_device ? ne0 : row_diff;
|
||||||
|
|
||||||
// TODO: this may be slower in older architectures with poor fp16 support
|
const int compute_capability = g_compute_capabilities[id];
|
||||||
if (src0->type == GGML_TYPE_F16 && ggml_is_contiguous(src0) && ldc == row_diff) {
|
|
||||||
|
if (compute_capability >= CC_TURING && src0->type == GGML_TYPE_F16 && ggml_is_contiguous(src0) && ldc == row_diff) {
|
||||||
// convert src1 to fp16, multiply as fp16, convert dst to fp32
|
// convert src1 to fp16, multiply as fp16, convert dst to fp32
|
||||||
half * src1_as_f16 = nullptr;
|
half * src1_as_f16 = nullptr;
|
||||||
size_t src1_as = 0;
|
size_t src1_as = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue