ggml : reduce hash table reset cost (#8698)

* ggml : reduce hash table reset cost

* fix unreachable code warnings after GGML_ASSERT(false)

* GGML_ASSERT(false) -> GGML_ABORT("fatal error")

* GGML_ABORT use format string
This commit is contained in:
slaren 2024-07-27 04:41:55 +02:00 committed by GitHub
parent 01245f5b16
commit 2b1f616b20
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
46 changed files with 851 additions and 754 deletions

View file

@ -284,7 +284,7 @@ void launch_fattn_tile_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor *
launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
} break;
default: {
GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
} break;
}
}