ggml : reduce hash table reset cost (#8698)
* ggml : reduce hash table reset cost * fix unreachable code warnings after GGML_ASSERT(false) * GGML_ASSERT(false) -> GGML_ABORT("fatal error") * GGML_ABORT use format string
This commit is contained in:
parent
01245f5b16
commit
2b1f616b20
46 changed files with 851 additions and 754 deletions
|
@ -284,7 +284,7 @@ void launch_fattn_tile_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor *
|
|||
launch_fattn<D, parallel_blocks>(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true);
|
||||
} break;
|
||||
default: {
|
||||
GGML_ASSERT(false && "FlashAttention without tensor cores only supports head sizes 64 and 128.");
|
||||
GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128.");
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue