CANN: Support Ascend310P to accelerate F32 and F16 Model (#10216)

* CANN Support Ascend310P to accelerate F32 and F16 Model

* Add compile option soc type macro ASCEND_310P to ggml-cann lib

* Remove unused code

* Remove the ascend soc_type hard code compile option in CMakelist.txt
This commit is contained in:
leo-pony 2024-11-22 14:07:20 +08:00 committed by GitHub
parent a5e47592b6
commit c18610b4ee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 123 additions and 41 deletions

View file

@ -2312,6 +2312,14 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
switch (src0->type) {
case GGML_TYPE_F32:
{
#ifdef ASCEND_310P
// Special operation for get_row_f32 kernel of 310P: clear the content of dest data buffer when row is not aligned to 32 bytes
if ((src0->ne[0] % 8) != 0) {
size_t dst_len = src1->ne[0] * src1->ne[1] * src1->ne[2] * src0->ne[0] * ggml_type_size(GGML_TYPE_F32);
ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
}
#endif
aclrtlaunch_ascendc_get_row_f32(
24, ctx.stream(), src0->data, src1->data, dst->data,
((ggml_tensor*)src0->extra)->ne,
@ -2320,7 +2328,16 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
((ggml_tensor*)src1->extra)->nb, ((ggml_tensor*)dst->extra)->ne,
((ggml_tensor*)dst->extra)->nb);
break;
}
case GGML_TYPE_F16:
{
#ifdef ASCEND_310P
// Special operation for get_row_f16 kernel of 310P: clear the content of dest data buffer when row is not aligned to 32 bytes
if ((src0->ne[0] % 16) != 0) {
size_t dst_len = src1->ne[0] * src1->ne[1] * src1->ne[2] * src0->ne[0] * ggml_type_size(GGML_TYPE_F32); // out is also f32, even input is f16
ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
}
#endif
aclrtlaunch_ascendc_get_row_f16(
24, ctx.stream(), src0->data, src1->data, dst->data,
((ggml_tensor*)src0->extra)->ne,
@ -2329,6 +2346,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
((ggml_tensor*)src1->extra)->nb, ((ggml_tensor*)dst->extra)->ne,
((ggml_tensor*)dst->extra)->nb);
break;
}
case GGML_TYPE_Q4_0:
aclrtlaunch_ascendc_get_row_q4_0(
24, ctx.stream(), src0->data, src1->data, dst->data,