CANN: Fix build failure on Ascend310P in two cases:

1) SOC_TYPE is specified manually
2) The build runs in some unusual compile environments
This commit is contained in:
leo-pony 2024-11-26 21:20:47 +08:00
parent b7420131bf
commit dc60ede113
6 changed files with 54 additions and 8 deletions

View file

@ -22,13 +22,14 @@ if(NOT SOC_TYPE)
detect_ascend_soc_type(SOC_VERSION) detect_ascend_soc_type(SOC_VERSION)
set(SOC_TYPE "${SOC_VERSION}") set(SOC_TYPE "${SOC_VERSION}")
message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}") message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}")
else()
string(TOLOWER ${SOC_TYPE} SOC_VERSION)
endif() endif()
# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND310P. string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower
# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND_310P.
string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}") string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}") set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)
if (CANN_INSTALL_DIR) if (CANN_INSTALL_DIR)
# Only Support Linux. # Only Support Linux.

View file

@ -25,6 +25,6 @@ ascendc_library(ascendc_kernels STATIC
${SRC_FILES} ${SRC_FILES}
) )
message(STATUS "CANN: compile ascend kernels witch SOC_VERSION:${SOC_VERSION}.") message(STATUS "CANN: compile ascend kernels witch SOC_TYPE:${SOC_TYPE}, SOC_VERSION:${SOC_VERSION}, compile macro:-D${SOC_TYPE_COMPILE_OPTION}.")
ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}") ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
# ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP) # ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)

View file

@ -2,6 +2,15 @@
// optimize me. Use template to avoid copy code. // optimize me. Use template to avoid copy code.
using namespace AscendC; using namespace AscendC;
#ifdef ASCEND_310P // 310P not support 4bit get row
// Placeholder kernel entry point for ascendc_get_row_q4_0, compiled only when
// ASCEND_310P is defined. It accepts the same eight GM_ADDR arguments as the
// declaration below but uses none of them: the body only prints an error.
extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
// Only print an error message so that the following test cases can keep
// running. Of course, the test case that calls this operator still fails.
printf("Ascend310P not support 4bit get row.\n");
}
#else
#define BUFFER_NUM 2 #define BUFFER_NUM 2
@ -110,12 +119,9 @@ class GET_ROW_Q4_0 {
LocalTensor<float> output_local = output_queue.AllocTensor<float>(); LocalTensor<float> output_local = output_queue.AllocTensor<float>();
// TODO: cast more data to speed up. // TODO: cast more data to speed up.
#ifdef ASCEND_310P
// TODO: 310P support quantification
#else
Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0); Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0); Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
#endif
// Only mul need compile by group. // Only mul need compile by group.
half scale = scale_gm.GetValue(scale_offset); half scale = scale_gm.GetValue(scale_offset);
@ -194,3 +200,5 @@ extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
indices_nb_ub, output_ne_ub, output_nb_ub); indices_nb_ub, output_ne_ub, output_nb_ub);
op.calculate(); op.calculate();
} }
#endif // #ifdef ASCEND_310P

View file

@ -1,6 +1,14 @@
#include "kernel_operator.h" #include "kernel_operator.h"
using namespace AscendC; using namespace AscendC;
#ifdef ASCEND_310P
// Placeholder kernel entry point for ascendc_quantize_f16_q8_0, compiled only
// when ASCEND_310P is defined. None of the five GM_ADDR arguments is used:
// the body only prints an error.
extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
// Only print an error message so that the following test cases can keep
// running. Of course, the test case that calls this operator still fails.
printf("Ascend310P not support f16->8bit quantization.\n");
}
#else
#define BUFFER_NUM 2 #define BUFFER_NUM 2
#define QK8_0 32 #define QK8_0 32
@ -206,3 +214,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f16_q8_0(
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
op.calculate(); op.calculate();
} }
#endif // #ifdef ASCEND_310P

View file

@ -1,6 +1,14 @@
#include "kernel_operator.h" #include "kernel_operator.h"
using namespace AscendC; using namespace AscendC;
#ifdef ASCEND_310P // 310P not support f32->8bit quantization
// Placeholder kernel entry point for ascendc_quantize_f32_q8_0, compiled only
// when ASCEND_310P is defined. None of the five GM_ADDR arguments is used:
// the body only prints an error.
extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
// Only print an error message so that the following test cases can keep
// running. Of course, the test case that calls this operator still fails.
printf("Ascend310P not support f32->8bit quantization.\n");
}
#else
#define BUFFER_NUM 2 #define BUFFER_NUM 2
#define QK8_0 32 #define QK8_0 32
@ -204,3 +212,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_q8_0(
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
op.calculate(); op.calculate();
} }
#endif // #ifdef ASCEND_310P

View file

@ -1,6 +1,21 @@
#include "kernel_operator.h" #include "kernel_operator.h"
using namespace AscendC; using namespace AscendC;
#ifdef ASCEND_310P // 310P not support float->4bit quantization
// Placeholder kernel entry point for ascendc_quantize_f32_to_q4_0, compiled
// only when ASCEND_310P is defined. None of the five GM_ADDR arguments is
// used: the body only prints an error.
extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
// Only print an error message so that the following test cases can keep
// running. Of course, the test case that calls this operator still fails.
printf("Ascend310P not support f32->4bit quantization.\n");
}
// Placeholder kernel entry point for ascendc_quantize_f16_to_q4_0, compiled
// only when ASCEND_310P is defined. None of the five GM_ADDR arguments is
// used: the body only prints an error.
extern "C" __global__ __aicore__ void ascendc_quantize_f16_to_q4_0(
GM_ADDR input_gm, GM_ADDR output_gm, GM_ADDR input_ne_gm,
GM_ADDR input_nb_gm, GM_ADDR output_ne_gm) {
// Only print an error message so that the following test cases can keep
// running. Of course, the test case that calls this operator still fails.
printf("Ascend310P not support f16->4bit quantization.\n");
}
#else
#define BUFFER_NUM 2 #define BUFFER_NUM 2
#define Group_Size 32 #define Group_Size 32
@ -276,3 +291,5 @@ extern "C" __global__ __aicore__ void ascendc_quantize_f32_to_q4_0(
op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub); op.init(input_gm, output_gm, input_ne_ub, input_nb_ub, output_ne_ub);
op.calculate(); op.calculate();
} }
#endif // #ifdef ASCEND_310P