Merge branch 'ggerganov:master' into server-chat-templates-custom

commit dd2bc3293b
MaggotHATE, 2024-11-22 23:39:53 +05:00, committed by GitHub
18 changed files with 380 additions and 248 deletions


@@ -1,50 +0,0 @@
name: Low Severity Bugs
description: Used to report low severity bugs in llama.cpp (e.g. cosmetic issues, non critical UI glitches)
title: "Bug: "
labels: ["bug-unconfirmed", "low severity"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
Please include information about your system, the steps to reproduce the bug,
and the version of llama.cpp that you are using.
If possible, please provide a minimal code example that reproduces the bug.
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen?
placeholder: Tell us what you see!
validations:
required: true
- type: textarea
id: version
attributes:
label: Name and Version
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: What operating system are you seeing the problem on?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell


@@ -0,0 +1,73 @@
name: Bug (compilation)
description: Something goes wrong when trying to compile llama.cpp.
title: "Compile bug: "
labels: ["bug-unconfirmed", "compilation"]
body:
- type: markdown
attributes:
value: >
Thanks for taking the time to fill out this bug report!
This issue template is intended for bug reports where the compilation of llama.cpp fails.
Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
by clearing `~/.cache/ccache` (on Linux).
- type: textarea
id: commit
attributes:
label: Git commit
description: Which commit are you trying to compile?
placeholder: |
$git rev-parse HEAD
84a07a17b1b08cf2b9747c633a2372782848a27f
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Which operating systems do you know to be affected?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: true
- type: dropdown
id: backends
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
multiple: true
- type: textarea
id: steps_to_reproduce
attributes:
label: Steps to Reproduce
description: >
Please tell us how to reproduce the bug and any additional information that you think could be useful for fixing it.
If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
placeholder: >
Here are the exact commands that I used: ...
validations:
required: true
- type: textarea
id: first_bad_commit
attributes:
label: First Bad Commit
description: >
If the bug was not present on an earlier version: when did it start appearing?
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
Please copy and paste any relevant log output, including the command that you entered and any generated text.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: true


@@ -0,0 +1,98 @@
name: Bug (model use)
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
title: "Eval bug: "
labels: ["bug-unconfirmed", "model evaluation"]
body:
- type: markdown
attributes:
value: >
Thanks for taking the time to fill out this bug report!
This issue template is intended for bug reports where the model evaluation results
(i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
If you encountered the issue while using an external UI (e.g. ollama),
please reproduce your issue using one of the examples/binaries in this repository.
The `llama-cli` binary can be used for simple and reproducible model inference.
- type: textarea
id: version
attributes:
label: Name and Version
description: Which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Which operating systems do you know to be affected?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: true
- type: dropdown
id: backends
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
multiple: true
- type: textarea
id: hardware
attributes:
label: Hardware
description: Which CPUs/GPUs are you using?
placeholder: >
e.g. Ryzen 5950X + 2x RTX 4090
validations:
required: true
- type: textarea
id: model
attributes:
label: Model
description: >
Which model at which quantization were you using when encountering the bug?
If you downloaded a GGUF file off of Huggingface, please provide a link.
placeholder: >
e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
validations:
required: false
- type: textarea
id: steps_to_reproduce
attributes:
label: Steps to Reproduce
description: >
Please tell us how to reproduce the bug and any additional information that you think could be useful for fixing it.
If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
that information would be very much appreciated by us.
placeholder: >
e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
When I use -ngl 0 it works correctly.
Here are the exact commands that I used: ...
validations:
required: true
- type: textarea
id: first_bad_commit
attributes:
label: First Bad Commit
description: >
If the bug was not present on an earlier version: when did it start appearing?
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
Please copy and paste any relevant log output, including the command that you entered and any generated text.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: true

.github/ISSUE_TEMPLATE/019-bug-misc.yml (new file)

@@ -0,0 +1,78 @@
name: Bug (misc.)
description: Something is not working the way it should (and it's not covered by any of the above cases).
title: "Misc. bug: "
labels: ["bug-unconfirmed"]
body:
- type: markdown
attributes:
value: >
Thanks for taking the time to fill out this bug report!
This issue template is intended for miscellaneous bugs that don't fit into any other category.
If you encountered the issue while using an external UI (e.g. ollama),
please reproduce your issue using one of the examples/binaries in this repository.
- type: textarea
id: version
attributes:
label: Name and Version
description: Which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Which operating systems do you know to be affected?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: true
- type: dropdown
id: module
attributes:
label: Which llama.cpp modules do you know to be affected?
multiple: true
options:
- libllama (core library)
- llama-cli
- llama-server
- llama-bench
- llama-quantize
- Python/Bash scripts
- Other (Please specify in the next section)
validations:
required: true
- type: textarea
id: steps_to_reproduce
attributes:
label: Steps to Reproduce
description: >
Please tell us how to reproduce the bug and any additional information that you think could be useful for fixing it.
validations:
required: true
- type: textarea
id: first_bad_commit
attributes:
label: First Bad Commit
description: >
If the bug was not present on an earlier version: when did it start appearing?
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
Please copy and paste any relevant log output, including the command that you entered and any generated text.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: true


@@ -1,50 +0,0 @@
name: Medium Severity Bug
description: Used to report medium severity bugs in llama.cpp (e.g. Malfunctioning Features but generally still useable)
title: "Bug: "
labels: ["bug-unconfirmed", "medium severity"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
Please include information about your system, the steps to reproduce the bug,
and the version of llama.cpp that you are using.
If possible, please provide a minimal code example that reproduces the bug.
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen?
placeholder: Tell us what you see!
validations:
required: true
- type: textarea
id: version
attributes:
label: Name and Version
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: What operating system are you seeing the problem on?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell


@@ -1,5 +1,5 @@
 name: Enhancement
-description: Used to request enhancements for llama.cpp
+description: Used to request enhancements for llama.cpp.
 title: "Feature Request: "
 labels: ["enhancement"]
 body:


@@ -1,50 +0,0 @@
name: High Severity Bug
description: Used to report high severity bugs in llama.cpp (e.g. Malfunctioning features hindering important common workflow)
title: "Bug: "
labels: ["bug-unconfirmed", "high severity"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
Please include information about your system, the steps to reproduce the bug,
and the version of llama.cpp that you are using.
If possible, please provide a minimal code example that reproduces the bug.
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen?
placeholder: Tell us what you see!
validations:
required: true
- type: textarea
id: version
attributes:
label: Name and Version
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: What operating system are you seeing the problem on?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell


@@ -1,5 +1,5 @@
 name: Research
-description: Track new technical research area
+description: Track new technical research area.
 title: "Research: "
 labels: ["research 🔬"]
 body:


@@ -1,50 +0,0 @@
name: Critical Severity Bug
description: Used to report critical severity bugs in llama.cpp (e.g. Crashing, Corrupted, Dataloss)
title: "Bug: "
labels: ["bug-unconfirmed", "critical severity"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
Please include information about your system, the steps to reproduce the bug,
and the version of llama.cpp that you are using.
If possible, please provide a minimal code example that reproduces the bug.
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen?
placeholder: Tell us what you see!
validations:
required: true
- type: textarea
id: version
attributes:
label: Name and Version
description: Which executable and which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: What operating system are you seeing the problem on?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell


@@ -1,5 +1,5 @@
 name: Refactor (Maintainers)
-description: Used to track refactoring opportunities
+description: Used to track refactoring opportunities.
 title: "Refactor: "
 labels: ["refactor"]
 body:


@@ -986,13 +986,14 @@ jobs:
       if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
       run: |
         echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
-        cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.4.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
-        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_win_proxy_loader.dll" ./build/bin
-        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_level_zero.dll" ./build/bin
-        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin


@@ -3,6 +3,33 @@ if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
     message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
 endif()
+
+# Auto-detect SoC type and version; if detection fails, abort the build.
+set(SOC_VERSION "")
+function(detect_ascend_soc_type SOC_VERSION)
+    execute_process(
+        COMMAND bash -c "npu-smi info|awk -F' ' 'NF > 0 && NR==7 {print $3}'"
+        OUTPUT_VARIABLE npu_info
+        RESULT_VARIABLE npu_result
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    if("${npu_info}" STREQUAL "" OR ${npu_result})
+        message(FATAL_ERROR "Auto-detect of Ascend SoC type failed, please specify it manually or check that the Ascend device is working normally.")
+    endif()
+    set(${SOC_VERSION} "Ascend${npu_info}" PARENT_SCOPE)
+endfunction()
+
+if(NOT SOC_TYPE)
+    detect_ascend_soc_type(SOC_VERSION)
+    set(SOC_TYPE "${SOC_VERSION}")
+    message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}")
+else()
+    string(TOLOWER ${SOC_TYPE} SOC_VERSION)
+endif()
+
+# Construct the SoC-specific compile option ASCEND_<SoC major SN>, e.g. ASCEND_910B or ASCEND_310P.
+string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
+set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
+
 if (CANN_INSTALL_DIR)
     # Only Support Linux.
     if (NOT UNIX)
@@ -39,6 +66,8 @@ if (CANN_INSTALL_DIR)
     target_include_directories(ggml-cann PRIVATE . .. ${CANN_INCLUDE_DIRS})
     target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)
 
+    target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
+
     message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
     message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
 else()
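Taken together, this build change funnels everything through one preprocessor symbol: CMake detects the SoC (or lowercases an explicit SOC_TYPE), extracts the major serial number, and defines ASCEND_<SN> for both the host library and, below, the AscendC kernels. A minimal C++ sketch of how such a symbol is consumed — the ASCEND_910B branch and the function name are hypothetical; only ASCEND_310P is actually tested in this diff:

// Sketch only: the shape of code gated by SOC_TYPE_COMPILE_OPTION.
#include <cstdio>

void report_soc_specific_path() {
#if defined(ASCEND_310P)
    std::puts("310P path: copies padded to whole 32-byte blocks");
#elif defined(ASCEND_910B)                  // hypothetical sibling branch
    std::puts("910B path");
#else
    std::puts("generic CANN path");
#endif
}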


@@ -2312,6 +2312,14 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
+        {
+#ifdef ASCEND_310P
+            // Special operation for get_row_f32 kernel of 310P: clear the content of dest data buffer when row is not aligned to 32 bytes
+            if ((src0->ne[0] % 8) != 0) {
+                size_t dst_len = src1->ne[0] * src1->ne[1] * src1->ne[2] * src0->ne[0] * ggml_type_size(GGML_TYPE_F32);
+                ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
+            }
+#endif
             aclrtlaunch_ascendc_get_row_f32(
                 24, ctx.stream(), src0->data, src1->data, dst->data,
                 ((ggml_tensor*)src0->extra)->ne,
@@ -2320,7 +2328,16 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ((ggml_tensor*)src1->extra)->nb, ((ggml_tensor*)dst->extra)->ne,
                 ((ggml_tensor*)dst->extra)->nb);
             break;
+        }
         case GGML_TYPE_F16:
+        {
+#ifdef ASCEND_310P
+            // Special operation for get_row_f16 kernel of 310P: clear the content of dest data buffer when row is not aligned to 32 bytes
+            if ((src0->ne[0] % 16) != 0) {
+                size_t dst_len = src1->ne[0] * src1->ne[1] * src1->ne[2] * src0->ne[0] * ggml_type_size(GGML_TYPE_F32); // out is also f32, even input is f16
+                ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
+            }
+#endif
             aclrtlaunch_ascendc_get_row_f16(
                 24, ctx.stream(), src0->data, src1->data, dst->data,
                 ((ggml_tensor*)src0->extra)->ne,
@@ -2329,6 +2346,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ((ggml_tensor*)src1->extra)->nb, ((ggml_tensor*)dst->extra)->ne,
                 ((ggml_tensor*)dst->extra)->nb);
             break;
+        }
         case GGML_TYPE_Q4_0:
             aclrtlaunch_ascendc_get_row_q4_0(
                 24, ctx.stream(), src0->data, src1->data, dst->data,
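The reason for the pre-clear becomes visible in the kernel diffs further down: on 310P the tail of each row is written as a whole 32-byte block in atomic-add mode, so stale bytes in the destination would be summed into the result. A standalone sketch (hypothetical helper, not part of the diff) of how many bytes per row actually get written once the tail is padded to a full block:

// Sketch: rows with row_elems % elem_per_block != 0 spill past the logical
// row end, which is why the destination buffer is zero-cleared first.
#include <cstddef>

size_t padded_row_bytes(size_t row_elems, size_t elem_size) {
    const size_t elem_per_block = 32 / elem_size;   // 8 for f32, 16 for f16
    const size_t tail = row_elems % elem_per_block;
    const size_t padded_elems = tail != 0 ? row_elems + (elem_per_block - tail) : row_elems;
    return padded_elems * elem_size;                // bytes the kernel actually writes
}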


@@ -1,7 +1,3 @@
-if (NOT SOC_TYPE)
-    set (SOC_TYPE "Ascend910B3")
-endif()
-
 file(GLOB SRC_FILES
     get_row_f32.cpp
     get_row_f16.cpp
@@ -13,7 +9,6 @@ file(GLOB SRC_FILES
     dup.cpp
 )
 
-string(TOLOWER ${SOC_TYPE} SOC_VERSION)
 set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR})
 set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim")
@@ -30,4 +25,6 @@ ascendc_library(ascendc_kernels STATIC
     ${SRC_FILES}
 )
 
+message(STATUS "CANN: compile ascend kernels with SOC_VERSION:${SOC_VERSION}.")
+ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
 # ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)


@@ -5,6 +5,7 @@
 using namespace AscendC;
 
 #define BUFFER_NUM 2
+const int64_t SUPPORTED_MAX_DIM = 65535; // currently the max block dim supported by the dup kernel is 65535
 
 template <typename SRC_T, typename DST_T>
 class DupByRows {
@@ -19,6 +20,7 @@ class DupByRows {
         // Input has four dims.
         int64_t op_block_num = GetBlockNum();
         int64_t op_block_idx = GetBlockIdx();
+        assert(op_block_idx < SUPPORTED_MAX_DIM && op_block_idx >= 0, "Invalid block index:%d, max is:%d\n", op_block_idx, SUPPORTED_MAX_DIM);
 
         // param
         num_rows = input_ne_ub[1] * input_ne_ub[2] * input_ne_ub[3];
@@ -51,24 +53,36 @@ class DupByRows {
 
     __aicore__ inline void copy_in() {
         LocalTensor<SRC_T> src_local = src_queue.AllocTensor<SRC_T>();
-
-        DataCopyExtParams dataCopyParams;
-        dataCopyParams.blockCount = 1;
-        dataCopyParams.blockLen = num_elem * sizeof(SRC_T);
-        DataCopyPadExtParams<SRC_T> padParams;
-        DataCopyPad(src_local, src_gm, dataCopyParams, padParams);
-
+        const size_t elem_per_block = 32 / sizeof(SRC_T);
+        size_t tail = num_elem % elem_per_block;
+        size_t cpy_elements_len = tail > 0 ? num_elem + 1 : num_elem;
+        DataCopy(src_local, src_gm, cpy_elements_len);
         src_queue.EnQue(src_local);
     }
 
     __aicore__ inline void copy_out() {
         LocalTensor<DST_T> dst_local = dst_queue.DeQue<DST_T>();
-
+#ifdef ASCEND_310P
+        const size_t elem_per_block = 32 / sizeof(DST_T);
+        size_t tail = num_elem % elem_per_block;
+        size_t len = num_elem & ~(elem_per_block - 1);
+        if (len > 0) {
+            DataCopy(dst_gm, dst_local, len);
+        }
+        if(tail != 0) {
+            for (size_t i = tail; i < elem_per_block; i++) {
+                dst_local[len + i].SetValue(0, 0);
+            }
+            SetAtomicAdd<float>();
+            DataCopy(dst_gm[len], dst_local[len], elem_per_block);
+            SetAtomicNone();
+        }
+#else
         DataCopyExtParams dataCopyParams;
         dataCopyParams.blockCount = 1;
        dataCopyParams.blockLen = num_elem * sizeof(DST_T);
         DataCopyPad(dst_gm, dst_local, dataCopyParams);
+#endif
         dst_queue.FreeTensor(dst_local);
     }
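The 310P branch of copy_out sidesteps DataCopyPad by zeroing the pad lanes of the local tensor and writing one full 32-byte block in atomic-add mode, which leaves memory past the tail unchanged. A plain-C++ sketch of the same idea (names are illustrative; loops stand in for DataCopy/SetAtomicAdd):

#include <cstddef>

void copy_out_tail_sketch(float* dst, float* local, size_t num_elem) {
    const size_t elem_per_block = 32 / sizeof(float);    // 8 floats per 32-byte block
    const size_t tail = num_elem % elem_per_block;
    const size_t len  = num_elem & ~(elem_per_block - 1);
    for (size_t i = 0; i < len; i++) dst[i] = local[i];  // aligned bulk copy
    if (tail != 0) {
        for (size_t i = tail; i < elem_per_block; i++) {
            local[len + i] = 0.0f;                       // zero the pad lanes
        }
        for (size_t i = 0; i < elem_per_block; i++) {
            // atomic-add stand-in: lanes >= tail add 0 and leave dst as-is;
            // lanes < tail rely on dst being zeroed beforehand (cf. the aclrtMemset above)
            dst[len + i] += local[len + i];
        }
    }
}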


@@ -14,7 +14,7 @@ class GET_ROW_F16 {
                                  int64_t *output_ne_ub, size_t *output_nb_ub) {
         // TODO, use template for F16/f32
         int64_t op_block_num = GetBlockNum();
-        int64_t op_block_idx = GetBlockIdx();
+        op_block_idx = GetBlockIdx();
 
         for (int i = 0; i < 4; i++) {
             input_ne[i] = input_ne_ub[i];
@@ -59,32 +59,42 @@ class GET_ROW_F16 {
     }
 
     __aicore__ inline void copy_in(uint32_t offset, size_t len) {
+        size_t origin_len = len;
         LocalTensor<half> input_local = input_queue.AllocTensor<half>();
-        size_t tail = len % 32;
-        len = len & ~31;
-        DataCopy(input_local, input_gm[offset], len);
+        const size_t elem_per_block = 32 / sizeof(half);
+        size_t tail = len % elem_per_block;
+        len = len & ~(elem_per_block - 1);
         if(tail != 0) {
-            DataCopyExtParams dataCopyParams;
-            dataCopyParams.blockCount = 1;
-            dataCopyParams.blockLen = tail * sizeof(half);
-            DataCopyPadExtParams<half> padParams;
-            DataCopyPad(input_local[len], input_gm[offset + len],
-                        dataCopyParams, padParams);
+            len += elem_per_block;
         }
+        DataCopy(input_local, input_gm[offset], len);
         input_queue.EnQue(input_local);
     }
 
     __aicore__ inline void copy_out(uint32_t offset, size_t len) {
         LocalTensor<float> output_local = output_queue.DeQue<float>();
-        size_t tail = len % 32;
-        len = len & ~31;
-        DataCopy(output_gm[offset], output_local, len);
+        const size_t elem_per_block = 32 / sizeof(float);
+        size_t tail = len % elem_per_block;
+        len = len & ~(elem_per_block - 1);
+        if (len > 0) {
+            DataCopy(output_gm[offset], output_local, len);
+        }
         if(tail != 0) {
+#ifdef ASCEND_310P
+            for (size_t i = tail; i < elem_per_block; i++) {
+                output_local[len + i].SetValue(0, 0);
+            }
+            SetAtomicAdd<float>();
+            DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
+            SetAtomicNone();
+#else
             DataCopyExtParams dataCopyParams;
             dataCopyParams.blockCount = 1;
             dataCopyParams.blockLen = tail * sizeof(float);
             DataCopyPad(output_gm[offset + len], output_local[len],
                         dataCopyParams);
+#endif
         }
         output_queue.FreeTensor(output_local);
     }
@@ -150,6 +160,7 @@ class GET_ROW_F16 {
     GlobalTensor<float> output_gm;
     TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
     TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
+    int64_t op_block_idx;
 };
 
 template <typename T>
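Both copy helpers now reduce to power-of-two block arithmetic: copy_in rounds the length up to a whole block (round down, then add one block if there was a tail), while copy_out rounds down and treats the tail separately. A self-contained sketch of the two roundings (hypothetical helpers, valid only when elem_per_block is a power of two):

#include <cassert>
#include <cstddef>

size_t round_down_to_block(size_t len, size_t elem_per_block) {
    assert((elem_per_block & (elem_per_block - 1)) == 0); // power of two only
    return len & ~(elem_per_block - 1);                   // drop the tail
}

size_t round_up_to_block(size_t len, size_t elem_per_block) {
    size_t tail = len % elem_per_block;
    // equivalent to the kernel's "round down, then len += elem_per_block if tail != 0"
    return tail != 0 ? len + (elem_per_block - tail) : len;
}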


@@ -13,7 +13,7 @@ class GET_ROW_F32 {
                                  int64_t *indices_ne_ub, size_t *indices_nb_ub,
                                  int64_t *output_ne_ub, size_t *output_nb_ub) {
         int64_t op_block_num = GetBlockNum();
-        int64_t op_block_idx = GetBlockIdx();
+        op_block_idx = GetBlockIdx();
 
         for (int i = 0; i < 4; i++) {
             input_ne[i] = input_ne_ub[i];
@@ -55,31 +55,40 @@ class GET_ROW_F32 {
     __aicore__ inline void copy_in(uint32_t offset, size_t len) {
         LocalTensor<float> input_local = input_queue.AllocTensor<float>();
-        size_t tail = len % 32;
-        len = len & ~31;
-        DataCopy(input_local, input_gm[offset], len);
+        const size_t elem_per_block = 32 / sizeof(float);
+        size_t tail = len % elem_per_block;
+        len = len & ~(elem_per_block - 1);
         if(tail != 0) {
-            DataCopyExtParams dataCopyParams;
-            dataCopyParams.blockCount = 1;
-            dataCopyParams.blockLen = tail * sizeof(float);
-            DataCopyPadExtParams<float> padParams;
-            DataCopyPad(input_local[len], input_gm[offset + len],
-                        dataCopyParams, padParams);
+            len += elem_per_block;
         }
+        DataCopy(input_local, input_gm[offset], len);
         input_queue.EnQue(input_local);
     }
 
     __aicore__ inline void copy_out(uint32_t offset, size_t len) {
         LocalTensor<float> output_local = output_queue.DeQue<float>();
-        size_t tail = len % 32;
-        len = len & ~31;
-        DataCopy(output_gm[offset], output_local, len);
+        const size_t elem_per_block = 32 / sizeof(float);
+        size_t tail = len % elem_per_block;
+        len = len & ~(elem_per_block - 1);
+        if (len > 0) {
+            DataCopy(output_gm[offset], output_local, len);
+        }
         if(tail != 0) {
+#ifdef ASCEND_310P
+            for (size_t i = tail; i < elem_per_block; i++) {
+                output_local[len + i].SetValue(0, 0);
+            }
+            SetAtomicAdd<float>();
+            DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
+            SetAtomicNone();
+#else
             DataCopyExtParams dataCopyParams;
             dataCopyParams.blockCount = 1;
             dataCopyParams.blockLen = tail * sizeof(float);
             DataCopyPad(output_gm[offset + len], output_local[len],
                         dataCopyParams);
+#endif
         }
         output_queue.FreeTensor(output_local);
     }
@@ -144,6 +153,7 @@ class GET_ROW_F32 {
     GlobalTensor<float> output_gm;
     TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
     TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
+    int64_t op_block_idx;
 };
 
 template <typename T>


@@ -110,9 +110,12 @@ class GET_ROW_Q4_0 {
         LocalTensor<float> output_local = output_queue.AllocTensor<float>();
 
         // TODO: cast more data to speed up.
+#ifdef ASCEND_310P
+        // TODO: support quantization on 310P
+#else
         Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
         Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
+#endif
 
         // Only mul need compile by group.
         half scale = scale_gm.GetValue(scale_offset);