Merge branch 'ggerganov:master' into master
commit 021ca28c14
19 changed files with 384 additions and 252 deletions
.github/ISSUE_TEMPLATE/01-bug-low.yml (vendored, deleted, 50 lines)
@@ -1,50 +0,0 @@
name: Low Severity Bugs
description: Used to report low severity bugs in llama.cpp (e.g. cosmetic issues, non critical UI glitches)
title: "Bug: "
labels: ["bug-unconfirmed", "low severity"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for taking the time to fill out this bug report!
        Please include information about your system, the steps to reproduce the bug,
        and the version of llama.cpp that you are using.
        If possible, please provide a minimal code example that reproduces the bug.
  - type: textarea
    id: what-happened
    attributes:
      label: What happened?
      description: Also tell us, what did you expect to happen?
      placeholder: Tell us what you see!
    validations:
      required: true
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which executable and which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: What operating system are you seeing the problem on?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: shell
.github/ISSUE_TEMPLATE/010-bug-compilation.yml (vendored, new file, 73 lines)
@@ -0,0 +1,73 @@
name: Bug (compilation)
description: Something goes wrong when trying to compile llama.cpp.
title: "Compile bug: "
labels: ["bug-unconfirmed", "compilation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the compilation of llama.cpp fails.
        Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
        If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
        by clearing `~/.cache/ccache` (on Linux).
  - type: textarea
    id: commit
    attributes:
      label: Git commit
      description: Which commit are you trying to compile?
      placeholder: |
        $git rev-parse HEAD
        84a07a17b1b08cf2b9747c633a2372782848a27f
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
      multiple: true
  - type: textarea
    id: steps_to_reproduce
    attributes:
      label: Steps to Reproduce
      description: >
        Please tell us how to reproduce the bug and any additional information that you think could be useful for fixing it.
        If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
      placeholder: >
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including the command that you entered and any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
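As an aside (not part of the diff): a minimal sketch of the ccache check this template asks for, assuming a standard CMake build of llama.cpp on Linux. The `-DGGML_CCACHE=OFF` flag and the `~/.cache/ccache` path come from the template text above; the remaining commands are illustrative.

    # Rebuild once with ccache disabled to rule out a stale cache
    cmake -B build -DGGML_CCACHE=OFF
    cmake --build build --config Release
    # If this clean build succeeds, clearing the cache (on Linux) usually fixes the original failure
    rm -rf ~/.cache/ccache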
.github/ISSUE_TEMPLATE/011-bug-results.yml (vendored, new file, 98 lines)
@@ -0,0 +1,98 @@
name: Bug (model use)
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
title: "Eval bug: "
labels: ["bug-unconfirmed", "model evaluation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the model evaluation results
        (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
        The `llama-cli` binary can be used for simple and reproducible model inference.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
      multiple: true
  - type: textarea
    id: hardware
    attributes:
      label: Hardware
      description: Which CPUs/GPUs are you using?
      placeholder: >
        e.g. Ryzen 5950X + 2x RTX 4090
    validations:
      required: true
  - type: textarea
    id: model
    attributes:
      label: Model
      description: >
        Which model at which quantization were you using when encountering the bug?
        If you downloaded a GGUF file off of Huggingface, please provide a link.
      placeholder: >
        e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
    validations:
      required: false
  - type: textarea
    id: steps_to_reproduce
    attributes:
      label: Steps to Reproduce
      description: >
        Please tell us how to reproduce the bug and any additional information that you think could be useful for fixing it.
        If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
        that information would be very much appreciated by us.
      placeholder: >
        e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
        When I use -ngl 0 it works correctly.
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including the command that you entered and any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
.github/ISSUE_TEMPLATE/019-bug-misc.yml (vendored, new file, 78 lines)
@@ -0,0 +1,78 @@
name: Bug (misc.)
description: Something is not working the way it should (and it's not covered by any of the above cases).
title: "Misc. bug: "
labels: ["bug-unconfirmed"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for miscellaneous bugs that don't fit into any other category.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: module
    attributes:
      label: Which llama.cpp modules do you know to be affected?
      multiple: true
      options:
        - libllama (core library)
        - llama-cli
        - llama-server
        - llama-bench
        - llama-quantize
        - Python/Bash scripts
        - Other (Please specify in the next section)
    validations:
      required: true
  - type: textarea
    id: steps_to_reproduce
    attributes:
      label: Steps to Reproduce
      description: >
        Please tell us how to reproduce the bug and any additional information that you think could be useful for fixing it.
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including the command that you entered and any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
.github/ISSUE_TEMPLATE/02-bug-medium.yml (vendored, deleted, 50 lines)
@@ -1,50 +0,0 @@
name: Medium Severity Bug
description: Used to report medium severity bugs in llama.cpp (e.g. Malfunctioning Features but generally still useable)
title: "Bug: "
labels: ["bug-unconfirmed", "medium severity"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for taking the time to fill out this bug report!
        Please include information about your system, the steps to reproduce the bug,
        and the version of llama.cpp that you are using.
        If possible, please provide a minimal code example that reproduces the bug.
  - type: textarea
    id: what-happened
    attributes:
      label: What happened?
      description: Also tell us, what did you expect to happen?
      placeholder: Tell us what you see!
    validations:
      required: true
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which executable and which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: What operating system are you seeing the problem on?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: shell
@@ -1,5 +1,5 @@
 name: Enhancement
-description: Used to request enhancements for llama.cpp
+description: Used to request enhancements for llama.cpp.
 title: "Feature Request: "
 labels: ["enhancement"]
 body:
.github/ISSUE_TEMPLATE/03-bug-high.yml (vendored, deleted, 50 lines)
@@ -1,50 +0,0 @@
name: High Severity Bug
description: Used to report high severity bugs in llama.cpp (e.g. Malfunctioning features hindering important common workflow)
title: "Bug: "
labels: ["bug-unconfirmed", "high severity"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for taking the time to fill out this bug report!
        Please include information about your system, the steps to reproduce the bug,
        and the version of llama.cpp that you are using.
        If possible, please provide a minimal code example that reproduces the bug.
  - type: textarea
    id: what-happened
    attributes:
      label: What happened?
      description: Also tell us, what did you expect to happen?
      placeholder: Tell us what you see!
    validations:
      required: true
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which executable and which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: What operating system are you seeing the problem on?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: shell
@@ -1,5 +1,5 @@
 name: Research
-description: Track new technical research area
+description: Track new technical research area.
 title: "Research: "
 labels: ["research 🔬"]
 body:
.github/ISSUE_TEMPLATE/04-bug-critical.yml (vendored, deleted, 50 lines)
@@ -1,50 +0,0 @@
name: Critical Severity Bug
description: Used to report critical severity bugs in llama.cpp (e.g. Crashing, Corrupted, Dataloss)
title: "Bug: "
labels: ["bug-unconfirmed", "critical severity"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for taking the time to fill out this bug report!
        Please include information about your system, the steps to reproduce the bug,
        and the version of llama.cpp that you are using.
        If possible, please provide a minimal code example that reproduces the bug.
  - type: textarea
    id: what-happened
    attributes:
      label: What happened?
      description: Also tell us, what did you expect to happen?
      placeholder: Tell us what you see!
    validations:
      required: true
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which executable and which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: What operating system are you seeing the problem on?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: shell
@@ -1,5 +1,5 @@
 name: Refactor (Maintainers)
-description: Used to track refactoring opportunities
+description: Used to track refactoring opportunities.
 title: "Refactor: "
 labels: ["refactor"]
 body:
.github/workflows/build.yml (vendored, 9 changes)
@@ -986,13 +986,14 @@ jobs:
       if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
       run: |
         echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
-        cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.4.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin

-        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_win_proxy_loader.dll" ./build/bin
-        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/pi_level_zero.dll" ./build/bin
-        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl7.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
+        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
         cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
@@ -3,6 +3,33 @@ if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
     message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
 endif()

+# Auto-detech Soc type and Soc version, if detect failed, will abort build
+set(SOC_VERSION "")
+function(detect_ascend_soc_type SOC_VERSION)
+    execute_process(
+        COMMAND bash -c "npu-smi info|awk -F' ' 'NF > 0 && NR==7 {print $3}'"
+        OUTPUT_VARIABLE npu_info
+        RESULT_VARIABLE npu_result
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+    )
+    if("${npu_info}" STREQUAL "" OR ${npu_result})
+        message(FATAL_ERROR "Auto-detech ascend soc type failed, please specify manually or check ascend device working normally.")
+    endif()
+    set(${SOC_VERSION} "Ascend${npu_info}" PARENT_SCOPE)
+endfunction()
+
+if(NOT SOC_TYPE)
+    detect_ascend_soc_type(SOC_VERSION)
+    set(SOC_TYPE "${SOC_VERSION}")
+    message(STATUS "CANN: SOC_VERSION auto-detected is:${SOC_VERSION}")
+else()
+    string(TOLOWER ${SOC_TYPE} SOC_VERSION)
+endif()
+
+# Construct Soc specify compile option: ASCEND_#Soc_Major_SN. Such as ASCEND_910B, ASCEND310P.
+string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
+set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
+
 if (CANN_INSTALL_DIR)
     # Only Support Linux.
     if (NOT UNIX)

@@ -39,6 +66,8 @@ if (CANN_INSTALL_DIR)
     target_include_directories(ggml-cann PRIVATE . .. ${CANN_INCLUDE_DIRS})
     target_link_directories(ggml-cann PRIVATE ${CANN_INSTALL_DIR}/lib64)

+    target_compile_definitions(ggml-cann PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
+
     message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
     message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")
 else()
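For context (not part of the diff): with the change above the Ascend SoC type is auto-detected from `npu-smi info`, and can still be overridden by setting SOC_TYPE. A hedged usage sketch of a CANN build, assuming the usual llama.cpp CMake flow; `-DGGML_CANN=on` and the example value `Ascend910B3` are taken from the surrounding build files, the rest is illustrative.

    # Let the build auto-detect the Ascend SoC type via npu-smi
    cmake -B build -DGGML_CANN=on
    cmake --build build --config Release
    # Or specify the SoC explicitly if npu-smi is not available on the build machine
    cmake -B build -DGGML_CANN=on -DSOC_TYPE=Ascend910B3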
@@ -2312,6 +2312,14 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
+        {
+#ifdef ASCEND_310P
+            // Special operation for get_row_f32 kernel of 310P: clear the content of dest data buffer when row is not aligned to 32 bytes
+            if ((src0->ne[0] % 8) != 0) {
+                size_t dst_len = src1->ne[0] * src1->ne[1] * src1->ne[2] * src0->ne[0] * ggml_type_size(GGML_TYPE_F32);
+                ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
+            }
+#endif
             aclrtlaunch_ascendc_get_row_f32(
                 24, ctx.stream(), src0->data, src1->data, dst->data,
                 ((ggml_tensor*)src0->extra)->ne,

@@ -2320,7 +2328,16 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ((ggml_tensor*)src1->extra)->nb, ((ggml_tensor*)dst->extra)->ne,
                 ((ggml_tensor*)dst->extra)->nb);
             break;
+        }
         case GGML_TYPE_F16:
+        {
+#ifdef ASCEND_310P
+            // Special operation for get_row_f16 kernel of 310P: clear the content of dest data buffer when row is not aligned to 32 bytes
+            if ((src0->ne[0] % 16) != 0) {
+                size_t dst_len = src1->ne[0] * src1->ne[1] * src1->ne[2] * src0->ne[0] * ggml_type_size(GGML_TYPE_F32); // out is also f32, even input is f16
+                ACL_CHECK(aclrtMemset((char*)dst->data, dst_len, 0, dst_len));
+            }
+#endif
             aclrtlaunch_ascendc_get_row_f16(
                 24, ctx.stream(), src0->data, src1->data, dst->data,
                 ((ggml_tensor*)src0->extra)->ne,

@@ -2329,6 +2346,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ((ggml_tensor*)src1->extra)->nb, ((ggml_tensor*)dst->extra)->ne,
                 ((ggml_tensor*)dst->extra)->nb);
             break;
+        }
         case GGML_TYPE_Q4_0:
             aclrtlaunch_ascendc_get_row_q4_0(
                 24, ctx.stream(), src0->data, src1->data, dst->data,
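A short aside on the alignment condition used above (hypothetical numbers, only to illustrate the arithmetic): a row is 32-byte aligned when ne[0] * sizeof(element) is a multiple of 32, i.e. ne[0] % 8 == 0 for f32 and ne[0] % 16 == 0 for f16; otherwise the 310P path pre-clears the destination buffer before launching the kernel.

    # e.g. ne0 = 4100 for f32: 4100*4 = 16400 bytes, not a multiple of 32 -> row unaligned, dst gets memset
    echo $(( (4100 * 4) % 32 ))   # prints 16
    echo $(( (4096 * 4) % 32 ))   # prints 0 (aligned)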
@@ -1,7 +1,3 @@
-if (NOT SOC_TYPE)
-    set (SOC_TYPE "Ascend910B3")
-endif()
-
 file(GLOB SRC_FILES
     get_row_f32.cpp
     get_row_f16.cpp

@@ -13,7 +9,6 @@ file(GLOB SRC_FILES
     dup.cpp
 )

-string(TOLOWER ${SOC_TYPE} SOC_VERSION)
 set(ASCEND_CANN_PACKAGE_PATH ${CANN_INSTALL_DIR})
 set(RUN_MODE "npu" CACHE STRING "run mode: npu/sim")

@@ -30,4 +25,6 @@ ascendc_library(ascendc_kernels STATIC
     ${SRC_FILES}
 )

+message(STATUS "CANN: compile ascend kernels witch SOC_VERSION:${SOC_VERSION}.")
+ascendc_compile_definitions(ascendc_kernels PRIVATE "-D${SOC_TYPE_COMPILE_OPTION}")
 # ascendc_compile_definitions(ascendc_kernels PRIVATE -DASCENDC_DUMP)
@@ -5,6 +5,7 @@
 using namespace AscendC;

 #define BUFFER_NUM 2
+const int64_t SUPPORTED_MAX_DIM = 65535; // currently the limit of max block dim supportted by dup kernel is 65535

 template <typename SRC_T, typename DST_T>
 class DupByRows {

@@ -19,6 +20,7 @@ class DupByRows {
         // Input has four dims.
         int64_t op_block_num = GetBlockNum();
         int64_t op_block_idx = GetBlockIdx();
+        assert(op_block_idx < SUPPORTED_MAX_DIM && op_block_idx >= 0, "Invalid block index:%d, max is:%d\n", op_block_idx, SUPPORTED_MAX_DIM);

         // param
         num_rows = input_ne_ub[1] * input_ne_ub[2] * input_ne_ub[3];

@@ -51,24 +53,36 @@ class DupByRows {

     __aicore__ inline void copy_in() {
         LocalTensor<SRC_T> src_local = src_queue.AllocTensor<SRC_T>();
-        DataCopyExtParams dataCopyParams;
-        dataCopyParams.blockCount = 1;
-        dataCopyParams.blockLen = num_elem * sizeof(SRC_T);
-        DataCopyPadExtParams<SRC_T> padParams;
-        DataCopyPad(src_local, src_gm, dataCopyParams, padParams);
+        const size_t elem_per_block = 32 / sizeof(SRC_T);
+        size_t tail = num_elem % elem_per_block;
+        size_t cpy_elements_len = tail > 0 ? num_elem + 1 : num_elem;
+        DataCopy(src_local, src_gm, cpy_elements_len);

         src_queue.EnQue(src_local);
     }

     __aicore__ inline void copy_out() {
         LocalTensor<DST_T> dst_local = dst_queue.DeQue<DST_T>();
+#ifdef ASCEND_310P
+        const size_t elem_per_block = 32 / sizeof(DST_T);
+        size_t tail = num_elem % elem_per_block;
+        size_t len = num_elem & ~(elem_per_block - 1);
+        if (len > 0) {
+            DataCopy(dst_gm, dst_local, len);
+        }
+        if(tail != 0) {
+            for (size_t i = tail; i < elem_per_block; i++) {
+                dst_local[len + i].SetValue(0, 0);
+            }
+            SetAtomicAdd<float>();
+            DataCopy(dst_gm[len], dst_local[len], elem_per_block);
+            SetAtomicNone();
+        }
+#else
         DataCopyExtParams dataCopyParams;
         dataCopyParams.blockCount = 1;
         dataCopyParams.blockLen = num_elem * sizeof(DST_T);
         DataCopyPad(dst_gm, dst_local, dataCopyParams);
+#endif
         dst_queue.FreeTensor(dst_local);
     }
@@ -14,7 +14,7 @@ class GET_ROW_F16 {
                               int64_t *output_ne_ub, size_t *output_nb_ub) {
         // TODO, use template for F16/f32
         int64_t op_block_num = GetBlockNum();
-        int64_t op_block_idx = GetBlockIdx();
+        op_block_idx = GetBlockIdx();

         for (int i = 0; i < 4; i++) {
             input_ne[i] = input_ne_ub[i];

@@ -59,32 +59,42 @@ class GET_ROW_F16 {
     }

     __aicore__ inline void copy_in(uint32_t offset, size_t len) {
+        size_t origin_len = len;
         LocalTensor<half> input_local = input_queue.AllocTensor<half>();
-        size_t tail = len % 32;
-        len = len & ~31;
-        DataCopy(input_local, input_gm[offset], len);
+        const size_t elem_per_block = 32 / sizeof(half);
+        size_t tail = len % elem_per_block;
+        len = len & ~(elem_per_block - 1);
         if(tail != 0) {
-            DataCopyExtParams dataCopyParams;
-            dataCopyParams.blockCount = 1;
-            dataCopyParams.blockLen = tail * sizeof(half);
-            DataCopyPadExtParams<half> padParams;
-            DataCopyPad(input_local[len], input_gm[offset + len],
-                        dataCopyParams, padParams);
+            len += elem_per_block;
         }
+        DataCopy(input_local, input_gm[offset], len);
         input_queue.EnQue(input_local);
     }

     __aicore__ inline void copy_out(uint32_t offset, size_t len) {
         LocalTensor<float> output_local = output_queue.DeQue<float>();
-        size_t tail = len % 32;
-        len = len & ~31;
-        DataCopy(output_gm[offset], output_local, len);
+        const size_t elem_per_block = 32 / sizeof(float);
+        size_t tail = len % elem_per_block;
+        len = len & ~(elem_per_block - 1);
+        if (len > 0) {
+            DataCopy(output_gm[offset], output_local, len);
+        }
+
         if(tail != 0) {
+#ifdef ASCEND_310P
+            for (size_t i = tail; i < elem_per_block; i++) {
+                output_local[len + i].SetValue(0, 0);
+            }
+            SetAtomicAdd<float>();
+            DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
+            SetAtomicNone();
+#else
             DataCopyExtParams dataCopyParams;
             dataCopyParams.blockCount = 1;
             dataCopyParams.blockLen = tail * sizeof(float);
             DataCopyPad(output_gm[offset + len], output_local[len],
                         dataCopyParams);
+#endif
         }
         output_queue.FreeTensor(output_local);
     }

@@ -150,6 +160,7 @@ class GET_ROW_F16 {
     GlobalTensor<float> output_gm;
     TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
     TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
+    int64_t op_block_idx;
 };

 template <typename T>
@@ -13,7 +13,7 @@ class GET_ROW_F32 {
                               int64_t *indices_ne_ub, size_t *indices_nb_ub,
                               int64_t *output_ne_ub, size_t *output_nb_ub) {
         int64_t op_block_num = GetBlockNum();
-        int64_t op_block_idx = GetBlockIdx();
+        op_block_idx = GetBlockIdx();

         for (int i = 0; i < 4; i++) {
             input_ne[i] = input_ne_ub[i];

@@ -55,31 +55,40 @@ class GET_ROW_F32 {

     __aicore__ inline void copy_in(uint32_t offset, size_t len) {
         LocalTensor<float> input_local = input_queue.AllocTensor<float>();
-        size_t tail = len % 32;
-        len = len & ~31;
-        DataCopy(input_local, input_gm[offset], len);
+        const size_t elem_per_block = 32 / sizeof(float);
+        size_t tail = len % elem_per_block;
+        len = len & ~(elem_per_block - 1);
         if(tail != 0) {
-            DataCopyExtParams dataCopyParams;
-            dataCopyParams.blockCount = 1;
-            dataCopyParams.blockLen = tail * sizeof(float);
-            DataCopyPadExtParams<float> padParams;
-            DataCopyPad(input_local[len], input_gm[offset + len],
-                        dataCopyParams, padParams);
+            len += elem_per_block;
         }
+        DataCopy(input_local, input_gm[offset], len);
         input_queue.EnQue(input_local);
     }

     __aicore__ inline void copy_out(uint32_t offset, size_t len) {
         LocalTensor<float> output_local = output_queue.DeQue<float>();
-        size_t tail = len % 32;
-        len = len & ~31;
-        DataCopy(output_gm[offset], output_local, len);
+        const size_t elem_per_block = 32 / sizeof(float);
+        size_t tail = len % elem_per_block;
+        len = len & ~(elem_per_block - 1);
+        if (len > 0) {
+            DataCopy(output_gm[offset], output_local, len);
+        }
+
         if(tail != 0) {
+#ifdef ASCEND_310P
+            for (size_t i = tail; i < elem_per_block; i++) {
+                output_local[len + i].SetValue(0, 0);
+            }
+            SetAtomicAdd<float>();
+            DataCopy(output_gm[offset + len], output_local[len], elem_per_block);
+            SetAtomicNone();
+#else
             DataCopyExtParams dataCopyParams;
             dataCopyParams.blockCount = 1;
             dataCopyParams.blockLen = tail * sizeof(float);
             DataCopyPad(output_gm[offset + len], output_local[len],
                         dataCopyParams);
+#endif
         }
         output_queue.FreeTensor(output_local);
     }

@@ -144,6 +153,7 @@ class GET_ROW_F32 {
     GlobalTensor<float> output_gm;
     TQue<QuePosition::VECIN, BUFFER_NUM> input_queue;
     TQue<QuePosition::VECOUT, BUFFER_NUM> output_queue;
+    int64_t op_block_idx;
 };

 template <typename T>
@@ -110,9 +110,12 @@ class GET_ROW_Q4_0 {
         LocalTensor<float> output_local = output_queue.AllocTensor<float>();

         // TODO: cast more data to speed up.
+#ifdef ASCEND_310P
+        // TODO: 310P support quantification
+#else
         Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
         Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
+#endif
         // Only mul need compile by group.
         half scale = scale_gm.GetValue(scale_offset);
@@ -13896,7 +13896,7 @@ int ggml_cpu_has_vsx(void) {
 }

 int ggml_cpu_has_neon(void) {
-#if defined(__ARM_ARCH)
+#if defined(__ARM_ARCH) && defined(__ARM_NEON)
     return ggml_arm_arch_features.has_neon;
 #else
     return 0;

@@ -13904,7 +13904,7 @@ int ggml_cpu_has_neon(void) {
 }

 int ggml_cpu_has_sve(void) {
-#if defined(__ARM_ARCH)
+#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
     return ggml_arm_arch_features.has_sve;
 #else
     return 0;

@@ -13912,7 +13912,7 @@ int ggml_cpu_has_sve(void) {
 }

 int ggml_cpu_has_matmul_int8(void) {
-#if defined(__ARM_ARCH)
+#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_MATMUL_INT8)
     return ggml_arm_arch_features.has_i8mm;
 #else
     return 0;

@@ -13920,7 +13920,7 @@ int ggml_cpu_has_matmul_int8(void) {
 }

 int ggml_cpu_get_sve_cnt(void) {
-#if defined(__ARM_ARCH)
+#if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE)
     return ggml_arm_arch_features.sve_cnt;
 #else
     return 0;
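One way to sanity-check these guards locally (a sketch, not part of the diff): ask the compiler which ARM feature macros it defines for the current target, since the runtime checks above now also require the matching compile-time macro to be present. The compiler invocation and flags below are illustrative assumptions.

    # Dump the predefined macros and filter for the features guarded above
    cc -mcpu=native -dM -E - < /dev/null | grep -E '__ARM_NEON|__ARM_FEATURE_SVE|__ARM_FEATURE_MATMUL_INT8'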