Support multiple GPUs (split mode) on SYCL backend (#5806)
* support multiple cards: split-mode - layer|row * rm warning * rebase with master, support two new OPs, close feature for -sm=row, fix for unit test * update news * fix merge error * update according to review comments
This commit is contained in:
parent
9bf297a02b
commit
715641391d
8 changed files with 1506 additions and 814 deletions
|
@ -7,7 +7,7 @@
|
|||
|
||||
#include "ggml-sycl.h"
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
int main() {
|
||||
ggml_backend_sycl_print_sycl_devices();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -8,12 +8,19 @@ INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
|
|||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
if [ $# -gt 0 ]; then
|
||||
export GGML_SYCL_DEVICE=$1
|
||||
GGML_SYCL_DEVICE=$1
|
||||
else
|
||||
export GGML_SYCL_DEVICE=0
|
||||
GGML_SYCL_DEVICE=0
|
||||
fi
|
||||
echo GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE
|
||||
echo "use $GGML_SYCL_DEVICE as main GPU"
|
||||
#export GGML_SYCL_DEBUG=1
|
||||
./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
|
||||
#./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 5 -e -ngl 33 -t 1 -s 0
|
||||
|
||||
|
||||
#ZES_ENABLE_SYSMAN=1 enables support for getting the free memory of the GPU via sycl::aspect::ext_intel_free_memory. Recommended when --split-mode = layer.
|
||||
|
||||
#use all GPUs with same max compute units
|
||||
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
|
||||
|
||||
#use main GPU only
|
||||
#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue