parent
0cd6bd3483
commit
554c247caf
21 changed files with 29 additions and 2639 deletions
|
@ -5,7 +5,6 @@ set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
|
|||
set(LLAMA_BLAS @LLAMA_BLAS@)
|
||||
set(LLAMA_CUDA @LLAMA_CUDA@)
|
||||
set(LLAMA_METAL @LLAMA_METAL@)
|
||||
set(LLAMA_CLBLAST @LLAMA_CLBLAST@)
|
||||
set(LLAMA_HIPBLAS @LLAMA_HIPBLAS@)
|
||||
set(LLAMA_ACCELERATE @LLAMA_ACCELERATE@)
|
||||
|
||||
|
@ -36,10 +35,6 @@ if (LLAMA_METAL)
|
|||
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
||||
endif()
|
||||
|
||||
if (LLAMA_CLBLAST)
|
||||
find_package(CLBlast REQUIRED)
|
||||
endif()
|
||||
|
||||
if (LLAMA_HIPBLAS)
|
||||
find_package(hip REQUIRED)
|
||||
find_package(hipblas REQUIRED)
|
||||
|
|
|
@ -19,17 +19,17 @@ logger = logging.getLogger("compare-llama-bench")
|
|||
|
||||
# Properties by which to differentiate results per commit:
|
||||
KEY_PROPERTIES = [
|
||||
"cpu_info", "gpu_info", "n_gpu_layers", "cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
|
||||
"cpu_info", "gpu_info", "n_gpu_layers", "cuda", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
|
||||
"blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "embeddings", "n_threads",
|
||||
"type_k", "type_v", "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen"
|
||||
]
|
||||
|
||||
# Properties that are boolean and are converted to Yes/No for the table:
|
||||
BOOL_PROPERTIES = ["cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
|
||||
BOOL_PROPERTIES = ["cuda", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
|
||||
|
||||
# Header names for the table:
|
||||
PRETTY_NAMES = {
|
||||
"cuda": "CUDA", "opencl": "OpenCL", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
|
||||
"cuda": "CUDA", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
|
||||
"gpu_blas": "GPU BLAS", "blas": "BLAS", "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
|
||||
"model_size": "Model Size [GiB]", "model_n_params": "Num. of Par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
|
||||
"n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "split_mode": "Split mode",
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
# Helper script for deploying llama.cpp server with a single Bash command
|
||||
#
|
||||
# - Works on Linux and macOS
|
||||
# - Supports: CPU, CUDA, Metal, OpenCL
|
||||
# - Supports: CPU, CUDA, Metal
|
||||
# - Can run all GGUF models from HuggingFace
|
||||
# - Can serve requests in parallel
|
||||
# - Always builds latest llama.cpp from GitHub
|
||||
|
@ -19,7 +19,7 @@
|
|||
# --port: port number, default is 8888
|
||||
# --repo: path to a repo containing GGUF model files
|
||||
# --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input
|
||||
# --backend: cpu, cuda, metal, opencl, depends on the OS
|
||||
# --backend: cpu, cuda, metal, depends on the OS
|
||||
# --gpu-id: gpu id, default is 0
|
||||
# --n-parallel: number of parallel requests, default is 8
|
||||
# --n-kv: KV cache size, default is 4096
|
||||
|
@ -72,7 +72,7 @@ function print_usage {
|
|||
printf " --port: port number, default is 8888\n"
|
||||
printf " --repo: path to a repo containing GGUF model files\n"
|
||||
printf " --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
|
||||
printf " --backend: cpu, cuda, metal, opencl, depends on the OS\n"
|
||||
printf " --backend: cpu, cuda, metal, depends on the OS\n"
|
||||
printf " --gpu-id: gpu id, default is 0\n"
|
||||
printf " --n-parallel: number of parallel requests, default is 8\n"
|
||||
printf " --n-kv: KV cache size, default is 4096\n"
|
||||
|
@ -387,9 +387,6 @@ elif [[ "$backend" == "cpu" ]]; then
|
|||
elif [[ "$backend" == "metal" ]]; then
|
||||
printf "[+] Building with Metal backend\n"
|
||||
make -j server $log
|
||||
elif [[ "$backend" == "opencl" ]]; then
|
||||
printf "[+] Building with OpenCL backend\n"
|
||||
LLAMA_CLBLAST=1 make -j server $log
|
||||
else
|
||||
printf "[-] Unknown backend: %s\n" "$backend"
|
||||
exit 1
|
||||
|
@ -407,8 +404,6 @@ elif [[ "$backend" == "cpu" ]]; then
|
|||
args="-ngl 0"
|
||||
elif [[ "$backend" == "metal" ]]; then
|
||||
args="-ngl 999"
|
||||
elif [[ "$backend" == "opencl" ]]; then
|
||||
args="-ngl 999"
|
||||
else
|
||||
printf "[-] Unknown backend: %s\n" "$backend"
|
||||
exit 1
|
||||
|
|
|
@ -106,8 +106,6 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
|||
# src/ggml-kompute.h -> ggml-kompute.h
|
||||
# src/ggml-metal.h -> ggml-metal.h
|
||||
# src/ggml-metal.m -> ggml-metal.m
|
||||
# src/ggml-opencl.cpp -> ggml-opencl.cpp
|
||||
# src/ggml-opencl.h -> ggml-opencl.h
|
||||
# src/ggml-quants.c -> ggml-quants.c
|
||||
# src/ggml-quants.h -> ggml-quants.h
|
||||
# src/ggml-rpc.cpp -> ggml-rpc.cpp
|
||||
|
@ -143,8 +141,6 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
|||
-e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
|
||||
-e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
|
||||
-e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
|
||||
-e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
|
||||
-e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
|
||||
-e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
|
||||
-e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
|
||||
-e 's/src\/ggml-rpc\.cpp/ggml-rpc.cpp/g' \
|
||||
|
|
|
@ -14,8 +14,6 @@ cp -rpv ../ggml/src/ggml-kompute.h ./ggml-kompute.h
|
|||
cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
|
||||
cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
|
||||
cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
|
||||
cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
|
||||
cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
|
||||
cp -rpv ../ggml/src/ggml-quants.c ./ggml-quants.c
|
||||
cp -rpv ../ggml/src/ggml-quants.h ./ggml-quants.h
|
||||
cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml-rpc.cpp
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue