cuda : rename build flag to LLAMA_CUDA (#6299)
This commit is contained in:
parent
b06c16ef9f
commit
280345968d
28 changed files with 129 additions and 115 deletions
|
@@ -42,7 +42,7 @@ git clone https://github.com/ggerganov/llama.cpp
|
|||
|
||||
cd llama.cpp
|
||||
|
||||
LLAMA_CUBLAS=1 make -j
|
||||
LLAMA_CUDA=1 make -j
|
||||
|
||||
ln -sfn /workspace/TinyLlama-1.1B-Chat-v0.3 ./models/tinyllama-1b
|
||||
ln -sfn /workspace/CodeLlama-7b-hf ./models/codellama-7b
|
||||
|
@@ -60,7 +60,7 @@ cd /workspace/llama.cpp
|
|||
mkdir build-cublas
|
||||
cd build-cublas
|
||||
|
||||
cmake -DLLAMA_CUBLAS=1 ../
|
||||
cmake -DLLAMA_CUDA=1 ../
|
||||
make -j
|
||||
|
||||
if [ "$1" -eq "0" ]; then
|
||||
|
@@ -186,17 +186,17 @@ if [ "$1" -eq "1" ]; then
|
|||
# batched
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
LLAMA_CUBLAS=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
|
||||
LLAMA_CUDA=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
|
||||
|
||||
# batched-bench
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
|
||||
LLAMA_CUDA=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
|
||||
|
||||
# parallel
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
LLAMA_CUBLAS=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
|
||||
LLAMA_CUDA=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
|
||||
|
||||
fi
|
||||
|
||||
|
@@ -204,10 +204,10 @@ fi
|
|||
#if [ "$1" -eq "7" ]; then
|
||||
# cd /workspace/llama.cpp
|
||||
#
|
||||
# LLAMA_CUBLAS=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
|
||||
# LLAMA_CUDA=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
|
||||
#fi
|
||||
|
||||
# more benches
|
||||
#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
||||
#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
||||
#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
||||
#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue