cuda : rename build flag to LLAMA_CUDA (#6299)

2024-03-26 01:16:01 +01:00 · 2024-03-26 01:16:01 +01:00 · 280345968d
commit 280345968d
parent b06c16ef9f
28 changed files with 129 additions and 115 deletions
--- a/scripts/pod-llama.sh
+++ b/scripts/pod-llama.sh
@@ -42,7 +42,7 @@ git clone https://github.com/ggerganov/llama.cpp

 cd llama.cpp

-LLAMA_CUBLAS=1 make -j
+LLAMA_CUDA=1 make -j

 ln -sfn /workspace/TinyLlama-1.1B-Chat-v0.3  ./models/tinyllama-1b
 ln -sfn /workspace/CodeLlama-7b-hf           ./models/codellama-7b
@@ -60,7 +60,7 @@ cd /workspace/llama.cpp
 mkdir build-cublas
 cd build-cublas

-cmake -DLLAMA_CUBLAS=1 ../
+cmake -DLLAMA_CUDA=1 ../
 make -j

 if [ "$1" -eq "0" ]; then
@@ -186,17 +186,17 @@ if [ "$1" -eq "1" ]; then
    # batched
    cd /workspace/llama.cpp

-    LLAMA_CUBLAS=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
+    LLAMA_CUDA=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999

    # batched-bench
    cd /workspace/llama.cpp

-    LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
+    LLAMA_CUDA=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32

    # parallel
    cd /workspace/llama.cpp

-    LLAMA_CUBLAS=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
+    LLAMA_CUDA=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb

 fi

@@ -204,10 +204,10 @@ fi
 #if [ "$1" -eq "7" ]; then
 #    cd /workspace/llama.cpp
 #
-#    LLAMA_CUBLAS=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
+#    LLAMA_CUDA=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
 #fi

 # more benches
-#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf  4096 1 99 1 512,3200 128,128,800 1
-#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
+#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf  4096 1 99 1 512,3200 128,128,800 1
+#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1