From f80ff4dc6a545f84e0ea949b0b70c38ffa166c40 Mon Sep 17 00:00:00 2001
From: crasm
Date: Tue, 19 Dec 2023 01:43:27 -0500
Subject: [PATCH] ci : get ci/run.sh working with test-model-load-cancel

---
 ci/run.sh                        | 247 +++++++++++++++++--------------
 requirements.txt                 |   8 +-
 tests/test-model-load-cancel.cpp |  36 +++--
 3 files changed, 167 insertions(+), 124 deletions(-)

diff --git a/ci/run.sh b/ci/run.sh
index 2fb214299..a1b978a0f 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -1,4 +1,4 @@
-#/bin/bash
+#!/bin/bash
 #
 # sample usage:
 #
@@ -11,6 +11,8 @@
 # GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 #
 
+set -u # Fail on unset variables
+
 if [ -z "$2" ]; then
     echo "usage: $0 <output-dir> <mnt-dir>"
     exit 1
@@ -30,8 +32,20 @@
 sd=`dirname $0`
 cd $sd/../
 SRC=`pwd`
 
+# Read-only array of quantization types for iteration.
+# Use ${quants[@]:1} to skip f16.
+declare -ra quants=( f16 q8_0 q4_0 q4_1 q5_0 q5_1 q2_k q3_k q4_k q5_k q6_k )
+
 ## helpers
 
+# Print an error message to stderr and exit with an error.
+# usage: die <format> [args...]
+function die {
+    local format="$1"; shift
+    >&2 printf "$format" "$@"
+    exit 1
+}
+
 # download a file if it does not exist or if it is outdated
 function gg_wget {
     local out=$1
@@ -77,7 +91,9 @@ function gg_run {
 function gg_run_ctest_debug {
     cd ${SRC}
 
-    rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug
+    rm -rf build-ci-debug
+    mkdir build-ci-debug
+    cd build-ci-debug
 
     set -e
 
@@ -105,14 +121,16 @@ function gg_sum_ctest_debug {
 function gg_run_ctest_release {
     cd ${SRC}
 
-    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
+    rm -rf build-ci-release
+    mkdir build-ci-release
+    cd build-ci-release
 
     set -e
 
     (time cmake -DCMAKE_BUILD_TYPE=Release .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
     (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
 
-    if [ -z ${GG_BUILD_LOW_PERF} ]; then
+    if [[ -z ${GG_BUILD_LOW_PERF+x} ]]; then
         (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
     else
         (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
@@ -133,6 +151,7 @@ function gg_sum_ctest_release {
 function gg_run_ctest_with_model {
     cd ${SRC}
+    cd build-ci-release
     set -e
     (time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest_with_model.log
     set +e
@@ -151,81 +170,70 @@ function gg_sum_ctest_with_model {
 # open_llama_3b_v2
 
 function gg_run_open_llama_3b_v2 {
-    cd ${SRC}
+    # We use absolute paths here to not have to track CWD as much
+    local models_mnt="$(realpath "${SRC}/models-mnt")"
+    local path_models="${models_mnt}/open-llama/3B-v2"
+    local path_wiki="${models_mnt}/wikitext"
+    local path_wiki_raw="${path_wiki}/wikitext-2-raw"
 
-    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/config.json
-    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model
-    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/tokenizer_config.json
-    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/special_tokens_map.json
-    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
-    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
+    mkdir -p "${path_models}" "${path_wiki}"
 
-    gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
-    unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
-    head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw
+    gg_wget "${path_models}" https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/config.json
+    gg_wget "${path_models}" https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model
+    gg_wget "${path_models}" https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/tokenizer_config.json
+    gg_wget "${path_models}" https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/special_tokens_map.json
+    gg_wget "${path_models}" https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
+    gg_wget "${path_models}" https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json
 
-    path_models="../models-mnt/open-llama/3B-v2"
-    path_wiki="../models-mnt/wikitext/wikitext-2-raw"
+    gg_wget "${path_wiki}" https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
+    unzip -o "${path_wiki}/wikitext-2-raw-v1.zip" -d "${path_wiki}"
+    head -n 60 "${path_wiki_raw}/wiki.test.raw" > "${path_wiki_raw}/wiki.test-60.raw"
 
-    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
+    rm -rf "${SRC}/build-ci-release"
+    mkdir "${SRC}/build-ci-release"
+    cd "${SRC}/build-ci-release"
 
     set -e
 
-    (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+    (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_QKK_64=1 .. ) 2>&1 | tee -a "${OUT}/${ci}-cmake.log"
+    (time make -j ) 2>&1 | tee -a "${OUT}/${ci}-make.log"
 
-    python3 ../convert.py ${path_models}
+    python3 "${SRC}/convert.py" "${path_models}"
 
-    model_f16="${path_models}/ggml-model-f16.gguf"
-    model_q8_0="${path_models}/ggml-model-q8_0.gguf"
-    model_q4_0="${path_models}/ggml-model-q4_0.gguf"
-    model_q4_1="${path_models}/ggml-model-q4_1.gguf"
-    model_q5_0="${path_models}/ggml-model-q5_0.gguf"
-    model_q5_1="${path_models}/ggml-model-q5_1.gguf"
-    model_q2_k="${path_models}/ggml-model-q2_k.gguf"
-    model_q3_k="${path_models}/ggml-model-q3_k.gguf"
-    model_q4_k="${path_models}/ggml-model-q4_k.gguf"
-    model_q5_k="${path_models}/ggml-model-q5_k.gguf"
-    model_q6_k="${path_models}/ggml-model-q6_k.gguf"
+    # Get the model path for a quantization
+    # usage: model_for <quant>
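+    # e.g. "$(model_for q8_0)" expands to "${path_models}/ggml-model-q8_0.gguf"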
+    function model_for {
+        if (( $# != 1 )); then
+            die 'model_for takes a single quantization, such as q8_0'
+        fi
+        echo -n "${path_models}/ggml-model-$1.gguf"
+    }
 
-    wiki_test_60="${path_wiki}/wiki.test-60.raw"
+    wiki_test_60="${path_wiki_raw}/wiki.test-60.raw"
 
-    ./bin/quantize ${model_f16} ${model_q8_0} q8_0
-    ./bin/quantize ${model_f16} ${model_q4_0} q4_0
-    ./bin/quantize ${model_f16} ${model_q4_1} q4_1
-    ./bin/quantize ${model_f16} ${model_q5_0} q5_0
-    ./bin/quantize ${model_f16} ${model_q5_1} q5_1
-    ./bin/quantize ${model_f16} ${model_q2_k} q2_k
-    ./bin/quantize ${model_f16} ${model_q3_k} q3_k
-    ./bin/quantize ${model_f16} ${model_q4_k} q4_k
-    ./bin/quantize ${model_f16} ${model_q5_k} q5_k
-    ./bin/quantize ${model_f16} ${model_q6_k} q6_k
+    # Quantize q8_0 through q6_k
+    for q in "${quants[@]:1}"; do
+        ./bin/quantize "$(model_for f16)" "$(model_for "${q}")" "${q}"
+    done
 
-    (time ./bin/main --model ${model_f16} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-    (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
-    (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
-    (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
-    (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
-    (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
-    (time ./bin/main --model ${model_q2_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
-    (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
-    (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
-    (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
-    (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
+    # Run basic inference for all quants
+    for q in "${quants[@]}"; do
+        ( time \
+            ./bin/main --model "$(model_for "${q}")" -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is"
+        ) 2>&1 | tee -a "${OUT}/${ci}-tg-${q}.log"
+    done
 
-    (time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-    (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
-    (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
-    (time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
-    (time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
-    (time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
-    (time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
-    (time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
-    (time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
-    (time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
-    (time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
+    # Run perplexity with wiki_test_60, appending to the same per-quant logs
+    # used above; check_ppl later reads the "[1]" line from each log
+    for q in "${quants[@]}"; do
+        ( time \
+            ./bin/perplexity --model "$(model_for "${q}")" -f "${wiki_test_60}" -c 128 -b 128 --chunks 2
+        ) 2>&1 | tee -a "${OUT}/${ci}-tg-${q}.log"
+    done
 
-    (time ./bin/save-load-state --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+    # Run examples/save-load-state with q4_0
+    ( time \
+        ./bin/save-load-state --model "$(model_for q4_0)"
+    ) 2>&1 | tee -a "${OUT}/${ci}-save-load-state.log"
 
     function check_ppl {
         qnt="$1"
@@ -240,17 +248,11 @@ function gg_run_open_llama_3b_v2 {
         return 0
     }
 
-    check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
-    check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
+    # Check perplexity results for all quants (each quant's own log, not
+    # just the f16 log)
+    for q in "${quants[@]}"; do
+        check_ppl "$q" "$(cat "${OUT}/${ci}-tg-${q}.log" | grep "^\[1\]")" \
+            | tee -a "${OUT}/${ci}-ppl.log"
+    done
 
     # lora
     function compare_ppl {
@@ -267,32 +269,42 @@ function gg_run_open_llama_3b_v2 {
         return 0
     }
 
-    path_lora="../models-mnt/open-llama/3B-v2/lora"
-    path_shakespeare="../models-mnt/shakespeare"
+    local path_lora="${path_models}/lora"
+    local path_shakespeare="${models_mnt}/shakespeare"
 
-    shakespeare="${path_shakespeare}/shakespeare.txt"
-    lora_shakespeare="${path_lora}/ggml-adapter-model.bin"
+    local shakespeare="${path_shakespeare}/shakespeare.txt"
+    local lora_shakespeare="${path_lora}/ggml-adapter-model.bin"
 
-    gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_config.json
-    gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_model.bin
-    gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/shakespeare.txt
+    gg_wget "${path_lora}" https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_config.json
+    gg_wget "${path_lora}" https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_model.bin
+    gg_wget "${path_shakespeare}" https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/shakespeare.txt
 
-    python3 ../convert-lora-to-ggml.py ${path_lora}
+    python3 "${SRC}/convert-lora-to-ggml.py" "${path_lora}"
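+
+    # The compare_ppl checks below expect perplexity not to get worse once the
+    # LoRA adapter is applied, since the adapter was trained on this same text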
"^\[1\]")" \ + "$(cat "${OUT}/${ci}-ppl-shakespeare-lora-f16.log" | grep "^\[1\]")" \ + | tee -a "${OUT}/${ci}-lora-ppl.log" # q8_0 - (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log - (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log - compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log + (time ./bin/perplexity --model "$(model_for q8_0)" -f "${shakespeare}" -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a "$OUT/${ci}-ppl-shakespeare-q8_0.log" + (time ./bin/perplexity --model "$(model_for q8_0)" -f "${shakespeare}" --lora "${lora_shakespeare}" -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a "$OUT/${ci}-ppl-shakespeare-lora-q8_0.log" + compare_ppl "q8_0 shakespeare" \ + "$(cat "${OUT}/${ci}-ppl-shakespeare-q8_0.log" | grep "^\[1\]")" \ + "$(cat "${OUT}/${ci}-ppl-shakespeare-lora-q8_0.log" | grep "^\[1\]")" \ + | tee -a "${OUT}/${ci}-lora-ppl.log" # q8_0 + f16 lora-base - (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log - compare_ppl "q8_0 / f16 base shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log - + ( time \ + ./bin/perplexity --model "$(model_for q8_0)" -f "${shakespeare}" --lora "${lora_shakespeare}" --lora-base "$(model_for f16)" -c 128 -b 128 --chunks 2 + ) 2>&1 | tee -a "${OUT}/${ci}-ppl-shakespeare-lora-q8_0-f16.log" + compare_ppl "q8_0 / f16 base shakespeare" \ + "$(cat "${OUT}/${ci}-ppl-shakespeare-q8_0.log" | grep "^\[1\]")" \ + "$(cat "${OUT}/${ci}-ppl-shakespeare-lora-q8_0-f16.log" | grep "^\[1\]")" \ + | tee -a "${OUT}/${ci}-lora-ppl.log" set +e } @@ -502,31 +514,42 @@ function gg_sum_open_llama_7b_v2 { ## main -if [ -z ${GG_BUILD_LOW_PERF} ]; then - rm -rf ${SRC}/models-mnt - - mnt_models=${MNT}/models - mkdir -p ${mnt_models} - ln -sfn ${mnt_models} ${SRC}/models-mnt - - python3 -m pip install -r ${SRC}/requirements.txt - python3 -m pip install --editable gguf-py -fi - ret=0 +# This is necessary to test if a variable is set while `set -u` is enabled. +# see: https://stackoverflow.com/a/13864829 +# [[ -z ${var+x} ]] evaluates to false if var is set +# [[ ! -z ${var+x} ]] evaluates to true if var is set +if [[ ! 
+if [[ ! -z ${GG_BUILD_LOW_PERF+x} ]]; then
+    test "${ret}" -eq 0 && gg_run ctest_debug
+    test "${ret}" -eq 0 && gg_run ctest_release
+    exit "${ret}"
+fi # Otherwise, do extended testing
+
+rm -rf "${SRC}/models-mnt"
+
+mnt_models="${MNT}/models"
+mkdir -p "${mnt_models}"
+ln -sfn "${mnt_models}" "${SRC}/models-mnt"
+
+# Create a fresh python3 venv and enter it
+python3 -m venv "${MNT}/venv"
+source "${MNT}/venv/bin/activate"
+
+pip install --disable-pip-version-check -r "${SRC}/requirements.txt"
+pip install --disable-pip-version-check --editable gguf-py
+
 test $ret -eq 0 && gg_run ctest_debug
 test $ret -eq 0 && gg_run ctest_release
 
-if [ -z ${GG_BUILD_LOW_PERF} ]; then
-    if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
-        if [ -z ${GG_BUILD_CUDA} ]; then
-            test $ret -eq 0 && gg_run open_llama_3b_v2
-        else
-            test $ret -eq 0 && gg_run open_llama_7b_v2
-        fi
-        test $ret -eq 0 && gg_run ctest_with_model
+# Run tests with open_llama
+if [[ -z ${GG_BUILD_VRAM_GB+x} ]] || (( GG_BUILD_VRAM_GB >= 8 )); then
+    if [[ ! -z ${GG_BUILD_CUDA+x} ]]; then
+        test $ret -eq 0 && gg_run open_llama_7b_v2
+    else
+        test $ret -eq 0 && gg_run open_llama_3b_v2
     fi
+    test $ret -eq 0 && gg_run ctest_with_model
 fi
 
 exit $ret
diff --git a/requirements.txt b/requirements.txt
index badfec3be..35713223f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
-numpy==1.24.4
-sentencepiece==0.1.98
-transformers>=4.34.0
 gguf>=0.1.0
+numpy==1.24.4
+protobuf==4.25.1
+sentencepiece==0.1.98
+torch==2.0.1
+transformers>=4.34.0
diff --git a/tests/test-model-load-cancel.cpp b/tests/test-model-load-cancel.cpp
index 926a305da..509f3e8e0 100644
--- a/tests/test-model-load-cancel.cpp
+++ b/tests/test-model-load-cancel.cpp
@@ -2,27 +2,45 @@
 #include <cstdio>
 #include <cstdlib>
+#include <cstddef> // size_t
 
 int main(void) {
-    const auto * model_path = "models/7B/ggml-model-f16.gguf";
-    auto * file = fopen(model_path, "r");
+    const char * models_to_try[] = {
+        // Same default as example/main for local use
+        "./models/7B/ggml-model-f16.gguf",
+        // Models for ./ci/run.sh
+        "./models-mnt/open-llama/3B-v2/ggml-model-q2_k.gguf",
+        "./models-mnt/open-llama/7B-v2/ggml-model-q2_k.gguf",
+    };
 
-    if (file == nullptr) {
-        fprintf(stderr, "no model at '%s' found\n", model_path);
-        return EXIT_FAILURE;
+    // Initialize to nullptr so the "no model found" check below is well-defined
+    const char * chosen_model = nullptr;
+    for (size_t i = 0; i < sizeof(models_to_try) / sizeof(models_to_try[0]); i++) {
+        const auto * model = models_to_try[i];
+
+        auto * file = fopen(model, "r");
+        if (file == nullptr) {
+            continue;
+        }
+
+        chosen_model = model;
+        fprintf(stderr, "using '%s'\n", model);
+        fclose(file);
+        break; // use the first model that exists
     }
 
-    fprintf(stderr, "using '%s'\n", model_path);
-    fclose(file);
+    if (chosen_model == nullptr) {
+        fprintf(stderr, "no model found\n");
+        return EXIT_FAILURE;
+    }
 
     llama_backend_init(false);
     auto params = llama_model_params{};
     params.use_mmap = false;
+    // Returning false from the progress callback cancels the load;
+    // with this threshold the load is aborted almost immediately
     params.progress_callback = [](float progress, void * ctx){
         (void) ctx;
-        return progress > 0.50;
+        return progress > 0.05;
     };
+
     auto * model = llama_load_model_from_file(chosen_model, params);
     llama_backend_free();
+
+    // The cancelled load must not return a model
     return model == nullptr ? EXIT_SUCCESS : EXIT_FAILURE;
 }