Add YX simple filter for llama-server

parent f8ec8877b7
commit fcb2bb1222

18 changed files with 120 additions and 1698 deletions

310  .github/workflows/bench.yml  (vendored)
@@ -1,310 +0,0 @@
# Benchmark
name: Benchmark

on:
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC80adis_H100_v5
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      duration:
        description: 'Duration of the bench'
        type: string
        default: 10m

  push:
    branches:
      - master
    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  pull_request_target:
    types: [opened, synchronize, reopened]
    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  schedule:
    - cron: '04 2 * * *'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
  cancel-in-progress: true

jobs:
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3
    env:
      RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
      N_USERS: 8
      DURATION: 10m

    strategy:
      matrix:
        model: [phi-2]
        ftype: [q4_0, q8_0, f16]
        include:
          - model: phi-2
            ftype: q4_0
            pr_comment_enabled: "true"

    if: |
      inputs.gpu-series == 'Standard_NC4as_T4_v3'
      || (
        github.event_name == 'schedule'
        && github.ref_name == 'master'
        && github.repository_owner == 'ggerganov'
      )
      || github.event_name == 'pull_request_target'
      || (
        github.event_name == 'push'
        && github.event.ref == 'refs/heads/master'
        && github.repository_owner == 'ggerganov'
      )
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          while ! nc -z localhost 9090; do
            sleep 0.1
          done

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.21'

      - name: Install k6 and xk6-sse
        id: k6_installation
        run: |
          cd examples/server/bench
          go install go.k6.io/xk6/cmd/xk6@latest
          xk6 build master \
            --with github.com/phymbert/xk6-sse

      - name: Build
        id: cmake_build
        run: |
          set -eux
          cmake -B build \
            -DLLAMA_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DLLAMA_CUBLAS=ON \
            -DCUDAToolkit_ROOT=/usr/local/cuda \
            -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
            -DCMAKE_CUDA_ARCHITECTURES=75 \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_ALL_WARNINGS=OFF \
            -DCMAKE_BUILD_TYPE=Release;
          cmake --build build --config Release -j $(nproc) --target llama-server

      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

      - name: Server bench
        id: server_bench
        run: |
          set -eux

          cd examples/server/bench
          source venv/bin/activate
          python bench.py \
            --runner-label ${{ env.RUNNER_LABEL }} \
            --name ${{ github.job }} \
            --branch ${{ github.head_ref || github.ref_name }} \
            --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
            --scenario script.js \
            --duration ${{ github.event.inputs.duration || env.DURATION }} \
            --hf-repo ggml-org/models \
            --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
            --model-path-prefix /models \
            --parallel ${{ env.N_USERS }} \
            -ngl 33 \
            --batch-size 2048 \
            --ubatch-size 256 \
            --ctx-size 16384 \
            --n-prompts 1000 \
            --max-prompt-tokens 1024 \
            --max-tokens 2048

          cat results.github.env >> $GITHUB_ENV

          # Remove dataset as we do not want it in the artefact
          rm ShareGPT_V3_unfiltered_cleaned_split.json

      - uses: actions/upload-artifact@v4
        with:
          name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          compression-level: 9
          path: |
            examples/server/bench/*.jpg
            examples/server/bench/*.json
            examples/server/bench/*.log

      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          authToken: ${{secrets.GITHUB_TOKEN}}
          sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
          context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          description: |
            ${{ env.BENCH_RESULTS }}
          state: 'success'

      - name: Upload benchmark images
        uses: devicons/public-upload-to-imgur@v2.2.2
        continue-on-error: true # Important as it looks unstable: 503
        id: imgur_step
        with:
          client_id: ${{secrets.IMGUR_CLIENT_ID}}
          path: |
            examples/server/bench/prompt_tokens_seconds.jpg
            examples/server/bench/predicted_tokens_seconds.jpg
            examples/server/bench/kv_cache_usage_ratio.jpg
            examples/server/bench/requests_processing.jpg

      - name: Extract mermaid
        id: set_mermaid
        run: |
          set -eux

          cd examples/server/bench
          PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
          echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
          echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
          echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
          echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
          echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
          echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
          echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
          echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

      - name: Extract image url
        id: extract_image_url
        continue-on-error: true
        run: |
          set -eux

          echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
          echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
          echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
          echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV

      - name: Comment PR
        uses: mshick/add-pr-comment@v2
        id: comment_pr
        if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
        with:
          message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          message: |
            <p align="center">

            📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀

            </p>

            <details>

            <summary>Expand details for performance related PR only</summary>

            - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
            - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
            - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
            - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
            - ${{ env.BENCH_GRAPH_XLABEL }}


            <p align="center">

            <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />

            <details>

            <summary>More</summary>

            ```mermaid
            ${{ env.PROMPT_TOKENS_SECONDS }}
            ```

            </details>

            <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.PREDICTED_TOKENS_SECONDS }}
            ```

            </details>

            </p>

            <details>

            <summary>Details</summary>

            <p align="center">

            <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.KV_CACHE_USAGE_RATIO }}
            ```

            </details>

            <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.REQUESTS_PROCESSING }}
            ```

            </details>

            </p>
            </details>
            </details>

696  .github/workflows/build.yml  (vendored)
@@ -25,662 +25,6 @@ env:
  GGML_N_THREADS: 1

jobs:
  macOS-latest-cmake-arm64:
    runs-on: macos-14

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          mkdir build
          cd build
          cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L 'main|curl' --verbose --timeout 900

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Pack artifacts
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          cp LICENSE ./build/bin/
          zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*

      - name: Upload artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
          name: llama-bin-macos-arm64.zip

  macOS-latest-cmake-x64:
    runs-on: macos-12

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          # Metal is disabled due to intermittent failures with Github runners not having a GPU:
          # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L main --verbose --timeout 900

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Pack artifacts
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          cp LICENSE ./build/bin/
          zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*

      - name: Upload artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
          name: llama-bin-macos-x64.zip

  ubuntu-focal-make:
    runs-on: ubuntu-20.04
    env:
      LLAMA_NODE_AVAILABLE: true
      LLAMA_PYTHON_AVAILABLE: true

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential gcc-8

      - uses: actions/setup-node@v4
        with:
          node-version: "20"

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Build
        id: make_build
        env:
          LLAMA_FATAL_WARNINGS: 1
        run: |
          CC=gcc-8 make -j $(nproc)

      - name: Test
        id: make_test
        run: |
          CC=gcc-8 make tests -j $(nproc)
          make test -j $(nproc)

  ubuntu-focal-make-curl:
    runs-on: ubuntu-20.04

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev

      - name: Build
        id: make_build
        env:
          LLAMA_FATAL_WARNINGS: 1
          LLAMA_CURL: 1
        run: |
          CC=gcc-8 make -j $(nproc)

  ubuntu-latest-cmake:
    runs-on: ubuntu-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential libcurl4-openssl-dev

      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
          cmake --build . --config Release -j $(nproc)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L 'main|curl' --verbose --timeout 900

      - name: Test llama2c conversion
        id: llama2c_test
        run: |
          cd build
          echo "Fetch tokenizer"
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
          echo "Fetch llama2c model"
          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
          ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
          ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Pack artifacts
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          cp LICENSE ./build/bin/
          zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*

      - name: Upload artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
          name: llama-bin-ubuntu-x64.zip

  ubuntu-latest-cmake-sanitizer:
    runs-on: ubuntu-latest

    continue-on-error: true

    strategy:
      matrix:
        sanitizer: [ADDRESS, THREAD, UNDEFINED]
        build_type: [Debug, Release]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential

      - name: Build
        id: cmake_build
        if: ${{ matrix.sanitizer != 'THREAD' }}
        run: |
          mkdir build
          cd build
          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
          cmake --build . --config ${{ matrix.build_type }} -j $(nproc)

      - name: Build (no OpenMP)
        id: cmake_build_no_openmp
        if: ${{ matrix.sanitizer == 'THREAD' }}
        run: |
          mkdir build
          cd build
          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DLLAMA_OPENMP=OFF
          cmake --build . --config ${{ matrix.build_type }} -j $(nproc)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L main --verbose --timeout 900

  ubuntu-latest-cmake-rpc:
    runs-on: ubuntu-latest

    continue-on-error: true

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential

      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake -DLLAMA_RPC=ON ..
          cmake --build . --config Release -j $(nproc)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L main --verbose

  ubuntu-22-cmake-vulkan:
    runs-on: ubuntu-22.04

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install build-essential libvulkan-dev

      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake -DLLAMA_VULKAN=ON ..
          cmake --build . --config Release -j $(nproc)

  ubuntu-22-cmake-hip:
    runs-on: ubuntu-22.04
    container: rocm/dev-ubuntu-22.04:6.0.2

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v3

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev

      - name: Build with native CMake HIP support
        id: cmake_build
        run: |
          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DLLAMA_HIPBLAS=ON
          cmake --build build --config Release -j $(nproc)

      - name: Build with legacy HIP support
        id: cmake_build_legacy_hip
        run: |
          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DLLAMA_HIPBLAS=ON
          cmake --build build2 --config Release -j $(nproc)

  ubuntu-22-cmake-sycl:
    runs-on: ubuntu-22.04

    continue-on-error: true

    steps:
      - uses: actions/checkout@v2

      - name: add oneAPI to apt
        shell: bash
        run: |
          cd /tmp
          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"

      - name: install oneAPI dpcpp compiler
        shell: bash
        run: |
          sudo apt update
          sudo apt install intel-oneapi-compiler-dpcpp-cpp

      - name: install oneAPI MKL library
        shell: bash
        run: |
          sudo apt install intel-oneapi-mkl-devel

      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Build
        id: cmake_build
        run: |
          source /opt/intel/oneapi/setvars.sh
          mkdir build
          cd build
          cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
          cmake --build . --config Release -j $(nproc)

  ubuntu-22-cmake-sycl-fp16:
    runs-on: ubuntu-22.04

    continue-on-error: true

    steps:
      - uses: actions/checkout@v2

      - name: add oneAPI to apt
        shell: bash
        run: |
          cd /tmp
          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"

      - name: install oneAPI dpcpp compiler
        shell: bash
        run: |
          sudo apt update
          sudo apt install intel-oneapi-compiler-dpcpp-cpp

      - name: install oneAPI MKL library
        shell: bash
        run: |
          sudo apt install intel-oneapi-mkl-devel

      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Build
        id: cmake_build
        run: |
          source /opt/intel/oneapi/setvars.sh
          mkdir build
          cd build
          cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON ..
          cmake --build . --config Release -j $(nproc)

  # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
  # how to debug it.
  # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
  macOS-latest-make:
    runs-on: macos-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: make_build
        env:
          LLAMA_FATAL_WARNINGS: 1
        run: |
          LLAMA_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)

      - name: Test
        id: make_test
        run: |
          LLAMA_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
          LLAMA_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)

  # TODO: build with LLAMA_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
  # how to debug it.
  # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
  # would be great if we fix these
  macOS-latest-cmake:
    runs-on: macos-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          mkdir build
          cd build
          cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF ..
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L main --verbose --timeout 900

  macOS-latest-cmake-ios:
    runs-on: macos-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v1

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          mkdir build
          cd build
          cmake -G Xcode .. \
            -DLLAMA_METAL_EMBED_LIBRARY=ON \
            -DLLAMA_BUILD_EXAMPLES=OFF \
            -DLLAMA_BUILD_TESTS=OFF \
            -DLLAMA_BUILD_SERVER=OFF \
            -DCMAKE_SYSTEM_NAME=iOS \
            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

  macOS-latest-cmake-tvos:
    runs-on: macos-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v1

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          mkdir build
          cd build
          cmake -G Xcode .. \
            -DLLAMA_METAL_EMBED_LIBRARY=ON \
            -DLLAMA_BUILD_EXAMPLES=OFF \
            -DLLAMA_BUILD_TESTS=OFF \
            -DLLAMA_BUILD_SERVER=OFF \
            -DCMAKE_SYSTEM_NAME=tvOS \
            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

  macOS-latest-swift:
    runs-on: macos-latest

    strategy:
      matrix:
        destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v1

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: xcodebuild for swift package
        id: xcodebuild
        run: |
          xcodebuild -scheme llama -destination "${{ matrix.destination }}"

      - name: Build Swift Example
        id: make_build_swift_example
        run: |
          make swift

  windows-msys2:
    runs-on: windows-latest

    strategy:
      fail-fast: false
      matrix:
        include:
          - { sys: UCRT64, env: ucrt-x86_64, build: Release }
          - { sys: CLANG64, env: clang-x86_64, build: Release }

    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Setup ${{ matrix.sys }}
        uses: msys2/setup-msys2@v2
        with:
          update: true
          msystem: ${{matrix.sys}}
          install: >-
            base-devel
            mingw-w64-${{matrix.env}}-toolchain
            mingw-w64-${{matrix.env}}-cmake
            mingw-w64-${{matrix.env}}-openblas

      - name: Build using make
        shell: msys2 {0}
        run: |
          make -j $(nproc)

      - name: Clean after building using make
        shell: msys2 {0}
        run: |
          make clean

      - name: Build using make w/ OpenBLAS
        shell: msys2 {0}
        run: |
          make LLAMA_OPENBLAS=1 -j $(nproc)

      - name: Build using CMake
        shell: msys2 {0}
        run: |
          cmake -B build
          cmake --build build --config ${{ matrix.build }} -j $(nproc)

      - name: Clean after building using CMake
        shell: msys2 {0}
        run: |
          rm -rf build

      - name: Build using CMake w/ OpenBLAS
        shell: msys2 {0}
        run: |
          cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
          cmake --build build --config ${{ matrix.build }} -j $(nproc)

  windows-latest-cmake:
    runs-on: windows-2019

@@ -990,40 +334,6 @@ jobs:
          cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DLLAMA_HIPBLAS=ON
          cmake --build build --config Release

  ios-xcode-build:
    runs-on: macos-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Build Xcode project
        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build

  android-build:
    runs-on: ubuntu-latest

    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Set up JDK
        uses: actions/setup-java@v3
        with:
          java-version: 17
          distribution: zulu

      - name: Setup Android SDK
        uses: android-actions/setup-android@v3
        with:
          log-accepted-android-sdk-licenses: false

      - name: Build
        run: |
          cd examples/llama.android

          ./gradlew build --no-daemon

# freeBSD-latest:
# runs-on: macos-12
# steps:

@@ -1047,14 +357,8 @@ jobs:
    runs-on: ubuntu-latest

    needs:
      - ubuntu-focal-make
      - ubuntu-latest-cmake
      - macOS-latest-make
      - macOS-latest-cmake
      - windows-latest-cmake
      - windows-latest-cmake-cuda
      - macOS-latest-cmake-arm64
      - macOS-latest-cmake-x64

    steps:
      - name: Clone

23  .github/workflows/close-issue.yml  (vendored)
@@ -1,23 +0,0 @@
name: Close inactive issues
on:
  schedule:
    - cron: "42 0 * * *"

jobs:
  close-issues:
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
    steps:
      - uses: actions/stale@v5
        with:
          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
          days-before-issue-stale: 30
          days-before-issue-close: 14
          stale-issue-label: "stale"
          close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
          days-before-pr-stale: -1
          days-before-pr-close: -1
          operations-per-run: 10000
          repo-token: ${{ secrets.GITHUB_TOKEN }}

40  .github/workflows/code-coverage.yml  (vendored)
@@ -1,40 +0,0 @@
name: Code Coverage
on: [push, pull_request]

env:
  GGML_NLOOP: 3
  GGML_N_THREADS: 1

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  run:
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential gcc-8 lcov

      - name: Build
        run: CC=gcc-8 make -j LLAMA_CODE_COVERAGE=1 tests

      - name: Run tests
        run: CC=gcc-8 make test

      - name: Generate coverage report
        run: |
          make coverage
          make lcov-report

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        with:
          files: lcov-report/coverage.info

117  .github/workflows/docker.yml  (vendored)
@@ -1,117 +0,0 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

# GitHub recommends pinning actions to a commit SHA.
# To get a newer version, you will need to update the SHA.
# You can also reference a tag or branch, but the action may change without warning.

name: Publish Docker image

on:
  pull_request:
  push:
    branches:
      - master

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    if: github.event.pull_request.draft == false

    runs-on: ubuntu-latest
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      matrix:
        config:
          - { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          # NOTE(canardletter): The CUDA builds on arm64 are very slow, so I
          # have disabled them for now until the reason why
          # is understood.
          - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
          - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
          - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
          - { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
          - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Downcase github.repository_owner
        run: |
          echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
        env:
          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'

      - name: Build and push Docker image (versioned)
        if: github.event_name == 'push'
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
          file: ${{ matrix.config.dockerfile }}

      - name: Build and push Docker image (tagged)
        uses: docker/build-push-action@v4
        with:
          context: .
          push: ${{ github.event_name == 'push' }}
          platforms: ${{ matrix.config.platforms }}
          tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
          file: ${{ matrix.config.dockerfile }}

27  .github/workflows/editorconfig.yml  (vendored)
@@ -1,27 +0,0 @@
name: EditorConfig Checker

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  editorconfig:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: editorconfig-checker/action-editorconfig-checker@main
      - run: editorconfig-checker

44  .github/workflows/gguf-publish.yml  (vendored)
@@ -1,44 +0,0 @@
# This workflow will upload a Python Package using Twine when a GGUF release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

# See `gguf-py/README.md` for how to make a release.

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  workflow_dispatch:
  push:
    # Pattern matched against refs/tags
    tags:
      - 'gguf-v*' # Push events to every version tag


jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9.x'
      - name: Install dependencies
        run: |
          cd gguf-py
          python -m pip install poetry
          poetry install

      - name: Build package
        run: cd gguf-py && poetry build
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
          packages-dir: gguf-py/dist

17  .github/workflows/labeler.yml  (vendored)
@@ -1,17 +0,0 @@
name: "Pull Request Labeler"
on:
  - pull_request_target

jobs:
  labeler:
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          repository: "ggerganov/llama.cpp"
      - uses: actions/labeler@v5
        with:
          configuration-path: '.github/labeler.yml'

65  .github/workflows/nix-ci-aarch64.yml  (vendored)
@@ -1,65 +0,0 @@
name: Nix aarch64 builds

on:
  workflow_dispatch: # allows manual triggering
  schedule:
    # Rebuild daily rather than on every push because QEMU is expensive (e.g.
    # 1.5h instead of minutes with the cold cache).
    #
    # randint(0, 59), randint(0, 23)
    - cron: '26 12 * * *'
  # But also rebuild if we touched any of the Nix expressions:
  push:
    branches:
      - master
    paths: ['**/*.nix', 'flake.lock']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['**/*.nix', 'flake.lock']

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  nix-build-aarch64:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install QEMU
        # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
        run: |
          sudo apt-get update
          sudo apt-get install -y qemu-user-static qemu-system-aarch64
          sudo usermod -a -G kvm $USER
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@v9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          extra-conf: |
            extra-platforms = aarch64-linux
            extra-system-features = nixos-test kvm
            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@v2
        with:
          upstream-cache: https://${{ matrix.cachixName }}.cachix.org
      - name: Set-up cachix to push the results to
        uses: cachix/cachix-action@v13
        with:
          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
          name: llama-cpp
      - name: Show all output paths
        run: >
          nix run github:nix-community/nix-eval-jobs
          -- --gc-roots-dir gcroot
          --flake
          ".#packages.aarch64-linux"
      - name: Build
        run: >
          nix run github:Mic92/nix-fast-build
          -- --skip-cached --no-nom
          --systems aarch64-linux
          --flake
          ".#checks.aarch64-linux"

72  .github/workflows/nix-ci.yml  (vendored)
@@ -1,72 +0,0 @@
name: Nix CI

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
  pull_request:
    types: [opened, synchronize, reopened]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  nix-eval:
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest ]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@v9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          extra-conf: |
            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@v2
        with:
          upstream-cache: https://${{ matrix.cachixName }}.cachix.org
      - name: List all flake outputs
        run: nix flake show --all-systems
      - name: Show all output paths
        run: >
          nix run github:nix-community/nix-eval-jobs
          -- --gc-roots-dir gcroot
          --flake
          ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
  nix-build:
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest ]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@v9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          extra-conf: |
            extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
            extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
      - uses: DeterminateSystems/magic-nix-cache-action@v2
        with:
          upstream-cache: https://${{ matrix.cachixName }}.cachix.org
      - name: Set-up cachix to push the results to
        uses: cachix/cachix-action@v13
        with:
          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
          name: llama-cpp
      - name: Build
        run: >
          nix run github:Mic92/nix-fast-build
          -- --skip-cached --no-nom
          --flake
          ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"

22  .github/workflows/nix-flake-update.yml  (vendored)
@@ -1,22 +0,0 @@
name: update-flake-lock
on:
  workflow_dispatch:
  schedule:
    - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00

jobs:
  lockfile:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@main
      - name: Update flake.lock
        uses: DeterminateSystems/update-flake-lock@main
        with:
          pr-title: "nix: update flake.lock"
          pr-labels: |
            nix
          pr-reviewers: philiptaron,SomeoneSerge
          token: ${{ secrets.FLAKE_TOKEN }}

36  .github/workflows/nix-publish-flake.yml  (vendored)
@@ -1,36 +0,0 @@
# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
name: "Publish a flake to flakestry & flakehub"
on:
  push:
    tags:
      - "*"
  workflow_dispatch:
    inputs:
      tag:
        description: "The existing tag to publish"
        type: "string"
        required: true
jobs:
  flakestry-publish:
    runs-on: ubuntu-latest
    permissions:
      id-token: "write"
      contents: "read"
    steps:
      - uses: flakestry/flakestry-publish@main
        with:
          version: "${{ inputs.tag || github.ref_name }}"
  flakehub-publish:
    runs-on: "ubuntu-latest"
    permissions:
      id-token: "write"
      contents: "read"
    steps:
      - uses: "actions/checkout@v4"
        with:
          ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
      - uses: "DeterminateSystems/nix-installer-action@main"
      - uses: "DeterminateSystems/flakehub-push@main"
        with:
          visibility: "public"
          tag: "${{ inputs.tag }}"

35  .github/workflows/python-check-requirements.yml  (vendored)
@@ -1,35 +0,0 @@
name: Python check requirements.txt

on:
  push:
    paths:
      - '.github/workflows/python-check-requirements.yml'
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
      - 'requirements.txt'
      - 'requirements/*.txt'
  pull_request:
    paths:
      - '.github/workflows/python-check-requirements.yml'
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
      - 'requirements.txt'
      - 'requirements/*.txt'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  python-check-requirements:
    runs-on: ubuntu-latest
    name: check-requirements
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Run check-requirements.sh script
        run: bash scripts/check-requirements.sh

23  .github/workflows/python-lint.yml  (vendored)
@@ -1,23 +0,0 @@
name: flake8 Lint

on: [push, pull_request]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  flake8-lint:
    runs-on: ubuntu-latest
    name: Lint
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: flake8 Lint
        uses: py-actions/flake8@v2
        with:
          plugins: "flake8-no-print"

169 .github/workflows/server.yml vendored
@ -1,169 +0,0 @@
# Server build and tests
name: Server

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean
  push:
    branches:
      - master
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  server:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        sanitizer: [ADDRESS, THREAD, UNDEFINED]
        build_type: [RelWithDebInfo]
        include:
          - build_type: Release
            sanitizer: ""
      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

    steps:
      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential \
            xxd \
            git \
            cmake \
            curl \
            wget \
            language-pack-en \
            libcurl4-openssl-dev

      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      - name: Verify server deps
        id: verify_server_deps
        run: |
          git config --global --add safe.directory $(realpath .)
          cd examples/server
          git ls-files --others --modified
          git status
          ./deps.sh
          git status
          not_ignored_files="$(git ls-files --others --modified)"
          echo "Modified files: ${not_ignored_files}"
          if [ -n "${not_ignored_files}" ]; then
            echo "Repository is dirty or server deps are not built as expected"
            echo "${not_ignored_files}"
            exit 1
          fi

      - name: Build
        id: cmake_build
        run: |
          cmake -B build \
            -DLLAMA_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Tests
        id: server_integration_tests
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh

      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow


  server-windows:
    runs-on: windows-2019

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: libCURL
        id: get_libcurl
        env:
          CURL_VERSION: 8.6.0_6
        run: |
          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
          mkdir $env:RUNNER_TEMP/libcurl
          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl

      - name: Build
        id: cmake_build
        run: |
          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      - name: Copy Libcurl
        id: prepare_libcurl
        run: |
          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll

      - name: Tests
        id: server_integration_tests
        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
        run: |
          cd examples/server/tests
          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp

      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          behave.exe --stop --no-skipped --no-capture --tags slow
@ -4,6 +4,7 @@ option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
 set(TARGET_SRCS
     server.cpp
+    stoplist.cpp
     utils.hpp
     httplib.h
 )
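The build change above is needed because utils.hpp (further down in this commit) only declares the filter's static stoplist and the global stopped_filter instance; exactly one translation unit in the llama-server target has to define them, and that is what the new stoplist.cpp provides. A minimal sketch of that declaration/definition split, with names taken from the commit but otherwise illustrative:

    // Header side (what utils.hpp declares):
    #include <set>

    class SWordsFilter {
        static std::set<const char *> stoplist;   // declaration only
    public:
        SWordsFilter() {}
    };
    extern SWordsFilter stopped_filter;           // declaration only

    // Source side (what stoplist.cpp defines, once, inside the llama-server target):
    std::set<const char *> SWordsFilter::stoplist = { "<|im_end|>" };
    SWordsFilter stopped_filter;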
10 examples/server/stoplist.cpp Normal file
@ -0,0 +1,10 @@
#include "utils.hpp"

std::set<const char *> SWordsFilter::stoplist = {
    "<|endoftext|>",
    "<|im_end|>",
    "<|startoftext|>",
    "<|im_start|>"
};

SWordsFilter stopped_filter;
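The new translation unit seeds the filter with the usual ChatML/GPT special tokens and instantiates the global stopped_filter, whose constructor (see the class added to utils.hpp below) loads any extra stopwords before main() runs. One property worth keeping in mind: std::set<const char *> orders and deduplicates by pointer value, not by string contents, so entries read from a file can textually duplicate the defaults; that is harmless here, it only means the same text may be scanned twice. An illustrative, self-contained snippet, not part of the commit:

    #include <cstdio>
    #include <set>

    int main() {
        std::set<const char *> list = { "<|im_end|>" };
        char dup[] = "<|im_end|>";             // same text, different address
        list.insert(dup);
        std::printf("%zu\n", list.size());     // prints 2, not 1: the set keys on pointers
        return 0;
    }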
@ -8,9 +8,11 @@
 #include "json.hpp"

 #include <string>
+#include <cstdlib>
 #include <vector>
 #include <sstream>
 #include <random>
+#include <set>

 #define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613"

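The two added includes cover getenv() (<cstdlib>) and the stoplist container (<set>). The filter class added further down also uses std::map, the C string routines and stdio; in this tree they are presumably reached transitively through the existing headers, but if a stricter toolchain complains, the candidates to add would be the following (my assumption, not lines from the commit):

    #include <map>       // SWordsFilter::scache
    #include <cstring>   // strstr, strlen, strdup
    #include <cstdio>    // fopen, fgets, fclose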
@ -430,7 +432,107 @@ static json oaicompat_completion_params_parse(
     return llama_params;
 }

-static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) {
+class SWordsFilter {
+    std::map<std::string, std::string> scache;
+    static std::set<const char *> stoplist;
+    static size_t strcmpn(const char * a, const char * b, bool & nostop) {
+        nostop = false;
+        int k = 0;
+        while (*b) {
+            if (*a) {
+                if (*a == *b) {
+                    k++;
+                    a++;
+                    nostop = false;
+                } else {
+                    nostop = true;
+                }
+            }
+            b++;
+        }
+        return k;
+    }
+    static std::string replace_all(
+        const std::string & content, const std::string & from, const std::string & to
+    ) {
+        std::string ret;
+        size_t pos = 0;
+        size_t last = 0;
+        while ((pos = content.find(from, last)) != std::string::npos) {
+            ret += content.substr(last, pos - last);
+            ret += to;
+            last = pos + from.size();
+        }
+        ret += content.substr(last);
+        return ret;
+    }
+public:
+    static void yx_simple_filter_init() {
+        char * fname;
+        fname = getenv("LLAMA_CPP_SERVER_STOPWORDS");
+        do {
+            if (fname != NULL) {
+                FILE * f = fopen(fname, "r");
+                if (f == NULL) {
+                    LOG_WARNING("failed to open stopword file", {{"file", fname}});
+                    break;
+                }
+                char buf[1024];
+                while (fgets(buf, 1024, f)) {
+                    buf[strlen(buf)-1] = 0;
+                    stoplist.insert(strdup(buf));
+                }
+                fclose(f);
+            }
+        } while (false);
+        LOG_INFO("initialized stopwords filter module by Y.X.",
+            {{"stoplist_size", stoplist.size()},
+             {"file", fname == NULL ? "default" : fname},}
+        );
+    }
+    void yx_simple_filter(std::string & content, const std::string & uid) {
+        if (content.size() == 0 || stoplist.size() == 0) {
+            return;
+        }
+        if (scache.find(uid) != scache.end()) {
+            content = scache[uid] + content;
+            scache[uid] = "";
+        }
+        bool cache = false;
+        bool g_nostop = true;
+        size_t max_allow = 0x7fffffff;
+        for (const auto * s : stoplist) {
+            const char * cont = content.c_str();
+            if (strstr(cont, s)) {
+                content = replace_all(content, s, "");
+                LOG_INFO("hit stopword", {{"stopword", s}});
+            }
+        }
+        for (const auto * s : stoplist) {
+            bool nostop;
+            const char * cont = content.c_str();
+            auto k = strcmpn(s, cont, nostop);
+            if (k > 0) {
+                g_nostop = g_nostop && nostop;
+                cache = true;
+            }
+            max_allow = std::min(max_allow, strlen(cont) - k);
+        }
+        if (cache && !g_nostop) {
+            scache[uid] = content.substr(max_allow);
+            content = content.substr(0, max_allow);
+            const char * ctx2 = scache[uid].c_str();
+            LOG_INFO("cache stopword", {{"content", ctx2}});
+        }
+    }
+    SWordsFilter() {
+        yx_simple_filter_init();
+    }
+};
+extern SWordsFilter stopped_filter;
+
+static json format_final_response_oaicompat(const json & request, const json & result, const std::string & completion_id, bool streaming = false) {
     bool stopped_word = result.count("stopped_word") != 0;
     bool stopped_eos = json_value(result, "stopped_eos", false);
     int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
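The filter works on the decoded "content" string rather than on token ids. The initializer run from the constructor only extends the built-in stoplist with whatever file LLAMA_CPP_SERVER_STOPWORDS points at; yx_simple_filter() then removes every stop sequence found in a chunk via replace_all(), and strcmpn() estimates how many trailing characters could be the start of a stop sequence, so that an ambiguous tail can be parked in scache under the caller-supplied id and re-prepended to the next chunk. Note that stoplist and scache are not guarded by any lock, so concurrent completions sharing the single stopped_filter rely on the server's existing serialization. A minimal usage sketch; it assumes it is built inside examples/server, linked with stoplist.cpp and the objects that define the logging globals used by LOG_INFO, and run with LLAMA_CPP_SERVER_STOPWORDS unset (the completion id is invented):

    #include "utils.hpp"

    #include <cassert>
    #include <string>

    int main() {
        // A stop sequence that arrives inside a single chunk is cut out in place.
        std::string chunk = "Hello<|im_end|> world";
        stopped_filter.yx_simple_filter(chunk, "cmpl-demo-1");
        assert(chunk == "Hello world");
        return 0;
    }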
@ -441,6 +543,8 @@ static json format_final_response_oaicompat(const json & request, json result, c
     if (stopped_word || stopped_eos) {
         finish_reason = "stop";
     }
+    // Add stopwords filter
+    stopped_filter.yx_simple_filter(content, completion_id);

     json choices =
         streaming ? json::array({json{{"finish_reason", finish_reason},
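With this hook, the final OpenAI-compatible response is filtered right before the choices array is built, against either the four defaults from stoplist.cpp or whatever list LLAMA_CPP_SERVER_STOPWORDS names. A configuration example; the file name is hypothetical, and the contents are shown as a string literal only to keep the examples in one language. Each entry must sit on its own newline-terminated, non-empty line, because the loader unconditionally chops the last character of every line it reads:

    // Contents of a hypothetical stopwords.txt passed via LLAMA_CPP_SERVER_STOPWORDS:
    static const char * example_stopwords_txt =
        "<|endoftext|>\n"
        "<|im_end|>\n"
        "### User:\n";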
@ -479,7 +583,7 @@ static json format_final_response_oaicompat(const json & request, json result, c
 }

 // return value is vector as there is one case where we might need to generate two responses
-static std::vector<json> format_partial_response_oaicompat(json result, const std::string & completion_id) {
+static std::vector<json> format_partial_response_oaicompat(const json & result, const std::string & completion_id) {
     if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
         return std::vector<json>({result});
     }
@ -500,6 +604,9 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
         finish_reason = "length";
     }

+    // Add stopwords filter
+    stopped_filter.yx_simple_filter(content, completion_id);
+
     std::time_t t = std::time(0);

     json choices;
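In the streaming path the same call runs once per emitted chunk, keyed by completion_id, which is what the scache tail-caching is for: when a chunk ends in the middle of a possible stop sequence, the ambiguous tail is withheld and glued onto the next chunk before filtering. A sketch of that behaviour under the same assumptions as the earlier snippet (ids and strings are invented; the surviving whitespace simply reflects what surrounded the removed token):

    #include "utils.hpp"

    #include <string>

    int main() {
        // "<|im_end|>" arrives split across two streamed chunks of one completion.
        std::string part1 = "partial <|im_";
        stopped_filter.yx_simple_filter(part1, "cmpl-demo-2");
        // part1 is now "partial "; the suspicious tail "<|im_" was cached for this id.

        std::string part2 = "end|> tail";
        stopped_filter.yx_simple_filter(part2, "cmpl-demo-2");
        // The cached tail is prepended, the reassembled "<|im_end|>" is removed,
        // and part2 comes back as " tail".
        return 0;
    }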