diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index b974e7fac..758796632 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -79,12 +79,18 @@ jobs:
sleep 0.1
done
- - name: Install k6
+ - name: Set up Go
+ uses: actions/setup-go@v5
+ with:
+ go-version: '1.21'
+
+ - name: Install k6 and xk6-sse
id: k6_installation
run: |
cd examples/server/bench
- wget --quiet https://github.com/grafana/k6/releases/download/v0.49.0/k6-v0.49.0-linux-amd64.tar.gz
- tar xzf k6*.tar.gz --strip-components=1
+ go install go.k6.io/xk6/cmd/xk6@latest
+ xk6 build master \
+ --with github.com/phymbert/xk6-sse
- name: Build
id: cmake_build
@@ -118,7 +124,7 @@ jobs:
cd examples/server/bench
source venv/bin/activate
- BENCH_K6_BIN_PATH=./k6 python bench.py \
+ python bench.py \
--runner-label ${{ env.RUNNER_LABEL }} \
--name ${{ github.job }} \
--branch ${{ github.head_ref || github.ref_name }} \
@@ -228,9 +234,9 @@ jobs:
Expand details for performance related PR only
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
- - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
- - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_TOKENS_P_90_ }}tk/s **total=${{ env.LLAMACPP_PROMPT_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
- - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s**
+ - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
+ - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
+ - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
- ${{ env.BENCH_GRAPH_XLABEL }}
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ff7238aba..63143bc94 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -52,7 +52,7 @@ jobs:
id: cmake_test
run: |
cd build
- ctest -L main --verbose --timeout 900
+ ctest -L 'main|curl' --verbose --timeout 900
- name: Determine tag name
id: tag
@@ -101,7 +101,9 @@ jobs:
sysctl -a
mkdir build
cd build
- cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
+ # Metal is disabled due to intermittent failures on GitHub-hosted runners that have no GPU:
+ # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
+ cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON ..
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
- name: Test
@@ -209,21 +211,21 @@ jobs:
id: depends
run: |
sudo apt-get update
- sudo apt-get install build-essential
+ sudo apt-get install build-essential libcurl4-openssl-dev
- name: Build
id: cmake_build
run: |
mkdir build
cd build
- cmake .. -DLLAMA_FATAL_WARNINGS=ON
+ cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
cmake --build . --config Release -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
- ctest -L main --verbose --timeout 900
+ ctest -L 'main|curl' --verbose --timeout 900
- name: Test llama2c conversion
id: llama2c_test
@@ -938,6 +940,12 @@ jobs:
- name: Download artifacts
id: download-artifact
uses: actions/download-artifact@v4
+ with:
+ path: ./artifact
+
+ - name: Move artifacts
+ id: move_artifacts
+ run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
- name: Create release
id: create_release
@@ -956,7 +964,7 @@ jobs:
const path = require('path');
const fs = require('fs');
const release_id = '${{ steps.create_release.outputs.id }}';
- for (let file of await fs.readdirSync('./artifact')) {
+ for (let file of await fs.readdirSync('./artifact/release')) {
if (path.extname(file) === '.zip') {
console.log('uploadReleaseAsset', file);
await github.repos.uploadReleaseAsset({
@@ -964,7 +972,7 @@ jobs:
repo: context.repo.repo,
release_id: release_id,
name: file,
- data: await fs.readFileSync(`./artifact/${file}`)
+ data: await fs.readFileSync(`./artifact/release/${file}`)
});
}
}
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index eefd87878..9b03d19bc 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -91,6 +91,12 @@ jobs:
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
fi
+ - name: Downcase github.repository_owner
+ run: |
+ echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
+ env:
+ GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
+
- name: Build and push Docker image (versioned)
if: github.event_name == 'push'
uses: docker/build-push-action@v4
@@ -98,7 +104,7 @@ jobs:
context: .
push: true
platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
+ tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
file: ${{ matrix.config.dockerfile }}
- name: Build and push Docker image (tagged)
@@ -107,5 +113,5 @@ jobs:
context: .
push: ${{ github.event_name == 'push' }}
platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
+ tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
file: ${{ matrix.config.dockerfile }}
diff --git a/.gitignore b/.gitignore
index 9fb5b80c3..fdc5184a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,6 +48,7 @@ models-mnt
/convert-llama2c-to-ggml
/embd-input-test
/embedding
+/eval-callback
/gguf
/gguf-llama-simple
/gguf-split
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 000000000..b029f13da
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,655 @@
+# date: Tue Apr 9 09:17:14 EEST 2024
+# this file is auto-generated by scripts/gen-authors.sh
+
+0cc4m
+0xspringtime <110655352+0xspringtime@users.noreply.github.com>
+2f38b454
+3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com>
+44670 <44670@users.noreply.github.com>
+AN Long
+AT
+Aarni Koskela
+Aaron Miller
+Aaryaman Vasishta
+Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
+Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com>
+Adithya Balaji
+AdithyanI
+Adrian
+Adrian Hesketh
+AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
+Aisuko
+Alberto <57916483+albbus-stack@users.noreply.github.com>
+Alex
+Alex Azarov
+Alex Azarov
+Alex Klinkhamer
+Alex Klinkhamer
+Alex Nguyen
+Alex Petenchea
+Alex Renda
+Alex von Gluck IV
+Alexey Parfenov
+Ali Chraghi <63465728+alichraghi@users.noreply.github.com>
+Ali Nehzat
+Ali Tariq
+Alon
+AlpinDale <52078762+AlpinDale@users.noreply.github.com>
+AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
+Ananta Bastola
+Anas Ahouzi <112881240+aahouzi@users.noreply.github.com>
+András Salamon
+Andrei
+Andrew Canis
+Andrew Duffy
+Andrew Godfrey
+Arik Poznanski
+Artem
+Artyom Lebedev
+Asbjørn Olling
+Ásgeir Bjarni Ingvarsson
+Ashok Gelal <401055+ashokgelal@users.noreply.github.com>
+Ashraful Islam
+Atsushi Tatsuma
+Austin <77757836+teleprint-me@users.noreply.github.com>
+AustinMroz
+BADR
+Bach Le
+Bailey Chittle <39804642+bachittle@users.noreply.github.com>
+BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com>
+Behnam M <58621210+ibehnam@users.noreply.github.com>
+Ben Garney
+Ben Siraphob
+Ben Williams
+Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com>
+Bernat Vadell
+Bodo Graumann
+Bono Lv
+Borislav Stanimirov
+Branden Butler
+Brian
+Bruce MacDonald
+CJ Pais
+CRD716
+Cameron
+Cameron Kaiser
+Casey Primozic
+Casey Primozic
+CausalLM <148736309+CausalLM@users.noreply.github.com>
+Cebtenzzre
+Chad Brewbaker
+Cheng Shao
+Chris Kuehl
+Christian Demsar
+Christian Demsar
+Christian Falch <875252+chrfalch@users.noreply.github.com>
+Christian Kögler
+Clark Saben <76020733+csaben@users.noreply.github.com>
+Clint Herron
+Cuong Trinh Manh
+DAN™
+Damian Stewart
+Dane Madsen
+DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com>
+Daniel Bevenius
+Daniel Drake
+Daniel Hiltgen
+Daniel Illescas Romero
+DannyDaemonic
+Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com>
+Dave Della Costa
+David Friehs
+David Kennedy
+David Pflug
+David Renshaw
+David Sommers <12738+databyte@users.noreply.github.com>
+David Yang
+Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com>
+Dean
+Deins
+Didzis Gosko
+Don Mahurin
+DooWoong Lee (David)
+Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com>
+Douglas Hanley
+Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
+Ebey Abraham
+Ed Lee
+Ed Lepedus
+Edward Taylor
+Elbios <141279586+Elbios@users.noreply.github.com>
+Engininja2 <139037756+Engininja2@users.noreply.github.com>
+Equim
+Eric Sommerlade
+Eric Zhang <34133756+EZForever@users.noreply.github.com>
+Erik Garrison
+Erik Scholz
+Ettore Di Giacinto
+Evan Jones
+Evan Miller
+Eve <139727413+netrunnereve@users.noreply.github.com>
+Evgeny Kurnevsky
+Ewout ter Hoeven
+ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
+FK
+Fabian
+Fabio R. Sluzala
+Faez Shakil
+FantasyGmm <16450052+FantasyGmm@users.noreply.github.com>
+Fattire <528174+fat-tire@users.noreply.github.com>
+Felix
+Finn Voorhees
+Firat
+Folko-Ven <71110216+Folko-Ven@users.noreply.github.com>
+Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com>
+Francisco Melo <43780565+francis2tm@users.noreply.github.com>
+FrankHB
+Frederik Vogel
+Gabe Goodhart
+GainLee
+Galunid
+Gary Linscott
+Gary Mulder
+Genkagaku.GPT
+Georgi Gerganov
+Gilad S
+GiviMAD
+Govlzkoy
+Guillaume "Vermeille" Sanchez
+Guillaume Wenzek
+Guoteng <32697156+SolenoidWGT@users.noreply.github.com>
+Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
+Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
+Haohui Mai
+Haoxiang Fei
+Harald Fernengel
+Hatsune Miku <129688334+at8u@users.noreply.github.com>
+Henk Poley
+Henri Vasserman
+Henrik Forstén
+Herman Semenov
+Hesen Peng
+Hoang Nguyen
+Hongyu Ouyang <96765450+casavaca@users.noreply.github.com>
+Howard Su
+Hua Jiang
+Huawei Lin
+Ian Bull
+Ian Bull
+Ian Scrivener
+Ido S
+IgnacioFDM
+Igor Okulist
+Ikko Eltociear Ashimine
+Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com>
+Ionoclast Laboratories
+Isaac McFadyen
+IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com>
+Ivan Komarov
+Ivan Stepanov
+JH23X <165871467+JH23X@users.noreply.github.com>
+Jack Mousseau
+JackJollimore <130917767+JackJollimore@users.noreply.github.com>
+Jag Chadha
+Jakub N
+James Reynolds
+Jan Boon
+Jan Boon
+Jan Ploski
+Jannis Schönleber
+Jared Van Bortel
+Jared Van Bortel
+Jason McCartney
+Jean-Christophe Hoelt
+Jean-Michaël Celerier
+Jed Fox
+Jeffrey Quesnelle
+Jesse Jojo Johnson
+Jhen-Jie Hong
+Jiahao Li
+Jian Liao
+JidongZhang-THU <1119708529@qq.com>
+Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com>
+Jiří Podivín <66251151+jpodivin@users.noreply.github.com>
+Johannes Gäßler
+Johannes Rudolph
+John <78893154+cmp-nct@users.noreply.github.com>
+John Balis
+John Smith <67539080+kingsidelee@users.noreply.github.com>
+JohnnyB
+Jonas Wunderlich <32615971+jonas-w@users.noreply.github.com>
+Jorge A <161275481+jorgealias@users.noreply.github.com>
+Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com>
+Joseph Stahl <1269177+josephst@users.noreply.github.com>
+Joyce
+Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
+Judd
+Julius Arkenberg
+Jun Jie <71215065+junnjiee16@users.noreply.github.com>
+Juraj Bednar
+Justin Parker
+Justin Suess
+Justine Tunney
+Juuso Alasuutari
+KASR
+Kamil Tomšík
+Karsten Weiss
+Karthick
+Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com>
+Karthik Sethuraman
+Kasumi <90275229+kasumi-1@users.noreply.github.com>
+Kawrakow <48489457+ikawrakow@users.noreply.github.com>
+Keiichi Tabata
+Kenvix ⭐
+Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
+Kevin Ji <1146876+kevinji@users.noreply.github.com>
+Kevin Kwok
+Kevin Lo
+Kolen Cheung
+Konstantin Herud
+Konstantin Zhuravlyov
+Kunshang Ji
+Kyle Liang
+Kyle Mistele
+Kylin <56434533+KyL0N@users.noreply.github.com>
+Lars Grammel
+Laura
+Lee <44310445+lx200916@users.noreply.github.com>
+Lee Drake
+Leng Yue
+LeonEricsson <70749762+LeonEricsson@users.noreply.github.com>
+Leonardo Neumann
+Li Tan
+Linwei Wang
+LoganDark
+LostRuins <39025047+LostRuins@users.noreply.github.com>
+Luciano
+Luo Tian
+M. Yusuf Sarıgöz
+Maarten ter Huurne
+Mack Straight
+Maël Kerbiriou
+MaggotHATE
+Marc Köhlbrugge
+Marco Matthies <71844+marcom@users.noreply.github.com>
+Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>
+Marian Cepok
+Mark Fairbairn
+Marko Tasic
+Martin Krasser
+Martin Schwaighofer
+Marvin Gießing
+Mateusz Charytoniuk
+Matheus C. França
+Matheus Gabriel Alves Silva
+Mathieu Nayrolles
+Mathijs de Bruin
+Matt Clayton <156335168+mattjcly@users.noreply.github.com>
+Matt Pulver
+Matteo Boschini <12133566+mbosc@users.noreply.github.com>
+Matthew Tejo
+Matvey Soloviev
+Maxime <672982+maximegmd@users.noreply.github.com>
+Maximilian Winter
+Meng Zhang
+Meng, Hengyu
+Merrick Christensen
+Michael Coppola
+Michael Hueschen
+Michael Kesper
+Michael Klimenko
+Michael Podvitskiy
+Michael Potter
+Michaël de Vries
+Mihai
+Mike
+Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>
+Mirko185
+Mirror Azure <54669636+MirrorAzure@users.noreply.github.com>
+Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com>
+Mohammadreza Hendiani
+Murilo Santana
+Musab Gultekin
+Nam D. Tran <42194884+namtranase@users.noreply.github.com>
+NawafAlansari <72708095+NawafAlansari@users.noreply.github.com>
+Nebula
+Neo Zhang Jianyu
+Neuman Vong
+Nexesenex <124105151+Nexesenex@users.noreply.github.com>
+Niall Coates <1349685+Niall-@users.noreply.github.com>
+Nicolai Weitkemper
+Nigel Bosch
+Niklas Korz
+Nindaleth
+Oleksandr Nikitin
+Oleksii Maryshchenko
+Olivier Chafik
+Ondřej Čertík
+Ouadie EL FAROUKI
+Paul Tsochantaris
+Pavol Rusnak
+Pedro Cuenca
+Peter Sugihara
+Phil H <5756783+phiharri@users.noreply.github.com>
+Philip Taron
+Phillip Kravtsov
+Pierre Alexandre SCHEMBRI