Merge PR (#10)

Merge

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
dennyxbox890 2024-10-15 00:29:57 +08:00 committed by GitHub
parent a89f75e1b7
commit 65ec5f4326
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 422 additions and 1775 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,128 +0,0 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# GitHub recommends pinning actions to a commit SHA.
# To get a newer version, you will need to update the SHA.
# You can also reference a tag or branch, but the action may change without warning.
name: Publish Docker image
on:
#pull_request:
push:
branches:
- master
paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
workflow_dispatch: # allows manual triggering, useful for debugging
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
packages: write
jobs:
push_to_registry:
name: Push Docker image to Docker Hub
#if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
env:
COMMIT_SHA: ${{ github.sha }}
strategy:
matrix:
config:
- { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
- { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
- { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
- { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
- { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
#- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
#- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
#- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
- { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
steps:
- name: Check out the repo
uses: actions/checkout@v4
with:
fetch-depth: 0 # preserve git history, so we can determine the build number
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Log in to Docker Hub
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Determine tag name
id: tag
shell: bash
run: |
BUILD_NUMBER="$(git rev-list --count HEAD)"
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
REPO_NAME="${{ github.event.repository.name }}"
# determine tag name postfix (build number, commit hash)
if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
TAG_POSTFIX="b${BUILD_NUMBER}"
else
SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
TAG_POSTFIX="${SAFE_NAME}-${SHORT_HASH}"
fi
# list all tags possible
TAGS=""
TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }},"
TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}"
echo "output_tags=$TAGS" >> $GITHUB_OUTPUT
echo "output_tags=$TAGS" # print out for debugging
env:
GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
# https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: false
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Build and push Docker image (tagged + versioned)
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: .
push: true
platforms: ${{ matrix.config.platforms }}
# tag list is generated from step above
tags: ${{ steps.tag.outputs.output_tags }}
file: ${{ matrix.config.dockerfile }}

View file

@ -1,27 +0,0 @@
name: EditorConfig Checker
on:
workflow_dispatch: # allows manual triggering
inputs:
create_release:
description: 'Create new release'
required: true
type: boolean
push:
branches:
- master
pull_request:
branches:
- master
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
editorconfig:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: editorconfig-checker/action-editorconfig-checker@main
- run: editorconfig-checker

View file

@ -1,72 +0,0 @@
name: Nix aarch64 builds
on:
workflow_dispatch: # allows manual triggering
schedule:
# Rebuild daily rather than on every push because QEMU is expensive (e.g.
# 1.5h instead of minutes with the cold cache).
#
# randint(0, 59), randint(0, 23)
- cron: '26 12 * * *'
# But also rebuild if we touched any of the Nix expressions:
push:
branches:
- master
paths: ['**/*.nix', 'flake.lock']
pull_request:
types: [opened, synchronize, reopened]
paths: ['**/*.nix', 'flake.lock']
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
# https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
id-token: write
contents: read
jobs:
nix-build-aarch64:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install QEMU
# Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
run: |
sudo apt-get update
sudo apt-get install -y qemu-user-static qemu-system-aarch64
sudo usermod -a -G kvm $USER
- name: Install Nix
uses: DeterminateSystems/nix-installer-action@v9
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
extra-conf: |
extra-platforms = aarch64-linux
extra-system-features = nixos-test kvm
extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
- uses: DeterminateSystems/magic-nix-cache-action@v2
with:
upstream-cache: https://${{ matrix.cachixName }}.cachix.org
- name: Set-up cachix to push the results to
uses: cachix/cachix-action@v13
with:
authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
name: llama-cpp
- name: Show all output paths
run: >
nix run github:nix-community/nix-eval-jobs
-- --gc-roots-dir gcroot
--flake
".#packages.aarch64-linux"
- name: Build
run: >
nix run github:Mic92/nix-fast-build
-- --skip-cached --no-nom
--systems aarch64-linux
--flake
".#checks.aarch64-linux"

View file

@ -1,79 +0,0 @@
name: Nix CI
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
pull_request:
types: [opened, synchronize, reopened]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
# https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
id-token: write
contents: read
jobs:
nix-eval:
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Nix
uses: DeterminateSystems/nix-installer-action@v9
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
extra-conf: |
extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
- uses: DeterminateSystems/magic-nix-cache-action@v2
with:
upstream-cache: https://${{ matrix.cachixName }}.cachix.org
- name: List all flake outputs
run: nix flake show --all-systems
- name: Show all output paths
run: >
nix run github:nix-community/nix-eval-jobs
-- --gc-roots-dir gcroot
--flake
".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
nix-build:
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, macos-latest ]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Nix
uses: DeterminateSystems/nix-installer-action@v9
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
extra-conf: |
extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
- uses: DeterminateSystems/magic-nix-cache-action@v2
with:
upstream-cache: https://${{ matrix.cachixName }}.cachix.org
- name: Set-up cachix to push the results to
uses: cachix/cachix-action@v13
with:
authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
name: llama-cpp
- name: Build
run: >
nix run github:Mic92/nix-fast-build
-- --skip-cached --no-nom
--flake
".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"

View file

@ -1,22 +0,0 @@
name: update-flake-lock
on:
workflow_dispatch:
schedule:
- cron: '0 0 * * 0' # runs weekly on Sunday at 00:00
jobs:
lockfile:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Nix
uses: DeterminateSystems/nix-installer-action@main
- name: Update flake.lock
uses: DeterminateSystems/update-flake-lock@main
with:
pr-title: "nix: update flake.lock"
pr-labels: |
nix
pr-reviewers: philiptaron,SomeoneSerge
token: ${{ secrets.FLAKE_TOKEN }}

View file

@ -1,36 +0,0 @@
# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
name: "Publish a flake to flakestry & flakehub"
on:
push:
tags:
- "*"
workflow_dispatch:
inputs:
tag:
description: "The existing tag to publish"
type: "string"
required: true
jobs:
flakestry-publish:
runs-on: ubuntu-latest
permissions:
id-token: "write"
contents: "read"
steps:
- uses: flakestry/flakestry-publish@main
with:
version: "${{ inputs.tag || github.ref_name }}"
flakehub-publish:
runs-on: "ubuntu-latest"
permissions:
id-token: "write"
contents: "read"
steps:
- uses: "actions/checkout@v4"
with:
ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
- uses: "DeterminateSystems/nix-installer-action@main"
- uses: "DeterminateSystems/flakehub-push@main"
with:
visibility: "public"
tag: "${{ inputs.tag }}"

View file

@ -1,23 +0,0 @@
name: flake8 Lint
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
flake8-lint:
runs-on: ubuntu-latest
name: Lint
steps:
- name: Check out source repository
uses: actions/checkout@v4
- name: Set up Python environment
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: flake8 Lint
uses: py-actions/flake8@v2
with:
plugins: "flake8-no-print"

View file

@ -1,190 +0,0 @@
# Server build and tests
name: Server
on:
workflow_dispatch: # allows manual triggering
inputs:
sha:
description: 'Commit SHA1 to build'
required: false
type: string
slow_tests:
description: 'Run slow tests'
required: true
type: boolean
push:
branches:
- master
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
pull_request:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
env:
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
LLAMA_LOG_VERBOSITY: 10
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
server:
runs-on: ubuntu-latest
strategy:
matrix:
sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
build_type: [RelWithDebInfo]
include:
- build_type: Release
sanitizer: ""
fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
steps:
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get -y install \
build-essential \
xxd \
git \
cmake \
curl \
wget \
language-pack-en \
libcurl4-openssl-dev
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Python setup
id: setup_python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Tests dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt
- name: Verify server deps
id: verify_server_deps
run: |
git config --global --add safe.directory $(realpath .)
cd examples/server
git ls-files --others --modified
git status
./deps.sh
git status
not_ignored_files="$(git ls-files --others --modified)"
echo "Modified files: ${not_ignored_files}"
if [ -n "${not_ignored_files}" ]; then
echo "Repository is dirty or server deps are not built as expected"
echo "${not_ignored_files}"
exit 1
fi
- name: Build (no OpenMP)
id: cmake_build_no_openmp
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_CURL=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DGGML_OPENMP=OFF ;
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
- name: Build
id: cmake_build
if: ${{ matrix.sanitizer != 'THREAD' }}
run: |
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_CURL=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
- name: Tests
id: server_integration_tests
run: |
cd examples/server/tests
PORT=8888 ./tests.sh
- name: Slow tests
id: server_integration_tests_slow
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: |
cd examples/server/tests
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
server-windows:
runs-on: windows-2019
steps:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: libCURL
id: get_libcurl
env:
CURL_VERSION: 8.6.0_6
run: |
curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
mkdir $env:RUNNER_TEMP/libcurl
tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
- name: Build
id: cmake_build
run: |
cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
- name: Python setup
id: setup_python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Tests dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt
- name: Copy Libcurl
id: prepare_libcurl
run: |
cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
- name: Tests
id: server_integration_tests
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
run: |
cd examples/server/tests
$env:PYTHONIOENCODING = ":replace"
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
- name: Slow tests
id: server_integration_tests_slow
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: |
cd examples/server/tests
behave.exe --stop --no-skipped --no-capture --tags slow

View file

@ -3,7 +3,22 @@ project("llama.cpp" C CXX)
include(CheckIncludeFileCXX) include(CheckIncludeFileCXX)
#set(CMAKE_WARN_DEPRECATED YES) #set(CMAKE_WARN_DEPRECATED YES)
set(CMAKE_WARN_UNUSED_CLI YES) #set(CMAKE_WARN_UNUSED_CLI YES)
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
#no warn
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@ -51,7 +66,7 @@ endif()
# #
# debug # debug
option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" OFF)
option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
# build # build

View file

@ -1,5 +1,15 @@
# common # common
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
# Build info header # Build info header

View file

@ -1,5 +1,15 @@
# dependencies # dependencies
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
# third-party # third-party

View file

@ -1,5 +1,5 @@
-r ../../requirements/requirements-convert_legacy_llama.txt -r ../../requirements/requirements-convert_legacy_llama.txt
--extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://download.pytorch.org/whl/cpu
pillow~=10.2.0 pillow~=10.4.0
torch~=2.2.1 torch~=2.2.1
torchvision~=0.17.1 torchvision~=0.17.1

View file

@ -1,7 +1,7 @@
aiohttp~=3.9.3 aiohttp~=3.10.10
behave~=1.2.6 behave~=1.2.6
huggingface_hub~=0.23.2 huggingface_hub~=0.23.2
numpy~=1.26.4 numpy~=1.26.4
openai~=1.30.3 openai~=1.30.3
prometheus-client~=0.20.0 prometheus-client~=0.20.0
requests~=2.32.3 requests~=2.32.2

View file

@ -1,7 +1,17 @@
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
project("ggml" C CXX) project("ggml" C CXX)
include(CheckIncludeFileCXX) include(CheckIncludeFileCXX)
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)

View file

@ -1,5 +1,15 @@
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
unset(GGML_CDEF_PUBLIC) unset(GGML_CDEF_PUBLIC)
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})

View file

@ -1,3 +1,14 @@
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
if (NOT SOC_TYPE) if (NOT SOC_TYPE)
set (SOC_TYPE "Ascend910B3") set (SOC_TYPE "Ascend910B3")
endif() endif()

View file

@ -1,3 +1,14 @@
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
find_package (Threads REQUIRED) find_package (Threads REQUIRED)
set(TARGET vulkan-shaders-gen) set(TARGET vulkan-shaders-gen)

View file

@ -1,5 +1,15 @@
# dependencies # dependencies
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
# third-party # third-party

View file

@ -1,3 +1,14 @@
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
set(TARGET llama-vdot) set(TARGET llama-vdot)
add_executable(${TARGET} vdot.cpp) add_executable(${TARGET} vdot.cpp)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})

34
poetry.lock generated
View file

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]] [[package]]
name = "atomicwrites" name = "atomicwrites"
@ -31,13 +31,13 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p
[[package]] [[package]]
name = "certifi" name = "certifi"
version = "2024.2.2" version = "2024.7.4"
description = "Python package for providing Mozilla's CA Bundle." description = "Python package for providing Mozilla's CA Bundle."
optional = false optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
files = [ files = [
{file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
{file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
] ]
[[package]] [[package]]
@ -251,24 +251,24 @@ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "t
[[package]] [[package]]
name = "idna" name = "idna"
version = "3.6" version = "3.7"
description = "Internationalized Domain Names in Applications (IDNA)" description = "Internationalized Domain Names in Applications (IDNA)"
optional = false optional = false
python-versions = ">=3.5" python-versions = ">=3.5"
files = [ files = [
{file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
{file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
] ]
[[package]] [[package]]
name = "jinja2" name = "jinja2"
version = "3.1.3" version = "3.1.4"
description = "A very fast and expressive template engine." description = "A very fast and expressive template engine."
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"},
{file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"},
] ]
[package.dependencies] [package.dependencies]
@ -1066,13 +1066,13 @@ reference = "pytorch"
[[package]] [[package]]
name = "tqdm" name = "tqdm"
version = "4.66.2" version = "4.66.3"
description = "Fast, Extensible Progress Meter" description = "Fast, Extensible Progress Meter"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, {file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"},
{file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, {file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"},
] ]
[package.dependencies] [package.dependencies]
@ -1165,13 +1165,13 @@ files = [
[[package]] [[package]]
name = "urllib3" name = "urllib3"
version = "2.2.1" version = "2.2.2"
description = "HTTP library with thread-safe connection pooling, file post, and more." description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"},
{file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"},
] ]
[package.extras] [package.extras]
@ -1194,4 +1194,4 @@ files = [
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.9" python-versions = ">=3.9"
content-hash = "c8c4cc87637266a7b85debcbafa8887c5ad81cc8ef40e98a3f52c7c50af05c03" content-hash = "0bcd190f2fc81c926bb25e891c22ce86b513cc8aa6e40b3d73b5b18104caab88"

View file

@ -1,3 +1,14 @@
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
# TODO: should not use this # TODO: should not use this
if (WIN32) if (WIN32)
if (BUILD_SHARED_LIBS) if (BUILD_SHARED_LIBS)

View file

@ -1,3 +1,14 @@
#enable O3 flag
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
set(CMAKE_CXX_FLAGS_DEBUG "-g")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
#add lto
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
function(llama_test target) function(llama_test target)
include(CMakeParseArguments) include(CMakeParseArguments)
set(options) set(options)