Merge PR (#10)

Merge --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-15 00:29:57 +08:00 · 2024-10-15 00:29:57 +08:00 · 65ec5f4326
commit 65ec5f4326
parent a89f75e1b7
23 changed files with 422 additions and 1775 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -1,128 +0,0 @@
 # This workflow uses actions that are not certified by GitHub.
 # They are provided by a third-party and are governed by
 # separate terms of service, privacy policy, and support
 # documentation.
 # GitHub recommends pinning actions to a commit SHA.
 # To get a newer version, you will need to update the SHA.
 # You can also reference a tag or branch, but the action may change without warning.
 name: Publish Docker image
 on:
  #pull_request:
  push:
    branches:
      - master
    paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
  workflow_dispatch: # allows manual triggering, useful for debugging
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
 # Fine-grant permission
 # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
 permissions:
  packages: write
 jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    #if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      matrix:
        config:
          - { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
          - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
          - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
          - { tag: "light-musa", dockerfile: ".devops/llama-cli-musa.Dockerfile", platforms: "linux/amd64" }
          - { tag: "server-musa", dockerfile: ".devops/llama-server-musa.Dockerfile", platforms: "linux/amd64" }
          - { tag: "full-musa", dockerfile: ".devops/full-musa.Dockerfile", platforms: "linux/amd64" }
          # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
          #- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          #- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          #- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
          - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
          - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # preserve git history, so we can determine the build number
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}"  # to lower case
          REPO_NAME="${{ github.event.repository.name }}"
          # determine tag name postfix (build number, commit hash)
          if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
            TAG_POSTFIX="b${BUILD_NUMBER}"
          else
            SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
            TAG_POSTFIX="${SAFE_NAME}-${SHORT_HASH}"
          fi
          # list all tags possible
          TAGS=""
          TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }},"
          TAGS="${TAGS}ghcr.io/${REPO_OWNER}/${REPO_NAME}:${{ matrix.config.tag }}-${TAG_POSTFIX}"
          echo "output_tags=$TAGS" >> $GITHUB_OUTPUT
          echo "output_tags=$TAGS"  # print out for debugging
        env:
          GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
      # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false
          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true
      - name: Build and push Docker image (tagged + versioned)
        if: github.event_name == 'push'
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.output_tags }}
          file: ${{ matrix.config.dockerfile }}
--- a/.github/workflows/editorconfig.yml
+++ b/.github/workflows/editorconfig.yml
@ -1,27 +0,0 @@
 name: EditorConfig Checker
 on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
 jobs:
  editorconfig:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: editorconfig-checker/action-editorconfig-checker@main
      - run: editorconfig-checker
--- a/.github/workflows/nix-ci-aarch64.yml
+++ b/.github/workflows/nix-ci-aarch64.yml
@ -1,72 +0,0 @@
 name: Nix aarch64 builds
 on:
  workflow_dispatch: # allows manual triggering
  schedule:
    # Rebuild daily rather than on every push because QEMU is expensive (e.g.
    # 1.5h instead of minutes with the cold cache).
    #
    # randint(0, 59), randint(0, 23)
    - cron: '26 12 * * *'
  # But also rebuild if we touched any of the Nix expressions:
  push:
    branches:
      - master
    paths: ['**/*.nix', 'flake.lock']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['**/*.nix', 'flake.lock']
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
 # Fine-grant permission
 # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
 permissions:
  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
  id-token: write
  contents: read
 jobs:
  nix-build-aarch64:
    runs-on: ubuntu-latest
    steps:
    - name: Checkout repository
      uses: actions/checkout@v4
    - name: Install QEMU
      # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
      run: |
        sudo apt-get update
        sudo apt-get install -y qemu-user-static qemu-system-aarch64
        sudo usermod -a -G kvm $USER
    - name: Install Nix
      uses: DeterminateSystems/nix-installer-action@v9
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        extra-conf: |
          extra-platforms = aarch64-linux
          extra-system-features = nixos-test kvm
          extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
          extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
    - uses: DeterminateSystems/magic-nix-cache-action@v2
      with:
        upstream-cache: https://${{ matrix.cachixName }}.cachix.org
    - name: Set-up cachix to push the results to
      uses: cachix/cachix-action@v13
      with:
        authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
        name: llama-cpp
    - name: Show all output paths
      run: >
          nix run github:nix-community/nix-eval-jobs
          -- --gc-roots-dir gcroot
          --flake
          ".#packages.aarch64-linux"
    - name: Build
      run: >
          nix run github:Mic92/nix-fast-build
          -- --skip-cached --no-nom
          --systems aarch64-linux
          --flake
          ".#checks.aarch64-linux"
--- a/.github/workflows/nix-ci.yml
+++ b/.github/workflows/nix-ci.yml
@ -1,79 +0,0 @@
 name: Nix CI
 on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
  pull_request:
    types: [opened, synchronize, reopened]
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
 # Fine-grant permission
 # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
 permissions:
  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
  id-token: write
  contents: read
 jobs:
  nix-eval:
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest ]
    runs-on: ${{ matrix.os }}
    steps:
    - name: Checkout repository
      uses: actions/checkout@v4
    - name: Install Nix
      uses: DeterminateSystems/nix-installer-action@v9
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        extra-conf: |
          extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
          extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
    - uses: DeterminateSystems/magic-nix-cache-action@v2
      with:
        upstream-cache: https://${{ matrix.cachixName }}.cachix.org
    - name: List all flake outputs
      run: nix flake show --all-systems
    - name: Show all output paths
      run: >
          nix run github:nix-community/nix-eval-jobs
          -- --gc-roots-dir gcroot
          --flake
          ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
  nix-build:
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest ]
    runs-on: ${{ matrix.os }}
    steps:
    - name: Checkout repository
      uses: actions/checkout@v4
    - name: Install Nix
      uses: DeterminateSystems/nix-installer-action@v9
      with:
        github-token: ${{ secrets.GITHUB_TOKEN }}
        extra-conf: |
          extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
          extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
    - uses: DeterminateSystems/magic-nix-cache-action@v2
      with:
        upstream-cache: https://${{ matrix.cachixName }}.cachix.org
    - name: Set-up cachix to push the results to
      uses: cachix/cachix-action@v13
      with:
        authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
        name: llama-cpp
    - name: Build
      run: >
          nix run github:Mic92/nix-fast-build
          -- --skip-cached --no-nom
          --flake
          ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
--- a/.github/workflows/nix-flake-update.yml
+++ b/.github/workflows/nix-flake-update.yml
@ -1,22 +0,0 @@
 name: update-flake-lock
 on:
  workflow_dispatch:
  schedule:
    - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00
 jobs:
  lockfile:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install Nix
        uses: DeterminateSystems/nix-installer-action@main
      - name: Update flake.lock
        uses: DeterminateSystems/update-flake-lock@main
        with:
          pr-title: "nix: update flake.lock"
          pr-labels: |
            nix
          pr-reviewers: philiptaron,SomeoneSerge
          token: ${{ secrets.FLAKE_TOKEN }}
--- a/.github/workflows/nix-publish-flake.yml
+++ b/.github/workflows/nix-publish-flake.yml
@ -1,36 +0,0 @@
 # Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
 name: "Publish a flake to flakestry & flakehub"
 on:
    push:
        tags:
        - "*"
    workflow_dispatch:
        inputs:
            tag:
                description: "The existing tag to publish"
                type: "string"
                required: true
 jobs:
    flakestry-publish:
        runs-on: ubuntu-latest
        permissions:
            id-token: "write"
            contents: "read"
        steps:
            - uses: flakestry/flakestry-publish@main
              with:
                version: "${{ inputs.tag || github.ref_name }}"
    flakehub-publish:
      runs-on: "ubuntu-latest"
      permissions:
        id-token: "write"
        contents: "read"
      steps:
        - uses: "actions/checkout@v4"
          with:
            ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
        - uses: "DeterminateSystems/nix-installer-action@main"
        - uses: "DeterminateSystems/flakehub-push@main"
          with:
            visibility: "public"
            tag: "${{ inputs.tag }}"
--- a/.github/workflows/python-lint.yml
+++ b/.github/workflows/python-lint.yml
@ -1,23 +0,0 @@
 name: flake8 Lint
 on: [push, pull_request]
 concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
 jobs:
  flake8-lint:
    runs-on: ubuntu-latest
    name: Lint
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: flake8 Lint
        uses: py-actions/flake8@v2
        with:
            plugins: "flake8-no-print"
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@ -1,190 +0,0 @@
 # Server build and tests
 name: Server
 on:
  workflow_dispatch: # allows manual triggering
    inputs:
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean
  push:
    branches:
      - master
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
 env:
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1
  LLAMA_LOG_VERBOSITY: 10
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
 jobs:
  server:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
        build_type: [RelWithDebInfo]
        include:
          - build_type: Release
            sanitizer: ""
      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
    steps:
      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential \
            xxd \
            git \
            cmake \
            curl \
            wget \
            language-pack-en \
            libcurl4-openssl-dev
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt
      - name: Verify server deps
        id: verify_server_deps
        run: |
          git config --global --add safe.directory $(realpath .)
          cd examples/server
          git ls-files --others --modified
          git status
          ./deps.sh
          git status
          not_ignored_files="$(git ls-files --others --modified)"
          echo "Modified files: ${not_ignored_files}"
          if [ -n "${not_ignored_files}" ]; then
            echo "Repository is dirty or server deps are not built as expected"
            echo "${not_ignored_files}"
            exit 1
          fi
      - name: Build (no OpenMP)
        id: cmake_build_no_openmp
        if: ${{ matrix.sanitizer == 'THREAD' }}
        run: |
          cmake -B build \
              -DGGML_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
              -DGGML_OPENMP=OFF ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
      - name: Build
        id: cmake_build
        if: ${{ matrix.sanitizer != 'THREAD' }}
        run: |
          cmake -B build \
              -DGGML_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
      - name: Tests
        id: server_integration_tests
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh
      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
  server-windows:
    runs-on: windows-2019
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
      - name: libCURL
        id: get_libcurl
        env:
          CURL_VERSION: 8.6.0_6
        run: |
          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
          mkdir $env:RUNNER_TEMP/libcurl
          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
      - name: Build
        id: cmake_build
        run: |
          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt
      - name: Copy Libcurl
        id: prepare_libcurl
        run: |
          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
      - name: Tests
        id: server_integration_tests
        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
        run: |
          cd examples/server/tests
          $env:PYTHONIOENCODING = ":replace"
          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          behave.exe --stop --no-skipped --no-capture --tags slow
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -3,7 +3,22 @@ project("llama.cpp" C CXX)
 include(CheckIncludeFileCXX)
 #set(CMAKE_WARN_DEPRECATED YES)
-set(CMAKE_WARN_UNUSED_CLI YES)
+#set(CMAKE_WARN_UNUSED_CLI YES)
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 #no warn
 #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@ -51,7 +66,7 @@ endif()
 #
 # debug
-option(LLAMA_ALL_WARNINGS           "llama: enable all compiler warnings"                   ON)
+option(LLAMA_ALL_WARNINGS           "llama: enable all compiler warnings"                   OFF)
 option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
 # build
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@ -1,5 +1,15 @@
 # common
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 find_package(Threads REQUIRED)
 # Build info header
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -1,5 +1,15 @@
 # dependencies
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 find_package(Threads REQUIRED)
 # third-party
--- a/examples/llava/requirements.txt
+++ b/examples/llava/requirements.txt
@ -1,5 +1,5 @@
 -r ../../requirements/requirements-convert_legacy_llama.txt
 --extra-index-url https://download.pytorch.org/whl/cpu
-pillow~=10.2.0
+pillow~=10.4.0
 torch~=2.2.1
 torchvision~=0.17.1
--- a/examples/server/tests/requirements.txt
+++ b/examples/server/tests/requirements.txt
@ -1,7 +1,7 @@
-aiohttp~=3.9.3
+aiohttp~=3.10.10
 behave~=1.2.6
 huggingface_hub~=0.23.2
 numpy~=1.26.4
 openai~=1.30.3
 prometheus-client~=0.20.0
-requests~=2.32.3
+requests~=2.32.2
--- a/ggml/CMakeLists.txt
+++ b/ggml/CMakeLists.txt
@ -1,7 +1,17 @@
 cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
 project("ggml" C CXX)
 include(CheckIncludeFileCXX)
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@ -1,5 +1,15 @@
 include(CheckCXXCompilerFlag)
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 unset(GGML_CDEF_PUBLIC)
 add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
--- a/ggml/src/ggml-cann/kernels/CMakeLists.txt
+++ b/ggml/src/ggml-cann/kernels/CMakeLists.txt
@ -1,3 +1,14 @@
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 if (NOT SOC_TYPE)
    set (SOC_TYPE "Ascend910B3")
 endif()
--- a/ggml/src/vulkan-shaders/CMakeLists.txt
+++ b/ggml/src/vulkan-shaders/CMakeLists.txt
@ -1,3 +1,14 @@
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 find_package (Threads REQUIRED)
 set(TARGET vulkan-shaders-gen)
--- a/pocs/CMakeLists.txt
+++ b/pocs/CMakeLists.txt
@ -1,5 +1,15 @@
 # dependencies
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 find_package(Threads REQUIRED)
 # third-party
--- a/pocs/vdot/CMakeLists.txt
+++ b/pocs/vdot/CMakeLists.txt
@ -1,3 +1,14 @@
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 set(TARGET llama-vdot)
 add_executable(${TARGET} vdot.cpp)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
--- a/poetry.lock
+++ b/poetry.lock
@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
 [[package]]
 name = "atomicwrites"
@ -31,13 +31,13 @@ tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "p
 [[package]]
 name = "certifi"
-version = "2024.2.2"
+version = "2024.7.4"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"},
+    {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"},
-    {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"},
+    {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"},
 ]
 [[package]]
@ -251,24 +251,24 @@ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "t
 [[package]]
 name = "idna"
-version = "3.6"
+version = "3.7"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.5"
 files = [
-    {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"},
+    {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
-    {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"},
+    {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
 ]
 [[package]]
 name = "jinja2"
-version = "3.1.3"
+version = "3.1.4"
 description = "A very fast and expressive template engine."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
+    {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"},
-    {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
+    {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"},
 ]
 [package.dependencies]
@ -1066,13 +1066,13 @@ reference = "pytorch"
 [[package]]
 name = "tqdm"
-version = "4.66.2"
+version = "4.66.3"
 description = "Fast, Extensible Progress Meter"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"},
+    {file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"},
-    {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"},
+    {file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"},
 ]
 [package.dependencies]
@ -1165,13 +1165,13 @@ files = [
 [[package]]
 name = "urllib3"
-version = "2.2.1"
+version = "2.2.2"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
+    {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"},
-    {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
+    {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"},
 ]
 [package.extras]
@ -1194,4 +1194,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9"
-content-hash = "c8c4cc87637266a7b85debcbafa8887c5ad81cc8ef40e98a3f52c7c50af05c03"
+content-hash = "0bcd190f2fc81c926bb25e891c22ce86b513cc8aa6e40b3d73b5b18104caab88"
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -1,3 +1,14 @@
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 # TODO: should not use this
 if (WIN32)
    if (BUILD_SHARED_LIBS)
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -1,3 +1,14 @@
 #enable O3 flag
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
 endif()
 set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wno-unused-variable -Wno-unused-function -Wno-error")
 set(CMAKE_CXX_FLAGS_DEBUG "-g")
 set(CMAKE_CXX_FLAGS_RELEASE "-Ofast")
 #add lto
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 function(llama_test target)
    include(CMakeParseArguments)
    set(options)