From 5a54af4d4f588f109f31e456483fdf77096399d9 Mon Sep 17 00:00:00 2001 From: Romain Biessy Date: Fri, 15 Nov 2024 04:09:12 +0100 Subject: [PATCH 001/599] sycl: Use syclcompat::dp4a (#10267) * sycl: Use syclcompat::dp4a * Using the syclcompat version allow the compiler to optimize the operation with native function * Update news section * Update CI Windows oneAPI version to 2025.0 * Reword doc * Call syclcompat::dp4a inside dpct::dp4a This reverts commit 90cb61d692d61360b46954a1c7f780bd2e569b73. --- .github/workflows/build.yml | 2 +- docs/backend/SYCL.md | 2 ++ ggml/src/ggml-sycl/dpct/helper.hpp | 24 ++---------------------- ggml/src/ggml-sycl/vecdotq.hpp | 8 ++++---- 4 files changed, 9 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d6a7b66a5..c770bbd15 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -930,7 +930,7 @@ jobs: shell: bash env: - WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7dff44ba-e3af-4448-841c-0d616c8da6e7/w_BaseKit_p_2024.1.0.595_offline.exe + WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI" steps: diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index bc8c0f886..38185f738 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -41,6 +41,8 @@ The following release is verified with good quality: ## News +- 2024.11 + - Use syclcompat to improve the performance on some platforms. This requires to use oneAPI 2025.0 or newer. - 2024.8 - Use oneDNN as the default GEMM library, improve the compatibility for new Intel GPUs. diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index fe4a8f744..c2f28bb49 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -1830,31 +1831,10 @@ namespace dpct : id); } - template - sycl::vec extract_and_sign_or_zero_extend4(T val) - { - return sycl::vec(val) - .template as, int8_t, uint8_t>, 4>>() - .template convert(); - } - - template - using dot_product_acc_t = - std::conditional_t && std::is_unsigned_v, - uint32_t, int32_t>; - template inline auto dp4a(T1 a, T2 b, T3 c) { - dot_product_acc_t res = c; - auto va = extract_and_sign_or_zero_extend4(a); - auto vb = extract_and_sign_or_zero_extend4(b); - res += va[0] * vb[0]; - res += va[1] * vb[1]; - res += va[2] * vb[2]; - res += va[3] * vb[3]; - return res; + return syclcompat::dp4a(a, b, c); } struct sub_sat diff --git a/ggml/src/ggml-sycl/vecdotq.hpp b/ggml/src/ggml-sycl/vecdotq.hpp index d2dccade2..c5942008a 100644 --- a/ggml/src/ggml-sycl/vecdotq.hpp +++ b/ggml/src/ggml-sycl/vecdotq.hpp @@ -968,8 +968,8 @@ vec_dot_iq3_xxs_q8_1(const void *__restrict__ vbq, grid1[0] ^ signs[0], signs[0], std::minus<>()); const int grid_h = dpct::vectorized_binary( grid2[0] ^ signs[1], signs[1], std::minus<>()); - sumi = dpct::dp4a(grid_l, *((int *)q8 + 0), sumi); - sumi = dpct::dp4a(grid_h, *((int *)q8 + 1), sumi); + sumi = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi); + sumi = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi); q8 += 8; aux32 >>= 7; } @@ -1009,8 +1009,8 @@ vec_dot_iq3_s_q8_1(const void *__restrict__ vbq, grid1[0] ^ signs0, signs0, std::minus<>()); const int grid_h = dpct::vectorized_binary( grid2[0] ^ signs1, signs1, std::minus<>()); - sumi = dpct::dp4a(grid_l, *((int *)q8 + 0), sumi); - sumi = dpct::dp4a(grid_h, *((int *)q8 + 1), sumi); + sumi = dpct::dp4a(grid_l, *((const int *)q8 + 0), sumi); + sumi = dpct::dp4a(grid_h, *((const int *)q8 + 1), sumi); q8 += 8; } const float d = From 4802ad350b8e19cbc7a77269b4494c896f6e0896 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 15 Nov 2024 08:38:43 +0200 Subject: [PATCH 002/599] scripts : fix regex in sync [no ci] --- scripts/sync-ggml-am.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh index 06a04745b..74d6c6c8b 100755 --- a/scripts/sync-ggml-am.sh +++ b/scripts/sync-ggml-am.sh @@ -144,17 +144,17 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then -e 's/([[:space:]]|[ab]\/)CMakeLists.txt/\1ggml\/CMakeLists.txt/g' \ -e 's/([[:space:]]|[ab]\/)src\/CMakeLists.txt/\1ggml\/src\/CMakeLists.txt/g' \ -e 's/([[:space:]]|[ab]\/)cmake\/FindSIMD.cmake/\1ggml\/cmake\/FindSIMD.cmake/g' \ - -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.c/\1ggml\/src\/ggml\1.c/g' \ - -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cpp/\1ggml\/src\/ggml\1.cpp/g' \ - -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.h/\1ggml\/src\/ggml\1.h/g' \ - -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cu/\1ggml\/src\/ggml\1.cu/g' \ - -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.m/\1ggml\/src\/ggml\1.m/g' \ + -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.c/\1ggml\/src\/ggml\2.c/g' \ + -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cpp/\1ggml\/src\/ggml\2.cpp/g' \ + -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.h/\1ggml\/src\/ggml\2.h/g' \ + -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.cu/\1ggml\/src\/ggml\2.cu/g' \ + -e 's/([[:space:]]|[ab]\/)src\/ggml(.*)\.m/\1ggml\/src\/ggml\2.m/g' \ -e 's/([[:space:]]|[ab]\/)src\/ggml-amx\//\1ggml\/src\/ggml-amx\//g' \ -e 's/([[:space:]]|[ab]\/)src\/ggml-cann\//\1ggml\/src\/ggml-cann\//g' \ -e 's/([[:space:]]|[ab]\/)src\/ggml-cuda\//\1ggml\/src\/ggml-cuda\//g' \ -e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\//\1ggml\/src\/ggml-sycl\//g' \ -e 's/([[:space:]]|[ab]\/)src\/vulkan-shaders\//\1ggml\/src\/vulkan-shaders\//g' \ - -e 's/([[:space:]]|[ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\1.h/g' \ + -e 's/([[:space:]]|[ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\2.h/g' \ -e 's/([[:space:]]|[ab]\/)examples\/common\.h/\1examples\/common.h/g' \ -e 's/([[:space:]]|[ab]\/)examples\/common\.cpp/\1examples\/common.cpp/g' \ -e 's/([[:space:]]|[ab]\/)examples\/common-ggml\.h/\1examples\/common-ggml.h/g' \ From 231f9360d94446cd083b6b116f63991b1328c484 Mon Sep 17 00:00:00 2001 From: Chenguang Li <87689256+noemotiovon@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:09:35 +0800 Subject: [PATCH 003/599] cann: dockerfile and doc adjustment (#10302) Co-authored-by: noemotiovon --- .devops/llama-cli-cann.Dockerfile | 4 ++-- docs/build.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.devops/llama-cli-cann.Dockerfile b/.devops/llama-cli-cann.Dockerfile index db5ba2f25..45c0585b0 100644 --- a/.devops/llama-cli-cann.Dockerfile +++ b/.devops/llama-cli-cann.Dockerfile @@ -1,6 +1,6 @@ ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8 -FROM cosdt/cann:$ASCEND_VERSION AS build +FROM ascendai/cann:$ASCEND_VERSION AS build WORKDIR /app @@ -26,7 +26,7 @@ RUN echo "Building with static libs" && \ cmake --build build --config Release --target llama-cli # TODO: use image with NNRT -FROM cosdt/cann:$ASCEND_VERSION AS runtime +FROM ascendai/cann:$ASCEND_VERSION AS runtime COPY --from=build /app/build/bin/llama-cli /llama-cli ENV LC_ALL=C.utf8 diff --git a/docs/build.md b/docs/build.md index 95512415a..52de2b4e2 100644 --- a/docs/build.md +++ b/docs/build.md @@ -375,7 +375,7 @@ cmake --build build --config release You can test with: -`./build/llama-cli -m PATH_TO_MODEL -p "Building a website can be done in 10 steps:" -ngl 32` +`./build/bin/llama-cli -m PATH_TO_MODEL -p "Building a website can be done in 10 steps:" -ngl 32` If the fllowing info is output on screen, you are using `llama.cpp by CANN backend`: ```bash From 9901068ac78838745e604fffb4601d315a610456 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Fri, 15 Nov 2024 05:48:49 -0400 Subject: [PATCH 004/599] server : (web UI) add copy button for code block, fix api key (#10242) * server : (web ui) add copy btn for code blocks * fix problem with api key * use settings-modal-short-input component * always show copy btn for code snippet --- examples/server/public/index.html | 62 +++++++++++++++++++++---------- examples/server/server.cpp | 42 +++++++++++++-------- 2 files changed, 68 insertions(+), 36 deletions(-) diff --git a/examples/server/public/index.html b/examples/server/public/index.html index 55639a944..65a915d59 100644 --- a/examples/server/public/index.html +++ b/examples/server/public/index.html @@ -12,7 +12,7 @@ .markdown { h1, h2, h3, h4, h5, h6, ul, ol, li { all: revert; } pre { - @apply whitespace-pre-wrap my-4 rounded-lg p-2; + @apply whitespace-pre-wrap rounded-lg p-2; border: 1px solid currentColor; } /* TODO: fix markdown table */ @@ -25,8 +25,11 @@ .bg-base-200 {background-color: var(--fallback-b2,oklch(var(--b2)/1))} .bg-base-300 {background-color: var(--fallback-b3,oklch(var(--b3)/1))} .text-base-content {color: var(--fallback-bc,oklch(var(--bc)/1))} + .show-on-hover { + @apply opacity-0 group-hover:opacity-100; + } .btn-mini { - @apply cursor-pointer opacity-0 group-hover:opacity-100 hover:shadow-md; + @apply cursor-pointer hover:shadow-md; } .chat-screen { max-width: 900px; } /* because the default bubble color is quite dark, we will make a custom one using bg-base-300 */ @@ -152,14 +155,14 @@
- - -
@@ -196,12 +199,13 @@

Settings

Settings below are saved in browser's localStorage

+ @@ -209,7 +213,7 @@ Other sampler settings
@@ -218,7 +222,7 @@ Penalties settings
@@ -245,7 +249,7 @@
-