From 1a05004743e00aca400833be625f0ec8cce176a7 Mon Sep 17 00:00:00 2001 From: Borislav Stanimirov Date: Mon, 9 Dec 2024 09:15:13 +0200 Subject: [PATCH 1/4] cmake : simplify msvc charsets (#10672) --- CMakeLists.txt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f84fff9e6..a717a508f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,11 +46,9 @@ if (WIN32) add_compile_definitions(_CRT_SECURE_NO_WARNINGS) endif() -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") - add_compile_options("$<$:/source-charset:utf-8>") - add_compile_options("$<$:/source-charset:utf-8>") - add_compile_options("$<$:/execution-charset:utf-8>") - add_compile_options("$<$:/execution-charset:utf-8>") +if (MSVC) + add_compile_options("$<$:/utf-8>") + add_compile_options("$<$:/utf-8>") endif() # From 3d98b4cb226c3140bd1ae6c65ed126b7d90332fa Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Mon, 9 Dec 2024 01:24:01 -0600 Subject: [PATCH 2/4] vulkan: fix compile warnings (#10731) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 9e2de9439..5d9eba983 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -427,7 +427,7 @@ static_assert(sizeof(vk_op_unary_push_constants) <= 128, "sizeof(vk_op_unary_pus // and a shift: // // n/d = (mulhi(n, mp) + n) >> L; -void init_fastdiv_values(uint32_t d, uint32_t &mp, uint32_t &L) +static void init_fastdiv_values(uint32_t d, uint32_t &mp, uint32_t &L) { // compute L = ceil(log2(d)); L = 0; @@ -439,6 +439,7 @@ void init_fastdiv_values(uint32_t d, uint32_t &mp, uint32_t &L) } template void init_pushconst_fastdiv(T &p) { + GGML_UNUSED(p); static_assert(!std::is_const::value, "unexpected type"); } @@ -3417,7 +3418,7 @@ static uint32_t ggml_vk_guess_split_k(ggml_backend_vk_context * ctx, int m, int return split_k; } -static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned, ggml_type type_a) { +static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, bool aligned) { VK_LOG_DEBUG("ggml_vk_guess_matmul_pipeline(" << m << ", " << n << ", " << aligned << ")"); if (ctx->device->coopmat2) { @@ -3439,9 +3440,9 @@ static vk_pipeline ggml_vk_guess_matmul_pipeline(ggml_backend_vk_context * ctx, return aligned ? mmp->a_l : mmp->l; } -static uint32_t ggml_vk_guess_matmul_pipeline_align(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n, ggml_type type_a) { +static uint32_t ggml_vk_guess_matmul_pipeline_align(ggml_backend_vk_context * ctx, vk_matmul_pipeline& mmp, int m, int n) { VK_LOG_DEBUG("ggml_vk_guess_matmul_pipeline_align(" << m << ", " << n << ")"); - return ggml_vk_guess_matmul_pipeline(ctx, mmp, m, n, true, type_a)->align; + return ggml_vk_guess_matmul_pipeline(ctx, mmp, m, n, true)->align; } static void ggml_vk_matmul( @@ -3571,6 +3572,7 @@ static void ggml_vk_cpy_to_contiguous(ggml_backend_vk_context * ctx, vk_context& (uint32_t)tensor->ne[0], (uint32_t)tensor->ne[1], (uint32_t)tensor->ne[2], (uint32_t)tensor->ne[3], 1 , (uint32_t)tensor->ne[0] , (uint32_t)(tensor->ne[0] * tensor->ne[1]) , (uint32_t)(tensor->ne[0] * tensor->ne[1] * tensor->ne[2]), 0, 0.0f, 0.0f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; init_pushconst_fastdiv(pc); ggml_vk_sync_buffers(subctx); @@ -3644,10 +3646,10 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub const int y_ne = ne11 * ne10; const int d_ne = ne11 * ne01; - const uint32_t kpad = ggml_vk_align_size(ne10, ggml_vk_guess_matmul_pipeline_align(ctx, mmp, ne01, ne11, src0->type)); + const uint32_t kpad = ggml_vk_align_size(ne10, ggml_vk_guess_matmul_pipeline_align(ctx, mmp, ne01, ne11)); const bool aligned = ne10 == kpad && ne01 > 8 && ne11 > 8; - vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline(ctx, mmp, ne01, ne11, aligned, src0->type); + vk_pipeline pipeline = ggml_vk_guess_matmul_pipeline(ctx, mmp, ne01, ne11, aligned); const uint32_t split_k = ggml_vk_guess_split_k(ctx, ne01, ne11, ne10, pipeline); @@ -5351,7 +5353,8 @@ static void ggml_vk_scale(ggml_backend_vk_context * ctx, vk_context& subctx, con (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, 0, - op_params[0], 0.0f + op_params[0], 0.0f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, dryrun); } @@ -5365,6 +5368,7 @@ static void ggml_vk_sqr(ggml_backend_vk_context * ctx, vk_context& subctx, const (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, 0, 0.0f, 0.0f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, dryrun); } @@ -5378,6 +5382,7 @@ static void ggml_vk_sin(ggml_backend_vk_context * ctx, vk_context& subctx, const (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, 0, 0.0f, 0.0f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, dryrun); } @@ -5391,6 +5396,7 @@ static void ggml_vk_cos(ggml_backend_vk_context * ctx, vk_context& subctx, const (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, 0, 0.0f, 0.0f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, dryrun); } @@ -5405,6 +5411,7 @@ static void ggml_vk_clamp(ggml_backend_vk_context * ctx, vk_context& subctx, con (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, 0, op_params[0], op_params[1], + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, dryrun); } @@ -5418,6 +5425,7 @@ static void ggml_vk_pad(ggml_backend_vk_context * ctx, vk_context& subctx, const (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, 0, 0.0f, 0.0f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, dryrun); } @@ -5431,6 +5439,7 @@ static void ggml_vk_repeat(ggml_backend_vk_context * ctx, vk_context& subctx, co (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, 0, 0.0f, 0.0f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, dryrun); } @@ -5445,6 +5454,7 @@ static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, d_offset, 0.0f, 0.0f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, dryrun); } From c37fb4cf62ddf0d33562c4c4a4d6fb45e32ad3b6 Mon Sep 17 00:00:00 2001 From: Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com> Date: Mon, 9 Dec 2024 23:10:19 +0530 Subject: [PATCH 3/4] Changes to CMakePresets.json to add ninja clang target on windows (#10668) * Update cmakepreset.json to use clang with ninja by default * Update cmakepreset.json to add clang and ninja based configs * Updates to build.md file * Make updates to rename preset targets * Update with .cmake file * Remove additional whitespaces * Add .cmake file for x64-windows-llvm * Update docs/build.md * Update docs/build.md --------- Co-authored-by: Max Krasnyansky --- CMakePresets.json | 12 ++++++++++++ cmake/x64-windows-llvm.cmake | 11 +++++++++++ docs/build.md | 7 +++++++ 3 files changed, 30 insertions(+) create mode 100644 cmake/x64-windows-llvm.cmake diff --git a/CMakePresets.json b/CMakePresets.json index 436448967..13bdd7907 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -31,6 +31,13 @@ { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } }, { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } }, + { + "name": "x64-windows-llvm", "hidden": true, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake" + } + }, + { "name": "arm64-windows-msvc", "hidden": true, "architecture": { "value": "arm64", "strategy": "external" }, @@ -70,6 +77,11 @@ { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] }, { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] }, + { "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] }, + { "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] }, + { "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] }, + { "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] }, + { "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] }, { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] }, { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] }, diff --git a/cmake/x64-windows-llvm.cmake b/cmake/x64-windows-llvm.cmake new file mode 100644 index 000000000..0603d738f --- /dev/null +++ b/cmake/x64-windows-llvm.cmake @@ -0,0 +1,11 @@ +set( CMAKE_SYSTEM_NAME Windows ) +set( CMAKE_SYSTEM_PROCESSOR x86_64 ) + +set( CMAKE_C_COMPILER clang ) +set( CMAKE_CXX_COMPILER clang++ ) + +set( arch_c_flags "-march=native" ) + +set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" ) +set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" ) + diff --git a/docs/build.md b/docs/build.md index 26e673788..84019b204 100644 --- a/docs/build.md +++ b/docs/build.md @@ -57,6 +57,13 @@ cmake --build build --config Release ``` Building for arm64 can also be done with the MSVC compiler with the build-arm64-windows-MSVC preset, or the standard CMake build instructions. However, note that the MSVC compiler does not support inline ARM assembly code, used e.g. for the accelerated Q4_0_N_M CPU kernels. + For building with ninja generator and clang compiler as default: + -set path:set LIB=C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\um\x64;C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.41.34120\lib\x64\uwp;C:\Program Files (x86)\Windows Kits\10\Lib\10.0.22621.0\ucrt\x64 + ```bash + cmake --preset x64-windows-llvm-release + cmake --build build-x64-windows-llvm-release + ``` + ## BLAS Build Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Using BLAS doesn't affect the generation performance. There are currently several different BLAS implementations available for build and use: From 26a8406ba9198eb6fdd8329fa717555b4f77f05f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Mon, 9 Dec 2024 20:07:12 +0100 Subject: [PATCH 4/4] CUDA: fix shared memory access condition for mmv (#10740) --- ggml/src/ggml-cuda/mmv.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/mmv.cu b/ggml/src/ggml-cuda/mmv.cu index cfe91f428..a4b4f6bc1 100644 --- a/ggml/src/ggml-cuda/mmv.cu +++ b/ggml/src/ggml-cuda/mmv.cu @@ -57,7 +57,7 @@ static __global__ void mul_mat_vec( if (block_size > WARP_SIZE) { buf_iw[tid/WARP_SIZE] = sumf; __syncthreads(); - if (tid > WARP_SIZE) { + if (tid >= WARP_SIZE) { return; } sumf = buf_iw[tid];