From 2a11b6b0946c1abab2ab150725610e5ee736b3af Mon Sep 17 00:00:00 2001 From: bandoti <141645996+bandoti@users.noreply.github.com> Date: Tue, 19 Nov 2024 12:10:30 -0400 Subject: [PATCH 1/7] Add required ggml-base and backend libs to cmake pkg (#10407) --- cmake/llama-config.cmake.in | 52 +++++++++++++++++++++++-------------- ggml/CMakeLists.txt | 8 ++---- ggml/src/CMakeLists.txt | 2 +- 3 files changed, 35 insertions(+), 27 deletions(-) diff --git a/cmake/llama-config.cmake.in b/cmake/llama-config.cmake.in index a7f1efb88..28a8c18b6 100644 --- a/cmake/llama-config.cmake.in +++ b/cmake/llama-config.cmake.in @@ -3,17 +3,11 @@ set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@) set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@) set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@) -set(GGML_BLAS @GGML_BLAS@) -set(GGML_CUDA @GGML_CUDA@) -set(GGML_METAL @GGML_METAL@) -set(GGML_HIP @GGML_HIP@) set(GGML_ACCELERATE @GGML_ACCELERATE@) -set(GGML_VULKAN @GGML_VULKAN@) set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@) set(GGML_VULKAN_DEBUG @GGML_VULKAN_DEBUG@) set(GGML_VULKAN_MEMORY_DEBUG @GGML_VULKAN_MEMORY_DEBUG@) set(GGML_VULKAN_VALIDATE @GGML_VULKAN_VALIDATE@) -set(GGML_SYCL @GGML_SYCL@) set(GGML_OPENMP @GGML_OPENMP@) @PACKAGE_INIT@ @@ -22,10 +16,39 @@ set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@") set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@") set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@") -# Ensure transient dependencies satisfied - find_package(Threads REQUIRED) +set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@") +set(_llama_link_deps "") +foreach(_ggml_lib ggml ggml-base) + string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY") + find_library(${_ggml_lib_var} ${_ggml_lib} + REQUIRED + HINTS ${LLAMA_LIB_DIR} + NO_CMAKE_FIND_ROOT_PATH + ) + list(APPEND _llama_link_deps "${${_ggml_lib_var}}") + message(STATUS "Found ${${_ggml_lib_var}}") +endforeach() + +foreach(backend amx blas cann cpu cuda hip kompute metal musa rpc sycl vulkan) + string(TOUPPER "GGML_${backend}" backend_id) + set(_ggml_lib "ggml-${backend}") + string(REPLACE "-" "_" _ggml_lib_var "${_ggml_lib}_LIBRARY") + + find_library(${_ggml_lib_var} ${_ggml_lib} + HINTS ${LLAMA_LIB_DIR} + NO_CMAKE_FIND_ROOT_PATH + ) + if(${_ggml_lib_var}) + list(APPEND _llama_link_deps "${${_ggml_lib_var}}") + set(${backend_id} ON) + message(STATUS "Found backend ${${_ggml_lib_var}}") + else() + set(${backend_id} OFF) + endif() +endforeach() + if (APPLE AND GGML_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED) endif() @@ -48,7 +71,7 @@ if (GGML_VULKAN) find_package(Vulkan REQUIRED) endif() -if (GGML_HIPBLAS) +if (GGML_HIP) find_package(hip REQUIRED) find_package(hipblas REQUIRED) find_package(rocblas REQUIRED) @@ -63,24 +86,13 @@ if (GGML_OPENMP) find_package(OpenMP REQUIRED) endif() - -find_library(ggml_LIBRARY ggml - REQUIRED - HINTS ${LLAMA_LIB_DIR} - NO_CMAKE_FIND_ROOT_PATH -) - find_library(llama_LIBRARY llama REQUIRED HINTS ${LLAMA_LIB_DIR} NO_CMAKE_FIND_ROOT_PATH ) -set(_llama_link_deps "${ggml_LIBRARY}" "@GGML_LINK_LIBRARIES@") -set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@") - add_library(llama UNKNOWN IMPORTED) - set_target_properties(llama PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}" diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index b16a0e9ad..9ab91421a 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -235,12 +235,8 @@ set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") #if (GGML_METAL) # set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") #endif() -install(TARGETS ggml PUBLIC_HEADER) - -if (BUILD_SHARED_LIBS) - install(TARGETS ggml LIBRARY) - install(TARGETS ggml-base LIBRARY) -endif() +install(TARGETS ggml LIBRARY PUBLIC_HEADER) +install(TARGETS ggml-base LIBRARY) # FIXME: this should be done in the backend cmake files if (GGML_METAL) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index ae7d3abc8..8df0e85c0 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -239,8 +239,8 @@ function(ggml_add_backend backend) if (${BUILD_SHARED_LIBS}) target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_BUILD) target_compile_definitions(${backend_target} PUBLIC GGML_BACKEND_SHARED) - install(TARGETS ${backend_target} LIBRARY) endif() + install(TARGETS ${backend_target} LIBRARY) target_link_libraries(ggml PUBLIC ${backend_target}) string(TOUPPER "GGML_USE_${backend}" backend_use) target_compile_definitions(ggml PUBLIC ${backend_use}) From 342397dc7edb311e0373205134d0d3a928b891b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=95=AD=E6=BE=A7=E9=82=A6?= <45505768+shou692199@users.noreply.github.com> Date: Wed, 20 Nov 2024 01:42:00 +0800 Subject: [PATCH 2/7] cmake: force MSVC compiler charset to utf-8 (#9989) --- CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93c60ef43..994e61e45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,13 @@ if (WIN32) add_compile_definitions(_CRT_SECURE_NO_WARNINGS) endif() +if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + add_compile_options("$<$:/source-charset:utf-8>") + add_compile_options("$<$:/source-charset:utf-8>") + add_compile_options("$<$:/execution-charset:utf-8>") + add_compile_options("$<$:/execution-charset:utf-8>") +endif() + # # option list # From 12b0ad953a59563ea8d973708760d747321d8432 Mon Sep 17 00:00:00 2001 From: PAB Date: Mon, 18 Nov 2024 10:02:49 +0100 Subject: [PATCH 3/7] metal : add `GGML_UNARY_OP_ELU` kernel (ggml/1018) --- ggml/src/ggml-metal/ggml-metal.m | 15 +++++++++++++++ ggml/src/ggml-metal/ggml-metal.metal | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m index 58fee4bfd..d1abb3cef 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -126,6 +126,7 @@ enum ggml_metal_kernel_type { GGML_METAL_KERNEL_TYPE_GELU_QUICK_4, GGML_METAL_KERNEL_TYPE_SILU, GGML_METAL_KERNEL_TYPE_SILU_4, + GGML_METAL_KERNEL_TYPE_ELU, GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16, GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4, GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32, @@ -649,6 +650,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU_QUICK_4, gelu_quick_4, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU, silu, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU_4, silu_4, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ELU, elu, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16, soft_max_f16, has_simdgroup_reduction); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4, soft_max_f16_4, has_simdgroup_reduction); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32, soft_max_f32, has_simdgroup_reduction); @@ -968,6 +970,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_SILU: + case GGML_UNARY_OP_ELU: return ggml_is_contiguous(op->src[0]); default: return false; @@ -1589,6 +1592,18 @@ static void ggml_metal_encode_node( [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; } break; + case GGML_UNARY_OP_ELU: + { + id pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ELU].pipeline; + + [encoder setComputePipelineState:pipeline]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; default: { GGML_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op)); diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 86fdf1c18..819b20ba8 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -782,6 +782,14 @@ kernel void kernel_silu_4( dst[tpig] = x / (1.0f + exp(-x)); } +kernel void kernel_elu( + device const float * src0, + device float * dst, + uint tpig[[thread_position_in_grid]]) { + device const float & x = src0[tpig]; + dst[tpig] = (x > 0.0f) ? x : (exp(x) - 1.0f); +} + kernel void kernel_sqr( device const float * src0, device float * dst, From 611fabd7922050e1e99bd276d3544527cd46047b Mon Sep 17 00:00:00 2001 From: Plamen Minev Date: Mon, 18 Nov 2024 15:02:27 +0200 Subject: [PATCH 4/7] metal : fox offset integer overflows in im2col (ggml/1015) -- While running StableDiffusion.cpp locally with Metal some offsets overflow and results in incorrect calculations --- ggml/src/ggml-metal/ggml-metal.metal | 52 ++++++++++++++++++---------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 819b20ba8..971f5054b 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -2145,20 +2145,34 @@ kernel void kernel_im2col( uint3 tgpg[[threadgroups_per_grid]], uint3 tpitg[[thread_position_in_threadgroup]], uint3 ntg[[threads_per_threadgroup]]) { - const int32_t iiw = tgpig[2] * s0 + tpitg[2] * d0 - p0; - const int32_t iih = tgpig[1] * s1 + tpitg[1] * d1 - p1; +// const int64_t IC = tgpg[0]; + const int64_t OH = tgpg[1]; + const int64_t OW = tgpg[2]; - const int32_t offset_dst = - (tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW + - (tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]); +// const int64_t N = ntg[0]; + const int64_t KH = ntg[1]; + const int64_t KW = ntg[2]; + + const int64_t in = tpitg[0]; + const int64_t ikh = tpitg[1]; + const int64_t ikw = tpitg[2]; + + const int64_t iic = tgpig[0]; + const int64_t ioh = tgpig[1]; + const int64_t iow = tgpig[2]; + + const int64_t iiw = iow*s0 + ikw*d0 - p0; + const int64_t iih = ioh*s1 + ikh*d1 - p1; + + const int64_t offset_dst = (in*OH*OW + ioh*OW + iow)*CHW + (iic*(KH*KW) + ikh*KW + ikw); device T * pdst = (device T *) (dst); if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) { pdst[offset_dst] = 0.0f; } else { - const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1; - pdst[offset_dst] = x[offset_src + iih * IW + iiw]; + const int64_t offset_src = in*ofs0 + iic*ofs1 + iih*IW + iiw; + pdst[offset_dst] = x[offset_src]; } } @@ -2209,25 +2223,25 @@ kernel void kernel_im2col_ext( uint3 tgpg[[threadgroups_per_grid]], // tgpg[0] = D x IC x KH x KW, CHW = IC x KH x KW uint3 tpitg[[thread_position_in_threadgroup]], uint3 ntg[[threads_per_threadgroup]]) { // [M, 1, 1] - const int32_t KHW = KH * KW; // KHW == ntg[1] * ntg[2], KW == ntg[2] + const int64_t KHW = KH * KW; // KHW == ntg[1] * ntg[2], KW == ntg[2] - const int32_t d = tgpig[0] / CHW; - const int32_t chw = tgpig[0] % CHW; - const int32_t tgpig_0 = chw / KHW; // 0 ~ (IC - 1) - const int32_t HW = tgpig[0] % KHW; + const int64_t d = tgpig[0] / CHW; + const int64_t chw = tgpig[0] % CHW; + const int64_t tgpig_0 = chw / KHW; // 0 ~ (IC - 1) + const int64_t HW = tgpig[0] % KHW; - const int32_t tpitg_0 = (d * ntg[0]) + tpitg[0]; + const int64_t tpitg_0 = (d * ntg[0]) + tpitg[0]; if (tpitg_0 >= N) { return; } - const int32_t tpitg_1 = HW / KW; - const int32_t tpitg_2 = HW % KW; + const int64_t tpitg_1 = HW / KW; + const int64_t tpitg_2 = HW % KW; - const int32_t iiw = tgpig[2] * s0 + tpitg_2 * d0 - p0; - const int32_t iih = tgpig[1] * s1 + tpitg_1 * d1 - p1; + const int64_t iiw = tgpig[2] * s0 + tpitg_2 * d0 - p0; + const int64_t iih = tgpig[1] * s1 + tpitg_1 * d1 - p1; - const int32_t offset_dst = + const int64_t offset_dst = (tpitg_0 * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW + (tgpig_0 * KHW + tpitg_1 * KW + tpitg_2); @@ -2236,7 +2250,7 @@ kernel void kernel_im2col_ext( if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) { pdst[offset_dst] = 0.0f; } else { - const int32_t offset_src = tpitg_0 * ofs0 + tgpig_0 * ofs1; + const int64_t offset_src = tpitg_0 * ofs0 + tgpig_0 * ofs1; pdst[offset_dst] = x[offset_src + iih * IW + iiw]; } } From 9fe0fb062630728e3c21b5839e3bce87bff2440a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 19 Nov 2024 19:15:50 +0200 Subject: [PATCH 5/7] sync : ggml --- scripts/sync-ggml.last | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index 6ddb71ab1..e9bd2dbb0 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -9d0708e863f3aa2fc1eb0b75d433303c30bd0dbc +2884dd72fea8922910fe53387c3d17ab928d3a8e From 42ae10bbcd7b56f29a302c86796542a6dadf46c9 Mon Sep 17 00:00:00 2001 From: haopeng <657407891@qq.com> Date: Wed, 20 Nov 2024 04:10:31 +0800 Subject: [PATCH 6/7] add cmake rvv support (#10411) --- ggml/CMakeLists.txt | 1 + ggml/src/ggml-cpu/CMakeLists.txt | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 9ab91421a..2d32da1b6 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -109,6 +109,7 @@ if (NOT MSVC) endif() option(GGML_LASX "ggml: enable lasx" ON) option(GGML_LSX "ggml: enable lsx" ON) +option(GGML_RVV "ggml: enable rvv" ON) option(GGML_SVE "ggml: enable SVE" OFF) if (WIN32) diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index cef41a074..288052333 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -244,6 +244,11 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64") if (GGML_LSX) list(APPEND ARCH_FLAGS -mlsx) endif() +elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64") + message(STATUS "RISC-V detected") + if (GGML_RVV) + list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d) + endif() else() message(STATUS "Unknown architecture") endif() From 3952a221af54b8a6549bc2bd4a7363ef7ad3081e Mon Sep 17 00:00:00 2001 From: Anthony Van de Gejuchte Date: Tue, 19 Nov 2024 23:18:17 +0100 Subject: [PATCH 7/7] Fix missing file renames in Makefile due to changes in commit ae8de6d50a (#10413) --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 95110d4eb..5c8994385 100644 --- a/Makefile +++ b/Makefile @@ -730,10 +730,10 @@ GLSLC_CMD = glslc _ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen _ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp _ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp -_ggml_vk_input_dir = ggml/src/vulkan-shaders +_ggml_vk_input_dir = ggml/src/ggml-vulkan/vulkan-shaders _ggml_vk_shader_deps = $(echo $(_ggml_vk_input_dir)/*.comp) -ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source) +ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source) $(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@ $(_ggml_vk_header): $(_ggml_vk_source) @@ -745,8 +745,8 @@ $(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen --target-hpp $(_ggml_vk_header) \ --target-cpp $(_ggml_vk_source) -vulkan-shaders-gen: ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp - $(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp + $(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp endif # GGML_VULKAN