From 888d790d224a3772d413b0363981a0c181d48bec Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 22 Jun 2024 16:51:59 +0300 Subject: [PATCH] cmake : fixes [no ci] --- CMakeLists.txt | 804 +----------------- Makefile | 15 +- ci/run.sh | 2 +- {scripts => cmake}/build-info.cmake | 0 cmake/git-vars.cmake | 22 + .../llama-config.cmake.in | 20 +- common/CMakeLists.txt | 2 +- .../cmake/build-info-gen-cpp.cmake | 4 +- ggml/CMakeLists.txt | 3 +- ggml/src/CMakeLists.txt | 143 ++-- ggml/src/ggml-cuda/fattn-common.cuh | 4 +- scripts/build-info.sh | 10 +- src/CMakeLists.txt | 9 + 13 files changed, 166 insertions(+), 872 deletions(-) rename {scripts => cmake}/build-info.cmake (100%) create mode 100644 cmake/git-vars.cmake rename scripts/LlamaConfig.cmake.in => cmake/llama-config.cmake.in (80%) rename scripts/gen-build-info-cpp.cmake => common/cmake/build-info-gen-cpp.cmake (86%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 435b28062..18297834e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,11 +12,16 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() +# Add path to modules +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(LLAMA_STANDALONE ON) + include(git-vars) + # configure project version # TODO else() @@ -42,14 +47,14 @@ option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) # # general -option(LLAMA_CCACHE "llama: use ccache if available" ON) +option(LLAMA_CCACHE "llama: use ccache if available" ON) # debug option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) # build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) # sanitizers option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) @@ -62,67 +67,13 @@ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE}) # 3rd party libs -<<<<<<< HEAD -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" ${LLAMA_BLAS_DEFAULT}) -set(LLAMA_BLAS_VENDOR ${LLAMA_BLAS_VENDOR_DEFAULT} CACHE STRING - "llama: BLAS library vendor") -option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT}) -option(LLAMA_CUDA "llama: use CUDA" OFF) -option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF) -option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) -option(LLAMA_CUDA_FORCE_MMQ "llama: always use mmq kernels instead of cuBLAS" OFF) -option(LLAMA_CUDA_FORCE_CUBLAS "llama: always use cuBLAS instead of mmq kernels" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") -option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) -set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") -set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING - "llama: max. batch size for using peer access") -option(LLAMA_CUDA_NO_PEER_COPY "llama: do not use peer to peer copies" OFF) -option(LLAMA_CUDA_NO_VMM "llama: do not try to use CUDA VMM" OFF) -option(LLAMA_CUDA_FA_ALL_QUANTS "llama: compile all quants for FlashAttention" OFF) - -option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) -option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) -option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) -option(LLAMA_VULKAN "llama: use Vulkan" OFF) -option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) -option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) -option(LLAMA_VULKAN_MEMORY_DEBUG "llama: enable Vulkan memory debug output" OFF) -option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) -option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) -option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) -option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) -option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) -option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) -set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING - "llama: metal minimum macOS version") -set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)") -option(LLAMA_KOMPUTE "llama: use Kompute" OFF) -option(LLAMA_RPC "llama: use RPC" OFF) -option(LLAMA_OPENMP "llama: use OpenMP" ON) -option(LLAMA_SYCL "llama: use SYCL" OFF) -option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) -set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device") -option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) -set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism") - -option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) -option(LLAMA_BUILD_SERVER "llama: build server example" ON) -option(LLAMA_LASX "llama: enable lasx" ON) -option(LLAMA_LSX "llama: enable lsx" ON) -======= option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) ->>>>>>> 9839374f (files : relocate [no ci]) # Required for relocatable CMake package -include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake) +include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) # override ggml options set(GGML_CCACHE ${LLAMA_CCACHE}) -set(GGML_BUILD_SHARED_LIBS ${LLAMA_BUILD_SHARED_LIBS}) set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD}) set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS}) set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED}) @@ -136,731 +87,6 @@ function (llama_option_depr TYPE OLD NEW) message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n") set(${NEW} ON) endif() -<<<<<<< HEAD - - if (LLAMA_SANITIZE_ADDRESS) - add_compile_options(-fsanitize=address -fno-omit-frame-pointer) - link_libraries (-fsanitize=address) - endif() - - if (LLAMA_SANITIZE_UNDEFINED) - add_compile_options(-fsanitize=undefined) - link_libraries (-fsanitize=undefined) - endif() -endif() - -if (APPLE AND LLAMA_ACCELERATE) - find_library(ACCELERATE_FRAMEWORK Accelerate) - if (ACCELERATE_FRAMEWORK) - message(STATUS "Accelerate framework found") - - add_compile_definitions(GGML_USE_ACCELERATE) - add_compile_definitions(ACCELERATE_NEW_LAPACK) - add_compile_definitions(ACCELERATE_LAPACK_ILP64) - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) - else() - message(WARNING "Accelerate framework not found") - endif() -endif() - -if (LLAMA_METAL) - find_library(FOUNDATION_LIBRARY Foundation REQUIRED) - find_library(METAL_FRAMEWORK Metal REQUIRED) - find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) - - message(STATUS "Metal framework found") - set(GGML_HEADERS_METAL ggml-metal.h) - set(GGML_SOURCES_METAL ggml-metal.m) - - add_compile_definitions(GGML_USE_METAL) - if (LLAMA_METAL_NDEBUG) - add_compile_definitions(GGML_METAL_NDEBUG) - endif() - - # copy ggml-common.h and ggml-metal.metal to bin directory - configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY) - configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY) - - if (LLAMA_METAL_EMBED_LIBRARY) - enable_language(ASM) - add_compile_definitions(GGML_METAL_EMBED_LIBRARY) - - set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h") - set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") - - file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated") - - # merge ggml-common.h and ggml-metal.metal into a single file - set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s") - set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal") - - add_custom_command( - OUTPUT ${METALLIB_EMBED_ASM} - COMMAND echo "Embedding Metal library" - COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED} - COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM} - COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM} - COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM} - COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM} - COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM} - COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM} - DEPENDS ggml-metal.metal ggml-common.h - COMMENT "Generate assembly for embedded Metal library" - ) - - set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM}) - else() - if (LLAMA_METAL_SHADER_DEBUG) - # custom command to do the following: - # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air - # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib - # - # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works - # disabling fast math is needed in order to pass tests/test-backend-ops - # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1 - # note: unfortunately, we have to call it default.metallib instead of ggml.metallib - # ref: https://github.com/ggerganov/whisper.cpp/issues/1720 - set(XC_FLAGS -fno-fast-math -fno-inline -g) - else() - set(XC_FLAGS -O3) - endif() - - # Append macOS metal versioning flags - if (LLAMA_METAL_MACOSX_VERSION_MIN) - message(STATUS "Adding -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN} flag to metal compilation") - list(APPEND XC_FLAGS -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN}) - endif() - if (LLAMA_METAL_STD) - message(STATUS "Adding -std=${LLAMA_METAL_STD} flag to metal compilation") - list(APPEND XC_FLAGS -std=${LLAMA_METAL_STD}) - endif() - - add_custom_command( - OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib - COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air - COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib - COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air - COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h - COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal - DEPENDS ggml-metal.metal ggml-common.h - COMMENT "Compiling Metal kernels" - ) - - add_custom_target( - ggml-metal ALL - DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib - ) - endif() # LLAMA_METAL_EMBED_LIBRARY - - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} - ${FOUNDATION_LIBRARY} - ${METAL_FRAMEWORK} - ${METALKIT_FRAMEWORK} - ) -endif() - -if (LLAMA_OPENMP) - find_package(OpenMP) - if (OpenMP_FOUND) - message(STATUS "OpenMP found") - add_compile_definitions(GGML_USE_OPENMP) - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX) - else() - message(WARNING "OpenMP not found") - endif() -endif() - -if (LLAMA_BLAS) - if (LLAMA_STATIC) - set(BLA_STATIC ON) - endif() - #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22) - # set(BLA_SIZEOF_INTEGER 8) - #endif() - - set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) - find_package(BLAS) - - if (BLAS_FOUND) - message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") - - if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${LLAMA_BLAS_VENDOR} MATCHES "Apple")) - # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake. - # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268 - find_package(PkgConfig REQUIRED) - if (${LLAMA_BLAS_VENDOR} MATCHES "Generic") - pkg_check_modules(DepBLAS REQUIRED blas) - elseif (${LLAMA_BLAS_VENDOR} MATCHES "OpenBLAS") - # As of openblas v0.3.22, the 64-bit is named openblas64.pc - pkg_check_modules(DepBLAS openblas64) - if (NOT DepBLAS_FOUND) - pkg_check_modules(DepBLAS REQUIRED openblas) - endif() - elseif (${LLAMA_BLAS_VENDOR} MATCHES "FLAME") - pkg_check_modules(DepBLAS REQUIRED blis) - elseif (${LLAMA_BLAS_VENDOR} MATCHES "ATLAS") - pkg_check_modules(DepBLAS REQUIRED blas-atlas) - elseif (${LLAMA_BLAS_VENDOR} MATCHES "FlexiBLAS") - pkg_check_modules(DepBLAS REQUIRED flexiblas_api) - elseif (${LLAMA_BLAS_VENDOR} MATCHES "Intel") - # all Intel* libraries share the same include path - pkg_check_modules(DepBLAS REQUIRED mkl-sdl) - elseif (${LLAMA_BLAS_VENDOR} MATCHES "NVHPC") - # this doesn't provide pkg-config - # suggest to assign BLAS_INCLUDE_DIRS on your own - if ("${NVHPC_VERSION}" STREQUAL "") - message(WARNING "Better to set NVHPC_VERSION") - else() - set(DepBLAS_FOUND ON) - set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include") - endif() - endif() - if (DepBLAS_FOUND) - set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS}) - else() - message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically" - " detected by pkgconfig, trying to find cblas.h from possible paths...") - find_path(BLAS_INCLUDE_DIRS - NAMES cblas.h - HINTS - /usr/include - /usr/local/include - /usr/include/openblas - /opt/homebrew/opt/openblas/include - /usr/local/opt/openblas/include - /usr/include/x86_64-linux-gnu/openblas/include - ) - endif() - endif() - - message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}") - - add_compile_options(${BLAS_LINKER_FLAGS}) - - add_compile_definitions(GGML_USE_BLAS) - - if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel")) - add_compile_definitions(GGML_BLAS_USE_MKL) - endif() - - set(GGML_HEADERS_BLAS ggml-blas.h) - set(GGML_SOURCES_BLAS ggml-blas.cpp) - - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES}) - set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS}) - else() - message(WARNING "BLAS not found, please refer to " - "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" - " to set correct LLAMA_BLAS_VENDOR") - endif() -endif() - -if (LLAMA_LLAMAFILE) - add_compile_definitions(GGML_USE_LLAMAFILE) - - set(GGML_HEADERS_LLAMAFILE sgemm.h) - set(GGML_SOURCES_LLAMAFILE sgemm.cpp) -endif() - -if (LLAMA_CUBLAS) - message(WARNING "LLAMA_CUBLAS is deprecated and will be removed in the future.\nUse LLAMA_CUDA instead") - set(LLAMA_CUDA ON) -endif() - -if (LLAMA_CUDA) - cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES - - find_package(CUDAToolkit) - if (CUDAToolkit_FOUND) - message(STATUS "CUDA found") - - if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - # 52 == lowest CUDA 12 standard - # 60 == FP16 CUDA intrinsics - # 61 == integer CUDA intrinsics - # 70 == FP16 tensor cores - # 75 == int8 tensor cores - if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) - set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") - else() - set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") - #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work - endif() - endif() - message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") - - enable_language(CUDA) - - set(GGML_HEADERS_CUDA ggml-cuda.h) - - file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu") - list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu") - file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu") - list(APPEND GGML_SOURCES_CUDA ${SRCS}) - file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu") - list(APPEND GGML_SOURCES_CUDA ${SRCS}) - - add_compile_definitions(GGML_USE_CUDA) - add_compile_definitions(GGML_CUDA_USE_GRAPHS) - if (LLAMA_CUDA_FORCE_DMMV) - add_compile_definitions(GGML_CUDA_FORCE_DMMV) - endif() - if (LLAMA_CUDA_FORCE_MMQ) - add_compile_definitions(GGML_CUDA_FORCE_MMQ) - endif() - if (LLAMA_CUDA_FORCE_CUBLAS) - add_compile_definitions(GGML_CUDA_FORCE_CUBLAS) - endif() - if (LLAMA_CUDA_NO_VMM) - add_compile_definitions(GGML_CUDA_NO_VMM) - endif() - add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) - add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y}) - if (DEFINED LLAMA_CUDA_DMMV_Y) - add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility - endif() - if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) - add_compile_definitions(GGML_CUDA_F16) - endif() - add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) - add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE}) - if (LLAMA_CUDA_NO_PEER_COPY) - add_compile_definitions(GGML_CUDA_NO_PEER_COPY) - endif() - if (LLAMA_CUDA_FA_ALL_QUANTS) - file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu") - list(APPEND GGML_SOURCES_CUDA ${SRCS}) - add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) - else() - file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu") - list(APPEND GGML_SOURCES_CUDA ${SRCS}) - file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu") - list(APPEND GGML_SOURCES_CUDA ${SRCS}) - file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu") - list(APPEND GGML_SOURCES_CUDA ${SRCS}) - endif() - - if (LLAMA_STATIC) - if (WIN32) - # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt) - else () - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) - endif() - else() - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) - endif() - - if (LLAMA_CUDA_NO_VMM) - # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so) - else() - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... - endif() - else() - message(WARNING "CUDA not found") - endif() -endif() - -if (LLAMA_RPC) - add_compile_definitions(GGML_USE_RPC) - - if (WIN32) - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ws2_32) - endif() - - set(GGML_HEADERS_RPC ggml-rpc.h) - set(GGML_SOURCES_RPC ggml-rpc.cpp) -endif() - -if (LLAMA_VULKAN) - find_package(Vulkan) - if (Vulkan_FOUND) - message(STATUS "Vulkan found") - - set(GGML_HEADERS_VULKAN ggml-vulkan.h) - set(GGML_SOURCES_VULKAN ggml-vulkan.cpp) - - add_compile_definitions(GGML_USE_VULKAN) - - # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build - # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector - if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0) - endif() - - if (LLAMA_VULKAN_CHECK_RESULTS) - add_compile_definitions(GGML_VULKAN_CHECK_RESULTS) - endif() - - if (LLAMA_VULKAN_DEBUG) - add_compile_definitions(GGML_VULKAN_DEBUG) - endif() - - if (LLAMA_VULKAN_MEMORY_DEBUG) - add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG) - endif() - - if (LLAMA_VULKAN_VALIDATE) - add_compile_definitions(GGML_VULKAN_VALIDATE) - endif() - - if (LLAMA_VULKAN_RUN_TESTS) - add_compile_definitions(GGML_VULKAN_RUN_TESTS) - endif() - - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} Vulkan::Vulkan) - else() - message(WARNING "Vulkan not found") - endif() -endif() - -if (LLAMA_HIPBLAS) - if (NOT EXISTS $ENV{ROCM_PATH}) - if (NOT EXISTS /opt/rocm) - set(ROCM_PATH /usr) - else() - set(ROCM_PATH /opt/rocm) - endif() - else() - set(ROCM_PATH $ENV{ROCM_PATH}) - endif() - list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) - list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake") - - # CMake on Windows doesn't support the HIP language yet - if(WIN32) - set(CXX_IS_HIPCC TRUE) - else() - string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}") - endif() - - if(CXX_IS_HIPCC) - if(LINUX) - if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") - message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++") - endif() - - message(WARNING "Setting hipcc as the C++ compiler is legacy behavior." - " Prefer setting the HIP compiler directly. See README for details.") - endif() - else() - # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES. - if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES) - set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS}) - endif() - cmake_minimum_required(VERSION 3.21) - enable_language(HIP) - endif() - find_package(hip REQUIRED) - find_package(hipblas REQUIRED) - find_package(rocblas REQUIRED) - - message(STATUS "HIP and hipBLAS found") - - set(GGML_HEADERS_ROCM ggml-cuda.h) - - file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu") - list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu") - file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu") - list(APPEND GGML_SOURCES_ROCM ${SRCS}) - file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu") - list(APPEND GGML_SOURCES_ROCM ${SRCS}) - - add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA) - - if (LLAMA_HIP_UMA) - add_compile_definitions(GGML_HIP_UMA) - endif() - - if (LLAMA_CUDA_FORCE_DMMV) - add_compile_definitions(GGML_CUDA_FORCE_DMMV) - endif() - - if (LLAMA_CUDA_FORCE_MMQ) - add_compile_definitions(GGML_CUDA_FORCE_MMQ) - endif() - - if (LLAMA_CUDA_NO_PEER_COPY) - add_compile_definitions(GGML_CUDA_NO_PEER_COPY) - endif() - - if (LLAMA_CUDA_FA_ALL_QUANTS) - file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu") - list(APPEND GGML_SOURCES_ROCM ${SRCS}) - add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) - else() - file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu") - list(APPEND GGML_SOURCES_ROCM ${SRCS}) - file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu") - list(APPEND GGML_SOURCES_ROCM ${SRCS}) - file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu") - list(APPEND GGML_SOURCES_ROCM ${SRCS}) - endif() - - add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) - add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y}) - add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) - - if (CXX_IS_HIPCC) - set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX) - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device) - else() - set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP) - endif() - - if (LLAMA_STATIC) - message(FATAL_ERROR "Static linking not supported for HIP/ROCm") - endif() - - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas) -endif() - -if (LLAMA_SYCL) - if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$") - message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA") - endif() - - if ( NOT DEFINED ENV{ONEAPI_ROOT}) - message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh") - endif() - #todo: AOT - - find_package(IntelSYCL REQUIRED) - find_package(MKL REQUIRED) - - message(STATUS "SYCL found") - - add_compile_definitions(GGML_USE_SYCL) - - if (LLAMA_SYCL_F16) - add_compile_definitions(GGML_SYCL_F16) - endif() - - if (LLAMA_CUDA_FORCE_MMQ) - add_compile_definitions(GGML_SYCL_FORCE_MMQ) - endif() - - add_compile_options(-I./) #include DPCT - - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") - if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") - endif() - - set(GGML_HEADERS_SYCL ggml-sycl.h) - file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp") - list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp") - - if (WIN32) - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL) - else() - add_compile_options(-I/${SYCL_INCLUDE_DIR}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib") - if (LLAMA_SYCL_TARGET STREQUAL "INTEL") - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) - elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl) - endif() - endif() -endif() - -if (LLAMA_KOMPUTE) - add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) - find_package(Vulkan COMPONENTS glslc REQUIRED) - find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc) - if (NOT glslc_executable) - message(FATAL_ERROR "glslc not found") - endif() - - function(compile_shader) - set(options) - set(oneValueArgs) - set(multiValueArgs SOURCES) - cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - foreach(source ${compile_shader_SOURCES}) - get_filename_component(filename ${source} NAME) - set(spv_file ${filename}.spv) - add_custom_command( - OUTPUT ${spv_file} - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source} - ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp - ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp - ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp - ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp - COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} - COMMENT "Compiling ${source} to ${spv_file}" - ) - - get_filename_component(RAW_FILE_NAME ${spv_file} NAME) - set(FILE_NAME "shader${RAW_FILE_NAME}") - string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME}) - string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE) - string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}") - set(OUTPUT_HEADER_FILE "${HEADER_FILE}") - message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}") - if(CMAKE_GENERATOR MATCHES "Visual Studio") - add_custom_command( - OUTPUT ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_BINARY_DIR}/bin/$/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - DEPENDS ${spv_file} xxd - COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$/xxd" - ) - else() - add_custom_command( - OUTPUT ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - DEPENDS ${spv_file} xxd - COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd" - ) - endif() - endforeach() - endfunction() - - if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt") - message(STATUS "Kompute found") - set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level") - add_subdirectory(kompute) - - # Compile our shaders - compile_shader(SOURCES - kompute-shaders/op_scale.comp - kompute-shaders/op_scale_8.comp - kompute-shaders/op_add.comp - kompute-shaders/op_addrow.comp - kompute-shaders/op_mul.comp - kompute-shaders/op_silu.comp - kompute-shaders/op_relu.comp - kompute-shaders/op_gelu.comp - kompute-shaders/op_softmax.comp - kompute-shaders/op_norm.comp - kompute-shaders/op_rmsnorm.comp - kompute-shaders/op_diagmask.comp - kompute-shaders/op_mul_mat_mat_f32.comp - kompute-shaders/op_mul_mat_f16.comp - kompute-shaders/op_mul_mat_q8_0.comp - kompute-shaders/op_mul_mat_q4_0.comp - kompute-shaders/op_mul_mat_q4_1.comp - kompute-shaders/op_mul_mat_q6_k.comp - kompute-shaders/op_getrows_f32.comp - kompute-shaders/op_getrows_f16.comp - kompute-shaders/op_getrows_q4_0.comp - kompute-shaders/op_getrows_q4_1.comp - kompute-shaders/op_getrows_q6_k.comp - kompute-shaders/op_rope_f16.comp - kompute-shaders/op_rope_f32.comp - kompute-shaders/op_cpy_f16_f16.comp - kompute-shaders/op_cpy_f16_f32.comp - kompute-shaders/op_cpy_f32_f16.comp - kompute-shaders/op_cpy_f32_f32.comp - ) - - # Create a custom target for our generated shaders - add_custom_target(generated_shaders DEPENDS - shaderop_scale.h - shaderop_scale_8.h - shaderop_add.h - shaderop_addrow.h - shaderop_mul.h - shaderop_silu.h - shaderop_relu.h - shaderop_gelu.h - shaderop_softmax.h - shaderop_norm.h - shaderop_rmsnorm.h - shaderop_diagmask.h - shaderop_mul_mat_mat_f32.h - shaderop_mul_mat_f16.h - shaderop_mul_mat_q8_0.h - shaderop_mul_mat_q4_0.h - shaderop_mul_mat_q4_1.h - shaderop_mul_mat_q6_k.h - shaderop_getrows_f32.h - shaderop_getrows_f16.h - shaderop_getrows_q4_0.h - shaderop_getrows_q4_1.h - shaderop_getrows_q6_k.h - shaderop_rope_f16.h - shaderop_rope_f32.h - shaderop_cpy_f16_f16.h - shaderop_cpy_f16_f32.h - shaderop_cpy_f32_f16.h - shaderop_cpy_f32_f32.h - ) - - # Create a custom command that depends on the generated_shaders - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp - COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp - DEPENDS generated_shaders - COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp" - ) - - # Add the stamp to the main sources to ensure dependency tracking - set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) - set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) - - add_compile_definitions(GGML_USE_KOMPUTE) - - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute) - set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR}) - else() - message(WARNING "Kompute not found") - endif() -endif() - -if (LLAMA_CPU_HBM) - find_library(memkind memkind REQUIRED) - - add_compile_definitions(GGML_USE_CPU_HBM) - - target_link_libraries(ggml PUBLIC memkind) -endif() - -function(get_flags CCID CCVER) - set(C_FLAGS "") - set(CXX_FLAGS "") - - if (CCID MATCHES "Clang") - set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return) - set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) - - if ( - (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR - (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0) - ) - list(APPEND C_FLAGS -Wdouble-promotion) - endif() - elseif (CCID STREQUAL "GNU") - set(C_FLAGS -Wdouble-promotion) - set(CXX_FLAGS -Wno-array-bounds) - - if (CCVER VERSION_GREATER_EQUAL 7.1.0) - list(APPEND CXX_FLAGS -Wno-format-truncation) - endif() - if (CCVER VERSION_GREATER_EQUAL 8.1.0) - list(APPEND CXX_FLAGS -Wextra-semi) - endif() - endif() - - set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE) - set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE) -======= ->>>>>>> 9839374f (files : relocate [no ci]) endfunction() llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA) @@ -902,21 +128,21 @@ set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR} install(TARGETS llama LIBRARY PUBLIC_HEADER) configure_package_config_file( - ${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake - INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama PATH_VARS LLAMA_INCLUDE_INSTALL_DIR LLAMA_LIB_INSTALL_DIR LLAMA_BIN_INSTALL_DIR ) write_basic_package_version_file( - ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake + ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake VERSION ${LLAMA_INSTALL_VERSION} COMPATIBILITY SameMajorVersion) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake - ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama) install( FILES convert-hf-to-gguf.py diff --git a/Makefile b/Makefile index b5acffec7..88084cac7 100644 --- a/Makefile +++ b/Makefile @@ -81,11 +81,6 @@ GGML_METAL := 1 DEPRECATE_WARNING := 1 endif -ifdef LLAMA_METAL_EMBED_LIBRARY -GGML_METAL_EMBED_LIBRARY := 1 -DEPRECATE_WARNING := 1 -endif - ifdef LLAMA_OPENMP GGML_OPENMP := 1 DEPRECATE_WARNING := 1 @@ -183,6 +178,10 @@ ifeq ($(UNAME_S),Darwin) endif endif +ifdef GGML_METAL + GGML_METAL_EMBED_LIBRARY := 1 +endif + ifdef GGML_RPC BUILD_TARGETS += rpc-server endif @@ -1064,11 +1063,10 @@ $(LIB_COMMON_S): \ ar rcs $(LIB_COMMON_S) $^ clean: - rm -vrf ggml/src/*.o src/*.o tests/*.o common/*.o *.a *.so ggml*.so *.dll common/build-info.cpp *.dot $(BUILD_TARGETS) $(TEST_TARGETS) - rm -vrf ggml/src/*.o + rm -vrf *.dot $(BUILD_TARGETS) $(TEST_TARGETS) rm -rvf src/*.o rm -rvf tests/*.o - rm -rvf common/*.o + rm -rvf examples/*.o rm -rvf *.a rm -rvf *.dll rm -rvf *.so @@ -1076,6 +1074,7 @@ clean: rm -rvf ggml/*.a rm -rvf ggml/*.dll rm -rvf ggml/*.so + rm -vrf ggml/src/*.o rm -rvf common/build-info.cpp rm -vrf ggml/src/ggml-metal-embed.metal rm -vrf ggml/src/ggml-cuda/*.o diff --git a/ci/run.sh b/ci/run.sh index 9b46f26bc..e0cedb24f 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -36,7 +36,7 @@ SRC=`pwd` CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON" if [ ! -z ${GG_BUILD_METAL} ]; then - CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_EMBED_LIBRARY=ON" + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON" fi if [ ! -z ${GG_BUILD_CUDA} ]; then diff --git a/scripts/build-info.cmake b/cmake/build-info.cmake similarity index 100% rename from scripts/build-info.cmake rename to cmake/build-info.cmake diff --git a/cmake/git-vars.cmake b/cmake/git-vars.cmake new file mode 100644 index 000000000..1a4c24ebf --- /dev/null +++ b/cmake/git-vars.cmake @@ -0,0 +1,22 @@ +find_package(Git) + +# the commit's SHA1 +execute_process(COMMAND + "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8 + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_SHA1 + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + +# the date of the commit +execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + +# the subject of the commit +execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/scripts/LlamaConfig.cmake.in b/cmake/llama-config.cmake.in similarity index 80% rename from scripts/LlamaConfig.cmake.in rename to cmake/llama-config.cmake.in index fd2aae288..2e7da2f8e 100644 --- a/scripts/LlamaConfig.cmake.in +++ b/cmake/llama-config.cmake.in @@ -1,22 +1,24 @@ -set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@) +set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@) set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@) set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@) -set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@) -set(GGML_BLAS @GGML_BLAS@) -set(GGML_CUDA @GGML_CUDA@) -set(GGML_METAL @GGML_METAL@) -set(GGML_HIPBLAS @GGML_HIPBLAS@) +set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@) + +set(GGML_BLAS @GGML_BLAS@) +set(GGML_CUDA @GGML_CUDA@) +set(GGML_METAL @GGML_METAL@) +set(GGML_HIPBLAS @GGML_HIPBLAS@) set(GGML_ACCELERATE @GGML_ACCELERATE@) @PACKAGE_INIT@ set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@") -set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@") -set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@") +set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@") +set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@") # Ensure transient dependencies satisfied find_package(Threads REQUIRED) + if (APPLE AND GGML_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED) endif() @@ -47,7 +49,9 @@ find_library(llama_LIBRARY llama set(_llama_link_deps "Threads::Threads" "@LLAMA_EXTRA_LIBS@") set(_llama_transient_defines "@LLAMA_TRANSIENT_DEFINES@") + add_library(llama UNKNOWN IMPORTED) + set_target_properties(llama PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}" diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index c6fccc025..761971d68 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -37,7 +37,7 @@ add_custom_command( COMMENT "Generating build details from Git" COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION} -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/gen-build-info-cpp.cmake" + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.." DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX} VERBATIM diff --git a/scripts/gen-build-info-cpp.cmake b/common/cmake/build-info-gen-cpp.cmake similarity index 86% rename from scripts/gen-build-info-cpp.cmake rename to common/cmake/build-info-gen-cpp.cmake index d89338920..fbc92b52c 100644 --- a/scripts/gen-build-info-cpp.cmake +++ b/common/cmake/build-info-gen-cpp.cmake @@ -1,7 +1,7 @@ -include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake) +include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in") -set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp") +set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp") # Only write the build info if it changed if(EXISTS ${OUTPUT_FILE}) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 7e0fb34cf..1d5fc0716 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -133,7 +133,7 @@ option(GGML_KOMPUTE "ggml: use Kompute" option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF) -option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" OFF) +option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL}) set (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING "ggml: metal minimum macOS version") set (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)") @@ -219,6 +219,7 @@ if (GGML_METAL) GROUP_READ WORLD_READ DESTINATION ${CMAKE_INSTALL_BINDIR}) + if (NOT GGML_METAL_EMBED_LIBRARY) install( FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 66f81db42..ed5fdda65 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -1,6 +1,9 @@ include(CheckCXXCompilerFlag) -add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) +unset(GGML_CDEF_PRIVATE) +unset(GGML_CDEF_PUBLIC) + +list(APPEND GGML_CDEF_PRIVATE GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES}) # enable libstdc++ assertions for debug builds if (CMAKE_SYSTEM_NAME MATCHES "Linux") @@ -29,9 +32,10 @@ if (APPLE AND GGML_ACCELERATE) if (ACCELERATE_FRAMEWORK) message(STATUS "Accelerate framework found") - add_compile_definitions(GGML_USE_ACCELERATE) - add_compile_definitions(ACCELERATE_NEW_LAPACK) - add_compile_definitions(ACCELERATE_LAPACK_ILP64) + list(APPEND GGML_CDEF_PRIVATE GGML_USE_ACCELERATE) + list(APPEND GGML_CDEF_PRIVATE ACCELERATE_NEW_LAPACK) + list(APPEND GGML_CDEF_PRIVATE ACCELERATE_LAPACK_ILP64) + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) else() message(WARNING "Accelerate framework not found") @@ -47,9 +51,9 @@ if (GGML_METAL) set(GGML_HEADERS_METAL ggml-metal.h) set(GGML_SOURCES_METAL ggml-metal.m) - add_compile_definitions(GGML_USE_METAL) + list(APPEND GGML_CDEF_PUBLIC GGML_USE_METAL) if (GGML_METAL_NDEBUG) - add_compile_definitions(GGML_METAL_NDEBUG) + list(APPEND GGML_CDEF_PRIVATE GGML_METAL_NDEBUG) endif() # copy ggml-common.h and ggml-metal.metal to bin directory @@ -58,7 +62,8 @@ if (GGML_METAL) if (GGML_METAL_EMBED_LIBRARY) enable_language(ASM) - add_compile_definitions(GGML_METAL_EMBED_LIBRARY) + + list(APPEND GGML_CDEF_PRIVATE GGML_METAL_EMBED_LIBRARY) set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h") set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") @@ -139,7 +144,9 @@ if (GGML_OPENMP) find_package(OpenMP) if (OpenMP_FOUND) message(STATUS "OpenMP found") - add_compile_definitions(GGML_USE_OPENMP) + + list(APPEND GGML_CDEF_PRIVATE GGML_USE_OPENMP) + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX) else() message(WARNING "OpenMP not found") @@ -213,10 +220,10 @@ if (GGML_BLAS) add_compile_options(${BLAS_LINKER_FLAGS}) - add_compile_definitions(GGML_USE_BLAS) + list(APPEND GGML_CDEF_PUBLIC GGML_USE_BLAS) if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel")) - add_compile_definitions(GGML_BLAS_USE_MKL) + list(APPEND GGML_CDEF_PRIVATE GGML_BLAS_USE_MKL) endif() set(GGML_HEADERS_BLAS ggml-blas.h) @@ -234,7 +241,7 @@ endif() if (GGML_LLAMAFILE) message(STATUS "Using ggml SGEMM") - add_compile_definitions(GGML_USE_LLAMAFILE) + list(APPEND GGML_CDEF_PRIVATE GGML_USE_LLAMAFILE) set(GGML_HEADERS_LLAMAFILE sgemm.h) set(GGML_SOURCES_LLAMAFILE sgemm.cpp) @@ -244,6 +251,7 @@ if (GGML_CUDA) cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) message(STATUS "CUDA found") @@ -276,7 +284,7 @@ if (GGML_CUDA) if (GGML_CUDA_FA_ALL_QUANTS) file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu") list(APPEND GGML_SOURCES_CUDA ${SRCS}) - add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FA_ALL_QUANTS) else() file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu") list(APPEND GGML_SOURCES_CUDA ${SRCS}) @@ -286,32 +294,40 @@ if (GGML_CUDA) list(APPEND GGML_SOURCES_CUDA ${SRCS}) endif() - add_compile_definitions(GGML_USE_CUDA) - add_compile_definitions(GGML_CUDA_USE_GRAPHS) - add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X}) - add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y}) - add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER}) # TODO: remove - add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE}) + list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA) + + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_USE_GRAPHS) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X}) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y}) + list(APPEND GGML_CDEF_PRIVATE K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER}) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE}) + if (GGML_CUDA_FORCE_DMMV) - add_compile_definitions(GGML_CUDA_FORCE_DMMV) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FORCE_DMMV) endif() + if (GGML_CUDA_FORCE_MMQ) - add_compile_definitions(GGML_CUDA_FORCE_MMQ) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FORCE_MMQ) endif() + if (GGML_CUDA_FORCE_CUBLAS) add_compile_definitions(GGML_CUDA_FORCE_CUBLAS) endif() + if (GGML_CUDA_NO_VMM) - add_compile_definitions(GGML_CUDA_NO_VMM) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_NO_VMM) endif() + if (DEFINED GGML_CUDA_DMMV_Y) - add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_DMMV_Y}) # for backwards compatibility + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_MMV_Y=${GGML_CUDA_DMMV_Y}) # for backwards compatibility endif() + if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16) - add_compile_definitions(GGML_CUDA_F16) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_F16) endif() + if (GGML_CUDA_NO_PEER_COPY) - add_compile_definitions(GGML_CUDA_NO_PEER_COPY) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_NO_PEER_COPY) endif() if (GGML_STATIC) @@ -345,6 +361,7 @@ if (GGML_HIPBLAS) else() set(ROCM_PATH $ENV{ROCM_PATH}) endif() + list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake") @@ -372,6 +389,7 @@ if (GGML_HIPBLAS) cmake_minimum_required(VERSION 3.21) enable_language(HIP) endif() + find_package(hip REQUIRED) find_package(hipblas REQUIRED) find_package(rocblas REQUIRED) @@ -391,7 +409,7 @@ if (GGML_HIPBLAS) if (GGML_CUDA_FA_ALL_QUANTS) file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) - add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FA_ALL_QUANTS) else() file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) @@ -401,26 +419,27 @@ if (GGML_HIPBLAS) list(APPEND GGML_SOURCES_ROCM ${SRCS}) endif() + list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA) - add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA) - add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X}) - add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y}) - add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER}) + list(APPEND GGML_CDEF_PRIVATE GGML_USE_HIPBLAS) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X}) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y}) + list(APPEND GGML_CDEF_PRIVATE K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER}) if (GGML_HIP_UMA) - add_compile_definitions(GGML_HIP_UMA) + list(APPEND GGML_CDEF_PRIVATE GGML_HIP_UMA) endif() if (GGML_CUDA_FORCE_DMMV) - add_compile_definitions(GGML_CUDA_FORCE_DMMV) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FORCE_DMMV) endif() if (GGML_CUDA_FORCE_MMQ) - add_compile_definitions(GGML_CUDA_FORCE_MMQ) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_FORCE_MMQ) endif() if (GGML_CUDA_NO_PEER_COPY) - add_compile_definitions(GGML_CUDA_NO_PEER_COPY) + list(APPEND GGML_CDEF_PRIVATE GGML_CUDA_NO_PEER_COPY) endif() if (CXX_IS_HIPCC) @@ -452,14 +471,14 @@ if (GGML_SYCL) message(STATUS "SYCL found") - add_compile_definitions(GGML_USE_SYCL) + list(APPEND GGML_CDEF_PUBLIC GGML_USE_SYCL) if (GGML_SYCL_F16) - add_compile_definitions(GGML_SYCL_F16) + list(APPEND GGML_CDEF_PRIVATE GGML_SYCL_F16) endif() if (GGML_CUDA_FORCE_MMQ) - add_compile_definitions(GGML_SYCL_FORCE_MMQ) + list(APPEND GGML_CDEF_PRIVATE GGML_SYCL_FORCE_MMQ) endif() add_compile_options(-I./) #include DPCT @@ -493,7 +512,7 @@ endif() if (GGML_RPC) message(STATUS "RPC found") - add_compile_definitions(GGML_USE_RPC) + list(APPEND GGML_CDEF_PUBLIC GGML_USE_RPC) if (WIN32) set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ws2_32) @@ -512,7 +531,7 @@ if (GGML_VULKAN) set(GGML_HEADERS_VULKAN ggml-vulkan.h) set(GGML_SOURCES_VULKAN ggml-vulkan.cpp) - add_compile_definitions(GGML_USE_VULKAN) + list(APPEND GGML_CDEF_PUBLIC GGML_USE_VULKAN) # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector @@ -521,23 +540,23 @@ if (GGML_VULKAN) endif() if (GGML_VULKAN_CHECK_RESULTS) - add_compile_definitions(GGML_VULKAN_CHECK_RESULTS) + list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_CHECK_RESULTS) endif() if (GGML_VULKAN_DEBUG) - add_compile_definitions(GGML_VULKAN_DEBUG) + list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_DEBUG) endif() if (GGML_VULKAN_MEMORY_DEBUG) - add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG) + list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_MEMORY_DEBUG) endif() if (GGML_VULKAN_VALIDATE) - add_compile_definitions(GGML_VULKAN_VALIDATE) + list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_VALIDATE) endif() if (GGML_VULKAN_RUN_TESTS) - add_compile_definitions(GGML_VULKAN_RUN_TESTS) + list(APPEND GGML_CDEF_PRIVATE GGML_VULKAN_RUN_TESTS) endif() set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} Vulkan::Vulkan) @@ -548,6 +567,7 @@ endif() if (GGML_KOMPUTE) add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) + find_package(Vulkan COMPONENTS glslc REQUIRED) find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc) if (NOT glslc_executable) @@ -695,9 +715,9 @@ if (GGML_KOMPUTE) set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) - add_compile_definitions(GGML_USE_KOMPUTE) + list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE) - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} kompute) + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} kompute) set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR}) else() message(WARNING "Kompute not found") @@ -709,7 +729,7 @@ if (GGML_CPU_HBM) message(STATUS "Using memkind for CPU HBM") - add_compile_definitions(GGML_USE_CPU_HBM) + list(APPEND GGML_CDEF_PRIVATE GGML_USE_CPU_HBM) target_link_libraries(ggml PUBLIC memkind) endif() @@ -822,14 +842,6 @@ if (GGML_CUDA) endif() endif() -if (WIN32) - add_compile_definitions(_CRT_SECURE_NO_WARNINGS) - - if (BUILD_SHARED_LIBS) - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) - endif() -endif() - if (GGML_LTO) include(CheckIPOSupported) check_ipo_supported(RESULT result OUTPUT output) @@ -844,6 +856,7 @@ if (GGML_CCACHE) find_program(GGML_CCACHE_FOUND ccache) if (GGML_CCACHE_FOUND) + # TODO: should not be set globally set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) set(ENV{CCACHE_SLOPPINESS} time_macros) message(STATUS "ccache found, compilation results will be cached. Disable with GGML_CCACHE=OFF.") @@ -860,7 +873,7 @@ execute_process( ) if (output MATCHES "dyld-1015\.7") - add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) + list(APPEND GGML_CDEF_PRIVATE HAVE_BUGGY_APPLE_LINKER) endif() # architecture specific @@ -903,11 +916,14 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS}) string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2") + check_cxx_source_compiles("#include \nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD) if (GGML_COMPILER_SUPPORT_DOTPROD) add_compile_definitions(__ARM_FEATURE_DOTPROD) endif () + check_cxx_source_compiles("#include \nint main() { int8x16_t _a, _b; int32x4_t _s = vmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8) + if (GGML_COMPILER_SUPPORT_MATMUL_INT8) add_compile_definitions(__ARM_FEATURE_MATMUL_INT8) endif () @@ -916,6 +932,7 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC) add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) endif () + set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV}) else() check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E) @@ -1097,6 +1114,15 @@ if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") add_compile_definitions(_BSD_SOURCE) endif() +if (WIN32) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + + if (BUILD_SHARED_LIBS) + # TODO: should not use this + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + endif() +endif() + # # libraries # @@ -1124,6 +1150,13 @@ add_library(ggml OBJECT ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE} ) +if (EMSCRIPTEN) + set_target_properties(ggml PROPERTIES COMPILE_FLAGS "-msimd128") +endif() + +target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC}) +target_compile_definitions(ggml PRIVATE ${GGML_CDEF_PRIVATE}) + target_include_directories(ggml PUBLIC . ../include ${GGML_EXTRA_INCLUDES}) target_compile_features (ggml PUBLIC c_std_11) # don't bump diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index 37b3b9932..bd7993595 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -603,7 +603,7 @@ static void on_no_fattn_vec_case(const int D) { if (D == 64) { fprintf(stderr, "Unsupported KV type combination for head_size 64.\n"); fprintf(stderr, "By default only f16 KV cache is supported.\n"); - fprintf(stderr, "Compile with LLAMA_CUDA_FA_ALL_QUANTS for V cache quantization support.\n"); + fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for V cache quantization support.\n"); GGML_ASSERT(false); } else if (D == 128) { fprintf(stderr, "Unsupported KV type combination for head_size 128.\n"); @@ -611,7 +611,7 @@ static void on_no_fattn_vec_case(const int D) { fprintf(stderr, " - K == q4_0, V == q4_0, 4.50 BPV\n"); fprintf(stderr, " - K == q8_0, V == q8_0, 8.50 BPV\n"); fprintf(stderr, " - K == f16, V == f16, 16.00 BPV\n"); - fprintf(stderr, "Compile with LLAMA_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n"); + fprintf(stderr, "Compile with GGML_CUDA_FA_ALL_QUANTS for all combinations of q4_0, q4_1, q5_0, q5_1, q8_0, and f16.\n"); GGML_ASSERT(false); } else { fprintf(stderr, "Unsupported KV type combination for head_size 256.\n"); diff --git a/scripts/build-info.sh b/scripts/build-info.sh index 32682afbd..fa9e7bacd 100755 --- a/scripts/build-info.sh +++ b/scripts/build-info.sh @@ -8,20 +8,20 @@ build_compiler="unknown" build_target="unknown" if out=$(git rev-list --count HEAD); then - # git is broken on WSL so we need to strip extra newlines - build_number=$(printf '%s' "$out" | tr -d '\n') + # git is broken on WSL so we need to strip extra newlines + build_number=$(printf '%s' "$out" | tr -d '\n') fi if out=$(git rev-parse --short HEAD); then - build_commit=$(printf '%s' "$out" | tr -d '\n') + build_commit=$(printf '%s' "$out" | tr -d '\n') fi if out=$($CC --version | head -1); then - build_compiler=$out + build_compiler=$out fi if out=$($CC -dumpmachine); then - build_target=$out + build_target=$out fi echo "int LLAMA_BUILD_NUMBER = ${build_number};" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eaa504567..ccb607e56 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,3 +1,12 @@ +# TODO: should not use this +if (WIN32) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + + if (BUILD_SHARED_LIBS) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + endif() +endif() + # # libraries #