diff --git a/CMakeLists.txt b/CMakeLists.txt
index b05c2535e..45b740d73 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ project("llama.cpp" C CXX)
 
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
-if(NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
+if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
     set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
 endif()
@@ -13,70 +13,70 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
     set(LLAMA_STANDALONE ON)
-# configure project version
-# TODO
+    # configure project version
+    # TODO
 else()
     set(LLAMA_STANDALONE OFF)
 endif()
 
-if(EMSCRIPTEN)
+if (EMSCRIPTEN)
     set(BUILD_SHARED_LIBS_DEFAULT OFF)
 
     option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
 else()
-    if(MINGW)
+    if (MINGW)
         set(BUILD_SHARED_LIBS_DEFAULT OFF)
     else()
         set(BUILD_SHARED_LIBS_DEFAULT ON)
     endif()
 endif()
 
+
 #
 # Option list
 #
 
 # general
-option(LLAMA_STATIC "llama: static link libraries" OFF)
-option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
-option(LLAMA_LTO "llama: enable link time optimization" OFF)
+option(LLAMA_STATIC                    "llama: static link libraries"                          OFF)
+option(LLAMA_NATIVE                    "llama: enable -march=native flag"                      OFF)
+option(LLAMA_LTO                       "llama: enable link time optimization"                  OFF)
 
 # debug
-option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
-option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
-option(LLAMA_GPROF "llama: enable gprof" OFF)
+option(LLAMA_ALL_WARNINGS              "llama: enable all compiler warnings"                   ON)
+option(LLAMA_ALL_WARNINGS_3RD_PARTY    "llama: enable all compiler warnings in 3rd party libs" OFF)
+option(LLAMA_GPROF                     "llama: enable gprof"                                   OFF)
 
 # sanitizers
-option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
-option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
-option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
+option(LLAMA_SANITIZE_THREAD           "llama: enable thread sanitizer"                        OFF)
+option(LLAMA_SANITIZE_ADDRESS          "llama: enable address sanitizer"                       OFF)
+option(LLAMA_SANITIZE_UNDEFINED        "llama: enable undefined sanitizer"                     OFF)
 
 # instruction set specific
-option(LLAMA_AVX "llama: enable AVX" ON)
-option(LLAMA_AVX2 "llama: enable AVX2" ON)
-option(LLAMA_AVX512 "llama: enable AVX512" OFF)
-option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
-option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
-option(LLAMA_FMA "llama: enable FMA" ON)
-
+option(LLAMA_AVX                       "llama: enable AVX"                                     ON)
+option(LLAMA_AVX2                      "llama: enable AVX2"                                    ON)
+option(LLAMA_AVX512                    "llama: enable AVX512"                                  OFF)
+option(LLAMA_AVX512_VBMI               "llama: enable AVX512-VBMI"                             OFF)
+option(LLAMA_AVX512_VNNI               "llama: enable AVX512-VNNI"                             OFF)
+option(LLAMA_FMA                       "llama: enable FMA"                                     ON)
 # in MSVC F16C is implied with AVX2/AVX512
-if(NOT MSVC)
-    option(LLAMA_F16C "llama: enable F16C" ON)
+if (NOT MSVC)
+    option(LLAMA_F16C                  "llama: enable F16C"                                    ON)
 endif()
 
 # 3rd party libs
-option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
-option(LLAMA_BLAS "llama: use BLAS" OFF)
+option(LLAMA_ACCELERATE                      "llama: enable Accelerate framework"              ON)
+option(LLAMA_BLAS                            "llama: use BLAS"                                 OFF)
 set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
-option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
-set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
-set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
STRING "llama: y block size for dmmv CUDA kernels") -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_METAL "llama: use Metal" OFF) -option(LLAMA_K_QUANTS "llama: use k-quants" ON) +option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels") +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_METAL "llama: use Metal" OFF) +option(LLAMA_K_QUANTS "llama: use k-quants" ON) -option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) +option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) # # Build info header @@ -114,6 +114,7 @@ endif() # # Compile flags # + set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED true) set(CMAKE_C_STANDARD 11) @@ -121,27 +122,26 @@ set(CMAKE_C_STANDARD_REQUIRED true) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -if(NOT MSVC) - if(LLAMA_SANITIZE_THREAD) +if (NOT MSVC) + if (LLAMA_SANITIZE_THREAD) add_compile_options(-fsanitize=thread) link_libraries(-fsanitize=thread) endif() - if(LLAMA_SANITIZE_ADDRESS) + if (LLAMA_SANITIZE_ADDRESS) add_compile_options(-fsanitize=address -fno-omit-frame-pointer) link_libraries(-fsanitize=address) endif() - if(LLAMA_SANITIZE_UNDEFINED) + if (LLAMA_SANITIZE_UNDEFINED) add_compile_options(-fsanitize=undefined) link_libraries(-fsanitize=undefined) endif() endif() -if(APPLE AND LLAMA_ACCELERATE) +if (APPLE AND LLAMA_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate) - - if(ACCELERATE_FRAMEWORK) + if (ACCELERATE_FRAMEWORK) message(STATUS "Accelerate framework found") add_compile_definitions(GGML_USE_ACCELERATE) @@ -151,19 +151,16 @@ if(APPLE AND LLAMA_ACCELERATE) endif() endif() -if(LLAMA_BLAS) - if(LLAMA_STATIC) +if (LLAMA_BLAS) + if (LLAMA_STATIC) set(BLA_STATIC ON) endif() - - if($(CMAKE_VERSION) VERSION_GREATER_EQUAL 3.22) + if ($(CMAKE_VERSION) VERSION_GREATER_EQUAL 3.22) set(BLA_SIZEOF_INTEGER 8) endif() - set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) find_package(BLAS) - - if(BLAS_FOUND) + if (BLAS_FOUND) message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") add_compile_options(${BLAS_LINKER_FLAGS}) @@ -174,17 +171,16 @@ if(LLAMA_BLAS) include_directories(${BLAS_INCLUDE_DIRS}) else() message(WARNING "BLAS not found, please refer to " - "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" - " to set correct LLAMA_BLAS_VENDOR") + "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" + " to set correct LLAMA_BLAS_VENDOR") endif() endif() -if(LLAMA_CUBLAS) +if (LLAMA_CUBLAS) cmake_minimum_required(VERSION 3.17) find_package(CUDAToolkit) - - if(CUDAToolkit_FOUND) + if (CUDAToolkit_FOUND) message(STATUS "cuBLAS found") enable_language(CUDA) @@ -195,7 +191,7 @@ if(LLAMA_CUBLAS) add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) add_compile_definitions(GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y}) - if(LLAMA_STATIC) + if (LLAMA_STATIC) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) else() set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) @@ -206,10 +202,10 @@ if(LLAMA_CUBLAS) endif() endif() 
-if(LLAMA_METAL)
-    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
-    find_library(METAL_FRAMEWORK Metal REQUIRED)
-    find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
+if (LLAMA_METAL)
+    find_library(FOUNDATION_LIBRARY         Foundation              REQUIRED)
+    find_library(METAL_FRAMEWORK            Metal                   REQUIRED)
+    find_library(METALKIT_FRAMEWORK         MetalKit                REQUIRED)
     find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED)
 
     set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h)
@@ -218,7 +214,7 @@ if(LLAMA_METAL)
     add_compile_definitions(GGML_METAL_NDEBUG)
 
     # get full path to the file
-    # add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
+    #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
 
     # copy ggml-metal.metal to bin directory
     configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
@@ -228,18 +224,17 @@ if(LLAMA_METAL)
         ${METAL_FRAMEWORK}
         ${METALKIT_FRAMEWORK}
         ${METALPERFORMANCE_FRAMEWORK}
-    )
+        )
 endif()
 
-if(LLAMA_K_QUANTS)
+if (LLAMA_K_QUANTS)
     set(GGML_SOURCES_EXTRA ${GGML_SOURCES_EXTRA} k_quants.c k_quants.h)
     add_compile_definitions(GGML_USE_K_QUANTS)
 endif()
 
-if(LLAMA_CLBLAST)
+if (LLAMA_CLBLAST)
     find_package(CLBlast)
-
-    if(CLBlast_FOUND)
+    if (CLBlast_FOUND)
         message(STATUS "CLBlast found")
 
         set(GGML_SOURCES_OPENCL ggml-opencl.cpp ggml-opencl.h)
@@ -252,8 +247,8 @@ if(LLAMA_CLBLAST)
     endif()
 endif()
 
-if(LLAMA_ALL_WARNINGS)
-    if(NOT MSVC)
+if (LLAMA_ALL_WARNINGS)
+    if (NOT MSVC)
         set(c_flags
             -Wall
             -Wextra
@@ -277,24 +272,24 @@ if(LLAMA_ALL_WARNINGS)
     endif()
 
     add_compile_options(
-        "$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
-        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
+                "$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
+                "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
     )
+
 endif()
 
-if(MSVC)
+if (MSVC)
     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
 
-    if(BUILD_SHARED_LIBS)
+    if (BUILD_SHARED_LIBS)
         set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
     endif()
 endif()
 
-if(LLAMA_LTO)
+if (LLAMA_LTO)
     include(CheckIPOSupported)
     check_ipo_supported(RESULT result OUTPUT output)
-
-    if(result)
+    if (result)
         set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
     else()
         message(WARNING "IPO is not supported: ${output}")
@@ -303,125 +298,108 @@ endif()
 
 # Architecture specific
 # TODO: probably these flags need to be tweaked on some architectures
-# feel free to update the Makefile for your architecture and send a pull request or issue
+#       feel free to update the Makefile for your architecture and send a pull request or issue
 message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
-
-if(NOT MSVC)
-    if(LLAMA_STATIC)
+if (NOT MSVC)
+    if (LLAMA_STATIC)
         add_link_options(-static)
-
-        if(MINGW)
+        if (MINGW)
            add_link_options(-static-libgcc -static-libstdc++)
        endif()
    endif()
-
-    if(LLAMA_GPROF)
+    if (LLAMA_GPROF)
        add_compile_options(-pg)
    endif()
-
-    if(LLAMA_NATIVE)
+    if (LLAMA_NATIVE)
        add_compile_options(-march=native)
    endif()
 endif()
 
-if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
+if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
     message(STATUS "ARM detected")
-
-    if(MSVC)
-        # TODO: arm msvc?
+    if (MSVC)
+        # TODO: arm msvc?
     else()
-        if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
+        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
             # Apple M1, M2, etc.
             # Raspberry Pi 3, 4, Zero 2 (64-bit)
-            if(NOT DEFINED ANDROID_NDK)
+
+            # latest Android NDK uses CLang now, which does not support the native flag
+            if(DEFINED ANDROID_NDK)
+            else()
                 add_compile_options(-mcpu=native)
             endif()
         endif()
-
-        if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
+        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
             # Raspberry Pi 1, Zero
-            if(NOT DEFINED ANDROID_NDK)
-                add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access)
-            endif()
+            add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access)
         endif()
-
-        if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
+        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
             # Raspberry Pi 2
-            if(NOT DEFINED ANDROID_NDK)
-                add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations)
-            endif()
+            add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access)
         endif()
-
-        if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
+        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
             # Raspberry Pi 3, 4, Zero 2 (32-bit)
-            if(NOT DEFINED ANDROID_NDK)
-                add_compile_options(-mfp16-format=ieee -mno-unaligned-access)
+            add_compile_options(-mfp16-format=ieee -mno-unaligned-access)
+
+            if(DEFINED ANDROID_NDK)
+                add_compile_options(-march=armv8.4a+dotprod)
             endif()
         endif()
     endif()
-elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
+elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
     message(STATUS "x86 detected")
-
-    if(MSVC)
-        if(LLAMA_AVX512)
+    if (MSVC)
+        if (LLAMA_AVX512)
            add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
            add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
-
            # MSVC has no compile-time flags enabling specific
            # AVX512 extensions, neither it defines the
            # macros corresponding to the extensions.
            # Do it manually.
-            if(LLAMA_AVX512_VBMI)
+            if (LLAMA_AVX512_VBMI)
                 add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
                 add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
             endif()
-
-            if(LLAMA_AVX512_VNNI)
+            if (LLAMA_AVX512_VNNI)
                 add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
                 add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
             endif()
-        elseif(LLAMA_AVX2)
+        elseif (LLAMA_AVX2)
             add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX2>)
             add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>)
-        elseif(LLAMA_AVX)
+        elseif (LLAMA_AVX)
             add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX>)
             add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
         endif()
     else()
-        if(LLAMA_F16C)
+        if (LLAMA_F16C)
             add_compile_options(-mf16c)
         endif()
-
-        if(LLAMA_FMA)
+        if (LLAMA_FMA)
             add_compile_options(-mfma)
         endif()
-
-        if(LLAMA_AVX)
+        if (LLAMA_AVX)
             add_compile_options(-mavx)
         endif()
-
-        if(LLAMA_AVX2)
+        if (LLAMA_AVX2)
             add_compile_options(-mavx2)
         endif()
-
-        if(LLAMA_AVX512)
+        if (LLAMA_AVX512)
             add_compile_options(-mavx512f)
             add_compile_options(-mavx512bw)
         endif()
-
-        if(LLAMA_AVX512_VBMI)
+        if (LLAMA_AVX512_VBMI)
             add_compile_options(-mavx512vbmi)
         endif()
-
-        if(LLAMA_AVX512_VNNI)
+        if (LLAMA_AVX512_VNNI)
             add_compile_options(-mavx512vnni)
         endif()
     endif()
-elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
+elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
     message(STATUS "PowerPC detected")
     add_compile_options(-mcpu=native -mtune=native)
-
-# TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
+    #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
 else()
     message(STATUS "Unknown architecture")
 endif()
@@ -429,57 +407,60 @@ endif()
 #
 # Build libraries
 #
+
 add_library(ggml OBJECT
-    ggml.c
-    ggml.h
-    ${GGML_SOURCES_CUDA}
-    ${GGML_SOURCES_OPENCL}
-    ${GGML_SOURCES_METAL}
-    ${GGML_SOURCES_EXTRA}
-)
+            ggml.c
+            ggml.h
+            ${GGML_SOURCES_CUDA}
+            ${GGML_SOURCES_OPENCL}
+            ${GGML_SOURCES_METAL}
+            ${GGML_SOURCES_EXTRA}
+            )
 
 target_include_directories(ggml PUBLIC .)
 target_compile_features(ggml PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 
-if(BUILD_SHARED_LIBS)
+if (BUILD_SHARED_LIBS)
     set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()
 
 add_library(llama
-    llama.cpp
-    llama.h
-    llama-util.h
-)
+            llama.cpp
+            llama.h
+            llama-util.h
+            )
 
 target_include_directories(llama PUBLIC .)
 target_compile_features(llama PUBLIC cxx_std_11) # don't bump
 target_link_libraries(llama PRIVATE
     ggml
     ${LLAMA_EXTRA_LIBS}
-)
+    )
 
-if(BUILD_SHARED_LIBS)
+if (BUILD_SHARED_LIBS)
     set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
     target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
 endif()
 
-if(GGML_SOURCES_CUDA)
+if (GGML_SOURCES_CUDA)
     message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
-    set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF)
-    set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
+    set_property(TARGET ggml  PROPERTY CUDA_ARCHITECTURES OFF)
+    set_property(TARGET ggml  PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
     set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF)
 endif()
 
+
 #
 # programs, examples and tests
 #
+
-if(LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
     include(CTest)
     add_subdirectory(tests)
-endif()
+endif ()
 
-if(LLAMA_BUILD_EXAMPLES)
+if (LLAMA_BUILD_EXAMPLES)
     add_subdirectory(examples)
     add_subdirectory(pocs)
-endif()
+endif()
\ No newline at end of file
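
A note on the $<$<COMPILE_LANGUAGE:C>:...> and $<$<COMPILE_LANGUAGE:CXX>:...> generator expressions used in the warning and /arch sections above: they scope each flag list to a single language, so C-only flags never reach the C++ front end and vice versa. A minimal standalone sketch of the same pattern; the project name and flag lists here are illustrative assumptions, not taken from this patch:

    cmake_minimum_required(VERSION 3.12)
    project(genex_demo C CXX)

    set(c_flags   -Wall -Wdouble-promotion)   # illustrative C-only flags
    set(cxx_flags -Wall -Wextra)              # illustrative C++-only flags

    # COMPILE_LANGUAGE is evaluated per source file, so each list is passed
    # only to the matching compiler front end.
    add_compile_options(
        "$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
    )

Quoting the whole expression keeps the semicolon-separated list intact until CMake expands it for each source file.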
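
For context, the LLAMA_* cache options declared in this file are typically pre-seeded by a parent project that vendors llama.cpp as a subdirectory. A hedged sketch under assumed names: the third_party/llama.cpp path, the my_app target, and the chosen ON/OFF values are illustrative only and not part of the patch:

    cmake_minimum_required(VERSION 3.12)
    project(my_app CXX)

    # Pre-seed llama.cpp's cache options before add_subdirectory() reads them.
    set(LLAMA_CUBLAS         OFF CACHE BOOL "" FORCE)  # flip ON for NVIDIA GPU builds
    set(LLAMA_METAL          OFF CACHE BOOL "" FORCE)  # flip ON on Apple Silicon
    set(LLAMA_K_QUANTS       ON  CACHE BOOL "" FORCE)
    set(LLAMA_BUILD_TESTS    OFF CACHE BOOL "" FORCE)
    set(LLAMA_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)

    add_subdirectory(third_party/llama.cpp)

    add_executable(my_app main.cpp)
    target_link_libraries(my_app PRIVATE llama)  # the llama target is defined by the CMakeLists.txt above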