From c3b4efc89e925f5ca149e532fab7149a46a608a5 Mon Sep 17 00:00:00 2001 From: George Date: Mon, 5 Jun 2023 17:25:51 +0800 Subject: [PATCH] updated CMakeLists.txt and added JNI implementation to support building this library as a dependency in Android Studio with NDK --- CMakeLists.txt | 271 +++++++++++++++++++++++++++---------------------- llama-jni.cpp | 12 +++ 2 files changed, 162 insertions(+), 121 deletions(-) create mode 100644 llama-jni.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f2e78c0f..e06046c40 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project("llama.cpp" C CXX) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) +if(NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() @@ -13,69 +13,69 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(LLAMA_STANDALONE ON) - # configure project version - # TODO +# configure project version +# TODO else() set(LLAMA_STANDALONE OFF) endif() -if (EMSCRIPTEN) +if(EMSCRIPTEN) set(BUILD_SHARED_LIBS_DEFAULT OFF) option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON) else() - if (MINGW) + if(MINGW) set(BUILD_SHARED_LIBS_DEFAULT OFF) else() set(BUILD_SHARED_LIBS_DEFAULT ON) endif() endif() - # # Option list # # general -option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) -option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) # debug -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: 
enable all compiler warnings in 3rd party libs" OFF) -option(LLAMA_GPROF "llama: enable gprof" OFF) +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) # sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) # instruction set specific -option(LLAMA_AVX "llama: enable AVX" ON) -option(LLAMA_AVX2 "llama: enable AVX2" ON) -option(LLAMA_AVX512 "llama: enable AVX512" OFF) -option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) -option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) -option(LLAMA_FMA "llama: enable FMA" ON) +option(LLAMA_AVX "llama: enable AVX" ON) +option(LLAMA_AVX2 "llama: enable AVX2" ON) +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" ON) + # in MSVC F16C is implied with AVX2/AVX512 -if (NOT MSVC) - option(LLAMA_F16C "llama: enable F16C" ON) +if(NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" ON) endif() # 3rd party libs -option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) -option(LLAMA_BLAS "llama: use BLAS" OFF) +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") -option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) -set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") -set(LLAMA_CUDA_DMMV_Y 
"1" CACHE STRING "llama: y block size for dmmv CUDA kernels") -option(LLAMA_CLBLAST "llama: use CLBlast" OFF) -option(LLAMA_METAL "llama: use Metal" OFF) +option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels") +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_METAL "llama: use Metal" OFF) -option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) -option(LLAMA_BUILD_SERVER "llama: build server example" OFF) +option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_SERVER "llama: build server example" OFF) # # Build info header @@ -113,7 +113,6 @@ endif() # # Compile flags # - set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED true) set(CMAKE_C_STANDARD 11) @@ -121,26 +120,27 @@ set(CMAKE_C_STANDARD_REQUIRED true) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -if (NOT MSVC) - if (LLAMA_SANITIZE_THREAD) +if(NOT MSVC) + if(LLAMA_SANITIZE_THREAD) add_compile_options(-fsanitize=thread) link_libraries(-fsanitize=thread) endif() - if (LLAMA_SANITIZE_ADDRESS) + if(LLAMA_SANITIZE_ADDRESS) add_compile_options(-fsanitize=address -fno-omit-frame-pointer) link_libraries(-fsanitize=address) endif() - if (LLAMA_SANITIZE_UNDEFINED) + if(LLAMA_SANITIZE_UNDEFINED) add_compile_options(-fsanitize=undefined) link_libraries(-fsanitize=undefined) endif() endif() -if (APPLE AND LLAMA_ACCELERATE) +if(APPLE AND LLAMA_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate) - if (ACCELERATE_FRAMEWORK) + + if(ACCELERATE_FRAMEWORK) message(STATUS "Accelerate framework found") add_compile_definitions(GGML_USE_ACCELERATE) @@ -150,16 +150,19 @@ if (APPLE AND LLAMA_ACCELERATE) endif() endif() -if (LLAMA_BLAS) - if 
(LLAMA_STATIC) +if(LLAMA_BLAS) + if(LLAMA_STATIC) set(BLA_STATIC ON) endif() - if ($(CMAKE_VERSION) VERSION_GREATER_EQUAL 3.22) + + if(${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.22) set(BLA_SIZEOF_INTEGER 8) endif() + set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) find_package(BLAS) - if (BLAS_FOUND) + + if(BLAS_FOUND) message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") add_compile_options(${BLAS_LINKER_FLAGS}) @@ -170,16 +173,17 @@ if (LLAMA_BLAS) include_directories(${BLAS_INCLUDE_DIRS}) else() message(WARNING "BLAS not found, please refer to " - "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" - " to set correct LLAMA_BLAS_VENDOR") + "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" + " to set correct LLAMA_BLAS_VENDOR") endif() endif() -if (LLAMA_CUBLAS) +if(LLAMA_CUBLAS) cmake_minimum_required(VERSION 3.17) find_package(CUDAToolkit) - if (CUDAToolkit_FOUND) + + if(CUDAToolkit_FOUND) message(STATUS "cuBLAS found") enable_language(CUDA) @@ -190,7 +194,7 @@ if (LLAMA_CUBLAS) add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) add_compile_definitions(GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y}) - if (LLAMA_STATIC) + if(LLAMA_STATIC) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) else() set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) @@ -201,10 +205,10 @@ if (LLAMA_CUBLAS) endif() endif() -if (LLAMA_METAL) - find_library(FOUNDATION_LIBRARY Foundation REQUIRED) - find_library(METAL_FRAMEWORK Metal REQUIRED) - find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) +if(LLAMA_METAL) + find_library(FOUNDATION_LIBRARY Foundation REQUIRED) + find_library(METAL_FRAMEWORK Metal REQUIRED) + find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) find_library(METALPERFORMANCE_FRAMEWORK MetalPerformanceShaders REQUIRED) set(GGML_SOURCES_METAL ggml-metal.m ggml-metal.h) @@ -213,7 +217,7 @@ if (LLAMA_METAL) 
add_compile_definitions(GGML_METAL_NDEBUG) # get full path to the file - #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/") + # add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/") # copy ggml-metal.metal to bin directory configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY) @@ -223,12 +227,13 @@ if (LLAMA_METAL) ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK} ${METALPERFORMANCE_FRAMEWORK} - ) + ) endif() -if (LLAMA_CLBLAST) +if(LLAMA_CLBLAST) find_package(CLBlast) - if (CLBlast_FOUND) + + if(CLBlast_FOUND) message(STATUS "CLBlast found") set(GGML_SOURCES_OPENCL ggml-opencl.cpp ggml-opencl.h) @@ -241,8 +246,8 @@ if (LLAMA_CLBLAST) endif() endif() -if (LLAMA_ALL_WARNINGS) - if (NOT MSVC) +if(LLAMA_ALL_WARNINGS) + if(NOT MSVC) set(c_flags -Wall -Wextra @@ -266,24 +271,24 @@ if (LLAMA_ALL_WARNINGS) endif() add_compile_options( - "$<$<COMPILE_LANGUAGE:C>:${c_flags}>" - "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>" + "$<$<COMPILE_LANGUAGE:C>:${c_flags}>" + "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>" ) - endif() -if (MSVC) +if(MSVC) add_compile_definitions(_CRT_SECURE_NO_WARNINGS) - if (BUILD_SHARED_LIBS) + if(BUILD_SHARED_LIBS) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) endif() endif() -if (LLAMA_LTO) +if(LLAMA_LTO) include(CheckIPOSupported) check_ipo_supported(RESULT result OUTPUT output) - if (result) + + if(result) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) else() message(WARNING "IPO is not supported: ${output}") @@ -292,99 +297,125 @@ endif() # Architecture specific # TODO: probably these flags need to be tweaked on some architectures -# feel free to update the Makefile for your architecture and send a pull request or issue +# feel free to update the Makefile for your architecture and send a pull request or issue message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") -if (NOT MSVC) - if (LLAMA_STATIC) + +if(NOT MSVC) + if(LLAMA_STATIC) add_link_options(-static) - if (MINGW) + + if(MINGW) add_link_options(-static-libgcc -static-libstdc++) endif() endif() - if (LLAMA_GPROF) + + 
if(LLAMA_GPROF) add_compile_options(-pg) endif() - if (LLAMA_NATIVE) + + if(LLAMA_NATIVE) add_compile_options(-march=native) endif() endif() -if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") +if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") message(STATUS "ARM detected") - if (MSVC) - # TODO: arm msvc? + + if(MSVC) + # TODO: arm msvc? else() - if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") # Apple M1, M2, etc. # Raspberry Pi 3, 4, Zero 2 (64-bit) - add_compile_options(-mcpu=native) + if(NOT DEFINED ANDROID_NDK) + add_compile_options(-mcpu=native) + endif() endif() - if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6") + + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6") # Raspberry Pi 1, Zero - add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access) + if(NOT DEFINED ANDROID_NDK) + add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access) + endif() endif() - if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") + + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") # Raspberry Pi 2 - add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations) + if(NOT DEFINED ANDROID_NDK) + add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations) + endif() endif() - if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8") + + if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8") # Raspberry Pi 3, 4, Zero 2 (32-bit) - add_compile_options(-mfp16-format=ieee -mno-unaligned-access) + if(NOT DEFINED ANDROID_NDK) + add_compile_options(-mfp16-format=ieee -mno-unaligned-access) + endif() endif() endif() -elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$") +elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$") message(STATUS "x86 detected") - if (MSVC) - if (LLAMA_AVX512) + + if(MSVC) + if(LLAMA_AVX512) 
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>) add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>) + # MSVC has no compile-time flags enabling specific # AVX512 extensions, neither it defines the # macros corresponding to the extensions. # Do it manually. - if (LLAMA_AVX512_VBMI) + if(LLAMA_AVX512_VBMI) add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>) add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>) endif() - if (LLAMA_AVX512_VNNI) + + if(LLAMA_AVX512_VNNI) add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>) add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>) endif() - elseif (LLAMA_AVX2) + elseif(LLAMA_AVX2) add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX2>) add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>) - elseif (LLAMA_AVX) + elseif(LLAMA_AVX) add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX>) add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>) endif() else() - if (LLAMA_F16C) + if(LLAMA_F16C) add_compile_options(-mf16c) endif() - if (LLAMA_FMA) + + if(LLAMA_FMA) add_compile_options(-mfma) endif() - if (LLAMA_AVX) + + if(LLAMA_AVX) add_compile_options(-mavx) endif() - if (LLAMA_AVX2) + + if(LLAMA_AVX2) add_compile_options(-mavx2) endif() - if (LLAMA_AVX512) + + if(LLAMA_AVX512) add_compile_options(-mavx512f) add_compile_options(-mavx512bw) endif() - if (LLAMA_AVX512_VBMI) + + if(LLAMA_AVX512_VBMI) add_compile_options(-mavx512vbmi) endif() - if (LLAMA_AVX512_VNNI) + + if(LLAMA_AVX512_VNNI) add_compile_options(-mavx512vnni) endif() endif() -elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64") +elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64") message(STATUS "PowerPC detected") add_compile_options(-mcpu=native -mtune=native) - #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be) + +# TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be) else() message(STATUS "Unknown architecture") endif() @@ -392,59 +423,57 @@ endif() # # Build libraries # - add_library(ggml OBJECT - ggml.c - ggml.h - ${GGML_SOURCES_CUDA} - ${GGML_SOURCES_OPENCL} - 
${GGML_SOURCES_METAL} - ) + ggml.c + ggml.h + ${GGML_SOURCES_CUDA} + ${GGML_SOURCES_OPENCL} + ${GGML_SOURCES_METAL} +) target_include_directories(ggml PUBLIC .) target_compile_features(ggml PUBLIC c_std_11) # don't bump target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) -if (BUILD_SHARED_LIBS) +if(BUILD_SHARED_LIBS) set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() add_library(llama - llama.cpp - llama.h - llama-util.h - ) + llama.cpp + llama.h + llama-util.h + llama-jni.cpp +) target_include_directories(llama PUBLIC .) target_compile_features(llama PUBLIC cxx_std_11) # don't bump target_link_libraries(llama PRIVATE ggml ${LLAMA_EXTRA_LIBS} - ) +) -if (BUILD_SHARED_LIBS) +if(BUILD_SHARED_LIBS) set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON) target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD) endif() -if (GGML_SOURCES_CUDA) +if(GGML_SOURCES_CUDA) message(STATUS "GGML CUDA sources found, configuring CUDA architecture") - set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF) - set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto") + set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF) + set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto") set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF) endif() - # # programs, examples and tests # - -if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) +if(LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) include(CTest) add_subdirectory(tests) -endif () +endif() -if (LLAMA_BUILD_EXAMPLES) +if(LLAMA_BUILD_EXAMPLES) add_subdirectory(examples) add_subdirectory(pocs) endif() diff --git a/llama-jni.cpp b/llama-jni.cpp new file mode 100644 index 000000000..2066eefd9 --- /dev/null +++ b/llama-jni.cpp @@ -0,0 +1,12 @@ +#include <jni.h> +#include "llama.h" + +// +// Created by gcpth on 05/06/2023. 
+// + +extern "C" +JNIEXPORT void JNICALL +Java_com_layla_LlamaCpp_llama_1init_1backend(JNIEnv *env, jclass clazz) { + llama_init_backend(); +}