Merge branch 'master' into HEAD
ggml-ci
This commit is contained in:
commit
8214cbca22
11 changed files with 317 additions and 218 deletions
379
CMakeLists.txt
379
CMakeLists.txt
|
@ -115,17 +115,14 @@ option(LLAMA_MPI "llama: use MPI"
|
||||||
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
|
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
|
||||||
option(LLAMA_SYCL "llama: use SYCL" OFF)
|
option(LLAMA_SYCL "llama: use SYCL" OFF)
|
||||||
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
|
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
|
||||||
|
option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
|
||||||
|
|
||||||
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
||||||
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
|
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
|
||||||
option(LLAMA_BUILD_SERVER "llama: build server example" ON)
|
option(LLAMA_BUILD_SERVER "llama: build server example" ON)
|
||||||
|
|
||||||
|
|
||||||
# add perf arguments
|
# add perf arguments
|
||||||
option(LLAMA_PERF "llama: enable perf" OFF)
|
option(LLAMA_PERF "llama: enable perf" OFF)
|
||||||
if (LLAMA_PERF)
|
|
||||||
add_definitions(-DGGML_PERF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Required for relocatable CMake package
|
# Required for relocatable CMake package
|
||||||
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
|
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
|
||||||
|
@ -133,6 +130,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
|
||||||
#
|
#
|
||||||
# Compile flags
|
# Compile flags
|
||||||
#
|
#
|
||||||
|
|
||||||
if (LLAMA_SYCL)
|
if (LLAMA_SYCL)
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
else()
|
else()
|
||||||
|
@ -143,6 +141,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED true)
|
||||||
set(CMAKE_C_STANDARD 11)
|
set(CMAKE_C_STANDARD 11)
|
||||||
set(CMAKE_C_STANDARD_REQUIRED true)
|
set(CMAKE_C_STANDARD_REQUIRED true)
|
||||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||||
|
|
||||||
find_package(Threads REQUIRED)
|
find_package(Threads REQUIRED)
|
||||||
include(CheckCXXCompilerFlag)
|
include(CheckCXXCompilerFlag)
|
||||||
|
|
||||||
|
@ -162,17 +161,17 @@ endif()
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
if (LLAMA_SANITIZE_THREAD)
|
if (LLAMA_SANITIZE_THREAD)
|
||||||
add_compile_options(-fsanitize=thread)
|
add_compile_options(-fsanitize=thread)
|
||||||
link_libraries(-fsanitize=thread)
|
link_libraries (-fsanitize=thread)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_SANITIZE_ADDRESS)
|
if (LLAMA_SANITIZE_ADDRESS)
|
||||||
add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
|
add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
|
||||||
link_libraries(-fsanitize=address)
|
link_libraries (-fsanitize=address)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_SANITIZE_UNDEFINED)
|
if (LLAMA_SANITIZE_UNDEFINED)
|
||||||
add_compile_options(-fsanitize=undefined)
|
add_compile_options(-fsanitize=undefined)
|
||||||
link_libraries(-fsanitize=undefined)
|
link_libraries (-fsanitize=undefined)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -309,14 +308,17 @@ if (LLAMA_BLAS)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
|
message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")
|
||||||
|
|
||||||
add_compile_options(${BLAS_LINKER_FLAGS})
|
add_compile_options(${BLAS_LINKER_FLAGS})
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_OPENBLAS)
|
add_compile_definitions(GGML_USE_OPENBLAS)
|
||||||
|
|
||||||
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
|
if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
|
||||||
add_compile_definitions(GGML_BLAS_USE_MKL)
|
add_compile_definitions(GGML_BLAS_USE_MKL)
|
||||||
endif()
|
endif()
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
|
|
||||||
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
|
|
||||||
|
|
||||||
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
|
||||||
|
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
|
||||||
else()
|
else()
|
||||||
message(WARNING "BLAS not found, please refer to "
|
message(WARNING "BLAS not found, please refer to "
|
||||||
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
|
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
|
||||||
|
@ -341,9 +343,6 @@ if (LLAMA_CUBLAS)
|
||||||
set(GGML_SOURCES_CUDA ggml-cuda.cu)
|
set(GGML_SOURCES_CUDA ggml-cuda.cu)
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_CUBLAS)
|
add_compile_definitions(GGML_USE_CUBLAS)
|
||||||
# if (LLAMA_CUDA_CUBLAS)
|
|
||||||
# add_compile_definitions(GGML_CUDA_CUBLAS)
|
|
||||||
# endif()
|
|
||||||
if (LLAMA_CUDA_FORCE_DMMV)
|
if (LLAMA_CUDA_FORCE_DMMV)
|
||||||
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
|
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
|
||||||
endif()
|
endif()
|
||||||
|
@ -398,15 +397,20 @@ if (LLAMA_MPI)
|
||||||
find_package(MPI)
|
find_package(MPI)
|
||||||
if (MPI_C_FOUND)
|
if (MPI_C_FOUND)
|
||||||
message(STATUS "MPI found")
|
message(STATUS "MPI found")
|
||||||
|
|
||||||
set(GGML_HEADERS_MPI ggml-mpi.h)
|
set(GGML_HEADERS_MPI ggml-mpi.h)
|
||||||
set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h)
|
set(GGML_SOURCES_MPI ggml-mpi.c)
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_MPI)
|
add_compile_definitions(GGML_USE_MPI)
|
||||||
add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
|
add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
|
||||||
|
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
add_compile_options(-Wno-cast-qual)
|
add_compile_options(-Wno-cast-qual)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
|
||||||
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
|
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
|
||||||
|
|
||||||
# Even if you're only using the C header, C++ programs may bring in MPI
|
# Even if you're only using the C header, C++ programs may bring in MPI
|
||||||
# C++ functions, so more linkage is needed
|
# C++ functions, so more linkage is needed
|
||||||
if (MPI_CXX_FOUND)
|
if (MPI_CXX_FOUND)
|
||||||
|
@ -438,31 +442,28 @@ if (LLAMA_VULKAN)
|
||||||
if (Vulkan_FOUND)
|
if (Vulkan_FOUND)
|
||||||
message(STATUS "Vulkan found")
|
message(STATUS "Vulkan found")
|
||||||
|
|
||||||
add_library(ggml-vulkan OBJECT ggml-vulkan.cpp ggml-vulkan.h)
|
set(GGML_HEADERS_VULKAN ggml-vulkan.h)
|
||||||
if (BUILD_SHARED_LIBS)
|
set(GGML_SOURCES_VULKAN ggml-vulkan.cpp)
|
||||||
set_target_properties(ggml-vulkan PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
||||||
endif()
|
|
||||||
target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
|
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_VULKAN)
|
add_compile_definitions(GGML_USE_VULKAN)
|
||||||
|
|
||||||
if (LLAMA_VULKAN_CHECK_RESULTS)
|
if (LLAMA_VULKAN_CHECK_RESULTS)
|
||||||
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_CHECK_RESULTS)
|
add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_VULKAN_DEBUG)
|
if (LLAMA_VULKAN_DEBUG)
|
||||||
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_DEBUG)
|
add_compile_definitions(GGML_VULKAN_DEBUG)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_VULKAN_VALIDATE)
|
if (LLAMA_VULKAN_VALIDATE)
|
||||||
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_VALIDATE)
|
add_compile_definitions(GGML_VULKAN_VALIDATE)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_VULKAN_RUN_TESTS)
|
if (LLAMA_VULKAN_RUN_TESTS)
|
||||||
target_compile_definitions(ggml-vulkan PRIVATE GGML_VULKAN_RUN_TESTS)
|
add_compile_definitions(GGML_VULKAN_RUN_TESTS)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ggml-vulkan)
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} Vulkan::Vulkan)
|
||||||
else()
|
else()
|
||||||
message(WARNING "Vulkan not found")
|
message(WARNING "Vulkan not found")
|
||||||
endif()
|
endif()
|
||||||
|
@ -474,43 +475,45 @@ if (LLAMA_HIPBLAS)
|
||||||
if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
|
if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
|
||||||
message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
|
message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
|
if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
|
||||||
message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
|
message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
find_package(hip)
|
find_package(hip REQUIRED)
|
||||||
find_package(hipblas)
|
find_package(hipblas REQUIRED)
|
||||||
find_package(rocblas)
|
find_package(rocblas REQUIRED)
|
||||||
|
|
||||||
if (${hipblas_FOUND} AND ${hip_FOUND})
|
message(STATUS "HIP and hipBLAS found")
|
||||||
message(STATUS "HIP and hipBLAS found")
|
|
||||||
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
|
|
||||||
if (LLAMA_HIP_UMA)
|
|
||||||
add_compile_definitions(GGML_HIP_UMA)
|
|
||||||
endif()
|
|
||||||
add_library(ggml-rocm OBJECT ggml-cuda.cu ggml-cuda.h)
|
|
||||||
if (BUILD_SHARED_LIBS)
|
|
||||||
set_target_properties(ggml-rocm PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
||||||
endif()
|
|
||||||
if (LLAMA_CUDA_FORCE_DMMV)
|
|
||||||
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_FORCE_DMMV)
|
|
||||||
endif()
|
|
||||||
if (LLAMA_CUDA_FORCE_MMQ)
|
|
||||||
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_FORCE_MMQ)
|
|
||||||
endif()
|
|
||||||
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
|
||||||
target_compile_definitions(ggml-rocm PRIVATE GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
|
||||||
target_compile_definitions(ggml-rocm PRIVATE K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
|
||||||
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
|
|
||||||
target_link_libraries(ggml-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
|
|
||||||
|
|
||||||
if (LLAMA_STATIC)
|
set(GGML_HEADERS_ROCM ggml-cuda.h)
|
||||||
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
|
set(GGML_SOURCES_ROCM ggml-cuda.cu)
|
||||||
endif()
|
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ggml-rocm)
|
add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUBLAS)
|
||||||
else()
|
|
||||||
message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
|
if (LLAMA_HIP_UMA)
|
||||||
|
add_compile_definitions(GGML_HIP_UMA)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (LLAMA_CUDA_FORCE_DMMV)
|
||||||
|
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (LLAMA_CUDA_FORCE_MMQ)
|
||||||
|
add_compile_definitions(GGML_CUDA_FORCE_MMQ)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
|
||||||
|
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
|
||||||
|
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
||||||
|
|
||||||
|
set_source_files_properties(ggml-cuda.cu PROPERTIES LANGUAGE CXX)
|
||||||
|
|
||||||
|
if (LLAMA_STATIC)
|
||||||
|
message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (LLAMA_SYCL)
|
if (LLAMA_SYCL)
|
||||||
|
@ -520,10 +523,14 @@ if (LLAMA_SYCL)
|
||||||
#todo: AOT
|
#todo: AOT
|
||||||
|
|
||||||
find_package(IntelSYCL REQUIRED)
|
find_package(IntelSYCL REQUIRED)
|
||||||
|
|
||||||
|
message(STATUS "SYCL found")
|
||||||
|
|
||||||
|
add_compile_definitions(GML_USE_SYCL)
|
||||||
|
|
||||||
if (LLAMA_SYCL_F16)
|
if (LLAMA_SYCL_F16)
|
||||||
add_compile_definitions(GGML_SYCL_F16)
|
add_compile_definitions(GGML_SYCL_F16)
|
||||||
endif()
|
endif()
|
||||||
add_compile_definitions(GGML_USE_SYCL)
|
|
||||||
|
|
||||||
add_compile_options(-I./) #include DPCT
|
add_compile_options(-I./) #include DPCT
|
||||||
add_compile_options(-I/${SYCL_INCLUDE_DIR})
|
add_compile_options(-I/${SYCL_INCLUDE_DIR})
|
||||||
|
@ -532,7 +539,7 @@ if (LLAMA_SYCL)
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
|
||||||
|
|
||||||
set(GGML_HEADERS_SYCL ggml.h ggml-sycl.h)
|
set(GGML_HEADERS_SYCL ggml-sycl.h)
|
||||||
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
|
set(GGML_SOURCES_SYCL ggml-sycl.cpp)
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
|
@ -551,61 +558,61 @@ if (LLAMA_KOMPUTE)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
function(compile_shader)
|
function(compile_shader)
|
||||||
set(options)
|
set(options)
|
||||||
set(oneValueArgs)
|
set(oneValueArgs)
|
||||||
set(multiValueArgs SOURCES)
|
set(multiValueArgs SOURCES)
|
||||||
cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
|
cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
|
||||||
foreach(source ${compile_shader_SOURCES})
|
foreach(source ${compile_shader_SOURCES})
|
||||||
get_filename_component(filename ${source} NAME)
|
get_filename_component(filename ${source} NAME)
|
||||||
set(spv_file ${filename}.spv)
|
set(spv_file ${filename}.spv)
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${spv_file}
|
OUTPUT ${spv_file}
|
||||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
|
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
|
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
|
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
|
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
|
${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
|
||||||
COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
|
COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
|
||||||
COMMENT "Compiling ${source} to ${spv_file}"
|
COMMENT "Compiling ${source} to ${spv_file}"
|
||||||
)
|
)
|
||||||
|
|
||||||
get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
|
get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
|
||||||
set(FILE_NAME "shader${RAW_FILE_NAME}")
|
set(FILE_NAME "shader${RAW_FILE_NAME}")
|
||||||
string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
|
string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
|
||||||
string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
|
string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
|
||||||
string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
|
string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
|
||||||
set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
|
set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
|
||||||
message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
|
message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
|
||||||
if(CMAKE_GENERATOR MATCHES "Visual Studio")
|
if(CMAKE_GENERATOR MATCHES "Visual Studio")
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${OUTPUT_HEADER_FILE}
|
OUTPUT ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
||||||
DEPENDS ${spv_file} xxd
|
DEPENDS ${spv_file} xxd
|
||||||
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
|
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${OUTPUT_HEADER_FILE}
|
OUTPUT ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
|
||||||
DEPENDS ${spv_file} xxd
|
DEPENDS ${spv_file} xxd
|
||||||
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
|
COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
endforeach()
|
endforeach()
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
|
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
|
||||||
|
@ -615,66 +622,66 @@ if (LLAMA_KOMPUTE)
|
||||||
|
|
||||||
# Compile our shaders
|
# Compile our shaders
|
||||||
compile_shader(SOURCES
|
compile_shader(SOURCES
|
||||||
kompute-shaders/op_scale.comp
|
kompute-shaders/op_scale.comp
|
||||||
kompute-shaders/op_scale_8.comp
|
kompute-shaders/op_scale_8.comp
|
||||||
kompute-shaders/op_add.comp
|
kompute-shaders/op_add.comp
|
||||||
kompute-shaders/op_addrow.comp
|
kompute-shaders/op_addrow.comp
|
||||||
kompute-shaders/op_mul.comp
|
kompute-shaders/op_mul.comp
|
||||||
kompute-shaders/op_silu.comp
|
kompute-shaders/op_silu.comp
|
||||||
kompute-shaders/op_relu.comp
|
kompute-shaders/op_relu.comp
|
||||||
kompute-shaders/op_gelu.comp
|
kompute-shaders/op_gelu.comp
|
||||||
kompute-shaders/op_softmax.comp
|
kompute-shaders/op_softmax.comp
|
||||||
kompute-shaders/op_norm.comp
|
kompute-shaders/op_norm.comp
|
||||||
kompute-shaders/op_rmsnorm.comp
|
kompute-shaders/op_rmsnorm.comp
|
||||||
kompute-shaders/op_diagmask.comp
|
kompute-shaders/op_diagmask.comp
|
||||||
kompute-shaders/op_mul_mat_mat_f32.comp
|
kompute-shaders/op_mul_mat_mat_f32.comp
|
||||||
kompute-shaders/op_mul_mat_f16.comp
|
kompute-shaders/op_mul_mat_f16.comp
|
||||||
kompute-shaders/op_mul_mat_q8_0.comp
|
kompute-shaders/op_mul_mat_q8_0.comp
|
||||||
kompute-shaders/op_mul_mat_q4_0.comp
|
kompute-shaders/op_mul_mat_q4_0.comp
|
||||||
kompute-shaders/op_mul_mat_q4_1.comp
|
kompute-shaders/op_mul_mat_q4_1.comp
|
||||||
kompute-shaders/op_mul_mat_q6_k.comp
|
kompute-shaders/op_mul_mat_q6_k.comp
|
||||||
kompute-shaders/op_getrows_f16.comp
|
kompute-shaders/op_getrows_f16.comp
|
||||||
kompute-shaders/op_getrows_q4_0.comp
|
kompute-shaders/op_getrows_q4_0.comp
|
||||||
kompute-shaders/op_getrows_q4_1.comp
|
kompute-shaders/op_getrows_q4_1.comp
|
||||||
kompute-shaders/op_getrows_q6_k.comp
|
kompute-shaders/op_getrows_q6_k.comp
|
||||||
kompute-shaders/op_rope_f16.comp
|
kompute-shaders/op_rope_f16.comp
|
||||||
kompute-shaders/op_rope_f32.comp
|
kompute-shaders/op_rope_f32.comp
|
||||||
kompute-shaders/op_cpy_f16_f16.comp
|
kompute-shaders/op_cpy_f16_f16.comp
|
||||||
kompute-shaders/op_cpy_f16_f32.comp
|
kompute-shaders/op_cpy_f16_f32.comp
|
||||||
kompute-shaders/op_cpy_f32_f16.comp
|
kompute-shaders/op_cpy_f32_f16.comp
|
||||||
kompute-shaders/op_cpy_f32_f32.comp
|
kompute-shaders/op_cpy_f32_f32.comp
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create a custom target for our generated shaders
|
# Create a custom target for our generated shaders
|
||||||
add_custom_target(generated_shaders DEPENDS
|
add_custom_target(generated_shaders DEPENDS
|
||||||
shaderop_scale.h
|
shaderop_scale.h
|
||||||
shaderop_scale_8.h
|
shaderop_scale_8.h
|
||||||
shaderop_add.h
|
shaderop_add.h
|
||||||
shaderop_addrow.h
|
shaderop_addrow.h
|
||||||
shaderop_mul.h
|
shaderop_mul.h
|
||||||
shaderop_silu.h
|
shaderop_silu.h
|
||||||
shaderop_relu.h
|
shaderop_relu.h
|
||||||
shaderop_gelu.h
|
shaderop_gelu.h
|
||||||
shaderop_softmax.h
|
shaderop_softmax.h
|
||||||
shaderop_norm.h
|
shaderop_norm.h
|
||||||
shaderop_rmsnorm.h
|
shaderop_rmsnorm.h
|
||||||
shaderop_diagmask.h
|
shaderop_diagmask.h
|
||||||
shaderop_mul_mat_mat_f32.h
|
shaderop_mul_mat_mat_f32.h
|
||||||
shaderop_mul_mat_f16.h
|
shaderop_mul_mat_f16.h
|
||||||
shaderop_mul_mat_q8_0.h
|
shaderop_mul_mat_q8_0.h
|
||||||
shaderop_mul_mat_q4_0.h
|
shaderop_mul_mat_q4_0.h
|
||||||
shaderop_mul_mat_q4_1.h
|
shaderop_mul_mat_q4_1.h
|
||||||
shaderop_mul_mat_q6_k.h
|
shaderop_mul_mat_q6_k.h
|
||||||
shaderop_getrows_f16.h
|
shaderop_getrows_f16.h
|
||||||
shaderop_getrows_q4_0.h
|
shaderop_getrows_q4_0.h
|
||||||
shaderop_getrows_q4_1.h
|
shaderop_getrows_q4_1.h
|
||||||
shaderop_getrows_q6_k.h
|
shaderop_getrows_q6_k.h
|
||||||
shaderop_rope_f16.h
|
shaderop_rope_f16.h
|
||||||
shaderop_rope_f32.h
|
shaderop_rope_f32.h
|
||||||
shaderop_cpy_f16_f16.h
|
shaderop_cpy_f16_f16.h
|
||||||
shaderop_cpy_f16_f32.h
|
shaderop_cpy_f16_f32.h
|
||||||
shaderop_cpy_f32_f16.h
|
shaderop_cpy_f32_f16.h
|
||||||
shaderop_cpy_f32_f32.h
|
shaderop_cpy_f32_f32.h
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create a custom command that depends on the generated_shaders
|
# Create a custom command that depends on the generated_shaders
|
||||||
|
@ -687,8 +694,10 @@ if (LLAMA_KOMPUTE)
|
||||||
|
|
||||||
# Add the stamp to the main sources to ensure dependency tracking
|
# Add the stamp to the main sources to ensure dependency tracking
|
||||||
set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
|
set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
|
||||||
set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
|
set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_KOMPUTE)
|
add_compile_definitions(GGML_USE_KOMPUTE)
|
||||||
|
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
|
||||||
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
|
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
|
||||||
else()
|
else()
|
||||||
|
@ -696,6 +705,18 @@ if (LLAMA_KOMPUTE)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (LLAMA_CPU_HBM)
|
||||||
|
find_library(memkind memkind REQUIRED)
|
||||||
|
|
||||||
|
add_compile_definitions(GGML_USE_CPU_HBM)
|
||||||
|
|
||||||
|
target_link_libraries(ggml PUBLIC memkind)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (LLAMA_PERF)
|
||||||
|
add_compile_definitions(GGML_PERF)
|
||||||
|
endif()
|
||||||
|
|
||||||
function(get_flags CCID CCVER)
|
function(get_flags CCID CCVER)
|
||||||
set(C_FLAGS "")
|
set(C_FLAGS "")
|
||||||
set(CXX_FLAGS "")
|
set(CXX_FLAGS "")
|
||||||
|
@ -832,6 +853,7 @@ execute_process(
|
||||||
ERROR_VARIABLE output
|
ERROR_VARIABLE output
|
||||||
OUTPUT_QUIET
|
OUTPUT_QUIET
|
||||||
)
|
)
|
||||||
|
|
||||||
if (output MATCHES "dyld-1015\.7")
|
if (output MATCHES "dyld-1015\.7")
|
||||||
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
|
add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
|
||||||
endif()
|
endif()
|
||||||
|
@ -841,10 +863,10 @@ endif()
|
||||||
# feel free to update the Makefile for your architecture and send a pull request or issue
|
# feel free to update the Makefile for your architecture and send a pull request or issue
|
||||||
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
|
string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
|
||||||
message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
|
message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
|
||||||
else ()
|
else ()
|
||||||
set(CMAKE_GENERATOR_PLATFORM_LWR "")
|
set(CMAKE_GENERATOR_PLATFORM_LWR "")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
|
@ -1038,11 +1060,6 @@ endif()
|
||||||
|
|
||||||
# ggml
|
# ggml
|
||||||
|
|
||||||
if (GGML_USE_CPU_HBM)
|
|
||||||
add_definitions(-DGGML_USE_CPU_HBM)
|
|
||||||
find_library(memkind memkind REQUIRED)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_library(ggml OBJECT
|
add_library(ggml OBJECT
|
||||||
ggml.c
|
ggml.c
|
||||||
ggml.h
|
ggml.h
|
||||||
|
@ -1059,16 +1076,17 @@ add_library(ggml OBJECT
|
||||||
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
|
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
|
||||||
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
|
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
|
||||||
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
|
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
|
||||||
|
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
|
||||||
|
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
|
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
|
||||||
target_compile_features(ggml PUBLIC c_std_11) # don't bump
|
target_compile_features (ggml PUBLIC c_std_11) # don't bump
|
||||||
|
|
||||||
target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
|
target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
|
||||||
if (GGML_USE_CPU_HBM)
|
|
||||||
target_link_libraries(ggml PUBLIC memkind)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>)
|
add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>)
|
||||||
|
|
||||||
if (BUILD_SHARED_LIBS)
|
if (BUILD_SHARED_LIBS)
|
||||||
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>)
|
add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>)
|
||||||
|
@ -1084,7 +1102,8 @@ add_library(llama
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(llama PUBLIC .)
|
target_include_directories(llama PUBLIC .)
|
||||||
target_compile_features(llama PUBLIC cxx_std_11) # don't bump
|
target_compile_features (llama PUBLIC cxx_std_11) # don't bump
|
||||||
|
|
||||||
target_link_libraries(llama PRIVATE
|
target_link_libraries(llama PRIVATE
|
||||||
ggml
|
ggml
|
||||||
${LLAMA_EXTRA_LIBS}
|
${LLAMA_EXTRA_LIBS}
|
||||||
|
@ -1135,7 +1154,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama)
|
||||||
|
|
||||||
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
|
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
|
||||||
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
|
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
|
||||||
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")
|
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")
|
||||||
|
|
||||||
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
|
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
|
||||||
|
|
|
@ -341,7 +341,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const auto sampler_names = string_split(argv[i], ';');
|
const auto sampler_names = string_split(argv[i], ';');
|
||||||
sparams.samplers_sequence = sampler_types_from_names(sampler_names);
|
sparams.samplers_sequence = sampler_types_from_names(sampler_names, true);
|
||||||
} else if (arg == "--sampling-seq") {
|
} else if (arg == "--sampling-seq") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
|
@ -964,7 +964,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
||||||
printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
|
printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
|
||||||
printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
|
printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
|
||||||
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
|
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
|
||||||
printf(" --samplers samplers that will be used for generation in the order, separated by \';\' (default: %s)\n", sampler_type_names.c_str());
|
printf(" --samplers samplers that will be used for generation in the order, separated by \';\'\n");
|
||||||
|
printf(" (default: %s)\n", sampler_type_names.c_str());
|
||||||
printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str());
|
printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str());
|
||||||
printf(" --top-k N top-k sampling (default: %d, 0 = disabled)\n", sparams.top_k);
|
printf(" --top-k N top-k sampling (default: %d, 0 = disabled)\n", sparams.top_k);
|
||||||
printf(" --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)sparams.top_p);
|
printf(" --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)sparams.top_p);
|
||||||
|
@ -1133,34 +1134,50 @@ std::vector<std::string> string_split(std::string input, char separator) {
|
||||||
return parts;
|
return parts;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names) {
|
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
|
||||||
|
std::unordered_map<std::string, llama_sampler_type> sampler_canonical_name_map {
|
||||||
|
{"top_k", llama_sampler_type::TOP_K},
|
||||||
|
{"top_p", llama_sampler_type::TOP_P},
|
||||||
|
{"typical_p", llama_sampler_type::TYPICAL_P},
|
||||||
|
{"min_p", llama_sampler_type::MIN_P},
|
||||||
|
{"tfs_z", llama_sampler_type::TFS_Z},
|
||||||
|
{"temperature", llama_sampler_type::TEMPERATURE}
|
||||||
|
};
|
||||||
|
|
||||||
// since samplers names are written multiple ways
|
// since samplers names are written multiple ways
|
||||||
// make it ready for both system names and input names
|
// make it ready for both system names and input names
|
||||||
std::unordered_map<std::string, llama_sampler_type> sampler_name_map {
|
std::unordered_map<std::string, llama_sampler_type> sampler_alt_name_map {
|
||||||
{"top_k", llama_sampler_type::TOP_K},
|
|
||||||
{"top-k", llama_sampler_type::TOP_K},
|
{"top-k", llama_sampler_type::TOP_K},
|
||||||
{"top_p", llama_sampler_type::TOP_P},
|
|
||||||
{"top-p", llama_sampler_type::TOP_P},
|
{"top-p", llama_sampler_type::TOP_P},
|
||||||
{"nucleus", llama_sampler_type::TOP_P},
|
{"nucleus", llama_sampler_type::TOP_P},
|
||||||
{"typical_p", llama_sampler_type::TYPICAL_P},
|
|
||||||
{"typical-p", llama_sampler_type::TYPICAL_P},
|
{"typical-p", llama_sampler_type::TYPICAL_P},
|
||||||
{"typical", llama_sampler_type::TYPICAL_P},
|
{"typical", llama_sampler_type::TYPICAL_P},
|
||||||
{"min_p", llama_sampler_type::MIN_P},
|
|
||||||
{"min-p", llama_sampler_type::MIN_P},
|
{"min-p", llama_sampler_type::MIN_P},
|
||||||
{"tfs_z", llama_sampler_type::TFS_Z},
|
|
||||||
{"tfs-z", llama_sampler_type::TFS_Z},
|
{"tfs-z", llama_sampler_type::TFS_Z},
|
||||||
{"tfs", llama_sampler_type::TFS_Z},
|
{"tfs", llama_sampler_type::TFS_Z},
|
||||||
{"temp", llama_sampler_type::TEMP},
|
{"temp", llama_sampler_type::TEMPERATURE}
|
||||||
{"temperature", llama_sampler_type::TEMP}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<llama_sampler_type> sampler_types;
|
std::vector<llama_sampler_type> sampler_types;
|
||||||
sampler_types.reserve(names.size());
|
sampler_types.reserve(names.size());
|
||||||
for (const auto& name : names) {
|
for (const auto & name : names)
|
||||||
const auto sampler_item = sampler_name_map.find(name);
|
{
|
||||||
if (sampler_item != sampler_name_map.end()) {
|
auto sampler_item = sampler_canonical_name_map.find(name);
|
||||||
|
if (sampler_item != sampler_canonical_name_map.end())
|
||||||
|
{
|
||||||
sampler_types.push_back(sampler_item->second);
|
sampler_types.push_back(sampler_item->second);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (allow_alt_names)
|
||||||
|
{
|
||||||
|
sampler_item = sampler_alt_name_map.find(name);
|
||||||
|
if (sampler_item != sampler_alt_name_map.end())
|
||||||
|
{
|
||||||
|
sampler_types.push_back(sampler_item->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return sampler_types;
|
return sampler_types;
|
||||||
}
|
}
|
||||||
|
@ -1172,7 +1189,7 @@ std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & nam
|
||||||
{'y', llama_sampler_type::TYPICAL_P},
|
{'y', llama_sampler_type::TYPICAL_P},
|
||||||
{'m', llama_sampler_type::MIN_P},
|
{'m', llama_sampler_type::MIN_P},
|
||||||
{'f', llama_sampler_type::TFS_Z},
|
{'f', llama_sampler_type::TFS_Z},
|
||||||
{'t', llama_sampler_type::TEMP}
|
{'t', llama_sampler_type::TEMPERATURE}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<llama_sampler_type> sampler_types;
|
std::vector<llama_sampler_type> sampler_types;
|
||||||
|
@ -1188,12 +1205,12 @@ std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & nam
|
||||||
|
|
||||||
std::string sampler_type_to_name_string(llama_sampler_type sampler_type) {
|
std::string sampler_type_to_name_string(llama_sampler_type sampler_type) {
|
||||||
switch (sampler_type) {
|
switch (sampler_type) {
|
||||||
case llama_sampler_type::TOP_K: return "top_k";
|
case llama_sampler_type::TOP_K: return "top_k";
|
||||||
case llama_sampler_type::TFS_Z: return "tfs_z";
|
case llama_sampler_type::TFS_Z: return "tfs_z";
|
||||||
case llama_sampler_type::TYPICAL_P: return "typical_p";
|
case llama_sampler_type::TYPICAL_P: return "typical_p";
|
||||||
case llama_sampler_type::TOP_P: return "top_p";
|
case llama_sampler_type::TOP_P: return "top_p";
|
||||||
case llama_sampler_type::MIN_P: return "min_p";
|
case llama_sampler_type::MIN_P: return "min_p";
|
||||||
case llama_sampler_type::TEMP: return "temp";
|
case llama_sampler_type::TEMPERATURE: return "temperature";
|
||||||
default : return "";
|
default : return "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -165,7 +165,7 @@ void process_escapes(std::string& input);
|
||||||
// String utils
|
// String utils
|
||||||
//
|
//
|
||||||
|
|
||||||
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names);
|
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
|
||||||
std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string);
|
std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string);
|
||||||
std::vector<std::string> string_split(std::string input, char separator);
|
std::vector<std::string> string_split(std::string input, char separator);
|
||||||
std::string sampler_type_to_name_string(llama_sampler_type sampler_type);
|
std::string sampler_type_to_name_string(llama_sampler_type sampler_type);
|
||||||
|
|
|
@ -139,7 +139,7 @@ static void sampler_queue(
|
||||||
case llama_sampler_type::TYPICAL_P: llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); break;
|
case llama_sampler_type::TYPICAL_P: llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); break;
|
||||||
case llama_sampler_type::TOP_P : llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); break;
|
case llama_sampler_type::TOP_P : llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); break;
|
||||||
case llama_sampler_type::MIN_P : llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); break;
|
case llama_sampler_type::MIN_P : llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); break;
|
||||||
case llama_sampler_type::TEMP:
|
case llama_sampler_type::TEMPERATURE:
|
||||||
if (dynatemp_range > 0) {
|
if (dynatemp_range > 0) {
|
||||||
float dynatemp_min = std::max(0.0f, temp - dynatemp_range);
|
float dynatemp_min = std::max(0.0f, temp - dynatemp_range);
|
||||||
float dynatemp_max = std::max(0.0f, temp + dynatemp_range);
|
float dynatemp_max = std::max(0.0f, temp + dynatemp_range);
|
||||||
|
|
|
@ -10,12 +10,12 @@
|
||||||
|
|
||||||
// sampler types
|
// sampler types
|
||||||
enum class llama_sampler_type : char {
|
enum class llama_sampler_type : char {
|
||||||
TOP_K = 'k',
|
TOP_K = 'k',
|
||||||
TOP_P = 'p',
|
TOP_P = 'p',
|
||||||
MIN_P = 'm',
|
MIN_P = 'm',
|
||||||
TFS_Z = 'f',
|
TFS_Z = 'f',
|
||||||
TYPICAL_P = 'y',
|
TYPICAL_P = 'y',
|
||||||
TEMP = 't'
|
TEMPERATURE = 't'
|
||||||
};
|
};
|
||||||
|
|
||||||
// sampling parameters
|
// sampling parameters
|
||||||
|
@ -45,7 +45,7 @@ typedef struct llama_sampling_params {
|
||||||
llama_sampler_type::TYPICAL_P,
|
llama_sampler_type::TYPICAL_P,
|
||||||
llama_sampler_type::TOP_P,
|
llama_sampler_type::TOP_P,
|
||||||
llama_sampler_type::MIN_P,
|
llama_sampler_type::MIN_P,
|
||||||
llama_sampler_type::TEMP
|
llama_sampler_type::TEMPERATURE
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string grammar; // optional BNF-like grammar to constrain sampling
|
std::string grammar; // optional BNF-like grammar to constrain sampling
|
||||||
|
|
|
@ -315,7 +315,6 @@ static bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_thre
|
||||||
float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model
|
float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model
|
||||||
if (!image_embd) {
|
if (!image_embd) {
|
||||||
fprintf(stderr, "Unable to allocate memory for image embeddings\n");
|
fprintf(stderr, "Unable to allocate memory for image embeddings\n");
|
||||||
free(image_embd);
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -204,6 +204,8 @@ node index.js
|
||||||
|
|
||||||
`system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
|
`system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
|
||||||
|
|
||||||
|
`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. (default: `["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values)
|
||||||
|
|
||||||
### Result JSON
|
### Result JSON
|
||||||
|
|
||||||
- Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
|
- Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
|
||||||
|
|
|
@ -672,6 +672,24 @@ struct llama_server_context
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto &samplers_sequence = data.find("samplers");
|
||||||
|
if (samplers_sequence != data.end() && samplers_sequence->is_array())
|
||||||
|
{
|
||||||
|
std::vector<std::string> sampler_names;
|
||||||
|
for (const auto &sampler_name : *samplers_sequence)
|
||||||
|
{
|
||||||
|
if (sampler_name.is_string())
|
||||||
|
{
|
||||||
|
sampler_names.emplace_back(sampler_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
|
||||||
|
}
|
||||||
|
|
||||||
if (multimodal)
|
if (multimodal)
|
||||||
{
|
{
|
||||||
const auto &images_data = data.find("image_data");
|
const auto &images_data = data.find("image_data");
|
||||||
|
@ -1026,6 +1044,12 @@ struct llama_server_context
|
||||||
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
|
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
|
||||||
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
|
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
|
||||||
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
|
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
|
||||||
|
std::vector<std::string> samplers_sequence;
|
||||||
|
for (const auto &sampler_type : slot.sparams.samplers_sequence)
|
||||||
|
{
|
||||||
|
samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
|
||||||
|
}
|
||||||
|
|
||||||
return json {
|
return json {
|
||||||
{"n_ctx", slot.n_ctx},
|
{"n_ctx", slot.n_ctx},
|
||||||
{"model", params.model_alias},
|
{"model", params.model_alias},
|
||||||
|
@ -1056,6 +1080,7 @@ struct llama_server_context
|
||||||
{"logit_bias", slot.sparams.logit_bias},
|
{"logit_bias", slot.sparams.logit_bias},
|
||||||
{"n_probs", slot.sparams.n_probs},
|
{"n_probs", slot.sparams.n_probs},
|
||||||
{"grammar", slot.sparams.grammar},
|
{"grammar", slot.sparams.grammar},
|
||||||
|
{"samplers", samplers_sequence}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1091,7 +1091,7 @@ static void ggml_vk_print_gpu_info(size_t idx) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vk_instance_init() {
|
static void ggml_vk_instance_init() {
|
||||||
if (vk_instance_initialized) {
|
if (vk_instance_initialized) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1150,7 +1150,7 @@ void ggml_vk_instance_init() {
|
||||||
vk_instance_initialized = true;
|
vk_instance_initialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
|
static void ggml_vk_init(ggml_backend_vk_context * ctx, size_t idx) {
|
||||||
GGML_ASSERT(idx < vk_instance.device_indices.size());
|
GGML_ASSERT(idx < vk_instance.device_indices.size());
|
||||||
size_t dev_num = vk_instance.device_indices[idx];
|
size_t dev_num = vk_instance.device_indices[idx];
|
||||||
#ifdef GGML_VULKAN_DEBUG
|
#ifdef GGML_VULKAN_DEBUG
|
||||||
|
@ -4556,13 +4556,13 @@ static void ggml_vk_cleanup(ggml_backend_vk_context * ctx) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL int ggml_vk_get_device_count() {
|
GGML_CALL static int ggml_vk_get_device_count() {
|
||||||
ggml_vk_instance_init();
|
ggml_vk_instance_init();
|
||||||
|
|
||||||
return vk_instance.device_indices.size();
|
return vk_instance.device_indices.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL void ggml_vk_get_device_description(int device, char * description, size_t description_size) {
|
GGML_CALL static void ggml_vk_get_device_description(int device, char * description, size_t description_size) {
|
||||||
ggml_vk_instance_init();
|
ggml_vk_instance_init();
|
||||||
|
|
||||||
std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
|
std::vector<vk::PhysicalDevice> devices = vk_instance.instance.enumeratePhysicalDevices();
|
||||||
|
@ -4580,7 +4580,7 @@ void ggml_vk_init_cpu_assist() {
|
||||||
|
|
||||||
std::cerr << "ggml_vulkan: Found " << ggml_vk_get_device_count() << " Vulkan devices:" << std::endl;
|
std::cerr << "ggml_vulkan: Found " << ggml_vk_get_device_count() << " Vulkan devices:" << std::endl;
|
||||||
|
|
||||||
for (size_t i = 0; i < ggml_vk_get_device_count(); i++) {
|
for (int i = 0; i < ggml_vk_get_device_count(); i++) {
|
||||||
ggml_vk_print_gpu_info(i);
|
ggml_vk_print_gpu_info(i);
|
||||||
}
|
}
|
||||||
// Initialize the first backend to make sure CPU matrix multiplications can be offloaded.
|
// Initialize the first backend to make sure CPU matrix multiplications can be offloaded.
|
||||||
|
@ -5267,7 +5267,7 @@ GGML_CALL void ggml_backend_vk_get_device_description(int device, char * descrip
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
|
GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total) {
|
||||||
GGML_ASSERT(device < vk_instance.device_indices.size());
|
GGML_ASSERT(device < (int) vk_instance.device_indices.size());
|
||||||
|
|
||||||
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
|
vk::PhysicalDevice vkdev = vk_instance.instance.enumeratePhysicalDevices()[vk_instance.device_indices[device]];
|
||||||
|
|
||||||
|
|
|
@ -10893,7 +10893,7 @@ static int llama_apply_lora_from_file_internal(
|
||||||
{
|
{
|
||||||
LLAMA_LOG_ERROR("%s: invalid tensor data type '%d'\n",
|
LLAMA_LOG_ERROR("%s: invalid tensor data type '%d'\n",
|
||||||
__func__, ftype);
|
__func__, ftype);
|
||||||
return false;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
37
scripts/compare-commits.sh
Executable file
37
scripts/compare-commits.sh
Executable file
|
@ -0,0 +1,37 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
if [ $# -lt 2 ]; then
|
||||||
|
echo "usage: ./scripts/compare-commits.sh <commit1> <commit2> [additional llama-bench arguments]"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
set -e
|
||||||
|
set -x
|
||||||
|
|
||||||
|
bench_args="${@:3}"
|
||||||
|
|
||||||
|
rm -f llama-bench.sqlite
|
||||||
|
|
||||||
|
backend="cpu"
|
||||||
|
|
||||||
|
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||||
|
backend="metal"
|
||||||
|
elif command -v nvcc &> /dev/null; then
|
||||||
|
backend="cuda"
|
||||||
|
fi
|
||||||
|
|
||||||
|
make_opts=""
|
||||||
|
|
||||||
|
if [[ "$backend" == "cuda" ]]; then
|
||||||
|
make_opts="LLAMA_CUBLAS=1"
|
||||||
|
fi
|
||||||
|
|
||||||
|
git checkout $1
|
||||||
|
make clean && make -j32 $make_opts llama-bench
|
||||||
|
./llama-bench -o sql $bench_args | tee /dev/tty | sqlite3 llama-bench.sqlite
|
||||||
|
|
||||||
|
git checkout $2
|
||||||
|
make clean && make -j32 $make_opts llama-bench
|
||||||
|
./llama-bench -o sql $bench_args | tee /dev/tty | sqlite3 llama-bench.sqlite
|
||||||
|
|
||||||
|
./scripts/compare-llama-bench.py -b $1 -c $2
|
Loading…
Add table
Add a link
Reference in a new issue