fixed cmake, 8bit MMV should be working now
parent 7516488550
commit 8f4ed0d18c
3 changed files with 37 additions and 21 deletions
CMakeLists.txt

@@ -28,6 +28,8 @@ set(LLAMA_SANITIZE_THREAD OFF)
 set(LLAMA_SANITIZE_ADDRESS OFF)
 set(LLAMA_SANITIZE_UNDEFINED OFF)
 
+option(MAKE_MISC_FILES "MAKE_MISC_FILES" OFF)
+
 # instruction set specific
 option(LLAMA_AVX "llama: enable AVX" ON)
 option(LLAMA_AVX2 "llama: enable AVX2" ON)
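The new MAKE_MISC_FILES switch gates the extra llama library and examples targets added at the end of this file. A minimal configure sketch, assuming a standard out-of-source build (the build directory name is arbitrary):

    cmake -B build -DLLAMA_CUBLAS=ON -DMAKE_MISC_FILES=ON
    cmake --build build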
@@ -72,12 +74,12 @@ if (LLAMA_CUBLAS)
 
     enable_language(CUDA)
 
-    set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
+    set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
     set(GGML_V2_CUDA_SOURCES otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h)
     set(GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)
 
     add_compile_definitions(GGML_USE_CUBLAS)
-    add_compile_definitions(GGML_CUDA_FORCE_DMMV) #non dmmv broken for me
+    #add_compile_definitions(GGML_CUDA_FORCE_DMMV) #non dmmv broken for me
 
     add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
     add_compile_definitions(GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
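With GGML_CUDA_FORCE_DMMV commented out, CUDA builds now default to the newer mul-mat-vec (MMV) kernels, which is what the "8bit MMV" in the commit title refers to. This commit does not wire the toggle up as a CMake cache option, so restoring the legacy dequantize path in a CMake build means re-enabling the line by hand (the Makefile build below does expose it as a flag):

    # hypothetical manual edit, if the MMV kernels misbehave on a given GPU:
    add_compile_definitions(GGML_CUDA_FORCE_DMMV)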
@@ -259,7 +261,7 @@ add_library(ggml OBJECT
     ggml.h
     k_quants.h
     k_quants.c
-    ${GGML_CUDA_SOURCES})
+    ${GGML_SOURCES_CUDA})
 target_include_directories(ggml PUBLIC . ./otherarch ./otherarch/tools)
 target_compile_features(ggml PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
@@ -299,12 +301,6 @@ target_link_libraries(gpttype_adapter PRIVATE common2 ggml ${LLAMA_EXTRA_LIBS})
 set_target_properties(gpttype_adapter PROPERTIES POSITION_INDEPENDENT_CODE ON)
 
-
-if (GGML_CUDA_SOURCES)
-    message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
-    set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF)
-    set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
-endif()
 
 set(TARGET koboldcpp_cublas)
 add_library(${TARGET} SHARED expose.cpp expose.h)
 target_include_directories(${TARGET} PUBLIC . ./otherarch ./otherarch/tools ./examples)
@@ -314,3 +310,19 @@ set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "koboldcpp_cublas")
 set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 target_link_libraries(${TARGET} PUBLIC ggml ggml_v1 ggml_v2 common2 gpttype_adapter ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
+
+
+if (MAKE_MISC_FILES)
+add_library(llama
+    llama.cpp
+    llama.h
+    llama-util.h
+    )
+target_include_directories(llama PUBLIC .)
+target_compile_features(llama PUBLIC cxx_std_11) # don't bump
+target_link_libraries(llama PRIVATE
+    ggml
+    ${LLAMA_EXTRA_LIBS}
+    )
+add_subdirectory(examples)
+endif()
Makefile
@@ -144,19 +144,27 @@ ifdef LLAMA_CUBLAS
 	CUBLASLD_FLAGS = -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
 	CUBLAS_OBJS = ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o
 	NVCC = nvcc
-	NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native -DGGML_CUDA_FORCE_DMMV
+	NVCCFLAGS = --forward-unknown-to-host-compiler
+ifdef CUDA_DOCKER_ARCH
+	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
+else
+	NVCCFLAGS += -arch=native
+endif # CUDA_DOCKER_ARCH
+ifdef LLAMA_CUDA_FORCE_DMMV
+	NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
+endif # LLAMA_CUDA_FORCE_DMMV
 ifdef LLAMA_CUDA_DMMV_X
 	NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
 else
 	NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
 endif # LLAMA_CUDA_DMMV_X
-ifdef LLAMA_CUDA_DMMV_Y
-	NVCCFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)
+ifdef LLAMA_CUDA_MMV_Y
+	NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y)
+else ifdef LLAMA_CUDA_DMMV_Y
+	NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility
 else
-	NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
-endif # LLAMA_CUDA_DMMV_Y
+	NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
+endif # LLAMA_CUDA_MMV_Y
 ifdef LLAMA_CUDA_DMMV_F16
 	NVCCFLAGS += -DGGML_CUDA_DMMV_F16
 endif # LLAMA_CUDA_DMMV_F16
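Under the Makefile build the same knobs are plain make variables, matching the ifdef blocks above. A sketch of possible invocations (flag names are taken from this hunk; defaults apply when a flag is omitted):

    make LLAMA_CUBLAS=1                          # default path: MMV kernels, GGML_CUDA_MMV_Y=1
    make LLAMA_CUBLAS=1 LLAMA_CUDA_FORCE_DMMV=1  # force the legacy DMMV kernels
    make LLAMA_CUBLAS=1 LLAMA_CUDA_MMV_Y=2       # widen MMV in y; LLAMA_CUDA_DMMV_Y=2 still maps here for backwards compatibility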
ggml-cuda.cu

@@ -2547,11 +2547,7 @@ inline void ggml_cuda_op_rope(
     const float theta_scale = get_theta_scale(n_dims,n_past,n_ctx);
     const float p0 = ((mode & 1) == 0 ? n_past + i02 : i02);
 
-    float p = p0;
-    if(!get_ntk_rope_scale_mode())
-    {
-        p = n_ctx <= GGML_TRAINING_CTX ? p0 : p0 * GGML_TRAINING_CTX / n_ctx;
-    }
+    const float p = get_ntk_rope_scale_mode()?p0:(n_ctx <= GGML_TRAINING_CTX ? p0 : p0 * GGML_TRAINING_CTX / n_ctx);
 
     // compute
     rope_f32_cuda(src0_ddf_i, dst_ddf_i, ne00, i01_diff, p, theta_scale, cudaStream_main);