Add CXX flags to nvcc

This commit is contained in:
Slaren 2023-04-21 03:39:04 +02:00
parent 641e9a0c52
commit c832e7c793
2 changed files with 9 additions and 7 deletions

View file

@@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS
LDFLAGS += -lopenblas LDFLAGS += -lopenblas
endif endif
ifdef LLAMA_CUBLAS ifdef LLAMA_CUBLAS
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
OBJS += ggml-cuda.o OBJS += ggml-cuda.o
NVCC = nvcc
NVCCFLAGS = --forward-unknown-to-host-linker -arch=native
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
nvcc -arch=native -c -o $@ $< $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
endif endif
ifdef LLAMA_GPROF ifdef LLAMA_GPROF
CFLAGS += -pg CFLAGS += -pg

View file

@@ -154,11 +154,11 @@ void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t st
// lock-free, thread safe buffer pool for cuda // lock-free, thread safe buffer pool for cuda
#define MAX_CUDA_BUFFERS 16 #define MAX_CUDA_BUFFERS 16
struct cuda_buffer { struct cuda_buffer {
std::atomic_uintptr_t ptr; std::atomic_uintptr_t ptr { 0 };
size_t size; size_t size { 0 };
}; };
static struct cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS] = {0}; static cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS];
void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) { void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) { for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {