Add CXX flags to nvcc

This commit is contained in:
Slaren 2023-04-21 03:39:04 +02:00
parent 641e9a0c52
commit c832e7c793
2 changed files with 9 additions and 7 deletions

View file

@@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS
 LDFLAGS += -lopenblas
 endif
 ifdef LLAMA_CUBLAS
-CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
-LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
-OBJS += ggml-cuda.o
+CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
+LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
+OBJS += ggml-cuda.o
+NVCC = nvcc
+NVCCFLAGS = --forward-unknown-to-host-linker -arch=native
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
-nvcc -arch=native -c -o $@ $<
+$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
 endif
 ifdef LLAMA_GPROF
 CFLAGS += -pg

View file

@@ -154,11 +154,11 @@ void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t st
 // lock-free, thread safe buffer pool for cuda
 #define MAX_CUDA_BUFFERS 16
 struct cuda_buffer {
-std::atomic_uintptr_t ptr;
-size_t size;
+std::atomic_uintptr_t ptr { 0 };
+size_t size { 0 };
 };
-static struct cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS] = {0};
+static cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS];
 void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
 for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {