Add CXX flags to nvcc

2023-04-21 03:39:04 +02:00 · 2023-04-21 03:39:04 +02:00 · c832e7c793
commit c832e7c793
parent 641e9a0c52
2 changed files with 9 additions and 7 deletions
--- a/10
+++ b/10
@@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS
 	LDFLAGS += -lopenblas
 endif
 ifdef LLAMA_CUBLAS
-	CFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
-	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
-	OBJS	+= ggml-cuda.o
+	CFLAGS    += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
+	LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
+	OBJS      += ggml-cuda.o
+	NVCC      = nvcc
+	NVCCFLAGS = --forward-unknown-to-host-linker -arch=native
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
-	nvcc -arch=native -c -o $@ $<
+	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
 endif
 ifdef LLAMA_GPROF
 	CFLAGS   += -pg
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -154,11 +154,11 @@ void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t st
 // lock-free, thread safe buffer pool for cuda
 #define MAX_CUDA_BUFFERS 16
 struct cuda_buffer {
-    std::atomic_uintptr_t ptr;
-    size_t size;
+    std::atomic_uintptr_t ptr { 0 };
+    size_t size { 0 };
 };

-static struct cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS] = {0};
+static cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS];

 void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {