diff --git a/Makefile b/Makefile
index f267d0864..3b48eec99 100644
--- a/Makefile
+++ b/Makefile
@@ -101,11 +101,16 @@ ifdef LLAMA_OPENBLAS
 	LDFLAGS += -lopenblas
 endif
 ifdef LLAMA_CUBLAS
-	CFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
-	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
-	OBJS	+= ggml-cuda.o
+	CFLAGS    += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
+	LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
+	OBJS      += ggml-cuda.o
+# CUDA compiler driver; override on the command line, e.g. `make NVCC=/path/to/nvcc`.
+	NVCC      = nvcc
+# The ggml-cuda.o rule only compiles (-c), so no host link step runs there; forward
+# the CXXFLAGS options nvcc does not recognise to the host compiler instead.
+	NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
-	nvcc -arch=native -c -o $@ $<
+	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
 endif
 ifdef LLAMA_GPROF
 	CFLAGS   += -pg
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 2d2e5a90e..dc8f486f2 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -154,11 +154,11 @@ void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t st
 // lock-free, thread safe buffer pool for cuda
 #define MAX_CUDA_BUFFERS 16
 struct cuda_buffer {
-    std::atomic_uintptr_t ptr;
-    size_t size;
+    std::atomic_uintptr_t ptr { 0 }; // default member initializer: every entry starts with ptr == 0
+    size_t size { 0 };               // default member initializer: every entry starts with size == 0
 };
 
-static struct cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS] = {0};
+static cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS]; // no "= {0}": the members' default initializers zero each slot
 
 void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
     for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {