Patch summary (text before the first "diff --git" header is ignored by patch tools):

  Makefile     - in the LLAMA_CUBLAS section, adds the NVCC and NVCCFLAGS
                 variables and builds ggml-cuda.o with
                 $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) instead of a hard-coded
                 "nvcc -arch=native" command.
  ggml-cuda.cu - gives cuda_buffer::ptr and cuda_buffer::size default member
                 initializers of 0 and drops the "= {0}" initializer (and the
                 "struct" keyword) from the cuda_buffer_pool declaration.

NOTE(review): this patch was recovered from a copy whose line breaks and runs
of whitespace had been collapsed. Tab indentation in the Makefile hunk and the
column alignment of the "+=" assignments are reconstructed - confirm against
the repository before applying, or apply with --ignore-whitespace.

diff --git a/Makefile b/Makefile
index f267d0864..3b48eec99 100644
--- a/Makefile
+++ b/Makefile
@@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS
 	LDFLAGS += -lopenblas
 endif
 ifdef LLAMA_CUBLAS
-	CFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
-	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
-	OBJS    += ggml-cuda.o
+	CFLAGS    += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
+	LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
+	OBJS      += ggml-cuda.o
+	NVCC      = nvcc
+	NVCCFLAGS = --forward-unknown-to-host-linker -arch=native
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
-	nvcc -arch=native -c -o $@ $<
+	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
 endif
 ifdef LLAMA_GPROF
 	CFLAGS += -pg
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 2d2e5a90e..dc8f486f2 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -154,11 +154,11 @@ void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t st
 // lock-free, thread safe buffer pool for cuda
 #define MAX_CUDA_BUFFERS 16
 struct cuda_buffer {
-    std::atomic_uintptr_t ptr;
-    size_t size;
+    std::atomic_uintptr_t ptr { 0 };
+    size_t size { 0 };
 };
 
-static struct cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS] = {0};
+static cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS];
 
 void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
     for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {