Add CXX flags to nvcc
This commit is contained in:
parent
641e9a0c52
commit
c832e7c793
2 changed files with 9 additions and 7 deletions
10
Makefile
10
Makefile
|
@@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS
|
||||||
LDFLAGS += -lopenblas
|
LDFLAGS += -lopenblas
|
||||||
endif
|
endif
|
||||||
ifdef LLAMA_CUBLAS
|
ifdef LLAMA_CUBLAS
|
||||||
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
|
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
|
||||||
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
|
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
|
||||||
OBJS += ggml-cuda.o
|
OBJS += ggml-cuda.o
|
||||||
|
NVCC = nvcc
|
||||||
|
NVCCFLAGS = --forward-unknown-to-host-linker -arch=native
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||||
nvcc -arch=native -c -o $@ $<
|
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
|
||||||
endif
|
endif
|
||||||
ifdef LLAMA_GPROF
|
ifdef LLAMA_GPROF
|
||||||
CFLAGS += -pg
|
CFLAGS += -pg
|
||||||
|
|
|
@@ -154,11 +154,11 @@ void dequantize_row_q4_3_cuda(const void * vx, float * y, int k, cudaStream_t st
|
||||||
// lock-free, thread safe buffer pool for cuda
|
// lock-free, thread safe buffer pool for cuda
|
||||||
#define MAX_CUDA_BUFFERS 16
|
#define MAX_CUDA_BUFFERS 16
|
||||||
struct cuda_buffer {
|
struct cuda_buffer {
|
||||||
std::atomic_uintptr_t ptr;
|
std::atomic_uintptr_t ptr { 0 };
|
||||||
size_t size;
|
size_t size { 0 };
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS] = {0};
|
static cuda_buffer cuda_buffer_pool[MAX_CUDA_BUFFERS];
|
||||||
|
|
||||||
void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
|
void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
|
||||||
for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
|
for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue