# Patch summary (two files, one hunk each):
#
#   Makefile, inside the `ifdef LLAMA_CUBLAS` section:
#     * MK_CPPFLAGS: the $(CUDA_PATH)/targets/x86_64-linux/include directory
#       is now passed with -I instead of -L.
#     * MK_NVCCFLAGS: set to "-use_fast_math" alone when
#       JETSON_EOL_MODULE_DETECT is defined; otherwise set to
#       "--forward-unknown-to-host-compiler -use_fast_math".
#
#   ggml-cuda.cu, hunk whose header names ggml_cuda_mul_mat_mat_batched_cublas:
#     * cu_compute_type is declared as cublasComputeType_t instead of
#       cudaDataType_t; its initializer CUBLAS_COMPUTE_16F is unchanged.
#
# NOTE(review): this copy of the patch had its line breaks and indentation
# collapsed. The line structure below was restored so that each hunk matches
# the line counts in its @@ header; the positions of the blank context lines
# and the original tab/space indentation are assumed -- confirm against the
# target tree before applying (or apply with whitespace tolerance).
diff --git a/Makefile b/Makefile
index 5b3baf695..4b98452f6 100644
--- a/Makefile
+++ b/Makefile
@@ -366,10 +366,14 @@ ifdef LLAMA_BLIS
 endif # LLAMA_BLIS
 
 ifdef LLAMA_CUBLAS
- MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -L$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
+ MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
  MK_LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib
  OBJS += ggml-cuda.o
+ifdef JETSON_EOL_MODULE_DETECT
  MK_NVCCFLAGS = -use_fast_math
+else
+ MK_NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
+endif # JETSON_EOL_MODULE_DETECT
 
 ifdef LLAMA_DEBUG
  MK_NVCCFLAGS += -lineinfo
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index b4396de71..d0eed3d2d 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -8403,7 +8403,7 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
  half * dst_f16 = nullptr;
  char * dst_t = nullptr;
 
- cudaDataType_t cu_compute_type = CUBLAS_COMPUTE_16F;
+ cublasComputeType_t cu_compute_type = CUBLAS_COMPUTE_16F;
  cudaDataType_t cu_data_type = CUDA_R_16F;
 
  // dst strides