fix compile errors

This commit is contained in:
Concedo 2023-06-29 17:54:12 +08:00
parent dff5575647
commit c2f1ed6556
2 changed files with 5 additions and 4 deletions

View file

@@ -74,9 +74,9 @@ if (LLAMA_CUBLAS)
add_compile_definitions(GGML_USE_CUBLAS) add_compile_definitions(GGML_USE_CUBLAS)
if (LLAMA_STATIC) if (LLAMA_STATIC)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static)
else() else()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas)
endif() endif()
else() else()

View file

@@ -9,6 +9,7 @@
#include <cuda_fp16.h> #include <cuda_fp16.h>
#include "ggml_v2-cuda-legacy.h" #include "ggml_v2-cuda-legacy.h"
#include "ggml_v2-cuda.h"
#include "ggml_v2.h" #include "ggml_v2.h"
static_assert(sizeof(half) == sizeof(ggml_v2_fp16_t), "wrong fp16 size"); static_assert(sizeof(half) == sizeof(ggml_v2_fp16_t), "wrong fp16 size");
@@ -62,7 +63,7 @@ typedef struct {
__half m; // min __half m; // min
uint8_t qs[QK4_3 / 2]; // nibbles / quants uint8_t qs[QK4_3 / 2]; // nibbles / quants
} block_q4_3; } block_q4_3;
static_assert(sizeof(block_q4_3) == 2 * sizeof(ggml_fp16_t) + QK4_3 / 2, "wrong q4_3 block size/padding"); static_assert(sizeof(block_q4_3) == 2 * sizeof(ggml_v2_fp16_t) + QK4_3 / 2, "wrong q4_3 block size/padding");
#define QK5_0 32 #define QK5_0 32
typedef struct { typedef struct {
@@ -672,7 +673,7 @@ static void ggml_v2_cuda_mul_mat_q_f32(const ggml_v2_tensor * src0, const ggml_v
ggml_v2_cuda_pool_free(d_Q, q_size); ggml_v2_cuda_pool_free(d_Q, q_size);
} }
bool ggml_v2_cuda_mul_mat_use_f16(const struct ggml_v2_tensor * src0, const struct ggml_v2_tensor * src1, struct ggml_v2_tensor * /* dst */) { static bool ggml_v2_cuda_mul_mat_use_f16(const struct ggml_v2_tensor * src0, const struct ggml_v2_tensor * src1, struct ggml_v2_tensor * /* dst */) {
size_t src0_sz = ggml_v2_nbytes(src0); size_t src0_sz = ggml_v2_nbytes(src0);
size_t src1_sz = ggml_v2_nbytes(src1); size_t src1_sz = ggml_v2_nbytes(src1);