Define GGML_CUDA_DMMV_BLOCK_Y if not defined
This commit is contained in:
parent
5d0cf9928b
commit
e199938a3a
1 changed file with 5 additions and 1 deletion
|
@ -85,7 +85,11 @@ static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 blo
|
||||||
|
|
||||||
#define CUDA_MUL_BLOCK_SIZE 256
|
#define CUDA_MUL_BLOCK_SIZE 256
|
||||||
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
|
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
|
||||||
#define GGML_CUDA_DMMV_BLOCK_X 32 // dmmv = dequantize_mul_mat_vec
|
// dmmv = dequantize_mul_mat_vec
|
||||||
|
#define GGML_CUDA_DMMV_BLOCK_X 32
|
||||||
|
#ifndef GGML_CUDA_DMMV_BLOCK_Y
|
||||||
|
#define GGML_CUDA_DMMV_BLOCK_Y 1 // can be set by compiler option LLAMA_CUDA_BY
|
||||||
|
#endif
|
||||||
|
|
||||||
static __global__ void mul_f32(const float * x, const float * y, float * dst, const int kx, const int ky) {
|
static __global__ void mul_f32(const float * x, const float * y, float * dst, const int kx, const int ky) {
|
||||||
const int i = blockDim.x*blockIdx.x + threadIdx.x;
|
const int i = blockDim.x*blockIdx.x + threadIdx.x;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue