fixup! More GPU threads for CUDA kernels
This commit is contained in:
parent
50148408b5
commit
8d8de07a4e
1 changed files with 6 additions and 6 deletions
12
ggml-cuda.cu
12
ggml-cuda.cu
|
@@ -242,7 +242,7 @@ static void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStre
|
|||
int min_grid_size, block_size = 1; // Initialize to suppress compiler warning.
|
||||
CUDA_CHECK(cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, dequantize_block_q4_0, 0, 0));
|
||||
int grid_size = (nb + block_size - 1) / block_size; // Round up.
|
||||
-dequantize_block_q4_0<<<grid_size, block_size, 0, stream>>>(vx, y, k);
|
||||
+dequantize_block_q4_0<<<grid_size, block_size, 0, stream>>>(vx, y, nb);
|
||||
}
|
||||
|
||||
static void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream) {
|
||||
|
@@ -250,7 +250,7 @@ static void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStre
|
|||
int min_grid_size, block_size = 1; // Initialize to suppress compiler warning.
|
||||
CUDA_CHECK(cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, dequantize_block_q4_1, 0, 0));
|
||||
int grid_size = (nb + block_size - 1) / block_size; // Round up.
|
||||
-dequantize_block_q4_1<<<grid_size, block_size, 0, stream>>>(vx, y, k);
|
||||
+dequantize_block_q4_1<<<grid_size, block_size, 0, stream>>>(vx, y, nb);
|
||||
}
|
||||
|
||||
static void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream) {
|
||||
|
@@ -258,7 +258,7 @@ static void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStre
|
|||
int min_grid_size, block_size = 1; // Initialize to suppress compiler warning.
|
||||
CUDA_CHECK(cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, dequantize_block_q4_2, 0, 0));
|
||||
int grid_size = (nb + block_size - 1) / block_size; // Round up.
|
||||
-dequantize_block_q4_2<<<grid_size, block_size, 0, stream>>>(vx, y, k);
|
||||
+dequantize_block_q4_2<<<grid_size, block_size, 0, stream>>>(vx, y, nb);
|
||||
}
|
||||
|
||||
static void dequantize_row_q5_0_cuda(const void * vx, float * y, int k, cudaStream_t stream) {
|
||||
|
@@ -266,7 +266,7 @@ static void dequantize_row_q5_0_cuda(const void * vx, float * y, int k, cudaStre
|
|||
int min_grid_size, block_size = 1; // Initialize to suppress compiler warning.
|
||||
CUDA_CHECK(cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, dequantize_block_q5_0, 0, 0));
|
||||
int grid_size = (nb + block_size - 1) / block_size; // Round up.
|
||||
-dequantize_block_q5_0<<<grid_size, block_size, 0, stream>>>(vx, y, k);
|
||||
+dequantize_block_q5_0<<<grid_size, block_size, 0, stream>>>(vx, y, nb);
|
||||
}
|
||||
|
||||
static void dequantize_row_q5_1_cuda(const void * vx, float * y, int k, cudaStream_t stream) {
|
||||
|
@@ -274,7 +274,7 @@ static void dequantize_row_q5_1_cuda(const void * vx, float * y, int k, cudaStre
|
|||
int min_grid_size, block_size = 1; // Initialize to suppress compiler warning.
|
||||
CUDA_CHECK(cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, dequantize_block_q5_1, 0, 0));
|
||||
int grid_size = (nb + block_size - 1) / block_size; // Round up.
|
||||
-dequantize_block_q5_1<<<grid_size, block_size, 0, stream>>>(vx, y, k);
|
||||
+dequantize_block_q5_1<<<grid_size, block_size, 0, stream>>>(vx, y, nb);
|
||||
}
|
||||
|
||||
static void dequantize_row_q8_0_cuda(const void * vx, float * y, int k, cudaStream_t stream) {
|
||||
|
@@ -282,7 +282,7 @@ static void dequantize_row_q8_0_cuda(const void * vx, float * y, int k, cudaStre
|
|||
int min_grid_size, block_size = 1; // Initialize to suppress compiler warning.
|
||||
CUDA_CHECK(cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, dequantize_block_q8_0, 0, 0));
|
||||
int grid_size = (nb + block_size - 1) / block_size; // Round up.
|
||||
-dequantize_block_q8_0<<<grid_size, block_size, 0, stream>>>(vx, y, k);
|
||||
+dequantize_block_q8_0<<<grid_size, block_size, 0, stream>>>(vx, y, nb);
|
||||
}
|
||||
|
||||
// TODO: optimize
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue