Add __restrict__ to dequantize_mul_mat kernels
This commit is contained in:
parent
2d55023143
commit
b90c80bdbf
1 changed file with 2 additions and 1 deletion
|
@@ -1485,7 +1485,8 @@ static __global__ void dequantize_mul_mat_vec(const void * vx, const dfloat * y,
|
||||||
|
|
||||||
template <dequantize_1_kernel_t dequantize_kernel>
|
template <dequantize_1_kernel_t dequantize_kernel>
|
||||||
static __global__ void dequantize_mul_mat(
|
static __global__ void dequantize_mul_mat(
|
||||||
const void * vx, const float * y, float * dst, const int ncols_x, const int nrows_x, const int ncols_y, const int nrows_dst) {
|
const void * __restrict__ vx, const float * __restrict__ y, float * __restrict__ dst,
|
||||||
|
const int ncols_x, const int nrows_x, const int ncols_y, const int nrows_dst) {
|
||||||
|
|
||||||
const int nrows_y = ncols_x;
|
const int nrows_y = ncols_x;
|
||||||
const int ncols_dst = ncols_y;
|
const int ncols_dst = ncols_y;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue