update lint

Zhiyuan Li 2024-11-05 13:31:41 +11:00
parent e264c35fc9
commit 623db3b06f
8 changed files with 34 additions and 36 deletions


@@ -11655,15 +11655,15 @@ static void ggml_compute_forward_rwkv_wkv6_f32(
float * dst_data = (float *) dst->data;
float * state = ((float *) dst->data) + C * T;
const int ith = params->ith;
const int nth = params->nth;
if (ith >= HEADS) {
return;
}
const int h_start = (HEADS * ith) / nth;
const int h_end = ((HEADS * (ith + 1)) / nth < HEADS) ?
(HEADS * (ith + 1)) / nth : HEADS;
float * k = (float *) dst->src[0]->data;
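
Aside: the h_start/h_end pair above is the usual integer-arithmetic work split. A minimal standalone C++ check (hypothetical HEADS and nth values, not part of this commit) confirms that the per-thread ranges tile [0, HEADS) with no gaps or overlap:

#include <cassert>
#include <cstdio>

int main() {
    const int HEADS = 32;  // assumed head count, for illustration only
    const int nth   = 5;   // assumed thread count
    int covered = 0;
    for (int ith = 0; ith < nth; ++ith) {
        // Same partition rule as in ggml_compute_forward_rwkv_wkv6_f32:
        const int h_start = (HEADS * ith) / nth;
        const int h_end   = ((HEADS * (ith + 1)) / nth < HEADS) ?
                            (HEADS * (ith + 1)) / nth : HEADS;
        assert(h_start == covered);      // ranges are contiguous
        covered = h_end;
        std::printf("thread %d: heads [%d, %d)\n", ith, h_start, h_end);
    }
    assert(covered == HEADS);            // every head is assigned exactly once
    return 0;
}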
@@ -11683,7 +11683,7 @@ static void ggml_compute_forward_rwkv_wkv6_f32(
}
ggml_barrier(params->threadpool);
#if defined(__AVX__) && !defined(__AVX512F__)
#define GGML_F32X GGML_F32x8
#define GGML_F32X_SET1 GGML_F32x8_SET1
@@ -11820,7 +11820,6 @@ static void ggml_compute_forward_rwkv_wkv6_f32(
}
}
}
}
#endif
}


@@ -2263,9 +2263,9 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols,
static void argmax_f32_i32_sycl(const float *x, int *dst, const int ncols,
const int nrows, queue_ptr stream) {
const sycl::range<3> block_dims(1, 1, SYCL_ARGMAX_BLOCK_SIZE);
const sycl::range<3> block_nums(1, nrows, 1);
const size_t shared_mem = 256 * sizeof(float);
stream->submit([&](sycl::handler &cgh) {
sycl::local_accessor<float, 1> shared_data(
@@ -2276,12 +2276,12 @@ static void argmax_f32_i32_sycl(const float *x, int *dst, const int ncols,
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1) {
const int tid = item_ct1.get_local_id(2);
const int row = item_ct1.get_global_id(1);
float max_val = -INFINITY;
int max_idx = -1;
for (int col = tid; col < ncols; col += 256) {
float val = x[row * ncols + col];
if (val > max_val) {
@@ -2289,11 +2289,11 @@ static void argmax_f32_i32_sycl(const float *x, int *dst, const int ncols,
max_idx = col;
}
}
shared_data[tid] = max_val;
shared_indices[tid] = max_idx;
item_ct1.barrier(sycl::access::fence_space::local_space);
for (int stride = 256/2; stride > 0; stride >>= 1) {
if (tid < stride) {
float val1 = shared_data[tid];
@@ -2305,7 +2305,7 @@ static void argmax_f32_i32_sycl(const float *x, int *dst, const int ncols,
}
item_ct1.barrier(sycl::access::fence_space::local_space);
}
if (tid == 0) {
dst[row] = shared_indices[0];
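
For reference, the argmax kernel follows the classic two-phase shared-memory reduction: each of the 256 work-items first folds a strided slice of the row into a private (max, index) pair, then pairs are merged while the stride halves. A serial C++ sketch of the same merge logic (plain vectors standing in for the SYCL local accessors; sizes illustrative, not the kernel itself):

#include <cmath>
#include <cstdio>
#include <vector>

// Serial emulation of the block-wide (max, argmax) reduction above.
int argmax_reduce(const std::vector<float> & row) {
    const int BLOCK = 256;                       // matches SYCL_ARGMAX_BLOCK_SIZE
    std::vector<float> shared_data(BLOCK, -INFINITY);
    std::vector<int>   shared_indices(BLOCK, -1);
    // Phase 1: "work-item" tid scans columns tid, tid + BLOCK, tid + 2*BLOCK, ...
    for (int tid = 0; tid < BLOCK; ++tid) {
        for (int col = tid; col < (int) row.size(); col += BLOCK) {
            if (row[col] > shared_data[tid]) {
                shared_data[tid]    = row[col];
                shared_indices[tid] = col;
            }
        }
    }
    // Phase 2: tree reduction, halving the active range each step.
    for (int stride = BLOCK / 2; stride > 0; stride >>= 1) {
        for (int tid = 0; tid < stride; ++tid) {
            if (shared_data[tid + stride] > shared_data[tid]) {
                shared_data[tid]    = shared_data[tid + stride];
                shared_indices[tid] = shared_indices[tid + stride];
            }
        }
    }
    return shared_indices[0];
}

int main() {
    std::vector<float> row = {0.1f, 3.5f, -2.0f, 3.4f};
    std::printf("argmax = %d\n", argmax_reduce(row)); // prints 1
}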
@@ -2632,9 +2632,9 @@ inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor
const queue_ptr &main_stream) {
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
const int64_t ne = ggml_nelements(src0);
sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream);
(void) src1;
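
The call above reuses the row-sum kernel for a full reduction by viewing the whole tensor as a single row of length ne. A scalar reference (hypothetical helper, not the SYCL code) makes the equivalence explicit:

#include <cstdio>
#include <vector>

// Reference semantics of sum_rows: one output value per row.
static void sum_rows_f32(const float * x, float * dst, int ncols, int nrows) {
    for (int r = 0; r < nrows; ++r) {
        float acc = 0.0f;
        for (int c = 0; c < ncols; ++c) acc += x[r * ncols + c];
        dst[r] = acc;
    }
}

int main() {
    std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f};
    float total = 0.0f;
    // Full-tensor sum expressed as a single-row sum, as in ggml_sycl_op_sum:
    sum_rows_f32(x.data(), &total, (int) x.size(), /*nrows=*/1);
    std::printf("sum = %.1f\n", total); // 10.0
}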


@@ -101,4 +101,4 @@ catch (sycl::exception const &exc) {
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
<< ", line:" << __LINE__ << std::endl;
std::exit(1);
}


@@ -167,7 +167,7 @@ void log_f32(const float * x, float * dst, const int k,
}
float xi = x[i];
if (xi <= 0) {
dst[i] = -INFINITY;
} else {
dst[i] = sycl::log(xi);
}
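
A scalar restatement of the branch above (a sketch, not the kernel): non-positive inputs are clamped to -INFINITY rather than being passed to log, which would return NaN for negative values:

#include <cmath>
#include <cstdio>

// Scalar reference for the element-wise log above: non-positive inputs map to
// -INFINITY instead of NaN (sketch only, not the SYCL kernel).
static float log_f32_ref(float xi) {
    return (xi <= 0.0f) ? -INFINITY : std::log(xi);
}

int main() {
    std::printf("%f %f %f\n", log_f32_ref(2.718f), log_f32_ref(0.0f), log_f32_ref(-1.0f));
}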


@@ -33,7 +33,7 @@ void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, co
void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
@@ -49,7 +49,6 @@ void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *
void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
@@ -74,4 +73,4 @@ void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, co
void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
#endif // GGML_SYCL_ELEMENTWISE_HPP


@@ -4,8 +4,8 @@
void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
const ggml_tensor* src1, ggml_tensor* dst) {
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT(dst->type == GGML_TYPE_F32);
@@ -33,7 +33,7 @@ void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* sr
// Handle transposition of src1
const bool src1_T = ggml_is_transposed(src1);
const oneapi::mkl::transpose src1_op =
src1_T ? oneapi::mkl::transpose::nontrans : oneapi::mkl::transpose::trans;
const int64_t ldb = (src1_T ? nb10 : nb11) / sizeof(float);
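
The inverted-looking flag above is deliberate: out_prod computes dst = src0 * src1^T, so a src1 that is already stored transposed needs no further transpose in the oneMKL call, and the leading dimension is chosen from the matching stride. A hedged sketch of just that selection logic (the Transpose enum and GemmArgs struct are illustrative stand-ins, not the oneMKL types):

#include <cstddef>
#include <cstdio>

enum class Transpose { NonTrans, Trans };

struct GemmArgs {
    Transpose op_b;  // transpose flag passed for the B operand
    size_t    ldb;   // leading dimension of B, in floats
};

// Mirrors the src1_op / ldb selection in ggml_sycl_op_out_prod:
// dst = src0 * src1^T, so a pre-transposed src1 needs no extra transpose.
GemmArgs select_src1_op(bool src1_is_transposed, size_t nb10, size_t nb11) {
    GemmArgs args;
    args.op_b = src1_is_transposed ? Transpose::NonTrans : Transpose::Trans;
    args.ldb  = (src1_is_transposed ? nb10 : nb11) / sizeof(float);
    return args;
}

int main() {
    // Hypothetical strides for a contiguous f32 src1 with 16 columns.
    const GemmArgs a = select_src1_op(/*src1_is_transposed=*/false, /*nb10=*/4, /*nb11=*/64);
    std::printf("op_b=%s ldb=%zu\n", a.op_b == Transpose::Trans ? "trans" : "nontrans", a.ldb);
}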
@@ -52,4 +52,4 @@ void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* sr
std::cerr << exc.what() << std::endl;
GGML_ASSERT(false);
}
}


@@ -12,7 +12,7 @@ static void rwkv_wkv_f32_kernel(
const int tid = item_ct1.get_local_id(2);
const int bid = item_ct1.get_group(2);
const int head_size = WKV_BLOCK_SIZE;
const int batch_i = bid / H;
const int head_i = bid % H;
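
Launch layout, restated: one work-group per (sequence, head) pair, so bid decomposes as batch_i = bid / H and head_i = bid % H. A tiny C++ check (illustrative sizes, not part of the commit) that the decomposition round-trips:

#include <cassert>

int main() {
    const int n_batch = 3, H = 4;                 // assumed batch and head counts
    // One work-group per (batch, head) pair, as in rwkv_wkv_f32_kernel:
    for (int bid = 0; bid < n_batch * H; ++bid) {
        const int batch_i = bid / H;
        const int head_i  = bid % H;
        assert(batch_i * H + head_i == bid);      // decomposition is a bijection
    }
    return 0;
}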
@@ -36,7 +36,7 @@ static void rwkv_wkv_f32_kernel(
// Sync threads before shared memory operations
item_ct1.barrier(sycl::access::fence_space::local_space);
// Load time-mixing parameters
_tf[tid] = tf[head_i * head_size + tid];
item_ct1.barrier(sycl::access::fence_space::local_space);
@@ -45,14 +45,14 @@ static void rwkv_wkv_f32_kernel(
for (int t = batch_i * n_seq_tokens * C + head_i * head_size + tid;
t < (batch_i + 1) * n_seq_tokens * C + head_i * head_size + tid;
t += C) {
item_ct1.barrier(sycl::access::fence_space::local_space);
// Load current timestep data to shared memory
_k[tid] = k[t];
_r[tid] = r[t];
_td[tid] = td[t];
item_ct1.barrier(sycl::access::fence_space::local_space);
const float _v = v[t];
@@ -71,13 +71,13 @@ static void rwkv_wkv_f32_kernel(
// Compute key-value product
sycl::float4 kv4 = k4 * _v;
// Accumulate weighted sum
y += sycl::dot(r4, tf4 * kv4 + s4);
// Update state
s4 = s4 * td4 + kv4;
// Store updated state
state[j] = s4.x();
state[j+1] = s4.y();
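
The float4 arithmetic above implements the WKV6 recurrence four channels at a time. A scalar walkthrough of one channel (hypothetical parameter values; a sketch, not the kernel) shows the same three steps: key-value product, weighted accumulation, decayed state update:

#include <cstdio>

// Scalar form of the vectorized WKV6 update above, per state element j:
//   kv   = k[j] * v                      key-value product
//   y   += r[j] * (tf[j] * kv + s[j])    weighted accumulation into the output
//   s[j] = s[j] * td[j] + kv             decayed state update
int main() {
    float s = 0.0f;                       // running state for one channel
    const float tf = 0.5f, td = 0.9f;     // assumed time-first / time-decay params
    const float k[3] = {1.0f, 0.5f, 2.0f};
    const float v[3] = {0.2f, 0.4f, 0.1f};
    const float r[3] = {1.0f, 1.0f, 1.0f};
    for (int t = 0; t < 3; ++t) {
        const float kv = k[t] * v[t];
        const float y  = r[t] * (tf * kv + s);
        s = s * td + kv;
        std::printf("t=%d y=%f s=%f\n", t, y, s);
    }
}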
@@ -97,7 +97,7 @@ static void rwkv_wkv_f32_kernel(
void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
const ggml_tensor* src1, ggml_tensor* dst) {
const float* k_d = (const float*)dst->src[0]->data;
const float* v_d = (const float*)dst->src[1]->data;
const float* r_d = (const float*)dst->src[2]->data;
@@ -135,4 +135,4 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* s
);
});
});
}


@@ -7,4 +7,4 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, const ggml_tensor *
const ggml_tensor *src1, ggml_tensor * dst);
#endif // GGML_SYCL_WKV6_HPP