Remove get_work_group_size() in favor of a locally cached value for performance (#8286)
Co-authored-by: arthw <14088817+arthw@users.noreply.github.com>
This commit is contained in:
parent
a38b884c6c
commit
f09b7cb609
3 changed files with 17 additions and 26 deletions
|
@ -47,10 +47,6 @@ static int g_ggml_sycl_debug = 0;
|
|||
} \
|
||||
}()
|
||||
|
||||
// #define DEBUG_SYCL_MALLOC
|
||||
|
||||
static int g_work_group_size = 0;
|
||||
// typedef sycl::half ggml_fp16_t;
|
||||
|
||||
#define __SYCL_ARCH__ DPCT_COMPATIBILITY_TEMP
|
||||
#define VER_4VEC 610 // todo: optimize for hardware.
|
||||
|
@ -193,6 +189,8 @@ struct ggml_sycl_device_info {
|
|||
sycl_device_info devices[GGML_SYCL_MAX_DEVICES] = {};
|
||||
|
||||
std::array<float, GGML_SYCL_MAX_DEVICES> default_tensor_split = {};
|
||||
|
||||
int max_work_group_sizes[GGML_SYCL_MAX_DEVICES] = {0};
|
||||
};
|
||||
|
||||
const ggml_sycl_device_info & ggml_sycl_info();
|
||||
|
@ -295,15 +293,6 @@ struct ggml_backend_sycl_context {
|
|||
}
|
||||
};
|
||||
|
||||
// common host functions
|
||||
|
||||
// Looks up the device info for `device` through dpct and returns the maximum
// work-group size it reports. The lookup is performed on every call; nothing
// is cached here.
static inline int get_work_group_size(const sycl::device& device) {
    dpct::device_info info;
    dpct::get_device_info(info, device);
    const int max_wg_size = info.get_max_work_group_size();
    return max_wg_size;
}
|
||||
|
||||
|
||||
// common device functions
|
||||
|
||||
static __dpct_inline__ float warp_reduce_sum(float x,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue