Add environment variable GGML_KLEIDIAI_SME
parent 6adca19c94
commit 119d3bf986

14 changed files with 38 additions and 48 deletions
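For context, a minimal standalone sketch (not part of the patch) of the gating behavior this change introduces: the KleidiAI SME kernels are only considered when GGML_KLEIDIAI_SME is set to a non-zero integer in the environment, mirroring the getenv/atoi check added to init_kleidiai_context in the diff below. The test program and its output string are illustrative only.

// Hypothetical illustration, not from the commit: reads GGML_KLEIDIAI_SME
// the same way the patched init_kleidiai_context does.
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    const char * env_var = getenv("GGML_KLEIDIAI_SME");
    int sme_enabled = 0;

    if (env_var) {
        sme_enabled = atoi(env_var);   // e.g. GGML_KLEIDIAI_SME=1 requests SME kernels
    }

    printf("SME kernels %s\n", sme_enabled != 0 ? "requested" : "not requested");
    return 0;
}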
@@ -189,7 +189,7 @@ extern "C" {
     // Set the number of threads for the backend
     typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t backend, int n_threads);
     // Get additional buffer types provided by the device (returns a NULL-terminated array)
-    typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device, int n_threads);
+    typedef ggml_backend_buffer_type_t * (*ggml_backend_dev_get_extra_bufts_t)(ggml_backend_dev_t device);
     // Set the abort callback for the backend
     typedef void (*ggml_backend_set_abort_callback_t)(ggml_backend_t backend, ggml_abort_callback abort_callback, void * abort_callback_data);
     // Get a list of feature flags supported by the backend (returns a NULL-terminated array)
@@ -325,9 +325,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)

         # Fetch KleidiAI sources:
         include(FetchContent)
-        set(KLEIDIAI_COMMIT_SHA "v1.2.0")
-        set(KLEIDIAI_DOWNLOAD_URL "https://gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_SHA}/kleidiai-${KLEIDIAI_COMMIT_SHA}.tar.gz")
-        set(KLEIDIAI_ARCHIVE_MD5 "cebcb660079bf15626e7bdaecd18f49c")
+        set(KLEIDIAI_COMMIT_TAG "v1.2.0")
+        set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
+        set(KLEIDIAI_ARCHIVE_MD5 "6634fefce7357ecfee9eace2068bc68b")

         if (POLICY CMP0135)
             cmake_policy(SET CMP0135 NEW)
@@ -10,7 +10,7 @@ extra_buffer_type::~extra_buffer_type() {}
 } // namespace ggml::cpu

 bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {
-    for (auto extra : ggml_backend_cpu_get_extra_buffers_type(params->nth)) {
+    for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
         if (extra && extra->context) {
             auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
             auto tensor_traits = buf_extra->get_tensor_traits(op);
@@ -23,7 +23,7 @@ bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct
 }

 bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size) {
-    for (auto extra : ggml_backend_cpu_get_extra_buffers_type(n_threads)) {
+    for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
         if (extra && extra->context) {
             auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
             auto tensor_traits = buf_extra->get_tensor_traits(op);
@@ -33,6 +33,6 @@ class extra_buffer_type {
 } // namespace ggml::cpu

 // implemented in ggml-cpu.cpp.
-std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type(int n_threads);
+std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();

 #endif
@@ -33,8 +33,8 @@

 // ggml-backend interface

-std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type(int n_threads) {
-    static std::vector<ggml_backend_buffer_type_t> bufts = [n_threads]() {
+std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
+    static std::vector<ggml_backend_buffer_type_t> bufts = []() {
         std::vector<ggml_backend_buffer_type_t> bufts;

 #if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
@@ -44,8 +44,8 @@ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type
 #endif

 #ifdef GGML_USE_CPU_KLEIDIAI
-        if (ggml_backend_cpu_kleidiai_buffer_type(n_threads)) {
-            bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type(n_threads));
+        if (ggml_backend_cpu_kleidiai_buffer_type()) {
+            bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());
         }
 #endif

@@ -58,21 +58,19 @@ std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type
         bufts.push_back(NULL);

         return bufts;
-
-        GGML_UNUSED(n_threads);
     }();

     return bufts;
 }

-static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device, int n_threads) {
-    return ggml_backend_cpu_get_extra_buffers_type(n_threads).data();
+static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
+    return ggml_backend_cpu_get_extra_buffers_type().data();

     GGML_UNUSED(device);
 }

 static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
-    for (auto extra : ggml_backend_cpu_get_extra_buffers_type(-1)) {
+    for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
         if (extra && extra == buft) return true;
     }
     return false;
@@ -387,7 +385,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
     }

     // extra_buffer_op?
-    for (auto extra : ggml_backend_cpu_get_extra_buffers_type(-1)) {
+    for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
         if (extra) {
             auto buf_extra = (ggml::cpu::extra_buffer_type*) extra->context;
             if (buf_extra && buf_extra->supports_op(dev, op)) {
@@ -577,7 +575,7 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
         features.push_back({ "OPENMP", "1" });
     #endif
     #ifdef GGML_USE_CPU_KLEIDIAI
-        features.push_back({ "KLEIDIAI_REPACK", "1" });
+        features.push_back({ "KLEIDIAI", "1" });
     #endif
     #ifdef GGML_USE_CPU_AARCH64
         features.push_back({ "AARCH64_REPACK", "1" });
@@ -34,25 +34,25 @@ struct ggml_kleidiai_context {
     ggml_kleidiai_kernels * kernels;
 } static ctx = { NULL };

-static void init_kleidiai_context(int n_threads) {
+static void init_kleidiai_context(void) {
     static bool initialized = false;

     if (!initialized) {
-        GGML_ASSERT(n_threads > 0);
-
         initialized = true;
+        const char *env_var = getenv("GGML_KLEIDIAI_SME");
+        int sme_enabled = 0;

         cpu_feature features = (ggml_cpu_has_dotprod() ? CPU_FEATURE_DOTPROD : CPU_FEATURE_NONE) |
                                (ggml_cpu_has_matmul_int8() ? CPU_FEATURE_I8MM : CPU_FEATURE_NONE) |
                                (ggml_cpu_has_sve() ? CPU_FEATURE_SVE : CPU_FEATURE_NONE);

-#if defined(__APPLE__)
-        if (n_threads == 1) {
+        if (env_var) {
+            sme_enabled = atoi(env_var);
+        }
+
+        if (sme_enabled != 0) {
             features |= ggml_cpu_has_sme() ? CPU_FEATURE_SME : CPU_FEATURE_NONE;
         }
-#else
-        features |= ggml_cpu_has_sme() ? CPU_FEATURE_SME : CPU_FEATURE_NONE;
-#endif
         ctx.kernels = ggml_kleidiai_select_kernels(features);
     }
 }
@@ -162,6 +162,8 @@ public:
         ctx.kernels->rhs_info.pack_func(1, n, k, nr, kr, sr, k_q4_0_block_size, (const uint8_t *)data, NULL, tensor->data, 0, &params);
+
+        return 0;

         GGML_UNUSED(data_size);
     }
 };

@@ -223,7 +225,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
             op->src[0]->type == GGML_TYPE_Q4_0 &&
             op->src[0]->buffer &&
             (ggml_n_dims(op->src[0]) == 2) &&
-            op->src[0]->buffer->buft == ggml_backend_cpu_kleidiai_buffer_type(-1) && ctx.kernels
+            op->src[0]->buffer->buft == ggml_backend_cpu_kleidiai_buffer_type() && ctx.kernels
             ) {
             if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
                 return false;
@@ -237,7 +239,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {

     ggml::cpu::tensor_traits * get_tensor_traits(const struct ggml_tensor * op) override {
         if (op->op == GGML_OP_MUL_MAT) {
-            if (op->src[0]->buffer && op->src[0]->buffer->buft == ggml_backend_cpu_kleidiai_buffer_type(-1)) {
+            if (op->src[0]->buffer && op->src[0]->buffer->buft == ggml_backend_cpu_kleidiai_buffer_type()) {
                 return (ggml::cpu::tensor_traits *) op->src[0]->extra;
             }
         }
@@ -246,7 +248,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
 };
 } // namespace ggml::cpu::kleidiai

-ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(int n_threads) {
+ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(void) {
     static ggml::cpu::kleidiai::extra_buffer_type ctx;
     static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_kleidiai = {
         /* .iface = */ {
@@ -261,7 +263,7 @@ ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(int n_threads)
         /* .context = */ &ctx,
     };

-    init_kleidiai_context(n_threads);
+    init_kleidiai_context();

    return &ggml_backend_cpu_buffer_type_kleidiai;
 }
@@ -11,7 +11,7 @@
 extern "C" {
 #endif

-ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(int n_threads);
+ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(void);

 #ifdef __cplusplus
 }