update backends

ggml-ci
2024-03-17 14:55:35 +01:00 · 2024-03-17 14:55:35 +01:00 · cc9299ce19
commit cc9299ce19
parent 0661e6a1ae
6 changed files with 6 additions and 6 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -7791,10 +7791,6 @@ struct cuda_pool_alloc {

 static bool g_cublas_loaded = false;

-static bool ggml_cublas_loaded(void) {
-    return g_cublas_loaded;
-}
-
 static void ggml_init_cublas() {
    static bool initialized = false;

@@ -11381,7 +11377,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
 GGML_CALL static bool ggml_backend_cuda_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
    const int min_batch_size = 32;

-    return op->ne[1] > min_batch_size && op->op != GGML_OP_GET_ROWS;
+    return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;

    UNUSED(backend);
 }
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -1951,6 +1951,7 @@ static struct ggml_backend_i kompute_backend_i = {
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ ggml_backend_kompute_graph_compute,
    /* .supports_op             = */ ggml_backend_kompute_supports_op,
+    /* .offload_op              = */ NULL,
    /* .event_new               = */ NULL,
    /* .event_free              = */ NULL,
    /* .event_record            = */ NULL,
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -2837,6 +2837,7 @@ static struct ggml_backend_i ggml_backend_metal_i = {
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ ggml_backend_metal_graph_compute,
    /* .supports_op             = */ ggml_backend_metal_supports_op,
+    /* .offload_op              = */ NULL,
    /* .event_new               = */ NULL,
    /* .event_free              = */ NULL,
    /* .event_record            = */ NULL,
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -17390,6 +17390,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ ggml_backend_sycl_graph_compute,
    /* .supports_op             = */ ggml_backend_sycl_supports_op,
+    /* .offload_op              = */ NULL,
    /* .event_new               = */ NULL,
    /* .event_free              = */ NULL,
    /* .event_record            = */ NULL,
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -5693,6 +5693,7 @@ static ggml_backend_i ggml_backend_vk_interface = {
    /* .graph_plan_compute      = */ NULL,
    /* .graph_compute           = */ ggml_backend_vk_graph_compute,
    /* .supports_op             = */ ggml_backend_vk_supports_op,
+    /* .offload_op              = */ NULL,
    /* .event_new               = */ NULL,
    /* .event_free              = */ NULL,
    /* .event_record            = */ NULL,
--- a/llama.cpp
+++ b/llama.cpp
@@ -8614,7 +8614,7 @@ static struct ggml_cgraph * llama_build_graph(
        // norm may be automatically assigned to the backend of the previous layer, increasing data transfer between backends
        // FIXME: fix in ggml_backend_sched
        const bool full_offload = lctx.model.n_gpu_layers > (int)lctx.model.hparams.n_layer;
-        if (batch.n_tokens <= 32 || full_offload) {
+        if (batch.n_tokens < 32 || full_offload) {
            if (il != -1 && strcmp(name, "norm") == 0) {
                for (auto * backend : lctx.backends) {
                    if (ggml_backend_buft_supports_backend(lctx.model.buft_layer[il].buft, backend)) {