From 211fb045f1c9cd4c949389817624ec510c0fcefd Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Thu, 13 Jun 2024 02:38:36 +0200
Subject: [PATCH] sched : allow ops with weights on an incompatible buffer type

This will cause the weight to be copied to a backend that supports the
op, which is very costly. The weight should have been stored in a buffer
of a backend that can run the op, but llama.cpp cannot do this
automatically at the moment.

ggml-ci
---
 ggml-backend.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index 21636a6ae..2bec7bea3 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1116,9 +1116,10 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
         }
     }
 
-    fprintf(stderr, "%s: error: no backend supports buffer type %s used in tensor %s\n",
-        __func__, ggml_backend_buffer_name(buffer), tensor->name);
-    GGML_ASSERT(false);
+#ifndef NDEBUG
+    fprintf(stderr, "%s: warning: no backend supports op %s with a weight with buffer type %s used in tensor %s, the weight will need to be copied\n",
+        __func__, ggml_op_desc(tensor), ggml_backend_buffer_name(buffer), tensor->name);
+#endif
 
     return -1;
 }