From 211fb045f1c9cd4c949389817624ec510c0fcefd Mon Sep 17 00:00:00 2001
From: slaren
Date: Thu, 13 Jun 2024 02:38:36 +0200
Subject: [PATCH] sched : allow ops with weights on an incompatible buffer type

This will cause the weight to be copied to a backend that supports the
op, which is very costly. The weight should have been stored in a buffer
of a backend that can run the op, but llama.cpp cannot do this
automatically at the moment.

ggml-ci
---
 ggml-backend.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index 21636a6ae..2bec7bea3 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1116,9 +1116,10 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
         }
     }
 
-    fprintf(stderr, "%s: error: no backend supports buffer type %s used in tensor %s\n",
-        __func__, ggml_backend_buffer_name(buffer), tensor->name);
-    GGML_ASSERT(false);
+#ifndef NDEBUG
+    fprintf(stderr, "%s: warning: no backend supports op %s with a weight with buffer type %s used in tensor %s, the weight will need to be copied\n",
+        __func__, ggml_op_desc(tensor), ggml_backend_buffer_name(buffer), tensor->name);
+#endif
 
     return -1;
 }