update backends

ggml-ci
This commit is contained in:
slaren 2024-03-17 14:55:35 +01:00
parent 0661e6a1ae
commit cc9299ce19
6 changed files with 6 additions and 6 deletions

View file

@ -7791,10 +7791,6 @@ struct cuda_pool_alloc {
static bool g_cublas_loaded = false;
static bool ggml_cublas_loaded(void) {
return g_cublas_loaded;
}
static void ggml_init_cublas() {
static bool initialized = false;
@ -11381,7 +11377,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
GGML_CALL static bool ggml_backend_cuda_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
const int min_batch_size = 32;
return op->ne[1] > min_batch_size && op->op != GGML_OP_GET_ROWS;
return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
UNUSED(backend);
}

View file

@ -1951,6 +1951,7 @@ static struct ggml_backend_i kompute_backend_i = {
/* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_kompute_graph_compute,
/* .supports_op = */ ggml_backend_kompute_supports_op,
/* .offload_op = */ NULL,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,

View file

@ -2837,6 +2837,7 @@ static struct ggml_backend_i ggml_backend_metal_i = {
/* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_metal_graph_compute,
/* .supports_op = */ ggml_backend_metal_supports_op,
/* .offload_op = */ NULL,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,

View file

@ -17390,6 +17390,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
/* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
/* .supports_op = */ ggml_backend_sycl_supports_op,
/* .offload_op = */ NULL,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,

View file

@ -5693,6 +5693,7 @@ static ggml_backend_i ggml_backend_vk_interface = {
/* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_vk_graph_compute,
/* .supports_op = */ ggml_backend_vk_supports_op,
/* .offload_op = */ NULL,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,

View file

@ -8614,7 +8614,7 @@ static struct ggml_cgraph * llama_build_graph(
// norm may be automatically assigned to the backend of the previous layer, increasing data transfer between backends
// FIXME: fix in ggml_backend_sched
const bool full_offload = lctx.model.n_gpu_layers > (int)lctx.model.hparams.n_layer;
if (batch.n_tokens <= 32 || full_offload) {
if (batch.n_tokens < 32 || full_offload) {
if (il != -1 && strcmp(name, "norm") == 0) {
for (auto * backend : lctx.backends) {
if (ggml_backend_buft_supports_backend(lctx.model.buft_layer[il].buft, backend)) {