CUDA: fix partial offloading for ne0 % 256 != 0 (#8572)

This commit is contained in:
Johannes Gäßler 2024-07-18 23:48:47 +02:00 committed by GitHub
parent 705b7ecf60
commit a15ef8f8a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 29 additions and 15 deletions

View file

@@ -776,6 +776,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
fprintf(stderr, "%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
return false;
}
ggml_backend_buffer_set_usage(galloc->buffers[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
}
}