update backends
ggml-ci
This commit is contained in:
parent 0661e6a1ae
commit cc9299ce19

6 changed files with 6 additions and 6 deletions
@@ -7791,10 +7791,6 @@ struct cuda_pool_alloc {
 static bool g_cublas_loaded = false;

 static bool ggml_cublas_loaded(void) {
     return g_cublas_loaded;
 }

 static void ggml_init_cublas() {
     static bool initialized = false;

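For context, `ggml_init_cublas` keeps a function-local `initialized` flag so the cuBLAS setup runs at most once no matter how many times it is called. A minimal sketch of that guard pattern, simplified from what the hunk shows (the real initializer also enumerates devices and creates handles, which is elided here):

    // Sketch of the once-only init guard visible in the hunk above; the body
    // of the real ggml_init_cublas is assumed/elided.
    static bool g_cublas_loaded = false;

    static void init_cublas_sketch() {
        static bool initialized = false; // persists across calls
        if (initialized) {
            return;                      // later calls are no-ops
        }
        initialized = true;
        // ... device/handle setup would go here ...
        g_cublas_loaded = true;          // flipped only once setup succeeds
    }
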
@@ -11381,7 +11377,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
 GGML_CALL static bool ggml_backend_cuda_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
     const int min_batch_size = 32;

-    return op->ne[1] > min_batch_size && op->op != GGML_OP_GET_ROWS;
+    return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;

     UNUSED(backend);
 }

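The `offload_op` hook lets a backend tell the scheduler whether evaluating an op on the device is worth the weight-transfer cost; the change above makes a batch of exactly 32 rows qualify (`>=` instead of `>`). A hypothetical illustration of how a scheduler could consult such a callback; the `offload_op_fn` signature mirrors the diff above, but `pick_backend` and everything around it are made up for the sketch and are not the actual ggml_backend_sched code:

    struct ggml_tensor;                    // opaque for this sketch
    typedef struct ggml_backend * ggml_backend_t;

    // assumed callback shape, matching ggml_backend_cuda_offload_op above
    typedef bool (*offload_op_fn)(ggml_backend_t backend, const ggml_tensor * op);

    static ggml_backend_t pick_backend(ggml_backend_t cpu, ggml_backend_t gpu,
                                       offload_op_fn offload_op, const ggml_tensor * op) {
        // offload only when the backend judges the batch large enough to
        // amortize copying the weights to the device
        if (offload_op && offload_op(gpu, op)) {
            return gpu;
        }
        return cpu;
    }
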
@@ -1951,6 +1951,7 @@ static struct ggml_backend_i kompute_backend_i = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute      = */ ggml_backend_kompute_graph_compute,
     /* .supports_op        = */ ggml_backend_kompute_supports_op,
+    /* .offload_op         = */ NULL,
     /* .event_new          = */ NULL,
     /* .event_free         = */ NULL,
     /* .event_record       = */ NULL,

@@ -2837,6 +2837,7 @@ static struct ggml_backend_i ggml_backend_metal_i = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute      = */ ggml_backend_metal_graph_compute,
     /* .supports_op        = */ ggml_backend_metal_supports_op,
+    /* .offload_op         = */ NULL,
     /* .event_new          = */ NULL,
     /* .event_free         = */ NULL,
     /* .event_record       = */ NULL,

@@ -17390,6 +17390,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute      = */ ggml_backend_sycl_graph_compute,
     /* .supports_op        = */ ggml_backend_sycl_supports_op,
+    /* .offload_op         = */ NULL,
     /* .event_new          = */ NULL,
     /* .event_free         = */ NULL,
     /* .event_record       = */ NULL,

@@ -5693,6 +5693,7 @@ static ggml_backend_i ggml_backend_vk_interface = {
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute      = */ ggml_backend_vk_graph_compute,
     /* .supports_op        = */ ggml_backend_vk_supports_op,
+    /* .offload_op         = */ NULL,
     /* .event_new          = */ NULL,
     /* .event_free         = */ NULL,
     /* .event_record       = */ NULL,

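The four hunks above are the same mechanical update: the `ggml_backend_i` interface gained an `offload_op` slot between `.supports_op` and `.event_new`, and the Kompute, Metal, SYCL, and Vulkan backends all set it to `NULL`, opting out of batch-based offloading. A `NULL` entry only works if callers check the field before dispatching; a simplified sketch with an assumed field layout, not the real ggml-backend internals:

    struct ggml_tensor;                    // opaque for this sketch
    typedef struct ggml_backend * ggml_backend_t;

    // stand-in for ggml_backend_i, reduced to the two fields relevant here
    struct backend_iface_sketch {
        bool (*supports_op)(ggml_backend_t backend, const ggml_tensor * op);
        bool (*offload_op)(ggml_backend_t backend, const ggml_tensor * op); // NULL => never offload
    };

    static bool wants_offload(const backend_iface_sketch & iface,
                              ggml_backend_t backend, const ggml_tensor * op) {
        // Kompute/Metal/SYCL/Vulkan leave offload_op NULL in this commit, so
        // the NULL check routes them to the default (no-offload) path.
        return iface.offload_op != nullptr && iface.offload_op(backend, op);
    }
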
@@ -8614,7 +8614,7 @@ static struct ggml_cgraph * llama_build_graph(
         // norm may be automatically assigned to the backend of the previous layer, increasing data transfer between backends
         // FIXME: fix in ggml_backend_sched
         const bool full_offload = lctx.model.n_gpu_layers > (int)lctx.model.hparams.n_layer;
-        if (batch.n_tokens <= 32 || full_offload) {
+        if (batch.n_tokens < 32 || full_offload) {
             if (il != -1 && strcmp(name, "norm") == 0) {
                 for (auto * backend : lctx.backends) {
                     if (ggml_backend_buft_supports_backend(lctx.model.buft_layer[il].buft, backend)) {
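Both operator tweaks move the boundary case the same way: a 32-token batch now counts as "large". Before, CUDA's `offload_op` rejected 32 rows (`> 32`) while `llama_build_graph` still treated 32 tokens as small (`<= 32`); after, `>= 32` offloads and `< 32` pins the norm to the layer's backend, so the two checks agree at the boundary. A tiny self-contained check of that boundary behavior, with illustrative values and the simplifying assumption that `op->ne[1]` tracks `batch.n_tokens`:

    #include <cstdio>

    int main() {
        const int min_batch_size = 32;
        const int ns[] = {31, 32, 33};
        for (int n : ns) {
            const bool offload_old = n >  min_batch_size; // CUDA offload_op, before
            const bool offload_new = n >= min_batch_size; // after this commit
            const bool small_old   = n <= min_batch_size; // llama_build_graph, before
            const bool small_new   = n <  min_batch_size; // after this commit
            std::printf("n=%2d  old: offload=%d small=%d  new: offload=%d small=%d\n",
                        n, offload_old, small_old, offload_new, small_new);
        }
        return 0; // at n=32 the old checks disagree; the new ones do not
    }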