test zero max buffer size

parent 8854044561
commit 1e6e363d7f

2 changed files with 18 additions and 13 deletions

ggml-alloc.c   (24 changed lines: 12 additions, 12 deletions)
ggml-cuda.cu   (7 changed lines: 6 additions, 1 deletion)
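Summary of the change: in ggml-alloc.c the hard error for a tensor larger than the buffer type's maximum size is commented out, and a new buffer is only started when the current one is non-empty, so an oversized tensor is given a buffer of its own. In the CUDA backend, the buffer type now reports its VMM allocation granularity as the maximum buffer size, which keeps buffers very small and exercises the splitting path, matching the commit title.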
ggml-alloc.c
@@ -988,19 +988,19 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
             this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
         }

-        if (this_size > max_size) {
-            fprintf(stderr, "%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
-                    __func__, t->name,
-                    ggml_backend_buft_name(buft),
-                    this_size, max_size);
-            for (size_t i = 0; i < n_buffers; i++) {
-                ggml_backend_buffer_free(buffers[i]);
-            }
-            free(buffers);
-            return NULL;
-        }
+        //if (this_size > max_size) {
+        //    fprintf(stderr, "%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
+        //            __func__, t->name,
+        //            ggml_backend_buft_name(buft),
+        //            this_size, max_size);
+        //    for (size_t i = 0; i < n_buffers; i++) {
+        //        ggml_backend_buffer_free(buffers[i]);
+        //    }
+        //    free(buffers);
+        //    return NULL;
+        //}

-        if ((cur_buf_size + this_size) > max_size) {
+        if (cur_buf_size != 0 && (cur_buf_size + this_size) > max_size) {
             // allocate tensors in the current buffer
             if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
                 return NULL;
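What the new condition buys: with cur_buf_size != 0 in the guard, a tensor that by itself exceeds max_size no longer aborts allocation; it simply becomes the only tensor in its buffer, and whether the backend can actually satisfy that allocation is decided later. A minimal self-contained sketch of the resulting grouping behavior (plan_buffers and tensor_sizes are illustrative names, not part of ggml):

#include <stddef.h>
#include <stdio.h>

// Mirrors the grouping logic of the loop above after this commit: a buffer is
// flushed only when it is non-empty AND adding the next tensor would exceed
// max_size, so an oversized tensor lands in a buffer of its own.
static void plan_buffers(const size_t * tensor_sizes, size_t n_tensors, size_t max_size) {
    size_t cur_buf_size = 0;
    size_t first = 0;
    for (size_t i = 0; i < n_tensors; i++) {
        size_t this_size = tensor_sizes[i];
        if (cur_buf_size != 0 && (cur_buf_size + this_size) > max_size) {
            printf("buffer for tensors [%zu, %zu): %zu bytes\n", first, i, cur_buf_size);
            first = i;
            cur_buf_size = 0;
        }
        cur_buf_size += this_size;
    }
    if (cur_buf_size != 0) {
        printf("buffer for tensors [%zu, %zu): %zu bytes\n", first, n_tensors, cur_buf_size);
    }
}

int main(void) {
    // 512 exceeds max_size = 256; before this commit that was a hard error,
    // now it simply gets its own buffer.
    size_t sizes[] = { 64, 192, 512, 32 };
    plan_buffers(sizes, sizeof(sizes) / sizeof(sizes[0]), 256);
    return 0;
}

Running this prints three buffers of 256, 512, and 32 bytes: the 64+192 pair is packed together, the oversized 512 is isolated, and the trailing 32 starts a fresh buffer.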
ggml-cuda.cu
@@ -576,6 +576,11 @@ GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alignment(ggml_backend
     GGML_UNUSED(buft);
 }

+GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
+    ggml_backend_cuda_buffer_type_context * buft_ctx = (ggml_backend_cuda_buffer_type_context *)buft->context;
+    return ggml_cuda_info().devices[buft_ctx->device].vmm_granularity;
+}
+
 GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
     size_t size = ggml_nbytes(tensor);
     int64_t ne0 = tensor->ne[0];
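Note on the value returned: the device's VMM allocation granularity is typically small (on the order of a couple of megabytes), so reporting it as the maximum buffer size makes almost every tensor group overflow a buffer and forces the splitting path in ggml-alloc.c above. This is a stress-test value in keeping with the commit title, not a production setting.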
@@ -595,7 +600,7 @@ static ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = {
     /* .get_name         = */ ggml_backend_cuda_buffer_type_name,
     /* .alloc_buffer     = */ ggml_backend_cuda_buffer_type_alloc_buffer,
     /* .get_alignment    = */ ggml_backend_cuda_buffer_type_get_alignment,
-    /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
+    /* .get_max_size     = */ ggml_backend_cuda_buffer_type_get_max_size,
     /* .get_alloc_size   = */ ggml_backend_cuda_buffer_type_get_alloc_size,
     /* .is_host          = */ NULL,
 };
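The removed comment ("defaults to SIZE_MAX") describes the dispatch convention: a NULL .get_max_size slot means "no limit". A self-contained sketch of that convention follows; the stand-in types, the buft_get_max_size wrapper, and the 2 MiB value are illustrative assumptions, not the real ggml-backend declarations.

#include <stdint.h>
#include <stdio.h>

// Stand-in for the one vtable slot this commit fills in.
typedef struct {
    size_t (*get_max_size)(void * ctx);
} buft_iface;

static size_t cuda_get_max_size(void * ctx) {
    (void) ctx;
    return 2u * 1024 * 1024; // e.g. a 2 MiB VMM granularity
}

// The convention from the removed comment: NULL defaults to SIZE_MAX.
static size_t buft_get_max_size(const buft_iface * iface, void * ctx) {
    return iface->get_max_size ? iface->get_max_size(ctx) : SIZE_MAX;
}

int main(void) {
    buft_iface before = { /* .get_max_size = */ NULL };
    buft_iface after  = { /* .get_max_size = */ cuda_get_max_size };
    printf("before: %zu\n", buft_get_max_size(&before, NULL)); // SIZE_MAX
    printf("after:  %zu\n", buft_get_max_size(&after,  NULL)); // 2097152
    return 0;
}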