Implement max_size for backend buffer types to limit the size of a single allocation
This commit is contained in:
parent
7fa5ca9e62
commit
f652ebfd54
8 changed files with 213 additions and 21 deletions
81
ggml-alloc.c
81
ggml-alloc.c
|
@ -780,6 +780,7 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
|
||||||
GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
|
GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
|
||||||
|
|
||||||
size_t alignment = ggml_backend_buft_get_alignment(buft);
|
size_t alignment = ggml_backend_buft_get_alignment(buft);
|
||||||
|
size_t max_size = ggml_backend_buft_get_max_size(buft);
|
||||||
|
|
||||||
size_t nbytes = 0;
|
size_t nbytes = 0;
|
||||||
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||||
|
@ -796,6 +797,8 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// single buffer allocation
|
||||||
|
if (nbytes <= max_size) {
|
||||||
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, nbytes);
|
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, nbytes);
|
||||||
if (buffer == NULL) {
|
if (buffer == NULL) {
|
||||||
// failed to allocate buffer
|
// failed to allocate buffer
|
||||||
|
@ -825,6 +828,84 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
|
||||||
ggml_tallocr_free(tallocr);
|
ggml_tallocr_free(tallocr);
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// multi-buffer
|
||||||
|
size_t n_allocs = (nbytes - 1 + max_size) / max_size;
|
||||||
|
size_t * nbytes_per_alloc = (size_t *) malloc(n_allocs * sizeof(size_t));
|
||||||
|
memset(nbytes_per_alloc, 0, n_allocs * sizeof(size_t));
|
||||||
|
|
||||||
|
// Calculate nbytes per alloc
|
||||||
|
size_t alloc_idx = 0;
|
||||||
|
for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||||
|
if (t->data == NULL && t->view_src == NULL) {
|
||||||
|
size_t tensor_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
|
||||||
|
if (nbytes_per_alloc[alloc_idx] + tensor_size > max_size) {
|
||||||
|
// Move to next allocation
|
||||||
|
alloc_idx += 1;
|
||||||
|
}
|
||||||
|
nbytes_per_alloc[alloc_idx] += tensor_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_backend_buffer_t multi_buffer = ggml_backend_multi_buffer_alloc_buffer(n_allocs, buft, nbytes);
|
||||||
|
ggml_backend_multi_buffer_context_t multi_ctx = (ggml_backend_multi_buffer_context_t) multi_buffer->context;
|
||||||
|
|
||||||
|
size_t bytes_counter = 0;
|
||||||
|
struct ggml_tensor * current_tensor = ggml_get_first_tensor(ctx);
|
||||||
|
|
||||||
|
for (alloc_idx = 0; alloc_idx < n_allocs; alloc_idx++) {
|
||||||
|
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, nbytes_per_alloc[alloc_idx]);
|
||||||
|
if (buffer == NULL) {
|
||||||
|
// failed to allocate buffer
|
||||||
|
#ifndef NDEBUG
|
||||||
|
fprintf(stderr, "%s: failed to allocate buffer\n", __func__);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// free previously allocated buffers
|
||||||
|
for (size_t dealloc_idx = 0; dealloc_idx < alloc_idx; dealloc_idx++) {
|
||||||
|
ggml_backend_buffer_free(multi_ctx->buffers[dealloc_idx]);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(nbytes_per_alloc);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
multi_ctx->buffers[alloc_idx] = buffer;
|
||||||
|
|
||||||
|
ggml_tallocr_t tallocr = ggml_tallocr_new_from_buffer(buffer);
|
||||||
|
|
||||||
|
for (; current_tensor != NULL; current_tensor = ggml_get_next_tensor(ctx, current_tensor)) {
|
||||||
|
size_t tensor_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, current_tensor), alignment);
|
||||||
|
|
||||||
|
if (bytes_counter + tensor_size > max_size) {
|
||||||
|
// tensor uses next buffer
|
||||||
|
bytes_counter = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
bytes_counter += tensor_size;
|
||||||
|
if (current_tensor->data == NULL) {
|
||||||
|
if (current_tensor->view_src == NULL) {
|
||||||
|
ggml_tallocr_alloc(tallocr, current_tensor);
|
||||||
|
} else {
|
||||||
|
ggml_backend_view_init(buffer, current_tensor);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (current_tensor->view_src != NULL) {
|
||||||
|
// view of a pre-allocated tensor
|
||||||
|
ggml_backend_view_init(buffer, current_tensor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ggml_tallocr_free(tallocr);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(nbytes_per_alloc);
|
||||||
|
|
||||||
|
return multi_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
|
ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, ggml_backend_t backend) {
|
||||||
|
|
|
@ -19,6 +19,7 @@ extern "C" {
|
||||||
const char * (*GGML_CALL get_name) (ggml_backend_buffer_type_t buft);
|
const char * (*GGML_CALL get_name) (ggml_backend_buffer_type_t buft);
|
||||||
ggml_backend_buffer_t (*GGML_CALL alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
|
ggml_backend_buffer_t (*GGML_CALL alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
|
||||||
size_t (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
|
size_t (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
|
||||||
|
size_t (*GGML_CALL get_max_size) (ggml_backend_buffer_type_t buft); // allocation max size
|
||||||
size_t (*GGML_CALL get_alloc_size) (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
|
size_t (*GGML_CALL get_alloc_size) (ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
|
||||||
bool (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
|
bool (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
|
||||||
// check if tensor data is in host memory
|
// check if tensor data is in host memory
|
||||||
|
@ -63,6 +64,20 @@ extern "C" {
|
||||||
// do not use directly, use ggml_backend_tensor_copy instead
|
// do not use directly, use ggml_backend_tensor_copy instead
|
||||||
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
|
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||||
|
|
||||||
|
// multi-buffer
|
||||||
|
struct ggml_backend_multi_buffer_context {
|
||||||
|
ggml_backend_buffer_t * buffers;
|
||||||
|
size_t n_buffers;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct ggml_backend_multi_buffer_context * ggml_backend_multi_buffer_context_t;
|
||||||
|
|
||||||
|
GGML_CALL const char* ggml_backend_multi_buffer_get_name(ggml_backend_buffer_t buffer);
|
||||||
|
GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(size_t n_buffers, ggml_backend_buffer_type_t buft, size_t nbytes);
|
||||||
|
GGML_CALL void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer);
|
||||||
|
GGML_CALL void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value);
|
||||||
|
struct ggml_backend_buffer_i ggml_backend_multi_buffer_context_interface(void);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Backend
|
// Backend
|
||||||
//
|
//
|
||||||
|
|
|
@ -27,6 +27,14 @@ size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
|
||||||
return buft->iface.get_alignment(buft);
|
return buft->iface.get_alignment(buft);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return the maximum size, in bytes, of a single allocation for the given buffer type.
//
// get_max_size is an optional interface callback. When a buffer type leaves it NULL
// no limit is reported and the largest representable size is returned.
size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
    if (buft->iface.get_max_size) {
        return buft->iface.get_max_size(buft);
    }

    // The return type is size_t, so use its own maximum rather than UINT64_MAX:
    // the two are equal where size_t is 64 bits wide, and this avoids an implicit
    // narrowing conversion where it is not.
    return SIZE_MAX;
}
|
||||||
|
|
||||||
GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
|
GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
|
||||||
// get_alloc_size is optional, defaults to ggml_nbytes
|
// get_alloc_size is optional, defaults to ggml_nbytes
|
||||||
if (buft->iface.get_alloc_size) {
|
if (buft->iface.get_alloc_size) {
|
||||||
|
@ -55,8 +63,6 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
|
||||||
size_t size) {
|
size_t size) {
|
||||||
ggml_backend_buffer_t buffer = malloc(sizeof(struct ggml_backend_buffer));
|
ggml_backend_buffer_t buffer = malloc(sizeof(struct ggml_backend_buffer));
|
||||||
|
|
||||||
GGML_ASSERT(iface.get_base != NULL);
|
|
||||||
|
|
||||||
(*buffer) = (struct ggml_backend_buffer) {
|
(*buffer) = (struct ggml_backend_buffer) {
|
||||||
/* .interface = */ iface,
|
/* .interface = */ iface,
|
||||||
/* .buft = */ buft,
|
/* .buft = */ buft,
|
||||||
|
@ -106,6 +112,10 @@ size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer) {
|
||||||
return ggml_backend_buft_get_alignment(ggml_backend_buffer_get_type(buffer));
|
return ggml_backend_buft_get_alignment(ggml_backend_buffer_get_type(buffer));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maximum single-allocation size of the buffer type that ggml_backend_buffer_get_type
// reports for `buffer`.
size_t ggml_backend_buffer_get_max_size(ggml_backend_buffer_t buffer) {
    ggml_backend_buffer_type_t buft = ggml_backend_buffer_get_type(buffer);

    return ggml_backend_buft_get_max_size(buft);
}
|
||||||
|
|
||||||
size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
|
||||||
return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_get_type(buffer), tensor);
|
return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_get_type(buffer), tensor);
|
||||||
}
|
}
|
||||||
|
@ -169,6 +179,10 @@ size_t ggml_backend_get_alignment(ggml_backend_t backend) {
|
||||||
return ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend));
|
return ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Maximum single-allocation size of the backend's default buffer type.
size_t ggml_backend_get_max_size(ggml_backend_t backend) {
    ggml_backend_buffer_type_t default_buft = ggml_backend_get_default_buffer_type(backend);

    return ggml_backend_buft_get_max_size(default_buft);
}
|
||||||
|
|
||||||
void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||||
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
|
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
|
||||||
|
@ -342,6 +356,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
|
||||||
extern GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
|
extern GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
|
||||||
ggml_backend_register("Metal", ggml_backend_reg_metal_init, ggml_backend_metal_buffer_type(), NULL);
|
ggml_backend_register("Metal", ggml_backend_reg_metal_init, ggml_backend_metal_buffer_type(), NULL);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef GGML_USE_VULKAN
|
||||||
|
extern GGML_CALL int ggml_backend_vk_reg_devices(void);
|
||||||
|
ggml_backend_vk_reg_devices();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
|
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
|
||||||
|
@ -545,6 +564,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
|
||||||
/* .get_name = */ ggml_backend_cpu_buffer_type_get_name,
|
/* .get_name = */ ggml_backend_cpu_buffer_type_get_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
||||||
|
/* .get_max_size = */ NULL, // defaults to UINT64_MAX
|
||||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||||
/* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
|
||||||
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
||||||
|
@ -600,6 +620,7 @@ ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
|
||||||
/* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
|
/* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
|
||||||
|
/* .get_max_size = */ NULL, // defaults to UINT64_MAX
|
||||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||||
/* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
|
||||||
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
|
||||||
|
@ -755,6 +776,56 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, v
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// multi-buffer buffer
|
||||||
|
|
||||||
|
// Name of a multi-buffer: whatever the get_name callback of its first sub-buffer returns.
// NOTE(review): reads buffers[0] unconditionally, so this presumably expects at least one
// populated sub-buffer - confirm against callers.
GGML_CALL const char * ggml_backend_multi_buffer_get_name(ggml_backend_buffer_t buffer) {
    ggml_backend_multi_buffer_context_t multi = (ggml_backend_multi_buffer_context_t) buffer->context;
    ggml_backend_buffer_t first = multi->buffers[0];

    return first->iface.get_name(first);
}
|
||||||
|
|
||||||
|
// Create the wrapper buffer of a multi-buffer.
//
// n_buffers  number of sub-buffer slots to reserve in the context
// buft       buffer type passed on to ggml_backend_buffer_init
// nbytes     size passed on to ggml_backend_buffer_init
//
// The slots are only reserved here, not filled: the caller stores the sub-buffers in
// ((ggml_backend_multi_buffer_context_t) result->context)->buffers afterwards.
// Every slot starts out as NULL.
//
// Returns the result of ggml_backend_buffer_init, or NULL if the context or its slot
// array could not be allocated.
GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(size_t n_buffers, ggml_backend_buffer_type_t buft, size_t nbytes) {
    ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) malloc(sizeof(struct ggml_backend_multi_buffer_context));
    if (ctx == NULL) {
        return NULL;
    }

    ctx->n_buffers = n_buffers;

    // calloc rejects an overflowing n_buffers * sizeof(...) and zero-fills the array,
    // so unfilled slots are NULL instead of indeterminate
    ctx->buffers = (ggml_backend_buffer_t *) calloc(n_buffers, sizeof(ggml_backend_buffer_t));
    if (ctx->buffers == NULL && n_buffers > 0) {
        // a zero-length request may legitimately yield NULL; anything else is a failure
        free(ctx);
        return NULL;
    }

    return ggml_backend_buffer_init(buft, ggml_backend_multi_buffer_context_interface(), ctx, nbytes);
}
|
||||||
|
|
||||||
|
// free_buffer callback of a multi-buffer: frees each sub-buffer in order, then the
// slot array, then the context itself.
GGML_CALL void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    ggml_backend_multi_buffer_context_t multi = (ggml_backend_multi_buffer_context_t) buffer->context;

    size_t idx = 0;
    while (idx < multi->n_buffers) {
        ggml_backend_buffer_free(multi->buffers[idx]);
        idx++;
    }

    free(multi->buffers);
    free(multi);
}
|
||||||
|
|
||||||
|
// clear callback of a multi-buffer: forwards the clear request, with the same value,
// to every sub-buffer in order.
GGML_CALL void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    ggml_backend_multi_buffer_context_t multi = (ggml_backend_multi_buffer_context_t) buffer->context;

    size_t idx = 0;
    while (idx < multi->n_buffers) {
        ggml_backend_buffer_clear(multi->buffers[idx], value);
        idx++;
    }
}
|
||||||
|
|
||||||
|
struct ggml_backend_buffer_i ggml_backend_multi_buffer_context_interface(void) {
|
||||||
|
static struct ggml_backend_buffer_i multi_backend_buffer_i = {
|
||||||
|
/* .get_name = */ ggml_backend_multi_buffer_get_name,
|
||||||
|
/* .free_buffer = */ ggml_backend_multi_buffer_free_buffer,
|
||||||
|
/* .get_base = */ NULL,
|
||||||
|
/* .init_tensor = */ NULL,
|
||||||
|
/* .set_tensor = */ NULL,
|
||||||
|
/* .get_tensor = */ NULL,
|
||||||
|
/* .cpy_tensor = */ NULL,
|
||||||
|
/* .clear = */ ggml_backend_multi_buffer_clear,
|
||||||
|
/* .reset = */ NULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
return multi_backend_buffer_i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// scheduler
|
// scheduler
|
||||||
|
|
||||||
#define GGML_MAX_BACKENDS 16
|
#define GGML_MAX_BACKENDS 16
|
||||||
|
|
|
@ -20,6 +20,7 @@ extern "C" {
|
||||||
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
GGML_API const char * ggml_backend_buft_name (ggml_backend_buffer_type_t buft);
|
||||||
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size);
|
||||||
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
|
||||||
|
GGML_API size_t ggml_backend_buft_get_max_size (ggml_backend_buffer_type_t buft);
|
||||||
GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
GGML_API GGML_CALL size_t ggml_backend_buft_get_alloc_size (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
|
||||||
GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
|
GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
|
||||||
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
GGML_API bool ggml_backend_buft_is_host (ggml_backend_buffer_type_t buft);
|
||||||
|
@ -36,6 +37,7 @@ extern "C" {
|
||||||
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
|
||||||
GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
GGML_API GGML_CALL void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
|
||||||
|
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
|
||||||
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
|
||||||
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
GGML_API void ggml_backend_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value);
|
||||||
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
GGML_API bool ggml_backend_buffer_is_host (ggml_backend_buffer_t buffer);
|
||||||
|
@ -54,6 +56,7 @@ extern "C" {
|
||||||
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
|
GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
|
||||||
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
|
GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
|
||||||
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
|
GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
|
||||||
|
GGML_API size_t ggml_backend_get_max_size(ggml_backend_t backend);
|
||||||
|
|
||||||
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||||
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||||
|
|
|
@ -10428,6 +10428,7 @@ static ggml_backend_buffer_type_i ggml_backend_cuda_buffer_type_interface = {
|
||||||
/* .get_name = */ ggml_backend_cuda_buffer_type_name,
|
/* .get_name = */ ggml_backend_cuda_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_cuda_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_cuda_buffer_type_get_alignment,
|
||||||
|
/* .get_max_size = */ NULL, // defaults to UINT64_MAX
|
||||||
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
|
/* .get_alloc_size = */ ggml_backend_cuda_buffer_type_get_alloc_size,
|
||||||
/* .supports_backend = */ ggml_backend_cuda_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_cuda_buffer_type_supports_backend,
|
||||||
/* .is_host = */ NULL,
|
/* .is_host = */ NULL,
|
||||||
|
@ -10703,6 +10704,7 @@ static ggml_backend_buffer_type_i ggml_backend_cuda_split_buffer_type_interface
|
||||||
/* .get_name = */ ggml_backend_cuda_split_buffer_type_name,
|
/* .get_name = */ ggml_backend_cuda_split_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_cuda_split_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_cuda_split_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_cuda_split_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_cuda_split_buffer_type_get_alignment,
|
||||||
|
/* .get_max_size = */ NULL, // defaults to UINT64_MAX
|
||||||
/* .get_alloc_size = */ ggml_backend_cuda_split_buffer_type_get_alloc_size,
|
/* .get_alloc_size = */ ggml_backend_cuda_split_buffer_type_get_alloc_size,
|
||||||
/* .supports_backend = */ ggml_backend_cuda_split_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_cuda_split_buffer_type_supports_backend,
|
||||||
/* .is_host = */ ggml_backend_cuda_split_buffer_type_is_host,
|
/* .is_host = */ ggml_backend_cuda_split_buffer_type_is_host,
|
||||||
|
@ -10782,6 +10784,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() {
|
||||||
/* .get_name = */ ggml_backend_cuda_host_buffer_type_name,
|
/* .get_name = */ ggml_backend_cuda_host_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_cuda_host_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_cuda_host_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||||
|
/* .get_max_size = */ NULL, // defaults to UINT64_MAX
|
||||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||||
/* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
|
/* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
|
||||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||||
|
|
|
@ -2445,6 +2445,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
|
||||||
/* .get_name = */ ggml_backend_metal_buffer_type_get_name,
|
/* .get_name = */ ggml_backend_metal_buffer_type_get_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_metal_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_metal_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_metal_buffer_type_get_alignment,
|
||||||
|
/* .get_max_size = */ NULL, // defaults to UINT64_MAX
|
||||||
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
|
||||||
/* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_metal_buffer_type_supports_backend,
|
||||||
/* .is_host = */ ggml_backend_metal_buffer_type_is_host,
|
/* .is_host = */ ggml_backend_metal_buffer_type_is_host,
|
||||||
|
|
|
@ -2055,6 +2055,7 @@ static ggml_backend_buffer_type_i ggml_backend_opencl_buffer_type_interface = {
|
||||||
/* .get_name = */ ggml_backend_opencl_buffer_type_name,
|
/* .get_name = */ ggml_backend_opencl_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_opencl_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_opencl_buffer_type_get_alignment,
|
||||||
|
/* .get_max_size = */ NULL, // defaults to UINT64_MAX
|
||||||
/* .get_alloc_size = */ NULL,
|
/* .get_alloc_size = */ NULL,
|
||||||
/* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_opencl_buffer_type_supports_backend,
|
||||||
/* .is_host = */ NULL,
|
/* .is_host = */ NULL,
|
||||||
|
@ -2111,6 +2112,7 @@ ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type() {
|
||||||
/* .get_name = */ ggml_backend_opencl_host_buffer_type_name,
|
/* .get_name = */ ggml_backend_opencl_host_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_opencl_host_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_opencl_host_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
|
||||||
|
/* .get_max_size = */ NULL, // defaults to UINT64_MAX
|
||||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||||
/* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
|
/* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
|
||||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||||
|
|
|
@ -121,6 +121,7 @@ typedef std::vector<vk_submission> vk_sequence;
|
||||||
struct vk_device {
|
struct vk_device {
|
||||||
vk::PhysicalDevice physical_device;
|
vk::PhysicalDevice physical_device;
|
||||||
vk::PhysicalDeviceProperties properties;
|
vk::PhysicalDeviceProperties properties;
|
||||||
|
uint64_t max_memory_allocation_size;
|
||||||
bool fp16;
|
bool fp16;
|
||||||
vk::Device device;
|
vk::Device device;
|
||||||
uint32_t vendor_id;
|
uint32_t vendor_id;
|
||||||
|
@ -972,7 +973,14 @@ std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
|
||||||
vk_instance = vk::createInstance(instance_create_info);
|
vk_instance = vk::createInstance(instance_create_info);
|
||||||
|
|
||||||
vk_device.physical_device = vk_instance.enumeratePhysicalDevices()[dev_num];
|
vk_device.physical_device = vk_instance.enumeratePhysicalDevices()[dev_num];
|
||||||
vk_device.properties = vk_device.physical_device.getProperties();
|
vk::PhysicalDeviceProperties2 props2;
|
||||||
|
vk::PhysicalDeviceMaintenance3Properties props3;
|
||||||
|
props3.pNext = nullptr;
|
||||||
|
props2.pNext = &props3;
|
||||||
|
vk_device.physical_device.getProperties2(&props2);
|
||||||
|
vk_device.properties = props2.properties;
|
||||||
|
vk_device.max_memory_allocation_size = props3.maxMemoryAllocationSize;
|
||||||
|
|
||||||
std::cerr << "ggml_vulkan: Using " << vk_device.properties.deviceName << std::endl;
|
std::cerr << "ggml_vulkan: Using " << vk_device.properties.deviceName << std::endl;
|
||||||
|
|
||||||
vk_device.vendor_id = vk_device.properties.vendorID;
|
vk_device.vendor_id = vk_device.properties.vendorID;
|
||||||
|
@ -4243,6 +4251,12 @@ GGML_CALL static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_b
|
||||||
UNUSED(buft);
|
UNUSED(buft);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// get_max_size callback of the Vulkan buffer type: reports the value stored in
// vk_device.max_memory_allocation_size. The buffer type argument is not consulted.
GGML_CALL static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
    UNUSED(buft);

    const size_t limit = vk_device.max_memory_allocation_size;
    return limit;
}
|
||||||
|
|
||||||
GGML_CALL static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
|
GGML_CALL static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
|
||||||
return ggml_nbytes(tensor);
|
return ggml_nbytes(tensor);
|
||||||
|
|
||||||
|
@ -4259,6 +4273,7 @@ static ggml_backend_buffer_type_i ggml_backend_vk_buffer_type_interface = {
|
||||||
/* .get_name = */ ggml_backend_vk_buffer_type_name,
|
/* .get_name = */ ggml_backend_vk_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_vk_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_vk_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_vk_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_vk_buffer_type_get_alignment,
|
||||||
|
/* .get_max_size = */ ggml_backend_vk_buffer_type_get_max_size,
|
||||||
/* .get_alloc_size = */ ggml_backend_vk_buffer_type_get_alloc_size,
|
/* .get_alloc_size = */ ggml_backend_vk_buffer_type_get_alloc_size,
|
||||||
/* .supports_backend = */ ggml_backend_vk_buffer_type_supports_backend,
|
/* .supports_backend = */ ggml_backend_vk_buffer_type_supports_backend,
|
||||||
/* .is_host = */ NULL,
|
/* .is_host = */ NULL,
|
||||||
|
@ -4326,6 +4341,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
|
||||||
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
|
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
|
||||||
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
|
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
|
||||||
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
|
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
|
||||||
|
/* .get_max_size = */ NULL, // defaults to UINT64_MAX
|
||||||
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
||||||
/* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
|
/* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
|
||||||
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue