metal : handle buffers larger than device's maxBufferLength (#1826)
* metal : handle buffers larger than device's maxBufferLength * metal : print more verbose device info + handle errors * metal : fix prints for overlapping views * metal : minimize view overlap to try to utilize device memory better
This commit is contained in:
parent
57cd69460f
commit
ce2c7d72e2
6 changed files with 125 additions and 35 deletions
24
ggml.c
24
ggml.c
|
@ -4154,14 +4154,34 @@ void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
|
|||
ctx->no_alloc = no_alloc;
|
||||
}
|
||||
|
||||
void * ggml_get_mem_buffer(struct ggml_context * ctx) {
|
||||
void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
|
||||
return ctx->mem_buffer;
|
||||
}
|
||||
|
||||
size_t ggml_get_mem_size(struct ggml_context * ctx) {
|
||||
size_t ggml_get_mem_size(const struct ggml_context * ctx) {
|
||||
return ctx->mem_size;
|
||||
}
|
||||
|
||||
size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
|
||||
size_t max_size = 0;
|
||||
|
||||
struct ggml_object * obj = ctx->objects_begin;
|
||||
|
||||
while (obj != NULL) {
|
||||
struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
|
||||
|
||||
const size_t size = ggml_nbytes(tensor);
|
||||
|
||||
if (max_size < size) {
|
||||
max_size = size;
|
||||
}
|
||||
|
||||
obj = obj->next;
|
||||
}
|
||||
|
||||
return max_size;
|
||||
}
|
||||
|
||||
// IMPORTANT:
|
||||
// when creating "opt" tensors, always save and load the scratch buffer
|
||||
// this is an error prone process, but it is necessary to support inplace
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue