sync : ggml (Metal F32 support + reduce ggml-alloc size) (#3192)
* sync : ggml (Metal F32 support + reduce ggml-alloc size) ggml-ci * llama-bench : fix ggml_cpu_has_metal() duplicate function ggml-ci
This commit is contained in:
parent
7e50d34be6
commit
8c00b7a6ff
6 changed files with 193 additions and 90 deletions
12
ggml-alloc.c
12
ggml-alloc.c
|
@ -131,6 +131,10 @@ static bool ggml_allocr_is_own(struct ggml_allocr * alloc, const struct ggml_ten
|
|||
return ptr >= alloc->data && (char *)ptr < (char *)alloc->data + alloc->max_size;
|
||||
}
|
||||
|
||||
static bool ggml_is_view(struct ggml_tensor * t) {
|
||||
return t->view_src != NULL;
|
||||
}
|
||||
|
||||
void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
|
||||
#ifdef GGML_ALLOCATOR_DEBUG
|
||||
GGML_ASSERT(!ggml_is_view(tensor)); // views generally get data pointer from one of their sources
|
||||
|
@ -338,8 +342,8 @@ static void free_vmem(void * base_addr, size_t size) {
|
|||
|
||||
// allocate uncommitted virtual memory to measure the size of the graph
|
||||
static void alloc_measure_vmem(void ** base_addr, size_t * size) {
|
||||
// 1TB for 64-bit, 1GB for 32-bit
|
||||
*size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<40;
|
||||
// 128GB for 64-bit, 1GB for 32-bit
|
||||
*size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<37;
|
||||
do {
|
||||
*base_addr = alloc_vmem(*size);
|
||||
if (*base_addr != NULL) {
|
||||
|
@ -399,10 +403,6 @@ bool ggml_allocr_is_measure(struct ggml_allocr * alloc) {
|
|||
|
||||
//////////// compute graph allocator
|
||||
|
||||
static bool ggml_is_view(struct ggml_tensor * t) {
|
||||
return t->view_src != NULL;
|
||||
}
|
||||
|
||||
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
|
||||
if (a->type != b->type) {
|
||||
return false;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue