release metal buffers when freeing metal context

This commit is contained in:
Aaron Miller 2023-06-30 16:08:37 -07:00
parent b8c8dda75f
commit 1a3e8ad6db
2 changed files with 10 additions and 2 deletions

View file

@ -202,7 +202,9 @@ struct ggml_metal_context * ggml_metal_init(void) {
/*
 * Tear down a Metal context and everything it holds that is visible here.
 *
 * Sends `release` to the Metal object of each of the first ctx->n_buffers
 * entries in ctx->buffers, then frees the context struct itself.
 *
 * ctx may be NULL, in which case the call is a no-op (mirrors free()).
 * After this returns, ctx must not be used again.
 *
 * NOTE(review): the explicit `release` message implies this file is built
 * with manual reference counting (no ARC) - confirm against the build flags.
 */
void ggml_metal_free(struct ggml_metal_context * ctx) {
    // Nothing to tear down; avoid dereferencing a NULL context below.
    if (ctx == NULL) {
        return;
    }

    fprintf(stderr, "%s: deallocating\n", __func__);

    // Drop our reference to every registered buffer before the struct that
    // tracks them goes away; otherwise those references can never be released.
    for (int i = 0; i < ctx->n_buffers; ++i) {
        [ctx->buffers[i].metal release];
    }

    free(ctx);
}

View file

@ -253,7 +253,13 @@ struct llama_model {
struct llama_context { struct llama_context {
llama_context(const llama_model & model, const llama_vocab & vocab) : model(model), vocab(vocab), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {} llama_context(const llama_model & model, const llama_vocab & vocab) : model(model), vocab(vocab), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {}
#ifdef GGML_USE_METAL
// Hand the Metal context (if one is attached) to ggml_metal_free() when this
// llama_context is destroyed.
~llama_context() {
    if (!ctx_metal) {
        return; // no Metal context attached, nothing to free
    }
    ggml_metal_free(ctx_metal);
}
#endif
std::mt19937 rng; std::mt19937 rng;
bool has_evaluated_once = false; bool has_evaluated_once = false;