metal : first working version of the inference without prompt processing
Bonus: supports partial inference on the CPU
This commit is contained in:
parent
290cb700bf
commit
cb82adadb8
2 changed files with 11 additions and 6 deletions
12
ggml-metal.m
12
ggml-metal.m
|
@@ -237,7 +237,7 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, stru
|
|||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: error: buffer is nil\n", __func__);
|
||||
fprintf(stderr, "%s: error: buffer is nil for tensor '%s'\n", __func__, t->name);
|
||||
|
||||
return nil;
|
||||
}
|
||||
|
@@ -877,15 +877,15 @@ void ggml_metal_graph_compute(
|
|||
encoder = [command_buffer computeCommandEncoder];
|
||||
}
|
||||
|
||||
const int n_dims = ((int32_t *) src1->data)[1];
|
||||
const int mode = ((int32_t *) src1->data)[2];
|
||||
const int n_dims = ((int32_t *) dst->op_params)[1];
|
||||
const int mode = ((int32_t *) dst->op_params)[2];
|
||||
|
||||
const int n_past = ((int32_t *)(src1->data))[0];
|
||||
const int n_past = ((int32_t *)(dst->op_params))[0];
|
||||
|
||||
float freq_base;
|
||||
float freq_scale;
|
||||
memcpy(&freq_base, (int32_t *) src1->data + 4, sizeof(float));
|
||||
memcpy(&freq_scale, (int32_t *) src1->data + 5, sizeof(float));
|
||||
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
||||
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
||||
|
||||
[encoder setComputePipelineState:ctx->pipeline_rope];
|
||||
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue