metal : use metal_printf for debug logging

This commit is contained in:
Ravindra Marella 2023-08-24 16:50:05 +05:30
parent c3e53b421a
commit d30cb53c9d

View file

@ -107,7 +107,7 @@ static NSString * const msl_library_source = @"see metal.metal";
@end @end
struct ggml_metal_context * ggml_metal_init(int n_cb) { struct ggml_metal_context * ggml_metal_init(int n_cb) {
fprintf(stderr, "%s: allocating\n", __func__); metal_printf(stderr, "%s: allocating\n", __func__);
struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context)); struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
@ -139,7 +139,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
//NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"]; //NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]]; NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"]; NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
fprintf(stderr, "%s: loading '%s'\n", __func__, [path UTF8String]); metal_printf(stderr, "%s: loading '%s'\n", __func__, [path UTF8String]);
NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error]; NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
if (error) { if (error) {
@ -167,7 +167,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
#define GGML_METAL_ADD_KERNEL(name) \ #define GGML_METAL_ADD_KERNEL(name) \
ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \ ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \ ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
fprintf(stderr, "%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \ metal_printf(stderr, "%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
(int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \ (int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
(int) ctx->pipeline_##name.threadExecutionWidth); \ (int) ctx->pipeline_##name.threadExecutionWidth); \
if (error) { \ if (error) { \
@ -220,19 +220,19 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
#undef GGML_METAL_ADD_KERNEL #undef GGML_METAL_ADD_KERNEL
} }
fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); metal_printf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
fprintf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false"); metal_printf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
if (ctx->device.maxTransferRate != 0) { if (ctx->device.maxTransferRate != 0) {
fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0); metal_printf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
} else { } else {
fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__); metal_printf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
} }
return ctx; return ctx;
} }
void ggml_metal_free(struct ggml_metal_context * ctx) { void ggml_metal_free(struct ggml_metal_context * ctx) {
fprintf(stderr, "%s: deallocating\n", __func__); metal_printf(stderr, "%s: deallocating\n", __func__);
for (int i = 0; i < ctx->n_buffers; ++i) { for (int i = 0; i < ctx->n_buffers; ++i) {
[ctx->buffers[i].metal release]; [ctx->buffers[i].metal release];
} }
@ -271,7 +271,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx) {
// Metal buffer based on the host memory pointer // Metal buffer based on the host memory pointer
// //
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) { static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
//fprintf(stderr, "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach); //metal_printf(stderr, "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
const int64_t tsize = ggml_nbytes(t); const int64_t tsize = ggml_nbytes(t);
@ -282,7 +282,7 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, stru
if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) { if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
*offs = (size_t) ioffs; *offs = (size_t) ioffs;
//fprintf(stderr, "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs); //metal_printf(stderr, "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
return ctx->buffers[i].metal; return ctx->buffers[i].metal;
} }
@ -335,7 +335,7 @@ bool ggml_metal_add_buffer(
return false; return false;
} }
fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0); metal_printf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
++ctx->n_buffers; ++ctx->n_buffers;
} else { } else {
@ -359,23 +359,23 @@ bool ggml_metal_add_buffer(
return false; return false;
} }
fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i); metal_printf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
if (i + size_step < size) { if (i + size_step < size) {
fprintf(stderr, "\n"); metal_printf(stderr, "\n");
} }
++ctx->n_buffers; ++ctx->n_buffers;
} }
} }
fprintf(stderr, ", (%8.2f / %8.2f)", metal_printf(stderr, ", (%8.2f / %8.2f)",
ctx->device.currentAllocatedSize / 1024.0 / 1024.0, ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) { if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n"); fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
} else { } else {
fprintf(stderr, "\n"); metal_printf(stderr, "\n");
} }
} }