metal : use metal_printf for debug logging

parent c3e53b421a
commit d30cb53c9d

1 changed file with 15 additions and 15 deletions

ggml-metal.m (+15, -15)
@@ -107,7 +107,7 @@ static NSString * const msl_library_source = @"see metal.metal";
 @end
 
 struct ggml_metal_context * ggml_metal_init(int n_cb) {
-    fprintf(stderr, "%s: allocating\n", __func__);
+    metal_printf(stderr, "%s: allocating\n", __func__);
 
     struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
 
@@ -139,7 +139,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
         //NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
         NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
         NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
-        fprintf(stderr, "%s: loading '%s'\n", __func__, [path UTF8String]);
+        metal_printf(stderr, "%s: loading '%s'\n", __func__, [path UTF8String]);
 
         NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
         if (error) {
@@ -167,7 +167,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 #define GGML_METAL_ADD_KERNEL(name) \
         ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
         ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
-        fprintf(stderr, "%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
+        metal_printf(stderr, "%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
                 (int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
                 (int) ctx->pipeline_##name.threadExecutionWidth); \
         if (error) { \
@@ -220,19 +220,19 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 #undef GGML_METAL_ADD_KERNEL
     }
 
-    fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-    fprintf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+    metal_printf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+    metal_printf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
     if (ctx->device.maxTransferRate != 0) {
-        fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+        metal_printf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
     } else {
-        fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
+        metal_printf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
     }
 
     return ctx;
 }
 
 void ggml_metal_free(struct ggml_metal_context * ctx) {
-    fprintf(stderr, "%s: deallocating\n", __func__);
+    metal_printf(stderr, "%s: deallocating\n", __func__);
     for (int i = 0; i < ctx->n_buffers; ++i) {
         [ctx->buffers[i].metal release];
     }
@@ -271,7 +271,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx) {
 // Metal buffer based on the host memory pointer
 //
 static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
-    //fprintf(stderr, "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
+    //metal_printf(stderr, "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
 
     const int64_t tsize = ggml_nbytes(t);
 
@@ -282,7 +282,7 @@ static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, stru
         if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
             *offs = (size_t) ioffs;
 
-            //fprintf(stderr, "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
+            //metal_printf(stderr, "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
 
             return ctx->buffers[i].metal;
         }
@@ -335,7 +335,7 @@ bool ggml_metal_add_buffer(
                return false;
            }
 
-            fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
+            metal_printf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
 
            ++ctx->n_buffers;
        } else {
@@ -359,23 +359,23 @@ bool ggml_metal_add_buffer(
                    return false;
                }
 
-                fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
+                metal_printf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
                if (i + size_step < size) {
-                    fprintf(stderr, "\n");
+                    metal_printf(stderr, "\n");
                }
 
                ++ctx->n_buffers;
            }
        }
 
-        fprintf(stderr, ", (%8.2f / %8.2f)",
+        metal_printf(stderr, ", (%8.2f / %8.2f)",
                ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
                ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
 
        if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
            fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
        } else {
-            fprintf(stderr, "\n");
+            metal_printf(stderr, "\n");
        }
    }
 
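Note: the definition of metal_printf is not part of this diff, so the sketch below is only a plausible reconstruction: a variadic macro that forwards all of its arguments to fprintf, which is consistent with the call sites above still passing stderr as the first argument. The guard name GGML_METAL_NDEBUG is an assumption used for illustration, not something taken from this commit.

// A minimal sketch of a metal_printf debug-logging wrapper (assumed, not shown in this diff).
// When the hypothetical GGML_METAL_NDEBUG guard is defined, every metal_printf call expands
// to nothing; otherwise the arguments are forwarded to fprintf unchanged, so the call sites
// can keep passing stderr exactly as before.
#include <stdio.h>

#ifdef GGML_METAL_NDEBUG
#define metal_printf(...)
#else
#define metal_printf(...) fprintf(__VA_ARGS__)
#endif

With a definition along these lines, a release build can silence every message touched by this commit by defining the guard at compile time (for example, passing -DGGML_METAL_NDEBUG to the compiler), while debug builds keep exactly the output the original fprintf calls produced.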