Added support for selecting which GPU the Metal backend uses on Intel and Apple Silicon Macs via the --main-gpu <index> option
This commit is contained in:
parent
b72942fac9
commit
0d72b7562b
5 changed files with 72 additions and 20 deletions
|
@ -1116,7 +1116,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GGML_USE_METAL
|
#ifdef GGML_USE_METAL
|
||||||
new_clip->backend = ggml_backend_metal_init();
|
new_clip->backend = ggml_backend_metal_init(0);
|
||||||
LOG_TEE("%s: CLIP using Metal backend\n", __func__);
|
LOG_TEE("%s: CLIP using Metal backend\n", __func__);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -75,9 +75,9 @@ static ggml_backend_t create_backend() {
|
||||||
}
|
}
|
||||||
#elif GGML_USE_METAL
|
#elif GGML_USE_METAL
|
||||||
fprintf(stderr, "%s: using Metal backend\n", __func__);
|
fprintf(stderr, "%s: using Metal backend\n", __func__);
|
||||||
backend = ggml_backend_metal_init();
|
backend = ggml_backend_metal_init(0);
|
||||||
if (!backend) {
|
if (!backend) {
|
||||||
fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
|
fprintf(stderr, "%s: ggml_backend_metal_init(0) failed\n", __func__);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ extern "C" {
|
||||||
|
|
||||||
GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
|
GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
|
||||||
|
|
||||||
GGML_API ggml_backend_t ggml_backend_metal_init(void);
|
GGML_API ggml_backend_t ggml_backend_metal_init(int deviceIndex);
|
||||||
|
|
||||||
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#import <Foundation/Foundation.h>
|
#import <Foundation/Foundation.h>
|
||||||
|
|
||||||
#import <Metal/Metal.h>
|
#import <Metal/Metal.h>
|
||||||
|
#import <sys/sysctl.h>
|
||||||
|
|
||||||
#undef MIN
|
#undef MIN
|
||||||
#undef MAX
|
#undef MAX
|
||||||
|
@ -293,7 +294,7 @@ static void * ggml_metal_host_malloc(size_t n) {
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct ggml_backend_metal_context * ggml_metal_init(int n_cb) {
|
static struct ggml_backend_metal_context * ggml_metal_init( int deviceIndex, int n_cb) {
|
||||||
GGML_METAL_LOG_INFO("%s: allocating\n", __func__);
|
GGML_METAL_LOG_INFO("%s: allocating\n", __func__);
|
||||||
|
|
||||||
#if TARGET_OS_OSX && !GGML_METAL_NDEBUG
|
#if TARGET_OS_OSX && !GGML_METAL_NDEBUG
|
||||||
|
@ -305,10 +306,61 @@ static struct ggml_backend_metal_context * ggml_metal_init(int n_cb) {
|
||||||
[devices release]; // since it was created by a *Copy* C method
|
[devices release]; // since it was created by a *Copy* C method
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
size_t size_arm;
|
||||||
|
NSMutableString *logMessages = [NSMutableString string];
|
||||||
|
|
||||||
|
// Check for Apple Silicon (M1, M2, etc.)
|
||||||
|
if (sysctlbyname("hw.optional.arm64", NULL, &size_arm, NULL, 0) == 0 && size_arm == 4) {
|
||||||
|
int isAppleSilicon = 0;
|
||||||
|
sysctlbyname("hw.optional.arm64", &isAppleSilicon, &size_arm, NULL, 0);
|
||||||
|
if (isAppleSilicon) {
|
||||||
|
[logMessages appendString:@"This Mac is running on an Apple Silicon (M) Series processor."];
|
||||||
|
} else {
|
||||||
|
[logMessages appendString:@"This Mac is running on an Intel processor."];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
[logMessages appendString:@"This Mac is running on an Intel processor."];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
GGML_METAL_LOG_INFO("%s'%s'\n", __func__, [logMessages UTF8String]);
|
||||||
|
|
||||||
|
|
||||||
// Pick and show default Metal device
|
// Pick and show default Metal device
|
||||||
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
||||||
|
NSString *defaultDeviceName = device.name;
|
||||||
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
|
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
|
||||||
|
|
||||||
|
|
||||||
|
GGML_METAL_LOG_INFO("%s: Passed GPU at index %d:\n", __func__, deviceIndex);
|
||||||
|
|
||||||
|
|
||||||
|
NSArray<id<MTLDevice>> *alldevices = MTLCopyAllDevices();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Check if passed device index is within range
|
||||||
|
if (deviceIndex<=(alldevices.count -1)) {
|
||||||
|
for (NSUInteger i = 0; i < alldevices.count; i++) {
|
||||||
|
id<MTLDevice> selectgpu = alldevices[i];
|
||||||
|
NSString *deviceName = selectgpu.name;
|
||||||
|
// NSLog(@"Device at index %lu: %@", (unsigned long)i, deviceName);
|
||||||
|
if (i == deviceIndex) {
|
||||||
|
if (![defaultDeviceName isEqualToString:deviceName]) {
|
||||||
|
device = selectgpu;
|
||||||
|
// NSLog(@"Device at index %lu: %@", (unsigned long)i, deviceName);
|
||||||
|
GGML_METAL_LOG_INFO("%s: Picking Index GPU Name: %s\n", __func__, [ deviceName UTF8String]);
|
||||||
|
}else{
|
||||||
|
[alldevices release];
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Configure context
|
// Configure context
|
||||||
struct ggml_backend_metal_context * ctx = malloc(sizeof(struct ggml_backend_metal_context));
|
struct ggml_backend_metal_context * ctx = malloc(sizeof(struct ggml_backend_metal_context));
|
||||||
ctx->device = device;
|
ctx->device = device;
|
||||||
|
@ -3238,8 +3290,8 @@ static ggml_guid_t ggml_backend_metal_guid(void) {
|
||||||
return &guid;
|
return &guid;
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_backend_t ggml_backend_metal_init(void) {
|
ggml_backend_t ggml_backend_metal_init(int deviceIndex) {
|
||||||
struct ggml_backend_metal_context * ctx = ggml_metal_init(GGML_DEFAULT_N_THREADS);
|
struct ggml_backend_metal_context * ctx = ggml_metal_init(deviceIndex,GGML_DEFAULT_N_THREADS);
|
||||||
if (ctx == NULL) {
|
if (ctx == NULL) {
|
||||||
GGML_METAL_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
|
GGML_METAL_LOG_ERROR("%s: error: failed to allocate context\n", __func__);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -3295,8 +3347,8 @@ void ggml_backend_metal_capture_next_compute(ggml_backend_t backend) {
|
||||||
GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data); // silence warning
|
GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data); // silence warning
|
||||||
|
|
||||||
GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data) {
|
GGML_CALL ggml_backend_t ggml_backend_reg_metal_init(const char * params, void * user_data) {
|
||||||
return ggml_backend_metal_init();
|
return ggml_backend_metal_init((int) (intptr_t) user_data);
|
||||||
|
|
||||||
GGML_UNUSED(params);
|
GGML_UNUSED(params);
|
||||||
GGML_UNUSED(user_data);
|
// GGML_UNUSED(user_data);
|
||||||
}
|
}
|
||||||
|
|
|
@ -16588,7 +16588,7 @@ struct llama_context * llama_new_context_with_model(
|
||||||
// initialize backends
|
// initialize backends
|
||||||
#if defined(GGML_USE_METAL)
|
#if defined(GGML_USE_METAL)
|
||||||
if (model->n_gpu_layers > 0) {
|
if (model->n_gpu_layers > 0) {
|
||||||
ctx->backend_metal = ggml_backend_metal_init();
|
ctx->backend_metal = ggml_backend_metal_init(model->main_gpu);
|
||||||
if (ctx->backend_metal == nullptr) {
|
if (ctx->backend_metal == nullptr) {
|
||||||
LLAMA_LOG_ERROR("%s: failed to initialize Metal backend\n", __func__);
|
LLAMA_LOG_ERROR("%s: failed to initialize Metal backend\n", __func__);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue