From 654c1cc2796a1c6181e37e69058747df04781d7b Mon Sep 17 00:00:00 2001
From: Radoslav Gerganov
Date: Tue, 30 Apr 2024 14:34:09 +0300
Subject: [PATCH] implement llama_max_devices() for RPC

---
 ggml-rpc.h | 2 ++
 llama.cpp  | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/ggml-rpc.h b/ggml-rpc.h
index 2c26c82bc..6c1f6d091 100644
--- a/ggml-rpc.h
+++ b/ggml-rpc.h
@@ -37,6 +37,8 @@ enum rpc_cmd {
     GRAPH_COMPUTE,
 };
 
+#define GGML_RPC_MAX_SERVERS 16
+
 // backend API
 GGML_API GGML_CALL ggml_backend_t ggml_backend_rpc_init(const std::string & endpoint);
 GGML_API GGML_CALL bool ggml_backend_is_rpc(ggml_backend_t backend);
diff --git a/llama.cpp b/llama.cpp
index 65c494b22..cf60f5ad6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15480,7 +15480,9 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
 }
 
 size_t llama_max_devices(void) {
-#if defined(GGML_USE_METAL)
+#if defined(GGML_USE_RPC)
+    return GGML_RPC_MAX_SERVERS;
+#elif defined(GGML_USE_METAL)
     return 1;
 #elif defined(GGML_USE_CUDA)
     return GGML_CUDA_MAX_DEVICES;
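
Below is a minimal usage sketch, not part of the patch, showing how a caller might consume llama_max_devices() after this change, for example to size a per-device tensor_split array. It assumes the llama.h API of the same period (llama_model_default_params() and the tensor_split field of llama_model_params); in a build with GGML_USE_RPC defined, llama_max_devices() would report GGML_RPC_MAX_SERVERS (16) instead of 1.

#include "llama.h"
#include <cstdio>
#include <vector>

int main(void) {
    // With GGML_USE_RPC defined, this now returns GGML_RPC_MAX_SERVERS (16).
    const size_t n_dev = llama_max_devices();
    std::printf("max devices: %zu\n", n_dev);

    // Illustrative only: split the model evenly across all reported devices.
    std::vector<float> split(n_dev, 1.0f / (float) n_dev);

    llama_model_params mparams = llama_model_default_params();
    mparams.tensor_split = split.data();
    // ... pass mparams to llama_load_model_from_file(...) as usual ...

    return 0;
}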