ggml : add RPC backend (#6829)
* ggml : add RPC backend The RPC backend proxies all operations to a remote server which runs a regular backend (CPU, CUDA, Metal, etc). * set TCP_NODELAY * add CI workflows * Address review comments * fix warning * implement llama_max_devices() for RPC * Address review comments * Address review comments * wrap sockfd into a struct * implement get_alignment and get_max_size * add get_device_memory * fix warning * win32 support * add README * readme : trim trailing whitespace * Address review comments * win32 fix * Address review comments * fix compile warnings on macos
This commit is contained in:
parent
541600201e
commit
5e31828d3e
12 changed files with 1395 additions and 98 deletions
|
@ -1060,6 +1060,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
|||
#endif // GGML_USE_CUDA_SYCL_VULKAN
|
||||
return true;
|
||||
}
|
||||
if (arg == "--rpc") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
return true;
|
||||
}
|
||||
params.rpc_servers = argv[i];
|
||||
return true;
|
||||
}
|
||||
if (arg == "--no-mmap") {
|
||||
params.use_mmap = false;
|
||||
return true;
|
||||
|
@ -1557,6 +1565,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
|||
printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n");
|
||||
printf(" or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu);
|
||||
}
|
||||
printf(" --rpc SERVERS comma separated list of RPC servers\n");
|
||||
printf(" --verbose-prompt print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false");
|
||||
printf(" --no-display-prompt don't print prompt at generation (default: %s)\n", !params.display_prompt ? "true" : "false");
|
||||
printf(" -gan N, --grp-attn-n N\n");
|
||||
|
@ -1830,6 +1839,7 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
|
|||
if (params.n_gpu_layers != -1) {
|
||||
mparams.n_gpu_layers = params.n_gpu_layers;
|
||||
}
|
||||
mparams.rpc_servers = params.rpc_servers.c_str();
|
||||
mparams.main_gpu = params.main_gpu;
|
||||
mparams.split_mode = params.split_mode;
|
||||
mparams.tensor_split = params.tensor_split;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue