ggml : add RPC backend (#6829)
* ggml : add RPC backend The RPC backend proxies all operations to a remote server which runs a regular backend (CPU, CUDA, Metal, etc). * set TCP_NODELAY * add CI workflows * Address review comments * fix warning * implement llama_max_devices() for RPC * Address review comments * Address review comments * wrap sockfd into a struct * implement get_alignment and get_max_size * add get_device_memory * fix warning * win32 support * add README * readme : trim trailing whitespace * Address review comments * win32 fix * Address review comments * fix compile warnings on macos
This commit is contained in:
parent
541600201e
commit
5e31828d3e
12 changed files with 1395 additions and 98 deletions
|
@ -82,6 +82,7 @@ struct gpt_params {
|
|||
float yarn_beta_slow = 1.0f; // YaRN high correction dim
|
||||
int32_t yarn_orig_ctx = 0; // YaRN original context length
|
||||
float defrag_thold = -1.0f; // KV cache defragmentation threshold
|
||||
std::string rpc_servers = ""; // comma separated list of RPC servers
|
||||
|
||||
ggml_backend_sched_eval_callback cb_eval = nullptr;
|
||||
void * cb_eval_user_data = nullptr;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue