common: llama_load_model_from_url split support (#6192)
* llama: llama_split_prefix fix strncpy does not include string termination common: llama_load_model_from_url: - fix header name case sensitive - support downloading additional split in parallel - hide password in url * common: EOL EOF * common: remove redundant LLAMA_CURL_MAX_PATH_LENGTH definition * common: change max url max length * common: minor comment * server: support HF URL options * llama: llama_model_loader fix log * common: use a constant for max url length * common: clean up curl if file cannot be loaded in gguf * server: tests: add split tests, and HF options params * common: move llama_download_hide_password_in_url inside llama_download_file as a lambda * server: tests: enable back Release test on PR * spacing Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * spacing Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * spacing Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
1997577d5e
commit
f482bb2e49
10 changed files with 200 additions and 63 deletions
|
@ -306,3 +306,10 @@ struct llama_control_vector_load_info {
|
|||
// Load control vectors, scale each by strength, and add them together.
|
||||
// On error, returns {-1, empty}
|
||||
llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);
|
||||
|
||||
//
|
||||
// Split utils
|
||||
//
|
||||
static const char * const LLM_KV_SPLIT_NO = "split.no";
|
||||
static const char * const LLM_KV_SPLIT_COUNT = "split.count";
|
||||
static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue