common: llama_load_model_from_url split support (#6192)
* llama: llama_split_prefix fix strncpy does not include string termination common: llama_load_model_from_url: - fix header name case sensitive - support downloading additional split in parallel - hide password in url * common: EOL EOF * common: remove redundant LLAMA_CURL_MAX_PATH_LENGTH definition * common: change max url max length * common: minor comment * server: support HF URL options * llama: llama_model_loader fix log * common: use a constant for max url length * common: clean up curl if file cannot be loaded in gguf * server: tests: add split tests, and HF options params * common: move llama_download_hide_password_in_url inside llama_download_file as a lambda * server: tests: enable back Release test on PR * spacing Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * spacing Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * spacing Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
1997577d5e
commit
f482bb2e49
10 changed files with 200 additions and 63 deletions
|
@ -16,7 +16,6 @@ import numpy as np
|
|||
import openai
|
||||
from behave import step
|
||||
from behave.api.async_step import async_run_until_complete
|
||||
from huggingface_hub import hf_hub_download
|
||||
from prometheus_client import parser
|
||||
|
||||
|
||||
|
@ -39,6 +38,8 @@ def step_server_config(context, server_fqdn, server_port):
|
|||
|
||||
context.model_alias = None
|
||||
context.model_file = None
|
||||
context.model_hf_repo = None
|
||||
context.model_hf_file = None
|
||||
context.model_url = None
|
||||
context.n_batch = None
|
||||
context.n_ubatch = None
|
||||
|
@ -68,9 +69,9 @@ def step_server_config(context, server_fqdn, server_port):
|
|||
|
||||
@step('a model file {hf_file} from HF repo {hf_repo}')
|
||||
def step_download_hf_model(context, hf_file, hf_repo):
|
||||
context.model_file = hf_hub_download(repo_id=hf_repo, filename=hf_file)
|
||||
if context.debug:
|
||||
print(f"model file: {context.model_file}")
|
||||
context.model_hf_repo = hf_repo
|
||||
context.model_hf_file = hf_file
|
||||
context.model_file = os.path.basename(hf_file)
|
||||
|
||||
|
||||
@step('a model file {model_file}')
|
||||
|
@ -1079,6 +1080,10 @@ def start_server_background(context):
|
|||
server_args.extend(['--model', context.model_file])
|
||||
if context.model_url:
|
||||
server_args.extend(['--model-url', context.model_url])
|
||||
if context.model_hf_repo:
|
||||
server_args.extend(['--hf-repo', context.model_hf_repo])
|
||||
if context.model_hf_file:
|
||||
server_args.extend(['--hf-file', context.model_hf_file])
|
||||
if context.n_batch:
|
||||
server_args.extend(['--batch-size', context.n_batch])
|
||||
if context.n_ubatch:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue