diff --git a/examples/server/tests/unit/test_tool_call.py b/examples/server/tests/unit/test_tool_call.py
index b72d92cbd..f15d605b9 100644
--- a/examples/server/tests/unit/test_tool_call.py
+++ b/examples/server/tests/unit/test_tool_call.py
@@ -143,10 +143,10 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
     (PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
     (TEST_TOOL, "success", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
     (PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
-    (TEST_TOOL, "success", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    (PYTHON_TOOL, "code", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    (TEST_TOOL, "success", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
-    (PYTHON_TOOL, "code", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+    (TEST_TOOL, "success", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    (PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    (TEST_TOOL, "success", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+    (PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
     (TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
     (PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
     (TEST_TOOL, "success", "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
@@ -252,8 +252,8 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
     ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
     ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
     ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
-    ("NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    ("NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+    ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
     ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
     ("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
     ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
@@ -301,8 +301,8 @@ def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | Non
     (None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
     ('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
     (None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
-    (None, "NousResearch/Hermes-2-Pro-Llama-3-8B:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    (None, "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
+    (None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    (None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
     (None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
     # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
 ])
diff --git a/scripts/fetch_server_test_models.py b/scripts/fetch_server_test_models.py
index a0783ce3c..82cc2743b 100755
--- a/scripts/fetch_server_test_models.py
+++ b/scripts/fetch_server_test_models.py
@@ -16,12 +16,13 @@ import logging
 import os
 from typing import Generator
 from pydantic import BaseModel
+from typing import *
 import subprocess


 class HuggingFaceModel(BaseModel):
     hf_repo: str
-    hf_file: str
+    hf_file: Optional[str] = None

     class Config:
         frozen = True
@@ -40,7 +41,7 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N
         for dec in node.decorator_list:
             if isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute) and dec.func.attr == 'parametrize':
                 param_names = ast.literal_eval(dec.args[0]).split(",")
-                if "hf_repo" not in param_names or "hf_file" not in param_names:
+                if "hf_repo" not in param_names:
                     continue

                 raw_param_values = dec.args[1]
@@ -49,7 +50,7 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N
                     continue

                 hf_repo_idx = param_names.index("hf_repo")
-                hf_file_idx = param_names.index("hf_file")
+                hf_file_idx = param_names.index("hf_file") if "hf_file" in param_names else None

                 for t in raw_param_values.elts:
                     if not isinstance(t, ast.Tuple):
@@ -57,7 +58,7 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N
                         continue
                     yield HuggingFaceModel(
                         hf_repo=ast.literal_eval(t.elts[hf_repo_idx]),
-                        hf_file=ast.literal_eval(t.elts[hf_file_idx]))
+                        hf_file=ast.literal_eval(t.elts[hf_file_idx]) if hf_file_idx is not None else None)


 if __name__ == '__main__':
@@ -80,14 +81,22 @@ if __name__ == '__main__':
         '../build/bin/Release/llama-cli.exe' if os.name == 'nt' else '../build/bin/llama-cli'))

     for m in models:
-        if '<' in m.hf_repo or '<' in m.hf_file:
+        if '<' in m.hf_repo or (m.hf_file is not None and '<' in m.hf_file):
             continue
-        if '-of-' in m.hf_file:
+        if m.hf_file is not None and '-of-' in m.hf_file:
             logging.warning(f'Skipping model at {m.hf_repo} / {m.hf_file} because it is a split file')
             continue
         logging.info(f'Using llama-cli to ensure model {m.hf_repo}/{m.hf_file} was fetched')
-        cmd = [cli_path, '-hfr', m.hf_repo, '-hff', m.hf_file, '-n', '1', '-p', 'Hey', '--no-warmup', '--log-disable', '-no-cnv']
-        if m.hf_file != 'tinyllamas/stories260K.gguf' and not m.hf_file.startswith('Mistral-Nemo'):
+        cmd = [
+            cli_path,
+            '-hfr', m.hf_repo,
+            *([] if m.hf_file is None else ['-hff', m.hf_file]),
+            '-n', '1',
+            '-p', 'Hey',
+            '--no-warmup',
+            '--log-disable',
+            '-no-cnv']
+        if m.hf_file != 'tinyllamas/stories260K.gguf' and 'Mistral-Nemo' not in m.hf_repo:
             cmd.append('-fa')
         try:
             subprocess.check_call(cmd)