Update scripts/fetch_server_test_models.py to new compact hf_repo syntax + switch Hermes models

parent 0c171f5463
commit 9685043274

2 changed files with 25 additions and 16 deletions
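For context on the title: with the compact syntax the quantization tag rides on the repo string itself (e.g. "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M"), so, as the hunks below suggest, tests no longer need to pass a separate hf_file. A minimal sketch of how such a string splits, assuming a plain repo[:quant] format; the helper name is hypothetical and not part of this commit:

from typing import Optional, Tuple

# Hypothetical helper (not from this commit): split the compact
# "owner/repo-GGUF:QUANT" string into its repo and quant-tag parts.
def split_compact_hf_repo(compact: str) -> Tuple[str, Optional[str]]:
    repo, sep, quant = compact.partition(':')
    return repo, (quant if sep else None)

# -> ('bartowski/Qwen2.5-7B-Instruct-GGUF', 'Q4_K_M')
print(split_compact_hf_repo('bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M'))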
@@ -143,10 +143,10 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
     (PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
     (TEST_TOOL, "success", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
     (PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
-    (TEST_TOOL, "success", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    (PYTHON_TOOL, "code", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    (TEST_TOOL, "success", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
-    (PYTHON_TOOL, "code", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+    (TEST_TOOL, "success", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    (PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    (TEST_TOOL, "success", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+    (PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
     (TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
     (PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
     (TEST_TOOL, "success", "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
@@ -252,8 +252,8 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
     ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
     ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
     ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
-    ("NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    ("NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+    ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
     ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
     ("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
     ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
@@ -301,8 +301,8 @@ def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | Non
     (None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
     ('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
     (None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
-    (None, "NousResearch/Hermes-2-Pro-Llama-3-8B:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    (None, "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
+    (None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    (None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
     (None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
     # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
 ])
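In the test hunks above, only the GGUF download repo moves to bartowski's quantized uploads; each entry's (repo, variant) template override still points at the original NousResearch source repo. A small sketch of unpacking one such entry, with TEST_TOOL stubbed for self-containment (the real tool dict lives in the test suite):

# Sketch of one updated parametrize entry; TEST_TOOL is a placeholder here.
TEST_TOOL = {'type': 'function', 'function': {'name': 'test'}}

case = (TEST_TOOL, 'success',
        'bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M',       # weights, compact syntax
        ('NousResearch/Hermes-3-Llama-3.1-8B', 'tool_use'))  # chat-template override
tool, expected_argument_key, hf_repo, template_override = case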
scripts/fetch_server_test_models.py
@@ -16,12 +16,13 @@ import logging
 import os
 from typing import Generator
 from pydantic import BaseModel
+from typing import *
 import subprocess
 
 
 class HuggingFaceModel(BaseModel):
     hf_repo: str
-    hf_file: str
+    hf_file: Optional[str] = None
 
     class Config:
         frozen = True
@@ -40,7 +41,7 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N
         for dec in node.decorator_list:
             if isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute) and dec.func.attr == 'parametrize':
                 param_names = ast.literal_eval(dec.args[0]).split(",")
-                if "hf_repo" not in param_names or "hf_file" not in param_names:
+                if "hf_repo" not in param_names:
                     continue
 
                 raw_param_values = dec.args[1]
@@ -49,7 +50,7 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N
                     continue
 
                 hf_repo_idx = param_names.index("hf_repo")
-                hf_file_idx = param_names.index("hf_file")
+                hf_file_idx = param_names.index("hf_file") if "hf_file" in param_names else None
 
                 for t in raw_param_values.elts:
                     if not isinstance(t, ast.Tuple):
@@ -57,7 +58,7 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N
                         continue
                     yield HuggingFaceModel(
                         hf_repo=ast.literal_eval(t.elts[hf_repo_idx]),
-                        hf_file=ast.literal_eval(t.elts[hf_file_idx]))
+                        hf_file=ast.literal_eval(t.elts[hf_file_idx]) if hf_file_idx is not None else None)
 
 
 if __name__ == '__main__':
@@ -80,14 +81,22 @@ if __name__ == '__main__':
         '../build/bin/Release/llama-cli.exe' if os.name == 'nt' else '../build/bin/llama-cli'))
 
     for m in models:
-        if '<' in m.hf_repo or '<' in m.hf_file:
+        if '<' in m.hf_repo or (m.hf_file is not None and '<' in m.hf_file):
             continue
-        if '-of-' in m.hf_file:
+        if m.hf_file is not None and '-of-' in m.hf_file:
             logging.warning(f'Skipping model at {m.hf_repo} / {m.hf_file} because it is a split file')
             continue
         logging.info(f'Using llama-cli to ensure model {m.hf_repo}/{m.hf_file} was fetched')
-        cmd = [cli_path, '-hfr', m.hf_repo, '-hff', m.hf_file, '-n', '1', '-p', 'Hey', '--no-warmup', '--log-disable', '-no-cnv']
-        if m.hf_file != 'tinyllamas/stories260K.gguf' and not m.hf_file.startswith('Mistral-Nemo'):
+        cmd = [
+            cli_path,
+            '-hfr', m.hf_repo,
+            *([] if m.hf_file is None else ['-hff', m.hf_file]),
+            '-n', '1',
+            '-p', 'Hey',
+            '--no-warmup',
+            '--log-disable',
+            '-no-cnv']
+        if m.hf_file != 'tinyllamas/stories260K.gguf' and 'Mistral-Nemo' not in m.hf_repo:
            cmd.append('-fa')
         try:
             subprocess.check_call(cmd)
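End to end, hf_file is now optional: the collector no longer requires it among the parametrize names, and the fetch loop only passes -hff when a file is present. A condensed, runnable sketch of the collection pass, mirroring the hunks above (the real script wraps results in the frozen HuggingFaceModel shown earlier; anything beyond the diff is an assumption):

import ast
from typing import Iterator, Optional, Tuple

def collect_hf_repos(test_file: str) -> Iterator[Tuple[str, Optional[str]]]:
    # Parse the test file and walk every function's decorators.
    with open(test_file) as f:
        tree = ast.parse(f.read())
    for node in ast.walk(tree):
        if not isinstance(node, ast.FunctionDef):
            continue
        for dec in node.decorator_list:
            # Only @pytest.mark.parametrize(...) calls are of interest.
            if not (isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute)
                    and dec.func.attr == 'parametrize'):
                continue
            param_names = ast.literal_eval(dec.args[0]).split(",")
            if "hf_repo" not in param_names:  # hf_file alone no longer qualifies
                continue
            raw_param_values = dec.args[1]
            if not isinstance(raw_param_values, ast.List):
                continue
            repo_idx = param_names.index("hf_repo")
            file_idx = param_names.index("hf_file") if "hf_file" in param_names else None
            for t in raw_param_values.elts:
                if not isinstance(t, ast.Tuple):
                    continue
                yield (ast.literal_eval(t.elts[repo_idx]),
                       ast.literal_eval(t.elts[file_idx]) if file_idx is not None else None)

For each collected model the loop then shells out to llama-cli; with a compact repo and no hf_file the invocation is effectively llama-cli -hfr <owner/repo-GGUF:QUANT> -n 1 -p Hey --no-warmup --log-disable -no-cnv, with -fa appended except for the stories260K and Mistral-Nemo cases singled out in the diff.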