From aefac1e5cbf6d9bd7a400ccc8396c845333bc7b0 Mon Sep 17 00:00:00 2001
From: ochafik <ochafik@google.com>
Date: Mon, 28 Oct 2024 23:57:23 +0000
Subject: [PATCH] `tool-call`: update scripts/fetch_server_test_models.py

---
 examples/server/tests/README.md     |  7 +++++++
 scripts/fetch_server_test_models.py | 19 +++++++++++++++----
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
index 10f22c447..26dbf582c 100644
--- a/examples/server/tests/README.md
+++ b/examples/server/tests/README.md
@@ -62,3 +62,10 @@ After changing logic in `steps.py`, ensure that `@bug` and `@wrong_usage` scenar
 ```shell
 ./tests.sh --no-skipped --tags bug,wrong_usage || echo "should failed but compile"
 ```
+
+Some tests (especially `@slow` ones) require model downloads. Because downloading during a test run can cause the tests to time out, you can pre-download the models into the cache ahead of time with:
+
+```shell
+pip install -r examples/server/tests/requirements.txt
+python scripts/fetch_server_test_models.py
+```
diff --git a/scripts/fetch_server_test_models.py b/scripts/fetch_server_test_models.py
index 2686954aa..e7d1aa13b 100644
--- a/scripts/fetch_server_test_models.py
+++ b/scripts/fetch_server_test_models.py
@@ -9,12 +9,13 @@
         python scripts/fetch_server_test_models.py
         ( cd examples/server/tests && ./tests.sh --tags=slow )
 '''
-import os
 from behave.parser import Parser
 import glob
-import re
+import os
 from pydantic import BaseModel
+import re
 import subprocess
+import sys
 
 
 class HuggingFaceModel(BaseModel):
@@ -60,8 +61,18 @@ cli_path = os.environ.get(
         os.path.dirname(__file__),
         '../build/bin/Release/llama-cli.exe' if os.name == 'nt' else '../build/bin/llama-cli'))
 
-for m in models:
+for m in sorted(models, key=lambda model: (model.hf_repo, model.hf_file)):  # repo-then-file order, so ties on repo are ordered too
     if '<' in m.hf_repo or '<' in m.hf_file:
         continue
+    if '-of-' in m.hf_file:  # '-of-' in the file name marks a split file; these are reported and skipped
+        print(f'# Skipping model at {m.hf_repo} / {m.hf_file} because it is a split file', file=sys.stderr)
+        continue
     print(f'# Ensuring model at {m.hf_repo} / {m.hf_file} is fetched')
-    subprocess.check_call([cli_path, '-hfr', m.hf_repo, '-hff', m.hf_file, '-fa', '-n', '1', '-p', 'Hey', '--no-warmup'])
+    cmd = [cli_path, '-hfr', m.hf_repo, '-hff', m.hf_file, '-n', '1', '-p', 'Hey', '--no-warmup', '--log-disable']
+    if m.hf_file != 'tinyllamas/stories260K.gguf':  # NOTE(review): -fa is withheld for this one model only; reason not stated here - confirm
+        cmd.append('-fa')
+    try:
+        subprocess.check_call(cmd)
+    except subprocess.CalledProcessError:  # non-zero exit from llama-cli: report the exact command, then abort the script
+        print(f'# Failed to fetch model at {m.hf_repo} / {m.hf_file} with command:\n  {" ".join(cmd)}', file=sys.stderr)
+        sys.exit(1)  # sys.exit rather than the site-provided exit(), which is not guaranteed to exist