Tool call support (generic + native for Llama, Functionary, Hermes, Mistral, Firefunction, DeepSeek) w/ lazy grammars (#9639)

---------

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
This commit is contained in:
Olivier Chafik 2025-01-30 19:13:58 +00:00 committed by GitHub
parent 27d135c970
commit 8b576b6c55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
48 changed files with 3861 additions and 156 deletions

View file

@@ -26,7 +26,7 @@ from re import RegexFlag
import wget
-DEFAULT_HTTP_TIMEOUT = 10 if "LLAMA_SANITIZE" not in os.environ else 30
+DEFAULT_HTTP_TIMEOUT = 12 if "LLAMA_SANITIZE" not in os.environ else 30
class ServerResponse:
@@ -41,7 +41,7 @@ class ServerProcess:
server_port: int = 8080
server_host: str = "127.0.0.1"
model_hf_repo: str = "ggml-org/models"
-model_hf_file: str = "tinyllamas/stories260K.gguf"
+model_hf_file: str | None = "tinyllamas/stories260K.gguf"
model_alias: str = "tinyllama-2"
temperature: float = 0.8
seed: int = 42
@@ -191,7 +191,7 @@ class ServerProcess:
creationflags=flags,
stdout=sys.stdout,
stderr=sys.stdout,
-env={**os.environ, "LLAMA_CACHE": "tmp"},
+env={**os.environ, "LLAMA_CACHE": "tmp"} if "LLAMA_CACHE" not in os.environ else None,
)
server_instances.add(self)