From 703764a382689bd905803cfd851ae7942360248b Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 2 Jul 2024 19:29:26 +0300
Subject: [PATCH] convert : use non-fast T5 tokenizer

---
 convert-hf-to-gguf-update.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index 73cf4c4d6..a6b1b61b4 100755
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -147,7 +147,10 @@ for model in models:
 
     # create the tokenizer
     try:
-        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+        if name == "t5":
+            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
+        else:
+            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
     except OSError as e:
         logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
         continue  # Skip to the next model if the tokenizer can't be loaded
@@ -306,7 +309,10 @@ for model in models:
 
     # create the tokenizer
     try:
-        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+        if name == "t5":
+            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
+        else:
+            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
     except OSError as e:
         logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
         continue  # Skip this model and continue with the next one in the loop