convert : add BPE pre-tokenization for DBRX (#7132)

* Add BPE pre-tokenization for DBRX.

* Add vocab GGUFs.

* Remove test.

* Remove GGUFs.
This commit is contained in:
DAN™ 2024-05-08 06:43:23 -04:00 committed by GitHub
parent 7e0b6a7b3b
commit 4cd621c26d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 9 additions and 0 deletions

View file

@@ -317,6 +317,9 @@ class Model(ABC):
if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
# ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
res = "olmo"
if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
# ref: https://huggingface.co/databricks/dbrx-instruct
res = "dbrx"
if res is None:
logger.warning("\n")