llama : add support for SmolLm pre-tokenizer (#8609)

* Adding SmolLM Pre Tokenizer * Update convert_hf_to_gguf_update.py Co-authored-by: compilade <git@compilade.net> * Update src/llama.cpp Co-authored-by: compilade <git@compilade.net> * handle regex * removed .inp and out .out ggufs --------- Co-authored-by: compilade <git@compilade.net>
2024-07-22 10:43:01 -04:00 · 2024-07-22 10:43:01 -04:00 · d94c6e0ccb
commit d94c6e0ccb
parent 566daa5a5b
4 changed files with 10 additions and 0 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@ -597,6 +597,9 @@ class Model:
        if chkhsh == "63b97e4253352e6f357cc59ea5b583e3a680eaeaf2632188c2b952de2588485e":
            # ref: https://huggingface.co/mistralai/Mistral-Nemo-Base-2407
            res = "tekken"
+        if chkhsh == "855059429035d75a914d1eda9f10a876752e281a054a7a3d421ef0533e5b6249":
+            # ref: https://huggingface.co/HuggingFaceTB/SmolLM-135M
+            res = "smollm"

        if res is None:
            logger.warning("\n")