convert : add comments

This commit is contained in:
Georgi Gerganov 2024-04-28 22:10:04 +03:00
parent 02fd977fe1
commit 0f9058ceec
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 12 additions and 0 deletions

View file

@ -1,3 +1,12 @@
# This script downloads the tokenizer models of the specified models from Hugging Face and
# generates the get_vocab_base_pre() function for convert-hf-to-gguf.py
#
# This is needed to identify the type of pre-tokenizer used by each model and to pass that
# information to llama.cpp via the GGUF header, so that llama.cpp can implement the same
# pre-tokenizer.
#
# ref: https://github.com/ggerganov/llama.cpp/pull/6920
#
# Instructions:
#
# - Add a new model to the "models" list
@ -9,6 +18,7 @@
# - Update llama.cpp with the new pre-tokenizer if necessary
#
# TODO: generate tokenizer tests for llama.cpp
# TODO: automate the update of convert-hf-to-gguf.py
#
import os
@ -138,6 +148,7 @@ src_func += "\n"
src_func += " res = None\n"
src_func += "\n"
src_func += " # NOTE: if you get an error here, you need to add the model to the if-elif chain below\n"
src_func += " # don't do this manually - use the convert-hf-to-gguf-update.py script!\n"
src_func += f"{src_ifs}\n"
src_func += " if res is None:\n"
src_func += " print( \"\\n\")\n"

View file

@ -282,6 +282,7 @@ class Model(ABC):
res = None
# NOTE: if you get an error here, you need to add the model to the if-elif chain below
# don't do this manually - use the convert-hf-to-gguf-update.py script!
if chkhsh == "0fc850edd52197e357970116fbf58f6c2567f259cdc1bfc3df081d7e4bc658c1":
# ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
res = "llama-v3"