From 7f156a7223bf93144f3fa12e94ac8a1a138be782 Mon Sep 17 00:00:00 2001 From: Iaroslav Chelombitko <57654715+AragonerUA@users.noreply.github.com> Date: Sun, 23 Jun 2024 18:51:12 +0300 Subject: [PATCH] Resolving the problem in chktxt Resolving the problem of different chktxt's in convert-hf-to-gguf-update.py and convert-hf-to-gguf.py. --- convert-hf-to-gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 3107b69f7..65e4937f8 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -410,7 +410,7 @@ class Model: # we will use this unique identifier to write a "tokenizer.ggml.pre" entry in the GGUF file which we can # use in llama.cpp to implement the same pre-tokenizer - chktxt = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶\u200d🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````""""......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL' + chktxt = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶\u200d🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````""""......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL додаємо декілька Українських токенів' chktok = tokenizer.encode(chktxt) chkhsh = sha256(str(chktok).encode()).hexdigest()