From 8083e20d1948844cdf66abead57dd30a822afd04 Mon Sep 17 00:00:00 2001
From: KerfuffleV2
Date: Sun, 20 Aug 2023 11:23:13 -0600
Subject: [PATCH] More vocab conversion fixes

---
 convert-llama-ggmlv3-to-gguf.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/convert-llama-ggmlv3-to-gguf.py b/convert-llama-ggmlv3-to-gguf.py
index 58fd3a09a..e8ecdf6da 100644
--- a/convert-llama-ggmlv3-to-gguf.py
+++ b/convert-llama-ggmlv3-to-gguf.py
@@ -181,15 +181,15 @@ class GGMLToGGUF:
         print(f'* Adding {hp.n_vocab} vocab item(s)')
         toktypes = []
         for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
-            tt = 1
-            if len(vbytes) > 0 and vbytes[0] == 32:
-                vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
-            elif len(vbytes) == 0:
-                tt = 3
+            tt = 1 # Normal
+            if len(vbytes) == 0:
+                tt = 3 # Control
             elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
                 hv = hex(vbytes[0])[2:].upper()
                 vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
-                tt = 6
+                tt = 6 # Byte
+            else:
+                vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
             toktypes.append(tt)
             tokens.append(vbytes)
             scores.append(vscore)