Fix vocab space conversion logic

This commit is contained in:
KerfuffleV2 2023-08-20 10:36:57 -06:00
parent f7e61fd1a9
commit 08959c88c2

View file

@@ -182,7 +182,7 @@ class GGMLToGGUF:
toktypes = [] toktypes = []
for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items): for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
tt = 1 tt = 1
if len(vbytes) > 1 and vbytes[0] == 32: if len(vbytes) > 0 and vbytes[0] == 32:
vbytes = vbytes.replace(b' ', b'\xe2\x96\x81') vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
elif len(vbytes) == 0: elif len(vbytes) == 0:
tt = 3 tt = 3