convert_hf : reduce usages of UNKNOWN for InternLM2
This makes the changes from #8321 more consistent with the other changes made here.
This commit is contained in:
parent
afa6119850
commit
1caa20fc7a
1 changed file with 3 additions and 3 deletions
|
@ -2189,7 +2189,7 @@ class InternLM2Model(Model):
|
||||||
toktype = SentencePieceTokenTypes.BYTE
|
toktype = SentencePieceTokenTypes.BYTE
|
||||||
# take care of unused raw token
|
# take care of unused raw token
|
||||||
if piece.startswith('[UNUSED'):
|
if piece.startswith('[UNUSED'):
|
||||||
toktype = SentencePieceTokenTypes.UNKNOWN
|
toktype = SentencePieceTokenTypes.UNUSED
|
||||||
|
|
||||||
tokens.append(text)
|
tokens.append(text)
|
||||||
scores.append(score)
|
scores.append(score)
|
||||||
|
@ -2219,7 +2219,7 @@ class InternLM2Model(Model):
|
||||||
if token == chat_eos_token:
|
if token == chat_eos_token:
|
||||||
chat_eos_token_id = token_id
|
chat_eos_token_id = token_id
|
||||||
token = token.encode("utf-8")
|
token = token.encode("utf-8")
|
||||||
if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
|
if toktypes[token_id] != SentencePieceTokenTypes.UNUSED:
|
||||||
assert(tokens[token_id] == token)
|
assert(tokens[token_id] == token)
|
||||||
tokens[token_id] = token
|
tokens[token_id] = token
|
||||||
scores[token_id] = -1000.0
|
scores[token_id] = -1000.0
|
||||||
|
@ -2238,7 +2238,7 @@ class InternLM2Model(Model):
|
||||||
if token == chat_eos_token:
|
if token == chat_eos_token:
|
||||||
chat_eos_token_id = token_id
|
chat_eos_token_id = token_id
|
||||||
token = token.encode("utf-8")
|
token = token.encode("utf-8")
|
||||||
if toktypes[token_id] != SentencePieceTokenTypes.UNKNOWN:
|
if toktypes[token_id] != SentencePieceTokenTypes.UNUSED:
|
||||||
assert(tokens[token_id] == token)
|
assert(tokens[token_id] == token)
|
||||||
tokens[token_id] = token
|
tokens[token_id] = token
|
||||||
scores[token_id] = -1000.0
|
scores[token_id] = -1000.0
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue