fix func call tokens for internlm2
commit 7b575e70f5
parent 7acfd4e8d5

1 changed file with 3 additions and 3 deletions
@@ -2213,7 +2213,7 @@ class InternLM2Model(Model):

         chat_eos_token = '<|im_end|>'
         chat_eos_token_id = None
-
+        func_call_tokens = ('<|plugin|>', '<|interpreter|>', '<|action_end|>', '<|action_start|>')
         tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
         if tokenizer_config_file.is_file():
             with open(tokenizer_config_file, "r", encoding="utf-8") as f:
@@ -2230,7 +2230,7 @@ class InternLM2Model(Model):
                     tokens[token_id] = token
                     scores[token_id] = -1000.0
                     toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
-                    if foken_data.get("special"):
+                    if foken_data.get("special") and not foken_data["content"] in func_call_tokens:
                         toktypes[token_id] = SentencePieceTokenTypes.CONTROL

         tokenizer_file = self.dir_model / 'tokenizer.json'
@@ -2249,7 +2249,7 @@ class InternLM2Model(Model):
                     tokens[token_id] = token
                     scores[token_id] = -1000.0
                     toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
-                    if foken_data.get("special"):
+                    if foken_data.get("special") and not foken_data["content"] in func_call_tokens:
                         toktypes[token_id] = SentencePieceTokenTypes.CONTROL

         self.gguf_writer.add_tokenizer_model("llama")
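For context, a minimal standalone sketch (not part of the commit) of what the new guard does: an added token that is marked "special" is classified as CONTROL unless its content is one of the InternLM2 function-call markers, which now stay USER_DEFINED. The SentencePieceTokenTypes stand-in, the token IDs, and the sample added_tokens_decoder entries below are assumptions for illustration only.

from enum import IntEnum

# Assumed stand-in for gguf's SentencePieceTokenTypes; values are illustrative.
class SentencePieceTokenTypes(IntEnum):
    NORMAL = 1
    CONTROL = 3
    USER_DEFINED = 4

func_call_tokens = ('<|plugin|>', '<|interpreter|>', '<|action_end|>', '<|action_start|>')

# Hypothetical added_tokens_decoder entries, shaped like tokenizer_config.json.
added_tokens_decoder = {
    "92543": {"content": "<|im_start|>", "special": True},
    "92538": {"content": "<|plugin|>", "special": True},
}

toktypes = {}
for token_id, foken_data in added_tokens_decoder.items():
    token_id = int(token_id)
    toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
    # The fix: special tokens become CONTROL only if they are not function-call markers.
    if foken_data.get("special") and not foken_data["content"] in func_call_tokens:
        toktypes[token_id] = SentencePieceTokenTypes.CONTROL

print(toktypes)  # <|im_start|> -> CONTROL, <|plugin|> -> USER_DEFINED

Presumably the point of keeping the function-call markers as USER_DEFINED is that they remain ordinary renderable tokens in the converted GGUF vocabulary instead of being treated as control tokens.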