Ignore unusable json values
This commit is contained in:
parent
ca1fc20508
commit
41a2ed03e7
1 changed file with 7 additions and 6 deletions
13
convert.py
13
convert.py
|
@@ -283,10 +283,12 @@ class SentencePieceVocab:
|
||||||
else:
|
else:
|
||||||
tokenizer_config = {}
|
tokenizer_config = {}
|
||||||
for key, value in tokenizer_config.items():
|
for key, value in tokenizer_config.items():
|
||||||
assert isinstance(value, dict) or isinstance(value, str)
|
if not isinstance(value, dict) or not isinstance(value, str):
|
||||||
if key not in TOKEN_NAME_TO_ID or TOKEN_NAME_TO_ID[key] == -1:
|
|
||||||
continue
|
continue
|
||||||
self.special_tokens_map[TOKEN_NAME_TO_ID[key]] = value["content"] if isinstance(value, dict) else value
|
token_id = TOKEN_NAME_TO_ID.get(key, -1)
|
||||||
|
if token_id == -1:
|
||||||
|
continue
|
||||||
|
self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value
|
||||||
|
|
||||||
special_tokens: Dict[str, Any]
|
special_tokens: Dict[str, Any]
|
||||||
if fname_special_tokens is not None:
|
if fname_special_tokens is not None:
|
||||||
|
@@ -294,10 +296,9 @@ class SentencePieceVocab:
|
||||||
else:
|
else:
|
||||||
special_tokens = {}
|
special_tokens = {}
|
||||||
for key, value in special_tokens.items():
|
for key, value in special_tokens.items():
|
||||||
assert isinstance(value, dict) or isinstance(value, str)
|
if not isinstance(value, dict) or not isinstance(value, str):
|
||||||
if key not in TOKEN_NAME_TO_ID:
|
|
||||||
continue
|
continue
|
||||||
token_id = TOKEN_NAME_TO_ID[key]
|
token_id = TOKEN_NAME_TO_ID.get(key, -1)
|
||||||
if token_id == -1 or token_id in self.special_tokens_map:
|
if token_id == -1 or token_id in self.special_tokens_map:
|
||||||
continue
|
continue
|
||||||
self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value
|
self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue