lint : fix
This commit is contained in:
parent
1b9b79dd14
commit
8791e94e3c
3 changed files with 4 additions and 2 deletions
2
.github/workflows/python-lint.yml
vendored
2
.github/workflows/python-lint.yml
vendored
|
@ -20,5 +20,5 @@ jobs:
|
||||||
- name: flake8 Lint
|
- name: flake8 Lint
|
||||||
uses: py-actions/flake8@v2
|
uses: py-actions/flake8@v2
|
||||||
with:
|
with:
|
||||||
ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503"
|
ignore: "E203,E211,E221,E222,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503"
|
||||||
exclude: "examples/*,examples/*/**,*/**/__init__.py"
|
exclude: "examples/*,examples/*/**,*/**/__init__.py"
|
||||||
|
|
|
@ -406,7 +406,7 @@ class Model(ABC):
|
||||||
res = "deepseek-coder"
|
res = "deepseek-coder"
|
||||||
|
|
||||||
if res is None:
|
if res is None:
|
||||||
raise NotImplementedError(f"BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
|
raise NotImplementedError("BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
|
@ -12033,6 +12033,8 @@ struct llm_tokenizer_bpe {
|
||||||
word_collection = unicode_regex_split(text, {
|
word_collection = unicode_regex_split(text, {
|
||||||
// TODO: ??????????????
|
// TODO: ??????????????
|
||||||
//"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+
|
//"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+
|
||||||
|
|
||||||
|
// TODO: this is wrong - need to use ReFlex and update unicode.cpp to support the regex above
|
||||||
"\\p{P}+",
|
"\\p{P}+",
|
||||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
|
||||||
"\\p{N}+",
|
"\\p{N}+",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue