unicode : support \p{N}, \p{L} and \p{P} natively
This commit is contained in:
parent
ce5485aee0
commit
91eaa414bf
7 changed files with 94 additions and 26 deletions
|
@@ -111,7 +111,7 @@ if fname_tok:
|
|||
# f.write(str(x) + ' \' ' + tokenizer.decode(x) + '\'\n')
|
||||
# else:
|
||||
# f.write(str(x) + ' \'' + tokenizer.decode(x) + '\'\n')
|
||||
f.write(str(x) + ' \'' + tokenizer.decode(x) + '\'\n')
|
||||
f.write(str(x) + ' \'' + tokenizer.decode(x).strip() + '\'\n')
|
||||
print('len(res): ', len(res))
|
||||
print('len(lines): ', len(lines))
|
||||
print('results written to: ', fname_out)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue