fix format
This commit is contained in:
parent
76f6831fce
commit
87fe183d4d
2 changed files with 31 additions and 33 deletions
|
@ -171,7 +171,7 @@ class Model:
|
||||||
if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
|
if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
|
||||||
return StableLMModel
|
return StableLMModel
|
||||||
return Model
|
return Model
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_model_name(model_name: str):
|
def from_model_name(model_name: str):
|
||||||
model_name_lower = model_name.lower()
|
model_name_lower = model_name.lower()
|
||||||
|
@ -864,7 +864,7 @@ class DeepseekCoderModel(Model):
|
||||||
|
|
||||||
def set_vocab(self):
|
def set_vocab(self):
|
||||||
self._set_vocab_gpt2("deepseek_coder")
|
self._set_vocab_gpt2("deepseek_coder")
|
||||||
|
|
||||||
|
|
||||||
class StableLMModel(Model):
|
class StableLMModel(Model):
|
||||||
def set_gguf_parameters(self):
|
def set_gguf_parameters(self):
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
# tests with BPE tokenizer
|
# tests with BPE tokenizer
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
|
@ -16,35 +14,35 @@ dir_tokenizer = args.dir_tokenizer
|
||||||
tokenizer = AutoTokenizer.from_pretrained(dir_tokenizer)
|
tokenizer = AutoTokenizer.from_pretrained(dir_tokenizer)
|
||||||
|
|
||||||
tests = [
|
tests = [
|
||||||
"",
|
"",
|
||||||
" ",
|
" ",
|
||||||
" ",
|
" ",
|
||||||
" ",
|
" ",
|
||||||
"\t",
|
"\t",
|
||||||
"\n",
|
"\n",
|
||||||
"\t\n",
|
"\t\n",
|
||||||
"Hello world",
|
"Hello world",
|
||||||
" Hello world",
|
" Hello world",
|
||||||
"Hello World",
|
"Hello World",
|
||||||
" Hello World",
|
" Hello World",
|
||||||
" Hello World!",
|
" Hello World!",
|
||||||
"Hello, world!",
|
"Hello, world!",
|
||||||
" Hello, world!",
|
" Hello, world!",
|
||||||
" this is 🦙.cpp",
|
" this is 🦙.cpp",
|
||||||
"w048 7tuijk dsdfhu",
|
"w048 7tuijk dsdfhu",
|
||||||
"нещо на Български",
|
"нещо на Български",
|
||||||
"កាន់តែពិសេសអាចខលចេញ",
|
"កាន់តែពិសេសអាចខលចេញ",
|
||||||
"🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
|
"🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
|
||||||
"Hello",
|
"Hello",
|
||||||
" Hello",
|
" Hello",
|
||||||
" Hello",
|
" Hello",
|
||||||
" Hello",
|
" Hello",
|
||||||
" Hello",
|
" Hello",
|
||||||
" Hello\n Hello",
|
" Hello\n Hello",
|
||||||
"\n =",
|
"\n =",
|
||||||
"' era",
|
"' era",
|
||||||
"Hello, y'all! How are you 😁 ?我想在apple工作1314151天~",
|
"Hello, y'all! How are you 😁 ?我想在apple工作1314151天~",
|
||||||
]
|
]
|
||||||
|
|
||||||
for text in tests:
|
for text in tests:
|
||||||
print('text: ', text)
|
print('text: ', text)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue