fix format

This commit is contained in:
Bingxuan Wang 2023-11-22 11:41:11 +08:00
parent 76f6831fce
commit 87fe183d4d
2 changed files with 31 additions and 33 deletions

View file

@@ -171,7 +171,7 @@ class Model:
if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
return StableLMModel
return Model
@staticmethod
def from_model_name(model_name: str):
model_name_lower = model_name.lower()
@@ -864,7 +864,7 @@ class DeepseekCoderModel(Model):
def set_vocab(self):
self._set_vocab_gpt2("deepseek_coder")
class StableLMModel(Model):
def set_gguf_parameters(self):

View file

@@ -1,7 +1,5 @@
# tests with BPE tokenizer
import os
import sys
import argparse
from transformers import AutoTokenizer
@@ -16,35 +14,35 @@ dir_tokenizer = args.dir_tokenizer
tokenizer = AutoTokenizer.from_pretrained(dir_tokenizer)
tests = [
"",
" ",
" ",
" ",
"\t",
"\n",
"\t\n",
"Hello world",
" Hello world",
"Hello World",
" Hello World",
" Hello World!",
"Hello, world!",
" Hello, world!",
" this is 🦙.cpp",
"w048 7tuijk dsdfhu",
"нещо на Български",
"កាន់តែពិសេសអាចខលចេញ",
"🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
"Hello",
" Hello",
" Hello",
" Hello",
" Hello",
" Hello\n Hello",
"\n =",
"' era",
"Hello, y'all! How are you 😁 ?我想在apple工作1314151天",
]
"",
" ",
" ",
" ",
"\t",
"\n",
"\t\n",
"Hello world",
" Hello world",
"Hello World",
" Hello World",
" Hello World!",
"Hello, world!",
" Hello, world!",
" this is 🦙.cpp",
"w048 7tuijk dsdfhu",
"нещо на Български",
"កាន់តែពិសេសអាចខលចេញ",
"🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
"Hello",
" Hello",
" Hello",
" Hello",
" Hello",
" Hello\n Hello",
"\n =",
"' era",
"Hello, y'all! How are you 😁 ?我想在apple工作1314151天",
]
for text in tests:
print('text: ', text)