fix format

This commit is contained in:
Bingxuan Wang 2023-11-22 11:41:11 +08:00
parent 76f6831fce
commit 87fe183d4d
2 changed files with 31 additions and 33 deletions

View file

@@ -171,7 +171,7 @@ class Model:
if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"): if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
return StableLMModel return StableLMModel
return Model return Model
@staticmethod @staticmethod
def from_model_name(model_name: str): def from_model_name(model_name: str):
model_name_lower = model_name.lower() model_name_lower = model_name.lower()
@@ -864,7 +864,7 @@ class DeepseekCoderModel(Model):
def set_vocab(self): def set_vocab(self):
self._set_vocab_gpt2("deepseek_coder") self._set_vocab_gpt2("deepseek_coder")
class StableLMModel(Model): class StableLMModel(Model):
def set_gguf_parameters(self): def set_gguf_parameters(self):

View file

@@ -1,7 +1,5 @@
# tests with BPE tokenizer # tests with BPE tokenizer
import os
import sys
import argparse import argparse
from transformers import AutoTokenizer from transformers import AutoTokenizer
@@ -16,35 +14,35 @@ dir_tokenizer = args.dir_tokenizer
tokenizer = AutoTokenizer.from_pretrained(dir_tokenizer) tokenizer = AutoTokenizer.from_pretrained(dir_tokenizer)
tests = [ tests = [
"", "",
" ", " ",
" ", " ",
" ", " ",
"\t", "\t",
"\n", "\n",
"\t\n", "\t\n",
"Hello world", "Hello world",
" Hello world", " Hello world",
"Hello World", "Hello World",
" Hello World", " Hello World",
" Hello World!", " Hello World!",
"Hello, world!", "Hello, world!",
" Hello, world!", " Hello, world!",
" this is 🦙.cpp", " this is 🦙.cpp",
"w048 7tuijk dsdfhu", "w048 7tuijk dsdfhu",
"нещо на Български", "нещо на Български",
"កាន់តែពិសេសអាចខលចេញ", "កាន់តែពិសេសអាចខលចេញ",
"🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)", "🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
"Hello", "Hello",
" Hello", " Hello",
" Hello", " Hello",
" Hello", " Hello",
" Hello", " Hello",
" Hello\n Hello", " Hello\n Hello",
"\n =", "\n =",
"' era", "' era",
"Hello, y'all! How are you 😁 ?我想在apple工作1314151天", "Hello, y'all! How are you 😁 ?我想在apple工作1314151天",
] ]
for text in tests: for text in tests:
print('text: ', text) print('text: ', text)