diff --git a/Makefile-finetune.mak b/Makefile-finetune.mak
index 8639d493b..a000b918b 100644
--- a/Makefile-finetune.mak
+++ b/Makefile-finetune.mak
@@ -152,4 +152,34 @@ client-test:
 	curl --request POST \
 		--url http://localhost:8080/completion \
 		--header "Content-Type: application/json" \
-		--data "${CURL_DATA}"
\ No newline at end of file
+		--data "${CURL_DATA}"
+
+
+bench13b:
+	./main -m ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
+	sleep 50
+	./main -m ../models/ggmls/chinese-llama-2-13b-16k.Q3_K_S.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
+	sleep 50
+	./main -m ../models/ggmls/bc2-13b-chat-q2_k.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
+	sleep 50
+	./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
+	sleep 50
+	./main -m ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
+	sleep 50
+
+bench-lj:
+	make bench13b PROMPT="小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹"
+
+bench-sql:
+	make bench13b PROMPT="展示上个季度所有销售额超过 10000 美元的订单,写出SQL"
+
+bench-gpt:
+	make bench13b PROMPT='写一首藏头诗五言绝句,每句诗的开头字母分别是"莫""勇"二字:'
+
+bench-all: bench-lj bench-sql bench-gpt
+	./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf -n 256 -r "User:" -f prompts/test.txt -t 2 -ngl 10
+
+
+
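Note: the five ./main invocations in bench13b differ only in the model file, so a loop keeps the list in one place. A minimal sketch under the same layout assumptions (GNU Make shell recipe; bench13b-loop is a hypothetical target, not part of this patch):

bench13b-loop:
	for m in openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf chinese-llama-2-13b-16k.Q3_K_S.gguf bc2-13b-chat-q2_k.gguf openbuddy-mistral-7b-v13.1-q4_0.gguf openbuddy-zephyr-7b-v14.1-q5_k_s.gguf; do \
		./main -m ../models/ggmls/$$m -n 256 -p '${PROMPT}' -t 2 -ngl 10; \
		sleep 50; \
	done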
diff --git a/convert-gptneox-hf-to-gguf.py b/convert-gptneox-hf-to-gguf.py
index 02d1fdf16..b767b6164 100755
--- a/convert-gptneox-hf-to-gguf.py
+++ b/convert-gptneox-hf-to-gguf.py
@@ -77,6 +77,7 @@ print("gguf: loading model "+dir_model.name)
 with open(dir_model / "config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
+
 if hparams["architectures"][0] != "GPTNeoXForCausalLM":
     print("Model architecture not supported: " + hparams["architectures"][0])
 
diff --git a/download-TabbyML-Mistral-7B.py b/download-TabbyML-Mistral-7B.py
deleted file mode 100644
index d41d7bbde..000000000
--- a/download-TabbyML-Mistral-7B.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# https://modelscope.cn/models/TabbyML/Mistral-7B/summary
-
-# Download
-import torch
-from modelscope import snapshot_download, Model
-model_dir = snapshot_download("TabbyML/Mistral-7B",cache_dir="../models")
-
-# Convert
-# python convert.py ../models/TabbyML/Mistral-7B/
-
-# Quantize
-# ./quantize ../models/TabbyML/Mistral-7B/ggml-model-f16.gguf ../models/ggml-model-f16-zephyr-7b-beta-q8_0.gguf q8_0
-# ./quantize ../models/TabbyML/Mistral-7B/ggml-model-f16.gguf ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf q5_0
-
-# Inference
-# ./main -m ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf -n 128 -p "How many helicopters can a human eat in one sitting?" -t 2 -ngl 4
-
diff --git a/download-bc2-13b-int4.py b/download-bc2-13b-int4.py
deleted file mode 100644
index b5f138106..000000000
--- a/download-bc2-13b-int4.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import torch
-from modelscope import snapshot_download, Model
-model_dir = snapshot_download("baichuan-inc/Baichuan2-13B-Chat-4bits", revision='v1.0.3')
-model = Model.from_pretrained(model_dir, device_map="balanced", trust_remote_code=True, torch_dtype=torch.float16)
-messages = []
-messages.append({"role": "user", "content": "讲解一下“温故而知新”"})
-response = model(messages)
-print(response)
-messages = response['history'].copy()
-messages.append({"role": "user", "content": "背诵一下将进酒"})
-response = model(messages)
-print(response)
-
-#python convert.py /root/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat-4bits/
-# ./main -m ggml-model-q4_0.gguf -n 128 -p "莫勇开头写一首藏头诗"
-
diff --git a/download-bc2-13b.py b/download-bc2-13b.py
index eeb73b38b..895a1bb9f 100644
--- a/download-bc2-13b.py
+++ b/download-bc2-13b.py
@@ -12,5 +12,6 @@ response = model(messages)
 print(response)
 
 #python convert.py /root/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat-4bits/
-# ./main -m ggml-model-q4_0.gguf -n 128 -p "莫勇开头写一首藏头诗"
+# ./main -m ../models/ggmls/bc2-13b-chat-q2_k.gguf -n 128 -p "莫勇开头写一首藏头诗"
+# ./main -m ../models/ggmls/bc2-13b-chat-q2_k.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
diff --git a/download-bc2-7b.py b/download-bc2-7b.py
deleted file mode 100644
index 4ffb00980..000000000
--- a/download-bc2-7b.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import torch
-from modelscope import snapshot_download, Model
-# model_dir = snapshot_download("baichuan-inc/Baichuan2-7B-Chat", revision='v1.0.3')
-model_dir = snapshot_download("baichuan-inc/Baichuan2-7B-Chat", revision='v1.0.4')
-model = Model.from_pretrained(model_dir, device_map="balanced", trust_remote_code=True, torch_dtype=torch.float16)
-messages = []
-messages.append({"role": "user", "content": "讲解一下“温故而知新”"})
-response = model(messages)
-print(response)
-messages = response['history'].copy()
-messages.append({"role": "user", "content": "背诵一下将进酒"})
-response = model(messages)
-print(response)
-
-#python convert.py /root/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat-4bits/
-# ./main -m ggml-model-q4_0.gguf -n 128 -p "莫勇开头写一首藏头诗"
-
diff --git a/download-chinese-llama2-13b-16k-q3_k.py b/download-chinese-llama2-13b-16k-q3_k.py
index 5d212a71f..501930bb1 100644
--- a/download-chinese-llama2-13b-16k-q3_k.py
+++ b/download-chinese-llama2-13b-16k-q3_k.py
@@ -5,9 +5,8 @@ import torch
 
 # Download
 from modelscope.hub.file_download import model_file_download
-# model_dir = model_file_download(model_id='shaowenchen/chinese-llama-2-13b-16k-gguf',file_path='chinese-llama-2-13b-16k.Q3_K_S.gguf',cache_dir="../models")
-model_dir = model_file_download(model_id='shaowenchen/chinese-llama-2-7b-16k-gguf',file_path='chinese-llama-2-7b-16k.Q2_K.gguf',cache_dir="../models")
+model_dir = model_file_download(model_id='shaowenchen/chinese-llama-2-13b-16k-gguf',file_path='chinese-llama-2-13b-16k.Q3_K_S.gguf',cache_dir="../models")
 
 # Inference
-# ./main -m ../models/shaowenchen/chinese-llama-2-13b-16k-gguf/chinese-llama-2-13b-16k.Q3_K_S.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
-# ./main -m ../models/shaowenchen/chinese-llama-2-7b-16k-gguf/chinese-llama-2-7b-16k.Q2_K.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
\ No newline at end of file
+# ./main -m ../models/ggmls/chinese-llama-2-13b-16k.Q3_K_S.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
+# ./main -m ../models/ggmls/chinese-llama-2-13b-16k.Q3_K_S.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
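All of these download scripts feed the same four-step llama.cpp workflow. A condensed sketch — the <vendor>/<model> placeholders are illustrative, and convert.py, quantize, and main are assumed to sit in the llama.cpp build directory, as elsewhere in this patch:

# 1. download the HF weights with modelscope (see the Python scripts above)
# 2. convert the HF weights to an f16 GGUF
python convert.py ../models/<vendor>/<model>/
# 3. quantize the f16 GGUF to the target type
./quantize ../models/<vendor>/<model>/ggml-model-f16.gguf ../models/ggmls/<model>-q4_0.gguf q4_0
# 4. run inference on the quantized model
./main -m ../models/ggmls/<model>-q4_0.gguf -n 128 -p "your prompt" -t 2 -ngl 10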
diff --git a/download-openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.py b/download-openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.py
new file mode 100644
index 000000000..e71ea6cf7
--- /dev/null
+++ b/download-openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.py
@@ -0,0 +1,14 @@
+# https://modelscope.cn/models/Xorbits/OpenBuddy-Llama2-13B-v11.1-GGML/summary
+
+# Download
+from modelscope.hub.file_download import model_file_download
+model_dir = model_file_download(model_id='Xorbits/OpenBuddy-Llama2-13B-v11.1-GGML',file_path='openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.bin',cache_dir="../models")
+
+# Convert GGML v3 to GGUF (run as a shell command, not inside this script)
+# python convert-llama-ggml-to-gguf.py --input ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.bin --output ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf
+
+# Inference
+# ./main -m ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
+# ./main -m ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
+# ./main -m ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf -ngl 10 -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
+
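A quick sanity check after the GGML-to-GGUF conversion above: GGUF files begin with the ASCII magic "GGUF", so inspecting the first four bytes confirms the output format (path as in the script above):

head -c 4 ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf; echo
# expected output: GGUF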
diff --git a/download-llama2-13b-sql.py b/download-openbuddy-llama2-70b-v10.1-bf16.py
similarity index 78%
rename from download-llama2-13b-sql.py
rename to download-openbuddy-llama2-70b-v10.1-bf16.py
index e4176d52e..fd90c9e41 100644
--- a/download-llama2-13b-sql.py
+++ b/download-openbuddy-llama2-70b-v10.1-bf16.py
@@ -4,11 +4,6 @@
 # The main goal of this model is to act as a professional data agent in vertical industries. With llama-2-13b-chat, business users can query a database directly in natural language instead of mastering complex SQL syntax, which improves their efficiency and reduces their dependence on IT staff.
 # For example, a salesperson can type "展示上个季度所有销售额超过 10000 美元的订单" and llama-2-13b-chat will translate the query into the corresponding SQL, such as "SELECT * FROM orders WHERE sales > 10000 AND quarter = 'Q2';".
 
-import torch
-from modelscope import snapshot_download, Model
-model_dir = snapshot_download("CarbonAgent/llama-2-13b-chat.Q4",cache_dir="../models")
-
-#./main -m ../models/CarbonAgent/llama-2-13b-chat.Q4/llama-2-13b-chat.Q4_0.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出对应的SQL语句" -t 2 -ngl 4
 
 # On September 4, OpenBuddy released OpenBuddy-LLaMA2-70B, a 70B-parameter cross-lingual large model, fully open-sourced in a commercially usable form and now available on the ModelScope community.
@@ -21,9 +16,8 @@ model_dir = snapshot_download("CarbonAgent/llama-2-13b-chat.Q4",cache_dir="../mo
 python convert.py ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/
 
 ./quantize ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/ggml-model-f16.gguf ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q3_k_s.gguf q3_k_s
-./quantize ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/ggml-model-f16.gguf ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q4_k_S.gguf q4_k_s
 
-./main -m ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q2_k.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出对应的SQL语句" -t 2 -ngl 4
+./main -m ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q3_k_s.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出对应的SQL语句" -t 2 -ngl 4
 
 ./main -t 10 -ngl 40 -gqa 8 -m llama-2-70b-chat.ggmlv3.q4_K_M.bin --color -c 4096 --temp 0.7 --repeat_penalty 1.1 -n -1 -p "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\nWrite a story about llamas[/INST]"
 
 ./main -m llama-2-70b.ggmlv3.q4_0.bin -gqa 8 -t 13 -p "Llamas are"
diff --git a/download-openbuddy-mistral-7b-v13.1.py b/download-openbuddy-mistral-7b-v13.1.py
index efeb9845d..80f9818a4 100644
--- a/download-openbuddy-mistral-7b-v13.1.py
+++ b/download-openbuddy-mistral-7b-v13.1.py
@@ -15,11 +15,11 @@ from modelscope import snapshot_download, Model
 # python convert.py ../models/OpenBuddy/openbuddy-mistral-7b-v13.1/
 
 # Quantize
-# ./quantize ../models/OpenBuddy/openbuddy-mistral-7b-v13.1/ggml-model-f16.gguf ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q4_0.gguf q4_0
-# ./quantize ../models/OpenBuddy/openbuddy-mistral-7b-v13.1/ggml-model-f16.gguf ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q2_k.gguf q2_k
+# ./quantize ../models/OpenBuddy/openbuddy-mistral-7b-v13.1/ggml-model-f16.gguf ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf q4_0
 
 # Inference
-# ./main -m ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q4_0.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
-# ./main -m ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q4_0.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
-# ./main -m ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q4_0.gguf -ngl 10 -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
+# ./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
+# ./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
+# ./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf -ngl 10 -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
+# ./main -ngl 32 -m mistral-7b-instruct-v0.1.Q4_K_M.gguf --color -c 4096 --temp 0.7 --repeat_penalty 1.1 -n -1 -p "[INST]{prompt} [/INST]"
diff --git a/download-zephyr-7b-beta.py b/download-openbuddy-zephyr-7b-v14.1.py
similarity index 53%
rename from download-zephyr-7b-beta.py
rename to download-openbuddy-zephyr-7b-v14.1.py
index 09ed8ddee..bb0d3e6e4 100644
--- a/download-zephyr-7b-beta.py
+++ b/download-openbuddy-zephyr-7b-v14.1.py
@@ -1,4 +1,4 @@
-# https://modelscope.cn/models/modelscope/zephyr-7b-beta/summary
+# https://modelscope.cn/models/OpenBuddy/openbuddy-zephyr-7b-v14.1/summary
 
 # A 7B model that reaches GPT-3.5 level and surpasses llama2-70b
 # Zephyr-7B-α is the first model in the Zephyr series of trained language models: a fine-tuned version of Mistral-7B-v0.1, trained with Direct Preference Optimization on a mix of publicly available synthetic datasets.
@@ -18,18 +18,19 @@
 # Download
 import torch
-from modelscope import snapshot_download, Model
-model_dir = snapshot_download("modelscope/zephyr-7b-beta",cache_dir="../models")
+from modelscope import AutoTokenizer, snapshot_download
+from modelscope import AutoModelForCausalLM
+
+model_dir = snapshot_download('OpenBuddy/openbuddy-zephyr-7b-v14.1', revision = 'v1.0.0',cache_dir="../models")
+
 
 # Convert
-python convert.py ../models/modelscope/zephyr-7b-beta/
+# python convert.py ../models/OpenBuddy/openbuddy-zephyr-7b-v14.1/
 
 # Quantize
-# ./quantize ../models/modelscope/zephyr-7b-beta/ggml-model-f16.gguf ../models/ggml-model-f16-zephyr-7b-beta-q8_0.gguf q8_0
-./quantize ../models/modelscope/zephyr-7b-beta/ggml-model-f16.gguf ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf q5_0
+# ./quantize ../models/OpenBuddy/openbuddy-zephyr-7b-v14.1/ggml-model-f16.gguf ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf q5_k_s
 
 # Inference
-./main -m ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf -n 256 -p "为什么我爸妈结婚的时候没邀请我参加婚礼" -t 2 -ngl 4
-./main -m ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf -n 256 -p "能给我讲讲GPT-4吗?" -t 2 -ngl 4
-./main -m ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf -ngl 10 -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
-
+# ./main -m ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
+# ./main -m ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
+# ./main -m ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf -ngl 10 -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
\ No newline at end of file
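The bench13b target only times generation; for a quality comparison across quant types, llama.cpp's perplexity tool can be run on a text corpus. A sketch, assuming the tool was built alongside main and that a corpus file such as wiki.test.raw is available (the corpus file is an assumption, not part of this patch):

./perplexity -m ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf -f wiki.test.raw -t 2 -ngl 10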
diff --git a/download-qwen-Qwen-1_8B-Chat.py b/download-qwen-Qwen-1_8B-Chat.py
new file mode 100644
index 000000000..5f6325f62
--- /dev/null
+++ b/download-qwen-Qwen-1_8B-Chat.py
@@ -0,0 +1,26 @@
+import torch
+from modelscope import AutoTokenizer, AutoModelForCausalLM, snapshot_download
+
+model_dir = snapshot_download('qwen/Qwen-1_8B-Chat',cache_dir="../models")
+model_dir = snapshot_download('Qwen/Qwen-1_8B-Chat-Int4',cache_dir="../models")
+
+tokenizer = AutoTokenizer.from_pretrained("../models/qwen/Qwen-1_8B-Chat", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    "../models/qwen/Qwen-1_8B-Chat",
+    device_map="cpu",
+    trust_remote_code=True
+).eval()
+response, history = model.chat(tokenizer, "你好", history=None)
+print(response)
+response, history = model.chat(tokenizer, "给我讲一个年轻人奋斗创业最终取得成功的故事。", history=history)
+print(response)
+response, history = model.chat(tokenizer, "给这个故事起一个标题", history=history)
+print(response)
+response, _ = model.chat(tokenizer, "你好呀", history=None, system="请用二次元可爱语气和我说话")
+print(response)
+response, _ = model.chat(tokenizer, "My colleague works diligently", history=None, system="You will write beautiful compliments according to needs")
+print(response)
+
diff --git a/examples/server/chat.sh b/examples/server/chat.sh
index 014360121..22fd7d5e4 100755
--- a/examples/server/chat.sh
+++ b/examples/server/chat.sh
@@ -60,13 +60,7 @@ chat_completion() {
             printf "%s" "${CONTENT}"
             ANSWER+="${CONTENT}"
         fi
-    done < <(curl \
-        --silent \
-        --no-buffer \
-        --request POST \
-        --url "${API_URL}/completion" \
-        --header "Content-Type: application/json" \
-        --data-raw "${DATA}")
+    done < <(curl --silent --no-buffer --request POST --url "${API_URL}/completion" --header "Content-Type: application/json" --data-raw "${DATA}")
 
     printf "\n"
 
diff --git a/examples/server/re.sh b/examples/server/re.sh
new file mode 100644
index 000000000..e3c4e8c8e
--- /dev/null
+++ b/examples/server/re.sh
@@ -0,0 +1,5 @@
+wc -l << EOF
+欢迎
+来到
+新世界
+EOF
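Note on the chat.sh hunk above: `<<` starts a heredoc (the shell feeds the literal lines up to the delimiter), while `< <(cmd)` is process substitution, which redirects cmd's output into the loop's stdin — the form chat.sh needs to stream curl's response line by line. re.sh demonstrates the heredoc side; a minimal bash illustration of both:

# heredoc: wc -l counts the two literal lines (prints 2)
wc -l << EOF
one
two
EOF

# process substitution: wc -l counts printf's output (also prints 2)
wc -l < <(printf 'one\ntwo\n')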
diff --git a/nYou are a helpful assistant. 你是一个乐于助人的助手。n b/nYou are a helpful assistant. 你是一个乐于助人的助手。n
deleted file mode 100644
index e69de29bb..000000000
diff --git a/nn请列举5条文明乘车的建议 b/nn请列举5条文明乘车的建议
deleted file mode 100644
index 263ea6890..000000000
--- a/nn请列举5条文明乘车的建议
+++ /dev/null
@@ -1,3 +0,0 @@
-500 Internal Server Error
-[json.exception.parse_error.101] parse error at line 1, column 11: syntax error while parsing value - unexpected end of input; expected '[', '{', or a literal500 Internal Server Error
-[json.exception.parse_error.101] parse error at line 1, column 19: syntax error while parsing value - invalid string: missing closing quote; last read: '"[INST]'
\ No newline at end of file
diff --git a/prompts/chat-with-bob.txt b/prompts/chat-with-bob.txt
index eb211f706..ad494d831 100644
--- a/prompts/chat-with-bob.txt
+++ b/prompts/chat-with-bob.txt
@@ -1,5 +1,4 @@
 Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
-对话的脚本,用户在其中与名为 Bob 的助手进行交互。鲍勃乐于助人、善良、诚实、善于写作,并且总是能立即准确地回答用户的请求。
 
 User: Hello, Bob.
 Bob: Hello. How may I help you today?
diff --git a/prompts/test.txt b/prompts/test.txt
new file mode 100644
index 000000000..d5a8789fe
--- /dev/null
+++ b/prompts/test.txt
@@ -0,0 +1,5 @@
+User: 小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹
+Bob:
+User: 展示上个季度所有销售额超过 10000 美元的订单,写出SQL
+Bob:
+User: 写一首藏头诗五言绝句,每句诗的开头字母分别是"莫""勇"二字:
\ No newline at end of file