模型整理

2023-12-02 07:56:55 +08:00 · 2023-12-02 07:56:55 +08:00 · 89fd914926
commit 89fd914926
parent 486833214f
18 changed files with 105 additions and 89 deletions
--- a/Makefile-finetune.mak
+++ b/Makefile-finetune.mak
@ -153,3 +153,33 @@ client-test:
 		--url http://localhost:8080/completion \
 		--header "Content-Type: application/json" \
 		--data "${CURL_DATA}"
 # Benchmark helpers.
 # bench13b runs every quantized model under ../models/ggmls on ${PROMPT},
 # sleeping 50 s after each run.
 # bench-lj / bench-sql / bench-gpt re-enter this makefile with a fixed PROMPT.
 # $(MAKE) is used instead of a literal `make` so that -n/-k/-j propagate to
 # the sub-make, and -f is passed explicitly because make does not forward -f
 # to sub-makes (this file is not the default makefile name).
 .PHONY: bench13b bench-lj bench-sql bench-gpt bench-all
 bench13b:
 	./main -m ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
 	sleep 50
 	./main -m ../models/ggmls/chinese-llama-2-13b-16k.Q3_K_S.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
 	sleep 50
 	./main -m ../models/ggmls/bc2-13b-chat-q2_k.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
 	sleep 50
 	./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
 	sleep 50
 	./main -m ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf -n 256 -p '${PROMPT}' -t 2 -ngl 10
 	sleep 50
 # Logic puzzle prompt.
 bench-lj:
 	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) bench13b PROMPT="小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹"
 # Text-to-SQL prompt.
 bench-sql:
 	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) bench13b PROMPT="展示上个季度所有销售额超过 10000 美元的订单,写出SQL"
 # Acrostic-poem prompt.
 bench-gpt:
 	$(MAKE) -f $(firstword $(MAKEFILE_LIST)) bench13b PROMPT='写一首藏头诗五言绝句，每句诗的开头字母分别是"莫""勇"二字：'
 # Runs the three prompt suites above, then one run driven by prompts/test.txt.
 bench-all: bench-lj bench-sql bench-gpt
 	./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf -n 256 -r "User:" -f prompts/test.txt -t 2 -ngl 10
--- a/convert-gptneox-hf-to-gguf.py
+++ b/convert-gptneox-hf-to-gguf.py
@ -77,6 +77,7 @@ print("gguf: loading model "+dir_model.name)
 with open(dir_model / "config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)
 if hparams["architectures"][0] != "GPTNeoXForCausalLM":
    print("Model architecture not supported: " + hparams["architectures"][0])
--- a/download-TabbyML-Mistral-7B.py
+++ b/download-TabbyML-Mistral-7B.py
@ -1,17 +0,0 @@
 # https://modelscope.cn/models/TabbyML/Mistral-7B/summary
 # Download: fetch the TabbyML/Mistral-7B snapshot with cache_dir set to ../models.
 import torch
 from modelscope import snapshot_download, Model
 model_dir = snapshot_download(
     "TabbyML/Mistral-7B",
     cache_dir="../models",
 )
 # Convert (shell command, not Python):
 # python convert.py  ../models/TabbyML/Mistral-7B/
 # Quantize (shell commands).
 # NOTE(review): the output file names mention zephyr-7b-beta although the
 # input is TabbyML/Mistral-7B -- looks copy-pasted; confirm before reuse.
 # ./quantize ../models/TabbyML/Mistral-7B/ggml-model-f16.gguf ../models/ggml-model-f16-zephyr-7b-beta-q8_0.gguf q8_0
 # ./quantize ../models/TabbyML/Mistral-7B/ggml-model-f16.gguf ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf q5_0
 # Inference (shell command):
 # ./main -m ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf -n 128 -p "How many helicopters can a human eat in one sitting?" -t 2 -ngl 4
--- a/download-bc2-13b-int4.py
+++ b/download-bc2-13b-int4.py
@ -1,16 +0,0 @@
 import torch
 from modelscope import snapshot_download, Model
 # Download the pinned 4-bit Baichuan2-13B chat snapshot and load it in fp16.
 model_dir = snapshot_download("baichuan-inc/Baichuan2-13B-Chat-4bits", revision='v1.0.3')
 model = Model.from_pretrained(
     model_dir,
     device_map="balanced",
     trust_remote_code=True,
     torch_dtype=torch.float16,
 )
 def _ask(turns):
     """Send the message list to the model, print the reply and return it."""
     reply = model(turns)
     print(reply)
     return reply
 # First turn starts from a single user message.
 messages = [{"role": "user", "content": "讲解一下“温故而知新”"}]
 response = _ask(messages)
 # Second turn continues from a copy of the history returned by the model.
 messages = response['history'].copy()
 messages.append({"role": "user", "content": "背诵一下将进酒"})
 response = _ask(messages)
 # Follow-up shell commands (not Python):
 #python convert.py /root/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat-4bits/
 # ./main -m ggml-model-q4_0.gguf -n 128 -p "莫勇开头写一首藏头诗"
--- a/download-bc2-13b.py
+++ b/download-bc2-13b.py
@ -12,5 +12,6 @@ response = model(messages)
 print(response)
 #python convert.py /root/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat-4bits/
-# ./main -m ggml-model-q4_0.gguf -n 128 -p "莫勇开头写一首藏头诗"
+# ./main -m ../models/ggmls/bc2-13b-chat-q2_k.gguf -n 128 -p "莫勇开头写一首藏头诗"
 # ./main -m ../models/ggmls/bc2-13b-chat-q2_k.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
--- a/download-bc2-7b.py
+++ b/download-bc2-7b.py
@ -1,17 +0,0 @@
 import torch
 from modelscope import snapshot_download, Model
 # Download the Baichuan2-7B chat snapshot and load it in fp16.
 # Previously pinned revision, kept for reference:
 # model_dir = snapshot_download("baichuan-inc/Baichuan2-7B-Chat", revision='v1.0.3')
 model_dir = snapshot_download("baichuan-inc/Baichuan2-7B-Chat", revision='v1.0.4')
 model = Model.from_pretrained(
     model_dir,
     device_map="balanced",
     trust_remote_code=True,
     torch_dtype=torch.float16,
 )
 def _ask(turns):
     """Send the message list to the model, print the reply and return it."""
     reply = model(turns)
     print(reply)
     return reply
 # First turn starts from a single user message.
 messages = [{"role": "user", "content": "讲解一下“温故而知新”"}]
 response = _ask(messages)
 # Second turn continues from a copy of the history returned by the model.
 messages = response['history'].copy()
 messages.append({"role": "user", "content": "背诵一下将进酒"})
 response = _ask(messages)
 # Follow-up shell commands (not Python).
 # NOTE(review): the convert path below names the 13B-Chat-4bits snapshot,
 # not the 7B model downloaded above -- looks copy-pasted; confirm.
 #python convert.py /root/.cache/modelscope/hub/baichuan-inc/Baichuan2-13B-Chat-4bits/
 # ./main -m ggml-model-q4_0.gguf -n 128 -p "莫勇开头写一首藏头诗"
--- a/download-chinese-llama2-13b-16k-q3_k.py
+++ b/download-chinese-llama2-13b-16k-q3_k.py
@ -5,9 +5,8 @@ import torch
 # 下载
 from modelscope.hub.file_download import model_file_download
-# model_dir = model_file_download(model_id='shaowenchen/chinese-llama-2-13b-16k-gguf',file_path='chinese-llama-2-13b-16k.Q3_K_S.gguf',cache_dir="../models")
+model_dir = model_file_download(model_id='shaowenchen/chinese-llama-2-13b-16k-gguf',file_path='chinese-llama-2-13b-16k.Q3_K_S.gguf',cache_dir="../models")
 model_dir = model_file_download(model_id='shaowenchen/chinese-llama-2-7b-16k-gguf',file_path='chinese-llama-2-7b-16k.Q2_K.gguf',cache_dir="../models")
 # 推理
-# ./main -m ../models/shaowenchen/chinese-llama-2-13b-16k-gguf/chinese-llama-2-13b-16k.Q3_K_S.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
+# ./main -m ../models/ggmls/chinese-llama-2-13b-16k.Q3_K_S.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
-# ./main -m ../models/shaowenchen/chinese-llama-2-7b-16k-gguf/chinese-llama-2-7b-16k.Q2_K.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
+# ./main -m ../models/ggmls/chinese-llama-2-13b-16k.Q3_K_S.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
--- a/download-openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.py
+++ b/download-openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.py
@ -0,0 +1,14 @@
 # https://modelscope.cn/models/Xorbits/OpenBuddy-Llama2-13B-v11.1-GGML/summary
 # Download: fetch the single GGML v3 file with cache_dir set to ../models.
 from modelscope.hub.file_download import model_file_download
 model_dir = model_file_download(model_id='Xorbits/OpenBuddy-Llama2-13B-v11.1-GGML',file_path='openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.bin',cache_dir="../models")
 # Convert GGML -> GGUF. This is a shell command, so it is kept as a comment:
 # left bare it makes this file fail to parse as Python.
 # NOTE(review): the command reads the .bin from ../models/ggmls/, while the
 # download above uses cache_dir ../models -- presumably the file is moved by
 # hand first; confirm.
 # python convert-llama-ggml-to-gguf.py --input ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.bin --output ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf
 # Inference (shell commands):
 # ./main -m ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
 # ./main -m ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
 # ./main -m ../models/ggmls/openbuddy-llama2-13b-v11.1.ggmlv3.Q3_K_S.gguf  -ngl 10  -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
--- a/download-openbuddy-llama2-70b-v10.1-bf16.py
+++ b/download-openbuddy-llama2-70b-v10.1-bf16.py
@ -4,11 +4,6 @@
 # 这个模型的主要目标是在垂直行业中进行专业数据代理。通过使用 llama-2-13b-chat，业务人员可以直接使用自然语言来查询数据库，而无需掌握复杂的 SQL 查询语法。这不仅可以提升业务人员的工作效率，也可以减少对 IT 人员的依赖。
 # 例如，销售人员可以通过输入 "展示上个季度所有销售额超过 10000 美元的订单"，llama-2-13b-chat 会将这个查询转换为对应的 SQL 查询，如 "SELECT * FROM orders WHERE sales > 10000 AND quarter = 'Q2';"。
 import torch
 from modelscope import snapshot_download, Model
 model_dir = snapshot_download("CarbonAgent/llama-2-13b-chat.Q4",cache_dir="../models")
 #./main -m ../models/CarbonAgent/llama-2-13b-chat.Q4/llama-2-13b-chat.Q4_0.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出对应的SQL语句" -t 2 -ngl 4
 # 9月4日，OpenBuddy发布700亿参数跨语言大模型 OpenBuddy-LLaMA2-70B，并以可商用的形态全面开源！现在已经全面上架魔搭ModelScope社区。
@ -21,9 +16,8 @@ model_dir = snapshot_download("CarbonAgent/llama-2-13b-chat.Q4",cache_dir="../mo
 python convert.py  ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/
 ./quantize ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/ggml-model-f16.gguf ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q3_k_s.gguf q3_k_s
 ./quantize ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/ggml-model-f16.gguf ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q4_k_S.gguf q4_k_s
-./main -m ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q2_k.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出对应的SQL语句" -t 2 -ngl 4
+./main -m ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q3_k_s.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出对应的SQL语句" -t 2 -ngl 4
 ./main -t 10 -ngl 40 -gqa 8 -m llama-2-70b-chat.ggmlv3.q4_K_M.bin --color -c 4096 --temp 0.7 --repeat_penalty 1.1 -n -1 -p "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\nWrite a story about llamas[/INST]"
 ./main -m llama-2-70b.ggmlv3.q4_0.bin -gqa 8 -t 13 -p "Llamas are"
--- a/download-openbuddy-mistral-7b-v13.1.py
+++ b/download-openbuddy-mistral-7b-v13.1.py
@ -15,11 +15,11 @@ from modelscope import snapshot_download, Model
 # python convert.py  ../models/OpenBuddy/openbuddy-mistral-7b-v13.1/
 # 量化
-# ./quantize ../models/OpenBuddy/openbuddy-mistral-7b-v13.1/ggml-model-f16.gguf ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q4_0.gguf q4_0
+# ./quantize ../models/OpenBuddy/openbuddy-mistral-7b-v13.1/ggml-model-f16.gguf ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf q4_0
 # ./quantize ../models/OpenBuddy/openbuddy-mistral-7b-v13.1/ggml-model-f16.gguf ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q2_k.gguf q2_k
 #推理
-# ./main -m ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q4_0.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
+# ./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
-# ./main -m ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q4_0.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
+# ./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
-# ./main -m ../models/ggml-model-f16-openbuddy-mistral-7b-v13.1-q4_0.gguf  -ngl 10  -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
+# ./main -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf  -ngl 10  -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
 # ./main -ngl 32 -m mistral-7b-instruct-v0.1.Q4_K_M.gguf --color -c 4096 --temp 0.7 --repeat_penalty 1.1 -n -1 -p "<s>[INST]{prompt} [/INST]"
--- a/download-openbuddy-zephyr-7b-v14.1.py
+++ b/download-openbuddy-zephyr-7b-v14.1.py
@ -1,4 +1,4 @@
-# https://modelscope.cn/models/modelscope/zephyr-7b-beta/summary
+# https://modelscope.cn/models/OpenBuddy/openbuddy-zephyr-7b-v14.1/summary
 # 7b达到gpt3.5  超越 llama2-70b
 # Zephyr-7B-α是一系列 Zephyr 经过训练的语言模型中的第一个模型，是 Mistral-7B-v0.1 的微调版本，在使用直接偏好优化的混合公开合成数据集上进行训练。
@ -18,18 +18,19 @@
 # 下载
 import torch
-from modelscope import snapshot_download, Model
+from modelscope import AutoTokenizer, snapshot_download
-model_dir = snapshot_download("modelscope/zephyr-7b-beta",cache_dir="../models")
+from modelscope import AutoModelForCausalLM
 model_dir = snapshot_download('OpenBuddy/openbuddy-zephyr-7b-v14.1', revision = 'v1.0.0',cache_dir="../models")
 # 转换
-python convert.py  ../models/modelscope/zephyr-7b-beta/
+# python convert.py  ../models/OpenBuddy/openbuddy-zephyr-7b-v14.1/
 # 量化
-# ./quantize ../models/modelscope/zephyr-7b-beta/ggml-model-f16.gguf ../models/ggml-model-f16-zephyr-7b-beta-q8_0.gguf q8_0
+# ./quantize ../models/OpenBuddy/openbuddy-zephyr-7b-v14.1/ggml-model-f16.gguf ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf q5_k_s
 ./quantize ../models/modelscope/zephyr-7b-beta/ggml-model-f16.gguf ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf q5_0
 #推理
-./main -m ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf -n  256 -p "为什么我爸妈结婚的时候没邀请我参加婚礼" -t 2 -ngl 4
+# ./main -m ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出SQL" -t 2 -ngl 10
-./main -m ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf -n  256 -p "能给我讲讲GPT-4吗?" -t 2 -ngl 4
+# ./main -m ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf -n 256 -p "小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹" -t 2 -ngl 10
-./main -m ../models/ggml-model-f16-zephyr-7b-beta-q5_0.gguf   -ngl 10  -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
+# ./main -m ../models/ggmls/openbuddy-zephyr-7b-v14.1-q5_k_s.gguf  -ngl 10  -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-baichuan.txt
--- a/download-qwen-Qwen-1_8B-Chat.py
+++ b/download-qwen-Qwen-1_8B-Chat.py
@ -0,0 +1,26 @@
 import torch
 from modelscope import AutoModelForCausalLM, AutoTokenizer, snapshot_download
 # Download both the regular and the Int4 chat checkpoints (cache_dir ../models).
 # model_dir ends up holding the path returned by the second download.
 model_dir = snapshot_download('qwen/Qwen-1_8B-Chat',cache_dir="../models")
 model_dir = snapshot_download('Qwen/Qwen-1_8B-Chat-Int4',cache_dir="../models")
 # Load tokenizer and model from the local copy of the regular checkpoint, on CPU.
 local_checkpoint = "../models/qwen/Qwen-1_8B-Chat"
 tokenizer = AutoTokenizer.from_pretrained(local_checkpoint, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     local_checkpoint,
     device_map="cpu",
     trust_remote_code=True
 ).eval()
 # Multi-turn conversation: each reply's history is fed into the next call.
 history = None
 for user_turn in (
     "你好",
     "给我讲一个年轻人奋斗创业最终取得成功的故事。",
     "给这个故事起一个标题",
 ):
     response, history = model.chat(tokenizer, user_turn, history=history)
     print(response)
 # Independent single-turn calls, each with its own system prompt and no history.
 for user_turn, system_prompt in (
     ("你好呀", "请用二次元可爱语气和我说话"),
     ("My colleague works diligently", "You will write beautiful compliments according to needs"),
 ):
     response, _ = model.chat(tokenizer, user_turn, history=None, system=system_prompt)
     print(response)
--- a/examples/server/chat.sh
+++ b/examples/server/chat.sh
@ -60,13 +60,7 @@ chat_completion() {
            printf "%s" "${CONTENT}"
            ANSWER+="${CONTENT}"
        fi
-    done < <(curl \
+    done << (curl  --silent   --no-buffer  --request POST   --url "${API_URL}/completion"    --header "Content-Type: application/json"  --data-raw "${DATA}")
        --silent \
        --no-buffer \
        --request POST \
        --url "${API_URL}/completion" \
        --header "Content-Type: application/json" \
        --data-raw "${DATA}")
    printf "\n"
--- a/examples/server/re.sh
+++ b/examples/server/re.sh
@ -0,0 +1,5 @@
 # Here-document check: the three lines between the EOF markers are fed to
 # `wc -l` on stdin, which prints how many lines it received.
 wc -l << EOF 
 欢迎
 来到
 新世界
 EOF
--- a/assistant.
+++ b/assistant.
--- a/3
+++ b/3
@ -1,3 +0,0 @@
 500 Internal Server Error
 [json.exception.parse_error.101] parse error at line 1, column 11: syntax error while parsing value - unexpected end of input; expected '[', '{', or a literal500 Internal Server Error
 [json.exception.parse_error.101] parse error at line 1, column 19: syntax error while parsing value - invalid string: missing closing quote; last read: '"[INST]'
--- a/prompts/chat-with-bob.txt
+++ b/prompts/chat-with-bob.txt
@ -1,5 +1,4 @@
 Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.
 对话的脚本，用户在其中与名为 Bob 的助手进行交互。鲍勃乐于助人、善良、诚实、善于写作，并且总是能立即准确地回答用户的请求。
 User: Hello, Bob.
 Bob: Hello. How may I help you today?
--- a/prompts/test.txt
+++ b/prompts/test.txt
@ -0,0 +1,5 @@
 User: 小丽有3个兄弟, 他们各有2个姐妹, 问小丽有几个姐妹
 Bob:
 User: 展示上个季度所有销售额超过 10000 美元的订单,写出SQL
 Bob:
 User: 写一首藏头诗五言绝句，每句诗的开头字母分别是"莫""勇"二字：