This commit is contained in:
supermy 2023-12-14 20:05:44 +08:00
parent 89fd914926
commit 03f97c6efa
10 changed files with 6293 additions and 7 deletions

View file

@@ -1,6 +1,7 @@
import torch
from modelscope import snapshot_download, Model
model_dir = snapshot_download("baichuan-inc/Baichuan2-13B-Chat", revision='v1.0.3')
model_dir = snapshot_download("baichuan-inc/Baichuan2-7B-Chat", cache_dir="../models")
# model_dir = snapshot_download("baichuan-inc/Baichuan2-13B-Chat", revision='v1.0.3')
model = Model.from_pretrained(model_dir, device_map="balanced", trust_remote_code=True, torch_dtype=torch.float16)
messages = []
messages.append({"role": "user", "content": "讲解一下“温故而知新”"})

View file

@@ -0,0 +1,14 @@
# # transformers>=4.36 (build from source)
# import torch
# from modelscope import AutoModelForCausalLM, AutoTokenizer, snapshot_download
# model = AutoModelForCausalLM.from_pretrained('AI-ModelScope/mixtral-7b-8expert', low_cpu_mem_usage=True,
# device_map="auto", trust_remote_code=True)
# tok = AutoTokenizer.from_pretrained('AI-ModelScope/mixtral-7b-8expert')
# x = tok.encode("The mistral wind in is a phenomenon ", return_tensors="pt").cuda()
# x = model.generate(x, max_new_tokens=128).cpu()
# print(tok.batch_decode(x))
#模型下载
from modelscope import snapshot_download
model_dir = snapshot_download('AI-ModelScope/mixtral-7b-8expert',cache_dir="../models")

View file

@@ -9,15 +9,19 @@
# 9月4日OpenBuddy发布700亿参数跨语言大模型 OpenBuddy-LLaMA2-70B并以可商用的形态全面开源现在已经全面上架魔搭ModelScope社区。
# 70B模型在能力表现上相较于早前发布的较小规模模型在文本生成、复杂逻辑推理以及自然语言处理等任务有了非常显著的提升。据其内测用户及多项能力测试指标反馈目前70B模型在语言能力和逻辑推理能力可对标为GPT3.5的开源平替OpenBuddy社区希望用开源激发中国大模型行业的潜能。
# GitHub链接https://github.com/OpenBuddy/OpenBuddy
# from modelscope.hub.snapshot_download import snapshot_download
# model_dir = snapshot_download('OpenBuddy/openbuddy-llama2-70b-v10.1-bf16', 'v1.0.0',cache_dir="../models")
# model_dir = snapshot_download('Xorbits/Llama-2-70B-Chat-GGML', 'v1.0.0',cache_dir="../models")
from modelscope.hub.file_download import model_file_download
model_dir = model_file_download(model_id='Xorbits/Llama-2-70B-Chat-GGML',file_path='llama-2-70b-chat.ggmlv3.q3_K_S.bin',cache_dir="../models")
python convert.py ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/
# python convert.py ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/
./quantize ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/ggml-model-f16.gguf ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q3_k_s.gguf q3_k_s
# ./quantize ../models/OpenBuddy/openbuddy-llama2-70b-v10.1-bf16/ggml-model-f16.gguf ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q3_k_s.gguf q3_k_s
./main -m ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q3_k_s.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出对应的SQL语句" -t 2 -ngl 4
./main -t 10 -ngl 40 -gqa 8 -m llama-2-70b-chat.ggmlv3.q4_K_M.bin --color -c 4096 --temp 0.7 --repeat_penalty 1.1 -n -1 -p "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\nWrite a story about llamas[/INST]"
# ./main -m ../models/ggmls/openbuddy-llama2-70b-v10.1-bf16-q3_k_s.gguf -n 128 -p "展示上个季度所有销售额超过 10000 美元的订单,写出对应的SQL语句" -t 2 -ngl 4
# ./main -t 10 -ngl 40 -gqa 8 -m llama-2-70b-chat.ggmlv3.q4_K_M.bin --color -c 4096 --temp 0.7 --repeat_penalty 1.1 -n -1 -p "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\nWrite a story about llamas[/INST]"
./main -m llama-2-70b.ggmlv3.q4_0.bin -gqa 8 -t 13 -p "Llamas are"
# ./main -m llama-2-70b.ggmlv3.q4_0.bin -gqa 8 -t 13 -p "Llamas are"

View file

@@ -9,7 +9,7 @@
# 下载
import torch
from modelscope import snapshot_download, Model
# model_dir = snapshot_download("OpenBuddy/openbuddy-mistral-7b-v13.1", revision = 'v1.0.0',cache_dir="../models")
model_dir = snapshot_download("OpenBuddy/openbuddy-mistral-7b-v13.1", revision = 'v1.0.0',cache_dir="../models")
# 转换
# python convert.py ../models/OpenBuddy/openbuddy-mistral-7b-v13.1/

View file

@@ -1,15 +1,38 @@
# llama.cpp/example/embedding
Embedding层(嵌入层)的一个作用——降维,降维的原理就是矩阵乘法。
![Alt text](image-1.png)
one-hot这是它最明显的缺点过于稀疏时过度占用资源。
![Alt text](image.png)
把一个A中的12个元素的矩阵变成C中6个元素的矩阵直观上大小是不是缩小了一半
Embedding的又一个作用对低维的数据进行升维把一些其他特征给放大或者把笼统的特征给分开了。
由此可见,距离的远近会影响我们的观察效果。
同理也是一样的,低维的数据可能包含的特征是非常笼统的,我们需要不停地拉近拉远来改变我们的感受,让我们对这幅图有不同的观察点,找出我们要的"茬"。
语义理解中Embedding意义
通过将两个无法比较的文字映射成向量,接下来就能实现对他们的计算。
![Alt text](image-2.png)
queen皇后= king国王- man男人+ woman女人
这样计算机能明白,“皇后啊,就是女性的国王呗!”
walked过去式= walking进行时- swimming进行时+ swam过去式
同理计算机也能明白“walked就是walking的过去式啦”另外向量间的距离也可能会建立联系比方说“北京”是“中国”的首都“巴黎”是“法国”的首都那么向量|中国|-|北京|=|法国|-|巴黎|
This example demonstrates how to generate a high-dimensional embedding vector for a given text with llama.cpp.
此示例演示如何使用 llama.cpp 生成给定文本的高维嵌入向量。
## Quick Start
To get started right away, run the following command, making sure to use the correct path for the model you have:
若要立即开始,请运行以下命令,确保对已有的模型使用正确的路径:
### Unix-based systems (Linux, macOS, etc.):
```bash
./embedding -m ./path/to/model --log-disable -p "Hello World!" 2>/dev/null
./embedding -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf --log-disable -p "Hello World!" 2>/dev/null
./embedding -m ../models/ggmls/openbuddy-mistral-7b-v13.1-q4_0.gguf --log-disable -p "清晨早雨浥轻尘" 2>/dev/null
```
### Windows:

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

File diff suppressed because one or more lines are too long

5902
sgyy.txt Normal file

File diff suppressed because it is too large Load diff