From 53dfbbf553e16264a41f71d23d166bc2f79e323b Mon Sep 17 00:00:00 2001
From: ningshanwutuobang
Date: Tue, 20 Jun 2023 22:57:21 +0800
Subject: [PATCH] add example of PandaGPT

---
 convert-lora-to-ggml.py           |   6 +-
 examples/embd_input/README.md     |  36 +++++++++--
 examples/embd_input/embd_input.py |  29 +++++++++
 examples/embd_input/llava.py      |  12 +---
 examples/embd_input/panda_gpt.py  | 100 ++++++++++++++++++++++++++++++
 5 files changed, 166 insertions(+), 17 deletions(-)
 create mode 100644 examples/embd_input/panda_gpt.py

diff --git a/convert-lora-to-ggml.py b/convert-lora-to-ggml.py
index 9090e8d6d..f43c836f5 100644
--- a/convert-lora-to-ggml.py
+++ b/convert-lora-to-ggml.py
@@ -113,6 +113,10 @@ with open(output_path, "wb") as fout:
 
     write_file_header(fout, params)
     for k, v in model.items():
+        if k.endswith(".default.weight"):
+            k = k.replace(".default.weight", ".weight")
+        if k in ["llama_proj.weight", "llama_proj.bias"]:
+            continue
         if k.endswith("lora_A.weight"):
             if v.dtype != torch.float16 and v.dtype != torch.float32:
                 v = v.float()
@@ -120,7 +124,7 @@ with open(output_path, "wb") as fout:
         else:
             v = v.float()
 
-        t = v.numpy()
+        t = v.detach().numpy()
         tname = translate_tensor_name(k)
         print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
         write_tensor_header(fout, tname, t.shape, t.dtype)
diff --git a/examples/embd_input/README.md b/examples/embd_input/README.md
index 56db072cf..c180d541a 100644
--- a/examples/embd_input/README.md
+++ b/examples/embd_input/README.md
@@ -1,14 +1,17 @@
 ### Examples for input embedding directly
 
+## Requirements
+`libembd_input.so` must be built first.
+Run the following command in the main directory (../../):
+```
+make
+```
+
 ## LLAVA example (llava.py)
 
 1. obtian llava model (following https://github.com/haotian-liu/LLaVA/ , use https://huggingface.co/liuhaotian/LLaVA-13b-delta-v1-1/)
-2. build `libembd_input.so`
-```
-make
-```
-3. convert it to ggml format
-4. llava_projection.pth is [pytorch_model-00003-of-00003.bin](https://huggingface.co/liuhaotian/LLaVA-13b-delta-v1-1/blob/main/pytorch_model-00003-of-00003.bin)
+2. Convert it to ggml format.
+3. `llava_projection.pth` is extracted from [pytorch_model-00003-of-00003.bin](https://huggingface.co/liuhaotian/LLaVA-13b-delta-v1-1/blob/main/pytorch_model-00003-of-00003.bin) with the snippet below:
 
 ```
 import torch
@@ -21,3 +24,24 @@ used_key = ["model.mm_projector.weight","model.mm_projector.bias"]
 
 torch.save({k: dic[k] for k in used_key}, pth_path)
 
+## PandaGPT example (panda_gpt.py)
+
+1. Obtain the PandaGPT LoRA model and rename the file to `adapter_model.bin`. Use [convert-lora-to-ggml.py](../../convert-lora-to-ggml.py) to convert it to ggml format.
+The `adapter_config.json` should be:
+```
+{
+  "peft_type": "LORA",
+  "fan_in_fan_out": false,
+  "bias": null,
+  "modules_to_save": null,
+  "r": 32,
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"]
+}
+```
+2. Prepare the `vicuna` v0 model.
+3. Obtain the [ImageBind](https://dl.fbaipublicfiles.com/imagebind/imagebind_huge.pth) model.
+4. Clone the PandaGPT source.
+5. Check the paths of the PandaGPT source, ImageBind model, LoRA model and vicuna model in panda_gpt.py.
+
diff --git a/examples/embd_input/embd_input.py b/examples/embd_input/embd_input.py
index db5cd0fdb..ce057a89d 100644
--- a/examples/embd_input/embd_input.py
+++ b/examples/embd_input/embd_input.py
@@ -33,6 +33,35 @@ class MyModel:
         s = libc.sampling(self.model)
         return s
 
+    def generate(self, end=""):
+        ret = b""
+        end = end.encode()
+        for _ in range(500):
+            tmp = self.sampling() # .decode()
+            if (ret+tmp).endswith(end):
+                break
+            ret += tmp
+        return ret.decode()
+
+    def stream_generate(self, end=""):
+        ret = b""
+        end = end.encode()
+        head = b""
+        for _ in range(500):
+            tmp = self.sampling() # .decode()
+            ret += tmp
+            try:
+                text = (head + tmp).decode()
+                print(text, end="")
+                head = b""
+            except UnicodeDecodeError:
+                head += tmp
+            if ret.endswith(end):
+                break
+        print("")
+        return ret.decode()
+
+
 if __name__ == "__main__":
     model = MyModel(["main", "--model", "../llama.cpp/models/ggml-vic13b-q4_1.bin", "-c", "2048"])
     # print(model)
diff --git a/examples/embd_input/llava.py b/examples/embd_input/llava.py
index 8489f7927..a1efaddf6 100644
--- a/examples/embd_input/llava.py
+++ b/examples/embd_input/llava.py
@@ -31,7 +31,7 @@ class Llava:
         self.model.eval_string("user: ")
         self.model.eval_string(question)
         self.model.eval_string("\nassistant: ")
-        return self.sampling()
+        return self.model.generate()
 
     def chat_with_image(self, image, question):
         with torch.no_grad():
@@ -49,16 +49,8 @@
             self.model.eval_token(32003-1) # im_end
         self.model.eval_string(question)
         self.model.eval_string("\nassistant: ")
-        return self.sampling()
+        return self.model.generate()
 
-    def sampling(self):
-        ret = b""
-        for _ in range(500):
-            tmp = self.model.sampling() # .decode()
-            if tmp == b"":
-                break
-            ret += tmp
-        return ret.decode()
+
 
 if __name__=="__main__":
     # model form liuhaotian/LLaVA-13b-delta-v1-1
diff --git a/examples/embd_input/panda_gpt.py b/examples/embd_input/panda_gpt.py
new file mode 100644
index 000000000..b1199b95d
--- /dev/null
+++ b/examples/embd_input/panda_gpt.py
@@ -0,0 +1,100 @@
+import sys
+import os
+sys.path.insert(0, os.path.dirname(__file__))
+from embd_input import MyModel
+import numpy as np
+from torch import nn
+import torch
+
+# add the PandaGPT source tree (README step 4) to the import path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "PandaGPT","code","model"))
+from ImageBind.models import imagebind_model
+from ImageBind import data
+
+imagebind_ckpt_path = "./models/panda_gpt/"
+ModalityType = imagebind_model.ModalityType
+max_tgt_len = 400
+
+class PandaGPT:
+    def __init__(self, args):
+        self.visual_encoder, _ = imagebind_model.imagebind_huge(pretrained=True, store_path=imagebind_ckpt_path)
+        self.visual_encoder.eval()
+        self.llama_proj = nn.Linear(1024, 5120) # ImageBind embedding dim -> LLaMA-13B hidden size
+        self.max_tgt_len = max_tgt_len
+        self.model = MyModel(["main", *args])
+        self.generated_text = ""
+        self.device = "cpu"
+
+    def load_projection(self, path):
+        state = torch.load(path, map_location="cpu")
+        self.llama_proj.load_state_dict({
+            "weight": state["llama_proj.weight"],
+            "bias": state["llama_proj.bias"]})
+
+    def chat(self, question):
+        if self.generated_text == "":
+            self.model.eval_string("###")
+        self.model.eval_string(" Human: ")
+        self.model.eval_string(question)
+        self.model.eval_string("\n### Assistant:")
+        ret = self.model.stream_generate(end="###")
+        self.generated_text += ret
+        return ret
+
+    def chat_with_image(self, inputs, question):
+        if self.generated_text == "":
+            self.model.eval_string("###")
+        self.model.eval_string(" Human: ")
+        embds = self.extract_multimodal_feature(inputs)
+        for i in embds:
+            self.model.eval_float(i.T)
+        self.model.eval_string(" " + question + "\n### Assistant:")
+        ret = self.model.stream_generate(end="###")
+        self.generated_text += ret
+        return ret
+
+    def extract_multimodal_feature(self, inputs):
+        features = []
+        for key in ["image", "audio", "video", "thermal"]:
+            if key + "_paths" in inputs:
+                embeds = self.encode_data(key, inputs[key+"_paths"])
+                features.append(embeds)
+        return features
+
+    def encode_data(self, data_type, data_paths):
+
+        type_map = {
+            "image": ModalityType.VISION,
+            "audio": ModalityType.AUDIO,
+            "video": ModalityType.VISION,
+            "thermal": ModalityType.THERMAL,
+        }
+        load_map = {
+            "image": data.load_and_transform_vision_data,
+            "audio": data.load_and_transform_audio_data,
+            "video": data.load_and_transform_video_data,
+            "thermal": data.load_and_transform_thermal_data
+        }
+
+        load_function = load_map[data_type]
+        key = type_map[data_type]
+
+        inputs = {key: load_function(data_paths, self.device)}
+        with torch.no_grad():
+            embeddings = self.visual_encoder(inputs)
+            embeds = embeddings[key]
+            embeds = self.llama_proj(embeds).cpu().numpy()
+        return embeds
+
+
+if __name__ == "__main__":
+    # Load the vicuna v0 base model together with the converted PandaGPT LoRA adapter.
+    a = PandaGPT(["--model", "./models/ggml-vicuna-13b-v0-q4_1.bin", "-c", "2048", "--lora", "./models/panda_gpt/ggml-adapter-model.bin","--temp", "0"])
+    # load_projection() only reads llama_proj.weight / llama_proj.bias, so the
+    # renamed PandaGPT checkpoint (adapter_model.bin) can be passed here as-is.
+    a.load_projection("./models/panda_gpt/adapter_model.bin")
+    a.chat_with_image(
+        {"image_paths": ["./media/llama1-logo.png"]},
+        "what is the text in the picture? 'llama' or 'lambda'?")
+    a.chat("what is the color of it?")
+
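As a supplement to step 1 of the PandaGPT instructions in the README hunk above, the sketch below shows one way the downloaded checkpoint could be laid out so that `convert-lora-to-ggml.py` finds it. The source file name (`pytorch_model.pt`) and the `./models/panda_gpt/` layout are assumptions (the latter mirrors the paths hard-coded in `panda_gpt.py`); the `adapter_config.json` contents are the ones listed in the README.

```
# Sketch: prepare the PandaGPT LoRA checkpoint for conversion (README step 1).
import json
import os
import shutil

# Assumed paths; adjust to wherever the PandaGPT checkpoint was downloaded.
src_ckpt = "./pandagpt_weights/pytorch_model.pt"
dst_dir = "./models/panda_gpt"
os.makedirs(dst_dir, exist_ok=True)

# convert-lora-to-ggml.py expects the checkpoint to be named adapter_model.bin ...
shutil.copy(src_ckpt, os.path.join(dst_dir, "adapter_model.bin"))

# ... with an adapter_config.json next to it (contents taken from the README).
adapter_config = {
    "peft_type": "LORA",
    "fan_in_fan_out": False,
    "bias": None,
    "modules_to_save": None,
    "r": 32,
    "lora_alpha": 32,
    "lora_dropout": 0.1,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
}
with open(os.path.join(dst_dir, "adapter_config.json"), "w") as f:
    json.dump(adapter_config, f, indent=2)

# The conversion is then run from the repository root, e.g.
#   python convert-lora-to-ggml.py ./models/panda_gpt
# which should write ./models/panda_gpt/ggml-adapter-model.bin, the file that
# panda_gpt.py passes to --lora. load_projection() reads llama_proj.weight and
# llama_proj.bias straight from adapter_model.bin, so no extra extraction step
# is needed for the projection.
```

After that, the remaining paths in `panda_gpt.py` (ImageBind checkpoint, vicuna model, PandaGPT source tree) only need to match steps 2-5 of the README.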