diff --git a/examples/llava/README.md b/examples/llava/README.md index 323c5fdd0..c1c030951 100644 --- a/examples/llava/README.md +++ b/examples/llava/README.md @@ -49,8 +49,12 @@ python ./convert.py ../llava-v1.5-7b Now both the LLaMA part and the image encoder is in the `llava-v1.5-7b` directory. +## LLaVA 1.6 + +- Use `llava-surgery-v2.py` + ## TODO -- [ ] Support non-CPU backend for the image encoding part. +- [x] Support non-CPU backend for the image encoding part. - [ ] Support different sampling methods. - [ ] Support more model variants. diff --git a/examples/llava/convert-image-encoder-to-gguf.py b/examples/llava/convert-image-encoder-to-gguf.py index 61a147037..3988da70c 100644 --- a/examples/llava/convert-image-encoder-to-gguf.py +++ b/examples/llava/convert-image-encoder-to-gguf.py @@ -78,9 +78,9 @@ ap.add_argument("--text-only", action="store_true", required=False, help="Save a text-only model. It can't be used to encode images") ap.add_argument("--vision-only", action="store_true", required=False, help="Save a vision-only model. It can't be used to encode texts") -ap.add_argument("--clip_model_is_vision", action="store_true", required=False, +ap.add_argument("--clip-model-is-vision", action="store_true", required=False, help="The clip model is a pure vision model (ShareGPT4V vision extract for example)") -ap.add_argument("--clip_model_is_openclip", action="store_true", required=False, +ap.add_argument("--clip-model-is-openclip", action="store_true", required=False, help="The clip model is from openclip (for ViT-SO400M type))") ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.") ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp", choices=["mlp", "ldp"], default="mlp") @@ -89,8 +89,8 @@ ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. Defaul # Example --image_mean 0.5 0.5 0.5 --image_std 0.5 0.5 0.5 default_image_mean = [0.48145466, 0.4578275, 0.40821073] default_image_std = [0.26862954, 0.26130258, 0.27577711] -ap.add_argument('--image_mean', type=float, nargs='+', help='Mean of the images for normalization (overrides processor) ', default=None) -ap.add_argument('--image_std', type=float, nargs='+', help='Standard deviation of the images for normalization (overrides processor)', default=None) +ap.add_argument('--image-mean', type=float, nargs='+', help='Mean of the images for normalization (overrides processor) ', default=None) +ap.add_argument('--image-std', type=float, nargs='+', help='Standard deviation of the images for normalization (overrides processor)', default=None) # with proper args = ap.parse_args()