diff --git a/examples/xgenmm/convert.sh b/examples/xgenmm/convert.sh
index a2829a5d3..c8fe8ba50 100644
--- a/examples/xgenmm/convert.sh
+++ b/examples/xgenmm/convert.sh
@@ -1,16 +1,48 @@
 source /export/share/yutong/miniconda3/bin/activate
 conda activate xgenmm-flamingo
-# which python
-# # step 1: surgery
-# python xgenmm_surgery.py
+which python
 
-# # step 2: convert vit + projector to gguf
+# ======= siglip_kosmos_phi3_4k_instruct =======
 
-# python xgenmm_convert_image_encoder_to_gguf.py \
-#     --surgery_dir /export/share/yutong/xgenmm/llamacpp_wd \
-#     --output_dirname gguf_test \
-#     --version siglip_kosmos_phi3_4k_instruct \
-#     --use_f32
+# # # step 1: surgery
+# # python xgenmm_surgery.py
+
+# # # step 2: convert vit + projector to gguf
+
+# # python xgenmm_convert_image_encoder_to_gguf.py \
+# #     --surgery_dir /export/share/yutong/xgenmm/llamacpp_wd \
+# #     --output_dirname gguf \
+# #     --version siglip_kosmos_phi3_4k_instruct \
+# #     --use_f32
+
+# # step 3: convert llm to gguf
+# # https://github.com/ggerganov/llama.cpp/discussions/7927
+# cd ../../
+# # HF_TOKEN=
+# # downloads the tokenizer models of the specified models from Huggingface; generates the get_vocab_base_pre() function for convert_hf_to_gguf.py
+# # python convert_hf_to_gguf_update.py $HF_TOKEN
+
+
+# LLM_PATH=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct/llm
+# outtype=f32
+# LLM_OUTPUT_FILE=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct/gguf/phi3_mini_4k_instruct_$outtype.gguf
+# echo $LLM_OUTPUT_FILE
+# python convert_hf_to_gguf.py $LLM_PATH --outfile $LLM_OUTPUT_FILE --outtype $outtype
+
+
+# ======= siglip_kosmos_phi3_4k_instruct_bf16_patch128 =======
+
+CKPT_PATH=/export/share/manli_shu/models/open-flamingo-dev/fixed_offset-bf16-maxlen2048-newsamplerv1-anyres_patch128-kosmos_non_instruct-phi3_4k_instruct_nq128_pre_V3_6-SFT_v3.6.1.v2-mantis-mix-v0.3.5-continue-8x16-ckpt0/checkpoint_0.pt
+VERSION=siglip_kosmos_phi3_4k_instruct_bf16_patch128
+SAVE_PATH=/export/share/yutong/xgenmm/llamacpp_wd
+# step 1: surgery
+python xgenmm_surgery.py --ckpt_pth $CKPT_PATH --save_pth $SAVE_PATH --version $VERSION
+# step 2: convert vit + projector to gguf
+python xgenmm_convert_image_encoder_to_gguf.py \
+    --surgery_dir $SAVE_PATH\
+    --output_dirname gguf \
+    --version $VERSION \
+    --use_f32
 
 # step 3: convert llm to gguf
 # https://github.com/ggerganov/llama.cpp/discussions/7927
@@ -19,9 +51,8 @@ cd ../../
 # downloads the tokenizer models of the specified models from Huggingface; generates the get_vocab_base_pre() function for convert_hf_to_gguf.py
 # python convert_hf_to_gguf_update.py $HF_TOKEN
 
-
-LLM_PATH=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct/llm
-outtype=f32
-LLM_OUTPUT_FILE=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct/gguf/phi3_mini_4k_instruct_$outtype.gguf
-echo $LLM_OUTPUT_FILE
-python convert_hf_to_gguf.py $LLM_PATH --outfile $LLM_OUTPUT_FILE --outtype $outtype
\ No newline at end of file
+# go to llm folder and nano config.json change vocab_size to 32064
+LLM_PATH=$SAVE_PATH/$VERSION/llm
+OUTTYPE=f16
+LLM_OUTPUT_FILE=$SAVE_PATH/$VERSION/gguf/phi3_mini_4k_instruct_$OUTTYPE.gguf
+python convert_hf_to_gguf.py $LLM_PATH --outfile $LLM_OUTPUT_FILE --outtype $OUTTYPE
\ No newline at end of file
diff --git a/examples/xgenmm/run_cli.sh b/examples/xgenmm/run_cli.sh
index dcbee799c..5da2bd376 100644
--- a/examples/xgenmm/run_cli.sh
+++ b/examples/xgenmm/run_cli.sh
@@ -41,17 +41,24 @@ make xgenmm-cli
 # Q="What is card holder's name?"
 # Q="What is the transaction date?"
 # Q="What is the phone number of this resturant?"
-Q="Who is the attendant?"
-# Q="Who is the cashier?"
+# Q="Who is the attendant?"
+Q="Who is the cashier?"
 # Q="Briefly describe this image."
 prompt="<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.<|end|>\n<|user|>\n\n $Q<|end|>\n<|assistant|>\n"
 
 echo $prompt
-model=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct/gguf/phi3_mini_4k_instruct_f32.gguf
-# model=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct/gguf/phi3_mini_4k_instruct_f16.gguf
+# base_path=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct/gguf
+# # model=$base_path/phi3_mini_4k_instruct_f32.gguf
+# model=$base_path/phi3_mini_4k_instruct_f16.gguf
+# mmproj=$base_path/mmproj-model-f32.gguf
+
+base_path=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct_bf16_patch128/gguf
+model=$base_path/phi3_mini_4k_instruct_f16.gguf
+mmproj=$base_path/mmproj-model-f32.gguf
+
 ./xgenmm-cli --model $model\
-    --mmproj /export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct/gguf_test/mmproj-model-f32.gguf \
+    --mmproj $mmproj \
     --image /export/home/llama.cpp/examples/xgenmm/imgs/receipt.jpg\
     --prompt "$prompt" \
     --seed 42 --ctx-size 4096 --predict 1024 \
-    --temp 0.8 --verbose-prompt --color --ubatch-size 1280
\ No newline at end of file
+    --temp 0.0 --verbose-prompt --color --ubatch-size 1280
\ No newline at end of file
diff --git a/xgenmm-cli b/xgenmm-cli
index 2d1a1e430..ef1407df1 100755
Binary files a/xgenmm-cli and b/xgenmm-cli differ