diff --git a/examples/xgenmm/quantize.sh b/examples/xgenmm/quantize.sh index afc9b9041..d776c7519 100644 --- a/examples/xgenmm/quantize.sh +++ b/examples/xgenmm/quantize.sh @@ -5,6 +5,6 @@ quantize_method=Q4_K_M outname=${model_name}_${quantize_method} input_model_path=$gguf_dir/$model_name.gguf output_model_path=$gguf_dir/$outname.gguf -echo $outname -cd ../../ +echo $input_model_path +echo $output_model_path ./llama-quantize $input_model_path $output_model_path $quantize_method \ No newline at end of file diff --git a/examples/xgenmm/run_cli.sh b/examples/xgenmm/run_cli.sh index df10fa319..9ae1ac18e 100644 --- a/examples/xgenmm/run_cli.sh +++ b/examples/xgenmm/run_cli.sh @@ -35,12 +35,12 @@ make xgenmm-cli # --seed 42 --ctx-size 4096 --predict 1024 \ # --temp 0 --verbose-prompt -Q="What is the address of this resturant?" +Q="What is the address of this restaurant?" # Q="Is this dine in or dine out receipt?" # Q="What is the total amount paid?" # Q="What is card holder's name?" # Q="What is the transaction date?" -# Q="What is the phone number of this resturant?" +# Q="What is the phone number of this restaurant?" # Q="Who is the attendant?" # Q="Who is the cashier?" # Q="Briefly describe this image." @@ -53,7 +53,8 @@ echo $prompt # mmproj=$base_path/mmproj-model-f32.gguf base_path=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct_bf16_patch128/gguf -model=$base_path/phi3_mini_4k_instruct_f16.gguf +# model=$base_path/phi3_mini_4k_instruct_f16.gguf +model=$base_path/phi3_mini_4k_instruct_f16_Q4_K_M.gguf mmproj=$base_path/mmproj-model-f32.gguf ./xgenmm-cli --model $model\ diff --git a/xgenmm-cli b/xgenmm-cli index f17b35a8e..dac512a93 100755 Binary files a/xgenmm-cli and b/xgenmm-cli differ