diff --git a/examples/xgenmm/quantize.sh b/examples/xgenmm/quantize.sh
index afc9b9041..d776c7519 100644
--- a/examples/xgenmm/quantize.sh
+++ b/examples/xgenmm/quantize.sh
@@ -5,6 +5,6 @@ quantize_method=Q4_K_M
 outname=${model_name}_${quantize_method}
 input_model_path=$gguf_dir/$model_name.gguf
 output_model_path=$gguf_dir/$outname.gguf
-echo $outname
-cd ../../
+echo $input_model_path
+echo $output_model_path
 ./llama-quantize $input_model_path $output_model_path $quantize_method
\ No newline at end of file
diff --git a/examples/xgenmm/run_cli.sh b/examples/xgenmm/run_cli.sh
index df10fa319..9ae1ac18e 100644
--- a/examples/xgenmm/run_cli.sh
+++ b/examples/xgenmm/run_cli.sh
@@ -35,12 +35,12 @@ make xgenmm-cli
 #     --seed 42 --ctx-size 4096 --predict 1024 \
 #     --temp 0 --verbose-prompt
 
-Q="What is the address of this resturant?"
+Q="What is the address of this restaurant?"
 # Q="Is this dine in or dine out receipt?"
 # Q="What is the total amount paid?"
 # Q="What is card holder's name?"
 # Q="What is the transaction date?"
-# Q="What is the phone number of this resturant?"
+# Q="What is the phone number of this restaurant?"
 # Q="Who is the attendant?"
 # Q="Who is the cashier?"
 # Q="Briefly describe this image."
@@ -53,7 +53,8 @@ echo $prompt
 # mmproj=$base_path/mmproj-model-f32.gguf
 
 base_path=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct_bf16_patch128/gguf
-model=$base_path/phi3_mini_4k_instruct_f16.gguf
+# model=$base_path/phi3_mini_4k_instruct_f16.gguf
+model=$base_path/phi3_mini_4k_instruct_f16_Q4_K_M.gguf
 mmproj=$base_path/mmproj-model-f32.gguf
 
 ./xgenmm-cli --model $model\
diff --git a/xgenmm-cli b/xgenmm-cli
index f17b35a8e..dac512a93 100755
Binary files a/xgenmm-cli and b/xgenmm-cli differ