add quantize method

2024-10-04 22:38:39 +00:00 · 2024-10-04 22:38:39 +00:00 · 56e149d627
commit 56e149d627
parent e28cdd78b0
3 changed files with 6 additions and 5 deletions
--- a/examples/xgenmm/quantize.sh
+++ b/examples/xgenmm/quantize.sh
@@ -5,6 +5,6 @@ quantize_method=Q4_K_M
 outname=${model_name}_${quantize_method}
 input_model_path=$gguf_dir/$model_name.gguf
 output_model_path=$gguf_dir/$outname.gguf
-echo $outname
-cd ../../
+echo $input_model_path
+echo $output_model_path
 ./llama-quantize $input_model_path $output_model_path $quantize_method
--- a/examples/xgenmm/run_cli.sh
+++ b/examples/xgenmm/run_cli.sh
@@ -35,12 +35,12 @@ make xgenmm-cli
 #     --seed 42 --ctx-size 4096 --predict 1024 \
 #     --temp 0 --verbose-prompt

-Q="What is the address of this resturant?"
+Q="What is the address of this restaurant?"
 # Q="Is this dine in or dine out receipt?"
 # Q="What is the total amount paid?"
 # Q="What is card holder's name?"
 # Q="What is the transaction date?"
-# Q="What is the phone number of this resturant?"
+# Q="What is the phone number of this restaurant?"
 # Q="Who is the attendant?"
 # Q="Who is the cashier?"
 # Q="Briefly describe this image."
@@ -53,7 +53,8 @@ echo $prompt
 # mmproj=$base_path/mmproj-model-f32.gguf

 base_path=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct_bf16_patch128/gguf
-model=$base_path/phi3_mini_4k_instruct_f16.gguf
+# model=$base_path/phi3_mini_4k_instruct_f16.gguf
+model=$base_path/phi3_mini_4k_instruct_f16_Q4_K_M.gguf
 mmproj=$base_path/mmproj-model-f32.gguf

 ./xgenmm-cli --model $model\
--- a/BIN
+++ b/BIN