quantization script added

Yutong Dai 2024-10-03 22:08:36 +00:00
parent 8c179f41fa
commit e28cdd78b0
4 changed files with 12 additions and 2 deletions

@@ -0,0 +1,10 @@
gguf_dir=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct_bf16_patch128/gguf
model_name=phi3_mini_4k_instruct_f16
quantize_method=Q4_K_M
outname=${model_name}_${quantize_method}
input_model_path=$gguf_dir/$model_name.gguf
output_model_path=$gguf_dir/$outname.gguf
echo $outname
cd ../../
./llama-quantize $input_model_path $output_model_path $quantize_method
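For reference, the new script above drives llama.cpp's llama-quantize, which takes an input GGUF, an output path, and a quantization type. A minimal sketch of the same step with basic guards is shown here; the paths are placeholders and not taken from this commit:

#!/usr/bin/env bash
# Sketch only: placeholder paths, adjust to your own checkout and GGUF directory.
set -euo pipefail

gguf_dir=/path/to/gguf_dir            # placeholder, not from the commit
model_name=phi3_mini_4k_instruct_f16
quantize_method=Q4_K_M
input_model_path=$gguf_dir/$model_name.gguf
output_model_path=$gguf_dir/${model_name}_${quantize_method}.gguf

# Fail early if the f16 input is missing instead of letting llama-quantize error out.
[ -f "$input_model_path" ] || { echo "missing: $input_model_path" >&2; exit 1; }

# llama-quantize usage: <input gguf> <output gguf> <quantization type>
./llama-quantize "$input_model_path" "$output_model_path" "$quantize_method"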

@@ -35,14 +35,14 @@ make xgenmm-cli
# --seed 42 --ctx-size 4096 --predict 1024 \
# --temp 0 --verbose-prompt
# Q="What is the address of this resturant?"
Q="What is the address of this resturant?"
# Q="Is this dine in or dine out receipt?"
# Q="What is the total amount paid?"
# Q="What is card holder's name?"
# Q="What is the transaction date?"
# Q="What is the phone number of this resturant?"
# Q="Who is the attendant?"
Q="Who is the cashier?"
# Q="Who is the cashier?"
# Q="Briefly describe this image."
prompt="<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.<|end|>\n<|user|>\n<image>\n $Q<|end|>\n<|assistant|>\n"
echo $prompt
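A small note on previewing the prompt built above: plain echo in bash prints the \n sequences literally, so to see the rendered chat template one can expand the escapes with printf's %b (illustrative only, not part of the diff):

# Preview the template with backslash escapes expanded (sketch, not in the commit).
printf '%b\n' "$prompt"
# equivalently in bash: echo -e "$prompt"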

BIN	test_anyres_handle_patches (Executable file)

Binary file not shown.

Binary file not shown.