From 56e149d627e95fbd227580c2b9a6468b35e16623 Mon Sep 17 00:00:00 2001 From: Yutong Dai Date: Fri, 4 Oct 2024 22:38:39 +0000 Subject: [PATCH] add quantize method --- examples/xgenmm/quantize.sh | 4 ++-- examples/xgenmm/run_cli.sh | 7 ++++--- xgenmm-cli | Bin 48440544 -> 48440544 bytes 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/xgenmm/quantize.sh b/examples/xgenmm/quantize.sh index afc9b9041..d776c7519 100644 --- a/examples/xgenmm/quantize.sh +++ b/examples/xgenmm/quantize.sh @@ -5,6 +5,6 @@ quantize_method=Q4_K_M outname=${model_name}_${quantize_method} input_model_path=$gguf_dir/$model_name.gguf output_model_path=$gguf_dir/$outname.gguf -echo $outname -cd ../../ +echo $input_model_path +echo $output_model_path ./llama-quantize $input_model_path $output_model_path $quantize_method \ No newline at end of file diff --git a/examples/xgenmm/run_cli.sh b/examples/xgenmm/run_cli.sh index df10fa319..9ae1ac18e 100644 --- a/examples/xgenmm/run_cli.sh +++ b/examples/xgenmm/run_cli.sh @@ -35,12 +35,12 @@ make xgenmm-cli # --seed 42 --ctx-size 4096 --predict 1024 \ # --temp 0 --verbose-prompt -Q="What is the address of this resturant?" +Q="What is the address of this restaurant?" # Q="Is this dine in or dine out receipt?" # Q="What is the total amount paid?" # Q="What is card holder's name?" # Q="What is the transaction date?" -# Q="What is the phone number of this resturant?" +# Q="What is the phone number of this restaurant?" # Q="Who is the attendant?" # Q="Who is the cashier?" # Q="Briefly describe this image." @@ -53,7 +53,8 @@ echo $prompt # mmproj=$base_path/mmproj-model-f32.gguf base_path=/export/share/yutong/xgenmm/llamacpp_wd/siglip_kosmos_phi3_4k_instruct_bf16_patch128/gguf -model=$base_path/phi3_mini_4k_instruct_f16.gguf +# model=$base_path/phi3_mini_4k_instruct_f16.gguf +model=$base_path/phi3_mini_4k_instruct_f16_Q4_K_M.gguf mmproj=$base_path/mmproj-model-f32.gguf ./xgenmm-cli --model $model\ diff --git a/xgenmm-cli b/xgenmm-cli index f17b35a8e3091380b562b3c6fe1032f008a93cfc..dac512a93e9672fbc0a6d92fcfa3bbaaad551bcf 100755 GIT binary patch delta 3493 zcmWmE<9iSa07mg=TefZ6SjKWK+gP^ERm--qti@$6+pg`}M%VY859jy1f597jb3w*f zii!$KSL5LG51q61xU#p!?C_AfxyD`{cBff*Oi)BjPyhvzJ08WS1eA~xQDRC$Nhuj6 zrxcWuQc-G3Lun};rKb#(kup(c%0gKw8)c^)l#_B%ZpuS>DIevh0#uL+QDG_)9TQY^ z@Rmfq%LMld3#%9$Q7N)06{F%*BDzv!$+Xo1@lM%0*^P$)H}X4IToP)lk>t*H&QrFPVwI#5UI zM4hP%b)|09oqAAD>P2DHoBB{+>PP))01c!;G?<3aP#Q+VX#|a=Q8b#y&{!Hr<7om- zq)9ZHrqEQHM$>5qh0{!$MYAb_=FnW4NAqa`Eu=-Xn3m8|T1Lxh1+Ao2w3^n?T3Scz zX#;JfO|+S|&{o<;+bNQE&`#P#yJ-*YrG2!Y4$wh5L{W5@j?hs$M#t#{Mbk+-MW^Wu zouzYho-WWuxx<=RO2Hm7vberzbUAjm2=>a{YNA#GU&{KLw&*=rdq*wHs z-q2fmNAD??;^-s#m_DIT=`;GAzMwDZEBczgp>OFs`ksEEAL%FhnSP;P={NeFKF}ZZ zC;df#(?9et{Wm`}E?_Z9pae-gi7yExp(K*Tl0=e9GD$8eB&DR1)RIQhN;*j|86=}* zlFX7tvPw3|E;%Hp4KYDz7sEg@1z>PkJSFAb!jG?K>BL_(#hG?V7iLRv~IX)SG}t+bQ& z(m^^(C+RF*q^oq3?$SegN-qhM-qJ_GDW7!G?^|lBwS|7ESW74GDqghJee;GWT7mQ#j-?}$}(9lD`cgtlGUN^KwBh$|bohS0qNR$~CzzH{_<=lG}1e?#ey6FAwCQJd(%qM4rksc`h&H zrM!~Y@D!<9^ z@KBdOA#q5#iY2DkdjhLN=q3jE9IoT zRFH}iES03PRFSGuO{z-`sVTLjwuDF>sVnuQzBG`A(nuOh6A6{3(oC953u!5>q_wn> zw$e`8O9$yFouspLk*?BBx=Rn~DZM02dP^VaEB&Ou43L2`NCwLg87jkMxQvjIGD=3v z7#S<$WV}p}i84th%M_U^(`35LkZ_qPvt+hJ$Q+p~^JKm(kcF~H7RwS@D$8WKtdNzm zN>WV=Ml4%sQYWVh^*y|PdC%KLD%a$?+>o1cOK!^@xhwbNzC4hJ@<<-b6L~7n d=7RJw z6cG`WrsI%_2X{6Z{UQ4JnUB$zFU46D^rG&JsGzW@pa2RacN~gK@hCnepoEl&5>paN zO35fWrJ$6Qic(V=N=xY|J!PPbl!-D^7RpN5C_Ck#oRo`lQy$7o`6xdXpn_B=GAgL> zpe+f?_XsW*(z|qUSV(wbDndo6SY$|e@zj+Aa)pJ)7N-&vOeLulm8LROmda6i3ZV*A zkt$JTszOz%8dawnRFi5^ZK^|csUFp*2Go!mQ7AR0Ce)OgQFCfREvXf?rZ&`;+EIJz zKpm+Qb*3)VmAX-P>OnoJ7xkt-)R+2Ee;PmoX%G#jAvBbR(Qq0;BWV8=X&Ei26||C8(P~;lYiS*= zrwz1`HqmC(?OC97qP ztd(`LUN*=^*(94~i)@u`vR%SuhwPMHvRn4ZUfC!6<$xTNLlPl}<%k@WV{%+hNTi&U zQ*v6)$XPij=jDQ2luL41u1J(zm1}ZcZpcl!CAZ~{+?9KBUmnOqc_feJi9D5O@?2g> zw7is8@><@=TX`oj5-acJ6Zup=$Y=7od?8=TSMs%dlyBr)`A)uWV?jR4%sQYWVh^*y|PdC%Ko1cOK!^@xhwbNzC4hJ@<<-b6L~7n d