From bd5e7409f32dfb0c485ff508957052459fec7a4a Mon Sep 17 00:00:00 2001 From: D3faIt <8147434+D3faIt@users.noreply.github.com> Date: Sun, 30 Apr 2023 06:43:46 +0200 Subject: [PATCH] Fixed incorrect example of quantize in README.md README.md showed an example of quantize that doesn't work anymore --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f55c576ab..f3b42241b 100644 --- a/README.md +++ b/README.md @@ -268,8 +268,10 @@ python3 -m pip install -r requirements.txt # convert the 7B model to ggml FP16 format python3 convert.py models/7B/ -# quantize the model to 4-bits (using q4_0 method) -./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0 +# quantize the model to 4-bits; the last argument selects the method: +# 2 - q4_0 +# 3 - q4_1 +./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin 2 # run the inference ./main -m ./models/7B/ggml-model-q4_0.bin -n 128