From 6f2014a029d7bd67586eda7f019c0a6903dac687 Mon Sep 17 00:00:00 2001
From: Eve <139727413+netrunnereve@users.noreply.github.com>
Date: Wed, 7 Feb 2024 02:24:26 +0000
Subject: [PATCH] recommend Q4_K_M quantization method

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index aa1f8f6bc..1d13ceea3 100644
--- a/README.md
+++ b/README.md
@@ -680,18 +680,18 @@ python3 convert.py models/mymodel/
 # [Optional] for models using BPE tokenizers
 python convert.py models/mymodel/ --vocabtype bpe
 
-# quantize the model to 4-bits (using q4_0 method)
-./quantize ./models/mymodel/ggml-model-f16.gguf ./models/mymodel/ggml-model-q4_0.gguf q4_0
+# quantize the model to 4-bits (using Q4_K_M method)
+./quantize ./models/mymodel/ggml-model-f16.gguf ./models/mymodel/ggml-model-Q4_K_M.gguf Q4_K_M
 
 # update the gguf filetype to current version if older version is now unsupported
-./quantize ./models/mymodel/ggml-model-q4_0.gguf ./models/mymodel/ggml-model-q4_0-v2.gguf COPY
+./quantize ./models/mymodel/ggml-model-Q4_K_M.gguf ./models/mymodel/ggml-model-Q4_K_M-v2.gguf COPY
 ```
 
 ### Run the quantized model
 
 ```bash
 # start inference on a gguf model
-./main -m ./models/mymodel/ggml-model-q4_0.gguf -n 128
+./main -m ./models/mymodel/ggml-model-Q4_K_M.gguf -n 128
 ```
 
 When running the larger models, make sure you have enough disk space to store all the intermediate files.