diff --git a/awq-py/README.md b/awq-py/README.md index 6ede7b8b9..bb7d3f657 100644 --- a/awq-py/README.md +++ b/awq-py/README.md @@ -57,7 +57,7 @@ python convert-hf-to-gguf.py models/mpt-7b/ --awq-path awq_cache/llama-7b-w4-g12 # For llama and llama2, and mistral models. ./build/bin/main -m models/llama_7b_q4_0.gguf -n 128 --prompt "Once upon a time" # For mpt models. -./build/bin/main -m models/mpt_7b_q4_0.gguf -awq -n 128 --prompt "Once upon a time" +./build/bin/main -m models/mpt_7b_q4_0.gguf --use-awq -n 128 --prompt "Once upon a time" ``` ## Benchmark @@ -66,7 +66,7 @@ The perplexity measurements in the table above are done against the `wikitext2` test # For llama and llama2, and mistral models. ./perplexity -m models/llama_7b_q4_0.gguf -f datasets/wikitext-2-raw/wiki.test.raw # For mpt models. -./perplexity -m models/mpt_7b_q4_0.gguf -awq -f datasets/wikitext-2-raw/wiki.test.raw +./perplexity -m models/mpt_7b_q4_0.gguf --use-awq -f datasets/wikitext-2-raw/wiki.test.raw ``` ## Results