diff --git a/awqpy/README.md b/awqpy/README.md
index 8f70f3d3b..11d25d948 100644
--- a/awqpy/README.md
+++ b/awqpy/README.md
@@ -56,7 +56,7 @@ The perplexity measurements in table above are done against the `wikitext2` test
 
 ## Results
 Results are run on OpenBLAS (CPU) and CuBLAS (GPU) for fair comparison
-We use three types of llamacpp quantization methods to work with our version, including q4, q4_1, and q2_k
+We use three llama.cpp quantization methods with our version: q4_0, q4_1, and q2_k
 
 ### Llama 7B (Build with OpenBLAS)
 
diff --git a/common/common.cpp b/common/common.cpp
index dadf95f5c..def94b4e1 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -150,6 +150,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             }
             params.seed = std::stoul(argv[i]);
         } else if (arg == "-awq" || arg == "--use-awq") {
+            // Boolean switch: it takes no value, so the next argument must not
+            // be consumed (no `++i` bounds check as for value-taking options).
             params.use_awq = true;
         } else if (arg == "-t" || arg == "--threads") {
             if (++i >= argc) {
@@ -806,7 +808,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("                        (can be specified more than once for multiple prompts).\n");
     printf("  --color               colorise output to distinguish prompt and user input from generations\n");
     printf("  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n");
-    printf("  -awq SEED, -use-awq   Using AWQ quantization model in inferences\n");
+    printf("  -awq, --use-awq       Using AWQ quantization model in inferences\n");
     printf("  -t N, --threads N     number of threads to use during generation (default: %d)\n", params.n_threads);
     printf("  -tb N, --threads-batch N\n");
     printf("                        number of threads to use during batch and prompt processing (default: same as --threads)\n");
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 3bf8a9f13..c8d24b844 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1029,7 +1029,6 @@ print(f"Loading model: {dir_model.name}")
 
 hparams = Model.load_hparams(dir_model)
 
-
 with torch.inference_mode():
     model_class = Model.from_model_architecture(hparams["architectures"][0])
     model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
diff --git a/convert.py b/convert.py
index f1cdf762a..ea1392338 100755
--- a/convert.py
+++ b/convert.py
@@ -1163,7 +1163,7 @@ def main(args_in: list[str] | None = None) -> None:
             print("Saving new weighted model ...")
             add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
             print(f"Saved weighted model at {tmp_model_path}.") 
-            args.model = tmp_model_path
+        args.model = tmp_model_path
     
     if args.dump_single:
         model_plus = lazy_load_file(args.model)
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 651ff0031..26b74dd06 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -164,7 +164,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.w1",                             # qwen
         ),
         
-        # Awq-activation gate
+        # AWQ-activation gate
         MODEL_TENSOR.FFN_ACT: (
             "transformer.blocks.{bid}.ffn.act",  # mpt
         ),
diff --git a/llama.cpp b/llama.cpp
index 40de8ae77..9ef48bfa7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3937,20 +3937,17 @@ static struct ggml_tensor * llm_build_ffn(
 }
 
 static struct ggml_tensor * llm_build_ffn_mpt_awq(
-    struct ggml_context *ctx,
-    struct ggml_tensor *cur,
-    struct ggml_tensor *up,
-    struct ggml_tensor *up_b,
-    struct ggml_tensor *gate,
-    struct ggml_tensor *gate_b,
-    struct ggml_tensor *down,
-    struct ggml_tensor *down_b,
-    struct ggml_tensor *act_scales,
-    llm_ffn_op_type type_op,
-    llm_ffn_gate_type type_gate,
-    const llm_build_cb &cb,
-    int il)
-{
+        struct ggml_context * ctx,
+         struct ggml_tensor * cur,
+         struct ggml_tensor * up,
+         struct ggml_tensor * up_b,
+         struct ggml_tensor * down,
+         struct ggml_tensor * down_b,
+         struct ggml_tensor * act_scales,
+            llm_ffn_op_type   type_op,
+          llm_ffn_gate_type   type_gate,
+         const llm_build_cb & cb,
+                        int   il) {
     struct ggml_tensor * tmp = ggml_mul_mat(ctx, up, cur);
     cb(tmp, "ffn_up", il);