diff --git a/awq-py/README.md b/awq-py/README.md index 9878ad26c..c5706b096 100644 --- a/awq-py/README.md +++ b/awq-py/README.md @@ -117,4 +117,4 @@ We use three types of llamacpp quantization methods to work with our version, in |MPT 7B | bits/weight | 16.0 | 4.5 | 5.0 | 2.6 | |AWQ-MPT 7B| perplexity | 8.4944 | 8.7053 | 8.6750 | 10.2873| |AWQ-MPT 7B| file size | 13.7G | 3.9G | 4.3G | 2.8G | -|AWQ-MPT 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 | \ No newline at end of file +|AWQ-MPT 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 | diff --git a/awq-py/awq/apply_awq.py b/awq-py/awq/apply_awq.py index 5f2ca9d91..11132c5d2 100644 --- a/awq-py/awq/apply_awq.py +++ b/awq-py/awq/apply_awq.py @@ -189,10 +189,7 @@ def apply_scale(module, scales_list, input_feat_dict=None): if isinstance(prev_op, nn.Linear): assert len(layers) == 1 scale_fc_fc(prev_op, layers[0], scales) - elif ( - isinstance(prev_op, (nn.LayerNorm, LlamaRMSNorm)) - or "rmsnorm" in str(prev_op.__class__).lower() - ): + elif isinstance(prev_op, (nn.LayerNorm, LlamaRMSNorm)) or "rmsnorm" in str(prev_op.__class__).lower(): scale_ln_fcs(prev_op, layers, scales) elif isinstance(prev_op, (nn.GELU, BloomGelu, GELUActivation)): new_module = ScaledActivation(prev_op, scales) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 200608722..1f00298ae 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -1041,7 +1041,7 @@ dir_model = args.model if args.awq_path: sys.path.insert(1, str(Path(__file__).parent / 'awq-py')) - from awq.apply_awq import add_scale_weights + from awq.apply_awq import add_scale_weights tmp_model_path = args.model / "weighted_model" dir_model = tmp_model_path if tmp_model_path.is_dir():