Merge branch 'master' of https://github.com/namtranase/llama.cpp

2023-12-25 17:13:50 +07:00 · 2023-12-25 17:13:50 +07:00 · 44f4ce2272
commit 44f4ce2272
parent 13f60c417d e9ad5fe040
3 changed files with 3 additions and 6 deletions
--- a/awq-py/README.md
+++ b/awq-py/README.md
@@ -117,4 +117,4 @@ We use three types of llamacpp quantization methods to work with our version, in
 |MPT 7B    | bits/weight  |   16.0 |    4.5 |    5.0 |    2.6  |
 |AWQ-MPT 7B| perplexity   | 8.4944 | 8.7053 |  8.6750 | 10.2873|
 |AWQ-MPT 7B| file size    |  13.7G  |   3.9G |   4.3G |   2.8G  |
-|AWQ-MPT 7B| bits/weight  |   16.0 |    4.5 |    5.0 |    2.6  |
+|AWQ-MPT 7B| bits/weight  |   16.0 |    4.5 |    5.0 |    2.6  |
--- a/awq-py/awq/apply_awq.py
+++ b/awq-py/awq/apply_awq.py
@@ -189,10 +189,7 @@ def apply_scale(module, scales_list, input_feat_dict=None):
        if isinstance(prev_op, nn.Linear):
            assert len(layers) == 1
            scale_fc_fc(prev_op, layers[0], scales)
-        elif (
+        elif isinstance(prev_op, (nn.LayerNorm, LlamaRMSNorm)) or "rmsnorm" in str(prev_op.__class__).lower():
-            isinstance(prev_op, (nn.LayerNorm, LlamaRMSNorm))
-            or "rmsnorm" in str(prev_op.__class__).lower()
-        ):
            scale_ln_fcs(prev_op, layers, scales)
        elif isinstance(prev_op, (nn.GELU, BloomGelu, GELUActivation)):
            new_module = ScaledActivation(prev_op, scales)
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1041,7 +1041,7 @@ dir_model = args.model
 if args.awq_path:
    sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
-    from awq.apply_awq import add_scale_weights    
+    from awq.apply_awq import add_scale_weights
    tmp_model_path = args.model / "weighted_model"
    dir_model = tmp_model_path
    if tmp_model_path.is_dir():