From 48909ed2a7afa48d9dbef3be8361af950f2b403f Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Mon, 8 Apr 2024 19:01:44 +0200
Subject: [PATCH] model: dbrx convert permute experts directly torch, log shape

---
 convert-hf-to-gguf.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 7e4dfd609..b62d0747a 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1531,15 +1531,11 @@ class DbrxModel(Model):
             for exp_tensor_name in exp_tensor_names.keys():
                 if name.find(exp_tensor_name) != -1 and name.find(".weight") == -1:
                     experts = True
-                    expert_permute = exp_tensor_names[exp_tensor_name]
+                    data_torch = data_torch.view(n_expert, n_ff, n_embd).permute(*exp_tensor_names[exp_tensor_name])
                     break
 
             old_dtype = data_torch.dtype
 
-            # View experts tensors as 3D
-            if experts:
-                data_torch = data_torch.view(n_expert, n_ff, n_embd)
-
             # convert any unsupported data types to float32
             if data_torch.dtype not in (torch.float16, torch.float32):
                 data_torch = data_torch.to(torch.float32)
@@ -1560,11 +1556,6 @@ class DbrxModel(Model):
             n_dims = len(data.shape)
             data_dtype = data.dtype
 
-            # Transpose experts to the expected llama.cpp format
-            if experts:
-                data = data.transpose(expert_permute)
-                n_dims = len(data.shape)
-
             # if f32 desired, convert any float16 to float32
             if self.ftype == 0 and data_dtype == np.float16:
                 data = data.astype(np.float32)
@@ -1573,7 +1564,7 @@
             if self.ftype == 1 and data_dtype == np.float32 and n_dims > 1:
                 data = data.astype(np.float16)
 
-            print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+            print(f"{new_name}, n_dims = {n_dims}, shape = {data.shape}, {old_dtype} --> {data.dtype}")
 
             self.gguf_writer.add_tensor(new_name, data)
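
Below is a minimal sketch (not part of the patch) of what the torch-side reshaping does: the expert weight tensor is viewed as (n_expert, n_ff, n_embd) and permuted while still a torch tensor, instead of being transposed later as a numpy array. The toy dimensions and the (0, 2, 1) permutation are illustrative assumptions; the real permutation tuples come from exp_tensor_names in the converter.

    import torch

    # toy sizes, assumed for illustration (not DBRX's real dimensions)
    n_expert, n_ff, n_embd = 4, 8, 6
    flat = torch.arange(n_expert * n_ff * n_embd, dtype=torch.float32)

    # new approach in this patch: view as 3D and permute while still in torch
    permuted = flat.view(n_expert, n_ff, n_embd).permute(0, 2, 1)

    # old approach: view in torch, then transpose the numpy array later
    legacy = flat.view(n_expert, n_ff, n_embd).numpy().transpose(0, 2, 1)

    assert permuted.shape == (n_expert, n_embd, n_ff)
    assert (permuted.numpy() == legacy).all()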