From 487ac226b45a9e6eeffa7e1ffa86a8ccb30e4a94 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Wed, 17 May 2023 17:58:21 +0800 Subject: [PATCH] need to set the unshuffle before loading the model --- gpttype_adapter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index d9423ecc4..456b8741d 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -377,6 +377,9 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in } else if(file_format==FileFormat::NEOX_1 || file_format==FileFormat::NEOX_2 || file_format==FileFormat::NEOX_3 || file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5) { + //newer format has bit unshuffling + SetQuantsUnshuffled(file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5); + ModelLoadResult res = gpt_neox_model_load(params.model, neox_ctx, vocab, file_format); if(res==ModelLoadResult::FAIL) { @@ -389,9 +392,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in return res; } - //newer format has bit unshuffling - SetQuantsUnshuffled(file_format==FileFormat::NEOX_4 || file_format==FileFormat::NEOX_5); - // determine the required inference memory per token: gpt_neox_eval(neox_ctx, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);