* gguf-py: remove redundant logs

* llama: remove the init_mapping_prefetch custom parameter
2024-03-26 08:32:30 +00:00 · 2024-03-26 08:32:30 +00:00 · 458c1d16b0
commit 458c1d16b0
parent c14d4e8723
2 changed files with 1 addition and 4 deletions
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -212,7 +212,6 @@ class Model(ABC):
        try:
            return cls._model_classes[arch]
        except KeyError:
-            print(f"{cls._model_classes}")
            raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
    def _is_model_safetensors(self) -> bool:
--- a/llama.cpp
+++ b/llama.cpp
@@ -4375,7 +4375,6 @@ static bool llm_load_tensors(
    LLAMA_LOG_INFO("%s: ggml ctx size = %7.2f MiB\n", __func__, model.ctxs.size()*ctx_size/1024.0/1024.0);
-    bool init_mapping_prefetch = true;
    // create tensors for the weights
    {
        const int64_t n_embd       = hparams.n_embd;
@@ -5230,7 +5229,6 @@ static bool llm_load_tensors(
                } break;
            case LLM_ARCH_XVERSE:
                {
-                    init_mapping_prefetch = false;
                    model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
                    {
                        model.output_norm = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
@@ -5289,7 +5287,7 @@ static bool llm_load_tensors(
    ml.done_getting_tensors();
-    ml.init_mappings(init_mapping_prefetch, use_mlock ? &model.mlock_mmaps : nullptr);
+    ml.init_mappings(true, use_mlock ? &model.mlock_mmaps : nullptr);
    model.mappings.reserve(ml.mappings.size());
    // create the backend buffers