diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 2f146d730..e7b42cfb2 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -2,6 +2,7 @@ from __future__ import annotations +import logging import argparse import contextlib import json @@ -26,6 +27,8 @@ import gguf from convert import LlamaHfVocab, permute +logger = logging.getLogger("hf-to-gguf") + ###### MODEL DEFINITIONS ###### @@ -76,7 +79,7 @@ class Model(ABC): def get_tensors(self) -> Iterator[tuple[str, Tensor]]: for part_name in self.part_names: - print(f"gguf: loading model part '{part_name}'") + logger.info(f"gguf: loading model part '{part_name}'") ctx: ContextManager[Any] if self.is_safetensors: from safetensors import safe_open @@ -95,42 +98,42 @@ class Model(ABC): if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None: self.gguf_writer.add_context_length(n_ctx) - print(f"gguf: context length = {n_ctx}") + logger.info(f"gguf: context length = {n_ctx}") n_embd = self.find_hparam(["hidden_size", "n_embd"]) self.gguf_writer.add_embedding_length(n_embd) - print(f"gguf: embedding length = {n_embd}") + logger.info(f"gguf: embedding length = {n_embd}") if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None: self.gguf_writer.add_feed_forward_length(n_ff) - print(f"gguf: feed forward length = {n_ff}") + logger.info(f"gguf: feed forward length = {n_ff}") n_head = self.find_hparam(["num_attention_heads", "n_head"]) self.gguf_writer.add_head_count(n_head) - print(f"gguf: head count = {n_head}") + logger.info(f"gguf: head count = {n_head}") if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None: self.gguf_writer.add_head_count_kv(n_head_kv) - print(f"gguf: key-value head count = {n_head_kv}") + logger.info(f"gguf: key-value head count = {n_head_kv}") if (rope_theta := self.hparams.get("rope_theta")) is not None: self.gguf_writer.add_rope_freq_base(rope_theta) - print(f"gguf: rope theta = {rope_theta}") + logger.info(f"gguf: rope theta = {rope_theta}") if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None: self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps) - print(f"gguf: rms norm epsilon = {f_rms_eps}") + logger.info(f"gguf: rms norm epsilon = {f_rms_eps}") if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None: self.gguf_writer.add_layer_norm_eps(f_norm_eps) - print(f"gguf: layer norm epsilon = {f_norm_eps}") + logger.info(f"gguf: layer norm epsilon = {f_norm_eps}") if (n_experts := self.hparams.get("num_local_experts")) is not None: self.gguf_writer.add_expert_count(n_experts) - print(f"gguf: expert count = {n_experts}") + logger.info(f"gguf: expert count = {n_experts}") if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None: self.gguf_writer.add_expert_used_count(n_experts_used) - print(f"gguf: experts used count = {n_experts_used}") + logger.info(f"gguf: experts used count = {n_experts_used}") self.gguf_writer.add_file_type(self.ftype) - print(f"gguf: file type = {self.ftype}") + logger.info(f"gguf: file type = {self.ftype}") def write_tensors(self): block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer"))) @@ -151,7 +154,7 @@ class Model(ABC): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) 
@@ -169,7 +172,7 @@ class Model(ABC): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -553,7 +556,7 @@ class BloomModel(Model): ), axis=0, ) - print("re-format attention.linear_qkv.weight") + logger.info("re-format attention.linear_qkv.weight") elif re.match(r"h\.\d+\.self_attention\.query_key_value\.bias", name): qkv_bias = data.reshape((n_head, 3, n_embed // n_head)) data = np.concatenate( @@ -564,12 +567,12 @@ class BloomModel(Model): ), axis=0, ) - print("re-format attention.linear_qkv.bias") + logger.info("re-format attention.linear_qkv.bias") # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -587,13 +590,13 @@ class BloomModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"=> {new_name}, shape = {data.shape}, {old_dtype} --> {data.dtype}") + logger.info(f"=> {new_name}, shape = {data.shape}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) if not has_lm_head and name == "word_embeddings.weight": self.gguf_writer.add_tensor("output.weight", data) - print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}") + logger.info(f"{name} => output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}") @Model.register("MPTForCausalLM") @@ -653,7 +656,7 @@ class MPTModel(Model): else: new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -671,7 +674,7 @@ class MPTModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -697,7 +700,7 @@ class OrionModel(Model): elif "model_max_length" in self.hparams: ctx_length = self.hparams["model_max_length"] else: - print("gguf: can not find ctx length parameter.") + logger.error("gguf: can not find ctx length parameter.") sys.exit() self.gguf_writer.add_file_type(self.ftype) @@ -736,7 +739,7 @@ class OrionModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -754,7 +757,7 @@ class OrionModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -779,7 +782,7 @@ class BaichuanModel(Model): elif "model_max_length" in self.hparams: ctx_length = self.hparams["model_max_length"] else: - print("gguf: can not find ctx length parameter.") +
logger.error("gguf: can not find ctx length parameter.") sys.exit() self.gguf_writer.add_name(self.dir_model.name) @@ -809,7 +812,7 @@ class BaichuanModel(Model): for i in range(block_count): if (w := model_kv.get(f"model.layers.{i}.self_attn.W_pack.weight")) is not None: - print(f"Unpacking and permuting layer {i}") + logger.info(f"Unpacking and permuting layer {i}") model_kv[f"model.layers.{i}.self_attn.q_proj.weight"] = \ self._reverse_hf_permute_part(w, 0, head_count, head_count) model_kv[f"model.layers.{i}.self_attn.k_proj.weight"] = \ @@ -834,7 +837,7 @@ class BaichuanModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -852,7 +855,7 @@ class BaichuanModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor: @@ -937,7 +940,7 @@ class XverseModel(Model): elif "model_max_length" in self.hparams: ctx_length = self.hparams["model_max_length"] else: - print("gguf: can not find ctx length parameter.") + logger.error("gguf: can not find ctx length parameter.") sys.exit() self.gguf_writer.add_name(self.dir_model.name) @@ -987,7 +990,7 @@ class XverseModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -1005,7 +1008,7 @@ class XverseModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor: @@ -1092,7 +1095,7 @@ class FalconModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -1110,7 +1113,7 @@ class FalconModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -1197,7 +1200,7 @@ class RefactModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight",)) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -1215,7 +1218,7 @@ class RefactModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> 
{data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -1264,10 +1267,10 @@ class PersimmonModel(Model): data = data_torch.to(torch.float32).squeeze().numpy() new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -1480,10 +1483,10 @@ class LlamaModel(Model): new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() - print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) continue @@ -1491,7 +1494,7 @@ class LlamaModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -1509,7 +1512,7 @@ class LlamaModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -1584,10 +1587,10 @@ class GrokModel(Model): new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() - print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) continue @@ -1595,7 +1598,7 @@ class GrokModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -1613,7 +1616,7 @@ class GrokModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -1771,7 +1774,7 @@ class MiniCPMModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -1789,7 +1792,7 @@ class MiniCPMModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") 
self.gguf_writer.add_tensor(new_name, data) @@ -1855,7 +1858,7 @@ class QwenModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -1873,7 +1876,7 @@ class QwenModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -2024,7 +2027,7 @@ class GPT2Model(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -2042,13 +2045,13 @@ class GPT2Model(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) # note: GPT2 output is tied to (same as) wte in original model if new_name == "token_embd.weight": - print(f"output.weight, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"output.weight, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor("output.weight", data) @@ -2208,7 +2211,7 @@ class PlamoModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() # shuffle for broadcasting of gqa in ggml_mul_mat @@ -2240,7 +2243,7 @@ class PlamoModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -2286,7 +2289,7 @@ class CodeShellModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -2304,13 +2307,13 @@ class CodeShellModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) if not has_lm_head and name == "transformer.wte.weight": self.gguf_writer.add_tensor("output.weight", data) - print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}") + logger.info(f"{name} => output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}") @Model.register("InternLM2ForCausalLM") @@ -2332,7 +2335,7 @@ class InternLM2Model(Model): toktypes: list[int] = [] if not tokenizer_path.is_file(): - print(f'Error: Missing {tokenizer_path}', file=sys.stderr) + logger.error(f'Error: Missing {tokenizer_path}') sys.exit(1)
sentencepiece_model = model.ModelProto() @@ -2349,7 +2352,7 @@ class InternLM2Model(Model): if text == b"\x00": # (TODO): fixme # Hack here and replace the \x00 characters. - print(f"InternLM2 convert token '{text}' to '🐉'!") + logger.debug(f"InternLM2 convert token '{text}' to '🐉'!") text = "🐉" toktype = SentencePieceTokenTypes.NORMAL @@ -2390,7 +2393,7 @@ class InternLM2Model(Model): # TODO: this is a hack, should be fixed # https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2067687048 special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer) - print(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \ + logger.debug(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \ in chat mode so that the conversation can end normally.") special_vocab.add_to_gguf(self.gguf_writer) @@ -2435,7 +2438,7 @@ in chat mode so that the conversation can end normally.") # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -2453,7 +2456,7 @@ in chat mode so that the conversation can end normally.") if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) def write_tensors(self): @@ -2564,7 +2567,7 @@ class BertModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() # convert any unsupported data types to float32 @@ -2585,7 +2588,7 @@ class BertModel(Model): # if f32 desired, convert any float16 to float32 new_dtype = np.float32 - print(f"{new_name}, n_dims = {n_dims}, {data_torch.dtype} --> {new_dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {data_torch.dtype} --> {new_dtype}") if data.dtype != new_dtype: data = data.astype(new_dtype) @@ -2681,7 +2684,7 @@ class GemmaModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() n_dims = len(data.shape) @@ -2693,7 +2696,7 @@ class GemmaModel(Model): if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -2721,7 +2724,7 @@ class MambaModel(Model): else: # Use the GPT-NeoX tokenizer when no tokenizer files are present tokenizer_path = Path(sys.path[0]) / "models" / "ggml-vocab-gpt-neox.gguf" - print(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'") + logger.debug(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'") neox_reader = gguf.GGUFReader(tokenizer_path, "r") field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL) @@ -2793,17 +2796,17 @@ class MambaModel(Model): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: - print(f"Can 
not map tensor {name!r}") + logger.error(f"Can not map tensor {name!r}") sys.exit() if name.endswith(".A_log"): - print("A_log --> A ==> " + new_name) + logger.debug("A_log --> A ==> " + new_name) data_torch = -torch.exp(data_torch) # assuming token_embd.weight is seen before output.weight if tok_embd is not None and new_name == output_name: if torch.equal(tok_embd, data_torch): - print(f"{output_name} is equivalent to {tok_embd_name}, omitting") + logger.debug(f"{output_name} is equivalent to {tok_embd_name}, omitting") continue if new_name == tok_embd_name: tok_embd = data_torch @@ -2826,7 +2829,7 @@ class MambaModel(Model): if self.ftype == 1 and data_dtype == np.float32 and new_weight_name.endswith((".ssm_in", ".ssm_out", "token_embd", "output")) and n_dims == 2: data = data.astype(np.float16) - print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") + logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") self.gguf_writer.add_tensor(new_name, data) @@ -2936,6 +2939,7 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument("--use-temp-file", action="store_true", help="use the tempfile library while processing (helpful when running out of memory, process killed)") parser.add_argument("--model-name", type=str, default=None, help="name of the model") + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") return parser.parse_args() @@ -2943,6 +2947,11 @@ def parse_args() -> argparse.Namespace: def main() -> None: args = parse_args() + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + dir_model = args.model if args.awq_path: @@ -2951,15 +2960,15 @@ def main() -> None: tmp_model_path = args.model / "weighted_model" dir_model = tmp_model_path if tmp_model_path.is_dir(): - print(f"{tmp_model_path} exists as a weighted model.") + logger.info(f"{tmp_model_path} exists as a weighted model.") else: tmp_model_path.mkdir(parents=True, exist_ok=True) - print("Saving new weighted model ...") + logger.info("Saving new weighted model ...") add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path)) - print(f"Saved weighted model at {tmp_model_path}.") + logger.info(f"Saved weighted model at {tmp_model_path}.") if not dir_model.is_dir(): - print(f'Error: {args.model} is not a directory', file=sys.stderr) + logger.error(f'Error: {args.model} is not a directory') sys.exit(1) ftype_map = { @@ -2973,7 +2982,7 @@ def main() -> None: # output in the same directory as the model by default fname_out = dir_model / f'ggml-model-{args.outtype}.gguf' - print(f"Loading model: {dir_model.name}") + logger.info(f"Loading model: {dir_model.name}") hparams = Model.load_hparams(dir_model) @@ -2981,20 +2990,20 @@ def main() -> None: model_class = Model.from_model_architecture(hparams["architectures"][0]) model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file) - print("Set model parameters") + logger.info("Set model parameters") model_instance.set_gguf_parameters() - print("Set model tokenizer") + logger.info("Set model tokenizer") model_instance.set_vocab() if args.vocab_only: - print(f"Exporting model vocab to '{fname_out}'") + logger.info(f"Exporting model vocab to '{fname_out}'") model_instance.write_vocab() else: - print(f"Exporting model to '{fname_out}'") + logger.info(f"Exporting model to '{fname_out}'") model_instance.write() - print(f"Model successfully exported to '{fname_out}'") + logger.info(f"Model 
successfully exported to '{fname_out}'") if __name__ == '__main__': diff --git a/convert-llama-ggml-to-gguf.py b/convert-llama-ggml-to-gguf.py index 5354b748b..11ab69437 100755 --- a/convert-llama-ggml-to-gguf.py +++ b/convert-llama-ggml-to-gguf.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from __future__ import annotations +import logging import argparse import os import struct @@ -14,6 +15,8 @@ if 'NO_LOCAL_GGUF' not in os.environ: sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) import gguf +logger = logging.getLogger("ggml-to-gguf") + class GGMLFormat(IntEnum): GGML = 0 @@ -125,7 +128,7 @@ class Tensor: self.start_offset = offset self.len_bytes = n_bytes offset += n_bytes - # print(n_dims, name_len, dtype, self.dims, self.name, pad) + # logger.info(n_dims, name_len, dtype, self.dims, self.name, pad) return offset - orig_offset @@ -175,7 +178,7 @@ class GGMLModel: offset += self.validate_header(data, offset) hp = Hyperparameters() offset += hp.load(data, offset) - print(f'* File format: {self.file_format.name}v{self.format_version} with ftype {hp.ftype.name}') + logger.info(f'* File format: {self.file_format.name}v{self.format_version} with ftype {hp.ftype.name}') self.validate_conversion(hp.ftype) vocab = Vocab(load_scores = self.file_format > GGMLFormat.GGML) offset += vocab.load(data, offset, hp.n_vocab) @@ -215,12 +218,12 @@ class GGMLToGGUF: if float(hp.n_head) / float(x) == gqa: n_kv_head = x assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param" - print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}') + logger.info(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}') self.n_kv_head = n_kv_head self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer) def save(self): - print('* Preparing to save GGUF file') + logger.info('* Preparing to save GGUF file') gguf_writer = gguf.GGUFWriter( self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], @@ -230,11 +233,11 @@ class GGMLToGGUF: if self.special_vocab is not None: self.special_vocab.add_to_gguf(gguf_writer) self.add_tensors(gguf_writer) - print(" gguf: write header") + logger.info(" gguf: write header") gguf_writer.write_header_to_file() - print(" gguf: write metadata") + logger.info(" gguf: write metadata") gguf_writer.write_kv_data_to_file() - print(" gguf: write tensors") + logger.info(" gguf: write tensors") gguf_writer.write_tensors_to_file() gguf_writer.close() @@ -250,7 +253,7 @@ class GGMLToGGUF: name = cfg.name if cfg.name is not None else cfg.input.name except UnicodeDecodeError: name = None - print('* Adding model parameters and KV items') + logger.info('* Adding model parameters and KV items') if name is not None: gguf_writer.add_name(name) gguf_writer.add_description(desc) @@ -287,7 +290,7 @@ class GGMLToGGUF: toktypes = [] if self.vocab_override is not None: vo = self.vocab_override - print('* Adding vocab item(s)') + logger.info('* Adding vocab item(s)') for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()): tokens.append(vbytes) scores.append(score) @@ -299,7 +302,7 @@ class GGMLToGGUF: if len(toktypes) > 0: gguf_writer.add_token_types(toktypes) return - print(f'* Adding {hp.n_vocab} vocab item(s)') + logger.info(f'* Adding {hp.n_vocab} vocab item(s)') assert len(self.model.vocab.items) >= 3, 'Cannot handle unexpectedly short model vocab' for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items): tt = 1 # Normal @@ -334,7 +337,7 @@ class GGMLToGGUF: def add_tensors(self, gguf_writer): tensor_map = 
self.name_map data = self.data - print(f'* Adding {len(self.model.tensors)} tensor(s)') + logger.info(f'* Adding {len(self.model.tensors)} tensor(s)') for tensor in self.model.tensors: name = str(tensor.name, 'UTF-8') mapped_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias")) @@ -344,7 +347,7 @@ class GGMLToGGUF: temp = tempdims[1] tempdims[1] = tempdims[0] tempdims[0] = temp - # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}') + # logger.info(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}') gguf_writer.add_tensor( mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], @@ -401,33 +404,38 @@ def handle_args(): help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir") parser.add_argument("--vocabtype", default="spm,hfft", help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm,hfft)") + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") return parser.parse_args() def main(): cfg = handle_args() - print(f'* Using config: {cfg}') - print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n') + if cfg.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + logger.info(f'* Using config: {cfg}') + logger.warning('=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===') if cfg.model_metadata_dir is None and (cfg.gqa == 1 or cfg.eps == '5.0e-06'): - print('- Note: If converting LLaMA2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".') + logger.info('- Note: If converting LLaMA2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".') data = np.memmap(cfg.input, mode = 'r') model = GGMLModel() - print('* Scanning GGML input file') + logger.info('* Scanning GGML input file') offset = model.load(data, 0) # noqa - print(f'* GGML model hyperparameters: {model.hyperparameters}') + logger.info(f'* GGML model hyperparameters: {model.hyperparameters}') vocab_override = None params_override = None special_vocab = None if cfg.model_metadata_dir is not None: (params_override, vocab_override, special_vocab) = handle_metadata(cfg, model.hyperparameters) - print('!! Note: When overriding params the --gqa, --eps and --context-length options are ignored.') - print(f'* Overriding params: {params_override}') - print(f'* Overriding vocab: {vocab_override}') - print(f'* Special vocab: {special_vocab}') + logger.info('!! Note: When overriding params the --gqa, --eps and --context-length options are ignored.') + logger.info(f'* Overriding params: {params_override}') + logger.info(f'* Overriding vocab: {vocab_override}') + logger.info(f'* Special vocab: {special_vocab}') else: - print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n') + logger.warning('=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===') if model.file_format == GGMLFormat.GGML: - print('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!') + logger.info('! This is a very old GGML file that does not contain vocab scores.
Strongly recommend using model metadata!') converter = GGMLToGGUF( model, data, cfg, params_override = params_override, @@ -435,7 +443,7 @@ def main(): special_vocab = special_vocab ) converter.save() - print(f'* Successful completion. Output saved to: {cfg.output}') + logger.info(f'* Successful completion. Output saved to: {cfg.output}') if __name__ == '__main__': diff --git a/convert-lora-to-ggml.py b/convert-lora-to-ggml.py index 9a9936dec..39536feb9 100755 --- a/convert-lora-to-ggml.py +++ b/convert-lora-to-ggml.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from __future__ import annotations +import logging import json import os import struct @@ -15,6 +16,8 @@ if 'NO_LOCAL_GGUF' not in os.environ: sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf')) import gguf +logger = logging.getLogger("lora-to-gguf") + NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1} @@ -48,11 +51,9 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty if __name__ == '__main__': if len(sys.argv) < 2: - print(f"Usage: python {sys.argv[0]} <path> [arch]") - print( - "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'" - ) - print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)") + logger.info(f"Usage: python {sys.argv[0]} <path> [arch]") + logger.info("Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'") + logger.info(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)") sys.exit(1) input_json = os.path.join(sys.argv[1], "adapter_config.json") @@ -70,7 +71,7 @@ if __name__ == '__main__': arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama" if arch_name not in gguf.MODEL_ARCH_NAMES.values(): - print(f"Error: unsupported architecture {arch_name}") + logger.error(f"Error: unsupported architecture {arch_name}") sys.exit(1) arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)] @@ -80,21 +81,21 @@ if __name__ == '__main__': params = json.load(f) if params["peft_type"] != "LORA": - print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA") + logger.error(f"Error: unsupported adapter type {params['peft_type']}, expected LORA") sys.exit(1) if params["fan_in_fan_out"] is True: - print("Error: param fan_in_fan_out is not supported") + logger.error("Error: param fan_in_fan_out is not supported") sys.exit(1) if params["bias"] is not None and params["bias"] != "none": - print("Error: param bias is not supported") + logger.error("Error: param bias is not supported") sys.exit(1) # TODO: these seem to be layers that have been trained but without lora.
# doesn't seem widely used but eventually should be supported if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0: - print("Error: param modules_to_save is not supported") + logger.error("Error: param modules_to_save is not supported") sys.exit(1) with open(output_path, "wb") as fout: @@ -125,13 +126,13 @@ if __name__ == '__main__': suffix = k[-len(lora_suffixes[0]):] k = k[: -len(lora_suffixes[0])] else: - print(f"Error: unrecognized tensor name {orig_k}") + logger.error(f"Error: unrecognized tensor name {orig_k}") sys.exit(1) tname = name_map.get_name(k) if tname is None: - print(f"Error: could not map tensor name {orig_k}") - print(" Note: the arch parameter must be specified if the model is not llama") + logger.error(f"Error: could not map tensor name {orig_k}") + logger.error(" Note: the arch parameter must be specified if the model is not llama") sys.exit(1) if suffix == ".lora_A.weight": @@ -141,8 +142,8 @@ if __name__ == '__main__': else: assert False - print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB") + logger.info(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB") write_tensor_header(fout, tname, t.shape, t.dtype) t.tofile(fout) - print(f"Converted {input_json} and {input_model} to {output_path}") + logger.info(f"Converted {input_json} and {input_model} to {output_path}") diff --git a/convert-persimmon-to-gguf.py b/convert-persimmon-to-gguf.py index aba575426..7ab27ffb2 100755 --- a/convert-persimmon-to-gguf.py +++ b/convert-persimmon-to-gguf.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from __future__ import annotations +import logging import argparse import os import sys @@ -14,6 +15,8 @@ if 'NO_LOCAL_GGUF' not in os.environ: sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) import gguf +logger = logging.getLogger("persimmon-to-gguf") + def _flatten_dict(dct, tensors, prefix=None): assert isinstance(dct, dict) @@ -30,9 +33,9 @@ def _flatten_dict(dct, tensors, prefix=None): def _get_sentencepiece_tokenizer_info(dir_model: Path): tokenizer_path = dir_model / 'adept_vocab.model' - print('gguf: getting sentencepiece tokenizer from', tokenizer_path) + logger.info(f'getting sentencepiece tokenizer from {tokenizer_path}') tokenizer = SentencePieceProcessor(str(tokenizer_path)) - print('gguf: adding tokens') + logger.info('adding tokens') tokens: list[bytes] = [] scores: list[float] = [] toktypes: list[int] = [] @@ -67,8 +70,13 @@ def main(): parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input") parser.add_argument("--ckpt-path", type=Path, help="path to persimmon checkpoint .pt file") parser.add_argument("--model-dir", type=Path, help="directory containing model e.g.
8b_chat_model_release") - parser.add_argument("--adept-inference-dir", type=str, help="path to adept-inference code directory") + parser.add_argument("--adept-inference-dir", type=str, help="path to adept-inference code directory") + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") args = parser.parse_args() + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) sys.path.append(str(args.adept_inference_dir)) persimmon_model = torch.load(args.ckpt_path) hparams = persimmon_model['args'] @@ -107,7 +115,7 @@ def main(): gguf_writer.add_eos_token_id(71013) tensor_map = gguf.get_tensor_name_map(arch, block_count) - print(tensor_map) + logger.info(tensor_map) for name in tensors.keys(): data_torch = tensors[name] if name.endswith(".self_attention.rotary_emb.inv_freq"): @@ -117,22 +125,21 @@ def main(): data = data_torch.to(torch.float32).squeeze().numpy() new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias")) if new_name is None: - print("Can not map tensor '" + name + "'") + logger.error(f"Can not map tensor '{name}'") sys.exit() n_dims = len(data.shape) - print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype)) + logger.debug(f"{new_name}, n_dims = {str(n_dims)}, {str(old_dtype)} --> {str(data.dtype)}") gguf_writer.add_tensor(new_name, data) - print("gguf: write header") + logger.info("gguf: write header") gguf_writer.write_header_to_file() - print("gguf: write metadata") + logger.info("gguf: write metadata") gguf_writer.write_kv_data_to_file() - print("gguf: write tensors") + logger.info("gguf: write tensors") gguf_writer.write_tensors_to_file() gguf_writer.close() - print(f"gguf: model successfully exported to '{args.outfile}'") - print("") + logger.info(f"gguf: model successfully exported to '{args.outfile}'") if __name__ == '__main__': diff --git a/convert.py b/convert.py index bde0676cb..cf756fd68 100755 --- a/convert.py +++ b/convert.py @@ -1449,11 +1449,11 @@ def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path: def do_dump_model(model_plus: ModelPlus) -> None: - print(f"model_plus.paths = {model_plus.paths!r}") - print(f"model_plus.format = {model_plus.format!r}") - print(f"model_plus.vocab = {model_plus.vocab!r}") + print(f"model_plus.paths = {model_plus.paths!r}") # noqa: NP100 + print(f"model_plus.format = {model_plus.format!r}") # noqa: NP100 + print(f"model_plus.vocab = {model_plus.vocab!r}") # noqa: NP100 for name, lazy_tensor in model_plus.model.items(): - print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}") + print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}") # noqa: NP100 def main(args_in: list[str] | None = None) -> None: diff --git a/ggml_vk_generate_shaders.py b/ggml_vk_generate_shaders.py index 5dd700963..cd9b8dd96 100644 --- a/ggml_vk_generate_shaders.py +++ b/ggml_vk_generate_shaders.py @@ -1,11 +1,14 @@ #!/usr/bin/env python +import logging import argparse import asyncio import os import sys from tempfile import gettempdir, NamedTemporaryFile +logger = logging.getLogger("ggml-vk-generate-shaders") + shader_f32 = """ #define FLOAT_TYPE float """ @@ -2498,7 +2501,7 @@ async def string_to_spv(name, code, defines, fp16=True): stdout, stderr = await proc.communicate() - print(" ".join(cmd)) + logger.info(" ".join(cmd)) if proc.returncode: raise RuntimeError(f"{name=} {f.name=} {stdout=} {stderr=}") @@ 
-2507,7 +2510,7 @@ async def string_to_spv(name, code, defines, fp16=True): cmd.extend([f"-D{key}={value}" for key, value in defines.items()]) code_with_lines = "\n".join([f"{i + 1}: {line}" for i, line in enumerate(preprocessed_code.splitlines())]) - print(f"ERROR compiling {name}\n\n{code_with_lines}\n\n{error}") + logger.error(f"cannot compile {name}\n\n{code_with_lines}\n\n{error}") f.close() os.remove(f.name) sys.exit(proc.returncode) @@ -2520,7 +2523,7 @@ async def string_to_spv(name, code, defines, fp16=True): async def main(): - print("ggml_vulkan: Generating and compiling shaders to SPIR-V") + logger.info("ggml_vulkan: Generating and compiling shaders to SPIR-V") tasks = [] @@ -2768,9 +2771,15 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description="GGML Vulkan Shader Generator") parser.add_argument("--glslc", help="Path to glslc") + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") args = parser.parse_args() + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + if args.glslc: GLSLC = args.glslc diff --git a/gguf-py/examples/reader.py b/gguf-py/examples/reader.py index 62e0769da..9cefe6c44 100644 --- a/gguf-py/examples/reader.py +++ b/gguf-py/examples/reader.py @@ -1,8 +1,10 @@ #!/usr/bin/env python3 +import logging import sys from pathlib import Path from gguf.gguf_reader import GGUFReader +logger = logging.getLogger("reader") sys.path.insert(0, str(Path(__file__).parent.parent)) @@ -18,28 +20,28 @@ def read_gguf_file(gguf_file_path): reader = GGUFReader(gguf_file_path) # List all key-value pairs in a columnized format - print("Key-Value Pairs:") + logger.info("Key-Value Pairs:") max_key_length = max(len(key) for key in reader.fields.keys()) for key, field in reader.fields.items(): value = field.parts[field.data[0]] - print(f"{key:{max_key_length}} : {value}") - print("----") + logger.info(f"{key:{max_key_length}} : {value}") + logger.info("----") # List all tensors - print("Tensors:") + logger.info("Tensors:") tensor_info_format = "{:<30} | Shape: {:<15} | Size: {:<12} | Quantization: {}" - print(tensor_info_format.format("Tensor Name", "Shape", "Size", "Quantization")) - print("-" * 80) + logger.info(tensor_info_format.format("Tensor Name", "Shape", "Size", "Quantization")) + logger.info("-" * 80) for tensor in reader.tensors: shape_str = "x".join(map(str, tensor.shape)) size_str = str(tensor.n_elements) quantization_str = tensor.tensor_type.name - print(tensor_info_format.format(tensor.name, shape_str, size_str, quantization_str)) + logger.info(tensor_info_format.format(tensor.name, shape_str, size_str, quantization_str)) if __name__ == '__main__': if len(sys.argv) < 2: - print("Usage: reader.py <path_to_gguf_file>") + logger.info("Usage: reader.py <path_to_gguf_file>") sys.exit(1) gguf_file_path = sys.argv[1] read_gguf_file(gguf_file_path) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 6d597bfd9..831a2c515 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -1,9 +1,12 @@ from __future__ import annotations +import logging import sys from enum import Enum, IntEnum, auto from typing import Any +logger = logging.getLogger("constants") + # # constants # @@ -854,7 +857,7 @@ class GGUFValueType(IntEnum): return GGUFValueType.INT32 # TODO: need help with 64-bit types in Python else: - print("Unknown type:", type(val)) + logger.error(f"Unknown type: {type(val)}") sys.exit() diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index
089aece87..38655e32a 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import os import shutil import struct @@ -24,6 +25,8 @@ from .constants import ( TokenType, ) +logger = logging.getLogger("gguf-writer") + class WriterState(Enum): EMPTY = auto() @@ -67,7 +70,7 @@ class GGUFWriter: self.use_temp_file = use_temp_file self.temp_file = None self.tensors = [] - print("gguf: This GGUF file is for {0} Endian only".format( + logger.info("gguf: This GGUF file is for {0} Endian only".format( "Big" if self.endianess == GGUFEndian.BIG else "Little", )) self.state = WriterState.EMPTY diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py index 378eaecad..e96bbdd51 100644 --- a/gguf-py/gguf/vocab.py +++ b/gguf-py/gguf/vocab.py @@ -1,13 +1,15 @@ from __future__ import annotations +import logging import json import os -import sys from pathlib import Path from typing import Any, Callable from .gguf_writer import GGUFWriter +logger = logging.getLogger("vocab") + class SpecialVocab: merges: list[str] @@ -40,38 +42,29 @@ class SpecialVocab: def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None: if self.merges: if not quiet: - print(f'gguf: Adding {len(self.merges)} merge(s).') + logger.info(f'Adding {len(self.merges)} merge(s).') gw.add_token_merges(self.merges) elif self.load_merges: - print( - 'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.', - file = sys.stderr, - ) + logger.warning('Adding merges requested but no merges found, output may be non-functional.') for typ, tokid in self.special_token_ids.items(): id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None) if id_handler is None: - print( - f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping', - file = sys.stderr, - ) + logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping') continue if not quiet: - print(f'gguf: Setting special token type {typ} to {tokid}') + logger.info(f'Setting special token type {typ} to {tokid}') id_handler(tokid) for typ, value in self.add_special_token.items(): add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None) if add_handler is None: - print( - f'gguf: WARNING: No handler for add_{typ}_token with value {value} - skipping', - file = sys.stderr, - ) + logger.warning(f'No handler for add_{typ}_token with value {value} - skipping') continue if not quiet: - print(f'gguf: Setting add_{typ}_token to {value}') + logger.info(f'Setting add_{typ}_token to {value}') add_handler(value) if self.chat_template is not None: if not quiet: - print(f'gguf: Setting chat_template to {self.chat_template}') + logger.info(f'Setting chat_template to {self.chat_template}') gw.add_chat_template(self.chat_template) def _load(self, path: Path) -> None: @@ -99,10 +92,7 @@ class SpecialVocab: continue parts = line.split(None, 3) if len(parts) != 2: - print( - f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring', - file = sys.stderr, - ) + logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring') continue merges.append(f'{parts[0]} {parts[1]}') self.merges = merges @@ -118,10 +108,7 @@ class SpecialVocab: return self.special_token_ids[typ] = tid return - print( - f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping', - file = sys.stderr, - ) + logger.warning(f'Special token 
type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping') def _try_load_from_tokenizer_json(self, path: Path) -> bool: tokenizer_file = path / 'tokenizer.json' @@ -144,10 +131,7 @@ class SpecialVocab: if chat_template is None or isinstance(chat_template, (str, list)): self.chat_template = chat_template else: - print( - f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring', - file = sys.stderr - ) + logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring') for typ in self.special_token_types: add_entry = tokenizer_config.get(f'add_{typ}_token') if isinstance(add_entry, bool): diff --git a/gguf-py/scripts/gguf-convert-endian.py b/gguf-py/scripts/gguf-convert-endian.py index 10a16ad06..dc2b9dfbd 100755 --- a/gguf-py/scripts/gguf-convert-endian.py +++ b/gguf-py/scripts/gguf-convert-endian.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from __future__ import annotations +import logging import argparse import os import sys @@ -14,6 +15,8 @@ if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / import gguf +logger = logging.getLogger("gguf-convert-endian") + def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None: if np.uint32(1) == np.uint32(1).newbyteorder("<"): @@ -29,11 +32,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None else: file_endian = host_endian order = host_endian if args.order == "native" else args.order - print(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian") + logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian") if file_endian == order: - print(f"* File is already {order.upper()} endian. Nothing to do.") + logger.info(f"* File is already {order.upper()} endian. Nothing to do.") sys.exit(0) - print("* Checking tensors for conversion compatibility") + logger.info("* Checking tensors for conversion compatibility") for tensor in reader.tensors: if tensor.tensor_type not in ( gguf.GGMLQuantizationType.F32, @@ -41,38 +44,38 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None gguf.GGMLQuantizationType.Q8_0, ): raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}") - print(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}") + logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}") if args.dry_run: return - print("\n*** Warning *** Warning *** Warning **") - print("* This conversion process may damage the file. Ensure you have a backup.") + logger.warning("*** Warning *** Warning *** Warning **") + logger.warning("* This conversion process may damage the file. Ensure you have a backup.") if order != host_endian: - print("* Requested endian differs from host, you will not be able to load the model on this machine.") - print("* The file will be modified immediately, so if conversion fails or is interrupted") - print("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:") + logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.") + logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted") + logger.warning("* the file will be corrupted. 
Enter exactly YES if you are positive you want to proceed:") response = input("YES, I am sure> ") if response != "YES": - print("You didn't enter YES. Okay then, see ya!") + logger.warning("You didn't enter YES. Okay then, see ya!") sys.exit(0) - print(f"\n* Converting fields ({len(reader.fields)})") + logger.info(f"* Converting fields ({len(reader.fields)})") for idx, field in enumerate(reader.fields.values()): - print(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}") + logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}") for part in field.parts: part.byteswap(inplace=True) - print(f"\n* Converting tensors ({len(reader.tensors)})") + logger.info(f"* Converting tensors ({len(reader.tensors)})") for idx, tensor in enumerate(reader.tensors): - print( + log_message = ( f" - {idx:4}: Converting tensor {repr(tensor.name)}, type={tensor.tensor_type.name}, " - f"elements={tensor.n_elements}... ", - end="", + f"elements={tensor.n_elements}... " ) tensor_type = tensor.tensor_type for part in tensor.field.parts: part.byteswap(inplace=True) if tensor_type != gguf.GGMLQuantizationType.Q8_0: tensor.data.byteswap(inplace=True) - print() + logger.info(log_message) continue + # A Q8_0 block consists of a f16 delta followed by 32 int8 quants, so 34 bytes block_size = 34 n_blocks = len(tensor.data) // block_size @@ -82,10 +85,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16) delta.byteswap(inplace=True) if block_num % 100000 == 0: - print(f"[{(n_blocks - block_num) // 1000}K]", end="") - sys.stdout.flush() - print() - print("* Completion") + log_message += f"[{(n_blocks - block_num) // 1000}K]" + + logger.info(log_message) + + logger.info("* Completion") def main() -> None: @@ -102,8 +106,16 @@ def main() -> None: "--dry-run", action="store_true", help="Don't actually change anything", ) + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") + args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) - print(f'* Loading: {args.model}') + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + + logger.info(f'* Loading: {args.model}') reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+') convert_byteorder(reader, args) diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf-dump.py index dbf891508..f813a982c 100755 --- a/gguf-py/scripts/gguf-dump.py +++ b/gguf-py/scripts/gguf-dump.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from __future__ import annotations +import logging import argparse import os import sys @@ -15,6 +16,8 @@ if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / from gguf import GGUFReader, GGUFValueType # noqa: E402 +logger = logging.getLogger("gguf-dump") + def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]: host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG' @@ -29,8 +32,8 @@ def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]: # please see the comments in the modify_gguf.py example. 
def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: host_endian, file_endian = get_file_host_endian(reader) - print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.') - print(f'\n* Dumping {len(reader.fields)} key/value pair(s)') + logger.info(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.') + logger.info(f'* Dumping {len(reader.fields)} key/value pair(s)') for n, field in enumerate(reader.fields.values(), 1): if not field.types: pretty_type = 'N/A' @@ -39,20 +42,21 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count else: pretty_type = str(field.types[-1].name) - print(f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}', end = '') + + log_message = f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}' if len(field.types) == 1: curr_type = field.types[0] if curr_type == GGUFValueType.STRING: - print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '') + log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])) elif field.types[0] in reader.gguf_scalar_to_np: - print(' = {0}'.format(field.parts[-1][0]), end = '') - print() + log_message += ' = {0}'.format(field.parts[-1][0]) + logger.info(log_message) if args.no_tensors: return - print(f'\n* Dumping {len(reader.tensors)} tensor(s)') + logger.info(f'* Dumping {len(reader.tensors)} tensor(s)') for n, tensor in enumerate(reader.tensors, 1): prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape))) - print(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}') + logger.info(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}') def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None: @@ -103,10 +107,20 @@ def main() -> None: parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata") parser.add_argument("--json", action="store_true", help="Produce JSON output") parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)") + parser.add_argument("--verbose", action="store_true", help="increase output verbosity") + args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + if not args.json: - print(f'* Loading: {args.model}') + logger.info(f'* Loading: {args.model}') + reader = GGUFReader(args.model, 'r') + if args.json: dump_metadata_json(reader, args) else: diff --git a/gguf-py/scripts/gguf-set-metadata.py b/gguf-py/scripts/gguf-set-metadata.py index 3ebdfa898..ed24dd75f 100755 --- a/gguf-py/scripts/gguf-set-metadata.py +++ b/gguf-py/scripts/gguf-set-metadata.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import logging import argparse import os import sys @@ -10,6 +11,8 @@ if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / from gguf import GGUFReader # noqa: E402 +logger = logging.getLogger("gguf-set-metadata") + def minimal_example(filename: str) -> None: reader = GGUFReader(filename, 'r+') @@ -41,36 +44,33 @@ def minimal_example(filename: str) -> None: def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: field = reader.get_field(args.key) if field is None: - print(f'! 
diff --git a/gguf-py/scripts/gguf-set-metadata.py b/gguf-py/scripts/gguf-set-metadata.py
index 3ebdfa898..ed24dd75f 100755
--- a/gguf-py/scripts/gguf-set-metadata.py
+++ b/gguf-py/scripts/gguf-set-metadata.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+import logging
 import argparse
 import os
 import sys
@@ -10,6 +11,8 @@ if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent /
 from gguf import GGUFReader  # noqa: E402

+logger = logging.getLogger("gguf-set-metadata")
+

 def minimal_example(filename: str) -> None:
     reader = GGUFReader(filename, 'r+')
@@ -41,36 +44,33 @@ def minimal_example(filename: str) -> None:
 def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
     field = reader.get_field(args.key)
     if field is None:
-        print(f'! Field {repr(args.key)} not found', file = sys.stderr)
+        logger.error(f'! Field {repr(args.key)} not found')
         sys.exit(1)
     # Note that field.types is a list of types. This is because the GGUF
     # format supports arrays. For example, an array of UINT32 would
     # look like [GGUFValueType.ARRAY, GGUFValueType.UINT32]
     handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None
     if handler is None:
-        print(
-            f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}',
-            file = sys.stderr,
-        )
+        logger.error(f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}')
         sys.exit(1)
     current_value = field.parts[field.data[0]][0]
     new_value = handler(args.value)
-    print(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}')
+    logger.info(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}')
     if current_value == new_value:
-        print(f'- Key {repr(args.key)} already set to requested value {current_value}')
+        logger.info(f'- Key {repr(args.key)} already set to requested value {current_value}')
         sys.exit(0)
     if args.dry_run:
         sys.exit(0)
     if not args.force:
-        print('*** Warning *** Warning *** Warning **')
-        print('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.')
-        print('* Enter exactly YES if you are positive you want to proceed:')
+        logger.warning('*** Warning *** Warning *** Warning **')
+        logger.warning('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.')
+        logger.warning('* Enter exactly YES if you are positive you want to proceed:')
         response = input('YES, I am sure> ')
         if response != 'YES':
-            print("You didn't enter YES. Okay then, see ya!")
+            logger.info("You didn't enter YES. Okay then, see ya!")
             sys.exit(0)
     field.parts[field.data[0]][0] = new_value
-    print('* Field changed. Successful completion.')
+    logger.info('* Field changed. Successful completion.')


 def main() -> None:
@@ -80,8 +80,16 @@ def main() -> None:
     parser.add_argument("value", type=str, help="Metadata value to set")
     parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything")
     parser.add_argument("--force", action="store_true", help="Change the field without confirmation")
+    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
+
     args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
-    print(f'* Loading: {args.model}')
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
+    logger.info(f'* Loading: {args.model}')
     reader = GGUFReader(args.model, 'r' if args.dry_run else 'r+')
     set_metadata(reader, args)
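The actual write in set_metadata is the single assignment near the end: because the reader opened the file with 'r+', `field.parts` are numpy views over the mapped file, so assigning through them patches the bytes on disk. A minimal sketch without any of the safety prompts, assuming the key exists and holds a simple scalar (file name and key are illustrative):

    from gguf import GGUFReader

    reader = GGUFReader("model.gguf", "r+")
    field = reader.get_field("general.quantization_version")
    handler = reader.gguf_scalar_to_np[field.types[0]]  # e.g. a numpy uint32 type
    # field.data[0] indexes the part holding the value; writing through the
    # mapped view changes the file immediately, with no separate save step.
    field.parts[field.data[0]][0] = handler("2")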
diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py
index ef7f19ecb..981d61939 100755
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3

+import logging
 import argparse
 import heapq
 import sys
@@ -11,9 +12,11 @@ try:
     import git
     from tabulate import tabulate
 except ImportError as e:
-    print("ERROR: the following Python libraries are required: GitPython, tabulate.")
+    print("the following Python libraries are required: GitPython, tabulate.")  # noqa: NP100
     raise e

+logger = logging.getLogger("compare-llama-bench")
+
 # Properties by which to differentiate results per commit:
 KEY_PROPERTIES = [
     "cpu_info", "gpu_info", "n_gpu_layers", "main_gpu", "cuda", "opencl", "metal", "gpu_blas",
@@ -94,8 +97,7 @@ parser.add_argument("-s", "--show", help=help_s)
 known_args, unknown_args = parser.parse_known_args()

 if unknown_args:
-    print(f"ERROR: Received unknown args: {unknown_args}.")
-    print()
+    logger.error(f"Received unknown args: {unknown_args}.")
     parser.print_help()
     sys.exit(1)
@@ -108,8 +110,7 @@ if input_file is None:
         input_file = sqlite_files[0]

 if input_file is None:
-    print("ERROR: Cannot find a suitable input file, please provide one.")
-    print()
+    logger.error("Cannot find a suitable input file, please provide one.")
     parser.print_help()
     sys.exit(1)
@@ -194,23 +195,19 @@ if known_args.baseline is not None:
         hexsha8_baseline = get_commit_hexsha8(known_args.baseline)
     name_baseline = known_args.baseline
     if hexsha8_baseline is None:
-        print(f"ERROR: cannot find data for baseline={known_args.baseline}.")
+        logger.error(f"cannot find data for baseline={known_args.baseline}.")
         sys.exit(1)
 # Otherwise, search for the most recent parent of master for which there is data:
 elif repo is not None:
     hexsha8_baseline = find_parent_in_data(repo.heads.master.commit)

     if hexsha8_baseline is None:
-        print("ERROR: No baseline was provided and did not find data for any master branch commits.")
-        print()
+        logger.error("No baseline was provided and did not find data for any master branch commits.")
         parser.print_help()
         sys.exit(1)
 else:
-    print(
-        "ERROR: No baseline was provided and the current working directory "
-        "is not part of a git repository from which a baseline could be inferred."
-    )
-    print()
+    logger.error("No baseline was provided and the current working directory "
+                 "is not part of a git repository from which a baseline could be inferred.")
     parser.print_help()
     sys.exit(1)
@@ -227,7 +224,7 @@ if known_args.compare is not None:
         hexsha8_compare = get_commit_hexsha8(known_args.compare)
     name_compare = known_args.compare
     if hexsha8_compare is None:
-        print(f"ERROR: cannot find data for compare={known_args.compare}.")
+        logger.error(f"cannot find data for compare={known_args.compare}.")
         sys.exit(1)
 # Otherwise, search for the commit for llama-bench was most recently run
 # and that is not a parent of master:
@@ -241,16 +238,12 @@ elif repo is not None:
             break

     if hexsha8_compare is None:
-        print("ERROR: No compare target was provided and did not find data for any non-master commits.")
-        print()
+        logger.error("No compare target was provided and did not find data for any non-master commits.")
         parser.print_help()
         sys.exit(1)
 else:
-    print(
-        "ERROR: No compare target was provided and the current working directory "
-        "is not part of a git repository from which a compare target could be inferred."
-    )
-    print()
+    logger.error("No compare target was provided and the current working directory "
+                 "is not part of a git repository from which a compare target could be inferred.")
     parser.print_help()
     sys.exit(1)
@@ -284,8 +277,7 @@ if known_args.show is not None:
         if prop not in KEY_PROPERTIES[:-2]:  # Last two values are n_prompt, n_gen.
             unknown_cols.append(prop)
     if unknown_cols:
-        print(f"ERROR: Unknown values for --show: {', '.join(unknown_cols)}")
-        print()
+        logger.error(f"Unknown values for --show: {', '.join(unknown_cols)}")
         parser.print_usage()
         sys.exit(1)
     rows_show = get_rows(show)
@@ -369,7 +361,7 @@ if "gpu_info" in show:
 headers  = [PRETTY_NAMES[p] for p in show]
 headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]

-print(tabulate(
+logger.info(tabulate(
     table,
     headers=headers,
     floatfmt=".2f",
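One behavioral note on the last hunk: tabulate returns a single multi-line string, so logger.info emits the whole table as one log record. If per-line records were ever preferred (which is not what this patch does), the call could be split; a sketch with made-up rows:

    import logging
    from tabulate import tabulate

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("compare-llama-bench")

    table = [["7B", 120.11, 134.79, 1.12]]  # illustrative data
    headers = ["Model", "t/s baseline", "t/s compare", "Speedup"]
    for line in tabulate(table, headers=headers, floatfmt=".2f").splitlines():
        logger.info(line)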
diff --git a/scripts/run-with-preset.py b/scripts/run-with-preset.py
index a18252730..fe84cbc7c 100755
--- a/scripts/run-with-preset.py
+++ b/scripts/run-with-preset.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3

+import logging
 import argparse
 import os
 import subprocess
@@ -7,6 +8,8 @@ import sys

 import yaml

+logger = logging.getLogger("run-with-preset")
+
 CLI_ARGS_MAIN_PERPLEXITY = [
     "batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape",
     "export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag",
@@ -56,6 +59,7 @@ parser.add_argument("-bin", "--binary", help="The binary to run.")
 parser.add_argument("yaml_files", nargs="*",
                     help="Arbitrary number of YAML files from which to read preset values. "
                     "If two files specify the same values the later one will be used.")
+parser.add_argument("--verbose", action="store_true", help="increase output verbosity")

 known_args, unknown_args = parser.parse_known_args()

@@ -63,6 +67,11 @@ if not known_args.yaml_files and not unknown_args:
     parser.print_help()
     sys.exit(0)

+if known_args.verbose:
+    logging.basicConfig(level=logging.DEBUG)
+else:
+    logging.basicConfig(level=logging.INFO)
+
 props = dict()

 for yaml_file in known_args.yaml_files:
@@ -85,7 +94,7 @@ elif binary.lower().endswith("llama-bench"):
 elif binary.lower().endswith("server"):
     cli_args = CLI_ARGS_SERVER
 else:
-    print(f"Unknown binary: {binary}")
+    logger.error(f"Unknown binary: {binary}")
     sys.exit(1)

 command_list = [binary]
@@ -121,11 +130,11 @@ for cli_arg in cli_args:
 num_unused = len(props)

 if num_unused > 10:
-    print(f"The preset file contained a total of {num_unused} unused properties.")
+    logger.info(f"The preset file contained a total of {num_unused} unused properties.")
 elif num_unused > 0:
-    print("The preset file contained the following unused properties:")
+    logger.info("The preset file contained the following unused properties:")
     for prop, value in props.items():
-        print(f"  {prop}: {value}")
+        logger.info(f"  {prop}: {value}")

 command_list += unknown_args
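run-with-preset folds each YAML file into the `props` dict, which is what produces the "later one will be used" behavior promised in the yaml_files help text. The merge body itself is outside the hunks shown; a self-contained sketch of one way to get that behavior (file names illustrative):

    import yaml

    props: dict = {}
    for yaml_file in ["base.yml", "override.yml"]:
        with open(yaml_file, "r") as f:
            # update() lets keys from later files overwrite earlier ones;
            # `or {}` guards against an empty YAML document parsing to None.
            props.update(yaml.load(f, Loader=yaml.SafeLoader) or {})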
" "If two files specify the same values the later one will be used.") +parser.add_argument("--verbose", action="store_true", help="increase output verbosity") known_args, unknown_args = parser.parse_known_args() @@ -63,6 +67,11 @@ if not known_args.yaml_files and not unknown_args: parser.print_help() sys.exit(0) +if known_args.verbose: + logging.basicConfig(level=logging.DEBUG) +else: + logging.basicConfig(level=logging.INFO) + props = dict() for yaml_file in known_args.yaml_files: @@ -85,7 +94,7 @@ elif binary.lower().endswith("llama-bench"): elif binary.lower().endswith("server"): cli_args = CLI_ARGS_SERVER else: - print(f"Unknown binary: {binary}") + logger.error(f"Unknown binary: {binary}") sys.exit(1) command_list = [binary] @@ -121,11 +130,11 @@ for cli_arg in cli_args: num_unused = len(props) if num_unused > 10: - print(f"The preset file contained a total of {num_unused} unused properties.") + logger.info(f"The preset file contained a total of {num_unused} unused properties.") elif num_unused > 0: - print("The preset file contained the following unused properties:") + logger.info("The preset file contained the following unused properties:") for prop, value in props.items(): - print(f" {prop}: {value}") + logger.info(f" {prop}: {value}") command_list += unknown_args diff --git a/scripts/verify-checksum-models.py b/scripts/verify-checksum-models.py index dff4b4734..58d7a1fb8 100755 --- a/scripts/verify-checksum-models.py +++ b/scripts/verify-checksum-models.py @@ -1,8 +1,11 @@ #!/usr/bin/env python3 +import logging import os import hashlib +logger = logging.getLogger("verify-checksum-models") + def sha256sum(file): block_size = 16 * 1024 * 1024 # 16 MB block size @@ -27,7 +30,7 @@ hash_list_file = os.path.join(llama_path, "SHA256SUMS") # Check if the hash list file exists if not os.path.exists(hash_list_file): - print(f"Hash list file not found: {hash_list_file}") + logger.error(f"Hash list file not found: {hash_list_file}") exit(1) # Read the hash file content and split it into an array of lines @@ -46,7 +49,7 @@ for line in hash_list: file_path = os.path.join(llama_path, filename) # Informing user of the progress of the integrity check - print(f"Verifying the checksum of {file_path}") + logger.info(f"Verifying the checksum of {file_path}") # Check if the file exists if os.path.exists(file_path): @@ -73,9 +76,9 @@ for line in hash_list: # Print column headers for results table -print("\n" + "filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20)) -print("-" * 80) +logger.info("filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20)) +logger.info("-" * 80) # Output the results as a table for r in results: - print(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}") + logger.info(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}") diff --git a/tests/test-tokenizer-0-bpe.py b/tests/test-tokenizer-0-bpe.py index 33a272441..731ae5e77 100644 --- a/tests/test-tokenizer-0-bpe.py +++ b/tests/test-tokenizer-0-bpe.py @@ -7,14 +7,23 @@ # python3 tests/test-tokenizer-0-bpe.py ~/Data/huggingface/deepseek-coder-6.7b-instruct/ # +import logging import argparse from transformers import AutoTokenizer +logger = logging.getLogger("convert") + parser = argparse.ArgumentParser() parser.add_argument("dir_tokenizer", help="directory containing 'tokenizer.model' file") parser.add_argument("--fname-tok", help="path to a text file to tokenize") +parser.add_argument("--verbose", action="store_true", help="increase output 
verbosity") + args = parser.parse_args() +if args.verbose: + logging.basicConfig(level=logging.DEBUG) +else: + logging.basicConfig(level=logging.INFO) dir_tokenizer = args.dir_tokenizer @@ -64,30 +73,34 @@ tests = [ ] for text in tests: - print('text: ', text) - print(tokenizer.encode(text)) - print(tokenizer.decode(tokenizer.encode(text))) + logger.info(f"text: {text}") + logger.info(tokenizer.encode(text)) + logger.info(tokenizer.decode(tokenizer.encode(text))) -print("\n\ntests for C++:\n") +logger.info("tests for C++:") for text in tests: res = tokenizer.encode(text) + # Modify text representation for logging k = text.replace('\n', '\\n') k = k.replace('\t', '\\t') k = '"' + k + '"' - print("{ %-24s, { " % k, end='') - for x in res: - print("%7d," % x, end='') - print(" }, },") -print(tokenizer.encode('hello')) -print(tokenizer.encode('world')) -print(tokenizer.encode(' world')) -print(tokenizer.encode('hello world')) + # Log the modified text and its encoding + log_message = "{ %-24s, { " % k + for x in res: + log_message += "%7d," % x + log_message += " }, }," + logger.info(log_message) + +logger.info(tokenizer.encode('hello')) +logger.info(tokenizer.encode('world')) +logger.info(tokenizer.encode(' world')) +logger.info(tokenizer.encode('hello world')) fname_tok = args.fname_tok if fname_tok: - print('tokenizing file: ', fname_tok) + logger.info(f"tokenizing file: {fname_tok}") fname_out = fname_tok + '.tok' with open(fname_tok, 'r', encoding='utf-8') as f: lines = f.readlines() @@ -112,6 +125,6 @@ if fname_tok: # else: # f.write(str(x) + ' \'' + tokenizer.decode(x) + '\'\n') f.write(str(x) + ' \'' + tokenizer.decode(x).strip() + '\'\n') - print('len(res): ', len(res)) - print('len(lines): ', len(lines)) - print('results written to: ', fname_out) + logger.info(f"len(res): {len(res)}") + logger.info(f"len(lines): {len(lines)}") + logger.info(f"results written to: {fname_out}") diff --git a/tests/test-tokenizer-0-spm.py b/tests/test-tokenizer-0-spm.py index be12a6b93..648b60908 100644 --- a/tests/test-tokenizer-0-spm.py +++ b/tests/test-tokenizer-0-spm.py @@ -7,15 +7,25 @@ # +import logging import argparse from sentencepiece import SentencePieceProcessor +logger = logging.getLogger("test-tokenizer-0-llama") + parser = argparse.ArgumentParser() parser.add_argument("dir_tokenizer", help="directory containing 'tokenizer.model' file") parser.add_argument("--fname-tok", help="path to a text file to tokenize") +parser.add_argument("--verbose", action="store_true", help="increase output verbosity") + args = parser.parse_args() +if args.verbose: + logging.basicConfig(level=logging.DEBUG) +else: + logging.basicConfig(level=logging.INFO) + dir_tokenizer = args.dir_tokenizer tokenizer = SentencePieceProcessor(dir_tokenizer + '/tokenizer.model') @@ -65,41 +75,46 @@ tests = [ for text in tests: - print('text: ', text) - print('\nwith bos:') - print(tokenizer.encode(text, add_bos=True)) - print(tokenizer.decode(tokenizer.encode(text, add_bos=True))) - print('\nwithout bos:') - print(tokenizer.encode(text, add_bos=False)) - print(tokenizer.decode(tokenizer.encode(text, add_bos=False))) + message_log = (f"text: {text}\n" + "with bos:\n" + f"{tokenizer.encode(text, add_bos=True)}\n" + f"{tokenizer.decode(tokenizer.encode(text, add_bos=True))}\n" + "without bos:\n" + f"{tokenizer.encode(text, add_bos=False)}\n" + f"{tokenizer.decode(tokenizer.encode(text, add_bos=False))}\n") + logger.info(message_log) -print("'" + tokenizer.id_to_piece(15043) + "'") # '_Hello' -print("'" + tokenizer.id_to_piece(29871) 
+ "'") # '_' -print("'" + tokenizer.decode([15043]) + "'") # 'Hello' -print("'" + tokenizer.decode([15043, 15043]) + "'") # 'Hello Hello' -print("'" + tokenizer.decode([29871, 15043]) + "'") # ' Hello' -print("'" + tokenizer.decode([29871, 15043, 29871, 15043]) + "'") # ' Hello Hello' +logger.info(f"'{tokenizer.id_to_piece(15043)}'") # '_Hello' +logger.info(f"'{tokenizer.id_to_piece(29871)}'") # '_' +logger.info(f"'{tokenizer.decode([15043])}'") # 'Hello' +logger.info(f"'{tokenizer.decode([15043, 15043])}'") # 'Hello Hello' +logger.info(f"'{tokenizer.decode([29871, 15043])}'") # ' Hello' +logger.info(f"'{tokenizer.decode([29871, 15043, 29871, 15043])}'") # ' Hello Hello' -print("\n\ntests for C++:\n") +logger.info("\n\ntests for C++:\n") for text in tests: res = tokenizer.encode(text, add_bos=False) + # Modify text representation for logging k = text.replace('\n', '\\n') k = k.replace('\t', '\\t') k = '"' + k + '"' - print("{ %-24s, { " % k, end='') - for x in res: - print("%7d," % x, end='') - print(" }, },") -print(tokenizer.encode('hello')) -print(tokenizer.encode('world')) -print(tokenizer.encode(' world')) -print(tokenizer.encode('hello world')) + # Log the modified text and its encoding + log_message = "{ %-24s, { " % k + for x in res: + log_message += "%7d," % x + log_message += " }, }," + logger.info(log_message) + +logger.info(tokenizer.encode('hello')) +logger.info(tokenizer.encode('world')) +logger.info(tokenizer.encode(' world')) +logger.info(tokenizer.encode('hello world')) fname_tok = args.fname_tok if fname_tok: - print('tokenizing file: ', fname_tok) + logger.info(f"tokenizing file: {fname_tok}") fname_out = fname_tok + '.tok' with open(fname_tok, 'r', encoding='utf-8') as f: lines = f.readlines() @@ -109,6 +124,6 @@ if fname_tok: with open(fname_out, 'w', encoding='utf-8') as f: for x in res: f.write(str(x) + ' \'' + tokenizer.decode(x) + '\'\n') - print('len(res): ', len(res)) - print('len(lines): ', len(lines)) - print('results written to: ', fname_out) + logger.info(f"len(res): {len(res)}") + logger.info(f"len(lines): {len(lines)}") + logger.info(f"results written to: {fname_out}")