*.py: Convert all python scripts to use logging module

This commit is contained in:
brian khuu 2024-04-10 15:38:41 +10:00
parent 3670e16e9c
commit f00454fbd4
18 changed files with 388 additions and 296 deletions

View file

@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import logging
import argparse import argparse
import contextlib import contextlib
import json import json
@ -26,6 +27,8 @@ import gguf
from convert import LlamaHfVocab, permute from convert import LlamaHfVocab, permute
logger = logging.getLogger("hf-to-gguf")
###### MODEL DEFINITIONS ###### ###### MODEL DEFINITIONS ######
@ -76,7 +79,7 @@ class Model(ABC):
def get_tensors(self) -> Iterator[tuple[str, Tensor]]: def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
for part_name in self.part_names: for part_name in self.part_names:
print(f"gguf: loading model part '{part_name}'") logger.info(f"gguf: loading model part '{part_name}'")
ctx: ContextManager[Any] ctx: ContextManager[Any]
if self.is_safetensors: if self.is_safetensors:
from safetensors import safe_open from safetensors import safe_open
@ -95,42 +98,42 @@ class Model(ABC):
if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None: if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
self.gguf_writer.add_context_length(n_ctx) self.gguf_writer.add_context_length(n_ctx)
print(f"gguf: context length = {n_ctx}") logger.info(f"gguf: context length = {n_ctx}")
n_embd = self.find_hparam(["hidden_size", "n_embd"]) n_embd = self.find_hparam(["hidden_size", "n_embd"])
self.gguf_writer.add_embedding_length(n_embd) self.gguf_writer.add_embedding_length(n_embd)
print(f"gguf: embedding length = {n_embd}") logger.info(f"gguf: embedding length = {n_embd}")
if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None: if (n_ff := self.find_hparam(["intermediate_size", "n_inner"], optional=True)) is not None:
self.gguf_writer.add_feed_forward_length(n_ff) self.gguf_writer.add_feed_forward_length(n_ff)
print(f"gguf: feed forward length = {n_ff}") logger.info(f"gguf: feed forward length = {n_ff}")
n_head = self.find_hparam(["num_attention_heads", "n_head"]) n_head = self.find_hparam(["num_attention_heads", "n_head"])
self.gguf_writer.add_head_count(n_head) self.gguf_writer.add_head_count(n_head)
print(f"gguf: head count = {n_head}") logger.info(f"gguf: head count = {n_head}")
if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None: if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
self.gguf_writer.add_head_count_kv(n_head_kv) self.gguf_writer.add_head_count_kv(n_head_kv)
print(f"gguf: key-value head count = {n_head_kv}") logger.info(f"gguf: key-value head count = {n_head_kv}")
if (rope_theta := self.hparams.get("rope_theta")) is not None: if (rope_theta := self.hparams.get("rope_theta")) is not None:
self.gguf_writer.add_rope_freq_base(rope_theta) self.gguf_writer.add_rope_freq_base(rope_theta)
print(f"gguf: rope theta = {rope_theta}") logger.info(f"gguf: rope theta = {rope_theta}")
if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None: if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps) self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
print(f"gguf: rms norm epsilon = {f_rms_eps}") logger.info(f"gguf: rms norm epsilon = {f_rms_eps}")
if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None: if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
self.gguf_writer.add_layer_norm_eps(f_norm_eps) self.gguf_writer.add_layer_norm_eps(f_norm_eps)
print(f"gguf: layer norm epsilon = {f_norm_eps}") logger.info(f"gguf: layer norm epsilon = {f_norm_eps}")
if (n_experts := self.hparams.get("num_local_experts")) is not None: if (n_experts := self.hparams.get("num_local_experts")) is not None:
self.gguf_writer.add_expert_count(n_experts) self.gguf_writer.add_expert_count(n_experts)
print(f"gguf: expert count = {n_experts}") logger.info(f"gguf: expert count = {n_experts}")
if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None: if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
self.gguf_writer.add_expert_used_count(n_experts_used) self.gguf_writer.add_expert_used_count(n_experts_used)
print(f"gguf: experts used count = {n_experts_used}") logger.info(f"gguf: experts used count = {n_experts_used}")
self.gguf_writer.add_file_type(self.ftype) self.gguf_writer.add_file_type(self.ftype)
print(f"gguf: file type = {self.ftype}") logger.info(f"gguf: file type = {self.ftype}")
def write_tensors(self): def write_tensors(self):
block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer"))) block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
@ -151,7 +154,7 @@ class Model(ABC):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -169,7 +172,7 @@ class Model(ABC):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -553,7 +556,7 @@ class BloomModel(Model):
), ),
axis=0, axis=0,
) )
print("re-format attention.linear_qkv.weight") logger.info("re-format attention.linear_qkv.weight")
elif re.match(r"h\.\d+\.self_attention\.query_key_value\.bias", name): elif re.match(r"h\.\d+\.self_attention\.query_key_value\.bias", name):
qkv_bias = data.reshape((n_head, 3, n_embed // n_head)) qkv_bias = data.reshape((n_head, 3, n_embed // n_head))
data = np.concatenate( data = np.concatenate(
@ -564,12 +567,12 @@ class BloomModel(Model):
), ),
axis=0, axis=0,
) )
print("re-format attention.linear_qkv.bias") logger.info("re-format attention.linear_qkv.bias")
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -587,13 +590,13 @@ class BloomModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"=> {new_name}, shape = {data.shape}, {old_dtype} --> {data.dtype}") logger.info(f"=> {new_name}, shape = {data.shape}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
if not has_lm_head and name == "word_embeddings.weight": if not has_lm_head and name == "word_embeddings.weight":
self.gguf_writer.add_tensor("output.weight", data) self.gguf_writer.add_tensor("output.weight", data)
print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}") logger.info(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}")
@Model.register("MPTForCausalLM") @Model.register("MPTForCausalLM")
@ -653,7 +656,7 @@ class MPTModel(Model):
else: else:
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -671,7 +674,7 @@ class MPTModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -697,7 +700,7 @@ class OrionModel(Model):
elif "model_max_length" in self.hparams: elif "model_max_length" in self.hparams:
ctx_length = self.hparams["model_max_length"] ctx_length = self.hparams["model_max_length"]
else: else:
print("gguf: can not find ctx length parameter.") logger.error("gguf: can not find ctx length parameter.")
sys.exit() sys.exit()
self.gguf_writer.add_file_type(self.ftype) self.gguf_writer.add_file_type(self.ftype)
@ -736,7 +739,7 @@ class OrionModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -754,7 +757,7 @@ class OrionModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -779,7 +782,7 @@ class BaichuanModel(Model):
elif "model_max_length" in self.hparams: elif "model_max_length" in self.hparams:
ctx_length = self.hparams["model_max_length"] ctx_length = self.hparams["model_max_length"]
else: else:
print("gguf: can not find ctx length parameter.") logger.error("gguf: can not find ctx length parameter.")
sys.exit() sys.exit()
self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_name(self.dir_model.name)
@ -809,7 +812,7 @@ class BaichuanModel(Model):
for i in range(block_count): for i in range(block_count):
if (w := model_kv.get(f"model.layers.{i}.self_attn.W_pack.weight")) is not None: if (w := model_kv.get(f"model.layers.{i}.self_attn.W_pack.weight")) is not None:
print(f"Unpacking and permuting layer {i}") logger.info(f"Unpacking and permuting layer {i}")
model_kv[f"model.layers.{i}.self_attn.q_proj.weight"] = \ model_kv[f"model.layers.{i}.self_attn.q_proj.weight"] = \
self._reverse_hf_permute_part(w, 0, head_count, head_count) self._reverse_hf_permute_part(w, 0, head_count, head_count)
model_kv[f"model.layers.{i}.self_attn.k_proj.weight"] = \ model_kv[f"model.layers.{i}.self_attn.k_proj.weight"] = \
@ -834,7 +837,7 @@ class BaichuanModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -852,7 +855,7 @@ class BaichuanModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor: def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
@ -937,7 +940,7 @@ class XverseModel(Model):
elif "model_max_length" in self.hparams: elif "model_max_length" in self.hparams:
ctx_length = self.hparams["model_max_length"] ctx_length = self.hparams["model_max_length"]
else: else:
print("gguf: can not find ctx length parameter.") logger.error("gguf: can not find ctx length parameter.")
sys.exit() sys.exit()
self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_name(self.dir_model.name)
@ -987,7 +990,7 @@ class XverseModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -1005,7 +1008,7 @@ class XverseModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{name} -> {new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor: def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
@ -1092,7 +1095,7 @@ class FalconModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -1110,7 +1113,7 @@ class FalconModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -1197,7 +1200,7 @@ class RefactModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight",)) new_name = tensor_map.get_name(name, try_suffixes=(".weight",))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -1215,7 +1218,7 @@ class RefactModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -1264,10 +1267,10 @@ class PersimmonModel(Model):
data = data_torch.to(torch.float32).squeeze().numpy() data = data_torch.to(torch.float32).squeeze().numpy()
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -1480,10 +1483,10 @@ class LlamaModel(Model):
new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
continue continue
@ -1491,7 +1494,7 @@ class LlamaModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -1509,7 +1512,7 @@ class LlamaModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -1584,10 +1587,10 @@ class GrokModel(Model):
new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(merged_name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
print(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {len(data.shape)}, shape = {data.shape} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
continue continue
@ -1595,7 +1598,7 @@ class GrokModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -1613,7 +1616,7 @@ class GrokModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -1771,7 +1774,7 @@ class MiniCPMModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -1789,7 +1792,7 @@ class MiniCPMModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -1855,7 +1858,7 @@ class QwenModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -1873,7 +1876,7 @@ class QwenModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -2024,7 +2027,7 @@ class GPT2Model(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -2042,13 +2045,13 @@ class GPT2Model(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
# note: GPT2 output is tied to (same as) wte in original model # note: GPT2 output is tied to (same as) wte in original model
if new_name == "token_embd.weight": if new_name == "token_embd.weight":
print(f"output.weight, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"output.weight, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor("output.weight", data) self.gguf_writer.add_tensor("output.weight", data)
@ -2208,7 +2211,7 @@ class PlamoModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
# shuffle for broadcasting of gqa in ggml_mul_mat # shuffle for broadcasting of gqa in ggml_mul_mat
@ -2240,7 +2243,7 @@ class PlamoModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -2286,7 +2289,7 @@ class CodeShellModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -2304,13 +2307,13 @@ class CodeShellModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
if not has_lm_head and name == "transformer.wte.weight": if not has_lm_head and name == "transformer.wte.weight":
self.gguf_writer.add_tensor("output.weight", data) self.gguf_writer.add_tensor("output.weight", data)
print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}") logger.info(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}")
@Model.register("InternLM2ForCausalLM") @Model.register("InternLM2ForCausalLM")
@ -2332,7 +2335,7 @@ class InternLM2Model(Model):
toktypes: list[int] = [] toktypes: list[int] = []
if not tokenizer_path.is_file(): if not tokenizer_path.is_file():
print(f'Error: Missing {tokenizer_path}', file=sys.stderr) logger.error(f'Error: Missing {tokenizer_path}')
sys.exit(1) sys.exit(1)
sentencepiece_model = model.ModelProto() sentencepiece_model = model.ModelProto()
@ -2349,7 +2352,7 @@ class InternLM2Model(Model):
if text == b"\x00": if text == b"\x00":
# (TODO): fixme # (TODO): fixme
# Hack here and replace the \x00 characters. # Hack here and replace the \x00 characters.
print(f"InternLM2 convert token '{text}' to '🐉'!") logger.debug(f"InternLM2 convert token '{text}' to '🐉'!")
text = "🐉" text = "🐉"
toktype = SentencePieceTokenTypes.NORMAL toktype = SentencePieceTokenTypes.NORMAL
@ -2390,7 +2393,7 @@ class InternLM2Model(Model):
# TODO: this is a hack, should be fixed # TODO: this is a hack, should be fixed
# https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2067687048 # https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2067687048
special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer) special_vocab.special_token_ids["eos"] = self._try_get_sft_eos(tokenizer)
print(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \ logger.debug(f"Replace eos:{old_eos} with a special token:{special_vocab.special_token_ids['eos']} \
in chat mode so that the conversation can end normally.") in chat mode so that the conversation can end normally.")
special_vocab.add_to_gguf(self.gguf_writer) special_vocab.add_to_gguf(self.gguf_writer)
@ -2435,7 +2438,7 @@ in chat mode so that the conversation can end normally.")
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -2453,7 +2456,7 @@ in chat mode so that the conversation can end normally.")
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
def write_tensors(self): def write_tensors(self):
@ -2564,7 +2567,7 @@ class BertModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
# convert any unsupported data types to float32 # convert any unsupported data types to float32
@ -2585,7 +2588,7 @@ class BertModel(Model):
# if f32 desired, convert any float16 to float32 # if f32 desired, convert any float16 to float32
new_dtype = np.float32 new_dtype = np.float32
print(f"{new_name}, n_dims = {n_dims}, {data_torch.dtype} --> {new_dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {data_torch.dtype} --> {new_dtype}")
if data.dtype != new_dtype: if data.dtype != new_dtype:
data = data.astype(new_dtype) data = data.astype(new_dtype)
@ -2681,7 +2684,7 @@ class GemmaModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
@ -2693,7 +2696,7 @@ class GemmaModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -2721,7 +2724,7 @@ class MambaModel(Model):
else: else:
# Use the GPT-NeoX tokenizer when no tokenizer files are present # Use the GPT-NeoX tokenizer when no tokenizer files are present
tokenizer_path = Path(sys.path[0]) / "models" / "ggml-vocab-gpt-neox.gguf" tokenizer_path = Path(sys.path[0]) / "models" / "ggml-vocab-gpt-neox.gguf"
print(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'") logger.debug(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'")
neox_reader = gguf.GGUFReader(tokenizer_path, "r") neox_reader = gguf.GGUFReader(tokenizer_path, "r")
field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL) field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
@ -2793,17 +2796,17 @@ class MambaModel(Model):
# map tensor names # map tensor names
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
if new_name is None: if new_name is None:
print(f"Can not map tensor {name!r}") logger.error(f"Can not map tensor {name!r}")
sys.exit() sys.exit()
if name.endswith(".A_log"): if name.endswith(".A_log"):
print("A_log --> A ==> " + new_name) logger.debug("A_log --> A ==> " + new_name)
data_torch = -torch.exp(data_torch) data_torch = -torch.exp(data_torch)
# assuming token_embd.weight is seen before output.weight # assuming token_embd.weight is seen before output.weight
if tok_embd is not None and new_name == output_name: if tok_embd is not None and new_name == output_name:
if torch.equal(tok_embd, data_torch): if torch.equal(tok_embd, data_torch):
print(f"{output_name} is equivalent to {tok_embd_name}, omitting") logger.debug(f"{output_name} is equivalent to {tok_embd_name}, omitting")
continue continue
if new_name == tok_embd_name: if new_name == tok_embd_name:
tok_embd = data_torch tok_embd = data_torch
@ -2826,7 +2829,7 @@ class MambaModel(Model):
if self.ftype == 1 and data_dtype == np.float32 and new_weight_name.endswith((".ssm_in", ".ssm_out", "token_embd", "output")) and n_dims == 2: if self.ftype == 1 and data_dtype == np.float32 and new_weight_name.endswith((".ssm_in", ".ssm_out", "token_embd", "output")) and n_dims == 2:
data = data.astype(np.float16) data = data.astype(np.float16)
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}") logger.info(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
self.gguf_writer.add_tensor(new_name, data) self.gguf_writer.add_tensor(new_name, data)
@ -2936,6 +2939,7 @@ def parse_args() -> argparse.Namespace:
) )
parser.add_argument("--use-temp-file", action="store_true", help="use the tempfile library while processing (helpful when running out of memory, process killed)") parser.add_argument("--use-temp-file", action="store_true", help="use the tempfile library while processing (helpful when running out of memory, process killed)")
parser.add_argument("--model-name", type=str, default=None, help="name of the model") parser.add_argument("--model-name", type=str, default=None, help="name of the model")
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
return parser.parse_args() return parser.parse_args()
@ -2943,6 +2947,11 @@ def parse_args() -> argparse.Namespace:
def main() -> None: def main() -> None:
args = parse_args() args = parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
dir_model = args.model dir_model = args.model
if args.awq_path: if args.awq_path:
@ -2951,15 +2960,15 @@ def main() -> None:
tmp_model_path = args.model / "weighted_model" tmp_model_path = args.model / "weighted_model"
dir_model = tmp_model_path dir_model = tmp_model_path
if tmp_model_path.is_dir(): if tmp_model_path.is_dir():
print(f"{tmp_model_path} exists as a weighted model.") logger.info(f"{tmp_model_path} exists as a weighted model.")
else: else:
tmp_model_path.mkdir(parents=True, exist_ok=True) tmp_model_path.mkdir(parents=True, exist_ok=True)
print("Saving new weighted model ...") logger.info("Saving new weighted model ...")
add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path)) add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
print(f"Saved weighted model at {tmp_model_path}.") logger.info(f"Saved weighted model at {tmp_model_path}.")
if not dir_model.is_dir(): if not dir_model.is_dir():
print(f'Error: {args.model} is not a directory', file=sys.stderr) logger.error(f'Error: {args.model} is not a directory')
sys.exit(1) sys.exit(1)
ftype_map = { ftype_map = {
@ -2973,7 +2982,7 @@ def main() -> None:
# output in the same directory as the model by default # output in the same directory as the model by default
fname_out = dir_model / f'ggml-model-{args.outtype}.gguf' fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'
print(f"Loading model: {dir_model.name}") logger.info(f"Loading model: {dir_model.name}")
hparams = Model.load_hparams(dir_model) hparams = Model.load_hparams(dir_model)
@ -2981,20 +2990,20 @@ def main() -> None:
model_class = Model.from_model_architecture(hparams["architectures"][0]) model_class = Model.from_model_architecture(hparams["architectures"][0])
model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file) model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file)
print("Set model parameters") logger.info("Set model parameters")
model_instance.set_gguf_parameters() model_instance.set_gguf_parameters()
print("Set model tokenizer") logger.info("Set model tokenizer")
model_instance.set_vocab() model_instance.set_vocab()
if args.vocab_only: if args.vocab_only:
print(f"Exporting model vocab to '{fname_out}'") logger.info(f"Exporting model vocab to '{fname_out}'")
model_instance.write_vocab() model_instance.write_vocab()
else: else:
print(f"Exporting model to '{fname_out}'") logger.info(f"Exporting model to '{fname_out}'")
model_instance.write() model_instance.write()
print(f"Model successfully exported to '{fname_out}'") logger.info(f"Model successfully exported to '{fname_out}'")
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations from __future__ import annotations
import logging
import argparse import argparse
import os import os
import struct import struct
@ -14,6 +15,8 @@ if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf import gguf
logger = logging.getLogger("ggml-to-gguf")
class GGMLFormat(IntEnum): class GGMLFormat(IntEnum):
GGML = 0 GGML = 0
@ -125,7 +128,7 @@ class Tensor:
self.start_offset = offset self.start_offset = offset
self.len_bytes = n_bytes self.len_bytes = n_bytes
offset += n_bytes offset += n_bytes
# print(n_dims, name_len, dtype, self.dims, self.name, pad) # logger.info(n_dims, name_len, dtype, self.dims, self.name, pad)
return offset - orig_offset return offset - orig_offset
@ -175,7 +178,7 @@ class GGMLModel:
offset += self.validate_header(data, offset) offset += self.validate_header(data, offset)
hp = Hyperparameters() hp = Hyperparameters()
offset += hp.load(data, offset) offset += hp.load(data, offset)
print(f'* File format: {self.file_format.name}v{self.format_version} with ftype {hp.ftype.name}') logger.info(f'* File format: {self.file_format.name}v{self.format_version} with ftype {hp.ftype.name}')
self.validate_conversion(hp.ftype) self.validate_conversion(hp.ftype)
vocab = Vocab(load_scores = self.file_format > GGMLFormat.GGML) vocab = Vocab(load_scores = self.file_format > GGMLFormat.GGML)
offset += vocab.load(data, offset, hp.n_vocab) offset += vocab.load(data, offset, hp.n_vocab)
@ -215,12 +218,12 @@ class GGMLToGGUF:
if float(hp.n_head) / float(x) == gqa: if float(hp.n_head) / float(x) == gqa:
n_kv_head = x n_kv_head = x
assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param" assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}') logger.info(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
self.n_kv_head = n_kv_head self.n_kv_head = n_kv_head
self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer) self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)
def save(self): def save(self):
print('* Preparing to save GGUF file') logger.info('* Preparing to save GGUF file')
gguf_writer = gguf.GGUFWriter( gguf_writer = gguf.GGUFWriter(
self.cfg.output, self.cfg.output,
gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
@ -230,11 +233,11 @@ class GGMLToGGUF:
if self.special_vocab is not None: if self.special_vocab is not None:
self.special_vocab.add_to_gguf(gguf_writer) self.special_vocab.add_to_gguf(gguf_writer)
self.add_tensors(gguf_writer) self.add_tensors(gguf_writer)
print(" gguf: write header") logger.info(" gguf: write header")
gguf_writer.write_header_to_file() gguf_writer.write_header_to_file()
print(" gguf: write metadata") logger.info(" gguf: write metadata")
gguf_writer.write_kv_data_to_file() gguf_writer.write_kv_data_to_file()
print(" gguf: write tensors") logger.info(" gguf: write tensors")
gguf_writer.write_tensors_to_file() gguf_writer.write_tensors_to_file()
gguf_writer.close() gguf_writer.close()
@ -250,7 +253,7 @@ class GGMLToGGUF:
name = cfg.name if cfg.name is not None else cfg.input.name name = cfg.name if cfg.name is not None else cfg.input.name
except UnicodeDecodeError: except UnicodeDecodeError:
name = None name = None
print('* Adding model parameters and KV items') logger.info('* Adding model parameters and KV items')
if name is not None: if name is not None:
gguf_writer.add_name(name) gguf_writer.add_name(name)
gguf_writer.add_description(desc) gguf_writer.add_description(desc)
@ -287,7 +290,7 @@ class GGMLToGGUF:
toktypes = [] toktypes = []
if self.vocab_override is not None: if self.vocab_override is not None:
vo = self.vocab_override vo = self.vocab_override
print('* Adding vocab item(s)') logger.info('* Adding vocab item(s)')
for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()): for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
tokens.append(vbytes) tokens.append(vbytes)
scores.append(score) scores.append(score)
@ -299,7 +302,7 @@ class GGMLToGGUF:
if len(toktypes) > 0: if len(toktypes) > 0:
gguf_writer.add_token_types(toktypes) gguf_writer.add_token_types(toktypes)
return return
print(f'* Adding {hp.n_vocab} vocab item(s)') logger.info(f'* Adding {hp.n_vocab} vocab item(s)')
assert len(self.model.vocab.items) >= 3, 'Cannot handle unexpectedly short model vocab' assert len(self.model.vocab.items) >= 3, 'Cannot handle unexpectedly short model vocab'
for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items): for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
tt = 1 # Normal tt = 1 # Normal
@ -334,7 +337,7 @@ class GGMLToGGUF:
def add_tensors(self, gguf_writer): def add_tensors(self, gguf_writer):
tensor_map = self.name_map tensor_map = self.name_map
data = self.data data = self.data
print(f'* Adding {len(self.model.tensors)} tensor(s)') logger.info(f'* Adding {len(self.model.tensors)} tensor(s)')
for tensor in self.model.tensors: for tensor in self.model.tensors:
name = str(tensor.name, 'UTF-8') name = str(tensor.name, 'UTF-8')
mapped_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias")) mapped_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
@ -344,7 +347,7 @@ class GGMLToGGUF:
temp = tempdims[1] temp = tempdims[1]
tempdims[1] = tempdims[0] tempdims[1] = tempdims[0]
tempdims[0] = temp tempdims[0] = temp
# print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}') # logger.info(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
gguf_writer.add_tensor( gguf_writer.add_tensor(
mapped_name, mapped_name,
data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], data[tensor.start_offset:tensor.start_offset + tensor.len_bytes],
@ -401,33 +404,38 @@ def handle_args():
help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir") help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
parser.add_argument("--vocabtype", default="spm,hfft", parser.add_argument("--vocabtype", default="spm,hfft",
help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm,hfft)") help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm,hfft)")
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
return parser.parse_args() return parser.parse_args()
def main(): def main():
cfg = handle_args() cfg = handle_args()
print(f'* Using config: {cfg}') if cfg.verbose:
print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n') logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logger.info(f'* Using config: {cfg}')
logger.warning('=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===')
if cfg.model_metadata_dir is None and (cfg.gqa == 1 or cfg.eps == '5.0e-06'): if cfg.model_metadata_dir is None and (cfg.gqa == 1 or cfg.eps == '5.0e-06'):
print('- Note: If converting LLaMA2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".') logger.info('- Note: If converting LLaMA2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".')
data = np.memmap(cfg.input, mode = 'r') data = np.memmap(cfg.input, mode = 'r')
model = GGMLModel() model = GGMLModel()
print('* Scanning GGML input file') logger.info('* Scanning GGML input file')
offset = model.load(data, 0) # noqa offset = model.load(data, 0) # noqa
print(f'* GGML model hyperparameters: {model.hyperparameters}') logger.info(f'* GGML model hyperparameters: {model.hyperparameters}')
vocab_override = None vocab_override = None
params_override = None params_override = None
special_vocab = None special_vocab = None
if cfg.model_metadata_dir is not None: if cfg.model_metadata_dir is not None:
(params_override, vocab_override, special_vocab) = handle_metadata(cfg, model.hyperparameters) (params_override, vocab_override, special_vocab) = handle_metadata(cfg, model.hyperparameters)
print('!! Note: When overriding params the --gqa, --eps and --context-length options are ignored.') logger.info('!! Note: When overriding params the --gqa, --eps and --context-length options are ignored.')
print(f'* Overriding params: {params_override}') logger.info(f'* Overriding params: {params_override}')
print(f'* Overriding vocab: {vocab_override}') logger.info(f'* Overriding vocab: {vocab_override}')
print(f'* Special vocab: {special_vocab}') logger.info(f'* Special vocab: {special_vocab}')
else: else:
print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n') logger.warning('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
if model.file_format == GGMLFormat.GGML: if model.file_format == GGMLFormat.GGML:
print('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!') logger.info('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!')
converter = GGMLToGGUF( converter = GGMLToGGUF(
model, data, cfg, model, data, cfg,
params_override = params_override, params_override = params_override,
@ -435,7 +443,7 @@ def main():
special_vocab = special_vocab special_vocab = special_vocab
) )
converter.save() converter.save()
print(f'* Successful completion. Output saved to: {cfg.output}') logger.info(f'* Successful completion. Output saved to: {cfg.output}')
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations from __future__ import annotations
import logging
import json import json
import os import os
import struct import struct
@ -15,6 +16,8 @@ if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf')) sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
import gguf import gguf
logger = logging.getLogger("lora-to-gguf")
NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1} NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}
@ -48,11 +51,9 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
if __name__ == '__main__': if __name__ == '__main__':
if len(sys.argv) < 2: if len(sys.argv) < 2:
print(f"Usage: python {sys.argv[0]} <path> [arch]") logger.info(f"Usage: python {sys.argv[0]} <path> [arch]")
print( logger.info("Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'")
"Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'" logger.info(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
)
print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
sys.exit(1) sys.exit(1)
input_json = os.path.join(sys.argv[1], "adapter_config.json") input_json = os.path.join(sys.argv[1], "adapter_config.json")
@ -70,7 +71,7 @@ if __name__ == '__main__':
arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama" arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
if arch_name not in gguf.MODEL_ARCH_NAMES.values(): if arch_name not in gguf.MODEL_ARCH_NAMES.values():
print(f"Error: unsupported architecture {arch_name}") logger.error(f"Error: unsupported architecture {arch_name}")
sys.exit(1) sys.exit(1)
arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)] arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
@ -80,21 +81,21 @@ if __name__ == '__main__':
params = json.load(f) params = json.load(f)
if params["peft_type"] != "LORA": if params["peft_type"] != "LORA":
print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA") logger.error(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
sys.exit(1) sys.exit(1)
if params["fan_in_fan_out"] is True: if params["fan_in_fan_out"] is True:
print("Error: param fan_in_fan_out is not supported") logger.error("Error: param fan_in_fan_out is not supported")
sys.exit(1) sys.exit(1)
if params["bias"] is not None and params["bias"] != "none": if params["bias"] is not None and params["bias"] != "none":
print("Error: param bias is not supported") logger.error("Error: param bias is not supported")
sys.exit(1) sys.exit(1)
# TODO: these seem to be layers that have been trained but without lora. # TODO: these seem to be layers that have been trained but without lora.
# doesn't seem widely used but eventually should be supported # doesn't seem widely used but eventually should be supported
if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0: if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
print("Error: param modules_to_save is not supported") logger.error("Error: param modules_to_save is not supported")
sys.exit(1) sys.exit(1)
with open(output_path, "wb") as fout: with open(output_path, "wb") as fout:
@ -125,13 +126,13 @@ if __name__ == '__main__':
suffix = k[-len(lora_suffixes[0]):] suffix = k[-len(lora_suffixes[0]):]
k = k[: -len(lora_suffixes[0])] k = k[: -len(lora_suffixes[0])]
else: else:
print(f"Error: unrecognized tensor name {orig_k}") logger.error(f"Error: unrecognized tensor name {orig_k}")
sys.exit(1) sys.exit(1)
tname = name_map.get_name(k) tname = name_map.get_name(k)
if tname is None: if tname is None:
print(f"Error: could not map tensor name {orig_k}") logger.error(f"Error: could not map tensor name {orig_k}")
print(" Note: the arch parameter must be specified if the model is not llama") logger.error(" Note: the arch parameter must be specified if the model is not llama")
sys.exit(1) sys.exit(1)
if suffix == ".lora_A.weight": if suffix == ".lora_A.weight":
@ -141,8 +142,8 @@ if __name__ == '__main__':
else: else:
assert False assert False
print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB") logger.info(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
write_tensor_header(fout, tname, t.shape, t.dtype) write_tensor_header(fout, tname, t.shape, t.dtype)
t.tofile(fout) t.tofile(fout)
print(f"Converted {input_json} and {input_model} to {output_path}") logger.info(f"Converted {input_json} and {input_model} to {output_path}")

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations from __future__ import annotations
import logging
import argparse import argparse
import os import os
import sys import sys
@ -14,6 +15,8 @@ if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf import gguf
logger = logging.getLogger("persimmon-to-gguf")
def _flatten_dict(dct, tensors, prefix=None): def _flatten_dict(dct, tensors, prefix=None):
assert isinstance(dct, dict) assert isinstance(dct, dict)
@ -30,9 +33,9 @@ def _flatten_dict(dct, tensors, prefix=None):
def _get_sentencepiece_tokenizer_info(dir_model: Path): def _get_sentencepiece_tokenizer_info(dir_model: Path):
tokenizer_path = dir_model / 'adept_vocab.model' tokenizer_path = dir_model / 'adept_vocab.model'
print('gguf: getting sentencepiece tokenizer from', tokenizer_path) logger.info('getting sentencepiece tokenizer from', tokenizer_path)
tokenizer = SentencePieceProcessor(str(tokenizer_path)) tokenizer = SentencePieceProcessor(str(tokenizer_path))
print('gguf: adding tokens') logger.info('adding tokens')
tokens: list[bytes] = [] tokens: list[bytes] = []
scores: list[float] = [] scores: list[float] = []
toktypes: list[int] = [] toktypes: list[int] = []
@ -68,7 +71,12 @@ def main():
parser.add_argument("--ckpt-path", type=Path, help="path to persimmon checkpoint .pt file") parser.add_argument("--ckpt-path", type=Path, help="path to persimmon checkpoint .pt file")
parser.add_argument("--model-dir", type=Path, help="directory containing model e.g. 8b_chat_model_release") parser.add_argument("--model-dir", type=Path, help="directory containing model e.g. 8b_chat_model_release")
parser.add_argument("--adept-inference-dir", type=str, help="path to adept-inference code directory") parser.add_argument("--adept-inference-dir", type=str, help="path to adept-inference code directory")
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
args = parser.parse_args() args = parser.parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
sys.path.append(str(args.adept_inference_dir)) sys.path.append(str(args.adept_inference_dir))
persimmon_model = torch.load(args.ckpt_path) persimmon_model = torch.load(args.ckpt_path)
hparams = persimmon_model['args'] hparams = persimmon_model['args']
@ -107,7 +115,7 @@ def main():
gguf_writer.add_eos_token_id(71013) gguf_writer.add_eos_token_id(71013)
tensor_map = gguf.get_tensor_name_map(arch, block_count) tensor_map = gguf.get_tensor_name_map(arch, block_count)
print(tensor_map) logger.info(tensor_map)
for name in tensors.keys(): for name in tensors.keys():
data_torch = tensors[name] data_torch = tensors[name]
if name.endswith(".self_attention.rotary_emb.inv_freq"): if name.endswith(".self_attention.rotary_emb.inv_freq"):
@ -117,22 +125,21 @@ def main():
data = data_torch.to(torch.float32).squeeze().numpy() data = data_torch.to(torch.float32).squeeze().numpy()
new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias")) new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
if new_name is None: if new_name is None:
print("Can not map tensor '" + name + "'") logger.error(f"Can not map tensor '{name}'")
sys.exit() sys.exit()
n_dims = len(data.shape) n_dims = len(data.shape)
print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype)) logger.debug(f"{new_name}, n_dims = {str(n_dims)}, {str(old_dtype)} --> {str(data.dtype)}")
gguf_writer.add_tensor(new_name, data) gguf_writer.add_tensor(new_name, data)
print("gguf: write header") logger.info("gguf: write header")
gguf_writer.write_header_to_file() gguf_writer.write_header_to_file()
print("gguf: write metadata") logger.info("gguf: write metadata")
gguf_writer.write_kv_data_to_file() gguf_writer.write_kv_data_to_file()
print("gguf: write tensors") logger.info("gguf: write tensors")
gguf_writer.write_tensors_to_file() gguf_writer.write_tensors_to_file()
gguf_writer.close() gguf_writer.close()
print(f"gguf: model successfully exported to '{args.outfile}'") logger.info(f"gguf: model successfully exported to '{args.outfile}'")
print("")
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -1449,11 +1449,11 @@ def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
def do_dump_model(model_plus: ModelPlus) -> None: def do_dump_model(model_plus: ModelPlus) -> None:
print(f"model_plus.paths = {model_plus.paths!r}") print(f"model_plus.paths = {model_plus.paths!r}") # noqa: NP100
print(f"model_plus.format = {model_plus.format!r}") print(f"model_plus.format = {model_plus.format!r}") # noqa: NP100
print(f"model_plus.vocab = {model_plus.vocab!r}") print(f"model_plus.vocab = {model_plus.vocab!r}") # noqa: NP100
for name, lazy_tensor in model_plus.model.items(): for name, lazy_tensor in model_plus.model.items():
print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}") print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}") # noqa: NP100
def main(args_in: list[str] | None = None) -> None: def main(args_in: list[str] | None = None) -> None:

View file

@ -1,11 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
import logging
import argparse import argparse
import asyncio import asyncio
import os import os
import sys import sys
from tempfile import gettempdir, NamedTemporaryFile from tempfile import gettempdir, NamedTemporaryFile
logger = logging.getLogger("ggml-vk-generate-shaders")
shader_f32 = """ shader_f32 = """
#define FLOAT_TYPE float #define FLOAT_TYPE float
""" """
@ -2498,7 +2501,7 @@ async def string_to_spv(name, code, defines, fp16=True):
stdout, stderr = await proc.communicate() stdout, stderr = await proc.communicate()
print(" ".join(cmd)) logger.info(" ".join(cmd))
if proc.returncode: if proc.returncode:
raise RuntimeError(f"{name=} {f.name=} {stdout=} {stderr=}") raise RuntimeError(f"{name=} {f.name=} {stdout=} {stderr=}")
@ -2507,7 +2510,7 @@ async def string_to_spv(name, code, defines, fp16=True):
cmd.extend([f"-D{key}={value}" for key, value in defines.items()]) cmd.extend([f"-D{key}={value}" for key, value in defines.items()])
code_with_lines = "\n".join([f"{i + 1}: {line}" for i, line in enumerate(preprocessed_code.splitlines())]) code_with_lines = "\n".join([f"{i + 1}: {line}" for i, line in enumerate(preprocessed_code.splitlines())])
print(f"ERROR compiling {name}\n\n{code_with_lines}\n\n{error}") logger.error(f"cannot compile {name}\n\n{code_with_lines}\n\n{error}")
f.close() f.close()
os.remove(f.name) os.remove(f.name)
sys.exit(proc.returncode) sys.exit(proc.returncode)
@ -2520,7 +2523,7 @@ async def string_to_spv(name, code, defines, fp16=True):
async def main(): async def main():
print("ggml_vulkan: Generating and compiling shaders to SPIR-V") logger.info("ggml_vulkan: Generating and compiling shaders to SPIR-V")
tasks = [] tasks = []
@ -2768,9 +2771,15 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GGML Vulkan Shader Generator") parser = argparse.ArgumentParser(description="GGML Vulkan Shader Generator")
parser.add_argument("--glslc", help="Path to glslc") parser.add_argument("--glslc", help="Path to glslc")
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
args = parser.parse_args() args = parser.parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
if args.glslc: if args.glslc:
GLSLC = args.glslc GLSLC = args.glslc

View file

@ -1,8 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import logging
import sys import sys
from pathlib import Path from pathlib import Path
from gguf.gguf_reader import GGUFReader from gguf.gguf_reader import GGUFReader
logger = logging.getLogger("reader")
sys.path.insert(0, str(Path(__file__).parent.parent)) sys.path.insert(0, str(Path(__file__).parent.parent))
@ -18,28 +20,28 @@ def read_gguf_file(gguf_file_path):
reader = GGUFReader(gguf_file_path) reader = GGUFReader(gguf_file_path)
# List all key-value pairs in a columnized format # List all key-value pairs in a columnized format
print("Key-Value Pairs:") logger.info("Key-Value Pairs:")
max_key_length = max(len(key) for key in reader.fields.keys()) max_key_length = max(len(key) for key in reader.fields.keys())
for key, field in reader.fields.items(): for key, field in reader.fields.items():
value = field.parts[field.data[0]] value = field.parts[field.data[0]]
print(f"{key:{max_key_length}} : {value}") logger.info(f"{key:{max_key_length}} : {value}")
print("----") logger.info("----")
# List all tensors # List all tensors
print("Tensors:") logger.info("Tensors:")
tensor_info_format = "{:<30} | Shape: {:<15} | Size: {:<12} | Quantization: {}" tensor_info_format = "{:<30} | Shape: {:<15} | Size: {:<12} | Quantization: {}"
print(tensor_info_format.format("Tensor Name", "Shape", "Size", "Quantization")) logger.info(tensor_info_format.format("Tensor Name", "Shape", "Size", "Quantization"))
print("-" * 80) logger.info("-" * 80)
for tensor in reader.tensors: for tensor in reader.tensors:
shape_str = "x".join(map(str, tensor.shape)) shape_str = "x".join(map(str, tensor.shape))
size_str = str(tensor.n_elements) size_str = str(tensor.n_elements)
quantization_str = tensor.tensor_type.name quantization_str = tensor.tensor_type.name
print(tensor_info_format.format(tensor.name, shape_str, size_str, quantization_str)) logger.info(tensor_info_format.format(tensor.name, shape_str, size_str, quantization_str))
if __name__ == '__main__': if __name__ == '__main__':
if len(sys.argv) < 2: if len(sys.argv) < 2:
print("Usage: reader.py <path_to_gguf_file>") logger.info("Usage: reader.py <path_to_gguf_file>")
sys.exit(1) sys.exit(1)
gguf_file_path = sys.argv[1] gguf_file_path = sys.argv[1]
read_gguf_file(gguf_file_path) read_gguf_file(gguf_file_path)

View file

@ -1,9 +1,12 @@
from __future__ import annotations from __future__ import annotations
import logging
import sys import sys
from enum import Enum, IntEnum, auto from enum import Enum, IntEnum, auto
from typing import Any from typing import Any
logger = logging.getLogger("constants")
# #
# constants # constants
# #
@ -854,7 +857,7 @@ class GGUFValueType(IntEnum):
return GGUFValueType.INT32 return GGUFValueType.INT32
# TODO: need help with 64-bit types in Python # TODO: need help with 64-bit types in Python
else: else:
print("Unknown type:", type(val)) logger.error(f"Unknown type: {type(val)}")
sys.exit() sys.exit()

View file

@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import logging
import os import os
import shutil import shutil
import struct import struct
@ -24,6 +25,8 @@ from .constants import (
TokenType, TokenType,
) )
logger = logging.getLogger("gguf-writer")
class WriterState(Enum): class WriterState(Enum):
EMPTY = auto() EMPTY = auto()
@ -67,7 +70,7 @@ class GGUFWriter:
self.use_temp_file = use_temp_file self.use_temp_file = use_temp_file
self.temp_file = None self.temp_file = None
self.tensors = [] self.tensors = []
print("gguf: This GGUF file is for {0} Endian only".format( logger.info("gguf: This GGUF file is for {0} Endian only".format(
"Big" if self.endianess == GGUFEndian.BIG else "Little", "Big" if self.endianess == GGUFEndian.BIG else "Little",
)) ))
self.state = WriterState.EMPTY self.state = WriterState.EMPTY

View file

@ -1,13 +1,15 @@
from __future__ import annotations from __future__ import annotations
import logging
import json import json
import os import os
import sys
from pathlib import Path from pathlib import Path
from typing import Any, Callable from typing import Any, Callable
from .gguf_writer import GGUFWriter from .gguf_writer import GGUFWriter
logger = logging.getLogger("vocab")
class SpecialVocab: class SpecialVocab:
merges: list[str] merges: list[str]
@ -40,38 +42,29 @@ class SpecialVocab:
def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None: def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
if self.merges: if self.merges:
if not quiet: if not quiet:
print(f'gguf: Adding {len(self.merges)} merge(s).') logger.info(f'Adding {len(self.merges)} merge(s).')
gw.add_token_merges(self.merges) gw.add_token_merges(self.merges)
elif self.load_merges: elif self.load_merges:
print( logger.warning('Adding merges requested but no merges found, output may be non-functional.')
'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.',
file = sys.stderr,
)
for typ, tokid in self.special_token_ids.items(): for typ, tokid in self.special_token_ids.items():
id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None) id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
if id_handler is None: if id_handler is None:
print( logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping')
f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
file = sys.stderr,
)
continue continue
if not quiet: if not quiet:
print(f'gguf: Setting special token type {typ} to {tokid}') logger.info(f'Setting special token type {typ} to {tokid}')
id_handler(tokid) id_handler(tokid)
for typ, value in self.add_special_token.items(): for typ, value in self.add_special_token.items():
add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None) add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
if add_handler is None: if add_handler is None:
print( logger.warning(f'No handler for add_{typ}_token with value {value} - skipping')
f'gguf: WARNING: No handler for add_{typ}_token with value {value} - skipping',
file = sys.stderr,
)
continue continue
if not quiet: if not quiet:
print(f'gguf: Setting add_{typ}_token to {value}') logger.info(f'Setting add_{typ}_token to {value}')
add_handler(value) add_handler(value)
if self.chat_template is not None: if self.chat_template is not None:
if not quiet: if not quiet:
print(f'gguf: Setting chat_template to {self.chat_template}') logger.info(f'Setting chat_template to {self.chat_template}')
gw.add_chat_template(self.chat_template) gw.add_chat_template(self.chat_template)
def _load(self, path: Path) -> None: def _load(self, path: Path) -> None:
@ -99,10 +92,7 @@ class SpecialVocab:
continue continue
parts = line.split(None, 3) parts = line.split(None, 3)
if len(parts) != 2: if len(parts) != 2:
print( logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring')
f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring',
file = sys.stderr,
)
continue continue
merges.append(f'{parts[0]} {parts[1]}') merges.append(f'{parts[0]} {parts[1]}')
self.merges = merges self.merges = merges
@ -118,10 +108,7 @@ class SpecialVocab:
return return
self.special_token_ids[typ] = tid self.special_token_ids[typ] = tid
return return
print( logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
file = sys.stderr,
)
def _try_load_from_tokenizer_json(self, path: Path) -> bool: def _try_load_from_tokenizer_json(self, path: Path) -> bool:
tokenizer_file = path / 'tokenizer.json' tokenizer_file = path / 'tokenizer.json'
@ -144,10 +131,7 @@ class SpecialVocab:
if chat_template is None or isinstance(chat_template, (str, list)): if chat_template is None or isinstance(chat_template, (str, list)):
self.chat_template = chat_template self.chat_template = chat_template
else: else:
print( logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring',
file = sys.stderr
)
for typ in self.special_token_types: for typ in self.special_token_types:
add_entry = tokenizer_config.get(f'add_{typ}_token') add_entry = tokenizer_config.get(f'add_{typ}_token')
if isinstance(add_entry, bool): if isinstance(add_entry, bool):

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations from __future__ import annotations
import logging
import argparse import argparse
import os import os
import sys import sys
@ -14,6 +15,8 @@ if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent /
import gguf import gguf
logger = logging.getLogger("gguf-convert-endian")
def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None: def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None:
if np.uint32(1) == np.uint32(1).newbyteorder("<"): if np.uint32(1) == np.uint32(1).newbyteorder("<"):
@ -29,11 +32,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
else: else:
file_endian = host_endian file_endian = host_endian
order = host_endian if args.order == "native" else args.order order = host_endian if args.order == "native" else args.order
print(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian") logger.info(f"* Host is {host_endian.upper()} endian, GGUF file seems to be {file_endian.upper()} endian")
if file_endian == order: if file_endian == order:
print(f"* File is already {order.upper()} endian. Nothing to do.") logger.info(f"* File is already {order.upper()} endian. Nothing to do.")
sys.exit(0) sys.exit(0)
print("* Checking tensors for conversion compatibility") logger.info("* Checking tensors for conversion compatibility")
for tensor in reader.tensors: for tensor in reader.tensors:
if tensor.tensor_type not in ( if tensor.tensor_type not in (
gguf.GGMLQuantizationType.F32, gguf.GGMLQuantizationType.F32,
@ -41,38 +44,38 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
gguf.GGMLQuantizationType.Q8_0, gguf.GGMLQuantizationType.Q8_0,
): ):
raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}") raise ValueError(f"Cannot handle type {tensor.tensor_type.name} for tensor {repr(tensor.name)}")
print(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}") logger.info(f"* Preparing to convert from {file_endian.upper()} to {order.upper()}")
if args.dry_run: if args.dry_run:
return return
print("\n*** Warning *** Warning *** Warning **") logger.warning("*** Warning *** Warning *** Warning **")
print("* This conversion process may damage the file. Ensure you have a backup.") logger.warning("* This conversion process may damage the file. Ensure you have a backup.")
if order != host_endian: if order != host_endian:
print("* Requested endian differs from host, you will not be able to load the model on this machine.") logger.warning("* Requested endian differs from host, you will not be able to load the model on this machine.")
print("* The file will be modified immediately, so if conversion fails or is interrupted") logger.warning("* The file will be modified immediately, so if conversion fails or is interrupted")
print("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:") logger.warning("* the file will be corrupted. Enter exactly YES if you are positive you want to proceed:")
response = input("YES, I am sure> ") response = input("YES, I am sure> ")
if response != "YES": if response != "YES":
print("You didn't enter YES. Okay then, see ya!") logger.warning("You didn't enter YES. Okay then, see ya!")
sys.exit(0) sys.exit(0)
print(f"\n* Converting fields ({len(reader.fields)})") logger.info(f"* Converting fields ({len(reader.fields)})")
for idx, field in enumerate(reader.fields.values()): for idx, field in enumerate(reader.fields.values()):
print(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}") logger.info(f"- {idx:4}: Converting field {repr(field.name)}, part count: {len(field.parts)}")
for part in field.parts: for part in field.parts:
part.byteswap(inplace=True) part.byteswap(inplace=True)
print(f"\n* Converting tensors ({len(reader.tensors)})") logger.info(f"* Converting tensors ({len(reader.tensors)})")
for idx, tensor in enumerate(reader.tensors): for idx, tensor in enumerate(reader.tensors):
print( log_message = (
f" - {idx:4}: Converting tensor {repr(tensor.name)}, type={tensor.tensor_type.name}, " f" - {idx:4}: Converting tensor {repr(tensor.name)}, type={tensor.tensor_type.name}, "
f"elements={tensor.n_elements}... ", f"elements={tensor.n_elements}... "
end="",
) )
tensor_type = tensor.tensor_type tensor_type = tensor.tensor_type
for part in tensor.field.parts: for part in tensor.field.parts:
part.byteswap(inplace=True) part.byteswap(inplace=True)
if tensor_type != gguf.GGMLQuantizationType.Q8_0: if tensor_type != gguf.GGMLQuantizationType.Q8_0:
tensor.data.byteswap(inplace=True) tensor.data.byteswap(inplace=True)
print() logger.info(log_message)
continue continue
# A Q8_0 block consists of a f16 delta followed by 32 int8 quants, so 34 bytes # A Q8_0 block consists of a f16 delta followed by 32 int8 quants, so 34 bytes
block_size = 34 block_size = 34
n_blocks = len(tensor.data) // block_size n_blocks = len(tensor.data) // block_size
@ -82,10 +85,11 @@ def convert_byteorder(reader: gguf.GGUFReader, args: argparse.Namespace) -> None
delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16) delta = tensor.data[block_offs:block_offs + 2].view(dtype=np.uint16)
delta.byteswap(inplace=True) delta.byteswap(inplace=True)
if block_num % 100000 == 0: if block_num % 100000 == 0:
print(f"[{(n_blocks - block_num) // 1000}K]", end="") log_message += f"[{(n_blocks - block_num) // 1000}K]"
sys.stdout.flush()
print() logger.info(log_message)
print("* Completion")
logger.info("* Completion")
def main() -> None: def main() -> None:
@ -102,8 +106,16 @@ def main() -> None:
"--dry-run", action="store_true", "--dry-run", action="store_true",
help="Don't actually change anything", help="Don't actually change anything",
) )
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
print(f'* Loading: {args.model}')
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logger.info(f'* Loading: {args.model}')
reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+') reader = gguf.GGUFReader(args.model, 'r' if args.dry_run else 'r+')
convert_byteorder(reader, args) convert_byteorder(reader, args)

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations from __future__ import annotations
import logging
import argparse import argparse
import os import os
import sys import sys
@ -15,6 +16,8 @@ if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent /
from gguf import GGUFReader, GGUFValueType # noqa: E402 from gguf import GGUFReader, GGUFValueType # noqa: E402
logger = logging.getLogger("gguf-dump")
def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]: def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG' host_endian = 'LITTLE' if np.uint32(1) == np.uint32(1).newbyteorder("<") else 'BIG'
@ -29,8 +32,8 @@ def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
# please see the comments in the modify_gguf.py example. # please see the comments in the modify_gguf.py example.
def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
host_endian, file_endian = get_file_host_endian(reader) host_endian, file_endian = get_file_host_endian(reader)
print(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.') logger.info(f'* File is {file_endian} endian, script is running on a {host_endian} endian host.')
print(f'\n* Dumping {len(reader.fields)} key/value pair(s)') logger.info(f'* Dumping {len(reader.fields)} key/value pair(s)')
for n, field in enumerate(reader.fields.values(), 1): for n, field in enumerate(reader.fields.values(), 1):
if not field.types: if not field.types:
pretty_type = 'N/A' pretty_type = 'N/A'
@ -39,20 +42,21 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count pretty_type = '[' * nest_count + str(field.types[-1].name) + ']' * nest_count
else: else:
pretty_type = str(field.types[-1].name) pretty_type = str(field.types[-1].name)
print(f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}', end = '')
log_message = f' {n:5}: {pretty_type:10} | {len(field.data):8} | {field.name}'
if len(field.types) == 1: if len(field.types) == 1:
curr_type = field.types[0] curr_type = field.types[0]
if curr_type == GGUFValueType.STRING: if curr_type == GGUFValueType.STRING:
print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end = '') log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60]))
elif field.types[0] in reader.gguf_scalar_to_np: elif field.types[0] in reader.gguf_scalar_to_np:
print(' = {0}'.format(field.parts[-1][0]), end = '') log_message += ' = {0}'.format(field.parts[-1][0])
print() logger.info(log_message)
if args.no_tensors: if args.no_tensors:
return return
print(f'\n* Dumping {len(reader.tensors)} tensor(s)') logger.info(f'* Dumping {len(reader.tensors)} tensor(s)')
for n, tensor in enumerate(reader.tensors, 1): for n, tensor in enumerate(reader.tensors, 1):
prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape))) prettydims = ', '.join('{0:5}'.format(d) for d in list(tensor.shape) + [1] * (4 - len(tensor.shape)))
print(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}') logger.info(f' {n:5}: {tensor.n_elements:10} | {prettydims} | {tensor.tensor_type.name:7} | {tensor.name}')
def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None: def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
@ -103,10 +107,20 @@ def main() -> None:
parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata") parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
parser.add_argument("--json", action="store_true", help="Produce JSON output") parser.add_argument("--json", action="store_true", help="Produce JSON output")
parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)") parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
if not args.json: if not args.json:
print(f'* Loading: {args.model}') logger.info(f'* Loading: {args.model}')
reader = GGUFReader(args.model, 'r') reader = GGUFReader(args.model, 'r')
if args.json: if args.json:
dump_metadata_json(reader, args) dump_metadata_json(reader, args)
else: else:

View file

@ -1,4 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import logging
import argparse import argparse
import os import os
import sys import sys
@ -10,6 +11,8 @@ if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent /
from gguf import GGUFReader # noqa: E402 from gguf import GGUFReader # noqa: E402
logger = logging.getLogger("gguf-set-metadata")
def minimal_example(filename: str) -> None: def minimal_example(filename: str) -> None:
reader = GGUFReader(filename, 'r+') reader = GGUFReader(filename, 'r+')
@ -41,36 +44,33 @@ def minimal_example(filename: str) -> None:
def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None: def set_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
field = reader.get_field(args.key) field = reader.get_field(args.key)
if field is None: if field is None:
print(f'! Field {repr(args.key)} not found', file = sys.stderr) logger.error(f'! Field {repr(args.key)} not found')
sys.exit(1) sys.exit(1)
# Note that field.types is a list of types. This is because the GGUF # Note that field.types is a list of types. This is because the GGUF
# format supports arrays. For example, an array of UINT32 would # format supports arrays. For example, an array of UINT32 would
# look like [GGUFValueType.ARRAY, GGUFValueType.UINT32] # look like [GGUFValueType.ARRAY, GGUFValueType.UINT32]
handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None handler = reader.gguf_scalar_to_np.get(field.types[0]) if field.types else None
if handler is None: if handler is None:
print( logger.error(f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}')
f'! This tool only supports changing simple values, {repr(args.key)} has unsupported type {field.types}',
file = sys.stderr,
)
sys.exit(1) sys.exit(1)
current_value = field.parts[field.data[0]][0] current_value = field.parts[field.data[0]][0]
new_value = handler(args.value) new_value = handler(args.value)
print(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}') logger.info(f'* Preparing to change field {repr(args.key)} from {current_value} to {new_value}')
if current_value == new_value: if current_value == new_value:
print(f'- Key {repr(args.key)} already set to requested value {current_value}') logger.info(f'- Key {repr(args.key)} already set to requested value {current_value}')
sys.exit(0) sys.exit(0)
if args.dry_run: if args.dry_run:
sys.exit(0) sys.exit(0)
if not args.force: if not args.force:
print('*** Warning *** Warning *** Warning **') logger.warning('*** Warning *** Warning *** Warning **')
print('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.') logger.warning('* Changing fields in a GGUF file can make it unusable. Proceed at your own risk.')
print('* Enter exactly YES if you are positive you want to proceed:') logger.warning('* Enter exactly YES if you are positive you want to proceed:')
response = input('YES, I am sure> ') response = input('YES, I am sure> ')
if response != 'YES': if response != 'YES':
print("You didn't enter YES. Okay then, see ya!") logger.info("You didn't enter YES. Okay then, see ya!")
sys.exit(0) sys.exit(0)
field.parts[field.data[0]][0] = new_value field.parts[field.data[0]][0] = new_value
print('* Field changed. Successful completion.') logger.info('* Field changed. Successful completion.')
def main() -> None: def main() -> None:
@ -80,8 +80,16 @@ def main() -> None:
parser.add_argument("value", type=str, help="Metadata value to set") parser.add_argument("value", type=str, help="Metadata value to set")
parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything") parser.add_argument("--dry-run", action="store_true", help="Don't actually change anything")
parser.add_argument("--force", action="store_true", help="Change the field without confirmation") parser.add_argument("--force", action="store_true", help="Change the field without confirmation")
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"]) args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
print(f'* Loading: {args.model}')
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logger.info(f'* Loading: {args.model}')
reader = GGUFReader(args.model, 'r' if args.dry_run else 'r+') reader = GGUFReader(args.model, 'r' if args.dry_run else 'r+')
set_metadata(reader, args) set_metadata(reader, args)

View file

@ -1,5 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import logging
import argparse import argparse
import heapq import heapq
import sys import sys
@ -11,9 +12,11 @@ try:
import git import git
from tabulate import tabulate from tabulate import tabulate
except ImportError as e: except ImportError as e:
print("ERROR: the following Python libraries are required: GitPython, tabulate.") print("the following Python libraries are required: GitPython, tabulate.") # noqa: NP100
raise e raise e
logger = logging.getLogger("compare-llama-bench")
# Properties by which to differentiate results per commit: # Properties by which to differentiate results per commit:
KEY_PROPERTIES = [ KEY_PROPERTIES = [
"cpu_info", "gpu_info", "n_gpu_layers", "main_gpu", "cuda", "opencl", "metal", "gpu_blas", "cpu_info", "gpu_info", "n_gpu_layers", "main_gpu", "cuda", "opencl", "metal", "gpu_blas",
@ -94,8 +97,7 @@ parser.add_argument("-s", "--show", help=help_s)
known_args, unknown_args = parser.parse_known_args() known_args, unknown_args = parser.parse_known_args()
if unknown_args: if unknown_args:
print(f"ERROR: Received unknown args: {unknown_args}.") logger.error(f"Received unknown args: {unknown_args}.")
print()
parser.print_help() parser.print_help()
sys.exit(1) sys.exit(1)
@ -108,8 +110,7 @@ if input_file is None:
input_file = sqlite_files[0] input_file = sqlite_files[0]
if input_file is None: if input_file is None:
print("ERROR: Cannot find a suitable input file, please provide one.") logger.error("Cannot find a suitable input file, please provide one.")
print()
parser.print_help() parser.print_help()
sys.exit(1) sys.exit(1)
@ -194,23 +195,19 @@ if known_args.baseline is not None:
hexsha8_baseline = get_commit_hexsha8(known_args.baseline) hexsha8_baseline = get_commit_hexsha8(known_args.baseline)
name_baseline = known_args.baseline name_baseline = known_args.baseline
if hexsha8_baseline is None: if hexsha8_baseline is None:
print(f"ERROR: cannot find data for baseline={known_args.baseline}.") logger.error(f"cannot find data for baseline={known_args.baseline}.")
sys.exit(1) sys.exit(1)
# Otherwise, search for the most recent parent of master for which there is data: # Otherwise, search for the most recent parent of master for which there is data:
elif repo is not None: elif repo is not None:
hexsha8_baseline = find_parent_in_data(repo.heads.master.commit) hexsha8_baseline = find_parent_in_data(repo.heads.master.commit)
if hexsha8_baseline is None: if hexsha8_baseline is None:
print("ERROR: No baseline was provided and did not find data for any master branch commits.") logger.error("No baseline was provided and did not find data for any master branch commits.")
print()
parser.print_help() parser.print_help()
sys.exit(1) sys.exit(1)
else: else:
print( logger.error("No baseline was provided and the current working directory "
"ERROR: No baseline was provided and the current working directory " "is not part of a git repository from which a baseline could be inferred.")
"is not part of a git repository from which a baseline could be inferred."
)
print()
parser.print_help() parser.print_help()
sys.exit(1) sys.exit(1)
@ -227,7 +224,7 @@ if known_args.compare is not None:
hexsha8_compare = get_commit_hexsha8(known_args.compare) hexsha8_compare = get_commit_hexsha8(known_args.compare)
name_compare = known_args.compare name_compare = known_args.compare
if hexsha8_compare is None: if hexsha8_compare is None:
print(f"ERROR: cannot find data for compare={known_args.compare}.") logger.error(f"cannot find data for compare={known_args.compare}.")
sys.exit(1) sys.exit(1)
# Otherwise, search for the commit for llama-bench was most recently run # Otherwise, search for the commit for llama-bench was most recently run
# and that is not a parent of master: # and that is not a parent of master:
@ -241,16 +238,12 @@ elif repo is not None:
break break
if hexsha8_compare is None: if hexsha8_compare is None:
print("ERROR: No compare target was provided and did not find data for any non-master commits.") logger.error("No compare target was provided and did not find data for any non-master commits.")
print()
parser.print_help() parser.print_help()
sys.exit(1) sys.exit(1)
else: else:
print( logger.error("No compare target was provided and the current working directory "
"ERROR: No compare target was provided and the current working directory " "is not part of a git repository from which a compare target could be inferred.")
"is not part of a git repository from which a compare target could be inferred."
)
print()
parser.print_help() parser.print_help()
sys.exit(1) sys.exit(1)
@ -284,8 +277,7 @@ if known_args.show is not None:
if prop not in KEY_PROPERTIES[:-2]: # Last two values are n_prompt, n_gen. if prop not in KEY_PROPERTIES[:-2]: # Last two values are n_prompt, n_gen.
unknown_cols.append(prop) unknown_cols.append(prop)
if unknown_cols: if unknown_cols:
print(f"ERROR: Unknown values for --show: {', '.join(unknown_cols)}") logger.error(f"Unknown values for --show: {', '.join(unknown_cols)}")
print()
parser.print_usage() parser.print_usage()
sys.exit(1) sys.exit(1)
rows_show = get_rows(show) rows_show = get_rows(show)
@ -369,7 +361,7 @@ if "gpu_info" in show:
headers = [PRETTY_NAMES[p] for p in show] headers = [PRETTY_NAMES[p] for p in show]
headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"] headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]
print(tabulate( logger.info(tabulate(
table, table,
headers=headers, headers=headers,
floatfmt=".2f", floatfmt=".2f",

View file

@ -1,5 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import logging
import argparse import argparse
import os import os
import subprocess import subprocess
@ -7,6 +8,8 @@ import sys
import yaml import yaml
logger = logging.getLogger("run-with-preset")
CLI_ARGS_MAIN_PERPLEXITY = [ CLI_ARGS_MAIN_PERPLEXITY = [
"batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape", "batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape",
"export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag", "export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag",
@ -56,6 +59,7 @@ parser.add_argument("-bin", "--binary", help="The binary to run.")
parser.add_argument("yaml_files", nargs="*", parser.add_argument("yaml_files", nargs="*",
help="Arbitrary number of YAML files from which to read preset values. " help="Arbitrary number of YAML files from which to read preset values. "
"If two files specify the same values the later one will be used.") "If two files specify the same values the later one will be used.")
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
known_args, unknown_args = parser.parse_known_args() known_args, unknown_args = parser.parse_known_args()
@ -63,6 +67,11 @@ if not known_args.yaml_files and not unknown_args:
parser.print_help() parser.print_help()
sys.exit(0) sys.exit(0)
if known_args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
props = dict() props = dict()
for yaml_file in known_args.yaml_files: for yaml_file in known_args.yaml_files:
@ -85,7 +94,7 @@ elif binary.lower().endswith("llama-bench"):
elif binary.lower().endswith("server"): elif binary.lower().endswith("server"):
cli_args = CLI_ARGS_SERVER cli_args = CLI_ARGS_SERVER
else: else:
print(f"Unknown binary: {binary}") logger.error(f"Unknown binary: {binary}")
sys.exit(1) sys.exit(1)
command_list = [binary] command_list = [binary]
@ -121,11 +130,11 @@ for cli_arg in cli_args:
num_unused = len(props) num_unused = len(props)
if num_unused > 10: if num_unused > 10:
print(f"The preset file contained a total of {num_unused} unused properties.") logger.info(f"The preset file contained a total of {num_unused} unused properties.")
elif num_unused > 0: elif num_unused > 0:
print("The preset file contained the following unused properties:") logger.info("The preset file contained the following unused properties:")
for prop, value in props.items(): for prop, value in props.items():
print(f" {prop}: {value}") logger.info(f" {prop}: {value}")
command_list += unknown_args command_list += unknown_args

View file

@ -1,8 +1,11 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import logging
import os import os
import hashlib import hashlib
logger = logging.getLogger("verify-checksum-models")
def sha256sum(file): def sha256sum(file):
block_size = 16 * 1024 * 1024 # 16 MB block size block_size = 16 * 1024 * 1024 # 16 MB block size
@ -27,7 +30,7 @@ hash_list_file = os.path.join(llama_path, "SHA256SUMS")
# Check if the hash list file exists # Check if the hash list file exists
if not os.path.exists(hash_list_file): if not os.path.exists(hash_list_file):
print(f"Hash list file not found: {hash_list_file}") logger.error(f"Hash list file not found: {hash_list_file}")
exit(1) exit(1)
# Read the hash file content and split it into an array of lines # Read the hash file content and split it into an array of lines
@ -46,7 +49,7 @@ for line in hash_list:
file_path = os.path.join(llama_path, filename) file_path = os.path.join(llama_path, filename)
# Informing user of the progress of the integrity check # Informing user of the progress of the integrity check
print(f"Verifying the checksum of {file_path}") logger.info(f"Verifying the checksum of {file_path}")
# Check if the file exists # Check if the file exists
if os.path.exists(file_path): if os.path.exists(file_path):
@ -73,9 +76,9 @@ for line in hash_list:
# Print column headers for results table # Print column headers for results table
print("\n" + "filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20)) logger.info("filename".ljust(40) + "valid checksum".center(20) + "file missing".center(20))
print("-" * 80) logger.info("-" * 80)
# Output the results as a table # Output the results as a table
for r in results: for r in results:
print(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}") logger.info(f"{r['filename']:40} {r['valid checksum']:^20} {r['file missing']:^20}")

View file

@ -7,14 +7,23 @@
# python3 tests/test-tokenizer-0-bpe.py ~/Data/huggingface/deepseek-coder-6.7b-instruct/ # python3 tests/test-tokenizer-0-bpe.py ~/Data/huggingface/deepseek-coder-6.7b-instruct/
# #
import logging
import argparse import argparse
from transformers import AutoTokenizer from transformers import AutoTokenizer
logger = logging.getLogger("convert")
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("dir_tokenizer", help="directory containing 'tokenizer.model' file") parser.add_argument("dir_tokenizer", help="directory containing 'tokenizer.model' file")
parser.add_argument("--fname-tok", help="path to a text file to tokenize") parser.add_argument("--fname-tok", help="path to a text file to tokenize")
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
args = parser.parse_args() args = parser.parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
dir_tokenizer = args.dir_tokenizer dir_tokenizer = args.dir_tokenizer
@ -64,30 +73,34 @@ tests = [
] ]
for text in tests: for text in tests:
print('text: ', text) logger.info(f"text: {text}")
print(tokenizer.encode(text)) logger.info(tokenizer.encode(text))
print(tokenizer.decode(tokenizer.encode(text))) logger.info(tokenizer.decode(tokenizer.encode(text)))
print("\n\ntests for C++:\n") logger.info("tests for C++:")
for text in tests: for text in tests:
res = tokenizer.encode(text) res = tokenizer.encode(text)
# Modify text representation for logging
k = text.replace('\n', '\\n') k = text.replace('\n', '\\n')
k = k.replace('\t', '\\t') k = k.replace('\t', '\\t')
k = '"' + k + '"' k = '"' + k + '"'
print("{ %-24s, { " % k, end='')
for x in res:
print("%7d," % x, end='')
print(" }, },")
print(tokenizer.encode('hello')) # Log the modified text and its encoding
print(tokenizer.encode('world')) log_message = "{ %-24s, { " % k
print(tokenizer.encode(' world')) for x in res:
print(tokenizer.encode('hello world')) log_message += "%7d," % x
log_message += " }, },"
logger.info(log_message)
logger.info(tokenizer.encode('hello'))
logger.info(tokenizer.encode('world'))
logger.info(tokenizer.encode(' world'))
logger.info(tokenizer.encode('hello world'))
fname_tok = args.fname_tok fname_tok = args.fname_tok
if fname_tok: if fname_tok:
print('tokenizing file: ', fname_tok) logger.info(f"tokenizing file: {fname_tok}")
fname_out = fname_tok + '.tok' fname_out = fname_tok + '.tok'
with open(fname_tok, 'r', encoding='utf-8') as f: with open(fname_tok, 'r', encoding='utf-8') as f:
lines = f.readlines() lines = f.readlines()
@ -112,6 +125,6 @@ if fname_tok:
# else: # else:
# f.write(str(x) + ' \'' + tokenizer.decode(x) + '\'\n') # f.write(str(x) + ' \'' + tokenizer.decode(x) + '\'\n')
f.write(str(x) + ' \'' + tokenizer.decode(x).strip() + '\'\n') f.write(str(x) + ' \'' + tokenizer.decode(x).strip() + '\'\n')
print('len(res): ', len(res)) logger.info(f"len(res): {len(res)}")
print('len(lines): ', len(lines)) logger.info(f"len(lines): {len(lines)}")
print('results written to: ', fname_out) logger.info(f"results written to: {fname_out}")

View file

@ -7,15 +7,25 @@
# #
import logging
import argparse import argparse
from sentencepiece import SentencePieceProcessor from sentencepiece import SentencePieceProcessor
logger = logging.getLogger("test-tokenizer-0-llama")
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("dir_tokenizer", help="directory containing 'tokenizer.model' file") parser.add_argument("dir_tokenizer", help="directory containing 'tokenizer.model' file")
parser.add_argument("--fname-tok", help="path to a text file to tokenize") parser.add_argument("--fname-tok", help="path to a text file to tokenize")
parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
args = parser.parse_args() args = parser.parse_args()
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
dir_tokenizer = args.dir_tokenizer dir_tokenizer = args.dir_tokenizer
tokenizer = SentencePieceProcessor(dir_tokenizer + '/tokenizer.model') tokenizer = SentencePieceProcessor(dir_tokenizer + '/tokenizer.model')
@ -65,41 +75,46 @@ tests = [
for text in tests: for text in tests:
print('text: ', text) message_log = (f"text: {text}\n"
print('\nwith bos:') "with bos:\n"
print(tokenizer.encode(text, add_bos=True)) f"{tokenizer.encode(text, add_bos=True)}\n"
print(tokenizer.decode(tokenizer.encode(text, add_bos=True))) f"{tokenizer.decode(tokenizer.encode(text, add_bos=True))}\n"
print('\nwithout bos:') "without bos:\n"
print(tokenizer.encode(text, add_bos=False)) f"{tokenizer.encode(text, add_bos=False)}\n"
print(tokenizer.decode(tokenizer.encode(text, add_bos=False))) f"{tokenizer.decode(tokenizer.encode(text, add_bos=False))}\n")
logger.info(message_log)
print("'" + tokenizer.id_to_piece(15043) + "'") # '_Hello' logger.info(f"'{tokenizer.id_to_piece(15043)}'") # '_Hello'
print("'" + tokenizer.id_to_piece(29871) + "'") # '_' logger.info(f"'{tokenizer.id_to_piece(29871)}'") # '_'
print("'" + tokenizer.decode([15043]) + "'") # 'Hello' logger.info(f"'{tokenizer.decode([15043])}'") # 'Hello'
print("'" + tokenizer.decode([15043, 15043]) + "'") # 'Hello Hello' logger.info(f"'{tokenizer.decode([15043, 15043])}'") # 'Hello Hello'
print("'" + tokenizer.decode([29871, 15043]) + "'") # ' Hello' logger.info(f"'{tokenizer.decode([29871, 15043])}'") # ' Hello'
print("'" + tokenizer.decode([29871, 15043, 29871, 15043]) + "'") # ' Hello Hello' logger.info(f"'{tokenizer.decode([29871, 15043, 29871, 15043])}'") # ' Hello Hello'
print("\n\ntests for C++:\n") logger.info("\n\ntests for C++:\n")
for text in tests: for text in tests:
res = tokenizer.encode(text, add_bos=False) res = tokenizer.encode(text, add_bos=False)
# Modify text representation for logging
k = text.replace('\n', '\\n') k = text.replace('\n', '\\n')
k = k.replace('\t', '\\t') k = k.replace('\t', '\\t')
k = '"' + k + '"' k = '"' + k + '"'
print("{ %-24s, { " % k, end='')
for x in res:
print("%7d," % x, end='')
print(" }, },")
print(tokenizer.encode('hello')) # Log the modified text and its encoding
print(tokenizer.encode('world')) log_message = "{ %-24s, { " % k
print(tokenizer.encode(' world')) for x in res:
print(tokenizer.encode('hello world')) log_message += "%7d," % x
log_message += " }, },"
logger.info(log_message)
logger.info(tokenizer.encode('hello'))
logger.info(tokenizer.encode('world'))
logger.info(tokenizer.encode(' world'))
logger.info(tokenizer.encode('hello world'))
fname_tok = args.fname_tok fname_tok = args.fname_tok
if fname_tok: if fname_tok:
print('tokenizing file: ', fname_tok) logger.info(f"tokenizing file: {fname_tok}")
fname_out = fname_tok + '.tok' fname_out = fname_tok + '.tok'
with open(fname_tok, 'r', encoding='utf-8') as f: with open(fname_tok, 'r', encoding='utf-8') as f:
lines = f.readlines() lines = f.readlines()
@ -109,6 +124,6 @@ if fname_tok:
with open(fname_out, 'w', encoding='utf-8') as f: with open(fname_out, 'w', encoding='utf-8') as f:
for x in res: for x in res:
f.write(str(x) + ' \'' + tokenizer.decode(x) + '\'\n') f.write(str(x) + ' \'' + tokenizer.decode(x) + '\'\n')
print('len(res): ', len(res)) logger.info(f"len(res): {len(res)}")
print('len(lines): ', len(lines)) logger.info(f"len(lines): {len(lines)}")
print('results written to: ', fname_out) logger.info(f"results written to: {fname_out}")