Cleanups, better output during conversion

KerfuffleV2 2023-08-20 10:26:43 -06:00
parent 8afc1ef312
commit f7e61fd1a9


@@ -123,83 +123,103 @@ class GGMLV3Model:
         self.tensor_map = tensor_map
         return offset
 
-def save_gguf(ggml_model, data, cfg):
-    hp = ggml_model.hyperparameters
-    ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
-    assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
-    ff_tensor = ggml_model.tensors[ff_tensor_idx]
-    if cfg.gqa == 1:
-        n_kv_head = hp.n_head
-    else:
-        gqa = float(cfg.gqa)
-        n_kv_head = None
-        for x in range(1, 256):
-            if float(hp.n_head) / float(x) == gqa:
-                n_kv_head = x
-        assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
-        print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
-    nm = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, hp.n_layer)
-    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
-    #gguf_writer.add_name('meep')
-    #gguf_writer.add_source_hf_repo('merp')
-    # gguf_writer.add_tensor_data_layout("Meta AI original pth")
-    gguf_writer.add_context_length(cfg.context_length)
-    gguf_writer.add_embedding_length(hp.n_embd)
-    gguf_writer.add_block_count(hp.n_layer)
-    gguf_writer.add_feed_forward_length(ff_tensor.dims[1])
-    print('FF dim', ff_tensor.dims[1])
-    gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
-    gguf_writer.add_head_count(hp.n_head)
-    gguf_writer.add_head_count_kv(n_kv_head)
-    gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
-    gguf_writer.add_tokenizer_model('llama')
-    tokens = []
-    scores = []
-    print(f'* Adding {hp.n_vocab} vocab item(s)')
-    toktypes = []
-    for (tokid, (vbytes, vscore)) in enumerate(ggml_model.vocab.items):
-        if len(vbytes) > 1 and vbytes[0] == 32:
-            vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
-        tt = 1
-        if len(vbytes) == 0:
-            tt = 3
-        elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
-            hv = hex(vbytes[0])[2:].upper()
-            vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
-            tt = 6
-        toktypes.append(tt)
-        tokens.append(vbytes)
-        scores.append(vscore)
-    gguf_writer.add_token_list(tokens)
-    gguf_writer.add_token_scores(scores)
-    gguf_writer.add_token_types(toktypes)
-    print('* Adding tensors')
-    for tensor in ggml_model.tensors:
-        name = str(tensor.name, 'UTF-8')
-        if name.endswith('.weight'):
-            name = name[:-7]
-            suffix = '.weight'
-        elif name.endswith('.bias'):
-            name = name[:-5]
-            suffix = '.bias'
-        mapped_name = nm.get(name)
-        assert mapped_name is not None, f'Bad name {name}'
-        mapped_name += suffix
-        tempdims = list(tensor.dims[:])
-        if len(tempdims) > 1:
-            temp = tempdims[1]
-            tempdims[1] = tempdims[0]
-            tempdims[0] = temp
-        print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
-        gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
-    print("gguf: write header")
-    gguf_writer.write_header_to_file()
-    print("gguf: write metadata")
-    gguf_writer.write_kv_data_to_file()
-    print("gguf: write tensors")
-    gguf_writer.write_tensors_to_file()
-    gguf_writer.close()
+class GGMLToGGUF:
+    def __init__(self, ggml_model, data, cfg):
+        hp = ggml_model.hyperparameters
+        self.model = ggml_model
+        self.data = data
+        self.cfg = cfg
+        ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
+        assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
+        ff_tensor = ggml_model.tensors[ff_tensor_idx]
+        self.ff_length = ff_tensor.dims[1]
+        if cfg.gqa == 1:
+            n_kv_head = hp.n_head
+        else:
+            gqa = float(cfg.gqa)
+            n_kv_head = None
+            for x in range(1, 256):
+                if float(hp.n_head) / float(x) == gqa:
+                    n_kv_head = x
+            assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
+            print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
+        self.n_kv_head = n_kv_head
+        self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)
+
+    def save(self):
+        print('* Preparing to save GGUF file')
+        gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
+        self.add_params(gguf_writer)
+        self.add_vocab(gguf_writer)
+        self.add_tensors(gguf_writer)
+        print("    gguf: write header")
+        gguf_writer.write_header_to_file()
+        print("    gguf: write metadata")
+        gguf_writer.write_kv_data_to_file()
+        print("    gguf: write tensors")
+        gguf_writer.write_tensors_to_file()
+        gguf_writer.close()
+
+    def add_params(self, gguf_writer):
+        hp = self.model.hyperparameters
+        cfg = self.cfg
+        print('* Adding model parameters and KV items')
+        gguf_writer.add_context_length(cfg.context_length)
+        gguf_writer.add_embedding_length(hp.n_embd)
+        gguf_writer.add_block_count(hp.n_layer)
+        gguf_writer.add_feed_forward_length(self.ff_length)
+        gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
+        gguf_writer.add_head_count(hp.n_head)
+        gguf_writer.add_head_count_kv(self.n_kv_head)
+        gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
+        gguf_writer.add_tokenizer_model('llama')
+
+    def add_vocab(self, gguf_writer):
+        hp = self.model.hyperparameters
+        tokens = []
+        scores = []
+        print(f'* Adding {hp.n_vocab} vocab item(s)')
+        toktypes = []
+        for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
+            tt = 1
+            if len(vbytes) > 1 and vbytes[0] == 32:
+                vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
+            elif len(vbytes) == 0:
+                tt = 3
+            elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
+                hv = hex(vbytes[0])[2:].upper()
+                vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
+                tt = 6
+            toktypes.append(tt)
+            tokens.append(vbytes)
+            scores.append(vscore)
+        gguf_writer.add_token_list(tokens)
+        gguf_writer.add_token_scores(scores)
+        gguf_writer.add_token_types(toktypes)
+
+    def add_tensors(self, gguf_writer):
+        nm = self.name_map
+        data = self.data
+        print(f'* Adding {len(self.model.tensors)} tensor(s)')
+        for tensor in self.model.tensors:
+            name = str(tensor.name, 'UTF-8')
+            if name.endswith('.weight'):
+                name = name[:-7]
+                suffix = '.weight'
+            elif name.endswith('.bias'):
+                name = name[:-5]
+                suffix = '.bias'
+            mapped_name = nm.get(name)
+            assert mapped_name is not None, f'Bad name {name}'
+            mapped_name += suffix
+            tempdims = list(tensor.dims[:])
+            if len(tempdims) > 1:
+                temp = tempdims[1]
+                tempdims[1] = tempdims[0]
+                tempdims[0] = temp
+            # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
+            gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
 
 def handle_args():
     parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
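A note on the new __init__: the brute-force loop recovers n_kv_head from the --gqa ratio. Grouped-query attention shares each KV head across several query heads, so n_head / n_kv_head should equal the ratio exactly. A standalone sketch of the same arithmetic (the function name is illustrative, not part of the script):

    def guess_n_kv_head(n_head, gqa):
        # Grouped-query attention implies n_head == n_kv_head * gqa,
        # so scan candidate KV head counts until the ratio matches exactly.
        if gqa == 1:
            return n_head
        for candidate in range(1, 256):
            if float(n_head) / float(candidate) == float(gqa):
                return candidate
        raise ValueError("Couldn't determine n_kv_head from GQA param")

    assert guess_n_kv_head(64, 8) == 8    # e.g. 70B LLaMA-2: 64 query heads, 8 KV heads
    assert guess_n_kv_head(32, 1) == 32   # GQA of 1: KV heads match query heads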
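add_vocab leans on SentencePiece conventions when rebuilding the vocabulary: pieces that begin with an ASCII space have their spaces re-encoded as U+2581 ('▁', UTF-8 bytes \xe2\x96\x81), empty pieces are marked as control tokens, and ids 3 through 258 that hold a single raw byte are rewritten as <0xXX> byte tokens. The type ids 1, 3 and 6 line up with gguf's normal, control and byte token types. A minimal sketch of the same classification, using a hypothetical helper name:

    def classify_token(tokid, vbytes):
        # Returns the (possibly rewritten) token bytes plus a token type id:
        # 1 = normal, 3 = control, 6 = byte.
        tt = 1
        if len(vbytes) > 1 and vbytes[0] == 32:
            vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')  # ' ' -> '▁' (U+2581)
        elif len(vbytes) == 0:
            tt = 3  # control tokens store no text
        elif 3 <= tokid <= 258 and len(vbytes) == 1:
            vbytes = bytes(f'<0x{hex(vbytes[0])[2:].upper()}>', encoding = 'UTF-8')
            tt = 6  # raw-byte fallback token
        return vbytes, tt

    assert classify_token(1000, b' the') == ('\u2581the'.encode('UTF-8'), 1)
    assert classify_token(10, b'\x07') == (b'<0x7>', 6)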
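The dimension swap in add_tensors appears to convert GGML's fastest-varying-first dims into the row-major order the writer's raw_shape parameter expects; for the 1-D and 2-D tensors in these models, swapping the first two entries suffices. For example:

    # A 2-D weight that GGML describes as dims (4096, 11008)
    # is declared to the GGUF writer with the axes swapped.
    tempdims = [4096, 11008]
    if len(tempdims) > 1:
        tempdims[0], tempdims[1] = tempdims[1], tempdims[0]
    assert tempdims == [11008, 4096]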
@@ -212,12 +232,15 @@ def handle_args():
 def main():
     cfg = handle_args()
+    print(f'* Using config: {cfg}')
+    print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
     data = np.memmap(cfg.input, mode = 'r')
     model = GGMLV3Model()
+    print('* Scanning GGML input file')
     offset = model.load(data, 0)
     print(model.hyperparameters)
-    # print(model.vocab.items)
-    # return
-    save_gguf(model, data, cfg)
+    converter = GGMLToGGUF(model, data, cfg)
+    converter.save()
+    print(f'* Successful completion. Output saved to: {cfg.output}')
 
 main()
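For reference, a minimal sketch of driving the converter programmatically, mirroring the new main(). The cfg field names (input, output, gqa, eps, context_length) are the ones the class reads; the paths and values below are placeholders:

    import argparse
    import numpy as np

    cfg = argparse.Namespace(
        input = 'model-q4_0.bin',     # placeholder GGMLv3 input path
        output = 'model-q4_0.gguf',   # placeholder GGUF output path
        gqa = 1,                      # grouped-query ratio; e.g. 8 for 70B LLaMA-2
        eps = '1e-5',                 # converter applies float(cfg.eps) itself
        context_length = 2048,
    )

    data = np.memmap(cfg.input, mode = 'r')
    model = GGMLV3Model()
    model.load(data, 0)
    GGMLToGGUF(model, data, cfg).save()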