Cleanups, better output during conversion
parent 8afc1ef312
commit f7e61fd1a9

1 changed file with 102 additions and 79 deletions

@@ -123,83 +123,103 @@ class GGMLV3Model:
         self.tensor_map = tensor_map
         return offset
 
-def save_gguf(ggml_model, data, cfg):
-    hp = ggml_model.hyperparameters
-    ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
-    assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
-    ff_tensor = ggml_model.tensors[ff_tensor_idx]
-    if cfg.gqa == 1:
-        n_kv_head = hp.n_head
-    else:
-        gqa = float(cfg.gqa)
-        n_kv_head = None
-        for x in range(1, 256):
-            if float(hp.n_head) / float(x) == gqa:
-                n_kv_head = x
-        assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
-        print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
-    nm = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, hp.n_layer)
-    gguf_writer = gguf.GGUFWriter(cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
-    #gguf_writer.add_name('meep')
-    #gguf_writer.add_source_hf_repo('merp')
-    # gguf_writer.add_tensor_data_layout("Meta AI original pth")
-    gguf_writer.add_context_length(cfg.context_length)
-    gguf_writer.add_embedding_length(hp.n_embd)
-    gguf_writer.add_block_count(hp.n_layer)
-    gguf_writer.add_feed_forward_length(ff_tensor.dims[1])
-    print('FF dim', ff_tensor.dims[1])
-    gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
-    gguf_writer.add_head_count(hp.n_head)
-    gguf_writer.add_head_count_kv(n_kv_head)
-    gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
-    gguf_writer.add_tokenizer_model('llama')
-    tokens = []
-    scores = []
-    print(f'* Adding {hp.n_vocab} vocab item(s)')
-    toktypes = []
-    for (tokid, (vbytes, vscore)) in enumerate(ggml_model.vocab.items):
-        if len(vbytes) > 1 and vbytes[0] == 32:
-            vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
-        tt = 1
-        if len(vbytes) == 0:
-            tt = 3
-        elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
-            hv = hex(vbytes[0])[2:].upper()
-            vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
-            tt = 6
-        toktypes.append(tt)
-        tokens.append(vbytes)
-        scores.append(vscore)
-    gguf_writer.add_token_list(tokens)
-    gguf_writer.add_token_scores(scores)
-    gguf_writer.add_token_types(toktypes)
-    print('* Adding tensors')
-    for tensor in ggml_model.tensors:
-        name = str(tensor.name, 'UTF-8')
-        if name.endswith('.weight'):
-            name = name[:-7]
-            suffix = '.weight'
-        elif name.endswith('.bias'):
-            name = name[:-5]
-            suffix = '.bias'
-        mapped_name = nm.get(name)
-        assert mapped_name is not None, f'Bad name {name}'
-        mapped_name += suffix
-        tempdims = list(tensor.dims[:])
-        if len(tempdims) > 1:
-            temp = tempdims[1]
-            tempdims[1] = tempdims[0]
-            tempdims[0] = temp
-        print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
-        gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
-    print("gguf: write header")
-    gguf_writer.write_header_to_file()
-    print("gguf: write metadata")
-    gguf_writer.write_kv_data_to_file()
-    print("gguf: write tensors")
-    gguf_writer.write_tensors_to_file()
-    gguf_writer.close()
+class GGMLToGGUF:
+    def __init__(self, ggml_model, data, cfg):
+        hp = ggml_model.hyperparameters
+        self.model = ggml_model
+        self.data = data
+        self.cfg = cfg
+        ff_tensor_idx = ggml_model.tensor_map.get(b'layers.0.feed_forward.w1.weight')
+        assert ff_tensor_idx is not None, 'Missing layer 0 FF tensor'
+        ff_tensor = ggml_model.tensors[ff_tensor_idx]
+        self.ff_length = ff_tensor.dims[1]
+        if cfg.gqa == 1:
+            n_kv_head = hp.n_head
+        else:
+            gqa = float(cfg.gqa)
+            n_kv_head = None
+            for x in range(1, 256):
+                if float(hp.n_head) / float(x) == gqa:
+                    n_kv_head = x
+            assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
+            print(f'- Guessed n_kv_head = {n_kv_head} based on GQA {cfg.gqa}')
+        self.n_kv_head = n_kv_head
+        self.name_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.LLAMA, ggml_model.hyperparameters.n_layer)
+
+    def save(self):
+        print('* Preparing to save GGUF file')
+        gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
+        self.add_params(gguf_writer)
+        self.add_vocab(gguf_writer)
+        self.add_tensors(gguf_writer)
+        print("    gguf: write header")
+        gguf_writer.write_header_to_file()
+        print("    gguf: write metadata")
+        gguf_writer.write_kv_data_to_file()
+        print("    gguf: write tensors")
+        gguf_writer.write_tensors_to_file()
+        gguf_writer.close()
+
+    def add_params(self, gguf_writer):
+        hp = self.model.hyperparameters
+        cfg = self.cfg
+        print('* Adding model parameters and KV items')
+        gguf_writer.add_context_length(cfg.context_length)
+        gguf_writer.add_embedding_length(hp.n_embd)
+        gguf_writer.add_block_count(hp.n_layer)
+        gguf_writer.add_feed_forward_length(self.ff_length)
+        gguf_writer.add_rope_dimension_count(hp.n_embd // hp.n_head)
+        gguf_writer.add_head_count(hp.n_head)
+        gguf_writer.add_head_count_kv(self.n_kv_head)
+        gguf_writer.add_layer_norm_rms_eps(float(cfg.eps))
+        gguf_writer.add_tokenizer_model('llama')
+
+    def add_vocab(self, gguf_writer):
+        hp = self.model.hyperparameters
+        tokens = []
+        scores = []
+        print(f'* Adding {hp.n_vocab} vocab item(s)')
+        toktypes = []
+        for (tokid, (vbytes, vscore)) in enumerate(self.model.vocab.items):
+            tt = 1
+            if len(vbytes) > 1 and vbytes[0] == 32:
+                vbytes = vbytes.replace(b' ', b'\xe2\x96\x81')
+            elif len(vbytes) == 0:
+                tt = 3
+            elif tokid >= 3 and tokid <= 258 and len(vbytes) == 1:
+                hv = hex(vbytes[0])[2:].upper()
+                vbytes = bytes(f'<0x{hv}>', encoding = 'UTF-8')
+                tt = 6
+            toktypes.append(tt)
+            tokens.append(vbytes)
+            scores.append(vscore)
+        gguf_writer.add_token_list(tokens)
+        gguf_writer.add_token_scores(scores)
+        gguf_writer.add_token_types(toktypes)
+
+    def add_tensors(self, gguf_writer):
+        nm = self.name_map
+        data = self.data
+        print(f'* Adding {len(self.model.tensors)} tensor(s)')
+        for tensor in self.model.tensors:
+            name = str(tensor.name, 'UTF-8')
+            if name.endswith('.weight'):
+                name = name[:-7]
+                suffix = '.weight'
+            elif name.endswith('.bias'):
+                name = name[:-5]
+                suffix = '.bias'
+            mapped_name = nm.get(name)
+            assert mapped_name is not None, f'Bad name {name}'
+            mapped_name += suffix
+            tempdims = list(tensor.dims[:])
+            if len(tempdims) > 1:
+                temp = tempdims[1]
+                tempdims[1] = tempdims[0]
+                tempdims[0] = temp
+            # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
+            gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
 
 
 def handle_args():
     parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
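
Note: the n_kv_head guess retained in __init__ above brute-forces a head count x in 1..255 for which n_head / x equals the --gqa argument; when the division is exact this is just n_head // gqa. A minimal standalone sketch of the same logic (the function name is hypothetical, not part of the script):

    def guess_n_kv_head(n_head, gqa):
        # gqa == 1 means no grouped-query attention: one KV head per attention head.
        if gqa == 1:
            return n_head
        # Brute-force search mirroring the loop in __init__.
        n_kv_head = None
        for x in range(1, 256):
            if float(n_head) / float(x) == float(gqa):
                n_kv_head = x
        assert n_kv_head is not None, "Couldn't determine n_kv_head from GQA param"
        return n_kv_head

    # Example: 64 attention heads with --gqa 8 (a LLaMA-2 70B style setup) -> 8 KV heads.
    assert guess_n_kv_head(64, 8) == 8
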
@@ -212,12 +232,15 @@ def handle_args():
 
 def main():
     cfg = handle_args()
+    print(f'* Using config: {cfg}')
+    print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
     data = np.memmap(cfg.input, mode = 'r')
     model = GGMLV3Model()
+    print('* Scanning GGML input file')
     offset = model.load(data, 0)
     print(model.hyperparameters)
-    # print(model.vocab.items)
-    # return
-    save_gguf(model, data, cfg)
+    converter = GGMLToGGUF(model, data, cfg)
+    converter.save()
+    print(f'* Successful completion. Output saved to: {cfg.output}')
 
 main()
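
Note on the magic numbers in add_vocab: the tt values appear to follow the GGUF/llama.cpp token-type convention (1 = normal, 3 = control, 6 = byte; this mapping is an assumption based on the GGUF spec, not stated in the diff), and b'\xe2\x96\x81' is the UTF-8 encoding of U+2581, the marker SentencePiece uses for a leading space. A sketch of the classification, with hypothetical names:

    # Token-type constants (assumed from the GGUF spec; the script inlines the ints).
    TOKEN_TYPE_NORMAL  = 1
    TOKEN_TYPE_CONTROL = 3
    TOKEN_TYPE_BYTE    = 6

    def classify_token(tokid, vbytes):
        if len(vbytes) == 0:
            return TOKEN_TYPE_CONTROL  # zero-length pieces are treated as control tokens
        if 3 <= tokid <= 258 and len(vbytes) == 1:
            return TOKEN_TYPE_BYTE     # raw byte-fallback tokens, rewritten as <0xXX>
        return TOKEN_TYPE_NORMAL

    assert classify_token(5, b'A') == TOKEN_TYPE_BYTE
    assert classify_token(1000, b' token'.replace(b' ', b'\xe2\x96\x81')) == TOKEN_TYPE_NORMAL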
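
For context, the end-to-end flow that main() drives after this commit reduces to the following sketch (assuming the script's existing GGMLV3Model and GGMLToGGUF definitions and the cfg namespace produced by handle_args()):

    import numpy as np

    cfg = handle_args()                      # parsed args: input, output, gqa, eps, context_length
    data = np.memmap(cfg.input, mode = 'r')  # read-only byte view of the GGMLv3 file
    model = GGMLV3Model()
    model.load(data, 0)                      # scan hyperparameters, vocab and tensor metadata
    converter = GGMLToGGUF(model, data, cfg)
    converter.save()                         # write header, KV items and tensor data to cfg.output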