From 00fe3fdb6105c2f5721915e05dd2191f3b422d17 Mon Sep 17 00:00:00 2001 From: KerfuffleV2 Date: Tue, 5 Sep 2023 03:37:02 -0600 Subject: [PATCH] convert-llama-ggmlv3-to-gguf: Try to handle files older than GGJTv3 Better error messages for files that cannot be converted Add file type to GGUF output --- convert-llama-ggmlv3-to-gguf.py | 99 +++++++++++++++++++++++++++------ 1 file changed, 81 insertions(+), 18 deletions(-) diff --git a/convert-llama-ggmlv3-to-gguf.py b/convert-llama-ggmlv3-to-gguf.py index 08ba0c490..3fbe24608 100755 --- a/convert-llama-ggmlv3-to-gguf.py +++ b/convert-llama-ggmlv3-to-gguf.py @@ -5,6 +5,7 @@ import argparse import math import struct import sys +from enum import IntEnum from pathlib import Path import numpy as np @@ -34,10 +35,34 @@ GGML_QUANT_SIZES = { gguf.GGMLQuantizationType.Q8_K : (256, 4 + QK_K + QK_K // 8), } +class GGMLFormat(IntEnum): + GGML = 0 + GGMF = 1 + GGJT = 2 + +class GGMLFType(IntEnum): + ALL_F32 = 0 + MOSTLY_F16 = 1 + MOSTLY_Q4_0 = 2 + MOSTLY_Q4_1 = 3 + MOSTLY_Q4_1_SOME_F16 = 4 + MOSTLY_Q8_0 = 7 + MOSTLY_Q5_0 = 8 + MOSTLY_Q5_1 = 9 + MOSTLY_Q2_K = 10 + MOSTLY_Q3_K_S = 11 + MOSTLY_Q3_K_M = 12 + MOSTLY_Q3_K_L = 13 + MOSTLY_Q4_K_S = 14 + MOSTLY_Q4_K_M = 15 + MOSTLY_Q5_K_S = 16 + MOSTLY_Q5_K_M = 17 + MOSTLY_Q6_K = 18 + class Hyperparameters: def __init__(self): - self.n_vocab = self.n_embd = self.n_mult = self.n_head = self.n_layer = self.n_rot = self.ftype = 0 - self.n_ff = 0 + self.n_vocab = self.n_embd = self.n_mult = self.n_head = self.n_layer = self.n_rot = self.n_ff = 0 + self.ftype = GGMLFType.ALL_F32 def set_n_ff(self, model): ff_tensor_idx = model.tensor_map.get(b'layers.0.feed_forward.w1.weight') @@ -53,16 +78,21 @@ class Hyperparameters: self.n_head, self.n_layer, self.n_rot, - self.ftype, + ftype, ) = struct.unpack('<7I', data[offset:offset + (4 * 7)]) + try: + self.ftype = GGMLFType(ftype) + except ValueError: + raise ValueError(f'Invalid ftype {ftype}') return 4 * 7 def __str__(self): - return f'' + return f'' class Vocab: - def __init__(self): + def __init__(self, load_scores = True): self.items = [] + self.load_scores = load_scores def load(self, data, offset, n_vocab): orig_offset = offset @@ -70,20 +100,24 @@ class Vocab: itemlen = struct.unpack(' 3: + raise ValueError(f'Cannot handle unexpected GGJT file version {version}') + self.file_format = GGMLFormat.GGJT + self.format_version = version + return 8 + raise ValueError(f"Unexpected file magic {magic!r}! This doesn't look like a GGML format file.") + + def validate_conversion(self, ftype): + if (self.file_format < GGMLFormat.GGJT or self.format_version < 2) and ftype not in (GGMLFType.ALL_F32, GGMLFType.MOSTLY_F16): + raise ValueError(f'Quantizations changed in GGJTv2. Can only convert unquantized GGML files older than GGJTv2. Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.') + if (self.file_format == GGMLFormat.GGJT and self.format_version == 2) and ftype in (GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1, GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0): + raise ValueError(f'Q4 and Q8 quantizations changed in GGJTv3. Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.') def load(self, data, offset): offset += self.validate_header(data, offset) hp = Hyperparameters() offset += hp.load(data, offset) - vocab = Vocab() + print(f'* File format: {self.file_format.name}v{self.format_version} with ftype {hp.ftype.name}') + self.validate_conversion(hp.ftype) + vocab = Vocab(load_scores = self.file_format > GGMLFormat.GGML) offset += vocab.load(data, offset, hp.n_vocab) tensors: list[Tensor] = [] tensor_map = {} while offset < len(data): - tensor = Tensor() + tensor = Tensor(use_padding = self.file_format > GGMLFormat.GGMF) offset += tensor.load(data, offset) tensor_map[tensor.name] = len(tensors) tensors.append(tensor) @@ -195,6 +255,7 @@ class GGMLToGGUF: if name is not None: gguf_writer.add_name(name) gguf_writer.add_description(desc) + gguf_writer.add_file_type(int(hp.ftype)) if self.params_override is not None: po = self.params_override assert po.n_embd == hp.n_embd, 'Model hyperparams mismatch' @@ -330,7 +391,7 @@ def main(): print(f'* Using config: {cfg}') print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n') data = np.memmap(cfg.input, mode = 'r') - model = GGMLV3Model() + model = GGMLModel() print('* Scanning GGML input file') offset = model.load(data, 0) print(f'* GGML model hyperparameters: {model.hyperparameters}') @@ -345,6 +406,8 @@ def main(): print(f'* Special vocab: {special_vocab}') else: print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n') + if model.file_format == GGMLFormat.GGML: + print('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!') converter = GGMLToGGUF(model, data, cfg, params_override = params_override, vocab_override = vocab_override, special_vocab = special_vocab) converter.save() print(f'* Successful completion. Output saved to: {cfg.output}')