Improve some informational output
Formatting changes to clean up some long lines
parent 645b6a2e47
commit 69de44d441
1 changed file with 57 additions and 22 deletions
@@ -61,7 +61,8 @@ class GGMLFType(IntEnum):
 
 class Hyperparameters:
     def __init__(self):
-        self.n_vocab = self.n_embd = self.n_mult = self.n_head = self.n_layer = self.n_rot = self.n_ff = 0
+        self.n_vocab = self.n_embd = self.n_mult = self.n_head = 0
+        self.n_layer = self.n_rot = self.n_ff = 0
         self.ftype = GGMLFType.ALL_F32
 
     def set_n_ff(self, model):
@@ -174,10 +175,16 @@ class GGMLModel:
             raise ValueError(f"Unexpected file magic {magic!r}! This doesn't look like a GGML format file.")
 
     def validate_conversion(self, ftype):
-        if (self.file_format < GGMLFormat.GGJT or self.format_version < 2) and ftype not in (GGMLFType.ALL_F32, GGMLFType.MOSTLY_F16):
-            raise ValueError(f'Quantizations changed in GGJTv2. Can only convert unquantized GGML files older than GGJTv2. Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.')
-        if (self.file_format == GGMLFormat.GGJT and self.format_version == 2) and ftype in (GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1, GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0):
-            raise ValueError(f'Q4 and Q8 quantizations changed in GGJTv3. Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.')
+        err = ''
+        if (self.file_format < GGMLFormat.GGJT or self.format_version < 2):
+            if ftype not in (GGMLFType.ALL_F32, GGMLFType.MOSTLY_F16):
+                err = 'Quantizations changed in GGJTv2. Can only convert unquantized GGML files older than GGJTv2.'
+        elif (self.file_format == GGMLFormat.GGJT and self.format_version == 2):
+            if ftype in ( GGMLFType.MOSTLY_Q4_0, GGMLFType.MOSTLY_Q4_1,
+                          GGMLFType.MOSTLY_Q4_1_SOME_F16, GGMLFType.MOSTLY_Q8_0):
+                err = 'Q4 and Q8 quantizations changed in GGJTv3.'
+        if len(err) > 0:
+            raise ValueError(f'{err} Sorry, your {self.file_format.name}v{self.format_version} file of type {ftype.name} is not eligible for conversion.')
 
     def load(self, data, offset):
         offset += self.validate_header(data, offset)
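Note: the restructured validate_conversion above accumulates the failure reason in err and raises once at the end, instead of repeating the long ValueError text in every branch. A minimal standalone sketch of that accumulate-then-raise pattern (the function and its arguments are illustrative, not part of the converter):

    def validate(file_version, quantized):
        err = ''
        if file_version < 2 and quantized:
            err = 'Can only convert unquantized files older than v2.'
        elif file_version == 2 and quantized:
            err = 'Q4 and Q8 quantizations changed in v3.'
        if len(err) > 0:
            raise ValueError(f'{err} Sorry, this file is not eligible for conversion.')

    validate(3, quantized = True)    # v3-style case: no error accumulated, passes silently
    try:
        validate(2, quantized = True)
    except ValueError as e:
        print(e)                     # Q4 and Q8 quantizations changed in v3. Sorry, ...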
@@ -228,7 +235,10 @@ class GGMLToGGUF:
 
     def save(self):
         print('* Preparing to save GGUF file')
-        gguf_writer = gguf.GGUFWriter(self.cfg.output, gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA], use_temp_file = False)
+        gguf_writer = gguf.GGUFWriter(
+            self.cfg.output,
+            gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],
+            use_temp_file = False )
         self.add_params(gguf_writer)
         self.add_vocab(gguf_writer)
         if self.special_vocab is not None:
@@ -245,7 +255,10 @@ class GGMLToGGUF:
     def add_params(self, gguf_writer):
         hp = self.model.hyperparameters
         cfg = self.cfg
-        desc = cfg.desc if cfg.desc is not None else f'converted from legacy {self.model.file_format.name}v{self.model.format_version} {hp.ftype.name} format'
+        if cfg.desc is not None:
+            desc = cfg.desc
+        else:
+            desc = f'converted from legacy {self.model.file_format.name}v{self.model.format_version} {hp.ftype.name} format'
         try:
             # Filenames aren't necessarily valid UTF8.
             name = cfg.name if cfg.name is not None else cfg.input.name
@@ -292,7 +305,8 @@ class GGMLToGGUF:
                 tokens.append(vbytes)
                 scores.append(score)
                 toktypes.append(ttype)
-            assert len(tokens) == hp.n_vocab, f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
+            assert len(tokens) == hp.n_vocab, \
+                f'Override vocab has a different number of items than hyperparameters - override = {len(tokens)} but n_vocab={hp.n_vocab}'
             gguf_writer.add_token_list(tokens)
             gguf_writer.add_token_scores(scores)
             if len(toktypes) > 0:
@@ -344,7 +358,11 @@ class GGMLToGGUF:
                 tempdims[1] = tempdims[0]
                 tempdims[0] = temp
             # print(f'+ {tensor.name} | {mapped_name} {tensor.dims} :: {tempdims}')
-            gguf_writer.add_tensor(mapped_name, data[tensor.start_offset:tensor.start_offset + tensor.len_bytes], raw_shape = tempdims, raw_dtype = tensor.dtype)
+            gguf_writer.add_tensor(
+                mapped_name,
+                data[tensor.start_offset:tensor.start_offset + tensor.len_bytes],
+                raw_shape = tempdims,
+                raw_dtype = tensor.dtype )
 
 def handle_metadata(cfg, hp):
     import convert
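Note: just above the add_tensor call, tempdims[0] and tempdims[1] are swapped. A standalone illustration with hypothetical dims (that the swap exists because GGML and GGUF order the first two dimensions differently is our reading of the context, not something this diff states):

    tempdims = [4096, 32000]     # hypothetical dims as read from a GGML tensor header
    if len(tempdims) > 1:
        temp = tempdims[1]
        tempdims[1] = tempdims[0]
        tempdims[0] = temp
    print(tempdims)              # [32000, 4096]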
@@ -366,30 +384,44 @@ def handle_metadata(cfg, hp):
         params = convert.Params.loadOriginalParamsJson(fakemodel, orig_config_path)
     else:
         raise ValueError('Unable to load metadata')
-    vocab = convert.load_vocab(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir, cfg.vocabtype)
+    vocab = convert.load_vocab(
+        cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir,
+        cfg.vocabtype )
     # FIXME: Respect cfg.vocab_dir?
     svocab = gguf.SpecialVocab(cfg.model_metadata_dir)
     convert.check_vocab_size(params, vocab)
     return (params, vocab, svocab)
 
 def handle_args():
-    parser = argparse.ArgumentParser(description = 'Convert GGMLv3 models to GGUF')
-    parser.add_argument('--input', '-i', type = Path, required = True, help = 'Input GGMLv3 filename')
-    parser.add_argument('--output', '-o', type = Path, required = True, help ='Output GGUF filename')
-    parser.add_argument('--name', help = 'Set model name')
-    parser.add_argument('--desc', help = 'Set model description')
-    parser.add_argument('--gqa', type = int, default = 1, help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
-    parser.add_argument('--eps', default = '5.0e-06', help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
-    parser.add_argument('--context-length', '-c', type=int, default = 2048, help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
-    parser.add_argument('--model-metadata-dir', '-m', type = Path, help ='Load HuggingFace/.pth vocab and metadata from the specified directory')
-    parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
-    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)", default="spm")
+    parser = argparse.ArgumentParser(description = 'Convert GGML models to GGUF')
+    parser.add_argument('--input', '-i', type = Path, required = True,
+        help = 'Input GGMLv3 filename')
+    parser.add_argument('--output', '-o', type = Path, required = True,
+        help ='Output GGUF filename')
+    parser.add_argument('--name',
+        help = 'Set model name')
+    parser.add_argument('--desc',
+        help = 'Set model description')
+    parser.add_argument('--gqa', type = int, default = 1,
+        help = 'grouped-query attention factor (use 8 for LLaMA2 70B)')
+    parser.add_argument('--eps', default = '5.0e-06',
+        help = 'RMS norm eps: Use 1e-6 for LLaMA1 and OpenLLaMA, use 1e-5 for LLaMA2')
+    parser.add_argument('--context-length', '-c', type=int, default = 2048,
+        help = 'Default max context length: LLaMA1 is typically 2048, LLaMA2 is typically 4096')
+    parser.add_argument('--model-metadata-dir', '-m', type = Path,
+        help ='Load HuggingFace/.pth vocab and metadata from the specified directory')
+    parser.add_argument("--vocab-dir", type=Path,
+        help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
+    parser.add_argument("--vocabtype", choices=["spm", "bpe"], default="spm",
+        help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)")
     return parser.parse_args()
 
 def main():
     cfg = handle_args()
     print(f'* Using config: {cfg}')
     print('\n=== WARNING === Be aware that this conversion script is best-effort. Use a native GGUF model if possible. === WARNING ===\n')
+    if cfg.model_metadata_dir is None and (cfg.gqa == 1 or cfg.eps == '5.0e-06'):
+        print('- Note: If converting LLaMA2, specifying "--eps 1e-5" is required. 70B models also need "--gqa 8".')
     data = np.memmap(cfg.input, mode = 'r')
     model = GGMLModel()
     print('* Scanning GGML input file')
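Note: main() maps the input file with np.memmap instead of reading it into memory. A minimal sketch of what that call yields (the filename here is hypothetical): mode = 'r' maps the file read-only, and numpy's default memmap dtype is uint8, so slicing returns raw bytes without copying the whole model into RAM:

    import numpy as np

    data = np.memmap('model.ggml', mode = 'r')   # read-only view, dtype defaults to uint8
    magic = bytes(data[0:4])                     # first four bytes: the GGML magic the converter checks
    print(len(data), magic)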
@@ -408,7 +440,10 @@ def main():
         print('\n=== WARNING === Special tokens may not be converted correctly. Use --model-metadata-dir if possible === WARNING ===\n')
         if model.file_format == GGMLFormat.GGML:
             print('! This is a very old GGML file that does not contain vocab scores. Strongly recommend using model metadata!')
-    converter = GGMLToGGUF(model, data, cfg, params_override = params_override, vocab_override = vocab_override, special_vocab = special_vocab)
+    converter = GGMLToGGUF(model, data, cfg,
+        params_override = params_override,
+        vocab_override = vocab_override,
+        special_vocab = special_vocab )
     converter.save()
     print(f'* Successful completion. Output saved to: {cfg.output}')
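For reference, a hypothetical end-to-end invocation. The script filename is assumed here; the flags are the ones handle_args defines, and per the note printed by main(), LLaMA2 conversions want --eps 1e-5 and 70B models additionally --gqa 8:

    python convert-llama-ggml-to-gguf.py \
        --input llama2-70b.ggmlv3.q8_0.bin \
        --output llama2-70b.q8_0.gguf \
        --eps 1e-5 --gqa 8 \
        --model-metadata-dir ./llama2-70b-hf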