diff --git a/model.py b/model.py index 97e3b6638..9789577b5 100644 --- a/model.py +++ b/model.py @@ -2,7 +2,6 @@ import os import re import sys import json -import gguf import torch import contextlib import numpy as np @@ -11,6 +10,12 @@ from enum import IntEnum from pathlib import Path from typing import TypeAlias, Any, Generator +if 'NO_LOCAL_GGUF' not in os.environ: + sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf')) +import gguf + + + NDArray: TypeAlias = 'np.ndarray[Any, Any]' @@ -160,7 +165,7 @@ class Model: def set_vocab(self): self._set_vocab_gpt2() - def get_tensors(self) -> Generator[str, Any]: + def get_tensors(self) -> Generator[str, Any, None]: for part_name in self.part_names: print("gguf: loading model part '" + part_name + "'") if self.is_safetensors: @@ -789,12 +794,13 @@ class PersimmonModel(Model): self.gguf_writer.add_name('persimmon-8b-chat') self.gguf_writer.add_embedding_length(hidden_size) self.gguf_writer.add_block_count(block_count) - self.gguf_writer.add_feed_forward_length(self.hparams["ffn_hidden_size"]) + self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"]) self.gguf_writer.add_rope_dimension_count(hidden_size // head_count) self.gguf_writer.add_head_count(head_count) self.gguf_writer.add_head_count_kv(head_count_kv) - self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"]) - self.gguf_writer.add_layer_norm_eps(self.hparams["layernorm_epsilon"]) + self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"]) + self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"]) + self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"]) def set_vocab(self): self._set_vocab_sentencepiece() diff --git a/gguf-py/gguf/util.py b/util.py similarity index 85% rename from gguf-py/gguf/util.py rename to util.py index fcb83b549..d266b54b1 100644 --- a/gguf-py/gguf/util.py +++ b/util.py @@ -3,7 +3,7 @@ import argparse from pathlib import Path def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser(description="Convert a stablelm model to a GGML compatible file") + parser = argparse.ArgumentParser(description="Convert a huggingface model to a GGML compatible file") parser.add_argument( "--vocab-only", action="store_true", help="extract only the vocab",