diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 697238ab0..429b83dbe 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -244,6 +244,8 @@ class Model: self.gguf_writer.add_finetune(self.metadata.finetune) if self.metadata.author is not None: self.gguf_writer.add_author(self.metadata.author) + if self.metadata.organization is not None: + self.gguf_writer.add_organization(self.metadata.organization) if self.metadata.version is not None: self.gguf_writer.add_version(self.metadata.version) if self.metadata.base_version is not None: diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index 16175cde5..286fa3637 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -789,6 +789,8 @@ class OutputFile: self.gguf.add_finetune(metadata.finetune) if metadata.author is not None: self.gguf.add_author(metadata.author) + if metadata.organization is not None: + self.gguf.add_organization(metadata.organization) if metadata.version is not None: self.gguf.add_version(metadata.version) if metadata.base_version is not None: @@ -805,6 +807,10 @@ class OutputFile: self.gguf.add_source_hf_repo(metadata.source_hf_repo) if metadata.tags is not None: - self.gguf_writer.add_tags(metadata.tags) + self.gguf.add_tags(metadata.tags) + if metadata.languages is not None: + self.gguf.add_languages(metadata.languages) + if metadata.datasets is not None: + self.gguf.add_datasets(metadata.datasets) def add_meta_arch(self, params: Params) -> None: # Metadata About The Neural Architecture Itself diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index bd6fffd31..a965b37a3 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -27,6 +27,7 @@ class Keys: BASENAME = "general.basename" FINETUNE = "general.finetune" AUTHOR = "general.author" + ORGANIZATION = "general.organization" VERSION = "general.version" BASE_VERSION = "general.base_version" URL = "general.url" @@ -39,7 +40,7 @@ class Keys: FILE_TYPE = 
"general.file_type" PARAMETER_SIZE_CLASS = "general.parameter_size_class" TAGS = "general.tags" - LANGUAGE = "general.language" + LANGUAGES = "general.languages" DATASETS = "general.datasets" class LLM: diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index c0d553d78..8f87e446e 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -439,6 +439,9 @@ class GGUFWriter: def add_author(self, author: str) -> None: self.add_string(Keys.General.AUTHOR, author) + def add_organization(self, organization: str) -> None: + self.add_string(Keys.General.ORGANIZATION, organization) + def add_version(self, version: str) -> None: self.add_string(Keys.General.VERSION, version) @@ -479,7 +482,7 @@ class GGUFWriter: - self.add_array(Keys.Tokenizer.TAGS, tags) + self.add_array(Keys.General.TAGS, tags) def add_languages(self, languages: Sequence[str]) -> None: - self.add_array(Keys.Tokenizer.LANGUAGE, languages) + self.add_array(Keys.General.LANGUAGES, languages) def add_datasets(self, datasets: Sequence[str]) -> None: - self.add_array(Keys.Tokenizer.DATASETS, datasets) + self.add_array(Keys.General.DATASETS, datasets) diff --git a/gguf-py/gguf/metadata.py b/gguf-py/gguf/metadata.py index 13b97ccd6..475c99f58 100644 --- a/gguf-py/gguf/metadata.py +++ b/gguf-py/gguf/metadata.py @@ -15,6 +15,7 @@ class Metadata: basename: Optional[str] = None finetune: Optional[str] = None author: Optional[str] = None + organization: Optional[str] = None version: Optional[str] = None base_version: Optional[str] = None url: Optional[str] = None @@ -26,7 +27,7 @@ class Metadata: source_hf_repo: Optional[str] = None parameter_size_class: Optional[str] = None tags: Optional[list[str]] = None - language: Optional[list[str]] = None + languages: Optional[list[str]] = None datasets: Optional[list[str]] = None @staticmethod @@ -38,7 +39,7 @@ class Metadata: # Create a new Metadata instance metadata = Metadata() - # load model folder model card if available + # load huggingface model card if available # Reference Model Card Metadata: 
https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 model_card = Metadata.load_model_card(model_path) if metadata.name is None: @@ -61,9 +62,9 @@ class Metadata: if metadata.tags is None: metadata.tags = model_card.get("tags", []) if metadata.languages is None: - metadata.languages = model_card.get("languages", []) + metadata.languages = model_card.get("language", model_card.get("languages", [])) if metadata.datasets is None: - metadata.datasets = model_card.get("datasets", []) + metadata.datasets = model_card.get("datasets", model_card.get("dataset", [])) # load huggingface parameters if available hf_params = Metadata.load_huggingface_parameters(model_path) @@ -85,6 +86,7 @@ class Metadata: metadata.basename = metadata_override.get(Keys.General.BASENAME , metadata.basename ) # noqa: E202 metadata.finetune = metadata_override.get(Keys.General.FINETUNE , metadata.finetune ) # noqa: E202 metadata.author = metadata_override.get(Keys.General.AUTHOR , metadata.author ) # noqa: E202 + metadata.organization = metadata_override.get(Keys.General.ORGANIZATION , metadata.organization ) # noqa: E202 metadata.version = metadata_override.get(Keys.General.VERSION , metadata.version ) # noqa: E202 metadata.base_version = metadata_override.get(Keys.General.BASE_VERSION , metadata.base_version ) # noqa: E202 metadata.url = metadata_override.get(Keys.General.URL , metadata.url ) # noqa: E202 @@ -97,7 +99,7 @@ class Metadata: metadata.parameter_size_class = metadata_override.get(Keys.General.PARAMETER_SIZE_CLASS, metadata.parameter_size_class) # noqa: E202 metadata.tags = metadata_override.get(Keys.General.TAGS , metadata.tags ) # noqa: E202 metadata.languages = metadata_override.get(Keys.General.LANGUAGES , metadata.languages ) # noqa: E202 - metadata.datasets = metadata_override.get(Keys.General.datasets , metadata.datasets ) # noqa: E202 + metadata.datasets = metadata_override.get(Keys.General.DATASETS , metadata.datasets ) # noqa: E202 # Direct Metadata Override 
(via direct cli argument) if model_name is not None: