gguf-py : use pyyaml instead of python-frontmatter

Hugging Face transformers already depends on pyyaml for model cards, so pyyaml should already be installed in the environment of anyone using the convert scripts, unlike python-frontmatter. The new parsing should be completely equivalent, since model card frontmatter seems to use only YAML and never TOML.
2024-07-14 15:36:50 -04:00 · 2024-07-14 15:36:50 -04:00 · 78a42fbee5
commit 78a42fbee5
parent 3b1766a992
4 changed files with 15 additions and 9 deletions
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@ -90,7 +90,6 @@ let
      ps.tiktoken
      ps.torchWithoutCuda
      ps.transformers
-      ps.python-frontmatter

      # server bench
      ps.matplotlib
--- a/gguf-py/gguf/metadata.py
+++ b/gguf-py/gguf/metadata.py
@ -2,6 +2,7 @@ from __future__ import annotations

 import re
 import json
+import yaml
 import logging
 from pathlib import Path
 from typing import Any, Optional
@ -116,12 +117,18 @@ class Metadata:
        if not model_card_path.is_file():
            return {}

-        try:
-            import frontmatter
+        # The model card metadata is assumed to always be in YAML
+        # ref: https://github.com/huggingface/transformers/blob/a5c642fe7a1f25d3bdcd76991443ba6ff7ee34b2/src/transformers/modelcard.py#L468-L473
        with open(model_card_path, "r", encoding="utf-8") as f:
-                return frontmatter.load(f).to_dict()
-        except ModuleNotFoundError:
-            logger.warning("module 'frontmatter' not available. Metadata from README.md will NOT be read.")
+            if f.readline() == "---\n":
+                raw = f.read().partition("---\n")[0]
+                data = yaml.safe_load(raw)
+                if isinstance(data, dict):
+                    return data
+                else:
+                    logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
+                    return {}
+            else:
                return {}

    @staticmethod
--- a/gguf-py/pyproject.toml
+++ b/gguf-py/pyproject.toml
@ -22,6 +22,7 @@ classifiers = [
 python = ">=3.8"
 numpy = ">=1.17"
 tqdm = ">=4.27"
+pyyaml = ">=5.1"

 [tool.poetry.dev-dependencies]
 pytest = "^5.2"
--- a/requirements/requirements-convert_legacy_llama.txt
+++ b/requirements/requirements-convert_legacy_llama.txt
@ -3,4 +3,3 @@ sentencepiece~=0.2.0
 transformers>=4.40.1,<5.0.0
 gguf>=0.1.0
 protobuf>=4.21.0,<5.0.0
-python-frontmatter~=1.0.1