convert-hf : add --outtype auto-f16

This option exists for model quantizers who want an initial
GGUF with the highest fidelity to the original model while still using
a 16-bit float type instead of 32-bit floats.
This commit is contained in:
Francis Couture-Harpin 2024-05-09 15:16:09 -04:00
parent 95930da30e
commit 58b515cad6
2 changed files with 14 additions and 2 deletions

View file

@ -861,6 +861,8 @@ class LlamaFileType(IntEnum):
MOSTLY_IQ1_M = 31 # except 1d tensors
MOSTLY_BF16 = 32 # except 1d tensors
GUESSED = 1024 # not specified in the model file
class GGUFEndian(IntEnum):
LITTLE = 0