Respect tokenizer.ggml.add_bos_token value when tokenizing (#4040)
* gguf-py: gguf-dump: Respect --no-tensor flag in JSON mode. * Respect add_bos_token GGUF metadata value * gguf-py: Try to fix SpecialVocab giving up too easily for the Nth time
This commit is contained in:
parent
8da46278e1
commit
91f6499393
12 changed files with 85 additions and 29 deletions
|
@ -86,13 +86,14 @@ def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
|
|||
curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
|
||||
else:
|
||||
curr["value"] = field.parts[-1].tolist()[0]
|
||||
for idx, tensor in enumerate(reader.tensors):
|
||||
tensors[tensor.name] = {
|
||||
"index": idx,
|
||||
"shape": tensor.shape.tolist(),
|
||||
"type": tensor.tensor_type.name,
|
||||
"offset": tensor.field.offset,
|
||||
}
|
||||
if not args.no_tensors:
|
||||
for idx, tensor in enumerate(reader.tensors):
|
||||
tensors[tensor.name] = {
|
||||
"index": idx,
|
||||
"shape": tensor.shape.tolist(),
|
||||
"type": tensor.tensor_type.name,
|
||||
"offset": tensor.field.offset,
|
||||
}
|
||||
json.dump(result, sys.stdout)
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue