gguf-hash: model wide and per tensor hashing using xxhash and sha1 (#8048)
CLI to hash GGUF files to detect difference on a per model and per tensor level The hash type we support is: - `--xxh64`: use xhash 64bit hash mode (default) - `--sha1`: use sha1 - `--uuid`: use uuid - `--sha256`: use sha256 While most POSIX systems already have hash checking programs like sha256sum, it is designed to check entire files. This is not ideal for our purpose if we want to check for consistency of the tensor data even if the metadata content of the gguf KV store has been updated. This program is designed to hash a gguf tensor payload on a 'per tensor layer' in addition to a 'entire tensor model' hash. The intent is that the entire tensor layer can be checked first but if there is any detected inconsistencies, then the per tensor hash can be used to narrow down the specific tensor layer that has inconsistencies. Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
905942abdb
commit
f7cab35ef9
17 changed files with 8846 additions and 0 deletions
24
examples/gguf-hash/deps/sha256/sha256.h
Normal file
24
examples/gguf-hash/deps/sha256/sha256.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
/* Sha256.h -- SHA-256 Hash
|
||||
2010-06-11 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __CRYPTO_SHA256_H
|
||||
#define __CRYPTO_SHA256_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define SHA256_DIGEST_SIZE 32
|
||||
|
||||
typedef struct sha256_t
|
||||
{
|
||||
uint32_t state[8];
|
||||
uint64_t count;
|
||||
unsigned char buffer[64];
|
||||
} sha256_t;
|
||||
|
||||
void sha256_init(sha256_t *p);
|
||||
void sha256_update(sha256_t *p, const unsigned char *data, size_t size);
|
||||
void sha256_final(sha256_t *p, unsigned char *digest);
|
||||
void sha256_hash(unsigned char *buf, const unsigned char *data, size_t size);
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue