llama-gguf-hash: verification added
This commit is contained in:
parent
8fed75edca
commit
ff0912e323
2 changed files with 599 additions and 108 deletions
|
@ -5,10 +5,15 @@ CLI to hash GGUF files to detect difference on a per model and per tensor level.
|
|||
|
||||
**Command line options:**
|
||||
|
||||
- `--help`: display help message
|
||||
- `--xxh64`: use xxhash 64bit hash mode (default)
|
||||
- `--sha1`: use sha1
|
||||
- `--uuid`: use uuid
|
||||
- `--sha256`: use sha256
|
||||
- `--all`: use all hash
|
||||
- `--no-layer`: exclude per layer hash
|
||||
- `--uuid`: generate UUIDv5 ID
|
||||
- `-c`, `--check <manifest>`: verify against a manifest
|
||||
|
||||
## About
|
||||
|
||||
|
@ -62,6 +67,136 @@ make -C build llama-gguf-hash VERBOSE=1
|
|||
./build/bin/llama-gguf-hash --sha256 test.gguf
|
||||
```
|
||||
|
||||
## Generation and Verification Example
|
||||
|
||||
To generate a manifest, we may use this command:
|
||||
|
||||
```bash
|
||||
./llama-gguf-hash --all test.gguf > test.gguf.manifest
|
||||
```
|
||||
|
||||
This would generate a manifest that looks like the one below, containing multiple hash types as well as per-tensor-layer hashes
|
||||
(This excludes UUID as that is an ID not a hash)
|
||||
|
||||
```bash
|
||||
xxh64 f66e9cd66a4396a0 test.gguf:tensor_0
|
||||
sha1 59f79ecefd8125a996fdf419239051a7e99e5f20 test.gguf:tensor_0
|
||||
sha256 c0510d38fa060c46265e0160a85c7243096b01dd31c2f355bdbb5516b20de1bd test.gguf:tensor_0
|
||||
xxh64 7d3a1f9ac04d0537 test.gguf:tensor_1
|
||||
sha1 4765f592eacf096df4628ba59476af94d767080a test.gguf:tensor_1
|
||||
sha256 8514cbcc73692a2c56bd7a33a022edd5ff819614bd23b19915d7224387f397a7 test.gguf:tensor_1
|
||||
xxh64 a0af5d700049693b test.gguf:tensor_2
|
||||
sha1 25cbfbad4513cc348e2c95ebdee69d6ff2fd8753 test.gguf:tensor_2
|
||||
sha256 947e6b36e20f2cc95e1d2ce1c1669d813d574657ac6b5ac5196158d454d35180 test.gguf:tensor_2
|
||||
xxh64 e83fddf559d7b6a6 test.gguf:tensor_3
|
||||
sha1 a9cba73e2d90f2ee3dae2548caa42bef3fe6a96c test.gguf:tensor_3
|
||||
sha256 423b044e016d8ac73c39f23f60bf01bedef5ecb03c0230accd824c91fe86f1a1 test.gguf:tensor_3
|
||||
xxh64 1257733306b7992d test.gguf:tensor_4
|
||||
sha1 d7bc61db93bb685ce9d598da89717c66729b7543 test.gguf:tensor_4
|
||||
sha256 79737cb3912d4201384cf7f16a1a37ff7823f23ea796cb205b6ca361ab9e3ebf test.gguf:tensor_4
|
||||
xxh64 d238d16ba4711e58 test.gguf:tensor_5
|
||||
sha1 0706566c198fe1072f37e0a5135b4b5f23654c52 test.gguf:tensor_5
|
||||
sha256 60949be8298eced0ecdde64487643d018407bd261691e061d9e9c3dbc9fd358b test.gguf:tensor_5
|
||||
xxh64 3fbc3b65ab8c7f39 test.gguf:tensor_6
|
||||
sha1 73922a0727226a409049f6fc3172a52219ca6f00 test.gguf:tensor_6
|
||||
sha256 574f4c46ff384a3b9a225eb955d2a871847a2e8b3fa59387a8252832e92ef7b0 test.gguf:tensor_6
|
||||
xxh64 c22021c29854f093 test.gguf:tensor_7
|
||||
sha1 efc39cece6a951188fc41e354c73bbfe6813d447 test.gguf:tensor_7
|
||||
sha256 4c0410cd3c500f078ae5b21e8dc9eb79e29112713b2ab58a882f82a3868d4d75 test.gguf:tensor_7
|
||||
xxh64 936df61f5d64261f test.gguf:tensor_8
|
||||
sha1 c2490296d789a4f34398a337fed8377d943d9f06 test.gguf:tensor_8
|
||||
sha256 c4401313feeba0261275c3b25bd2d8fe40ce04e0f440c2980ed0e9674c30ff01 test.gguf:tensor_8
|
||||
xxh64 93fd20c64421c081 test.gguf:tensor_9
|
||||
sha1 7047ce1e78437a6884337a3751c7ee0421918a65 test.gguf:tensor_9
|
||||
sha256 23d57cf0d7a6e90b0b3616b41300e0cd354781e812add854a5f95aa55f2bc514 test.gguf:tensor_9
|
||||
xxh64 5a54d3aad816f302 test.gguf
|
||||
sha1 d15be52c4ff213e823cb6dd13af7ee2f978e7042 test.gguf
|
||||
sha256 7dd641b32f59b60dbd4b5420c4b0f6321ccf48f58f6ae201a3dbc4a58a27c6e4 test.gguf
|
||||
```
|
||||
|
||||
We can then use the normal check command which will by default check for the highest security strength hash and verify against that:
|
||||
|
||||
```bash
|
||||
$ ./llama-gguf-hash --check test.gguf.manifest test.gguf
|
||||
manifest test.gguf.manifest sha256 sha1 xxh64
|
||||
sha256 c0510d38fa060c46265e0160a85c7243096b01dd31c2f355bdbb5516b20de1bd test.gguf:tensor_0 - Ok
|
||||
sha256 8514cbcc73692a2c56bd7a33a022edd5ff819614bd23b19915d7224387f397a7 test.gguf:tensor_1 - Ok
|
||||
sha256 947e6b36e20f2cc95e1d2ce1c1669d813d574657ac6b5ac5196158d454d35180 test.gguf:tensor_2 - Ok
|
||||
sha256 423b044e016d8ac73c39f23f60bf01bedef5ecb03c0230accd824c91fe86f1a1 test.gguf:tensor_3 - Ok
|
||||
sha256 79737cb3912d4201384cf7f16a1a37ff7823f23ea796cb205b6ca361ab9e3ebf test.gguf:tensor_4 - Ok
|
||||
sha256 60949be8298eced0ecdde64487643d018407bd261691e061d9e9c3dbc9fd358b test.gguf:tensor_5 - Ok
|
||||
sha256 574f4c46ff384a3b9a225eb955d2a871847a2e8b3fa59387a8252832e92ef7b0 test.gguf:tensor_6 - Ok
|
||||
sha256 4c0410cd3c500f078ae5b21e8dc9eb79e29112713b2ab58a882f82a3868d4d75 test.gguf:tensor_7 - Ok
|
||||
sha256 c4401313feeba0261275c3b25bd2d8fe40ce04e0f440c2980ed0e9674c30ff01 test.gguf:tensor_8 - Ok
|
||||
sha256 23d57cf0d7a6e90b0b3616b41300e0cd354781e812add854a5f95aa55f2bc514 test.gguf:tensor_9 - Ok
|
||||
sha256 7dd641b32f59b60dbd4b5420c4b0f6321ccf48f58f6ae201a3dbc4a58a27c6e4 test.gguf - Ok
|
||||
|
||||
Verification results for test.gguf.manifest - Success
|
||||
```
|
||||
|
||||
Or we may explicitly ask for a faster hash like:
|
||||
|
||||
```bash
|
||||
$ ./llama-gguf-hash --check test.gguf.manifest --xxh64 test.gguf
|
||||
manifest test.gguf.manifest sha256 sha1 xxh64
|
||||
xxh64 f66e9cd66a4396a0 test.gguf:tensor_0 - Ok
|
||||
xxh64 7d3a1f9ac04d0537 test.gguf:tensor_1 - Ok
|
||||
xxh64 a0af5d700049693b test.gguf:tensor_2 - Ok
|
||||
xxh64 e83fddf559d7b6a6 test.gguf:tensor_3 - Ok
|
||||
xxh64 1257733306b7992d test.gguf:tensor_4 - Ok
|
||||
xxh64 d238d16ba4711e58 test.gguf:tensor_5 - Ok
|
||||
xxh64 3fbc3b65ab8c7f39 test.gguf:tensor_6 - Ok
|
||||
xxh64 c22021c29854f093 test.gguf:tensor_7 - Ok
|
||||
xxh64 936df61f5d64261f test.gguf:tensor_8 - Ok
|
||||
xxh64 93fd20c64421c081 test.gguf:tensor_9 - Ok
|
||||
xxh64 5a54d3aad816f302 test.gguf - Ok
|
||||
|
||||
Verification results for test.gguf.manifest - Success
|
||||
```
|
||||
|
||||
Or maybe we want to just check that all the hashes are valid:
|
||||
|
||||
```bash
|
||||
$ ./llama-gguf-hash --check test.gguf.manifest --all test.gguf
|
||||
manifest test.gguf.manifest sha256 sha1 xxh64
|
||||
xxh64 f66e9cd66a4396a0 test.gguf:tensor_0 - Ok
|
||||
sha1 59f79ecefd8125a996fdf419239051a7e99e5f20 test.gguf:tensor_0 - Ok
|
||||
sha256 c0510d38fa060c46265e0160a85c7243096b01dd31c2f355bdbb5516b20de1bd test.gguf:tensor_0 - Ok
|
||||
xxh64 7d3a1f9ac04d0537 test.gguf:tensor_1 - Ok
|
||||
sha1 4765f592eacf096df4628ba59476af94d767080a test.gguf:tensor_1 - Ok
|
||||
sha256 8514cbcc73692a2c56bd7a33a022edd5ff819614bd23b19915d7224387f397a7 test.gguf:tensor_1 - Ok
|
||||
xxh64 a0af5d700049693b test.gguf:tensor_2 - Ok
|
||||
sha1 25cbfbad4513cc348e2c95ebdee69d6ff2fd8753 test.gguf:tensor_2 - Ok
|
||||
sha256 947e6b36e20f2cc95e1d2ce1c1669d813d574657ac6b5ac5196158d454d35180 test.gguf:tensor_2 - Ok
|
||||
xxh64 e83fddf559d7b6a6 test.gguf:tensor_3 - Ok
|
||||
sha1 a9cba73e2d90f2ee3dae2548caa42bef3fe6a96c test.gguf:tensor_3 - Ok
|
||||
sha256 423b044e016d8ac73c39f23f60bf01bedef5ecb03c0230accd824c91fe86f1a1 test.gguf:tensor_3 - Ok
|
||||
xxh64 1257733306b7992d test.gguf:tensor_4 - Ok
|
||||
sha1 d7bc61db93bb685ce9d598da89717c66729b7543 test.gguf:tensor_4 - Ok
|
||||
sha256 79737cb3912d4201384cf7f16a1a37ff7823f23ea796cb205b6ca361ab9e3ebf test.gguf:tensor_4 - Ok
|
||||
xxh64 d238d16ba4711e58 test.gguf:tensor_5 - Ok
|
||||
sha1 0706566c198fe1072f37e0a5135b4b5f23654c52 test.gguf:tensor_5 - Ok
|
||||
sha256 60949be8298eced0ecdde64487643d018407bd261691e061d9e9c3dbc9fd358b test.gguf:tensor_5 - Ok
|
||||
xxh64 3fbc3b65ab8c7f39 test.gguf:tensor_6 - Ok
|
||||
sha1 73922a0727226a409049f6fc3172a52219ca6f00 test.gguf:tensor_6 - Ok
|
||||
sha256 574f4c46ff384a3b9a225eb955d2a871847a2e8b3fa59387a8252832e92ef7b0 test.gguf:tensor_6 - Ok
|
||||
xxh64 c22021c29854f093 test.gguf:tensor_7 - Ok
|
||||
sha1 efc39cece6a951188fc41e354c73bbfe6813d447 test.gguf:tensor_7 - Ok
|
||||
sha256 4c0410cd3c500f078ae5b21e8dc9eb79e29112713b2ab58a882f82a3868d4d75 test.gguf:tensor_7 - Ok
|
||||
xxh64 936df61f5d64261f test.gguf:tensor_8 - Ok
|
||||
sha1 c2490296d789a4f34398a337fed8377d943d9f06 test.gguf:tensor_8 - Ok
|
||||
sha256 c4401313feeba0261275c3b25bd2d8fe40ce04e0f440c2980ed0e9674c30ff01 test.gguf:tensor_8 - Ok
|
||||
xxh64 93fd20c64421c081 test.gguf:tensor_9 - Ok
|
||||
sha1 7047ce1e78437a6884337a3751c7ee0421918a65 test.gguf:tensor_9 - Ok
|
||||
sha256 23d57cf0d7a6e90b0b3616b41300e0cd354781e812add854a5f95aa55f2bc514 test.gguf:tensor_9 - Ok
|
||||
xxh64 5a54d3aad816f302 test.gguf - Ok
|
||||
sha1 d15be52c4ff213e823cb6dd13af7ee2f978e7042 test.gguf - Ok
|
||||
sha256 7dd641b32f59b60dbd4b5420c4b0f6321ccf48f58f6ae201a3dbc4a58a27c6e4 test.gguf - Ok
|
||||
|
||||
Verification results for test.gguf.manifest - Success
|
||||
```
|
||||
|
||||
|
||||
## Crypto/Hash Libraries Used
|
||||
|
||||
These micro C library dependencies were installed via the [clib c package manager](https://github.com/clibs)
|
||||
|
|
|
@ -6,9 +6,10 @@
|
|||
#include <string>
|
||||
#include <stdexcept>
|
||||
#include <algorithm>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -28,14 +29,70 @@ extern "C" {
|
|||
#define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5
|
||||
|
||||
|
||||
#define HASH_TYPE_SHA256_STR "sha256"
|
||||
#define HASH_TYPE_SHA1_STR "sha1"
|
||||
#define HASH_TYPE_XXH64_STR "xxh64"
|
||||
#define HASH_TYPE_UUID_STR "uuid"
|
||||
|
||||
|
||||
// Exit status values reported by the tool (returned from main()).
typedef enum {
    HASH_EXIT_SUCCESS                = 0, // every hash was generated or validated
    HASH_EXIT_FAILURE                = 1, // generic failure
    HASH_EXIT_MISMATCH               = 2, // a hash did not match during validation
    HASH_EXIT_MANIFEST_MISSING_ENTRY = 3, // validation was attempted but the manifest has no matching entry
    HASH_EXIT_MANIFEST_UNKNOWN_HASH  = 4, // manifest exists but contains no hash format we recognise
    HASH_EXIT_MANIFEST_FILE_ERROR    = 5  // manifest is missing or not in a known format
} hash_exit_code_t;
|
||||
|
||||
|
||||
// Outcome of looking up a single (hash type, name) pair in a manifest.
typedef enum {
    HASH_MANIFEST_NOT_FOUND = 0, // no entry for this hash type and name
    HASH_MANIFEST_MISMATCH  = 1, // entry exists but the recorded hash differs
    HASH_MANIFEST_OK        = 2, // entry exists and the recorded hash is equal
} hash_manifest_result_t;
|
||||
|
||||
|
||||
// Options collected from the command line and consumed by the hashing code.
struct hash_params {
    // Path of the file to hash.
    std::string input;

    // Which outputs were requested; the field order mirrors manifest_check_params.
    bool xxh64  = false;
    bool sha1   = false;
    bool sha256 = false;
    bool uuid   = false; // declared exactly once (a second declaration is a redefinition error)

    // When true, per-tensor-layer hashes are not produced.
    bool no_layer = false;

    // Set by main() once the manifest has been opened and found to contain
    // at least one recognised hash type; switches hashing into verify mode.
    bool manifest_is_usable = false;

    // Manifest path given with -c / --check; empty when not verifying.
    std::string manifest_file;
};
|
||||
|
||||
// Records which hash types were seen while scanning a manifest file.
// Every flag starts out false and is only ever raised by manifest_type().
struct manifest_check_params {
    bool xxh64  = false; // at least one "xxh64" entry present
    bool sha1   = false; // at least one "sha1" entry present
    bool sha256 = false; // at least one "sha256" entry present
    bool uuid   = false; // at least one "uuid" entry present
};
|
||||
|
||||
// Returns the label printed after each verified hash line for the given
// manifest lookup result. Values outside the enum produce "?".
static char const * hash_manifest_result_to_str(hash_manifest_result_t value) {
    if (value == HASH_MANIFEST_NOT_FOUND) {
        return "Not Found";
    }
    if (value == HASH_MANIFEST_MISMATCH) {
        return "Mismatch";
    }
    if (value == HASH_MANIFEST_OK) {
        return "Ok";
    }
    return "?";
}
|
||||
|
||||
// Returns a human-readable name for an exit code, used in the final
// "Verification results" line. Values outside the enum produce "?".
static char const * hash_exit_code_to_str(hash_exit_code_t value) {
    if (value == HASH_EXIT_SUCCESS) {
        return "Success";
    }
    if (value == HASH_EXIT_FAILURE) {
        return "Failure";
    }
    if (value == HASH_EXIT_MISMATCH) {
        return "Mismatch";
    }
    if (value == HASH_EXIT_MANIFEST_MISSING_ENTRY) {
        return "Manifest Missing Entry";
    }
    if (value == HASH_EXIT_MANIFEST_UNKNOWN_HASH) {
        return "Manifest Unknown Hash";
    }
    if (value == HASH_EXIT_MANIFEST_FILE_ERROR) {
        return "Manifest File Error";
    }
    return "?";
}
|
||||
|
||||
static void hash_print_usage(const char * executable) {
|
||||
const hash_params default_params;
|
||||
printf("\n");
|
||||
|
@ -45,15 +102,19 @@ static void hash_print_usage(const char * executable) {
|
|||
printf("\n");
|
||||
printf("options:\n");
|
||||
printf(" -h, --help show this help message and exit\n");
|
||||
printf(" --xxh64 use xxh64\n");
|
||||
printf(" --sha1 use sha1\n");
|
||||
printf(" --uuid use uuid\n");
|
||||
printf(" --sha256 use sha256\n");
|
||||
printf(" --xxh64 use xxh64 hash\n");
|
||||
printf(" --sha1 use sha1 hash\n");
|
||||
printf(" --sha256 use sha256 hash\n");
|
||||
printf(" --all use all hash\n");
|
||||
printf(" --no-layer exclude per layer hash\n");
|
||||
printf(" --uuid generate UUIDv5 ID\n");
|
||||
printf(" -c, --check <manifest> verify against a manifest\n");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void hash_params_parse_ex(int argc, const char ** argv, hash_params & params) {
|
||||
std::string arg;
|
||||
bool invalid_param = false;
|
||||
const std::string arg_prefix = "--";
|
||||
|
||||
int arg_idx = 1;
|
||||
|
@ -89,18 +150,34 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
|
|||
params.sha256 = true;
|
||||
}
|
||||
|
||||
if (arg == "--all") {
|
||||
arg_found = true;
|
||||
params.sha256 = true;
|
||||
params.sha1 = true;
|
||||
params.xxh64 = true;
|
||||
}
|
||||
|
||||
if (arg == "--no-layer") {
|
||||
arg_found = true;
|
||||
params.no_layer = true;
|
||||
}
|
||||
|
||||
if (arg == "-c" || arg == "--check") {
|
||||
if (++arg_idx >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
arg_found = true;
|
||||
params.manifest_file = argv[arg_idx];
|
||||
}
|
||||
|
||||
if (!arg_found) {
|
||||
throw std::invalid_argument("error: unknown argument: " + arg);
|
||||
}
|
||||
}
|
||||
|
||||
if (!params.xxh64
|
||||
&& !params.sha1
|
||||
&& !params.uuid
|
||||
&& !params.sha256
|
||||
) {
|
||||
// By default if no swich argument provided, assume xxh64
|
||||
params.xxh64 = true;
|
||||
if (invalid_param) {
|
||||
throw std::invalid_argument("error: invalid parameter for argument:" + arg);
|
||||
}
|
||||
|
||||
if (argc - arg_idx < 1) {
|
||||
|
@ -123,7 +200,89 @@ static bool hash_params_parse(int argc, const char ** argv, hash_params & params
|
|||
return result;
|
||||
}
|
||||
|
||||
static bool gguf_hash(const hash_params & hash_params) {
|
||||
static bool manifest_type(const std::string & manifest_file, manifest_check_params & manifest_check) {
|
||||
if (manifest_file.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ifstream file(manifest_file);
|
||||
if (!file.is_open()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string manifest_entry_line;
|
||||
while (getline(file, manifest_entry_line)) {
|
||||
// hash_type_str hash_str tensor_name
|
||||
// e.g. 'xxh64 f66e9cd66a4396a0 test.gguf:tensor_0'
|
||||
std::istringstream line_stream(manifest_entry_line);
|
||||
std::string file_hash_type;
|
||||
if (line_stream >> file_hash_type) {
|
||||
if (file_hash_type == HASH_TYPE_SHA256_STR) {
|
||||
manifest_check.sha256 = true;
|
||||
} else if (file_hash_type == HASH_TYPE_SHA1_STR) {
|
||||
manifest_check.sha1 = true;
|
||||
} else if (file_hash_type == HASH_TYPE_XXH64_STR) {
|
||||
manifest_check.xxh64 = true;
|
||||
} else if (file_hash_type == HASH_TYPE_UUID_STR) {
|
||||
manifest_check.uuid = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Looks up one (hash type, name) pair in a manifest and compares the hash.
//
// The manifest is opened and read from the start on every call. The first
// line whose hash type and name both match decides the result; later lines
// are not consulted. Lines with fewer than three whitespace-delimited
// fields are ignored.
//
// manifest_file : path of the manifest
// hash_type_str : hash type label to match (first field)
// hash_str      : hash text computed by the caller (compared to second field)
// tensor_name   : entry name to match (third field)
// returns       : HASH_MANIFEST_OK / HASH_MANIFEST_MISMATCH for the first
//                 matching entry, HASH_MANIFEST_NOT_FOUND when the path is
//                 empty, the file cannot be opened, or no entry matches
static hash_manifest_result_t manifest_verify(const std::string& manifest_file, const std::string& hash_type_str, const std::string& hash_str, const std::string& tensor_name) {
    if (manifest_file.empty()) {
        return HASH_MANIFEST_NOT_FOUND;
    }

    std::ifstream manifest_stream(manifest_file);
    if (!manifest_stream.is_open()) {
        return HASH_MANIFEST_NOT_FOUND;
    }

    std::string line;
    while (std::getline(manifest_stream, line)) {
        std::istringstream fields(line);
        std::string entry_type;
        std::string entry_hash;
        std::string entry_name;

        if (!(fields >> entry_type >> entry_hash >> entry_name)) {
            continue;
        }

        // Skip entries for a different hash type or a different name.
        if (entry_type != hash_type_str || entry_name != tensor_name) {
            continue;
        }

        if (entry_hash == hash_str) {
            return HASH_MANIFEST_OK;
        }
        return HASH_MANIFEST_MISMATCH;
    }

    return HASH_MANIFEST_NOT_FOUND;
}
|
||||
|
||||
// Builds a version-5 UUID from a SHA-1 digest.
// Ref: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.5
//
// sha1_digest : 20-byte digest; the caller is expected to have hashed the
//               namespace together with the data already
// uuid        : receives the 16-byte UUID
static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
    // The UUID is the leading 16 bytes of the digest ...
    memcpy(uuid, sha1_digest, 16);

    // ... with the high nibble of byte 6 replaced by the version (5)
    uuid[6] = (unsigned char) ((uuid[6] & 0x0F) | 0x50);

    // ... and the top two bits of byte 8 replaced by the variant (0b10xx)
    uuid[8] = (unsigned char) ((uuid[8] & 0x3F) | 0x80);
}
|
||||
|
||||
static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
|
||||
const std::string & fname = hash_params.input;
|
||||
struct ggml_context * ctx_data = NULL;
|
||||
|
||||
|
@ -158,16 +317,30 @@ static bool gguf_hash(const hash_params & hash_params) {
|
|||
sha256_init(&sha256_model_hash_ctx);
|
||||
}
|
||||
|
||||
// sha1 for uuid init
|
||||
SHA1_CTX sha1_for_uuid_ctx;
|
||||
if (hash_params.uuid) {
|
||||
unsigned char const uuidv5_namespace[] = {UUID_NAMESPACE_LLAMA_CPP_HEX};
|
||||
SHA1Init(&sha1_for_uuid_ctx);
|
||||
SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));
|
||||
}
|
||||
|
||||
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
|
||||
const int n_tensors = gguf_get_n_tensors(ctx);
|
||||
bool tensor_layer_in_manifest = false;
|
||||
bool model_in_manifest = false;
|
||||
bool tensor_layer_has_mismatch = false;
|
||||
bool model_has_mismatch = false;
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name(ctx, i);
|
||||
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
||||
auto n_bytes = ggml_nbytes(cur);
|
||||
auto *raw_data = cur->data;
|
||||
const std::string tensor_layer_name = fname + ":" + name;
|
||||
|
||||
if (hash_params.xxh64) {
|
||||
|
||||
if (!hash_params.no_layer) {
|
||||
// Per Layer Hash
|
||||
XXH64_hash_t hash = XXH64(raw_data, n_bytes, 0);
|
||||
|
||||
|
@ -177,7 +350,26 @@ static bool gguf_hash(const hash_params & hash_params) {
|
|||
sprintf( ( hex_result + (2*offset)), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
|
||||
}
|
||||
|
||||
printf("xxh64 %s %s:%s\n", hex_result, fname.c_str(), name);
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
tensor_layer_in_manifest = true;
|
||||
tensor_layer_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
tensor_layer_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Overall Model Hash
|
||||
if (XXH64_update(xxh64_model_hash_state, raw_data, n_bytes) == XXH_ERROR) abort();
|
||||
|
@ -185,6 +377,7 @@ static bool gguf_hash(const hash_params & hash_params) {
|
|||
|
||||
if (hash_params.sha1) {
|
||||
|
||||
if (!hash_params.no_layer) {
|
||||
// Per Layer Hash
|
||||
char result[21]; // sha1 outputs 20 bytes
|
||||
SHA1( result, (const char *)raw_data, n_bytes);
|
||||
|
@ -194,7 +387,26 @@ static bool gguf_hash(const hash_params & hash_params) {
|
|||
sprintf( ( hex_result + (2*offset)), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
printf("sha1 %s %s:%s\n", hex_result, fname.c_str(), name);
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
tensor_layer_in_manifest = true;
|
||||
tensor_layer_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
tensor_layer_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Overall Model Hash
|
||||
SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
|
@ -202,6 +414,7 @@ static bool gguf_hash(const hash_params & hash_params) {
|
|||
|
||||
if (hash_params.sha256) {
|
||||
|
||||
if (!hash_params.no_layer) {
|
||||
// Per Layer Hash
|
||||
unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
|
||||
sha256_hash((unsigned char*) result, (const unsigned char *)raw_data, n_bytes);
|
||||
|
@ -211,11 +424,34 @@ static bool gguf_hash(const hash_params & hash_params) {
|
|||
sprintf( ( hex_result + (2*offset)), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
printf("sha256 %s %s:%s\n", hex_result, fname.c_str(), name);
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
tensor_layer_in_manifest = true;
|
||||
tensor_layer_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
tensor_layer_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Overall Model Hash
|
||||
sha256_update( &sha256_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
}
|
||||
|
||||
if (hash_params.uuid) {
|
||||
SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.xxh64) {
|
||||
|
@ -227,7 +463,25 @@ static bool gguf_hash(const hash_params & hash_params) {
|
|||
sprintf( ( hex_result + (2*offset)), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
|
||||
}
|
||||
|
||||
printf("xxh64 %s %s\n", hex_result, fname.c_str());
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.sha1) {
|
||||
|
@ -239,7 +493,25 @@ static bool gguf_hash(const hash_params & hash_params) {
|
|||
sprintf( ( hex_result + (2*offset)), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
printf("sha1 %s %s\n", hex_result, fname.c_str());
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.sha256) {
|
||||
|
@ -251,63 +523,30 @@ static bool gguf_hash(const hash_params & hash_params) {
|
|||
sprintf( ( hex_result + (2*offset)), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
printf("sha256 %s %s\n", hex_result, fname.c_str());
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
ggml_free(ctx_data);
|
||||
gguf_free(ctx);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds a version-5 UUID from a SHA-1 digest.
// Ref: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.5
//
// sha1_digest : 20-byte digest; the caller is expected to have hashed the
//               namespace together with the data already
// uuid        : receives the 16-byte UUID
static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
    // The UUID is the leading 16 bytes of the digest ...
    memcpy(uuid, sha1_digest, 16);

    // ... with the high nibble of byte 6 replaced by the version (5)
    uuid[6] = (unsigned char) ((uuid[6] & 0x0F) | 0x50);

    // ... and the top two bits of byte 8 replaced by the variant (0b10xx)
    uuid[8] = (unsigned char) ((uuid[8] & 0x3F) | 0x80);
}
|
||||
|
||||
static bool gguf_uuid(const hash_params & hash_params) {
|
||||
if (!hash_params.uuid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const std::string & fname = hash_params.input;
|
||||
struct ggml_context * ctx_data = NULL;
|
||||
|
||||
struct gguf_init_params params = {
|
||||
/*.no_alloc = */ false,
|
||||
/*.ctx = */ &ctx_data,
|
||||
};
|
||||
|
||||
// sha1 init
|
||||
SHA1_CTX sha1_model_hash_ctx;
|
||||
SHA1Init(&sha1_model_hash_ctx);
|
||||
|
||||
unsigned char const uuidv5_namespace[] = {UUID_NAMESPACE_LLAMA_CPP_HEX};
|
||||
SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));
|
||||
|
||||
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
|
||||
const int n_tensors = gguf_get_n_tensors(ctx);
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name(ctx, i);
|
||||
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
||||
auto n_bytes = ggml_nbytes(cur);
|
||||
auto *raw_data = cur->data;
|
||||
SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.uuid) {
|
||||
unsigned char result[21];
|
||||
SHA1Final(result, &sha1_model_hash_ctx);
|
||||
SHA1Final(result, &sha1_for_uuid_ctx);
|
||||
|
||||
unsigned char uuid[16];
|
||||
generate_uuidv5(result, uuid);
|
||||
|
@ -318,20 +557,137 @@ static bool gguf_uuid(const hash_params & hash_params) {
|
|||
uuid[4], uuid[5], uuid[6], uuid[7],
|
||||
uuid[8], uuid[9], uuid[10], uuid[11],
|
||||
uuid[12], uuid[13], uuid[14], uuid[15]);
|
||||
printf("UUIDv5 %s %s\n", string_buffer, fname.c_str());
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, string_buffer, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ggml_free(ctx_data);
|
||||
gguf_free(ctx);
|
||||
|
||||
return true;
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
// In hash verification mode
|
||||
|
||||
if (!model_in_manifest) {
|
||||
// model missing in manifest?
|
||||
|
||||
// Check tensor layer...
|
||||
if (!tensor_layer_in_manifest) {
|
||||
// Still missing? Maybe we are reading the wrong manifest.
|
||||
return HASH_EXIT_MANIFEST_MISSING_ENTRY;
|
||||
}
|
||||
|
||||
if (tensor_layer_has_mismatch) {
|
||||
// Per tensor check found error
|
||||
return HASH_EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// All per tensor layer checks passed? Sounds good enough.
|
||||
return HASH_EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// Overall model check passed, but let's check per layer just in case
|
||||
// If missing, we don't care too much as the overall model checked
|
||||
if (tensor_layer_in_manifest && tensor_layer_has_mismatch) {
|
||||
return HASH_EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (model_has_mismatch) {
|
||||
// model has failed hash somewhere in the model
|
||||
return HASH_EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// All checks appears to be fine
|
||||
return HASH_EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// In hash generation mode
|
||||
return HASH_EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
int main(int argc, const char ** argv) {
|
||||
hash_params params;
|
||||
manifest_check_params manifest_check;
|
||||
hash_params_parse(argc, argv, params);
|
||||
|
||||
gguf_hash(params);
|
||||
gguf_uuid(params);
|
||||
|
||||
return 0;
|
||||
if (!params.manifest_file.empty()) {
|
||||
if (!manifest_type(params.manifest_file, manifest_check)) {
|
||||
printf("ERROR cannot open manifest %s", params.manifest_file.c_str());
|
||||
return HASH_EXIT_MANIFEST_FILE_ERROR;
|
||||
}
|
||||
|
||||
if (!manifest_check.sha256 && !manifest_check.sha1 && !manifest_check.xxh64 && !manifest_check.uuid) {
|
||||
printf("ERROR manifest does not have any known hash format in %s", params.manifest_file.c_str());
|
||||
return HASH_EXIT_MANIFEST_UNKNOWN_HASH;
|
||||
}
|
||||
|
||||
printf("manifest %s", params.manifest_file.c_str());
|
||||
|
||||
if (manifest_check.sha256) {
|
||||
printf(" sha256");
|
||||
}
|
||||
|
||||
if (manifest_check.sha1) {
|
||||
printf(" sha1");
|
||||
}
|
||||
|
||||
if (manifest_check.xxh64) {
|
||||
printf(" xxh64");
|
||||
}
|
||||
|
||||
if (manifest_check.uuid) {
|
||||
printf(" uuid");
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
// Autoselect the highest security hash if manifest is provided but
|
||||
// the user has not specifically defined the hash they care about
|
||||
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
|
||||
// User has not selected a specific value, pick most secure hash
|
||||
if (manifest_check.sha256) {
|
||||
params.sha256 = true;
|
||||
} else if (manifest_check.sha1) {
|
||||
params.sha1 = true;
|
||||
} else if (manifest_check.xxh64) {
|
||||
params.xxh64 = true;
|
||||
} else if (manifest_check.uuid) {
|
||||
params.uuid = true;
|
||||
}
|
||||
}
|
||||
|
||||
params.manifest_is_usable = true;
|
||||
}
|
||||
|
||||
// By default if no swich argument provided, assume xxh64
|
||||
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
|
||||
params.xxh64 = true;
|
||||
}
|
||||
|
||||
hash_exit_code_t exit_code = gguf_hash(params);
|
||||
|
||||
if (params.manifest_is_usable) {
|
||||
printf("\nVerification results for %s - %s\n", params.manifest_file.c_str(), hash_exit_code_to_str(exit_code));
|
||||
}
|
||||
|
||||
return exit_code;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue