llama-gguf-hash: add sha256

This commit is contained in:
brian khuu 2024-07-05 23:14:45 +10:00
parent ccc6ef7b3c
commit 5360799e85
9 changed files with 342 additions and 20 deletions

View file

@ -1181,13 +1181,17 @@ llama-gguf: examples/gguf/gguf.cpp \
examples/gguf-hash/deps/sha1/sha1.o: \ examples/gguf-hash/deps/sha1/sha1.o: \
examples/gguf-hash/deps/sha1/sha1.c examples/gguf-hash/deps/sha1/sha1.c
$(CC) $(CFLAGS) -c $< -o $@ $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
examples/gguf-hash/deps/xxhash/xxhash.o: \ examples/gguf-hash/deps/xxhash/xxhash.o: \
examples/gguf-hash/deps/xxhash/xxhash.c examples/gguf-hash/deps/xxhash/xxhash.c
$(CC) $(CFLAGS) -c $< -o $@ $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o\ examples/gguf-hash/deps/sha256/sha256.o: \
examples/gguf-hash/deps/sha256/sha256.c
$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o\
$(OBJ_ALL) $(OBJ_ALL)
$(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

View file

@ -8,6 +8,8 @@ add_library(xxhash OBJECT deps/xxhash/xxhash.c deps/xxhash/xxhash.h)
target_link_libraries(${TARGET} PRIVATE xxhash) target_link_libraries(${TARGET} PRIVATE xxhash)
add_library(sha1 OBJECT deps/sha1/sha1.c deps/sha1/sha1.h) add_library(sha1 OBJECT deps/sha1/sha1.c deps/sha1/sha1.h)
target_link_libraries(${TARGET} PRIVATE sha1) target_link_libraries(${TARGET} PRIVATE sha1)
add_library(sha256 OBJECT deps/sha256/sha256.c deps/sha256/sha256.h)
target_link_libraries(${TARGET} PRIVATE sha256)
target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11) target_compile_features(${TARGET} PRIVATE cxx_std_11)

View file

@ -7,6 +7,7 @@ CLI to hash GGUF files.
- `--xxhash`: use xhash (default) - `--xxhash`: use xhash (default)
- `--sha1`: use sha1 - `--sha1`: use sha1
- `--uuid`: use uuid - `--uuid`: use uuid
- `--sha256`: use sha256
### Compile Example ### Compile Example
@ -18,6 +19,7 @@ make -C build llama-gguf-hash VERBOSE=1
./build/bin/llama-gguf-hash --xxhash test.gguf ./build/bin/llama-gguf-hash --xxhash test.gguf
./build/bin/llama-gguf-hash --sha1 test.gguf ./build/bin/llama-gguf-hash --sha1 test.gguf
./build/bin/llama-gguf-hash --uuid test.gguf ./build/bin/llama-gguf-hash --uuid test.gguf
./build/bin/llama-gguf-hash --sha256 test.gguf
``` ```
### Crypto/Hash Libraries Used ### Crypto/Hash Libraries Used
@ -26,3 +28,4 @@ These micro c libraries dependencies was installed via the [clib c package manag
- https://github.com/mofosyne/xxHash - https://github.com/mofosyne/xxHash
- https://github.com/clibs/sha1/ - https://github.com/clibs/sha1/
- https://github.com/jb55/sha256.c

View file

@ -0,0 +1,13 @@
{
"name": "rotate-bits",
"version": "0.1.1",
"repo": "jb55/rotate-bits.h",
"description": "rotate bits",
"keywords": ["rotl", "rotr"],
"src": ["rotate-bits.h"],
"license": "Public Domain",
"development": {
"thlorenz/tap.c": "*"
}
}

View file

@ -0,0 +1,46 @@
#ifndef __ROTATE_DEFS_H
#define __ROTATE_DEFS_H
#ifdef _MSC_VER
#include <stdlib.h>
#define ROTL32(v, n) _rotl((v), (n))
#define ROTL64(v, n) _rotl64((v), (n))
#define ROTR32(v, n) _rotr((v), (n))
#define ROTR64(v, n) _rotr64((v), (n))
#else
#include <stdint.h>
#define U8V(v) ((uint8_t)(v) & 0xFFU)
#define U16V(v) ((uint16_t)(v) & 0xFFFFU)
#define U32V(v) ((uint32_t)(v) & 0xFFFFFFFFU)
#define U64V(v) ((uint64_t)(v) & 0xFFFFFFFFFFFFFFFFU)
#define ROTL32(v, n) \
(U32V((uint32_t)(v) << (n)) | ((uint32_t)(v) >> (32 - (n))))
// tests fail if we don't have this cast...
#define ROTL64(v, n) \
(U64V((uint64_t)(v) << (n)) | ((uint64_t)(v) >> (64 - (n))))
#define ROTR32(v, n) ROTL32(v, 32 - (n))
#define ROTR64(v, n) ROTL64(v, 64 - (n))
#endif
#define ROTL8(v, n) \
(U8V((uint8_t)(v) << (n)) | ((uint8_t)(v) >> (8 - (n))))
#define ROTL16(v, n) \
(U16V((uint16_t)(v) << (n)) | ((uint16_t)(v) >> (16 - (n))))
#define ROTR8(v, n) ROTL8(v, 8 - (n))
#define ROTR16(v, n) ROTL16(v, 16 - (n))
#endif

View file

@ -0,0 +1,15 @@
{
"name": "sha256",
"version": "0.0.2",
"repo": "jb55/sha256.c",
"description": "sha256 in c",
"keywords": ["sha256", "sha2"],
"src": ["sha256.c", "sha256.h"],
"dependencies": {
"jb55/rotate-bits.h": "0.1.1"
},
"development": {
"thlorenz/tap.c": "*"
}
}

View file

@ -0,0 +1,221 @@
/* Crypto/Sha256.c -- SHA-256 Hash
2010-06-11 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "rotate-bits/rotate-bits.h"
#include "sha256.h"
/* define it for speed optimization */
#define _SHA256_UNROLL
#define _SHA256_UNROLL2
void
sha256_init(sha256_t *p)
{
p->state[0] = 0x6a09e667;
p->state[1] = 0xbb67ae85;
p->state[2] = 0x3c6ef372;
p->state[3] = 0xa54ff53a;
p->state[4] = 0x510e527f;
p->state[5] = 0x9b05688c;
p->state[6] = 0x1f83d9ab;
p->state[7] = 0x5be0cd19;
p->count = 0;
}
#define S0(x) (ROTR32(x, 2) ^ ROTR32(x,13) ^ ROTR32(x, 22))
#define S1(x) (ROTR32(x, 6) ^ ROTR32(x,11) ^ ROTR32(x, 25))
#define s0(x) (ROTR32(x, 7) ^ ROTR32(x,18) ^ (x >> 3))
#define s1(x) (ROTR32(x,17) ^ ROTR32(x,19) ^ (x >> 10))
#define blk0(i) (W[i] = data[i])
#define blk2(i) (W[i&15] += s1(W[(i-2)&15]) + W[(i-7)&15] + s0(W[(i-15)&15]))
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
#define a(i) T[(0-(i))&7]
#define b(i) T[(1-(i))&7]
#define c(i) T[(2-(i))&7]
#define d(i) T[(3-(i))&7]
#define e(i) T[(4-(i))&7]
#define f(i) T[(5-(i))&7]
#define g(i) T[(6-(i))&7]
#define h(i) T[(7-(i))&7]
#ifdef _SHA256_UNROLL2
#define R(a,b,c,d,e,f,g,h, i) h += S1(e) + Ch(e,f,g) + K[i+j] + (j?blk2(i):blk0(i));\
d += h; h += S0(a) + Maj(a, b, c)
#define RX_8(i) \
R(a,b,c,d,e,f,g,h, i); \
R(h,a,b,c,d,e,f,g, (i+1)); \
R(g,h,a,b,c,d,e,f, (i+2)); \
R(f,g,h,a,b,c,d,e, (i+3)); \
R(e,f,g,h,a,b,c,d, (i+4)); \
R(d,e,f,g,h,a,b,c, (i+5)); \
R(c,d,e,f,g,h,a,b, (i+6)); \
R(b,c,d,e,f,g,h,a, (i+7))
#else
#define R(i) h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j?blk2(i):blk0(i));\
d(i) += h(i); h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
#ifdef _SHA256_UNROLL
#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
#endif
#endif
static const uint32_t K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
static void
sha256_transform(uint32_t *state, const uint32_t *data)
{
uint32_t W[16];
unsigned j;
#ifdef _SHA256_UNROLL2
uint32_t a,b,c,d,e,f,g,h;
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
f = state[5];
g = state[6];
h = state[7];
#else
uint32_t T[8];
for (j = 0; j < 8; j++)
T[j] = state[j];
#endif
for (j = 0; j < 64; j += 16)
{
#if defined(_SHA256_UNROLL) || defined(_SHA256_UNROLL2)
RX_8(0); RX_8(8);
#else
unsigned i;
for (i = 0; i < 16; i++) { R(i); }
#endif
}
#ifdef _SHA256_UNROLL2
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
state[5] += f;
state[6] += g;
state[7] += h;
#else
for (j = 0; j < 8; j++)
state[j] += T[j];
#endif
/* Wipe variables */
/* memset(W, 0, sizeof(W)); */
/* memset(T, 0, sizeof(T)); */
}
#undef S0
#undef S1
#undef s0
#undef s1
static void
sha256_write_byte_block(sha256_t *p)
{
uint32_t data32[16];
unsigned i;
for (i = 0; i < 16; i++)
data32[i] =
((uint32_t)(p->buffer[i * 4 ]) << 24) +
((uint32_t)(p->buffer[i * 4 + 1]) << 16) +
((uint32_t)(p->buffer[i * 4 + 2]) << 8) +
((uint32_t)(p->buffer[i * 4 + 3]));
sha256_transform(p->state, data32);
}
void
sha256_hash(unsigned char *buf, const unsigned char *data, size_t size)
{
sha256_t hash;
sha256_init(&hash);
sha256_update(&hash, data, size);
sha256_final(&hash, buf);
}
void
sha256_update(sha256_t *p, const unsigned char *data, size_t size)
{
uint32_t curBufferPos = (uint32_t)p->count & 0x3F;
while (size > 0)
{
p->buffer[curBufferPos++] = *data++;
p->count++;
size--;
if (curBufferPos == 64)
{
curBufferPos = 0;
sha256_write_byte_block(p);
}
}
}
void
sha256_final(sha256_t *p, unsigned char *digest)
{
uint64_t lenInBits = (p->count << 3);
uint32_t curBufferPos = (uint32_t)p->count & 0x3F;
unsigned i;
p->buffer[curBufferPos++] = 0x80;
while (curBufferPos != (64 - 8))
{
curBufferPos &= 0x3F;
if (curBufferPos == 0)
sha256_write_byte_block(p);
p->buffer[curBufferPos++] = 0;
}
for (i = 0; i < 8; i++)
{
p->buffer[curBufferPos++] = (unsigned char)(lenInBits >> 56);
lenInBits <<= 8;
}
sha256_write_byte_block(p);
for (i = 0; i < 8; i++)
{
*digest++ = (unsigned char)(p->state[i] >> 24);
*digest++ = (unsigned char)(p->state[i] >> 16);
*digest++ = (unsigned char)(p->state[i] >> 8);
*digest++ = (unsigned char)(p->state[i]);
}
sha256_init(p);
}

View file

@ -0,0 +1,24 @@
/* Sha256.h -- SHA-256 Hash
2010-06-11 : Igor Pavlov : Public domain */
#ifndef __CRYPTO_SHA256_H
#define __CRYPTO_SHA256_H
#include <stdlib.h>
#include <stdint.h>
#define SHA256_DIGEST_SIZE 32
typedef struct sha256_t
{
uint32_t state[8];
uint64_t count;
unsigned char buffer[64];
} sha256_t;
void sha256_init(sha256_t *p);
void sha256_update(sha256_t *p, const unsigned char *data, size_t size);
void sha256_final(sha256_t *p, unsigned char *digest);
void sha256_hash(unsigned char *buf, const unsigned char *data, size_t size);
#endif

View file

@ -9,25 +9,31 @@
#include <string.h> #include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
#include "xxhash/xxhash.h" #include "xxhash/xxhash.h"
#include "sha1/sha1.h" #include "sha1/sha1.h"
#ifdef SHA256 // TODO: https://github.com/jb55/sha256.c
#include "sha256/sha256.h" #include "sha256/sha256.h"
#ifdef __cplusplus
}
#endif #endif
// uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp') // uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
#define UUID_NAMESPACE_LLAMA_CPP "ef001206-dadc-5f6d-a15f-3359e577d4e5" #define UUID_NAMESPACE_LLAMA_CPP "ef001206-dadc-5f6d-a15f-3359e577d4e5"
#define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5 #define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5
struct hash_params { struct hash_params {
std::string input; std::string input;
bool xxhash = false; bool xxhash = false;
bool sha1 = false; bool sha1 = false;
bool uuid = false; bool uuid = false;
#ifdef SHA256
bool sha256 = false; bool sha256 = false;
#endif
}; };
static void hash_print_usage(const char * executable) { static void hash_print_usage(const char * executable) {
@ -42,9 +48,7 @@ static void hash_print_usage(const char * executable) {
printf(" --xxhash use xxhash\n"); printf(" --xxhash use xxhash\n");
printf(" --sha1 use sha1\n"); printf(" --sha1 use sha1\n");
printf(" --uuid use uuid\n"); printf(" --uuid use uuid\n");
#ifdef SHA256
printf(" --sha256 use sha256\n"); printf(" --sha256 use sha256\n");
#endif
printf("\n"); printf("\n");
} }
@ -80,12 +84,10 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
params.uuid = true; params.uuid = true;
} }
#ifdef SHA256
if (arg == "--sha256") { if (arg == "--sha256") {
arg_found = true; arg_found = true;
params.sha256 = true; params.sha256 = true;
} }
#endif
if (!arg_found) { if (!arg_found) {
throw std::invalid_argument("error: unknown argument: " + arg); throw std::invalid_argument("error: unknown argument: " + arg);
@ -95,9 +97,7 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
if (!params.xxhash if (!params.xxhash
&& !params.sha1 && !params.sha1
&& !params.uuid && !params.uuid
#ifdef SHA256
&& !params.sha256 && !params.sha256
#endif
) { ) {
// By default if no swich argument provided, assume xxhash // By default if no swich argument provided, assume xxhash
params.xxhash = true; params.xxhash = true;
@ -152,13 +152,11 @@ static bool gguf_hash(const hash_params & hash_params) {
SHA1Init(&sha1_model_hash_ctx); SHA1Init(&sha1_model_hash_ctx);
} }
#ifdef SHA256
// sha256 init // sha256 init
sha256_t sha256_model_hash_ctx; sha256_t sha256_model_hash_ctx;
if (hash_params.sha256) { if (hash_params.sha256) {
sha256_init(&sha256_model_hash_ctx); sha256_init(&sha256_model_hash_ctx);
} }
#endif
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params); struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
const int n_tensors = gguf_get_n_tensors(ctx); const int n_tensors = gguf_get_n_tensors(ctx);
@ -202,12 +200,11 @@ static bool gguf_hash(const hash_params & hash_params) {
SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes); SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
} }
#ifdef SHA256
if (hash_params.sha256) { if (hash_params.sha256) {
// Per Layer Hash // Per Layer Hash
unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
sha256_hash( result, (const unsigned char *)raw_data, n_bytes); sha256_hash((unsigned char*) result, (const unsigned char *)raw_data, n_bytes);
char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0}; char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0};
for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) { for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) {
@ -219,7 +216,6 @@ static bool gguf_hash(const hash_params & hash_params) {
// Overall Model Hash // Overall Model Hash
sha256_update( &sha256_model_hash_ctx, (unsigned char const *)raw_data, n_bytes); sha256_update( &sha256_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
} }
#endif
} }
if (hash_params.xxhash) { if (hash_params.xxhash) {
@ -246,7 +242,6 @@ static bool gguf_hash(const hash_params & hash_params) {
printf("sha1 %s %s\n", hex_result, fname.c_str()); printf("sha1 %s %s\n", hex_result, fname.c_str());
} }
#ifdef SHA256
if (hash_params.sha256) { if (hash_params.sha256) {
unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
sha256_final( &sha256_model_hash_ctx, result); sha256_final( &sha256_model_hash_ctx, result);
@ -258,7 +253,6 @@ static bool gguf_hash(const hash_params & hash_params) {
printf("sha256 %s %s\n", hex_result, fname.c_str()); printf("sha256 %s %s\n", hex_result, fname.c_str());
} }
#endif
ggml_free(ctx_data); ggml_free(ctx_data);
gguf_free(ctx); gguf_free(ctx);