gguf : deduplicate (#2629)

* gguf : better type names

* dedup : CPU + Metal is working

* ggml : fix warnings about unused results

* llama.cpp : fix line feed and compiler warning

* llama : fix strncpy warning + note that token_to_str does not write a null terminator

* llama : restore the original load/save session implementation

Will migrate this to GGUF in the future

* convert-llama-h5-to-gguf.py : support alt ctx param name

* ggml : assert when using ggml_mul with non-F32 src1

* examples : dedup simple

---------

Co-authored-by: klosax <131523366+klosax@users.noreply.github.com>
This commit is contained in:
Georgi Gerganov 2023-08-16 19:25:29 +03:00 committed by GitHub
parent 758ff1bbb5
commit 88b5769487
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 1630 additions and 7398 deletions

33
ggml.c
View file

@ -9140,6 +9140,8 @@ static void ggml_compute_forward_mul(
const struct ggml_tensor * src0,
const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
GGML_ASSERT(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now");
switch (src0->type) {
case GGML_TYPE_F32:
{
@ -18584,17 +18586,18 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
// Short display name for each GGUF value type, indexed by the GGUF_TYPE_*
// enumerator. Every index is initialized exactly once: repeating a designator
// is legal C, but only the last initializer takes effect and compilers warn
// about the overridden ones.
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
    [GGUF_TYPE_UINT8]   = "u8",
    [GGUF_TYPE_INT8]    = "i8",
    [GGUF_TYPE_UINT16]  = "u16",
    [GGUF_TYPE_INT16]   = "i16",
    [GGUF_TYPE_UINT32]  = "u32",
    [GGUF_TYPE_INT32]   = "i32",
    [GGUF_TYPE_FLOAT32] = "f32",
    [GGUF_TYPE_BOOL]    = "bool",
    [GGUF_TYPE_STRING]  = "str",
    [GGUF_TYPE_ARRAY]   = "arr",
};
// Fails the build when the type count changes, so this table gets revisited.
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
union gguf_value {
uint8_t uint8;
@ -19395,17 +19398,23 @@ static void gguf_buf_grow(struct gguf_buf * buf, size_t size) {
// Append a length-prefixed string to buf: first the raw bytes of val->n, then
// val->n bytes read from val->data.
//
// buf->offset always advances by sizeof(val->n) + val->n. The bytes themselves
// are copied only when buf->data is non-NULL.
// NOTE(review): a NULL buf->data looks like a "measure the size only" mode -
// confirm against the callers.
static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) {
    // NOTE(review): gguf_buf_grow is defined outside this view; presumably it
    // makes room for the bytes written below - confirm.
    gguf_buf_grow(buf, sizeof(val->n) + val->n);

    // Explicit if statements instead of `buf->data && memcpy(...)`: the latter
    // discards the expression result and draws an unused-value warning.
    if (buf->data) {
        memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n));
    }
    buf->offset += sizeof(val->n);

    if (buf->data) {
        memcpy((char *) buf->data + buf->offset, val->data, val->n);
    }
    buf->offset += val->n;
}
// Append el_size raw bytes read from val to buf.
//
// buf->offset always advances by el_size. The bytes themselves are copied only
// when buf->data is non-NULL.
// NOTE(review): a NULL buf->data looks like a "measure the size only" mode -
// confirm against the callers.
static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) {
    // NOTE(review): gguf_buf_grow is defined outside this view; presumably it
    // makes room for the bytes written below - confirm.
    gguf_buf_grow(buf, el_size);

    // Explicit if statement instead of `buf->data && memcpy(...)`: the latter
    // discards the expression result and draws an unused-value warning.
    if (buf->data) {
        memcpy((char *) buf->data + buf->offset, val, el_size);
    }
    buf->offset += el_size;
}