cosmopolitan/third_party/ggml/ggjt.v1.c
Justine Tunney 282dd8e7b7
Get radpajama to build
    make -j8 o//third_party/radpajama/radpajama.com
    make -j8 o//third_party/radpajama/radpajama-chat.com

This change gets the radpajama.mk config working. This package depends
on THIRD_PARTY_GGML but it's configured to call ggjt_v1(), so that the
library will provide the old quantizers. The ggml_quantize_chunk() API
will now dispatch to older quantizers based on the configured version.
2023-05-13 20:44:36 -07:00
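
For illustration only, a minimal caller sketch of that dispatch (not part of this change; the helper name and buffer handling are hypothetical, and the upstream ggml_quantize_chunk() signature is assumed):

    #include "third_party/ggml/ggml.h"
    void ggjt_v1(void);  // provided by ggjt.v1.c below

    // Sketch: a package such as radpajama selects the v1 quantizers once,
    // then quantizes a row of floats through the normal chunk API.
    size_t quantize_row_v1_sketch(const float *src, void *dst,
                                  int n, int64_t *hist) {
        ggjt_v1();  // install the GGJT v1 dispatch tables
        return ggml_quantize_chunk(GGML_TYPE_Q4_0, src, dst, 0, n, hist);
    }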

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8                                :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2023 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for        │
│ any purpose with or without fee is hereby granted, provided that the        │
│ above copyright notice and this permission notice appear in all copies.     │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL               │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED               │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE            │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL        │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR       │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER              │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR            │
│ PERFORMANCE OF THIS SOFTWARE.                                               │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/assert.h"
#include "third_party/ggml/ggjt.v1.q4_0.h"
#include "third_party/ggml/ggjt.v1.q4_1.h"
#include "third_party/ggml/ggjt.v1.q4_2.h"
#include "third_party/ggml/ggjt.v1.q5_0.h"
#include "third_party/ggml/ggjt.v1.q5_1.h"
#include "third_party/ggml/ggjt.v1.q8_0.h"
#include "third_party/ggml/ggjt.v1.q8_1.h"
#include "third_party/ggml/ggml.h"
// clang-format off
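// Number of elements in one quantization block for each tensor type
// (scalar types count as 1).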
static const int ggjt_v1_blck_size[GGML_TYPE_COUNT] = {
    [GGML_TYPE_F32]  = 1,
    [GGML_TYPE_F16]  = 1,
    [GGML_TYPE_Q4_0] = V1_QK4_0,
    [GGML_TYPE_Q4_1] = V1_QK4_1,
    [GGML_TYPE_Q4_2] = V1_QK4_2,
    [GGML_TYPE_Q5_0] = V1_QK5_0,
    [GGML_TYPE_Q5_1] = V1_QK5_1,
    [GGML_TYPE_Q8_0] = V1_QK8_0,
    [GGML_TYPE_Q8_1] = V1_QK8_1,
    [GGML_TYPE_I8]   = 1,
    [GGML_TYPE_I16]  = 1,
    [GGML_TYPE_I32]  = 1,
};
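
// Bytes occupied by one block (or one scalar element) of each tensor type.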
static const size_t ggjt_v1_type_size[GGML_TYPE_COUNT] = {
    [GGML_TYPE_F32]  = sizeof(float),
    [GGML_TYPE_F16]  = sizeof(ggml_fp16_t),
    [GGML_TYPE_Q4_0] = sizeof(block_v1_q4_0),
    [GGML_TYPE_Q4_1] = sizeof(block_v1_q4_1),
    [GGML_TYPE_Q4_2] = sizeof(block_v1_q4_2),
    [GGML_TYPE_Q5_0] = sizeof(block_v1_q5_0),
    [GGML_TYPE_Q5_1] = sizeof(block_v1_q5_1),
    [GGML_TYPE_Q8_0] = sizeof(block_v1_q8_0),
    [GGML_TYPE_Q8_1] = sizeof(block_v1_q8_1),
    [GGML_TYPE_I8]   = sizeof(int8_t),
    [GGML_TYPE_I16]  = sizeof(int16_t),
    [GGML_TYPE_I32]  = sizeof(int32_t),
};
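
// Human-readable names for each tensor type.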
static const char *const ggjt_v1_type_name[GGML_TYPE_COUNT] = {
[GGML_TYPE_F32] = "f32",
[GGML_TYPE_F16] = "f16",
[GGML_TYPE_Q4_0] = "q4_0",
[GGML_TYPE_Q4_1] = "q4_1",
[GGML_TYPE_Q4_2] = "q4_2",
[GGML_TYPE_Q5_0] = "q5_0",
[GGML_TYPE_Q5_1] = "q5_1",
[GGML_TYPE_Q8_0] = "q8_0",
[GGML_TYPE_Q8_1] = "q8_1",
[GGML_TYPE_I8] = "i8",
[GGML_TYPE_I16] = "i16",
[GGML_TYPE_I32] = "i32",
};
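
// Whether each tensor type is a quantized format.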
static const bool ggjt_v1_is_quantized[GGML_TYPE_COUNT] = {
    [GGML_TYPE_F32]  = false,
    [GGML_TYPE_F16]  = false,
    [GGML_TYPE_Q4_0] = true,
    [GGML_TYPE_Q4_1] = true,
    [GGML_TYPE_Q4_2] = true,
    [GGML_TYPE_Q5_0] = true,
    [GGML_TYPE_Q5_1] = true,
    [GGML_TYPE_Q8_0] = true,
    [GGML_TYPE_Q8_1] = true,
    [GGML_TYPE_I8]   = false,
    [GGML_TYPE_I16]  = false,
    [GGML_TYPE_I32]  = false,
};
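
// Chunk quantizers that ggml_quantize_chunk() dispatches to once
// ggjt_v1() has been selected.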
static const quantize_chunk_f *const ggjt_v1_quantize_chunk[GGML_TYPE_COUNT] = {
    [GGML_TYPE_Q4_0] = (void *)ggml_quantize_v1_q4_0,
    [GGML_TYPE_Q4_1] = (void *)ggml_quantize_v1_q4_1,
    [GGML_TYPE_Q4_2] = (void *)ggml_quantize_v1_q4_2,
    [GGML_TYPE_Q5_0] = (void *)ggml_quantize_v1_q5_0,
    [GGML_TYPE_Q5_1] = (void *)ggml_quantize_v1_q5_1,
    [GGML_TYPE_Q8_0] = (void *)ggml_quantize_v1_q8_0,
};
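
// Row-level (de)quantization and dot-product kernels for the v1 formats.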
static const quantize_fns_t ggjt_v1_quantize_fns[GGML_TYPE_COUNT] = {
    [GGML_TYPE_Q4_0] = {
        .dequantize_row_q = dequantize_row_v1_q4_0,
        .quantize_row_q = quantize_row_v1_q4_0,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q4_0_reference,
        .quantize_row_q_dot = quantize_row_v1_q8_0,
        .vec_dot_q = ggml_vec_dot_v1_q4_0_q8_0,
        .vec_dot_type = GGML_TYPE_Q8_0,
    },
    [GGML_TYPE_Q4_1] = {
        .dequantize_row_q = dequantize_row_v1_q4_1,
        .quantize_row_q = quantize_row_v1_q4_1,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q4_1_reference,
        .quantize_row_q_dot = quantize_row_v1_q8_1,
        .vec_dot_q = ggml_vec_dot_v1_q4_1_q8_1,
        .vec_dot_type = GGML_TYPE_Q8_1,
    },
    [GGML_TYPE_Q4_2] = {
        .dequantize_row_q = dequantize_row_v1_q4_2,
        .quantize_row_q = quantize_row_v1_q4_2,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q4_2_reference,
        .quantize_row_q_dot = quantize_row_v1_q8_0,
        .vec_dot_q = ggml_vec_dot_v1_q4_2_q8_0,
        .vec_dot_type = GGML_TYPE_Q8_0,
    },
    [GGML_TYPE_Q5_0] = {
        .dequantize_row_q = dequantize_row_v1_q5_0,
        .quantize_row_q = quantize_row_v1_q5_0,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q5_0_reference,
        .quantize_row_q_dot = quantize_row_v1_q8_0,
        .vec_dot_q = ggml_vec_dot_v1_q5_0_q8_0,
        .vec_dot_type = GGML_TYPE_Q8_0,
    },
    [GGML_TYPE_Q5_1] = {
        .dequantize_row_q = dequantize_row_v1_q5_1,
        .quantize_row_q = quantize_row_v1_q5_1,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q5_1_reference,
        .quantize_row_q_dot = quantize_row_v1_q8_1,
        .vec_dot_q = ggml_vec_dot_v1_q5_1_q8_1,
        .vec_dot_type = GGML_TYPE_Q8_1,
    },
    [GGML_TYPE_Q8_0] = {
        .dequantize_row_q = dequantize_row_v1_q8_0,
        .quantize_row_q = quantize_row_v1_q8_0,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q8_0_reference,
        .quantize_row_q_dot = quantize_row_v1_q8_0,
        .vec_dot_q = ggml_vec_dot_v1_q8_0_q8_0,
        .vec_dot_type = GGML_TYPE_Q8_0,
    },
    [GGML_TYPE_Q8_1] = {
        .dequantize_row_q = NULL, // TODO
        .quantize_row_q = quantize_row_v1_q8_1,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q8_1_reference,
        .quantize_row_q_dot = quantize_row_v1_q8_1,
        .vec_dot_q = NULL, // TODO
        .vec_dot_type = GGML_TYPE_Q8_1,
    },
};
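
// Points ggml's global dispatch tables at the GGJT v1 implementations
// above, so existing APIs such as ggml_quantize_chunk() use the old
// quantizers.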
void ggjt_v1(void) {
    GGML_BLCK_SIZE = ggjt_v1_blck_size;
    GGML_TYPE_SIZE = ggjt_v1_type_size;
    GGML_TYPE_NAME = ggjt_v1_type_name;
    GGML_IS_QUANTIZED = ggjt_v1_is_quantized;
    quantize_fns = ggjt_v1_quantize_fns;
    GGML_QUANTIZE_CHUNK = ggjt_v1_quantize_chunk;
}