mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 11:37:35 +00:00
282dd8e7b7
Build commands: `make -j8 o//third_party/radpajama/radpajama.com` and `make -j8 o//third_party/radpajama/radpajama-chat.com`. This change gets the radpajama.mk config working. This package depends on THIRD_PARTY_GGML, but it is configured to call ggjt_v1() so that the library provides the old quantizers. The ggml_quantize_chunk() API now dispatches to the older quantizers based on the configured version.
165 lines
7.5 KiB
C
165 lines
7.5 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2023 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/assert.h"
|
|
#include "third_party/ggml/ggjt.v1.q4_0.h"
|
|
#include "third_party/ggml/ggjt.v1.q4_1.h"
|
|
#include "third_party/ggml/ggjt.v1.q4_2.h"
|
|
#include "third_party/ggml/ggjt.v1.q5_0.h"
|
|
#include "third_party/ggml/ggjt.v1.q5_1.h"
|
|
#include "third_party/ggml/ggjt.v1.q8_0.h"
|
|
#include "third_party/ggml/ggjt.v1.q8_1.h"
|
|
#include "third_party/ggml/ggml.h"
|
|
// clang-format off
|
|
|
|
static const int ggjt_v1_blck_size[GGML_TYPE_COUNT] = {
|
|
[GGML_TYPE_F32] = 1,
|
|
[GGML_TYPE_F16] = 1,
|
|
[GGML_TYPE_Q4_0] = V1_QK4_0,
|
|
[GGML_TYPE_Q4_1] = V1_QK4_1,
|
|
[GGML_TYPE_Q4_2] = V1_QK4_2,
|
|
[GGML_TYPE_Q5_0] = V1_QK5_0,
|
|
[GGML_TYPE_Q5_1] = V1_QK5_1,
|
|
[GGML_TYPE_Q8_0] = V1_QK8_0,
|
|
[GGML_TYPE_Q8_1] = V1_QK8_1,
|
|
[GGML_TYPE_I8] = 1,
|
|
[GGML_TYPE_I16] = 1,
|
|
[GGML_TYPE_I32] = 1,
|
|
};
|
|
|
|
static const size_t ggjt_v1_type_size[GGML_TYPE_COUNT] = {
|
|
[GGML_TYPE_F32] = sizeof(float),
|
|
[GGML_TYPE_F16] = sizeof(ggml_fp16_t),
|
|
[GGML_TYPE_Q4_0] = sizeof(block_v1_q4_0),
|
|
[GGML_TYPE_Q4_1] = sizeof(block_v1_q4_1),
|
|
[GGML_TYPE_Q4_2] = sizeof(block_v1_q4_2),
|
|
[GGML_TYPE_Q5_0] = sizeof(block_v1_q5_0),
|
|
[GGML_TYPE_Q5_1] = sizeof(block_v1_q5_1),
|
|
[GGML_TYPE_Q8_0] = sizeof(block_v1_q8_0),
|
|
[GGML_TYPE_Q8_1] = sizeof(block_v1_q8_1),
|
|
[GGML_TYPE_I8] = sizeof(int8_t),
|
|
[GGML_TYPE_I16] = sizeof(int16_t),
|
|
[GGML_TYPE_I32] = sizeof(int32_t),
|
|
};
|
|
|
|
static const char *const ggjt_v1_type_name[GGML_TYPE_COUNT] = {
|
|
[GGML_TYPE_F32] = "f32",
|
|
[GGML_TYPE_F16] = "f16",
|
|
[GGML_TYPE_Q4_0] = "q4_0",
|
|
[GGML_TYPE_Q4_1] = "q4_1",
|
|
[GGML_TYPE_Q4_2] = "q4_2",
|
|
[GGML_TYPE_Q5_0] = "q5_0",
|
|
[GGML_TYPE_Q5_1] = "q5_1",
|
|
[GGML_TYPE_Q8_0] = "q8_0",
|
|
[GGML_TYPE_Q8_1] = "q8_1",
|
|
[GGML_TYPE_I8] = "i8",
|
|
[GGML_TYPE_I16] = "i16",
|
|
[GGML_TYPE_I32] = "i32",
|
|
};
|
|
|
|
static const bool ggjt_v1_is_quantized[GGML_TYPE_COUNT] = {
|
|
[GGML_TYPE_F32] = false,
|
|
[GGML_TYPE_F16] = false,
|
|
[GGML_TYPE_Q4_0] = true,
|
|
[GGML_TYPE_Q4_1] = true,
|
|
[GGML_TYPE_Q4_2] = true,
|
|
[GGML_TYPE_Q5_0] = true,
|
|
[GGML_TYPE_Q5_1] = true,
|
|
[GGML_TYPE_Q8_0] = true,
|
|
[GGML_TYPE_Q8_1] = true,
|
|
[GGML_TYPE_I8] = false,
|
|
[GGML_TYPE_I16] = false,
|
|
[GGML_TYPE_I32] = false,
|
|
};
|
|
|
|
static const quantize_chunk_f *const ggjt_v2_quantize_chunk[GGML_TYPE_COUNT] = {
|
|
[GGML_TYPE_Q4_0] = (void *)ggml_quantize_v1_q4_0,
|
|
[GGML_TYPE_Q4_1] = (void *)ggml_quantize_v1_q4_1,
|
|
[GGML_TYPE_Q4_2] = (void *)ggml_quantize_v1_q4_2,
|
|
[GGML_TYPE_Q5_0] = (void *)ggml_quantize_v1_q5_0,
|
|
[GGML_TYPE_Q5_1] = (void *)ggml_quantize_v1_q5_1,
|
|
[GGML_TYPE_Q8_0] = (void *)ggml_quantize_v1_q8_0,
|
|
};
|
|
|
|
static const quantize_fns_t ggjt_v1_quantize_fns[GGML_TYPE_COUNT] = {
|
|
[GGML_TYPE_Q4_0] = {
|
|
.dequantize_row_q = dequantize_row_v1_q4_0,
|
|
.quantize_row_q = quantize_row_v1_q4_0,
|
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q4_0_reference,
|
|
.quantize_row_q_dot = quantize_row_v1_q8_0,
|
|
.vec_dot_q = ggml_vec_dot_v1_q4_0_q8_0,
|
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
|
},
|
|
[GGML_TYPE_Q4_1] = {
|
|
.dequantize_row_q = dequantize_row_v1_q4_1,
|
|
.quantize_row_q = quantize_row_v1_q4_1,
|
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q4_1_reference,
|
|
.quantize_row_q_dot = quantize_row_v1_q8_1,
|
|
.vec_dot_q = ggml_vec_dot_v1_q4_1_q8_1,
|
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
|
},
|
|
[GGML_TYPE_Q4_2] = {
|
|
.dequantize_row_q = dequantize_row_v1_q4_2,
|
|
.quantize_row_q = quantize_row_v1_q4_2,
|
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q4_2_reference,
|
|
.quantize_row_q_dot = quantize_row_v1_q8_0,
|
|
.vec_dot_q = ggml_vec_dot_v1_q4_2_q8_0,
|
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
|
},
|
|
[GGML_TYPE_Q5_0] = {
|
|
.dequantize_row_q = dequantize_row_v1_q5_0,
|
|
.quantize_row_q = quantize_row_v1_q5_0,
|
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q5_0_reference,
|
|
.quantize_row_q_dot = quantize_row_v1_q8_0,
|
|
.vec_dot_q = ggml_vec_dot_v1_q5_0_q8_0,
|
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
|
},
|
|
[GGML_TYPE_Q5_1] = {
|
|
.dequantize_row_q = dequantize_row_v1_q5_1,
|
|
.quantize_row_q = quantize_row_v1_q5_1,
|
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q5_1_reference,
|
|
.quantize_row_q_dot = quantize_row_v1_q8_1,
|
|
.vec_dot_q = ggml_vec_dot_v1_q5_1_q8_1,
|
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
|
},
|
|
[GGML_TYPE_Q8_0] = {
|
|
.dequantize_row_q = dequantize_row_v1_q8_0,
|
|
.quantize_row_q = quantize_row_v1_q8_0,
|
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q8_0_reference,
|
|
.quantize_row_q_dot = quantize_row_v1_q8_0,
|
|
.vec_dot_q = ggml_vec_dot_v1_q8_0_q8_0,
|
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
|
},
|
|
[GGML_TYPE_Q8_1] = {
|
|
.dequantize_row_q = NULL, // TODO
|
|
.quantize_row_q = quantize_row_v1_q8_1,
|
|
.quantize_row_q_reference = (quantize_row_q_t) quantize_row_v1_q8_1_reference,
|
|
.quantize_row_q_dot = quantize_row_v1_q8_1,
|
|
.vec_dot_q = NULL, // TODO
|
|
.vec_dot_type = GGML_TYPE_Q8_1,
|
|
},
|
|
};
|
|
|
|
void ggjt_v1(void) {
|
|
GGML_BLCK_SIZE = ggjt_v1_blck_size;
|
|
GGML_TYPE_SIZE = ggjt_v1_type_size;
|
|
GGML_TYPE_NAME = ggjt_v1_type_name;
|
|
GGML_IS_QUANTIZED = ggjt_v1_is_quantized;
|
|
quantize_fns = ggjt_v1_quantize_fns;
|
|
GGML_QUANTIZE_CHUNK = ggjt_v2_quantize_chunk;
|
|
}
|