Mirror of https://github.com/jart/cosmopolitan.git (synced 2025-01-31 03:27:39 +00:00)
Incorporate some fixes and updates for GGML
This commit is contained in:
Parent: b31ba86ace
Commit: d9e27203d4
2 changed files with 24 additions and 0 deletions
18
third_party/ggml/ggml.c
vendored
18
third_party/ggml/ggml.c
vendored
|
@ -3641,6 +3641,24 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
|
|||
}
|
||||
|
||||
*s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1);
|
||||
#elif defined(__AVX2__)
|
||||
// Initialize accumulator with zeros
|
||||
__m256 acc = _mm256_setzero_ps();
|
||||
|
||||
// Main loop
|
||||
for (int i = 0; i < nb; ++i) {
|
||||
// Compute combined scale for the block
|
||||
const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[i].d ), _mm256_broadcast_ss( &y[i].d ) );
|
||||
__m256i bx = _mm256_loadu_si256((const __m256i *)x[i].qs);
|
||||
__m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
|
||||
|
||||
const __m256 q = mul_sum_i8_pairs_float(bx, by);
|
||||
|
||||
// Multiply q with scale and accumulate
|
||||
acc = _mm256_fmadd_ps( d, q, acc );
|
||||
}
|
||||
|
||||
*s = hsum_float_8(acc);
|
||||
#else
|
||||
// scalar
|
||||
float sumf = 0.0;
|
||||
|
|
6
third_party/ggml/main.cc
vendored
6
third_party/ggml/main.cc
vendored
|
@ -52,6 +52,7 @@ asm(".include \"libc/disclaimer.inc\"");
|
|||
static console_state con_st;
|
||||
static llama_context ** g_ctx;
|
||||
|
||||
static int g_verbose;
|
||||
static bool is_interacting = false;
|
||||
|
||||
#define EPHEMERAL(fmt) "\r\e[K\033[1;35m" fmt " \033[0m"
|
||||
|
@ -64,6 +65,9 @@ void sigint_handler(int signo) {
|
|||
if (!is_interacting) {
|
||||
is_interacting=true;
|
||||
} else {
|
||||
if (g_verbose) {
|
||||
llama_print_timings(*g_ctx);
|
||||
}
|
||||
_exit(128 + signo);
|
||||
}
|
||||
}
|
||||
|
@ -108,6 +112,8 @@ int main(int argc, char ** argv) {
|
|||
win32_console_init(params.use_color);
|
||||
#endif
|
||||
|
||||
g_verbose = params.verbose;
|
||||
|
||||
if (params.perplexity) {
|
||||
printf("\n************\n");
|
||||
printf("%s: please use the 'perplexity' tool for perplexity calculations\n", __func__);
|
||||
|
|
Loading…
Reference in a new issue