From d9e27203d4a064a68782d523666cbf9aa62990fc Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Fri, 28 Apr 2023 20:24:55 -0700 Subject: [PATCH] Incorporate some fixes and updates for GGML --- third_party/ggml/ggml.c | 18 ++++++++++++++++++ third_party/ggml/main.cc | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/third_party/ggml/ggml.c b/third_party/ggml/ggml.c index 4bdc97df5..b34ffa883 100644 --- a/third_party/ggml/ggml.c +++ b/third_party/ggml/ggml.c @@ -3641,6 +3641,24 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * } *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); +#elif defined(__AVX2__) + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + + // Main loop + for (int i = 0; i < nb; ++i) { + // Compute combined scale for the block + const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[i].d ), _mm256_broadcast_ss( &y[i].d ) ); + __m256i bx = _mm256_loadu_si256((const __m256i *)x[i].qs); + __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs); + + const __m256 q = mul_sum_i8_pairs_float(bx, by); + + // Multiply q with scale and accumulate + acc = _mm256_fmadd_ps( d, q, acc ); + } + + *s = hsum_float_8(acc); #else // scalar float sumf = 0.0; diff --git a/third_party/ggml/main.cc b/third_party/ggml/main.cc index 697220e64..e89c35eb2 100644 --- a/third_party/ggml/main.cc +++ b/third_party/ggml/main.cc @@ -52,6 +52,7 @@ asm(".include \"libc/disclaimer.inc\""); static console_state con_st; static llama_context ** g_ctx; +static int g_verbose; static bool is_interacting = false; #define EPHEMERAL(fmt) "\r\e[K\033[1;35m" fmt " \033[0m" @@ -64,6 +65,9 @@ void sigint_handler(int signo) { if (!is_interacting) { is_interacting=true; } else { + if (g_verbose) { + llama_print_timings(*g_ctx); + } _exit(128 + signo); } } @@ -108,6 +112,8 @@ int main(int argc, char ** argv) { win32_console_init(params.use_color); #endif + g_verbose = params.verbose; + if (params.perplexity) { printf("\n************\n"); printf("%s: please use the 'perplexity' tool for perplexity calculations\n", __func__);