Fix some more issues with aarch64 and llama.cpp

Justine Tunney 2023-05-10 07:32:15 -07:00
parent 64aca4dc4f
commit 290a49952e
11 changed files with 168 additions and 65 deletions


@@ -25,7 +25,7 @@ struct gpt_params {
     int32_t n_predict = 128; // new tokens to predict
     int32_t n_parts = -1;    // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx = 512;     // context size
-    int32_t n_batch = 32;    // batch size for prompt processing (must be >=32 to use BLAS)
+    int32_t n_batch = 64;    // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep = 0;      // number of tokens to keep from initial prompt

     // sampling parameters
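
The only change here is the default n_batch doubling from 32 to 64, which keeps prompt processing comfortably above the BLAS cutoff named in the comment. As a rough illustration of the kind of size check that cutoff reflects (the helper name and exact condition below are assumptions, not ggml's actual code): BLAS only pays off once the matrices are big enough to amortize the dispatch overhead, and with n_batch prompt tokens evaluated at once the activations form an n_batch x n_embd matrix, so n_batch is one of the dimensions such a check sees.

    // Illustrative sketch only; hypothetical helper mirroring the
    // ">=32 to use BLAS" comment above, not ggml's real dispatch code.
    static bool mul_mat_should_use_blas(int64_t rows, int64_t cols,
                                        int64_t inner) {
        // Tiny matrices stay on the hand-written kernels; big ones
        // amortize the cost of calling out to BLAS.
        return rows >= 32 && cols >= 32 && inner >= 32;
    }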


@@ -42,6 +42,7 @@
 #include "third_party/ggml/common.h"
 #include "third_party/ggml/llama.h"
 #include "third_party/ggml/llama_util.h"
+#include "third_party/libcxx/atomic"
 #include "third_party/libcxx/iostream"
 #include "third_party/libcxx/string"
 #include "third_party/libcxx/vector"
@@ -52,26 +53,20 @@ Copyright (c) 2023 Georgi Gerganov\"");
 asm(".include \"libc/disclaimer.inc\"");
 // clang-format off

-static console_state con_st;
-static llama_context ** g_ctx;
-static int g_verbose;
-static bool is_interacting = false;
+static std::atomic<bool> is_interacting;
+static std::atomic<bool> is_terminated;

 #define EPHEMERAL(fmt) "\r\e[K\033[1;35m" fmt " \033[0m"

-void sigint_handler(int signo) {
-    if (signo == SIGINT) {
-        if (!is_interacting) {
-            is_interacting=true;
-        } else {
-            console_cleanup(con_st);
-            printf("\n");
-            if (g_verbose) {
-                llama_print_timings(*g_ctx);
-            }
-            _exit(128 + signo);
-        }
-    }
+static void sigint_handler_batch(int signo) {
+    is_terminated = true;
+}
+
+static void sigint_handler_interactive(int signo) {
+    if (!is_interacting) {
+        is_interacting = true;
+    } else {
+        is_terminated = true;
+    }
 }
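
The rewrite replaces a handler that called printf, llama_print_timings, and console_cleanup, none of which is async-signal-safe, with handlers that do nothing but set a lock-free atomic flag; the actual cleanup moves to the main loop (see the is_terminated block further down). A standalone sketch of the pattern, with hypothetical names rather than this file's code:

    #include <atomic>
    #include <csignal>
    #include <cstdio>
    #include <unistd.h>

    static std::atomic<bool> g_terminated;  // lock-free on mainstream targets

    static void on_sigint(int) {
        g_terminated = true;  // storing to a lock-free atomic is signal-safe
    }

    int main() {
        struct sigaction sa = {};  // zeroed sa_flags, like the code below
        sa.sa_handler = on_sigint;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGINT, &sa, nullptr);
        while (!g_terminated) {
            usleep(10 * 1000);  // stand-in for one unit of real work
        }
        printf("\ninterrupted; cleaning up\n");  // safe here, outside the handler
        return 0;
    }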
@@ -116,9 +111,9 @@ int main(int argc, char ** argv) {
     // save choice to use color for later
     // (note for later: this is a slightly awkward choice)
+    static console_state con_st;
     con_st.use_color = params.use_color;
-    g_verbose = params.verbose;
     con_st.multiline_input = params.multiline_input;
     console_init(con_st);
     atexit([]() { console_cleanup(con_st); });
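
Moving con_st from file scope into main() as a function-local static does not break the atexit registration above: atexit takes a plain void (*)(void), only a capture-less lambda converts to a function pointer, and a capture-less lambda may still name con_st because objects with static storage duration never need to be captured. A minimal sketch:

    #include <cstdio>
    #include <cstdlib>

    int main() {
        static int hits = 3;                        // static storage duration
        atexit([] { printf("hits=%d\n", hits); });  // fine without a capture
        // int local = 3;
        // atexit([] { printf("%d\n", local); });   // error: needs a capture,
        //                                          // and a capturing lambda
        //                                          // cannot become void(*)()
        return 0;
    }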
@@ -162,7 +157,6 @@ int main(int argc, char ** argv) {
     llama_context * ctx;
     struct stat model_stat;
-    g_ctx = &ctx;

     // load the model and apply lora adapter, if any
     ctx = llama_init_from_gpt_params(params);
@@ -280,13 +274,18 @@ int main(int argc, char ** argv) {
         fprintf(stderr, "\n");
     }

+    // setup ctrl-c handler
+    struct sigaction sa;
+    sa.sa_flags = 0;
+    sigemptyset(&sa.sa_mask);
     if (params.interactive) {
-        struct sigaction sigint_action;
-        sigint_action.sa_handler = sigint_handler;
-        sigemptyset (&sigint_action.sa_mask);
-        sigint_action.sa_flags = 0;
-        sigaction(SIGINT, &sigint_action, NULL);
+        sa.sa_handler = sigint_handler_interactive;
+    } else {
+        sa.sa_handler = sigint_handler_batch;
+    }
+    sigaction(SIGINT, &sa, NULL);
+
+    if (params.interactive) {
         if (params.verbose) {
             fprintf(stderr, "%s: interactive mode on.\n", __func__);
         }
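
Both modes now share one sigaction setup and differ only in which handler gets installed. One plausible reading of why sa_flags stays 0 (so SA_RESTART is not set), though the commit does not say so: a SIGINT that lands during a blocking read() then fails the call with EINTR rather than silently restarting it, returning control to code that can notice is_terminated. Sketch:

    #include <cerrno>
    #include <unistd.h>

    // Without SA_RESTART, a signal interrupts a blocking read() with EINTR.
    ssize_t read_checking_signals(int fd, void *buf, size_t len) {
        ssize_t rc = read(fd, buf, len);
        if (rc == -1 && errno == EINTR) {
            // A signal arrived mid-read; the caller can now check the
            // is_terminated flag instead of staying blocked forever.
        }
        return rc;
    }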
@@ -483,7 +482,7 @@ int main(int argc, char ** argv) {
         fprintf(stderr, EPHEMERAL("loading weights..."));
     }

-    while (n_remain != 0 || params.interactive) {
+    while ((n_remain != 0 || params.interactive) && !is_terminated) {

         // perform evaluation
         if (embd.size() > 0) {
@@ -872,6 +871,17 @@ int main(int argc, char ** argv) {
         }
     }

+    if (is_terminated) {
+        if (params.interactive) {
+            console_cleanup(con_st);
+            printf("\n");
+        }
+        if (params.verbose) {
+            llama_print_timings(ctx);
+        }
+        _exit(128 + SIGINT);
+    }
+
     if (params.verbose) {
         llama_print_timings(ctx);
     }
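
Worth noting about this exit path: it calls console_cleanup explicitly and then _exit rather than exit, since exit would run the atexit handler registered earlier and clean up the console a second time, while _exit terminates immediately and skips atexit handlers and static destructors. The 128 + SIGINT (= 130) status is the usual shell convention for death by SIGINT. A tiny demonstration of the difference:

    #include <cstdio>
    #include <cstdlib>
    #include <unistd.h>

    int main() {
        atexit([] { puts("atexit handler ran"); });
        _exit(0);  // prints nothing; plain exit(0) would print the line above
    }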


@@ -731,7 +731,7 @@ void __cxx_atomic_store(__cxx_atomic_base_impl<_Tp>* __a, _Tp __val,
 }

 template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
+_LIBCPP_INLINE_VISIBILITY inline
 _Tp __cxx_atomic_load(const volatile __cxx_atomic_base_impl<_Tp>* __a,
                       memory_order __order) {
   _Tp __ret;
@@ -741,7 +741,7 @@ _Tp __cxx_atomic_load(const volatile __cxx_atomic_base_impl<_Tp>* __a,
 }

 template <typename _Tp>
-_LIBCPP_INLINE_VISIBILITY
+_LIBCPP_INLINE_VISIBILITY inline
 _Tp __cxx_atomic_load(const __cxx_atomic_base_impl<_Tp>* __a, memory_order __order) {
   _Tp __ret;
   __atomic_load(&__a->__a_value, &__ret,
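
In upstream libc++ of this vintage, _LIBCPP_INLINE_VISIBILITY expands to a hidden-visibility, always_inline attribute. Adding the inline keyword is likely aimed at GCC, which, unlike Clang, warns about always_inline functions that are not also declared inline ("always_inline function might not be inlinable"), a plausible irritant for the GCC-built aarch64 target, though the commit does not spell that out. A reduced sketch under that assumption:

    // Assumes _LIBCPP_INLINE_VISIBILITY expands to something like this,
    // as it does in upstream libc++ of the same era.
    #define INLINE_VISIBILITY __attribute__((__visibility__("hidden"), __always_inline__))

    template <typename _Tp>
    INLINE_VISIBILITY inline  // "inline" keeps GCC's -Wattributes happy
    _Tp relaxed_load(const _Tp *__p) {
        _Tp __ret;
        __atomic_load(__p, &__ret, __ATOMIC_RELAXED);  // GCC/Clang built-in
        return __ret;
    }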