From 113e685d18ac4edb20f647fd34b000941556f6a6 Mon Sep 17 00:00:00 2001
From: hoangmit
Date: Wed, 15 Mar 2023 15:05:14 -0400
Subject: [PATCH 01/93] inline -> static inline for "bytesFromNibbles" (#161)

Without "static" prefix, it fails to compile in clang
---
 ggml.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml.c b/ggml.c
index 42621267b..a0c0dd03b 100644
--- a/ggml.c
+++ b/ggml.c
@@ -364,7 +364,7 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
 #if __AVX2__
 // Unpack 32 4-bit fields into 32 bytes
 // The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
-inline __m256i bytesFromNibbles( const uint8_t* rsi )
+static inline __m256i bytesFromNibbles( const uint8_t* rsi )
 {
     // Load 16 bytes from memory
     __m128i tmp = _mm_loadu_si128( ( const __m128i* )rsi );
@@ -381,7 +381,7 @@ inline __m256i bytesFromNibbles( const uint8_t* rsi )
     return bytes;
 }
 
-inline __m128i packNibbles( __m256i bytes )
+static inline __m128i packNibbles( __m256i bytes )
 {
     // Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh
     const __m256i lowByte = _mm256_set1_epi16( 0xFF );

From 956dfda8ad8cea7961e22e0384bbc315bf79aed2 Mon Sep 17 00:00:00 2001
From: Ronsor
Date: Wed, 15 Mar 2023 12:37:50 -0700
Subject: [PATCH 02/93] Use `tokenizer.vocab_size()` instead of hardcoding 32000 in convert-pth-to-ggml.py (#142)

There are ways that special tokens or other new tokens could be added to the
tokenizer; therefore it's probably best not to assume the vocabulary is only
32000 tokens.
---
 convert-pth-to-ggml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index d2557500a..5c36e9c09 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -99,7 +99,7 @@ for p in range(n_parts):
     fout.write(struct.pack("i", ftype))
 
     # Is this correct??
-    for i in range(32000):
+    for i in range(tokenizer.vocab_size()):
         if tokenizer.is_unknown(i):
             # "<unk>" token (translated as ??)
text = " \u2047 ".encode("utf-8") From 977295c700a2952c18400026d57467077dcd1a20 Mon Sep 17 00:00:00 2001 From: Musab Gultekin Date: Wed, 15 Mar 2023 22:39:06 +0300 Subject: [PATCH 03/93] Fix potential licensing issue (#126) * Update README.md * Update README.md remove facebook --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5d8b3b6db..0b2532a09 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Actions Status](https://github.com/ggerganov/llama.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/llama.cpp/actions) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) -Inference of [Facebook's LLaMA](https://github.com/facebookresearch/llama) model in pure C/C++ +Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ **Hot topics:** From 16b2c61a22f828ea77d9f084ca871c63bc5cc283 Mon Sep 17 00:00:00 2001 From: Justin Suess Date: Wed, 15 Mar 2023 15:39:38 -0400 Subject: [PATCH 04/93] fixed color reset on exit (#149) * fixed color reset on exit * added sigint handler for ansi_color_reset * Update main.cpp --------- Co-authored-by: Georgi Gerganov --- main.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/main.cpp b/main.cpp index 6dc9ae980..6227db6c5 100644 --- a/main.cpp +++ b/main.cpp @@ -755,6 +755,7 @@ static bool is_interacting = false; #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) void sigint_handler(int signo) { + printf(ANSI_COLOR_RESET); if (signo == SIGINT) { if (!is_interacting) { is_interacting=true; @@ -1052,5 +1053,9 @@ int main(int argc, char ** argv) { ggml_free(model.ctx); + if (params.use_color) { + printf(ANSI_COLOR_RESET); + } + return 0; } From 2d64715ad475f192a4004a52d134c67ccb6f44ad Mon Sep 17 00:00:00 2001 From: Justin Suess Date: Wed, 15 Mar 2023 15:42:40 -0400 Subject: [PATCH 05/93] added ctx_size parameter (#148) * added ctx_size parameter * added it in more places * Apply suggestions from code review --------- Co-authored-by: Georgi Gerganov --- main.cpp | 5 +++-- utils.cpp | 3 +++ utils.h | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/main.cpp b/main.cpp index 6227db6c5..dcc42afaf 100644 --- a/main.cpp +++ b/main.cpp @@ -547,6 +547,8 @@ bool llama_eval( const int d_key = n_embd/n_head; + // TODO: check if this size scales with n_ctx linearly and remove constant. somehow I feel it wasn't the case + // static size_t buf_size = hparams.n_ctx*1024*1024; static size_t buf_size = 512u*1024*1024; static void * buf = malloc(buf_size); @@ -819,8 +821,7 @@ int main(int argc, char ** argv) { // load the model { const int64_t t_start_us = ggml_time_us(); - - if (!llama_model_load(params.model, model, vocab, 512)) { // TODO: set context from user input ?? 
+ if (!llama_model_load(params.model, model, vocab, params.n_ctx)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } diff --git a/utils.cpp b/utils.cpp index 54217f02f..aa3ad1053 100644 --- a/utils.cpp +++ b/utils.cpp @@ -37,6 +37,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.n_predict = std::stoi(argv[++i]); } else if (arg == "--top_k") { params.top_k = std::stoi(argv[++i]); + } else if (arg == "-c" || arg == "--ctx_size") { + params.n_ctx = std::stoi(argv[++i]); } else if (arg == "--top_p") { params.top_p = std::stof(argv[++i]); } else if (arg == "--temp") { @@ -92,6 +94,7 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params) { fprintf(stderr, " --top_p N top-p sampling (default: %.1f)\n", params.top_p); fprintf(stderr, " --repeat_last_n N last n tokens to consider for penalize (default: %d)\n", params.repeat_last_n); fprintf(stderr, " --repeat_penalty N penalize repeat sequence of tokens (default: %.1f)\n", params.repeat_penalty); + fprintf(stderr, " -c N, --ctx_size N size of the prompt context (default: %d)\n", params.n_ctx); fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp); fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); fprintf(stderr, " -m FNAME, --model FNAME\n"); diff --git a/utils.h b/utils.h index 4f98011cf..021120b05 100644 --- a/utils.h +++ b/utils.h @@ -17,7 +17,8 @@ struct gpt_params { int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); int32_t n_predict = 128; // new tokens to predict int32_t repeat_last_n = 64; // last n tokens to penalize - + int32_t n_ctx = 512; //context size + // sampling parameters int32_t top_k = 40; float top_p = 0.95f; From 2d15d6c9a959749f954d4fbbf44d711e19c5bdff Mon Sep 17 00:00:00 2001 From: Rickey Bowers Jr Date: Wed, 15 Mar 2023 13:56:24 -0600 Subject: [PATCH 06/93] add SIGINT support for _WIN32 environments (#120) * add SIGINT support for _WIN32 environments * perhaps more consistent --- main.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index dcc42afaf..a812d0fa0 100644 --- a/main.cpp +++ b/main.cpp @@ -14,6 +14,8 @@ #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) #include #include +#elif defined (_WIN32) +#include #endif #define ANSI_COLOR_RED "\x1b[31m" @@ -755,7 +757,7 @@ bool llama_eval( static bool is_interacting = false; -#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) void sigint_handler(int signo) { printf(ANSI_COLOR_RESET); if (signo == SIGINT) { @@ -865,6 +867,8 @@ int main(int argc, char ** argv) { sigemptyset (&sigint_action.sa_mask); sigint_action.sa_flags = 0; sigaction(SIGINT, &sigint_action, NULL); +#elif defined (_WIN32) + signal(SIGINT, sigint_handler); #endif fprintf(stderr, "%s: interactive mode on.\n", __func__); @@ -894,7 +898,7 @@ int main(int argc, char ** argv) { if (params.interactive) { fprintf(stderr, "== Running in interactive mode. 
==\n" -#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) " - Press Ctrl+C to interject at any time.\n" #endif " - Press Return to return control to LLaMa.\n" @@ -1039,6 +1043,9 @@ int main(int argc, char ** argv) { } } +#if defined (_WIN32) + signal(SIGINT, SIG_DFL); +#endif // report timing { From 27944c4206a49bbe003021a2610bacaa3044e619 Mon Sep 17 00:00:00 2001 From: moritzbrantner <31051084+moritzbrantner@users.noreply.github.com> Date: Wed, 15 Mar 2023 21:35:25 +0100 Subject: [PATCH 07/93] fixed typo (#178) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0b2532a09..1f7e19412 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b0 - We don't know yet how much the quantization affects the quality of the generated text - Probably the token sampling can be improved - The Accelerate framework is actually currently unused since I found that for tensor shapes typical for the Decoder, - there is no benefit compared to the ARM_NEON intrinsics implementation. Of course, it's possible that I simlpy don't + there is no benefit compared to the ARM_NEON intrinsics implementation. Of course, it's possible that I simply don't know how to utilize it properly. But in any case, you can even disable it with `LLAMA_NO_ACCELERATE=1 make` and the performance will be the same, since no BLAS calls are invoked by the current implementation From 6eac39ba953acaeec396cea2969dbf413907e2ec Mon Sep 17 00:00:00 2001 From: hoangmit Date: Wed, 15 Mar 2023 18:41:38 -0400 Subject: [PATCH 08/93] Add RMS norm and use it (#187) * add ggml_rms_norm * update op num --- ggml.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- ggml.h | 5 +++ main.cpp | 6 +-- 3 files changed, 134 insertions(+), 5 deletions(-) diff --git a/ggml.c b/ggml.c index a0c0dd03b..eee54f7ff 100644 --- a/ggml.c +++ b/ggml.c @@ -2069,6 +2069,7 @@ static const char * GGML_OP_LABEL[GGML_OP_COUNT] = { "GELU", "SILU", "NORM", + "RMS_NORM", "MUL_MAT", @@ -2089,7 +2090,7 @@ static const char * GGML_OP_LABEL[GGML_OP_COUNT] = { "FLASH_FF", }; -static_assert(GGML_OP_COUNT == 34, "GGML_OP_COUNT != 34"); +static_assert(GGML_OP_COUNT == 35, "GGML_OP_COUNT != 35"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", @@ -2112,6 +2113,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "gelu(x)", "silu(x)", "norm(x)", + "rms_norm(x)", "X*Y", @@ -2132,7 +2134,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "flash_ff(x)", }; -static_assert(GGML_OP_COUNT == 34, "GGML_OP_COUNT != 34"); +static_assert(GGML_OP_COUNT == 35, "GGML_OP_COUNT != 35"); // // ggml object @@ -3618,6 +3620,39 @@ struct ggml_tensor * ggml_norm_inplace( return ggml_norm_impl(ctx, a, true); } +struct ggml_tensor * ggml_rms_norm_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + bool inplace) { + bool is_node = false; + + if (!inplace && (a->grad)) { + GGML_ASSERT(false); // TODO: implement backward + is_node = true; + } + + struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + + result->op = GGML_OP_RMS_NORM; + result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = NULL; // TODO: maybe store epsilon here? 
+ + return result; +} + +struct ggml_tensor * ggml_rms_norm( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_rms_norm_impl(ctx, a, false); +} + +struct ggml_tensor * ggml_rms_norm_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_rms_norm_impl(ctx, a, true); +} + // ggml_mul_mat struct ggml_tensor * ggml_mul_mat( @@ -5406,6 +5441,87 @@ static void ggml_compute_forward_norm( } } +static void ggml_compute_forward_rms_norm_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + GGML_ASSERT(ggml_are_same_shape(src0, dst)); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + GGML_ASSERT(src0->nb[0] == sizeof(float)); + + const int ith = params->ith; + const int nth = params->nth; + + const int ne00 = src0->ne[0]; + const int ne01 = src0->ne[1]; + const int ne02 = src0->ne[2]; + const int ne03 = src0->ne[3]; + + const size_t nb01 = src0->nb[1]; + const size_t nb02 = src0->nb[2]; + const size_t nb03 = src0->nb[3]; + + const size_t nb1 = dst->nb[1]; + const size_t nb2 = dst->nb[2]; + const size_t nb3 = dst->nb[3]; + + const ggml_float eps = 1e-5f; // TODO: make this a parameter + + // TODO: optimize + for (int i03 = 0; i03 < ne03; i03++) { + for (int i02 = 0; i02 < ne02; i02++) { + for (int i01 = ith; i01 < ne01; i01 += nth) { + const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); + + ggml_float mean = 0.0; + for (int i00 = 0; i00 < ne00; i00++) { + mean += x[i00] * x[i00]; + } + + mean /= ne00; + + float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3); + + memcpy(y, x, ne00 * sizeof(float)); + // for (int i00 = 0; i00 < ne00; i00++) { + // y[i00] = x[i00]; + // } + + const float scale = 1.0/sqrt(mean + eps); + + ggml_vec_scale_f32(ne00, y, scale); + } + } + } +} + +static void ggml_compute_forward_rms_norm( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_rms_norm_f32(params, src0, dst); + } break; + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_I8: + case GGML_TYPE_I16: + case GGML_TYPE_I32: + case GGML_TYPE_F16: + case GGML_TYPE_COUNT: + { + GGML_ASSERT(false); + } break; + } +} + + // ggml_compute_forward_mul_mat #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) @@ -8522,6 +8638,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_norm(params, tensor->src0, tensor); } break; + case GGML_OP_RMS_NORM: + { + ggml_compute_forward_rms_norm(params, tensor->src0, tensor); + } break; case GGML_OP_MUL_MAT: { ggml_compute_forward_mul_mat(params, tensor->src0, tensor->src1, tensor); @@ -8764,6 +8884,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor { GGML_ASSERT(false); // TODO: not implemented } break; + case GGML_OP_RMS_NORM: + { + GGML_ASSERT(false); // TODO: not implemented + } break; case GGML_OP_MUL_MAT: { if (src0->grad) { diff --git a/ggml.h b/ggml.h index 7ce655c1b..bac4fe65c 100644 --- a/ggml.h +++ b/ggml.h @@ -230,6 +230,7 @@ enum ggml_op { GGML_OP_GELU, GGML_OP_SILU, GGML_OP_NORM, // normalize + GGML_OP_RMS_NORM, GGML_OP_MUL_MAT, @@ -482,6 +483,10 @@ struct ggml_tensor * ggml_norm( struct ggml_context * ctx, struct ggml_tensor * a); +struct ggml_tensor * ggml_rms_norm( + struct ggml_context * ctx, + struct ggml_tensor * a); + // A: m 
rows, n columns // B: p rows, n columns (i.e. we transpose it internally) // result is m columns, p rows diff --git a/main.cpp b/main.cpp index a812d0fa0..ca0fca8b3 100644 --- a/main.cpp +++ b/main.cpp @@ -588,7 +588,7 @@ bool llama_eval( // norm { - cur = ggml_norm(ctx0, inpL); + cur = ggml_rms_norm(ctx0, inpL); // cur = attention_norm*cur cur = ggml_mul(ctx0, @@ -678,7 +678,7 @@ bool llama_eval( { // norm { - cur = ggml_norm(ctx0, inpFF); + cur = ggml_rms_norm(ctx0, inpFF); // cur = ffn_norm*cur cur = ggml_mul(ctx0, @@ -713,7 +713,7 @@ bool llama_eval( // norm { - inpL = ggml_norm(ctx0, inpL); + inpL = ggml_rms_norm(ctx0, inpL); // inpL = norm*inpL inpL = ggml_mul(ctx0, From 9b4a15b17d8395eb075379b140fcd0b0283f4ef6 Mon Sep 17 00:00:00 2001 From: Nebula Date: Wed, 15 Mar 2023 19:29:25 -0400 Subject: [PATCH 09/93] Fix RMS norm in GGML (#191) --- ggml.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml.c b/ggml.c index eee54f7ff..535c7b7d2 100644 --- a/ggml.c +++ b/ggml.c @@ -9314,6 +9314,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) node->n_tasks = n_threads; } break; case GGML_OP_NORM: + case GGML_OP_RMS_NORM: { node->n_tasks = n_threads; } break; From 273abc47ff9dd899b3c4f58acd19d4649e90d6b4 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 16 Mar 2023 07:12:12 +0200 Subject: [PATCH 10/93] Update hot topics - RMSnorm --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1f7e19412..d43fc3774 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ **Hot topics:** +- RMSNorm implementation / fixes: https://github.com/ggerganov/llama.cpp/issues/173 - Cache input prompts for faster initialization: https://github.com/ggerganov/llama.cpp/issues/64 - Create a `llama.cpp` logo: https://github.com/ggerganov/llama.cpp/issues/105 From ac15de789547e5a6e93df552e787379b3a23ef26 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 16 Mar 2023 08:55:13 +0200 Subject: [PATCH 11/93] Expand "Contributing" section --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d43fc3774..c6fb427e2 100644 --- a/README.md +++ b/README.md @@ -207,8 +207,9 @@ https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b0 ### Contributing - Contributors can open PRs -- Collaborators can push to branches in the `llama.cpp` repo +- Collaborators can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch - Collaborators will be invited based on contributions +- Any help with managing issues and PRs is very appreciated! ### Coding guidelines From 721311070e31464ac12bef9a4444093eb3eaebf7 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 16 Mar 2023 15:00:09 +0200 Subject: [PATCH 12/93] Update README.md --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index c6fb427e2..15e1b9a2d 100644 --- a/README.md +++ b/README.md @@ -219,7 +219,3 @@ https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b0 - There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). 
Vertical alignment makes things more readable and easier to batch edit - Clean-up any trailing whitespaces, use 4 spaces indentation, brackets on same line, `void * ptr`, `int & a` - See [good first issues](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions - -### Misc - -- Practice your C++ typing skills: https://typing-battles.ggerganov.com From 904d2a8d6acd667c9633138d45a361d40fbf76d0 Mon Sep 17 00:00:00 2001 From: Matvey Soloviev Date: Fri, 17 Mar 2023 05:48:39 +0100 Subject: [PATCH 13/93] Q4_1 quantization (#193) * Add AVX2 version of ggml_vec_dot_q4_1 * Small optimisations to q4_1 dot product (@Const-me) * Rearrange Q4_1 quantization to work for multipart models. (Fix #152) * Fix ggml_vec_mad_q4_1 too * Fix non-vectorised q4_1 vec mul --- ggml.c | 149 ++++++++++++++++++++++++++++++++++++++++++------------ utils.cpp | 20 +++++--- 2 files changed, 130 insertions(+), 39 deletions(-) diff --git a/ggml.c b/ggml.c index 535c7b7d2..c4f838917 100644 --- a/ggml.c +++ b/ggml.c @@ -607,10 +607,11 @@ void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) { assert(k % QK == 0); const int nb = k / QK; + const size_t bs = 2*sizeof(float) + QK/2; - float * restrict pm = (float *) (y); - float * restrict pd = (float *) (pm + nb); - uint8_t * restrict pb = (uint8_t *) (pd + nb); + uint8_t * restrict pd = ((uint8_t *)y + 0*bs); + uint8_t * restrict pm = ((uint8_t *)y + 0*bs + sizeof(float)); + uint8_t * restrict pb = ((uint8_t *)y + 0*bs + 2*sizeof(float)); uint8_t pp[QK/2]; @@ -627,8 +628,10 @@ void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) { const float d = (max - min) / ((1 << 4) - 1); const float id = d ? 1.0f/d : 0.0f; - pm[i] = min; - pd[i] = d; + *(float *)pm = min; + *(float *)pd = d; + pm += bs; + pd += bs; for (int l = 0; l < QK; l += 2) { const float v0 = (x[i*QK + l + 0] - min)*id; @@ -643,7 +646,8 @@ void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) { pp[l/2] = vi0 | (vi1 << 4); } - memcpy(pb + i*QK/2, pp, sizeof(pp)); + memcpy(pb, pp, sizeof(pp)); + pb += bs; } } @@ -687,16 +691,17 @@ void dequantize_row_q4_1(const void * restrict x, float * restrict y, int k) { assert(k % QK == 0); const int nb = k / QK; + const size_t bs = 2*sizeof(float) + QK/2; - const float * restrict pm = (const float *) (x); - const float * restrict pd = (const float *) (pm + nb); - const uint8_t * restrict pb = (const uint8_t *) (pd + nb); + const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs); + const uint8_t * restrict pm = ((const uint8_t *)x + 0*bs + sizeof(float)); + const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + 2*sizeof(float)); for (int i = 0; i < nb; i++) { - const float m = pm[i]; - const float d = pd[i]; + const float d = *(const float *) (pd + i*bs); + const float m = *(const float *) (pm + i*bs); - const uint8_t * restrict pp = pb + i*QK/2; + const uint8_t * restrict pp = pb + i*bs; for (int l = 0; l < QK; l += 2) { const uint8_t vi = pp[l/2]; @@ -1584,28 +1589,109 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void inline static void ggml_vec_dot_q4_1(const int n, float * restrict s, const void * restrict x, const void * restrict y) { const int nb = n / QK; - const float * restrict pm0 = (const float *) x; - const float * restrict pm1 = (const float *) y; + const size_t bs = 2*sizeof(float) + QK/2; - const float * restrict pd0 = (const float *) (pm0 + nb); - const float * restrict pd1 = 
(const float *) (pm1 + nb); + const uint8_t * restrict pd0 = ((const uint8_t *)x + 0*bs); + const uint8_t * restrict pd1 = ((const uint8_t *)y + 0*bs); - const uint8_t * restrict pb0 = (const uint8_t *) (pd0 + nb); - const uint8_t * restrict pb1 = (const uint8_t *) (pd1 + nb); + const uint8_t * restrict pm0 = ((const uint8_t *)x + 0*bs + sizeof(float)); + const uint8_t * restrict pm1 = ((const uint8_t *)y + 0*bs + sizeof(float)); + + const uint8_t * restrict pb0 = ((const uint8_t *)x + 0*bs + 2*sizeof(float)); + const uint8_t * restrict pb1 = ((const uint8_t *)y + 0*bs + 2*sizeof(float)); float sumf = 0.0; -#if 1 +#if defined(__AVX2__) +#if QK == 32 + // Initialize accumulator with zeros + __m256 acc = _mm256_setzero_ps(); + // Accumulator for constant offsets + float acc_offset = 0.0f; + + // Main loop + for (int i = 0; i < nb; ++i) { + const float * m0 = (const float *) (pm0 + i*bs); + const float * m1 = (const float *) (pm1 + i*bs); + + const float * d0 = (const float *) (pd0 + i*bs); + const float * d1 = (const float *) (pd1 + i*bs); + + const uint8_t * restrict p0 = pb0 + i*bs; + const uint8_t * restrict p1 = pb1 + i*bs; + + const __m256 d0v = _mm256_broadcast_ss( d0 ); + const __m256 d1v = _mm256_broadcast_ss( d1 ); + const __m256 m0v = _mm256_broadcast_ss( m0 ); + const __m256 m1v = _mm256_broadcast_ss( m1 ); + + + // Compute combined scale for the block + const __m256 scale_01 = _mm256_mul_ps( d0v, d1v ); + + // Compute cross scales for the block + const __m256 scale_0 = _mm256_mul_ps( d0v, m1v ); + const __m256 scale_1 = _mm256_mul_ps( m0v, d1v ); + const __m256 cross_scales = _mm256_blend_ps( scale_0, scale_1, 0b10101010 ); + + // Load 16 bytes, and unpack 4 bit fields into bytes, making 32 bytes + __m256i bx = bytesFromNibbles( p0 ); + __m256i by = bytesFromNibbles( p1 ); + + // Now we have a vector with bytes in [ 0 .. 15 ] interval. + + // Sign-extend first 16 signed bytes into int16_t + __m256i x16 = _mm256_cvtepi8_epi16( _mm256_castsi256_si128( bx ) ); + __m256i y16 = _mm256_cvtepi8_epi16( _mm256_castsi256_si128( by ) ); + // Compute products of int16_t integers, add pairwise + __m256i i32 = _mm256_madd_epi16( x16, y16 ); + + // Sign-extend last 16 signed bytes into int16_t vectors + __m256i x16_h = _mm256_cvtepi8_epi16( _mm256_extracti128_si256( bx, 1 ) ); + __m256i y16_h = _mm256_cvtepi8_epi16( _mm256_extracti128_si256( by, 1 ) ); + // Accumulate products of int16_t integers + i32 = _mm256_add_epi32( i32, _mm256_madd_epi16( x16_h, y16_h ) ); + + // compute sums of unsigned bytes in bx, by in blocks of 8. + // This results in a layout like X100 0000 X200 0000 X300 0000 X400 0000, + // which we then interleave as X100 Y100 X200 Y200 X300 Y300 X400 Y400. 
+ // so if we then cast to 8 singles, we get 8 floats like [ x0_7, y0_7, x8_15, y8_15, x16_23, y16_23, x24_31, y24_31 ] + __m256i xsumi = _mm256_sad_epu8( bx, _mm256_setzero_si256() ); + __m256i ysumi = _mm256_sad_epu8( by, _mm256_setzero_si256() ); + __m256i sumsi = _mm256_or_si256( xsumi, _mm256_slli_si256( ysumi, 4 ) ); + __m256 sums = _mm256_cvtepi32_ps( sumsi ); + + // Convert int32_t to float + __m256 p = _mm256_cvtepi32_ps( i32 ); + // Apply the scale, and accumulate + // acc += d0*d1*x*y + d0*m1*x + d1*m0*y + acc = _mm256_fmadd_ps( scale_01, p, acc ); + acc = _mm256_fmadd_ps( cross_scales, sums, acc ); + // acc_offset += m0*m1 (for each entry in the block) + acc_offset += (*m0)*(*m1); + } + + // Return horizontal sum of the acc vector + __m128 res = _mm256_extractf128_ps( acc, 1 ); + res = _mm_add_ps( res, _mm256_castps256_ps128( acc ) ); + res = _mm_add_ps( res, _mm_movehl_ps( res, res ) ); + res = _mm_add_ss( res, _mm_movehdup_ps( res ) ); + + sumf = _mm_cvtss_f32( res ) + acc_offset * QK; +#else +#error "not implemented for QK" +#endif +#else // scalar for (int i = 0; i < nb; i++) { - const float m0 = pm0[i]; - const float m1 = pm1[i]; + const float m0 = *(const float *) (pm0 + i*bs); + const float m1 = *(const float *) (pm1 + i*bs); - const float d0 = pd0[i]; - const float d1 = pd1[i]; + const float d0 = *(const float *) (pd0 + i*bs); + const float d1 = *(const float *) (pd1 + i*bs); - const uint8_t * restrict p0 = pb0 + i*QK/2; - const uint8_t * restrict p1 = pb1 + i*QK/2; + const uint8_t * restrict p0 = pb0 + i*bs; + const uint8_t * restrict p1 = pb1 + i*bs; for (int j = 0; j < QK/2; j++) { const uint8_t v0 = p0[j]; @@ -1839,16 +1925,17 @@ inline static void ggml_vec_mad_q4_1(const int n, float * restrict y, void * res assert(n % QK == 0); const int nb = n / QK; + const size_t bs = 2*sizeof(float) + QK/2; - const float * restrict pm = (const float *) (x); - const float * restrict pd = (const float *) (pm + nb); - const uint8_t * restrict pb = (const uint8_t *) (pd + nb); + const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs); + const uint8_t * restrict pm = ((const uint8_t *)x + 0*bs + sizeof(float)); + const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + 2*sizeof(float)); for (int i = 0; i < nb; i++) { - const float m = pm[i]; - const float d = pd[i]; + const float d = *(const float *) (pd + i*bs); + const float m = *(const float *) (pm + i*bs); - const uint8_t * restrict pp = pb + i*QK/2; + const uint8_t * restrict pp = pb + i*bs; for (int l = 0; l < QK; l += 2) { const uint8_t vi = pp[l/2]; diff --git a/utils.cpp b/utils.cpp index aa3ad1053..26e313d5f 100644 --- a/utils.cpp +++ b/utils.cpp @@ -489,7 +489,8 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist) { const int nb = k / qk; - const size_t row_size = nb*(2*sizeof(float) + sizeof(uint8_t)*qk/2); + const size_t bs = (2*sizeof(float) + sizeof(uint8_t)*qk/2); + const size_t row_size = nb*bs; assert(k % qk == 0); @@ -498,10 +499,10 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t char * pdst = (char *) dst; - for (int j = 0; j < n; j += k) { - float * pm = (float *) (pdst + (j/k)*row_size); - float * pd = (float *) (pm + nb); - uint8_t * pb = (uint8_t *) (pd + nb); + for (int j = 0; j < n; j += k) { + uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); + uint8_t * pm = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); + uint8_t * pb = 
(uint8_t *) (pdst + (j/k)*row_size + 0*bs + 2*sizeof(float)); //printf("n = %d, k = %d, nb = %d, row_size = %d, j = %d, pm = %p, pd = %p, pb = %p\n", n, k, nb, row_size, j, pm, pd, pb); @@ -519,8 +520,10 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t const float d = (max - min) / ((1 << 4) - 1); const float id = d ? 1.0f/d : 0.0f; - pm[i] = min; - pd[i] = d; + *(float *) pd = d; + *(float *) pm = min; + pd += bs; + pm += bs; for (int l = 0; l < qk; l += 2) { const float v0 = (src[j + i*qk + l + 0] - min)*id; @@ -538,7 +541,8 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t pp[l/2] = vi0 | (vi1 << 4); } - memcpy(pb + i*qk/2, pp, pp_size); + memcpy(pb, pp, pp_size); + pb += bs; } } } From 2af23d30434a677c6416812eea52ccc0af65119c Mon Sep 17 00:00:00 2001 From: Bernat Vadell Date: Fri, 17 Mar 2023 10:47:06 +0100 Subject: [PATCH 14/93] =?UTF-8?q?=F0=9F=9A=80=20Dockerize=20llamacpp=20(#1?= =?UTF-8?q?32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: dockerize llamacpp * feat: split build & runtime stages * split dockerfile into main & tools * add quantize into tool docker image * Update .devops/tools.sh Co-authored-by: Georgi Gerganov * add docker action pipeline * change CI to publish at github docker registry * fix name runs-on macOS-latest is macos-latest (lowercase) * include docker versioned images * fix github action docker * fix docker.yml * feat: include all-in-one command tool & update readme.md --------- Co-authored-by: Georgi Gerganov --- .devops/full.Dockerfile | 17 ++++++++++ .devops/main.Dockerfile | 18 ++++++++++ .devops/tools.sh | 46 +++++++++++++++++++++++++ .dockerignore | 24 +++++++++++++ .github/workflows/build.yml | 2 +- .github/workflows/docker.yml | 61 +++++++++++++++++++++++++++++++++ README.md | 32 +++++++++++++++++ convert-pth-to-ggml.py | 6 +++- download-pth.py | 66 ++++++++++++++++++++++++++++++++++++ 9 files changed, 270 insertions(+), 2 deletions(-) create mode 100644 .devops/full.Dockerfile create mode 100644 .devops/main.Dockerfile create mode 100755 .devops/tools.sh create mode 100644 .dockerignore create mode 100644 .github/workflows/docker.yml create mode 100644 download-pth.py diff --git a/.devops/full.Dockerfile b/.devops/full.Dockerfile new file mode 100644 index 000000000..618cdddc4 --- /dev/null +++ b/.devops/full.Dockerfile @@ -0,0 +1,17 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION as build + +RUN apt-get update && \ + apt-get install -y build-essential python3 python3-pip + +RUN pip install --upgrade pip setuptools wheel \ + && pip install torch torchvision torchaudio sentencepiece numpy + +WORKDIR /app + +COPY . . + +RUN make + +ENTRYPOINT ["/app/.devops/tools.sh"] \ No newline at end of file diff --git a/.devops/main.Dockerfile b/.devops/main.Dockerfile new file mode 100644 index 000000000..cd575efa0 --- /dev/null +++ b/.devops/main.Dockerfile @@ -0,0 +1,18 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION as build + +RUN apt-get update && \ + apt-get install -y build-essential + +WORKDIR /app + +COPY . . 
+ +RUN make + +FROM ubuntu:$UBUNTU_VERSION as runtime + +COPY --from=build /app/main /main + +ENTRYPOINT [ "/main" ] \ No newline at end of file diff --git a/.devops/tools.sh b/.devops/tools.sh new file mode 100755 index 000000000..b5711c94e --- /dev/null +++ b/.devops/tools.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -e + +# Read the first argument into a variable +arg1="$1" + +# Shift the arguments to remove the first one +shift + +# Join the remaining arguments into a single string +arg2="$@" + +if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then + python3 ./convert-pth-to-ggml.py $arg2 +elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then + ./quantize $arg2 +elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then + ./main $arg2 +elif [[ $arg1 == '--download' || $arg1 == '-d' ]]; then + python3 ./download-pth.py $arg2 +elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then + echo "Downloading model..." + python3 ./download-pth.py "$1" "$2" + echo "Converting PTH to GGML..." + for i in `ls $1/$2/ggml-model-f16.bin*`; do + if [ -f "${i/f16/q4_0}" ]; then + echo "Skip model quantization, it already exists: ${i/f16/q4_0}" + else + echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..." + ./quantize "$i" "${i/f16/q4_0}" 2 + fi + done +else + echo "Unknown command: $arg1" + echo "Available commands: " + echo " --run (-r): Run a model previously converted into ggml" + echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -t 8 -n 512" + echo " --convert (-c): Convert a llama model into ggml" + echo " ex: \"/models/7B/\" 1" + echo " --quantize (-q): Optimize with quantization process ggml" + echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2" + echo " --download (-d): Download original llama model from CDN: https://agi.gpt4.org/llama/" + echo " ex: \"/models/\" 7B" + echo " --all-in-one (-a): Execute --download, --convert & --quantize" + echo " ex: \"/models/\" 7B" +fi diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..952990f26 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,24 @@ +*.o +*.a +.cache/ +.vs/ +.vscode/ +.DS_Store + +build/ +build-em/ +build-debug/ +build-release/ +build-static/ +build-no-accel/ +build-sanitize-addr/ +build-sanitize-thread/ + +models/* + +/main +/quantize + +arm_neon.h +compile_commands.json +Dockerfile \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1a068ae75..94f199cb8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,7 @@ jobs: make macOS-latest: - runs-on: macOS-latest + runs-on: macos-latest steps: - name: Clone diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 000000000..bc9aff7b7 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,61 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# GitHub recommends pinning actions to a commit SHA. +# To get a newer version, you will need to update the SHA. +# You can also reference a tag or branch, but the action may change without warning. 
+ +name: Publish Docker image + +on: + pull_request: + push: + branches: + - master + +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + runs-on: ubuntu-latest + env: + COMMIT_SHA: ${{ github.sha }} + strategy: + matrix: + config: + - { tag: "light", dockerfile: ".devops/main.Dockerfile" } + - { tag: "full", dockerfile: ".devops/full.Dockerfile" } + steps: + - name: Check out the repo + uses: actions/checkout@v3 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push Docker image (versioned) + if: github.event_name == 'push' + uses: docker/build-push-action@v4 + with: + context: . + push: true + tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}" + file: ${{ matrix.config.dockerfile }} + + - name: Build and push Docker image (tagged) + uses: docker/build-push-action@v4 + with: + context: . + push: ${{ github.event_name == 'push' }} + tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}" + file: ${{ matrix.config.dockerfile }} \ No newline at end of file diff --git a/README.md b/README.md index 15e1b9a2d..8cf59f418 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Supported platforms: - [X] Mac OS - [X] Linux - [X] Windows (via CMake) +- [X] Docker --- @@ -194,6 +195,37 @@ Finally, copy the `llama` binary and the model files to your device storage. Her https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4 +### Docker + +#### Prerequisites +* Docker must be installed and running on your system. +* Create a folder to store big models & intermediate files (in ex. im using /llama/models) + +#### Images +We have two Docker images available for this project: + +1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization. +2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable file. + +#### Usage + +The easiest way to download the models, convert them to ggml and optimize them is with the --all-in-one command which includes the full docker image. + + ```bash +docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B +``` + +On complete, you are ready to play! + +```bash +docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512 +``` + +or with light image: + +```bash +docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512 +``` ## Limitations diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index 5c36e9c09..d0eb213c8 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -16,7 +16,7 @@ # At the start of the ggml file we write the model parameters # and vocabulary. 
# - +import os import sys import json import struct @@ -64,6 +64,10 @@ if len(sys.argv) > 2: sys.exit(1) fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" +if os.path.exists(fname_out): + print(f"Skip conversion, it already exists: {fname_out}") + sys.exit(0) + with open(fname_hparams, "r") as f: hparams = json.load(f) diff --git a/download-pth.py b/download-pth.py new file mode 100644 index 000000000..129532c0c --- /dev/null +++ b/download-pth.py @@ -0,0 +1,66 @@ +import os +import sys +from tqdm import tqdm +import requests + +if len(sys.argv) < 3: + print("Usage: download-pth.py dir-model model-type\n") + print(" model-type: Available models 7B, 13B, 30B or 65B") + sys.exit(1) + +modelsDir = sys.argv[1] +model = sys.argv[2] + +num = { + "7B": 1, + "13B": 2, + "30B": 4, + "65B": 8, +} + +if model not in num: + print(f"Error: model {model} is not valid, provide 7B, 13B, 30B or 65B") + sys.exit(1) + +print(f"Downloading model {model}") + +files = ["checklist.chk", "params.json"] + +for i in range(num[model]): + files.append(f"consolidated.0{i}.pth") + +resolved_path = os.path.abspath(os.path.join(modelsDir, model)) +os.makedirs(resolved_path, exist_ok=True) + +for file in files: + dest_path = os.path.join(resolved_path, file) + + if os.path.exists(dest_path): + print(f"Skip file download, it already exists: {file}") + continue + + url = f"https://agi.gpt4.org/llama/LLaMA/{model}/{file}" + response = requests.get(url, stream=True) + with open(dest_path, 'wb') as f: + with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t: + for chunk in response.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + t.update(len(chunk)) + +files2 = ["tokenizer_checklist.chk", "tokenizer.model"] +for file in files2: + dest_path = os.path.join(modelsDir, file) + + if os.path.exists(dest_path): + print(f"Skip file download, it already exists: {file}") + continue + + url = f"https://agi.gpt4.org/llama/LLaMA/{file}" + response = requests.get(url, stream=True) + with open(dest_path, 'wb') as f: + with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t: + for chunk in response.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + t.update(len(chunk)) \ No newline at end of file From 6b0df5ccf360fe5c015f6607f0375bfc6849005e Mon Sep 17 00:00:00 2001 From: mmyjona Date: Sat, 18 Mar 2023 00:38:24 +0800 Subject: [PATCH 15/93] add ptread link to fix cmake build under linux (#114) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add ptread link to fix cmake build under linux * add cmake to linux and macos platform * separate make and cmake workflow --------- Co-authored-by: Sebastián A --- .github/workflows/build.yml | 43 ++++++++++++++++++++++++++++++++++--- CMakeLists.txt | 3 +++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 94f199cb8..a94a38991 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,7 +2,7 @@ name: CI on: [push, pull_request] jobs: - ubuntu-latest: + ubuntu-latest-make: runs-on: ubuntu-latest steps: @@ -18,7 +18,26 @@ jobs: run: | make - macOS-latest: + ubuntu-latest-cmake: + runs-on: ubuntu-latest + + steps: + - name: Clone + uses: actions/checkout@v1 + + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Build + run: | + mkdir build + cd build + cmake .. + cmake --build . 
--config Release + + macOS-latest-make: runs-on: macos-latest steps: @@ -33,7 +52,25 @@ jobs: run: | make - windows-latest: + macOS-latest-cmake: + runs-on: macOS-latest + + steps: + - name: Clone + uses: actions/checkout@v1 + + - name: Dependencies + run: | + brew update + + - name: Build + run: | + mkdir build + cd build + cmake .. + cmake --build . --config Release + + windows-latest-cmake: runs-on: windows-latest steps: diff --git a/CMakeLists.txt b/CMakeLists.txt index ca3be38a5..38e7266dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,8 @@ project("llama.cpp") set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED true) set(CMAKE_C_STANDARD 11) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) @@ -126,3 +128,4 @@ target_link_libraries(ggml PRIVATE ${LLAMA_EXTRA_LIBS}) target_include_directories(ggml PUBLIC .) target_link_libraries(quantize PRIVATE ggml) target_link_libraries(llama PRIVATE ggml) +target_link_libraries(ggml PRIVATE Threads::Threads) From 367946c668757532deed929e1d78673c6ac6bcb8 Mon Sep 17 00:00:00 2001 From: Stephan Walter Date: Fri, 17 Mar 2023 17:47:35 +0000 Subject: [PATCH 16/93] Don't tell users to use a bad number of threads (#243) The readme tells people to use the command line option "-t 8", causing 8 threads to be started. On systems with fewer than 8 cores, this causes a significant slowdown. Remove the option from the example command lines and use /proc/cpuinfo on Linux to determine a sensible default. --- .devops/tools.sh | 2 +- README.md | 10 +++++----- ggml.c | 4 ---- utils.cpp | 12 ++++++++++++ utils.h | 2 +- 5 files changed, 19 insertions(+), 11 deletions(-) diff --git a/.devops/tools.sh b/.devops/tools.sh index b5711c94e..352e04942 100755 --- a/.devops/tools.sh +++ b/.devops/tools.sh @@ -34,7 +34,7 @@ else echo "Unknown command: $arg1" echo "Available commands: " echo " --run (-r): Run a model previously converted into ggml" - echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -t 8 -n 512" + echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512" echo " --convert (-c): Convert a llama model into ggml" echo " ex: \"/models/7B/\" 1" echo " --quantize (-q): Optimize with quantization process ggml" diff --git a/README.md b/README.md index 8cf59f418..7338ea790 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ Supported platforms: Here is a typical run using LLaMA-7B: ```java -make -j && ./main -m ./models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512 +make -j && ./main -m ./models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 I llama.cpp build info: I UNAME_S: Darwin I UNAME_P: arm @@ -150,7 +150,7 @@ python3 convert-pth-to-ggml.py models/7B/ 1 ./quantize.sh 7B # run the inference -./main -m ./models/7B/ggml-model-q4_0.bin -t 8 -n 128 +./main -m ./models/7B/ggml-model-q4_0.bin -n 128 ``` When running the larger models, make sure you have enough disk space to store all the intermediate files. 
@@ -164,7 +164,7 @@ In this mode, you can always interrupt generation by pressing Ctrl+C and enter o Here is an example few-shot interaction, invoked with the command ``` -./main -m ./models/13B/ggml-model-q4_0.bin -t 8 -n 256 --repeat_penalty 1.0 --color -i -r "User:" \ +./main -m ./models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" \ -p \ "Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision. @@ -218,13 +218,13 @@ docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-on On complete, you are ready to play! ```bash -docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512 +docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 ``` or with light image: ```bash -docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512 +docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 ``` ## Limitations diff --git a/ggml.c b/ggml.c index c4f838917..4fb83adbd 100644 --- a/ggml.c +++ b/ggml.c @@ -9318,10 +9318,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { } void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) { - if (cgraph->n_threads <= 0) { - cgraph->n_threads = 8; - } - const int n_threads = cgraph->n_threads; struct ggml_compute_state_shared state_shared = { diff --git a/utils.cpp b/utils.cpp index 26e313d5f..9e50487ef 100644 --- a/utils.cpp +++ b/utils.cpp @@ -16,6 +16,18 @@ #endif bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { + // determine sensible default number of threads. + // std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0. 
+#ifdef __linux__ + std::ifstream cpuinfo("/proc/cpuinfo"); + params.n_threads = std::count(std::istream_iterator(cpuinfo), + std::istream_iterator(), + std::string("processor")); +#endif + if (params.n_threads == 0) { + params.n_threads = std::max(1, (int32_t) std::thread::hardware_concurrency()); + } + for (int i = 1; i < argc; i++) { std::string arg = argv[i]; diff --git a/utils.h b/utils.h index 021120b05..5e5b40ffa 100644 --- a/utils.h +++ b/utils.h @@ -14,7 +14,7 @@ struct gpt_params { int32_t seed = -1; // RNG seed - int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); + int32_t n_threads; int32_t n_predict = 128; // new tokens to predict int32_t repeat_last_n = 64; // last n tokens to penalize int32_t n_ctx = 512; //context size From e81b9c81c101f64531ef0fa1ee6b77d562635652 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 17 Mar 2023 20:30:04 +0200 Subject: [PATCH 17/93] Update Contributing section --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7338ea790..808d54e89 100644 --- a/README.md +++ b/README.md @@ -242,6 +242,7 @@ docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models - Collaborators can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch - Collaborators will be invited based on contributions - Any help with managing issues and PRs is very appreciated! +- Make sure to read this: [Inference at the edge](https://github.com/ggerganov/llama.cpp/discussions/205) ### Coding guidelines From 4f546091102a418ffdc6230f872ac56e5cedb835 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 17 Mar 2023 21:46:46 +0200 Subject: [PATCH 18/93] Default to 4 threads (#243) --- utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils.h b/utils.h index 5e5b40ffa..c1a8498a7 100644 --- a/utils.h +++ b/utils.h @@ -14,11 +14,11 @@ struct gpt_params { int32_t seed = -1; // RNG seed - int32_t n_threads; + int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); int32_t n_predict = 128; // new tokens to predict int32_t repeat_last_n = 64; // last n tokens to penalize int32_t n_ctx = 512; //context size - + // sampling parameters int32_t top_k = 40; float top_p = 0.95f; From c9f670a17755311aa28c411f5c7f3c8c05434770 Mon Sep 17 00:00:00 2001 From: thement <40525767+thement@users.noreply.github.com> Date: Fri, 17 Mar 2023 21:05:58 +0100 Subject: [PATCH 19/93] Implement non-greedy tokenizer that tries to maximize token lengths (#242) * Implement non-greedy tokenizer that tries to maximize token lengths * Insert single space in front of the prompt - this is to match original llama tokenizer behavior --------- Co-authored-by: Jakub Horak --- main.cpp | 2 ++ utils.cpp | 70 ++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 45 insertions(+), 27 deletions(-) diff --git a/main.cpp b/main.cpp index ca0fca8b3..39c5d7b76 100644 --- a/main.cpp +++ b/main.cpp @@ -845,6 +845,8 @@ int main(int argc, char ** argv) { std::vector logits; + // Add a space in front of the first character to match OG llama tokenizer behavior + params.prompt.insert(0, 1, ' '); // tokenize the prompt std::vector embd_inp = ::llama_tokenize(vocab, params.prompt, true); diff --git a/utils.cpp b/utils.cpp index 9e50487ef..22ef59377 100644 --- a/utils.cpp +++ b/utils.cpp @@ -287,40 +287,56 @@ std::vector gpt_tokenize(const gpt_vocab & vocab, const std::stri return tokens; } +// TODO: Calculate this constant from the vocabulary +#define 
MAX_TOKEN_LEN 18 +// SentencePiece implementation after https://guillaume-be.github.io/2020-05-30/sentence_piece std::vector llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos) { - //auto res = gpt_tokenize(vocab, text); - - //if (bos) { - // res.insert(res.begin(), 1); // TODO: replace with vocab.bos - //} - std::vector res; + std::vector score; + std::vector prev; + int len = text.length(); + + score.resize(len + 1); + prev.resize(len + 1); + + // Forward pass + for (int i = 0; i < len; i++) { + int max_len = std::min(len - i, MAX_TOKEN_LEN); + for (int sub_len = 1; sub_len <= len - i; sub_len++) { + auto sub = text.substr(i, sub_len); + auto token = vocab.token_to_id.find(sub); + if (token != vocab.token_to_id.end()) { + int token_score = sub.length() * sub.length(); + int local_score = score[i] + token_score; + int next = i + sub_len; + if (score[next] < local_score) { + score[next] = local_score; + prev[next] = (*token).second; + } + } + } + } + + // Backward pass + int i = len; + while (i > 0) { + gpt_vocab::id token_id = prev[i]; + if (token_id == 0) { + // TODO: Return error or something more meaningful + printf("failed to tokenize string!\n"); + break; + } + res.push_back(token_id); + auto token = (*vocab.id_to_token.find(token_id)).second; + i -= token.length(); + } if (bos) { res.push_back(1); // TODO: replace with vocab.bos } - //find the longest token that matches the text - int pos = 0; - while (true) { - int l = 0; - int t = 0; - for (const auto & kv : vocab.id_to_token) { - if (kv.second.size() < l) continue; - if (kv.second.size() > text.size() - pos) continue; - if (text.substr(pos, kv.second.size()) == kv.second) { - l = kv.second.size(); - t = kv.first; - } - } - - if (l == 0) { - break; - } - - res.push_back(t); - pos += l; - } + // Pieces are in reverse order so correct that + std::reverse(res.begin(), res.end()); return res; } From a29274789309029fd88a9465e6d0832d4632272b Mon Sep 17 00:00:00 2001 From: Niklas Korz Date: Fri, 17 Mar 2023 23:03:48 +0100 Subject: [PATCH 20/93] Nix flake (#40) * Nix flake * Nix: only add Accelerate framework on macOS * Nix: development shel, direnv and compatibility * Nix: use python packages supplied by withPackages * Nix: remove channel compatibility * Nix: fix ARM neon dotproduct on macOS --------- Co-authored-by: Pavol Rusnak --- .gitignore | 4 ++++ flake.lock | 43 +++++++++++++++++++++++++++++++++++++++++++ flake.nix | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/.gitignore b/.gitignore index 5eb1ff1b8..3087b0ea5 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,10 @@ models/* /main /quantize +/result arm_neon.h compile_commands.json + +.envrc +.direnv/ diff --git a/flake.lock b/flake.lock new file mode 100644 index 000000000..343996da1 --- /dev/null +++ b/flake.lock @@ -0,0 +1,43 @@ +{ + "nodes": { + "flake-utils": { + "locked": { + "lastModified": 1676283394, + "narHash": "sha256-XX2f9c3iySLCw54rJ/CZs+ZK6IQy7GXNY4nSOyu2QG4=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "3db36a8b464d0c4532ba1c7dda728f4576d6d073", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1678470307, + "narHash": "sha256-OEeMUr3ueLIXyW/OaFUX5jUdimyQwMg/7e+/Q0gC/QE=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "0c4800d579af4ed98ecc47d464a5e7b0870c4b1f", + "type": "github" + }, + "original": { 
+ "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 000000000..dae4ff60f --- /dev/null +++ b/flake.nix @@ -0,0 +1,48 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { + inherit system; + }; + llama-python = pkgs.python310.withPackages (ps: with ps; [ + torch + numpy + sentencepiece + ]); + in + { + packages.default = pkgs.stdenv.mkDerivation { + name = "llama.cpp"; + src = ./.; + nativeBuildInputs = with pkgs; [ cmake ]; + buildInputs = with pkgs; lib.optionals stdenv.isDarwin [ + darwin.apple_sdk.frameworks.Accelerate + ]; + cmakeFlags = with pkgs; lib.optionals (system == "aarch64-darwin") [ + "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" + ]; + installPhase = '' + mkdir -p $out/bin + mv llama $out/bin/llama + mv quantize $out/bin/quantize + echo "#!${llama-python}/bin/python" > $out/bin/convert-pth-to-ggml + cat ${./convert-pth-to-ggml.py} >> $out/bin/convert-pth-to-ggml + chmod +x $out/bin/convert-pth-to-ggml + ''; + }; + devShells.default = pkgs.mkShell { + packages = with pkgs; [ + cmake + llama-python + ] ++ lib.optionals stdenv.isDarwin [ + darwin.apple_sdk.frameworks.Accelerate + ]; + }; + } + ); +} From b2de7f18dfbb93463eeb5b4392117bbe82d5bd1b Mon Sep 17 00:00:00 2001 From: anzz1 Date: Sat, 18 Mar 2023 09:27:12 +0200 Subject: [PATCH 21/93] CI Improvements (#230) * CI Improvements Manual build feature, autoreleases for Windows * better CI naming convention use branch name in releases and tags --- .github/workflows/build.yml | 57 ++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a94a38991..9c1de5823 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,5 +1,20 @@ name: CI -on: [push, pull_request] + +on: + workflow_dispatch: # allows manual triggering + inputs: + create_release: + description: 'Create new release' + required: true + type: boolean + push: + paths: ['.github/workflows/**', 'CMakeLists.txt', 'Makefile', '**.h', '*.c', '**.cpp'] + pull_request: + types: [opened, synchronize, edited, reopened, review_requested, ready_for_review] + paths: ['CMakeLists.txt', 'Makefile', '**.h', '*.c', '**.cpp'] + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} jobs: ubuntu-latest-make: @@ -7,14 +22,17 @@ jobs: steps: - name: Clone + id: checkout uses: actions/checkout@v1 - name: Dependencies + id: depends run: | sudo apt-get update sudo apt-get install build-essential - name: Build + id: make_build run: | make @@ -42,13 +60,16 @@ jobs: steps: - name: Clone + id: checkout uses: actions/checkout@v1 - name: Dependencies + id: depends run: | brew update - name: Build + id: make_build run: | make @@ -75,15 +96,49 @@ jobs: steps: - name: Clone + id: checkout uses: actions/checkout@v1 - name: Build + id: cmake_build run: | mkdir build cd build cmake .. cmake --build . 
--config Release + - name: Get commit hash + id: commit + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: pr-mpt/actions-commit-hash@v2 + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + 7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-x64.zip .\build\Release\* + + - name: Create release + id: create_release + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: zendesk/action-create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }} + + - name: Upload release + id: upload_release + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: .\llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-x64.zip + asset_name: llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-x64.zip + asset_content_type: application/octet-stream + # ubuntu-latest-gcc: # runs-on: ubuntu-latest # From a81d0c2a171a4446e6a21a3ec74a0c0768d71184 Mon Sep 17 00:00:00 2001 From: Gary Linscott Date: Sat, 18 Mar 2023 04:17:19 -0700 Subject: [PATCH 22/93] Fix n^2 loop in tokenization (#254) This causes long prompts to parse very slowly. --- utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils.cpp b/utils.cpp index 22ef59377..efa2e3c35 100644 --- a/utils.cpp +++ b/utils.cpp @@ -302,7 +302,7 @@ std::vector llama_tokenize(const gpt_vocab & vocab, const std::st // Forward pass for (int i = 0; i < len; i++) { int max_len = std::min(len - i, MAX_TOKEN_LEN); - for (int sub_len = 1; sub_len <= len - i; sub_len++) { + for (int sub_len = 1; sub_len <= max_len; sub_len++) { auto sub = text.substr(i, sub_len); auto token = vocab.token_to_id.find(sub); if (token != vocab.token_to_id.end()) { From e03e359730c127f888fcf00e93375771bc0a3500 Mon Sep 17 00:00:00 2001 From: Justin Suess Date: Sat, 18 Mar 2023 07:44:09 -0400 Subject: [PATCH 23/93] fixed warning with std::ignore about unused function result (#151) fixed warning with std::ignore about unused function result --- main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 39c5d7b76..eb78fe5ab 100644 --- a/main.cpp +++ b/main.cpp @@ -1011,7 +1011,7 @@ int main(int argc, char ** argv) { if(params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN); if (scanf("%255[^\n]%n%*c", buf, &n_read) <= 0) { // presumable empty line, consume the newline - scanf("%*c"); + std::ignore = scanf("%*c"); n_read=0; } if(params.use_color) printf(ANSI_COLOR_RESET); From d3f202d57b694376cef6f381a6b6901825c3f6d9 Mon Sep 17 00:00:00 2001 From: Alex Nguyen Date: Sat, 18 Mar 2023 20:51:49 +0700 Subject: [PATCH 24/93] Remove unused code since n_vocab is model.hparams.n_vocab (#262) --- main.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/main.cpp b/main.cpp index eb78fe5ab..c88405b82 100644 --- a/main.cpp +++ b/main.cpp @@ -143,16 +143,8 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab // load 
vocab { - const int32_t n_vocab = model.hparams.n_vocab; - - if (n_vocab != model.hparams.n_vocab) { - fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", - __func__, fname.c_str(), n_vocab, model.hparams.n_vocab); - return false; - } - std::string word; - for (int i = 0; i < n_vocab; i++) { + for (int i = 0; i < model.hparams.n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); From 554b54152145c30618bac171efb712cf4a7d1e96 Mon Sep 17 00:00:00 2001 From: Pavol Rusnak Date: Sat, 18 Mar 2023 21:58:46 +0100 Subject: [PATCH 25/93] Add memory/disk requirements to readme --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 808d54e89..fc8b2fda3 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,17 @@ python3 convert-pth-to-ggml.py models/7B/ 1 When running the larger models, make sure you have enough disk space to store all the intermediate files. -TODO: add model disk/mem requirements +### Memory/Disk Requirements + +As the models are currently fully loaded into memory, you will need adequate disk space to save them +and sufficient RAM to load them. At the moment, memory and disk requirements are the same. + +| model | original size | quantized size (4-bit) | +|-------|---------------|------------------------| +| 7B | 13 GB | 3.9 GB | +| 15B | 24 GB | 7.8 GB | +| 30B | 60 GB | 19.5 GB | +| 65B | 120 GB | 38.5 GB | ### Interactive mode From 1e5a6d088d0f3a967c6e86298a756daec9e8df12 Mon Sep 17 00:00:00 2001 From: Pavol Rusnak Date: Sat, 18 Mar 2023 22:20:04 +0100 Subject: [PATCH 26/93] Add note about Python 3.11 to readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index fc8b2fda3..187f82f61 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,8 @@ python3 convert-pth-to-ggml.py models/7B/ 1 ./main -m ./models/7B/ggml-model-q4_0.bin -n 128 ``` +Currently, it's best to use Python 3.9 or Python 3.10, as `sentencepiece` has not yet published a wheel for Python 3.11. + When running the larger models, make sure you have enough disk space to store all the intermediate files. ### Memory/Disk Requirements From 6f61c18ec9a30416e21ed5abfb1321bdb14979be Mon Sep 17 00:00:00 2001 From: Pavol Rusnak Date: Sat, 18 Mar 2023 22:39:46 +0100 Subject: [PATCH 27/93] Fix typo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 187f82f61..1fe5b5426 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ and sufficient RAM to load them. At the moment, memory and disk requirements are | model | original size | quantized size (4-bit) | |-------|---------------|------------------------| | 7B | 13 GB | 3.9 GB | -| 15B | 24 GB | 7.8 GB | +| 13B | 24 GB | 7.8 GB | | 30B | 60 GB | 19.5 GB | | 65B | 120 GB | 38.5 GB | From d7def1a7524f712e5ebb7cd02bab0f13aa56a7f9 Mon Sep 17 00:00:00 2001 From: Ronsor Date: Sat, 18 Mar 2023 17:10:47 -0700 Subject: [PATCH 28/93] Warn user if a context size greater than 2048 tokens is specified (#274) LLaMA doesn't support more than 2048 token context sizes, and going above that produces terrible results. 
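
For readability, the check this patch adds can be sketched on its own as follows. This is a sketch only: the patch itself inlines the check in `main()` rather than using a helper, and the function name here is illustrative; `params.n_ctx`, the 2048-token limit, and the warning text are taken from the diff below.

```
// Sketch: warn when the requested context window exceeds the 2048 tokens
// LLaMA was trained with. The actual patch performs this check inline in main().
#include <cstdio>

static void warn_if_ctx_too_large(int n_ctx) {
    const int max_supported_ctx = 2048; // LLaMA's training context length
    if (n_ctx > max_supported_ctx) {
        fprintf(stderr,
                "warning: model does not support context sizes greater than %d tokens "
                "(%d specified); expect poor results\n",
                max_supported_ctx, n_ctx);
    }
}
```
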
--- main.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/main.cpp b/main.cpp index c88405b82..105dd91ee 100644 --- a/main.cpp +++ b/main.cpp @@ -792,6 +792,11 @@ int main(int argc, char ** argv) { if (gpt_params_parse(argc, argv, params) == false) { return 1; } + + if (params.n_ctx > 2048) { + fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);" + "expect poor results\n", __func__, params.n_ctx); + } if (params.seed < 0) { params.seed = time(NULL); From 22213a17b56336bbea384a572a9484ce208c0333 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Mar 2023 17:30:00 +0200 Subject: [PATCH 29/93] Change RMSNorm eps to 1e-6 (#173) I think this is what is used in the Python code --- ggml.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml.c b/ggml.c index 4fb83adbd..4813f74c8 100644 --- a/ggml.c +++ b/ggml.c @@ -5556,7 +5556,7 @@ static void ggml_compute_forward_rms_norm_f32( const size_t nb2 = dst->nb[2]; const size_t nb3 = dst->nb[3]; - const ggml_float eps = 1e-5f; // TODO: make this a parameter + const ggml_float eps = 1e-6f; // TODO: make this a parameter // TODO: optimize for (int i03 = 0; i03 < ne03; i03++) { @@ -5572,7 +5572,7 @@ static void ggml_compute_forward_rms_norm_f32( mean /= ne00; float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3); - + memcpy(y, x, ne00 * sizeof(float)); // for (int i00 = 0; i00 < ne00; i00++) { // y[i00] = x[i00]; From 9e1707218a24ff758c7b623594f8c0ce5e12eb6c Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Mar 2023 18:37:02 +0200 Subject: [PATCH 30/93] Add "--instruct" argument for usage with Alpaca (#240) Also start adding prompts in "./prompts" --- main.cpp | 70 +++++++++++++++++++++++++-------------- prompts/alpaca.txt | 1 + prompts/chat-with-bob.txt | 7 ++++ utils.cpp | 18 ++++------ utils.h | 8 ++--- 5 files changed, 64 insertions(+), 40 deletions(-) create mode 100644 prompts/alpaca.txt create mode 100644 prompts/chat-with-bob.txt diff --git a/main.cpp b/main.cpp index 105dd91ee..a95e2e721 100644 --- a/main.cpp +++ b/main.cpp @@ -176,8 +176,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab } } - const ggml_type wtype2 = GGML_TYPE_F32; - auto & ctx = model.ctx; size_t ctx_size = 0; @@ -237,7 +235,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab const int n_embd = hparams.n_embd; const int n_layer = hparams.n_layer; - const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; model.layers.resize(n_layer); @@ -539,9 +536,7 @@ bool llama_eval( const int n_vocab = hparams.n_vocab; const int n_rot = hparams.n_embd/hparams.n_head; - const int d_key = n_embd/n_head; - - // TODO: check if this size scales with n_ctx linearly and remove constant. somehow I feel it wasn't the case + // TODO: check if this size scales with n_ctx linearly and remove constant. 
somehow I feel it wasn't the case // static size_t buf_size = hparams.n_ctx*1024*1024; static size_t buf_size = 512u*1024*1024; static void * buf = malloc(buf_size); @@ -792,7 +787,7 @@ int main(int argc, char ** argv) { if (gpt_params_parse(argc, argv, params) == false) { return 1; } - + if (params.n_ctx > 2048) { fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);" "expect poor results\n", __func__, params.n_ctx); @@ -820,7 +815,7 @@ int main(int argc, char ** argv) { // load the model { const int64_t t_start_us = ggml_time_us(); - if (!llama_model_load(params.model, model, vocab, params.n_ctx)) { + if (!llama_model_load(params.model, model, vocab, params.n_ctx)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } @@ -849,9 +844,25 @@ int main(int argc, char ** argv) { params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); + // prefix & suffix for instruct mode + const std::vector inp_pfx = ::llama_tokenize(vocab, "\n\n### Instruction:\n\n", true); + const std::vector inp_sfx = ::llama_tokenize(vocab, "\n\n### Response:\n\n", false); + + // in instruct mode, we inject a prefix and a suffix to each input by the user + if (params.instruct) { + fprintf(stderr, "== Instruction mode enabled ==\n"); + params.interactive = true; + params.antiprompt = "### Instruction:\n\n"; + } + // tokenize the reverse prompt std::vector antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false); + // enable interactive mode if reverse prompt is specified + if (!antiprompt_inp.empty()) { + params.interactive = true; + } + fprintf(stderr, "\n"); fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); @@ -872,7 +883,7 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: interactive mode on.\n", __func__); - if(antiprompt_inp.size()) { + if (antiprompt_inp.size()) { fprintf(stderr, "%s: reverse prompt: '%s'\n", __func__, params.antiprompt.c_str()); fprintf(stderr, "%s: number of tokens in reverse prompt = %zu\n", __func__, antiprompt_inp.size()); for (int i = 0; i < (int) antiprompt_inp.size(); i++) { @@ -894,31 +905,27 @@ int main(int argc, char ** argv) { std::vector last_n_tokens(last_n_size); std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0); - if (params.interactive) { fprintf(stderr, "== Running in interactive mode. 
==\n" #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) " - Press Ctrl+C to interject at any time.\n" #endif " - Press Return to return control to LLaMa.\n" - " - If you want to submit another line, end your input in '\\'.\n"); + " - If you want to submit another line, end your input in '\\'.\n\n"); + is_interacting = true; } - int remaining_tokens = params.n_predict; int input_consumed = 0; bool input_noecho = false; - // prompt user immediately after the starting prompt has been loaded - if (params.interactive_start) { - is_interacting = true; - } + int remaining_tokens = params.n_predict; // set the color for the prompt which will be output initially if (params.use_color) { printf(ANSI_COLOR_YELLOW); } - while (remaining_tokens > 0) { + while (remaining_tokens > 0 || params.interactive) { // predict if (embd.size() > 0) { const int64_t t_start_us = ggml_time_us(); @@ -971,13 +978,13 @@ int main(int argc, char ** argv) { last_n_tokens.erase(last_n_tokens.begin()); last_n_tokens.push_back(embd_inp[input_consumed]); ++input_consumed; - if (embd.size() > params.n_batch) { + if ((int) embd.size() > params.n_batch) { break; } } // reset color to default if we there is no pending user input - if (!input_noecho && params.use_color && embd_inp.size() == input_consumed) { + if (!input_noecho && params.use_color && (int) embd_inp.size() == input_consumed) { printf(ANSI_COLOR_RESET); } } @@ -999,19 +1006,26 @@ int main(int argc, char ** argv) { is_interacting = true; } if (is_interacting) { + if (params.instruct) { + input_consumed = embd_inp.size(); + embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end()); + + printf("\n> "); + } + // currently being interactive - bool another_line=true; + bool another_line = true; while (another_line) { fflush(stdout); char buf[256] = {0}; int n_read; - if(params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN); + if (params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN); if (scanf("%255[^\n]%n%*c", buf, &n_read) <= 0) { // presumable empty line, consume the newline std::ignore = scanf("%*c"); n_read=0; } - if(params.use_color) printf(ANSI_COLOR_RESET); + if (params.use_color) printf(ANSI_COLOR_RESET); if (n_read > 0 && buf[n_read-1]=='\\') { another_line = true; @@ -1026,6 +1040,10 @@ int main(int argc, char ** argv) { std::vector line_inp = ::llama_tokenize(vocab, buf, false); embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); + if (params.instruct) { + embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); + } + remaining_tokens -= line_inp.size(); input_noecho = true; // do not echo this again @@ -1037,8 +1055,12 @@ int main(int argc, char ** argv) { // end of text token if (embd.back() == 2) { - fprintf(stderr, " [end of text]\n"); - break; + if (params.interactive) { + is_interacting = true; + } else { + fprintf(stderr, " [end of text]\n"); + break; + } } } diff --git a/prompts/alpaca.txt b/prompts/alpaca.txt new file mode 100644 index 000000000..2224bdeb0 --- /dev/null +++ b/prompts/alpaca.txt @@ -0,0 +1 @@ +Below is an instruction that describes a task. Write a response that appropriately completes the request. diff --git a/prompts/chat-with-bob.txt b/prompts/chat-with-bob.txt new file mode 100644 index 000000000..009da39ae --- /dev/null +++ b/prompts/chat-with-bob.txt @@ -0,0 +1,7 @@ +Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision. 
+ +User: Hello, Bob. +Bob: Hello. How may I help you today? +User: Please tell me the largest city in Europe. +Bob: Sure. The largest city in Europe is Moscow, the capital of Russia. +User: diff --git a/utils.cpp b/utils.cpp index efa2e3c35..be81c6cd0 100644 --- a/utils.cpp +++ b/utils.cpp @@ -38,13 +38,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } else if (arg == "-p" || arg == "--prompt") { params.prompt = argv[++i]; } else if (arg == "-f" || arg == "--file") { - std::ifstream file(argv[++i]); - - std::copy(std::istreambuf_iterator(file), - std::istreambuf_iterator(), - back_inserter(params.prompt)); - + std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); } else if (arg == "-n" || arg == "--n_predict") { params.n_predict = std::stoi(argv[++i]); } else if (arg == "--top_k") { @@ -65,9 +60,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.model = argv[++i]; } else if (arg == "-i" || arg == "--interactive") { params.interactive = true; - } else if (arg == "--interactive-start") { - params.interactive = true; - params.interactive_start = true; + } else if (arg == "-ins" || arg == "--instruct") { + params.instruct = true; } else if (arg == "--color") { params.use_color = true; } else if (arg == "-r" || arg == "--reverse-prompt") { @@ -85,13 +79,13 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { return true; } -void gpt_print_usage(int argc, char ** argv, const gpt_params & params) { +void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, "usage: %s [options]\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "options:\n"); fprintf(stderr, " -h, --help show this help message and exit\n"); fprintf(stderr, " -i, --interactive run in interactive mode\n"); - fprintf(stderr, " --interactive-start run in interactive mode and poll user input at startup\n"); + fprintf(stderr, " -ins, --instruct run in instruction mode (use with Alpaca models)\n"); fprintf(stderr, " -r PROMPT, --reverse-prompt PROMPT\n"); fprintf(stderr, " in interactive mode, poll user input upon seeing PROMPT\n"); fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n"); @@ -398,7 +392,7 @@ gpt_vocab::id llama_sample_top_p_top_k( logits_id.push_back(std::make_pair(logits[i]*scale*repeat_penalty, i)); } else { logits_id.push_back(std::make_pair(logits[i]*scale/repeat_penalty, i)); - } + } } else { logits_id.push_back(std::make_pair(logits[i]*scale, i)); } diff --git a/utils.h b/utils.h index c1a8498a7..e329ba168 100644 --- a/utils.h +++ b/utils.h @@ -27,14 +27,14 @@ struct gpt_params { int32_t n_batch = 8; // batch size for prompt processing - std::string model = "models/lamma-7B/ggml-model.bin"; // model path - std::string prompt; + std::string model = "models/lamma-7B/ggml-model.bin"; // model path + std::string prompt = ""; + std::string antiprompt = ""; // string upon seeing which more user input is prompted bool use_color = false; // use color to distinguish generations and inputs bool interactive = false; // interactive mode - bool interactive_start = false; // reverse prompt immediately - std::string antiprompt = ""; // string upon seeing which more user input is prompted + bool instruct = false; // instruction mode (used for Alpaca models) }; bool gpt_params_parse(int argc, char ** argv, gpt_params & params); From a4e63b73dfa1894387926cc8072b5f36deebf0a5 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 
19 Mar 2023 18:49:50 +0200 Subject: [PATCH 31/93] Add instruction for using Alpaca (#240) --- README.md | 48 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1fe5b5426..65325fc7f 100644 --- a/README.md +++ b/README.md @@ -176,21 +176,51 @@ In this mode, you can always interrupt generation by pressing Ctrl+C and enter o Here is an example few-shot interaction, invoked with the command ``` -./main -m ./models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" \ - -p \ -"Transcript of a dialog, where the User interacts with an Assistant named Bob. Bob is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision. - -User: Hello, Bob. -Bob: Hello. How may I help you today? -User: Please tell me the largest city in Europe. -Bob: Sure. The largest city in Europe is Moscow, the capital of Russia. -User:" +./main -m ./models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt ``` Note the use of `--color` to distinguish between user input and generated text. ![image](https://user-images.githubusercontent.com/1991296/224575029-2af3c7dc-5a65-4f64-a6bb-517a532aea38.png) +### Instruction mode with Alpaca + +First, download the `ggml` Alpaca model into the `./models` folder: + +``` +# use one of these +# NOTE: these are copied from the alpaca.cpp repo - not sure how long these will work +# TODO: add a script to simplify the download +curl -o ggml-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC +curl -o ggml-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC +curl -o ggml-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC +``` + +Now run the `main` tool like this: + +``` +./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins +``` + +Sample run: + +``` +== Running in interactive mode. == + - Press Ctrl+C to interject at any time. + - Press Return to return control to LLaMa. + - If you want to submit another line, end your input in '\'. + + Below is an instruction that describes a task. Write a response that appropriately completes the request. + +> How many letters are there in the English alphabet? +There 26 letters in the English Alphabet +> What is the most common way of transportation in Amsterdam? +The majority (54%) are using public transit. This includes buses, trams and metros with over 100 lines throughout the city which make it very accessible for tourists to navigate around town as well as locals who commute by tram or metro on a daily basis +> List 5 words that start with "ca". +cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach. +> +``` + ### Android You can easily run `llama.cpp` on Android device with [termux](https://play.google.com/store/apps/details?id=com.termux). 
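
The Alpaca instructions above rely on the `--instruct` changes from the earlier patch: each line the user types is wrapped in the Alpaca prompt template before being tokenized and fed to the model. A minimal sketch of that wrapping is shown below, assuming the prefix and suffix strings from `inp_pfx`/`inp_sfx` in main.cpp; `wrap_instruction` is an illustrative helper, not code from the repository.

```
// Sketch of how instruct mode frames each user input before tokenization.
// The prefix/suffix match the inp_pfx/inp_sfx strings added in main.cpp;
// the helper function itself is illustrative only.
#include <string>

static std::string wrap_instruction(const std::string & user_input) {
    const std::string prefix = "\n\n### Instruction:\n\n";
    const std::string suffix = "\n\n### Response:\n\n";
    return prefix + user_input + suffix; // then tokenized and appended to embd_inp
}
```
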
From 70f01cb8632f73b5cf70428608b89cd3c0775d23 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Mar 2023 19:04:44 +0200 Subject: [PATCH 32/93] Drop trailing new line from file prompts (#80) --- main.cpp | 1 - utils.cpp | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index a95e2e721..2a07bbb40 100644 --- a/main.cpp +++ b/main.cpp @@ -850,7 +850,6 @@ int main(int argc, char ** argv) { // in instruct mode, we inject a prefix and a suffix to each input by the user if (params.instruct) { - fprintf(stderr, "== Instruction mode enabled ==\n"); params.interactive = true; params.antiprompt = "### Instruction:\n\n"; } diff --git a/utils.cpp b/utils.cpp index be81c6cd0..320d7c31c 100644 --- a/utils.cpp +++ b/utils.cpp @@ -40,6 +40,9 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } else if (arg == "-f" || arg == "--file") { std::ifstream file(argv[++i]); std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); + if (params.prompt.back() == '\n') { + params.prompt.pop_back(); + } } else if (arg == "-n" || arg == "--n_predict") { params.n_predict = std::stoi(argv[++i]); } else if (arg == "--top_k") { From 467b149761cd63248b00d6ffb204d50a4cbb451a Mon Sep 17 00:00:00 2001 From: qunash Date: Sun, 19 Mar 2023 20:17:39 +0300 Subject: [PATCH 33/93] Refactoring `convert-pth-to-ggml.py`: more concise and readable (#109) * Refactor get_n_parts function to simplify code and improve readability * Use f-strings instead of concatenation * Refactoring: more concise and readable * modularize --------- Co-authored-by: Georgi Gerganov --- convert-pth-to-ggml.py | 192 ++++++++++++++++++----------------------- 1 file changed, 84 insertions(+), 108 deletions(-) diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index d0eb213c8..819487657 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -16,7 +16,7 @@ # At the start of the ggml file we write the model parameters # and vocabulary. 
# -import os +import argparse import sys import json import struct @@ -24,137 +24,91 @@ import numpy as np import torch from sentencepiece import SentencePieceProcessor -if len(sys.argv) < 3: - print("Usage: convert-ckpt-to-ggml.py dir-model ftype\n") - print(" ftype == 0 -> float32") - print(" ftype == 1 -> float16") - sys.exit(1) +def parse_args(): -# output in the same directory as the model -dir_model = sys.argv[1] - -fname_hparams = sys.argv[1] + "/params.json" -fname_tokenizer = sys.argv[1] + "/../tokenizer.model" + parser = argparse.ArgumentParser(description='Convert a LLaMA model checkpoint to a ggml compatible file') + parser.add_argument('dir_model', help='directory containing the model checkpoint') + parser.add_argument('ftype', type=int, choices=[0, 1], default=1, help='file type (0: float32, 1: float16)') + return parser.parse_args() def get_n_parts(dim): - if dim == 4096: - return 1 - elif dim == 5120: - return 2 - elif dim == 6656: - return 4 - elif dim == 8192: - return 8 - else: - print("Invalid dim: " + str(dim)) + + mappings = {4096: 1, 5120: 2, 6656: 4, 8192: 8} + n_parts = mappings.get(dim) + if n_parts is None: + print(f"Invalid dim: {dim}") sys.exit(1) -# possible data types -# ftype == 0 -> float32 -# ftype == 1 -> float16 -# -# map from ftype to string -ftype_str = ["f32", "f16"] + print(f"n_parts = {n_parts}\n") + return n_parts -ftype = 1 -if len(sys.argv) > 2: - ftype = int(sys.argv[2]) - if ftype < 0 or ftype > 1: - print("Invalid ftype: " + str(ftype)) - sys.exit(1) - fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" +def load_hparams_and_tokenizer(dir_model): + + fname_hparams = f"{dir_model}/params.json" + fname_tokenizer = f"{dir_model}/../tokenizer.model" -if os.path.exists(fname_out): - print(f"Skip conversion, it already exists: {fname_out}") - sys.exit(0) + with open(fname_hparams, "r") as f: + hparams = json.load(f) + print(hparams) -with open(fname_hparams, "r") as f: - hparams = json.load(f) + tokenizer = SentencePieceProcessor(fname_tokenizer) + hparams.update({"vocab_size": tokenizer.vocab_size()}) -tokenizer = SentencePieceProcessor(fname_tokenizer) + return hparams, tokenizer -hparams.update({"vocab_size": tokenizer.vocab_size()}) +def write_header(fout, hparams, ftype): + + keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"] + values = [ + 0x67676d6c, # magic: ggml in hex + *[hparams[key] for key in keys], + hparams["dim"] // hparams["n_heads"], # rot (obsolete) + ftype + ] + fout.write(struct.pack("i" * len(values), *values)) -n_parts = get_n_parts(hparams["dim"]) +def write_tokens(fout, tokenizer): -print(hparams) -print('n_parts = ', n_parts) - -for p in range(n_parts): - print('Processing part ', p) - - #fname_model = sys.argv[1] + "/consolidated.00.pth" - fname_model = sys.argv[1] + "/consolidated.0" + str(p) + ".pth" - fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" - if (p > 0): - fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" + "." 
+ str(p) - - model = torch.load(fname_model, map_location="cpu") - - fout = open(fname_out, "wb") - - fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex - fout.write(struct.pack("i", hparams["vocab_size"])) - fout.write(struct.pack("i", hparams["dim"])) - fout.write(struct.pack("i", hparams["multiple_of"])) - fout.write(struct.pack("i", hparams["n_heads"])) - fout.write(struct.pack("i", hparams["n_layers"])) - fout.write(struct.pack("i", hparams["dim"] // hparams["n_heads"])) # rot (obsolete) - fout.write(struct.pack("i", ftype)) - - # Is this correct?? for i in range(tokenizer.vocab_size()): if tokenizer.is_unknown(i): - # "" token (translated as ??) text = " \u2047 ".encode("utf-8") - fout.write(struct.pack("i", len(text))) - fout.write(text) elif tokenizer.is_control(i): - # ""/"" tokens - fout.write(struct.pack("i", 0)) + text = b"" elif tokenizer.is_byte(i): - # "" tokens (which may be invalid UTF-8) piece = tokenizer.id_to_piece(i) if len(piece) != 6: - print("Invalid token: " + piece) + print(f"Invalid token: {piece}") sys.exit(1) byte_value = int(piece[3:-1], 16) - fout.write(struct.pack("i", 1)) - fout.write(struct.pack("B", byte_value)) + text = struct.pack("B", byte_value) else: - # normal token. Uses U+2581 (LOWER ONE EIGHTH BLOCK) to represent spaces. text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") - fout.write(struct.pack("i", len(text))) - fout.write(text) + fout.write(struct.pack("i", len(text))) + fout.write(text) - for k, v in model.items(): - name = k - shape = v.shape +def process_and_write_variables(fout, model, ftype): - # skip layers.X.attention.inner_attention.rope.freqs - if name[-5:] == "freqs": + for name, data in model.items(): + + if name.endswith("freqs"): continue - - print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype) - - #data = tf.train.load_variable(dir_model, name).squeeze() - data = v.numpy().squeeze() - n_dims = len(data.shape); + + shape = data.shape + + print(f"Processing variable: {name} with shape: {shape} and type: {data.dtype}\n") + + data = np.squeeze(data) + n_dims = len(shape) # for efficiency - transpose some matrices # "model/h.*/attn/c_attn/w" # "model/h.*/attn/c_proj/w" # "model/h.*/mlp/c_fc/w" # "model/h.*/mlp/c_proj/w" - #if name[-14:] == "/attn/c_attn/w" or \ - # name[-14:] == "/attn/c_proj/w" or \ - # name[-11:] == "/mlp/c_fc/w" or \ - # name[-13:] == "/mlp/c_proj/w": - # print(" Transposing") + #if name.endswith(("/attn/c_attn/w", "/attn/c_proj/w", "/mlp/c_fc/w", "/mlp/c_proj/w")): + # print("Transposing") # data = data.transpose() - dshape = data.shape - # default type is fp16 ftype_cur = 1 if ftype == 0 or n_dims == 1: @@ -164,18 +118,40 @@ for p in range(n_parts): # header sname = name.encode('utf-8') - fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur)) - for i in range(n_dims): - fout.write(struct.pack("i", dshape[n_dims - 1 - i])) - fout.write(sname); - + fout.write(struct.pack("iii", len(data.shape), len(sname), ftype_cur)) + for dim in reversed(data.shape): + fout.write(struct.pack("i", dim)) + fout.write(sname) + # data data.tofile(fout) - # I hope this deallocates the memory .. - model = None +def main(): - fout.close() + args = parse_args() + dir_model = args.dir_model + ftype = args.ftype + ftype_str = ["f32", "f16"] - print("Done. 
Output file: " + fname_out + ", (part ", p, ")") - print("") + hparams, tokenizer = load_hparams_and_tokenizer(dir_model) + n_parts = get_n_parts(hparams["dim"]) + + for p in range(n_parts): + + print(f"Processing part {p}\n") + + fname_model = f"{dir_model}/consolidated.0{p}.pth" + fname_out = f"{dir_model}/ggml-model-{ftype_str[ftype]}.bin{'' if p == 0 else '.' + str(p)}" + + model = torch.load(fname_model, map_location="cpu") + + with open(fname_out, "wb") as fout: + write_header(fout, hparams, ftype) + write_tokens(fout, tokenizer) + process_and_write_variables(fout, model, ftype) + + del model + print(f"Done. Output file: {fname_out}, (part {p})\n") + +if __name__ == "__main__": + main() From c1c7026b470ced0b8a6c67e968c04bb47864def1 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Mar 2023 19:33:18 +0200 Subject: [PATCH 34/93] Fix python stuff (#109) --- convert-pth-to-ggml.py | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index 819487657..c1941a811 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -32,7 +32,7 @@ def parse_args(): return parser.parse_args() def get_n_parts(dim): - + mappings = {4096: 1, 5120: 2, 6656: 4, 8192: 8} n_parts = mappings.get(dim) if n_parts is None: @@ -43,7 +43,7 @@ def get_n_parts(dim): return n_parts def load_hparams_and_tokenizer(dir_model): - + fname_hparams = f"{dir_model}/params.json" fname_tokenizer = f"{dir_model}/../tokenizer.model" @@ -57,7 +57,7 @@ def load_hparams_and_tokenizer(dir_model): return hparams, tokenizer def write_header(fout, hparams, ftype): - + keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"] values = [ 0x67676d6c, # magic: ggml in hex @@ -88,26 +88,17 @@ def write_tokens(fout, tokenizer): def process_and_write_variables(fout, model, ftype): - for name, data in model.items(): - + for name, datao in model.items(): + if name.endswith("freqs"): continue - - shape = data.shape - - print(f"Processing variable: {name} with shape: {shape} and type: {data.dtype}\n") - - data = np.squeeze(data) - n_dims = len(shape) - # for efficiency - transpose some matrices - # "model/h.*/attn/c_attn/w" - # "model/h.*/attn/c_proj/w" - # "model/h.*/mlp/c_fc/w" - # "model/h.*/mlp/c_proj/w" - #if name.endswith(("/attn/c_attn/w", "/attn/c_proj/w", "/mlp/c_fc/w", "/mlp/c_proj/w")): - # print("Transposing") - # data = data.transpose() + shape = datao.shape + + print(f"Processing variable: {name} with shape: {shape} and type: {datao.dtype}") + + data = datao.numpy().squeeze() + n_dims = len(shape) # default type is fp16 ftype_cur = 1 @@ -122,8 +113,8 @@ def process_and_write_variables(fout, model, ftype): for dim in reversed(data.shape): fout.write(struct.pack("i", dim)) fout.write(sname) - - # data + + # data output to file data.tofile(fout) def main(): @@ -139,7 +130,7 @@ def main(): for p in range(n_parts): print(f"Processing part {p}\n") - + fname_model = f"{dir_model}/consolidated.0{p}.pth" fname_out = f"{dir_model}/ggml-model-{ftype_str[ftype]}.bin{'' if p == 0 else '.' 
+ str(p)}" From c494ed5b94b429d3d73721235e78c9f5fa6e5652 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Mar 2023 19:46:32 +0200 Subject: [PATCH 35/93] Fix off-by-one bug (#115) --- main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 2a07bbb40..e8e8df82d 100644 --- a/main.cpp +++ b/main.cpp @@ -977,7 +977,7 @@ int main(int argc, char ** argv) { last_n_tokens.erase(last_n_tokens.begin()); last_n_tokens.push_back(embd_inp[input_consumed]); ++input_consumed; - if ((int) embd.size() > params.n_batch) { + if ((int) embd.size() >= params.n_batch) { break; } } From 160bfb217da5038ccbd74438f9f16a16012d7866 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Mar 2023 19:51:55 +0200 Subject: [PATCH 36/93] Update hot topics to mention Alpaca support --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 65325fc7f..504c1014b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ **Hot topics:** -- RMSNorm implementation / fixes: https://github.com/ggerganov/llama.cpp/issues/173 +- [Added Alpaca support](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca) - Cache input prompts for faster initialization: https://github.com/ggerganov/llama.cpp/issues/64 - Create a `llama.cpp` logo: https://github.com/ggerganov/llama.cpp/issues/105 From 0b366e735729327476ec31da02de3c9c9771ddfb Mon Sep 17 00:00:00 2001 From: Erik Scholz Date: Sun, 19 Mar 2023 18:57:00 +0100 Subject: [PATCH 37/93] Command line switch to use F16 for memory_k and memory_v (refactor of #154) (#294) * Use F16 for memory_k and memory_v * add command line switch to use f16 instead of f32 for memory k+v --------- Co-authored-by: Ty Everett --- main.cpp | 13 +++++++------ utils.cpp | 3 +++ utils.h | 1 + 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/main.cpp b/main.cpp index e8e8df82d..024b7e86a 100644 --- a/main.cpp +++ b/main.cpp @@ -86,7 +86,7 @@ struct llama_model { }; // load the model's weights from a file -bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) { +bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx, ggml_type memory_type = GGML_TYPE_F32) { fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); std::vector f_buf(1024*1024); @@ -207,8 +207,8 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab ctx_size += n_layer*(n_ff*n_embd*ggml_type_sizef(wtype)); // w2 ctx_size += n_layer*(n_ff*n_embd*ggml_type_sizef(wtype)); // w3 - ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_k - ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(GGML_TYPE_F32); // memory_v + ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(memory_type); // memory_k + ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(memory_type); // memory_v ctx_size += (5 + 10*n_layer)*256; // object overhead @@ -293,8 +293,8 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab const int n_mem = n_layer*n_ctx; const int n_elements = n_embd*n_mem; - model.memory_k = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); - model.memory_v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_elements); + model.memory_k = ggml_new_tensor_1d(ctx, memory_type, n_elements); + model.memory_v = ggml_new_tensor_1d(ctx, memory_type, n_elements); const size_t memory_size 
= ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); @@ -814,8 +814,9 @@ int main(int argc, char ** argv) { // load the model { + const ggml_type memory_type = params.memory_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32; const int64_t t_start_us = ggml_time_us(); - if (!llama_model_load(params.model, model, vocab, params.n_ctx)) { + if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } diff --git a/utils.cpp b/utils.cpp index 320d7c31c..99cb30bb9 100644 --- a/utils.cpp +++ b/utils.cpp @@ -49,6 +49,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.top_k = std::stoi(argv[++i]); } else if (arg == "-c" || arg == "--ctx_size") { params.n_ctx = std::stoi(argv[++i]); + } else if (arg == "--memory_f16") { + params.memory_f16 = true; } else if (arg == "--top_p") { params.top_p = std::stof(argv[++i]); } else if (arg == "--temp") { @@ -104,6 +106,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " --repeat_last_n N last n tokens to consider for penalize (default: %d)\n", params.repeat_last_n); fprintf(stderr, " --repeat_penalty N penalize repeat sequence of tokens (default: %.1f)\n", params.repeat_penalty); fprintf(stderr, " -c N, --ctx_size N size of the prompt context (default: %d)\n", params.n_ctx); + fprintf(stderr, " --memory_f16 use f16 instead of f32 for memory key+value\n"); fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp); fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); fprintf(stderr, " -m FNAME, --model FNAME\n"); diff --git a/utils.h b/utils.h index e329ba168..c68e4cba8 100644 --- a/utils.h +++ b/utils.h @@ -18,6 +18,7 @@ struct gpt_params { int32_t n_predict = 128; // new tokens to predict int32_t repeat_last_n = 64; // last n tokens to penalize int32_t n_ctx = 512; //context size + bool memory_f16 = false; // use f16 instead of f32 for memory kv // sampling parameters int32_t top_k = 40; From 084e2f0ec081c929343d44b09df07ae87cd1ed32 Mon Sep 17 00:00:00 2001 From: Qingyou Meng Date: Mon, 20 Mar 2023 02:10:00 +0800 Subject: [PATCH 38/93] interactive mode: print '\n' in sigint_handler, this flush stdout thus ensure color reset. (#283) --- main.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/main.cpp b/main.cpp index 024b7e86a..8e95c23d5 100644 --- a/main.cpp +++ b/main.cpp @@ -747,6 +747,7 @@ static bool is_interacting = false; #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) void sigint_handler(int signo) { printf(ANSI_COLOR_RESET); + printf("\n"); // this also force flush stdout. 
if (signo == SIGINT) { if (!is_interacting) { is_interacting=true; From 50fae10d0339f2bd639f69dd679c0201d939a265 Mon Sep 17 00:00:00 2001 From: slaren <2141330+slaren@users.noreply.github.com> Date: Sun, 19 Mar 2023 19:22:48 +0100 Subject: [PATCH 39/93] Add --ignore-eos parameter (#181) Co-authored-by: Georgi Gerganov --- main.cpp | 10 +++++++++- utils.cpp | 3 +++ utils.h | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 8e95c23d5..e18105624 100644 --- a/main.cpp +++ b/main.cpp @@ -27,6 +27,8 @@ #define ANSI_COLOR_RESET "\x1b[0m" #define ANSI_BOLD "\x1b[1m" +static const int EOS_TOKEN_ID = 2; + // determine number of model parts based on the dimension static const std::map LLAMA_N_PARTS = { { 4096, 1 }, @@ -956,6 +958,11 @@ int main(int argc, char ** argv) { { const int64_t t_start_sample_us = ggml_time_us(); + if (params.ignore_eos) { + // set the logit of the eos token to zero to avoid sampling it + logits[logits.size() - n_vocab + EOS_TOKEN_ID] = 0; + } + id = llama_sample_top_p_top_k(vocab, logits.data() + (logits.size() - n_vocab), last_n_tokens, repeat_penalty, top_k, top_p, temp, rng); last_n_tokens.erase(last_n_tokens.begin()); @@ -1055,7 +1062,8 @@ int main(int argc, char ** argv) { } // end of text token - if (embd.back() == 2) { + + if (embd.back() == EOS_TOKEN_ID) { if (params.interactive) { is_interacting = true; } else { diff --git a/utils.cpp b/utils.cpp index 99cb30bb9..a4135b9fd 100644 --- a/utils.cpp +++ b/utils.cpp @@ -71,6 +71,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.use_color = true; } else if (arg == "-r" || arg == "--reverse-prompt") { params.antiprompt = argv[++i]; + } else if (arg == "--ignore-eos") { + params.ignore_eos = true; } else if (arg == "-h" || arg == "--help") { gpt_print_usage(argc, argv, params); exit(0); @@ -106,6 +108,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " --repeat_last_n N last n tokens to consider for penalize (default: %d)\n", params.repeat_last_n); fprintf(stderr, " --repeat_penalty N penalize repeat sequence of tokens (default: %.1f)\n", params.repeat_penalty); fprintf(stderr, " -c N, --ctx_size N size of the prompt context (default: %d)\n", params.n_ctx); + fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating\n"); fprintf(stderr, " --memory_f16 use f16 instead of f32 for memory key+value\n"); fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp); fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); diff --git a/utils.h b/utils.h index c68e4cba8..21325191a 100644 --- a/utils.h +++ b/utils.h @@ -36,6 +36,8 @@ struct gpt_params { bool interactive = false; // interactive mode bool instruct = false; // instruction mode (used for Alpaca models) + + bool ignore_eos = false; // do not stop generating after eos }; bool gpt_params_parse(int argc, char ** argv, gpt_params & params); From 368d0c8a9ebae16a20e1c8971b21ee888bdefad5 Mon Sep 17 00:00:00 2001 From: tjohnman Date: Sun, 19 Mar 2023 19:31:17 +0100 Subject: [PATCH 40/93] Respect the maximum number of tokens in interactive. 
(#298) Co-authored-by: Johnman Co-authored-by: Georgi Gerganov --- main.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index e18105624..57e924950 100644 --- a/main.cpp +++ b/main.cpp @@ -1062,7 +1062,6 @@ int main(int argc, char ** argv) { } // end of text token - if (embd.back() == EOS_TOKEN_ID) { if (params.interactive) { is_interacting = true; @@ -1071,6 +1070,12 @@ int main(int argc, char ** argv) { break; } } + + // In interactive mode, respect the maximum number of tokens and drop back to user input when reached. + if (params.interactive && remaining_tokens <= 0) { + remaining_tokens = params.n_predict; + is_interacting = true; + } } #if defined (_WIN32) From ad5fd5b60cfdfbfb22b0f2bc9e9f6c9692768f8d Mon Sep 17 00:00:00 2001 From: tjohnman Date: Sun, 19 Mar 2023 19:36:19 +0100 Subject: [PATCH 41/93] Make prompt randomization optional. (#300) Co-authored-by: Johnman <> --- main.cpp | 2 +- utils.cpp | 5 ++++- utils.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index 57e924950..6c78cb04d 100644 --- a/main.cpp +++ b/main.cpp @@ -803,7 +803,7 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); std::mt19937 rng(params.seed); - if (params.prompt.empty()) { + if (params.random_prompt) { params.prompt = gpt_random_prompt(rng); } diff --git a/utils.cpp b/utils.cpp index a4135b9fd..04840e49f 100644 --- a/utils.cpp +++ b/utils.cpp @@ -76,6 +76,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } else if (arg == "-h" || arg == "--help") { gpt_print_usage(argc, argv, params); exit(0); + } else if (arg == "--random-prompt") { + params.random_prompt = true; } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); gpt_print_usage(argc, argv, params); @@ -99,7 +101,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n"); fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); fprintf(stderr, " -p PROMPT, --prompt PROMPT\n"); - fprintf(stderr, " prompt to start generation with (default: random)\n"); + fprintf(stderr, " prompt to start generation with (default: empty)\n"); + fprintf(stderr, " --random-prompt start with a randomized prompt.\n"); fprintf(stderr, " -f FNAME, --file FNAME\n"); fprintf(stderr, " prompt file to start generation.\n"); fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d)\n", params.n_predict); diff --git a/utils.h b/utils.h index 21325191a..60ef12bbc 100644 --- a/utils.h +++ b/utils.h @@ -32,6 +32,8 @@ struct gpt_params { std::string prompt = ""; std::string antiprompt = ""; // string upon seeing which more user input is prompted + bool random_prompt = false; + bool use_color = false; // use color to distinguish generations and inputs bool interactive = false; // interactive mode From 7392f1cd2cef4dfed41f4db7c4160ab86c0dfcd9 Mon Sep 17 00:00:00 2001 From: Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com> Date: Sun, 19 Mar 2023 12:38:44 -0600 Subject: [PATCH 42/93] Improved quantize script (#222) * Improved quantize script I improved the quantize script by adding error handling and allowing to select many models for quantization at once in the command line. I also converted it to Python for generalization as well as extensibility. 
* Fixes and improvements based on Matt's observations Fixed and improved many things in the script based on the reviews made by @mattsta. The parallelization suggestion is still to be revised, but code for it was still added (commented). * Small fixes to the previous commit * Corrected to use the original glob pattern The original Bash script uses a glob pattern to match files that have endings such as ...bin.0, ...bin.1, etc. That has been translated correctly to Python now. * Added support for Windows and updated README to use this script New code to set the name of the quantize script binary depending on the platform has been added (quantize.exe if working on Windows) and the README.md file has been updated to use this script instead of the Bash one. * Fixed a typo and removed shell=True in the subprocess.run call Fixed a typo regarding the new filenames of the quantized models and removed the shell=True parameter in the subprocess.run call as it was conflicting with the list of parameters. * Corrected previous commit * Small tweak: changed the name of the program in argparse This was making the automatic help message to be suggesting the program's usage as being literally "$ Quantization Script [arguments]". It should now be something like "$ python3 quantize.py [arguments]". --- README.md | 2 +- quantize.py | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++ quantize.sh | 15 ------- 3 files changed, 127 insertions(+), 16 deletions(-) create mode 100644 quantize.py delete mode 100755 quantize.sh diff --git a/README.md b/README.md index 504c1014b..dae1bf1b8 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ python3 -m pip install torch numpy sentencepiece python3 convert-pth-to-ggml.py models/7B/ 1 # quantize the model to 4-bits -./quantize.sh 7B +python3 quantize.py 7B # run the inference ./main -m ./models/7B/ggml-model-q4_0.bin -n 128 diff --git a/quantize.py b/quantize.py new file mode 100644 index 000000000..6320b0a26 --- /dev/null +++ b/quantize.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 + +"""Script to execute the "quantize" script on a given set of models.""" + +import subprocess +import argparse +import glob +import sys +import os + + +def main(): + """Update the quantize binary name depending on the platform and parse + the command line arguments and execute the script. + """ + + if "linux" in sys.platform or "darwin" in sys.platform: + quantize_script_binary = "quantize" + + elif "win32" in sys.platform or "cygwin" in sys.platform: + quantize_script_binary = "quantize.exe" + + else: + print("WARNING: Unknown platform. Assuming a UNIX-like OS.\n") + quantize_script_binary = "quantize" + + parser = argparse.ArgumentParser( + prog='python3 quantize.py', + description='This script quantizes the given models by applying the ' + f'"{quantize_script_binary}" script on them.' + ) + parser.add_argument( + 'models', nargs='+', choices=('7B', '13B', '30B', '65B'), + help='The models to quantize.' + ) + parser.add_argument( + '-r', '--remove-16', action='store_true', dest='remove_f16', + help='Remove the f16 model after quantizing it.' + ) + parser.add_argument( + '-m', '--models-path', dest='models_path', + default=os.path.join(os.getcwd(), "models"), + help='Specify the directory where the models are located.' + ) + parser.add_argument( + '-q', '--quantize-script-path', dest='quantize_script_path', + default=os.path.join(os.getcwd(), quantize_script_binary), + help='Specify the path to the "quantize" script.' 
+ ) + + # TODO: Revise this code + # parser.add_argument( + # '-t', '--threads', dest='threads', type='int', + # default=os.cpu_count(), + # help='Specify the number of threads to use to quantize many models at ' + # 'once. Defaults to os.cpu_count().' + # ) + + args = parser.parse_args() + + if not os.path.isfile(args.quantize_script_path): + print( + f'The "{quantize_script_binary}" script was not found in the ' + "current location.\nIf you want to use it from another location, " + "set the --quantize-script-path argument from the command line." + ) + sys.exit(1) + + for model in args.models: + # The model is separated in various parts + # (ggml-model-f16.bin, ggml-model-f16.bin.0, ggml-model-f16.bin.1...) + f16_model_path_base = os.path.join( + args.models_path, model, "ggml-model-f16.bin" + ) + + f16_model_parts_paths = map( + lambda filename: os.path.join(f16_model_path_base, filename), + glob.glob(f"{f16_model_path_base}*") + ) + + for f16_model_part_path in f16_model_parts_paths: + if not os.path.isfile(f16_model_part_path): + print( + f"The f16 model {os.path.basename(f16_model_part_path)} " + f"was not found in {args.models_path}{os.path.sep}{model}" + ". If you want to use it from another location, set the " + "--models-path argument from the command line." + ) + sys.exit(1) + + __run_quantize_script( + args.quantize_script_path, f16_model_part_path + ) + + if args.remove_f16: + os.remove(f16_model_part_path) + + +# This was extracted to a top-level function for parallelization, if +# implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406 + +def __run_quantize_script(script_path, f16_model_part_path): + """Run the quantize script specifying the path to it and the path to the + f16 model to quantize. + """ + + new_quantized_model_path = f16_model_part_path.replace("f16", "q4_0") + subprocess.run( + [script_path, f16_model_part_path, new_quantized_model_path, "2"], + check=True + ) + + +if __name__ == "__main__": + try: + main() + + except subprocess.CalledProcessError: + print("\nAn error ocurred while trying to quantize the models.") + sys.exit(1) + + except KeyboardInterrupt: + sys.exit(0) + + else: + print("\nSuccesfully quantized all models.") diff --git a/quantize.sh b/quantize.sh deleted file mode 100755 index 6194649b3..000000000 --- a/quantize.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -if ! [[ "$1" =~ ^[0-9]{1,2}B$ ]]; then - echo - echo "Usage: quantize.sh 7B|13B|30B|65B [--remove-f16]" - echo - exit 1 -fi - -for i in `ls models/$1/ggml-model-f16.bin*`; do - ./quantize "$i" "${i/f16/q4_0}" 2 - if [[ "$2" == "--remove-f16" ]]; then - rm "$i" - fi -done From 24568371ae0d7caf85164abe4753f36a7dba0288 Mon Sep 17 00:00:00 2001 From: tjohnman Date: Sun, 19 Mar 2023 20:33:06 +0100 Subject: [PATCH 43/93] Support for multiple reverse prompts. 
(#299) Co-authored-by: Johnman <> Co-authored-by: Johnman --- main.cpp | 34 ++++++++++++++++++++++------------ utils.cpp | 5 +++-- utils.h | 4 ++-- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/main.cpp b/main.cpp index 6c78cb04d..38d11924d 100644 --- a/main.cpp +++ b/main.cpp @@ -855,14 +855,18 @@ int main(int argc, char ** argv) { // in instruct mode, we inject a prefix and a suffix to each input by the user if (params.instruct) { params.interactive = true; - params.antiprompt = "### Instruction:\n\n"; + params.antiprompt.push_back("### Instruction:\n\n"); } // tokenize the reverse prompt - std::vector antiprompt_inp = ::llama_tokenize(vocab, params.antiprompt, false); + std::vector> antipromptv_inp; + + for (auto antiprompt : params.antiprompt) { + antipromptv_inp.push_back(::llama_tokenize(vocab, antiprompt, false)); + } // enable interactive mode if reverse prompt is specified - if (!antiprompt_inp.empty()) { + if (!antipromptv_inp.size()) { params.interactive = true; } @@ -886,13 +890,16 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: interactive mode on.\n", __func__); - if (antiprompt_inp.size()) { - fprintf(stderr, "%s: reverse prompt: '%s'\n", __func__, params.antiprompt.c_str()); - fprintf(stderr, "%s: number of tokens in reverse prompt = %zu\n", __func__, antiprompt_inp.size()); - for (int i = 0; i < (int) antiprompt_inp.size(); i++) { - fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).c_str()); + if(antipromptv_inp.size()) { + for (size_t apindex = 0; apindex < antipromptv_inp.size(); ++apindex) { + auto antiprompt_inp = antipromptv_inp.at(apindex); + fprintf(stderr, "%s: reverse prompt: '%s'\n", __func__, params.antiprompt.at(apindex).c_str()); + fprintf(stderr, "%s: number of tokens in reverse prompt = %zu\n", __func__, antiprompt_inp.size()); + for (int i = 0; i < (int) antiprompt_inp.size(); i++) { + fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).c_str()); + } + fprintf(stderr, "\n"); } - fprintf(stderr, "\n"); } } fprintf(stderr, "sampling parameters: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n", params.temp, params.top_k, params.top_p, params.repeat_last_n, params.repeat_penalty); @@ -1009,9 +1016,12 @@ int main(int argc, char ** argv) { // check if we should prompt the user for more if (params.interactive && embd_inp.size() <= input_consumed) { // check for reverse prompt - if (antiprompt_inp.size() && std::equal(antiprompt_inp.rbegin(), antiprompt_inp.rend(), last_n_tokens.rbegin())) { - // reverse prompt found - is_interacting = true; + for (auto antiprompt_inp : antipromptv_inp) { + if (antiprompt_inp.size() && std::equal(antiprompt_inp.rbegin(), antiprompt_inp.rend(), last_n_tokens.rbegin())) { + // reverse prompt found + is_interacting = true; + break; + } } if (is_interacting) { if (params.instruct) { diff --git a/utils.cpp b/utils.cpp index 04840e49f..08d5c6ba6 100644 --- a/utils.cpp +++ b/utils.cpp @@ -70,7 +70,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { } else if (arg == "--color") { params.use_color = true; } else if (arg == "-r" || arg == "--reverse-prompt") { - params.antiprompt = argv[++i]; + params.antiprompt.push_back(argv[++i]); } else if (arg == "--ignore-eos") { params.ignore_eos = true; } else if (arg == "-h" || arg == "--help") { @@ -96,7 +96,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " -i, --interactive run in 
interactive mode\n"); fprintf(stderr, " -ins, --instruct run in instruction mode (use with Alpaca models)\n"); fprintf(stderr, " -r PROMPT, --reverse-prompt PROMPT\n"); - fprintf(stderr, " in interactive mode, poll user input upon seeing PROMPT\n"); + fprintf(stderr, " in interactive mode, poll user input upon seeing PROMPT (can be\n"); + fprintf(stderr, " specified more than once for multiple prompts).\n"); fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n"); fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n"); fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); diff --git a/utils.h b/utils.h index 60ef12bbc..49658f7d9 100644 --- a/utils.h +++ b/utils.h @@ -30,15 +30,15 @@ struct gpt_params { std::string model = "models/lamma-7B/ggml-model.bin"; // model path std::string prompt = ""; - std::string antiprompt = ""; // string upon seeing which more user input is prompted bool random_prompt = false; bool use_color = false; // use color to distinguish generations and inputs bool interactive = false; // interactive mode + bool interactive_start = false; // reverse prompt immediately + std::vector antiprompt; // string upon seeing which more user input is prompted bool instruct = false; // instruction mode (used for Alpaca models) - bool ignore_eos = false; // do not stop generating after eos }; From 5c19c70ba631a8f5d54feb6634e0eea178911a84 Mon Sep 17 00:00:00 2001 From: Rickey Bowers Jr Date: Sun, 19 Mar 2023 13:44:30 -0600 Subject: [PATCH 44/93] fix coloring of last `n_batch` of prompt, and refactor line input (#221) * fix coloring of last `n_batch` of prompt, and refactor line input * forgot the newline that needs to be sent to the model * (per #283) try to force flush of color reset in SIGINT handler --- main.cpp | 58 +++++++++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/main.cpp b/main.cpp index 38d11924d..c7186e0df 100644 --- a/main.cpp +++ b/main.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -997,11 +998,6 @@ int main(int argc, char ** argv) { break; } } - - // reset color to default if we there is no pending user input - if (!input_noecho && params.use_color && (int) embd_inp.size() == input_consumed) { - printf(ANSI_COLOR_RESET); - } } // display text @@ -1011,6 +1007,10 @@ int main(int argc, char ** argv) { } fflush(stdout); } + // reset color to default if we there is no pending user input + if (!input_noecho && params.use_color && (int)embd_inp.size() == input_consumed) { + printf(ANSI_COLOR_RESET); + } // in interactive mode, and not currently processing queued inputs; // check if we should prompt the user for more @@ -1032,43 +1032,33 @@ int main(int argc, char ** argv) { } // currently being interactive + if (params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN); + std::string buffer; + std::string line; bool another_line = true; - while (another_line) { - fflush(stdout); - char buf[256] = {0}; - int n_read; - if (params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN); - if (scanf("%255[^\n]%n%*c", buf, &n_read) <= 0) { - // presumable empty line, consume the newline - std::ignore = scanf("%*c"); - n_read=0; - } - if (params.use_color) printf(ANSI_COLOR_RESET); - - if (n_read > 0 && buf[n_read-1]=='\\') { - another_line = true; - buf[n_read-1] = '\n'; - buf[n_read] = 0; - } else { + do { + std::getline(std::cin, line); + if (line.empty() || line.back() 
!= '\\') { another_line = false; - buf[n_read] = '\n'; - buf[n_read+1] = 0; + } else { + line.pop_back(); // Remove the continue character } + buffer += line + '\n'; // Append the line to the result + } while (another_line); + if (params.use_color) printf(ANSI_COLOR_RESET); - std::vector line_inp = ::llama_tokenize(vocab, buf, false); - embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); + std::vector line_inp = ::llama_tokenize(vocab, buffer, false); + embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); - if (params.instruct) { - embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); - } - - remaining_tokens -= line_inp.size(); - - input_noecho = true; // do not echo this again + if (params.instruct) { + embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); } - is_interacting = false; + remaining_tokens -= line_inp.size(); + + input_noecho = true; // do not echo this again } + is_interacting = false; } // end of text token From edeba283665591f2f726024a92efe4b0b40434b3 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Mar 2023 21:57:28 +0200 Subject: [PATCH 45/93] Add temporary helper script for Alpaca chat --- chat.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100755 chat.sh diff --git a/chat.sh b/chat.sh new file mode 100755 index 000000000..284989bc0 --- /dev/null +++ b/chat.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# +# Temporary script - will be removed in the future +# + +./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.96 --repeat_penalty 1 -t 7 From 4545539d718cf88f4c3a76669b8ac2e26cd8a1e5 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Mar 2023 21:58:51 +0200 Subject: [PATCH 46/93] Rename script --- chat.sh => alpaca.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename chat.sh => alpaca.sh (100%) diff --git a/chat.sh b/alpaca.sh similarity index 100% rename from chat.sh rename to alpaca.sh From da5303c1ea68aa19db829c634f1e10d08d409680 Mon Sep 17 00:00:00 2001 From: cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com> Date: Sun, 19 Mar 2023 17:44:20 -0400 Subject: [PATCH 47/93] bugfix: default should not be interactive (#304) --- main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index c7186e0df..c005d17cc 100644 --- a/main.cpp +++ b/main.cpp @@ -867,7 +867,7 @@ int main(int argc, char ** argv) { } // enable interactive mode if reverse prompt is specified - if (!antipromptv_inp.size()) { + if (antipromptv_inp.size() != 0) { params.interactive = true; } From 5cb63e2493c49bc2c3b9b355696e8dc26cdd0380 Mon Sep 17 00:00:00 2001 From: Stephan Walter Date: Mon, 20 Mar 2023 08:24:11 +0000 Subject: [PATCH 48/93] Add tqdm to Python requirements (#293) * Add tqdm to Python requirements * Remove torchvision torchaudio, add requests --- .devops/full.Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.devops/full.Dockerfile b/.devops/full.Dockerfile index 618cdddc4..2b3a20c63 100644 --- a/.devops/full.Dockerfile +++ b/.devops/full.Dockerfile @@ -6,7 +6,7 @@ RUN apt-get update && \ apt-get install -y build-essential python3 python3-pip RUN pip install --upgrade pip setuptools wheel \ - && pip install torch torchvision torchaudio sentencepiece numpy + && pip install numpy requests sentencepiece torch tqdm WORKDIR /app @@ -14,4 +14,4 @@ COPY . . 
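# build llama.cpp; per the commit message above, the slimmed-down Python packages (numpy, requests, sentencepiece, torch, tqdm) should be enough for the conversion helper scripts, without pulling in torchvision/torchaudio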
RUN make -ENTRYPOINT ["/app/.devops/tools.sh"] \ No newline at end of file +ENTRYPOINT ["/app/.devops/tools.sh"] From 074bea2eb1f1349a0118239c4152914aecaa1be4 Mon Sep 17 00:00:00 2001 From: Mack Straight Date: Mon, 20 Mar 2023 03:17:23 -0700 Subject: [PATCH 49/93] sentencepiece bpe compatible tokenizer (#252) * potential out of bounds read * fix quantize * style * Update convert-pth-to-ggml.py * mild cleanup * don't need the space-prefixing here rn since main.cpp already does it * new file magic + version header field * readme notice * missing newlines Co-authored-by: slaren <2141330+slaren@users.noreply.github.com> --- Makefile | 2 +- README.md | 3 + convert-pth-to-ggml.py | 4 +- main.cpp | 21 +++++- quantize.cpp | 24 +++++- utils.cpp | 167 +++++++++++++++++++++++++++++++---------- utils.h | 3 +- 7 files changed, 180 insertions(+), 44 deletions(-) diff --git a/Makefile b/Makefile index 1601079a4..44fb29840 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ endif # CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC -CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC +CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++17 -fPIC LDFLAGS = # OS specific diff --git a/README.md b/README.md index dae1bf1b8..c7e5d33c7 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,9 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ - Cache input prompts for faster initialization: https://github.com/ggerganov/llama.cpp/issues/64 - Create a `llama.cpp` logo: https://github.com/ggerganov/llama.cpp/issues/105 +**TEMPORARY NOTICE:** +If you're updating to the latest master, you will need to regenerate your model files as the format has changed. + ## Description The main goal is to run the model using 4-bit quantization on a MacBook diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index c1941a811..42f537769 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -60,7 +60,8 @@ def write_header(fout, hparams, ftype): keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"] values = [ - 0x67676d6c, # magic: ggml in hex + 0x67676d66, # magic: ggml in hex + 1, # file version *[hparams[key] for key in keys], hparams["dim"] // hparams["n_heads"], # rot (obsolete) ftype @@ -85,6 +86,7 @@ def write_tokens(fout, tokenizer): text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") fout.write(struct.pack("i", len(text))) fout.write(text) + fout.write(struct.pack("f", tokenizer.get_score(i))) def process_and_write_variables(fout, model, ftype): diff --git a/main.cpp b/main.cpp index c005d17cc..159033373 100644 --- a/main.cpp +++ b/main.cpp @@ -3,6 +3,7 @@ #include "utils.h" #include +#include #include #include #include @@ -105,10 +106,24 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); - if (magic != 0x67676d6c) { + if (magic == 0x67676d6c) { + fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", + __func__, fname.c_str()); + return false; + } + if (magic != 0x67676d66) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } + + uint32_t format_version; + fin.read((char *) &format_version, sizeof(format_version)); + + if (format_version != 1) { + fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ")\n", + __func__, fname.c_str(), format_version); + return false; + } } int n_ff = 0; @@ -154,8 +169,12 @@ bool llama_model_load(const std::string & 
fname, llama_model & model, gpt_vocab word.resize(len); fin.read((char *) word.data(), len); + float score; + fin.read((char *) &score, sizeof(score)); + vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; + vocab.score[i] = score; //if (i < 30000) { // fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str()); diff --git a/quantize.cpp b/quantize.cpp index 14c7b277a..166e9163a 100644 --- a/quantize.cpp +++ b/quantize.cpp @@ -3,6 +3,7 @@ #include "utils.h" #include +#include #include #include #include @@ -63,12 +64,28 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna { uint32_t magic; finp.read((char *) &magic, sizeof(magic)); - if (magic != 0x67676d6c) { + if (magic == 0x67676d6c) { + fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", + __func__, fname_inp.c_str()); + return false; + } + if (magic != 0x67676d66) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str()); return false; } fout.write((char *) &magic, sizeof(magic)); + + uint32_t format_version; + finp.read((char *) &format_version, sizeof(format_version)); + + if (format_version != 1) { + fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ")\n", + __func__, fname_inp.c_str(), format_version); + return false; + } + + fout.write((char *) &format_version, sizeof(format_version)); } llama_hparams hparams; @@ -122,8 +139,13 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna finp.read ((char *) word.data(), len); fout.write((char *) word.data(), len); + float score; + finp.read ((char *) &score, sizeof(score)); + fout.write((char *) &score, sizeof(score)); + vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; + vocab.score[i] = score; } } diff --git a/utils.cpp b/utils.cpp index 08d5c6ba6..188f114e9 100644 --- a/utils.cpp +++ b/utils.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -294,58 +295,146 @@ std::vector gpt_tokenize(const gpt_vocab & vocab, const std::stri return tokens; } -// TODO: Calculate this constant from the vocabulary -#define MAX_TOKEN_LEN 18 -// SentencePiece implementation after https://guillaume-be.github.io/2020-05-30/sentence_piece -std::vector llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos) { - std::vector res; - std::vector score; - std::vector prev; - int len = text.length(); +static size_t utf8_len(char src) { + const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 }; + uint8_t highbits = static_cast(src) >> 4; + return lookup[highbits]; +} - score.resize(len + 1); - prev.resize(len + 1); +struct llama_sp_symbol { + using index = int; + index prev; + index next; + std::string_view text; +}; - // Forward pass - for (int i = 0; i < len; i++) { - int max_len = std::min(len - i, MAX_TOKEN_LEN); - for (int sub_len = 1; sub_len <= max_len; sub_len++) { - auto sub = text.substr(i, sub_len); - auto token = vocab.token_to_id.find(sub); - if (token != vocab.token_to_id.end()) { - int token_score = sub.length() * sub.length(); - int local_score = score[i] + token_score; - int next = i + sub_len; - if (score[next] < local_score) { - score[next] = local_score; - prev[next] = (*token).second; +struct llama_sp_bigram { + struct comparator { + bool operator()(llama_sp_bigram & l, llama_sp_bigram & r) { + return (l.score < r.score) || (l.score == r.score && l.left > r.left); + } + }; + using queue_storage = std::vector; + using queue = std::priority_queue; 
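+    // a pending merge of the symbol at index `left` with its right neighbour `right`;
+    // `score` is the vocabulary score of the merged piece and `size` its length in bytes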
+ llama_sp_symbol::index left; + llama_sp_symbol::index right; + float score; + size_t size; +}; + +struct llama_tokenizer { + llama_tokenizer(const gpt_vocab & vocab): vocab_(vocab) {} + + void tokenize(std::string_view text, std::vector & output) { + // split string into utf8 chars + int index = 0; + while (!text.empty()) { + llama_sp_symbol sym; + size_t char_len = std::min(text.size(), utf8_len(text.data()[0])); + sym.text = std::string_view(text.data(), char_len); + sym.prev = index - 1; + text.remove_prefix(char_len); + sym.next = text.empty() ? -1 : index + 1; + index++; + symbols_.emplace_back(std::move(sym)); + } + + // seed the work queue with all possible 2-character tokens. + for (size_t i = 1; i < symbols_.size(); ++i) { + try_add_bigram(i - 1, i); + } + + // keep substituting the highest frequency pairs for as long as we can. + while (!work_queue_.empty()) { + auto bigram = work_queue_.top(); + work_queue_.pop(); + + auto & left_sym = symbols_[bigram.left]; + auto & right_sym = symbols_[bigram.right]; + + // if one of the symbols already got merged, skip it. + if (left_sym.text.empty() || right_sym.text.empty() || + left_sym.text.size() + right_sym.text.size() != bigram.size) { + continue; + } + + // merge the right sym into the left one + left_sym.text = std::string_view(left_sym.text.data(), left_sym.text.size() + right_sym.text.size()); + right_sym.text = std::string_view(""); + + // remove the right sym from the chain + left_sym.next = right_sym.next; + if (right_sym.next >= 0) { + symbols_[right_sym.next].prev = bigram.left; + } + + // find more substitutions + try_add_bigram(left_sym.prev, bigram.left); + try_add_bigram(bigram.left, left_sym.next); + } + + for (int i = 0; i != -1; i = symbols_[i].next) { + auto& symbol = symbols_[i]; + auto token = vocab_.token_to_id.find(std::string(symbol.text)); + + if (token == vocab_.token_to_id.end()) { + // output any symbols that did not form tokens as bytes. 
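+                // (the +3 offset below assumes ids 0-2 are reserved for the special unk/bos/eos pieces,
+                // so the raw byte-fallback pieces <0x00>..<0xFF> start at id 3 -- e.g. byte 0x41 maps to id 68)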
+ for (int j = 0; j < symbol.text.size(); ++j) { + gpt_vocab::id token_id = static_cast(symbol.text[j]) + 3; + output.push_back(token_id); } + } else { + output.push_back((*token).second); } } } - // Backward pass - int i = len; - while (i > 0) { - gpt_vocab::id token_id = prev[i]; - if (token_id == 0) { - // TODO: Return error or something more meaningful - printf("failed to tokenize string!\n"); - break; +private: + void try_add_bigram(int left, int right) { + if (left == -1 || right == -1) { + return; } - res.push_back(token_id); - auto token = (*vocab.id_to_token.find(token_id)).second; - i -= token.length(); + + std::string_view text(symbols_[left].text.data(), symbols_[left].text.size() + symbols_[right].text.size()); + auto token = vocab_.token_to_id.find(std::string(text)); + + if (token == vocab_.token_to_id.end()) { + return; + } + + auto score = vocab_.score.find((*token).second); + + if (score == vocab_.score.end()) { + return; + } + + llama_sp_bigram bigram; + bigram.left = left; + bigram.right = right; + bigram.score = (*score).second; + bigram.size = text.size(); + work_queue_.push(bigram); + } + + const gpt_vocab & vocab_; + std::vector symbols_; + llama_sp_bigram::queue work_queue_; +}; + +std::vector llama_tokenize(const gpt_vocab & vocab, std::string_view text, bool bos) { + llama_tokenizer tokenizer(vocab); + std::vector output; + + if (text.size() == 0) { + return output; } if (bos) { - res.push_back(1); // TODO: replace with vocab.bos + output.push_back(1); } - // Pieces are in reverse order so correct that - std::reverse(res.begin(), res.end()); - - return res; + tokenizer.tokenize(text, output); + return output; } bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) { diff --git a/utils.h b/utils.h index 49658f7d9..b3a0f4724 100644 --- a/utils.h +++ b/utils.h @@ -58,6 +58,7 @@ struct gpt_vocab { std::map token_to_id; std::map id_to_token; + std::map score; }; void replace(std::string & str, const std::string & needle, const std::string & replacement); @@ -79,7 +80,7 @@ std::vector gpt_tokenize(const gpt_vocab & vocab, const std::stri // TODO: this is probably wrong, but I cannot figure out how this tokenizer works .. 
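// (in practice the implementation above is a SentencePiece-style greedy BPE: the input is split into
//  UTF-8 characters and the adjacent pair with the highest vocabulary score is merged repeatedly until
//  no pair forms a known token; e.g. "hello" starts as h e l l o and collapses into longer known pieces)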
// ref: https://github.com/google/sentencepiece -std::vector llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos); +std::vector llama_tokenize(const gpt_vocab & vocab, std::string_view text, bool bos); // load the tokens from encoder.json bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab); From 0f1b21cb90ac6b84a9af70cafb8e13b5389e3b32 Mon Sep 17 00:00:00 2001 From: Bernat Vadell Date: Mon, 20 Mar 2023 18:05:20 +0100 Subject: [PATCH 50/93] Docker - Fix publish docker image in GitHub Registry (#235) * fix publish permission * try to fix docker pipeline using as password github_token & username repository_owner --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index bc9aff7b7..d1a43caa6 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -40,7 +40,7 @@ jobs: uses: docker/login-action@v2 with: registry: ghcr.io - username: ${{ github.actor }} + username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push Docker image (versioned) From a791a68b613b162c88a83f5f0225223bc167c762 Mon Sep 17 00:00:00 2001 From: Mack Straight Date: Mon, 20 Mar 2023 12:26:01 -0700 Subject: [PATCH 51/93] move file magic/version to header, print expected version (#319) --- main.cpp | 10 +++++----- quantize.cpp | 10 +++++----- utils.h | 8 ++++++++ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/main.cpp b/main.cpp index 159033373..3321818d3 100644 --- a/main.cpp +++ b/main.cpp @@ -106,12 +106,12 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); - if (magic == 0x67676d6c) { + if (magic == FILE_MAGIC_UNVERSIONED) { fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", __func__, fname.c_str()); return false; } - if (magic != 0x67676d66) { + if (magic != FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } @@ -119,9 +119,9 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab uint32_t format_version; fin.read((char *) &format_version, sizeof(format_version)); - if (format_version != 1) { - fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ")\n", - __func__, fname.c_str(), format_version); + if (format_version != FILE_VERSION) { + fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", + __func__, fname.c_str(), format_version, FILE_VERSION); return false; } } diff --git a/quantize.cpp b/quantize.cpp index 166e9163a..07db33a3c 100644 --- a/quantize.cpp +++ b/quantize.cpp @@ -64,12 +64,12 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna { uint32_t magic; finp.read((char *) &magic, sizeof(magic)); - if (magic == 0x67676d6c) { + if (magic == FILE_MAGIC_UNVERSIONED) { fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", __func__, fname_inp.c_str()); return false; } - if (magic != 0x67676d66) { + if (magic != FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str()); return false; } @@ -79,9 +79,9 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna uint32_t format_version; finp.read((char *) &format_version, sizeof(format_version)); - if (format_version != 1) { - 
fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ")\n", - __func__, fname_inp.c_str(), format_version); + if (format_version != FILE_VERSION) { + fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", + __func__, fname_inp.c_str(), format_version, FILE_VERSION); return false; } diff --git a/utils.h b/utils.h index b3a0f4724..65fe02ba1 100644 --- a/utils.h +++ b/utils.h @@ -48,6 +48,14 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params); std::string gpt_random_prompt(std::mt19937 & rng); +// +// Model file parsing +// + +#define FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files +#define FILE_MAGIC 0x67676d66 // 'ggmf' in hex +#define FILE_VERSION 1 + // // Vocab utils // From 6b6d5b5024faaf82019d08cde5e8a9d69c6ca316 Mon Sep 17 00:00:00 2001 From: Qingyou Meng Date: Tue, 21 Mar 2023 03:33:10 +0800 Subject: [PATCH 52/93] Fixed tokenizer.model not found error when model dir is symlink (#325) --- convert-pth-to-ggml.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index 42f537769..108eb1fcc 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -17,6 +17,7 @@ # and vocabulary. # import argparse +import os import sys import json import struct @@ -44,8 +45,14 @@ def get_n_parts(dim): def load_hparams_and_tokenizer(dir_model): + # `dir_model` is something like `models/7B` or `models/7B/`. + # "tokenizer.model" is expected under model's parent dir. + # When `dir_model` is a symlink, f"{dir_model}/../tokenizer.model" would not be found. + # Let's use the model's parent dir directly. + model_parent_dir = os.path.dirname(os.path.normpath(dir_model)) + fname_hparams = f"{dir_model}/params.json" - fname_tokenizer = f"{dir_model}/../tokenizer.model" + fname_tokenizer = f"{model_parent_dir}/tokenizer.model" with open(fname_hparams, "r") as f: hparams = json.load(f) From bd4b46d6ba504b99c936f43fc014529adffb6048 Mon Sep 17 00:00:00 2001 From: Ben Siraphob Date: Mon, 20 Mar 2023 16:44:30 -0500 Subject: [PATCH 53/93] Nix flake: set meta.mainProgram to llama --- flake.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/flake.nix b/flake.nix index dae4ff60f..da4bd7ba3 100644 --- a/flake.nix +++ b/flake.nix @@ -34,6 +34,7 @@ cat ${./convert-pth-to-ggml.py} >> $out/bin/convert-pth-to-ggml chmod +x $out/bin/convert-pth-to-ggml ''; + meta.mainProgram = "llama"; }; devShells.default = pkgs.mkShell { packages = with pkgs; [ From 8cf9f34eddc124d4ab28f4d2fe8e99d574510bde Mon Sep 17 00:00:00 2001 From: nusu-github <29514220+nusu-github@users.noreply.github.com> Date: Tue, 21 Mar 2023 09:37:16 +0900 Subject: [PATCH 54/93] Adding missing features of CMakeLists.txt & Refactoring (#131) * Functionality addition CMakeLists.txt Refactoring: 1. Simplify more options that are negation of negation. LLAMA_NO_ACCELERATE -> LLAMA_ACCELERATE 2. Changed to an optional expression instead of forcing to enable AVX2 in MSVC. 3. Make CMAKE_CXX_STANDARD, which is different from Makefile, the same. 4. Use add_compile_options instead of adding options to CMAKE_C_FLAGS. 5. Make utils use target_link_libraries instead of directly referencing code. Added features: 1. Added some options. 
LLAMA_STATIC_LINK,LLAMA_NATIVE,LLAMA_LTO,LLAMA_GPROF,LLAMA_OPENBLAS * Fix Accelerate link in CMake * Windows build Fix * C++11 to C++17 * Reflects C/C++ standard individually * Change the version to 3.12 --------- Co-authored-by: Georgi Gerganov --- CMakeLists.txt | 247 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 162 insertions(+), 85 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 38e7266dc..7f46513c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,131 +1,208 @@ -cmake_minimum_required(VERSION 3.8) -project("llama.cpp") - -set(CMAKE_CXX_STANDARD 20) -set(CMAKE_CXX_STANDARD_REQUIRED true) -set(CMAKE_C_STANDARD 11) -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads REQUIRED) +cmake_minimum_required(VERSION 3.12) +project("llama.cpp" C CXX) if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +# +# Option list +# -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) +# general +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) -if (APPLE) - option(LLAMA_NO_ACCELERATE "llama: disable Accelerate framework" OFF) - option(LLAMA_NO_AVX "llama: disable AVX" OFF) - option(LLAMA_NO_AVX2 "llama: disable AVX2" OFF) - option(LLAMA_NO_FMA "llama: disable FMA" OFF) -endif() +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +# instruction set specific +option(LLAMA_AVX "llama: enable AVX" ON) +option(LLAMA_AVX2 "llama: enable AVX2" ON) +option(LLAMA_FMA "llama: enable FMA" ON) + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF) + +# +# Compile flags +# + +set(CMAKE_CXX_STANDARD_REQUIRED true) +set(CMAKE_C_STANDARD_REQUIRED true) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) if (NOT MSVC) if (LLAMA_SANITIZE_THREAD) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread") + add_compile_options(-fsanitize=thread) endif() if (LLAMA_SANITIZE_ADDRESS) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") + add_compile_options(-fsanitize=address -fno-omit-frame-pointer) endif() if (LLAMA_SANITIZE_UNDEFINED) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined") + add_compile_options(-fsanitize=undefined) endif() endif() -if (APPLE AND NOT LLAMA_NO_ACCELERATE) +if (APPLE AND LLAMA_ACCELERATE) 
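    # Accelerate ships with macOS and provides BLAS routines; when the framework is found,
    # GGML_USE_ACCELERATE is defined so ggml can use it, and the framework is linked below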
find_library(ACCELERATE_FRAMEWORK Accelerate) if (ACCELERATE_FRAMEWORK) message(STATUS "Accelerate framework found") - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) - set(LLAMA_EXTRA_FLAGS ${LLAMA_EXTRA_FLAGS} -DGGML_USE_ACCELERATE) + add_compile_definitions(GGML_USE_ACCELERATE) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) else() message(WARNING "Accelerate framework not found") endif() endif() +if (LLAMA_OPENBLAS) + if (LLAMA_STATIC) + set(BLA_STATIC ON) + endif() + + set(BLA_VENDOR OpenBLAS) + find_package(BLAS) + if (BLAS_FOUND) + message(STATUS "OpenBLAS found") + + add_compile_definitions(GGML_USE_OPENBLAS) + add_link_options(${BLAS_LIBRARIES}) + else() + message(WARNING "OpenBLAS not found") + endif() +endif() if (LLAMA_ALL_WARNINGS) if (NOT MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \ - -Wall \ - -Wextra \ - -Wpedantic \ - -Wshadow \ - -Wcast-qual \ - -Wstrict-prototypes \ - -Wpointer-arith \ - -Wno-unused-function \ - ") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ - -Wall \ - -Wextra \ - -Wpedantic \ - -Wcast-qual \ - ") + set(c_flags + -Wall + -Wextra + -Wpedantic + -Wshadow + -Wcast-qual + -Wstrict-prototypes + -Wpointer-arith + -Wno-unused-function + ) + set(cxx_flags + -Wall + -Wextra + -Wpedantic + -Wcast-qual + ) else() # todo : msvc endif() + + add_compile_options( + "$<$:${c_flags}>" + "$<$:${cxx_flags}>" + ) + endif() -message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") - -if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") - message(STATUS "ARM detected") -else() - message(STATUS "x86 detected") - if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2") +if (LLAMA_LTO) + include(CheckIPOSupported) + check_ipo_supported(RESULT result OUTPUT output) + if (result) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) else() - if(NOT LLAMA_NO_AVX) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx") - endif() - if(NOT LLAMA_NO_AVX2) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") - endif() - if(NOT LLAMA_NO_FMA) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma") - endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c") + message(WARNING "IPO is not supported: ${output}") endif() endif() -# if (LLAMA_PERF) -# set(LLAMA_EXTRA_FLAGS ${LLAMA_EXTRA_FLAGS} -DGGML_PERF) -# endif() +# Architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue +message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +if (NOT MSVC) + if (LLAMA_STATIC) + add_link_options(-static) + if (MINGW) + add_link_options(-static-libgcc -static-libstdc++) + endif() + endif() + if (LLAMA_GPROF) + add_compile_options(-pg) + endif() + if (LLAMA_NATIVE) + add_compile_options(-march=native) + endif() +endif() -add_executable(llama - main.cpp - utils.cpp - utils.h) +if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + message(STATUS "ARM detected") + if (MSVC) + # TODO: arm msvc? 
+ else() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + add_compile_options(-mcpu=native) + endif() + # TODO: armv6,7,8 version specific flags + endif() +elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$") + message(STATUS "x86 detected") + if (MSVC) + if (LLAMA_AVX2) + add_compile_options(/arch:AVX2) + elseif (LLAMA_AVX) + add_compile_options(/arch:AVX) + endif() + else() + add_compile_options(-mf16c) + if (LLAMA_FMA) + add_compile_options(-mfma) + endif() + if (LLAMA_AVX) + add_compile_options(-mavx) + endif() + if (LLAMA_AVX2) + add_compile_options(-mavx2) + endif() + endif() +else() + # TODO: support PowerPC + message(STATUS "Unknown architecture") +endif() -add_executable(quantize - quantize.cpp - utils.cpp - utils.h) -add_library(ggml - ggml.c - ggml.h) +# +# Build library +# -target_compile_definitions(ggml PUBLIC ${LLAMA_EXTRA_FLAGS}) -target_compile_definitions(llama PUBLIC ${LLAMA_EXTRA_FLAGS}) -target_compile_definitions(quantize PUBLIC ${LLAMA_EXTRA_FLAGS}) +add_executable(llama main.cpp) + +add_executable(quantize quantize.cpp) + +add_library(ggml OBJECT + ggml.c + ggml.h) + +add_library(utils OBJECT + utils.cpp + utils.h) -target_link_libraries(ggml PRIVATE ${LLAMA_EXTRA_LIBS}) target_include_directories(ggml PUBLIC .) -target_link_libraries(quantize PRIVATE ggml) -target_link_libraries(llama PRIVATE ggml) -target_link_libraries(ggml PRIVATE Threads::Threads) +target_compile_features(ggml PUBLIC c_std_11) +target_compile_features(utils PUBLIC cxx_std_17) + +# +# Linking +# + +target_link_libraries(ggml PRIVATE Threads::Threads ${LLAMA_EXTRA_LIBS}) +target_link_libraries(llama PRIVATE ggml utils) +target_link_libraries(quantize PRIVATE ggml utils) From 2e664f1ff413995506c9a54f3a8d5b8c64e37a91 Mon Sep 17 00:00:00 2001 From: Casey Primozic Date: Tue, 21 Mar 2023 07:35:42 -0700 Subject: [PATCH 55/93] Add initial AVX512 support for dot product on Linux (#320) * Update Makefile to detect AVX512 support and add compiler flags if it's available * Based on existing AVX2 implementation, dot product on one 32-value block of 4-bit quantized ints at a time * Perform 8 bit -> 16 bit sign extension and multiply+add on 32 values at time instead of 16 * Use built-in AVX512 horizontal reduce add to get sum at the end * Manual unrolling on inner dot product loop to reduce loop counter overhead --- Makefile | 32 +++++++++++++++++++++++ ggml.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 109 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 44fb29840..ec2eb7569 100644 --- a/Makefile +++ b/Makefile @@ -95,6 +95,38 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686)) ifneq (,$(findstring sse3,$(SSE3_M))) CFLAGS += -msse3 endif + AVX512F_M := $(shell grep "avx512f " /proc/cpuinfo) + ifneq (,$(findstring avx512f,$(AVX512F_M))) + CFLAGS += -mavx512f + endif + AVX512BW_M := $(shell grep "avx512bw " /proc/cpuinfo) + ifneq (,$(findstring avx512bw,$(AVX512BW_M))) + CFLAGS += -mavx512bw + endif + AVX512DQ_M := $(shell grep "avx512dq " /proc/cpuinfo) + ifneq (,$(findstring avx512dq,$(AVX512DQ_M))) + CFLAGS += -mavx512dq + endif + AVX512VL_M := $(shell grep "avx512vl " /proc/cpuinfo) + ifneq (,$(findstring avx512vl,$(AVX512VL_M))) + CFLAGS += -mavx512vl + endif + AVX512CD_M := $(shell grep "avx512cd " /proc/cpuinfo) + ifneq (,$(findstring avx512cd,$(AVX512CD_M))) + CFLAGS += -mavx512cd + endif + AVX512ER_M := $(shell grep "avx512er " /proc/cpuinfo) + ifneq (,$(findstring avx512er,$(AVX512ER_M))) + CFLAGS += -mavx512er + endif + 
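+# (each AVX-512 sub-feature is probed individually in /proc/cpuinfo, so only the flags the build host actually supports get added)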
AVX512IFMA_M := $(shell grep "avx512ifma " /proc/cpuinfo) + ifneq (,$(findstring avx512ifma,$(AVX512IFMA_M))) + CFLAGS += -mavx512ifma + endif + AVX512PF_M := $(shell grep "avx512pf " /proc/cpuinfo) + ifneq (,$(findstring avx512pf,$(AVX512PF_M))) + CFLAGS += -mavx512pf + endif else ifeq ($(UNAME_S),Haiku) AVX1_M := $(shell sysinfo -cpu | grep "AVX ") ifneq (,$(findstring avx,$(AVX1_M))) diff --git a/ggml.c b/ggml.c index 4813f74c8..f85138f38 100644 --- a/ggml.c +++ b/ggml.c @@ -361,7 +361,7 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float); // AVX routines provided by GH user Const-me // ref: https://github.com/ggerganov/ggml/pull/27#issuecomment-1464934600 -#if __AVX2__ +#if __AVX2__ || __AVX512F__ // Unpack 32 4-bit fields into 32 bytes // The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval static inline __m256i bytesFromNibbles( const uint8_t* rsi ) @@ -397,7 +397,6 @@ static inline __m128i packNibbles( __m256i bytes ) } #endif - // method 5 // blocks of QK elements // represented with a single float (delta) and QK/2 8-bit ints (i.e QK 4-bit signed integer factors) @@ -1262,6 +1261,47 @@ inline static void ggml_vec_dot_f32(const int n, float * restrict s, const float *s = sumf; } +#if __AVX512F__ && QK == 32 +static inline __m512 dot_q4_0_oneblock_avx512( + __m512 acc, + const uint8_t * pd0, + const uint8_t * pd1, + const uint8_t * pb0, + const uint8_t * pb1, + size_t bs, + int i +) { + const float * d0_0 = (const float *) (pd0 + i*bs); + const float * d1_0 = (const float *) (pd1 + i*bs); + + const uint8_t * restrict p0 = pb0 + (i+0)*bs; + const uint8_t * restrict p1 = pb1 + (i+0)*bs; + + // Compute combined scale for the block + float scaleScalar = d0_0[0] * d1_0[0]; + __m512 scale = _mm512_set1_ps( scaleScalar ); + + __m256i bx = bytesFromNibbles( p0 ); + __m256i by = bytesFromNibbles( p1 ); + + // Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval. 
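+    // (q4_0 stores each weight as an unsigned nibble equal to the quantized value plus 8, so subtracting
+    //  8 here recovers the signed value, e.g. a stored 0x3 becomes 3 - 8 = -5; the sign-extended 16-bit
+    //  multiply-add below then yields the integer dot product for the block, scaled afterwards by d0*d1)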
+ const __m256i off = _mm256_set1_epi8( 8 ); + bx = _mm256_sub_epi8( bx, off ); + by = _mm256_sub_epi8( by, off ); + + // Sign-extend 16 signed bytes into int16_t + __m512i x32 = _mm512_cvtepi8_epi16( bx ); + __m512i y32 = _mm512_cvtepi8_epi16( by ); + // Compute products of int16_t integers, add pairwise + __m512i i64 = _mm512_madd_epi16( x32, y32 ); + + // Convert int32_t to float + __m512 p = _mm512_cvtepi32_ps( i64 ); + // Apply the scale, and accumulate + return _mm512_fmadd_ps( scale, p, acc ); +} +#endif + inline static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y) { ggml_float sumf = 0.0; @@ -1417,6 +1457,40 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void #else #error "not implemented for QK" #endif +#elif defined(__AVX512F__) + +#if QK == 32 + // Initialize accumulator with zeros + __m512 acc0 = _mm512_setzero_ps(); + __m512 acc1 = _mm512_setzero_ps(); + + const int superblock_size = 8; + const int superblock_count = nb / superblock_size; + const int remainder = nb % superblock_size; + + for (int superblock_ix = 0; superblock_ix < superblock_count; superblock_ix += 1) { + int i = superblock_ix * superblock_size; + + acc0 = dot_q4_0_oneblock_avx512( acc0, pd0, pd1, pb0, pb1, bs, i+0 ); + acc1 = dot_q4_0_oneblock_avx512( acc1, pd0, pd1, pb0, pb1, bs, i+1 ); + acc0 = dot_q4_0_oneblock_avx512( acc0, pd0, pd1, pb0, pb1, bs, i+2 ); + acc1 = dot_q4_0_oneblock_avx512( acc1, pd0, pd1, pb0, pb1, bs, i+3 ); + acc0 = dot_q4_0_oneblock_avx512( acc0, pd0, pd1, pb0, pb1, bs, i+4 ); + acc1 = dot_q4_0_oneblock_avx512( acc1, pd0, pd1, pb0, pb1, bs, i+5 ); + acc0 = dot_q4_0_oneblock_avx512( acc0, pd0, pd1, pb0, pb1, bs, i+6 ); + acc1 = dot_q4_0_oneblock_avx512( acc1, pd0, pd1, pb0, pb1, bs, i+7 ); + } + + // Remainders + for (int i = superblock_count * superblock_size; i < nb; ++i) { + acc0 = dot_q4_0_oneblock_avx512( acc0, pd0, pd1, pb0, pb1, bs, i ); + } + + // Horizontal sum of all lanes of the accumulator + sumf = _mm512_reduce_add_ps( acc0 ) + _mm512_reduce_add_ps( acc1 ); +#else +#error "not implemented for QK" +#endif #elif defined(__AVX2__) #if QK == 32 const size_t countBlocks = nb; @@ -1928,7 +2002,7 @@ inline static void ggml_vec_mad_q4_1(const int n, float * restrict y, void * res const size_t bs = 2*sizeof(float) + QK/2; const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs); - const uint8_t * restrict pm = ((const uint8_t *)x + 0*bs + sizeof(float)); + const uint8_t * restrict pm = ((const uint8_t *)x + 0*bs + sizeof(float)); const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + 2*sizeof(float)); for (int i = 0; i < nb; i++) { From eb34620aeceaf9d9df7fcb19acc17ad41b9f60f8 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 21 Mar 2023 17:29:41 +0200 Subject: [PATCH 56/93] Add tokenizer test + revert to C++11 (#355) * Add test-tokenizer-0 to do a few tokenizations - feel free to expand * Added option to convert-pth-to-ggml.py script to dump just the vocabulary * Added ./models/ggml-vocab.bin containing just LLaMA vocab data (used for tests) * Added utility to load vocabulary file from previous point (temporary implementation) * Avoid using std::string_view and drop back to C++11 (hope I didn't break something) * Rename gpt_vocab -> llama_vocab * All CMake binaries go into ./bin/ now --- .github/workflows/build.yml | 3 + CMakeLists.txt | 58 ++++++++++-- Makefile | 3 +- convert-pth-to-ggml.py | 28 +++++- main.cpp | 28 +++--- models/ggml-vocab.bin | Bin 0 -> 432578 bytes 
quantize.cpp | 2 +- tests/CMakeLists.txt | 4 + tests/test-tokenizer-0.cpp | 69 ++++++++++++++ utils.cpp | 174 +++++++++++++++--------------------- utils.h | 28 ++---- 11 files changed, 249 insertions(+), 148 deletions(-) create mode 100644 models/ggml-vocab.bin create mode 100644 tests/CMakeLists.txt create mode 100644 tests/test-tokenizer-0.cpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9c1de5823..5b1b5ddfb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -54,6 +54,7 @@ jobs: cd build cmake .. cmake --build . --config Release + ctest --output-on-failure macOS-latest-make: runs-on: macos-latest @@ -90,6 +91,7 @@ jobs: cd build cmake .. cmake --build . --config Release + ctest --output-on-failure windows-latest-cmake: runs-on: windows-latest @@ -106,6 +108,7 @@ jobs: cd build cmake .. cmake --build . --config Release + ctest --output-on-failure - name: Get commit hash id: commit diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f46513c8..bf0e77b4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,37 @@ -cmake_minimum_required(VERSION 3.12) +cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason project("llama.cpp" C CXX) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(LLAMA_STANDALONE ON) + + # configure project version + # TODO +else() + set(LLAMA_STANDALONE OFF) +endif() + +if (EMSCRIPTEN) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + + option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON) +else() + if (MINGW) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + else() + set(BUILD_SHARED_LIBS_DEFAULT ON) + endif() +endif() + + # # Option list # @@ -34,6 +60,9 @@ option(LLAMA_FMA "llama: enable FMA" option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF) +option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) + # # Compile flags # @@ -187,17 +216,19 @@ add_executable(llama main.cpp) add_executable(quantize quantize.cpp) -add_library(ggml OBJECT - ggml.c - ggml.h) - add_library(utils OBJECT utils.cpp utils.h) +target_include_directories(utils PUBLIC .) +target_compile_features(utils PUBLIC cxx_std_11) # don't bump + +add_library(ggml OBJECT + ggml.c + ggml.h) + target_include_directories(ggml PUBLIC .) -target_compile_features(ggml PUBLIC c_std_11) -target_compile_features(utils PUBLIC cxx_std_17) +target_compile_features(ggml PUBLIC c_std_11) # don't bump # # Linking @@ -206,3 +237,16 @@ target_compile_features(utils PUBLIC cxx_std_17) target_link_libraries(ggml PRIVATE Threads::Threads ${LLAMA_EXTRA_LIBS}) target_link_libraries(llama PRIVATE ggml utils) target_link_libraries(quantize PRIVATE ggml utils) + +# +# programs, examples and tests +# + +if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) + enable_testing() + add_subdirectory(tests) +endif () + +#if (LLAMA_BUILD_EXAMPLES) +# add_subdirectory(examples) +#endif() diff --git a/Makefile b/Makefile index ec2eb7569..dffcdbde7 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,9 @@ endif # Compile flags # +# keep standard at C11 and C++11 CFLAGS = -I. 
-O3 -DNDEBUG -std=c11 -fPIC -CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++17 -fPIC +CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC LDFLAGS = # OS specific diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index 108eb1fcc..c506676fc 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -10,12 +10,10 @@ # - Name (char[name_length]) # - Data (float[n_dims]) # -# By default, the bigger matrices are converted to 16-bit floats. -# This can be disabled by adding the "use-f32" CLI argument. -# # At the start of the ggml file we write the model parameters # and vocabulary. # + import argparse import os import sys @@ -23,6 +21,7 @@ import json import struct import numpy as np import torch + from sentencepiece import SentencePieceProcessor def parse_args(): @@ -30,6 +29,7 @@ def parse_args(): parser = argparse.ArgumentParser(description='Convert a LLaMA model checkpoint to a ggml compatible file') parser.add_argument('dir_model', help='directory containing the model checkpoint') parser.add_argument('ftype', type=int, choices=[0, 1], default=1, help='file type (0: float32, 1: float16)') + parser.add_argument('vocab_only', type=bool, default=False, help='only write vocab to file') return parser.parse_args() def get_n_parts(dim): @@ -134,6 +134,27 @@ def main(): ftype_str = ["f32", "f16"] hparams, tokenizer = load_hparams_and_tokenizer(dir_model) + + # if only writing vocab to file + if args.vocab_only: + + fname_model = f"{dir_model}/consolidated.00.pth" + fname_out = f"{dir_model}/ggml-vocab.bin" + + print(f"Extracting only the vocab from '{fname_model}'\n") + + model = torch.load(fname_model, map_location="cpu") + + with open(fname_out, "wb") as fout: + fout.write(struct.pack("i", hparams["vocab_size"])) + write_tokens(fout, tokenizer) + + del model + + print(f"Done. Output file: {fname_out}\n") + + return + n_parts = get_n_parts(hparams["dim"]) for p in range(n_parts): @@ -151,6 +172,7 @@ def main(): process_and_write_variables(fout, model, ftype) del model + print(f"Done. 
Output file: {fname_out}, (part {p})\n") if __name__ == "__main__": diff --git a/main.cpp b/main.cpp index 3321818d3..e97611e28 100644 --- a/main.cpp +++ b/main.cpp @@ -90,7 +90,7 @@ struct llama_model { }; // load the model's weights from a file -bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx, ggml_type memory_type = GGML_TYPE_F32) { +bool llama_model_load(const std::string & fname, llama_model & model, llama_vocab & vocab, int n_ctx, ggml_type memory_type = GGML_TYPE_F32) { fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); std::vector f_buf(1024*1024); @@ -544,9 +544,9 @@ bool llama_eval( const llama_model & model, const int n_threads, const int n_past, - const std::vector & embd_inp, - std::vector & embd_w, - size_t & mem_per_token) { + const std::vector & embd_inp, + std::vector & embd_w, + size_t & mem_per_token) { const int N = embd_inp.size(); const auto & hparams = model.hparams; @@ -832,7 +832,7 @@ int main(int argc, char ** argv) { int64_t t_load_us = 0; - gpt_vocab vocab; + llama_vocab vocab; llama_model model; // load the model @@ -864,13 +864,13 @@ int main(int argc, char ** argv) { // Add a space in front of the first character to match OG llama tokenizer behavior params.prompt.insert(0, 1, ' '); // tokenize the prompt - std::vector embd_inp = ::llama_tokenize(vocab, params.prompt, true); + std::vector embd_inp = ::llama_tokenize(vocab, params.prompt, true); params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); // prefix & suffix for instruct mode - const std::vector inp_pfx = ::llama_tokenize(vocab, "\n\n### Instruction:\n\n", true); - const std::vector inp_sfx = ::llama_tokenize(vocab, "\n\n### Response:\n\n", false); + const std::vector inp_pfx = ::llama_tokenize(vocab, "\n\n### Instruction:\n\n", true); + const std::vector inp_sfx = ::llama_tokenize(vocab, "\n\n### Response:\n\n", false); // in instruct mode, we inject a prefix and a suffix to each input by the user if (params.instruct) { @@ -879,8 +879,8 @@ int main(int argc, char ** argv) { } // tokenize the reverse prompt - std::vector> antipromptv_inp; - + std::vector> antipromptv_inp; + for (auto antiprompt : params.antiprompt) { antipromptv_inp.push_back(::llama_tokenize(vocab, antiprompt, false)); } @@ -925,14 +925,14 @@ int main(int argc, char ** argv) { fprintf(stderr, "sampling parameters: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n", params.temp, params.top_k, params.top_p, params.repeat_last_n, params.repeat_penalty); fprintf(stderr, "\n\n"); - std::vector embd; + std::vector embd; // determine the required inference memory per token: size_t mem_per_token = 0; llama_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token); int last_n_size = params.repeat_last_n; - std::vector last_n_tokens(last_n_size); + std::vector last_n_tokens(last_n_size); std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0); if (params.interactive) { @@ -980,7 +980,7 @@ int main(int argc, char ** argv) { const int n_vocab = model.hparams.n_vocab; - gpt_vocab::id id = 0; + llama_vocab::id id = 0; { const int64_t t_start_sample_us = ggml_time_us(); @@ -1066,7 +1066,7 @@ int main(int argc, char ** argv) { } while (another_line); if (params.use_color) printf(ANSI_COLOR_RESET); - std::vector line_inp = ::llama_tokenize(vocab, buffer, false); + std::vector line_inp = ::llama_tokenize(vocab, buffer, false); embd_inp.insert(embd_inp.end(), line_inp.begin(), 
line_inp.end()); if (params.instruct) { diff --git a/models/ggml-vocab.bin b/models/ggml-vocab.bin new file mode 100644 index 0000000000000000000000000000000000000000..aba94bd10a4ac08a05b3eca27d6ba0d8d5115e65 GIT binary patch literal 432578 zcmZU62bg3<)%FoVf*>szKv2tNVRvC!S|!Ijo7ja7&CbG-xjoZ8GtJISHxqW*Mf@%p zz&s|z+?Y|(CLlCfI|v!XaSyCfM*rp*#!s%cuoNp z6yUG|EG)p`1vsJriwdx~080vRWC4yUz|sO7U4Uf;IHmyQ0z9_>&nv+53vg@!jw`_N z1vsGqCl=tO0-RicQwnfu0ZuEx=><5W07(G`3b4EYD+;i(0ILeHx&UVuU`+wm7T~M` zoLzu*1z2Bz4F%X(fJy<*DZr)z3>IK>0nRPJc?H-~fb$D*K>;o-z(obPxB!U*A?LU z0=%pMFE79=3h>GTys7}NF2HLFFjs)r7T|RSczprhP=GfU;D!RcsQ@0(_tVHy7Z81^7?_K3srX3hoD8NGn zm@mM?1$d+YpDe(q3h?Oye5L@OEx_jr@c9CKp#Wbjz?TZpEx@hhc{Hy>! zFTgJf@XG@HssO((z;6oh+XD0p@Vf&1z5stHz#j|nrvm)B0DmdKUkmWJ0{p!I|0uve z3-GT3{JQ}Eaj+Zy3t{or$uAPkm}L(-s!!pc+)-oFEFlV2#1hpJ)+TBK<@w{C%gmTr zPtnu({>1Jch`+~}BQ$)1+0@L+L{(&%5H*ouMPXo>if-7fMbtprjA*KEcD=%oc22p> zj3M+!hE~C+O*D?M4-kdzW*woH**!$fSqM&49XI>5Vn@unLK9}+B&tK!p2C`DKNA`< z>k~CW_t#H&@f%(f_7LIZsM+ohvGP!Yy@;wP!M;Rw`@J zB=QbQ1Hy%hMe4;uNU@Y?8nI=unSqeUGH@7dP88uWyRXr{=fCd%1*lb1!DXx5o zou%ppet|+aqrr)qknCE8Z8Li@Q3F}ORIw23dZH@QzMNF@vf1hNsCd&0`v07>NS)v9y!sj2_1HGj>!|we>HPeWX z)vkzvzAUuP?5m33;bZIT%q)XL}7v1FGRQZVU{YuxC`DXSh2dyY(LPUXhq2 zead?R!-LN}dn(;R%o9R~ne9!~MDy)KG>gvrM8$5wEJ8E}Hv7%*4xYno-6tvjOtbx! zsaI%;us)3%d$Qs;m>r-%@6-n}u!TwSDN!O&K2^{=>(iJ)X!*SHbQNHG!%KUR5`yGl z@kOmYBk~1{Llg}W4<%Z1irF)X#=+@X;#5pOUWPF2Gie})>IB$6M{HBG1(IQf6CB3S z21e#Wk@^IBxa1v2=n+bFmDwT%`a-ps*$t};FNxt-njI;DP|HUV&7vkRoI!3W)VYVfkhR?Cj6~`J1_B_RJG<&`ReU3Pmfep-E$1$)u1(}a0tU5eFq(0_P z6g&)F;H09v_=IyZS!{&_r-)(`bC2`pg<;P#^WBYwi4A-i&A^%_?Wj zm;C$x`EylZ4}=dX(K<5NbrSyMPS3AMbX|E>!4wW)~5Sq5>`! z(`mHmB@C>h1}+sRkH3uRo3rESPbmq!+NvT&;|~#C37Li^laId|VZ$4s&iJFzoFmGq zdz;y);1H&oF~*NW{00N7J`68c1!gYvRt69G+&V4_@0}BZ-rAFdVKYLTgtOz2rbWKt zYO^bZ&NZ77rBAfe3PfAXD2-2Rv+A5Bo4zqiS8hi8A!I1 z#2oBbF>uLJtoX`qEhw)hZ1~FcLc+RB{$I*(gq<=|CJ&%~vP_II^sbSd7g=@<Iz>a*#KW8MgUvA8WVdx?rsMQdd*Awi(>Avir|*-J%x zs@Zi49GJ8$!d=%uJ7H*brfQjB!(0AkqD7r#f?Ty;&d}ym%`(Bp44VEG;^3jLn18}! z!^#JfFFyf;J$t3(9P&xvRg|bXY?;t$SSDYs{94FQaMq=G4MSV#Gnru3m%q6v#a7EA zbcOb{l45e&vez=R?!8^?MPDZtsO{`^lA%^dzj?i60E^d0ZkSKrK)BtrdjsRE$nA|p zF4!BDgD*Tc5C*8q*$s+EyS+(iP_8#Ar&;eOH;NQ`e-@$ZEpI06EUHYX1)1JL$_5t3 zOptx#tt!mNQJCzlq!{rr^fqM+0p6x$-q+twSVeVYf^}4fy@MQfWzOLpqzKdKlJ6u3 zpzNJW=0m5=z{N`}Yctif*M{B1(5YF=Zc=7mh3{fuIN36xDyE)yGpxDIvUeAm-U*dc zMY!Ds5DqnG`(6n%vBR$2cedTNK>A z$Ff`0*|(2zo&5+Snn;>O=yHEl*|)s;KdPh?SR6a5y9+E6T7YTh*2o!^bgKkK`r8=T zfF_pR#=v3BW4DuO)4Nx8yHfeYdx!FxLgQq2Fu3lM@SP04yzcXfaHzUUb(c~OB3Q8D zy8hh?#pWp!tRkJ=Bi1$V@b^%TQMBL37+Rgd9yI$H(>1(V@15WGu}MG+>|;|w6*|m) z%At;Y?-PS@*S+s2+hxlvyPu&gDk2|eXanQ);}XFq1A9R2jyVZT9$*BzH)iz*i81m! 
[... base85-encoded GIT binary patch data for models/ggml-vocab.bin (432578 bytes), not human-readable ...]
z)EW#M|b;pOxenT>I(77)*`0n`F6U^C=dRcB4RegAZG;B2VMahy=%G4T{(Z zWx#_Rn2$%4FMnPK+L)s77INE?5tbXLB32Qc!Fx}s;8yg#mWYSz7+9IG|46sZMT{P4 z?a0ub13EE$C6=;WY{oDXbR}B@r(9@~5%r0R^uuNg!Iltzon#t#h1au-Hf zx>>rNlt;;7E7p6VO>3Hm%)JRptyVDgJ{*%75iX>nl$_Men9p8)KfBT&yHGg`0U{OpP?+ zw=v4`V}tM76A^{^o2(+p*T%<`ogVVC)|dtX?!al4%VH{=RL+T894(P`_f4=Rb;eXq zOtNsb&&O2J@cy=L#`t`?A z_^3CSC5E!!i%J?QHGitJq7OWCgH}&t3{GmX0TaKa?p&Bf-ZQB4xdxki;X-RSSgDgu z92G?ZzVrs!to15FIa7m&^E&2~vo-4tQ zt1Tz!-@v6LTXxBVR7jRxb}%SmNHIoCK@7Q6AV38O6bVU+OcaPfF_#&|T!cyRq(?pW zXYcQp`FA~?U!8OI8TJ|W*<&%q(`L?ws<7X0dL>t8c?_S?*4YgyFnUJo1EtgAOpK4S zgLjwG#U5V!lV@TGY+QO>QTYoO>Q+6YxyAP<49>brxd07Ln5v)j=&EZty{XK(xQ+Dm8kK5>3O*JxDUXo`u zt}7UDsSG}Lxh*q|b8f4s$c*z{lsw~{3PEussX{bz!D>ILt3ykkSXvH~=2U3ecoQv^XfL}kU z5`^xpWQp6ugeE_h7I8RVKNo0bwaB__Rdrr=)z-4Ph&~5K&S@lgDsn}_b6VnP4_a?G z4RQxr3P%_0@8NEb{i1{qmOb8Uc3zWy1QF4Bb^eMgz7^-A5j;$|&#>|HY9z*@G_qOV z#46>H;c29*9Ovml+wJE218$#`U*wevAqb#&GA|B;YxSx1%ONAL<$<&B3q!IXUQ zKB&m^I!?l7Ih4}R3k^`)ZlP=oF@c2nR3O>W1gRr4bjCplZ!&; zs{>7{J-Kw}{orw)=x9BLH-oo?q8{Dm!r}2YURMuMQbrCx zS=Smo0#CP+X~ff3Rqgq+@L{sT6bB#L6)MDK1an`7&T@~nR48%gie4W^MmKhLOBs1y}>!9*R4F?}s zR?YMwx>wIqc-ditt>%{&;hKOg!g6Z8F(!)Z%Q5oCK;C>By7b?4A zC#J>AP{8oLVcTloQE#>sfYC{VC>mYT5|dnlomnhtaPloXz9hu)mY-aT;e;-GMd_mc z;zzBzq;+zhbUAv9L=G3x(OVP)ISiDBBY>y#tl?rqvMS3OzTRBU zfK^$1Cv)rU!r%s4U5-iPPy!ok`Y6!+LfIle&T~jvM^$(r@G4u@O!C5T{HSl33)^rx zCZck{wwJY27-TBB+J$I`tBNCb^g@hh58}S!f`BbA&iaTh#OS!3*fmNQWf*%E)h@8a zV$G>GyTj^PW*N=GRsHP(m^Bsmxqy3lA$qlMpLH%IM)7&VXnU&QT+KnkIRr~?sPcQj zNVbZ0GY^>+xfg|T7kX)-jI=usM*l1 zwr#=isG{&BLTm40^xW5gjb4n|D!tYUr4Amn@r%(5_gqfPIJUkRot?+2QANomsHp12 z=m}3S_YqjX7*hispfPHlFwFDK{F>FQ-e9^eb6W@a7n2lp^P zc(X!#W*06Z!MLntgKqF>COLkK%P~SJGml@6Vci+Jp(<0n;%2K{)~IH$C!kdu7Bz>m(s4w!)P!@2xp9>7;>X>)p)v|&A8eR};FaOd6*42L)Bg0Y^#A{ zD*Rg8J<*(V-Wb;CJu$3}&vBF~W7)M+G3~r3hK5PMYwX^$yN+C|ALjo(nwx9)(`RBQ ztE|Gl)%AhIife!M9?i)CPVhCQkp{BD?ujY9=~nBg12|l@o;olCK@QP7$PR=D=%GGt zk6>oo0`!WPR^Ms%CM}Ya;j?4(P0<|30owUWyYSY=-xSTgiNJDi($=~(&l9xrrWm}( zC8_S#g!Zee42(=Bc~u{x>DE-?7$93p!XdVf>Y^|@YdxhSBJbc$F-uD`r$h2P%&rCt z(YALh@qV?d(J7Q&In76}MyCq2lM5wJqvJBJtYg z5YN`AXGt{tLapa&%)(`^e5G(t(8}6AP0%k_g|1m>)}A1_IUHL(KV+GnrS`0xvhA$T zix?E!o1^dZSWb<;S)gFzTaoD_Jc-BOEFuB&WntOxRGWOW$UpbZ0)gXKY$Yq8#$(kt zN3SvPufJKi(v7W|BAo5>VJd z43&WTO$P!HwILjY|RRg&)9bY>i5uBxh3c&Gq>FxOUN zS{5ND>Z_Vmyk-$)uA%PDW1Z!=URCDcX)er;HC5|Q7=mj87?hJMG_J44+#lX!Emd(3 z-tbD=Ib7IjE@SJc*!c_ARqv2X%~>zIz@v$Y?yJgfESEvf5WZgLY%{@u62D`H0umc( zE6W`VK2w%+BVVfRs2n~Da7KA=^z_DCY~n9Q)j?(=Ogd+q>U#}FtRRQV{d2Z;Z!Y$e z!6w|!I;Y%!@Q&&<2$*$W;9=uQz>eG(*qMR}rGG;~YwSL4b@x>h>#|TL9{uL5bYBd3 zhUgsQ_vL$6?#=w9o>0jdUc@%3k{tmM@jUv8EAv{hMACv95)Z#==CQB8!&=dE_1 zHVI}b6sl*AGdml~fdW|KcwN0u14iY=IUmjYV$u|r8j^2KO%*WHy0)HBoJ4I2ia$Mh zy0$Y1-+9(ihnDaQ*Uj2E?_kdebg`V?6#H4{6uli}dYKz=Q}yS$iHAy-4;-^CmEaJ$ z*R~UE-B<2TR&^yBd#eTwIY*R@5q+We?E8K6t=ccHy=G&|g8c9^qj+oJ;~dPE(pv+w z9*4=sRfFA0?``_ z#hq||vYSLbqniz7xMP;px5gR@#rx2RZYBh`?X$J4<}rSrvvpNlndNFwwZdO65nrjj z+pzO0I{LtW7Jco$Ra*of%dp3|=QaApv)oPn%o9tO?W6Y6m+Yq6n?W6FnBXtXa6@nD z|9K=6+c}l)lyyhI5Q(FL#4TiRi-zOdy)}%yO~^r!2;upCx+`7r$u4#8c+Q{3rz+G+GUj}cFc}G8ROsRu=ta!UlI1eVoxqs41 zstwY`DOyWt zvW2y}poL99BOCR1&Z;2iCEiS6ch5T^pe%PyHG9g|HJ}>_39IS|t>0~Jm61}YbrLXF zf^`K7eM6?dr(!UoF4WFjUs(!#JW!X}J+}GwSnO_|iwO#7$=;NYA31DW`Uo+EZL1QY z3v5S~h@yp87E~kc9We#y71+o-gdEOZe+`emL(_)Gwq+Z8M_}qU)KK0bbUG1I${M`T zI#>^lXAM+fJ2sIY;2HgtcL*VF@;_%~ecdT9x0M7E-oKV@dbbxm@@b-)%`HO-)|7^S z1v!xaA4WU5p)o+r+NzRB=B$}*;)jJEtSJM#cw2vmPAB)CCSgCm*aaI$47e-}MkSEiTo)4G*lgEDvq2H>_b z{as=^nRfG3yFXB~@fcUm{ldi^!GX^I;r5IvSui=p{ec&PMuV68b-G=M$IrOF(0KRr zcOt7CJ!O;k=K$Tc&Al)N%7Rh2m!iu3f$^gio7TUfJzu>))(CD+BD`SjeobXKtnkxJ 
zMe#v9Z;fmg&INi8tNG>9lLc$46CV6`O%UB~&zJtXM#@p+y88NXGsPuqt6<@QbO+B` zC;NJsldOBcs2x@@h>N?or)=V+cC4?~F4Mt5*1EBDlWiuzWui!MY7X<0_%?1kvXxVQ z+$|fn^8+303$~-7gbf&48_itWF8Z;NrV!;SCN|m(1kT_kbxi;J(-VbG{g2B)D>Vaa z`&`d>TxEQ;Cv8GyivM_}O)5$BJ}WD^j9{mtni1b?y7}V&;qR>wiOANQu~t>WJwjV8 z)4mnz0)ioP<*TKf(DZ66hc|)q zb2~X3hv!dm^)z+7;9T65wR`!=O81!HeF5Q)f+T3h(!2TfwyA%MI>gbi8EegLx+0w0 zRyKQ?$N6>w^Yu=TcCuaUC@}?irw{};9LCFtzS5i4Xyz@1jn&vYqXm?)JOm}~dG#Hx z8r;^U^iCap$NVOGXW(`T7qS!Y)WGBe)g}dV9_N-EDCLBaOVcXZA}-dT7pL><{EQoN zSKq0{!;axXQUkc^vUl{HigLOYCFvw5C89;>jadTxBOmih zm1SU+x8Io$g~j@=KlYAt=a61@vt@TG?hdzm?+na9_VlyWqqAq6Bp75Om}DErH4EBkQN6@9n!BRsF{s7l0{ewWTJJ(Nb?r71t_ItA~FDZh$k_t@^=EbT&R zh~XtGsmKi|Q;)w(N7E_X-uQ!iBCG5nJ*i?R(ElmFOY2-)2gFvCSIpOJIzceAaCmtK zscRl26~R|6zwoqxSg`)Cm>EpQ8~Q)pXs!y}497t;As>V}_AX)C)tPNwALu$cvp=B{ z&6clrcKReNjZW5ypp4xthEX02oL+X6&?Vtvr~fV?HTZSTuY#gWNJmI-zAF~2=X!L& zp+0y0DQ&5ni}ZL~IfZ_%N)&9JGbk|Iy8|&uSQc#L-GP{5&lMY0@`hL3U}Gv#%rHB} zyER_S$GDx8;VxBe{N2Kmdk^7Qu!(mE_PHCMbVGGjyHs;dM)}<_!^iemMSsH&1eenZ z`q&DukZOJ`FG5|`vIB6Gtge1tn1NczGMDD&ut~}f%nsn{WQ9c*^jcsXU6O5G~zz>qNj*(XWzT);YB}Z~e>)&M!xuvS2H{V4F%4rRuy5 z1+Ef4#I~|NY9Y3*3|ty8TRZC4nQ!s(d5>n;oo?93dt$2g9k9{&s3$l>@mM)yD#dNX zb`(rB)rv>0SyS>1532QS5PcGE zpd1O2p&`NA*~3@7!a9P3XP48~RSj6H7kg`#3&5+{^g;v?2B#wTUbiUdtx+Kay}OXOUNf6(#4e7M{-1Z1S^KRWDrD z#h$5UmaA!4UF{Voc;vCz8|q;NfBFAQoyV=II`hF`T+2E$?(mu`CgCAv zcw0FQCueOtf$ww4&yN0I1gt#}b4xQpR*XELd1sR&6MaBPb3c7!Dpz<|6Z&1`Cv>da zoRl62l*}W49#^Hp&*6!LMsNb2qz@$8xpvQ6IkQgAoj@rhKTf-{+H`{U_^h;46}-#K z&`!0?LIY!!cU)bqB9yd-N->Ww*sA_6N;g1}!~Pi_wy z?1);^&9}ta#i3ABEskcZ3tZC6jcheIquG$VaNvy77gwM=^Igkwd*PX{uf_D<{9o2m zE4yLU+6jP-z-`;n7>n!i8hEK@0d33P_4YWHt)E~1PKPp3dww6=rqVd5n6sfuc@PBd z%`Nrhu8M6db?Yl^Msf)e7p$c=_RLya ze?xR4-0NhAUFXW5FI~Yc5$3a})-Ng~bzfh|B3uSFP@BBMAfi=e38lKB8d>J*$+4cr zP+>dM9@lC|Uu;P5T1(w2P+~-fsQm%$ zRScy3nCN8nYSp^gq8xf)%OP;+nVhqJrjPMj9wa<=KDWqb!sD^LK+lFMc8TUa5dBD(Lg zvsU|HY;hcR)Ia#*zc?f4Bxm|{KfMiQ6Mk^@gRyGDroJg?^H;9en*J`%EY8}x(qSdh zQY(N$IosN*u<jPiUXSpQ zLzU&WgG-RDN{+a#qUWzi?%2@`p}W9}vJYt5q-i|42`AH z564`iVq;@U^7OLe!@7_5vUW-z)@56=@efO{$NkI5o9~3m;S2Xko6I_O&fBs;aoSo% z)vw-Q(;wDiiNoBgAI|;l9#?DnplQd70HIa#pf$489j=zSDyRowXE*cz`qCA!k}bRA zh;?PmLvO+?$c#$4Vr`9^GBNIsU=`I{t*aayS|h>ch|5m|V5r_&KVd9;c+PCqO}TCJ z!&;E8wGl+yP~+pQapQ}vEL(=PY1?XT;!I;8)iIg7HJ%&-xxRUNMbK*f^DabG^l;&(wAPT%hJa1)zDDd&hlvdO% zBp_@9rCla#Rh5E)>$F_$!8~MPak^l=u96j>@J4pr0~W4kLvv0-HXqb!!1>;Bba4o_0%hO+mwnpVozHr>O^I$CWDTHz#(RjnzE>k-45gk^a=dc?6h$bBk*D)pC z>vHiCt!<7v4#Lt$H1w$h;mdmbBLR5NL7T|RuX>eDW*I;9xKYX<(RQf&le&`abAX+5 zW!-6g9}Bx#uvS%nPz*k8HKl`RUH^#W5W6nng{?+{CRz%do!y}k;mftVn(EQUXP~fi z3m1bHxh3vu>j_5e4<1G8$I_g069=kw z?fopt&8+3BUa+=@`3Xhji)|}EaT>|JX50CBXfBMzcXDv31-xLkF5KL3gN>{UA;g_Q zEn{>&5OR>C$Ipc^eabKzO=VSE!~&_bu7mHT)pZ+R-`z-Hl}ihuP}aM|O!7IIpA|)H zDzEE6h>geWTIKp5C`~yjH=X^%H(xdD&iDp{SE_cMUupIAST3l(H?k1qq#p&Vs$NGq zqM7|XxdK-%oAHCzhykj^AO}j%<`ypOny|; z;oie=>tyARYNsBa3oo~otme{Co6e8pnBiGfFy;xuQcG|LVWFzY`He?GmWt1ZKH%zZ z`%e2eRe@DxYuTP0XW4o}J#ocanS*Brmr=GqDyx*ag@5{$>U>md8o>d>?W*l-Z^hqb zPdQvgPp}R8Iejqv{He%-A1FV4u5IQdKo11mc@ruzKG z*UUba=asuE8~K><F)p%A*bpDnIA9)uol+KMkKuXwV>&92 z*u=-QmyaE=$&U$172Z4?2xWb`*{Hj0seDXZcAm?6dKWS-(YV9}i~s_)Ec0p(*LtS+ z@S<+0ZN-{f&B{2U;CgTBYxy#2jaa(tD|F5txApAX6lhCmhx}m7LOi0OhdK#h4kaK? zMorO@3py3|v6tf>Jj_$0FNm+aVbTU4(=igN_D;fX3J~lkU(Ca75N8Col@kNM#6u8o zD~lV17q8Ozp~W-P$Av3A;=u|3xWHqHhrA2L*wK$`{+2H<*x1Ko(p^ig__&b7tB4b! 
zlpt_XfO6w*4Y*oi;Y{ZS;;#&4j!1Wf6YD-c*1y==iA69G=G@Jxc83HPY4|d)8HqMeIjtS@jNF*kvUlS zz(_59Lg?LxzxRj=HZC~3ev>CY5g2xjwMhXfUc=HYE4|Mt-pVIp55E5OHm$tA_gkTh(axhYej*AMUsV*Gl?W3X|A%)o{mut?UqYqO~)< z;M^iL;l6fxsCCs7sj@<@e?n)EQ}x^AviWXMCc!fh~T zTk6&P72k8)**uCiwxcwxRY>-SH1W^*PI^ex7RPbuqlwxYeJGF!O%HK4_E5~W%W?`O z&pCTsMdml!G&?Sc#Q}tB6Dm_+i#e&}0pCdv3H5h{G_dlJ)IuXU-JuCpIiMQ9l?@_UbeAHHER9F|YLH04Hb|hAa1Otz#+Gqc zp1|OO%-EL7J3D0ip%?#8XSKNVxp>lP`gc?u@gaN7{yV0>;CdbT??7DLUk1*Cp345+{m<_psU&ILvkVBRP_~t(y!$(_w%yK3NBM= z{dcSzKR!6Y+gY8N@h3_rtHa&yX6q=SK}iSzhYSxar7;~raVy)ngNJXn5y7C;Mk*0) zWzG@aOxvme32!^u!2e66b+b&GSLtz9yNW5@5D(Uzvz_0Kup$6CVAakvjfOZ%+! zuT~NaxNi)5JgcNX=ZTgsmRH!MDm$%gW&PWLRiwJb;FH|J(}Ib_*QzRwSK#+pP3bU3 zs%ICL;iWWMv4C8F5f2gEc?s?tZndUBA&0UxRXOfaKGs`8(Y^>X*UG-SVh9hicEWbX zL`VJNImMW|*)L~DXo@6t^@FdkN}+Ku$iZCmK=Ydk92OsLOl{cG>U9fZe`#z9rYj?D ztJ<79g|!|1KgSf?!9BBeML)gvfhhw?8>sL-Ql_~HnX0?X{ z&r=Ww`Zt2DG?eB+#HLZ2Ll#|Bfk8qHiMEzt4xx6xuBJ}mzq*yZcZC5TWBcJi9d1Lt zubu2V!A@QAqAP&)rHwX9_Spju$K2PLQQ11J%VMC+iaWgBd{`Hrzf?J7IBG_tdMIO$fE1Cz|-Dm3-QUF>51wJi7luprkpj=2mO*iz3I z*c9#T`33K6C;Q;RlFwOJrSLp&z1{AtK$i7$Qo`X8V!s@TJ3o)`m7A)!@dR5*0NM9l z^ftDXQQUpIZ42PW=jp5MsHbr$+oy$!OX#$Yd^#rhCZt89`S*FAg=3$N&GJtpuTN`j zI1_e`?G9<6(#SF}kQ9(&@D7_$(uvl|PYeC*&&RE-53X@Irj<_%<NM&G)C zx>y*AMGlyo%i7r`o{@gWbyN;Q5(|ayr(;F)-sxqHec-;Ye_wu+4YIAlXKhpUiuwvd z=;5bhDNDu4w)9n;pSg@V^>%j39f0i!(&PoZj6Mjwg!k zGNuns8!o2ekwD_c=NGK>Nc8n)0);&iSSA7(s;BxFY3QW>r5CV%1Rs;`vmWZ8@`&aT zQbs;>rv3?Ibi77;**dZC{Mp)nL}xr&dm--~WcFzV;oVFq zsFd3}3kj9s;(xbQm3C6mY65ct`n9euz#0(?xbc}-PTU1tC#$N14cR$sW?y08 zhKKky6~tR8eB^ZjBrc@2@_)bmt*sB}E8jhW1}0H#P=+F>_SAhAnD6_(vX~Uui-YU| zd_mjHdHiY+7i*S)D{^cVkub z*+9$jCC==z&&CAa`Z6n2L_P#cDnb}<$b`=d#XPqd zSjwnH+fePjc;BLJslAg|Y0To^mo)IPchbrpY2jVD|~WpLC4-D<15Z6JNfw&OHh%Pyie z<$Gye#TLHxacikqabO-(hIaPjB$r1gtFS=alX_R5d+|8qw_Q&q!?AKdYa!>gfkt*+ z&vaAZ!}iIBYIcct?^br(Ih^bfW!!ShEw-bKd7gPpjI}i}Q@puH+M0D@X}f*RXgiPw zkz-?RUHu%!?n9EyLiJEOe85U7SlnK*@wRpuBo8-ETbGH9Y&qU0voTjbF1K||pXaU) zMd7S@f#bub1@VA;kxk&op$4)F%ix^9Z|m*Ao<|^~M-6=i`|?3sZO2-|^HNg)!cl(0 zg1xt%&B4NiN00-b;+6$cTip!DfE_`_XR>3zE2zhhSug7&)kCJIzYn8gk|T88>&+~J zKpp+y%$VX7-pYAdc3W?CATF^kv((q)gp6SCF)itu&u+v_BRJR@B#Y2aKwb#tD=p5PFqgs3D z6T0wb{ZTErnRA4+4g1rUMzeR1wKXwkIT$+nlZDsSpRXrKOD|{3T|7W1Szm=NU$Q}V z$RF>vnUe@&m$4Djmm<2^R`$$g(%Y)!oHxic6+<-ayWBpXrwU7U zz+)Qwyhi2*>zHm7pBFA%qHHBqT0(ba{PWr@VN>CnJrl~C_1j8*-I?BPQi*PMDF93OA@U-<4$i7_<$yqV9 z@oZBS!Ecf4lr5EV^#RV5?W_aUg6aNFmh*?BeL=HJ6fN!)EUrfK58l-mAYpl% zenDt*P^_%6F9=QNoCR8ZArPA8{iQExGT6QIxG=HtFT^T9c?hH_U2q)Rq|)Gd$;$cv z-5{ruE#d3eIdjv>f}{p-N?%0aY&A7nC@^jP3$d9GOj|=`@Ei?ckiA48xMvFOxDTTuL%XRZWaT*T=qRRc?gR~>nmNHfrB$h z$hDU$!R|v431hR}ikJ%R!3#?9-P zFUABP*>9C>goi&5_vtKiBbxJ7HFzA!SxrftMp-?Z!=w})MQbR>9p9~HV?hC-E z964ZX*$yl;w2#(Rh;?W!6}sd*u&wl>d!y~B0wQ@><<`}p30&06u>=?VqV@BWQ_3CD z4g}%aTU`g}G3h(_IUD+}Fsz1xGmA2vO7d<;izL%~ov)JsUB}dA3tti{-0JVLjeJR{ zKqYgQRXF;km`GS8$WOl%xZL<8C)}3;mop(;E9s-Sd zJZj>#Rs^H<{)>Q*AyE$HZ*;>2{ z`}sj&5Z*$XQyM?ZGTAzqgas6A>YY~fAp-` zm$k5Q!($^~4kWm81bhU25R%LHQu+2>6poeN)nTh z<4z^#A^n=!Nz^Ia|jSpf;Bwbb3*iYGaG zdI_FY2z6-v{EVn~kJ}&zJ2hpSY5^L<1Jn)$0$wN>QX1c?9C>Q_Tu2Uf^g$p(2sG;i zdaxV7z_=6WaXBWBqfX2%$~ZRG$^FKpqcp4EZ0?t!-5;EmEh2q_jg%aJt$sVwT`N|AlSOufqhWOy-px5s_V$< z=YWpEt-(QbmOa9<*zClDdqv5HIXnB@ebkmduzi8X-p(FyYPBY-dkH5@o5Hdn+sHcIXuWT|sS*J(NJ1O$hs{O3k6uRC_#D2L5R{=dy)a#aC{q z>D!Om>f>4&_3%@H`IqX?9!g>dS6l-slrCr58c!n3CsEU?`pl#6_GAp04^rVCnvR#`DYLyPUU) z{6Z0pugNE3(n74pbJEIciDeF@@PxK2%1#6r#YSK{`^34=DW}GZy)#x*a^pF$^F*95 zoWr{|n~j9%g49*DovfT@f`cr@$&X;%dEmIME4k0zkUS9!3pQ)}iC9?hR4%xy!cNwm zt8+$o)lv5;B$p~Z6)xr$IA>8x$Swp$rw(F|b|CFbHZvFR{5Dkj#gA@0W?T8chXmeM zE!VNXOn!+lkViU2k9zLCDFtw47;hIzVXvk|Al 
zYS|WNzSWiH20LMm{J)O#j@1O|8ty?)Hr3#4xZz(@Nmq}vb@le>thJOn4j1UFA`o09 z#2NhHEBhOCNjJOaYUnPl>Se@MWuHh4Ya8)0uWE zzN$1Hk6k4O%5sR~}D)2>FJ00b@cAzs*x~g^a|NSr4Q%gK!5#{;Wr;_LbLZG8-Dk#h0ws=z? z+}~{bp=RWj7=#H6gxtgeFuZVWtDuYjc`RkELCZ}gp2|hcMw&mxwt6aN&!hE@J{5>S z+74wq_LQz1e^Swn;wj-!V%SmwVQ%As8-FSg1v3oNAxpZFH~jCX0&RG`Evrm%ynMha zYNAo;{ zZ;n2#-Ebqm4abxY2c^Z++CO+K!By7N(oGFplLWjej;Hyu^ zir5e5Av0<`|ERvE>f8jjuH+JefL0btOYnAf>$n56j+(jlF&={5r*|K{uE!IPQtM@9 z;;eWGfWC_3*bm29nFVud4jPNqY+FhA2)3i7TM}4TqjOO_ zH0Ve-u;rT3P?Y z#(b8QUdd8n4`tK&h3JYau&O#ZgR15KbK!Zq-qjK+GqOhJ9E079(soyiXt3tmJ}QQ< z@7ruGGdQlHy|d2IFzIe-=zC7$rX3J%HG1n!)=?d2#lrwk0Eb<3zNgXu>n*ofU&%9d z0&58@SDNcvZf1qb657neuGXoW=_5V2Ej4oW6}Fvy;jzs5x|3hBK(?>NI3ZL}^-+4y z-opoM^lKU=K723+`esYC zJsfLT9W_;RVeX8rWu8>FnWXg`8T2KOiq_Y(LS6jAI%}&P?s}}0mFBoZ-A8FA13^Vr zN&n-0WzD)8?;r;$niy(aZ6$wfsFF1#OtzK7$0iW3bJ-6K@+);vUrbE&7*1m=NrR(&RR=RNmWO?@c#)UBT2xJ%w@4VAe4u&pXp z{FgOVHbQ3Zc4Y1X)-USf8PqA0uP8c`{#$%KIF8yC-&&JG#w(0XW z@~qH<{S_~!X9E{|IqAlpjmcpV7taPV_wPA=z)C9MPWr}`y!GZ=Z9))vL^x4D`E2yo zopKOI-w+GW<)Qz6?vwHS;1PwANrGHJOgs6U zrWdIND~k5YnR$f6rLvpmMww3F%RG76F@gu*91dSioq@ep&k9p;_ZrV>t?sMgx*oS$ z%DVbr_q@)U>N6}jTT|WJZnbr#rWURBoVF_N3n4dn$7bj83kFrN+XMsYW(JxK+&-B> z01E4?fft|SIi$a_Kf&`tc8ORNZrn9gsVR3@|D0TWIH`HvbbLGK2Ek~Wp$HakCy6TX ze4cEl@K2j*r$9%ak7;Lvvb4sYk2zq*;t^M<+$ER1m$J@Ym=fd9Yrt-j;*N7BGS4;A zCNq7JcT4&Cz;$u!tyXzHa1HhN0$Y7LGycbaT2<{A(EMQL`B*S#VKQX5T;6SsEPnKu zt!A=}(RkM6q=8;jv+fqy)|G}s2!Wm5!PSPf6DCnHc>^iqh`(>U2?=)1`>dC>UDK|w z)=nb99_a5GCnz`dca1PS!{$@Or|@(3yjF0;&LQL46JyuF6;q+(x1tDmk zBZxQRgfbE0K$TCbP8HXU`WteYv%d0z7KrcY=@;~FrrlF8HeF^?RW|+Lj8ppR8`vB5 z7c`t8iJiOI$UZ~(y2a9}283S=H)Uz+3$MdI@-?-OrmG)v0(#6TgP6`46V3xeJ8I4qQDuT4YCh&e!1FaRz$3bVORE+*PKi1wszsT zP3LkAINO2)A%g93);#Vj&c3cWKDok-aY2uKJ-W8>2)EJK^N@&x7S_L<@@H*A(2Bd@-(}%BZqTyQtRrMcU)Q>Em+3bBb={5rsQQRi z6Vg2JYhTwII)j2dBdjZnh$g7mD~)EzysZiv?hL?xGeL(}i#2s+k)^z@3bi@ztd`Oc zIcRRHyh}A95|n|y7Hqrh7LJr5JA3-#hTC`oJC*Dhq@eo<7-XB5+;`ZfU~D{$Bc5Tl zkEnku=Mlz7xT)G!9Vf=2;{?4}Jb`JYSuZ9eonMUfV#2ZSL_Me%^QH)9^x~+6NdzByYHSAPJ>OA!v!rXV3@siIVkqx>L096L`%v!Di44=-w--OO_l8m zZ~U8q8jffi`DRQ$Htg;aV)UE2qwWoTr?LEW(GhV&3RQzaVkK28ICI$!jQO&aRhPr{0&kAWH)B09K+vajah&B-O_;ElTx5XH8o}h* z;|$}kT3s;xhDR|k8{m}+Z%+-lAdHKrsb0?@{9jWk*Er#D)c88n+YcSsjKO2aTeA%p#N*vfk0-NLr> zi@3}nc}E>92ti>Rfhg>kY-B@INY{yqvmre3XywsjV;h=(NEWU-r00bSWAVVml{Ny! z1yQddl*d&y8m8pL#xAD93dkn)jqFV{X*ofYD<#g#M&K)WUDMf(V06PNSJt(6v65

56;F2}-{bHAgdxn{9BO{6-`0vk*XRlYjBo2!4q*$A zO_)dmL|`n8E)&oR2m+5Z)za`bp5j@oVu6#6{+_!))DI=+T~%BS5e~&oBKukOIqoM7 z^}k+bn>hy<0J|V>n4ioR)TwP{w=wPFMWy;zToEq;KM%3+KC|!WusRRxVB|YG!*06C zM)U6@E(rXN1_^=XE*gMVd?#l6PIOzpBgE#RZ{rE#hS%9dg5VCa$?wFhogzv%6lYn5 zR{TAs4E&Y*D`q;YOu@FRW_m9tcul1W7`lEBeJ8MZC}I#dGRtqT6Sk^Kx4ZADrivA@ z7S~h^i>Z0sMy_Wuw?u9!4Q&MLRaIbfp(@qM`iI;)y_+BSr-gXPkNr?e*4JFT{1zK% zSk5uqyxuG4hYZ!koU>K7a)OR>;V9`cmK~MAykyqcqY>-lmRnwBBTBo%zK!;^@(%KH zM{B6B%QY9N%Y2Hgi20(Gl*XACoTI*0{glffC$i8KBF)K6&oZX6(wAIkwbBpt9yn~% z{n&IKhBx>T>O#7Hul2PqoXO&j8PV)g@xlX)KwmccFOIsSmVfr~QiQ@=&5^`qXHB*I zpD(RGWNZCcP6vsh6Ai9sNxIIHqt(}$a6P--*Mj}eS6L?qxz}w3ySojzu88%r8v7uO zK&`J9msW5_KgfEQ+y~jF>Ot)_U30P@er6Oh`=2ozw^Tz8&9((<<59dy?x^+wbaWtt z?Yr6;u9V}tkR#s>wE9V5qu=O<@e2TYoh8A&Q zx06}?zwsH^7T*)9VEc#kVB~w+FyNqLqxrEHkBBz*y}*+@oGZR39P%DL%?2QHvhrX^ z+qmlRg64pjNC3nTS3Iv!|`o1<%a0n0& zem{`7g1hpG?`xeCNeYIHFOx}S&Yeff@%DXJ>V^#END zzLq5qN&Hn2s{VH?|K|61i5qfKzGt;(us<-k1L zLtAN9ACg?n5&mVn0(>XbNG}WTb&f>#V;}g*_BekC(+<$hEVplJ%7z+tfwOo9vz1-; z&}_D?*@w%h@T5D54JI}Os`dkI99-GLl=y+}F}EBV{ek8=2(eRfe`V2G*JUq$5Lk6# zD37iut@HzJGakv~KL{kifODE{LIv>`F4tC*Dp2Ws7laI9w&HOmDs3& z;AF0<6c0lnFI4^0Bf1E3^$9TPS9{; z#Buh5wGsx7Wx!Q_Fz0Xqvmhkc{hTq~%sUz{T7i!U%l#bh9zw(*=CFbKcLFwGLVz{t zMwLTl96>)Khj5E+3(yf)FW*T(ZbI$L+Qde)Yu^$Hhj<`Nd zp@07sFHfqt%M2b*IOk@#vTHW+!@w5$obXF+GBfX3T$_3BXyRAp9tBMs#VvD#~hS&K(S~ZYtUA za)x~Sk*LmiT|O7PtD4mhgwKpxe!1NxFDdn)m}L(&vTMM_?M7xG3AARW33zO4S!ns( z#v`__jM~OioO)Tv$;$SRG*{s%$wLfUCtH<{WcNq0&iwR+bv9?c-Ig01HGPfy=Kr+8 zk7D`Fa!hV!2?#Z)b^Y�$Vw296n2n?qhR1yKx9QeMjlD?#14I98-;q1{Ibc2i#)t zB^&*5^u$fCzrhWi3noQFEWqVvaEzdo9{WH&P#);0W=VRpXEJ-4O)-p;J^#!`m6v$h>Ikk43y z_3^*+I&nT4jM+i3LGC7;^ghWEoP#;4B6wLj1MN4^K5r~$|*3;vYJ}; z2wnajs}EwOr1JIiFuWQAZBQ+hn5k6_0ym=&9ZmIZ(fR1vN(E>9ShuScSS|1=a%>)Y z0VfGqu} z@1S71GD#7itnf;f;7#TU5g2Kh4GPV!h_3dNA>qbme1I=nCC0p zWGdjYM=M>^RygQ>{Kr)q(T`0ic^T)Slh?Et-K+|&U5HSIPk<*pR$5a9i!<26&skcX@MxFAQ7r+=;-ZtTJ0yo7u#0xbL+}O|HB$e-pa|gnvL=(M#Z72X53F8 zo9idB;Hs~%^@QbDrVDnhB>-pXb^9k`zV4kz^WnNIpd&y4Ws7%JdIlCvPyf%O9l|}9 zU>CUN!WH5mhf*iX#-=J2g9mM>IY-kBk-;f+&{YgC1RRb^Ut4EI?r1FKIk&_$`>Bqw zXCUK$8n|$41RMRS)+*RzRgYzk+ldrPb7c_&#x@cqjP{Me$03$Oe%HeRYKx_-sW~B_2N9>o+bckx`NU4HI|}|o(vB>MmNYg*o%B%Q$Qhqn8%?$W^iu7+v}&HaE3@7 zInb`iXIsGhxOIsx7w3)!>Z%TKjO=HDBUZjUyBPUdOd6Q?>r5ZS^oB<<_Oo2$i>EHQ z0<;xB6QZ5LQ~Fu%w527tZ6E(xEQbrw!4p}^!zxb-p0xeSnQ@rOt|&d@&X%V0f1>YL z^=HD`T~<>si#_LHAz58zj=OV#Ms{-W5LyGX<5qZbMWA8JWKLeewJC95DetVHkp-fM?VQhyE0D0KdPuck41Lw z^v^X}h{Z$VQO#QDZPC-(&$STtL*&*o=LBy9n1BKT&k>JqtNMT-a}Ja0z+4A0N{5hT zJ?r0r>2REW5qN++g|PTV+!X&buiSh2c66Y#HE_cTfsTx7AdiNgS32xm8`&_&Hq2XP z6wou)RC?o4=6o%C;4egMMVwc-o_!2{#*^0ig^rAP4!MY=t?F^pcd~34Zlc+wuDJmQ zqWYTka+;{iGVNz!_KZ_KgI|cs@@O3Ll)AY)kA%%|Q^sMA08JE#LLsR*io?nT%L$GB z+D?`_=mrP&OZ67JjZSb`Mt&JMJP!Z;ms;UR*cf9Myl&PC}Y7HBEu{lfZ9FThq8%oK7UNAJ=~wYr44ofVC3P zGKS!2S!fUvsCKfKcRzN=c0>ekDwy8AH$U4223Lunf**S87N{G@H>(9d%o4s)8^ z^QLWO26U%w|57g(-obDYo^>nj%ynPgwkgyR>16)g8Se6t%^1SQ6L5StwN`db+t5+l z)JAomHCCvka{)`6f#pTNg~qcM;{23VO{ie;x|iGJ=I+6Z%`0Z~s>d3Fz|Sm?m*|(7 zDlN>+T6Hs4Qw_IB`tyiC$P(y9=tSjD?X%S!4Fog$m!Q;nx*~*0j{L^20tHBk=5T4Ss)E7b@Tj+S)GCKLA}D^_Qn%6ZM!U_nv$h+2+0L(Gi-hW&R)PImTjZekX5`l~7w%pt==ITG z2in{@rj7kt2*4ewQaAsHm6SS#zv~3!u83?x)$l}#6ABf19qR4*FEor*ew_yu(Vy@F zIQ{FG=nT|Q&CF%u^77)Ssoot^NXF{wAgmv0t2KVD0}Ph(YG(PGb)vPYtobv@OtN#c z_@A)#tg`2XwXz|OaaTTQtC(}m;rQ-$!Y!|m@+Sz~T*L8A5F7VeUp;qfn^FS}7J_6` z|FiN|SmQ(0Mpg|cWJ_5E>`vc732Xn1?I;OL$?P{V@%|>Tk>6;H7;22_@4KAgGxnRj zS{e}Ta#36SMn}fXrHB))dMj-H@!x2dIFC1;!4p~1<45r*PEhbF5Z16M;AN=5KK3Eh zVkNtC9-hy%a`5!&Z;I+~bo=c=DB*^>wX7$DiTZEEnAnXe>m2sRZ(<#tfIpvgImvnB 
zqUNqvpwiCTT6XXxcjk<(|3-`S5c(_DQn5vMP105dOd}R|M+2V4z^R)9b}t6;xbNj3 zp^oEL_Wgw7MDsvtyne&b5RlT%udrdZitg2uwxw<_!fHx<9JpvZs))-4SWUmx|;F$;dLeUdX9l$6>!)Z zs({TZI=5=bIr)JqeNJ&3`R7{JJIp%duF86D*V9rb!cg8VI@-UDm55>$XTLiX>||9J zQlRCeq2W-*dKwFB1CpnoQ*i|~5gR0YTC+{1?_5E*W2jC+io`2$E9b^vi1cJT8{$v_ zciJj!Dqh-5{OIojlO7dqEX(bO5GgW^-GY^V7pRY$;cGUY zIqt4^BGae2VkdtW3x8(D%D;;haV(CviaszZgGQPDop5&YGLL?4vugIn1K2nx)N1OD z52&uP|CT>%WLpQh`Bs0I%d8fdZ009ap!1p5RFbQ(W$WsDad6JrYc1s<%0iP+9V<>@ z_iP=tdzP0$S2YkGBFVAdb6Nv4u=h&`=!0{cpBYgTBP~K^8mWoWQN@_yLaA zZM9QWa9EmuS15144-6f1HtWdmbsoMNwuSz7{*jF-J?Gr{;`cF)*B-zC&Hc`ocFVDp z#(%GU%3;inH}U&Kx4#mc#GCxRW=;%QEB`)_R(gX~vL4hFP^JZukg+IW{9Y&VA&(!8 z&jgrIHC21_o2{Pk5LR5MLHlLB3oMA$-|JL!?!@8?yB*zaO_3Ev1P( ziOe|vKY{LOM{wfhY+beIOmb%w^|IGIBACYh?{yx_5hbT4C(=M&tsJ(^-Tr(KL=Llo zy=;LkrLTRIb8uVfm2)(_lVig!-KA;v2hA*Trpz3KWNhNvjaJ0r9bHQyKn_`{UH!=^%k2*@DcdhCU?IUS7y1c{vb3G*o^xPPIg7$ zA^q@qJ)Lc#?>A@FtO)Ono2{mno%nXQJ$1Fb;t^yTswH=$s|ljw8p2JLDh|UiS^Gn* z#yZx2>wnPZ4IjPWsHK+ZkP{8ZzGgU!A1Kj+H^Hd*My)2i{Sf`Np|C9`Ei+wL|B3~408|daI23yVa|~Eh zAA3;-g}g{;b!>>&)jNa*{-S9OwG=%tG%xD5=H%8Jt&`mj4F$U0Rhu|PvtG7CY`b~u z3p6JvD%48D**j;O+2kQEk6{kL%{wr(;t0$?TyHciY__vC?3S>71Wl^VlZSowM@{>| z;PH<9F>tf-FlfpQ=hJXo{c(3sBrCg6$(vsPaw}!Y89Y+r$l{NI_`7&psmN*iJE>%W z9KNj5-0dsui%P-;e;AE06VqAp#y42?k6JkAJ?7W1Ws8$s zMcwQe`l{R(7$Rg{w051}(N}v*xbqvR7Sda|x(VTkGb}Rg%>OO*i8tfD+%DT1B$g{o z+m4EbcWHYm&oMxaC|w+5-;BPbqY)?k_%%6+5#-oQfyCnYQ7c{w>`E?dC1uPLXJb6e z;6iueB~2^ds8AeyNmJ>@cD|id?4%!pS&aBP#8-VOko?+LSXCv^thQSAY8H~au7>?K zLi@X+x-vdnRox2>Yi5Om%*dK5;N#t8s_TL}L%fLAOIiiIC%9Cxz|VrW!(*MXjvyb6 z@YP+xLuUX+V73A~SE;N2VS^kTI#f=dY-aNZVCfB2XwTiYmH$?&Roi|^s~*cC#X;K1 zNpJz8*`MO7_NVs>Y$Ct^M*gG&%}q~6|D>I@C(MG!{-jwx=sMQLpJMu6cC(dKh)3oO zjG1v&chv-&_>(Z<3@EsHlge6}N4Qr0lMuY|h*gx_bG&TRe^O_0Wk3X@bEmAD9pLE> zg;&e2xeJQAY7zI=8d(T_HgwNw_R=FqSW`Wa?kz@*nTO{=t|vhYf3ht>JBA5=TVGRvp)lR~`5d(Jnv#v=_7r3HQ2N?OY<`tm^VW4dE&$^AC<2L%U5Jo5v2&5v* z#X5t3^k-rAIJDMyW=%2EkRlU*7Rg4y4m>k)wqT0-=UBAyKy%u8p{j<&h*AH;TPTa_4%>}gb zpeyvniHPE(t^av~#aATXw*|&o650uJKIdfQKZRMD*-ecQpE_`?GB&g6}`oE%2V@kR8vGFfe!wE31G7$e<^) z9wfU*D=q&ecIwpKR#CgYl&AlqowkoE?0&G8mAEZjN?rSlj;!FBIpVT8&_e{T{J3KU z|JAEm5tC3?b86=26wd+{#+r({E*5%E>d7lh*3xWXoXb6xHGHKx4X93XwOUu5^d-St z&o^CG^M^@4$2_~}E(-?AL2-veQEg&)PJf2iXYI*r_t{o{J`_E+J{{lJd=Rg(dM$o}S;r98Jxx3SE_g*hTJNcX=AgM&};Xej+vv%EMP17O?f zkAGft6Mqe)Zafn~x+k+TS~h4r<%hqNtfD3toe-Z^9h^RU(Anx=dV(WWaMfVjG%kXEO;2I3ohYbW0js*CPu_x|5x6co5NX4BXLKw z=W1?%*G@5_F+noS6UP-%E2glx@G4Z>dp>Jfx6(pQ@94YWB@)6Tj3r&e5Z2x~c!gVmF?l6N zjI|Zh{qO&7V_SJB`JA!03l$_1L2!;rTe{yct+DZ~K-X3GqP3-)1rGj#^fV`xc?t@r zyd@;Ne_`av`oay3@0985o$_b0>J;C4kJa*jE&^;;)w}(Btg$5w%h4w%)M~;$ICG9S zeN$h1S5CPu_FCqh439nQ0)Fc)w^%EIPvP~!+SglR1$J_HuV&eH)#^!(L={=2367?QAh#g*%#G_X*&Kiv3+nkqa97 zT_1;i|J&%_HFRw%M0#V&Mg8YdF~cWV0Yu}YR#FpB4Hsj3!|!@AY}9vDBv;N;)Z>VMakWk0D} z*wWDR)r85rk`>z&km48=(wds)0t9*Me~*c}qZYGuL2lgNg%kD-0gQ9<`75y?+D(&{2ip_=hHu1rB$A>>t{9u%A)X&5y-8jMY+pK(}F!uMbAZ z2{Q2y?ZRZH*rcGtF~u5Rd7K$;RQ;q?{t-xWH%Bf+npU5dIi9KlSqn~CEjxABg0HiB z0=(^IFSACbFG9|){zEsN6qVAoKr?&eVd3XooiJN;HDY&NqWUjByz!W|{t=5{5p9et zYq_=6|YbBLv3Ue z=iohln5MDkBoD>$^GMGHtN5WdH9{YaLHpdOvLEU!N(Va_F&ovY03Xg2{!laFsugs( zt_aI7%mE8=Pr41jdZ^KMxPaUb)1bZBWu5vl!@Fwt57Q!ITs*|-;19Kk@T`-!LIB25 zpiQY5hwDgwz@}AfICXa>&~cS5$Z%H4qp*Pfq&_DgNfb06KzJ2{FI@=m4K%g*LoG!5 zlTUAwk=Jn!4NxKLrbcT&)aDeAJ4m4d-)27#Hv}HHE@@X32$*W#^#fcJ%V9A^2`Sg(YCd}wHMsG*sfCU1fDa(ew5|}eLBSP`p(>sP5F;hXK2KqY%6}GX)HJo zlSP(7WLHNPw*GL0%JQJm)T&bo@0`w7*GIvmecci;271mS#K@^(?zCK+f28?m`dt;e z`$uV_xs%!8k2LfhZb!!j4Z`|SS{zMHHl?r6e%7Xcq$S8}i2N|k2tw9I(3U{Rxpm$k z10G7ZDDwGIpI1@0AzcUrQ6WJgU@=m|1bAnz0>!qx$62-;(J?N_T*1^8UPm#MjPWGE 
zg>3|cV=2(&rW!iORbWe>r`dFN0`;Vd<&SW?KN2+$_@0bDp#50q*8nf?@4z}UWnRfotK|X{h)REwCe3Bk zocNf4|K!15%e4u{sp2P^MJ6EA=lE${%KStGM_?MR`L#buQ|C;*ds(YfW-#^jKhfNe zxi3gsZ*!*R(@niQnK9zMgWqhL~pvD%&3 zcaT7&ajH{xl9m4ltKW%(&=sCqcG42@ETol-mJ=oiJ_kpkT50ItwClpB#8bR-^BV>#FCg$gc>tA&8Bmwiyt|&!D&i{cWk5 z+hU@Mwxe}|oYXz_aOCaAG~s^OeyTC{8;zn}{#0|tLnUIja;4gNa)OJg_^GIb*LKT( znx^^~zO$--nkF$LuJ|OASgT|{e0JBVAPdgw12%4rIK5f~4gc#Hi2pPVPOif`f2s`{ zr31Km+@Jkavy9Ik;v+>%DBk!2EeHbY1~?rI*aY!mnNKSRo|K(X-#l$1ZPGV)d6cQ+ z`!Mb%=Y?c;q|p|Bs^gvwE*Zx8U13q#lW4UqM=ezD?M9#jZMzZKS`i?RK;Rv3RU_uO@})u zD39+k=h}a!-S9keLVXhX#j5procF=npJ|NYjFPmfE-D{&Q>~wA{_(=Z^HPg)c!M0Z z+|M)<=iJ`9Q_#@Zx9*>%xtly?gDSyVB2GZzXIfeKz?avmsh???@PsqLQ9d1w;$gyO zl!b|T+Q()C2Q(A8BhE#^UQ`z6BM130!?X}hpjVB@CAf{nXaWOl-hx%#=W$8|ruO!W%y;>I*oxFZUyx>T8p`oGxO9upFi7Xl~Rej@M$9>p6qH z*3Z*eVU`3nmi}BrN~};e>GGckwXm}|O+~UF&2Y+QQT7<`v$)a!xyI0)vLyzq_UD@N z9D%#))JBF^xo=zj&(oX_onp@PnKhob+|RYNit=~f0Xw6C9_$+t>k8iLH59o90~1#o z=e||=d74&2o=-(XXI#uO9Vkf_$QC{m&GC9UtIzL$6~0}7c~8Rhr- zLm4CN8nA%TJll}(d^aM=DVY3i{anW=k4A6dMgmR-SnC1!oJ(pp0=*xo;YIN_q7cCjchQ!8(@?Q_qDeCpu%K(N;QUi@Z^49#CseGj?JhF{66PbWZ=N@ zw9Q2mFwyh+0B48os76D_(G$mQH|7I1&j~MQzew}Z4E>hs)0NT6f1z0`UhpfF zW;75Wut8_P5Fz&AGz{63Dh}b|qE?>=T#g&RVwqq63ytQnJaSu}_vTrKE1nWmqL&;&P>yvQwzmEb4pGVJ!=AL((|8Axz^fbS*~1{gGe+85o>) zE9VXMuA#lzHUkAVbvF`5-u?@*6Ait{P$mZ$i=C+Y63mC~1|ki-tJ^PAN7@^#^p{$& z$6Pb_ms*Ovp>Pe3jKe&RWxq`0-GFQ0QSRN5*hou zr2R_sfPpgVol3a*KDXarrP*5jM@$?lekGb>CnrK90>EFT;m$hFS@o~d^b)_I_E#b- zX_wTgI$D~Tq$gXUHes9sG~$SaIL0wL18`HhVUC;9DNC3KdfpT-%yT#9c{B6Au*aBJaz-4 zHutV?yBh8Yj9{g^X)4myy3thGg(7RZ6sl|h7kyILv7k^{+mnjsx=LbQVjKGG!y)u$^z)Tb+u9Z7>-OSY z!;TsnauMrpG}?=V)qb5u9MK{pSo&*?_%UMk{aRZcK9{XR-{+D~tzRd|;&B0ty9+^p zRJobd{#pZ*<92}P=+|k8`=_k_*IE#mG$j_dCDJd$v-m2{sR=d^R04mk;Y=(89y?Vu z9e+;ngF^BPEX6`Vaf>h4pPN$Ffde%*9azDIvDNAKp3~S`sY*xjX>U$hgqgDW=ra1e zwh%ZVP4xA@s9L^>ramtVdIm?z{pr^sSta5R%D9Od)>MEu6X(s!X(cJK=!Gfq&an!A?E;7amdzrU-WsIJ~{g2=~_XroRQtTCe+wTlq zlIDspu_n%qoeVETsA|Ey5Y zUghAOQx1ga!oT^*DX3_ z42Hjp=8y0|ofFt(?`oaJHkg0w4qW*5P;P_2O=|~BTr2!GEi%_7)>5AOt?n$`Bi-*Q z#}yZY>EEUm_Qfij5mEV;XQeS+s6MopE$gHlthv#%o&-I*lu-9rd!I4aaEF ze|8S+QqA_%N7#cTgiJ(Ux|b#r<`|w_->593#k8k+9G^_$#%wQ5mp_|8)72`Nd7Poz zsML3gt#B_*6PzFNdrq`^6>nWeKDV6R)z%#A(P`g5D0hVJXkk%`?%?elwDm@n<@Wj<`^UBDMW4E=SZ{#58 zLklg`8+hHa<$&gIS1^n>0>bztYQbwt3?)CpXqT8sHVb(?-c{5dBClota8z zuW(=bT^j7w?~EYds#U^$z2isWcX39c3+>x>yExuV=gma1CPcflf|%4D+$YDJN}iD8 zKjGnzqBnCa%UpwDh1}UmDv>vZsuaY*UpD(BBG<;Ye(b#jO%21WZ@6_YgY05H0UTW zv$rq_+3(Z*5~k;CR{eWz%&6!~DH)Av)nyg4gz8k57cky^^_?ANBwF+hFNT&<{Qf#C zsq^=7LUO+qoS_M~t{D8iripl|KJSIVFUM(aQ$jsG+afj+G(Rb9COYMAnmDbB?XxE4 z)DByk&BxD28T^Il4*i~X*%Z1fi@(=GefvM8mHX$mETDS)wHq3oBr5-l^?$LofOdq_ zWIeDXQuzhjP>Z7&#%{*%c)>>bMG41S8g?#4IW`9USjwp+n`h%ve z!yV9;|3L(i-hhBsp-k!cWq;6|@^sA7u8w-I^zcYt`-cR&@u=1PA*9gn?5kF@`1+Fw|wz#L}li&9Ml2k`ldB(LYrnHPlr;ZdF^rv8wo_>BWqHvI>U zZE)~d$pyxo#R$!2mEsoRFy_>7hrb}(eAJ953SQ@xNKzS!f<0;&zOe zI}daZo@+uTdmdAS_-uU1HUw-s(bi^?Xz3&^_;^lkcVru zTkavg*JIsRMkVBzv(s+~N3<#QUVgnH^DPfL8(E4GT8J|}h4D7}5 z3@hjRf6l{ZA-a6(=2vV&-&q5uL{kso`u<`U18WQ!&e*gjed{5a+u`x5Wp8lN5ZHR5~4u%L38_TfZXwzx;yTcrZEKi8*p_s$Dnlfx%jfPPY!> zWbt++*1yV$wyuQGyQ74QQhIDd&19#C>~7%tG71x$`sQ6{%R@~*VnfX6dT z>AOUBNBnWc)k4eOrFDC>-O5#|XM}JY`{HMhma6(5d}qQc-<60m2|j%P$y6tM8TzPF zL2Rd)%jy8II&C$oli`6I6+wJhM^m!SXP$~6D2JAj>- zu>R-|EGkBQ(JCHf^%_3>u3L{fp}w;Eu6P2f_$Ez=K@0x>9^j-3B6ra zxk;QA+k8O7@DO5(1oz>ywxI44kBZPLc3tpLnz6;`pg&yelv|2=$9f=!<(L#?f85ws z)D|KL)bgSjCUX6DGZGki!eR@0Gv=4KRbkBMeO=!v^30t8J3yFV4Cn&~H+Dovf!*zH z!0YiB?ae5hCdKZ>0K~!hmTjrI44sMXC<(dw2^FaVP3G;snny^7h$+D1uY1_`ltFl| zg4w&ZQsrc-|0%0@cOtwY 
zkgHUgqwR-}Tc)4g`*T$^IUvP%Ww=UQjml@OaJaSaP6R1S98fu^VxOtsZ*@^D2k&}F z#WLhgtKWcD>xBvj8*;U#cWWv4BZ_u4KU91LB|F|=t$}nhkj%Y1?YHQ_Boh?Z$?hoT z|1GP&Gn(u`$j~JiNc62c8YsALU5`&|{i=mfncZqIknu^j;o{H^jrr3|A*%YuCdGvb zwZYDb{?)q^HK8|f@!i_kA_g3a`GHCUgLFyXys^RlzB?VTjiX3$PNJZZR@LPg>TyC7 zVb4_T(*FfTT!}<-#y+P$<=Rfz)qvj%{P`FzqM=_?@!_HT>VLC^D1*lbyRL5-S_D4o z0ab1vB}dh-Tm!?6yW^63xn(a(F)% zQP`bG!zS?H%P}P+h9n!byOGUZCxv0!jClx?&wFbBG)((ewD0G4+JLuJ)$h5hovKp;9S7*RaR_n`2mad1^T!oDwB$3t;WxbF@w?nP7(p2GVn)#1`0bU}o@@0|TH znny7hb0gqEb@#>3+VDeSiBZ)_wyge+Q;0UWP5Y@np~85`n<`F<$K=IO{GwXmE#0Oc z(yrl;p?C*j@3|DHp)1eEFrz+U6?n!jN7?@51;Y8<*%+J?y!~H^N#kOU^AELP6X0E?uYwxiy8@H}Vx3U*&@uYQMjfL*~Ky>J1+ z^`0`aMxOGmBvBO}X{@9~i+X(61B{j`Bacj?EXp7XEH6ovxB7LC@_qU_YsUmRs=B+D zajR6J9u8N_N<@I}1=p&Sk-4k&>XIOD3>AC<)Cff1@KC|uUrMwnxgxo?IQz;;I~X-s zT&z@XT#=^C0Coa)NcfDPUr}Eo8tKg^maZvEVz8g|6;j^SovKDhry)gk6Z8gCvFn6Ao2GMW|)P z7}C87Wx=mcCfVeavfe;v@}j_nKcwks`V<%FO98KW3auK(U?zsU`UlhVb~!THrOqcB z6PS#1hEw)R7+MYtxvg2J>Jzlwp zRD*bCQE6-pPo@odDUkQ5@5?1Rtzj26-HhNr`l$gGFWZeMIfT&irdsFC($)l!y#d8L zcD&nxHrozT!u3dD6pL)+PGFbByDP4v8OsWnZ#p2DlDn~1@YsJ2f!}5{+Xy3iPv4+} z?=vC$Qlo9FjY&O!>}bR%l1HNZf#?5(bJXXH|67;s3Cw`oIof-)GTrekjhDP94Z-I> zZ>8@^l$D{@Wo7RX(VRnaY~}AsWXb;jJ(RlkDdh@vjCMs7Y(Z*ol}fm$##5F>&dctw zy($u?CONID-y@1gDX``}qCxlG=wXCwqbYcQh&@Yw?gtWFt(j>+R{|Bx~oa!0K` zy2D-Bcl;)GB3;p2B5ORKVw$w7($FcSWAD*Pk>nr5s?S?TAb8lL|8y2BYLo7IkCvDm z-N2r9M?O+<-Z`v4l1=F>utBxccfQYtqkEk3$&R5Q>(VovQv1kfI4Uoy z7|ut~;t#|SZ!g=WNVsQLn~6>|c<6((IL@)lYPshOrcD9?fAeQ{MN|AbPPkRzG~v3h z1}NkzoMpuhqjJimUK85vMsn=E5Z!Q7tzi!5>oFPaNpE*CAi3z(meel3^LX3PcQ%1e z@JbAR|B&4XXhr8(H)GEK#R<>W1g$XX%7wQB!iYyaP=?G}?*zjBUJVIu#0({!ZfpSU zu9{oDhQvK!^h59W;yaH!fvp(66dZup$@8`yh!KYv|Cv~&3djYd`}d|s%(_z){sRoro&v3;uQ>WPRH zA5JvGb;v3Md<&;xrtjV%*y*Z(o}Q_6i>Owup59)oiH4G{__wY0;Y9YQ&)7kIUEtY9 zz*fIAFkpxD>+8){A7I^4f@{D!kqK<}WukgT3+z>`(E@r&Xj@qBVIAiEF2%BrNVyRO z1op6Gii)ei`8vQ(-u1Bd0wfAtR(M$uyf}cZ|KTFd618A;0h%4u01b?}tAb%6(t*xO zL4aK7%w6R#Y(o7)iwwt9s*?@4+~Q)i*gPWl6Vs8|=;p4MqC0S7#3Z$u81+H8h|7UW z@d)72B5E>K1-qiY!tncZ(&`m=UbU&Jn+*Vo01cCFYeC$@C+TZoZ5Jn4Jy^%!c5 z3l{^S_5YSMV3!__gD>qmyf`cehV5*zD+0m*Eb};WLqNvxd2myRj!wE~>a~D4;zNGk zZmZZR4A;6wCOxt)W1w;;VB?6)HdO5Bv1Yp)19+-uB#D(aqZmf?c26y-8(T^Y;UIL} zwgUkAR6D}6U+%T;t2z$d;1>fozo8S3XiotAE{HJkz3TcXq32VuzmoTA>7@60j4w;y zn+u+A79&^AsB;st_jF5xiGyp67E6O4i*zA09ajg!u=X0?TACl#s4B^OZ7sP zJKTsh>6^y_l)2TL>{aCS@#!w6R3dAbK&mLrkgEGLRA=NIX7}q-g$q>Z)+ahP*00a+ ze;scS8b;0?S?>?4GKvO$RtP}u_HaVqD6sIX_g-ya%_&reT@*AHgiY)7wT4!^6di{p zTd^6{?saD5axug8k!FjDtL)%c0=<|Mw(D>>DWDs?znFD`V_4dW^IS4tRD#Q$l$IsPZfcE`f_Nm9b@E0As|CvZB#&?-8zHZZ`zoWe9fjy=nVA z@rtbl(nzV!*zF>ggy}`KBHDGA;2u51?x@|NoBz9Qy!Y1oU0BO%VpF9#ZP<5XA^Szn zA8IyLBm2w?b}t(0Mf7M}YQO7g+m40WhXE*}0G8%X{L(z-5sF+yd0$XG@cFJfG438S z_XLr<8_}i+0qz3&@;> zM@2fQy!2F37l^OVv+W-eI!6y%y&BJ6%UhF5XFs^~HEW5VvEyjf&-)LcLX-^RJO|1Qt02aaZ;8SbosvqsRSj;Nn8NYk|`xIz7 zqUR8-^H+FJunlFyvLUB`FlxAauuVZhtq>}^r(y#x3ENVK{aJG?DbH?4d+A296N3$o z9?#=`B=i0{#sioJI@f4>G0<4)V6p{uJ(8#^YlhNF9!X?^Gr-bE5}l!T!J$^BLb<{iI z9CM9+Z=(~nkBErH`yW&)Yj$?e>Y~~2&)Xrjgq{_<)u`1AT6;Q>o0Ny1J2E_CRI1il zPM21F9&PWioceuY&^iL@Y2LkJVB9T8*CX0B#<@$LOZo%dg6k3(g3?;QlE{kz4gV)p z#iZD9AdXr;m*_%twq;_XU=wQZ6o!3MYHzHEW8k97ATi>goQ}rXM}`!z>ynCMgO5~0 z-@Ib|gGTNig-ozxlJJ=sSu4tfb`_Q({fpigI#(X~7jtpiTsO zT=LpYWum#^qNcULcbF;m&HZ+)rCc#C?|QV7m@c~`q#z(fIU50(cLiTIcLQOJeGw)q z;gPrUc25a6?t#`~RKLn={B{86E}I{)onlj{hH(4RP~bw|b^}R#yrwbSi}XMqu_>1j zdo&T({x_`T(L{1^;Sk!RiF%H9G+WuDiRPRE#<1bhwBk6RL;unCsTNBdQ@lqtfw-0G zJI%pb`Dh}P)}AxHaAKKYPO(>2DI>!b37P@b%0O;tHG+l{anyt#)jqv`HEAClj5_WW z6gpEX-7OK?3&Qiyf7a?B)kZ+HEqt=Fr8NnE!f_y4ZV4dt1+(tfV%fJs#V@B0G$CB! 
zmWz->a7WnJJB8V6?lz)J-~LQ)-GL<%Cs(fQk0D^JlV`(0r5o@skZjO~0~?eXImn`Y zlmXCcX9Z0ZfAAoL{xmmL>~vDeyclgYNPs>auzEfE(j}E_m!p=MM@1>=tz=}-YZd_bGqnJd`{3Ole@gDs+@Sqd949_CnY`%w66ssEPP(0BXQVc z9bVS}Vux#s`W<&ftoJ3Yx@VrU<>G*GZu#wRr8wlM460Gr`LUZZ5Cc&9h^;9Hxfv{E zwAbSAMeBl#Gm1M=A^GOEjp(rRA%H`K_-NV`l61Xz7HkG8$lTz1C+uE8e%+lqZ^a0` zqt#>E(S(bkPuh-ff^&iX1j!Y1Jz#+Cs#Bw>)jTG;BksKiP^ncC53anU@H(xe93~(F5j-S$c1X=!@Li=o zntX+|)fAZLxUr+yA1Jb6x2hAIPMohf{f_Rjbv&kR1aG$KouM;;9flj~ijKGAw$!>K z(K>Xp$NB^99HJ1m%0ac-maO1ySSh!nVDt)6ugzt!6VV#a4A3k(Q;%r_VMpU^U>Ccn z0*OQL>)5m~9b|=E3PAnX{@aYuOa`Ktm2oC%`Om7tIW7cOqALy1#$0^k+tscLKStHx z?&kw6YTl!EO~nu-;Z{k--u?;?8sD<(j|J!Iw-z12izy&vlNg;W@{;CTRav6Q7hkYy{frO~ST$vd+=o}MS#5wl!)|;~O|sy0 zr>zT6?R|si>`)}YHE;E0+5`%tu3Od=FwTvhvKE2PcxrFe&xz^G>HF=!eETcb5eTzw zgN}7ZM_qvpxtw(cgvK80RxLE=A**N-880W_pi+~v#2J2oIKv2~6arU{x(SUxlk5-^#c>5>(O|>$3wM6 zrFbm$4RT3PCdYbhS*>M9QIA>)h{rsx^bM66BId)*z%U&oAhD)y9**&VYb?0>$F*ZL_a{H!Cw-F@E*EZTfMM9TV+7?Z@qs zGP7o!NzVxE*id>MxU3YH8;`R)8%?&QJN=bFzRe{Pb1_ax9h{1;Dv5fabK^E2EkfqR z%H*88rXf#vQZ)2z;c-zHR~>&h0>X8b$lAF{`dxc5KqPgfe`c1J^aC6OQbFHsmjj;; z_u0E5RB?jgF4S%W;=J$iC~-3io^V0mnzG*hKip8zZpTQVzhLX}%@ecTQ59K?Y{VoS zIcj&+&(;5lgpS9$O(l!5P48*=zf@&g%JADR2-$W(Kz_5M_K_t*!**X8u9m#3Ph2?J zo?3@8U>EFtI*gEaCg!;0eWJ9UQ@jwCzE9&w!0b5v%ifpBje)=&@B6fv@z3Q0wl8X8 z$H3dz`x3=*Vq0YZ;X;Qdh%0Sn?@M%t-H?N+Qqc?F|Eg8%GlPq)QA0e5p!X3!j}F?w z_a*ZC%;&5wnrkPZi5*gDRK#kmUf;NGI%`eQJ9p#g+W9TTJQQA91F!zoZ}bVTTy%&4 z2rfG6h{Aa+RXYQP?iZ{per|<;x}%2gOS}s7tJLZe$Cz5ygBN$iVIkhpZiV-WUhqOJ z?T3lzNpdAH6$$q=WEZ2Uex&Wwk-_yaZ7L(x-KhY$ip(uDude>i7SCA zO1r*f=2Q;$Y|O5zoJUEj4d4AO0G$sLmnQS&9+( zCXN%LLN7NITTue8fOUOCg-*J-BYt09fcIF_?|l@x9SwM>1ITwhuxetzxfA2Z>gV-< z%niXvF?j8641hBWoBC#_a zq*hhx!_@waEjoHK%8eI5%q@XRP;97tf zo9xB&T(MemqIW&Al11*K@PQxZZi!q&jiJkm=NKXMMpc!qM3q)oyT1_)jdPhnun{Tl zMBGf*)c=ADVl%}!&|sdnD}N@9Nw z1qQw6u?ABi97|YJxi$`b#JJUMi*oQ_Zmp_Lh*#GY%_)N@an_;Fp__kgoiQfPtXh{) zB)DF8RAsSa{nQ@_uYMP9a!_cv_ZJ&3qC}w3f`GaK=0yCA!x5W`p=BfIG*9f%MGcs% zTP8Wubo7AXgfx1IiR+)bjUu? 
zl*epd2@vT={ZI+cFy7l11kE3v*1u=hm4dNDGG~iQx$O%In@iCy`~k|`%K@Gc67^@xaIIISD8o0?-ck%X;aPgx6u{Q`YkKwd)GcF@*iDEykkTDTL5YMP5O?u_n7Tez%Y z1Ky3II5)yj5#0>|O>uwi13I_hwa!^dB>ZT#(hq3wKjO|8xKa;V*#~r35lEN(@(-l7 z-8@yWeIF2UV(FA_dlesu0}oywCi^X`{6JdQ!#L3NSXbExM1QZb6ISUPO#p0dqk$vI zXrM-w+=;%6ernZ-YX&98_n<1`)d$ZxQ4wC1?ck8Q;ZNdYRv)-|kX*kiHwDywR3c!F zT0WrNBE14h;iomQ$7cwd$jn?WMxr;56339+zjg#rqR=Px;m#<5v*a)Z?TVqqb1R&p zhN@@yr1eMhJ;~*S4MvhB?&|r&>PI?9k$0$vXj36Z6g9R2vAg+C)+4cL8s9r&W{268EwICgb=fq4fC zBxWBJwY_$<;fR%dFp-lxIvgj$?gvv}TX+%spr{F9P%`%@|DdRev7Q*kHJRnKL%k~ghV>DPEoJbeBrviT1dFT=&~9aRC%4^oa2L>xi`kq*;rHPN_U(0Pnj zTchZ^9sFRT+SRYKebwn_ED7vT^a<)BPHZIF{P=1TWS*Yf%BMxuvsX|#X^mE(2;K<^ zNgvhLzYcfYq0FL>F6)fK#h_bV0?W+eH_*BRY%4rRzjD$&9@j5XuYu=rS1}llVVgK> z1yynpluhV+$Q5m;ghz973v@A%;?3)fdA6qv*fG z$M<4!7>Pn|OOb)!LtEDO`ga)Kl@Dsi%O3nypC0*t+}!+Fk3SmSR9Un+kkAK)h=X|p zyd7gOUXb6^^%&5O^L8g%PFg=3u{aR#-i<20+FYu=Sww2}O}i)HZo}Z^QZ-zf;H*HRyg2K_3$V3N@P{n~M2x2E5-cMplQv zb509&zw9HjrQ)UN3%-98p>!t3wAnd9X6H4!{h}ge|EK;zx`%mEp{1zpW^N zX$6TJ#Zt&eZbm2Yip)B}ZNJhGMzSBW+p+RkckZ6vy`_H3){DcI#Nu~^TrZ*8Hw1Yw z&sx8$pR+dNf3EWogc$U?g~tL z3$eYZ_lg`4*}h;F5yL`*N)c+sS72%6wWNTxW&3Wq6o1l{GRpOh;u7kM`x0roR{>;- z`x2q`_Dou(YO~Z`UXdxoz1vppi-WT0JF7aNy@HxzjlL%aU1d>_vqBGadvKq4(;;NP zOkZ8(t$&rh@=zpT$Z*7?c9WY;Orn+LR-L#?k8;p;ueGXN;y=D^If2JrM+*DU5h#r_ zn4S7gGJvjt084|FruuXmb%*|_j^xh+&JRX044ok`72{#l3Idg4*-YsBRQ~jYP3_ZW ze|X5{dlMrWY?pPJG@*}9q8Ft z&XO>6Q6dhYl;FV;rf4NlA|OUu7dKQ7UchJdrhuOryH|Hzsx{jg5I zlZd@xt;#-}sOU_(c$I%RF`0O%-WR3ZnuW_;g#a9LUn7+tPLu5>U`b0U6NJ%|C)tv# zl$j+T-be_m1LEseBM{kPNVHbJe*Q%}_+c$TZV<&+sJi&cwU$xF^pK#3)I`RlaGD9w zAWjXeN#BeP&aa{Y-`HCNjmeSIBks^Cr@RJtRNfIoa~P92i0s2@<7jAYwXP3q0$4C? zS-zaQ17t5(n*P9{FbY8r3Vvd`+)c=^GMjnJ9=3vN9dR?4iKsQ`^EnkQPhhfvVn<{h z&ZE9F9S9@hNHNeasc72Jzh^Un$2cwoFTXQl-)0XZ==`TbMOhh57<6Ouy@U!@w8+wSX8ztLU9ENXZ-W&XnT4-wWbDO)xj%fivE zm~BNF%|J(wN+^M=+qT?{)ISj1Z`#&AtSuS0Y*J9WEhxNx6>L3l?CE!12)h%=48bhg zM&wb;uk?3?|KWDq6aW^)xZR7vKn0)vZ>jz;4#T;oDKS2nXEt|=kU3!Oz5t{E0lUh8 zzk+qyE8>dk=Sja!+Y_Ry%UD^KJdwyKH|TH2rBB4!mL77;o=Epi9LwX7k6A%O7)7LV zRYo3zuN-}6O}tZAJR$Pz8%lvSDxV0Z&|NUFc(W%Gy)^wBuew#yn)*_$0`GX0#K0xM z|LgZ&wAv@qa?kc3vV%`(xt~i;(tNqs1x~Iyf9Q!@$?w$zg9kcXuTGw3PeJ|TiL_UY z^jb^6MAf}Dn&ObRoPPcsu1Y$JDCV6|JD*6CF7rz#@2=>xD~8a=Zk6Ua*R@vq)o3<6 zi6O~g;4BHM<2)SMuettRLCvEsblN7OGf4exO5ZM0u#5Wk*|2GSXQ;i|C80ZnC~-zV zpTR4XT^2Iv?YPWtR?`uZgJruC0NU~cHm7f#W=`1E`28hLsQKs}gyzq4*CKf;-#^N# zTu@Sb4o*Has!w*=Vw7_WJjQ;hI1Hm`RxQVP`sOxnD@x&j$1l7$)DIM@>}G%<5Q=jl zYXWh`H#fT-@OW_gnyoA0t{lhhPT=ltrZxhCzYDUv-&Gc>Rm8T|P#{Z|rqVsZ=!N$~ zmr5uNojmQ%__l=-&&IYBz}*JnzH0OkHV$W(D#KLRUZ8e3k3h^ml87vOh2~2>BD!+5 z2b^GjB#|7!f7qPLKBC3k&$DrIAa3O!(Zg5xu;D$^vVhJZ z2OCtSR37UO*vo3AB5+U5N3?5spxflHq&5mWhnTm6f%41jQ+4`oZ*J?bLmv?>`^kmO z*Xo5y4{pAiqA$D$A_@6OaSIshBic)A3Z>c6W7fF^2{yV#n7^p`*W2u#iulVUTES5sH3cqaMK}f9QPt#T>wb)`B01^- z=Xg3(S$tKwgZIvgw8NxJHZRF=(p_qv=ML+RE)Jb&xBd!|c_VjOuQ(V(*3o9eQS24g zMntKUG-D5Oxsc9qskQPE|;zNO4WTe$(d9N6Q$ zjm0^ujOU-Pu`Bvq{W^}Sqa7zqj;5<>2YcJUvU!y{&DH5zg|@qKbbvn4disfS zcs-!~9hx2iZOrc{d0PssyQ;fvIlhzU#K4&|g%>iS+X>NDXo%UmDR^stJ7jACPX=Ok zJJ4Wua6HT2x*ikT>fzb$2nx&3xvP!nJnm{?!tX}wIoF#~B@P`#zW21`Sj=0hKvN!g zXIm{Jd3S4O+Y!c8ANQlj?ne*UtPwonTOi=$Ez$M_vf(AOk0yE=$)B{6kLsv`=lwG) z{b-__-eJs}%2bBq*~&kvQEueP0`V_8Q&9;edoExqIK){|`OydKD{1d!D#upKs;Uxj z^z;i>{n12FUwYnZRD1P1#18vtqEwa=o6ndX{Ai-ov~Ryh+B* zn+Gy|`|FvtMAM8qZgG^6Y-)1C5jg=5_smB};5-(A0y|aq_^W>HijH&Jb2Uc-oJPl8 z{l@yE;}?c(P?gXwAG6^Ah}2Ntu1{+P=PK)O1fD@6jLCHT6Mo7APv9wy_ai38<~k<6%h_9XJPfZ&#xOCx>ia ztzks$%IVhxfVSVV1r<6EA9`J%$0)I=3Cs`KQnc@~5Pz(Oyq2Sp0S`&O5(zX(xNr#q zde(L`+W0B~jMnr`JW`bDRU~^Ao0R|!QtlbsJANm+Hh>UmBeFdt8xBNAd4j(im6@GQ 
zwc83`k6EUw6>#QaTQRAuWKraHV92xTr0v8&pxeqCydSC6@3@d-SDk*N+xCj3M*Qu? zJ|^bR<)i4+e@xWU?!olj3Z?X8iNttEVQk8jaJHA5*2faPakJo@ifUM?`vo79mH0u{ zL{zO*X~>>;=#PogaGIH1>sARY{slORsy~*iC$LPj+CG16?tyRwX&XIB*ZD zBuaQz7r!5O;o2cJ)k8=-tJgOke@9BMO#ifslKOi=z$Hki|4y=2fqxj`Wvl?7+!e*m$ucI`9Tt2!z zbO!Gn`gW(eb~Rder^q%R7$R7?rZSwqgfj_9J)^7N!`tu2M8-(+i`Ey5u>jmOi#y)1 zEfs5U7W4Z2v9yQ2@vN-`7K7&tc0(->kDkw?LKS^=**kjOwiW<5lc#Xf5qQ0hKgDPr z#WcM;=VF2;GK6@pFL*~Yp9H?9>Rp#$8}P@?vFnuAJ-CJ#%AT?X#%l! zPR>lqKAvdG18iEkO5vK<_Nf%OB>!p^DwQ=b#gzi=%Cw`FMJw=~==iD>Ivw1zlraRO zj@%-upTQ4Gtr|Icyu%I#D(+Z;eW6ZCZXroNe@F#0)Ftaxzs3EiG<{sV$4i_*_|H}$ zS59cv=QyWyF3Rw*&KBA6ajg{>Sxg|u;$Z6hcv=j9RAXI%S^=4Rw{Z4H9JG7`InFM^ zq1&LKzwswsHmqX4eZl<20Jx%|O{m?ZlJqT``nYyB7p5oWn2XWYVIE(m14Sg?pm-@7 zYj?8{s7eJVVXW+OR650eF#GYicTSEY=;*gA0i(es#O6EBwm&yqcsm+wUwd_t_j=-4_4}a?Ig^0 zlEjr(7e(av^iZX?0zK|nz5125dXMzk6zIPCinR#ct=n)Dg3Eou>T3fOOHG0%H5@;%*;Ofj}7}yrbTZE}>APJHdLT7yw2P0gkNU?nEEp z5r=IfY7jO=x^{O3y&e4m+f=(rmHjX6p6anZxddKWCuE$ttx9e9Q?ea>ug|i8?&}-b zIjV_)Dlwg0Z*eb@?SAkO!tE1j-qI^fdU~{yPb9L)e()duiZuNHiR zx0+9gs0d2Hsa6}!o!3Q1a_@6U86F_j>OY}Pfc?}Z0;p^}_`w+@&Y~7o zUH!Vtu3Mu^XsG4Xa=OUAX&uoR?~>N3LZgFtmWp;+Xy|GL>OATYiPHL2g61=vf`{Glgbc)?~O zf#KJ;?eZtI^bonZho^KKnH8KN52|}5`s2^LbAdNvboZQmHQ=z`p|WVNC%N0bCZvF1 z3()}K$j1;(rn|&-VLyeUhSq?4dBUX~LigwmZ_djx=P&t7tXPeeDBTeHm^W1VRd|$} zfm;$7FWZ`0I6Q2(qXS&wY+c`P&mc>>6X4T>pReWb*hWlwiWcX(`gaATtCv z9a{_+A|iOgPbN~pgch+gf%ra|7P@l_h&IZUnT@;T5Rt|w6E$LufWFix6E$Xpbg2+X zd7Ouhsr;n2-y~~Kr*5W-Z608!DuB3=eYI-#BMDE=iE7k(hTR+&ms&M3gx3DSPbPv- zQMav5zmGHhhx9wqNw`AQD}h}PgWIHLde8ISqHntN=hhlPQI^Q5b+(&DpC;>wE;S5T z=O+^ZRM%UV>T>ggvQ%KSzt{R>Wb*EW76$GaEnN0RqKiLle086|~n~ zS70h;0%5OjJ9bfR9y@B&0epblKfC0mPiph`vtH&NWVtHX?au6vhGaQo++O1e<|NTMC?TFUpN|*sYGF&IT0`q zb4Qzs;Yb?1boip6_PCBzRP4>;6Fs@SyA*}pX|%ua%*4q4@uXWDUKZF|dH=^Mc>PsyqbmGR_b^Dzxb8c*7_=ufhlhJTE4$guCIIpt@5rw>Q5X{p=?m z4VSTK68-4(%oU?~5{76)Ew(pz*j@ch7b4ZE9D9I90A%Q`TsJtxcB5376+fx*=EK%9(;hZBAZT8`ImEKon+Mhk>p-T;{ zTBQk;>0$YJ*ZOj z!-iG)g0ty`sBvUup|b<dyB2fA_RRUm z+FXcnl)>|Lr62;2d96>jw0{h8++tGNw@wDBKpJBQ&cvhk#d9vRNeA%<0cP={iq?S3? 
zJ(-}DJSkE_XRFsrpG-7|6yPtd>`9$TM@QUsY`Jn+3RsRs&ag)du!;b%{y!MV%19Jk zgqA6BENL-TDUseQtoq4B>uiy(8NF;ZPl^_Coa$Z>YL(4jnk2-VHHClis4 za@Zb<-pJV@r%b)j%o^d@nx0H&$6;>Tt|<@MwG=xsHfgQVB;H$4%Emmk5IFW=Xf*L; zBJ5n7O+}Lob|RjO(X}Li^jNFY0feB;AaNq8iaJAuGn~jR@ z?*=)eDmH}Be@@A!WV?#uZo}(?#5uVYbQa$rZQm#P|Ud(;P4}4TTk9fW@*k} zPqUB7T75gvaoY{Zig5PDt|m9)jPm_xt_&p<1K+W|NTf$4+beIpvgRZjfgd9>3i_lvHNI477z##tU| ztM-eBY@0-q$WB-t1(WSijSA-d#IB7DUc%rd_lrW=v0Tht7fF<@oaH#QU#o$`4gAuO z*6&X%yP?mT0uB@RFV><4Qh!_Z?GGWAQ-3E>tLe}uax3c$ygj&Im%iZ-h`_3Eh>n5)Owq9Ei-E6fg7aDF>gmt*lZw>)Wn_glr_fF{oMXruE&{}A;dedLyLAb`r3}E z=X`umU=DIk)jZJNLj2r@f2ZpK9Wp{)jA!Xcdl*x{{4A;L>I&+T%Tdom2ChWE3mDgM zfs0ybnxvo^H;aRWsGMP0Q$@}Os4V6f@h_36(vYqfmoG$st+Wa|Mc78b#aJJ&ngzQX z>9IJ;H)Fiv(TH5i|@bUQfyC&6xQchswGc_Tm#X%OR|ubJS8GTwUvcl7KMBI7_9Qhg0JVC-WMtK z0ol|mo)Sf@{|ys}5$98By}#CE*;9#(*&48JiPkuo!xd4r%JuO;{gjC2K@=Ngl25&} zE{Zlo2cwH~dbiv?>6OgwUDOO$gbqa0POPu)sR zHSqs9$?JGu-X+?(DC4@2Tx&a`zr!iuKxd?||6)mX2@Thp@Bn7kt=u+gcl9f`kq5e7 z7jzFkm3A{;h-~;NO(9!e3JPF_z_=NKhD~U6{v9F8qJai?7Qt{|j5Nn{I>)93@^kyY zY?oB&OIM|YwY68T}fA0u*(lL^){QYRqEoI=}k$M)&Y+G&_>~mDy>RuWd9pAR#ue=)?El--&rNMXda7!jm~Ys zeLLS5^^;nl%RnnsKRbHFD)l+SOLA6~$nt1|RaGUj#MFf|uPSKQO;HJD8(Ho{sy6WC z?Kz!e2O}-7!n_U$HXMx|s?x?WKH*yM_0fI~(TP4A@LbTMPxxAE4M-BAbL2&J9uw$( zM-e4N4WQI1l+gBo)-)Xb_@C;tZgr^o^VYAQvvwCl4@RQZRcD6-l~>2D5WnNl?2JuB zKO69FHl@~+%;2BxVzi#aI|j&12Wq&KL}qYF;D++OHuC@i`XK=t)&uHN0t^|$D^a}> zmGr#LsrqSpa#htYtbPaPQcWfw($u-8(fM@z*@9~I;A-c(YH?1W3aoFg=p1o@birMb za>@lhIWz=qB@(Z@YT%8kw5__`_sF=bzp;L9#%y@3*L1VfdUC#3_F}h%!0O*k*n0H3 zG54z7iN3+a(#(bmwWCIQH(KPRsoI94JX{y6p&)39p?iya*hDdxQ*zLzqNC6Y8j*HUNq@|C z{k`ev&e=SdJHcVn{Q3)%8E7{J7h#t*8u*oPv6C<7*{FUR{`88z$9z|s(>H=;9^-`f zBn<6*K;$(Kt6eV6*Mv=Ta-@e{ry4-xhqH01IJ62J z@oifUj0V7NMU`D*;``H$Salpu9KU`KyeZV)c)rWlR1do2a0?ix;YHU2f%U&(&swxQ z$|6qFN!!ru;E#++xEpm^xDaf{x10Lk(>Dhh+9WYCuP1TCc0h&^V{@<_B_~lPan$Y$ zI6M+6DZ2sCV_WU%8$}XMw`a7z&mc3Rza`Hkid_FX|K_dq8Byd>-dU~e8C~qqu_}+x zW9_z2Mdad4)mE=@$LkCb@Y&WC4HLzdk?4+*IF0SJ3SrpIOSisz{55?zDs(g)Zf&-L zQogrvSWc)*ZEl-tQ_(*Rm55HEUby9}bGbJCOmRPSV|bTd>M&X z{6aGu&0{`9r7OxhH|R(2oUm+}AY$NEW#kf%JRblKW6)#Qo=IEJ2{9PY>%)lM zjs$`{boJFL+X#l7F$L{8+lcxcgzOD=SEUH5jiSDKkr7#Bgsa_{-BafJ!uqUjX|P=g z#Z&BCw*8FG0lzW&13gP+CqSc<=01_`t9K{Sx@Vu>6)g8qX?sHF0)uVOrbQ0w6+9F~XE7zQ7M03S!)w*ghet8dEz6(J z+mteTk`JAWF(~ak^CdU;(^0aO4g69-M32L_wV8mJ9+EGs9J~`c{Lw!=0kcD0iCTK{ zwz=rRkvQqE1{Cc0`)xiF{rL+JoIrZiFk1*9C}gdE6ZbpHDqx6ai_sc8H#jb7$a|sq zWdY+x##F3CDG+Bd-3V|4?(&Qq+OvrapFUt~`ug&-b~{i(gn={EsEy18cI{4#Niz#! zL*GLllj*L$lW<`(Y8*+L^Y^0DP+W3SxfR2VS%X4+ZmV67IH3-_KFGlpD8NSWxGjJ| z(tAoJzlKX@)xobh87XLNNwtV)VAv0x(&|Kk$h!+xRvjlDK4u}va%E*MTHaTkXsqYF zN291vA{)e99mUC@XJkgbs9-R8rA5l@2u6TH1Uj`bvPK!A5JFu zR#&YJz}=nUPRkA{?Y6Km2IE#A#SwSA)S^k*nG?QSz_bWS=g43hl{kVD3zxG%zrBx{ z>8RG3-Oyy6fjhecgwQ1{32NS5owftCePVm>SIZo%=}~e}Ad;1gigL>GBJ5F<3Tlht zWdoWJ$Y=k^rt~waQ!MR^)jGH)@_mCN$yk0m#;uutU5aj=m}28XwIq;-b{Flk%1sR6 z!EsjD^SeJ2Tv4%BjMh-x3arrcaNB0M#Q=iU9ga|iKB0*0TKt|=$QA?~;)AO6&1s1P zkRoy@4=kyIN3-FQCQMnA$*J#(%JojdRosYC=6JvPV2H%<+gjw|+VOULe*bT5U9~xq z5%udEW#J395v?CZvU69hV?)F0*;K~cU&YJhy%@7LTdG~d%Jg!U118V1`{^1`VHHQ;o)kAdhr2KmfuT4U$5kV&JEN-@zD+QT6!R@ zc>*1m>FbS)R(?RIRr6H7&-NWitFOB1-&=(~k>Oe8fkdIbIta4^i4HP%(cll}Rm$~k z-;b@x<4OiJtMpfgPv5c?^U#6!=aUnL7QPb?_E(D5?+mBO1W#BX58+yGb|? 
zAYhD+wXl5L`IfZ`hpagX#&Uw)gzKh`1KL9TVF)= zZyx~>^%pjaL1hpu&xRudrUaYWLSO<}^Wp{{IT6Eo#y!JNsVwr+?;<9<7`P-~S_nP_ zlQ#mp6gdd&+{@#PV6-(ibTr%L063mM+hem)@G#dK7UGr2gVDpea3t2Ra<#pxL^`=_ zUR~_?Lay1aMHe|Y7NP>mPs4USe&7G^ZBgU=Y_~0`Vs8%t)t3(x*;yWj()E2s^&62W zaL-bGj2`T!K0Qd~S_~ErGq};*4(#C{SdO+X=<9FdKI4w+^o&7KF=+qE87;dT)ARBu zBo&K;rx=7){dO-p&;Ui)Ry1`cIq2V39~xh>9RYazKSPK2qe4=;+l{r*%L&eI=#uI^ zVbP9Ngguuip&w~#$#XiOa`>aTV5LfN7e-mj%AV8uYDEQ$>a}iRwnuxMZr8ITsl4&Y-4=D56?9uO4l(n&%QBVa0sGY6CeJX{edx z$a2~01lErMJa_~wwtft4tzMOSIE$KuO~x&hsX*UR)i@?F%5Dv8y6Y{cHhcWRuj4t% zj8Y6`oR*}^XJ`D_=D|6NkqNp~3w=#|b{BDA$Kp=Z9{~L^ZSXnmPEbekV`Ib5rQNCK zaVrQP50*4!6VY+}%-U46hqvXT`$xp}(S~cIA!nM7&af%s5KfgnKDf<9xnX}O!m)?4 zxGNx1j5=I=kZC9*1#FuWP7{Q%jUq_w&e(hm;7PaFW}dD^<~jFvu@LK}Z`7YJuScsB zqsbY}qJX!!h09Vvfz^gfhQwmYq(x#Y0rQM=^f#WSi?4h)i_c)}nNW zn~L2Q)=fEEk3w!y3_JLoX!tm~)v`m{P`+!ia7y2ep1ZcuX0g96F}f#!y_{X~Gq#2J z^@v385c|*0bCTxupU2U2QKl$>Srfwb-6(u`65eqyGWv-vW6!6Rjn7E@!09_1LFvBq z`LrII8?Eg5L@kHW%|(>)yf~zu)3)#VM44_x1>aKfyl9ap0e4+e`Mf$imOlw&?8jdw zFxinwjAp8$G+lQ`(A8=f2`%O^fyAl&DXUd$+<)xg^E&t@lZrp$d9|Lzd*8J~Dp>vO zr>#EP;j)YVl}cqV!4%-5N~Q1&V66de01Y0;E~i$9P9ibyP_Y3FmOE9<-)~)jZe1$Y z>jseB>H}<)2OzNi=Wl(Or`{|S-;y6dzpEy_qrpH5YiGlIdD9>sh-<3F#3@y^2z1Q(-n%;k=D z<@vM~ByG7?*CUw|G=GP(tNPAxCS~)1DlGU{>{^V!>s2k=g0dz@5cAn}jbGM!!WOlP z@;$bs^5ZG^fh`LxjN%1b(YIg3?MAVQIZmIOO0XHjwyg!ugs3KUTXhE9AK$v#ICI+W zr~>9-O!mg}+A@!hwz4rJ!~&pf~%fs8hMd7J}*?s!5x}XP&is{fZJW zETW3T9xx0oY7)wvtK%3IU$M?iF+vPh*pxDu}+n8tGq6O#*H>|LzOw3 zG{^gukrj}U3@U@m82j{aP4SY5IfTnHlzuV(Y$AI7Y?n>x^IyO9qFs!Ecexn^IW6d1 zEOGErqm3VWM68u$Ju@-Lv7eqP?Ii7>6__)Gs6w zl55cLeV&1&-ji>{|59ZPj2AmCBBC51q2>0@2X;iY*F9xHa~H zCBcJ3CE{2S!LKLz_eybU-Sy3l$V(D87{h?UhKNKyTFOu1TET9sr2}tt*}4#Hf|c}l zLA#?`5Ivme#tY(|xT(1nFw5<3z&V4?8s}D|nv?3|y%?SrtQfW;=NM1jr)^v9WpDqV zwxiFRe~rds{JQ@|+bwnrcaZ2>?!BPh&Mz5bNsGzsMbQI}d5|iXyr@fqD+4BWsPsi) z>Z0U{B&;fnUwnJyCzfvci#lqE%DL}Fky?YF2Nm&qBR+NX`6@Tr>_we|cnE@Zv?{fq zoy+HW2C0rrk2cDpt`RtzVSUvGZtT;pI)cLfi=y}Agh{E>H|M|sJEX>jE)b|Tp!c13 zl52{TTNWXcS1S0v)_~6R^ow5L=GyP0|NWd8DGo;-iy>Yd(VIDSO3j}%oAO$9nZrMbQR za=YCZ3eJkM8c;Ya=!bCx^oUA0o)%S|gx^?jKF|yP^nQp%Un!i^vJlKLqqG%GK#PLv zuc~Woifm1l&{hzaTR*oUcio72NQs=?h;{Qdtxmm;_nGgi-^+Yx#>9(V< zi?16&EQwvm>sr^77?T};Jq;uszOJA+@wy1q?FCYdij!(exC-_(JEiIwSfxnxSpden zri8t4y!G53gK=6F;0Nf)d*#hp=% zi{^Ff3P2qHEYyglx zwh-fI!}Q4Liz>O&-fR!l3;Y3;zNB8T!SzVJ%Ygu)!G~={MPuVzOE7(_Q3MkO$qAav zy4O_;Ik5saKD*)`5;6Uz>D#j@eNKa9C?7a6p` zPgK`$+$e}v?P;J{HuHX_A;1|8*z=fdMAR>YuoE>W+UmEnH2<+#wRRb4Fxh_zc~vbU zWOqh6cCyZJ>53oOn!XkB_ia7K z4O`~c4jb{>fCVxBHk$$?adr|NZby&3cfS78c2t{$5q@X2-B>%=SEnq6d+L&9OtxE< zB`KU+@9hhPGgr>pBY}ibfq)MJ1_sP{ao(`U(O)|I4g)*1Mm`-+qAuJkvJ-N|yqt8m zWCGZ;Sl3&#_FTBIN{BlgFfT&wX5Z6-KjQ|}kjjzoY4Q4kVup`?FOkOAUvV6MFHvHi zYZDrPs5aJ*GLz%5+-iL5muxjB)hfe+`c#y#OAWJUXX?%!Mh0B`w?mp(L1nhaUY~EHOtyj^L58_8|Qf; zDxGulqN4iM7VcN+A@w_s2B@S)#4#p*z8Ycq2K7v7pr~rt_6BV_rU8!!zY)&_x*oc9 zHi}a4Y>cf`#47u>%?C;?_*~tK(X+d_zc1ES!lGx400?>{8kH>y2@f<5?N$P!@6 zXN2|Ams>~ZTWlJybD+!A)EE}yP{q_Vvel}xvk&2ofvY-n3o|ANL!Ebj*&>p zHUd4*j0(xF+NN;Ih8Gp=IL6IIH@W@)cs@P6deY_1Y^5wPom#k|ETY-PXlFa?{R)b zA^I#T6IJ_$J&($K2+!&CLSW^uZuWgKpPsnNj(lJ1^6G#mxrZ$~8l~;-Lvbu#8a?jd z@$V;6`~qwK6Y={wPvms+`=VG(3}H6y)c4bhWjph(v#5zUa?&c|C0E3Fvr^!)rps0p zkEg$D)!)}S6v4)v%+v@S4}5Ly_Y-+}upX;ZD+JkAub;D~iC$|`eGI2r?JYtE2N`6` z*82TKu?6gh^?SzV!9vLk($)rNn;>~>>-v7$nplk~T9tKwKds0{=PW&M2&!qQEf6N$ z1Z-^y$}R>nmrfQelW zyxcJBuB`+zc>Exj6gXI2#3ypHY+tAM$AvA1LZMp7%%66<5LUt4=+fkFs z)8Db(KLffG=F2}; zTXJBPd<#1iKIA@zl%B-EI<_{@uEk3aL~PH5T0;p&GCEJo`ZxAMxMdB=qZ_M`Hxf?mmB;bVssx&Q^ng{zGr@waCY~{(w^}`4_LI6!&)L>b z2pbZpU?T?{ErJ*;z`QQA`v3Ji+82?RD$x`&CU%#|* 
z^_8qaZR(9g^_NRF9i0r3J8VW6WP87fYyNC}H8PqdkK8%Iz~&iz=VMOJ-5NzSeot?Z z&+oWG+*d_6ZL!=T);$jbKC2!{BSu$Ib{o5wKf3zu-zIKz=Z$ZU zyU*&Me)~5`AHa6N?_b%jy5;u!8`~3TH;!}vyNB@=&pWfTuX+>1Sm-CsESDe)v?+7}$^*Ci);yDIye2+raH~>&A*WR#_5G{2yjD z;u75LfDh%@2X1gRBEVsi&Nvf$v_{0`#+?MItI_$_P~g^lJgUMxxctUW)MybS*l<&t zlQkmIb6kN=)ueUQe9p3$zhX(E=UZFL9BBcWOGd$}YILbX2UlIALl$w$pCq|=jX-_( zkE~X;nlY-Y6QpEQQXgm_{Bk$_rs&YqhO|T{?0Q4i8U=l0w=Jh<*o_;OkM`)6-PEE} zMK_p3>x#+n2xVvmYQ8za67Yb4|G#BW2;g@K;!%ttlmm0yP)wX#@)y|TO6sw^8I0FR z8U1g*i%kW*Jd-&sEXd{Miouyc_bMV#?uxU4BA*o2bS?&*_pmGTIh4d*xVy;ij>Hn%N4_p#%nJ+ zNlx8kZ)z&Yg@@XAl$<}u-xRHpXwyzaWqh<@cL`xH#RHxodv`pwDXEw7p6J=op z*guZH0p$2*9K@bux>C)$zNoOO0O-mJSflEA?LYwQ>Te;#MYgPQtQ8y=px(B+c;PEz zDk-2D{7RE>LF~KrOT;OvMR-^UuvJAw4EDVs_h#BUD*wRpLOA1aRnCJ>bi#SDOD z?otOs7@&2lU9QIpnw*mJ6Gg$!_=fDSLxIm(!jQxay1-_L5wS>j3HDoXdWwemz{23t0Bc zD;ygSze_)gzL7lIfu0GKf%GzZM-OxuRfrE~PlVq4r|oH=aXlpoc=o0!I_!%E@N{?{ z%~}xxy%4m!u#fguA`yg9_|3c(1mwy4K$fTnZ;1e1W#-r)dn?g`avC0gD=ooEV*RLL zT5r;DoP0~?V+nq7)K0}45@ELHEP%?KhoD+-iK^M7{C-!d;@bouKv*8FoZ6t`5GkLPjR&hP1&eU`8X(ZctSpWA|fWo<6vTofJ`FkgL8{@u^Q zr9dF^Z7vHMrr($+t4Vfc&{kj8Vml|Dt{~u>&#AbsCPd7_5j5(diE=Bq&A`nKEw=-= zzf4-B9YJ6!urb<=8BMMgwx=R2NuB5fAv@cTGfk`=!jp_+%87nF2Eu_qWe2Kxi&W_L zSkIVkqA-Y7iT!m2$P*1~j5sV$ql~@C&0*K=S^VJcY|!JRJ&!r4_{3h6d%N6`H+%b= zBYyj6MBs4b?L;wd34jFT=-X-aRi1v`j=e3aR4g%PK99exSB;a-FdwcsL`bb_cVl8UU|jtM|T}=Wwj6=-p>y=D?pm5c8ENPWnd2iSvQ4C zd3~2+E|A_X3(yboVZ!=LzhAzRx7An-tsJmxQT^jdTUT`nJ~pDxRVHyW@J?}8Z9CQ( zM&Pa=u$@5nsw;hW%Ztzh>N8t=0^bUyKNJ+iIqp!pAFUA96J#qo!z0oT;`svH&f~YW z&Eft*%oIte4x`~&kIVW*;L#k~)0k@n;cgi9EUG1&^5-#tb~l@_7i#qR=apu)q8SX* zar5GaSF2^&g!3Niw4-`OknN$$>{!&s&ng{+$7>UXIjeWaPSlDX-C7qB!&i1vrCGAR zI8~cSnKX!&>2bh~n4`+A0lZBrYjs+Z*Ot#!6@A@?9rgXMI;s-us?TZyO$_mzk**cQ zJHupsifZR>_201i+Bi_)`aNO-Aq>~8NtJKlx7eb`%k6E}s_xS6<;J7A+O#QQy9EUX zVjPHk?oKtz;cCzoopC=PtWVU!m>eBWA>Lfy17|5{#NtO$Q2R}alZFQ&jA1fv<|HiD zrv2h-vyH26mU9hVJogaMYC0fZPht0F^aj=XUOO9z_;t-vwiMqu=c;KJ4=%@7;%5~9#h06C)r%R5D*U}WIsrY3_oW)2Yw(* zWUnG!3&zYpNYs$zN_cClX2ylij;kbt$3<^);s+W>ySuO?ORAGmccdS?upcC{JCF7{ z`+<%_wya^Rh&n`Fx5|LW6>aMmRu$FY>?W*QMalk#(=IADBt3Mk5JoC`J8j@~KhV}e zniuCb>eU0bJcwC@#V7`7Eqcrbxyq?QQZg~sIl;rAaa?jv0l=;dnp0Wic<-|=mEn{F z-NjG}?w=FLW0qgAV$A6I@UIw(idRsTmVTgv&{r6Sg}zwF1GC(OP5mGdV-t}_0wA>d zS2hzPOfD=)Q#A7~xlXstHJghHd_xz;&TGDsqwA2}E8~gG^1hzA`Qs}PUEtU`VvB(@ z*)ho9Ouoj&4z zoQTG*HpT)|EznnSy?ZB}f8{LMW0t9^U$ohxE8Y>E;#ZG*kKb1+)!0ugc>kDkQ7X%N4d}PP#y`!}WCUgzy<-I8u9_Wu(2!@rx3>_!O=P<6HeComdTwFAx~Y)}rG+cB`!iwyq(c$6Xk#H{yd!G36G=ARpQwR=y!1JekZX`=V1XnPLy+eD8~Z`@^+6a^ejf%MDTQb9xp9eI@pVN z!6fWY%-$6#4R8Gx2J_uSV|ll=J^HQ|AzaIC2Y>8cagbwoh?C^khvV<2Wit?*?+LwT zd@kHS9!~ws!~Q7A~+>P-|B=;g(!f z5jKhJj6*Sf_%J~vR#KOjE@PFc$6;uH>fN+OeENk=tGWkaEVA3oXvoN;IQia{e#_0L z-QU{3N_sTQxk_fvWvzKGE3*}D+$LK7VEe4j?T&%?bs8?nl zu`Ur_1qk_&pG3>TJL|5rmFR(MCaqRuO58@y)&gR%QtL58&VPSt8(IbI5pa&YZ>kit zMYlHGF87|4V73#8-yS6|V)V_$e=k;2GjV~pe%@mbqxX)j|1xU(YIAdI2_Z{NIV<2m zkO?_O+`BvWSl#kv{BC&ou68fx4zqFWutYr+`@Q@g~uE| z*Barv<35ybYyBdonN@@9IyM+qmxz-5e*iyHmsVea{rqU1xH!M+Gm*#Y61jdm&z0!7 ziagMQ-y%=ciF(-{5_O%d)3Hh(6GXMA0#3saaTE}_2u|{JuTa++?$N_msRHx?Bo2rI zk7aAsDnO0=JF5|X+37#D+B)qKBV0%@exG@}2C(9E(g7o=bT_SAa0o)Iq{pv`n4*gzHX^VjpaWY(Q@Iakt@hpGyrg3U@4At;71u&09Wr}#5HPEAeO^BBxI+^Jr~^AIi}_FkeX((6JVN8U@6 zbJt}tNA=7camc9>Vb&0899O}hTN0g!)@+fa%Z^%*A(`CX3o(Z0)BL5Y2UtLvY7D|} zpgedl5pedScdYWgM20YYPIpy$%g%8gk=6IT@(q_0FGPnmLIHEJHmi+Z39|`%sRF#G zxXisLs>41qyy@Oc#GfwQ&fT`?T``W`>{*at_&DctLZZpVvT&EtVL!1yJN2Ag1G=Jf zPFYg@#(=QSE=2vY?LV=ie!hw6<4~Z4zC@t|OYdpta(Ouzj0>DL)Tx+;ByF&16%DvI zh|RogM%YgxZ$)pnw|;7K@!ZH3IUf)%xr*2BsU%6x@2iA$>LTI=z4>*=!>}#JB6$VI4UP0zmj*?K@o{-d^`R$&={ 
zQd}d0`57;=BX6rAT46+%!YhjmM|8OxT_ObFgOeN}YA8|*JFPv8Uy{LZVv+X+k?1}H z_Ncr@Q{vThmHdSr2)~Gf`B+20e5u(E^#!=klLzyOekZP|J=NnaKE$(F4#~~qd3h+3 z1iet%Fpi3th`q11$vuF)Bh1#3_r-5wy-wcWcqw`~xMT0fjS1QYMaU_c;v9cpCz;=s zU5|F+{X{}KdhO)<=^Oe!#5Z)qq*L#wt#tghW$z~5~yIk7&UzJ>&{dn0tct4%Un7O|UY`hdm z-|oL*%jN3iG;=Gkm3Vjcm22=;%P5dEz}ED#wc%31^?=gB5wKC7;-5HcY@5-D-M3`h z0TF$rOLuHX@5$HLsd+cb`$xzr*=$_*`*0P{(;C74<;g@e4rHy*Xv%oC!<>Z zX*;Dy--ojg5|y3jY`2OJMD1?KlQ!H+6=vrstWulTUwzH0)r((0daULH9iSR*OlYmp zcULkF+&VS=;)^e=J`lV)L5{Abc-i}Z&;qx7pc|*JJZzF#_N{^c)}M1hCV8olAu4CV z@=>(+*NM;S{2*;I=lxdWIa|5{@3Ec<>lP|*@`;&VAxcV&sHpC_8No&j1unSm4_hf- z&bef4{DZV3wKdw5KzWu%@R?Sp*W1sk1@0y zuh>=gGMbgZ>m|ds8bC>U3hhPbuIzK>cs&Yn0K+a!BPejg$0H(L5H>ZPm{H%fZ9QYF zjHhdPn%PBm^-Sy`GUi(tZ~f&c`5HCty|Y zMKerV?7o^|?>3A{En)X;AxcNli7aY0;H75|^s{G}x1~T(9-WZnvPMTB92Y{ZL}!ry zx~;|w3AbT_#&KRjU(k>Ut)HFZXhWi` z^Eh7mF_X5&V-1N=2-<_?sU2_7ibYp~(jQk=HSQs1)5!)MgT$vMLS;juWZz&dQ=Lga z?DR7n0`Z3Q^QW)AYE=z7YVjs1Z!XmhqIps@xOsI=VBrp5NGk&i%mHt>FI=78u_*C- zJGVP^1J8j!omzUp9Jh&%macU7>2B3K7hDK}_neR#$DcPZAld2DRn{5ZH{5j}!ma?4 z(xozO-3@8?=oq#_G{-lgs3uAO{yQ6r;Sfc})7X{bBX9_5mdB&qgIi+iH5Gl|AW+4$ z<`teAp~s9~T+4+qiW<3S#MQlX0aV6JZV47&1jk=u5ec=nhEBV$-(4QoYYPG*tudxx zS^^o`kx;*K&IJZ+0SGLG8hq<@Th_C?GucW^(Fjb&9b1hr`t9sXubeei@HEjZob|vB zAt?zv1n1ko-#3A&+S_e1Eou>+9wU)xUqv@F`S)<7Ec?* zMVeCK1Z8TfrSUBMNFH^tv5ABRcg% zj%^(|i%Ov=?ykq9DlW<#cxMs?kM|GSNj+{Y4cjR}_Kn`MGoq%eX!kk%D)h?Xgqs7D zsG2OQ9;*@n4~SnKjbM!l@L@G)MEY(TmdBp9R;|!VV2!Egi5p~WsTXQs#u9B(L3WW( z-Vy+OX_2qQTF>YlM{%P9H)Q+%&w408ua7-tP<4iP- z7;wZaI-`AykSxi>f=zxdpee(i%?q0Qp>fX>7M?)teqfDT!H_LP8-~D+Y>Vo~Z~Jau zOdJ(WoL}&h?0ZR#iRJ-Ix*Rjy3SF!S5Zj>dcD5Q54y!!oeE3>)T2cARYqlOkbDi1hz9O^}U(-R?pAIhKD(?I4X1AeBCNOo+` z60#F4=M#6YXWa?6OB26Ad-7LXPPX8Iv7vOv|xQv=h-%X<|<{ ze&f491J3uO1BG_@A*b#V9#BG zrbKz@Tb_B?70B8z$-ROQX-bspc2vIS*9tWI8VtzF zy?Ee&I>;T-*UC#Qd48;yza%j+q3MnIl0WO)`k8IUq>YWDL*0(mG}%AILEdjW`Y5QI zla|Bvb^3M$G1B)OCpfb0MYGMGKPbLCG4fp_q{v>{MMXEl1v@!1xK0_ zwXzy~BRJZeNDN;`f~QBdqAQG#HzykLSY>Y4bD}vB@-QpmWV1-u!^XjBV3gS`YINrw zmrQ4Bn!yk_r@R_0eH7^v`{;DG(&AII?mOY7qu_WVo|WtH5U8E?7>;5Gaj9F&};Shw+RR zu$^kRwTyHW$_%ist4>tj9blVp+#x4eK?r4i<5N})gs#H(f&EbQ*n;bw+90*@l|8PR z<4Y*J^pq+NZT-}y%k$*I@|l30!}erW&mMf+=HmAe0#ncHnaHR}R^p4~Ji#l3d_RC; z%!F1UDnkFZD0G5De4vtV|BRr~OEH9G#W-xs!VKdgTT#i?SQ)EPvw zua2;@Mb`Kz2Fl)va7C~&Y4a=RW1$TT>6ZdK3}9W;_9Q0cHZ<^5m~#31EmzmSv}ZB8 zw$Y+J4~!Z;(jd8A@H~jpi?G6WHjN42$0KJGg>lsSW&h~eSmR0L<}vw>olUgl34xBE zP2`7&t=CTI+3%=!Qou%r?9^G&Zq65Erk2^K?^;E)Jj-EP8PBIN9j#K8;oE&yeO3(V zbr?6gt%=u=uhjrA?`u$g}DQe)OgrP~iVEGO+8695fIAgi9+722# z%BH95&7albhtEZ_qW24TXCMx@(-mkSU-Q6_-SN8j5A4!kT0s?^r6~%|rGG1_ABxVB z>6%keDSq(Jm2U6j!c$D$RLs50zTwDVgQo?b0JJt!uIaMQ*|Som;+Mj~`%*J7C+tQ^ z1LEw=e2lxJ>7w1!SI$~Us)*k1thfvgka85tg_toSanIOd)OKqKL@Ju&ro1f);?^?M zysQS&Dz2!?>!)osI<5Sl|IOCoxd>Fak>G)A{42p%fP9;#+lmF)-Ii`+0D#q%U!4`M`I4A@8>t7PZzF=;uB zT0KNqc@odq0o&8~R>+QA7ERz;fj~&+tk(0GFBkIIi)h!2)X~l*db!fvVMosCsKJVJ z+|>t1&xtzRHp-5j(|W(go^ux=iJG}h`iXNQv-6zACshlfN|E(xrvd`PfC=os;P`h7&pROo=fCCg(w)qvueF%!=ujD*67XEg?qoXS~XJ$`8vU4u^=STGijNB zXHDl4Uzm;FB0IUsvr7O6+_bjyT-u1< z`L1Hr<;P1R`;UaODy#D zXof8vo~!4+NkTslQ~rCVYEwhHN-mh~_?&JryQAkxH)Gm8r%fRgJiZa`2~}blI{Wu9 za0vGZG~p_aTiT<*;*6iI2T|AgZ8&S3>D1G=KSsxLC}hs(TkOd>oi}6ss45bJ|1`j~ z`f>S8twy$fHf+yTAos-=G2w2bfnc&lXV{OEKJQ42V7gTTH&K*nM^&C(qX*0?^>(A5 z?0K+`EoC8v;%l5}Nz`|pV1RbAMKs!rKP*#wN=3?V+M?NVzB}Ds{@E&866I%HD=P!2 zTW&k$R|TNZ3|Fh>RTQcWDqnFcTv}htJ9*ASRSOkCzE()m{jg=P|bPvBKTRJOwMXBj-gc10(Pk^mp{U zM#9GKULb^$J)a0MyG~|bj7`pelYGm^UD16$t!59si06)~B@^jYJ0+;@teLzrvS@ax z=eAWu53b-u_RHZWIO(=#Re=xd*)6cGI%F&Cz&9@hP|q=I#q(+7;k4{`6OEz3yqV0YI&w?r6RGDe+qkMmIxd?kPf4;m 
zFIjH`qh*!l`>q(T*AT8T#&C@x<~!6#s zB&j8ZxH++H_n)|ktwT@fSbDpypeO8MYHwpw2dnL}IQq;5dC z7I9s0BiTBwVl}EZFOy_L>Ras&y77D2;)+t+pkb1(;7^Hp-59UQPG4`EhEs_aWRd^r zpKaFpsKM35wg_%GwO6p(VPXj>+^18qwr7Y#?b(=K4t(a#xBcpe#M5jC zGNh?SOD1^@RSwT`NLPbvr;eg8JZ6X0acx&ihrOzYbZNaNC?yCHR9nel!ZzgXNWe)% z$szs1jtW;~RR7?QMRPnX+}rqgcJSRs;b-N9rh)>%^|&n5$=GFQI{ezQQ`sRvN7ui` zP75kFlyDqIVwI;(n`EQ$ci2=IxpSI zUDwpT9ygpQ8+0QOc8lX@g4*O^$$UjOqpL?7b1mG8x31@a2_&JscGel-+Zqwlx5mC0 z4t-`?%|9nZ{4xfbHf^XhrlEDLp)|zkBy8b%qs7R1CBEU@ChroSUe6Q$9MY?)bM+p_qJ*DLaLil9`U4dmvl6SgEUMCIZk zqrbNbO>ZITW$LX;8@>i_daJ%a8P6sIU5u9=6Vz;VBO&5@pbZ{I+$0-qxjML$ihrjo zVhZ_Q%rGls912XhAH}Lzmoii{&&SU}n9kKP05>pr*K4LI`{5lL;#F(2JAyQggz^CG zpWE4wSxd|jTIdzptu4mQRV-a(9q}98Z?6^N_wcXz___liG#ZbW9wA=L$S$$=X#iPl zI&46T$PMIj-8v1h7HJo;Z$N*{sNcq-4q}Z#bZPU2-(-+wGm{x#Caq*H+ zg-?Ww!?kmVppE?t%?TIDK)nO2{y@6(K zD8Ck^$iBeK^=tOB@T!3zwc=WdjWnj2vO~Fds%s7pM30fb$B=a}25wExPQ7+0zB3%{ zJ)S=)ZVpEaNKnCjR9=lUw3w1euLV*asp#G7LaG*Klo*$IN*`;79o3%0G<9;3+cC98 zMTnexTyuZU{mpyqgr?fuh$r4jz3b^3gxJ}sz?>+>f}IW^{hlWf$0Id##TDY%837qP z%y&2vpL9!$jb`8)o5=TX=d{dC7;im8vYn4^+zD*>1!2t=atC$UMZv6dFTAmlP!hWx zhc6?YF4q9rWwk1Eh1C^J)VZcEyQ)iU@t@7Z*Y2-Gp+N3jjp%2a@MgOqSPMNh_GiMq z5@vt+H+FL*WG!wN=TzKp;UDZ)tdEP4_PIdg35<&EcJ$5<|NHQpob4|J3}TGevVEyX zcw=(Y*;jv_c`CX5bu3_8yWJ5+*r7;16sIj+4BSdw@84vv`?&({1%?!fgBY|D`U59u z-v)-cXSt5Pi=*_@$usQx7|}xqlKlNcG{Ywv$D^}}GB&t3j6HBR?jxdu-=nk0Z&m5@VKl4NgmRTHeMar=QdoLA2}Ozg_Fb;ND|$% zVR%gS#K|brXO9I!x$4TrrS|yQAkWCCt~hjW{|RSDo%BO-i4j$AE)*v?Try9_+m1E} z-o&$sSThxOM|?`4C})GU3{Sl-%L%tdt$pQGx}21ajK7#o)h#*wvx$5+!(mL<#9S}r9sx7XCT3tsugyH0m`1d|_eCi*%TLGqs%o31VOvq{ z*ldCENN-OoH7921+?y8&a&>OZ3Ot;s&sAsF=-RxPe9lvN%-0Ki7I6XAL|a-DV^FN6 zSc`^u?eH576ag!ZaVv2KOsiaWRv&;f{d{@uFKp#8-JksAC$>b(Lp%k1KU_hD(CQ%c zg7mtYMi~SY8ON#vvxYoEL5jN9`h91YZqTO*P2rqMY+M2X>C?b$o zwGt2?H}Z!_*rpaJ)d#IZD5hLHsC)GoW-;r|&YhPx!~GN9wkLy+XQ%ZAG6dA%mZYVr zCoa#wrghP<3D$|~p)u8DJV2Oeu=N=d&R1;ELZAi;o$E#&hFhCnvrYP4Gkn}O3q8+( z2-Gd{%Jbja*4RVmf^oIprX7Wj5ZMkzUDPuoe<}6etb<}`>48XuWQ@J{?*l} zsI>pVj%0uGRN<#D?cs|!dR9Il?%TXMx!-w&;;}%Y@~`c9Oj6m!|7a&-7P-E4b}~b} z0}M(3Clozl*IJi=PU}72kSlZEis^9kG}##~9je-7I2h3p?`s+U_3!}?$sCQpB;Ou< z?k@e(&YcZAWnARR)x;@l$|`Q>1+WxH4EJuh5NLY*%|+qmiJ0&Np^|7Ivbd_Y&Ms?< zD4U1aawRY>!+Pdv(XPgNx;uD>U5j3e+<*4EW=3jbcR#qHb)J2E|zv%NP7Auy!|kuM!QS@9g*bAXt%Vc)V>l*9tmz=3k}jG za5LB)?WN1V$N_&!pZgKJ8(p}6aDw_KHnz6a?!^Gi8<*c`LQD`}<#$?=qLh05`&ih8 zg@&jM{X;ZG^1%LLqhTlY2r+k%$5ClC$Sa~LMM~_!(M0QV<+wXNq*q=F0%q#r(L`VR zar+%jM5wiu0xzQ>=AQi(Jj^5UkjF)+V2{R7vE7M9e=NEY#$b<+CPGx_vPdotj7FaghNnD0b2Xu%s5lK7MwK)ZX|O4yL3Z4%#*&;&lHSz7w3%p)>7(}rCU^7xC!3+smy+FqRhbzJMn=FE zdpdr1V=u8;(GQH#^E*+PEkNZ?&Ry7&ny)`x@a z$^Zn>#QA76cpY!br;=9ec_Ci~FST0d47br9s~3vRh{}0uh*#(gh?w5c1YA##W?e%BKNn4( z9-Etq*d**m&FgTA@!|uvF6KeH6OtDRb;MtWUavPEn7bi{q__#qHfsDL&xg24>rleS zv01?L{Y_YR;QF?xC61YYXIu4_8YdhHmUWwkt4+M~_CPeHLxe=V6nj3{YCF^nN7o^e z*6Ru6;m~*K1F7K|?uqt(rLeqvg>4bKr+x9JH=`AUmkPJcplHJ7RFN9m?kXJ6NZ~nzMgX&{hh=2>ogg zn6lThLH)Y>J>=()Nb9fbk8Tqu7j9%6ITBEDN9SujDyWHSxg2;b-ui}8om)c?g3M=s zBKw?NFXm_Lmv&MY2*{pNTMXItkl28mmH&pwjs~wDF-9R(;j^SgB=f6{=y-WbIjHie z9?Ma^bA-;_|HeqYMLhqYosSj87bbYi3z`Zl>~+a9d{L*a8{U2_eh5O7!`(eHX}aYx)`b zK30gVdiI0BjqUVH_tGy}_@icT2NB{-QoofPM{ke16j|afCJ(+HO5L?^0X_E6+aZxI ztxulGY~jQ47Y<)?(}PTpdt1_xdwQj)e?GPG8ltzcF_||Xi6N#=n>NE9)w|(eW4^}- z{Met-A-fa)NTTt58-&yB0)aDDK;d2k``GgPrcXUL!nv z2hb*KaZ6AScozKS*&N6%vm$;Yp|#pd{e2Bl>#JhQxsv7fycQUlguD;g#QKSUgwAHo z7F&a6X-@;pi0SsgKf9NH98>7axu=ma1wyp-%(zqf96 z|D#tZ)tqX(3FZQkzCeUD1U4YZsLed^cby*PH8`Y!#$HL>+#9xDc)Jfhp{;t2Fi0{k z#NUlN%v?-V+;QoAQw&v9xzIN2C7ioQ4DelU5ni~bB(=s?Enbap-!?r^kO;DkoullMrXH}**dBNUc*L8s5 z!<<4XJNSxPEC+?=;(iJ?GMQU3(^gju+2@*eC7Czvb}W50 
zo~ial0A2OsD*IAEs}Zdrxk9m=LLlgBzmAuzhu22E6TK?<6z&Q##1IdrSkOeZ{D!_4 z?QoW|Z=;lu7utUtSEM|!J z59#ZLxy$5<9kL}Fo_a1VR&g$IAzbT0c-^v4rFAGYuEigW_*8XB$-ip?_K$u#ZJO1o zAAgQj>`Yb{zM1ZPVCq;0|ce6Uq zA+Eug1RRfgt=EbUGyjk2bD+1vp|-!5 zwJPXo4G5ECN${T~?!ZY%kDeRXdBA(4^=Sz1Sg()vM;lQ0WmTTTWu2ZpN(c+Co>E}5 zt=E$$L~XVWvB_%&+ijy(0p=xYtJtP^xp--+Tw5W%2ZG;7iqQw{uF@QZ#4 za-n+|_O{sI7Cgu=#n&Tc_t z^^j60?A4^Y+IvQRW&1RS2QZwrm*dG*qD0j1*G87{3}HKEL1-7$fI!nf{%P}{y zjMx=Tq8y#iRSkn}!_`dCpKH;OybyLh8WJ7aWj8cIB%(&Q%6t~%lja?6=cZ;`pYQb? zdAH(#7NFYr#XZd<@(fL}i z$lBt0MDAo?9XeT#?+LrB3D!3L+`ftFOd?(Jz4$Ysu!yH>GI{+f`%X(qxJc|P<`qE9K=(8KQp)v1IBCO$P9 z=GMYsa+8mLC#Y&2@xpx`sqskk%wa*YN8d>l*7Ziv%Xi`<_M%#d`hFun9*~`^@JUaI z);Xf1u(N8I8utY4u_x8p9gU#2EocKKz7yAm`y`~yVK5Q)l)5$NDJy^?IFRww(Kbnl zRdaeKM>D>4T=DvBihAMb=~*qN3K9H^erMD5&hemZ!Hk%%TSq;Tjl8_?D7tJ%lKj2Q zrHknQU{Al3*aqSQX2mpz-{8jYve{a(H7)(vpjfdv?*uEuHIc&Yo{5gNi)?O;?HUBH zI6m*4;AzM}inP|r0iGXoT3ofb&KAUPt|BDTSg!KEzV4bx8DgdJdW=j*Ru;cY;VP^= ze&>3jnoHt04x(LF5sT27AFOY*syHai>K0nH+G8`bnrM^s@K&dV$`d4r?6ExBHQ?fI zEA)zrz+Rtdr9e3STBXBPO4Z3$dtOfz(SUBPj)hrDh6wb$)K(LELnFVi2EnL;EpG}m z+-a9A^#buX5RMjWi)OIP5Jepr_#6Tx+1xi!>lFlt;-l8B_4S%V?VMjKy~Nv8#J5jx zhfm?*Qy?J02DGoQARglHt4S`6Tqr}@)uJX6Z^3M@k0s=8u?=deD4>l2hKdqSem4pK zB1DtCZ5EVNI*TRPqNCYW&)2*)KvG`EwgsXXu*p#$zxlvOQU6Y25B!RTV7ZsQe`z~( z1j)tW97u!hj6uP)gQr~qfLns1FM$9Z!=|@KxR)%dah7*)bmYTvGP^HcI(OBNRS)$!5*T-)luXIUj%wdreLYz3xf+gXiOW-pg5!c->WRHi=v6aSnMmsJPU0d_ zKe~rIydIdHiss16w2|k(6x7l@{$pPS)EpKKf)dH%pk-Lh@OQL*U2E{wNnF5P zy=`ZJZ$ua1Me%O+w9R%;ueomR+ZdE!f2#KqVXAQmK1{Ad-wSr6ymRLLhd{U;P7gi$ zc$Y9KA&q}xH`ReD{&GG@;gYW5{|j5`d0pHbYyXNKif0#w`QK4{_#9LZu36oex1VnpjSk zS*l`|d~jV>9l!evgXAXE2%zK&wYtDMS4s33r<>hduD`h0OMWO=`W0FhFKJH#Ujgvo zF38R*ja!SytSdgA&(^TpEhwwC#}&BY)W>9;i)*lkXod;S<(Q^8F}++JEgHVX9a&Qr zh_-myz;<22H~Sequ3pwJNW9cPxc6(=a?YEWwuIW+@psL#773p$w?wU z(cNy=X@X>I_8Vm=rr@58wqC$$y$X(P5MubwZHz{RMy9qYJIkIW#T8SVvtDGC#|>Cd zBt*)uwXOFLL`#=#Q_m*k0W`Huwq29+YZIx5WBig9lsvGuBb$tKpkTT)VCCDrqU$cf zav!{uOlh~~fgEIew2lDHEaQ6vEj&{Ei{6(l^edtG5mwH#Du_-e9-vB7WJAx(r2zjvrTe|EDS4sgpBFXqlR#Pk4NvfHs`b7;^?Dyx`c_}1EqOnQl&`Nq4v8^KlmArl9e+M%a~G+D>Hd zWKpBV=b8+{8f?n#Xzw2wdA{t6`(Gq35y|`TOF`)EPkH$q`>!-%w0MK|bzrmx=Y>1^ z`y#A)b~pY$b@EjECPwTptaXduJzavHu9YObC_iyLqF3{Tn*@!QLy zn7x}w)Q{)?%pQ0*QOR6^f@1dIyFp+V_x8fdJrqw#-+sa#R>Pz^7#9z92iF($)BpTkR>mt|G@UwZ7jCcA=UaA)dq5P4$yv%zlrwpB#`$Z_0vA30PiWg!DrE zVQS1^C35ZbXz0d=4x90=j-tzyf6HBf8OCM`$zp6K_O!<6;lrQxuJ{1LCtzk~$23cb zrO|lq(Sbu9v{WF<2->i@>Q>oLei_6}y?fT{LC%i_!ZoY8)p?!;FC6wzyrR z^xa?r$QqiY@o-lz6KEGZaquh`v?7vt`UkosrrqD1w~7Eg6R%^R`ZH7d1M)f=69r zL;4eT5!n0WN45Ul#4DkPEQrKzP@~K}oGWZ&0RDge^hf@)Oz*JkXm!d6MeNNeq7>(Fz7o&FSO*L2Bt_-zS41a~> zud-XO%Npx#kM@{|+N1^As}F?Kq-)Z-^BgMsVlm4H?d3qVwXNUwYZ2Gf+W~Eh7xlA) z0kOkFWIO`uP^=c^bV(LKtzz=t>o4wxf@YiB^Hoi?+HIjd+ja(S}34fx+pzVCSMAejpV6w)0xD zNz?6u=7Me;m*8)_@!XL%Tg6BwiPBZsjr#u*Ls~NDWl!dyr>ulOqjO2S_0SN73`a6Ti@7-X8)c# zB0T-Vz7-|#l4xD)LZ|m#;9F5?->WJ4tjK{c+!_llW)~8%8u?YjkUelA$RMgvCyE!+ z)mp>(bbp$dsy%ce(M^<);P&u^bn~NzN$HWtT?lc`-4r;q1f53X*7~#mqr)DF2gGg8 z*rOLhshDZm$jfj+G?81<1414TWESEc3FV%k&P7yEwkKj(SJ^gpB6wcV)rDNksLsnw zydZb=R8`DnEVnAzQ-V_2)!S}4VT+c^-=4Nf0!vsWwoRKH2;~Z>OUkK5ed`ENQXRe&` z+!yXMgpMF_*eKEFYO-!8m7B)AY*wBzX1>6#>F&1$vB-oMBG@L?cX2c=;{G5MMiV3? 
z3PMAABTcZVFzEi3<$7EKbI-N1BmlSe!qrx&rzaY9?z>7&3I28ow^|5M!HQggdT7Fh z-|F;Ga4*^A(F+&eq(EFDAhl~+T$#L5kCntF;T*6^k9@_Q-1^VQGBuzwULA<_`wdwy z_%$voH|Sv@C(4?l11c43(chnKMM>NyxKtOxch@@fuBs76uyB8Y{77QM)Yjq=%K_+7 zPt{>Nrz+YI@_!lFxPg=W9 zDH9W&xk*z5qU{i{HAu^Aola^ZrI~~Eu54l5mvXmYxh|Fr4LW{U6>P7Xp*4m$-d|WE zK3>)XCEqEXrgcoQ0jKRitV&s_9Snp?C~t=Zb|qOF4#$A3Dn@;KRlQLFwV}OvEg*9Z z@CCoFcXbp{0fi%)7fhRnP}$KK>F2F>EJIsSbRqICx9(VMeq7ji(0CyAvf44YYN)eIIKuf7QI_ z?T5Hjld>;4&zZfKsGvLYJ8}=amk8N&6cMZUd_E7pCtBo@<7h?r&OFHWq4$E=;(!8% z4@ak*X9ViPF<{(#A)bO{w~dbhkcM!_q3GIsL1sBpZZ_?)NA(Afm?QFo(9(b=%67ya ze=iYv&v$JT1UXIpiGbW142(S)IAO5wLyDdlz4B|2b)Smgxg}kLmW$tI6-A3|QuIJO z<4o*i&B;?fwAvJ*1^G=X4V$W=aZE1Q^ccE|csakhXJlRRn;Np2YB~Jhc=fc#E?QP+ zv!Yj#k}=MXb_=ss*a|_9VI@9ztre2RrfQL*To`KDnrzAVPCp1s#LDXma?&d{azW3Ym-AB zbTl3@yb>Wqz3zDQ8xGU8t=5*|0h$swT7BT@r4+0|t5Q``RAfzh7<*;xU#&$CsPxF? zpb<()Hb-vz4uMPwHk>yr1W45L*giEkC#Y*v?0iokPw{bQ6ZaF@OAGc zu4x@st05uKfSh3KwGBn2R@$H*^;`t_-A0dgbgY}Q7kFEdvf(}H?2qzwhL`7-Xxm5; zfvo|sts52THuYQT-eI;qCRc)Ug|8R1RI^1S;F&|YL&y-vVmtLR6*4)2QEk_Ivf_J9 zz?6E8%g_s`?T%h+THy%yXl~tIjh(hv!x!-BC0}4XAQwWQBMl9<6EHSB69Rbc$)k|aT(n$-@U!mets#UtEZ^8b8YGpT7zo-!0&doKwK z&JYF*o}9{#Mz8P_{;?g?LG{SbrY_=HG)BH-O+R1ZiEMd1zSQaP$?V(Zk(<-KUrBuw zKsM-~ofeR>{e0rTv$r%xNph7~XJ<6OZsgbzoi=&*VB;8#ORhZ6qP7Q2osXuN z|N40KL^@xt?*3vYUZ^(efg@6+cO)W}ioQs6(IKj_$6( zMO*kqhBAT8FuNNl};Mqs!BR#>XaE`u>dEjCq zhwKCQ<%97+^goOE(8UlWYa8*PBGYWa9=@2ah4nvs!N$eU+zShA{6&4?-2celm`d%D zi$O<2x6U3usy`&%k;er+rv4}gAHSH0e5*2FOt=`-+dY^1?1?}(x3;%?_zkjHJ$X?i ze_t|%cy@RX=9(BVa*%CL!1&a~^vzHY_3TN~U!f#vC6!7}v#akx%$X#>R8mYV>twxL z?kU=)2!~pZ2Gvai*AAk+rfVK9ma}BkYE#Ryl!{{J#ZkA$k`x9#Zv4*GyHzwpkGlm* zH(O)lE}z`}=4cOaYeJJNL>9t;*<67qY@%na%?mJ{MBn+BG(R5CZYBeCW1B6wn1r7H z{DKt=%Ou!(zTr~+fmHkCP=dEF(;$=uTxaFcaq&u9B1HOHp|ceMo}`#bxmqO{wfG<; z-l#ezn2M0pXtMdjfYoJ#LHSrCq){Ct3{m~=w2mS-J2J^bH2LkU6ofW>vR1`!_~H(G zUeL&Cg=Zb8uht)^2aO6Ck*8jNSj*i9A3?@?fg1jAINQb~Sap<-z19*FT;qsXn--UD z`E}M2y((OdQ;6SZHxtR*9cM{y8_s3Gn_5r6%@NDM>RVrQfXn}P8;IjFAB=D4AbYpfzS|bztrfk;9YRS5676Pu-WL384k*moa zM7C7eq(k;Zz7N|I(;YtO#+JPSzp2%05%k)=c=U+)ZYJ|`G+ka)LBf{(7p0u> z^mthMAy`N3Kp;f@r#I}NR<+%E13MINntO-;%?_)TXH$7qE7nO2mD|zmwLil}_JD=` zI}8}F$K)>U;#)z}YDWSwHxGLx{?Yg}=KaWyX@PJdZiE{-9yqMQXB=Ve#KqD7-CavK z9DX0UfTL2rDM#T+AzTVg@3T`GVo37I%jk3*n2}#L+gmY#HR#{$O#FG;iG_h#U(w-n zMKna&u14eMD(#w9ftk4O@OlhIsvo-%I3TOkLsdSDr^uLVH=`Gtv1Dky6^LMEVlzJ% z?k;jcRD~n9d_0UaPXt?nQ@BXJ)ad9yy~fm6u~^;xTmoMQ7$QYo7gEA-odg2;{Yo_!b7E?Z&WN1F;RcKbmHd$d=(+54k@`FN3xeOMjb%Lc@HZQT1oau?z&g8wA` zj^hJ_$D!kj8-qdnp{VZ%ndD3306rE^d@58+vB%#}6cx=(%2PT)P>T`anHRV`5r8-@ zc;ddF7=m0$`f4$YzMqKv0QSpV1gQozrx8U9V@+27jam{|^99demi zx+&2*-^Vshjo-wyl5#hG=So?w8G=Ggpu45bR2MS!+S7uV7}GZq>^Ir0Scq^XnEk%4 z2v;y?r7z|Pb_uqKX9T<8JY%oTjaM}bYin&@hI|Va7*g+Q`pqdhNWm7opQz}fMOLic zS>$!9-HWYM>sUnGHRqglEDN}#8YK`k5X1tE%gd5jMBLJ>BF3xjCm5J$kSYQ3%6}YV z4r_IEbUOk07vdp@TARf6?~B=SW9uIi1@a6Z%Qb@r@p^7&xci~4L)Jc$S`8e*P!IQLppbVZ70VOum% z3Y2Nn!=O7}cEkfx3v{rc!%DO*=C^JwEPg6(J8QI_0AE*ARAqf?O5Hx*3RX)i9Bv6t_*9^uj`BoZB3G*3;i)TQn>T zdY>EqwhCi64PZIywQbQhzi!aBYmg-Ky%Y~A#KCsNQ6fkCPQmN#X3P1Icj*~DLBEsh z-NFqq%(JxZ(JU))U$?z_sC3C_UxosU!%-gGhOSqOc+G|1$&YcJ+dtHx9JNWj=ULWhgU1&$&A61oE zcWzHi0hIk%4AbhK@pe2OX3o8}ahsjcv4n_W2I2B&HYYU!LcYN2ly(%hkz7DfE!4p3 z#P(z9LB+%|cq@=chHtPl0x;Cj#d?egJHiV`1q-D!HxwF=UON|WOF-;=bgNAas$I|= zVP#r5@E5h$4Q)0S?a&Iq1={5}Lbw)sfRdY#u0+#?l|^<{Kx)E6`dT2F<7T(((P}y7 zf*S(3HR-fJ)9&KC_*+cK8|`Mi=0@@8j$418Jj_@<5d;nPdB7TKY_i+h(UNiif1&Ye zTkGvhy+EtgXJ2U{(8e4^Tk<=%k_uH=?|jobyQ41yM>e;v(C4lIa0dGA8zIS*-Dz;6 z-3t_Q!~baC#;AN>M1RKb6sG-s%p+IR==QlEG+vX}@+=rBD^ld=(w<1C|G?O2BB|Y7 zZT8?;P*B)`A$w>nQ6`+g?cr$C)xWmE#*L*L`@c@L@#*|jaOcu 
z%N~nYxfZxW&cWmHia48An;-}{S(p<(H?iGncD+A;h}vpuE9HLZT| zVFMGMxzZY&G!|64JWqIyO%8~O$C{!ysmPa2)fyC`jH3>cO&<$ZqS9Fr_xqd?Fd4yS zYE7ul@OSpKz~G{UJ!w|7`svjFVzUDx%1OQ~%E8XbM(D`b=ds#76HRMNpS8L1b6H)9 z&5NHu``JvJA1D;{G*bh?6>tm2f*QfbLFv(O^(9SIaZGCXb+9ZA)QSf&S6Z3Ya>?>a zD<4aY4V6c!pNhP9+Lj2P3bH?w$;vA9jH!t%F@shW6HCQWPFb~J70A6@6N8kLI5n@+ z{Jm;ouPu+GFl~yhhz__rl9ou@;|T~+5U+}n_yGQZSMf1>Uh7%owYXO6fwIKdJoP}T z=4NXMi1qH@-jr>47j_|%5(PvL{36-uptWTK?qq;JT1WJ}^!E&2(8fYOIn#)m-7x_K zqYggj9yQ$vUVZT!_p7`O2otU(0#o9F8ei~bLos$KzOPAeT(1Rz8$j=}A)3}U_EB(e zqtGJsrt$14+jJil(rV&mV4E|v|If5TTLg)Vm)+c|{egL{cT3qe^;0ipTsbl4J z{^wwP2K9r~N*fDS({Ff>x1sA1I~?zOy0DUF>)q(G@oTYXunm+Z6+RxCkaR^yqSv$+ zr`ge18@O3I9g7F|qatN}G|G+r+D-(p`r$V^?4*XNV0EdImJNz!jU4srKSbJHdrPBI zNh8c|XEZfxnNgb0Mq=GPE*|f0*NKeAFrBT<I!I=mu`QaG2VhPkSa%X<4!>JYu6#-17Qx*DUkcSq+V zb|coK1##pv^>fE2S0db0zuJX0c55t{+@xUS%ko|PT<;g-4R>38^F%EBW%h;E3`dH* zeW~R{c7y@?D!M_b8trR=@V&hgzlnBiwY%8{v{GllzKJ(WTp+wBz-zq<uOOaE= z?0|=c;0RR(zt0*FPfIy(AxQx=?gpG{FC`k5Lnr3d)a3X zU5eW^<6YZ>yRhxi9oX-V(#bUNxkbG8h*nj1|l7UB#wIOQraWuIb1V| zM6=0q@wXhtX19%|134)M6S4TcO%_0$a;WGM0IU0{89wz=P)WaqlPEPkCWm*Yd+W}K z$(4~eXXd52TTr+6xeE5@9rm=Ag+N!V!n1@27nE1%O>wrf1)i`CJR*=-=ctR{a8w@E z8ZX5Cd~Up9HM(GM=4s!MhFv=}Uyr3pg=q`4&#{hredX17n4bf``wNYkRlSbw0;G&}}%b>Ny zs^ehPYi(LNw{mq@hlW5dW%CQs&OPp}TRYWF6}g^(4@qDt`m_{YR|n@%8_?v(wnPpJ zqDpt8K3^C82iygHNV6tdj|0oKus(1kGvJyer0}6{2u!$(ya>}q;pNU7y|yVfaN1A* z$~Ffo95-&^En2N&|M0hJC7)|WvPE~BohL*d-fj;NZh(9#FoSbJXB)qXU2C+Rf>z~e zk9MhhJ|$YV-FnyL2_E-o1xS9p7G}xz>SeuuU;6?gWvo!q3J952$Q5Jz<5jM9S+O0^ zE9|s5#M!}1K`}8gk+P729ypHJp;&~p&HrGBHEQF!mb?eJym~3QUjOX>^Uq$>{@41| zz_mw(#n-j|I8PF+0-JF}%Ujt`X;nKKctKc*T#4V*X(Wez;8^MbE^PFP=v3x*G{QnP znPPIDpNa+GgGV$u9T#i312`9KZ)tLDf8=shhJjuJuQU1_9VUJu+L>#$z&>oH7m_4zMUJgAl_AHodzaAdH97MdDd^-e- z>S+m)D5O@1ClGM)=E)cAH9Gm^yhm+vzbq!8}2r{mdgSHD|r zmX??C@wA2?m>rOv@_E9>IobPET7}{P&uIDK+kcDrJ6De+kMqp2^MtnR08?l*M$$mL zrxliFfuNB~m#nXBRk6B5w@IL7r5W&lOkZGG%%lc-(r4vb@=lz22o?>ft_8LVO_;Pz z`0J|Kv!u;zv1$QipWJ?4qn7X&klkqM0ls+0F4vAH$(qQq9$OJZ;C4#N`sm1&2b5LX zN#qu$@O-vH2-Qi6v|59p9mX~)1c*oT4*AsewkcYbR4lg5 zv3!Jw&9E(kRNS=|K~@iVKO@CvZ}?bkd$#?5;*&UGFJ)We^v|VO+Y#WMuidGG4HK8R zmYHp*W(Jxm4`*chj#W^iFxPXz7_3qy+nel-9SBlMFE=ZZcwRFfv$!}Z)~-5(XQWRj`Np;YxP6KVN)trj2QS*pCyzz8uKp5K7#dblR>6gBJMVtFbgosSJP^c`drSmwBo) ze?2=R_oIz&L|^D!-ad=p!@q%U+{^}nf%dA_c1u{);Kc3Dt)J_Z@Y!Z0{kHJ<(T}Ft z7kZsMtng-s1Nc(wUY{og;#cu7Z~oM2e`R0CN~IDkG`tg=#-$eS3+|$GSHrt<-GP0h znc%R>5%U<>d)n$6NC|P)sQ}Zr8nS2jaI<}DvpppYTo0BD#CtspPm0Bz^g)nPY-#mWWP?^BQ%w%^7g7y!ir`klolcF_$e~-{ z&`#GHaEA~ZiX1=VgG4Reg$ja5TxdKaf_wUv276l1?$b%=hRymQu?f|vx7Z+OhG)mP z?hiI6e&!Zd+B5puKV)-b2P#R28+fIFr1`?jqr=IDtJ#+~Vwfd|pB|xa?ix}mBwWQt zWg9E|Ag*qdF5>^Ng=zzuyKsJ55~F*a-3oy!!Ip&%x=Ksu)Ygp&h=DIzwfffh^;Hvn zy^K~}Ocoa}6r#!SvpimQaks3{$|Wb@OaEjmg(OSk#&uhDf2mzH$t<4ND8yD!-bH93 zJHxM8y+(2Y1`1E>h7aUTdfHhBbkaE1dyYkG5*V1Y0c+7Jx(yZYA`OWNzCGnJ>G&Y3 zXN)2ALSXLau;AM`VFk5rru=&P(z^AB#Ojg>*?MA11Sbc1?9;h=mMt2H39N0bCjheD z)_pLVYD^~gp5V1$?}oG{*gJ{eBlm(%wWsaa5c7cPV*VTB1%b^4+Z2%5Y7a-X%>iTh zV2Zyj*cM@l*CbXAbaGpxFCobV+ots(HgEXfY&rP20A%WAK^7A^5x=K`{qH6+Yp-uA0o5ynT4 z46p++RTyx%A#}iM%=u}zLpmWP5zBNq-XRg934is25CKY3H5@oJVrAkhYhGio2QIn) zJk^dwr<7vZjtU@FJR?o7os9WF@_eIC2@Q&NH`?iV za1B@9-iil2H&~A|T3i^}Uc$(^+DEj2)p^$ejOrvgY95SK^s1BGv@f;A+fy9o){fz{d?Te7x5M zD2aru$c>mxuFh>TpXs65^Z(q82V_H-+gky|8EPi|c|5TgPF!zmxv6~O@yTDr2!owO zAUbMa1|0XM{!9Bx!}}ULZ}xR|@uUc_0Y2?Ju_&!>q`n*DkOnGIQH_H<;u&x52^2Rx z5waxsi`QNr&s%>NeOX*jNb!9@eQ_Eb>kn}_;m!&;E_!~2DUv2 z^hyxyqV^QJj7m7hUD&}6O^Dw_gB9$FD?y@3M~g04$az+Ri5ku$STT=0rFJ#0`^f2` z8nuc|Qm3}wCYyXEQ5Phei0Q>HWK;CMj*DZeey@Z#oh}p$_)NBX-K`l{61!2i)@BBL 
z*Z%j|)3FMCPkv9&idWQC=bX$Aa4=|pvN=~03qr*idnS4yGrD`m+-Nj32nm}PzsYZc zsCgw_1hB^L&wy}@%~TuWKA#caUK~)1*sao-n-{_%fn?OIDARl>D{19{8G2yE>Lu|& z$t|v`is*@4TGS_30IRwZqPANW-9T5Z=XxlDAGR7nuBH%BT>#5@tqfbPeW@fX2IA!k zJyvz~tcAx|scuU!Ag)rkPWJ?P{z@V;xqJgh4dom*BBa*UYO)6RHLDL$On@r@=~IyV zm;=5nO={Xekv415$t3k4P}=mWr~&*s^a^2$51|nF4F49vq+1(KGKv$I{s~#MCz`em z{|47I>(h8cYZ1orCCbL@qEO$uc;}nqKEy-OF_qZ(y{*@rQ4iXN*stM#Bk661|5{wx z@E}sF8f*cZ1-ohZ1RihQtI%NE0$R^Nr)}4?io6V>y%aBUrERuDFK~ik z+jhn_ma@W$FW8mM!t3DeR?|w-I<(lH?CZxo;fdhzx($rG-IbnWvrp^ge$shYlD`~1 z$cNHJ#v?>?+pkGIi*e|HepmCI9Sr>XhktE{;?G>XT2@mR5vqf-A_G>!a#O}E! z2Y~|JZJ?Wc0N%7CF|%NB(6S%BB5$`OCc`1QiIzGoLV7VQ1)e*Dc09)C5+QUnhHURI z;B{;xy%VW3Ss%{lNB~16$T=I03D@%poQt2W z++pYSQMo>#4{PcL9SMA}T|sg&ptU(_W7?ajM1DD7pa`-n(Wr?oc2!%3wH_MgtmZX! ztLx!Pu>^BE-f+C-I!|c067)n zXATbiUX0K+F!Im#t>##PL4^cE0os`|YIet~u7Tk{Vp;qln+3v#-(mJ)5TPLBEcgQ- zCSub`ea%jL@WY^I4fwHf0w2;N1$_|STFt3CO<*drf~ zx*HND7d!0H_&v}%^0)R_{N~y~wv6F(d`~1&i1BJu%nw>wz4L_i_T-0&mLTlf#1G@1 zCc`$@BJrOeCL#sp%ta&a5Sye%DcKlvnH<=y@#Ml)$SgKR&|P_8Q}xi$&!ss1!{7nj zrq#(eO&!s|IM)uNIWv$%fW#Au5~8svF4*BCH0#6I`Q&tibr!koh|Si*+)p++C%_F2 z;zROG{I0-h$>s`+v42FKnHMi$R=R9{3_>&xzDKq|QzNQ7X)%hUo2P|tw^G3XgyjK_l&|i%W3Tk90`Q+i)KLc`x~b+>$J(vsJ1#~7}A7Y55Y0;G$`wZ zo?8sk+3>GwLk#%*3x8=FHR(uhtJ^YU`ra(@R>{qwCtlt@^< za=)FnX>g?P{wU{V+qJ$ZvT+@u?1W zmAeDhV%%Y&L4IJ41!B43zqjKbCgwTEb`*MD55i?|Les(3`ee+Lq%5B8>6BK{3Gby} zrceKLw&P0D&prBkdrR0D=Kt;Z{*$=L0vy z+~f*^M&(Vr5C~oJToD*RvmG1$HPkd-<-DeSqg~btlEW3SZWOtqzjSkPc+dkT(CC5V zmsiZbrv7;7QT+6JY-Fwny~K@p!)7?o_*r}iMFSppkEYyi#uSz&_MD@0OI_>MB4Y}} z_MYK4?6&6WCFq>~f1!q6XXZ=2bbA41 z;wTR1R1Yf#{zg|rl4}?~-e3>vf#5?Nj;_Xan3Pz5#8YdNJ$y9;RE{i*KkjNG#Tcxh zG=nxipyui-YVDC|$N_U%?$H2CA_TVLvFMfKqwpGUkE<8OnQVf7SEIRk;%aaKEMEA_ zKdG^cyXx`!o_I9~EO%Lh`@1|9L!ccZ6?{&8ljps&6qBwd%AETf>UCcY%DISOgO=fg zaFkOrP*fsp8UD3R)gs`52BFoc;lD8>aEF^O#sAP|3g2?>r>8Y7w|KI*vjhYeQv}Xz z%7lVJ9OlF$w8>3Q_DnP_CL2nN%?(VKmX+B&;iY&4n=g>KKDO8b4eg$(#qolh0a?0K zt)S!sR;I~P$O5NQCkW-*R2N5GiGrSJiN>jA)1AMq(CTrK;E=}GP<1ukFt~k$80r^L zwN|vW(rPp!uFC&lb;7G=S(Po1-}!cq*^2nRhB!T2sR7}qQXtnV^=Y8p^I9rCf#34I ztd3VH^tE2ET(y9mw?P>6by`zw5*AnAvKDO_sxAD2+ca2TYa6l!`Q)S9Pha?v6*O)! 
zQd74&A!NX<_NbG~jjkr_Q%fSdZ6MH)CfwF(wUEfEO2rj1Bs9{`36fim*bo0qoOwyVL``Nk%* zh09{wwJ;s1YDuo3IaYXuaodr7U>;uOez-d|7TRUvKLbx!$|hgK-ReuQ3U;nNR}(vn zjc>1@c#I2t7?d=iFk6n&%dyyb5~b}A0BD)q1N(rm0);3PdnO(TO9tpjVBJWx z@n|d>q+Q{7=i&>aels>N26bY0K01d(*#+U?6-bdl1BWska?u#uSm40%?;yK8gi|=d zj9@c@Z0egwfJu%fpm{Zr$yL?ZH7y#~9ZBWm0b!an^)~{|#atZgs{PXWEEdrtO6;cA z0oG>t6)2`Ub~PYNe;)1DdTNl{T6h>V`$B!(Fv7?Er5-u51K8=l(jt(=R(UaT%~g}J zf4Q_Bv(X*xT>1Sp*LMZp9l5@VN%8I2y;v{L=t|DOZ{w*K6%otxot6jB%p`AquV>d` zdx4xE0?f2k)66~!dPnXMuKNc*lGmiOpF}N(w$dK_C{fc~Q9VUL)uCnhI0D7Pdg7)> z@*Ah8rZ!K7JpQAgL*foOAddtbm|tX3wRJw7SeB0xp-pB~Xy4=U(s>Zdm`?a82xT{d zj^9D{#79BVpL5FhWHu;lcdtzhK)K@4w^A3)p7}^jiCYqXml)W&AH~hbm(DUn1qZkz<_U5gg2eof zf>%It^qi))Kp@BV;*GS?inT;lh}j)hnr*T>#!}bOB@Lo&%QWfyI)ImJ?@I0mPb~@L z+lODV3PDdk4`h^A$5L(&s}4+{6`jtCT20Kx@9dO0i5tI8-768`_>`6hU=QeD5pR&1 zTd4hKE90>{KbU+LSE*l9Ysy>xyyjn2T579ff}TN$*wp&$yd)bPa2qs#ju#5lCM{Mv zY~1H9!oPw95!R-rq%7y4b!bS^ z=6G<2SuB}Ct2m{Vv8@71C>d!_w`t~$g*GY|G}v~%T>=ABZ!c*`n6^IK5pZ1V?$YB< z9m++JbiQX--Rz2~alv583_!>z{zdK)p!(tCc;dzbc2^fdwKG)l;|t=Py)1Nx*0zrP za>(}U@mW{?7wo|O8wAA;0&l$?6dn+Jyw|zj$o!{CRl1Ro&+>5K1>NhiSL1B`boESo zO+z<<2#iDCUXORw!LcLiQfO?qqe6iZa44~3(Q5U~)plH~1pu73PsFf%K(1JL!%k|_ zc$z{&1*8_Qbw26S0e@`A*uU6Y`j!)gNRwfdy5PhrT!{BcWKNd1i&{(n62S^7U@Wl871dSS<(Ot8UanWP zo*tO$7Ral5CKDRFi|i7x6v)rx)$Z81d{Ygzef_8ot5}dtd9}S+~R8 z(M^mGO9LLzDZA|5Xuuu8XYx%9$SL$7y?g3g%LQxSYIM@P-~z{S{!R;kbdW4k-v{FT zm}l$@f{r;V_lW2P0wE5Q(M^eXLBILs)EVvdF8hw%Tr1><( zVnONbLd9IF(Xjq`ZJjbba>hw0X63PdICvyoiY-ys3U^bk2+XK*S!-4Cd(ahX)oN6W z0@`Zyz_nLzb#W%@F;XnoKx~C;5mxBYZBiJASH^1P*jn6TRtW|nAtca^k#dWRD{Zx2 z)WKs}y&f1fZw&#!4;+VHrX-#%0#XHrZTkHzi7h&WhU>XFP6ZwF_{5^xM0X71 z+Qnw;(U?t6dHRGjdQG^lfmk}f)f1brPM~Y43k5GcbS=^1)jziNI-}g5R8s4;4S}qe z2SPuIxVcfA2;W6$8_Hq|8`vg|OnT-{+br0nL}sA@-J)g26umW?5eLHQ-==27BDMf` z`~5|Bd+ST_L4epj(Hc^BRwJIvfKn zt}0!)!d{IBcT*zsUpA`SM`2K?74dq!hZaE?j>H35Jn>~Xszb06M)X)rs$%JLc3il4 zjB6tTz=`PN1c-DB+9<4xUJ9Lzy3smC)}cJJ(*fu&rp>gsw4Tp}b>NKV1RdZ@a;5x8 zK+Y}lD2P!(#^Ver8OY~^-T*qb^JU1S09Vx)v(qs2P|YsX*RB? z*|U6*R|Tuh{o1a@i*o0juIoh+!nWPW4j|Q!h$eH-_&4n{fvH+nX*V;J@Zz;waS#T3 z`v#G?KhF*l3kdPO9TP|G5p{@9*cSn(VqqDIT%E`|@_CVPKHl`YyXDt00+rbLwC<=G zp)fe1-PK}U`gqz3yYwxJgH%2ZeHUcl*h4cND8oWu*)y?iJ963^Oe-^Z4bhQNM^ zpI+6&K2B89^Rsfk`3UWSj}z@717EV6d(_W^A4|+4@rru~`B{GG<3u#^Rz|#i_~S%b z>-%Bt#>ETL@1uPh|8WSr_4x+uNRPxTJZceLAN@EkaR2sW@i7%W_HiQTsD1LT33)u= zdO~5Fpw5f@MtnjK*a!OT$#{STBFuB5Ml8XRh6D@u+wtY+J`SnYl@Few!Y0M!{Vt}! 
z6sk*`9PJTf*qJG@5;+v>+}!-{m`x4*{08%*JzWq{+_{Pz3H5)5$X3scDdfpnVNZXY zxDygk+N_T=a!yzr+~e$z)1_HlW^s-7&*6kidDp% zavr>0r6uU?r_7k!psKSaz{#z^?SkPn{(|EH7eYEbbpkMeS$ny*s18=tR>T}iJ&vQn zR;pRe!a7?OzfrefGu3%y-%t^Tx6JB5uamq*^*RFmRQkmQSMZyLkCW9;`v+#j)UavM zV=ZcERG)VqCHL!Y`TcJNZ3$srJgm^*FzVmzm3kkA+p z-qNND!u zx^MF%aD zst|A=+^*&nWpz!smghULi)zK18r=+VLr)xgSo_a}hetxf58sS_9yZ-*xAe~a-0Apv zHWfUlZ)?c0mt8aJ1j@e1mVpKE)5Z<_rG{D#RUolnELkNR{B;b|j5{F}T-0iDZ=QUH zcVkf3>NykoO?DK9$rD>ZHom7F=^<16w*h7inP#XO8oQero-s{*FZ?gPlq?!Qd_0;` zS&_%TCyy~1Vu=yct|#i4YbT5D1J@G?m^x{OJs3ZyP5Y@mbUhK%9IQ7fimr!b=p6AM zY@A-98fmxj@!%Ty27Bash`#M;{+Q#V*Mp*Pn~)2+*&d6wMfHBKJ{~YJ2KX{fP}9nM z8!`mZEP_;*4^P!hLiuN0(yT>J(%RKOMx1kkEfcO6z^-2 zlltWBeK>~sM?EDbUyKMpRc})T&+t{79=&tjWa_dR!oAL2V{PX3AmYQXaGu=h;%Q;q zjW;Oq+OyQP4;pT>1=@vGzI!LxbF>)^FwM_I*WSivXWeaXJW^_ti~)l-PpjkxK+gm< z|9WWGU6F=v04`*KcFI|&BUUWPr5vHs==&G?XJvwgEQ-YxZEy6%NRLarEfLPJ^SxG~ zVU~6bQpVbI_*7{Yi$cir<*$zZiQR|53OaeLTfJaWojOo5v^#QR*m4cAGT(yMR}a*# zrWBgcF79u(Rj~r9#%#~Wt4qIft7&Xl&7xwfkM2v|HQ5@X4LY%cH3{_+XWvqd)0Svj zNKScedQI-r4ssaMx#M~;mW$Cuj5T{>3!gE`*Bv8yZ^dB~8TnP#>z#WZ&o7qG=Ca4}{OVv>If{+YC;0Y4V7RF77C!?)f zJo%(fY4(e%?R2!l=E=&w6_}^=69^@+tY-wI`|Ph;Z6rGhEa0ftX=y`yo|KyB^s2JA z+0F~EVwCaCb|G6(^73*8UDQTYE93Q;8spOePjXq4a5j(YchIiHsZY*uzi_K1*Zfs= zp(GPm;x%DdgF@7<3p&15DctBrc03y?7Xf`9w9nMJt9ST~db=s){4?V|xuxc0-ql(A zT=U|SvfJ^SJB{ELfx&46HO`j;!xY^3_0|2Ypvnd;gnfNINh~$X?2cMKhbYMHb65Cg zL1o`WPqxYJB^-=2bnV3T5w!@ofvHTjbux1A~O!Wk)>5<|51_S@6|u;2xo zu5}?(n#~A|7Q5+-65IhHoL)JXI1oLp#uP%cS;DftwFd?Gpv~5RHB==-sB?wToEWed ze&v}@f+80sjXCe;YErJ`3s>EF0X)B^4Kfr( zRu&Irs$+-B z59`+u;M}xM88m@ImPEbPqO~UwjLcE!aHA=@cJ7@Hy~Hfi(Kzz=R>+Qzr#VXHT)Xub z5+A{<`(4$e?SWV7OUBy1z@x5wp$!D^AspyQr4SD&$C=g8CqbFRdzdT~ocQ&hj7A!~ z{n#wr-NH-YQQQ=gw?TtM?lt6zjamn{JF*p!h zq=h&vFEg=CfG9JTN9ndbkjF8>UeZszD0T$ux!yLsp?7LulN|j!Kp(lZ)plv+&~9=w zblL8#hZ~0WXa^v-2*BamVC_R#m?kb9K zraUTKD;8J%wH*`KGB?25@c>2r8p^uplq|yQt0Pa?f}M=FUQ@tMsoyem^kmMBW$q$w z{w-lpiw1P~Ful&GC1yOPH6na)kB87DCG_ovz9&*u_99M(RcOyNNIUMhw#Fu{)n>`AS=1uWrVR)?OR~Z|Q||rS`d&fh^h>9l3;V2f!pspR_Nc zAN5#z?aTOGOY&#?D*H5(iOwaAuj6gr)uwke!hk#S+>M8#A-|Ho(e$u;QDNks4wMWg z5o+=6CkiunMd3+jQo+T`n|zmjmr`Nc_d+81_aaZ>niTu{;)_2qyAjmM6XW~U@xYCs zPSnG5>dig);EmAd;z&pZyN7PX%@2#vL<9xN7j6V)LgNL|YIL3NIJLo{9)1J%V0^Tp zPcm)#IFCe|Tf6q%+O?7Y?9m%R(JK(r9t))S#I{25-?YbX1ch&R<@p3*OgJKJJ*4(V zT)$N*6`wt+wk7TgF;Q*Z=hO`WPwCl93OW|Km>WT@&6xWq-AHu!h4nT$ka3q7o1#82 z07)918sm{r8+HNDUM*Ob&jE$7{(?o-HRq80nE|eN>Cy^&TJwAsubf%&8!G_W7y=b2 zp^NKt{){*Yvay@qNUTfl(to$PF}`~a*W&LqFCMWCqzIY$F?}qvaQzDcCSGLRLRPGm zNdZr#TD5L+s)1oyz$bBsm21BXm?!eKBwqZdsgtZiICYam!Hum|(Xcl0q*g6}r8rI? 
zBh}oHsy{hcxrJoxxYfl1uUKx&<5veBy)i&*JApGou(qo}eeP({`Ez?d{!ET{;tW@7 zmdl7Ym0RsSnbZ1g z42u7eCB*I28=`et8M#U~YKTQG?sBn7jrrW2RG~+;&9Qz3_@^y_8($niwraKu!%DGD z-{e|SGo&0U+vAkpza#(BUJ6J}c*^bw402PaPq&@%=byarg6-0Zdh#&OXt7&kbhz;Z zcU~QkMJ~v3fA5W+oM_vZ9S5>@IOR{OvzIl>v()6=uXz#;!cjdCPp%7wKR6ie3RIo6 zLuyC%d5F}bSVsts`!C$l&f z9jQ)7I=kcdJ4QqmbW~d)UAP-!PsUOGKYnJXqObF*Xn6y<^hNt+^HvO5+t_Jm;wM^F z8_^U>qHMB_YJ$);?tyc9>t1P|eek@tu{4W3yb#EfFeRl%x~Ls-M**T7$KufonZT6U z<$%`ERhCNY*cCx*NH-PtHP?fzY)_ILJ{+smL;)8 zzlSwBVK?J#uB^*$X{U?Gp8C1|eC8FqEr3nv#QKL{>9H?v1Pj`L@3yP(ec8SYoN^vU zYhMMRRi3HqYrWuH;EbeYEAiYSb~om|xTL5KhYtHjEgP@{*gY)~#LLy(-v-29w9)pR zUb)i+(&P8{7as=_xPu?`y229$Hkf@HREgN@wf4ZLiAus!v~wvx_~~d8PfO7|J*3~Y zq>O*~)1a;{=%N_s0vq>f+>@1Dnnkz_B=z+8PlG5|rVJF+?AA!6{zK^-FlyJhXI{#x z{+M8ZrGM;v|6DnbM<1^;mA46>2G#Cq^b)sF?I%7>r0z#QwkL&NIWw9VlkkE%m=mA+ zGzcqt7Y|v@35L6&r^vHOdS6Sp%khFu7Sin<=u4b#1~5ptU%pc{g|T1pFg;$On(ij} za7J{bs!d<3@5oF6%J(;Wes_CXT~Shzf#K6QA@oEQ*XOsk&Hgl$Ta~Ulg${deQ4p9D z{puhM5KsYq*6#@vNpn9<1hs16b2d-QM-5cB&xEpjz6M1mTW1UO0PnQMilb*Gt5_QI zNO^95Z)MR6ZjPI}To0|T&0V4q-1XmGbQ4oh5iOeVMkEGRE7G3C)M|}a`~On)_CZx& zYrf}qQmK@c@>;2sWo1&9#W=RdlWW_K<2a6E+iHx*c+}WxjP12;U(lc-N-HtO< zHo6XYGv-zlSTVP*bVo0b<8a<#iKy~Vsz zuh1>MwOWmFSAa_wTsOF8?oMBk)!SxSV1B6K(_s#$9K9< zbSrLIQ_+5M%hvx;>f82>{ri*PTo#>5?(02YAD|*mmnFXWQ+E>dj>WB2 zdJDZdxMvl}^EmOiZ~1e&n(mY}9#|dD;(Pv34`tSjf50OvwA-4X`bW@$Iqd`Q3b?Q$ z^=>3VkGU?QmMR+;>Lgic9>3s14*Ft|(XatWi;pdt58lOqQs>g{YCIKlS#d3%$xGb( zAL6-Xe;bzz@xrP*Vf7l`r?3ex<=B2|GZ%G!w927j1~z^_#S(4e3lYH2R?fxTQTxR_ zQuTvf3?8YY`^e4Z*uB5<$m??0410QeuRhYO=X3U=kJN*4yD}l3W*IQ(k=NlmMIDyw z^+)Ph*>iCZ$=UuMY++~r9z*1nIzOv9uYbeR^p5$lv3XM-Kij$nL(L-_oAxFQiv{E8 zi3drQwYbl@zR8d1ZTst{+#I3~e{{oj_^aJTjw2p<0IaS`tPo#x;zB zQ<+hgXR0A_dg12DqaS&_^2UG07>kd4?|8?en4L%i;bUXz((WJ2JQPL5^lR72O8GqN_z)y3srL^`l9-f$DDS2nb;xX0! zE^*s-cc;A3Kp0m-4#sk{kHByPO2jPrRY%;&^ zEqj0QlEp}}w7qqG8YRG%M;aio<@a`jHGoOBr@(UvoJVZ4JiW(D!nRukOPv)JryjdQ zsZp1kCD|D(3BRFTa^Uesc4t1zMm#M7GFPegmn;6%{ztmycrncbBg2Zr(@A_UX-3!n zvyL~r^}1}UbZ604YuGtTb*)qI$+f7~J8QFfmJAzBry!GWZoHoU6BL-6vt7b>x#*GC zo$?X=s>4#OXjvn?07f6zUynnnq-=nwcNBXaIb-DlCiei!tpetJN;7|tJX`Z|ykJ`m z3uAISV}3o>HLGc?Ufwy%ibs_)#23`N>GO=`kBg2@TeMnv|IC>zMosKvboQ}0=yZoZ zY8C5_j>D*vU+SB={?+115298OH+`j2 zN)h_3HaVwS&zF)z^v9O3jqKnCEH9iLxar~lVq*=5&hRD{31GYE6_j>F<{&ppXe(!b3o=p`F=eYJgOi}SXCc9I4jGH|ZZ!5vL z*<04Jy})p}xZ8k6D8R@me;A3l@4cadTQ=jm^hS)78@mvy@QM?><=XExP-s6*V|?yQ z8)E`n#<+xczSpqZZQi*BW}d|vs|s+-ARl9#G9I^ji`%bqdyL~P1Um_DBb;yk1j}^S zF}DqbiB>Z-Wyd#bk{p*L(7Fa!dnPM;ald0rKgH_Zvk*Pi>YW>3$(W{cCwA<{bY+>h z$1y+driz(X23jQHJd17QEX(7(%~VF<^6_fSHp$l2{M^0R=a@vcM%~eqxh8pU`#Rd< zb}K^jzV}8P|MZ`+K$+a=8rFqY8V#@xQurTvWn$=W)E{Om82<>HfJrvQ^5``y9_; zO{{`>Tz)O$EeAy z3S={U|Dh^eqlHux&;TA6Z#k1rSBhKMA7v=Tq)*v4W-3Lu@;#mYn6{P`%658l#r{9}|V zU2O)lsVs}d<}R#HM!99nGd|8Hq2l}4c*<=oyY&rjjQP#eYF@tJ_8%&hYC;mKET0S4 z)6|g_V6oe~skUU+(G)xPHfmI(?h;zQcD1qUcAsM%>g@5Y{@l&8>J`oA&sWnVTZ2mV zkAFQ40gHaN>wKG(ZWlLJ(IP)Igo0Lyo$3Ov#U5KZ`xiHmY_k?{R(lH0*zFLkDNb zf+~glbRU`b)$zQyBB=Ux8`Q3?3Mxa* zTaD0S9|mHz=}yo1xat1XYST6G?y6yhopO@i0w`crbyc<{Ig(||0zdbGMm-5OX zCM_DpUI%wQP5?hyCTKpE%Pv3LKTmZFznE9vA?zA|1U2Pcwh*rb)t@eROHZ!`Z+I)Z z^H^Q41-))?^`~2%g2ter=CIW3;9#2OjDnhqav_U*&2Ho{SaHr+yaYoO=R%J6I1|+$ zclw*UMf$_)<-BKTP@Ncc7ugAL*fPxWGQ%mdzh!?Ga}tNQ75SKvQ!!lmVn^pThDKN% z)JICDNt{A?Q=dd##a_!_x~I?EoB*E_R` znjQQ9nN&*WSm~2#QFaaHTE)>)tcGRtBuL=Ag3CgDJPYKX%Y)SA!$Ql{)_5!m-q5E^ zH?Q-TEX8`c@~W`;e45V4V1l{M`63A$ zt>U)1+nGK^qKfw0->$$W%Sb$Jjq(7DYS3*;(qIjhbQMSPYFh71mQS7|adp4VVociX z*)iI#7_T3Q#}2u8Tep0RcA7`tJhqwp!nYE@a)UM^rw>D$#l#iStWL^G96Dm3RlgqpG0p2yo*k*Xy~jJ~n>B8TnhC@ENKVd;?X( zQ6ur<_)k!)+)#(WYxwG{6u);`?)CQPBR8vUus>_u`K*AI@Aq8#Zn9T`Lwt7WEmpdX 
z9L2NhTUDpdnh+a7zRX*3&C20y=e6lw=X}denB2-tBRVWTw}`k7H}ubaLL6k=RM`15 z-d})Q7M2T6-jvIIgtudvq;XPoTFpvD#BB$3S=3A1E-UWXyXEx4wrt?^^sbz}1t9jE z_Uze_a+t?isFj(s7(B3`G_b^}72|qnF>`~1`m~Si-+L_eCInR_+Utu)Sn+Tzss%k( zTkma6THSsDQF{aJn6%SR=#!5bv;@F62#@vTxNM36mGxt1;1y3Tb6a>J>NDdVoI5>_ z$vGc+5!4FN%ndZ0>U3~;o!@G_j1|QhtaIBpKdRcOm%zPIZ>0Z|)hh3Y+Jc{h8pzG? zy5>bRnZf;nUzDx)-&+iX)TL05$aQG1gw%&wE%Hcd$m>tn#&6n4J))4EwL7T!JU?-_ zjyp(k@B`rYx&yC=yk3`^ebmwwbFFj@hJ@7T@{$#2r2=n+)aTAf{FsB6=)YZox0S&-I;CQG%;!P3slhSj2=hrbD#R(9 z^SxD6S~*g{F}#QJw-{x4b0apVU^d$F=51$sTFGM+6IItX;T@|qZ%9w`yY|;sD#yv^ z;1(`21yMRdEO(_=Z|&3jZz#Hp7FjMndUrOKSW4~`GB-}K)M8lfhCa*8 z(b+(9I(f^jroDY|E>E$)G#}37f$L#cD(X+1*6k`w^g}Mc)BL>N$Aw%e;*9T$clklM zF>}0mN~G#GmvAk0>MhbfpjD8Ys`7*0D4(32I^BRo6>bOB#5Y+UcqW?zTV60~|6PWh z-Jvb^KY?bwsZ@q!Mbz&1%!_QZRCuDx9UIziIpDg_XV?+)y0e+Huv5uxTgTg3gfx^7 zpJjfb8%-lMmM(|*Zq+wEq)v~uC3}pVYAWo9^A{k~e9vrep{n>6WLv@<@zEZj1?82T z8`fx*C08};osr5jzfK<-`LXY)h5Z!oL_mSLzU!G>P%<3iZlF40GK!VuFSy1-O@I;= zaP}sYDra$v-bI;(-|FtoF1I3XN?0`uuQcX*RK zq000ecX{I(ar+rQ+UnS+md?+@4c=-L(>$uA@Cnt*D_2z3p-#C;=J7wCWU=%)jd!Ea z2CGBXFz;-j>X1e2_Ub%qf=!CnYmqmg#hl&b-L2rDRfTRZ=T)Ls6fU-NLffVcZ25xo z`i|HO;w~NAfE$*kTZ4A?S8keDH)FbG3Aj5V*xu9Lf;GZ!*~68Szs^o`=dX{K%ZIY} z(WPQ~wpegSnfYY1vsHwpG4fZe=Z=+0xC0Yb=-J)Xiszjb%zu= z^(OcR1oek%2iz z8|-Xp6!NM5d2cokipKhm&)Scz<59g&@!Zn+WJ5B0A$FgNYW-^7+<})?Tuz1EuE>x7 zUWJ;xyUXaQglkpEM#yE&efK|G9_FoHvThlEQ5qXQ{)DqJrh%|JPujY13c#;~HTRtH zDIe3TVKwZ|DlKgThSjg~ag<;z*J^qskT)&Uav>*t-F7R9Nkjx3f!)0oMi6t?7?|c1|Rz5us66^Ol_^&(q6HtBMsB!;BM%` zbjz?ayn>nfLtRi-@hru(HJ*2)S)zZ>xij^L>>tgdaDp;dk;N@%zcnwcVVLbUxB~@y zu^{#+*afp8TBy)&l*7JzktNUR^ja+WH8=-eR3&GtT#u!e8v7X9zFKBa;OpIN4ldJ7 zXCs!|!~1Xxm9n^uz2evO*dK5%YVV}CCg`jftTO+f^4Neg>b%CXz~RHkh*w|^9=tw% zz4?9jce2WtV9E0=raMdJ76>-V3+FiQ)I%*2Rc&!2r(jd;C1oea7mIo!Nmg0B{uWz| z-{Uob$(CnUoICQl&GP&^cUhU6C2yDehe__>5T~cHB67)P>nGT0xnvXKttsqMiTJX! 
z8}gZ7ZiiDF5vk_aox*VHcj>C$8J?9|WLRY{;l&llk*OT=3tNM1%hEh*x_Sp0b1dGO z-kv6MRqX3W)*{a;bVL001IX8(KmPHb{;&cC7AAfR6*IV=Yk8sWx_5VDv79;o_ojU% zO8yHsW-e7U)V^kCRc2|m@cwQ~i#8|{P@(WNnBlFu`4#+=jk#?7#M`j0wMufsG4`_h$9oLj=T~nuNOUgOiws(@JTfOCO9v4-#-elLkz+H-V0|y~@ zglr{o*CJ&1OkG@+dA_@5l}B0>{c3)Fcwo8b3QZCoT86otw!!VdJ+ibp{oq!V>yd*B z`uS!a0}orZx{U~`;>z#Z&9Ym-x{F!^%7shM#DE!Z=?iNgob7`y2Vj}qjQ+85!htOZ z^aFYfG(NySHy%&zsjwx!%r^X)Qs%hY)zI@;Bkzs}cvlbNg=KA_(-gr=`{(5iKdO$l zbG-ADFpdhqKgWv7Ei&3`cL!VKILmdY-Ci3#pBZ~aIOaDrz|gI(lskFsl3wdpFG~Y6 zw6fc+-q2Iw+zO&!Me|>Ow_F|Z!uTRH*CG0O7*7U-RiP<3ZAvO$*t^Rgrzxq z9Zw7+PHFz_XpEG+i$?P}aUEqjSl}!~jkZ6ZZ=iXNG2J$Dpsnn6$?fHM$0B5##*b`l zw}v2VJu^2LCr2s+ySdhQ^U9eN+xrRj&#B(gb{{RP`@{ab=zrN3gt@> zz+@FUjNifc{~vKo<5u^k$uB96^qQ#of74y>@0cSNTJb*pgiH8Xq!f9Q zm0Ac(%qPzuxFP;hxh>|p_7_-YW#FDK^#Zv0Y4!cdx-U52;DBj`UPN9!fR(Yj*t`y_ ztVl~;SF*+&w(!QNc==!S!9uK;n5!F{dFsjk&2(&3M%}6(>o?KTbG#0!3?F4TQPc2Rh%3Zr)IAiIZD^RbrZFw?Er^W3aA!~Sq&zX_RE$unId zoO>z|^|i(#$BML^!)DGAxYeDET+0O4YB}M|`w!2Sw~iy{SdMp{+) z_cx0co=~X-IYVI;RmmTXXz>K79N4@vP__QVvEiUKdW(I7Gx@@YT5B=VPKq-bSodpP zB|44TdV2?`ElvZB?6n$V-{VpH4*z{iB4G8#9b=CEn=BoU&=T9LXk0=J*-BGRMp7Px8 zU&{vJg|fwM%VDBfo(bR)J(~7;6JD7$bjL>HEGr+ll@Y4QjI?siV$U(kqWis@LyndM_d7Yclmo{g z99X|&W&GQb7;Aw);UgWVV;>gxZQ?n%Ep+H(=pu~*14TAy60M`ZPTC^O2a4Tn?d3Q^48@6GAIkM<%E$U{vcyrudr?gi-aBxeA-V_qst|^(E@Pi zjjsm7umw)EI2Upqi>fx8td17(;&Nxzi#?}e2HXXdx zlWoc{N6?(vZMWEdT@S(zlf+cIxB+UVoxzu3SFGebhTyY|eL!O_znh0tD-8AjX{CqX zWV-xrU(K`i8CEc!apl+M>|1gZRBz93|2PiWmTeuErS2fLhE(JHbf_kcTuaNH1mMnB zo|S14dlPOLAYVysO{C$d0`p4k8J?R&QI9tSp0^0a_6%q7Ovx80F>hRL$l^CyYQ^SJ zJiD2_Ox|4s$HJ7`+x4y+=u{|qPoI+$1^6xdrX@r38>o!EFq~^`LzUIdR%#Yd^_Z)= zmO3|Td5Qzo^6gHO@PXFYE9EIc*IL{kvlZZOmn%}8DlUnm+IoxU-Gp`Sdup(B+}UBT z1qztEQ-)?E(qw;iXB=AWuQM6SCB#;x>uf`8!8LQ^IUR5QshDSP*s?X*n_zd?n?N(i zxM7dO9cQN*Hr%xTw1DZoa<{Cx`t2WZTP5XEl3U5^v|3u??J#s%l3O>kyW-v3R^>^~ zg5q6sYp24~8d_FR>Yj<=`(m#5ufqeY0Inl);?GDPn!p|G<$$3Tk1QgtakybfP+9wI zJq-XVy#x-r*q172-cnNwqL!7do2iG~r$6pI6K9&{!qHg*1{7%P7aUV=$5Shux9*5% zmM+(ix!r>2N|y!43yXpm;PMoxyfOctYU+B~@p5vJ z!uJjl^`A*p(c`lFEB2SV#_mS7S0id*w{if$PoAIIYZ0$cUFZ%g3=(s!NJI3I8{ROP z>p#36kgtcg?>ac5X)5*UIJ_BRvBgwcdEa(K-St*?)c#HV;Z8PBZ4A{P9(lNy1m3np_!v0L86Hu;?NkumE$AbxM5{mB?)JUoB3{QI_mBUGk;;DD zyE7MKlzedq&Y8NxXiI~OHf%$=g)v5HtmeWhKZ|!PMc2D?zNsib;xu)fm50q1t@~NZ zT;QgD4klQf?Bkqrb}A-XENfUfw3HCBCY$2DC4bC{cn#e~uGq$MP4#}viRHts zW@r;&ZbU1!Tv$$C&9mP8YFA^P6=(iuT($U=Z@>Z*uq$!rw7Ga|(azaSW0Z@OL2q+x zi6!A`f?t_aTwbcWSnO0cseEfuIlDhzyBhe$t`Ix(k60=G#O>g(QWWndIE`Jb zvC8GW4C}f5sR;Q3^VEXdG+eJ|mXMT$1bZVs`V1R|*N^i8ZVS($g`0p{QMS6nkx6pm zYKr^tw@Z)y;||zPdsd6R)roc3WhLi= zF?HwJMEK;HJLjBqDi;ekaIB1UJ-+v5Bg5k3kriZGc&DZ0PB$Z4>3+s#lN`&@0(V!} z2ISfoyUDE@xj`(eDjs?IM%6QM$@~iPElQqxqJ9P6>jI_s1#PXNNOj?j+EP(0mpSVa zlTae29&j>BE#(i`Z1ZoK6?#S@4-HeBx!m3dp8NBZHrb_BDBBxWQ^i)LW~0GX!Agti zU5;xxD6dlT)PBFjKKYD%E>3Wh>1w6F)g1+>k%QY@=dPzxcbzf6tX9<7lV0e00My$* zZY8lfY_Od2`sA(d8mNFG<#F!SXtH|gI_~upm(XHCdA^O~{8p7U3z;~d_e~4O(QZ6$Staqj8SPK9OW?MrUV*tY(P@qg-2%W? 
zZ;`DlqOqh?xuveqS~oPj6N}K}JjRz*tPwUC?}MsvfjSEA}dJq>Q| zaz@oZ>1F3*d@ZUc;Z22k6^GN69%NxY-9Y66&OsF+Evh=RMuRN|_Hgb1?tTo3s!wyK z+qvC=H_QVUJm%B zRxJ^m%&E)ICN;_a&;-F2Y_UI{QdzS7;jtPT7qS>uui+Id*lsV`e4fv9!}%S`9xu74 zaiL#dGS93OxAj>`yDT`@W7!z`^u%l;i&TZCF&Qq(gv=8U4FAr!98S}taA*#m6{4pr-{g-DSJ zD!yy=7VUhFC%JQ}Xg93gK|2YGmet4px+xY|=21&F)bCji_l4y9l{K=js2rgNW`t(oNOF3hL?HvWjJ4 zy^EG_E1R?<%3AOI_iR#e8eLX1Bl*taj=6Cc(wT#~Yp)^~g6LK0;hsg#KIRK&4C%f_ zKx6h>@gVAr>}Mpu&t6J5hdzvYvnAKSQ|-Yi-Z|~8N0t)1Y&X>kDiYpN&2uhc)f^4e z#?ycTKaU>88fSxOUd@Q8<>FHwGmWPa;6DAQhU+>8iO1vEGjrG7V8BwF`}f?1qd`AC zLT**i2$8*X3uk?R=dt&PY94Fx!cyhToeGk!ZO2P_=Fo5(ew1gY+rYQxCkyIMkZ=Xp za>wF$t=cb^$ZA?=L~m>s%HjG}ywdCS7W@YAYOiL9Jm#N-*Yt;DKlXWp2Y9{r zhBSlN+6|U?2gfu+dNupxS~aI5Z`eO>Mtq!%H+$8kE=>4>revsGtEg#PTBL+4o3EBM$LUI+7WO= zfl*dMZeFa#XbYLh!=-%4W9;t?F1b^^`<>X|M6Ss4yo%Mp$Uk75Wlnb`($i9cLbQ5$yQ5F2Z5(NmBLJ}O-@yev{JeW(=5v~d3+Gl zV}IP1!b}DD&2Rq;W+^7mvSb}*o3DlL)bE^L&2{H*NdB1SSzSrz+9TY=w`iV1&f+FA zo1z7lBTlxs^|;XLkJ}9H_zB;GMX@T|;2oJ+q8vEw>ZQFm{ug&JbId23Ivr56R+40_ z5Uj>Z!3L}n(CmSLtFZ#A%L=$*DM-Xd0rg`A3D_i9izLBk*dq88$$}(o6MT&Ag7w%T z7>AvLHP|KK{bGW#NELjJbip=c2tGlk;Qv6jfR@??V~{KOYvc*WBVRBD1%mfcBv_2% zUT=8ACZt576(|)eeCN1cTa zpkAVXLW7AeAt2EXG@0lMS|s{gv5=3hqnv|Kfq*f5$_?C_EDUGlGHz2n+sC^jHDv5S3^)`b^Y} z#}fTt7!XXvQ^90BlPC6~&n5Z*F9dV&(jsWWj}ooIPl8GKS@1jjBKW_7KJP>L2(R?% zLD%C|iKgK-6SZKFh&l(a%kjTpu;3ptM6eWZnD?uAQ=$(sR4^681atA0;P-gjd|$+H zi+n#u7!@JTTrXp!0{k6D3H~!io7)A95pkaKj>#G^R>VGYoC5p<#+$c`m|%{3Fwtl) zCYkFwOjdxu#}vUzOf@$@rb#p!(=Fg`%#>&+W(k&Iw%~um9Km$VwFp`;ug~ka!oGu8 zAlJXbLW?T{i_9Mlh>E%zuvFfbW0^TtVYyK`R)}Z=dSxu)0<2Pq|AaN>nu~Z5&#bK% zwKXF_)O8#iO?DQEu~-gclZ0QVBgs7GV~f!-Bpa1rTP#Q^wp);5>@eq2>@>>5E>Zg_ z_>4{>RYYU2>B{sjWJp8}l+0MnN0Dv9-N-T8hg?xtIr2nbA40y#j-o(B>qbQ;JAh)N z0+g8R36vTgLz##gt>s2XP+_iT;1_k3p;C1HII2u`5NC`kP;IU)s1b29rq*O!N-{cv zdJ#`VHJI!$0!9bW6f1feS|sc`f>zPj`*2Ofv&C)ZdIB9r#kgVqvT@U>0JlW#6}W9w zgHEG5bcwnS;f|3Xcg-~m_e4BWeqYpf84pA^596WedO9AN>_b5U{O~FhL}fZ*4c!WcvJNC5eyZz z7h#xDGu|?P=kd15sxds4UL{6Iczqw@%wrKo8r5Kw1xdkZ(d`tBF7tIa$Pjg2LZ+zg6tYFv zQ;;L-%0#ZYos1tqzT1uBSg9_eM8ftGl$vt}%8as6F8V4B6{5C! z_+x?hqEf=n7F3z@MVt{`KZa`2S3cB;zRN_d=>B=si8`B5AFGBmG)UOK4*}7iGtp#0 zs?cJt2hnPjhHD~P+ii^hB3^T=k=;wdP_ z9SPfOaaVNb0Pcy}&f&i3tKE2DvPL|NWr7W(g!j)NDEjs=!lKTL=rPxeh+1}1YS1TP z$1yxMkM$T3{c8@M#`1j;&m{bA51xy@EyN4aS6O%|>O7AhEjvY-_{oH{S14g`NjiR! 
zunYgV*H#=R6D&bynkb^XV(ZU>F_mgssh-Zsj|a8cV8jId}53J@pZS2-9NOS%N3BQEYsy%}ZZegfr22T);D0Ke#4ZcvKu*P+VdIk^vKVo{zzwMChd zjv9Hr)r?wES2pV898S$by*am_!NR1SLBQO7d1x|e>Up%7G_wq?hK&E3BKT?_+CN*K4~>PGBd@p8Fjv&kig^|&?Hm>e!zuMxC=8z8heamclaD0|b^QdEO87$y zN0<`cEWvUMR9}b{7AWf^R>q`Pu}avTR)#f(sRt1+3^ks>`j|9;1YxxJJT?jkj@Bbl z7~Z=Zn}oduwMY_nr*iTw>`ys}WMMF{2iug#wkm8Fedogtb8hrur!ac-0Cvf}JsUpJ zw?~mG=diB{>B8QEDrCt0t6XGC81bJ*w(y6REaV7-EqjqG>@VDdJYjdrUgQgV0{c*) zxH|nPk}z_v62%hUs6~l{(b6oGO889xWfK0-l8JI*cUmY*EzCs-)t*^ojVR!0T+*GLR7jeskDXq9|UN5DfQ||xYader`Ux+(~ zm94ldjMVPOJ$e218175hcRU9Vgxx9U@lfu!Qt-$U%|lSsUWTxFOwC4549_7d_s%2e zlQ5LV_Egvt$ijemP2os17K$TL3)O(<5=H{4cp>b~-HVsPaQadFXjy5*PZmBU13$;G z62Ax|IZeQT*FpyC*{BY9&1+jKUX?J?SdZ5v{ML^_qHoUPb!7%Y!rA4j7Wk>8Iu z%zZ!JjJY4cPzi6RVVI~R18<4G%fj38*v*$?cntSpgfNtL9B~%NhmjHn{hY1~dkcDOm#`$}egRA6{^fBjGxvfNEEjfjFtZ|tXRuOU+gh+n^vy}EF=vhv<0ZTo zzPastoDXm4iFj~r?sl2uqAXCC{dIhp$p(>GMn8IaEVPtnT z@`RzLV$#eKw90GSNnDe#mlKCJVW@ErIxJL5A#TX)cX_xO6Yj??VesT0+?IRSS#(o4bLqmcKkK- zx%kP#U*Ry-q=y>ti!c~C1pLKoNx!VZD}V8t*Y31EcvbjAU_V}y`>kvYGWXOfye^FF zJA}c)zLQrmL>Tnf;SKZ4*6__39>P#zr12t#8MbhuFAQBR!rMyh&JhfkFzCO85fa{S zLY$~QA0sVzO#?<*2JK`Ufc69Pjv4x7R2J*i-p4ev??r;`}GVg5&bC-OGV!nW0^b#{pYdVf~TZnh1}a( zu~NcN(+RAKxgW`qI;4#oA&CG0e>S^Ka{*uUohd~&~+iqx1e7wP6T=MXZ4 z!N6H$3d6oyWDC1fP9VqpW|t$^F!vbp6z#1#9(TEyk>I5YGtfPwSZ!f;VO8ic)h83+jb_MAbJ`Mr7^Er!h}&?*cDF5#Ne{k9Tq5{5Vx=rDJG zC2k1ADFNJ+*W2fDOTzAyY}^+1RdUcO3|*~6moQkvQL9OtcjK-}Pp06W;<}NK`=VRC zbWP5YoFY6FhD#6Pkp-&f;;C@pXfeXVCwu+q5k@&Mi7Kw|_M^}8%6ni$f6l>x#pG|q zQ(<@70X(zdwe@%|3^iTF3-cQ&z)SO+%AIs!IECX@VdNs4Sd;qp;g^`-RN$%Cutv|F zz$?OVQ8`|H>NULgEAX0x!9W=X2_qT1@w(jGb1+!+rvePI;04uqL)der6mMFfyi5!g zhOPuKOxT@r6>kZ{MJ;&SJX~$SaFb>q#t7jLSIQ7)9-8-Jq|*H^AEP9U94W(SVNX*A z#t5TXKD-l??!{PP-|;Gp69xkpFqREX=_gVRwoj@xl-% z$m@mC3&)V4qJ3M0jiUCWNEH3cX>5|mU}FxFgpo`RVoh3e70L2?D;3+!{d^;~3kS0O z*dgq>auqu*ROMCd5_WR|)h7&b-koY0*pGAx!7&REALgbi0)USRnAc^cwCe4RwLR(UlpQ5&XK%a+zyg=HdyVG#X0u|u4 zgik7~(J6dPuU*1u-9_9H_7-p?E9}d@fP2Ew6|S|KhYQVkAPg5Bz(bQ}(VC7D>pX;@ zs4al7oP+06&?5|`akW+M_s^ow+%IL|u`pa%jse5$3_KNf`}X3QVOlz#3wt>bej$ux zSK*~$Wh#C&52eTOlM?%~4nLdw{>%8qa!?EW;I*;?<%M`f7%e-5R}GH@@S0)m0SppG z_MgD(KX}dXcLy+7!d`z9h6sc8HF!ffP?(N4E!2@#3>Ahm&tRA^npugr43FmGZDFv! z4#S1rzHE#TMs{Z*PS|^x3$G@s^6h=#Zn4|)C?ZsphX6?lk3BS35sS^Isatzaik+Qv*E(|rXsTFpo zHe;4~Xl}ynnDi9p2zvr-YJ~%PxrHTslHl@ z2FsMk&f{1vVR!0rtPn<;DzQ@7Uw9d-gwf(-SYv+K#K#Lm&D`l1KCbm4LD2^ICHT>B51dyrfasbEO=a!v0gcku41S@{wbB_#|?L5q}BtEZS4Z7u_jA zft*9l2T^1`Pn<=u;gKUK5rzxuQ7R19)S%2f>}OkR(o=g-Aq-uqgvYI8r)N)u0lZgm%?H+3459f(PAFX z6{A&&-D|`(6J9xpHVNDJqC>)P@owBO_w!A-DUA9~;g+zc@fdCkyQz)QX~@ltF7t3@ zAMOZ)^@nj+Df}zPv=T;qEx2#E-;W2vP+C47S}3;SkA(e)D-bm4r9B7>!$l3~F=

    0gx#qZ@Qbh~PzpTr8r(=;4qg!soM^$T!k()~@!B)5X>BjWAPGaw2k^RK zODzTq`;X>hh_E}g9&ZSH_{iQAMgzMsRM^XQe3&p8$i!Pp;XaS0nb)-482-#t@PBrc z?cNCUTUm%WVTe=1k%rAjF-jPz&A@13l*7I;!YBI=;~gc@c@kqK3>Q$5RM^cK`*>l5 zU&#cMaxY?{Fj!fGNik_ICM&Lv7ECdBKi6dq16-Ju``rte9&@k8OyNKo6$XUSf~%Nq z9@r(!5%wRd$6VnL)hU=~evi`7moU_rhK0h&sl8Za9=I{E#H8#QmzuQjFqR4XaxY`K zdB{A36^2}pS}6>rRbZ8{JMBEy#H5E2FYG;a0PBU(;*&@)tZT+bVI(gLiNfHON^CL= zR3S+<@+G&t%GY`AZBGs_G z9_i+xpcxs4hg*>;3>Rl0Ti91sj2vO4^$>D}-KkfQCya70CSMrxRii*Sa3~K&!eC_` zip@hK+gxE}_eGRiA&#TW+^<|jxiHKnhYDeL+F|&G(egd06h7ITi7H{}Nu#+wEQ>%!boNVn#@CE3R+B>O@&lpKbJbL zne-%8Qk8`}2hkyFKa3l44yPZ)P0L{eZb|qgn*&^PFUUiuAz!;LVKiqC?g;y`>TuWa zY60#E``M!2H|crmrV7JZXYtTHR8--SFm#3YlQdjiiSj2_`ig0Q!0FJ1^Ab1(L#h2r|?kHX-U7W||LuAj!w zCaf>PFTx1to%nBFi`!eu7FQTKP=!~8-6?Ew4O7!GNZ40<8LtcbD-UC^FnstthM3=! zV!ZL+v=otpHzka6xINUoX70u?!-8VGC5&Vq#oNN(%KaE_n6(EZgx#qPh%>)?3o+7g zKTS0X2huNKw9>u723NvxVI|%%_o8}?6^7DkFizOpdK%+}K@M>y2>UJ^$3$TyoB9R9 zFnhMi!YBLFF-7?JVmYP?`>yW6G-ctNR!o<$w`31y8fG*g;z00*thor z=Egjn#5`fJ{sa~%+O9KLXu`?>76~J}snZ{m`mt0P6Ffn*6I`wFp5!dpIUw?O4B*kSIim$6egP@axm!fuP?7Mmd z8N&X zDMPK{z7o_4pYZwC3kQzmp~0jV>k$x!vkTEA>^sHHF=4l_6s^MEg3Guj3|4aK);w@? zu){EQA8rVrq!#0*`Mr{gTf*M-%42uCvZ=~aN!}` zkKt)NFt10Cb9iVX&qSeZp{78XgP(a<~8k z%EG@E;;DrF#aHo681h}fb73^|GF}M7nP>4*`1n#aeiR0;RNyCJ$afe&o8O`Ueo+eF zR0Gew7B_roFJ2Kw4qU;jhP4;)ny|lh9|oB;?+9Ku^zX;u=U&^|mVzM?_EqtKfVuC^ z#GAq}7q^BAL)_gN7Q?f6OBk%8@gl>TOBgQfPC1PchMZQ!nTN)7jEwoEI#_RR^rBVni|9b?Ts&5v=y$nF5f8#a|LSeKa6N`ks)sSgQixT;VEsj` z5e^)xMZDqBbgUP4r_>?Aq{S)NC=3_)ktmE@uEQo_KbL=!gdtx#wg?|zI)`NQThoYb z!f5FkY>!C~VTUkOPdx--cgjWV5)PDclq>9SJ&aUg@NzcNh0*tSkid=hHe8`iqrzIQt5`M+&O(YCb%cn@f>$NDBusbykCBj~+A(je5jTI;p z_Ha$ITo}$ii3&w|Ycn-v&Ao^xt%SW?!Z;)BX*`N*VgC`T%35AeqgKLDSp({X zeN@`17Y45sp+PuMQj35v(o&8l^WZ;*mKa_}t9i&ffNR3g)stuw_WQEXA$)xC5^e~) zQ|oY3xw(~xTN3s(r{K0QQl5%Vxqn-NE(t?av%X{4Qh>X{VEt~~GbtmuZ&J3G4}=5h z<#=dV?8hTvPvdR`l|ox7!ZBeMdW1p$ZbXIQ!UFUOqlc*DWl~=i2807geRyi6xr}EL z_EXR0xv=+g30?>z*~jtH@EAAC3@?=7Ct?5YefU}U;4Eyh1 zL)}@0wjYu+oF5Z~5iSW&GHGTJCL5mSGOl5HBc>{@jx(4h zVQ+30rVE3CM$8n3_=3z5_B8o1TNpW*f;qyztn-*F3|>jcJYje0X)G|mnTN4Z*k8nD z1*^s)ERpa@HI2{+`zp?2nQ-8EEtU(TH0L3wzHVMvmOS z;Sg8CzZBLZPZ-(H_Es3=>5T$m-*K+v8gk2^SP`^QHNiq<9zm(F_i`1=EP*sju!kTL^ zzo#1zh)Iv4NjPw*8ZCyYwP+QFd!Lk?_nQq8D^fq zZDC){adZl!M|iBu9tn@~OBlM+jJp!v+KYP<_OMg9FYNZQ#T78Oq$z_ zph@d^wo4ce)SyS$&EptR^P6!PeZszm-FPev)||(Hu%|g2Ple%vOgs}tPt@SK`MsQv z7v>@R5MBzOaQye9J!@XeBw>ilX+KN&r?dD)!hR0f@Iqr;e#fr}pBya3t1q;QdMsxj zUXwV=Rf9o>jYW9sIZ6QsbRuM%^tjE z*m4PP3;WKW!*D~M)fge{Y2AxBVW_bhBZd80=@=!9?B0vf!fRH#3l!XybJJs^lD|vdkHasp&GRg#+mU)Ci-#qo_4J*Ni%0UrG_`g-_1&K$!4{79O7!M*Q_? 
z5(Y1|phXzY%tEWMkIl|ClNO|*&5&oJI)u@jEZi{ZK3dKaM*J1HWgaeM;kK}+ITxM6 zAeAb*g#8u8xT6C9IRkek47E_@Rv5WZi2K6c(iA)p2Dz;LP}p}M3y*|+#!DC^tfhWVv@9|4`Dva#DfMCsyAk*wUZWeLs^lxe?$ivtYM4@h*Mz}~Tv!nH zvg>-?q?cMTSQsumfg!?ZW&m%PbWZ`^6h`)3!BAnS=^%!gw6+y*8FG~Jwy=jwD~<>k1^(9Uj^O~My~G0SYdCy595TvE5#Te z!zN4+hNzV|(WIA}F-aJyIf}``Xzd|P5%wRVW|*+2nI>7x@8wENH@__vm?`YrdlIwE z!-c(=9YY`HnBS~}m@Di(+=O|;{zElbU>Y6-ge&Z?EJC)hrQ6!A!a52}UEJcZ7>v5C{`#6;<6F$CFi*ob8m%l>T%XY|bQch|s4Qsf4CXCiM z)_@e+I(lA!8qUgad_{ zh?-wM_CDd0+Cn@w4<)BDAPkpY#ZzIFJBQB<>niYE*xR}rFD&OpcqzJ-iy!42xmu2& zg#86v%@sb`TZLZ?PuBx4y^piwGG2MwlKR9G;tR5%?a40CmRw6KSV&&LQOdz$f%Fw{cZ2Ex9oa~LNKQ}=VcVd^1F5cW14 z#6;m^KAcIykgpJvg~5}4Oi^yW&cjr5Z*9OdVYrNAU17wjhzSRHesq?wn+LCED}w8^ z03%^f(*ex2PzSOx&(NQR1;W1bg;*%;FQG{+;p5s;EKywFG-0WQ%JgHIVRjal%l*$B z=o*z{rJQ?P*xm|*d|+#Yp=OSAg?+6Y=PFc32@*sPk7A?wq*70!A=e5v2_yS2B1!n< zpdVWl>iSV6i~eOlw#hkofhQgexmdBou;v1G3L}0Vc@XwG?J!~A=?h3T4@XmxZdhA@ z4D)dO2r`w}H-pTjdI)<(H8s?5}P?zA)%-L4oiuRR1UvMv6*NEQ}VN zM~SeX!<|y$lT$e;Q@WjHD3>rq^8*#a-c~>S!oI5qP$`TYFF=(-eU*(f5(aCkP%Z4H zeUKVq@9`Sc8rD~!P8jAFRxj*7T80K;wDbr9=HcjhGzlXCwza}g^F_1@dkgAtP1x6v zjW%Kb#gphTY-+|0!y~(KQyAULqq@RSnGd&3+H?k;hL`rDOW1e*DDIf_LOJf5bbl%C z3Hwj*u&(eg={)x!3|3vmLt!*<9FNSe?-+urvIngQo7cKp^qBi$wzk4Z3e6vw2Tn&G z8#a_+Kp48xgs0{=;|QJ^Hc=y2*xOWr7fP&yX0;^jJ6nVwCA=QMPomCD{4D2CGxftP zrgKMufAgBy{_=Bp#qh)ty!vlm6a42Aye46oN+*LP{BAE^HzAi21`B(5q;ZIa=eYO{ zdHwn<-jpzMxgJA>(fTqB6ZX~B<1Gtyyb*5;gY}gdt_Z&5!7|ZpwzQ%<$1zeKdwDEr zl*M#452J+x>|MuLOsyC2jxfaI4P%87YA=lwhS_9}7xpw(VuCPQ#*H&!?}dw)B<#Cd zhRNpPAXgPkdSW-G3Ljr=!8BoaN*<;QBm4JZrj;QFvn1?kuEA_`KTwQ0!f5e1%oPUf z8!*p2?5@WG!p?087s`)SB8~_m$I-* z7`d<;YlMH{S-kjv)B4iUgGaGm;-H`7TVe11R%}!ff8K*c2?r`@4?!5oK8++pDhh3} zP+X8t7KX|WVw-trt-*HT4=o3;5)tjxSabiHnzKS&b?w> zUB{Bm%3@iA0e`JiV#sCAip^^BY#Gtx=E+qHI61_ck=Kwl<_pM)aVdwqjW#maG_rZT zD9(#%Y>832jk1xIksWc!N53L^XN#zcBbJhPMZa4@O$;(=xJD+)_Qjc7!-0`sm8Ljk zhPWk85-l8y@iq@e_zz%SLG*dXLRSo(B+kXCmO@XQuyyuQoN`foCB|i*%_Rn`!wlHkSW5>m_?%+)WFB%NEE%?|iQJy$)hypDnrKgg59zj0+Av#W2^# Rml$yE_AQRttp58m_yeWbYZ3qe literal 0 HcmV?d00001 diff --git a/quantize.cpp b/quantize.cpp index 07db33a3c..b90f34f48 100644 --- a/quantize.cpp +++ b/quantize.cpp @@ -44,7 +44,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna return false; } - gpt_vocab vocab; + llama_vocab vocab; printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str()); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 000000000..a2c1e3fa2 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,4 @@ +set(TEST_TARGET test-tokenizer-0) +add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp) +target_link_libraries(${TEST_TARGET} PRIVATE utils) +add_test(NAME ${TEST_TARGET} COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin) diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp new file mode 100644 index 000000000..6bc49f281 --- /dev/null +++ b/tests/test-tokenizer-0.cpp @@ -0,0 +1,69 @@ +#include "utils.h" + +#include +#include +#include + +static const std::map> k_tests = { + { "Hello World", { 1, 10994, 2787, }, }, + { " Hello World", { 1, 15043, 2787, }, }, + { " Hello World!", { 1, 15043, 2787, 29991, }, }, + { " this is 🦙.cpp", { 1, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, }, + { "w048 7tuijk dsdfhu", { 1, 29893, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, }, + { "нещо на Български", { 1, 821, 4851, 665, 1386, 29713, 1305, }, }, +}; + +int main(int argc, char **argv) { + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + const std::string 
fname = argv[1]; + + fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str()); + + llama_vocab vocab; + + if (!llama_vocab_load(fname, vocab)) { + fprintf(stderr, "%s : failed to load vocab from: '%s'\n", __func__, fname.c_str()); + return 1; + } + + const int n_vocab = vocab.id_to_token.size(); + + if (n_vocab != 32000) { + fprintf(stderr, "%s : expected 32000 tokens, got %d\n", __func__, n_vocab); + return 2; + } + + for (const auto & test_kv : k_tests) { + const auto res = llama_tokenize(vocab, test_kv.first, true); + + bool correct = res.size() == test_kv.second.size(); + + for (int i = 0; i < (int) res.size() && correct; ++i) { + if (res[i] != test_kv.second[i]) { + correct = false; + } + } + + if (!correct) { + fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str()); + fprintf(stderr, "%s : expected tokens: ", __func__); + for (const auto & t : test_kv.second) { + fprintf(stderr, "%6d, ", t); + } + fprintf(stderr, "\n"); + fprintf(stderr, "%s : got tokens: ", __func__); + for (const auto & t : res) { + fprintf(stderr, "%6d, ", t); + } + fprintf(stderr, "\n"); + + return 3; + } + } + + return 0; +} diff --git a/utils.cpp b/utils.cpp index 188f114e9..4843b4f55 100644 --- a/utils.cpp +++ b/utils.cpp @@ -240,61 +240,6 @@ std::map json_parse(const std::string & fname) { return result; } -std::vector gpt_tokenize(const gpt_vocab & vocab, const std::string & text) { - std::vector words; - - // first split the text into words - { - std::string str = text; - std::string pat = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"; - - std::regex re(pat); - std::smatch m; - - while (std::regex_search(str, m, re)) { - for (auto x : m) { - words.push_back(x); - } - str = m.suffix(); - } - } - - // find the longest tokens that form the words: - std::vector tokens; - for (const auto & word : words) { - if (word.size() == 0) continue; - - int i = 0; - int n = word.size(); - while (i < n) { - int j = n; - while (j > i) { - auto it = vocab.token_to_id.find(word.substr(i, j-i)); - if (it != vocab.token_to_id.end()) { - tokens.push_back(it->second); - i = j; - break; - } - --j; - } - if (i == n) { - break; - } - if (j == i) { - auto sub = word.substr(i, 1); - if (vocab.token_to_id.find(sub) != vocab.token_to_id.end()) { - tokens.push_back(vocab.token_to_id.at(sub)); - } else { - fprintf(stderr, "%s: unknown token '%s'\n", __func__, sub.data()); - } - ++i; - } - } - } - - return tokens; -} - static size_t utf8_len(char src) { const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 }; uint8_t highbits = static_cast(src) >> 4; @@ -305,7 +250,8 @@ struct llama_sp_symbol { using index = int; index prev; index next; - std::string_view text; + const char * text; + size_t n; }; struct llama_sp_bigram { @@ -322,19 +268,23 @@ struct llama_sp_bigram { size_t size; }; +// original implementation: +// https://github.com/ggerganov/llama.cpp/commit/074bea2eb1f1349a0118239c4152914aecaa1be4 struct llama_tokenizer { - llama_tokenizer(const gpt_vocab & vocab): vocab_(vocab) {} + llama_tokenizer(const llama_vocab & vocab): vocab_(vocab) {} - void tokenize(std::string_view text, std::vector & output) { + void tokenize(const std::string & text, std::vector & output) { // split string into utf8 chars int index = 0; - while (!text.empty()) { + size_t offs = 0; + while (offs < text.size()) { llama_sp_symbol sym; - size_t char_len = std::min(text.size(), utf8_len(text.data()[0])); - sym.text = std::string_view(text.data(), 
char_len); + size_t char_len = std::min(text.size() - offs, utf8_len(text[offs])); + sym.text = text.c_str() + offs; + sym.n = char_len; + offs += char_len; sym.prev = index - 1; - text.remove_prefix(char_len); - sym.next = text.empty() ? -1 : index + 1; + sym.next = offs == text.size() ? -1 : index + 1; index++; symbols_.emplace_back(std::move(sym)); } @@ -353,14 +303,16 @@ struct llama_tokenizer { auto & right_sym = symbols_[bigram.right]; // if one of the symbols already got merged, skip it. - if (left_sym.text.empty() || right_sym.text.empty() || - left_sym.text.size() + right_sym.text.size() != bigram.size) { + if (left_sym.n == 0 || right_sym.n == 0 || + left_sym.n + right_sym.n != bigram.size) { continue; } // merge the right sym into the left one - left_sym.text = std::string_view(left_sym.text.data(), left_sym.text.size() + right_sym.text.size()); - right_sym.text = std::string_view(""); + left_sym.n += right_sym.n; + right_sym.n = 0; + + //printf("left = '%*s' size = %zu\n", (int) left_sym.n, left_sym.text, bigram.size); // remove the right sym from the chain left_sym.next = right_sym.next; @@ -374,13 +326,13 @@ struct llama_tokenizer { } for (int i = 0; i != -1; i = symbols_[i].next) { - auto& symbol = symbols_[i]; - auto token = vocab_.token_to_id.find(std::string(symbol.text)); + auto & symbol = symbols_[i]; + auto token = vocab_.token_to_id.find(std::string(symbol.text, symbol.n)); if (token == vocab_.token_to_id.end()) { // output any symbols that did not form tokens as bytes. - for (int j = 0; j < symbol.text.size(); ++j) { - gpt_vocab::id token_id = static_cast(symbol.text[j]) + 3; + for (int j = 0; j < (int) symbol.n; ++j) { + llama_vocab::id token_id = static_cast(symbol.text[j]) + 3; output.push_back(token_id); } } else { @@ -395,8 +347,8 @@ private: return; } - std::string_view text(symbols_[left].text.data(), symbols_[left].text.size() + symbols_[right].text.size()); - auto token = vocab_.token_to_id.find(std::string(text)); + const std::string text = std::string(symbols_[left].text, symbols_[left].n + symbols_[right].n); + auto token = vocab_.token_to_id.find(text); if (token == vocab_.token_to_id.end()) { return; @@ -416,14 +368,52 @@ private: work_queue_.push(bigram); } - const gpt_vocab & vocab_; + const llama_vocab & vocab_; std::vector symbols_; llama_sp_bigram::queue work_queue_; }; -std::vector llama_tokenize(const gpt_vocab & vocab, std::string_view text, bool bos) { +// TODO: temporary code duplication with llama.cpp +// will resolve after #77 is merged +bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) { + std::ifstream fin(fname, std::ios::binary); + if (!fin.is_open()) { + return false; + } + + int n_vocab = 0; + fin.read((char *) &n_vocab, sizeof(n_vocab)); + + std::string word; + std::vector tmp(64); + + for (int i = 0; i < n_vocab; i++) { + uint32_t len; + fin.read((char *) &len, sizeof(len)); + + word.resize(len); + if (len > 0) { + tmp.resize(len); + fin.read(tmp.data(), len); + word.assign(tmp.data(), len); + } else { + word.clear(); + } + + float score; + fin.read((char *) &score, sizeof(score)); + + vocab.token_to_id[word] = i; + vocab.id_to_token[i] = word; + vocab.score[i] = score; + } + + return true; +} + +std::vector llama_tokenize(const llama_vocab & vocab, const std::string & text, bool bos) { llama_tokenizer tokenizer(vocab); - std::vector output; + std::vector output; if (text.size() == 0) { return output; @@ -437,42 +427,22 @@ std::vector llama_tokenize(const gpt_vocab & vocab, std::string_v return output; } -bool 
gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) { - printf("%s: loading vocab from '%s'\n", __func__, fname.c_str()); - - vocab.token_to_id = ::json_parse(fname); - - for (const auto & kv : vocab.token_to_id) { - vocab.id_to_token[kv.second] = kv.first; - } - - printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size()); - - // print the vocabulary - //for (auto kv : vocab.token_to_id) { - // printf("'%s' -> %d\n", kv.first.data(), kv.second); - //} - - return true; -} - - -void sample_top_k(std::vector> & logits_id, int top_k) { +void sample_top_k(std::vector> & logits_id, int top_k) { // find the top K tokens std::partial_sort( logits_id.begin(), logits_id.begin() + top_k, logits_id.end(), - [](const std::pair & a, const std::pair & b) { + [](const std::pair & a, const std::pair & b) { return a.first > b.first; }); logits_id.resize(top_k); } -gpt_vocab::id llama_sample_top_p_top_k( - const gpt_vocab & vocab, +llama_vocab::id llama_sample_top_p_top_k( + const llama_vocab & vocab, const float * logits, - std::vector & last_n_tokens, + std::vector & last_n_tokens, double repeat_penalty, int top_k, double top_p, @@ -480,7 +450,7 @@ gpt_vocab::id llama_sample_top_p_top_k( std::mt19937 & rng) { int n_logits = vocab.id_to_token.size(); - std::vector> logits_id; + std::vector> logits_id; logits_id.reserve(n_logits); { diff --git a/utils.h b/utils.h index 65fe02ba1..971cc0e98 100644 --- a/utils.h +++ b/utils.h @@ -60,7 +60,7 @@ std::string gpt_random_prompt(std::mt19937 & rng); // Vocab utils // -struct gpt_vocab { +struct llama_vocab { using id = int32_t; using token = std::string; @@ -74,34 +74,22 @@ void replace(std::string & str, const std::string & needle, const std::string & // poor-man's JSON parsing std::map json_parse(const std::string & fname); -// split text into tokens -// -// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53 -// -// Regex (Python): -// r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""" -// -// Regex (C++): -// R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)" -// -std::vector gpt_tokenize(const gpt_vocab & vocab, const std::string & text); +// TODO: temporary until #77 is merged, need this now for some tokenizer tests +bool llama_vocab_load(const std::string & fname, llama_vocab & vocab); // TODO: this is probably wrong, but I cannot figure out how this tokenizer works .. 
// ref: https://github.com/google/sentencepiece -std::vector llama_tokenize(const gpt_vocab & vocab, std::string_view text, bool bos); - -// load the tokens from encoder.json -bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab); +std::vector llama_tokenize(const llama_vocab & vocab, const std::string & text, bool bos); // sample next token given probabilities for each embedding // // - consider only the top K tokens // - from them, consider only the top tokens with cumulative probability > P // -gpt_vocab::id llama_sample_top_p_top_k( - const gpt_vocab & vocab, +llama_vocab::id llama_sample_top_p_top_k( + const llama_vocab & vocab, const float * logits, - std::vector & last_n_tokens, + std::vector & last_n_tokens, double repeat_penalty, int top_k, double top_p, @@ -109,7 +97,7 @@ gpt_vocab::id llama_sample_top_p_top_k( std::mt19937 & rng); // filer to top K tokens from list of logits -void sample_top_k(std::vector> & logits_id, int top_k); +void sample_top_k(std::vector> & logits_id, int top_k); // // Quantization From 8f644a0a859938c787d329d27f98e03c58d7df27 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 21 Mar 2023 17:32:14 +0200 Subject: [PATCH 57/93] Change default repeat_penalty to 1.0 I feel this penalty is not really helping. Especially for the example from the README it makes results pretty bad --- utils.h | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/utils.h b/utils.h index 971cc0e98..4aa7c63b2 100644 --- a/utils.h +++ b/utils.h @@ -13,33 +13,32 @@ // struct gpt_params { - int32_t seed = -1; // RNG seed - int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); - int32_t n_predict = 128; // new tokens to predict + int32_t seed = -1; // RNG seed + int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); + int32_t n_predict = 128; // new tokens to predict int32_t repeat_last_n = 64; // last n tokens to penalize - int32_t n_ctx = 512; //context size - bool memory_f16 = false; // use f16 instead of f32 for memory kv + int32_t n_ctx = 512; //context size // sampling parameters int32_t top_k = 40; float top_p = 0.95f; float temp = 0.80f; - float repeat_penalty = 1.30f; + float repeat_penalty = 1.10f; int32_t n_batch = 8; // batch size for prompt processing - std::string model = "models/lamma-7B/ggml-model.bin"; // model path - std::string prompt = ""; + std::string model = "models/lamma-7B/ggml-model.bin"; // model path + std::string prompt = ""; - bool random_prompt = false; - - bool use_color = false; // use color to distinguish generations and inputs - - bool interactive = false; // interactive mode - bool interactive_start = false; // reverse prompt immediately std::vector antiprompt; // string upon seeing which more user input is prompted - bool instruct = false; // instruction mode (used for Alpaca models) - bool ignore_eos = false; // do not stop generating after eos + + bool memory_f16 = false; // use f16 instead of f32 for memory kv + bool random_prompt = false; // do not randomize prompt if none provided + bool use_color = false; // use color to distinguish generations and inputs + bool interactive = false; // interactive mode + bool interactive_start = false; // reverse prompt immediately + bool instruct = false; // instruction mode (used for Alpaca models) + bool ignore_eos = false; // do not stop generating after eos }; bool gpt_params_parse(int argc, char ** argv, gpt_params & params); From e0ffc861fae5ac8b40ce973f822d03db02929d36 Mon Sep 17 00:00:00 
2001 From: Kevin Kwok Date: Tue, 21 Mar 2023 08:34:49 -0700 Subject: [PATCH 58/93] Update IPFS links to quantized alpaca with new tokenizer format (#352) --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index c7e5d33c7..349e82c56 100644 --- a/README.md +++ b/README.md @@ -192,17 +192,16 @@ First, download the `ggml` Alpaca model into the `./models` folder: ``` # use one of these -# NOTE: these are copied from the alpaca.cpp repo - not sure how long these will work # TODO: add a script to simplify the download -curl -o ggml-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC -curl -o ggml-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC -curl -o ggml-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmQ1bf2BTnYxq73MFJWu1B7bQ2UD6qG7D7YDCxhTndVkPC +curl -o ggml2-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1 +curl -o ggml2-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1 +curl -o ggml2-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1 ``` Now run the `main` tool like this: ``` -./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins +./main -m ./models/ggml2-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins ``` Sample run: From 975d2cebf97ce888fa0aeee6f5ac774d7135891f Mon Sep 17 00:00:00 2001 From: anzz1 Date: Tue, 21 Mar 2023 17:42:43 +0200 Subject: [PATCH 59/93] cmdline option for custom amount of model parts (--n_parts N) (#348) * cmdline option for custom amount of model parts (--n_parts N) * Update main.cpp --------- Co-authored-by: Georgi Gerganov --- main.cpp | 11 +++++++---- utils.cpp | 3 +++ utils.h | 3 ++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/main.cpp b/main.cpp index e97611e28..662a2a79b 100644 --- a/main.cpp +++ b/main.cpp @@ -90,7 +90,8 @@ struct llama_model { }; // load the model's weights from a file -bool llama_model_load(const std::string & fname, llama_model & model, llama_vocab & vocab, int n_ctx, ggml_type memory_type = GGML_TYPE_F32) { + +bool llama_model_load(const std::string & fname, llama_model & model, llama_vocab & vocab, int n_ctx, int n_parts, ggml_type memory_type = GGML_TYPE_F32) { fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); std::vector f_buf(1024*1024); @@ -127,7 +128,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca } int n_ff = 0; - int n_parts = 0; // load hparams { @@ -145,7 +145,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca hparams.n_ctx = n_ctx; n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult; - n_parts = LLAMA_N_PARTS.at(hparams.n_embd); + + if (n_parts < 1) { + n_parts = LLAMA_N_PARTS.at(hparams.n_embd); + } fprintf(stderr, "%s: n_vocab = %d\n", __func__, hparams.n_vocab); fprintf(stderr, "%s: n_ctx = %d\n", __func__, hparams.n_ctx); @@ -839,7 +842,7 @@ int main(int argc, char ** argv) { { const ggml_type memory_type = params.memory_f16 ? 
GGML_TYPE_F16 : GGML_TYPE_F32; const int64_t t_start_us = ggml_time_us(); - if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) { + if (!llama_model_load(params.model, model, vocab, params.n_ctx, params.n_parts, memory_type)) { fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); return 1; } diff --git a/utils.cpp b/utils.cpp index 4843b4f55..03ed9bc06 100644 --- a/utils.cpp +++ b/utils.cpp @@ -74,6 +74,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.antiprompt.push_back(argv[++i]); } else if (arg == "--ignore-eos") { params.ignore_eos = true; + } else if (arg == "--n_parts") { + params.n_parts = std::stoi(argv[++i]); } else if (arg == "-h" || arg == "--help") { gpt_print_usage(argc, argv, params); exit(0); @@ -116,6 +118,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating\n"); fprintf(stderr, " --memory_f16 use f16 instead of f32 for memory key+value\n"); fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp); + fprintf(stderr, " --n_parts N number of model parts (default: -1 = determine from dimensions)\n"); fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); fprintf(stderr, " -m FNAME, --model FNAME\n"); fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); diff --git a/utils.h b/utils.h index 4aa7c63b2..c7fce964b 100644 --- a/utils.h +++ b/utils.h @@ -13,10 +13,11 @@ // struct gpt_params { - int32_t seed = -1; // RNG seed + int32_t seed = -1; // RNG seed int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); int32_t n_predict = 128; // new tokens to predict int32_t repeat_last_n = 64; // last n tokens to penalize + int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions) int32_t n_ctx = 512; //context size // sampling parameters From c3b2306b18a087799acc431e485b8a2e3162cd52 Mon Sep 17 00:00:00 2001 From: Qingyou Meng Date: Tue, 21 Mar 2023 23:44:11 +0800 Subject: [PATCH 60/93] Makefile: slightly cleanup for Mac Intel; echo instead of run ./main -h (#335) --- Makefile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index dffcdbde7..4becf6e19 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ CXXV := $(shell $(CXX) --version | head -n 1) # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789 ifeq ($(UNAME_S),Darwin) ifneq ($(UNAME_P),arm) - SYSCTL_M := $(shell sysctl -n hw.optional.arm64) + SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null) ifeq ($(SYSCTL_M),1) # UNAME_P := arm # UNAME_M := arm64 @@ -149,9 +149,6 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686)) CFLAGS += -mfma -mf16c -mavx -mavx2 endif endif -ifeq ($(UNAME_M),amd64) - CFLAGS += -mavx -mavx2 -mfma -mf16c -endif ifneq ($(filter ppc64%,$(UNAME_M)),) POWER9_M := $(shell grep "POWER9" /proc/cpuinfo) ifneq (,$(findstring POWER9,$(POWER9_M))) @@ -163,7 +160,8 @@ ifneq ($(filter ppc64%,$(UNAME_M)),) endif endif ifndef LLAMA_NO_ACCELERATE - # Mac M1 - include Accelerate framework + # Mac M1 - include Accelerate framework. + # `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time). 
ifeq ($(UNAME_S),Darwin) CFLAGS += -DGGML_USE_ACCELERATE LDFLAGS += -framework Accelerate @@ -226,7 +224,7 @@ clean: main: main.cpp ggml.o utils.o $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS) - ./main -h + @echo "\x1b[36mrun ./main -h for help\x1b[0m" quantize: quantize.cpp ggml.o utils.o $(CXX) $(CXXFLAGS) quantize.cpp ggml.o utils.o -o quantize $(LDFLAGS) From c98ae02668a25916954b1653e25a5a35ca048d63 Mon Sep 17 00:00:00 2001 From: Mack Straight Date: Tue, 21 Mar 2023 08:49:43 -0700 Subject: [PATCH 61/93] fix typo in comment (#318) --- convert-pth-to-ggml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index c506676fc..46f7eba1c 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -67,7 +67,7 @@ def write_header(fout, hparams, ftype): keys = ["vocab_size", "dim", "multiple_of", "n_heads", "n_layers"] values = [ - 0x67676d66, # magic: ggml in hex + 0x67676d66, # magic: ggmf in hex 1, # file version *[hparams[key] for key in keys], hparams["dim"] // hparams["n_heads"], # rot (obsolete) From 715d292ee0e34d27f27af43d7feaad1f1344981d Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Tue, 21 Mar 2023 09:50:09 -0600 Subject: [PATCH 62/93] Add OpenBSD support (#314) --- Makefile | 4 ++++ ggml.c | 2 +- utils.cpp | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4becf6e19..4b923e575 100644 --- a/Makefile +++ b/Makefile @@ -53,6 +53,10 @@ ifeq ($(UNAME_S),NetBSD) CFLAGS += -pthread CXXFLAGS += -pthread endif +ifeq ($(UNAME_S),OpenBSD) + CFLAGS += -pthread + CXXFLAGS += -pthread +endif ifeq ($(UNAME_S),Haiku) CFLAGS += -pthread CXXFLAGS += -pthread diff --git a/ggml.c b/ggml.c index f85138f38..8daac3510 100644 --- a/ggml.c +++ b/ggml.c @@ -2,7 +2,7 @@ #if defined(_MSC_VER) || defined(__MINGW32__) #include // using malloc.h with MSC/MINGW -#elif !defined(__FreeBSD__) && !defined(__NetBSD__) +#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) #include #endif diff --git a/utils.cpp b/utils.cpp index 03ed9bc06..a3bda1563 100644 --- a/utils.cpp +++ b/utils.cpp @@ -12,7 +12,7 @@ #if defined(_MSC_VER) || defined(__MINGW32__) #include // using malloc.h with MSC/MINGW - #elif !defined(__FreeBSD__) && !defined(__NetBSD__) + #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) #include #endif From 3bfa3b43b7319b71853bfc7d3cf4e9767c24bbc8 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 21 Mar 2023 17:59:16 +0200 Subject: [PATCH 63/93] Fix convert script, warnings alpaca instructions, default params --- README.md | 10 +++++----- alpaca.sh | 2 +- convert-pth-to-ggml.py | 8 +++++--- main.cpp | 20 ++++++++++++-------- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 349e82c56..f0b0c2a5b 100644 --- a/README.md +++ b/README.md @@ -193,15 +193,15 @@ First, download the `ggml` Alpaca model into the `./models` folder: ``` # use one of these # TODO: add a script to simplify the download -curl -o ggml2-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1 -curl -o ggml2-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1 -curl -o ggml2-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1 +curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1 +curl -o 
./models/ggml-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1 +curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1 ``` Now run the `main` tool like this: ``` -./main -m ./models/ggml2-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins +./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins ``` Sample run: @@ -218,7 +218,7 @@ Sample run: There 26 letters in the English Alphabet > What is the most common way of transportation in Amsterdam? The majority (54%) are using public transit. This includes buses, trams and metros with over 100 lines throughout the city which make it very accessible for tourists to navigate around town as well as locals who commute by tram or metro on a daily basis -> List 5 words that start with "ca". +> List 5 words that start with "ca". cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach. > ``` diff --git a/alpaca.sh b/alpaca.sh index 284989bc0..2f36d6f54 100755 --- a/alpaca.sh +++ b/alpaca.sh @@ -3,4 +3,4 @@ # Temporary script - will be removed in the future # -./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.96 --repeat_penalty 1 -t 7 +./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7 diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index 46f7eba1c..db5b00fec 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -27,9 +27,9 @@ from sentencepiece import SentencePieceProcessor def parse_args(): parser = argparse.ArgumentParser(description='Convert a LLaMA model checkpoint to a ggml compatible file') - parser.add_argument('dir_model', help='directory containing the model checkpoint') - parser.add_argument('ftype', type=int, choices=[0, 1], default=1, help='file type (0: float32, 1: float16)') - parser.add_argument('vocab_only', type=bool, default=False, help='only write vocab to file') + parser.add_argument('dir_model', help='directory containing the model checkpoint') + parser.add_argument('ftype', help='file type (0: float32, 1: float16)', type=int, choices=[0, 1], default=1) + parser.add_argument('vocab_only', help='only write vocab to file', type=int, default=0, nargs='?') return parser.parse_args() def get_n_parts(dim): @@ -135,6 +135,8 @@ def main(): hparams, tokenizer = load_hparams_and_tokenizer(dir_model) + print(args) + # if only writing vocab to file if args.vocab_only: diff --git a/main.cpp b/main.cpp index 662a2a79b..6bae80cdf 100644 --- a/main.cpp +++ b/main.cpp @@ -165,12 +165,20 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca // load vocab { std::string word; + std::vector tmp(64); + for (int i = 0; i < model.hparams.n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); word.resize(len); - fin.read((char *) word.data(), len); + if (len > 0) { + tmp.resize(len); + fin.read(tmp.data(), len); + word.assign(tmp.data(), len); + } else { + word.clear(); + } float score; fin.read((char *) &score, sizeof(score)); @@ -178,10 +186,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca vocab.token_to_id[word] = i; vocab.id_to_token[i] = word; vocab.score[i] = score; - - //if (i < 30000) { - // fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str()); - //} } } @@ -974,7 +978,7 @@ int main(int argc, char ** argv) { n_past += embd.size(); embd.clear(); - if 
(embd_inp.size() <= input_consumed) { + if ((int) embd_inp.size() <= input_consumed) { // out of user input, sample next token const float top_k = params.top_k; const float top_p = params.top_p; @@ -1011,7 +1015,7 @@ int main(int argc, char ** argv) { --remaining_tokens; } else { // some user input remains from prompt or interaction, forward it to processing - while (embd_inp.size() > input_consumed) { + while ((int) embd_inp.size() > input_consumed) { embd.push_back(embd_inp[input_consumed]); last_n_tokens.erase(last_n_tokens.begin()); last_n_tokens.push_back(embd_inp[input_consumed]); @@ -1036,7 +1040,7 @@ int main(int argc, char ** argv) { // in interactive mode, and not currently processing queued inputs; // check if we should prompt the user for more - if (params.interactive && embd_inp.size() <= input_consumed) { + if (params.interactive && (int) embd_inp.size() <= input_consumed) { // check for reverse prompt for (auto antiprompt_inp : antipromptv_inp) { if (antiprompt_inp.size() && std::equal(antiprompt_inp.rbegin(), antiprompt_inp.rend(), last_n_tokens.rbegin())) { From d5f56a5e5a0069329a81f96460221e7afb1daddc Mon Sep 17 00:00:00 2001 From: tjohnman Date: Tue, 21 Mar 2023 17:04:43 +0100 Subject: [PATCH 64/93] Check for reverse prompt by characters instead of tokens (#292) (#330) * Check for reverse prompt by characters instead of tokens (#292) * Update main.cpp Wording. * Cleanup. * Remove unnecessary use of std::stringstream. --------- Co-authored-by: Johnman Co-authored-by: Georgi Gerganov --- main.cpp | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/main.cpp b/main.cpp index 6bae80cdf..bda824ff1 100644 --- a/main.cpp +++ b/main.cpp @@ -885,15 +885,8 @@ int main(int argc, char ** argv) { params.antiprompt.push_back("### Instruction:\n\n"); } - // tokenize the reverse prompt - std::vector> antipromptv_inp; - - for (auto antiprompt : params.antiprompt) { - antipromptv_inp.push_back(::llama_tokenize(vocab, antiprompt, false)); - } - // enable interactive mode if reverse prompt is specified - if (antipromptv_inp.size() != 0) { + if (params.antiprompt.size() != 0) { params.interactive = true; } @@ -917,15 +910,9 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: interactive mode on.\n", __func__); - if(antipromptv_inp.size()) { - for (size_t apindex = 0; apindex < antipromptv_inp.size(); ++apindex) { - auto antiprompt_inp = antipromptv_inp.at(apindex); - fprintf(stderr, "%s: reverse prompt: '%s'\n", __func__, params.antiprompt.at(apindex).c_str()); - fprintf(stderr, "%s: number of tokens in reverse prompt = %zu\n", __func__, antiprompt_inp.size()); - for (int i = 0; i < (int) antiprompt_inp.size(); i++) { - fprintf(stderr, "%6d -> '%s'\n", antiprompt_inp[i], vocab.id_to_token.at(antiprompt_inp[i]).c_str()); - } - fprintf(stderr, "\n"); + if(params.antiprompt.size()) { + for (auto antiprompt : params.antiprompt) { + fprintf(stderr, "Reverse prompt: '%s'\n", antiprompt.c_str()); } } } @@ -1042,9 +1029,14 @@ int main(int argc, char ** argv) { // check if we should prompt the user for more if (params.interactive && (int) embd_inp.size() <= input_consumed) { // check for reverse prompt - for (auto antiprompt_inp : antipromptv_inp) { - if (antiprompt_inp.size() && std::equal(antiprompt_inp.rbegin(), antiprompt_inp.rend(), last_n_tokens.rbegin())) { - // reverse prompt found + std::string last_output; + for (auto id : last_n_tokens) { + last_output += vocab.id_to_token[id]; + } + + // Check if each of the reverse prompts appears at 
the end of the output. + for (std::string antiprompt : params.antiprompt) { + if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos) { is_interacting = true; break; } From 6a612959e1b6c37b68b6b141329751a2902b1030 Mon Sep 17 00:00:00 2001 From: tjohnman Date: Tue, 21 Mar 2023 17:05:06 +0100 Subject: [PATCH 65/93] Check for reverse prompt by characters instead of tokens (#292) (#330) * Check for reverse prompt by characters instead of tokens (#292) * Update main.cpp Wording. * Cleanup. * Remove unnecessary use of std::stringstream. --------- Co-authored-by: Johnman Co-authored-by: Georgi Gerganov From dc6a845b8573cd7d06c6b295241d26f311602a1f Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 21 Mar 2023 18:09:37 +0200 Subject: [PATCH 66/93] Add chat.sh script --- README.md | 5 ++++- chat.sh | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100755 chat.sh diff --git a/README.md b/README.md index f0b0c2a5b..62cf82d85 100644 --- a/README.md +++ b/README.md @@ -179,8 +179,11 @@ In this mode, you can always interrupt generation by pressing Ctrl+C and enter o Here is an example few-shot interaction, invoked with the command ``` -./main -m ./models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt +# default arguments using 7B model +./chat.sh +# custom arguments using 13B model +./main -m ./models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt ``` Note the use of `--color` to distinguish between user input and generated text. diff --git a/chat.sh b/chat.sh new file mode 100755 index 000000000..24a0f10ad --- /dev/null +++ b/chat.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# +# Temporary script - will be removed in the future +# + +./main -m ./models/7B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt From 1daf4dd71235dbbf537738e7ad53daad8d97586f Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 21 Mar 2023 18:10:32 +0200 Subject: [PATCH 67/93] Minor style changes --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 62cf82d85..d9a4b1bab 100644 --- a/README.md +++ b/README.md @@ -178,13 +178,15 @@ If you want a more ChatGPT-like experience, you can run in interactive mode by p In this mode, you can always interrupt generation by pressing Ctrl+C and enter one or more lines of text which will be converted into tokens and appended to the current context. You can also specify a *reverse prompt* with the parameter `-r "reverse prompt string"`. This will result in user input being prompted whenever the exact tokens of the reverse prompt string are encountered in the generation. A typical use is to use a prompt which makes LLaMa emulate a chat between multiple users, say Alice and Bob, and pass `-r "Alice:"`. Here is an example few-shot interaction, invoked with the command -``` + +```bash # default arguments using 7B model ./chat.sh # custom arguments using 13B model ./main -m ./models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt ``` + Note the use of `--color` to distinguish between user input and generated text. 
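
[Editorial aside, not part of any patch above] The reverse-prompt handling discussed here — patch 64 switches the check from exact token matches to a character-level comparison of the recent output — can be pictured as detokenizing the last few tokens into a string and testing whether any reverse prompt ends that string. A rough Python sketch of the idea; the toy vocabulary and token ids are invented for illustration only:

```python
# Toy sketch of the "check reverse prompt by characters" idea (patch 64 above).
# The vocabulary, token ids and prompts here are made up for illustration only.
id_to_token = {0: "User", 1: ":", 2: " Hello", 3: " Bob", 4: "\n"}

def hit_reverse_prompt(last_n_tokens, antiprompts):
    # Re-assemble the recent output as plain text, then test whether any
    # reverse prompt appears at the end of that text (a suffix check).
    last_output = "".join(id_to_token[t] for t in last_n_tokens)
    return any(last_output.endswith(a) for a in antiprompts)

# " Bob: Hello\nUser:" ends with "User:", so control returns to the user
print(hit_reverse_prompt([3, 1, 2, 4, 0, 1], ["User:"]))  # True
```
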
![image](https://user-images.githubusercontent.com/1991296/224575029-2af3c7dc-5a65-4f64-a6bb-517a532aea38.png) From c86ba036e613d46815501a4c6775117c9fc7afce Mon Sep 17 00:00:00 2001 From: anzz1 Date: Tue, 21 Mar 2023 18:14:46 +0200 Subject: [PATCH 68/93] Enable ANSI colors on Windows 10+ (#311) * Enable ANSI colors on Windows 10+ On older versions function will silently fail without any ill effects * Do not call SetConsoleMode if the mode is already set * Update main.cpp --------- Co-authored-by: Georgi Gerganov --- main.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/main.cpp b/main.cpp index bda824ff1..dd8e52df2 100644 --- a/main.cpp +++ b/main.cpp @@ -20,6 +20,13 @@ #include #endif +#if defined (_WIN32) +#pragma comment(lib,"kernel32.lib") +extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle); +extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode); +extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode); +#endif + #define ANSI_COLOR_RED "\x1b[31m" #define ANSI_COLOR_GREEN "\x1b[32m" #define ANSI_COLOR_YELLOW "\x1b[33m" @@ -946,6 +953,14 @@ int main(int argc, char ** argv) { // set the color for the prompt which will be output initially if (params.use_color) { +#if defined (_WIN32) + // Enable ANSI colors on Windows 10+ + unsigned long dwMode = 0; + void* hConOut = GetStdHandle((unsigned long)-11); // STD_OUTPUT_HANDLE (-11) + if (hConOut && hConOut != (void*)-1 && GetConsoleMode(hConOut, &dwMode) && !(dwMode & 0x4)) { + SetConsoleMode(hConOut, dwMode | 0x4); // ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x4) + } +#endif printf(ANSI_COLOR_YELLOW); } From f157088cb75f23208abc92b473a132ef3f7a7f15 Mon Sep 17 00:00:00 2001 From: Alex von Gluck IV Date: Tue, 21 Mar 2023 11:21:06 -0500 Subject: [PATCH 69/93] makefile: Fix CPU feature detection on Haiku (#218) --- Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 4b923e575..071275470 100644 --- a/Makefile +++ b/Makefile @@ -133,20 +133,20 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686)) CFLAGS += -mavx512pf endif else ifeq ($(UNAME_S),Haiku) - AVX1_M := $(shell sysinfo -cpu | grep "AVX ") - ifneq (,$(findstring avx,$(AVX1_M))) + AVX1_M := $(shell sysinfo -cpu | grep -w "AVX") + ifneq (,$(findstring AVX,$(AVX1_M))) CFLAGS += -mavx endif - AVX2_M := $(shell sysinfo -cpu | grep "AVX2 ") - ifneq (,$(findstring avx2,$(AVX2_M))) + AVX2_M := $(shell sysinfo -cpu | grep -w "AVX2") + ifneq (,$(findstring AVX2,$(AVX2_M))) CFLAGS += -mavx2 endif - FMA_M := $(shell sysinfo -cpu | grep "FMA ") - ifneq (,$(findstring fma,$(FMA_M))) + FMA_M := $(shell sysinfo -cpu | grep -w "FMA") + ifneq (,$(findstring FMA,$(FMA_M))) CFLAGS += -mfma endif - F16C_M := $(shell sysinfo -cpu | grep "F16C ") - ifneq (,$(findstring f16c,$(F16C_M))) + F16C_M := $(shell sysinfo -cpu | grep -w "F16C") + ifneq (,$(findstring F16C,$(F16C_M))) CFLAGS += -mf16c endif else From 3ab3e6582f7320c2b6568c892fdfc8215caf7e6c Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hoelt Date: Tue, 21 Mar 2023 18:23:15 +0200 Subject: [PATCH 70/93] Add chatLLaMa script (#198) * Add chatLLaMa script * Fix shellcheck errors and do some cleanup * Move chatLLaMa script to `examples` directory * Reduce chatLLaMa context size to 2048 Ref d7def1a7524f712e5ebb7cd02bab0f13aa56a7f9 * Include n_predict to 2048 in examples/chatLLaMa --- examples/chatLLaMa | 53 
++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100755 examples/chatLLaMa diff --git a/examples/chatLLaMa b/examples/chatLLaMa new file mode 100755 index 000000000..97c48ac87 --- /dev/null +++ b/examples/chatLLaMa @@ -0,0 +1,53 @@ +#!/bin/bash + +cd "$(dirname "$0")/.." || exit + +MODEL="${MODEL:-./models/13B/ggml-model-q4_0.bin}" +USER_NAME="${USER_NAME:-User}" +AI_NAME="${AI_NAME:-ChatLLaMa}" + +# Adjust to the number of CPU cores you want to use. +N_THREAD="${N_THREAD:-8}" +# Number of tokens to predict (made it larger than default because we want a long interaction) +N_PREDICTS="${N_PREDICTS:-2048}" + +# Note: you can also override the generation options by specifying them on the command line: +# For example, override the context size by doing: ./chatLLaMa --ctx_size 1024 +GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --repeat_penalty 1.17647}" + +# shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS +./main $GEN_OPTIONS \ + --model "$MODEL" \ + --threads "$N_THREAD" \ + --n_predict "$N_PREDICTS" \ + --color --interactive \ + --reverse-prompt "${USER_NAME}:" \ + --prompt " +Text transcript of a never ending dialog, where ${USER_NAME} interacts with an AI assistant named ${AI_NAME}. +${AI_NAME} is helpful, kind, honest, friendly, good at writing and never fails to answer ${USER_NAME}’s requests immediately and with details and precision. +There are no annotations like (30 seconds passed...) or (to himself), just what ${USER_NAME} and ${AI_NAME} say alound to each other. +The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long. +The transcript only includes text, it does not include markup like HTML and Markdown. + +$USER_NAME: Hello, $AI_NAME! +$AI_NAME: Hello $USER_NAME! How may I help you today? +$USER_NAME: What time is it? +$AI_NAME: It is $(date +%H:%M). +$USER_NAME: What year is it? +$AI_NAME: We are in $(date +%Y). +$USER_NAME: Please tell me the largest city in Europe. +$AI_NAME: The largest city in Europe is Moscow, the capital of Russia. +$USER_NAME: What can you tell me about Moscow? +$AI_NAME: Moscow, on the Moskva River in western Russia, is the nation’s cosmopolitan capital. In its historic core is the Kremlin, a complex that’s home to the president and tsarist treasures in the Armoury. Outside its walls is Red Square, Russia’s symbolic center. +$USER_NAME: What is a cat? +$AI_NAME: A cat is a domestic species of small carnivorous mammal. It is the only domesticated species in the family Felidae. +$USER_NAME: How do I pass command line arguments to a Node.js program? +$AI_NAME: The arguments are stored in process.argv. + + argv[0] is the path to the Node. js executable. + argv[1] is the path to the script file. + argv[2] is the first argument passed to the script. + argv[3] is the second argument passed to the script and so on. +$USER_NAME: Name a color. +$AI_NAME: Blue +$USER_NAME:" "$@" From 486ae645fd3eda8b9d7413d5ff34fb65a3e337fb Mon Sep 17 00:00:00 2001 From: Gary Linscott Date: Tue, 21 Mar 2023 09:27:42 -0700 Subject: [PATCH 71/93] Compute perplexity over prompt (#270) * Compute perplexity over prompt * More accurate perplexity calculation - over all logits in the context window (so 512x more tokens!) 
* Output all perplexitiies * Add timing/ETA --- main.cpp | 103 ++++++++++++++++++++++++++++++++++++++++++++++++------ utils.cpp | 7 ++-- utils.h | 1 + 3 files changed, 98 insertions(+), 13 deletions(-) diff --git a/main.cpp b/main.cpp index dd8e52df2..9f46d5698 100644 --- a/main.cpp +++ b/main.cpp @@ -560,7 +560,8 @@ bool llama_eval( const int n_past, const std::vector & embd_inp, std::vector & embd_w, - size_t & mem_per_token) { + size_t & mem_per_token, + bool return_all_logits = false) { const int N = embd_inp.size(); const auto & hparams = model.hparams; @@ -578,7 +579,7 @@ bool llama_eval( static void * buf = malloc(buf_size); if (mem_per_token > 0 && mem_per_token*N > buf_size) { - const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead + const size_t buf_size_new = 1.3*(mem_per_token*N); // add 30% to account for ggml object overhead //fprintf(stderr, "\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new); // reallocate @@ -764,9 +765,14 @@ bool llama_eval( //embd_w.resize(n_vocab*N); //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); - // return result for just the last token - embd_w.resize(n_vocab); - memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab); + if (return_all_logits) { + embd_w.resize(n_vocab * N); + memcpy(embd_w.data(), (float *) ggml_get_data(inpL), sizeof(float)*n_vocab*N); + } else { + // return result for just the last token + embd_w.resize(n_vocab); + memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab); + } if (mem_per_token == 0) { mem_per_token = ggml_used_mem(ctx0)/N; @@ -778,6 +784,76 @@ bool llama_eval( return true; } +std::vector softmax(const std::vector& logits) { + std::vector probs(logits.size()); + float max_logit = logits[0]; + for (float v : logits) max_logit = std::max(max_logit, v); + double sum_exp = 0.0; + for (size_t i = 0; i < logits.size(); i++) { + // Subtract the maximum logit value from the current logit value for numerical stability + float logit = logits[i] - max_logit; + double exp_logit = std::exp(logit); + sum_exp += exp_logit; + probs[i] = exp_logit; + } + for (size_t i = 0; i < probs.size(); i++) probs[i] /= sum_exp; + return probs; +} + +void perplexity(const llama_vocab &vocab, const llama_model &model, const gpt_params ¶ms, size_t mem_per_token) { + // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research + // Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw` + // Output: `perplexity: 13.5106 [114/114]` + std::vector tokens = ::llama_tokenize(vocab, params.prompt, true); + + int count = 0; + double nll = 0.0; + int seq_count = tokens.size() / params.n_ctx; + printf("Calculating perplexity over %d chunks\n", seq_count); + for (int i = 0; i < seq_count; ++i) { + int start = i * params.n_ctx; + int end = start + params.n_ctx - 1; + std::vector embd(tokens.begin() + start, tokens.begin() + end); + std::vector logits; + auto start_t = std::chrono::high_resolution_clock::now(); + if (!llama_eval(model, params.n_threads, 0, embd, logits, mem_per_token, true)) { + fprintf(stderr, "Failed to predict\n"); + return; + } + auto end_t = std::chrono::high_resolution_clock::now(); + if (i == 0) { + double seconds = std::chrono::duration(end_t - start_t).count(); + printf("%.2f seconds per pass - ETA %.2f hours\n", seconds, (seconds * seq_count) / (60.0*60.0)); + } + // We get the logits 
for all the tokens in the context window (params.n_ctx) + // from llama_eval above. Now, based on https://huggingface.co/docs/transformers/perplexity, + // calculate the perplexity over the last half the window (so the model always has + // some context to predict the token). + // + // We rely on the fact that attention in the forward pass only looks at previous + // tokens here, so the logits returned for each token are an accurate representation + // of what the model would have predicted at that point. + // + // Example, we have a context window of 512, we will compute perplexity for each of the + // last 256 tokens. Then, we split the input up into context window size chunks to + // process the entire prompt. + for (int j = params.n_ctx / 2; j < params.n_ctx - 1; ++j) { + // Calculate probability of next token, given the previous ones. + int n_vocab = model.hparams.n_vocab; + std::vector tok_logits( + logits.begin() + j * n_vocab, + logits.begin() + (j + 1) * n_vocab); + double prob = softmax(tok_logits)[tokens[start + j + 1]]; + nll += -std::log(prob); + ++count; + } + // perplexity is e^(average negative log-likelihood) + printf("[%d]%.4lf,", i + 1, std::exp(nll / count)); + fflush(stdout); + } + printf("\n"); +} + static bool is_interacting = false; #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) @@ -868,13 +944,22 @@ int main(int argc, char ** argv) { params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info()); } + std::vector logits; + + // determine the required inference memory per token: + size_t mem_per_token = 0; + llama_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token); + + if (params.perplexity) { + perplexity(vocab, model, params, mem_per_token); + exit(0); + } + int n_past = 0; int64_t t_sample_us = 0; int64_t t_predict_us = 0; - std::vector logits; - // Add a space in front of the first character to match OG llama tokenizer behavior params.prompt.insert(0, 1, ' '); // tokenize the prompt @@ -928,10 +1013,6 @@ int main(int argc, char ** argv) { std::vector embd; - // determine the required inference memory per token: - size_t mem_per_token = 0; - llama_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token); - int last_n_size = params.repeat_last_n; std::vector last_n_tokens(last_n_size); std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0); diff --git a/utils.cpp b/utils.cpp index a3bda1563..7c6864c8f 100644 --- a/utils.cpp +++ b/utils.cpp @@ -72,6 +72,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.use_color = true; } else if (arg == "-r" || arg == "--reverse-prompt") { params.antiprompt.push_back(argv[++i]); + } else if (arg == "--perplexity") { + params.perplexity = true; } else if (arg == "--ignore-eos") { params.ignore_eos = true; } else if (arg == "--n_parts") { @@ -120,6 +122,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp); fprintf(stderr, " --n_parts N number of model parts (default: -1 = determine from dimensions)\n"); fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); + fprintf(stderr, " --perplexity compute perplexity over the prompt\n"); fprintf(stderr, " -m FNAME, --model FNAME\n"); fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); fprintf(stderr, "\n"); @@ -596,7 +599,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int 
qk, int64_t char * pdst = (char *) dst; - for (int j = 0; j < n; j += k) { + for (int j = 0; j < n; j += k) { uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); uint8_t * pm = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + 2*sizeof(float)); @@ -619,7 +622,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t *(float *) pd = d; *(float *) pm = min; - pd += bs; + pd += bs; pm += bs; for (int l = 0; l < qk; l += 2) { diff --git a/utils.h b/utils.h index c7fce964b..6693775c5 100644 --- a/utils.h +++ b/utils.h @@ -40,6 +40,7 @@ struct gpt_params { bool interactive_start = false; // reverse prompt immediately bool instruct = false; // instruction mode (used for Alpaca models) bool ignore_eos = false; // do not stop generating after eos + bool perplexity = false; // compute perplexity over the prompt }; bool gpt_params_parse(int argc, char ** argv, gpt_params & params); From 16ffc013c62f22bdaa3cdc022d7a13fd952d73fc Mon Sep 17 00:00:00 2001 From: comex Date: Tue, 21 Mar 2023 09:42:25 -0700 Subject: [PATCH 72/93] Importer for GPTQ quantized LLaMA models (#301) * [WIP, broken] Importer for GPTQ quantized LLaMA models Based on: https://github.com/qwopqwop200/GPTQ-for-LLaMa Current status: Something is busted. The output starts out decent, but quickly degrades into gibberish. This doesn't happen with either the original GPTQ-for-LLaMa using the same weights, or llama.cpp when using weights quantized by its own quantizer. Is there a bug in the conversion script that somehow only comes into play with a large context size? I did notice one potential issue. It's clearly not the main cause of the gibberish, since it doesn't happen when using q4_1 weights quantized by llama.cpp itself, but it seems concerning. When doing a matrix multiplication of f16 * f32 => f32 or q4_1 * f32 => f32, at least when the multiplication is not done with BLAS, the intermediate results are stored in the smaller format rather than f32. This seems like an unnecessary waste of precision, especially in the q4_1 case. I was originally hoping to validate the results by matching the Python implementation's output exactly, but precision and non-associativity issues make this very difficult, including when performing matrix multiplications and, especially, computing norms. Anyway, design details: The models being imported store per-layer weights in essentially q4_1 format, although the addend and scale are shared across an entire row rather than every group of 32 weights. This script duplicates the addend and scale to match ggml's expectations, at the cost of wasting some memory. However, there are two differences which I accommodated changing the output format (and adding corresponding support to main.cpp) rather than having the script match the existing one: - The tok_embeddings and output weights (i.e. the weights that aren't per-layer) are f16 instead of q4_1. They could be converted to q4_1, and the impact of the loss of precision would probably be low, but this would rule out exactly matching the Python implementation's output for validation. - There is no sharding, since the input doesn't have it, and for a CPU-only implementation it seems more useful to avoid having to deal with multiple files. The new format is differentiated from existing q4_1 format by changing the 'f16' header flag to a new value, 4. 
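
[Editorial aside, not part of the patch] The layout described above can be sketched numerically. The shapes below are invented for the example, and the arithmetic only illustrates the ggml expectation the commit message refers to — one addend/scale pair per group of 32 quantized weights, with each weight reconstructed roughly as addend + scale * q; the real converter additionally handles sign flips, int4 bit packing and row permutation.

```python
# Illustrative sketch of duplicating per-row addend/scale into per-32-column
# groups, as described in the commit message above (not code from the patch).
import numpy as np

rows, cols, group = 2, 64, 32                           # toy shape, multiple of 32
q = np.random.randint(0, 16, size=(rows, cols))         # int4 values in [0, 15]
addend = np.random.randn(rows, 1).astype(np.float32)    # one addend per row in the input
scale = np.random.rand(rows, 1).astype(np.float32)      # one scale per row in the input

# ggml wants one addend/scale per 32-column group, so repeat them per group
n_groups = cols // group
addend_rep = np.repeat(addend, n_groups, axis=1)        # shape (rows, n_groups)
scale_rep = np.repeat(scale, n_groups, axis=1)

# dequantize: every weight in a group uses that group's addend and scale
deq = addend_rep.repeat(group, axis=1) + scale_rep.repeat(group, axis=1) * q
print(deq.shape)  # (2, 64)
```
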
That said, I think a cleaner approach would be to change main.cpp to support loading each tensor with an arbitrary sharding configuration and type rather than hardcoding specific combinations of types. So far I've wasted too much time debugging to try implementing this... * Add missing permutation. Now it works. --------- Co-authored-by: Georgi Gerganov --- convert-gptq-to-ggml.py | 172 ++++++++++++++++++++++++++++++++++++++++ main.cpp | 26 +++--- 2 files changed, 189 insertions(+), 9 deletions(-) create mode 100644 convert-gptq-to-ggml.py diff --git a/convert-gptq-to-ggml.py b/convert-gptq-to-ggml.py new file mode 100644 index 000000000..7fccb4d56 --- /dev/null +++ b/convert-gptq-to-ggml.py @@ -0,0 +1,172 @@ +# Convert a GPTQ quantized LLaMA model to a ggml compatible file +# Based on: https://github.com/qwopqwop200/GPTQ-for-LLaMa +# +import os +import re +import sys +import json +import struct +import numpy as np +import torch +from sentencepiece import SentencePieceProcessor + +if len(sys.argv) != 4: + print("Usage: convert-gptq-to-ggml.py llamaXXb-4bit.pt tokenizer.model out.bin\n") + sys.exit(1) + +fname_model = sys.argv[1] +fname_tokenizer = sys.argv[2] +dir_out = sys.argv[3] + +model = torch.load(fname_model, map_location="cpu") + +n_vocab, n_embd = model['model.embed_tokens.weight'].shape +n_layer = 1 + max(int(m.group(1)) for name in model + if (m := re.match(r'model\.layers\.([0-9]+)', name))) + +# hardcoded: +n_mult = 256 +n_head = {32: 32, 40: 40, 60: 52, 80: 64}[n_layer] + +tokenizer = SentencePieceProcessor(fname_tokenizer) + +assert tokenizer.vocab_size() == n_vocab + +fname_out = sys.argv[3] + +fout = open(fname_out, "wb") + +fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex +fout.write(struct.pack("i", n_vocab)) +fout.write(struct.pack("i", n_embd)) +fout.write(struct.pack("i", n_mult)) +fout.write(struct.pack("i", n_head)) +fout.write(struct.pack("i", n_layer)) +fout.write(struct.pack("i", n_embd // n_head)) # rot (obsolete) +fout.write(struct.pack("i", 4)) + + +# This loop unchanged from convert-pth-to-ggml.py: +for i in range(tokenizer.vocab_size()): + if tokenizer.is_unknown(i): + # "" token (translated as ??) + text = " \u2047 ".encode("utf-8") + fout.write(struct.pack("i", len(text))) + fout.write(text) + elif tokenizer.is_control(i): + # ""/"" tokens + fout.write(struct.pack("i", 0)) + elif tokenizer.is_byte(i): + # "" tokens (which may be invalid UTF-8) + piece = tokenizer.id_to_piece(i) + if len(piece) != 6: + print("Invalid token: " + piece) + sys.exit(1) + byte_value = int(piece[3:-1], 16) + fout.write(struct.pack("i", 1)) + fout.write(struct.pack("B", byte_value)) + else: + # normal token. Uses U+2581 (LOWER ONE EIGHTH BLOCK) to represent spaces. 
+ text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") + fout.write(struct.pack("i", len(text))) + fout.write(text) + +def write_header(shape, dst_name, ftype_cur): + sname = dst_name.encode('utf-8') + fout.write(struct.pack("iii", len(shape), len(sname), ftype_cur)) + fout.write(struct.pack("i" * len(shape), *shape[::-1])) + fout.write(sname) + +def convert_non_q4(src_name, dst_name): + v = model[src_name] + shape = v.shape + print("Processing non-Q4 variable: " + src_name + " with shape: ", shape, " and type: ", v.dtype) + if len(shape) == 1: + print(" Converting to float32") + v = v.to(torch.float32) + + ftype_cur = {torch.float16: 1, torch.float32: 0}[v.dtype] + + # header + write_header(shape, dst_name, ftype_cur) + + # data + v.numpy().tofile(fout) + +def convert_q4(src_name, dst_name, permute=False): + zeros = model[f"{src_name}.zeros"].numpy() + scales = model[f"{src_name}.scales"].numpy() + bias = model[f"{src_name}.bias"].numpy() + qweight = model[f"{src_name}.qweight"].numpy().T # transpose + + # Q4_1 does not support bias; good thing the bias is always all zeros. + assert not np.any(bias) + + # Each int32 item is actually 8 int4 items packed together, and it's transposed. + shape = (qweight.shape[0], qweight.shape[1] * 8) + + print("Processing Q4 variable: " + src_name + " with shape: ", shape) + + # The output format has the int4 weights in groups of 32 rather than 8. + # It looks like this: + # For each row: + # For each group of 32 columns: + # - addend (float32, 4 bytes) + # - scale (float32, 4 bytes) + # - weights (int4 * 32, 16 bytes) + # Note that in the input, the scales and addends are shared between all + # the columns in a row, so we end up wasting quite a bit of memory with + # repeated scales and addends. + + addends = -zeros # flip sign + + # Since the output format is mixed between integers and floats, we have + # to hackily view the floats as int32s just so numpy will let us + # concatenate them. + addends_view = addends.view(dtype=np.int32) + scales_view = scales.view(dtype=np.int32) + + # Split into groups of 4 columns (i.e. 32 columns of quantized data): + grouped = qweight.reshape([qweight.shape[0], qweight.shape[1] // 4, 4]) + + # Repeat addends and scales: + addends_rep = np.atleast_3d(addends_view).repeat(grouped.shape[1], axis=1) + scales_rep = np.atleast_3d(scales_view).repeat(grouped.shape[1], axis=1) + + blob = np.concatenate([scales_rep, addends_rep, grouped], axis=2, casting='no') + + if permute: + # Permute some rows to undo the permutation done by convert_llama_weights_to_hf.py. + # This can be done after the above conversion because it doesn't affect column order/layout. 
+ blob = (blob.reshape(n_head, 2, shape[0] // n_head // 2, *blob.shape[1:]) + .swapaxes(1, 2) + .reshape(blob.shape)) + + # header + write_header(shape, dst_name, 3) # ftype = Q4_1 + + # data + blob.tofile(fout) + +convert_non_q4("model.embed_tokens.weight", "tok_embeddings.weight") +convert_non_q4("model.norm.weight", "norm.weight") +convert_non_q4("lm_head.weight", "output.weight") + +for i in range(n_layer): + convert_q4(f"model.layers.{i}.self_attn.q_proj", f"layers.{i}.attention.wq.weight", permute=True) + convert_q4(f"model.layers.{i}.self_attn.k_proj", f"layers.{i}.attention.wk.weight", permute=True) + convert_q4(f"model.layers.{i}.self_attn.v_proj", f"layers.{i}.attention.wv.weight") + convert_q4(f"model.layers.{i}.self_attn.o_proj", f"layers.{i}.attention.wo.weight") + + convert_q4(f"model.layers.{i}.mlp.gate_proj", f"layers.{i}.feed_forward.w1.weight") + convert_q4(f"model.layers.{i}.mlp.down_proj", f"layers.{i}.feed_forward.w2.weight") + convert_q4(f"model.layers.{i}.mlp.up_proj", f"layers.{i}.feed_forward.w3.weight") + + convert_non_q4(f"model.layers.{i}.input_layernorm.weight", f"layers.{i}.attention_norm.weight") + convert_non_q4(f"model.layers.{i}.post_attention_layernorm.weight", f"layers.{i}.ffn_norm.weight") + + +fout.close() + +print("Done. Output file: " + fname_out) +print("") diff --git a/main.cpp b/main.cpp index 9f46d5698..4b220c8cf 100644 --- a/main.cpp +++ b/main.cpp @@ -157,6 +157,12 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca n_parts = LLAMA_N_PARTS.at(hparams.n_embd); } + // temp warning to tell the user to use "--n_parts" + if (hparams.f16 == 4 && n_parts != 1) { + fprintf(stderr, "%s: GPTQ model detected - are you sure n_parts should be %d? we normally expect it to be 1\n", __func__, n_parts); + fprintf(stderr, "%s: use '--n_parts 1' if necessary\n", __func__); + } + fprintf(stderr, "%s: n_vocab = %d\n", __func__, hparams.n_vocab); fprintf(stderr, "%s: n_ctx = %d\n", __func__, hparams.n_ctx); fprintf(stderr, "%s: n_embd = %d\n", __func__, hparams.n_embd); @@ -198,12 +204,14 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca // for the big tensors, we have the option to store the data in 16-bit floats or quantized // in order to save memory and also to speed up the computation - ggml_type wtype = GGML_TYPE_COUNT; + // wtype is for per-layer weights, while vtype is for other weights + ggml_type wtype, vtype; switch (model.hparams.f16) { - case 0: wtype = GGML_TYPE_F32; break; - case 1: wtype = GGML_TYPE_F16; break; - case 2: wtype = GGML_TYPE_Q4_0; break; - case 3: wtype = GGML_TYPE_Q4_1; break; + case 0: wtype = vtype = GGML_TYPE_F32; break; + case 1: wtype = vtype = GGML_TYPE_F16; break; + case 2: wtype = vtype = GGML_TYPE_Q4_0; break; + case 3: wtype = vtype = GGML_TYPE_Q4_1; break; + case 4: wtype = GGML_TYPE_Q4_1; vtype = GGML_TYPE_F16; break; default: { fprintf(stderr, "%s: invalid model file '%s' (bad f16 value %d)\n", @@ -224,11 +232,11 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca const int n_ctx = hparams.n_ctx; const int n_vocab = hparams.n_vocab; - ctx_size += n_embd*n_vocab*ggml_type_sizef(wtype); // tok_embeddings + ctx_size += n_embd*n_vocab*ggml_type_sizef(vtype); // tok_embeddings ctx_size += n_embd*ggml_type_sizef(GGML_TYPE_F32); // norm - ctx_size += n_embd*n_vocab*ggml_type_sizef(wtype); // output + ctx_size += n_embd*n_vocab*ggml_type_sizef(vtype); // output ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // 
attention_norm @@ -275,10 +283,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca model.layers.resize(n_layer); - model.tok_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + model.tok_embeddings = ggml_new_tensor_2d(ctx, vtype, n_embd, n_vocab); model.norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); - model.output = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab); + model.output = ggml_new_tensor_2d(ctx, vtype, n_embd, n_vocab); // map by name model.tensors["tok_embeddings.weight"] = model.tok_embeddings; From 89d5d90f3b6d25f134da7a8e252c3432bffcf674 Mon Sep 17 00:00:00 2001 From: Matvey Soloviev Date: Tue, 21 Mar 2023 18:11:01 +0100 Subject: [PATCH 73/93] Fix color codes emitting mid-UTF8 code. (#312) --- main.cpp | 61 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/main.cpp b/main.cpp index 4b220c8cf..43b82b1e4 100644 --- a/main.cpp +++ b/main.cpp @@ -36,6 +36,36 @@ extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHand #define ANSI_COLOR_RESET "\x1b[0m" #define ANSI_BOLD "\x1b[1m" +/* Keep track of current color of output, and emit ANSI code if it changes. */ +enum console_state { + CONSOLE_STATE_DEFAULT=0, + CONSOLE_STATE_PROMPT, + CONSOLE_STATE_USER_INPUT +}; + +static console_state con_st = CONSOLE_STATE_DEFAULT; +static bool con_use_color = false; + +void set_console_state(console_state new_st) +{ + if (!con_use_color) return; + // only emit color code if state changed + if (new_st != con_st) { + con_st = new_st; + switch(con_st) { + case CONSOLE_STATE_DEFAULT: + printf(ANSI_COLOR_RESET); + return; + case CONSOLE_STATE_PROMPT: + printf(ANSI_COLOR_YELLOW); + return; + case CONSOLE_STATE_USER_INPUT: + printf(ANSI_BOLD ANSI_COLOR_GREEN); + return; + } + } +} + static const int EOS_TOKEN_ID = 2; // determine number of model parts based on the dimension @@ -866,7 +896,7 @@ static bool is_interacting = false; #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) void sigint_handler(int signo) { - printf(ANSI_COLOR_RESET); + set_console_state(CONSOLE_STATE_DEFAULT); printf("\n"); // this also force flush stdout. 
if (signo == SIGINT) { if (!is_interacting) { @@ -925,6 +955,10 @@ int main(int argc, char ** argv) { params.prompt = gpt_random_prompt(rng); } + // save choice to use color for later + // (note for later: this is a slightly awkward choice) + con_use_color = params.use_color; + // params.prompt = R"(// this function checks if the number n is prime //bool is_prime(int n) {)"; @@ -1040,18 +1074,18 @@ int main(int argc, char ** argv) { int remaining_tokens = params.n_predict; - // set the color for the prompt which will be output initially - if (params.use_color) { #if defined (_WIN32) + if (params.use_color) { // Enable ANSI colors on Windows 10+ unsigned long dwMode = 0; void* hConOut = GetStdHandle((unsigned long)-11); // STD_OUTPUT_HANDLE (-11) if (hConOut && hConOut != (void*)-1 && GetConsoleMode(hConOut, &dwMode) && !(dwMode & 0x4)) { SetConsoleMode(hConOut, dwMode | 0x4); // ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x4) } -#endif - printf(ANSI_COLOR_YELLOW); } +#endif + // the first thing we will do is to output the prompt, so set color accordingly + set_console_state(CONSOLE_STATE_PROMPT); while (remaining_tokens > 0 || params.interactive) { // predict @@ -1125,8 +1159,8 @@ int main(int argc, char ** argv) { fflush(stdout); } // reset color to default if we there is no pending user input - if (!input_noecho && params.use_color && (int)embd_inp.size() == input_consumed) { - printf(ANSI_COLOR_RESET); + if (!input_noecho && (int)embd_inp.size() == input_consumed) { + set_console_state(CONSOLE_STATE_DEFAULT); } // in interactive mode, and not currently processing queued inputs; @@ -1146,6 +1180,9 @@ int main(int argc, char ** argv) { } } if (is_interacting) { + // potentially set color to indicate we are taking user input + set_console_state(CONSOLE_STATE_USER_INPUT); + if (params.instruct) { input_consumed = embd_inp.size(); embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end()); @@ -1153,8 +1190,6 @@ int main(int argc, char ** argv) { printf("\n> "); } - // currently being interactive - if (params.use_color) printf(ANSI_BOLD ANSI_COLOR_GREEN); std::string buffer; std::string line; bool another_line = true; @@ -1167,7 +1202,9 @@ int main(int argc, char ** argv) { } buffer += line + '\n'; // Append the line to the result } while (another_line); - if (params.use_color) printf(ANSI_COLOR_RESET); + + // done taking input, reset color + set_console_state(CONSOLE_STATE_DEFAULT); std::vector line_inp = ::llama_tokenize(vocab, buffer, false); embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); @@ -1218,9 +1255,7 @@ int main(int argc, char ** argv) { ggml_free(model.ctx); - if (params.use_color) { - printf(ANSI_COLOR_RESET); - } + set_console_state(CONSOLE_STATE_DEFAULT); return 0; } From 353ec251a42491f5192c48561da4b444ef67f23c Mon Sep 17 00:00:00 2001 From: "Fabio R. 
Sluzala" Date: Tue, 21 Mar 2023 14:21:50 -0300 Subject: [PATCH 74/93] We could use std::unordered_map over std::map (#305) * Improve performance by changing std::map to std::unordered_map and std::map id_to_token; to std::vector id_to_token; * fix last commit on gpt_vocab_init add vocab.id_to_token.resize(vocab.token_to_id.size()); * Removed include * Nest struct token score inside gpt_vocab * renamed token to tok --- main.cpp | 18 ++++++++++-------- quantize.cpp | 8 +++++--- utils.cpp | 20 ++++++++++++-------- utils.h | 14 +++++++++----- 4 files changed, 36 insertions(+), 24 deletions(-) diff --git a/main.cpp b/main.cpp index 43b82b1e4..fe9e583f8 100644 --- a/main.cpp +++ b/main.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -69,7 +68,7 @@ void set_console_state(console_state new_st) static const int EOS_TOKEN_ID = 2; // determine number of model parts based on the dimension -static const std::map LLAMA_N_PARTS = { +static const std::unordered_map LLAMA_N_PARTS = { { 4096, 1 }, { 5120, 2 }, { 6656, 4 }, @@ -123,7 +122,7 @@ struct llama_model { // struct ggml_context * ctx; - std::map tensors; + std::unordered_map tensors; }; // load the model's weights from a file @@ -208,6 +207,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca // load vocab { std::string word; + vocab.id_to_token.resize(model.hparams.n_vocab); std::vector tmp(64); for (int i = 0; i < model.hparams.n_vocab; i++) { @@ -227,8 +227,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca fin.read((char *) &score, sizeof(score)); vocab.token_to_id[word] = i; - vocab.id_to_token[i] = word; - vocab.score[i] = score; + + auto &tok_score = vocab.id_to_token[i]; + tok_score.tok = word; + tok_score.score = score; } } @@ -1028,7 +1030,7 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); for (int i = 0; i < (int) embd_inp.size(); i++) { - fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str()); + fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str()); } fprintf(stderr, "\n"); if (params.interactive) { @@ -1154,7 +1156,7 @@ int main(int argc, char ** argv) { // display text if (!input_noecho) { for (auto id : embd) { - printf("%s", vocab.id_to_token[id].c_str()); + printf("%s", vocab.id_to_token[id].tok.c_str()); } fflush(stdout); } @@ -1169,7 +1171,7 @@ int main(int argc, char ** argv) { // check for reverse prompt std::string last_output; for (auto id : last_n_tokens) { - last_output += vocab.id_to_token[id]; + last_output += vocab.id_to_token[id].tok; } // Check if each of the reverse prompts appears at the end of the output. 
diff --git a/quantize.cpp b/quantize.cpp index b90f34f48..52b7ac9b3 100644 --- a/quantize.cpp +++ b/quantize.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -130,6 +129,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna } std::string word; + vocab.id_to_token.resize(n_vocab); for (int i = 0; i < n_vocab; i++) { uint32_t len; finp.read ((char *) &len, sizeof(len)); @@ -144,8 +144,10 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna fout.write((char *) &score, sizeof(score)); vocab.token_to_id[word] = i; - vocab.id_to_token[i] = word; - vocab.score[i] = score; + + auto &tok_score = vocab.id_to_token[i]; + tok_score.tok = word; + tok_score.score = score; } } diff --git a/utils.cpp b/utils.cpp index 7c6864c8f..b15c68ade 100644 --- a/utils.cpp +++ b/utils.cpp @@ -155,8 +155,8 @@ void replace(std::string & str, const std::string & needle, const std::string & } } -std::map json_parse(const std::string & fname) { - std::map result; +std::unordered_map json_parse(const std::string & fname) { + std::unordered_map result; // read file into string std::string json; @@ -360,16 +360,16 @@ private: return; } - auto score = vocab_.score.find((*token).second); - - if (score == vocab_.score.end()) { + if (static_cast((*token).second) >= vocab_.id_to_token.size()) { return; } + const auto &tok_score = vocab_.id_to_token[(*token).second]; + llama_sp_bigram bigram; bigram.left = left; bigram.right = right; - bigram.score = (*score).second; + bigram.score = tok_score.score; bigram.size = text.size(); work_queue_.push(bigram); } @@ -393,6 +393,8 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) { std::string word; std::vector tmp(64); + vocab.id_to_token.resize(n_vocab); + for (int i = 0; i < n_vocab; i++) { uint32_t len; fin.read((char *) &len, sizeof(len)); @@ -410,8 +412,10 @@ bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) { fin.read((char *) &score, sizeof(score)); vocab.token_to_id[word] = i; - vocab.id_to_token[i] = word; - vocab.score[i] = score; + + auto &tok_score = vocab.id_to_token[i]; + tok_score.tok = word; + tok_score.score = score; } return true; diff --git a/utils.h b/utils.h index 6693775c5..312903859 100644 --- a/utils.h +++ b/utils.h @@ -3,7 +3,7 @@ #pragma once #include -#include +#include #include #include #include @@ -65,15 +65,19 @@ struct llama_vocab { using id = int32_t; using token = std::string; - std::map token_to_id; - std::map id_to_token; - std::map score; + struct token_score { + token tok; + float score; + }; + + std::unordered_map token_to_id; + std::vector id_to_token; }; void replace(std::string & str, const std::string & needle, const std::string & replacement); // poor-man's JSON parsing -std::map json_parse(const std::string & fname); +std::unordered_map json_parse(const std::string & fname); // TODO: temporary until #77 is merged, need this now for some tokenizer tests bool llama_vocab_load(const std::string & fname, llama_vocab & vocab); From 0f6135270839f0715843c4d480c63ae150def419 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 21 Mar 2023 19:47:27 +0200 Subject: [PATCH 75/93] Update issue templates --- .github/ISSUE_TEMPLATE/custom.md | 198 +++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/custom.md diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md new file mode 100644 index 000000000..fe6a97bea --- /dev/null +++ 
b/.github/ISSUE_TEMPLATE/custom.md @@ -0,0 +1,198 @@ +--- +name: Custom issue template +about: Used to report user-related issues with the software +title: "[User] I encountered a problem .." +labels: '' +assignees: '' + +--- + +# Prerequisites + +Please answer the following questions for yourself before submitting an issue. + +- [ ] I am running the latest code. Development is very rapid so there are no tagged versions as of now. +- [ ] I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md). +- [ ] I [searched using keywords relevant to my issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) to make sure that I am creating a new issue that is not already open (or closed). +- [ ] I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new bug or useful enhancement to share. + +# Expected Behavior + +Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do. + +# Current Behavior + +Please provide a detailed written description of what `llama.cpp` did, instead. + +# Environment and Context + +Please provide detailed information about your computer setup. This is important in case the issue is not reproducible except under certain specific conditions. + +* Physical (or virtual) hardware you are using, e.g. for Linux: + +`$ lscpu` + +* Operating System, e.g. for Linux: + +`$ uname -a` + +* SDK version, e.g. for Linux: + +``` +$ python3 --version +$ make --version +$ g++ --version +``` + +# Models + +* The LLaMA models are officially distributed by Facebook and will never be provided through this repository. See this [pull request in Facebook's LLaMA repository](https://github.com/facebookresearch/llama/pull/73/files) if you need to obtain access to the model data. +* If your issue is with model conversion please verify the `sha256sum` of each of your `consolidated*.pth` and `ggml-model-XXX.bin` files to confirm that you have the correct model data files before logging an issue. [Latest sha256 sums for your reference](https://github.com/ggerganov/llama.cpp/issues/238). +* If your issue is with model generation quality then please at least scan the following links and papers to understand the limitations of LLaMA models. This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between LLaMA models and ChatGPT: + * LLaMA: + * [Introducing LLaMA: A foundational, 65-billion-parameter large language model](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) + * [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) + * GPT-3 + * [Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) + * GPT-3.5 / InstructGPT / ChatGPT: + * [Aligning language models to follow instructions](https://openai.com/research/instruction-following) + * [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155) + +# Failure Information (for bugs) + +Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template. + +# Steps to Reproduce + +Please provide detailed steps for reproducing the issue. We are not sitting in front of your screen, so the more detail the better. + +1. step 1 +2. step 2 +3. step 3 +4. etc.
+ +# Failure Logs + +Please include any relevant log snippets or files. If it works under one configuration but not under another, please provide logs for both configurations and their corresponding outputs so it is easy to see where behavior changes. + +Also, please try to **avoid using screenshots** if at all possible. Instead, copy/paste the console output and use [Github's markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) to cleanly format your logs for easy readability. e.g. + +``` +llama.cpp$ git log | head -1 +commit 2af23d30434a677c6416812eea52ccc0af65119c + +llama.cpp$ lscpu | egrep "AMD|Flags" +Vendor ID: AuthenticAMD +Model name: AMD Ryzen Threadripper 1950X 16-Core Processor +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid amd_dcm aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb hw_pstate ssbd ibpb vmmcall fsgsbase bmi1 avx2 smep bmi2 rdseed adx smap clflushopt sha_ni xsaveopt xsavec xgetbv1 xsaves clzero irperf xsaveerptr arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif overflow_recov succor smca sme sev +Virtualization: AMD-V + +llama.cpp$ python3 --version +Python 3.10.9 + +llama.cpp$ pip list | egrep "torch|numpy|sentencepiece" +numpy 1.24.2 +numpydoc 1.5.0 +sentencepiece 0.1.97 +torch 1.13.1 +torchvision 0.14.1 + +llama.cpp$ make --version | head -1 +GNU Make 4.3 + +$ md5sum ./models/65B/ggml-model-q4_0.bin +dbdd682cce80e2d6e93cefc7449df487 ./models/65B/ggml-model-q4_0.bin +``` +Here's a run with the Linux command [perf](https://www.brendangregg.com/perf.html) + +``` +llama.cpp$ perf stat ./main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p "Please close your issue when it has been answered." +main: seed = 1679149377 +llama_model_load: loading model from './models/65B/ggml-model-q4_0.bin' - please wait ... +llama_model_load: n_vocab = 32000 +llama_model_load: n_ctx = 512 +llama_model_load: n_embd = 8192 +llama_model_load: n_mult = 256 +llama_model_load: n_head = 64 +llama_model_load: n_layer = 80 +llama_model_load: n_rot = 128 +llama_model_load: f16 = 2 +llama_model_load: n_ff = 22016 +llama_model_load: n_parts = 8 +llama_model_load: ggml ctx size = 41477.73 MB +llama_model_load: memory_size = 2560.00 MB, n_mem = 40960 +llama_model_load: loading model part 1/8 from './models/65B/ggml-model-q4_0.bin' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 2/8 from './models/65B/ggml-model-q4_0.bin.1' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 3/8 from './models/65B/ggml-model-q4_0.bin.2' +llama_model_load: .......................................................................................... 
done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 4/8 from './models/65B/ggml-model-q4_0.bin.3' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 5/8 from './models/65B/ggml-model-q4_0.bin.4' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 6/8 from './models/65B/ggml-model-q4_0.bin.5' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 7/8 from './models/65B/ggml-model-q4_0.bin.6' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 8/8 from './models/65B/ggml-model-q4_0.bin.7' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 + +system_info: n_threads = 16 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | + +main: prompt: 'Please close your issue when it has been answered.' +main: number of tokens in prompt = 11 + 1 -> '' + 12148 -> 'Please' + 3802 -> ' close' + 596 -> ' your' + 2228 -> ' issue' + 746 -> ' when' + 372 -> ' it' + 756 -> ' has' + 1063 -> ' been' + 7699 -> ' answered' + 29889 -> '.' + +sampling parameters: temp = 0.800000, top_k = 40, top_p = 0.950000, repeat_last_n = 64, repeat_penalty = 1.300000 + + +Please close your issue when it has been answered. +@duncan-donut: I'm trying to figure out what kind of "support" you need for this script and why, exactly? Is there a question about how the code works that hasn't already been addressed in one or more comments below this ticket, or are we talking something else entirely like some sorta bugfixing job because your server setup is different from mine?? +I can understand if your site needs to be running smoothly and you need help with a fix of sorts but there should really be nothing wrong here that the code itself could not handle. And given that I'm getting reports about how it works perfectly well on some other servers, what exactly are we talking? A detailed report will do wonders in helping us get this resolved for ya quickly so please take your time and describe the issue(s) you see as clearly & concisely as possible!! +@duncan-donut: I'm not sure if you have access to cPanel but you could try these instructions. It is worth a shot! Let me know how it goes (or what error message, exactly!) when/if ya give that code a go? 
[end of text] + + +main: mem per token = 71159620 bytes +main: load time = 19309.95 ms +main: sample time = 168.62 ms +main: predict time = 223895.61 ms / 888.47 ms per token +main: total time = 246406.42 ms + + Performance counter stats for './main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p Please close your issue when it has been answered.': + + 3636882.89 msec task-clock # 14.677 CPUs utilized + 13509 context-switches # 3.714 /sec + 2436 cpu-migrations # 0.670 /sec + 10476679 page-faults # 2.881 K/sec + 13133115082869 cycles # 3.611 GHz (16.77%) + 29314462753 stalled-cycles-frontend # 0.22% frontend cycles idle (16.76%) + 10294402631459 stalled-cycles-backend # 78.39% backend cycles idle (16.74%) + 23479217109614 instructions # 1.79 insn per cycle + # 0.44 stalled cycles per insn (16.76%) + 2353072268027 branches # 647.002 M/sec (16.77%) + 1998682780 branch-misses # 0.08% of all branches (16.76%) + + 247.802177522 seconds time elapsed + + 3618.573072000 seconds user + 18.491698000 seconds sys +``` From 3f9c6135e45ae3f520b1e17197004cc60c9ca45b Mon Sep 17 00:00:00 2001 From: Mathieu Nayrolles Date: Tue, 21 Mar 2023 16:52:27 -0400 Subject: [PATCH 76/93] fix typo in chatLLaMa (#368) The prompt contains a typo where 'alound' is used instead of 'aloud'. --- examples/chatLLaMa | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/chatLLaMa b/examples/chatLLaMa index 97c48ac87..97ababbc5 100755 --- a/examples/chatLLaMa +++ b/examples/chatLLaMa @@ -25,7 +25,7 @@ GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 -- --prompt " Text transcript of a never ending dialog, where ${USER_NAME} interacts with an AI assistant named ${AI_NAME}. ${AI_NAME} is helpful, kind, honest, friendly, good at writing and never fails to answer ${USER_NAME}’s requests immediately and with details and precision. -There are no annotations like (30 seconds passed...) or (to himself), just what ${USER_NAME} and ${AI_NAME} say alound to each other. +There are no annotations like (30 seconds passed...) or (to himself), just what ${USER_NAME} and ${AI_NAME} say aloud to each other. The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long. The transcript only includes text, it does not include markup like HTML and Markdown. From 3366853e41fcc818222a0271c76b6106179106fb Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 21 Mar 2023 22:57:35 +0200 Subject: [PATCH 77/93] Add notice about pending change --- README.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d9a4b1bab..6149032b1 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,21 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ +--- + +**TEMPORARY NOTICE:** +Big code change incoming: https://github.com/ggerganov/llama.cpp/pull/370 + +Do not merge stuff until we merge this. Probably merge will happen on March 22 ~6:00am UTC + +--- + **Hot topics:** - [Added Alpaca support](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca) - Cache input prompts for faster initialization: https://github.com/ggerganov/llama.cpp/issues/64 - Create a `llama.cpp` logo: https://github.com/ggerganov/llama.cpp/issues/105 -**TEMPORARY NOTICE:** -If you're updating to the latest master, you will need to regenerate your model files as the format has changed. 
- ## Description The main goal is to run the model using 4-bit quantization on a MacBook From 01a297b09932e29f3319d6588977c32a926c7907 Mon Sep 17 00:00:00 2001 From: Erik Scholz Date: Tue, 21 Mar 2023 22:34:25 +0100 Subject: [PATCH 78/93] specify build type for ctest on windows (#371) --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5b1b5ddfb..3030f4fd6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -108,7 +108,7 @@ jobs: cd build cmake .. cmake --build . --config Release - ctest --output-on-failure + ctest -C Release --output-on-failure - name: Get commit hash id: commit From e6c9e0986c79ba1cc8848879b2fcce979f9b4672 Mon Sep 17 00:00:00 2001 From: anzz1 Date: Tue, 21 Mar 2023 23:49:24 +0200 Subject: [PATCH 79/93] Fix bin dir for win ci --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3030f4fd6..5882fc747 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -119,7 +119,7 @@ jobs: id: pack_artifacts if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | - 7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-x64.zip .\build\Release\* + 7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-x64.zip .\build\bin\Release\* - name: Create release id: create_release From da0e9fe90ccf6e73597eb19dd0cfc0a28363fb3b Mon Sep 17 00:00:00 2001 From: Gary Mulder Date: Mon, 20 Mar 2023 20:14:06 +0000 Subject: [PATCH 80/93] Add SHA256SUMS file and instructions to README how to obtain and verify the downloads Hashes created using: sha256sum models/*B/*.pth models/*[7136]B/ggml-model-f16.bin* models/*[7136]B/ggml-model-q4_0.bin* > SHA256SUMS --- README.md | 14 ++++++++++++++ SHA256SUMS | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 SHA256SUMS diff --git a/README.md b/README.md index 6149032b1..fa70499b4 100644 --- a/README.md +++ b/README.md @@ -234,6 +234,20 @@ cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach. > ``` +### Obtaining and verifying the Facebook LLaMA original model and Stanford Alpaca model data + +* The LLaMA models are officially distributed by Facebook and will never be provided through this repository. See this [Pull Request in Facebook's LLaMA repository](https://github.com/facebookresearch/llama/pull/73/files) if you need to obtain access to the model data. + +* Please verify the sha256 checksums of all of your `consolidated*.pth` and corresponding converted `ggml-model-*.bin` model files to confirm that you have the correct model data files before creating an issue relating to your model files. + +The following command will verify if you have all possible latest files in your self-installed `./models` subdirectory: + +`sha256sum --ignore-missing -c SHA256SUMS` on Linux + +or + +`shasum -a 256 --ignore-missing -c SHA256SUMS` on macOS + ### Android You can easily run `llama.cpp` on Android device with [termux](https://play.google.com/store/apps/details?id=com.termux). 
diff --git a/SHA256SUMS b/SHA256SUMS new file mode 100644 index 000000000..532beaea2 --- /dev/null +++ b/SHA256SUMS @@ -0,0 +1,53 @@ +700df0d3013b703a806d2ae7f1bfb8e59814e3d06ae78be0c66368a50059f33d models/7B/consolidated.00.pth +abe4aec2cdc297e2916011f66c7efd6fb4424e0e84315503005b5c118358cc22 models/7B/ggml-model-f16.bin +f495fa02a0b5ef265e1864d9680eede7fd23a60b0a2f93edba8091e2a4ca68b9 models/7B/ggml-model-q4_0.bin +7e89e242ddc0dd6f060b43ca219ce8b3e8f08959a72cb3c0855df8bb04d46265 models/7B/params.json +745bf4e29a4dd6f411e72976d92b452da1b49168a4f41c951cfcc8051823cf08 models/13B/consolidated.00.pth +d5ccbcc465c71c0de439a5aeffebe8344c68a519bce70bc7f9f92654ee567085 models/13B/consolidated.01.pth +a6bd0537c6873f36c47292df0b6f794e1135f5aafb89c3343bcc9e93264bf167 models/13B/ggml-model-f16.bin +0fb0951b90f2ec46c1f2f2372af5dacb4614b27e9fb6c10c69fbec58d7dd0e36 models/13B/ggml-model-f16.bin.1 +1c218ba37ae61e15e35efd9949c78d6edf553b6280824c263cad56ae0b9d5a8f models/13B/ggml-model-q4_0.bin +c37a20c2ab9fa74b006b389085660269ee06110d1e45a494eb57d4602c9bcdb2 models/13B/ggml-model-q4_0.bin.1 +4ab77bec4d4405ccb66a97b282574c89a94417e3c32e5f68f37e2876fc21322f models/13B/params.json +e23294a58552d8cdec5b7e8abb87993b97ea6eced4178ff2697c02472539d067 models/30B/consolidated.00.pth +4e077b7136c7ae2302e954860cf64930458d3076fcde9443f4d0e939e95903ff models/30B/consolidated.01.pth +24a87f01028cbd3a12de551dcedb712346c0b5cbdeff1454e0ddf2df9b675378 models/30B/consolidated.02.pth +1adfcef71420886119544949767f6a56cb6339b4d5fcde755d80fe68b49de93b models/30B/consolidated.03.pth +def20ea508f4e36793719f857471e85b85f96e497a2cbffbbaa1b60e2b18202c models/30B/ggml-model-f16.bin +b37040aa67fa8608cb2d8e0719132cf3e267fd35ec1e2f0d37dbc9fa43d674f1 models/30B/ggml-model-f16.bin.1 +e7f263557e99069fe29003262ea5fa9ed885dbe79069083e6eb569b328cf30d3 models/30B/ggml-model-f16.bin.2 +2ad6a23af05eb720f202f63d130f4fc5de9b6d2efc95b921be003209a56695aa models/30B/ggml-model-f16.bin.3 +7de31d005e6d02ebd9603b2cf5329ad2f832b65d08873a098c5cafc4046cb9ed models/30B/ggml-model-q4_0.bin +f91feef9f30f9a023616db2e91297ca6d5d5d7b9eb351e452a82115c46f7da9e models/30B/ggml-model-q4_0.bin.1 +66f3a0916ac7a81839153eb061fa861030ed1892477c2f7af2ce4f98d2f6d06f models/30B/ggml-model-q4_0.bin.2 +e3c587ba97f83d2088b001bcda3026571065649ee3090bef6743a51390b01d3b models/30B/ggml-model-q4_0.bin.3 +2c07118ea98d69dbe7810d88520e30288fa994751b337f8fca02b171955f44cb models/30B/params.json +135c563f6b3938114458183afb01adc9a63bef3d8ff7cccc3977e5d3664ecafe models/65B/consolidated.00.pth +9a600b37b19d38c7e43809485f70d17d1dc12206c07efa83bc72bb498a568bde models/65B/consolidated.01.pth +e7babf7c5606f165a3756f527cb0fedc4f83e67ef1290391e52fb1cce5f26770 models/65B/consolidated.02.pth +73176ffb426b40482f2aa67ae1217ef79fbbd1fff5482bae5060cdc5a24ab70e models/65B/consolidated.03.pth +882e6431d0b08a8bc66261a0d3607da21cbaeafa96a24e7e59777632dbdac225 models/65B/consolidated.04.pth +a287c0dfe49081626567c7fe87f74cce5831f58e459b427b5e05567641f47b78 models/65B/consolidated.05.pth +72b4eba67a1a3b18cb67a85b70f8f1640caae9b40033ea943fb166bd80a7b36b models/65B/consolidated.06.pth +d27f5b0677d7ff129ceacd73fd461c4d06910ad7787cf217b249948c3f3bc638 models/65B/consolidated.07.pth +7eba2625260cd91f8de901fd9704a1aa39448425514a335a0d3878de4ab9dc77 models/65B/ggml-model-f16.bin +f6aa886575df0785d4231f30cc776d499ccde18857818effc0378c65b178e0b5 models/65B/ggml-model-f16.bin.1 +076037141682f5d7537955058c4740ab27f285aa4588915f830874a589c0693d models/65B/ggml-model-f16.bin.2 
+7853d96d2903ad7de2b2a89c4acf5a33a2f8e3c24ac39c9df6b44cdb42bf530a models/65B/ggml-model-f16.bin.3 +b16b7b941abb3bc03a14df1656140855e9360a5371c83e919b9da83a72362314 models/65B/ggml-model-f16.bin.4 +5291270216f888697695acb78ef28df0c080f9e85d3245c92fb9992d1fde6678 models/65B/ggml-model-f16.bin.5 +0685ee77715f34686841006f8f94d3e7eaf148b97cecc9d3eee72808b0f7989c models/65B/ggml-model-f16.bin.6 +00d993d73bb21d7c29388ffe0dced008cbaa0d391831dea77d7eb8f0b5c404b9 models/65B/ggml-model-f16.bin.7 +4e398f05842206e08cdc5e7bb4f6c7c34b9dc373435ece6f261b14b7b4fe9b89 models/65B/ggml-model-q4_0.bin +4c4e899e3b12d9f57c9dcea5a1fb41bbc72023323535551f6273582ca7d7294b models/65B/ggml-model-q4_0.bin.1 +d7b4594bbbd192043b3db0e5acc2561c42e6944e1cb91cc6e61510eee89dbcd8 models/65B/ggml-model-q4_0.bin.2 +9a099d271648863d923d0d097391ea0bc75591f27a2ca3a327760f42e6b69af2 models/65B/ggml-model-q4_0.bin.3 +5ee474051e418c5732b7949190b084d9d679db447f83c1de0d2a82daaa1a0cfa models/65B/ggml-model-q4_0.bin.4 +a45aa05e7212bd6782790722d68056c5419667ea6b564ccc94bbcb8111d79b8b models/65B/ggml-model-q4_0.bin.5 +a58fda714b759c28ad5e4c1d8bf8fda7b158fd5e4c4a49f851f36342fa97a105 models/65B/ggml-model-q4_0.bin.6 +a3540cfcbcda33c223c6b0d606034adbd78f17e0e5de1582b78795e78754f7a8 models/65B/ggml-model-q4_0.bin.7 +999ed1659b469ccc2a941714c0a9656fa571d17c9f7c8c7589817ca90edef51b models/65B/params.json +1f582babc2bd56bb63b33141898748657d369fd110c4358b2bc280907882bf13 models/alpaca-7B/ggml-model-q4_0.bin +e17730c6b62b565b098af023ca446dcb9e3535d4222ead6369c7aae67207eb3d models/alpaca-13B/ggml-model-q4_0.bin +9bcd1bb30e679c939f367be11b030fe20b3eb9a3606b9bc4106420f1827b6ae4 models/alpaca-30B/ggml-model-q4_0.bin +36079249f53c292a4c2302d7784005dcae94c865f0bedfdbfa51d9ddad402935 models/alpaca-30B/params.json From f5a77a629bd0f37ae1696747633ab42a5530ec15 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 22 Mar 2023 07:32:36 +0200 Subject: [PATCH 81/93] Introduce C-style API (#370) * Major refactoring - introduce C-style API * Clean up * Add * Add * Add .... * Fix timing reporting and accumulation * Measure eval time only for single-token calls * Change llama_tokenize return meaning --- CMakeLists.txt | 31 +- Makefile | 11 +- convert-pth-to-ggml.py | 2 +- ggml.c | 121 +++ ggml.h | 7 + llama.cpp | 1565 ++++++++++++++++++++++++++++++++++++ llama.h | 139 ++++ main.cpp | 914 ++------------------- models/ggml-vocab.bin | Bin 432578 -> 432610 bytes quantize.cpp | 310 +------ tests/CMakeLists.txt | 2 +- tests/test-tokenizer-0.cpp | 24 +- utils.cpp | 519 +----------- utils.h | 61 +- 14 files changed, 1954 insertions(+), 1752 deletions(-) create mode 100644 llama.cpp create mode 100644 llama.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bf0e77b4a..400cecf9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -207,15 +207,10 @@ else() message(STATUS "Unknown architecture") endif() - # -# Build library +# Build libraries # -add_executable(llama main.cpp) - -add_executable(quantize quantize.cpp) - add_library(utils OBJECT utils.cpp utils.h) @@ -229,14 +224,24 @@ add_library(ggml OBJECT target_include_directories(ggml PUBLIC .) target_compile_features(ggml PUBLIC c_std_11) # don't bump - -# -# Linking -# - target_link_libraries(ggml PRIVATE Threads::Threads ${LLAMA_EXTRA_LIBS}) -target_link_libraries(llama PRIVATE ggml utils) -target_link_libraries(quantize PRIVATE ggml utils) + +add_library(llama OBJECT + llama.cpp + llama.h) + +target_include_directories(llama PUBLIC .) 
+target_compile_features(llama PUBLIC cxx_std_11) # don't bump + +# +# Executables +# + +add_executable(main main.cpp) +target_link_libraries(main PRIVATE llama ggml utils) + +add_executable(quantize quantize.cpp) +target_link_libraries(quantize PRIVATE llama ggml utils) # # programs, examples and tests diff --git a/Makefile b/Makefile index 071275470..edb0c64c8 100644 --- a/Makefile +++ b/Makefile @@ -220,18 +220,21 @@ default: main quantize ggml.o: ggml.c ggml.h $(CC) $(CFLAGS) -c ggml.c -o ggml.o +llama.o: llama.cpp llama.h + $(CXX) $(CXXFLAGS) -c llama.cpp -o llama.o + utils.o: utils.cpp utils.h $(CXX) $(CXXFLAGS) -c utils.cpp -o utils.o clean: rm -f *.o main quantize -main: main.cpp ggml.o utils.o - $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS) +main: main.cpp ggml.o llama.o utils.o + $(CXX) $(CXXFLAGS) main.cpp ggml.o llama.o utils.o -o main $(LDFLAGS) @echo "\x1b[36mrun ./main -h for help\x1b[0m" -quantize: quantize.cpp ggml.o utils.o - $(CXX) $(CXXFLAGS) quantize.cpp ggml.o utils.o -o quantize $(LDFLAGS) +quantize: quantize.cpp ggml.o llama.o utils.o + $(CXX) $(CXXFLAGS) quantize.cpp ggml.o llama.o utils.o -o quantize $(LDFLAGS) # # Tests diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index db5b00fec..f0f6b0ec4 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -148,7 +148,7 @@ def main(): model = torch.load(fname_model, map_location="cpu") with open(fname_out, "wb") as fout: - fout.write(struct.pack("i", hparams["vocab_size"])) + write_header(fout, hparams, ftype) write_tokens(fout, tokenizer) del model diff --git a/ggml.c b/ggml.c index 8daac3510..d00544577 100644 --- a/ggml.c +++ b/ggml.c @@ -10702,6 +10702,127 @@ enum ggml_opt_result ggml_opt( //////////////////////////////////////////////////////////////////////////////// +size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist) { + const int nb = k / qk; + const size_t bs = (sizeof(float) + sizeof(uint8_t)*qk/2); + const size_t row_size = nb*bs; + + assert(k % qk == 0); + + const size_t pp_size = qk / 2; + uint8_t * pp = (uint8_t *) alloca(pp_size); + + char * pdst = (char *) dst; + + for (int j = 0; j < n; j += k) { + uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); + uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); + + for (int i = 0; i < nb; i++) { + float amax = 0.0f; // absolute max + + { + for (int l = 0; l < qk; l++) { + const float v = src[j + i*qk + l]; + amax = MAX(amax, fabsf(v)); + } + + const float d = amax / ((1 << 3) - 1); + const float id = d ? 
1.0f/d : 0.0f; + + *(float *) pd = d; + pd += bs; + + for (int l = 0; l < qk; l += 2) { + const float v0 = (src[j + i*qk + l + 0])*id; + const float v1 = (src[j + i*qk + l + 1])*id; + + const uint8_t vi0 = ((int8_t) (round(v0))) + 8; + const uint8_t vi1 = ((int8_t) (round(v1))) + 8; + + assert(vi0 >= 0 && vi0 < 16); + assert(vi1 >= 0 && vi1 < 16); + + hist[vi0]++; + hist[vi1]++; + + pp[l/2] = vi0 | (vi1 << 4); + } + + memcpy(pb, pp, pp_size); + pb += bs; + } + } + } + + return (n/k)*row_size; +} + +size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist) { + const int nb = k / qk; + const size_t bs = (2*sizeof(float) + sizeof(uint8_t)*qk/2); + const size_t row_size = nb*bs; + + assert(k % qk == 0); + + const size_t pp_size = qk / 2; + uint8_t * pp = (uint8_t *) alloca(pp_size); + + char * pdst = (char *) dst; + + for (int j = 0; j < n; j += k) { + uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); + uint8_t * pm = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); + uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + 2*sizeof(float)); + + //printf("n = %d, k = %d, nb = %d, row_size = %d, j = %d, pm = %p, pd = %p, pb = %p\n", n, k, nb, row_size, j, pm, pd, pb); + + for (int i = 0; i < nb; i++) { + float min = FLT_MAX; + float max = -FLT_MAX; + + { + for (int l = 0; l < qk; l++) { + const float v = src[j + i*qk + l]; + if (v < min) min = v; + if (v > max) max = v; + } + + const float d = (max - min) / ((1 << 4) - 1); + const float id = d ? 1.0f/d : 0.0f; + + *(float *) pd = d; + *(float *) pm = min; + pd += bs; + pm += bs; + + for (int l = 0; l < qk; l += 2) { + const float v0 = (src[j + i*qk + l + 0] - min)*id; + const float v1 = (src[j + i*qk + l + 1] - min)*id; + + const uint8_t vi0 = round(v0); + const uint8_t vi1 = round(v1); + + assert(vi0 >= 0 && vi0 < 16); + assert(vi1 >= 0 && vi1 < 16); + + hist[vi0]++; + hist[vi1]++; + + pp[l/2] = vi0 | (vi1 << 4); + } + + memcpy(pb, pp, pp_size); + pb += bs; + } + } + } + + return (n/k)*row_size; +} + +//////////////////////////////////////////////////////////////////////////////// + int ggml_cpu_has_avx(void) { #if defined(__AVX__) return 1; diff --git a/ggml.h b/ggml.h index bac4fe65c..48b6cc028 100644 --- a/ggml.h +++ b/ggml.h @@ -741,6 +741,13 @@ enum ggml_opt_result ggml_opt( struct ggml_opt_params params, struct ggml_tensor * f); +// +// quantization +// + +size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist); +size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist); + // // system info // diff --git a/llama.cpp b/llama.cpp new file mode 100644 index 000000000..08dfcb31f --- /dev/null +++ b/llama.cpp @@ -0,0 +1,1565 @@ +#include "llama.h" + +#include "ggml.h" + +#include +#include +#include +#include +#include +#include +#include + +// determine number of model parts based on the dimension +static const std::unordered_map LLAMA_N_PARTS = { + { 4096, 1 }, + { 5120, 2 }, + { 6656, 4 }, + { 8192, 8 }, +}; + +// default hparams (LLaMA 7B) +struct llama_hparams { + int32_t n_vocab = 32000; + int32_t n_ctx = 512; // this is provided as user input? 
+ int32_t n_embd = 4096; + int32_t n_mult = 256; + int32_t n_head = 32; + int32_t n_layer = 32; + int32_t n_rot = 64; + int32_t f16 = 1; +}; + +struct llama_layer { + // normalization + struct ggml_tensor * attention_norm; + + // attention + struct ggml_tensor * wq; + struct ggml_tensor * wk; + struct ggml_tensor * wv; + struct ggml_tensor * wo; + + // normalization + struct ggml_tensor * ffn_norm; + + // ff + struct ggml_tensor * w1; + struct ggml_tensor * w2; + struct ggml_tensor * w3; +}; + +struct llama_model { + llama_hparams hparams; + + struct ggml_tensor * tok_embeddings; + + struct ggml_tensor * norm; + struct ggml_tensor * output; + + std::vector layers; + + // key + value memory + struct ggml_tensor * memory_k; + struct ggml_tensor * memory_v; + + // + struct ggml_context * ctx; + std::unordered_map tensors; +}; + +struct llama_vocab { + using id = int32_t; + using token = std::string; + + struct token_score { + token tok; + float score; + }; + + std::unordered_map token_to_id; + std::vector id_to_token; +}; + +struct llama_context { + std::mt19937 rng; + + int64_t t_load_us = 0; + int64_t t_start_us = 0; + + int64_t t_sample_us = 0; + int64_t t_eval_us = 0; + + int32_t n_sample = 0; // number of tokens sampled + int32_t n_eval = 0; // number of eval calls + + llama_model model; + llama_vocab vocab; + + size_t mem_per_token = 0; + + // decode output (2-dimensional array: [n_tokens][n_vocab]) + std::vector logits; + bool logits_all = false; +}; + +struct llama_context_params llama_context_default_params() { + struct llama_context_params result = { + /*.n_ctx =*/ 512, + /*.n_parts =*/ -1, + /*.seed =*/ 0, + /*.f16_kv =*/ false, + /*.logits_all =*/ false, + /*.vocab_only =*/ false, + }; + + return result; +} + +// +// model loading +// + +static bool llama_model_load( + const std::string & fname, + llama_context & lctx, + int n_ctx, + int n_parts, + ggml_type memory_type, + bool vocab_only) { + fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); + + const int64_t t_start_us = ggml_time_us(); + + lctx.t_start_us = t_start_us; + + std::vector f_buf(1024*1024); + + auto & model = lctx.model; + auto & vocab = lctx.vocab; + + auto fin = std::ifstream(fname, std::ios::binary); + fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size()); + if (!fin) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); + return false; + } + + // verify magic + { + uint32_t magic; + fin.read((char *) &magic, sizeof(magic)); + if (magic == LLAMA_FILE_MAGIC_UNVERSIONED) { + fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", + __func__, fname.c_str()); + return false; + } + if (magic != LLAMA_FILE_MAGIC) { + fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); + return false; + } + + uint32_t format_version; + fin.read((char *) &format_version, sizeof(format_version)); + + if (format_version != LLAMA_FILE_VERSION) { + fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", + __func__, fname.c_str(), format_version, LLAMA_FILE_VERSION); + return false; + } + } + + int n_ff = 0; + + // load hparams + { + auto & hparams = model.hparams; + + fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); + //fin.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); + fin.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); + fin.read((char *) &hparams.n_mult, sizeof(hparams.n_mult)); + fin.read((char *) &hparams.n_head, 
sizeof(hparams.n_head)); + fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); + fin.read((char *) &hparams.n_rot, sizeof(hparams.n_rot)); + fin.read((char *) &hparams.f16, sizeof(hparams.f16)); + + hparams.n_ctx = n_ctx; + + n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult; + + if (n_parts < 1) { + n_parts = LLAMA_N_PARTS.at(hparams.n_embd); + } + + // temp warning to tell the user to use "--n_parts" + if (hparams.f16 == 4 && n_parts != 1) { + fprintf(stderr, "%s: GPTQ model detected - are you sure n_parts should be %d? we normally expect it to be 1\n", __func__, n_parts); + fprintf(stderr, "%s: use '--n_parts 1' if necessary\n", __func__); + } + + fprintf(stderr, "%s: n_vocab = %d\n", __func__, hparams.n_vocab); + fprintf(stderr, "%s: n_ctx = %d\n", __func__, hparams.n_ctx); + fprintf(stderr, "%s: n_embd = %d\n", __func__, hparams.n_embd); + fprintf(stderr, "%s: n_mult = %d\n", __func__, hparams.n_mult); + fprintf(stderr, "%s: n_head = %d\n", __func__, hparams.n_head); + fprintf(stderr, "%s: n_layer = %d\n", __func__, hparams.n_layer); + fprintf(stderr, "%s: n_rot = %d\n", __func__, hparams.n_rot); + fprintf(stderr, "%s: f16 = %d\n", __func__, hparams.f16); + fprintf(stderr, "%s: n_ff = %d\n", __func__, n_ff); + fprintf(stderr, "%s: n_parts = %d\n", __func__, n_parts); + } + + // load vocab + { + std::string word; + vocab.id_to_token.resize(model.hparams.n_vocab); + std::vector tmp(64); + + for (int i = 0; i < model.hparams.n_vocab; i++) { + uint32_t len; + fin.read((char *) &len, sizeof(len)); + + word.resize(len); + if (len > 0) { + tmp.resize(len); + fin.read(tmp.data(), len); + word.assign(tmp.data(), len); + } else { + word.clear(); + } + + float score; + fin.read((char *) &score, sizeof(score)); + + vocab.token_to_id[word] = i; + + auto &tok_score = vocab.id_to_token[i]; + tok_score.tok = word; + tok_score.score = score; + } + } + + if (vocab_only) { + return true; + } + + // for the big tensors, we have the option to store the data in 16-bit floats or quantized + // in order to save memory and also to speed up the computation + // wtype is for per-layer weights, while vtype is for other weights + ggml_type wtype, vtype; + switch (model.hparams.f16) { + case 0: wtype = vtype = GGML_TYPE_F32; break; + case 1: wtype = vtype = GGML_TYPE_F16; break; + case 2: wtype = vtype = GGML_TYPE_Q4_0; break; + case 3: wtype = vtype = GGML_TYPE_Q4_1; break; + case 4: wtype = GGML_TYPE_Q4_1; vtype = GGML_TYPE_F16; break; + default: + { + fprintf(stderr, "%s: invalid model file '%s' (bad f16 value %d)\n", + __func__, fname.c_str(), model.hparams.f16); + return false; + } + } + + auto & ctx = model.ctx; + + size_t ctx_size = 0; + + { + const auto & hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_vocab = hparams.n_vocab; + + ctx_size += n_embd*n_vocab*ggml_type_sizef(vtype); // tok_embeddings + + ctx_size += n_embd*ggml_type_sizef(GGML_TYPE_F32); // norm + + ctx_size += n_embd*n_vocab*ggml_type_sizef(vtype); // output + + ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // attention_norm + + ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype)); // wq + ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype)); // wk + ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype)); // wv + ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype)); // wo + + ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ffn_norm + + 
ctx_size += n_layer*(n_ff*n_embd*ggml_type_sizef(wtype)); // w1 + ctx_size += n_layer*(n_ff*n_embd*ggml_type_sizef(wtype)); // w2 + ctx_size += n_layer*(n_ff*n_embd*ggml_type_sizef(wtype)); // w3 + + ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(memory_type); // memory_k + ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(memory_type); // memory_v + + ctx_size += (5 + 10*n_layer)*256; // object overhead + + fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0)); + } + + // create the ggml context + { + struct ggml_init_params params = { + /*.mem_size =*/ ctx_size, + /*.mem_buffer =*/ NULL, + }; + + model.ctx = ggml_init(params); + if (!model.ctx) { + fprintf(stderr, "%s: ggml_init() failed\n", __func__); + return false; + } + } + + // prepare memory for the weights + { + const auto & hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_vocab = hparams.n_vocab; + + model.layers.resize(n_layer); + + model.tok_embeddings = ggml_new_tensor_2d(ctx, vtype, n_embd, n_vocab); + + model.norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + model.output = ggml_new_tensor_2d(ctx, vtype, n_embd, n_vocab); + + // map by name + model.tensors["tok_embeddings.weight"] = model.tok_embeddings; + + model.tensors["norm.weight"] = model.norm; + model.tensors["output.weight"] = model.output; + + for (int i = 0; i < n_layer; ++i) { + auto & layer = model.layers[i]; + + layer.attention_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.wq = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.wk = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.wv = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + layer.wo = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); + + layer.ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); + + layer.w1 = ggml_new_tensor_2d(ctx, wtype, n_embd, n_ff); + layer.w2 = ggml_new_tensor_2d(ctx, wtype, n_ff, n_embd); + layer.w3 = ggml_new_tensor_2d(ctx, wtype, n_embd, n_ff); + + // map by name + model.tensors["layers." + std::to_string(i) + ".attention_norm.weight"] = layer.attention_norm; + + model.tensors["layers." + std::to_string(i) + ".attention.wq.weight"] = layer.wq; + model.tensors["layers." + std::to_string(i) + ".attention.wk.weight"] = layer.wk; + model.tensors["layers." + std::to_string(i) + ".attention.wv.weight"] = layer.wv; + model.tensors["layers." + std::to_string(i) + ".attention.wo.weight"] = layer.wo; + + model.tensors["layers." + std::to_string(i) + ".ffn_norm.weight"] = layer.ffn_norm; + + model.tensors["layers." + std::to_string(i) + ".feed_forward.w1.weight"] = layer.w1; + model.tensors["layers." + std::to_string(i) + ".feed_forward.w2.weight"] = layer.w2; + model.tensors["layers." 
+ std::to_string(i) + ".feed_forward.w3.weight"] = layer.w3; + } + } + + // key + value memory + { + const auto & hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + + const int n_mem = n_layer*n_ctx; + const int n_elements = n_embd*n_mem; + + model.memory_k = ggml_new_tensor_1d(ctx, memory_type, n_elements); + model.memory_v = ggml_new_tensor_1d(ctx, memory_type, n_elements); + + const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); + + fprintf(stderr, "%s: memory_size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem); + } + + const size_t file_offset = fin.tellg(); + + fin.close(); + + std::vector tmp; + + for (int i = 0; i < n_parts; ++i) { + const int part_id = i; + //const int part_id = n_parts - i - 1; + + std::string fname_part = fname; + if (i > 0) { + fname_part += "." + std::to_string(i); + } + + fprintf(stderr, "%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str()); + + fin = std::ifstream(fname_part, std::ios::binary); + fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size()); + fin.seekg(file_offset); + + // load weights + { + int n_tensors = 0; + size_t total_size = 0; + + fprintf(stderr, "%s: ", __func__); + + while (true) { + int32_t n_dims; + int32_t length; + int32_t ftype; + + fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + fin.read(reinterpret_cast(&length), sizeof(length)); + fin.read(reinterpret_cast(&ftype), sizeof(ftype)); + + if (fin.eof()) { + break; + } + + int32_t nelements = 1; + int32_t ne[2] = { 1, 1 }; + for (int i = 0; i < n_dims; ++i) { + fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + + std::string name(length, 0); + fin.read(&name[0], length); + + if (model.tensors.find(name.data()) == model.tensors.end()) { + fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data()); + return false; + } + + // split_type = 0: split by columns + // split_type = 1: split by rows + int split_type = 0; + + // split_type = 0: + // regex: + // - tok_embeddings.* + // - layers.*.attention.wo.weight + // - layers.*.feed_forward.w2.weight + + // split_type = 1: + // regex: + // - output.* + // - layers.*.attention.wq.weight + // - layers.*.attention.wk.weight + // - layers.*.attention.wv.weight + // - layers.*.feed_forward.w1.weight + // - layers.*.feed_forward.w3.weight + if (name.find("tok_embeddings") != std::string::npos) { + split_type = 0; + } else if (name.find("layers") != std::string::npos) { + if (name.find("attention.wo.weight") != std::string::npos) { + split_type = 0; + } else if (name.find("feed_forward.w2.weight") != std::string::npos) { + split_type = 0; + } else { + split_type = 1; + } + } else if (name.find("output") != std::string::npos) { + split_type = 1; + } + + auto tensor = model.tensors[name.data()]; + + if (n_dims == 1) { + if (ggml_nelements(tensor) != nelements) { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data()); + return false; + } + } else { + if (ggml_nelements(tensor)/n_parts != nelements) { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data()); + return false; + } + } + + if (n_dims == 1) { + if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { + fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", + __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]); + return false; + } + } 
else { + if (split_type == 0) { + if (tensor->ne[0]/n_parts != ne[0] || tensor->ne[1] != ne[1]) { + fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", + __func__, name.data(), tensor->ne[0]/n_parts, tensor->ne[1], ne[0], ne[1]); + return false; + } + } else { + if (tensor->ne[0] != ne[0] || tensor->ne[1]/n_parts != ne[1]) { + fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", + __func__, name.data(), tensor->ne[0], tensor->ne[1]/n_parts, ne[0], ne[1]); + return false; + } + } + } + + if (0) { + static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", }; + fprintf(stderr, "%24s - [%5d, %5d], type = %6s, split = %d\n", name.data(), ne[0], ne[1], ftype_str[ftype], split_type); + } + + size_t bpe = 0; + + switch (ftype) { + case 0: bpe = ggml_type_size(GGML_TYPE_F32); break; + case 1: bpe = ggml_type_size(GGML_TYPE_F16); break; + case 2: bpe = ggml_type_size(GGML_TYPE_Q4_0); assert(ne[0] % 64 == 0); break; + case 3: bpe = ggml_type_size(GGML_TYPE_Q4_1); assert(ne[0] % 64 == 0); break; + default: + { + fprintf(stderr, "%s: unknown ftype %d in model file\n", __func__, ftype); + return false; + } + }; + + if (n_dims == 1 || n_parts == 1) { + if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", + __func__, name.data(), ggml_nbytes(tensor), nelements*bpe); + return false; + } + + if (part_id == 0) { + fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); + } else { + fin.seekg(ggml_nbytes(tensor), std::ios::cur); + } + + total_size += ggml_nbytes(tensor); + } else { + if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)/n_parts) { + fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", + __func__, name.data(), ggml_nbytes(tensor)/n_parts, nelements*bpe); + return false; + } + + if (split_type == 0) { + const int np0 = ne[0]; + + const size_t row_size = (tensor->ne[0]/ggml_blck_size(tensor->type))*ggml_type_size(tensor->type); + assert(row_size == tensor->nb[1]); + + for (int i1 = 0; i1 < ne[1]; ++i1) { + const size_t offset_row = i1*row_size; + const size_t offset = offset_row + ((part_id*np0)/ggml_blck_size(tensor->type))*ggml_type_size(tensor->type); + fin.read(reinterpret_cast(tensor->data) + offset, row_size/n_parts); + } + } else { + const int np1 = ne[1]; + + const size_t row_size = (tensor->ne[0]/ggml_blck_size(tensor->type))*ggml_type_size(tensor->type); + + for (int i1 = 0; i1 < ne[1]; ++i1) { + const size_t offset_row = (i1 + part_id*np1)*row_size; + fin.read(reinterpret_cast(tensor->data) + offset_row, row_size); + } + } + + total_size += ggml_nbytes(tensor)/n_parts; + } + + //fprintf(stderr, "%42s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? 
"float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0); + if (++n_tensors % 8 == 0) { + fprintf(stderr, "."); + fflush(stderr); + } + } + + fprintf(stderr, " done\n"); + + fprintf(stderr, "%s: model size = %8.2f MB / num tensors = %d\n", __func__, total_size/1024.0/1024.0, n_tensors); + } + + fin.close(); + } + + lctx.logits.reserve(lctx.model.hparams.n_ctx); + + lctx.t_load_us = ggml_time_us() - t_start_us; + + return true; +} + +// evaluate the transformer +// +// - lctx: llama context +// - tokens: new batch of tokens to process +// - n_past: the context size so far +// - n_threads: number of threads to use +// +static bool llama_eval_internal( + llama_context & lctx, + const llama_token * tokens, + const int n_tokens, + const int n_past, + const int n_threads) { + const int64_t t_start_us = ggml_time_us(); + + const int N = n_tokens; + + const auto & model = lctx.model; + const auto & hparams = model.hparams; + + const int n_embd = hparams.n_embd; + const int n_layer = hparams.n_layer; + const int n_ctx = hparams.n_ctx; + const int n_head = hparams.n_head; + const int n_vocab = hparams.n_vocab; + const int n_rot = hparams.n_embd/hparams.n_head; + + auto & mem_per_token = lctx.mem_per_token; + + // TODO: fix this hardcoded size + static size_t buf_size = 512u*1024*1024; + static void * buf = malloc(buf_size); + + if (mem_per_token > 0 && mem_per_token*N > buf_size) { + const size_t buf_size_new = 1.3*(mem_per_token*N); // add 30% to account for ggml object overhead + //fprintf(stderr, "\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new); + + // reallocate + buf_size = buf_size_new; + buf = realloc(buf, buf_size); + if (buf == nullptr) { + fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); + return false; + } + } + + struct ggml_init_params params = { + /*.mem_size =*/ buf_size, + /*.mem_buffer =*/ buf, + }; + + struct ggml_context * ctx0 = ggml_init(params); + ggml_cgraph gf = {}; + gf.n_threads = n_threads; + + struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); + memcpy(embd->data, tokens, N*ggml_element_size(embd)); + + struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd); + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * inpSA = inpL; + + struct ggml_tensor * cur; + + // norm + { + cur = ggml_rms_norm(ctx0, inpL); + + // cur = attention_norm*cur + cur = ggml_mul(ctx0, + ggml_repeat(ctx0, model.layers[il].attention_norm, cur), + cur); + } + + // self-attention + { + struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur); + struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur); + struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur); + + // store key and value to memory + if (N >= 1) { + struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past)); + struct ggml_tensor * v = ggml_view_1d(ctx0, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past)); + + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); + ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); + } + + // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3) + struct ggml_tensor * Q = + ggml_permute(ctx0, + ggml_rope(ctx0, + ggml_cpy(ctx0, + Qcur, + ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_embd/n_head, n_head, N)), + n_past, n_rot, 0), + 0, 2, 1, 3); + + // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 
1, 3) + struct ggml_tensor * K = + ggml_permute(ctx0, + ggml_rope(ctx0, + ggml_reshape_3d(ctx0, + ggml_view_1d(ctx0, model.memory_k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_k)*n_embd), + n_embd/n_head, n_head, n_past + N), + n_past, n_rot, 1), + 0, 2, 1, 3); + + // K * Q + struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); + + // KQ_scaled = KQ / sqrt(n_embd/n_head) + struct ggml_tensor * KQ_scaled = + ggml_scale(ctx0, + KQ, + ggml_new_f32(ctx0, 1.0f/sqrt(float(n_embd)/n_head)) + ); + + // KQ_masked = mask_past(KQ_scaled) + struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled, n_past); + + // KQ = soft_max(KQ_masked) + struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ_masked); + + // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous() + struct ggml_tensor * V_trans = + ggml_permute(ctx0, + ggml_reshape_3d(ctx0, + ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd), + n_embd/n_head, n_head, n_past + N), + 1, 2, 0, 3); + + // KQV = transpose(V) * KQ_soft_max + struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V_trans, KQ_soft_max); + + // KQV_merged = KQV.permute(0, 2, 1, 3) + struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); + + // cur = KQV_merged.contiguous().view(n_embd, N) + cur = ggml_cpy(ctx0, + KQV_merged, + ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); + + // projection (no bias) + cur = ggml_mul_mat(ctx0, + model.layers[il].wo, + cur); + } + + struct ggml_tensor * inpFF = ggml_add(ctx0, cur, inpSA); + + // feed-forward network + { + // norm + { + cur = ggml_rms_norm(ctx0, inpFF); + + // cur = ffn_norm*cur + cur = ggml_mul(ctx0, + ggml_repeat(ctx0, model.layers[il].ffn_norm, cur), + cur); + } + + struct ggml_tensor * tmp = ggml_mul_mat(ctx0, + model.layers[il].w3, + cur); + + + cur = ggml_mul_mat(ctx0, + model.layers[il].w1, + cur); + + // SILU activation + cur = ggml_silu(ctx0, cur); + + cur = ggml_mul(ctx0, cur, tmp); + + cur = ggml_mul_mat(ctx0, + model.layers[il].w2, + cur); + } + + cur = ggml_add(ctx0, cur, inpFF); + + // input for next layer + inpL = cur; + } + + // norm + { + inpL = ggml_rms_norm(ctx0, inpL); + + // inpL = norm*inpL + inpL = ggml_mul(ctx0, + ggml_repeat(ctx0, model.norm, inpL), + inpL); + } + + // lm_head + { + inpL = ggml_mul_mat(ctx0, model.output, inpL); + } + + // logits -> probs + //inpL = ggml_soft_max(ctx0, inpL); + + // run the computation + ggml_build_forward_expand(&gf, inpL); + ggml_graph_compute (ctx0, &gf); + + //if (n_past%100 == 0) { + // ggml_graph_print (&gf); + // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); + //} + + //embd_w.resize(n_vocab*N); + //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); + + auto & logits_out = lctx.logits; + + if (lctx.logits_all) { + logits_out.resize(n_vocab * N); + memcpy(logits_out.data(), (float *) ggml_get_data(inpL), sizeof(float)*n_vocab*N); + } else { + // return result for just the last token + logits_out.resize(n_vocab); + memcpy(logits_out.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab); + } + + if (mem_per_token == 0) { + mem_per_token = ggml_used_mem(ctx0)/N; + } + //fprintf(stderr, "used_mem = %zu\n", ggml_used_mem(ctx0)); + + ggml_free(ctx0); + + // measure the performance only for the single-token evals + if (N == 1) { + lctx.t_eval_us += ggml_time_us() - t_start_us; + lctx.n_eval++; + } + + return true; +} + +// +// tokenizer +// + +static size_t utf8_len(char src) { + const size_t 
lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 }; + uint8_t highbits = static_cast(src) >> 4; + return lookup[highbits]; +} + +struct llama_sp_symbol { + using index = int; + index prev; + index next; + const char * text; + size_t n; +}; + +struct llama_sp_bigram { + struct comparator { + bool operator()(llama_sp_bigram & l, llama_sp_bigram & r) { + return (l.score < r.score) || (l.score == r.score && l.left > r.left); + } + }; + using queue_storage = std::vector; + using queue = std::priority_queue; + llama_sp_symbol::index left; + llama_sp_symbol::index right; + float score; + size_t size; +}; + +// original implementation: +// https://github.com/ggerganov/llama.cpp/commit/074bea2eb1f1349a0118239c4152914aecaa1be4 +struct llama_tokenizer { + llama_tokenizer(const llama_vocab & vocab): vocab_(vocab) {} + + void tokenize(const std::string & text, std::vector & output) { + // split string into utf8 chars + int index = 0; + size_t offs = 0; + while (offs < text.size()) { + llama_sp_symbol sym; + size_t char_len = std::min(text.size() - offs, utf8_len(text[offs])); + sym.text = text.c_str() + offs; + sym.n = char_len; + offs += char_len; + sym.prev = index - 1; + sym.next = offs == text.size() ? -1 : index + 1; + index++; + symbols_.emplace_back(std::move(sym)); + } + + // seed the work queue with all possible 2-character tokens. + for (size_t i = 1; i < symbols_.size(); ++i) { + try_add_bigram(i - 1, i); + } + + // keep substituting the highest frequency pairs for as long as we can. + while (!work_queue_.empty()) { + auto bigram = work_queue_.top(); + work_queue_.pop(); + + auto & left_sym = symbols_[bigram.left]; + auto & right_sym = symbols_[bigram.right]; + + // if one of the symbols already got merged, skip it. + if (left_sym.n == 0 || right_sym.n == 0 || + left_sym.n + right_sym.n != bigram.size) { + continue; + } + + // merge the right sym into the left one + left_sym.n += right_sym.n; + right_sym.n = 0; + + //printf("left = '%*s' size = %zu\n", (int) left_sym.n, left_sym.text, bigram.size); + + // remove the right sym from the chain + left_sym.next = right_sym.next; + if (right_sym.next >= 0) { + symbols_[right_sym.next].prev = bigram.left; + } + + // find more substitutions + try_add_bigram(left_sym.prev, bigram.left); + try_add_bigram(bigram.left, left_sym.next); + } + + for (int i = 0; i != -1; i = symbols_[i].next) { + auto & symbol = symbols_[i]; + auto token = vocab_.token_to_id.find(std::string(symbol.text, symbol.n)); + + if (token == vocab_.token_to_id.end()) { + // output any symbols that did not form tokens as bytes. 
+ for (int j = 0; j < (int) symbol.n; ++j) { + llama_vocab::id token_id = static_cast(symbol.text[j]) + 3; + output.push_back(token_id); + } + } else { + output.push_back((*token).second); + } + } + } + +private: + void try_add_bigram(int left, int right) { + if (left == -1 || right == -1) { + return; + } + + const std::string text = std::string(symbols_[left].text, symbols_[left].n + symbols_[right].n); + auto token = vocab_.token_to_id.find(text); + + if (token == vocab_.token_to_id.end()) { + return; + } + + if (static_cast((*token).second) >= vocab_.id_to_token.size()) { + return; + } + + const auto &tok_score = vocab_.id_to_token[(*token).second]; + + llama_sp_bigram bigram; + bigram.left = left; + bigram.right = right; + bigram.score = tok_score.score; + bigram.size = text.size(); + work_queue_.push(bigram); + } + + const llama_vocab & vocab_; + std::vector symbols_; + llama_sp_bigram::queue work_queue_; +}; + +static std::vector llama_tokenize(const llama_vocab & vocab, const std::string & text, bool bos) { + llama_tokenizer tokenizer(vocab); + std::vector output; + + if (text.size() == 0) { + return output; + } + + if (bos) { + output.push_back(1); + } + + tokenizer.tokenize(text, output); + return output; +} + +// +// sampling +// + +static void sample_top_k(std::vector> & logits_id, int top_k) { + // find the top k tokens + std::partial_sort( + logits_id.begin(), + logits_id.begin() + top_k, logits_id.end(), + [](const std::pair & a, const std::pair & b) { + return a.first > b.first; + }); + + logits_id.resize(top_k); +} + +static llama_vocab::id llama_sample_top_p_top_k( + llama_context & lctx, + const std::vector & last_n_tokens, + int top_k, + double top_p, + double temp, + double repeat_penalty) { + auto & rng = lctx.rng; + + const auto & vocab = lctx.vocab; + const auto & logits = lctx.logits; + + int n_logits = vocab.id_to_token.size(); + + std::vector> logits_id; + logits_id.reserve(n_logits); + + { + const double scale = 1.0/temp; + for (int i = 0; i < n_logits; ++i) { + // repetition penalty from ctrl paper (https://arxiv.org/abs/1909.05858) + // credit https://github.com/facebookresearch/llama/compare/main...shawwn:llama:main + if (std::find(last_n_tokens.begin(), last_n_tokens.end(), i) != last_n_tokens.end()) { + // if score < 0 then repetition penalty has to multiplied to reduce the previous token probability + if (logits[i] < 0.0) { + logits_id.push_back(std::make_pair(logits[i]*scale*repeat_penalty, i)); + } else { + logits_id.push_back(std::make_pair(logits[i]*scale/repeat_penalty, i)); + } + } else { + logits_id.push_back(std::make_pair(logits[i]*scale, i)); + } + } + } + + sample_top_k(logits_id, top_k); + + double maxl = -std::numeric_limits::infinity(); + for (const auto & kv : logits_id) { + maxl = std::max(maxl, kv.first); + } + + // compute probs for the top k tokens + std::vector probs; + probs.reserve(logits_id.size()); + + double sum = 0.0; + for (const auto & kv : logits_id) { + double p = exp(kv.first - maxl); + probs.push_back(p); + sum += p; + } + + // normalize the probs + for (auto & p : probs) { + p /= sum; + } + + if (top_p < 1.0f) { + double cumsum = 0.0f; + for (int i = 0; i < (int) probs.size(); i++) { + cumsum += probs[i]; + if (cumsum >= top_p) { + probs.resize(i + 1); + logits_id.resize(i + 1); + break; + } + } + + cumsum = 1.0/cumsum; + for (int i = 0; i < (int) probs.size(); i++) { + probs[i] *= cumsum; + } + } + + //printf("\n"); + //for (int i = 0; i < (int) 10; i++) { + // printf("%d: '%s' %f\n", i, 
vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]); + //} + //printf("\n\n"); + //exit(0); + + std::discrete_distribution<> dist(probs.begin(), probs.end()); + int idx = dist(rng); + + return logits_id[idx].second; +} + +// +// quantization +// + +// TODO: reuse code from the llama_model_load() somehow +bool llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, int itype, int qk) { + ggml_type type = GGML_TYPE_Q4_1; + + switch (itype) { + case 2: type = GGML_TYPE_Q4_0; break; + case 3: type = GGML_TYPE_Q4_1; break; + default: fprintf(stderr, "%s: invalid quantization type %d\n", __func__, itype); return 1; + }; + + if (type != GGML_TYPE_Q4_0 && type != GGML_TYPE_Q4_1) { + fprintf(stderr, "%s: invalid quantization type %d\n", __func__, type); + return false; + } + + llama_vocab vocab; + + printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str()); + + auto finp = std::ifstream(fname_inp, std::ios::binary); + if (!finp) { + fprintf(stderr, "%s: failed to open '%s' for reading\n", __func__, fname_inp.c_str()); + return false; + } + + auto fout = std::ofstream(fname_out, std::ios::binary); + if (!fout) { + fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname_out.c_str()); + return false; + } + + // verify magic + { + uint32_t magic; + finp.read((char *) &magic, sizeof(magic)); + if (magic == LLAMA_FILE_MAGIC_UNVERSIONED) { + fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", + __func__, fname_inp.c_str()); + return false; + } + if (magic != LLAMA_FILE_MAGIC) { + fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str()); + return false; + } + + fout.write((char *) &magic, sizeof(magic)); + + uint32_t format_version; + finp.read((char *) &format_version, sizeof(format_version)); + + if (format_version != LLAMA_FILE_VERSION) { + fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", + __func__, fname_inp.c_str(), format_version, LLAMA_FILE_VERSION); + return false; + } + + fout.write((char *) &format_version, sizeof(format_version)); + } + + llama_hparams hparams; + + // load hparams + { + finp.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); + //finp.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); + finp.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); + finp.read((char *) &hparams.n_mult, sizeof(hparams.n_mult)); + finp.read((char *) &hparams.n_head, sizeof(hparams.n_head)); + finp.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); + finp.read((char *) &hparams.n_rot, sizeof(hparams.n_rot)); + finp.read((char *) &hparams.f16, sizeof(hparams.f16)); + + printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); + printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); + printf("%s: n_embd = %d\n", __func__, hparams.n_embd); + printf("%s: n_mult = %d\n", __func__, hparams.n_mult); + printf("%s: n_head = %d\n", __func__, hparams.n_head); + printf("%s: n_layer = %d\n", __func__, hparams.n_layer); + printf("%s: f16 = %d\n", __func__, hparams.f16); + + fout.write((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); + //fout.write((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); + fout.write((char *) &hparams.n_embd, sizeof(hparams.n_embd)); + fout.write((char *) &hparams.n_mult, sizeof(hparams.n_mult)); + fout.write((char *) &hparams.n_head, sizeof(hparams.n_head)); + fout.write((char *) &hparams.n_layer, sizeof(hparams.n_layer)); + fout.write((char *) &hparams.n_rot, 
sizeof(hparams.n_rot)); + fout.write((char *) &itype, sizeof(hparams.f16)); + } + + // load vocab + { + const int32_t n_vocab = hparams.n_vocab; + + if (n_vocab != hparams.n_vocab) { + fprintf(stderr, "%s: invalid model file '%s' (bad vocab size %d != %d)\n", + __func__, fname_inp.c_str(), n_vocab, hparams.n_vocab); + return false; + } + + std::string word; + vocab.id_to_token.resize(n_vocab); + for (int i = 0; i < n_vocab; i++) { + uint32_t len; + finp.read ((char *) &len, sizeof(len)); + fout.write((char *) &len, sizeof(len)); + + word.resize(len); + finp.read ((char *) word.data(), len); + fout.write((char *) word.data(), len); + + float score; + finp.read ((char *) &score, sizeof(score)); + fout.write((char *) &score, sizeof(score)); + + vocab.token_to_id[word] = i; + + auto &tok_score = vocab.id_to_token[i]; + tok_score.tok = word; + tok_score.score = score; + } + } + + // load weights + { + size_t total_size_org = 0; + size_t total_size_new = 0; + + std::vector work; + + std::vector data_u8; + std::vector data_f16; + std::vector data_f32; + + std::vector hist_all(1 << 4, 0); + + while (true) { + int32_t n_dims; + int32_t length; + int32_t ftype; + + finp.read(reinterpret_cast(&n_dims), sizeof(n_dims)); + finp.read(reinterpret_cast(&length), sizeof(length)); + finp.read(reinterpret_cast(&ftype), sizeof(ftype)); + + if (finp.eof()) { + break; + } + + int32_t nelements = 1; + int32_t ne[2] = { 1, 1 }; + for (int i = 0; i < n_dims; ++i) { + finp.read (reinterpret_cast(&ne[i]), sizeof(ne[i])); + nelements *= ne[i]; + } + + std::string name(length, 0); + finp.read (&name[0], length); + + { + static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", }; + printf("%48s - [%5d, %5d], type = %6s ", name.data(), ne[0], ne[1], ftype_str[ftype]); + } + + // regexes of tensor names to be quantized + const std::vector k_names = { + ".*weight", + }; + + bool quantize = false; + for (const auto & s : k_names) { + if (std::regex_match(name, std::regex(s))) { + quantize = true; + break; + } + } + + // quantize only 2D tensors + quantize &= (n_dims == 2); + + if (quantize) { + if (ftype != 0 && ftype != 1) { + fprintf(stderr, "%s: unsupported ftype %d for integer quantization\n", __func__, ftype); + return false; + } + + if (ftype == 1) { + data_f16.resize(nelements); + finp.read(reinterpret_cast(data_f16.data()), nelements * sizeof(ggml_fp16_t)); + data_f32.resize(nelements); + for (int i = 0; i < nelements; ++i) { + data_f32[i] = ggml_fp16_to_fp32(data_f16[i]); + } + } else { + data_f32.resize(nelements); + finp.read(reinterpret_cast(data_f32.data()), nelements * sizeof(float)); + } + + ftype = itype; + } else { + const int bpe = (ftype == 0) ? sizeof(float) : sizeof(uint16_t); + + data_u8.resize(nelements*bpe); + finp.read(reinterpret_cast(data_u8.data()), nelements * bpe); + } + + fout.write(reinterpret_cast(&n_dims), sizeof(n_dims)); + fout.write(reinterpret_cast(&length), sizeof(length)); + fout.write(reinterpret_cast(&ftype), sizeof(ftype)); + for (int i = 0; i < n_dims; ++i) { + fout.write(reinterpret_cast(&ne[i]), sizeof(ne[i])); + } + fout.write(&name[0], length); + + if (quantize) { + printf("quantizing .. 
"); + work.resize(nelements); // for quantization + + size_t cur_size = 0; + std::vector hist_cur(1 << 4, 0); + + switch (type) { + case GGML_TYPE_Q4_0: + { + cur_size = ggml_quantize_q4_0(data_f32.data(), work.data(), nelements, ne[0], qk, hist_cur.data()); + } break; + case GGML_TYPE_Q4_1: + { + cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], qk, hist_cur.data()); + } break; + default: + { + fprintf(stderr, "%s: unsupported quantization type %d\n", __func__, type); + return false; + } + } + + fout.write(reinterpret_cast(work.data()), cur_size); + total_size_new += cur_size; + + printf("size = %8.2f MB -> %8.2f MB | hist: ", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0); + for (int i = 0; i < (int) hist_cur.size(); ++i) { + hist_all[i] += hist_cur[i]; + } + + for (int i = 0; i < (int) hist_cur.size(); ++i) { + printf("%5.3f ", hist_cur[i] / (float)nelements); + } + printf("\n"); + } else { + printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0); + fout.write(reinterpret_cast(data_u8.data()), data_u8.size()); + total_size_new += data_u8.size(); + } + + total_size_org += nelements * sizeof(float); + } + + printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0); + printf("%s: quant size = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0); + + { + int64_t sum_all = 0; + for (int i = 0; i < (int) hist_all.size(); ++i) { + sum_all += hist_all[i]; + } + + printf("%s: hist: ", __func__); + for (int i = 0; i < (int) hist_all.size(); ++i) { + printf("%5.3f ", hist_all[i] / (float)sum_all); + } + printf("\n"); + } + } + + finp.close(); + fout.close(); + + return true; +} + +// +// interface implementation +// + +struct llama_context * llama_init_from_file( + const char * path_model, + struct llama_context_params params) { + ggml_time_init(); + + llama_context * ctx = new llama_context; + + ctx->rng = std::mt19937(params.seed); + ctx->logits_all = params.logits_all; + + ggml_type type_memory = params.f16_kv ? 
GGML_TYPE_F16 : GGML_TYPE_F32; + + if (!llama_model_load(path_model, *ctx, params.n_ctx, params.n_parts, type_memory, params.vocab_only)) { + fprintf(stderr, "%s: failed to load model\n", __func__); + delete ctx; + return nullptr; + } + + return ctx; +} + +void llama_free(struct llama_context * ctx) { + ggml_free(ctx->model.ctx); + + delete ctx; +} + +int llama_model_quantize( + const char * fname_inp, + const char * fname_out, + int itype, + int qk) { + if (!llama_model_quantize_internal(fname_inp, fname_out, itype, qk)) { + fprintf(stderr, "%s: failed to quantize\n", __func__); + return 1; + } + + return 0; +} + +int llama_eval( + struct llama_context * ctx, + const llama_token * tokens, + int n_tokens, + int n_past, + int n_threads) { + if (!llama_eval_internal(*ctx, tokens, n_tokens, n_past, n_threads)) { + fprintf(stderr, "%s: failed to eval\n", __func__); + return 1; + } + + return 0; +} + +int llama_tokenize( + struct llama_context * ctx, + const char * text, + llama_token * tokens, + int n_max_tokens, + bool add_bos) { + auto res = llama_tokenize(ctx->vocab, text, add_bos); + + if (n_max_tokens < (int) res.size()) { + fprintf(stderr, "%s: too many tokens\n", __func__); + return -((int) res.size()); + } + + for (size_t i = 0; i < res.size(); i++) { + tokens[i] = res[i]; + } + + return res.size(); +} + +int llama_n_vocab(struct llama_context * ctx) { + return ctx->vocab.id_to_token.size(); +} + +int llama_n_ctx(struct llama_context * ctx) { + return ctx->model.hparams.n_ctx; +} + +float * llama_get_logits(struct llama_context * ctx) { + return ctx->logits.data(); +} + +const char * llama_token_to_str(struct llama_context * ctx, llama_token token) { + if (token >= llama_n_vocab(ctx)) { + return nullptr; + } + + return ctx->vocab.id_to_token[token].tok.c_str(); +} + +llama_token llama_token_bos() { + return 1; +} + +llama_token llama_token_eos() { + return 2; +} + +llama_token llama_sample_top_p_top_k( + llama_context * ctx, + const llama_token * last_n_tokens_data, + int last_n_tokens_size, + int top_k, + double top_p, + double temp, + double repeat_penalty) { + const int64_t t_start_sample_us = ggml_time_us(); + + llama_token result = 0; + + // TODO: avoid this ... 
+ const auto last_n_tokens = std::vector(last_n_tokens_data, last_n_tokens_data + last_n_tokens_size); + + result = llama_sample_top_p_top_k( + *ctx, + last_n_tokens, + top_k, + top_p, + temp, + repeat_penalty); + + ctx->t_sample_us += ggml_time_us() - t_start_sample_us; + ctx->n_sample++; + + return result; +} + + +void llama_print_timings(struct llama_context * ctx) { + const int64_t t_end_us = ggml_time_us(); + + const int32_t n_sample = std::max(1, ctx->n_sample); + const int32_t n_eval = std::max(1, ctx->n_eval); + + fprintf(stderr, "\n"); + fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us / 1000.0f); + fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->t_sample_us, n_sample, 1e-3f * ctx->t_sample_us / n_sample); + fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->t_eval_us, n_eval, 1e-3f * ctx->t_eval_us / n_eval); + fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f); +} + +void llama_reset_timings(struct llama_context * ctx) { + ctx->t_start_us = ggml_time_us(); + + ctx->t_sample_us = ctx->n_sample = 0; + ctx->t_eval_us = ctx->n_eval = 0; +} + +const char * llama_print_system_info(void) { + static std::string s; + + s = ""; + s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | "; + s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | "; + s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | "; + s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | "; + s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | "; + s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | "; + s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | "; + s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | "; + s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | "; + s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | "; + s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | "; + s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | "; + + return s.c_str(); +} + diff --git a/llama.h b/llama.h new file mode 100644 index 000000000..3df9ed1fd --- /dev/null +++ b/llama.h @@ -0,0 +1,139 @@ +#ifndef LLAMA_H +#define LLAMA_H + +#include +#include +#include + +#ifdef LLAMA_SHARED +# ifdef _WIN32 +# ifdef LLAMA_BUILD +# define LLAMA_API __declspec(dllexport) +# else +# define LLAMA_API __declspec(dllimport) +# endif +# else +# define LLAMA_API __attribute__ ((visibility ("default"))) +# endif +#else +# define LLAMA_API +#endif + +#define LLAMA_FILE_VERSION 1 +#define LLAMA_FILE_MAGIC 0x67676d66 // 'ggmf' in hex +#define LLAMA_FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files + +#ifdef __cplusplus +extern "C" { +#endif + + // + // C interface + // + // TODO: show sample usage + // + + struct llama_context; + + typedef int llama_token; + + typedef struct llama_token_data { + llama_token id; // token id + + float p; // probability of the token + float plog; // log probability of the token + + } llama_token_data; + + struct llama_context_params { + int n_ctx; // text context + int n_parts; // -1 for default + int seed; // RNG seed, 0 for random + + bool f16_kv; // use fp16 for KV cache + bool logits_all; // the llama_eval() call computes all logits, not just the last one + bool vocab_only; // only load the vocabulary, no weights + }; + + LLAMA_API struct llama_context_params llama_context_default_params(); + + // Various functions for loading a ggml llama model. 
+ // Allocate (almost) all memory needed for the model. + // Return NULL on failure + LLAMA_API struct llama_context * llama_init_from_file( + const char * path_model, + struct llama_context_params params); + + // Frees all allocated memory + LLAMA_API void llama_free(struct llama_context * ctx); + + // TODO: not great API - very likely to change + // Returns 0 on success + LLAMA_API int llama_model_quantize( + const char * fname_inp, + const char * fname_out, + int itype, + int qk); + + // Run the llama inference to obtain the logits and probabilities for the next token. + // tokens + n_tokens is the provided batch of new tokens to process + // n_past is the number of tokens to use from previous eval calls + // Returns 0 on success + LLAMA_API int llama_eval( + struct llama_context * ctx, + const llama_token * tokens, + int n_tokens, + int n_past, + int n_threads); + + // Convert the provided text into tokens. + // The tokens pointer must be large enough to hold the resulting tokens. + // Returns the number of tokens on success, no more than n_max_tokens + // Returns a negative number on failure - the number of tokens that would have been returned + // TODO: not sure if correct + LLAMA_API int llama_tokenize( + struct llama_context * ctx, + const char * text, + llama_token * tokens, + int n_max_tokens, + bool add_bos); + + LLAMA_API int llama_n_vocab(struct llama_context * ctx); + LLAMA_API int llama_n_ctx (struct llama_context * ctx); + + // Token logits obtained from the last call to llama_eval() + // The logits for the last token are stored in the last row + // Can be mutated in order to change the probabilities of the next token + // Rows: n_tokens + // Cols: n_vocab + LLAMA_API float * llama_get_logits(struct llama_context * ctx); + + // Token Id -> String. Uses the vocabulary in the provided context + LLAMA_API const char * llama_token_to_str(struct llama_context * ctx, llama_token token); + + // Special tokens + LLAMA_API llama_token llama_token_bos(); + LLAMA_API llama_token llama_token_eos(); + + // TODO: improve the last_n_tokens interface ? + LLAMA_API llama_token llama_sample_top_p_top_k( + llama_context * ctx, + const llama_token * last_n_tokens_data, + int last_n_tokens_size, + int top_k, + double top_p, + double temp, + double repeat_penalty); + + // Performance information + LLAMA_API void llama_print_timings(struct llama_context * ctx); + LLAMA_API void llama_reset_timings(struct llama_context * ctx); + + // Print system information + LLAMA_API const char * llama_print_system_info(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/main.cpp b/main.cpp index fe9e583f8..7db3df7e9 100644 --- a/main.cpp +++ b/main.cpp @@ -1,6 +1,6 @@ -#include "ggml.h" - #include "utils.h" +#include "ggml.h" +#include "llama.h" #include #include @@ -40,7 +40,7 @@ enum console_state { CONSOLE_STATE_DEFAULT=0, CONSOLE_STATE_PROMPT, CONSOLE_STATE_USER_INPUT -}; +}; static console_state con_st = CONSOLE_STATE_DEFAULT; static bool con_use_color = false; @@ -65,765 +65,6 @@ void set_console_state(console_state new_st) } } -static const int EOS_TOKEN_ID = 2; - -// determine number of model parts based on the dimension -static const std::unordered_map LLAMA_N_PARTS = { - { 4096, 1 }, - { 5120, 2 }, - { 6656, 4 }, - { 8192, 8 }, -}; - -// default hparams (LLaMA 7B) -struct llama_hparams { - int32_t n_vocab = 32000; - int32_t n_ctx = 512; // this is provided as user input? 
- int32_t n_embd = 4096; - int32_t n_mult = 256; - int32_t n_head = 32; - int32_t n_layer = 32; - int32_t n_rot = 64; - int32_t f16 = 1; -}; - -struct llama_layer { - // normalization - struct ggml_tensor * attention_norm; - - // attention - struct ggml_tensor * wq; - struct ggml_tensor * wk; - struct ggml_tensor * wv; - struct ggml_tensor * wo; - - // normalization - struct ggml_tensor * ffn_norm; - - // ff - struct ggml_tensor * w1; - struct ggml_tensor * w2; - struct ggml_tensor * w3; -}; - -struct llama_model { - llama_hparams hparams; - - struct ggml_tensor * tok_embeddings; - - struct ggml_tensor * norm; - struct ggml_tensor * output; - - std::vector layers; - - // key + value memory - struct ggml_tensor * memory_k; - struct ggml_tensor * memory_v; - - // - struct ggml_context * ctx; - std::unordered_map tensors; -}; - -// load the model's weights from a file - -bool llama_model_load(const std::string & fname, llama_model & model, llama_vocab & vocab, int n_ctx, int n_parts, ggml_type memory_type = GGML_TYPE_F32) { - fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); - - std::vector f_buf(1024*1024); - - auto fin = std::ifstream(fname, std::ios::binary); - fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size()); - if (!fin) { - fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); - return false; - } - - // verify magic - { - uint32_t magic; - fin.read((char *) &magic, sizeof(magic)); - if (magic == FILE_MAGIC_UNVERSIONED) { - fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", - __func__, fname.c_str()); - return false; - } - if (magic != FILE_MAGIC) { - fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); - return false; - } - - uint32_t format_version; - fin.read((char *) &format_version, sizeof(format_version)); - - if (format_version != FILE_VERSION) { - fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", - __func__, fname.c_str(), format_version, FILE_VERSION); - return false; - } - } - - int n_ff = 0; - - // load hparams - { - auto & hparams = model.hparams; - - fin.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); - //fin.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); - fin.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); - fin.read((char *) &hparams.n_mult, sizeof(hparams.n_mult)); - fin.read((char *) &hparams.n_head, sizeof(hparams.n_head)); - fin.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); - fin.read((char *) &hparams.n_rot, sizeof(hparams.n_rot)); - fin.read((char *) &hparams.f16, sizeof(hparams.f16)); - - hparams.n_ctx = n_ctx; - - n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult; - - if (n_parts < 1) { - n_parts = LLAMA_N_PARTS.at(hparams.n_embd); - } - - // temp warning to tell the user to use "--n_parts" - if (hparams.f16 == 4 && n_parts != 1) { - fprintf(stderr, "%s: GPTQ model detected - are you sure n_parts should be %d? 
we normally expect it to be 1\n", __func__, n_parts); - fprintf(stderr, "%s: use '--n_parts 1' if necessary\n", __func__); - } - - fprintf(stderr, "%s: n_vocab = %d\n", __func__, hparams.n_vocab); - fprintf(stderr, "%s: n_ctx = %d\n", __func__, hparams.n_ctx); - fprintf(stderr, "%s: n_embd = %d\n", __func__, hparams.n_embd); - fprintf(stderr, "%s: n_mult = %d\n", __func__, hparams.n_mult); - fprintf(stderr, "%s: n_head = %d\n", __func__, hparams.n_head); - fprintf(stderr, "%s: n_layer = %d\n", __func__, hparams.n_layer); - fprintf(stderr, "%s: n_rot = %d\n", __func__, hparams.n_rot); - fprintf(stderr, "%s: f16 = %d\n", __func__, hparams.f16); - fprintf(stderr, "%s: n_ff = %d\n", __func__, n_ff); - fprintf(stderr, "%s: n_parts = %d\n", __func__, n_parts); - } - - // load vocab - { - std::string word; - vocab.id_to_token.resize(model.hparams.n_vocab); - std::vector tmp(64); - - for (int i = 0; i < model.hparams.n_vocab; i++) { - uint32_t len; - fin.read((char *) &len, sizeof(len)); - - word.resize(len); - if (len > 0) { - tmp.resize(len); - fin.read(tmp.data(), len); - word.assign(tmp.data(), len); - } else { - word.clear(); - } - - float score; - fin.read((char *) &score, sizeof(score)); - - vocab.token_to_id[word] = i; - - auto &tok_score = vocab.id_to_token[i]; - tok_score.tok = word; - tok_score.score = score; - } - } - - // for the big tensors, we have the option to store the data in 16-bit floats or quantized - // in order to save memory and also to speed up the computation - // wtype is for per-layer weights, while vtype is for other weights - ggml_type wtype, vtype; - switch (model.hparams.f16) { - case 0: wtype = vtype = GGML_TYPE_F32; break; - case 1: wtype = vtype = GGML_TYPE_F16; break; - case 2: wtype = vtype = GGML_TYPE_Q4_0; break; - case 3: wtype = vtype = GGML_TYPE_Q4_1; break; - case 4: wtype = GGML_TYPE_Q4_1; vtype = GGML_TYPE_F16; break; - default: - { - fprintf(stderr, "%s: invalid model file '%s' (bad f16 value %d)\n", - __func__, fname.c_str(), model.hparams.f16); - return false; - } - } - - auto & ctx = model.ctx; - - size_t ctx_size = 0; - - { - const auto & hparams = model.hparams; - - const int n_embd = hparams.n_embd; - const int n_layer = hparams.n_layer; - const int n_ctx = hparams.n_ctx; - const int n_vocab = hparams.n_vocab; - - ctx_size += n_embd*n_vocab*ggml_type_sizef(vtype); // tok_embeddings - - ctx_size += n_embd*ggml_type_sizef(GGML_TYPE_F32); // norm - - ctx_size += n_embd*n_vocab*ggml_type_sizef(vtype); // output - - ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // attention_norm - - ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype)); // wq - ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype)); // wk - ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype)); // wv - ctx_size += n_layer*(n_embd*n_embd*ggml_type_sizef(wtype)); // wo - - ctx_size += n_layer*(n_embd*ggml_type_sizef(GGML_TYPE_F32)); // ffn_norm - - ctx_size += n_layer*(n_ff*n_embd*ggml_type_sizef(wtype)); // w1 - ctx_size += n_layer*(n_ff*n_embd*ggml_type_sizef(wtype)); // w2 - ctx_size += n_layer*(n_ff*n_embd*ggml_type_sizef(wtype)); // w3 - - ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(memory_type); // memory_k - ctx_size += n_ctx*n_layer*n_embd*ggml_type_sizef(memory_type); // memory_v - - ctx_size += (5 + 10*n_layer)*256; // object overhead - - fprintf(stderr, "%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0)); - } - - // create the ggml context - { - struct ggml_init_params params = { - /*.mem_size =*/ ctx_size, - /*.mem_buffer 
=*/ NULL, - }; - - model.ctx = ggml_init(params); - if (!model.ctx) { - fprintf(stderr, "%s: ggml_init() failed\n", __func__); - return false; - } - } - - // prepare memory for the weights - { - const auto & hparams = model.hparams; - - const int n_embd = hparams.n_embd; - const int n_layer = hparams.n_layer; - const int n_vocab = hparams.n_vocab; - - model.layers.resize(n_layer); - - model.tok_embeddings = ggml_new_tensor_2d(ctx, vtype, n_embd, n_vocab); - - model.norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); - model.output = ggml_new_tensor_2d(ctx, vtype, n_embd, n_vocab); - - // map by name - model.tensors["tok_embeddings.weight"] = model.tok_embeddings; - - model.tensors["norm.weight"] = model.norm; - model.tensors["output.weight"] = model.output; - - for (int i = 0; i < n_layer; ++i) { - auto & layer = model.layers[i]; - - layer.attention_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); - - layer.wq = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); - layer.wk = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); - layer.wv = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); - layer.wo = ggml_new_tensor_2d(ctx, wtype, n_embd, n_embd); - - layer.ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); - - layer.w1 = ggml_new_tensor_2d(ctx, wtype, n_embd, n_ff); - layer.w2 = ggml_new_tensor_2d(ctx, wtype, n_ff, n_embd); - layer.w3 = ggml_new_tensor_2d(ctx, wtype, n_embd, n_ff); - - // map by name - model.tensors["layers." + std::to_string(i) + ".attention_norm.weight"] = layer.attention_norm; - - model.tensors["layers." + std::to_string(i) + ".attention.wq.weight"] = layer.wq; - model.tensors["layers." + std::to_string(i) + ".attention.wk.weight"] = layer.wk; - model.tensors["layers." + std::to_string(i) + ".attention.wv.weight"] = layer.wv; - model.tensors["layers." + std::to_string(i) + ".attention.wo.weight"] = layer.wo; - - model.tensors["layers." + std::to_string(i) + ".ffn_norm.weight"] = layer.ffn_norm; - - model.tensors["layers." + std::to_string(i) + ".feed_forward.w1.weight"] = layer.w1; - model.tensors["layers." + std::to_string(i) + ".feed_forward.w2.weight"] = layer.w2; - model.tensors["layers." + std::to_string(i) + ".feed_forward.w3.weight"] = layer.w3; - } - } - - // key + value memory - { - const auto & hparams = model.hparams; - - const int n_embd = hparams.n_embd; - const int n_layer = hparams.n_layer; - const int n_ctx = hparams.n_ctx; - - const int n_mem = n_layer*n_ctx; - const int n_elements = n_embd*n_mem; - - model.memory_k = ggml_new_tensor_1d(ctx, memory_type, n_elements); - model.memory_v = ggml_new_tensor_1d(ctx, memory_type, n_elements); - - const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v); - - fprintf(stderr, "%s: memory_size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem); - } - - const size_t file_offset = fin.tellg(); - - fin.close(); - - std::vector tmp; - - for (int i = 0; i < n_parts; ++i) { - const int part_id = i; - //const int part_id = n_parts - i - 1; - - std::string fname_part = fname; - if (i > 0) { - fname_part += "." 
+ std::to_string(i); - } - - fprintf(stderr, "%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str()); - - fin = std::ifstream(fname_part, std::ios::binary); - fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size()); - fin.seekg(file_offset); - - // load weights - { - int n_tensors = 0; - size_t total_size = 0; - - fprintf(stderr, "%s: ", __func__); - - while (true) { - int32_t n_dims; - int32_t length; - int32_t ftype; - - fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); - fin.read(reinterpret_cast(&length), sizeof(length)); - fin.read(reinterpret_cast(&ftype), sizeof(ftype)); - - if (fin.eof()) { - break; - } - - int32_t nelements = 1; - int32_t ne[2] = { 1, 1 }; - for (int i = 0; i < n_dims; ++i) { - fin.read(reinterpret_cast(&ne[i]), sizeof(ne[i])); - nelements *= ne[i]; - } - - std::string name(length, 0); - fin.read(&name[0], length); - - if (model.tensors.find(name.data()) == model.tensors.end()) { - fprintf(stderr, "%s: unknown tensor '%s' in model file\n", __func__, name.data()); - return false; - } - - // split_type = 0: split by columns - // split_type = 1: split by rows - int split_type = 0; - - // split_type = 0: - // regex: - // - tok_embeddings.* - // - layers.*.attention.wo.weight - // - layers.*.feed_forward.w2.weight - - // split_type = 1: - // regex: - // - output.* - // - layers.*.attention.wq.weight - // - layers.*.attention.wk.weight - // - layers.*.attention.wv.weight - // - layers.*.feed_forward.w1.weight - // - layers.*.feed_forward.w3.weight - if (name.find("tok_embeddings") != std::string::npos) { - split_type = 0; - } else if (name.find("layers") != std::string::npos) { - if (name.find("attention.wo.weight") != std::string::npos) { - split_type = 0; - } else if (name.find("feed_forward.w2.weight") != std::string::npos) { - split_type = 0; - } else { - split_type = 1; - } - } else if (name.find("output") != std::string::npos) { - split_type = 1; - } - - auto tensor = model.tensors[name.data()]; - - if (n_dims == 1) { - if (ggml_nelements(tensor) != nelements) { - fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data()); - return false; - } - } else { - if (ggml_nelements(tensor)/n_parts != nelements) { - fprintf(stderr, "%s: tensor '%s' has wrong size in model file\n", __func__, name.data()); - return false; - } - } - - if (n_dims == 1) { - if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) { - fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", - __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]); - return false; - } - } else { - if (split_type == 0) { - if (tensor->ne[0]/n_parts != ne[0] || tensor->ne[1] != ne[1]) { - fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", - __func__, name.data(), tensor->ne[0]/n_parts, tensor->ne[1], ne[0], ne[1]); - return false; - } - } else { - if (tensor->ne[0] != ne[0] || tensor->ne[1]/n_parts != ne[1]) { - fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n", - __func__, name.data(), tensor->ne[0], tensor->ne[1]/n_parts, ne[0], ne[1]); - return false; - } - } - } - - if (0) { - static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", }; - fprintf(stderr, "%24s - [%5d, %5d], type = %6s, split = %d\n", name.data(), ne[0], ne[1], ftype_str[ftype], split_type); - } - - size_t bpe = 0; - - switch (ftype) { - case 0: bpe = ggml_type_size(GGML_TYPE_F32); break; - case 1: bpe = 
ggml_type_size(GGML_TYPE_F16); break; - case 2: bpe = ggml_type_size(GGML_TYPE_Q4_0); assert(ne[0] % 64 == 0); break; - case 3: bpe = ggml_type_size(GGML_TYPE_Q4_1); assert(ne[0] % 64 == 0); break; - default: - { - fprintf(stderr, "%s: unknown ftype %d in model file\n", __func__, ftype); - return false; - } - }; - - if (n_dims == 1 || n_parts == 1) { - if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) { - fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", - __func__, name.data(), ggml_nbytes(tensor), nelements*bpe); - return false; - } - - if (part_id == 0) { - fin.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); - } else { - fin.seekg(ggml_nbytes(tensor), std::ios::cur); - } - - total_size += ggml_nbytes(tensor); - } else { - if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)/n_parts) { - fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n", - __func__, name.data(), ggml_nbytes(tensor)/n_parts, nelements*bpe); - return false; - } - - if (split_type == 0) { - const int np0 = ne[0]; - - const size_t row_size = (tensor->ne[0]/ggml_blck_size(tensor->type))*ggml_type_size(tensor->type); - assert(row_size == tensor->nb[1]); - - for (int i1 = 0; i1 < ne[1]; ++i1) { - const size_t offset_row = i1*row_size; - const size_t offset = offset_row + ((part_id*np0)/ggml_blck_size(tensor->type))*ggml_type_size(tensor->type); - fin.read(reinterpret_cast(tensor->data) + offset, row_size/n_parts); - } - } else { - const int np1 = ne[1]; - - const size_t row_size = (tensor->ne[0]/ggml_blck_size(tensor->type))*ggml_type_size(tensor->type); - - for (int i1 = 0; i1 < ne[1]; ++i1) { - const size_t offset_row = (i1 + part_id*np1)*row_size; - fin.read(reinterpret_cast(tensor->data) + offset_row, row_size); - } - } - - total_size += ggml_nbytes(tensor)/n_parts; - } - - //fprintf(stderr, "%42s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0); - if (++n_tensors % 8 == 0) { - fprintf(stderr, "."); - fflush(stderr); - } - } - - fprintf(stderr, " done\n"); - - fprintf(stderr, "%s: model size = %8.2f MB / num tensors = %d\n", __func__, total_size/1024.0/1024.0, n_tensors); - } - - fin.close(); - } - - return true; -} - -// evaluate the transformer -// -// - model: the model -// - n_threads: number of threads to use -// - n_past: the context size so far -// - embd_inp: the embeddings of the tokens in the context -// - embd_w: the predicted logits for the next token -// -// The GPT-J model requires about 16MB of memory per input token. -// -bool llama_eval( - const llama_model & model, - const int n_threads, - const int n_past, - const std::vector & embd_inp, - std::vector & embd_w, - size_t & mem_per_token, - bool return_all_logits = false) { - const int N = embd_inp.size(); - - const auto & hparams = model.hparams; - - const int n_embd = hparams.n_embd; - const int n_layer = hparams.n_layer; - const int n_ctx = hparams.n_ctx; - const int n_head = hparams.n_head; - const int n_vocab = hparams.n_vocab; - const int n_rot = hparams.n_embd/hparams.n_head; - - // TODO: check if this size scales with n_ctx linearly and remove constant. 
somehow I feel it wasn't the case - // static size_t buf_size = hparams.n_ctx*1024*1024; - static size_t buf_size = 512u*1024*1024; - static void * buf = malloc(buf_size); - - if (mem_per_token > 0 && mem_per_token*N > buf_size) { - const size_t buf_size_new = 1.3*(mem_per_token*N); // add 30% to account for ggml object overhead - //fprintf(stderr, "\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new); - - // reallocate - buf_size = buf_size_new; - buf = realloc(buf, buf_size); - if (buf == nullptr) { - fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); - return false; - } - } - - struct ggml_init_params params = { - /*.mem_size =*/ buf_size, - /*.mem_buffer =*/ buf, - }; - - struct ggml_context * ctx0 = ggml_init(params); - ggml_cgraph gf = {}; - gf.n_threads = n_threads; - - struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); - memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd)); - - struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd); - - for (int il = 0; il < n_layer; ++il) { - struct ggml_tensor * inpSA = inpL; - - struct ggml_tensor * cur; - - // norm - { - cur = ggml_rms_norm(ctx0, inpL); - - // cur = attention_norm*cur - cur = ggml_mul(ctx0, - ggml_repeat(ctx0, model.layers[il].attention_norm, cur), - cur); - } - - // self-attention - { - struct ggml_tensor * Qcur = ggml_mul_mat(ctx0, model.layers[il].wq, cur); - struct ggml_tensor * Kcur = ggml_mul_mat(ctx0, model.layers[il].wk, cur); - struct ggml_tensor * Vcur = ggml_mul_mat(ctx0, model.layers[il].wv, cur); - - // store key and value to memory - if (N >= 1) { - struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past)); - struct ggml_tensor * v = ggml_view_1d(ctx0, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past)); - - ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k)); - ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v)); - } - - // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3) - struct ggml_tensor * Q = - ggml_permute(ctx0, - ggml_rope(ctx0, - ggml_cpy(ctx0, - Qcur, - ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_embd/n_head, n_head, N)), - n_past, n_rot, 0), - 0, 2, 1, 3); - - // K = Kmem.view(n_embd/n_head, n_head, n_past + N).permute(0, 2, 1, 3) - struct ggml_tensor * K = - ggml_permute(ctx0, - ggml_rope(ctx0, - ggml_reshape_3d(ctx0, - ggml_view_1d(ctx0, model.memory_k, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_k)*n_embd), - n_embd/n_head, n_head, n_past + N), - n_past, n_rot, 1), - 0, 2, 1, 3); - - // K * Q - struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q); - - // KQ_scaled = KQ / sqrt(n_embd/n_head) - struct ggml_tensor * KQ_scaled = - ggml_scale(ctx0, - KQ, - ggml_new_f32(ctx0, 1.0f/sqrt(float(n_embd)/n_head)) - ); - - // KQ_masked = mask_past(KQ_scaled) - struct ggml_tensor * KQ_masked = ggml_diag_mask_inf(ctx0, KQ_scaled, n_past); - - // KQ = soft_max(KQ_masked) - struct ggml_tensor * KQ_soft_max = ggml_soft_max(ctx0, KQ_masked); - - // V_trans = Vmem.view(n_embd/n_head, n_head, n_past + N).permute(1, 2, 0, 3).contiguous() - struct ggml_tensor * V_trans = - ggml_permute(ctx0, - ggml_reshape_3d(ctx0, - ggml_view_1d(ctx0, model.memory_v, (n_past + N)*n_embd, il*n_ctx*ggml_element_size(model.memory_v)*n_embd), - n_embd/n_head, n_head, n_past + N), - 1, 2, 0, 3); - - // KQV = transpose(V) * KQ_soft_max - struct ggml_tensor * KQV = 
ggml_mul_mat(ctx0, V_trans, KQ_soft_max); - - // KQV_merged = KQV.permute(0, 2, 1, 3) - struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3); - - // cur = KQV_merged.contiguous().view(n_embd, N) - cur = ggml_cpy(ctx0, - KQV_merged, - ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N)); - - // projection (no bias) - cur = ggml_mul_mat(ctx0, - model.layers[il].wo, - cur); - } - - struct ggml_tensor * inpFF = ggml_add(ctx0, cur, inpSA); - - // feed-forward network - { - // norm - { - cur = ggml_rms_norm(ctx0, inpFF); - - // cur = ffn_norm*cur - cur = ggml_mul(ctx0, - ggml_repeat(ctx0, model.layers[il].ffn_norm, cur), - cur); - } - - struct ggml_tensor * tmp = ggml_mul_mat(ctx0, - model.layers[il].w3, - cur); - - - cur = ggml_mul_mat(ctx0, - model.layers[il].w1, - cur); - - // SILU activation - cur = ggml_silu(ctx0, cur); - - cur = ggml_mul(ctx0, cur, tmp); - - cur = ggml_mul_mat(ctx0, - model.layers[il].w2, - cur); - } - - cur = ggml_add(ctx0, cur, inpFF); - - // input for next layer - inpL = cur; - } - - // norm - { - inpL = ggml_rms_norm(ctx0, inpL); - - // inpL = norm*inpL - inpL = ggml_mul(ctx0, - ggml_repeat(ctx0, model.norm, inpL), - inpL); - } - - // lm_head - { - inpL = ggml_mul_mat(ctx0, model.output, inpL); - } - - // logits -> probs - //inpL = ggml_soft_max(ctx0, inpL); - - // run the computation - ggml_build_forward_expand(&gf, inpL); - ggml_graph_compute (ctx0, &gf); - - //if (n_past%100 == 0) { - // ggml_graph_print (&gf); - // ggml_graph_dump_dot(&gf, NULL, "gpt-2.dot"); - //} - - //embd_w.resize(n_vocab*N); - //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N); - - if (return_all_logits) { - embd_w.resize(n_vocab * N); - memcpy(embd_w.data(), (float *) ggml_get_data(inpL), sizeof(float)*n_vocab*N); - } else { - // return result for just the last token - embd_w.resize(n_vocab); - memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab); - } - - if (mem_per_token == 0) { - mem_per_token = ggml_used_mem(ctx0)/N; - } - //fprintf(stderr, "used_mem = %zu\n", ggml_used_mem(ctx0)); - - ggml_free(ctx0); - - return true; -} - std::vector softmax(const std::vector& logits) { std::vector probs(logits.size()); float max_logit = logits[0]; @@ -840,24 +81,25 @@ std::vector softmax(const std::vector& logits) { return probs; } -void perplexity(const llama_vocab &vocab, const llama_model &model, const gpt_params ¶ms, size_t mem_per_token) { +void perplexity(llama_context * ctx, const gpt_params & params) { // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research // Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw` // Output: `perplexity: 13.5106 [114/114]` - std::vector tokens = ::llama_tokenize(vocab, params.prompt, true); + auto tokens = ::llama_tokenize(ctx, params.prompt.c_str(), true); int count = 0; double nll = 0.0; int seq_count = tokens.size() / params.n_ctx; - printf("Calculating perplexity over %d chunks\n", seq_count); + + fprintf(stderr, "%s : calculating perplexity over %d chunks\n", __func__, seq_count); + for (int i = 0; i < seq_count; ++i) { int start = i * params.n_ctx; int end = start + params.n_ctx - 1; - std::vector embd(tokens.begin() + start, tokens.begin() + end); - std::vector logits; + std::vector embd(tokens.begin() + start, tokens.begin() + end); auto start_t = std::chrono::high_resolution_clock::now(); - if (!llama_eval(model, params.n_threads, 0, embd, logits, mem_per_token, true)) { - fprintf(stderr, "Failed to 
predict\n"); + if (llama_eval(ctx, embd.data(), embd.size(), 0, params.n_threads)) { + fprintf(stderr, "%s : failed to eval\n", __func__); return; } auto end_t = std::chrono::high_resolution_clock::now(); @@ -877,12 +119,14 @@ void perplexity(const llama_vocab &vocab, const llama_model &model, const gpt_pa // Example, we have a context window of 512, we will compute perplexity for each of the // last 256 tokens. Then, we split the input up into context window size chunks to // process the entire prompt. + + auto logits = llama_get_logits(ctx); for (int j = params.n_ctx / 2; j < params.n_ctx - 1; ++j) { // Calculate probability of next token, given the previous ones. - int n_vocab = model.hparams.n_vocab; + int n_vocab = llama_n_vocab(ctx); std::vector tok_logits( - logits.begin() + j * n_vocab, - logits.begin() + (j + 1) * n_vocab); + logits + j * n_vocab, + logits + (j + 1) * n_vocab); double prob = softmax(tok_logits)[tokens[start + j + 1]]; nll += -std::log(prob); ++count; @@ -910,29 +154,9 @@ void sigint_handler(int signo) { } #endif -const char * llama_print_system_info(void) { - static std::string s; - - s = ""; - s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | "; - s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | "; - s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | "; - s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | "; - s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | "; - s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | "; - s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | "; - s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | "; - s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | "; - s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | "; - s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | "; - s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | "; - - return s.c_str(); -} - int main(int argc, char ** argv) { + // has to be called once at the start of the program to init ggml stuff ggml_time_init(); - const int64_t t_main_start_us = ggml_time_us(); gpt_params params; params.model = "models/llama-7B/ggml-model.bin"; @@ -964,21 +188,21 @@ int main(int argc, char ** argv) { // params.prompt = R"(// this function checks if the number n is prime //bool is_prime(int n) {)"; - int64_t t_load_us = 0; - - llama_vocab vocab; - llama_model model; + llama_context * ctx; // load the model { - const ggml_type memory_type = params.memory_f16 ? 
GGML_TYPE_F16 : GGML_TYPE_F32; - const int64_t t_start_us = ggml_time_us(); - if (!llama_model_load(params.model, model, vocab, params.n_ctx, params.n_parts, memory_type)) { - fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str()); + auto lparams = llama_context_default_params(); + + lparams.f16_kv = params.memory_f16; + lparams.logits_all = params.perplexity; + + ctx = llama_init_from_file(params.model.c_str(), lparams); + + if (ctx == NULL) { + fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str()); return 1; } - - t_load_us = ggml_time_us() - t_start_us; } // print system information @@ -988,32 +212,33 @@ int main(int argc, char ** argv) { params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info()); } - std::vector logits; - // determine the required inference memory per token: - size_t mem_per_token = 0; - llama_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token); + // TODO: better way to do that + { + const std::vector tmp = { 0, 1, 2, 3 }; + llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads); + } if (params.perplexity) { - perplexity(vocab, model, params, mem_per_token); + perplexity(ctx, params); exit(0); } int n_past = 0; - int64_t t_sample_us = 0; - int64_t t_predict_us = 0; - // Add a space in front of the first character to match OG llama tokenizer behavior params.prompt.insert(0, 1, ' '); - // tokenize the prompt - std::vector embd_inp = ::llama_tokenize(vocab, params.prompt, true); - params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size()); + // tokenize the prompt + auto embd_inp = ::llama_tokenize(ctx, params.prompt, true); + + const int n_ctx = llama_n_ctx(ctx); + + params.n_predict = std::min(params.n_predict, n_ctx - (int) embd_inp.size()); // prefix & suffix for instruct mode - const std::vector inp_pfx = ::llama_tokenize(vocab, "\n\n### Instruction:\n\n", true); - const std::vector inp_sfx = ::llama_tokenize(vocab, "\n\n### Response:\n\n", false); + const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true); + const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false); // in instruct mode, we inject a prefix and a suffix to each input by the user if (params.instruct) { @@ -1030,7 +255,7 @@ int main(int argc, char ** argv) { fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); for (int i = 0; i < (int) embd_inp.size(); i++) { - fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str()); + fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i])); } fprintf(stderr, "\n"); if (params.interactive) { @@ -1055,10 +280,10 @@ int main(int argc, char ** argv) { fprintf(stderr, "sampling parameters: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n", params.temp, params.top_k, params.top_p, params.repeat_last_n, params.repeat_penalty); fprintf(stderr, "\n\n"); - std::vector embd; + std::vector embd; int last_n_size = params.repeat_last_n; - std::vector last_n_tokens(last_n_size); + std::vector last_n_tokens(last_n_size); std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0); if (params.interactive) { @@ -1092,14 +317,10 @@ int main(int argc, char ** argv) { while (remaining_tokens > 0 || params.interactive) { // predict if (embd.size() > 0) { - const int64_t t_start_us = ggml_time_us(); - - if 
(!llama_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) { - fprintf(stderr, "Failed to predict\n"); + if (llama_eval(ctx, embd.data(), embd.size(), n_past, params.n_threads)) { + fprintf(stderr, "%s : failed to eval\n", __func__); return 1; } - - t_predict_us += ggml_time_us() - t_start_us; } n_past += embd.size(); @@ -1107,29 +328,28 @@ int main(int argc, char ** argv) { if ((int) embd_inp.size() <= input_consumed) { // out of user input, sample next token - const float top_k = params.top_k; - const float top_p = params.top_p; - const float temp = params.temp; + const float top_k = params.top_k; + const float top_p = params.top_p; + const float temp = params.temp; const float repeat_penalty = params.repeat_penalty; - const int n_vocab = model.hparams.n_vocab; - - llama_vocab::id id = 0; + llama_token id = 0; { - const int64_t t_start_sample_us = ggml_time_us(); + auto logits = llama_get_logits(ctx); if (params.ignore_eos) { // set the logit of the eos token to zero to avoid sampling it - logits[logits.size() - n_vocab + EOS_TOKEN_ID] = 0; + //logits[logits.size() - n_vocab + EOS_TOKEN_ID] = 0; + // TODO: this does not work of params.logits_all == true + assert(params.perplexity == false); + logits[llama_token_eos()] = 0; } - id = llama_sample_top_p_top_k(vocab, logits.data() + (logits.size() - n_vocab), last_n_tokens, repeat_penalty, top_k, top_p, temp, rng); + id = llama_sample_top_p_top_k(ctx, last_n_tokens.data(), last_n_tokens.size(), top_k, top_p, temp, repeat_penalty); last_n_tokens.erase(last_n_tokens.begin()); last_n_tokens.push_back(id); - - t_sample_us += ggml_time_us() - t_start_sample_us; } // add it to the context @@ -1156,7 +376,7 @@ int main(int argc, char ** argv) { // display text if (!input_noecho) { for (auto id : embd) { - printf("%s", vocab.id_to_token[id].tok.c_str()); + printf("%s", llama_token_to_str(ctx, id)); } fflush(stdout); } @@ -1171,7 +391,7 @@ int main(int argc, char ** argv) { // check for reverse prompt std::string last_output; for (auto id : last_n_tokens) { - last_output += vocab.id_to_token[id].tok; + last_output += llama_token_to_str(ctx, id); } // Check if each of the reverse prompts appears at the end of the output. 
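The hunks above move main.cpp's generation loop onto the new llama.h C interface introduced by this patch. For orientation only, here is a rough sketch of a minimal standalone caller of that interface; it is not part of the diff, and the model path, thread count, and sampling parameters are placeholder assumptions rather than values taken from the patch:

    // sketch: minimal use of the llama.h API added in this patch (placeholder values)
    #include "llama.h"
    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
        llama_context_params lparams = llama_context_default_params();
        llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", lparams);
        if (ctx == NULL) {
            return 1;
        }

        // tokenize the prompt (add_bos = true, as main.cpp does); leading space matches main.cpp
        std::string prompt = " Hello";
        std::vector<llama_token> tokens(64);
        int n = llama_tokenize(ctx, prompt.c_str(), tokens.data(), tokens.size(), true);
        if (n < 0) {
            return 1;
        }
        tokens.resize(n);

        int n_past = 0;
        std::vector<llama_token> last_n(64, 0);
        for (int i = 0; i < 16; i++) {
            // llama_eval() returns 0 on success
            if (llama_eval(ctx, tokens.data(), tokens.size(), n_past, 4) != 0) {
                break;
            }
            n_past += tokens.size();

            llama_token id = llama_sample_top_p_top_k(ctx, last_n.data(), last_n.size(), 40, 0.95, 0.80, 1.10);
            if (id == llama_token_eos()) {
                break;
            }

            last_n.erase(last_n.begin());
            last_n.push_back(id);

            printf("%s", llama_token_to_str(ctx, id));
            tokens = { id }; // feed the sampled token back in
        }

        llama_print_timings(ctx);
        llama_free(ctx);
        return 0;
    }

The loop shape (eval the pending tokens, advance n_past, sample one token, feed it back) mirrors what main.cpp does in the hunks above, with interactivity, reverse prompts, and color handling stripped out.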
@@ -1208,7 +428,7 @@ int main(int argc, char ** argv) { // done taking input, reset color set_console_state(CONSOLE_STATE_DEFAULT); - std::vector line_inp = ::llama_tokenize(vocab, buffer, false); + auto line_inp = ::llama_tokenize(ctx, buffer, false); embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); if (params.instruct) { @@ -1223,7 +443,7 @@ int main(int argc, char ** argv) { } // end of text token - if (embd.back() == EOS_TOKEN_ID) { + if (embd.back() == llama_token_eos()) { if (params.interactive) { is_interacting = true; } else { @@ -1243,19 +463,9 @@ int main(int argc, char ** argv) { signal(SIGINT, SIG_DFL); #endif - // report timing - { - const int64_t t_main_end_us = ggml_time_us(); + llama_print_timings(ctx); - fprintf(stderr, "\n\n"); - fprintf(stderr, "%s: mem per token = %8zu bytes\n", __func__, mem_per_token); - fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f); - fprintf(stderr, "%s: sample time = %8.2f ms\n", __func__, t_sample_us/1000.0f); - fprintf(stderr, "%s: predict time = %8.2f ms / %.2f ms per token\n", __func__, t_predict_us/1000.0f, t_predict_us/1000.0f/n_past); - fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f); - } - - ggml_free(model.ctx); + llama_free(ctx); set_console_state(CONSOLE_STATE_DEFAULT); diff --git a/models/ggml-vocab.bin b/models/ggml-vocab.bin index aba94bd10a4ac08a05b3eca27d6ba0d8d5115e65..3651f708e80eaa74f2f0004bbcfd8744b15e48e0 100644 GIT binary patch delta 63 zcmX@KS?bYdDW$aB^mIlBAgBdm0U!o)6o3>6HvlmsLnBiw6JskAkYsLUVrgY!-O9vv GU?u>K7Ym&L delta 31 ncmaE~S?bVcDJF*6Mx|CI##SY!Rwd?EC6-nt)~!lx2WA2Qt3?Uv diff --git a/quantize.cpp b/quantize.cpp index 52b7ac9b3..f0230f5dc 100644 --- a/quantize.cpp +++ b/quantize.cpp @@ -1,319 +1,17 @@ #include "ggml.h" +#include "llama.h" -#include "utils.h" - -#include -#include -#include #include -#include -#include #include -#include -#include -// TODO: move somewhere else -#define QK 32 - -// default hparams (LLaMA76B) -struct llama_hparams { - int32_t n_vocab = 32000; - int32_t n_ctx = 512; // this is provided as user input? 
- int32_t n_embd = 4096; - int32_t n_mult = 256; - int32_t n_head = 32; - int32_t n_layer = 32; - int32_t n_rot = 64; - int32_t f16 = 1; -}; - - -// quantize a model -bool llama_model_quantize(const std::string & fname_inp, const std::string & fname_out, int itype) { - ggml_type type = GGML_TYPE_Q4_1; - - switch (itype) { - case 2: type = GGML_TYPE_Q4_0; break; - case 3: type = GGML_TYPE_Q4_1; break; - default: fprintf(stderr, "%s: invalid quantization type %d\n", __func__, itype); return 1; - }; - - if (type != GGML_TYPE_Q4_0 && type != GGML_TYPE_Q4_1) { - fprintf(stderr, "%s: invalid quantization type %d\n", __func__, type); - return false; - } - - llama_vocab vocab; - - printf("%s: loading model from '%s'\n", __func__, fname_inp.c_str()); - - auto finp = std::ifstream(fname_inp, std::ios::binary); - if (!finp) { - fprintf(stderr, "%s: failed to open '%s' for reading\n", __func__, fname_inp.c_str()); - return false; - } - - auto fout = std::ofstream(fname_out, std::ios::binary); - if (!fout) { - fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname_out.c_str()); - return false; - } - - // verify magic - { - uint32_t magic; - finp.read((char *) &magic, sizeof(magic)); - if (magic == FILE_MAGIC_UNVERSIONED) { - fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", - __func__, fname_inp.c_str()); - return false; - } - if (magic != FILE_MAGIC) { - fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str()); - return false; - } - - fout.write((char *) &magic, sizeof(magic)); - - uint32_t format_version; - finp.read((char *) &format_version, sizeof(format_version)); - - if (format_version != FILE_VERSION) { - fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", - __func__, fname_inp.c_str(), format_version, FILE_VERSION); - return false; - } - - fout.write((char *) &format_version, sizeof(format_version)); - } - - llama_hparams hparams; - - // load hparams - { - finp.read((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); - //finp.read((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); - finp.read((char *) &hparams.n_embd, sizeof(hparams.n_embd)); - finp.read((char *) &hparams.n_mult, sizeof(hparams.n_mult)); - finp.read((char *) &hparams.n_head, sizeof(hparams.n_head)); - finp.read((char *) &hparams.n_layer, sizeof(hparams.n_layer)); - finp.read((char *) &hparams.n_rot, sizeof(hparams.n_rot)); - finp.read((char *) &hparams.f16, sizeof(hparams.f16)); - - printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab); - printf("%s: n_ctx = %d\n", __func__, hparams.n_ctx); - printf("%s: n_embd = %d\n", __func__, hparams.n_embd); - printf("%s: n_mult = %d\n", __func__, hparams.n_mult); - printf("%s: n_head = %d\n", __func__, hparams.n_head); - printf("%s: n_layer = %d\n", __func__, hparams.n_layer); - printf("%s: f16 = %d\n", __func__, hparams.f16); - - fout.write((char *) &hparams.n_vocab, sizeof(hparams.n_vocab)); - //fout.write((char *) &hparams.n_ctx, sizeof(hparams.n_ctx)); - fout.write((char *) &hparams.n_embd, sizeof(hparams.n_embd)); - fout.write((char *) &hparams.n_mult, sizeof(hparams.n_mult)); - fout.write((char *) &hparams.n_head, sizeof(hparams.n_head)); - fout.write((char *) &hparams.n_layer, sizeof(hparams.n_layer)); - fout.write((char *) &hparams.n_rot, sizeof(hparams.n_rot)); - fout.write((char *) &itype, sizeof(hparams.f16)); - } - - // load vocab - { - const int32_t n_vocab = hparams.n_vocab; - - if (n_vocab != hparams.n_vocab) { - fprintf(stderr, "%s: 
invalid model file '%s' (bad vocab size %d != %d)\n", - __func__, fname_inp.c_str(), n_vocab, hparams.n_vocab); - return false; - } - - std::string word; - vocab.id_to_token.resize(n_vocab); - for (int i = 0; i < n_vocab; i++) { - uint32_t len; - finp.read ((char *) &len, sizeof(len)); - fout.write((char *) &len, sizeof(len)); - - word.resize(len); - finp.read ((char *) word.data(), len); - fout.write((char *) word.data(), len); - - float score; - finp.read ((char *) &score, sizeof(score)); - fout.write((char *) &score, sizeof(score)); - - vocab.token_to_id[word] = i; - - auto &tok_score = vocab.id_to_token[i]; - tok_score.tok = word; - tok_score.score = score; - } - } - - // load weights - { - size_t total_size_org = 0; - size_t total_size_new = 0; - - std::vector work; - - std::vector data_u8; - std::vector data_f16; - std::vector data_f32; - - std::vector hist_all(1 << 4, 0); - - while (true) { - int32_t n_dims; - int32_t length; - int32_t ftype; - - finp.read(reinterpret_cast(&n_dims), sizeof(n_dims)); - finp.read(reinterpret_cast(&length), sizeof(length)); - finp.read(reinterpret_cast(&ftype), sizeof(ftype)); - - if (finp.eof()) { - break; - } - - int32_t nelements = 1; - int32_t ne[2] = { 1, 1 }; - for (int i = 0; i < n_dims; ++i) { - finp.read (reinterpret_cast(&ne[i]), sizeof(ne[i])); - nelements *= ne[i]; - } - - std::string name(length, 0); - finp.read (&name[0], length); - - { - static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", }; - printf("%48s - [%5d, %5d], type = %6s ", name.data(), ne[0], ne[1], ftype_str[ftype]); - } - - // regexes of tensor names to be quantized - const std::vector k_names = { - ".*weight", - }; - - bool quantize = false; - for (const auto & s : k_names) { - if (std::regex_match(name, std::regex(s))) { - quantize = true; - break; - } - } - - // quantize only 2D tensors - quantize &= (n_dims == 2); - - if (quantize) { - if (ftype != 0 && ftype != 1) { - fprintf(stderr, "%s: unsupported ftype %d for integer quantization\n", __func__, ftype); - return false; - } - - if (ftype == 1) { - data_f16.resize(nelements); - finp.read(reinterpret_cast(data_f16.data()), nelements * sizeof(ggml_fp16_t)); - data_f32.resize(nelements); - for (int i = 0; i < nelements; ++i) { - data_f32[i] = ggml_fp16_to_fp32(data_f16[i]); - } - } else { - data_f32.resize(nelements); - finp.read(reinterpret_cast(data_f32.data()), nelements * sizeof(float)); - } - - ftype = itype; - } else { - const int bpe = (ftype == 0) ? sizeof(float) : sizeof(uint16_t); - - data_u8.resize(nelements*bpe); - finp.read(reinterpret_cast(data_u8.data()), nelements * bpe); - } - - fout.write(reinterpret_cast(&n_dims), sizeof(n_dims)); - fout.write(reinterpret_cast(&length), sizeof(length)); - fout.write(reinterpret_cast(&ftype), sizeof(ftype)); - for (int i = 0; i < n_dims; ++i) { - fout.write(reinterpret_cast(&ne[i]), sizeof(ne[i])); - } - fout.write(&name[0], length); - - if (quantize) { - printf("quantizing .. 
"); - work.resize(nelements); // for quantization - - size_t cur_size = 0; - std::vector hist_cur(1 << 4, 0); - - switch (type) { - case GGML_TYPE_Q4_0: - { - cur_size = ggml_quantize_q4_0(data_f32.data(), work.data(), nelements, ne[0], QK, hist_cur.data()); - } break; - case GGML_TYPE_Q4_1: - { - cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], QK, hist_cur.data()); - } break; - default: - { - fprintf(stderr, "%s: unsupported quantization type %d\n", __func__, type); - return false; - } - } - - fout.write(reinterpret_cast(work.data()), cur_size); - total_size_new += cur_size; - - printf("size = %8.2f MB -> %8.2f MB | hist: ", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0); - for (int i = 0; i < hist_cur.size(); ++i) { - hist_all[i] += hist_cur[i]; - } - - for (int i = 0; i < hist_cur.size(); ++i) { - printf("%5.3f ", hist_cur[i] / (float)nelements); - } - printf("\n"); - } else { - printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0); - fout.write(reinterpret_cast(data_u8.data()), data_u8.size()); - total_size_new += data_u8.size(); - } - - total_size_org += nelements * sizeof(float); - } - - printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0); - printf("%s: quant size = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0); - - { - int64_t sum_all = 0; - for (int i = 0; i < hist_all.size(); ++i) { - sum_all += hist_all[i]; - } - - printf("%s: hist: ", __func__); - for (int i = 0; i < hist_all.size(); ++i) { - printf("%5.3f ", hist_all[i] / (float)sum_all); - } - printf("\n"); - } - } - - finp.close(); - fout.close(); - - return true; -} +const int QK = 32; // usage: // ./llama-quantize models/llama/ggml-model.bin models/llama/ggml-model-quant.bin type // int main(int argc, char ** argv) { ggml_time_init(); + if (argc != 4) { fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]); fprintf(stderr, " type = 2 - q4_0\n"); @@ -341,7 +39,7 @@ int main(int argc, char ** argv) { { const int64_t t_start_us = ggml_time_us(); - if (!llama_model_quantize(fname_inp, fname_out, itype)) { + if (llama_model_quantize(fname_inp.c_str(), fname_out.c_str(), itype, QK)) { fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str()); return 1; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a2c1e3fa2..4990c3432 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,4 +1,4 @@ set(TEST_TARGET test-tokenizer-0) add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE utils) +target_link_libraries(${TEST_TARGET} PRIVATE llama ggml utils) add_test(NAME ${TEST_TARGET} COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin) diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp index 6bc49f281..49bc232b6 100644 --- a/tests/test-tokenizer-0.cpp +++ b/tests/test-tokenizer-0.cpp @@ -1,10 +1,11 @@ #include "utils.h" +#include "llama.h" #include #include #include -static const std::map> k_tests = { +static const std::map> k_tests = { { "Hello World", { 1, 10994, 2787, }, }, { " Hello World", { 1, 15043, 2787, }, }, { " Hello World!", { 1, 15043, 2787, 29991, }, }, @@ -23,14 +24,23 @@ int main(int argc, char **argv) { fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str()); - llama_vocab vocab; + llama_context * ctx; - if (!llama_vocab_load(fname, vocab)) { - fprintf(stderr, "%s : failed to load vocab from: '%s'\n", __func__, fname.c_str()); - return 1; + // load the vocab + { + auto lparams 
= llama_context_default_params(); + + lparams.vocab_only = true; + + ctx = llama_init_from_file(fname.c_str(), lparams); + + if (ctx == NULL) { + fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str()); + return 1; + } } - const int n_vocab = vocab.id_to_token.size(); + const int n_vocab = llama_n_vocab(ctx); if (n_vocab != 32000) { fprintf(stderr, "%s : expected 32000 tokens, got %d\n", __func__, n_vocab); @@ -38,7 +48,7 @@ int main(int argc, char **argv) { } for (const auto & test_kv : k_tests) { - const auto res = llama_tokenize(vocab, test_kv.first, true); + const auto res = ::llama_tokenize(ctx, test_kv.first, true); bool correct = res.size() == test_kv.second.size(); diff --git a/utils.cpp b/utils.cpp index b15c68ade..f9c4c6848 100644 --- a/utils.cpp +++ b/utils.cpp @@ -3,12 +3,9 @@ #include #include #include -#include -#include -#include -#include #include -#include +#include +#include #if defined(_MSC_VER) || defined(__MINGW32__) #include // using malloc.h with MSC/MINGW @@ -147,509 +144,11 @@ std::string gpt_random_prompt(std::mt19937 & rng) { return "The"; } -void replace(std::string & str, const std::string & needle, const std::string & replacement) { - size_t pos = 0; - while ((pos = str.find(needle, pos)) != std::string::npos) { - str.replace(pos, needle.length(), replacement); - pos += replacement.length(); - } -} - -std::unordered_map json_parse(const std::string & fname) { - std::unordered_map result; - - // read file into string - std::string json; - { - std::ifstream ifs(fname); - if (!ifs) { - fprintf(stderr, "Failed to open %s\n", fname.c_str()); - exit(1); - } - - json = std::string((std::istreambuf_iterator(ifs)), - (std::istreambuf_iterator())); - } - - if (json[0] != '{') { - return result; - } - - // parse json - { - bool has_key = false; - bool in_token = false; - - std::string str_key = ""; - std::string str_val = ""; - - int n = json.size(); - for (int i = 1; i < n; ++i) { - if (!in_token) { - if (json[i] == ' ') continue; - if (json[i] == '"') { - in_token = true; - continue; - } - } else { - if (json[i] == '\\' && i+1 < n) { - if (has_key == false) { - str_key += json[i]; - } else { - str_val += json[i]; - } - ++i; - } else if (json[i] == '"') { - if (has_key == false) { - has_key = true; - ++i; - while (json[i] == ' ') ++i; - ++i; // : - while (json[i] == ' ') ++i; - if (json[i] != '\"') { - while (json[i] != ',' && json[i] != '}') { - str_val += json[i++]; - } - has_key = false; - } else { - in_token = true; - continue; - } - } else { - has_key = false; - } - - ::replace(str_key, "\\u0120", " " ); // \u0120 -> space - ::replace(str_key, "\\u010a", "\n"); // \u010a -> new line - ::replace(str_key, "\\\"", "\""); // \\\" -> " - - try { - result[str_key] = std::stoi(str_val); - } catch (...) 
{ - //fprintf(stderr, "%s: ignoring key '%s' with value '%s'\n", fname.c_str(), str_key.c_str(), str_val.c_str()); - - } - str_key = ""; - str_val = ""; - in_token = false; - continue; - } - if (has_key == false) { - str_key += json[i]; - } else { - str_val += json[i]; - } - } - } - } - - return result; -} - -static size_t utf8_len(char src) { - const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 }; - uint8_t highbits = static_cast(src) >> 4; - return lookup[highbits]; -} - -struct llama_sp_symbol { - using index = int; - index prev; - index next; - const char * text; - size_t n; -}; - -struct llama_sp_bigram { - struct comparator { - bool operator()(llama_sp_bigram & l, llama_sp_bigram & r) { - return (l.score < r.score) || (l.score == r.score && l.left > r.left); - } - }; - using queue_storage = std::vector; - using queue = std::priority_queue; - llama_sp_symbol::index left; - llama_sp_symbol::index right; - float score; - size_t size; -}; - -// original implementation: -// https://github.com/ggerganov/llama.cpp/commit/074bea2eb1f1349a0118239c4152914aecaa1be4 -struct llama_tokenizer { - llama_tokenizer(const llama_vocab & vocab): vocab_(vocab) {} - - void tokenize(const std::string & text, std::vector & output) { - // split string into utf8 chars - int index = 0; - size_t offs = 0; - while (offs < text.size()) { - llama_sp_symbol sym; - size_t char_len = std::min(text.size() - offs, utf8_len(text[offs])); - sym.text = text.c_str() + offs; - sym.n = char_len; - offs += char_len; - sym.prev = index - 1; - sym.next = offs == text.size() ? -1 : index + 1; - index++; - symbols_.emplace_back(std::move(sym)); - } - - // seed the work queue with all possible 2-character tokens. - for (size_t i = 1; i < symbols_.size(); ++i) { - try_add_bigram(i - 1, i); - } - - // keep substituting the highest frequency pairs for as long as we can. - while (!work_queue_.empty()) { - auto bigram = work_queue_.top(); - work_queue_.pop(); - - auto & left_sym = symbols_[bigram.left]; - auto & right_sym = symbols_[bigram.right]; - - // if one of the symbols already got merged, skip it. - if (left_sym.n == 0 || right_sym.n == 0 || - left_sym.n + right_sym.n != bigram.size) { - continue; - } - - // merge the right sym into the left one - left_sym.n += right_sym.n; - right_sym.n = 0; - - //printf("left = '%*s' size = %zu\n", (int) left_sym.n, left_sym.text, bigram.size); - - // remove the right sym from the chain - left_sym.next = right_sym.next; - if (right_sym.next >= 0) { - symbols_[right_sym.next].prev = bigram.left; - } - - // find more substitutions - try_add_bigram(left_sym.prev, bigram.left); - try_add_bigram(bigram.left, left_sym.next); - } - - for (int i = 0; i != -1; i = symbols_[i].next) { - auto & symbol = symbols_[i]; - auto token = vocab_.token_to_id.find(std::string(symbol.text, symbol.n)); - - if (token == vocab_.token_to_id.end()) { - // output any symbols that did not form tokens as bytes. 
- for (int j = 0; j < (int) symbol.n; ++j) { - llama_vocab::id token_id = static_cast(symbol.text[j]) + 3; - output.push_back(token_id); - } - } else { - output.push_back((*token).second); - } - } - } - -private: - void try_add_bigram(int left, int right) { - if (left == -1 || right == -1) { - return; - } - - const std::string text = std::string(symbols_[left].text, symbols_[left].n + symbols_[right].n); - auto token = vocab_.token_to_id.find(text); - - if (token == vocab_.token_to_id.end()) { - return; - } - - if (static_cast((*token).second) >= vocab_.id_to_token.size()) { - return; - } - - const auto &tok_score = vocab_.id_to_token[(*token).second]; - - llama_sp_bigram bigram; - bigram.left = left; - bigram.right = right; - bigram.score = tok_score.score; - bigram.size = text.size(); - work_queue_.push(bigram); - } - - const llama_vocab & vocab_; - std::vector symbols_; - llama_sp_bigram::queue work_queue_; -}; - -// TODO: temporary code duplication with llama.cpp -// will resolve after #77 is merged -bool llama_vocab_load(const std::string & fname, llama_vocab & vocab) { - std::ifstream fin(fname, std::ios::binary); - if (!fin.is_open()) { - return false; - } - - int n_vocab = 0; - fin.read((char *) &n_vocab, sizeof(n_vocab)); - - std::string word; - std::vector tmp(64); - - vocab.id_to_token.resize(n_vocab); - - for (int i = 0; i < n_vocab; i++) { - uint32_t len; - fin.read((char *) &len, sizeof(len)); - - word.resize(len); - if (len > 0) { - tmp.resize(len); - fin.read(tmp.data(), len); - word.assign(tmp.data(), len); - } else { - word.clear(); - } - - float score; - fin.read((char *) &score, sizeof(score)); - - vocab.token_to_id[word] = i; - - auto &tok_score = vocab.id_to_token[i]; - tok_score.tok = word; - tok_score.score = score; - } - - return true; -} - -std::vector llama_tokenize(const llama_vocab & vocab, const std::string & text, bool bos) { - llama_tokenizer tokenizer(vocab); - std::vector output; - - if (text.size() == 0) { - return output; - } - - if (bos) { - output.push_back(1); - } - - tokenizer.tokenize(text, output); - return output; -} - -void sample_top_k(std::vector> & logits_id, int top_k) { - // find the top K tokens - std::partial_sort( - logits_id.begin(), - logits_id.begin() + top_k, logits_id.end(), - [](const std::pair & a, const std::pair & b) { - return a.first > b.first; - }); - - logits_id.resize(top_k); -} - -llama_vocab::id llama_sample_top_p_top_k( - const llama_vocab & vocab, - const float * logits, - std::vector & last_n_tokens, - double repeat_penalty, - int top_k, - double top_p, - double temp, - std::mt19937 & rng) { - int n_logits = vocab.id_to_token.size(); - - std::vector> logits_id; - logits_id.reserve(n_logits); - - { - const double scale = 1.0/temp; - for (int i = 0; i < n_logits; ++i) { - // repetition penalty from CTRL paper (https://arxiv.org/abs/1909.05858) - // credit https://github.com/facebookresearch/llama/compare/main...shawwn:llama:main - if (std::find(last_n_tokens.begin(), last_n_tokens.end(), i) != last_n_tokens.end()) { - // if score < 0 then repetition penalty has to multiplied to reduce the previous token probability - if (logits[i] < 0.0) { - logits_id.push_back(std::make_pair(logits[i]*scale*repeat_penalty, i)); - } else { - logits_id.push_back(std::make_pair(logits[i]*scale/repeat_penalty, i)); - } - } else { - logits_id.push_back(std::make_pair(logits[i]*scale, i)); - } - } - } - - sample_top_k(logits_id, top_k); - - double maxl = -INFINITY; - for (const auto & kv : logits_id) { - maxl = std::max(maxl, kv.first); - } - 
- // compute probs for the top K tokens - std::vector probs; - probs.reserve(logits_id.size()); - - double sum = 0.0; - for (const auto & kv : logits_id) { - double p = exp(kv.first - maxl); - probs.push_back(p); - sum += p; - } - - // normalize the probs - for (auto & p : probs) { - p /= sum; - } - - if (top_p < 1.0f) { - double cumsum = 0.0f; - for (int i = 0; i < (int) probs.size(); i++) { - cumsum += probs[i]; - if (cumsum >= top_p) { - probs.resize(i + 1); - logits_id.resize(i + 1); - break; - } - } - - cumsum = 1.0/cumsum; - for (int i = 0; i < (int) probs.size(); i++) { - probs[i] *= cumsum; - } - } - - //printf("\n"); - //for (int i = 0; i < (int) 10; i++) { - // printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]); - //} - //printf("\n\n"); - //exit(0); - - std::discrete_distribution<> dist(probs.begin(), probs.end()); - int idx = dist(rng); - - return logits_id[idx].second; -} - - -size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist) { - const int nb = k / qk; - const size_t bs = (sizeof(float) + sizeof(uint8_t)*qk/2); - const size_t row_size = nb*bs; - - assert(k % qk == 0); - - const size_t pp_size = qk / 2; - uint8_t *pp = static_cast(alloca(pp_size)); - - char * pdst = (char *) dst; - - for (int j = 0; j < n; j += k) { - uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); - uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); - - for (int i = 0; i < nb; i++) { - float amax = 0.0f; // absolute max - - { - for (int l = 0; l < qk; l++) { - const float v = src[j + i*qk + l]; - amax = std::max(amax, fabsf(v)); - } - - const float d = amax / ((1 << 3) - 1); - const float id = d ? 1.0f/d : 0.0f; - - *(float *) pd = d; - pd += bs; - - for (int l = 0; l < qk; l += 2) { - const float v0 = (src[j + i*qk + l + 0])*id; - const float v1 = (src[j + i*qk + l + 1])*id; - - const uint8_t vi0 = ((int8_t) (round(v0))) + 8; - const uint8_t vi1 = ((int8_t) (round(v1))) + 8; - - assert(vi0 >= 0 && vi0 < 16); - assert(vi1 >= 0 && vi1 < 16); - - hist[vi0]++; - hist[vi1]++; - - pp[l/2] = vi0 | (vi1 << 4); - } - - memcpy(pb, pp, pp_size); - pb += bs; - } - } - } - - return (n/k)*row_size; -} - -size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist) { - const int nb = k / qk; - const size_t bs = (2*sizeof(float) + sizeof(uint8_t)*qk/2); - const size_t row_size = nb*bs; - - assert(k % qk == 0); - - const size_t pp_size = qk / 2; - uint8_t *pp = static_cast(alloca(pp_size)); - - char * pdst = (char *) dst; - - for (int j = 0; j < n; j += k) { - uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); - uint8_t * pm = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); - uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + 2*sizeof(float)); - - //printf("n = %d, k = %d, nb = %d, row_size = %d, j = %d, pm = %p, pd = %p, pb = %p\n", n, k, nb, row_size, j, pm, pd, pb); - - for (int i = 0; i < nb; i++) { - float min = std::numeric_limits::max(); - float max = std::numeric_limits::min(); - - { - for (int l = 0; l < qk; l++) { - const float v = src[j + i*qk + l]; - if (v < min) min = v; - if (v > max) max = v; - } - - const float d = (max - min) / ((1 << 4) - 1); - const float id = d ? 
1.0f/d : 0.0f; - - *(float *) pd = d; - *(float *) pm = min; - pd += bs; - pm += bs; - - for (int l = 0; l < qk; l += 2) { - const float v0 = (src[j + i*qk + l + 0] - min)*id; - const float v1 = (src[j + i*qk + l + 1] - min)*id; - - const uint8_t vi0 = round(v0); - const uint8_t vi1 = round(v1); - - assert(vi0 >= 0 && vi0 < 16); - assert(vi1 >= 0 && vi1 < 16); - - hist[vi0]++; - hist[vi1]++; - - pp[l/2] = vi0 | (vi1 << 4); - } - - memcpy(pb, pp, pp_size); - pb += bs; - } - } - } - - return (n/k)*row_size; +// TODO: not great allocating this every time +std::vector llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) { + std::vector res(8096); + int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); + res.resize(n); + + return res; } diff --git a/utils.h b/utils.h index 312903859..3f970eabb 100644 --- a/utils.h +++ b/utils.h @@ -2,8 +2,9 @@ #pragma once +#include "llama.h" + #include -#include #include #include #include @@ -49,64 +50,8 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params); std::string gpt_random_prompt(std::mt19937 & rng); -// -// Model file parsing -// - -#define FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files -#define FILE_MAGIC 0x67676d66 // 'ggmf' in hex -#define FILE_VERSION 1 - // // Vocab utils // -struct llama_vocab { - using id = int32_t; - using token = std::string; - - struct token_score { - token tok; - float score; - }; - - std::unordered_map token_to_id; - std::vector id_to_token; -}; - -void replace(std::string & str, const std::string & needle, const std::string & replacement); - -// poor-man's JSON parsing -std::unordered_map json_parse(const std::string & fname); - -// TODO: temporary until #77 is merged, need this now for some tokenizer tests -bool llama_vocab_load(const std::string & fname, llama_vocab & vocab); - -// TODO: this is probably wrong, but I cannot figure out how this tokenizer works .. -// ref: https://github.com/google/sentencepiece -std::vector llama_tokenize(const llama_vocab & vocab, const std::string & text, bool bos); - -// sample next token given probabilities for each embedding -// -// - consider only the top K tokens -// - from them, consider only the top tokens with cumulative probability > P -// -llama_vocab::id llama_sample_top_p_top_k( - const llama_vocab & vocab, - const float * logits, - std::vector & last_n_tokens, - double repeat_penalty, - int top_k, - double top_p, - double temp, - std::mt19937 & rng); - -// filer to top K tokens from list of logits -void sample_top_k(std::vector> & logits_id, int top_k); - -// -// Quantization -// - -size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist); -size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist); +std::vector llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos); From 56817b1f882b1894daa4051d0de0bf9a0926d315 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 22 Mar 2023 07:34:02 +0200 Subject: [PATCH 82/93] Remove temporary notice and update hot topics --- README.md | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/README.md b/README.md index fa70499b4..7c9a4bf49 100644 --- a/README.md +++ b/README.md @@ -5,17 +5,9 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ ---- - -**TEMPORARY NOTICE:** -Big code change incoming: https://github.com/ggerganov/llama.cpp/pull/370 - -Do not merge stuff until we merge this. 
Probably merge will happen on March 22 ~6:00am UTC - ---- - **Hot topics:** +- New C-style API is now available: https://github.com/ggerganov/llama.cpp/pull/370 - [Added Alpaca support](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca) - Cache input prompts for faster initialization: https://github.com/ggerganov/llama.cpp/issues/64 - Create a `llama.cpp` logo: https://github.com/ggerganov/llama.cpp/issues/105 From 928480ef5b7b03d7a07e98286aebe3d8b24457d9 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 22 Mar 2023 07:45:00 +0200 Subject: [PATCH 83/93] Init llama_context_params properly from CLI (#370) --- llama.cpp | 4 ++++ main.cpp | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 08dfcb31f..fde4d2516 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1398,6 +1398,10 @@ struct llama_context * llama_init_from_file( llama_context * ctx = new llama_context; + if (params.seed <= 0) { + params.seed = time(NULL); + } + ctx->rng = std::mt19937(params.seed); ctx->logits_all = params.logits_all; diff --git a/main.cpp b/main.cpp index 7db3df7e9..b98c9e2b3 100644 --- a/main.cpp +++ b/main.cpp @@ -194,7 +194,10 @@ int main(int argc, char ** argv) { { auto lparams = llama_context_default_params(); - lparams.f16_kv = params.memory_f16; + lparams.n_ctx = params.n_ctx; + lparams.n_parts = params.n_parts; + lparams.seed = params.seed; + lparams.f16_kv = params.memory_f16; lparams.logits_all = params.perplexity; ctx = llama_init_from_file(params.model.c_str(), lparams); From ae44e23ee36c02da0e37ab508a4b473ace724f8e Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 22 Mar 2023 07:47:15 +0200 Subject: [PATCH 84/93] When seed <= 0 - use the clock to generate one --- main.cpp | 2 +- utils.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index b98c9e2b3..c164c102d 100644 --- a/main.cpp +++ b/main.cpp @@ -170,7 +170,7 @@ int main(int argc, char ** argv) { "expect poor results\n", __func__, params.n_ctx); } - if (params.seed < 0) { + if (params.seed <= 0) { params.seed = time(NULL); } diff --git a/utils.cpp b/utils.cpp index f9c4c6848..1679ae10a 100644 --- a/utils.cpp +++ b/utils.cpp @@ -101,7 +101,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " in interactive mode, poll user input upon seeing PROMPT (can be\n"); fprintf(stderr, " specified more than once for multiple prompts).\n"); fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n"); - fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n"); + fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for <= 0)\n"); fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); fprintf(stderr, " -p PROMPT, --prompt PROMPT\n"); fprintf(stderr, " prompt to start generation with (default: empty)\n"); From d5850c53ca179b9674b98f35d359763416a3cc11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yusuf=20Ka=C4=9Fan=20Hano=C4=9Flu?= Date: Wed, 22 Mar 2023 11:55:45 +0300 Subject: [PATCH 85/93] Add missing header for memcpy (#386) fixed: memcpy is not defined --- llama.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llama.cpp b/llama.cpp index fde4d2516..7de3c19c8 100644 --- a/llama.cpp +++ b/llama.cpp @@ -9,6 +9,7 @@ #include #include #include +#include // determine number of model parts based on the dimension static const std::unordered_map LLAMA_N_PARTS = { From 
40ea807a972ec7b5a426f034ebfa593b5e7a06ed Mon Sep 17 00:00:00 2001 From: Gary Linscott Date: Wed, 22 Mar 2023 08:53:54 -0700 Subject: [PATCH 86/93] Add details on perplexity to README.md (#395) --- README.md | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7c9a4bf49..b5a113c91 100644 --- a/README.md +++ b/README.md @@ -240,6 +240,40 @@ or `shasum -a 256 --ignore-missing -c SHA256SUMS` on macOS +### Perplexity (Measuring model quality) + +You can pass `--perplexity` as a command line option to measure perplexity over the given prompt. For more background, +see https://huggingface.co/docs/transformers/perplexity. However, in general, lower perplexity is better for LLMs. + +#### Measurements + +https://github.com/ggerganov/llama.cpp/pull/270 is the unofficial tracking page for now. llama.cpp is measuring very well +compared to the baseline implementations. Quantization has a small negative impact to quality, but, as you can see, running +13B at q4_0 beats the 7B f16 model by a significant amount. + +All measurements are done against wikitext2 test dataset (https://paperswithcode.com/dataset/wikitext-2), with default options (512 length context). +Note that the changing the context length will have a significant impact on perplexity (longer context = better perplexity). +``` +Perplexity - model options +5.5985 - 13B, q4_0 +5.9565 - 7B, f16 +6.3001 - 7B, q4_1 +6.5949 - 7B, q4_0 +6.5995 - 7B, q4_0, --memory_f16 +``` + +#### How to run + +1. Download/extract: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research +2. Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw` +3. Output: +``` +Calculating perplexity over 655 chunks +24.43 seconds per pass - ETA 4.45 hours +[1]4.5970,[2]5.1807,[3]6.0382,... +``` +And after 4.45 hours, you will have the final perplexity. + ### Android You can easily run `llama.cpp` on Android device with [termux](https://play.google.com/store/apps/details?id=com.termux). @@ -290,7 +324,6 @@ docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models ## Limitations -- We don't know yet how much the quantization affects the quality of the generated text - Probably the token sampling can be improved - The Accelerate framework is actually currently unused since I found that for tensor shapes typical for the Decoder, there is no benefit compared to the ARM_NEON intrinsics implementation. 
Of course, it's possible that I simply don't From 56e659a0b271436e24813a801640d015e7b05328 Mon Sep 17 00:00:00 2001 From: Erik Scholz Date: Wed, 22 Mar 2023 17:09:38 +0100 Subject: [PATCH 87/93] fix perplexity after c-api refactor (#390) * preallocate a buffer of fitting size for tokenization (utils.cpp) * don't create a new std::string (especially here, where it's usually large) --- main.cpp | 2 +- utils.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index c164c102d..fbb43a8cc 100644 --- a/main.cpp +++ b/main.cpp @@ -85,7 +85,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) { // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research // Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw` // Output: `perplexity: 13.5106 [114/114]` - auto tokens = ::llama_tokenize(ctx, params.prompt.c_str(), true); + auto tokens = ::llama_tokenize(ctx, params.prompt, true); int count = 0; double nll = 0.0; diff --git a/utils.cpp b/utils.cpp index 1679ae10a..3909c974f 100644 --- a/utils.cpp +++ b/utils.cpp @@ -146,8 +146,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) { // TODO: not great allocating this every time std::vector llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) { - std::vector res(8096); + // initialize to prompt numer of chars, since n_tokens <= n_prompt_chars + std::vector res(text.size() + (int)add_bos); int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos); + assert(n >= 0); res.resize(n); return res; From 4122dffff958cd137175b58f1f27c0913528d7ba Mon Sep 17 00:00:00 2001 From: Erik Scholz Date: Wed, 22 Mar 2023 17:37:10 +0100 Subject: [PATCH 88/93] cmake: make llama an actual library (#392) --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 400cecf9c..d952afb4f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -217,6 +217,7 @@ add_library(utils OBJECT target_include_directories(utils PUBLIC .) target_compile_features(utils PUBLIC cxx_std_11) # don't bump +target_link_libraries(utils PRIVATE ${LLAMA_EXTRA_LIBS}) add_library(ggml OBJECT ggml.c @@ -226,12 +227,13 @@ target_include_directories(ggml PUBLIC .) target_compile_features(ggml PUBLIC c_std_11) # don't bump target_link_libraries(ggml PRIVATE Threads::Threads ${LLAMA_EXTRA_LIBS}) -add_library(llama OBJECT +add_library(llama llama.cpp llama.h) target_include_directories(llama PUBLIC .) target_compile_features(llama PUBLIC cxx_std_11) # don't bump +target_link_libraries(llama PRIVATE utils ggml ${LLAMA_EXTRA_LIBS}) # # Executables From 305ba6f0e6daa3796aad9dd18053a1945dd4cc58 Mon Sep 17 00:00:00 2001 From: tjohnman Date: Wed, 22 Mar 2023 18:16:35 +0100 Subject: [PATCH 89/93] Don't force immediate interactive without `-i` (#354) * Don't force immediate interactive without -i Sometimes we might want to use a reverse prompt but we want to let the model generate tokens right after the initial prompt. So we don't force user input mode if the -i flag wasn't specified and instead let it run until we encounter the reverse prompt. This gives use some more flexibility, since it doesn't force the user to enter a newline if they want to let the model generate text right after the initial prompt and only be asked for input if the reverse prompt is encountered. The `--interactive-first` flag is reintroduced to force the old behavior. 
`-r` behaves like `-i` plus introduces a reverse prompt (it can be specified more than once). * Update help output. --------- Co-authored-by: Johnman --- main.cpp | 6 +++++- utils.cpp | 5 ++++- utils.h | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/main.cpp b/main.cpp index fbb43a8cc..4569ef2a1 100644 --- a/main.cpp +++ b/main.cpp @@ -254,6 +254,10 @@ int main(int argc, char ** argv) { params.interactive = true; } + if (params.interactive_start) { + params.interactive = true; + } + fprintf(stderr, "\n"); fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size()); @@ -296,7 +300,7 @@ int main(int argc, char ** argv) { #endif " - Press Return to return control to LLaMa.\n" " - If you want to submit another line, end your input in '\\'.\n\n"); - is_interacting = true; + is_interacting = params.interactive_start; } int input_consumed = 0; diff --git a/utils.cpp b/utils.cpp index 3909c974f..1d5309c3a 100644 --- a/utils.cpp +++ b/utils.cpp @@ -63,6 +63,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { params.model = argv[++i]; } else if (arg == "-i" || arg == "--interactive") { params.interactive = true; + } else if (arg == "--interactive-first") { + params.interactive_start = true; } else if (arg == "-ins" || arg == "--instruct") { params.instruct = true; } else if (arg == "--color") { @@ -96,9 +98,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, "options:\n"); fprintf(stderr, " -h, --help show this help message and exit\n"); fprintf(stderr, " -i, --interactive run in interactive mode\n"); + fprintf(stderr, " --interactive-first run in interactive mode and wait for input right away\n"); fprintf(stderr, " -ins, --instruct run in instruction mode (use with Alpaca models)\n"); fprintf(stderr, " -r PROMPT, --reverse-prompt PROMPT\n"); - fprintf(stderr, " in interactive mode, poll user input upon seeing PROMPT (can be\n"); + fprintf(stderr, " run in interactive mode and poll user input upon seeing PROMPT (can be\n"); fprintf(stderr, " specified more than once for multiple prompts).\n"); fprintf(stderr, " --color colorise output to distinguish prompt and user input from generations\n"); fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for <= 0)\n"); diff --git a/utils.h b/utils.h index 3f970eabb..b0de556c9 100644 --- a/utils.h +++ b/utils.h @@ -38,7 +38,7 @@ struct gpt_params { bool random_prompt = false; // do not randomize prompt if none provided bool use_color = false; // use color to distinguish generations and inputs bool interactive = false; // interactive mode - bool interactive_start = false; // reverse prompt immediately + bool interactive_start = false; // wait for user input immediately bool instruct = false; // instruction mode (used for Alpaca models) bool ignore_eos = false; // do not stop generating after eos bool perplexity = false; // compute perplexity over the prompt From 97940520e8fd49c56bb29b71cc350190b723513f Mon Sep 17 00:00:00 2001 From: Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com> Date: Wed, 22 Mar 2023 18:20:25 +0100 Subject: [PATCH 90/93] fix: add POSIX functionality for Linux compilation (#51) * fix: add POSIX functionality for Linux compilation * fix: older standard for compatibility --- ggml.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ggml.c b/ggml.c index d00544577..7ea9f6228 100644 --- a/ggml.c +++ b/ggml.c @@ -1,3 
+1,6 @@ +// Defines CLOCK_MONOTONIC on Linux +#define _POSIX_C_SOURCE 199309L + #include "ggml.h" #if defined(_MSC_VER) || defined(__MINGW32__) From 69c92298a9e36dc2363b3bf50452976ce49487b3 Mon Sep 17 00:00:00 2001 From: Stephan Walter Date: Wed, 22 Mar 2023 17:29:06 +0000 Subject: [PATCH 91/93] Deduplicate q4 quantization functions (#383) * Deduplicate q4 quantization functions * Use const; add basic test * Re-enable quantization test * Disable AVX2 flags in CI --------- Co-authored-by: Georgi Gerganov --- .github/workflows/build.yml | 2 +- ggml.c | 171 ++++++++++++++---------------------- ggml.h | 4 +- tests/CMakeLists.txt | 13 ++- tests/test-quantize.c | 42 +++++++++ 5 files changed, 119 insertions(+), 113 deletions(-) create mode 100644 tests/test-quantize.c diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5882fc747..6ce9cc726 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -89,7 +89,7 @@ jobs: run: | mkdir build cd build - cmake .. + cmake -DLLAMA_AVX2=OFF .. cmake --build . --config Release ctest --output-on-failure diff --git a/ggml.c b/ggml.c index 7ea9f6228..0e4b1466c 100644 --- a/ggml.c +++ b/ggml.c @@ -403,9 +403,55 @@ static inline __m128i packNibbles( __m256i bytes ) // method 5 // blocks of QK elements // represented with a single float (delta) and QK/2 8-bit ints (i.e QK 4-bit signed integer factors) + +// reference implementation for deterministic creation of model files +static void quantize_row_q4_0_reference(const float * restrict x, void * restrict y, int k) { + assert(k % QK == 0); + const int nb = k / QK; + + const size_t bs = sizeof(float) + QK/2; + + uint8_t * restrict pd = ((uint8_t *)y + 0*bs); + uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float)); + + uint8_t pp[QK/2]; + + for (int i = 0; i < nb; i++) { + float amax = 0.0f; // absolute max + + for (int l = 0; l < QK; l++) { + const float v = x[i*QK + l]; + amax = MAX(amax, fabsf(v)); + } + + const float d = amax / ((1 << 3) - 1); + const float id = d ? 1.0f/d : 0.0f; + + *(float *)pd = d; + pd += bs; + + for (int l = 0; l < QK; l += 2) { + const float v0 = x[i*QK + l + 0]*id; + const float v1 = x[i*QK + l + 1]*id; + + const uint8_t vi0 = ((int8_t) (round(v0))) + 8; + const uint8_t vi1 = ((int8_t) (round(v1))) + 8; + + assert(vi0 >= 0 && vi0 < 16); + assert(vi1 >= 0 && vi1 < 16); + + pp[l/2] = vi0 | (vi1 << 4); + } + + memcpy(pb, pp, sizeof(pp)); + pb += bs; + } +} + void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { assert(k % QK == 0); +#if __ARM_NEON || defined(__AVX2__) || defined(__wasm_simd128__) const int nb = k / QK; const size_t bs = sizeof(float) + QK/2; @@ -413,6 +459,7 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float)); uint8_t pp[QK/2]; +#endif #if __ARM_NEON #if QK == 32 @@ -569,36 +616,7 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { #endif #else // scalar - for (int i = 0; i < nb; i++) { - float amax = 0.0f; // absolute max - - for (int l = 0; l < QK; l++) { - const float v = x[i*QK + l]; - amax = MAX(amax, fabsf(v)); - } - - const float d = amax / ((1 << 3) - 1); - const float id = d ? 
1.0f/d : 0.0f; - - *(float *)pd = d; - pd += bs; - - for (int l = 0; l < QK; l += 2) { - const float v0 = x[i*QK + l + 0]*id; - const float v1 = x[i*QK + l + 1]*id; - - const uint8_t vi0 = ((int8_t) (round(v0))) + 8; - const uint8_t vi1 = ((int8_t) (round(v1))) + 8; - - assert(vi0 >= 0 && vi0 < 16); - assert(vi1 >= 0 && vi1 < 16); - - pp[l/2] = vi0 | (vi1 << 4); - } - - memcpy(pb, pp, sizeof(pp)); - pb += bs; - } + quantize_row_q4_0_reference(x, y, k); #endif } @@ -10705,119 +10723,60 @@ enum ggml_opt_result ggml_opt( //////////////////////////////////////////////////////////////////////////////// -size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist) { +size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int qk, int64_t * hist) { const int nb = k / qk; const size_t bs = (sizeof(float) + sizeof(uint8_t)*qk/2); const size_t row_size = nb*bs; assert(k % qk == 0); - const size_t pp_size = qk / 2; - uint8_t * pp = (uint8_t *) alloca(pp_size); - char * pdst = (char *) dst; for (int j = 0; j < n; j += k) { uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); + quantize_row_q4_0_reference(src + j, pd, k); + for (int i = 0; i < nb; i++) { - float amax = 0.0f; // absolute max + for (int l = 0; l < qk; l += 2) { + const uint8_t vi0 = pb[l/2] & 0xF; + const uint8_t vi1 = pb[l/2] >> 4; - { - for (int l = 0; l < qk; l++) { - const float v = src[j + i*qk + l]; - amax = MAX(amax, fabsf(v)); - } - - const float d = amax / ((1 << 3) - 1); - const float id = d ? 1.0f/d : 0.0f; - - *(float *) pd = d; - pd += bs; - - for (int l = 0; l < qk; l += 2) { - const float v0 = (src[j + i*qk + l + 0])*id; - const float v1 = (src[j + i*qk + l + 1])*id; - - const uint8_t vi0 = ((int8_t) (round(v0))) + 8; - const uint8_t vi1 = ((int8_t) (round(v1))) + 8; - - assert(vi0 >= 0 && vi0 < 16); - assert(vi1 >= 0 && vi1 < 16); - - hist[vi0]++; - hist[vi1]++; - - pp[l/2] = vi0 | (vi1 << 4); - } - - memcpy(pb, pp, pp_size); - pb += bs; + hist[vi0]++; + hist[vi1]++; } + pb += bs; } } return (n/k)*row_size; } -size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist) { +size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int qk, int64_t * hist) { const int nb = k / qk; const size_t bs = (2*sizeof(float) + sizeof(uint8_t)*qk/2); const size_t row_size = nb*bs; assert(k % qk == 0); - const size_t pp_size = qk / 2; - uint8_t * pp = (uint8_t *) alloca(pp_size); - char * pdst = (char *) dst; for (int j = 0; j < n; j += k) { uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs); - uint8_t * pm = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float)); uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + 2*sizeof(float)); - //printf("n = %d, k = %d, nb = %d, row_size = %d, j = %d, pm = %p, pd = %p, pb = %p\n", n, k, nb, row_size, j, pm, pd, pb); + quantize_row_q4_1(src + j, pd, k); for (int i = 0; i < nb; i++) { - float min = FLT_MAX; - float max = -FLT_MAX; + for (int l = 0; l < qk; l += 2) { + const uint8_t vi0 = pb[l/2] & 0xF; + const uint8_t vi1 = pb[l/2] >> 4; - { - for (int l = 0; l < qk; l++) { - const float v = src[j + i*qk + l]; - if (v < min) min = v; - if (v > max) max = v; - } - - const float d = (max - min) / ((1 << 4) - 1); - const float id = d ? 
1.0f/d : 0.0f; - - *(float *) pd = d; - *(float *) pm = min; - pd += bs; - pm += bs; - - for (int l = 0; l < qk; l += 2) { - const float v0 = (src[j + i*qk + l + 0] - min)*id; - const float v1 = (src[j + i*qk + l + 1] - min)*id; - - const uint8_t vi0 = round(v0); - const uint8_t vi1 = round(v1); - - assert(vi0 >= 0 && vi0 < 16); - assert(vi1 >= 0 && vi1 < 16); - - hist[vi0]++; - hist[vi1]++; - - pp[l/2] = vi0 | (vi1 << 4); - } - - memcpy(pb, pp, pp_size); - pb += bs; + hist[vi0]++; + hist[vi1]++; } + pb += bs; } } diff --git a/ggml.h b/ggml.h index 48b6cc028..c7e6814a8 100644 --- a/ggml.h +++ b/ggml.h @@ -745,8 +745,8 @@ enum ggml_opt_result ggml_opt( // quantization // -size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist); -size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t * hist); +size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int qk, int64_t * hist); +size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int qk, int64_t * hist); // // system info diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4990c3432..6a4170f80 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,4 +1,9 @@ -set(TEST_TARGET test-tokenizer-0) -add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE llama ggml utils) -add_test(NAME ${TEST_TARGET} COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin) +function(llama_add_test source) + get_filename_component(TEST_TARGET ${source} NAME_WE) + add_executable(${TEST_TARGET} ${source}) + target_link_libraries(${TEST_TARGET} PRIVATE llama ggml utils) + add_test(NAME ${TEST_TARGET} COMMAND $ ${ARGN}) +endfunction() + +llama_add_test(test-quantize.c) +llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin) diff --git a/tests/test-quantize.c b/tests/test-quantize.c new file mode 100644 index 000000000..d59ecb8ab --- /dev/null +++ b/tests/test-quantize.c @@ -0,0 +1,42 @@ +#include "ggml.h" +#undef NDEBUG +#include +#include + +int main(void) { + #define QK 32 + float src[QK]; + uint8_t dst[24]; + int64_t hist[16]; + + for (int i = 0; i < QK; i++) { + src[i] = (float)(i + 1); + } + + size_t size = ggml_quantize_q4_0(src, dst, QK, QK, QK, hist); + assert(size == 20); + float max_result = ((float *)dst)[0]; + float max_expected = src[31] / ((1 << 3) - 1); + assert(max_result == max_expected); + for (int i = 0; i < QK; i++) { + uint8_t q4_result = (i % 2) ? (dst[sizeof(float) + i/2] >> 4) : (dst[sizeof(float) + i/2] & 0xF); + uint8_t q4_expected = roundf(src[i] / max_expected) + 8; + assert(q4_result == q4_expected); + } + + size = ggml_quantize_q4_1(src, dst, QK, QK, QK, hist); + assert(size == 24); + float delta_result = ((float *)dst)[0]; + float delta_expected = (src[31] - src[0]) / ((1 << 4) - 1); + assert(delta_result == delta_expected); + float min_result = ((float *)dst)[1]; + float min_expected = src[0]; + assert(min_result == min_expected); + for (int i = 0; i < QK; i++) { + uint8_t q4_result = (i % 2) ? 
(dst[sizeof(float)*2 + i/2] >> 4) : (dst[sizeof(float)*2 + i/2] & 0xF); + uint8_t q4_expected = roundf((src[i] - min_expected) / delta_expected); + assert(q4_result == q4_expected); + } + + return 0; +} From ee8a7887865a893be208e0a92d6d94d2cb66a789 Mon Sep 17 00:00:00 2001 From: Gary Mulder Date: Wed, 22 Mar 2023 19:06:18 +0000 Subject: [PATCH 92/93] Update issue template so people will use it (#404) --- .github/ISSUE_TEMPLATE/custom.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md index fe6a97bea..72224624c 100644 --- a/.github/ISSUE_TEMPLATE/custom.md +++ b/.github/ISSUE_TEMPLATE/custom.md @@ -1,7 +1,7 @@ --- -name: Custom issue template -about: Used to report user-related issues with the software -title: "[User] I encountered a problem .." +name: Issue and enhancement template +about: Used to report issues and request enhancements for llama.cpp +title: "[User] Insert summary of your issue or enhancement.." labels: '' assignees: '' @@ -18,11 +18,11 @@ Please answer the following questions for yourself before submitting an issue. # Expected Behavior -Please provide a detailed written description of what you were trying to do, and what you expected `lamma.cpp` to do. +Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do. # Current Behavior -Please provide a detailed written description of what `lamma.cpp` did, instead. +Please provide a detailed written description of what `llama.cpp` did, instead. # Environment and Context From f7dc43bc0d759732815856183246f167111587ad Mon Sep 17 00:00:00 2001 From: tjohnman Date: Thu, 23 Mar 2023 01:30:23 +0100 Subject: [PATCH 93/93] Fix instruct mode broken by PR #354 (#409) Co-authored-by: Johnman --- main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 4569ef2a1..431c94b52 100644 --- a/main.cpp +++ b/main.cpp @@ -300,7 +300,7 @@ int main(int argc, char ** argv) { #endif " - Press Return to return control to LLaMa.\n" " - If you want to submit another line, end your input in '\\'.\n\n"); - is_interacting = params.interactive_start; + is_interacting = params.interactive_start || params.instruct; } int input_consumed = 0;
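
The Q4_0 scheme that the quantization patches above keep refining stores each block of `QK = 32` floats as a single `float` scale `d` followed by `QK/2` = 16 bytes of packed 4-bit values, each offset by 8 so it fits in `[0, 15]`. The sketch below is a minimal, self-contained illustration of that layout: it mirrors the reference logic of `quantize_row_q4_0_reference` and the checks in `tests/test-quantize.c`, but it is only an illustration under simplified assumptions (single block, no row layout, hypothetical helper name), not the ggml implementation itself.

```
// Illustrative sketch only — not ggml code. Quantize one block of QK floats
// into one float scale plus QK/2 packed nibbles (Q4_0-style layout).
#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define QK 32

static void quantize_block_q4_0(const float * x, float * d_out, uint8_t nibbles[QK/2]) {
    float amax = 0.0f;                       // absolute max of the block
    for (int l = 0; l < QK; l++) {
        amax = fmaxf(amax, fabsf(x[l]));
    }

    const float d  = amax / ((1 << 3) - 1);  // scale: amax maps to +/-7
    const float id = d ? 1.0f/d : 0.0f;

    for (int l = 0; l < QK; l += 2) {
        const uint8_t vi0 = (uint8_t)((int8_t)roundf(x[l + 0]*id) + 8);  // offset into 0..15
        const uint8_t vi1 = (uint8_t)((int8_t)roundf(x[l + 1]*id) + 8);
        nibbles[l/2] = vi0 | (vi1 << 4);     // two 4-bit values per byte
    }

    *d_out = d;
}

int main(void) {
    float src[QK];
    float d;
    uint8_t packed[QK/2];

    for (int i = 0; i < QK; i++) {
        src[i] = (float)(i + 1);             // same test vector as tests/test-quantize.c
    }

    quantize_block_q4_0(src, &d, packed);

    // Show the (lossy) round trip for the first few values: x ~= (nibble - 8)*d
    for (int i = 0; i < 4; i++) {
        const uint8_t vi = (i % 2) ? (packed[i/2] >> 4) : (packed[i/2] & 0xF);
        printf("x[%d] = %6.2f  ->  q = %2d  ->  %6.2f\n", i, src[i], vi, ((int)vi - 8)*d);
    }

    return 0;
}
```

Dequantization simply reverses the mapping: subtract 8 from each nibble and multiply by the block scale `d`; Q4_1 works the same way except that it also stores a per-block minimum and uses an unsigned `0..15` range.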