From c300e68ef490e6cf6c04ed96fd27a6a53ab8a422 Mon Sep 17 00:00:00 2001 From: uvos Date: Wed, 29 Jan 2025 17:46:23 +0100 Subject: [PATCH 01/46] CUDA/HIP: add warp_size to cuda_device_info --- ggml/src/ggml-cuda/common.cuh | 1 + ggml/src/ggml-cuda/ggml-cuda.cu | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index a66322da0..eec227dce 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -520,6 +520,7 @@ struct ggml_cuda_device_info { bool vmm; // virtual memory support size_t vmm_granularity; // granularity of virtual memory size_t total_vram; + int warp_size; // Number of threads in a dispatch }; cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {}; diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index de3f9c2ca..ecf06fec4 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -242,6 +242,7 @@ static ggml_cuda_device_info ggml_cuda_init() { info.devices[id].nsm = prop.multiProcessorCount; info.devices[id].smpb = prop.sharedMemPerBlock; + info.devices[id].warp_size = prop.warpSize; #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) info.devices[id].smpbo = prop.sharedMemPerBlock; @@ -256,8 +257,9 @@ static ggml_cuda_device_info ggml_cuda_init() { info.devices[id].cc += prop.minor * 0x10; } } - GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s\n", - id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, device_vmm ? "yes" : "no"); + GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n", + id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, + device_vmm ? 
"yes" : "no", prop.warpSize); #else info.devices[id].smpbo = prop.sharedMemPerBlockOptin; info.devices[id].cc = 100*prop.major + 10*prop.minor; From 6af1ca48cbdf9a438438afd0a9a549a272bc95bf Mon Sep 17 00:00:00 2001 From: uvos Date: Wed, 29 Jan 2025 19:12:42 +0100 Subject: [PATCH 02/46] HIP: Prepare reduction operators for wave 64 --- ggml/src/ggml-cuda/common.cuh | 59 +++++++++++++++------------------ ggml/src/ggml-cuda/ggml-cuda.cu | 4 +-- 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index eec227dce..8d8d3932e 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -190,53 +190,46 @@ static __device__ void no_device_code( #define NO_DEVICE_CODE //GGML_ABORT("NO_DEVICE_CODE not valid in host code.") #endif // __CUDA_ARCH__ +template static __device__ __forceinline__ int warp_reduce_sum(int x) { #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE return __reduce_add_sync(0xffffffff, x); #else #pragma unroll - for (int offset = 16; offset > 0; offset >>= 1) { - x += __shfl_xor_sync(0xffffffff, x, offset, 32); + for (int offset = width/2; offset > 0; offset >>= 1) { + x += __shfl_xor_sync(0xffffffff, x, offset, width); } return x; #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE } +template static __device__ __forceinline__ float warp_reduce_sum(float x) { #pragma unroll - for (int offset = 16; offset > 0; offset >>= 1) { - x += __shfl_xor_sync(0xffffffff, x, offset, 32); + for (int offset = width/2; offset > 0; offset >>= 1) { + x += __shfl_xor_sync(0xffffffff, x, offset, width); } return x; } +template static __device__ __forceinline__ float2 warp_reduce_sum(float2 a) { #pragma unroll - for (int offset = 16; offset > 0; offset >>= 1) { - a.x += __shfl_xor_sync(0xffffffff, a.x, offset, 32); - a.y += __shfl_xor_sync(0xffffffff, a.y, offset, 32); + for (int 
offset = width/2; offset > 0; offset >>= 1) { + a.x += __shfl_xor_sync(0xffffffff, a.x, offset, width); + a.y += __shfl_xor_sync(0xffffffff, a.y, offset, width); } return a; } +template static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) { #ifdef FP16_AVAILABLE - -#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) #pragma unroll - for (int offset = 16; offset > 0; offset >>= 1) { - const half2 a_other = __shfl_xor_sync(0xffffffff, a, offset, 32); - reinterpret_cast(a.x) += __low2half(a_other); - reinterpret_cast(a.y) += __high2half(a_other); + for (int offset = width/2; offset > 0; offset >>= 1) { + a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, offset, width)); } return a; -#else -#pragma unroll - for (int offset = 16; offset > 0; offset >>= 1) { - a = __hadd2(a, __shfl_xor_sync(0xffffffff, a, offset, 32)); - } - return a; -#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) #else NO_DEVICE_CODE; @@ -244,10 +237,11 @@ static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) { #endif // FP16_AVAILABLE } +template static __device__ __forceinline__ float warp_reduce_max(float x) { #pragma unroll - for (int offset = 16; offset > 0; offset >>= 1) { - x = fmaxf(x, __shfl_xor_sync(0xffffffff, x, offset, 32)); + for (int offset = width/2; offset > 0; offset >>= 1) { + x = fmaxf(x, __shfl_xor_sync(0xffffffff, x, offset, width)); } return x; } @@ -269,35 +263,34 @@ static __device__ __forceinline__ half ggml_cuda_hmax(const half a, const half b } static __device__ __forceinline__ half2 ggml_cuda_hmax2(const half2 a, const half2 b) { -#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) - -#if CUDART_VERSION >= CUDART_HMAX +#if defined(GGML_USE_HIP) && HIP_VERSION >= 50700000 + return half2(__hmax(a.x, b.x), __hmax(a.y, b.y)); +#elif !defined(GGML_USE_HIP) && CUDART_VERSION >= CUDART_HMAX return __hmax2(a, b); -#else +#elif !defined(GGML_USE_HIP) half2 ret; reinterpret_cast(ret.x) = __float2half(fmaxf( __low2float(a), 
__low2float(b))); reinterpret_cast(ret.y) = __float2half(fmaxf(__high2float(a), __high2float(b))); return ret; -#endif // CUDART_VERSION >= CUDART_HMAX - #else GGML_UNUSED(a); GGML_UNUSED(b); NO_DEVICE_CODE; -#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +#endif } +template static __device__ __forceinline__ half2 warp_reduce_max(half2 x) { -#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL +#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000) #pragma unroll - for (int offset = 16; offset > 0; offset >>= 1) { - x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, 32)); + for (int offset = width/2; offset > 0; offset >>= 1) { + x = ggml_cuda_hmax2(x, __shfl_xor_sync(0xffffffff, x, offset, width)); } return x; #else GGML_UNUSED(x); NO_DEVICE_CODE; -#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL +#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || (defined(GGML_USE_HIP) && HIP_VERSION >= 50700000) } #if CUDART_VERSION < CUDART_HMASK diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index ecf06fec4..383131c77 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -240,8 +240,8 @@ static ggml_cuda_device_info ggml_cuda_init() { info.default_tensor_split[id] = total_vram; total_vram += prop.totalGlobalMem; - info.devices[id].nsm = prop.multiProcessorCount; - info.devices[id].smpb = prop.sharedMemPerBlock; + info.devices[id].nsm = prop.multiProcessorCount; + info.devices[id].smpb = prop.sharedMemPerBlock; info.devices[id].warp_size = prop.warpSize; #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) info.devices[id].smpbo = prop.sharedMemPerBlock; From 27d135c970c00f655d486f870edacded792bef5c Mon Sep 17 
00:00:00 2001 From: uvos Date: Wed, 29 Jan 2025 19:36:00 +0100 Subject: [PATCH 03/46] HIP: require at least HIP 5.5 --- ggml/src/ggml-hip/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index ecc3bc66d..7a877bdc1 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -40,6 +40,10 @@ find_package(hip REQUIRED) find_package(hipblas REQUIRED) find_package(rocblas REQUIRED) +if (${hip_VERSION} VERSION_LESS 5.5) + message(FATAL_ERROR "At least ROCM/HIP V5.5 is required") +endif() + message(STATUS "HIP and hipBLAS found") file(GLOB GGML_HEADERS_ROCM "../ggml-cuda/*.cuh") From 8b576b6c55bc4e6be898b47522f0ef402b93ef62 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 30 Jan 2025 19:13:58 +0000 Subject: [PATCH 04/46] Tool call support (generic + native for Llama, Functionary, Hermes, Mistral, Firefunction, DeepSeek) w/ lazy grammars (#9639) --------- Co-authored-by: Xuan Son Nguyen Co-authored-by: Georgi Gerganov Co-authored-by: Xuan Son Nguyen --- .editorconfig | 8 + .github/workflows/server.yml | 2 +- Makefile | 9 + README.md | 1 + common/CMakeLists.txt | 2 + common/chat.cpp | 848 ++++++++++++++++++ common/chat.hpp | 50 ++ common/common.cpp | 22 +- common/common.h | 17 +- common/json-schema-to-grammar.cpp | 15 +- common/json-schema-to-grammar.h | 9 +- common/sampling.cpp | 11 +- examples/gbnf-validator/gbnf-validator.cpp | 2 +- examples/main/main.cpp | 27 +- examples/server/README.md | 76 ++ examples/server/server.cpp | 187 ++-- examples/server/tests/README.md | 13 +- examples/server/tests/pytest.ini | 4 + examples/server/tests/tests.sh | 11 +- .../server/tests/unit/test_chat_completion.py | 11 +- examples/server/tests/unit/test_tool_call.py | 352 ++++++++ examples/server/tests/utils.py | 6 +- examples/server/utils.hpp | 80 +- include/llama.h | 12 + ...reForAI-c4ai-command-r-plus-tool_use.jinja | 202 +++++ 
...rch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja | 152 ++++ ...earch-Hermes-3-Llama-3.1-8B-tool_use.jinja | 152 ++++ .../templates/Qwen-Qwen2.5-7B-Instruct.jinja | 54 ++ ...seek-ai-DeepSeek-R1-Distill-Llama-8B.jinja | 1 + ...seek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja | 56 ++ ...fireworks-ai-llama-3-firefunction-v2.jinja | 57 ++ models/templates/google-gemma-2-2b-it.jinja | 4 + .../meetkai-functionary-medium-v3.1.jinja | 58 ++ .../meetkai-functionary-medium-v3.2.jinja | 287 ++++++ .../meta-llama-Llama-3.1-8B-Instruct.jinja | 109 +++ .../meta-llama-Llama-3.2-3B-Instruct.jinja | 93 ++ .../meta-llama-Llama-3.3-70B-Instruct.jinja | 109 +++ .../microsoft-Phi-3.5-mini-instruct.jinja | 8 + ...mistralai-Mistral-Nemo-Instruct-2407.jinja | 87 ++ scripts/fetch_server_test_models.py | 105 +++ ..._chat_template.py => get_chat_template.py} | 12 +- src/llama-grammar.cpp | 88 +- src/llama-grammar.h | 23 +- src/llama-sampling.cpp | 51 +- tests/CMakeLists.txt | 1 + tests/test-chat-template.cpp | 10 +- tests/test-chat.cpp | 521 +++++++++++ tests/test-grammar-integration.cpp | 2 +- 48 files changed, 3861 insertions(+), 156 deletions(-) create mode 100644 common/chat.cpp create mode 100644 common/chat.hpp create mode 100644 examples/server/tests/pytest.ini create mode 100644 examples/server/tests/unit/test_tool_call.py create mode 100644 models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja create mode 100644 models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja create mode 100644 models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja create mode 100644 models/templates/Qwen-Qwen2.5-7B-Instruct.jinja create mode 100644 models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja create mode 100644 models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja create mode 100644 models/templates/fireworks-ai-llama-3-firefunction-v2.jinja create mode 100644 models/templates/google-gemma-2-2b-it.jinja create mode 100644 
models/templates/meetkai-functionary-medium-v3.1.jinja create mode 100644 models/templates/meetkai-functionary-medium-v3.2.jinja create mode 100644 models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja create mode 100644 models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja create mode 100644 models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja create mode 100644 models/templates/microsoft-Phi-3.5-mini-instruct.jinja create mode 100644 models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja create mode 100755 scripts/fetch_server_test_models.py rename scripts/{get_hf_chat_template.py => get_chat_template.py} (86%) mode change 100755 => 100644 create mode 100644 tests/test-chat.cpp diff --git a/.editorconfig b/.editorconfig index eac38a15f..5d63d0a51 100644 --- a/.editorconfig +++ b/.editorconfig @@ -40,3 +40,11 @@ indent_style = tab [examples/cvector-generator/*.txt] trim_trailing_whitespace = unset insert_final_newline = unset + +[models/templates/*.jinja] +indent_style = unset +indent_size = unset +end_of_line = unset +charset = unset +trim_trailing_whitespace = unset +insert_final_newline = unset diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index ed1c357a5..0cbc3d640 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -205,7 +205,7 @@ jobs: run: | cd examples/server/tests $env:PYTHONIOENCODING = ":replace" - pytest -v -x + pytest -v -x -m "not slow" - name: Slow tests id: server_integration_tests_slow diff --git a/Makefile b/Makefile index 295522ba3..ef152d246 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,7 @@ TEST_TARGETS = \ tests/test-arg-parser \ tests/test-autorelease \ tests/test-backend-ops \ + tests/test-chat \ tests/test-chat-template \ tests/test-double-float \ tests/test-grammar-integration \ @@ -983,6 +984,7 @@ OBJ_COMMON = \ $(DIR_COMMON)/ngram-cache.o \ $(DIR_COMMON)/sampling.o \ $(DIR_COMMON)/speculative.o \ + $(DIR_COMMON)/chat.o \ $(DIR_COMMON)/build-info.o \ 
$(DIR_COMMON)/json-schema-to-grammar.o @@ -1361,6 +1363,8 @@ llama-server: \ examples/server/httplib.h \ examples/server/index.html.hpp \ examples/server/loading.html.hpp \ + common/chat.cpp \ + common/chat.hpp \ common/chat-template.hpp \ common/json.hpp \ common/minja.hpp \ @@ -1471,6 +1475,11 @@ tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \ $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) +tests/test-chat: tests/test-chat.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + tests/test-opt: tests/test-opt.cpp \ $(OBJ_GGML) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) diff --git a/README.md b/README.md index 382c67041..d40309875 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) - **How to use [MTLResidencySet](https://developer.apple.com/documentation/metal/mtlresidencyset?language=objc) to keep the GPU memory active?** https://github.com/ggerganov/llama.cpp/pull/11427 - **VS Code extension for FIM completions:** https://github.com/ggml-org/llama.vscode +- Universal tool call support in `llama-server`: https://github.com/ggerganov/llama.cpp/pull/9639 - Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim - Introducing GGUF-my-LoRA https://github.com/ggerganov/llama.cpp/discussions/10123 - Hugging Face Inference Endpoints now support GGUF out of the box! 
https://github.com/ggerganov/llama.cpp/discussions/9669 diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 24b7f8741..72f0915c1 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -56,6 +56,8 @@ add_library(${TARGET} STATIC arg.cpp arg.h base64.hpp + chat.cpp + chat.hpp chat-template.hpp common.cpp common.h diff --git a/common/chat.cpp b/common/chat.cpp new file mode 100644 index 000000000..d9a654892 --- /dev/null +++ b/common/chat.cpp @@ -0,0 +1,848 @@ +#include "chat.hpp" +#include "chat-template.hpp" +#include "json-schema-to-grammar.h" +#include "log.h" +#include "minja.hpp" + +std::string common_chat_format_name(common_chat_format format) { + switch (format) { + case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only"; + case COMMON_CHAT_FORMAT_GENERIC: return "Generic"; + case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo"; + case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x"; + case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools"; + case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1"; + case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2"; + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; + case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; + default: + throw std::runtime_error("Unknown chat format"); + } +} + +const common_grammar_options grammar_options { + /* .dotall = */ false, + /* .compact_spaces = */ false, + // /* .compact_spaces = */ true, +}; + +static bool parse_json(std::string::const_iterator & it, const std::string::const_iterator & end, json & out) { + // // https://json.nlohmann.me/features/parsing/sax_interface/ + struct json_error_locator : public nlohmann::json_sax { + std::size_t position; + bool found_error; + + json_error_locator() : position(0), found_error(false) {} + + bool parse_error(std::size_t position, const 
std::string &, const json::exception &) override { + this->position = position - 1; + this->found_error = true; + return false; + } + bool null() override { return true; } + bool boolean(bool) override { return true; } + bool number_integer(number_integer_t) override { return true; } + bool number_unsigned(number_unsigned_t) override { return true; } + bool number_float(number_float_t, const string_t &) override { return true; } + bool string(string_t &) override { return true; } + bool binary(binary_t &) override { return true; } + bool start_object(std::size_t) override { return true; } + bool key(string_t &) override { return true; } + bool end_object() override { return true; } + bool start_array(std::size_t) override { return true; } + bool end_array() override { return true; } + }; + json_error_locator err_loc; + json::sax_parse(it, end, &err_loc); + + std::string::const_iterator temptative_end; + if (err_loc.found_error) { + temptative_end = it + err_loc.position; + } else { + temptative_end = end; + } + std::string json_sub {it, temptative_end}; + try { + out = json::parse(json_sub); + it = temptative_end; + return true; + } catch (const std::exception &) { + return false; + } +} + + +/** + * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between. + * Aggregates the prefix, suffix and in-between text into the content. 
+ */ +static common_chat_msg parse_json_tool_calls( + const std::string& input, + const std::optional & trigger_opt, + const std::regex & function_regex, + const std::regex & close_regex) { + std::smatch match; + + common_chat_msg result; + result.role = "assistant"; + + + auto end = input.end(); + auto it = input.begin(); + + if (trigger_opt) { + if (!std::regex_search(it, end, match, *trigger_opt)) { + result.content = input; + return result; + } + result.content = match.prefix().str(); + it = match.suffix().first; + } + + while (it != end) { + std::sregex_iterator rend; + std::sregex_iterator rit(it, end, function_regex); + if (rit == rend) { + fprintf(stderr, "No more tool calls found\n"); + result.content += std::string(it, end); + break; + } + auto name = rit->str(1); + result.content += std::string(it, rit->prefix().second); + it = rit->suffix().first; + + json arguments; + if (!parse_json(it, end, arguments)) { + throw std::runtime_error("Failed to parse json tool call arguments"); + } + if (!std::regex_search(it, end, match, close_regex)) { + throw std::runtime_error("Malformed input, missing closing pattern"); + } + it = match.suffix().first; + result.tool_calls.push_back({name, arguments.is_string() ? arguments.get() : arguments.dump(), /* id= */ ""}); + } + return result; +} + +static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) { + auto content_end = input.find(prefix); + size_t tc_start = std::string::npos; + + common_chat_msg result; + result.role = "assistant"; + const auto process_tool_calls = [&](const json & tool_calls) { + for (const auto & tool_call : tool_calls) { + const auto & arguments = tool_call["arguments"]; + result.tool_calls.push_back({ + tool_call["name"], + arguments.is_string() ? arguments.get() : arguments.dump(), + tool_call.contains("id") ? 
tool_call["id"] : "", + }); + } + }; + if (content_end == std::string::npos) { + result.content = input; + } else { + tc_start = content_end + prefix.size() - rstrip_prefix; + result.content = input.substr(0, content_end); + auto tool_calls = json::parse(input.substr(tc_start)); + process_tool_calls(tool_calls); + } + return result; +} + +static void foreach_function(const json & tools, const std::function & fn) { + for (const auto & tool : tools) { + if (!tool.contains("type") || tool["type"] != "function" || !tool.contains("function")) { + LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str()); + continue; + } + fn(tool); + } +} + +static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + + auto tool_call_schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + auto tool_schema = json { + {"type", "object"}, + {"properties", { + {"name", { + {"type", "string"}, + {"const", function["name"]}, + }}, + {"arguments", function["parameters"]}, + }}, + {"required", json::array({"name", "arguments"})}, + }; + if (function.contains("description")) { + tool_schema["description"] = function["description"]; + } + if (inputs.parallel_tool_calls) { + tool_schema["properties"]["id"] = { + {"type", "string"}, + {"minLength", 4}, + }; + tool_schema["required"].push_back("id"); + } + tool_call_schemas.emplace_back(tool_schema); + }); + const auto tool_call = + inputs.parallel_tool_calls + ? json { + {"type", "object"}, + {"properties", { + {"tool_calls", { + {"type", "array"}, + {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { + {"anyOf", tool_call_schemas}, + }}, + {"minItems", 1}, + }}, + }}, + {"required", json::array({"tool_calls"})}, + } + : json { + {"type", "object"}, + {"properties", { + {"tool_call", tool_call_schemas.size() == 1 ? 
tool_call_schemas[0] : json { + {"anyOf", tool_call_schemas}, + }}, + }}, + {"required", json::array({"tool_call"})}, + }; + const auto schema = + inputs.tool_choice != "required" + ? json { + {"anyOf", json::array({ + tool_call, + { + {"type", "object"}, + {"properties", { + {"response", inputs.json_schema.is_null() + ? json {{"type", "string"}} + : inputs.json_schema + }, + }}, + {"required", json::array({"response"})}, + }, + })} + } + : tool_call; + + data.grammar_lazy = false; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + builder.add_schema("root", schema); + }, grammar_options); + + auto tweaked_messages = common_chat_template::add_system( + inputs.messages, + "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request"); + + data.prompt = tmpl.apply(tweaked_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_GENERIC; + return data; +} +static common_chat_msg common_chat_parse_generic(const std::string & input) { + json data = json::parse(input); + common_chat_msg result; + result.role = "assistant"; + if (data.contains("tool_calls")) { + for (const auto & tool_call : data["tool_calls"]) { + result.tool_calls.push_back({ + tool_call["name"], + tool_call["arguments"].dump(), + tool_call.contains("id") ? tool_call["id"] : "", + }); + } + } else if (data.contains("tool_call")) { + result.tool_calls.push_back({ + data["tool_call"]["name"], + data["tool_call"]["arguments"].dump(), + /* id= */ "", + }); + } else if (data.contains("response")) { + const auto & response = data["response"]; + result.content = response.is_string() ? 
response.get() : response.dump(2); + } + return result; +} + +static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + schemas.push_back({ + {"type", "object"}, + {"properties", { + // Important note: the model is probably trained to take a JSON stringified arguments value. + // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object. + {"name", { + {"type", "string"}, + {"const", function["name"]}, + }}, + {"arguments", function["parameters"]}, + {"id", { + {"type", "string"}, + // Nemo's template expects a 9-character alphanumeric ID. + {"pattern", "^[a-zA-Z0-9]{9}$"}, + }}, + }}, + {"required", json::array({"name", "arguments", "id"})}, + }); + }); + auto schema = json { + {"type", "array"}, + {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, + {"minItems", 1}, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema)); + }, grammar_options); + data.grammar_triggers.push_back({"[TOOL_CALLS]", /* .at_start = */ true}); + data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO; + return data; +} +static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) { + return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]"); +} + +static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector & expected_properties) { + if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) { + throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties"); + } + const auto & parameters_properties = parameters.at("properties"); + const auto & parameters_required = parameters.at("required"); + for (const auto & prop : expected_properties) { + if (!parameters_properties.contains(prop)) { + throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); + } + if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) { + throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); + } + } + if (parameters_properties.size() != expected_properties.size()) { + throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", ")); + } +} + +static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, bool allow_python_tag_builtin_tools) { + auto builtin_tools = json::array(); + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + + auto handle_builtin_tool = [&](const std::string & name, const json & parameters) { + if (name == "wolfram_alpha") { + 
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py + expect_tool_parameters(name, parameters, {"query"}); + } else if (name == "web_search" || name == "brave_search") { + // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py + expect_tool_parameters(name, parameters, {"query"}); + } else if (name == "python" || name == "code_interpreter") { + // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py + expect_tool_parameters(name, parameters, {"code"}); + } else { + return false; + } + + std::vector kvs; + for (const auto & [key, value] : parameters.at("properties").items()) { + kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); + } + + tool_rules.push_back( + builder.add_rule( + name + "-call", + "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\"")); + builtin_tools.push_back(name); + + return true; + }; + + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + std::string name = function["name"]; + auto parameters = function["parameters"]; + builder.resolve_refs(parameters); + + // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime + if (allow_python_tag_builtin_tools) { + handle_builtin_tool(name, parameters); + } + tool_rules.push_back( + builder.add_rule( + name + "-call", + "\"{\" ( \"\\\"type\\\": \\\"function\\\", \" | space ) " + "\"\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " + + builder.add_schema(name + "-args", parameters) + + " \"}\"")); + data.grammar_triggers.push_back({"{\"name\": \"" + name + "\"", /* .at_start = */ true}); + }); + data.grammar_triggers.push_back({"{\"name\":", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\"type\": 
\"function\"", /* .at_start = */ true}); + if (!builtin_tools.empty()) { + data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false}); + } + builder.add_rule("root", string_join(tool_rules, " | ")); + }, grammar_options); + data.additional_stops.push_back("<|eom_id|>"); + data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, { + {"tools_in_user_message", false}, + {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools}, + }); + data.format = allow_python_tag_builtin_tools && !builtin_tools.empty() + ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS + : COMMON_CHAT_FORMAT_LLAMA_3_X; + return data; +} +static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) { + // TODO: tighten & simplify the parser, don't accept leading text context. + static std::regex function_regex("\\{[\\s\\n\\r]*(?:\"type\"[\\s\\n\\r]*:[\\s\\n\\r]*\"function\"[\\s\\n\\r]*,[\\s\\n\\r]*|[\\s\\n\\r]*)\"name\"[\\s\\n\\r]*:[\\s\\n\\r]*\"([^\"]+)\"[\\s\\n\\r]*,[\\s\\n\\r]*\"parameters\": "); + static std::regex close_regex("\\}"); + static std::regex builtin_call_regex("<\\|python_tag\\|>([^.(]+)\\.call\\((.*)\\)"); + + if (with_builtin_tools) { + std::smatch match; + if (std::regex_match(input, match, builtin_call_regex)) { + auto name = match[1].str(); + auto raw_args = match[2].str(); + + // TODO: if/when builtin tools start accepting more than 1 argument, use parse_json for real parsing. 
+ auto it_eq = raw_args.find('='); + auto arg_name = raw_args.substr(0, it_eq); + auto arg_value_str = raw_args.substr(it_eq + 1); + auto arg_value = json::parse(arg_value_str); + + return { + /* .role = */ "assistant", + /* .content = */ match.prefix().str(), + /* .tool_calls = */ { + { + /* .name = */ match[1], + /* .arguments = */ (json { + {arg_name, arg_value}, + }).dump(), + /* .id = */ "", + }, + }, + }; + } + } + return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex); +} + +static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + std::string name = function["name"]; + auto parameters = function["parameters"]; + auto args_rule = builder.add_schema(name + "-args", parameters); + tool_rules.push_back(builder.add_rule(name + "-call", + "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\"")); + }); + data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false}); + builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " space"); + }, grammar_options); + data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1; + return data; +} +static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) { + static std::regex trigger_regex("<|tool▁calls▁begin|>"); + static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n"); + static std::regex close_regex("```<|tool▁call▁end|>"); + return parse_json_tool_calls(input, trigger_regex, function_regex, close_regex); +} + +static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + fprintf(stderr, "%s\n", __func__); + common_chat_params data; + data.prompt = tmpl.apply(inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, { + {"datetime", "Jan 29 2025 13:00:00 GMT"}, + {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))}, + }, /* adjust_inputs= */ false); + if (!inputs.tools.is_null() && !inputs.tools.empty()) { + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + schemas.push_back({ + {"type", "object"}, + {"properties", { + {"name", { + {"type", "string"}, + {"const", function["name"]}, + }}, + {"arguments", function["parameters"]}, + }}, + {"required", json::array({"name", "arguments", "id"})}, + }); + }); + auto schema = json { + {"type", "array"}, + {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, + {"minItems", 1}, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", "\" functools\"? 
" + builder.add_schema("tool_calls", schema)); + }, grammar_options); + data.grammar_triggers.push_back({" functools[", /* .at_start = */ false}); + data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2; + } else { + data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + } + return data; +} +static common_chat_msg common_chat_parse_firefunction_v2(const std::string & input) { + return parse_prefixed_json_tool_call_array(input, " functools[", /* rstrip_prefix= */ 1); +} + +static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}... + // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar + common_chat_params data; + data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2; + if (!inputs.tools.is_null() && !inputs.tools.empty()) { + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector first_tool_rules; + std::vector subsequent_tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + std::string name = function["name"]; + auto parameters = function["parameters"]; + auto args_rule = builder.add_schema(name + "-args", parameters); + first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule)); + subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule)); + data.grammar_triggers.push_back({name, /* .at_start = */ true}); + data.grammar_triggers.push_back({">>>" + name, /* .at_start = */ false}); + }); + auto first_rule = first_tool_rules.empty() ? 
"" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space"; + if (inputs.parallel_tool_calls) { + auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space"; + builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*"); + } else { + builder.add_rule("root", first_rule); + } + + }, grammar_options); + } + return data; +} + +static bool consume(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) { + auto expected_it = expected.begin(); + auto tmp_it = it; + while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) { + ++tmp_it; + ++expected_it; + } + if (expected_it == expected.end()) { + it = tmp_it; + return true; + } + return false; +} + +static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) { + static std::regex function_regex(R"((?:>>>)?(\w+)\n)"); + static std::regex close_regex(R"($|(?=>>>))"); + + std::string content; + auto it = input.begin(); + const auto end = input.end(); + + if (consume(it, end, "all\n")) { + std::smatch match; + if (std::regex_search(it, end, match, function_regex)) { + auto fun_it = match.prefix().second; + content = std::string(it, fun_it); + it = fun_it; + } else { + common_chat_msg res; + res.role = "assistant"; + res.content = std::string(it, end); + return res; + } + } + // TODO: tighten & simplify. + auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex); + res.content = content; + return res; +} + +static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt + common_chat_params data; + json tools = inputs.tools.is_null() ? 
inputs.tools : json::array(); + std::string python_code_argument_name; + auto has_raw_python = false; + + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + const auto & parameters = function["parameters"]; + std::string name = function["name"]; + if (name == "python" || name == "ipython") { + if (!parameters.contains("type")) { + throw std::runtime_error("Missing type in python tool"); + } + has_raw_python = true; + auto type = parameters.at("type"); + if (type == "object") { + auto properties = parameters.at("properties"); + for (auto it = properties.begin(); it != properties.end(); ++it) { + if (it.value().at("type") == "string") { + if (!python_code_argument_name.empty()) { + throw std::runtime_error("Multiple string arguments found in python tool"); + } + python_code_argument_name = it.key(); + } + } + if (python_code_argument_name.empty()) { + throw std::runtime_error("No string argument found in python tool"); + } + } else if (type != "string") { + throw std::runtime_error("Invalid type in python tool: " + type.dump()); + } + } + tool_rules.push_back(builder.add_rule(name + "-call", "\"\" " + builder.add_schema(name + "-args", parameters) + " \"\" space")); + }); + if (has_raw_python) { + tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*")); + data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false}); + } + auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space"; + builder.add_rule("root", inputs.parallel_tool_calls ? 
"(" + tool_call + ")+" : tool_call); + data.grammar_triggers.push_back({"([\s\S\n]*)$)"); + std::smatch match; + if (std::regex_search(input, match, python_tag_regex)) { + auto code = match[1].str(); + return { + /* .role = */ "assistant", + /* .content = */ match.prefix().str(), + /* .tool_calls = */ { + { + /* .name = */ "python", + /* .arguments = */ (json {{"code", code}}).dump(), + /* .id = */ "", + }, + } + }; + } + static std::regex function_regex(R"()"); + static std::regex close_regex(R"()"); + // TODO: tighten & simplify. + return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex); +} + +static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + // (content)?({"name": "foo", "arguments": {"a": 1}})* + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + std::vector tool_rules; + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + std::string name = function["name"]; + auto parameters = function["parameters"]; + builder.resolve_refs(parameters); + tool_rules.push_back(builder.add_schema(name + "-call", { + {"type", "object"}, + {"properties", json { + {"name", json {{"const", name}}}, + {"arguments", parameters}, + }}, + {"required", json::array({"name", "arguments"})}, + })); + }); + auto tool_call = "\"\" space " + builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " \"\" space"; + builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); + data.grammar_triggers.push_back({"", /* .at_start = */ false}); + // Not really a trigger but need to print this special token to get a successful parse. + data.grammar_triggers.push_back({"", /* .at_start = */ false}); + }, grammar_options); + + data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO; + return data; +} +static common_chat_msg common_chat_parse_hermes_2_pro(const std::string & input) { + try { + std::regex start_pattern(R"([\n\s]*)"); + std::regex middle_pattern(R"([\n\s]*[\n\s]*)"); + std::regex end_pattern(R"([\n\s]*[\n\s]*$)"); + + auto end = input.end(); + std::sregex_iterator rend; + std::sregex_iterator rit(input.begin(), end, start_pattern); + if (rit == rend) { + return { + /* .role = */ "assistant", + /* .content = */ input, + /* .tool_calls = */ {}, + }; + } + + common_chat_msg result; + result.role = "assistant"; + result.content = rit->prefix(); + + auto it = rit->suffix().first; + while (it != end) { + json call; + if (!parse_json(it, end, call)) { + throw std::runtime_error("Failed to parse json tool call"); + } + const auto & arguments = call["arguments"]; + result.tool_calls.push_back({ + call["name"], + arguments.dump(), + // arguments.is_string() ? arguments.get() : arguments.dump(), + /* id= */ "", + }); + rit = {it, end, middle_pattern}; + if (rit != rend) { + it = rit->suffix().first; + } else { + rit = {it, end, end_pattern}; + if (rit == rend) { + throw std::runtime_error("Malformed input, missing "); + } + break; + } + } + return result; + } catch (const std::exception & e) { + return { + /* .role = */ "assistant", + /* .content = */ input, + /* .tool_calls = */ {}, + }; + } +} + +static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + data.grammar_lazy = false; + if (!inputs.json_schema.is_null()) { + if (!inputs.grammar.empty()) { + throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both"); + } + data.grammar = json_schema_to_grammar(inputs.json_schema); + } else { + data.grammar = inputs.grammar.empty(); + } + return data; +} + +common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none"; + LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false"); + + if (has_tools && !inputs.grammar.empty()) { + throw std::runtime_error("Cannot specify grammar with tools"); + } + + const auto & src = tmpl.source(); + if (src.find(">>>all") != std::string::npos) { + // Functionary prepends "all\n" to plain content outputs, so we use the parser no matter when + return common_chat_params_init_functionary_v3_2(tmpl, inputs); + } + if (src.find(" functools[") != std::string::npos) { + // Firefunction v2 requires datetime and functions in the context, even w/o tools. 
+ return common_chat_params_init_firefunction_v2(tmpl, inputs); + } + + if (!has_tools) { + return common_chat_params_init_without_tools(tmpl, inputs); + } + + if (src.find("") != std::string::npos) { + return common_chat_params_init_hermes_2_pro(tmpl, inputs); + } + if (src.find("<|start_header_id|>") != std::string::npos + && src.find("ipython<|end_header_id|>") != std::string::npos) { + auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos; + return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools); + } + if (src.find("<|tool▁calls▁begin|>") != std::string::npos) { + return common_chat_params_init_deepseek_r1(tmpl, inputs); + } + if (src.find("[TOOL_CALLS]") != std::string::npos) { + return common_chat_params_init_mistral_nemo(tmpl, inputs); + } + return common_chat_params_init_generic(tmpl, inputs); +} + +static common_chat_msg common_chat_parse_content_only(const std::string & input) { + return { + /* .role = */ "assistant", + /* .content = */ input, + /* .tool_calls = */ {}, + }; +} + +common_chat_msg common_chat_parse(const std::string & input, common_chat_format format) { + switch (format) { + case COMMON_CHAT_FORMAT_CONTENT_ONLY: + return common_chat_parse_content_only(input); + case COMMON_CHAT_FORMAT_GENERIC: + return common_chat_parse_generic(input); + case COMMON_CHAT_FORMAT_MISTRAL_NEMO: + return common_chat_parse_mistral_nemo(input); + case COMMON_CHAT_FORMAT_LLAMA_3_X: + return common_chat_parse_llama_3_1(input); + case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: + return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true); + case COMMON_CHAT_FORMAT_DEEPSEEK_R1: + return common_chat_parse_deepseek_r1(input); + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: + return common_chat_parse_functionary_v3_2(input); + case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: + return common_chat_parse_functionary_v3_1_llama_3_1(input); + case COMMON_CHAT_FORMAT_HERMES_2_PRO: + 
return common_chat_parse_hermes_2_pro(input); + case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: + return common_chat_parse_firefunction_v2(input); + default: + throw std::runtime_error("Unsupported format: " + common_chat_format_name(format)); + } +} diff --git a/common/chat.hpp b/common/chat.hpp new file mode 100644 index 000000000..ca165aa13 --- /dev/null +++ b/common/chat.hpp @@ -0,0 +1,50 @@ +// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers. + +#pragma once + +#include "common.h" +#include +#include +#include +#include + +using json = nlohmann::ordered_json; + +struct common_chat_inputs { + json messages; + json tools; + json tool_choice; + json json_schema; + bool parallel_tool_calls; + bool stream; + std::string grammar; + bool add_generation_prompt = true; +}; + +enum common_chat_format { + COMMON_CHAT_FORMAT_CONTENT_ONLY, + COMMON_CHAT_FORMAT_GENERIC, + COMMON_CHAT_FORMAT_MISTRAL_NEMO, + COMMON_CHAT_FORMAT_LLAMA_3_X, + COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, + COMMON_CHAT_FORMAT_DEEPSEEK_R1, + COMMON_CHAT_FORMAT_FIREFUNCTION_V2, + COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, + COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, + COMMON_CHAT_FORMAT_HERMES_2_PRO, + + COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats +}; + +struct common_chat_params { + common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + json prompt; + std::string grammar; + bool grammar_lazy = false; + std::vector grammar_triggers; + std::vector additional_stops; +}; + +struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params); +std::string common_chat_format_name(common_chat_format format); +common_chat_msg common_chat_parse( const std::string & input, common_chat_format format); diff --git a/common/common.cpp b/common/common.cpp index 6dea8e3d2..6c81d18f9 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -12,6 +12,7 @@ #include "json.hpp" 
#include "json-schema-to-grammar.h" #include "llama.h" +#include "chat.hpp" #include "chat-template.hpp" #include @@ -1774,11 +1775,13 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { if (use_jinja) { try { - auto chat_template = minja::chat_template(tmpl, "", ""); - chat_template.apply({{ + auto chat_template = common_chat_template(tmpl, "", ""); + common_chat_inputs inputs; + inputs.messages = json::array({{ {"role", "user"}, {"content", "test"}, - }}, json(), true); + }}); + common_chat_params_init(chat_template, inputs); return true; } catch (const std::exception & e) { LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what()); @@ -1800,7 +1803,10 @@ std::string common_chat_apply_template( for (const auto & msg : msgs) { messages.push_back({{"role", msg.role}, {"content", msg.content}}); } - return tmpl.apply(messages, /* tools= */ json(), add_ass); + common_chat_inputs inputs; + inputs.messages = messages; + inputs.add_generation_prompt = add_ass; + return common_chat_params_init(tmpl, inputs).prompt; } int alloc_size = 0; @@ -1855,10 +1861,10 @@ std::string common_chat_format_single( std::string common_chat_format_example(const common_chat_template & tmpl, bool use_jinja) { std::vector msgs = { - {"system", "You are a helpful assistant"}, - {"user", "Hello"}, - {"assistant", "Hi there"}, - {"user", "How are you?"}, + {"system", "You are a helpful assistant", {}}, + {"user", "Hello", {}}, + {"assistant", "Hi there", {}}, + {"user", "How are you?", {}}, }; return common_chat_apply_template(tmpl, msgs, true, use_jinja); } diff --git a/common/common.h b/common/common.h index 571260372..6c1809277 100644 --- a/common/common.h +++ b/common/common.h @@ -109,6 +109,11 @@ enum common_conversation_mode { COMMON_CONVERSATION_MODE_AUTO = 2, }; +struct common_grammar_trigger { + std::string word; + bool at_start; +}; + // sampling parameters struct 
common_params_sampling { uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler @@ -154,7 +159,10 @@ struct common_params_sampling { COMMON_SAMPLER_TYPE_TEMPERATURE, }; - std::string grammar; // optional BNF-like grammar to constrain sampling + std::string grammar; // optional BNF-like grammar to constrain sampling + bool grammar_lazy = false; + std::vector grammar_trigger_words; // optional trigger words to trigger lazy grammar + std::vector grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens. std::vector logit_bias; // logit biases to apply @@ -602,10 +610,17 @@ std::string common_detokenize( // Chat template utils // +struct common_tool_call { + std::string name; + std::string arguments; + std::string id; +}; + // same with llama_chat_message, but uses std::string struct common_chat_msg { std::string role; std::string content; + std::vector tool_calls; }; // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 4d426b6bd..1f47e313e 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -343,7 +343,7 @@ static std::string format_literal(const std::string & literal) { class SchemaConverter { private: - friend std::string build_grammar(const std::function & cb); + friend std::string build_grammar(const std::function & cb, const common_grammar_options & options); std::function _fetch_json; bool _dotall; std::map _rules; @@ -764,10 +764,11 @@ private: public: SchemaConverter( const std::function & fetch_json, - bool dotall) + bool dotall, + bool compact_spaces) : _fetch_json(fetch_json), _dotall(dotall) { - _rules["space"] = SPACE_RULE; + _rules["space"] = compact_spaces ? "\" \"?" 
: SPACE_RULE; } void resolve_refs(json & schema, const std::string & url) { @@ -991,16 +992,16 @@ public: }; std::string json_schema_to_grammar(const json & schema) { - return build_grammar([&](const llama_grammar_builder & callbacks) { + return build_grammar([&](const common_grammar_builder & callbacks) { auto copy = schema; callbacks.resolve_refs(copy); callbacks.add_schema("", copy); }); } -std::string build_grammar(const std::function & cb) { - SchemaConverter converter([&](const std::string &) { return json(); }, /* dotall= */ false); - llama_grammar_builder builder { +std::string build_grammar(const std::function & cb, const common_grammar_options & options) { + SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall, options.compact_spaces); + common_grammar_builder builder { /* .add_rule = */ [&](const std::string & name, const std::string & rule) { return converter._add_rule(name, rule); }, diff --git a/common/json-schema-to-grammar.h b/common/json-schema-to-grammar.h index 4f43ab3a5..ba4112cb9 100644 --- a/common/json-schema-to-grammar.h +++ b/common/json-schema-to-grammar.h @@ -7,10 +7,15 @@ std::string json_schema_to_grammar(const nlohmann::ordered_json & schema); -struct llama_grammar_builder { +struct common_grammar_builder { std::function add_rule; std::function add_schema; std::function resolve_refs; }; -std::string build_grammar(const std::function & cb); +struct common_grammar_options { + bool dotall = false; + bool compact_spaces = false; +}; + +std::string build_grammar(const std::function & cb, const common_grammar_options & options = {}); diff --git a/common/sampling.cpp b/common/sampling.cpp index 7241ac321..bc7e49fdb 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -151,9 +151,18 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co lparams.no_perf = params.no_perf; + std::vector trigger_words; + trigger_words.reserve(params.grammar_trigger_words.size()); + for (const 
auto & str : params.grammar_trigger_words) { + trigger_words.push_back(str.word.c_str()); + } auto * result = new common_sampler { /* .params = */ params, - /* .grmr = */ llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root"), + /* .grmr = */ params.grammar_lazy + ? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root", + trigger_words.data(), trigger_words.size(), + params.grammar_trigger_tokens.data(), params.grammar_trigger_tokens.size()) + : llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root"), /* .chain = */ llama_sampler_chain_init(lparams), /* .prev = */ ring_buffer(std::max(32, params.n_prev)), /* .cur = */ {}, diff --git a/examples/gbnf-validator/gbnf-validator.cpp b/examples/gbnf-validator/gbnf-validator.cpp index 17a0e27c4..a610e6a0b 100644 --- a/examples/gbnf-validator/gbnf-validator.cpp +++ b/examples/gbnf-validator/gbnf-validator.cpp @@ -76,7 +76,7 @@ int main(int argc, char** argv) { grammar_str = buffer.str(); } - llama_grammar * grammar = llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root"); + llama_grammar * grammar = llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0); if (grammar == nullptr) { fprintf(stdout, "Failed to initialize llama_grammar\n"); return 1; diff --git a/examples/main/main.cpp b/examples/main/main.cpp index da2a03ab9..e654d3542 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -254,7 +254,7 @@ int main(int argc, char ** argv) { } } - const bool add_bos = llama_vocab_get_add_bos(vocab); + const bool add_bos = llama_vocab_get_add_bos(vocab) && !params.use_jinja; if (!llama_model_has_encoder(model)) { GGML_ASSERT(!llama_vocab_get_add_eos(vocab)); } @@ -264,9 +264,9 @@ int main(int argc, char ** argv) { std::vector embd_inp; auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) { - common_chat_msg new_msg{role, content}; + common_chat_msg new_msg{role, content, {}}; 
auto formatted = common_chat_format_single(*chat_templates.template_default, chat_msgs, new_msg, role == "user", g_params->use_jinja); - chat_msgs.push_back({role, content}); + chat_msgs.push_back({role, content, {}}); LOG_DBG("formatted: '%s'\n", formatted.c_str()); return formatted; }; @@ -503,12 +503,14 @@ int main(int argc, char ** argv) { std::vector embd; - // tokenized antiprompts - std::vector> antiprompt_ids; + // single-token antiprompts + std::vector antiprompt_token; - antiprompt_ids.reserve(params.antiprompt.size()); for (const std::string & antiprompt : params.antiprompt) { - antiprompt_ids.emplace_back(::common_tokenize(ctx, antiprompt, false, true)); + auto ids = ::common_tokenize(ctx, antiprompt, false, true); + if (ids.size() == 1) { + antiprompt_token.push_back(ids[0]); + } } if (llama_model_has_encoder(model)) { @@ -753,14 +755,11 @@ int main(int argc, char ** argv) { // check for reverse prompt using special tokens llama_token last_token = common_sampler_last(smpl); - for (std::vector ids : antiprompt_ids) { - if (ids.size() == 1 && last_token == ids[0]) { - if (params.interactive) { - is_interacting = true; - } - is_antiprompt = true; - break; + if (std::find(antiprompt_token.begin(), antiprompt_token.end(), last_token) != antiprompt_token.end()) { + if (params.interactive) { + is_interacting = true; } + is_antiprompt = true; } if (is_antiprompt) { diff --git a/examples/server/README.md b/examples/server/README.md index 44da503df..ce1ae8858 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -1117,6 +1117,82 @@ curl http://localhost:8080/v1/chat/completions \ }' ``` +... 
and even tool usage (needs `--jinja` flag): + + ```shell + llama-server --jinja -hfr lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF -hff Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf -fa + + # https://huggingface.co/meetkai/functionary-medium-v3.2 + llama-server --jinja -hfr bartowski/functionary-medium-v3.2-GGUF -hff functionary-medium-v3.2-IQ4_XS.gguf -fa + + # https://huggingface.co/meetkai/functionary-medium-v3.1 + llama-server --jinja -hfr meetkai/functionary-medium-v3.1-GGUF -hff functionary-medium-llama-3.1.Q4_0.gguf -fa + + curl http://localhost:8080/v1/chat/completions -d '{ + "model": "gpt-3.5-turbo", + "tools": [ + { + "type":"function", + "function":{ + "name":"get_current_weather", + "description":"Get the current weather in a given location", + "parameters":{ + "type":"object", + "properties":{ + "location":{ + "type":"string", + "description":"The city and state, e.g. San Francisco, CA" + } + }, + "required":["location"] + } + } + } + ], + "messages": [ + { + "role": "user", + "content": "What is the weather like in Istanbul?." + } + ] + }' + ``` + +
+ Show output + + ```json + { + "choices": [ + { + "finish_reason": "tool", + "index": 0, + "message": { + "content": null, + "tool_calls": [ + { + "name": "python", + "arguments": "{\"code\":\" \\nprint(\\\"Hello, World!\\\")\"}" + } + ], + "role": "assistant" + } + } + ], + "created": 1727287211, + "model": "gpt-3.5-turbo", + "object": "chat.completion", + "usage": { + "completion_tokens": 16, + "prompt_tokens": 44, + "total_tokens": 60 + }, + "id": "chatcmpl-Htbgh9feMmGM0LEH2hmQvwsCxq3c6Ni8" + } + ``` + +
+ ### POST `/v1/embeddings`: OpenAI-compatible embeddings API This endpoint requires that the model uses a pooling different than type `none`. The embeddings are normalized using the Eucledian norm. diff --git a/examples/server/server.cpp b/examples/server/server.cpp index b9aa5c81c..d1ea343dd 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -113,10 +113,11 @@ struct slot_params { struct common_params_speculative speculative; // OAI-compat fields - bool verbose = false; - oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE; - std::string oaicompat_model; - std::string oaicompat_cmpl_id; + bool verbose = false; + oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE; + std::string oaicompat_model; + std::string oaicompat_cmpl_id; + common_chat_format oaicompat_chat_format = COMMON_CHAT_FORMAT_CONTENT_ONLY; json to_json() const { std::vector samplers; @@ -164,6 +165,8 @@ struct slot_params { {"n_probs", sampling.n_probs}, {"min_keep", sampling.min_keep}, {"grammar", sampling.grammar}, + // {"grammar_trigger_words", sampling.grammar_trigger_words}, + {"grammar_trigger_tokens", sampling.grammar_trigger_tokens}, {"samplers", samplers}, {"speculative.n_max", speculative.n_max}, {"speculative.n_min", speculative.n_min}, @@ -325,12 +328,50 @@ struct server_task { if (data.contains("json_schema") && !data.contains("grammar")) { try { auto schema = json_value(data, "json_schema", json::object()); - params.sampling.grammar = json_schema_to_grammar(schema); + LOG_DBG("JSON schema: %s\n", schema.dump(2).c_str()); + params.sampling.grammar = json_schema_to_grammar(schema); + LOG_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str()); } catch (const std::exception & e) { throw std::runtime_error(std::string("\"json_schema\": ") + e.what()); } } else { - params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar); + params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar); + LOG_DBG("Grammar: %s\n", 
params.sampling.grammar.c_str()); + params.sampling.grammar_lazy = json_value(data, "grammar_lazy", defaults.sampling.grammar_lazy); + LOG_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false"); + } + + { + auto it = data.find("chat_format"); + if (it != data.end()) { + params.oaicompat_chat_format = static_cast(it->get()); + LOG_DBG("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str()); + } else { + params.oaicompat_chat_format = defaults.oaicompat_chat_format; + } + } + + { + const auto grammar_triggers = data.find("grammar_triggers"); + if (grammar_triggers != data.end()) { + for (const auto & t : *grammar_triggers) { + common_grammar_trigger trigger; + trigger.word = t.at("word"); + trigger.at_start = t.at("at_start"); + + auto ids = common_tokenize(vocab, trigger.word, /* add_special= */ false, /* parse_special= */ true); + if (ids.size() == 1) { + LOG_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str()); + params.sampling.grammar_trigger_tokens.push_back(ids[0]); + continue; + } + LOG_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str()); + params.sampling.grammar_trigger_words.push_back(trigger); + } + } + if (params.sampling.grammar_lazy) { + GGML_ASSERT(params.sampling.grammar_trigger_tokens.size() > 0 || params.sampling.grammar_trigger_words.size() > 0); + } } { @@ -382,22 +423,12 @@ struct server_task { } { - const auto & samplers = data.find("samplers"); + const auto samplers = data.find("samplers"); if (samplers != data.end()) { if (samplers->is_array()) { - std::vector sampler_names; - for (const auto & name : *samplers) { - if (name.is_string()) { - sampler_names.emplace_back(name); - } - } - params.sampling.samplers = common_sampler_types_from_names(sampler_names, false); + params.sampling.samplers = common_sampler_types_from_names(*samplers, false); } else if (samplers->is_string()){ - std::string sampler_string; - for (const auto & name : *samplers) { - sampler_string += 
name; - } - params.sampling.samplers = common_sampler_types_from_chars(sampler_string); + params.sampling.samplers = common_sampler_types_from_chars(samplers->get()); } } else { params.sampling.samplers = defaults.sampling.samplers; @@ -544,7 +575,7 @@ struct completion_token_output { struct server_task_result_cmpl_final : server_task_result { int index = 0; - std::string content; + std::string content; llama_tokens tokens; bool stream; @@ -566,10 +597,11 @@ struct server_task_result_cmpl_final : server_task_result { slot_params generation_params; // OAI-compat fields - bool verbose = false; - oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE; - std::string oaicompat_model; - std::string oaicompat_cmpl_id; + bool verbose = false; + oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE; + std::string oaicompat_model; + std::string oaicompat_cmpl_id; + common_chat_format oaicompat_chat_format = COMMON_CHAT_FORMAT_CONTENT_ONLY; virtual int get_index() override { return index; @@ -663,18 +695,38 @@ struct server_task_result_cmpl_final : server_task_result { json to_json_oaicompat_chat() { std::string finish_reason = "length"; + common_chat_msg message; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { - finish_reason = "stop"; + message = common_chat_parse(content, oaicompat_chat_format); + finish_reason = message.tool_calls.empty() ? "stop" : "tool_calls"; + } else { + message.content = content; } - json choice = json{ + json tool_calls; + if (!message.tool_calls.empty()) { + tool_calls = json::array(); + for (const auto & tc : message.tool_calls) { + tool_calls.push_back({ + {"type", "function"}, + {"function", { + {"name", tc.name}, + {"arguments", tc.arguments}, + }}, + {"id", tc.id.empty() ? 
json() : json(tc.id)}, + }); + } + } + + json choice { {"finish_reason", finish_reason}, {"index", 0}, {"message", json { - {"content", content}, - {"role", "assistant"} - } - }}; + {"content", message.content}, + {"tool_calls", tool_calls}, + {"role", "assistant"}, + }}, + }; if (!stream && probs_output.size() > 0) { choice["logprobs"] = json{ @@ -716,7 +768,7 @@ struct server_task_result_cmpl_final : server_task_result { finish_reason = "stop"; } - json choice = json{ + json choice = json { {"finish_reason", finish_reason}, {"index", 0}, {"delta", json::object()} @@ -1191,6 +1243,8 @@ struct server_slot { llama_token sampled; + common_chat_format chat_format = COMMON_CHAT_FORMAT_CONTENT_ONLY; + // stats size_t n_sent_text = 0; // number of sent text character @@ -1815,17 +1869,16 @@ struct server_context { if (use_jinja) { auto templates = common_chat_templates_from_model(model, ""); + common_chat_inputs inputs; + inputs.messages = json::array({{ + {"role", "user"}, + {"content", "test"}, + }}); GGML_ASSERT(templates.template_default); try { - templates.template_default->apply({{ - {"role", "user"}, - {"content", "test"}, - }}, json(), true); + common_chat_params_init(*templates.template_default, inputs); if (templates.template_tool_use) { - templates.template_tool_use->apply({{ - {"role", "user"}, - {"content", "test"}, - }}, json(), true); + common_chat_params_init(*templates.template_tool_use, inputs); } return true; } catch (const std::exception & e) { @@ -2275,11 +2328,11 @@ struct server_context { res->id_slot = slot.id; res->index = slot.index; - res->content = slot.generated_text; - res->tokens = slot.generated_tokens; + res->content = std::move(slot.generated_text); + res->tokens = std::move(slot.generated_tokens); res->timings = slot.get_timings(); res->prompt = common_detokenize(ctx, slot.prompt_tokens, true); - res->response_fields = slot.params.response_fields; + res->response_fields = std::move(slot.params.response_fields); res->truncated = 
slot.truncated; res->n_decoded = slot.n_decoded; @@ -2290,12 +2343,12 @@ struct server_context { res->stop = slot.stop; res->post_sampling_probs = slot.params.post_sampling_probs; - res->verbose = slot.params.verbose; - res->stream = slot.params.stream; - res->oaicompat = slot.params.oaicompat; - res->oaicompat_model = slot.params.oaicompat_model; - res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id; - + res->verbose = slot.params.verbose; + res->stream = slot.params.stream; + res->oaicompat = slot.params.oaicompat; + res->oaicompat_model = slot.params.oaicompat_model; + res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id; + res->oaicompat_chat_format = slot.params.oaicompat_chat_format; // populate res.probs_output if (slot.params.sampling.n_probs > 0) { if (!slot.params.stream && slot.stop == STOP_TYPE_WORD) { @@ -2773,6 +2826,11 @@ struct server_context { // track if given slot can be batched with slots already in the batch server_slot * slot_batched = nullptr; + auto accept_special_token = [&](server_slot & slot, llama_token token) { + const auto & trigger_tokens = slot.params.sampling.grammar_trigger_tokens; + return params_base.special || std::find(trigger_tokens.begin(), trigger_tokens.end(), token) != trigger_tokens.end(); + }; + // frist, add sampled tokens from any ongoing sequences for (auto & slot : slots) { if (slot.state != SLOT_STATE_GENERATING) { @@ -3136,7 +3194,7 @@ struct server_context { completion_token_output result; result.tok = id; - result.text_to_send = common_token_to_piece(ctx, result.tok, params_base.special); + result.text_to_send = common_token_to_piece(ctx, result.tok, accept_special_token(slot, result.tok)); result.prob = 1.0f; // TODO: set it here instead of doing inside populate_token_probs if (slot.params.sampling.n_probs > 0) { @@ -3225,7 +3283,7 @@ struct server_context { completion_token_output result; result.tok = ids[i]; - result.text_to_send = common_token_to_piece(ctx, result.tok, params_base.special); + 
result.text_to_send = common_token_to_piece(ctx, result.tok, accept_special_token(slot, result.tok)); result.prob = 1.0f; // set later // TODO: set result.probs @@ -3722,6 +3780,8 @@ int main(int argc, char ** argv) { { "total_slots", ctx_server.params_base.n_parallel }, { "model_path", ctx_server.params_base.model }, { "chat_template", ctx_server.chat_templates.template_default->source() }, + { "bos_token", ctx_server.chat_templates.template_default->bos_token() }, + { "eos_token", ctx_server.chat_templates.template_default->eos_token() }, { "build_info", build_info }, }; if (ctx_server.params_base.use_jinja && ctx_server.chat_templates.template_tool_use) { @@ -3763,7 +3823,9 @@ int main(int argc, char ** argv) { std::vector tasks; try { - std::vector tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, data.at("prompt"), true, true); + const auto & prompt = data.at("prompt"); + LOG_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get().c_str() : prompt.dump(2).c_str()); + std::vector tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true); tasks.reserve(tokenized_prompts.size()); for (size_t i = 0; i < tokenized_prompts.size(); i++) { server_task task = server_task(type); @@ -3779,8 +3841,8 @@ int main(int argc, char ** argv) { task.id_selected_slot = json_value(data, "id_slot", -1); // OAI-compat - task.params.oaicompat = oaicompat; - task.params.oaicompat_cmpl_id = completion_id; + task.params.oaicompat = oaicompat; + task.params.oaicompat_cmpl_id = completion_id; // oaicompat_model is already populated by params_from_json_cmpl tasks.push_back(task); @@ -3949,14 +4011,14 @@ int main(int argc, char ** argv) { }; const auto handle_chat_completions = [&ctx_server, ¶ms, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) { + LOG_DBG("request: %s\n", req.body.c_str()); if (ctx_server.params_base.embedding) { res_error(res, format_error_response("This server does not support completions. 
Start it without `--embeddings`", ERROR_TYPE_NOT_SUPPORTED)); return; } auto body = json::parse(req.body); - const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default; - json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja); + json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates); return handle_completions_impl( SERVER_TASK_TYPE_COMPLETION, @@ -3966,6 +4028,13 @@ int main(int argc, char ** argv) { OAICOMPAT_TYPE_CHAT); }; + // same with handle_chat_completions, but without inference part + const auto handle_apply_template = [&ctx_server, ¶ms, &res_ok](const httplib::Request & req, httplib::Response & res) { + auto body = json::parse(req.body); + json data = oaicompat_completion_params_parse(body, params.use_jinja, ctx_server.chat_templates); + res_ok(res, {{ "prompt", std::move(data.at("prompt")) }}); + }; + const auto handle_models = [¶ms, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) { json models = { {"object", "list"}, @@ -4124,14 +4193,6 @@ int main(int argc, char ** argv) { res_ok(res, root); }; - const auto handle_apply_template = [&ctx_server, ¶ms, &res_ok](const httplib::Request & req, httplib::Response & res) { - auto body = json::parse(req.body); - const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? 
*ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default; - json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja); - - res_ok(res, {{ "prompt", data.at("prompt") }}); - }; - const auto handle_embeddings = [&handle_embeddings_impl](const httplib::Request & req, httplib::Response & res) { handle_embeddings_impl(req, res, OAICOMPAT_TYPE_NONE); }; diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md index 5787276ab..1de0eb30e 100644 --- a/examples/server/tests/README.md +++ b/examples/server/tests/README.md @@ -31,8 +31,9 @@ It's possible to override some scenario steps values with environment variables: | `LLAMA_SERVER_BIN_PATH` | to change the server binary path, default: `../../../build/bin/llama-server` | | `DEBUG` | to enable steps and server verbose mode `--verbose` | | `N_GPU_LAYERS` | number of model layers to offload to VRAM `-ngl --n-gpu-layers` | +| `LLAMA_CACHE` | by default server tests re-download models to the `tmp` subfolder. Set this to your cache (e.g. 
`$HOME/Library/Caches/llama.cpp` on Mac or `$HOME/.cache/llama.cpp` on Unix) to avoid this |
 
-To run slow tests:
+To run slow tests (will download many models, make sure to set `LLAMA_CACHE` if needed):
 
 ```shell
 SLOW_TESTS=1 ./tests.sh
@@ -44,10 +45,16 @@ To run with stdout/stderr display in real time (verbose output, but useful for d
 DEBUG=1 ./tests.sh -s -v -x
 ```
 
-To run single test unit:
+To run all the tests in a file:
 
 ```shell
-./tests.sh unit/test_{name of test case here}.py -v -x
+./tests.sh unit/test_chat_completion.py -v -x
+```
+
+To run a single test:
+
+```shell
+./tests.sh unit/test_chat_completion.py::test_invalid_chat_completion_req
 ```
 
 Hint: You can compile and run test in single command, useful for local developement:
diff --git a/examples/server/tests/pytest.ini b/examples/server/tests/pytest.ini
new file mode 100644
index 000000000..6df308df7
--- /dev/null
+++ b/examples/server/tests/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+markers =
+    slow: marks tests as slow (deselect with '-m "not slow"')
+    serial
diff --git a/examples/server/tests/tests.sh b/examples/server/tests/tests.sh
index 1e0777de3..33fa8cc64 100755
--- a/examples/server/tests/tests.sh
+++ b/examples/server/tests/tests.sh
@@ -6,9 +6,18 @@ cd $SCRIPT_DIR
 
 set -eu
 
+if [[ "${SLOW_TESTS:-0}" == 1 ]]; then
+    # Slow tests for tool calls need quite a few models ahead of time to avoid timing out.
+ python $SCRIPT_DIR/../../../scripts/fetch_server_test_models.py +fi + if [ $# -lt 1 ] then - pytest -v -x + if [[ "${SLOW_TESTS:-0}" == 1 ]]; then + pytest -v -x + else + pytest -v -x -m "not slow" + fi else pytest "$@" fi diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py index add3f810f..0be04bab5 100644 --- a/examples/server/tests/unit/test_chat_completion.py +++ b/examples/server/tests/unit/test_chat_completion.py @@ -2,7 +2,7 @@ import pytest from openai import OpenAI from utils import * -server = ServerPreset.tinyllama2() +server: ServerProcess @pytest.fixture(autouse=True) def create_server(): @@ -13,11 +13,12 @@ def create_server(): @pytest.mark.parametrize( "model,system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,finish_reason,jinja,chat_template", [ - (None, "Book", "What is the best book", 8, "(Suddenly)+", 77, 8, "length", False, None), - (None, "Book", "What is the best book", 8, "(Suddenly)+", 77, 8, "length", True, None), - (None, "Book", "What is the best book", 8, "^ blue", 23, 8, "length", True, "This is not a chat template, it is"), + (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", False, None), ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", False, None), - ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", True, None), + # TODO: fix testing of non-tool jinja mode + # (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None), + # (None, "Book", "What is the best book", 8, "I want to play with", 23, 8, "length", True, "This is not a chat template, it is"), + # ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, 
"length", True, None), ] ) def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_content, n_prompt, n_predicted, finish_reason, jinja, chat_template): diff --git a/examples/server/tests/unit/test_tool_call.py b/examples/server/tests/unit/test_tool_call.py new file mode 100644 index 000000000..e6ed9c9be --- /dev/null +++ b/examples/server/tests/unit/test_tool_call.py @@ -0,0 +1,352 @@ +import pytest +from utils import * + +server: ServerProcess + +TIMEOUT_SERVER_START = 15*60 +TIMEOUT_HTTP_REQUEST = 60 + +@pytest.fixture(autouse=True) +def create_server(): + global server + server = ServerPreset.tinyllama2() + server.model_alias = "tinyllama-2-tool-call" + server.server_port = 8081 + + +TEST_TOOL = { + "type":"function", + "function": { + "name": "test", + "description": "", + "parameters": { + "type": "object", + "properties": { + "success": {"type": "boolean", "const": True}, + }, + "required": ["success"] + } + } +} + +PYTHON_TOOL = { + "type": "function", + "function": { + "name": "python", + "description": "Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "The code to run in the ipython interpreter." + } + }, + "required": ["code"] + } + } +} + +WEATHER_TOOL = { + "type":"function", + "function":{ + "name":"get_current_weather", + "description":"Get the current weather in a given location", + "parameters":{ + "type":"object", + "properties":{ + "location":{ + "type":"string", + "description":"The city and country/state, e.g. 
'San Francisco, CA', or 'Paris, France'" + } + }, + "required":["location"] + } + } +} + + +def do_test_completion_with_required_tool_tiny(template_name: str, tool: dict, argument_key: str | None): + n_predict = 512 + global server + # server = ServerPreset.stories15m_moe() + server.jinja = True + server.n_predict = n_predict + server.chat_template_file = f'../../../models/templates/{template_name}.jinja' + server.start(timeout_seconds=TIMEOUT_SERVER_START) + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": n_predict, + "messages": [ + {"role": "system", "content": "You are a coding assistant."}, + {"role": "user", "content": "Write an example"}, + ], + "tool_choice": "required", + "tools": [tool], + "parallel_tool_calls": False, + "temperature": 0.0, + "top_k": 1, + "top_p": 1.0, + }) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + tool_calls = choice["message"].get("tool_calls") + assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}' + tool_call = tool_calls[0] + expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"] + assert expected_function_name == tool_call["function"]["name"] + actual_arguments = tool_call["function"]["arguments"] + assert isinstance(actual_arguments, str) + if argument_key is not None: + actual_arguments = json.loads(actual_arguments) + assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}" + + +@pytest.mark.parametrize("template_name,tool,argument_key", [ + ("google-gemma-2-2b-it", TEST_TOOL, "success"), + ("meta-llama-Llama-3.3-70B-Instruct", TEST_TOOL, "success"), + ("meta-llama-Llama-3.3-70B-Instruct", PYTHON_TOOL, "code"), +]) +def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict, argument_key: str | None): + do_test_completion_with_required_tool_tiny(template_name, 
tool, argument_key) + + +@pytest.mark.slow +@pytest.mark.parametrize("template_name,tool,argument_key", [ + ("meta-llama-Llama-3.1-8B-Instruct", TEST_TOOL, "success"), + ("meta-llama-Llama-3.1-8B-Instruct", PYTHON_TOOL, "code"), + ("meetkai-functionary-medium-v3.1", TEST_TOOL, "success"), + ("meetkai-functionary-medium-v3.1", PYTHON_TOOL, "code"), + ("meetkai-functionary-medium-v3.2", TEST_TOOL, "success"), + ("meetkai-functionary-medium-v3.2", PYTHON_TOOL, "code"), + ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", TEST_TOOL, "success"), + ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", PYTHON_TOOL, "code"), + ("meta-llama-Llama-3.2-3B-Instruct", TEST_TOOL, "success"), + ("meta-llama-Llama-3.2-3B-Instruct", PYTHON_TOOL, "code"), + ("mistralai-Mistral-Nemo-Instruct-2407", TEST_TOOL, "success"), + ("mistralai-Mistral-Nemo-Instruct-2407", PYTHON_TOOL, "code"), + ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", TEST_TOOL, "success"), + ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", PYTHON_TOOL, "code"), + ("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", TEST_TOOL, "success"), + ("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", PYTHON_TOOL, "code"), + ("fireworks-ai-llama-3-firefunction-v2", TEST_TOOL, "success"), + ("fireworks-ai-llama-3-firefunction-v2", PYTHON_TOOL, "code"), +]) +def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict, argument_key: str | None): + do_test_completion_with_required_tool_tiny(template_name, tool, argument_key) + + +@pytest.mark.slow +@pytest.mark.parametrize("tool,argument_key,hf_repo,template_override", [ + (TEST_TOOL, "success", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), + (TEST_TOOL, "success", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None), + (TEST_TOOL, "success", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", 
"bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + (TEST_TOOL, "success", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None), + (TEST_TOOL, "success", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")), + (PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")), + (TEST_TOOL, "success", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")), + (PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")), + (TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), + (PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), + (TEST_TOOL, "success", "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)), + (PYTHON_TOOL, "code", "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)), + (TEST_TOOL, "success", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + (PYTHON_TOOL, "code", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + (TEST_TOOL, "success", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + (PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + # TODO: fix these + # (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), + # (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), +]) +def test_completion_with_required_tool_real_model(tool: dict, argument_key: str | None, hf_repo: str, template_override: Tuple[str, str | None] | None): + n_predict = 512 + server.n_slots = 1 + server.jinja = True + 
server.n_ctx = 8192 + server.n_predict = n_predict + server.model_hf_repo = hf_repo + server.model_hf_file = None + if template_override: + (template_hf_repo, template_variant) = template_override + server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja" + assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template." + server.start(timeout_seconds=TIMEOUT_SERVER_START) + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": n_predict, + "messages": [ + {"role": "system", "content": "You are a coding assistant."}, + {"role": "user", "content": "Write an example"}, + ], + "tool_choice": "required", + "tools": [tool], + "parallel_tool_calls": False, + "temperature": 0.0, + "top_k": 1, + "top_p": 1.0, + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + tool_calls = choice["message"].get("tool_calls") + assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}' + tool_call = tool_calls[0] + expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"] + assert expected_function_name == tool_call["function"]["name"] + actual_arguments = tool_call["function"]["arguments"] + assert isinstance(actual_arguments, str) + if argument_key is not None: + actual_arguments = json.loads(actual_arguments) + assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}" + + +def do_test_completion_without_tool_call(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None): + global server + server.jinja = True + server.n_predict = 
n_predict + server.chat_template_file = f'../../../models/templates/{template_name}.jinja' + server.start(timeout_seconds=TIMEOUT_SERVER_START) + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": n_predict, + "messages": [ + {"role": "system", "content": "You are a coding assistant."}, + {"role": "user", "content": "say hello world with python"}, + ], + "tools": tools if tools else None, + "tool_choice": tool_choice, + "temperature": 0.0, + "top_k": 1, + "top_p": 1.0, + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + assert choice["message"].get("tool_calls") is None, f'Expected no tool call in {choice["message"]}' + + +@pytest.mark.parametrize("template_name,n_predict,tools,tool_choice", [ + ("meta-llama-Llama-3.3-70B-Instruct", 128, [], None), + ("meta-llama-Llama-3.3-70B-Instruct", 128, [TEST_TOOL], None), + ("meta-llama-Llama-3.3-70B-Instruct", 128, [PYTHON_TOOL], 'none'), +]) +def test_completion_without_tool_call_fast(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None): + do_test_completion_without_tool_call(template_name, n_predict, tools, tool_choice) + + +@pytest.mark.slow +@pytest.mark.parametrize("template_name,n_predict,tools,tool_choice", [ + ("meetkai-functionary-medium-v3.2", 256, [], None), + ("meetkai-functionary-medium-v3.2", 256, [TEST_TOOL], None), + ("meetkai-functionary-medium-v3.2", 256, [PYTHON_TOOL], 'none'), + ("meetkai-functionary-medium-v3.1", 256, [], None), + ("meetkai-functionary-medium-v3.1", 256, [TEST_TOOL], None), + ("meetkai-functionary-medium-v3.1", 256, [PYTHON_TOOL], 'none'), + ("meta-llama-Llama-3.2-3B-Instruct", 256, [], None), + ("meta-llama-Llama-3.2-3B-Instruct", 256, [TEST_TOOL], None), + ("meta-llama-Llama-3.2-3B-Instruct", 256, [PYTHON_TOOL], 'none'), +]) +def test_completion_without_tool_call_slow(template_name: str, n_predict: int, tools: list[dict], 
tool_choice: str | None): + do_test_completion_without_tool_call(template_name, n_predict, tools, tool_choice) + + +@pytest.mark.slow +@pytest.mark.parametrize("hf_repo,template_override", [ + ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), + ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None), + ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None), + ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")), + ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")), + ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), + ("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)), + ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)), + # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), +]) +def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | None] | None): + global server + server.n_slots = 1 + server.jinja = True + server.n_ctx = 8192 + server.n_predict = 512 + server.model_hf_repo = hf_repo + server.model_hf_file = None + if template_override: + (template_hf_repo, template_variant) = template_override + server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja" + assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template." 
+ server.start(timeout_seconds=TIMEOUT_SERVER_START) + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": 256, + "messages": [ + {"role": "user", "content": "What is the weather in Istanbul?"}, + ], + "tools": [WEATHER_TOOL], + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + tool_calls = choice["message"].get("tool_calls") + assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}' + tool_call = tool_calls[0] + assert tool_call["function"]["name"] == WEATHER_TOOL["function"]["name"] + actual_arguments = json.loads(tool_call["function"]["arguments"]) + assert 'location' in actual_arguments, f"location not found in {json.dumps(actual_arguments)}" + location = actual_arguments["location"] + assert isinstance(location, str), f"Expected location to be a string, got {type(location)}: {json.dumps(location)}" + assert re.match('^Istanbul(, (TR|Turkey|Türkiye))?$', location), f'Expected Istanbul for location, got {location}' + + +@pytest.mark.slow +@pytest.mark.parametrize("expected_arguments_override,hf_repo,template_override", [ + (None, "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None), + (None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None), + (None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)), + ('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None), + (None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)), + ('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)), + (None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None), + (None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")), + (None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", 
"tool_use")), + (None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None), + # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None), +]) +def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo: str, template_override: Tuple[str, str | None] | None): + global server + server.n_slots = 1 + server.jinja = True + server.n_ctx = 8192 + server.n_predict = 128 + server.model_hf_repo = hf_repo + server.model_hf_file = None + if template_override: + (template_hf_repo, template_variant) = template_override + server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja" + assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template." + server.start(timeout_seconds=TIMEOUT_SERVER_START) + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": 256, + "messages": [ + {"role": "system", "content": "You are a coding assistant."}, + {"role": "user", "content": "say hello world with python"}, + ], + "tools": [PYTHON_TOOL], + # Note: without these greedy params, Functionary v3.2 writes `def hello_world():\n print("Hello, World!")\nhello_world()` which is correct but a pain to test. 
+ "temperature": 0.0, + "top_k": 1, + "top_p": 1.0, + }, timeout=TIMEOUT_HTTP_REQUEST) + assert res.status_code == 200, f"Expected status code 200, got {res.status_code}" + choice = res.body["choices"][0] + tool_calls = choice["message"].get("tool_calls") + assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}' + tool_call = tool_calls[0] + assert tool_call["function"]["name"] == PYTHON_TOOL["function"]["name"] + actual_arguments = tool_call["function"]["arguments"] + if expected_arguments_override is not None: + assert actual_arguments == expected_arguments_override + else: + actual_arguments = json.loads(actual_arguments) + assert 'code' in actual_arguments, f"code not found in {json.dumps(actual_arguments)}" + code = actual_arguments["code"] + assert isinstance(code, str), f"Expected code to be a string, got {type(code)}: {json.dumps(code)}" + assert re.match(r'''print\(("[Hh]ello,? [Ww]orld!?"|'[Hh]ello,? [Ww]orld!?')\)''', code), f'Expected hello world, got {code}' diff --git a/examples/server/tests/utils.py b/examples/server/tests/utils.py index 9964db2f9..ce0680662 100644 --- a/examples/server/tests/utils.py +++ b/examples/server/tests/utils.py @@ -26,7 +26,7 @@ from re import RegexFlag import wget -DEFAULT_HTTP_TIMEOUT = 10 if "LLAMA_SANITIZE" not in os.environ else 30 +DEFAULT_HTTP_TIMEOUT = 12 if "LLAMA_SANITIZE" not in os.environ else 30 class ServerResponse: @@ -41,7 +41,7 @@ class ServerProcess: server_port: int = 8080 server_host: str = "127.0.0.1" model_hf_repo: str = "ggml-org/models" - model_hf_file: str = "tinyllamas/stories260K.gguf" + model_hf_file: str | None = "tinyllamas/stories260K.gguf" model_alias: str = "tinyllama-2" temperature: float = 0.8 seed: int = 42 @@ -191,7 +191,7 @@ class ServerProcess: creationflags=flags, stdout=sys.stdout, stderr=sys.stdout, - env={**os.environ, "LLAMA_CACHE": "tmp"}, + env={**os.environ, "LLAMA_CACHE": "tmp"} if "LLAMA_CACHE" not in os.environ else None, ) 
server_instances.add(self) diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index c5987250c..3d2c04666 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -17,6 +17,7 @@ #define JSON_ASSERT GGML_ASSERT #include "json.hpp" #include "minja.hpp" +#include "chat.hpp" #include "chat-template.hpp" #include @@ -376,7 +377,7 @@ inline std::string format_chat(const common_chat_template & tmpl, const std::vec throw std::runtime_error("Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)"); } - chat.push_back({role, content}); + chat.push_back({role, content, /* tool_calls= */ {}}); } const auto formatted_chat = common_chat_apply_template(tmpl, chat, true, /* use_jinja= */ false); @@ -483,14 +484,13 @@ static bool ends_with(const std::string & str, const std::string & suffix) { static size_t find_partial_stop_string(const std::string &stop, const std::string &text) { if (!text.empty() && !stop.empty()) { - const char text_last_char = text.back(); - for (int64_t char_index = stop.size() - 1; char_index >= 0; char_index--) { - if (stop[char_index] == text_last_char) { - const std::string current_partial = stop.substr(0, char_index + 1); - if (ends_with(text, current_partial)) { - return text.size() - char_index - 1; - } + auto it = std::find(stop.rbegin(), stop.rend(), text.back()); + while (it != stop.rend()) { + size_t length = std::distance(it, stop.rend()); + if (text.length() >= length && 0 == text.compare(text.length() - length, length, stop)) { + return text.length() - length; } + it = std::find(std::next(it), stop.rend(), text.back()); } } @@ -580,21 +580,30 @@ static json oaicompat_completion_params_parse(const json & body) { static json oaicompat_completion_params_parse( const json & body, /* openai api json semantics */ - const common_chat_template & tmpl, - bool use_jinja) + bool use_jinja, + const common_chat_templates & chat_templates) { json llama_params; + const auto & tmpl = body.contains("tools") 
&& chat_templates.template_tool_use + ? *chat_templates.template_tool_use + : *chat_templates.template_default; auto tools = json_value(body, "tools", json()); - auto has_tools = tools.is_array() && !tools.empty(); + auto stream = json_value(body, "stream", false); - if (has_tools) { - if (use_jinja) { - LOG_WRN("tools param is not fully supported yet\n"); - } else { + if (tools.is_array() && !tools.empty()) { + if (stream) { + throw std::runtime_error("Cannot use tools with stream"); + } + if (!use_jinja) { throw std::runtime_error("tools param requires --jinja flag"); } } + if (!use_jinja) { + if (body.contains("tool_choice") && !body.at("tool_choice").is_null()) { + throw std::runtime_error("Unsupported param: tool_choice"); + } + } // Handle "stop" field if (body.contains("stop") && body.at("stop").is_string()) { @@ -619,7 +628,38 @@ static json oaicompat_completion_params_parse( // Apply chat template to the list of messages if (use_jinja) { - llama_params["prompt"] = tmpl.apply(body.at("messages"), tools, /* add_generation_prompt= */ true); + auto tool_choice = json_value(body, "tool_choice", std::string("auto")); + if (tool_choice != "none" && tool_choice != "auto" && tool_choice != "required") { + throw std::runtime_error("Invalid tool_choice: " + tool_choice); + } + if (tool_choice != "none" && llama_params.contains("grammar")) { + throw std::runtime_error("Cannot use custom grammar constraints with tools."); + } + common_chat_inputs inputs; + inputs.messages = body.at("messages"); + inputs.tools = tools; + inputs.tool_choice = tool_choice; + inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false); + inputs.stream = stream; + // TODO: support mixing schema w/ tools beyond generic format. 
+ inputs.json_schema = json_value(llama_params, "json_schema", json::object()); + auto chat_params = common_chat_params_init(tmpl, inputs); + + llama_params["chat_format"] = static_cast(chat_params.format); + llama_params["prompt"] = chat_params.prompt; + llama_params["grammar"] = chat_params.grammar; + llama_params["grammar_lazy"] = chat_params.grammar_lazy; + auto grammar_triggers = json::array(); + for (const auto & trigger : chat_params.grammar_triggers) { + grammar_triggers.push_back({ + {"word", trigger.word}, + {"at_start", trigger.at_start}, + }); + } + llama_params["grammar_triggers"] = grammar_triggers; + for (const auto & stop : chat_params.additional_stops) { + llama_params["stop"].push_back(stop); + } } else { llama_params["prompt"] = format_chat(tmpl, body.at("messages")); } @@ -638,14 +678,6 @@ static json oaicompat_completion_params_parse( throw std::runtime_error("top_logprobs requires logprobs to be set to true"); } - // Params supported by OAI but unsupported by llama.cpp - static const std::vector unsupported_params { "tool_choice" }; - for (const auto & param : unsupported_params) { - if (body.contains(param)) { - throw std::runtime_error("Unsupported param: " + param); - } - } - // Copy remaining properties to llama_params // This allows user to use llama.cpp-specific params like "mirostat", ... via OAI endpoint. // See "launch_slot_with_task()" for a complete list of params supported by llama.cpp diff --git a/include/llama.h b/include/llama.h index 3b75e7607..61907ed40 100644 --- a/include/llama.h +++ b/include/llama.h @@ -1199,6 +1199,18 @@ extern "C" { const char * grammar_str, const char * grammar_root); + /// @details Lazy grammar sampler, introduced in https://github.com/ggerganov/llama.cpp/pull/9639 + /// @param trigger_words A list of words that will trigger the grammar sampler. This may be updated to a loose regex syntax (w/ ^) in a near future. + /// @param trigger_tokens A list of tokens that will trigger the grammar sampler. 
+ LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy( + const struct llama_vocab * vocab, + const char * grammar_str, + const char * grammar_root, + const char ** trigger_words, + size_t num_trigger_words, + const llama_token * trigger_tokens, + size_t num_trigger_tokens); + /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first. LLAMA_API struct llama_sampler * llama_sampler_init_penalties( int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size) diff --git a/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja b/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja new file mode 100644 index 000000000..f5baef30b --- /dev/null +++ b/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja @@ -0,0 +1,202 @@ + +{%- macro json_to_python_type(json_spec) %} +{%- set basic_type_map = { + "string": "str", + "number": "float", + "integer": "int", + "boolean": "bool" +} %} + +{%- if basic_type_map[json_spec.type] is defined %} + {{- basic_type_map[json_spec.type] }} +{%- elif json_spec.type == "array" %} + {{- "List[" + json_to_python_type(json_spec.items) + "]"}} +{%- elif json_spec.type == "object" %} + {{- "Dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']'}} +{%- elif json_spec.type is iterable %} + {{- "Union[" }} + {%- for t in json_spec.type %} + {{- json_to_python_type({"type": t}) }} + {%- if not loop.last %} + {{- "," }} + {%- endif %} + {%- endfor %} + {{- "]" }} +{%- else %} + {{- "Any" }} +{%- endif %} +{%- endmacro %} + +{%- macro old_tool_parser(tools) %} +{%- for tool in tools %} + {%- if loop.index0 != 0 %} + {{- '\n\n' }} + {%- endif %} + {{- '```python\ndef ' + tool.name + '(' }} + {%- for param_name, param_fields in tool.parameter_definitions|items %} + {%- if loop.index0 != 0 %} + {{- ', '}} + {%- endif %} + {{- param_name + ': ' }} + {%- if not param_fields.required 
%} + {{- 'Optional[' + param_fields.type + '] = None'}} + {%- else %} + {{- param_fields.type }} + {%- endif %} + {%- endfor %} + {{- ') -> List[Dict]:\n """'}} + {{- tool.description }} + {%- if tool.parameter_definitions|length != 0 %} + {{- '\n\n Args:\n '}} + {%- for param_name, param_fields in tool.parameter_definitions|items %} + {%- if loop.index0 != 0 %} + {{- '\n ' }} + {%- endif %} + {{- param_name + ' ('}} + {%- if not param_fields.required %} + {{- 'Optional[' + param_fields.type + ']'}} + {%- else %} + {{- param_fields.type }} + {%- endif %} + {{- '): ' + param_fields.description }} + {%- endfor %} + {%- endif %} + {{- '\n """\n pass\n```' }} +{%- endfor %} +{%- endmacro %} + +{%- macro new_tool_parser(tools) %} +{%- for tool in tools %} + {%- if loop.index0 != 0 %} + {{- '\n\n'}} + {%- endif %} + {%- if tool.function is defined %} + {%- set tool = tool.function %} + {%- endif %} + {{-'```python +def ' + tool.name + '('}} + {%- for param_name, param_fields in tool.parameters.properties|items %} + {%- if loop.index0 != 0 %} + {{- ', '}} + {%- endif %} + {{-param_name + ": "}} + {%- if not param_name in tool.parameters.required %} + {{-'Optional[' + json_to_python_type(param_fields) + '] = None'}} + {%- else %} + {{- json_to_python_type(param_fields) }} + {%- endif %} + {%- endfor %} + {{- ') -> List[Dict]: + """'}} + {{- tool.description }} + {%- if tool.parameters.properties|length != 0 %} + {{- '\n\n Args:\n '}} + {%- for param_name, param_fields in tool.parameters.properties|items %} + {%- if loop.index0 != 0 %} + {{- '\n ' }} + {%- endif %} + {{- param_name + ' ('}} + {%- if not param_name in tool.parameters.required %} + {{-'Optional[' + json_to_python_type(param_fields) + ']'}} + {%- else %} + {{- json_to_python_type(param_fields) }} + {%- endif %} + {{- '): ' + param_fields.description }} + {%- endfor %} + {%- endif %} + {{- '\n """\n pass\n```' }} +{%- endfor %} +{%- endmacro %} + +{{- bos_token }} +{%- if messages[0]['role'] == 'system' %} + 
{%- set loop_messages = messages[1:] %} + {%- set system_message = messages[0]['content'] %} +{%- else %} + {%- set loop_messages = messages %} + {%- set system_message = '## Task and Context\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user\'s needs as best you can, which will be wide-ranging.\n\n## Style Guide\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.' %} +{%- endif %} +{{- '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }} +{{- '# Safety Preamble' }} +{{- ' +The instructions in this section override those in the task description and style guide sections. Don\'t answer questions that are harmful or immoral.' }} +{{- ' + +# System Preamble' }} +{{- ' +## Basic Rules' }} +{{- ' +You are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\'s requests, you cite your sources in your answers, according to those instructions.' 
}} +{{- ' + +# User Preamble' }} +{{- ' +' + system_message }} +{{-' + +## Available Tools +Here is a list of tools that you have available to you: + +'}} +{%- set ns = namespace(new_tools=true) %} +{%- for tool in tools %} + {%- if tool.parameter_definitions is defined %} + {%- set ns.new_tools = false %} + {%- endif %} +{%- endfor %} +{%- if ns.new_tools %} + {{- new_tool_parser(tools) }} +{%- else %} + {{- old_tool_parser(tools) }} +{%- endif %} +{{- '<|END_OF_TURN_TOKEN|>'}} +{%- for message in loop_messages %} + {%- set content = message['content'] %} + {%- if message.role == 'user' %} + {{- '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content|trim + '<|END_OF_TURN_TOKEN|>' }} + {%- elif message.role == 'system' %} + {{- '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + content|trim + '<|END_OF_TURN_TOKEN|>' }} + {%- elif message.role == 'assistant' and message.tool_calls is defined %} + {{- '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }} + {%- if message.content is defined %} + {{- message.content|trim }} + {%- endif %} + {{- '\nAction:\n```json\n[\n' }} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '{\n'|indent(4, first=true) }} + {{- '"tool_name": "'|indent(8, first=true) + tool_call.name + '",\n' }} + {{- '"parameters": '|indent(8, first=true) }} + {%- if tool_call.arguments is defined and tool_call.arguments|length > 0 %} + {{- tool_call.arguments|tojson(indent=4)|indent(8) }} + {{- '\n' }} + {%- else %} + {{- '{}\n' }} + {%- endif %} + {{- '}'|indent(4, first=true) }} + {%- if not loop.last %} + {{- ',\n' }} + {%- endif %} + {%- endfor %} + {{- "\n]```\n" }} + {%- elif message.role == 'assistant' %} + {{- '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content|trim + '<|END_OF_TURN_TOKEN|>' }} + {%- elif message.role == 'tool' %} + {{- '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>\n' }} + {{- message.content|trim }} + {{- '<|END_OF_TURN_TOKEN|>' }} + {%- endif %} +{%- 
endfor %} +{{-'<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \'Action:\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example: +```json +[ + { + "tool_name": title of the tool in the specification, + "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters + } +]```<|END_OF_TURN_TOKEN|>'}} +{%- if add_generation_prompt %} + {{- '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }} +{%- endif %} diff --git a/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja b/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja new file mode 100644 index 000000000..149250bd5 --- /dev/null +++ b/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja @@ -0,0 +1,152 @@ +{%- macro json_to_python_type(json_spec) %} +{%- set basic_type_map = { + "string": "str", + "number": "float", + "integer": "int", + "boolean": "bool" +} %} + +{%- if basic_type_map[json_spec.type] is defined %} + {{- basic_type_map[json_spec.type] }} +{%- elif json_spec.type == "array" %} + {{- "list[" + json_to_python_type(json_spec|items) + "]"}} +{%- elif json_spec.type == "object" %} + {%- if json_spec.additionalProperties is defined %} + {{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']'}} + {%- else %} + {{- "dict" }} + {%- endif %} +{%- elif json_spec.type is iterable %} + {{- "Union[" }} + {%- for t in json_spec.type %} + {{- json_to_python_type({"type": t}) }} + {%- if not loop.last %} + {{- "," }} + {%- endif %} + {%- endfor %} + {{- "]" }} +{%- else %} + {{- "Any" }} +{%- endif 
%} +{%- endmacro %} + + +{{- bos_token }} +{{- '<|im_start|>system +' }} +{{- "You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: " }} +{%- for tool in tools %} + {%- if tool.function is defined %} + {%- set tool = tool.function %} + {%- endif %} + {{- '{"type": "function", "function": ' }} + {{- '{"name": "' + tool.name + '", ' }} + {{- '"description": "' + tool.name + '(' }} + {%- for param_name, param_fields in tool.parameters.properties|items %} + {{- param_name + ": " + json_to_python_type(param_fields) }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- if tool.return is defined %} + {{- " -> " + json_to_python_type(tool.return) }} + {%- endif %} + {{- " - " + tool.description + " + +" }} + {%- for param_name, param_fields in tool.parameters.properties|items %} + {%- if loop.first %} + {{- " Args: +" }} + {%- endif %} + {{- " " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }} + {%- endfor %} + {%- if tool.return is defined and tool.return.description is defined %} + {{- " + Returns: + " + tool.return.description }} + {%- endif %} + {{- '"' }} + {{- ', "parameters": ' }} + {%- if tool.parameters.properties | length == 0 %} + {{- "{}" }} + {%- else %} + {{- tool.parameters|tojson }} + {%- endif %} + {{- "}" }} + {%- if not loop.last %} + {{- " +" }} + {%- endif %} +{%- endfor %} +{{- " " }} +{{- 'Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +' }} +{{- "For each function call return a json object with function name and arguments within XML tags 
as follows: +" }} +{{- " +" }} +{{- '{"name": , "arguments": } +' }} +{{- '<|im_end|> +' }} +{%- for message in messages %} + {%- if message.role == "user" or message.role == "system" or (message.role == "assistant" and message.tool_calls is not defined) %} + {{- '<|im_start|>' + message.role + ' +' + message.content + '<|im_end|>' + ' +' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- for tool_call in message.tool_calls %} + {{- ' + +' }} {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '{' }} + {{- '"name": "' }} + {{- tool_call.name }} + {{- '"' }} + {{- ', '}} + {%- if tool_call.arguments is defined %} + {{- '"arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments|tojson }} + {%- endif %} + {%- endif %} + {{- '}' }} + {{- ' +' }} + {%- endfor %} + {{- '<|im_end|> +' }} + {%- elif message.role == "tool" %} + {%- if loop.previtem and loop.previtem.role != "tool" %} + {{- '<|im_start|>tool +' }} + {%- endif %} + {{- ' +' }} + {{- message.content }} + {%- if not loop.last %} + {{- ' + +' }} + {%- else %} + {{- ' +' }} + {%- endif %} + {%- if not loop.last and loop.nextitem.role != "tool" %} + {{- '<|im_end|>' }} + {%- elif loop.last %} + {{- '<|im_end|>' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant +' }} +{%- endif %} diff --git a/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja b/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja new file mode 100644 index 000000000..149250bd5 --- /dev/null +++ b/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja @@ -0,0 +1,152 @@ +{%- macro json_to_python_type(json_spec) %} +{%- set basic_type_map = { + "string": "str", + "number": "float", + "integer": "int", + "boolean": "bool" +} %} + +{%- if basic_type_map[json_spec.type] is defined %} + {{- 
basic_type_map[json_spec.type] }} +{%- elif json_spec.type == "array" %} + {{- "list[" + json_to_python_type(json_spec|items) + "]"}} +{%- elif json_spec.type == "object" %} + {%- if json_spec.additionalProperties is defined %} + {{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']'}} + {%- else %} + {{- "dict" }} + {%- endif %} +{%- elif json_spec.type is iterable %} + {{- "Union[" }} + {%- for t in json_spec.type %} + {{- json_to_python_type({"type": t}) }} + {%- if not loop.last %} + {{- "," }} + {%- endif %} + {%- endfor %} + {{- "]" }} +{%- else %} + {{- "Any" }} +{%- endif %} +{%- endmacro %} + + +{{- bos_token }} +{{- '<|im_start|>system +' }} +{{- "You are a function calling AI model. You are provided with function signatures within XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: " }} +{%- for tool in tools %} + {%- if tool.function is defined %} + {%- set tool = tool.function %} + {%- endif %} + {{- '{"type": "function", "function": ' }} + {{- '{"name": "' + tool.name + '", ' }} + {{- '"description": "' + tool.name + '(' }} + {%- for param_name, param_fields in tool.parameters.properties|items %} + {{- param_name + ": " + json_to_python_type(param_fields) }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- if tool.return is defined %} + {{- " -> " + json_to_python_type(tool.return) }} + {%- endif %} + {{- " - " + tool.description + " + +" }} + {%- for param_name, param_fields in tool.parameters.properties|items %} + {%- if loop.first %} + {{- " Args: +" }} + {%- endif %} + {{- " " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }} + {%- endfor %} + {%- if tool.return is defined and tool.return.description is defined %} + {{- " + Returns: + " + tool.return.description }} + {%- endif %} + {{- '"' }} + {{- ', "parameters": ' }} + 
{%- if tool.parameters.properties | length == 0 %} + {{- "{}" }} + {%- else %} + {{- tool.parameters|tojson }} + {%- endif %} + {{- "}" }} + {%- if not loop.last %} + {{- " +" }} + {%- endif %} +{%- endfor %} +{{- " " }} +{{- 'Use the following pydantic model json schema for each tool call you will make: {"properties": {"name": {"title": "Name", "type": "string"}, "arguments": {"title": "Arguments", "type": "object"}}, "required": ["name", "arguments"], "title": "FunctionCall", "type": "object"}} +' }} +{{- "For each function call return a json object with function name and arguments within XML tags as follows: +" }} +{{- " +" }} +{{- '{"name": , "arguments": } +' }} +{{- '<|im_end|> +' }} +{%- for message in messages %} + {%- if message.role == "user" or message.role == "system" or (message.role == "assistant" and message.tool_calls is not defined) %} + {{- '<|im_start|>' + message.role + ' +' + message.content + '<|im_end|>' + ' +' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- for tool_call in message.tool_calls %} + {{- ' + +' }} {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '{' }} + {{- '"name": "' }} + {{- tool_call.name }} + {{- '"' }} + {{- ', '}} + {%- if tool_call.arguments is defined %} + {{- '"arguments": ' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments|tojson }} + {%- endif %} + {%- endif %} + {{- '}' }} + {{- ' +' }} + {%- endfor %} + {{- '<|im_end|> +' }} + {%- elif message.role == "tool" %} + {%- if loop.previtem and loop.previtem.role != "tool" %} + {{- '<|im_start|>tool +' }} + {%- endif %} + {{- ' +' }} + {{- message.content }} + {%- if not loop.last %} + {{- ' + +' }} + {%- else %} + {{- ' +' }} + {%- endif %} + {%- if not loop.last and loop.nextitem.role != "tool" %} + {{- '<|im_end|>' }} + {%- elif loop.last %} + {{- '<|im_end|>' }} + {%- endif %} + {%- endif %} +{%- endfor %} 
+{%- if add_generation_prompt %} + {{- '<|im_start|>assistant +' }} +{%- endif %} diff --git a/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja b/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja new file mode 100644 index 000000000..bdf7919a9 --- /dev/null +++ b/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja @@ -0,0 +1,54 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja new file mode 100644 index 000000000..02a1c3bce --- /dev/null +++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja @@ -0,0 +1 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false 
-%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %} \ No newline at end of file diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja new file mode 100644 index 000000000..2ebfe7c1e --- /dev/null +++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja @@ -0,0 +1,56 @@ +{% if not add_generation_prompt is defined %} +{% set 
add_generation_prompt = false %} +{% endif %} +{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %} +{%- for message in messages %} +{%- if message['role'] == 'system' %} +{% set ns.system_prompt = message['content'] %} +{%- endif %} +{%- endfor %} +{{bos_token}} +{{ns.system_prompt}} +{%- for message in messages %} +{%- if message['role'] == 'user' %} +{%- set ns.is_tool = false -%} +{{'<|User|>' + message['content']}} +{%- endif %} +{%- if message['role'] == 'assistant' and message['content'] is none %} +{%- set ns.is_tool = false -%} +{%- for tool in message['tool_calls']%} +{%- if not ns.is_first %} +{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}} +{%- set ns.is_first = true -%} +{%- else %} +{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}} +{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} +{%- endif %} +{%- endfor %} +{%- endif %} +{%- if message['role'] == 'assistant' and message['content'] is not none %} +{%- if ns.is_tool %} +{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} +{%- set ns.is_tool = false -%} +{%- else %} +{% set content = message['content'] %} +{% if '' in content %} +{% set content = content.split('')[-1] %} +{% endif %} +{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}} +{%- endif %} +{%- endif %} +{%- if message['role'] == 'tool' %} +{%- set ns.is_tool = true -%} +{%- if ns.is_output_first %} +{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} +{%- set ns.is_output_first = false %} +{%- else %} +{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} +{%- endif %} +{%- endif %} +{%- endfor -%} +{% if ns.is_tool %} 
+{{'<|tool▁outputs▁end|>'}} +{% endif %} +{% if add_generation_prompt and not ns.is_tool %} +{{'<|Assistant|>'}} +{% endif %} \ No newline at end of file diff --git a/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja new file mode 100644 index 000000000..9b8136df7 --- /dev/null +++ b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja @@ -0,0 +1,57 @@ +{%- set loop_messages = messages -%} +{%- set message_roles = ['system', 'user', 'assistant', 'tool'] -%} +{%- set system_prompt_suffix -%} +{%- filter trim -%} +In addition to plain text responses, you can chose to call one or more of the provided functions. + +Use the following rule to decide when to call a function: + * if the response can be generated from your internal knowledge (e.g., as in the case of queries like "What is the capital of Poland?"), do so + * if you need external information that can be obtained by calling one or more of the provided functions, generate a function calls + +If you decide to call functions: + * prefix function calls with functools marker (no closing marker required) + * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] + * follow the provided JSON schema. Do not hallucinate arguments or values. Do to blindly copy values from the provided samples + * respect the argument type formatting. E.g., if the type if number and format is float, write value 7 as 7.0 + * make sure you pick the right functions that match the user intent + +Available functions as JSON spec: +{%- endfilter -%} +{%- endset -%} +{%- set system_prompt_suffix = system_prompt_suffix + "\n" + functions -%} +{%- set system_prompt_suffix = system_prompt_suffix + '\nToday is ' + datetime + '.' 
-%} +{%- set ns = namespace(role='', content='') -%} +{#- Basic consistency checks -#} +{%- if not loop_messages -%} + {{ raise_exception('Expected non-empty messages') }} +{%- endif -%} +{%- for message in loop_messages -%} + {%- set ns.role = message['role'] | lower -%} + {%- if ns.role not in message_roles -%} + {%- set message_roles_string = message_roles | join(', ') -%} + {{ raise_exception('Invalid role ' + message['role'] + '. Only ' + message_roles_string + ' are supported.') }} + {%- endif -%} + {%- set msg_content = message['content'] | default('', true) | trim -%} + {%- if loop.index0 == 0 -%} + {%- if ns.role == 'system' -%} + {%- set system_prompt = '<|start_header_id|>' + 'system' + '<|end_header_id|>\n\n' + message['content'] | trim + '\n' + system_prompt_suffix + '<|eot_id|>' -%} + {%- else -%} + {%- set system_prompt = '<|start_header_id|>' + 'system' + '<|end_header_id|>\n\nYou are a helpful assistant with access to functions.\n' + system_prompt_suffix + '<|eot_id|>' -%} + {%- endif -%} + {%- set ns.content = bos_token + system_prompt -%} + {{- ns.content -}} + {%- endif -%} + {%- if loop.index0 > 0 or ns.role != 'system' -%} + {%- set ns.content = '<|start_header_id|>' + ns.role + '<|end_header_id|>\n\n' + msg_content -%} + {%- if 'tool_calls' in message and message['tool_calls'] -%} + {%- set tool = namespace(calls=[]) -%} + {%- for call in message['tool_calls'] -%} + {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments'] + '}'] -%} + {%- endfor -%} + {%- set ns.content = ns.content + ' functools[' + tool.calls | join(', ') + ']' -%} + {%- endif -%} + {%- set ns.content = ns.content + '<|eot_id|>' -%} + {{- ns.content -}} + {%- endif -%} +{%- endfor -%} +{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} diff --git a/models/templates/google-gemma-2-2b-it.jinja b/models/templates/google-gemma-2-2b-it.jinja new file mode 100644 index 000000000..923ec253c --- 
/dev/null +++ b/models/templates/google-gemma-2-2b-it.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/models/templates/meetkai-functionary-medium-v3.1.jinja b/models/templates/meetkai-functionary-medium-v3.1.jinja new file mode 100644 index 000000000..29d64a215 --- /dev/null +++ b/models/templates/meetkai-functionary-medium-v3.1.jinja @@ -0,0 +1,58 @@ +{# version=v3-llama3.1 #}{%- if not tools is defined -%} + {%- set tools = none -%} +{%- endif -%} + +{%- set has_code_interpreter = tools | selectattr("type", "equalto", "code_interpreter") | list | length > 0 -%} +{%- if has_code_interpreter -%} + {%- set tools = tools | rejectattr("type", "equalto", "code_interpreter") | list -%} +{%- endif -%} + +{#- System message + builtin tools #} +{{- bos_token + "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if has_code_interpreter %} + {{- "Environment: ipython\n\n" }} +{%- else -%} + {{ "\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n\n" }} +{%- if tools %} + {{- "\nYou have access to the following functions:\n\n" }} + {%- for t in tools %} + {%- if "type" in t -%} + {{ "Use the function '"|safe + t["function"]["name"] + "' to '"|safe + t["function"]["description"] + "'\n"|safe + t["function"] | tojson() }} + {%- else -%} + {{ "Use the function '"|safe + t["name"] + "' to '"|safe + t["description"] + "'\n"|safe + t | tojson() }} + {%- endif -%} + {{- "\n\n" }} + {%- endfor %} + {{- 
'\nThink very carefully before calling functions.\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- If looking for real time information use relevant functions before falling back to brave_search\n- Function calls MUST follow the specified format, start with \n- Required parameters MUST be specified\n- Only call one function at a time\n- Put the entire function call reply on one line\n\n' -}} +{%- endif %} +{{- "<|eot_id|>" -}} + +{%- for message in messages -%} + {%- if message['role'] == 'user' or message['role'] == 'system' -%} + {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }} + {%- elif message['role'] == 'tool' -%} + {{ '<|start_header_id|>ipython<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }} + {%- else -%} + {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}} + {%- if message['content'] -%} + {{ message['content'] }} + {%- endif -%} + {%- if 'tool_calls' in message and message['tool_calls'] -%} + {%- for tool_call in message['tool_calls'] -%} + {%- if tool_call["function"]["name"] == "python" -%} + {{ '<|python_tag|>' + tool_call['function']['arguments'] }} + {%- else -%} + {{ '' + tool_call['function']['arguments'] + '' }} + {%- endif -%} + {%- endfor -%} + {{ '<|eom_id|>' }} + {%- else -%} + {{ '<|eot_id|>' }} + {%- endif -%} + {%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + {{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif -%} \ No newline at end of file diff --git a/models/templates/meetkai-functionary-medium-v3.2.jinja b/models/templates/meetkai-functionary-medium-v3.2.jinja new file mode 100644 index 000000000..74fd1e7af --- /dev/null +++ 
b/models/templates/meetkai-functionary-medium-v3.2.jinja @@ -0,0 +1,287 @@ +{# version=v3.llama3 #}{%- macro append_new_param_info(param_declaration, comment_info, examples_info, depth) -%} + {%- set offset = "" -%} + {%- if depth >= 1 -%} + {%- set offset = " " * depth -%} + {%- endif -%} + {%- if comment_info != "<|NONE|>" -%} + {{ "\n" + offset + comment_info }} + {%- if examples_info | length > 0 -%} + {# Append each example info #} + {%- for example in examples_info -%} + {{ "\n" + offset + "// " + example|string|replace("'", '"') }} + {%- endfor -%} + {%- endif -%} + {%- endif -%} + {{ "\n" + offset + param_declaration }} +{%- endmacro -%} + +{%- macro convert_data_type(param_type) -%} + {%- if param_type == "integer" or param_type == "float" -%} + {{ "number" }} + {%- else -%} + {{ param_type }} + {%- endif -%} +{%- endmacro -%} + +{%- macro get_param_type(param) -%} + {%- set param_type = "any" -%} + + {%- if "type" in param -%} + {%- set raw_param_type = param["type"] -%} + {%- if raw_param_type is iterable and raw_param_type is not string -%} + {%- set param_type = raw_param_type | join(" | ") -%} + {%- else -%} + {%- set param_type = raw_param_type -%} + {%- endif -%} + {{ convert_data_type(param_type) }} + {%- elif "oneOf" in param -%} + {%- set one_of_types = param["oneOf"]|selectattr("type", "defined")|list -%} + {%- set one_of_types = one_of_types|map(attribute="type")|unique|list -%} + {{ convert_data_type(one_of_types | join(" | ")) }} + {%- endif -%} +{%- endmacro -%} + +{%- macro get_format_param(param) -%} + {%- if "format" in param -%} + {{ param["format"] }} + {%- elif "oneOf" in param -%} + {%- set formats = [] -%} + {%- for item in param["oneOf"] -%} + {%- if "format" in item -%} + {%- if item["format"] == param["oneOf"][-1]["format"] -%} + {{ item["format"] }} + {%- else -%} + {{ item["format"] + " or "}} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ "<|NONE|>" }} + {%- endif -%} +{%- endmacro -%} + +{%- macro 
get_param_info(param) -%} + {%- set param_type = param.get("type", "any") -%} + {%- set format_param = get_format_param(param) -%} + + {%- if "description" in param or "default" in param or format_param != "<|NONE|>" or param["maximum"] or param["minimum"] or param["maxLength"] or param["minLength"] -%} + {{ "//" }} + {%- if "description" in param -%} + {%- set desc = param["description"] -%} + {%- if not desc.endswith(".") -%} + {%- set desc = desc + "." -%} + {%- endif -%} + {{ " " + desc }} + {%- endif -%} + + {%- if "default" in param -%} + {%- set default_value = param["default"] -%} + {%- if param_type == "string" -%} + {%- set default_value = '"' ~ default_value ~ '"' -%} + {%- endif -%} + {{ " Default=" ~ default_value ~ "." }} + {%- endif -%} + + {%- set format_param = get_format_param(param) -%} + {%- if format_param != "<|NONE|>" -%} + {{ " Format=" ~ format_param }} + {%- endif -%} + + {%- for field, field_name in [("maximum", "Maximum"), ("minimum", "Minimum"), ("maxLength", "Maximum length"), ("minLength", "Minimum length")] -%} + {%- if field in param -%} + {{ " " + field_name ~ "=" ~ param[field] }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ "<|NONE|>"}} + {%- endif -%} +{%- endmacro -%} + +{%- macro get_enum_option_str(enum_options) -%} + {%- for v in enum_options -%} + {%- if v is string -%} + {{ '"' + v + '"' }} + {%- else -%} + {{ v }} + {%- endif -%} + {%- if enum_options|length > 0 and v != enum_options[-1] -%} + {{ " | " }} + {%- endif -%} + {%- endfor -%} +{%- endmacro -%} + +{%- macro get_array_typescript(param_name, param_dic, depth) -%} + {%- set offset = '' -%} + {%- if depth >= 1 -%} + {%- set offset = " " * depth -%} + {%- endif -%} + {%- set items_info = param_dic.get('items', {}) -%} + + {%- if items_info|length == 0 -%} + {%- if param_name -%} + {{ "\n" + offset + param_name + ": []" }} + {%- else -%} + {{ "\n" + offset + "[]" }} + {%- endif -%} + {%- else -%} + {%- set array_type = get_param_type(items_info) -%} + {%- if 
array_type == 'object' -%} + {%- if param_name -%} + {{ "\n" + offset + param_name + ": {" }} + {%- else -%} + {{ "\n" + offset + "{" }} + {%- endif -%} + {{ get_parameter_typescript(items_info.get('properties', {}), items_info.get('required', []), depth + 1) -}} + {{- "\n" + offset + "}[]" }} + {%- elif array_type == 'array' -%} + {%- set item_info = get_array_typescript(None, items_info, depth + 1) -%} + {%- if not param_name -%} + {{ "\n" + item_info + "[]" }} + {%- else -%} + {{ "\n" + offset + param_name + ": " + item_info|trim + "[]" }} + {%- endif -%} + {%- else -%} + {%- if 'enum' in items_info -%} + {%- set item_type = get_enum_option_str(items_info['enum']) -%} + {%- if param_name is none -%} + {{ "(" + item_type + ")[]"}} + {%- else -%} + {{ "\n" + offset + param_name + ": (" + item_type + ")[]" }} + {%- endif -%} + {%- else -%} + {%- if param_name is none -%} + {{ "\n" + array_type + "[]" }} + {%- else -%} + {{ "\n" + offset + param_name + ": " + array_type + "[]," }} + {%- endif -%} + {%- endif -%} + {%- endif -%} + {%- endif -%} +{%- endmacro -%} + +{%- macro get_parameter_typescript(properties, required_params, depth=0) -%} + {%- set res = "" -%} + {%- for param_name, param in properties.items() -%} + {%- if param is mapping -%} + {%- set comment_info = get_param_info(param) -%} + {# Param Examples #} + {%- set examples_info = [] -%} + {%- if "examples" in param -%} + {%- set examples_info = ["Example " + param_name + ":"] -%} + {%- set examples_info = examples_info + param["examples"] -%} + {%- endif -%} + + {# Param Name declaration #} + {%- set param_declaration = param_name -%} + {%- if required_params is iterable and param_name not in required_params -%} + {%- set param_declaration = param_declaration + "?" 
-%} + {%- endif -%} + + {%- set param_type = get_param_type(param) -%} + + {# Handle indentation based on depth #} + {%- set offset = "" -%} + {%- if depth >= 1 -%} + {%- set offset = " " * depth -%} + {%- endif -%} + + {%- if param_type == "object" -%} + {%- if comment_info != "<|NONE|>" -%} + {{ "\n" + offset + comment_info }} + {%- endif -%} + {%- if examples_info|length > 0 -%} + {%- for example in examples_info -%} + {{ "\n" + offset + "// " + example|string|replace("'", '"') }} + {%- endfor -%} + {%- endif -%} + {%- set param_declaration = param_declaration + ": {" -%} + {{ "\n" + offset + param_declaration -}} + {{- get_parameter_typescript(param.get("properties", {}), param.get("required", []), depth + 1) -}} + {{- "\n" + offset + "}," }} + {%- elif param_type == "array" -%} + {%- set item_info = param.get("items", {}) -%} + {%- if "type" not in item_info -%} + {%- set param_declaration = param_declaration + ": []," -%} + {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }} + {%- else -%} + {%- if comment_info != "<|NONE|>" -%} + {{ "\n" + offset + comment_info }} + {%- endif -%} + {%- if examples_info|length > 0 -%} + {%- for example in examples_info -%} + {{ "\n" + offset + "// " + example|string|replace("'", '"') }} + {%- endfor -%} + {%- endif -%} + {%- set array_declaration = get_array_typescript(param_declaration, param, depth) -%} + {%- if not array_declaration.endswith(",") -%} + {%- set array_declaration = array_declaration + "," -%} + {%- endif -%} + {{ array_declaration}} + {%- endif -%} + {%- else -%} + {%- if "enum" in param -%} + {%- set param_type = get_enum_option_str(param["enum"]) -%} + {%- endif -%} + {%- if "nullable" in param and param["nullable"] -%} + {%- set param_type = param_type + " | null" -%} + {%- endif -%} + {%- set param_declaration = param_declaration + ": " + param_type + "," -%} + {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }} + {%- endif -%} + {%- endif -%} 
+ {%- endfor -%} +{%- endmacro -%} + +{%- macro generate_schema_from_functions(functions, namespace='functions') -%} + {{ "// Supported function definitions that should be called when necessary.\n" -}} + {{- "namespace " + namespace + " {\n\n" -}} + + {%- for function in functions -%} + {%- if function.get("function") -%} + {%- set function = function.get("function") -%} + {%- endif -%} + + {%- set function_name = function.get("name") -%} + {%- if function_name -%} + {%- set description = function.get('description', '') -%} + {%- set parameters = function.get('parameters', {}) -%} + {{- "// " + description + "\n" -}} + {{- "type " + function_name -}} + {%- if parameters and parameters.get("properties") -%} + {{- " = (_: {" -}} + {%- set required_params = parameters.get("required", []) -%} + {{ get_parameter_typescript(parameters.get("properties"), required_params, 0) -}} + {{- "\n}) => any;\n\n" }} + {%- else -%} + {{ " = () => any;\n\n" }} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + {{ "} // namespace " + namespace }} +{%- endmacro -%} +{%- if not tools -%} + {%- set tools = [] -%} +{%- endif -%} +{{ bos_token + '<|start_header_id|>system<|end_header_id|>\n\nYou are capable of executing available function(s) if required.\nOnly execute function(s) when absolutely necessary.\nAsk for the required input to:recipient==all\nUse JSON for function arguments.\nRespond in this format:\n>>>${recipient}\n${content}\nAvailable functions:\n' + generate_schema_from_functions(tools) + '<|eot_id|>' -}} +{%- if tools|length > 0 and tools|selectattr("type", "equalto", "code_interpreter")|list|length > 0 -%} + {{ '<|start_header_id|>system<|end_header_id|>\n\nWhen you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0 seconds. 
The drive at \'/mnt/data\' can be used to save and persist user files.<|eot_id|>' }} +{%- endif -%} +{%- for message in messages -%} + {%- if message['role'] == 'user' or message['role'] == 'system' -%} + {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }} + {%- elif message['role'] == 'tool' -%} + {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }} + {%- else -%} + {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}} + {%- if message['content'] -%} + {{ '>>>all\n' + message['content'] }} + {%- endif -%} + {%- if 'tool_calls' in message and message['tool_calls'] -%} + {%- for tool_call in message['tool_calls'] -%} + {{ '>>>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }} + {%- endfor -%} + {%- endif -%} + {{ '<|eot_id|>' }} + {%- endif -%} +{%- endfor -%} +{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n>>>' }}{% endif %} \ No newline at end of file diff --git a/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja b/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja new file mode 100644 index 000000000..33089ace1 --- /dev/null +++ b/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. 
#} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja b/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja new file mode 100644 
index 000000000..1bad6a0f6 --- /dev/null +++ b/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if 
message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja b/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja new file mode 100644 index 000000000..33089ace1 --- /dev/null +++ b/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} 
+ {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/models/templates/microsoft-Phi-3.5-mini-instruct.jinja b/models/templates/microsoft-Phi-3.5-mini-instruct.jinja new file mode 100644 index 000000000..d1533d152 --- /dev/null +++ b/models/templates/microsoft-Phi-3.5-mini-instruct.jinja @@ -0,0 +1,8 @@ +{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|> +' + message['content'] + '<|end|> +'}}{% elif message['role'] == 'user' %}{{'<|user|> +' + message['content'] + '<|end|> +'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|> +' + message['content'] + '<|end|> +'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|> +' }}{% else %}{{ eos_token }}{% endif %} \ No newline at end of file diff --git a/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja b/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja new file mode 100644 index 000000000..9c21a3f13 --- /dev/null +++ b/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja @@ -0,0 +1,87 @@ +{%- if messages[0]["role"] == "system" %} + {%- set system_message = messages[0]["content"] %} + {%- set loop_messages = messages[1:] %} +{%- else 
%} + {%- set loop_messages = messages %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} +{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %} + +{#- This block checks for alternating user/assistant messages, skipping tool calling messages #} +{%- set ns = namespace() %} +{%- set ns.index = 0 %} +{%- for message in loop_messages %} + {%- if not (message.role == "tool" or message.role == "tool_results" or (message.tool_calls is defined and message.tool_calls is not none)) %} + {%- if (message["role"] == "user") != (ns.index % 2 == 0) %} + {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }} + {%- endif %} + {%- set ns.index = ns.index + 1 %} + {%- endif %} +{%- endfor %} + +{{- bos_token }} +{%- for message in loop_messages %} + {%- if message["role"] == "user" %} + {%- if tools is not none and (message == user_messages[-1]) %} + {{- "[AVAILABLE_TOOLS][" }} + {%- for tool in tools %} + {%- set tool = tool.function %} + {{- '{"type": "function", "function": {' }} + {%- for key, val in tool.items() if key != "return" %} + {%- if val is string %} + {{- '"' + key + '": "' + val + '"' }} + {%- else %} + {{- '"' + key + '": ' + val|tojson }} + {%- endif %} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- "}}" }} + {%- if not loop.last %} + {{- ", " }} + {%- else %} + {{- "]" }} + {%- endif %} + {%- endfor %} + {{- "[/AVAILABLE_TOOLS]" }} + {%- endif %} + {%- if loop.last and system_message is defined %} + {{- "[INST]" + system_message + "\n\n" + message["content"] + "[/INST]" }} + {%- else %} + {{- "[INST]" + message["content"] + "[/INST]" }} + {%- endif %} + {%- elif (message.tool_calls is defined and message.tool_calls is not none) %} + {{- "[TOOL_CALLS][" }} + {%- for tool_call in message.tool_calls %} + {%- set out = tool_call.function|tojson %} + {{- out[:-1] }} + {%- if not tool_call.id is 
defined or tool_call.id|length != 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }} + {%- endif %} + {{- ', "id": "' + tool_call.id + '"}' }} + {%- if not loop.last %} + {{- ", " }} + {%- else %} + {{- "]" + eos_token }} + {%- endif %} + {%- endfor %} + {%- elif message["role"] == "assistant" %} + {{- message["content"] + eos_token}} + {%- elif message["role"] == "tool_results" or message["role"] == "tool" %} + {%- if message.content is defined and message.content.content is defined %} + {%- set content = message.content.content %} + {%- else %} + {%- set content = message.content %} + {%- endif %} + {{- '[TOOL_RESULTS]{"content": ' + content|string + ", " }} + {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }} + {%- endif %} + {{- '"call_id": "' + message.tool_call_id + '"}[/TOOL_RESULTS]' }} + {%- else %} + {{- raise_exception("Only user and assistant roles are supported, with the exception of an initial optional system message!") }} + {%- endif %} +{%- endfor %} diff --git a/scripts/fetch_server_test_models.py b/scripts/fetch_server_test_models.py new file mode 100755 index 000000000..05690b138 --- /dev/null +++ b/scripts/fetch_server_test_models.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python +''' + This script fetches all the models used in the server tests. + + This is useful for slow tests that use larger models, to avoid them timing out on the model downloads. + + It is meant to be run from the root of the repository. 
+ + Example: + python scripts/fetch_server_test_models.py + ( cd examples/server/tests && ./tests.sh -v -x -m slow ) +''' +import ast +import glob +import logging +import os +from typing import Generator +from pydantic import BaseModel +from typing import Optional +import subprocess + + +class HuggingFaceModel(BaseModel): + hf_repo: str + hf_file: Optional[str] = None + + class Config: + frozen = True + + +def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, None, None]: + try: + with open(test_file) as f: + tree = ast.parse(f.read()) + except Exception as e: + logging.error(f'collect_hf_model_test_parameters failed on {test_file}: {e}') + return + + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef): + for dec in node.decorator_list: + if isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute) and dec.func.attr == 'parametrize': + param_names = ast.literal_eval(dec.args[0]).split(",") + if "hf_repo" not in param_names: + continue + + raw_param_values = dec.args[1] + if not isinstance(raw_param_values, ast.List): + logging.warning(f'Skipping non-list parametrize entry at {test_file}:{node.lineno}') + continue + + hf_repo_idx = param_names.index("hf_repo") + hf_file_idx = param_names.index("hf_file") if "hf_file" in param_names else None + + for t in raw_param_values.elts: + if not isinstance(t, ast.Tuple): + logging.warning(f'Skipping non-tuple parametrize entry at {test_file}:{node.lineno}') + continue + yield HuggingFaceModel( + hf_repo=ast.literal_eval(t.elts[hf_repo_idx]), + hf_file=ast.literal_eval(t.elts[hf_file_idx]) if hf_file_idx is not None else None) + + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') + + models = sorted(list(set([ + model + for test_file in glob.glob('examples/server/tests/unit/test_*.py') + for model in collect_hf_model_test_parameters(test_file) + ])), key=lambda m: (m.hf_repo, m.hf_file)) + + logging.info(f'Found 
{len(models)} models in parameterized tests:') + for m in models: + logging.info(f' - {m.hf_repo} / {m.hf_file}') + + cli_path = os.environ.get( + 'LLAMA_SERVER_BIN_PATH', + os.path.join( + os.path.dirname(__file__), + '../build/bin/Release/llama-cli.exe' if os.name == 'nt' else '../build/bin/llama-cli')) + + for m in models: + if '<' in m.hf_repo or (m.hf_file is not None and '<' in m.hf_file): + continue + if m.hf_file is not None and '-of-' in m.hf_file: + logging.warning(f'Skipping model at {m.hf_repo} / {m.hf_file} because it is a split file') + continue + logging.info(f'Using llama-cli to ensure model {m.hf_repo}/{m.hf_file} was fetched') + cmd = [ + cli_path, + '-hfr', m.hf_repo, + *([] if m.hf_file is None else ['-hff', m.hf_file]), + '-n', '1', + '-p', 'Hey', + '--no-warmup', + '--log-disable', + '-no-cnv'] + if m.hf_file != 'tinyllamas/stories260K.gguf' and 'Mistral-Nemo' not in m.hf_repo: + cmd.append('-fa') + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + logging.error(f'Failed to fetch model at {m.hf_repo} / {m.hf_file} with command:\n {" ".join(cmd)}') + exit(1) diff --git a/scripts/get_hf_chat_template.py b/scripts/get_chat_template.py old mode 100755 new mode 100644 similarity index 86% rename from scripts/get_hf_chat_template.py rename to scripts/get_chat_template.py index 23bb1de59..e8982d11a --- a/scripts/get_hf_chat_template.py +++ b/scripts/get_chat_template.py @@ -4,12 +4,12 @@ If a model has multiple chat templates, you can specify the variant name. 
Syntax: - ./scripts/get_hf_chat_template.py model_id [variant] + ./scripts/get_chat_template.py model_id [variant] Examples: - ./scripts/get_hf_chat_template.py NousResearch/Meta-Llama-3-8B-Instruct - ./scripts/get_hf_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use - ./scripts/get_hf_chat_template.py meta-llama/Llama-3.2-3B-Instruct + ./scripts/get_chat_template.py NousResearch/Meta-Llama-3-8B-Instruct + ./scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use + ./scripts/get_chat_template.py meta-llama/Llama-3.2-3B-Instruct ''' import json @@ -17,7 +17,7 @@ import re import sys -def get_hf_chat_template(model_id, variant=None): +def get_chat_template(model_id, variant=None): try: # Use huggingface_hub library if available. # Allows access to gated models if the user has access and ran `huggingface-cli login`. @@ -69,7 +69,7 @@ def main(args): model_id = args[0] variant = None if len(args) < 2 else args[1] - template = get_hf_chat_template(model_id, variant) + template = get_chat_template(model_id, variant) sys.stdout.write(template) diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp index bebe4e9a3..6be5cbe0e 100644 --- a/src/llama-grammar.cpp +++ b/src/llama-grammar.cpp @@ -560,7 +560,7 @@ bool llama_grammar_parser::parse(const char * src) { } } } catch (const std::exception & err) { - fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what()); + fprintf(stderr, "%s: error parsing grammar: %s\n\n%s\n", __func__, err.what(), src); rules.clear(); return false; } @@ -960,10 +960,28 @@ struct llama_grammar * llama_grammar_init_impl( // Important: vec_rules has to be moved here, not copied, because stacks contains // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar // then the pointers would be invalidated when the local vec_rules goes out of scope. 
- return new llama_grammar { vocab, std::move(vec_rules), std::move(stacks), {}, }; + return new llama_grammar { + vocab, + std::move(vec_rules), + std::move(stacks), + /* .partial_utf8 = */ {}, + /* .lazy =*/ false, + /* .awaiting_trigger = */ false, + /* .trigger_buffer = */ "", + /* .trigger_tokens = */ {}, + /* .trigger_words = */ {}, + }; } -struct llama_grammar * llama_grammar_init_impl(const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root) { +struct llama_grammar * llama_grammar_init_impl( + const struct llama_vocab * vocab, + const char * grammar_str, + const char * grammar_root, + bool lazy, + const char ** trigger_words, + size_t num_trigger_words, + const llama_token * trigger_tokens, + size_t num_trigger_tokens) { llama_grammar_parser parser; // if there is a grammar, parse it @@ -1035,10 +1053,31 @@ struct llama_grammar * llama_grammar_init_impl(const struct llama_vocab * vocab, } } while (true); + std::vector vec_trigger_tokens; + std::vector vec_trigger_words; + for (size_t i = 0; i < num_trigger_tokens; i++) { + GGML_ASSERT(trigger_tokens != nullptr); + vec_trigger_tokens.push_back(trigger_tokens[i]); + } + for (size_t i = 0; i < num_trigger_words; i++) { + GGML_ASSERT(trigger_words != nullptr); + vec_trigger_words.push_back(trigger_words[i]); + } + // Important: vec_rules has to be moved here, not copied, because stacks contains // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar // then the pointers would be invalidated when the local vec_rules goes out of scope. 
- return new llama_grammar { vocab, std::move(vec_rules), std::move(stacks), {}, }; + return new llama_grammar { + vocab, + std::move(vec_rules), + std::move(stacks), + /* .partial_utf8 = */ {}, + /* .lazy = */ lazy, + /* .awaiting_trigger = */ lazy, + /* .trigger_buffer = */ "", + std::move(vec_trigger_tokens), + std::move(vec_trigger_words), + }; } void llama_grammar_free_impl(struct llama_grammar * grammar) { @@ -1055,6 +1094,11 @@ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & gra grammar.rules, grammar.stacks, grammar.partial_utf8, + grammar.lazy, + grammar.awaiting_trigger, + grammar.trigger_buffer, + grammar.trigger_tokens, + grammar.trigger_words, }; // redirect elements in stacks to point to new rules @@ -1076,6 +1120,10 @@ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & gra void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) { GGML_ASSERT(grammar.vocab != nullptr); + if (grammar.awaiting_trigger) { + return; + } + bool allow_eog = false; for (const auto & stack : grammar.stacks) { if (stack.empty()) { @@ -1115,6 +1163,34 @@ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) { GGML_ASSERT(grammar.vocab != nullptr); + const auto & piece = grammar.vocab->token_to_piece(token); + + if (grammar.awaiting_trigger) { + if (std::find(grammar.trigger_tokens.begin(), grammar.trigger_tokens.end(), token) != grammar.trigger_tokens.end()) { + grammar.awaiting_trigger = false; + grammar.trigger_buffer.clear(); + llama_grammar_accept_str(grammar, piece); + LLAMA_LOG_DEBUG("Grammar triggered on token %u (`%s`)", token, piece.c_str()); + return; + } else { + // TODO: consider a smarter incremental substring search algorithm (store last position to search from). 
+ grammar.trigger_buffer += piece; + for (const auto & word : grammar.trigger_words) { + auto pos = grammar.trigger_buffer.find(word); + if (pos != std::string::npos) { + grammar.awaiting_trigger = false; + auto constrained_str = grammar.trigger_buffer.substr(pos); + grammar.trigger_buffer.clear(); + llama_grammar_accept_str(grammar, constrained_str); + LLAMA_LOG_DEBUG("Grammar triggered on word `%s`", word.c_str()); + return; + } + } + LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`) (buffer: `%s`)\n", token, piece.c_str(), grammar.trigger_buffer.c_str()); + return; + } + } + if (grammar.vocab->is_eog(token)) { + for (const auto & stack : grammar.stacks) { + if (stack.empty()) { @@ -1124,8 +1200,10 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token + GGML_ABORT("fatal error"); + } + - const std::string & piece = grammar.vocab->token_to_piece(token); + llama_grammar_accept_str(grammar, piece); +} +void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string & piece) { // Note terminating 0 in decoded string + const auto decoded = decode_utf8(piece, grammar.partial_utf8); + const auto & code_points = decoded.first; diff --git a/src/llama-grammar.h b/src/llama-grammar.h index f8b40c651..252d54d4c 100644 --- a/src/llama-grammar.h +++ b/src/llama-grammar.h @@ -114,6 +114,15 @@ struct llama_grammar { // buffer for partially generated UTF-8 sequence from accepted tokens llama_partial_utf8 partial_utf8; + + // lazy grammars wait for trigger words or tokens before constraining the sampling. + // we still have trigger_tokens for non-lazy grammars to force printing of special trigger tokens. + // (useful e.g. for tool_choice=required) + bool lazy = false; + bool awaiting_trigger = false; // Initialized to true for lazy grammars only + std::string trigger_buffer; // Output buffered by lazy grammar. Will be cleared once trigger is found. 
+ std::vector trigger_tokens; // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special). + std::vector trigger_words; }; // @@ -127,7 +136,15 @@ struct llama_grammar * llama_grammar_init_impl( size_t n_rules, size_t start_rule_index); -struct llama_grammar * llama_grammar_init_impl(const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root); +struct llama_grammar * llama_grammar_init_impl( + const struct llama_vocab * vocab, + const char * grammar_str, + const char * grammar_root, + bool lazy, + const char ** trigger_words, + size_t num_trigger_words, + const llama_token * trigger_tokens, + size_t num_trigger_tokens); void llama_grammar_free_impl(struct llama_grammar * grammar); @@ -141,3 +158,7 @@ void llama_grammar_apply_impl( void llama_grammar_accept_impl( struct llama_grammar & grammar, llama_token token); + +void llama_grammar_accept_str( + struct llama_grammar & grammar, + const std::string & piece); diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index b3a12386e..26974f539 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -1433,13 +1433,30 @@ static void llama_sampler_grammar_apply(struct llama_sampler * smpl, llama_token } } +// Fwd declare to break reset --> init_impl --> llama_sampler_grammar_i --> reset cycle. 
+static struct llama_sampler * llama_sampler_init_grammar_impl( + const struct llama_vocab * vocab, + const char * grammar_str, + const char * grammar_root, + bool lazy, + const char ** trigger_words, + size_t num_trigger_words, + const llama_token * trigger_tokens, + size_t num_trigger_tokens); + static void llama_sampler_grammar_reset(struct llama_sampler * smpl) { auto * ctx = (llama_sampler_grammar *) smpl->ctx; if (!ctx->grammar) { return; } - auto * grammar_new = llama_grammar_init_impl(ctx->grammar->vocab, ctx->grammar_str.c_str(), ctx->grammar_root.c_str()); + std::vector trigger_words; + for (auto & word : ctx->grammar->trigger_words) { + trigger_words.push_back(word.c_str()); + } + auto * grammar_new = llama_grammar_init_impl(ctx->grammar->vocab, ctx->grammar_str.c_str(), ctx->grammar_root.c_str(), + ctx->grammar->lazy, trigger_words.data(), trigger_words.size(), + ctx->grammar->trigger_tokens.data(), ctx->grammar->trigger_tokens.size()); llama_grammar_free_impl(ctx->grammar); ctx->grammar = grammar_new; @@ -1448,7 +1465,7 @@ static void llama_sampler_grammar_reset(struct llama_sampler * smpl) { static struct llama_sampler * llama_sampler_grammar_clone(const struct llama_sampler * smpl) { const auto * ctx = (const llama_sampler_grammar *) smpl->ctx; - auto * result = llama_sampler_init_grammar(ctx->vocab, nullptr, nullptr); + auto * result = llama_sampler_init_grammar_impl(ctx->vocab, nullptr, nullptr, false, nullptr, 0, nullptr, 0); // copy the state { @@ -1484,7 +1501,15 @@ static struct llama_sampler_i llama_sampler_grammar_i = { /* .free = */ llama_sampler_grammar_free, }; -struct llama_sampler * llama_sampler_init_grammar(const struct llama_vocab * vocab, const char * grammar_str, const char * grammar_root) { +static struct llama_sampler * llama_sampler_init_grammar_impl( + const struct llama_vocab * vocab, + const char * grammar_str, + const char * grammar_root, + bool lazy, + const char ** trigger_words, + size_t num_trigger_words, + const 
llama_token * trigger_tokens, + size_t num_trigger_tokens) { auto * ctx = new llama_sampler_grammar; if (grammar_str != nullptr && grammar_str[0] != '\0') { @@ -1492,7 +1517,7 @@ struct llama_sampler * llama_sampler_init_grammar(const struct llama_vocab * voc /* .vocab = */ vocab, /* .grammar_str = */ grammar_str, /* .grammar_root = */ grammar_root, - /* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root), + /* .grammar = */ llama_grammar_init_impl(vocab, grammar_str, grammar_root, lazy, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens), }; } else { *ctx = { @@ -1509,6 +1534,24 @@ struct llama_sampler * llama_sampler_init_grammar(const struct llama_vocab * voc }; } +struct llama_sampler * llama_sampler_init_grammar( + const struct llama_vocab * vocab, + const char * grammar_str, + const char * grammar_root) { + return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ false, nullptr, 0, nullptr, 0); +} + +struct llama_sampler * llama_sampler_init_grammar_lazy( + const struct llama_vocab * vocab, + const char * grammar_str, + const char * grammar_root, + const char ** trigger_words, + size_t num_trigger_words, + const llama_token * trigger_tokens, + size_t num_trigger_tokens) { + return llama_sampler_init_grammar_impl(vocab, grammar_str, grammar_root, /* lazy= */ true, trigger_words, num_trigger_words, trigger_tokens, num_trigger_tokens); +} + // penalties struct llama_sampler_penalties { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3fa43c295..40f83ff0d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -93,6 +93,7 @@ if (NOT WIN32) llama_target_and_test(test-grammar-parser.cpp) llama_target_and_test(test-grammar-integration.cpp) llama_target_and_test(test-llama-grammar.cpp) + llama_target_and_test(test-chat.cpp) # TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8 if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64") 
llama_target_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..) diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 190643136..4563f9dcb 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -328,7 +328,7 @@ int main(void) { // test llama_chat_format_single for system message printf("\n\n=== llama_chat_format_single (system message) ===\n\n"); std::vector chat2; - common_chat_msg sys_msg{"system", "You are a helpful assistant"}; + common_chat_msg sys_msg{"system", "You are a helpful assistant", {}}; auto fmt_sys = [&](std::string tmpl_str) { minja::chat_template tmpl(tmpl_str, "", ""); @@ -352,10 +352,10 @@ int main(void) { // test llama_chat_format_single for user message printf("\n\n=== llama_chat_format_single (user message) ===\n\n"); - chat2.push_back({"system", "You are a helpful assistant"}); - chat2.push_back({"user", "Hello"}); - chat2.push_back({"assistant", "I am assistant"}); - common_chat_msg new_msg{"user", "How are you"}; + chat2.push_back({"system", "You are a helpful assistant", {}}); + chat2.push_back({"user", "Hello", {}}); + chat2.push_back({"assistant", "I am assistant", {}}); + common_chat_msg new_msg{"user", "How are you", {}}; auto fmt_single = [&](std::string tmpl_str) { minja::chat_template tmpl(tmpl_str, "", ""); diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp new file mode 100644 index 000000000..ccc65d87a --- /dev/null +++ b/tests/test-chat.cpp @@ -0,0 +1,521 @@ +// Tests chat handling, including grammar generation and parsing for tool calling, for various templates. +// +// Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates, +// e.g. 
given Minja (http://github.com/google/minja) checked out in parent dir: +// +// cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null +// +#include +#include +#include +#include + +#include "chat-template.hpp" +#include "chat.hpp" +#include "llama-grammar.h" +#include "unicode.h" + +using json = nlohmann::ordered_json; + +static common_chat_msg msg_from_json(const json & message) { + common_chat_msg ret{ + "assistant", + "", + {}, + }; + if (message.contains("content") && !message.at("content").is_null()) { + ret.content = message.at("content").get(); + } + auto has_tool_calls = message.contains("tool_calls"); + if (has_tool_calls) { + for (const auto & tc : message.at("tool_calls")) { + const auto & arguments = tc.at("function").at("arguments"); + ret.tool_calls.push_back({ + tc.at("function").at("name").get(), + arguments.is_string() ? arguments.get() : arguments.dump(), + tc.contains("id") ? tc.at("id").get() : "", + }); + } + } + return ret; +} + +template static void assert_equals(const T & expected, const T & actual) { + if (expected != actual) { + std::cerr << "Expected: " << expected << std::endl; + std::cerr << "Actual: " << actual << std::endl; + std::cerr << std::flush; + throw std::runtime_error("Test failed"); + } +} + +static std::string read_file(const std::string & path) { + std::cerr << "# Reading: " << path << std::endl << std::flush; + std::ifstream fs(path, std::ios_base::binary); + if (!fs.is_open()) { + fs = std::ifstream("../" + path, std::ios_base::binary); + if (!fs.is_open()) { + throw std::runtime_error("Failed to open file: " + path); + } + } + fs.seekg(0, std::ios_base::end); + auto size = fs.tellg(); + fs.seekg(0); + std::string out; + out.resize(static_cast(size)); + fs.read(&out[0], static_cast(size)); + return out; +} + +static std::unique_ptr build_grammar(const std::string & grammar_str) { + return std::unique_ptr( + llama_grammar_init_impl(nullptr, grammar_str.c_str(), 
"root", false, nullptr, 0, nullptr, 0)); +} + +// TODO: extract to common helper (copied from test-grammar-integration.cpp) +static bool match_string(const std::string & input, llama_grammar * grammar) { + const auto cpts = unicode_cpts_from_utf8(input); + + auto & stacks_cur = llama_grammar_get_stacks(grammar); + + for (const auto & cpt : cpts) { + llama_grammar_accept(grammar, cpt); + + if (stacks_cur.empty()) { + // no stacks means that the grammar failed to match at this point + return false; + } + } + + for (const auto & stack : stacks_cur) { + if (stack.empty()) { + // An empty stack means that the grammar has been completed + return true; + } + } + + return false; +} + +// Dumps `{"a": 1}` as `"{\"a\": 1}"`, unlike nlohmann::json::dump which would dump it as `"{\"a\":1}"`. +static std::string dump(const json & j) { + return minja::Value(j).dump(-1, /* to_json= */ true); +} + +static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual) { + assert_equals(expected.role, actual.role); + assert_equals(expected.content, actual.content); + assert_equals(expected.tool_calls.size(), actual.tool_calls.size()); + for (size_t i = 0; i < expected.tool_calls.size(); i++) { + const auto & expected_tool_call = expected.tool_calls[i]; + const auto & actual_tool_call = actual.tool_calls[i]; + assert_equals(expected_tool_call.name, actual_tool_call.name); + assert_equals(dump(json::parse(expected_tool_call.arguments)), dump(json::parse(actual_tool_call.arguments))); + assert_equals(expected_tool_call.id, actual_tool_call.id); + } +} + +const auto special_function_tool = json::parse(R"({ + "type": "function", + "function": { + "name": "special_function", + "description": "I'm special", + "parameters": { + "type": "object", + "properties": { + "arg1": { + "type": "integer", + "description": "The arg." 
+ } + }, + "required": ["arg1"] + } + } +})"); +const auto python_tool = json::parse(R"({ + "type": "function", + "function": { + "name": "python", + "description": "an ipython interpreter", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Python code to execute." + } + }, + "required": ["code"] + } + } +})"); +const auto code_interpreter_tool = json::parse(R"({ + "type": "function", + "function": { + "name": "code_interpreter", + "description": "an ipython interpreter", + "parameters": { + "type": "object", + "properties": { + "code": { + "type": "string", + "description": "Python code to execute." + } + }, + "required": ["code"] + } + } +})"); +const json tools = { special_function_tool, python_tool }; +const json llama_3_1_tools = { special_function_tool, code_interpreter_tool }; + +struct delta_data { + std::string delta; + std::string grammar; + common_chat_format format; +}; + +static delta_data init_delta(const common_chat_template & tmpl, const std::vector & end_tokens, + const json & user_message, const json & delta_message, const json & tools, + const json & tool_choice) { + common_chat_inputs inputs; + inputs.parallel_tool_calls = true; + inputs.messages = json::array(); + inputs.messages.push_back(user_message); + inputs.tools = tools; + inputs.tool_choice = tool_choice; + auto params_prefix = common_chat_params_init(tmpl, inputs); + + inputs.messages.push_back(delta_message); + inputs.add_generation_prompt = false; + auto params_full = common_chat_params_init(tmpl, inputs); + + std::string prefix = params_prefix.prompt; + std::string full = params_full.prompt; + + // Check full starts with prefix + if (full.find(prefix) != 0) { + fprintf(stderr, "Full:\n%s\n\nPrefix:\n%s\n\n", full.c_str(), prefix.c_str()); + throw std::runtime_error("Full message does not start with prefix"); + } + + if (full == prefix) { + throw std::runtime_error("Full message is the same as the prefix"); + } + + auto delta = 
full.substr(prefix.size()); + + // Strip end tokens + for (const auto & end_token : end_tokens) { + // rfind to find the last occurrence + auto pos = delta.rfind(end_token); + if (pos != std::string::npos) { + delta = delta.substr(0, pos); + break; + } + } + return { delta, params_full.grammar, params_full.format }; +} + +/* + Applies the template to 1 user message w/ add_generation_prompt=true, then w/ the test message w/ add_generation_prompt=false, + gets the diff, removes any end tokens and parses the result w/ the grammar, checking that + the parsed message is the same as the test_message +*/ +static void test_template(const common_chat_template & tmpl, const std::vector & end_tokens, + const json & test_message, const json & tools = {}, const std::string & expected_delta = "", + bool skip_grammar_test = false, bool skip_parser_test = false) { + common_chat_msg expected_msg = msg_from_json(test_message); + + auto user_message = json{ + { "role", "user" }, + { "content", "Hello, world!" 
} + }; + + for (const auto & tool_choice : json({ "auto", "required" })) { + auto data = init_delta(tmpl, end_tokens, user_message, test_message, tools, tool_choice); + if (!expected_delta.empty()) { + assert_equals(expected_delta, data.delta); + } + + if (!skip_parser_test) { + const auto msg = common_chat_parse(data.delta, data.format); + assert_msg_equals(expected_msg, msg); + } + + if (!expected_msg.tool_calls.empty()) { + GGML_ASSERT(!data.grammar.empty()); + } + if (!data.grammar.empty()) { + auto grammar = build_grammar(data.grammar); + if (!grammar) { + throw std::runtime_error("Failed to build grammar"); + } + // TODO: exercice lazy grammars + triggers here, instead of skipping the test + if (!skip_grammar_test) { + if (!match_string(data.delta, grammar.get())) { + throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta + + "\n\nGrammar: " + data.grammar); + } + } + } + } +} + +static void test_template_output_parsers() { + auto text_message = json{ + { "role", "assistant" }, + { "content", "Hello, world!" 
}, + }; + auto tool_call_message = json{ + { "role", "assistant" }, + { "content", {} }, + { "tool_calls", json{ { + { "type", "function" }, + { "function", { { "name", "special_function" }, { "arguments", "{\"arg1\": 1}" } } }, + } } } + }; + auto tool_call_message_with_id = json::parse(tool_call_message.dump()); + tool_call_message_with_id["tool_calls"][0]["id"] = "123456789"; + + auto python_tool_call_message = json{ + { "role", "assistant" }, + { "content", {} }, + { "tool_calls", json{ { + { "type", "function" }, + { "function", + { + { "name", "python" }, + { "arguments", + { + { "code", "print('hey')" }, + } }, + } }, + } } } + }; + auto code_interpreter_tool_call_message = json{ + { "role", "assistant" }, + { "content", {} }, + { "tool_calls", json{ { + { "type", "function" }, + { "function", + { + { "name", "code_interpreter" }, + { "arguments", + { + { "code", "print('hey')" }, + } }, + } }, + } } } + }; + + common_chat_inputs inputs_no_tools; + inputs_no_tools.messages = { + { { "role", "user" }, { "content", "Hey" } } + }; + + common_chat_inputs inputs_tools = inputs_no_tools; + inputs_tools.tools = json::array(); + inputs_tools.tools.push_back(special_function_tool); + + common_chat_inputs inputs_tools_builtin = inputs_no_tools; + inputs_tools_builtin.tools = json::array(); + inputs_tools_builtin.tools.push_back(python_tool); + + { + const common_chat_template tmpl(read_file("models/templates/google-gemma-2-2b-it.jinja"), "", ""); + std::vector end_tokens{ "" }; + + assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_params_init(tmpl, inputs_no_tools).format); + assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_params_init(tmpl, inputs_tools).format); + assert_equals(COMMON_CHAT_FORMAT_GENERIC, + common_chat_params_init( + common_chat_template(read_file("models/templates/microsoft-Phi-3.5-mini-instruct.jinja"), + "", ""), + inputs_tools) + .format); + + // Generic tool calls doesn't generate / parse content-only messages symmetrically. 
+ + assert_msg_equals(msg_from_json(text_message), + common_chat_parse("{\n" + " \"response\": \"Hello, world!\"\n" + "}", + common_chat_params_init(tmpl, inputs_tools).format)); + test_template(tmpl, end_tokens, tool_call_message_with_id, tools, + "{\n" + " \"tool_calls\": [\n" + " {\n" + " \"name\": \"special_function\",\n" + " \"arguments\": {\n" + " \"arg1\": 1\n" + " },\n" + " \"id\": \"123456789\"\n" + " }\n" + " ]\n" + "}"); + } + { + const common_chat_template tmpl(read_file("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja"), "", + ""); + std::vector end_tokens{ "" }; + + assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_params_init(tmpl, inputs_tools).format); + + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template( + tmpl, end_tokens, tool_call_message_with_id, tools, + "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]", + /* skip_grammar_test= */ true); + } + { + const common_chat_template tmpl( + read_file("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja"), "", ""); + std::vector end_tokens{ "<|im_end|>" }; + + assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_params_init(tmpl, inputs_tools).format); + assert_equals( + COMMON_CHAT_FORMAT_HERMES_2_PRO, + common_chat_params_init( + common_chat_template(read_file("models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja"), + "", ""), + inputs_tools) + .format); + assert_equals( + COMMON_CHAT_FORMAT_HERMES_2_PRO, + common_chat_params_init( + common_chat_template(read_file("models/templates/Qwen-Qwen2.5-7B-Instruct.jinja"), "", ""), + inputs_tools) + .format); + + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, tool_call_message, tools, + "\n" + "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" + ""); + test_template(tmpl, 
end_tokens, python_tool_call_message, tools, + "\n" + "{\"name\": \"python\", \"arguments\": {\"code\": \"print('hey')\"}}\n" + ""); + } + { + const common_chat_template tmpl(read_file("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja"), "", + ""); + std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; + + assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_params_init(tmpl, inputs_tools).format); + assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, + common_chat_params_init(tmpl, inputs_tools_builtin).format); + assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, + common_chat_params_init( + common_chat_template(read_file("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja"), + "", ""), + inputs_tools_builtin) + .format); + + // test_template(tmpl, end_tokens, text_message, tools, R"(?)", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, code_interpreter_tool_call_message, llama_3_1_tools, + "<|python_tag|>code_interpreter.call(code=\"print('hey')\")"); + test_template(tmpl, end_tokens, python_tool_call_message, tools, + "<|python_tag|>python.call(code=\"print('hey')\")"); + test_template(tmpl, end_tokens, tool_call_message, tools, + "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}"); + } + { + const common_chat_template tmpl(read_file("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja"), "", + ""); + std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; + + assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_params_init(tmpl, inputs_tools).format); + + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, tool_call_message, tools, + "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}"); + } + { + const common_chat_template tmpl(read_file("models/templates/meetkai-functionary-medium-v3.1.jinja"), "", + ""); + std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; + + 
assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, + common_chat_params_init(tmpl, inputs_tools).format); + + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, tool_call_message, tools, + "{\"arg1\": 1}"); + } + { + const common_chat_template tmpl(read_file("models/templates/meetkai-functionary-medium-v3.2.jinja"), "", + ""); + std::vector end_tokens{ "<|eom_id|>", "<|eot_id|>" }; + + assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_params_init(tmpl, inputs_no_tools).format); + assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_params_init(tmpl, inputs_tools).format); + + test_template(tmpl, end_tokens, text_message, {}, + "all\n" + "Hello, world!", + /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, tool_call_message, tools, + "special_function\n" + "{\"arg1\": 1}"); + } + { + const common_chat_template tmpl(read_file("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja"), "", + ""); + std::vector end_tokens{ "<|eot_id|>" }; + + assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_params_init(tmpl, inputs_tools).format); + + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, tool_call_message, tools, + " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]"); + } + { + const common_chat_template tmpl(read_file("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja"), + "", ""); + std::vector end_tokens{ "<|end▁of▁sentence|>" }; + + assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format); + + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, tool_call_message, tools, + "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" + "```json\n" + 
"{\"arg1\": 1}\n" + "```<|tool▁call▁end|>"); + } +} + +int main(int argc, char ** argv) { +#ifndef _WIN32 + if (argc > 1) { + common_chat_inputs inputs; + inputs.messages = { + { { "role", "user" }, { "content", "Hey" } } + }; + inputs.tools = json::array({ special_function_tool }); + + std::cout << "| Template | Format |\n"; + std::cout << "|----------|--------|\n"; + + for (int i = 1; i < argc; i++) { + std::string path = argv[i]; + if (path.rfind(".jinja") != path.size() - 6) { + std::cerr << "Skipping non-jinja file: " << path << std::endl; + continue; + } + common_chat_template tmpl(read_file(path), "", ""); + auto parts = string_split(path, "/"); + auto name = parts[parts.size() - 1]; + std::cout << "| " << name << " | " << common_chat_format_name(common_chat_params_init(tmpl, inputs).format) + << " |\n"; + } + } else +#endif + { + test_template_output_parsers(); + std::cout << "\n[chat] All tests passed!" << std::endl; + } + return 0; +} diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index e1bdbb925..288e08f51 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -13,7 +13,7 @@ using json = nlohmann::ordered_json; static llama_grammar * build_grammar(const std::string & grammar_str) { - return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root"); + return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0); } static bool test_build_grammar_fails(const std::string & grammar_str) { From 553f1e46e9e864514bbd6bf4009146db66be0541 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 30 Jan 2025 22:01:06 +0000 Subject: [PATCH 05/46] `ci`: ccache for all github worfklows (#11516) --- .github/workflows/build.yml | 136 ++++++++++++++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7eaf9c460..c02dd6a81 100644 --- a/.github/workflows/build.yml 
+++ b/.github/workflows/build.yml @@ -43,6 +43,12 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-arm64 + evict-old-files: 1d + - name: Dependencies id: depends continue-on-error: true @@ -108,6 +114,12 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-x64 + evict-old-files: 1d + - name: Dependencies id: depends continue-on-error: true @@ -172,6 +184,12 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-cpu-cmake + evict-old-files: 1d + - name: Dependencies id: depends run: | @@ -249,6 +267,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }} + evict-old-files: 1d + - name: Dependencies id: depends run: | @@ -296,6 +320,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-rpc + evict-old-files: 1d + - name: Dependencies id: depends run: | @@ -325,6 +355,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-vulkan + evict-old-files: 1d + - name: Dependencies id: depends run: | @@ -364,6 +400,12 @@ jobs: sudo apt-get update sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-hip + evict-old-files: 1d + - name: Build with native CMake HIP support id: cmake_build run: | @@ -396,6 +438,12 @@ jobs: apt-get update apt-get install -y build-essential git cmake libcurl4-openssl-dev + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-musa + evict-old-files: 1d + - name: Build with native CMake MUSA support id: cmake_build 
run: | @@ -435,6 +483,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-sycl + evict-old-files: 1d + - name: Build id: cmake_build run: | @@ -479,6 +533,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-sycl-fp16 + evict-old-files: 1d + - name: Build id: cmake_build run: | @@ -500,6 +560,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-ios + evict-old-files: 1d + - name: Dependencies id: depends continue-on-error: true @@ -531,6 +597,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-tvos + evict-old-files: 1d + - name: Dependencies id: depends continue-on-error: true @@ -566,6 +638,12 @@ jobs: id: checkout uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-swift + evict-old-files: 1d + - name: Dependencies id: depends continue-on-error: true @@ -607,6 +685,12 @@ jobs: - name: Clone uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-msys2 + evict-old-files: 1d + - name: Setup ${{ matrix.sys }} uses: msys2/setup-msys2@v2 with: @@ -675,6 +759,12 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-${{ matrix.build }} + evict-old-files: 1d + - name: Clone Kompute submodule id: clone_kompute if: ${{ matrix.build == 'kompute-x64' }} @@ -813,6 +903,8 @@ jobs: - name: Clone id: checkout uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Install dependencies env: @@ -821,6 +913,12 @@ jobs: apt update apt install -y cmake build-essential ninja-build libgomp1 git + - name: ccache + uses: 
hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-latest-cmake-cuda + evict-old-files: 1d + - name: Build with CMake run: | cmake -S . -B build -G Ninja \ @@ -847,6 +945,12 @@ jobs: with: fetch-depth: 0 + - name: Install ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }} + evict-old-files: 1d + - name: Install Cuda Toolkit 11.7 if: ${{ matrix.cuda == '11.7' }} run: | @@ -903,11 +1007,6 @@ jobs: echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - name: Install ccache - uses: hendrikmuhs/ccache-action@v1.2 - with: - key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }} - - name: Install Ninja id: install_ninja run: | @@ -987,6 +1086,12 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-sycl + evict-old-files: 1d + - name: Install run: | scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL @@ -1066,9 +1171,10 @@ jobs: & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - name: Install ccache - uses: hendrikmuhs/ccache-action@v1.2 + uses: hendrikmuhs/ccache-action@v1.2.16 with: key: ${{ github.job }} + evict-old-files: 1d - name: Build id: cmake_build @@ -1098,6 +1204,12 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: windows-latest-cmake-hip-release + evict-old-files: 1d + - name: Install id: depends run: | @@ -1195,6 +1307,12 @@ jobs: - name: Clone uses: actions/checkout@v4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: android-build + evict-old-files: 1d + - name: Set up JDK uses: actions/setup-java@v3 with: @@ -1232,6 +1350,12 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: 
hendrikmuhs/ccache-action@v1.2.16 + with: + key: release + evict-old-files: 1d + - name: Determine tag name id: tag shell: bash From a2df2787b32e0846205f7151dfad88ceab592beb Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Fri, 31 Jan 2025 06:04:53 +0100 Subject: [PATCH 06/46] server : update help metrics processing/deferred (#11512) This commit updates the help text for the metrics `requests_processing` and `requests_deferred` to be more grammatically correct. Currently the returned metrics look like this: ```console \# HELP llamacpp:requests_processing Number of request processing. \# TYPE llamacpp:requests_processing gauge llamacpp:requests_processing 0 \# HELP llamacpp:requests_deferred Number of request deferred. \# TYPE llamacpp:requests_deferred gauge llamacpp:requests_deferred 0 ``` With this commit, the metrics will look like this: ```console \# HELP llamacpp:requests_processing Number of requests processing. \# TYPE llamacpp:requests_processing gauge llamacpp:requests_processing 0 \# HELP llamacpp:requests_deferred Number of requests deferred. \# TYPE llamacpp:requests_deferred gauge llamacpp:requests_deferred 0 ``` This is also consistent with the description of the metrics in the server examples [README.md](https://github.com/ggerganov/llama.cpp/tree/master/examples/server#get-metrics-prometheus-compatible-metrics-exporter). 
--- examples/server/server.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index d1ea343dd..1ebcb5085 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -3633,11 +3633,11 @@ int main(int argc, char ** argv) { {"value", (uint64_t) res_metrics->kv_cache_tokens_count} },{ {"name", "requests_processing"}, - {"help", "Number of request processing."}, + {"help", "Number of requests processing."}, {"value", (uint64_t) res_metrics->n_processing_slots} },{ {"name", "requests_deferred"}, - {"help", "Number of request deferred."}, + {"help", "Number of requests deferred."}, {"value", (uint64_t) res_metrics->n_tasks_deferred} }}} }; From 1bd3047a939e561adfb3c7dd2e17c4cc7a4e4e6f Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Fri, 31 Jan 2025 00:58:55 -0500 Subject: [PATCH 07/46] common: Add missing va_end (#11529) The va_copy man page states that va_end must be called to revert whatever the copy did. For some implementations, not calling va_end has no consequences. For others it could leak memory.
--- common/log.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/common/log.cpp b/common/log.cpp index 04c7c0ed1..0b8994ae1 100644 --- a/common/log.cpp +++ b/common/log.cpp @@ -206,6 +206,7 @@ public: vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args_copy); } #endif + va_end(args_copy); } entry.level = level; From 4a2b196d03d52da31236390e9f5694a88d43d11d Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 31 Jan 2025 08:12:40 +0000 Subject: [PATCH 08/46] server : fix --jinja when there's no tools or schema (typo was forcing JSON) (#11531) --- examples/server/tests/unit/test_chat_completion.py | 7 +++---- examples/server/utils.hpp | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py index 0be04bab5..f5d8b0572 100644 --- a/examples/server/tests/unit/test_chat_completion.py +++ b/examples/server/tests/unit/test_chat_completion.py @@ -14,11 +14,10 @@ def create_server(): "model,system_prompt,user_prompt,max_tokens,re_content,n_prompt,n_predicted,finish_reason,jinja,chat_template", [ (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", False, None), + (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None), + (None, "Book", "What is the best book", 8, "^ blue", 23, 8, "length", True, "This is not a chat template, it is"), ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", False, None), - # TODO: fix testing of non-tool jinja mode - # (None, "Book", "What is the best book", 8, "(Suddenly)+|\\{ \" Sarax.", 77, 8, "length", True, None), - # (None, "Book", "What is the best book", 8, "I want to play with", 23, 8, "length", True, "This is not a chat template, it is"), - # ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 
128, "(Aside|she|felter|alonger)+", 104, 64, "length", True, None), + ("codellama70b", "You are a coding assistant.", "Write the fibonacci function in c++.", 128, "(Aside|she|felter|alonger)+", 104, 64, "length", True, None), ] ) def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_content, n_prompt, n_predicted, finish_reason, jinja, chat_template): diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 3d2c04666..70bd6a42c 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -642,7 +642,7 @@ static json oaicompat_completion_params_parse( inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false); inputs.stream = stream; // TODO: support mixing schema w/ tools beyond generic format. - inputs.json_schema = json_value(llama_params, "json_schema", json::object()); + inputs.json_schema = json_value(llama_params, "json_schema", json()); auto chat_params = common_chat_params_init(tmpl, inputs); llama_params["chat_format"] = static_cast(chat_params.format); From 5783575c9d99c4d9370495800663aa5397ceb0be Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 31 Jan 2025 08:24:29 +0000 Subject: [PATCH 09/46] Fix chatml fallback for unsupported builtin templates (when --jinja not enabled) (#11533) --- examples/server/server.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 1ebcb5085..e7daceef1 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1858,7 +1858,12 @@ struct server_context { llama_init_dft.context.reset(); } - chat_templates = common_chat_templates_from_model(model, params_base.chat_template); + if (params_base.chat_template.empty() && !validate_builtin_chat_template(params.use_jinja)) { + LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. 
This may cause the model to output suboptimal responses\n", __func__); + chat_templates = common_chat_templates_from_model(model, "chatml"); + } else { + chat_templates = common_chat_templates_from_model(model, params_base.chat_template); + } GGML_ASSERT(chat_templates.template_default.get() != nullptr); return true; @@ -4435,14 +4440,6 @@ int main(int argc, char ** argv) { LOG_INF("%s: model loaded\n", __func__); - // if a custom chat template is not supplied, we will use the one that comes with the model (if any) - if (params.chat_template.empty()) { - if (!ctx_server.validate_builtin_chat_template(params.use_jinja)) { - LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__); - params.chat_template = "chatml"; - } - } - // print sample chat example to make it clear which template is used LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__, ctx_server.chat_templates.template_default->source().c_str(), From b1bcd309fc8ac929cbd4a6207b3a19886bda031f Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 31 Jan 2025 13:48:31 +0000 Subject: [PATCH 10/46] fix stop regression (#11543) --- examples/server/utils.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 70bd6a42c..94e189457 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -484,13 +484,14 @@ static bool ends_with(const std::string & str, const std::string & suffix) { static size_t find_partial_stop_string(const std::string &stop, const std::string &text) { if (!text.empty() && !stop.empty()) { - auto it = std::find(stop.rbegin(), stop.rend(), text.back()); - while (it != stop.rend()) { - size_t length = std::distance(it, stop.rend()); - if (text.length() >= length && 0 == text.compare(text.length() - length, length, stop)) { - return text.length() - 
length; + const char text_last_char = text.back(); + for (int64_t char_index = stop.size() - 1; char_index >= 0; char_index--) { + if (stop[char_index] == text_last_char) { + const std::string current_partial = stop.substr(0, char_index + 1); + if (ends_with(text, current_partial)) { + return text.size() - char_index - 1; + } } - it = std::find(std::next(it), stop.rend(), text.back()); } } From a83f528688324a21484a97af1d1be5e1bc8d4c8e Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 31 Jan 2025 14:15:25 +0000 Subject: [PATCH 11/46] `tool-call`: fix llama 3.x and functionary 3.2, play nice w/ pydantic_ai package, update readme (#11539) * An empty tool_call_id is better than none! * sync: minja (tool call name optional https://github.com/google/minja/pull/36) * Force-disable parallel_tool_calls if template doesn't support it * More debug logs * Llama 3.x tools: accept / trigger on more varied spaced outputs * Fix empty content for functionary v3.2 tool call * Add proper tool call docs to server README * readme: function calling *is* supported now * Apply suggestions from code review Co-authored-by: Georgi Gerganov --------- Co-authored-by: Georgi Gerganov --- common/chat-template.hpp | 4 +- common/chat.cpp | 21 +++++-- examples/server/README.md | 110 ++++++++++++++++++++++++++++++++++--- examples/server/server.cpp | 5 +- examples/server/utils.hpp | 4 ++ 5 files changed, 129 insertions(+), 15 deletions(-) diff --git a/common/chat-template.hpp b/common/chat-template.hpp index 75ba5d938..58e119a3b 100644 --- a/common/chat-template.hpp +++ b/common/chat-template.hpp @@ -283,10 +283,12 @@ class chat_template { message["role"] = "user"; auto obj = json { {"tool_response", { - {"tool", message.at("name")}, {"content", message.at("content")}, }}, }; + if (message.contains("name")) { + obj["tool_response"]["name"] = message.at("name"); + } if (message.contains("tool_call_id")) { obj["tool_response"]["tool_call_id"] = message.at("tool_call_id"); } diff --git 
a/common/chat.cpp b/common/chat.cpp index d9a654892..58db12af9 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -384,14 +384,19 @@ static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const com tool_rules.push_back( builder.add_rule( name + "-call", - "\"{\" ( \"\\\"type\\\": \\\"function\\\", \" | space ) " + "\"{\" space " + "( \"\\\"type\\\":\" space \"\\\"function\\\",\" space )? " "\"\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " + builder.add_schema(name + "-args", parameters) + " \"}\"")); data.grammar_triggers.push_back({"{\"name\": \"" + name + "\"", /* .at_start = */ true}); }); data.grammar_triggers.push_back({"{\"name\":", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true}); data.grammar_triggers.push_back({"{\"type\": \"function\"", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true}); + data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true}); if (!builtin_tools.empty()) { data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false}); } @@ -586,9 +591,17 @@ static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & in } } // TODO: tighten & simplify. 
- auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex); - res.content = content; - return res; + try { + auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex); + res.content = content + res.content; + return res; + } catch (const std::exception & e) { + LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what()); + common_chat_msg res; + res.role = "assistant"; + res.content = input; + return res; + } } static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { diff --git a/examples/server/README.md b/examples/server/README.md index ce1ae8858..276b43013 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -126,7 +126,7 @@ The project is under active development, and we are [looking for feedback and co | `--grammar GRAMMAR` | BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '') | | `--grammar-file FNAME` | file to read grammar from | | `-j, --json-schema SCHEMA` | JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object
For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead | -| `--jinja` | Enable experimental Jinja templating engine (needed for tool use) | +| `--jinja` | Enable experimental Jinja templating engine (required for tool use) | **Example-specific params** @@ -1069,7 +1069,7 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte *Options:* -See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). While some OpenAI-specific features such as function calling aren't supported, llama.cpp `/completion`-specific features such as `mirostat` are supported. +See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). llama.cpp `/completion`-specific features such as `mirostat` are also supported. The `response_format` parameter supports both plain JSON output (e.g. `{"type": "json_object"}`) and schema-constrained JSON (e.g. `{"type": "json_object", "schema": {"type": "string", "minLength": 10, "maxLength": 100}}` or `{"type": "json_schema", "schema": {"properties": { "name": { "title": "Name", "type": "string" }, "date": { "title": "Date", "type": "string" }, "participants": { "items": {"type: "string" }, "title": "Participants", "type": "string" } } } }`), similar to other OpenAI-inspired API providers. @@ -1117,17 +1117,111 @@ curl http://localhost:8080/v1/chat/completions \ }' ``` -... 
and even tool usage (needs `--jinja` flag): +*Tool call support* + +[Function calling](https://platform.openai.com/docs/guides/function-calling) is supported for all models (see https://github.com/ggerganov/llama.cpp/pull/9639): + +- Requires `--jinja` flag +- Native tool call formats supported: + - Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2 + - Functionary v3.1 / v3.2 + - Hermes 2/3, Qwen 2.5 + - Mistral Nemo + - Firefunction v2 + - DeepSeek R1 (WIP / seems reluctant to call any tools?) + +
+ Show some common templates and which format handler they use + + | Template | Format | + |----------|--------| + | CohereForAI-c4ai-command-r-plus-default.jinja | generic tool calls | + | CohereForAI-c4ai-command-r-plus-rag.jinja | generic tool calls | + | CohereForAI-c4ai-command-r-plus-tool_use.jinja | generic tool calls | + | MiniMaxAI-MiniMax-Text-01.jinja | generic tool calls | + | NexaAIDev-Octopus-v2.jinja | generic tool calls | + | NousResearch-Hermes-2-Pro-Llama-3-8B-default.jinja | generic tool calls | + | NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja | hermes 2 pro tool calls | + | NousResearch-Hermes-2-Pro-Mistral-7B-default.jinja | generic tool calls | + | NousResearch-Hermes-2-Pro-Mistral-7B-tool_use.jinja | hermes 2 pro tool calls | + | NousResearch-Hermes-3-Llama-3.1-70B-default.jinja | generic tool calls | + | NousResearch-Hermes-3-Llama-3.1-70B-tool_use.jinja | hermes 2 pro tool calls | + | OrionStarAI-Orion-14B-Chat.jinja | generic tool calls | + | Qwen-QwQ-32B-Preview.jinja | hermes 2 pro tool calls | + | Qwen-Qwen2-7B-Instruct.jinja | generic tool calls | + | Qwen-Qwen2-VL-7B-Instruct.jinja | generic tool calls | + | Qwen-Qwen2.5-7B-Instruct.jinja | hermes 2 pro tool calls | + | Qwen-Qwen2.5-Math-7B-Instruct.jinja | hermes 2 pro tool calls | + | TheBloke-FusionNet_34Bx2_MoE-AWQ.jinja | generic tool calls | + | abacusai-Fewshot-Metamath-OrcaVicuna-Mistral.jinja | generic tool calls | + | bofenghuang-vigogne-2-70b-chat.jinja | generic tool calls | + | databricks-dbrx-instruct.jinja | generic tool calls | + | deepseek-ai-DeepSeek-Coder-V2-Instruct.jinja | generic tool calls | + | deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja | deepseek r1 tool calls | + | deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja | deepseek r1 tool calls | + | deepseek-ai-DeepSeek-R1-Distill-Qwen-7B.jinja | deepseek r1 tool calls | + | deepseek-ai-DeepSeek-V2.5.jinja | deepseek r1 tool calls | + | deepseek-ai-deepseek-coder-33b-instruct.jinja | generic tool calls | + 
| google-gemma-2-2b-it.jinja | generic tool calls | + | google-gemma-7b-it.jinja | generic tool calls | + | indischepartij-MiniCPM-3B-OpenHermes-2.5-v2.jinja | generic tool calls | + | mattshumer-Reflection-Llama-3.1-70B.jinja | generic tool calls | + | meetkai-functionary-medium-v3.2.jinja | functionary v3.2 tool calls | + | meta-llama-Llama-3.1-8B-Instruct.jinja | llama 3.x tool calls (w/ builtin tools) | + | meta-llama-Llama-3.2-3B-Instruct.jinja | llama 3.x tool calls | + | meta-llama-Llama-3.3-70B-Instruct.jinja | llama 3.x tool calls (w/ builtin tools) | + | meta-llama-Meta-Llama-3.1-8B-Instruct.jinja | llama 3.x tool calls (w/ builtin tools) | + | microsoft-Phi-3-medium-4k-instruct.jinja | generic tool calls | + | microsoft-Phi-3-mini-4k-instruct.jinja | generic tool calls | + | microsoft-Phi-3-small-8k-instruct.jinja | generic tool calls | + | microsoft-Phi-3.5-mini-instruct.jinja | generic tool calls | + | microsoft-Phi-3.5-vision-instruct.jinja | generic tool calls | + | mistralai-Mistral-7B-Instruct-v0.2.jinja | generic tool calls | + | mistralai-Mistral-Large-Instruct-2407.jinja | mistral nemo tool calls | + | mistralai-Mistral-Large-Instruct-2411.jinja | generic tool calls | + | mistralai-Mistral-Nemo-Instruct-2407.jinja | mistral nemo tool calls | + | mistralai-Mixtral-8x7B-Instruct-v0.1.jinja | generic tool calls | + | mlabonne-AlphaMonarch-7B.jinja | generic tool calls | + | nvidia-Llama-3.1-Nemotron-70B-Instruct-HF.jinja | llama 3.x tool calls (w/ builtin tools) | + | openchat-openchat-3.5-0106.jinja | generic tool calls | + | teknium-OpenHermes-2.5-Mistral-7B.jinja | generic tool calls | + + This table can be generated with: + + ```bash + ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null + +
+ +- Generic tool call is supported when the template isn't recognized by native format handlers (you'll see `Chat format: Generic` in the logs). + - Use `--chat-template-file` to override the template when appropriate (see examples below) + - Generic support may consume more tokens and be less efficient than a model's native format. + +- Run with: ```shell - llama-server --jinja -hfr lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF -hff Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf -fa + # Native support: + llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M + llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M + llama-server --jinja -fa -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q6_K + llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M + llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \ + --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B ) - # https://huggingface.co/meetkai/functionary-medium-v3.2 - llama-server --jinja -hfr bartowski/functionary-medium-v3.2-GGUF -hff functionary-medium-v3.2-IQ4_XS.gguf -fa + # Native support requires the right template for these GGUFs: + llama-server --jinja -fa -hf bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M \ + --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use ) + llama-server --jinja -fa -hf bartowski/firefunction-v2-GGUF -hff firefunction-v2-IQ1_M.gguf \ + --chat-template-file <( python scripts/get_chat_template.py fireworks-ai/firellama-3-firefunction-v2 ) - # https://huggingface.co/meetkai/functionary-medium-v3.1 - llama-server --jinja -hfr meetkai/functionary-medium-v3.1-GGUF -hff functionary-medium-llama-3.1.Q4_0.gguf -fa + # Generic format support + llama-server --jinja -fa -hf bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M + llama-server --jinja -fa -hf bartowski/gemma-2-2b-it-GGUF:Q4_K_M + ``` +- Test in CLI: + + ```bash curl 
http://localhost:8080/v1/chat/completions -d '{ "model": "gpt-3.5-turbo", "tools": [ diff --git a/examples/server/server.cpp b/examples/server/server.cpp index e7daceef1..3451e96a2 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -345,7 +345,7 @@ struct server_task { auto it = data.find("chat_format"); if (it != data.end()) { params.oaicompat_chat_format = static_cast(it->get()); - LOG_DBG("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str()); + LOG_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str()); } else { params.oaicompat_chat_format = defaults.oaicompat_chat_format; } @@ -697,6 +697,7 @@ struct server_task_result_cmpl_final : server_task_result { std::string finish_reason = "length"; common_chat_msg message; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { + LOG_DBG("Parsing chat message: %s\n", content.c_str()); message = common_chat_parse(content, oaicompat_chat_format); finish_reason = message.tool_calls.empty() ? "stop" : "tool_calls"; } else { @@ -713,7 +714,7 @@ struct server_task_result_cmpl_final : server_task_result { {"name", tc.name}, {"arguments", tc.arguments}, }}, - {"id", tc.id.empty() ? json() : json(tc.id)}, + {"id", tc.id}, }); } } diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 94e189457..bfe623c4c 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -641,6 +641,10 @@ static json oaicompat_completion_params_parse( inputs.tools = tools; inputs.tool_choice = tool_choice; inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false); + if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) { + LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n"); + inputs.parallel_tool_calls = false; + } inputs.stream = stream; // TODO: support mixing schema w/ tools beyond generic format. 
inputs.json_schema = json_value(llama_params, "json_schema", json()); From aa6fb1321333fae8853d0cdc26bcb5d438e650a1 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Fri, 31 Jan 2025 17:12:40 +0000 Subject: [PATCH 12/46] `ci`: use sccache on windows instead of ccache (#11545) * Use sccache on ci for windows * Detect sccache in cmake --- .github/workflows/build.yml | 6 ++++++ ggml/src/CMakeLists.txt | 12 +++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c02dd6a81..022b9bd03 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -689,6 +689,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: windows-msys2 + variant: sccache evict-old-files: 1d - name: Setup ${{ matrix.sys }} @@ -763,6 +764,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: windows-latest-cmake-${{ matrix.build }} + variant: sccache evict-old-files: 1d - name: Clone Kompute submodule @@ -949,6 +951,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }} + variant: sccache evict-old-files: 1d - name: Install Cuda Toolkit 11.7 @@ -1090,6 +1093,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: windows-latest-cmake-sycl + variant: sccache evict-old-files: 1d - name: Install @@ -1174,6 +1178,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: ${{ github.job }} + variant: sccache evict-old-files: 1d - name: Build @@ -1208,6 +1213,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: windows-latest-cmake-hip-release + variant: sccache evict-old-files: 1d - name: Install diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 566709135..0002ac18a 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -93,12 +93,18 @@ endif() if (GGML_CCACHE) find_program(GGML_CCACHE_FOUND ccache) + find_program(GGML_SCCACHE_FOUND sccache) - if (GGML_CCACHE_FOUND) + if 
(GGML_CCACHE_FOUND OR GGML_SCCACHE_FOUND) + if(GGML_CCACHE_FOUND) + set(GGML_CCACHE_VARIANT ccache) + else() + set(GGML_CCACHE_VARIANT sccache) + endif() # TODO: should not be set globally - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}") set(ENV{CCACHE_SLOPPINESS} time_macros) - message(STATUS "ccache found, compilation results will be cached. Disable with GGML_CCACHE=OFF.") + message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached. Disable with GGML_CCACHE=OFF.") else() message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with GGML_CCACHE=OFF") endif () From 5bbc7362cb93265f4c853fd89800a6255cc26985 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Sat, 1 Feb 2025 00:01:20 +0000 Subject: [PATCH 13/46] ci: simplify cmake build commands (#11548) --- .github/workflows/build.yml | 86 +++++++++++++------------------------ 1 file changed, 30 insertions(+), 56 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 022b9bd03..03eabbbe5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -59,16 +59,14 @@ jobs: id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake .. \ + cmake -B build \ -DCMAKE_BUILD_RPATH="@loader_path" \ -DLLAMA_FATAL_WARNINGS=ON \ -DLLAMA_CURL=ON \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DGGML_RPC=ON - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - name: Test id: cmake_test @@ -199,13 +197,11 @@ jobs: - name: Build id: cmake_build run: | - mkdir build - cd build - cmake .. \ + cmake -B build \ -DLLAMA_FATAL_WARNINGS=ON \ -DLLAMA_CURL=ON \ -DGGML_RPC=ON - cmake --build . 
--config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) - name: Test id: cmake_test @@ -283,26 +279,22 @@ jobs: id: cmake_build if: ${{ matrix.sanitizer != 'THREAD' }} run: | - mkdir build - cd build - cmake .. \ + cmake -B build \ -DLLAMA_FATAL_WARNINGS=ON \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - cmake --build . --config ${{ matrix.build_type }} -j $(nproc) + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - name: Build (no OpenMP) id: cmake_build_no_openmp if: ${{ matrix.sanitizer == 'THREAD' }} run: | - mkdir build - cd build - cmake .. \ + cmake -B build \ -DLLAMA_FATAL_WARNINGS=ON \ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ -DGGML_OPENMP=OFF - cmake --build . --config ${{ matrix.build_type }} -j $(nproc) + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - name: Test id: cmake_test @@ -335,11 +327,9 @@ jobs: - name: Build id: cmake_build run: | - mkdir build - cd build - cmake .. \ + cmake -B build \ -DGGML_RPC=ON - cmake --build . --config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) - name: Test id: cmake_test @@ -372,11 +362,9 @@ jobs: - name: Build id: cmake_build run: | - mkdir build - cd build - cmake .. \ + cmake -B build \ -DGGML_VULKAN=ON - cmake --build . --config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) - name: Test id: cmake_test @@ -493,13 +481,11 @@ jobs: id: cmake_build run: | source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake .. \ + cmake -B build \ -DGGML_SYCL=ON \ -DCMAKE_C_COMPILER=icx \ -DCMAKE_CXX_COMPILER=icpx - cmake --build . --config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) ubuntu-22-cmake-sycl-fp16: runs-on: ubuntu-22.04 @@ -543,14 +529,12 @@ jobs: id: cmake_build run: | source /opt/intel/oneapi/setvars.sh - mkdir build - cd build - cmake .. 
\ + cmake -B build \ -DGGML_SYCL=ON \ -DCMAKE_C_COMPILER=icx \ -DCMAKE_CXX_COMPILER=icpx \ -DGGML_SYCL_F16=ON - cmake --build . --config Release -j $(nproc) + cmake --build build --config Release -j $(nproc) macOS-latest-cmake-ios: runs-on: macos-latest @@ -576,9 +560,7 @@ jobs: id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ + cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_EXAMPLES=OFF \ @@ -587,7 +569,7 @@ jobs: -DCMAKE_SYSTEM_NAME=iOS \ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO macOS-latest-cmake-tvos: runs-on: macos-latest @@ -613,9 +595,7 @@ jobs: id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ + cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_EXAMPLES=OFF \ @@ -624,7 +604,7 @@ jobs: -DCMAKE_SYSTEM_NAME=tvOS \ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO macOS-latest-swift: runs-on: macos-latest @@ -654,17 +634,15 @@ jobs: id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ + cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) - sudo cmake --install . 
--config Release + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + sudo cmake --install build --config Release - name: xcodebuild for swift package id: xcodebuild @@ -806,21 +784,19 @@ jobs: run: | git clone https://github.com/KhronosGroup/OpenCL-Headers cd OpenCL-Headers - mkdir build && cd build - cmake .. ` + cmake -B build ` -DBUILD_TESTING=OFF ` -DOPENCL_HEADERS_BUILD_TESTING=OFF ` -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build . --target install + cmake --build build --target install git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader cd OpenCL-ICD-Loader - mkdir build-arm64-release && cd build-arm64-release - cmake .. ` + cmake -B build-arm64-release ` -A arm64 ` -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build . --target install --config release + cmake --build build-arm64-release --target install --config release - name: Build id: cmake_build @@ -1284,9 +1260,7 @@ jobs: id: cmake_build run: | sysctl -a - mkdir build - cd build - cmake -G Xcode .. \ + cmake -B build -G Xcode \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_EXAMPLES=OFF \ @@ -1295,8 +1269,8 @@ jobs: -DCMAKE_SYSTEM_NAME=iOS \ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - sudo cmake --install . 
--config Release + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO + sudo cmake --install build --config Release - name: xcodebuild for swift package id: xcodebuild From ecef206ccb186a1cde8dd2523b1da3e12f593f9e Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Sat, 1 Feb 2025 11:30:54 +0100 Subject: [PATCH 14/46] Implement s3:// protocol (#11511) For those that want to pull from s3 Signed-off-by: Eric Curtin --- examples/run/run.cpp | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/examples/run/run.cpp b/examples/run/run.cpp index 9cecae48c..cf61f4add 100644 --- a/examples/run/run.cpp +++ b/examples/run/run.cpp @@ -65,6 +65,13 @@ static int printe(const char * fmt, ...) { return ret; } +static std::string strftime_fmt(const char * fmt, const std::tm & tm) { + std::ostringstream oss; + oss << std::put_time(&tm, fmt); + + return oss.str(); +} + class Opt { public: int init(int argc, const char ** argv) { @@ -698,6 +705,39 @@ class LlamaData { return download(url, bn, true); } + int s3_dl(const std::string & model, const std::string & bn) { + const size_t slash_pos = model.find('/'); + if (slash_pos == std::string::npos) { + return 1; + } + + const std::string bucket = model.substr(0, slash_pos); + const std::string key = model.substr(slash_pos + 1); + const char * access_key = std::getenv("AWS_ACCESS_KEY_ID"); + const char * secret_key = std::getenv("AWS_SECRET_ACCESS_KEY"); + if (!access_key || !secret_key) { + printe("AWS credentials not found in environment\n"); + return 1; + } + + // Generate AWS Signature Version 4 headers + // (Implementation requires HMAC-SHA256 and date handling) + // Get current timestamp + const time_t now = time(nullptr); + const tm tm = *gmtime(&now); + const std::string date = strftime_fmt("%Y%m%d", tm); + const std::string datetime = strftime_fmt("%Y%m%dT%H%M%SZ", tm); + const std::vector headers = { + "Authorization: AWS4-HMAC-SHA256 Credential=" 
+ std::string(access_key) + "/" + date + + "/us-east-1/s3/aws4_request", + "x-amz-content-sha256: UNSIGNED-PAYLOAD", "x-amz-date: " + datetime + }; + + const std::string url = "https://" + bucket + ".s3.amazonaws.com/" + key; + + return download(url, bn, true, headers); + } + std::string basename(const std::string & path) { const size_t pos = path.find_last_of("/\\"); if (pos == std::string::npos) { @@ -738,6 +778,9 @@ class LlamaData { rm_until_substring(model_, "github:"); rm_until_substring(model_, "://"); ret = github_dl(model_, bn); + } else if (string_starts_with(model_, "s3://")) { + rm_until_substring(model_, "://"); + ret = s3_dl(model_, bn); } else { // ollama:// or nothing rm_until_substring(model_, "ollama.com/library/"); rm_until_substring(model_, "://"); From cfd74c86dbaa95ed30aa6b30e14d8801eb975d63 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Sat, 1 Feb 2025 12:24:51 +0000 Subject: [PATCH 15/46] `sync`: minja (https://github.com/google/minja/commit/418a2364b56dc9be4ed9a1a2b0fb16fb53a7a22e) (#11574) --- common/minja.hpp | 49 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/common/minja.hpp b/common/minja.hpp index f0e80fd7c..bcb5a0824 100644 --- a/common/minja.hpp +++ b/common/minja.hpp @@ -693,7 +693,7 @@ enum SpaceHandling { Keep, Strip, StripSpaces, StripNewline }; class TemplateToken { public: - enum class Type { Text, Expression, If, Else, Elif, EndIf, For, EndFor, Generation, EndGeneration, Set, EndSet, Comment, Macro, EndMacro, Filter, EndFilter }; + enum class Type { Text, Expression, If, Else, Elif, EndIf, For, EndFor, Generation, EndGeneration, Set, EndSet, Comment, Macro, EndMacro, Filter, EndFilter, Break, Continue }; static std::string typeToString(Type t) { switch (t) { @@ -714,6 +714,8 @@ public: case Type::EndFilter: return "endfilter"; case Type::Generation: return "generation"; case Type::EndGeneration: return "endgeneration"; + case Type::Break: return "break"; + 
case Type::Continue: return "continue"; } return "Unknown"; } @@ -815,6 +817,22 @@ struct CommentTemplateToken : public TemplateToken { CommentTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, const std::string& t) : TemplateToken(Type::Comment, location, pre, post), text(t) {} }; +enum class LoopControlType { Break, Continue }; + +class LoopControlException : public std::runtime_error { +public: + LoopControlType control_type; + LoopControlException(const std::string & message, LoopControlType control_type) : std::runtime_error(message), control_type(control_type) {} + LoopControlException(LoopControlType control_type) + : std::runtime_error((std::ostringstream() << (control_type == LoopControlType::Continue ? "continue" : "break") << " outside of a loop").str()), + control_type(control_type) {} +}; + +struct LoopControlTemplateToken : public TemplateToken { + LoopControlType control_type; + LoopControlTemplateToken(const Location & location, SpaceHandling pre, SpaceHandling post, LoopControlType control_type) : TemplateToken(Type::Break, location, pre, post), control_type(control_type) {} +}; + class TemplateNode { Location location_; protected: @@ -825,6 +843,12 @@ public: void render(std::ostringstream & out, const std::shared_ptr & context) const { try { do_render(out, context); + } catch (const LoopControlException & e) { + // TODO: make stack creation lazy. Only needed if it was thrown outside of a loop. 
+ std::ostringstream err; + err << e.what(); + if (location_.source) err << error_location_suffix(*location_.source, location_.pos); + throw LoopControlException(err.str(), e.control_type); } catch (const std::exception & e) { std::ostringstream err; err << e.what(); @@ -897,6 +921,15 @@ public: } }; +class LoopControlNode : public TemplateNode { + LoopControlType control_type_; + public: + LoopControlNode(const Location & location, LoopControlType control_type) : TemplateNode(location), control_type_(control_type) {} + void do_render(std::ostringstream &, const std::shared_ptr &) const override { + throw LoopControlException(control_type_); + } +}; + class ForNode : public TemplateNode { std::vector var_names; std::shared_ptr iterable; @@ -961,7 +994,12 @@ public: loop.set("last", i == (n - 1)); loop.set("previtem", i > 0 ? filtered_items.at(i - 1) : Value()); loop.set("nextitem", i < n - 1 ? filtered_items.at(i + 1) : Value()); - body->render(out, loop_context); + try { + body->render(out, loop_context); + } catch (const LoopControlException & e) { + if (e.control_type == LoopControlType::Break) break; + if (e.control_type == LoopControlType::Continue) continue; + } } } }; @@ -2159,7 +2197,7 @@ private: static std::regex comment_tok(R"(\{#([-~]?)(.*?)([-~]?)#\})"); static std::regex expr_open_regex(R"(\{\{([-~])?)"); static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)"); - static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter)\b)"); + static std::regex block_keyword_tok(R"((if|else|elif|endif|for|endfor|generation|endgeneration|set|endset|block|endblock|macro|endmacro|filter|endfilter|break|continue)\b)"); static std::regex non_text_open_regex(R"(\{\{|\{%|\{#)"); static std::regex expr_close_regex(R"([\s\n\r]*([-~])?\}\})"); static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})"); @@ -2291,6 +2329,9 @@ private: } else if (keyword == "endfilter") { auto 
post_space = parseBlockClose(); tokens.push_back(std::make_unique(location, pre_space, post_space)); + } else if (keyword == "break" || keyword == "continue") { + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique(location, pre_space, post_space, keyword == "break" ? LoopControlType::Break : LoopControlType::Continue)); } else { throw std::runtime_error("Unexpected block: " + keyword); } @@ -2414,6 +2455,8 @@ private: children.emplace_back(std::make_shared(token->location, std::move(filter_token->filter), std::move(body))); } else if (dynamic_cast(token.get())) { // Ignore comments + } else if (auto ctrl_token = dynamic_cast(token.get())) { + children.emplace_back(std::make_shared(token->location, ctrl_token->control_type)); } else if (dynamic_cast(token.get()) || dynamic_cast(token.get()) || dynamic_cast(token.get()) From 53debe6f3c9cca87e9520a83ee8c14d88977afa4 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Sat, 1 Feb 2025 18:22:38 +0000 Subject: [PATCH 16/46] ci: use sccache on windows HIP jobs (#11553) --- .github/workflows/build.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 03eabbbe5..7392f2bfe 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1154,7 +1154,6 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: ${{ github.job }} - variant: sccache evict-old-files: 1d - name: Build @@ -1189,7 +1188,6 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: windows-latest-cmake-hip-release - variant: sccache evict-old-files: 1d - name: Install From 0cec062a638700495673f5494d200b74340538be Mon Sep 17 00:00:00 2001 From: piDack <104877312+piDack@users.noreply.github.com> Date: Sun, 2 Feb 2025 15:48:46 +0800 Subject: [PATCH 17/46] llama : add support for GLM-Edge and GLM-Edge-V series models (#10573) * add glm edge chat model * use config partial_rotary_factor as rope ratio * support for glm edge model * vision model support * 
remove debug info * fix format * llava.cpp trailing whitespace * remove unused AutoTokenizer * Update src/llama.cpp for not contain <|end|> or Co-authored-by: Xuan Son Nguyen * add edge template * fix chat template * fix confict * fix confict * fix ci err * fix format err * fix template err * 9b hf chat support * format * format clip.cpp * fix format * Apply suggestions from code review * Apply suggestions from code review * Update examples/llava/clip.cpp * fix format * minor : style --------- Co-authored-by: liyuhang Co-authored-by: piDack Co-authored-by: Xuan Son Nguyen Co-authored-by: liyuhang Co-authored-by: Georgi Gerganov --- README.md | 3 +- convert_hf_to_gguf.py | 58 +--- examples/llava/README-glmedge.md | 43 +++ examples/llava/clip.cpp | 110 ++++++- examples/llava/clip.h | 2 + .../glmedge-convert-image-encoder-to-gguf.py | 280 ++++++++++++++++++ examples/llava/glmedge-surgery.py | 33 +++ examples/llava/llava.cpp | 17 ++ gguf-py/gguf/constants.py | 3 + src/llama-arch.cpp | 3 + src/llama-chat.cpp | 11 +- src/llama-chat.h | 1 + src/llama-model.cpp | 28 +- src/llama.cpp | 35 ++- tests/test-chat-template.cpp | 8 + 15 files changed, 568 insertions(+), 67 deletions(-) create mode 100644 examples/llava/README-glmedge.md create mode 100644 examples/llava/glmedge-convert-image-encoder-to-gguf.py create mode 100644 examples/llava/glmedge-surgery.py diff --git a/README.md b/README.md index d40309875..7f306d199 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [x] [Bitnet b1.58 models](https://huggingface.co/1bitLLM) - [x] [Flan T5](https://huggingface.co/models?search=flan-t5) - [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca) -- [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) +- [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + 
[ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) + [GLMEdge-1.5b](https://huggingface.co/THUDM/glm-edge-1.5b-chat) + [GLMEdge-4b](https://huggingface.co/THUDM/glm-edge-4b-chat) - [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966) - [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct) - [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a) @@ -117,6 +117,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [x] [Mini CPM](https://huggingface.co/models?search=MiniCPM) - [x] [Moondream](https://huggingface.co/vikhyatk/moondream2) - [x] [Bunny](https://github.com/BAAI-DCAI/Bunny) +- [x] [GLM-EDGE](https://huggingface.co/models?search=glm-edge) - [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 63b54a9cf..018a2a588 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -648,7 +648,7 @@ class Model: if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a": # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code res = "jina-v2-code" - if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b": + if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b" or chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516": # ref: https://huggingface.co/THUDM/glm-4-9b-chat res = "chatglm-bpe" if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee": @@ -4513,7 +4513,7 @@ class JaisModel(Model): self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias) -@Model.register("ChatGLMModel", "ChatGLMForConditionalGeneration") +@Model.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration") class ChatGLMModel(Model): model_arch = gguf.MODEL_ARCH.CHATGLM @@ -4619,47 
+4619,15 @@ class ChatGLMModel(Model): from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True) - vocab_size = hparams["padded_vocab_size"] + vocab_size = hparams.get("padded_vocab_size",hparams["vocab_size"]) assert max(tokenizer.get_vocab().values()) < vocab_size - tokpre = self.get_vocab_base_pre(tokenizer) - - merges = [] - vocab = {} - mergeable_ranks = tokenizer.mergeable_ranks - for token, rank in mergeable_ranks.items(): - vocab[ChatGLMModel.token_bytes_to_string(token)] = rank - if len(token) == 1: - continue - merged = ChatGLMModel.bpe(mergeable_ranks, token, max_rank=rank) - assert len(merged) >= 2 and len(merged) <= 7 - merges.append(' '.join(map(ChatGLMModel.token_bytes_to_string, merged))) - - # for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined - added_vocab = tokenizer.get_added_vocab() - reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **added_vocab}.items()} - - for i in range(vocab_size): - if i not in reverse_vocab: - tokens.append(f"[PAD{i}]") - toktypes.append(gguf.TokenType.UNUSED) - elif reverse_vocab[i] in added_vocab: - tokens.append(reverse_vocab[i]) - if tokenizer.added_tokens_decoder[i].special: - toktypes.append(gguf.TokenType.CONTROL) - else: - toktypes.append(gguf.TokenType.USER_DEFINED) - else: - tokens.append(reverse_vocab[i]) - toktypes.append(gguf.TokenType.NORMAL) - + tokens, toktypes, tokpre = self.get_vocab_base() self.gguf_writer.add_tokenizer_model("gpt2") self.gguf_writer.add_tokenizer_pre(tokpre) self.gguf_writer.add_token_list(tokens) self.gguf_writer.add_token_types(toktypes) - - special_vocab = gguf.SpecialVocab(dir_model, load_merges=False) - special_vocab.merges = merges + special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True) # only add special tokens when they were not already loaded from config.json special_vocab._set_special_token("eos", 
tokenizer.get_added_vocab()["<|endoftext|>"]) special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"]) @@ -4670,16 +4638,20 @@ class ChatGLMModel(Model): def set_gguf_parameters(self): n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed")) n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads")) - n_head_kv = self.hparams.get("multi_query_group_num", n_head) + n_head_kv = self.hparams.get("multi_query_group_num", self.hparams.get("num_key_value_heads", n_head)) self.gguf_writer.add_context_length(self.hparams.get("seq_length", n_embed)) self.gguf_writer.add_embedding_length(n_embed) - self.gguf_writer.add_feed_forward_length(self.hparams.get("ffn_hidden_size", 4 * n_embed)) - self.gguf_writer.add_block_count(self.hparams["num_layers"]) + self.gguf_writer.add_feed_forward_length(self.hparams.get("ffn_hidden_size", self.hparams.get("intermediate_size", 4 * n_embed))) + self.gguf_writer.add_block_count(self.hparams.get("num_layers", self.hparams["num_hidden_layers"])) self.gguf_writer.add_head_count(n_head) self.gguf_writer.add_head_count_kv(n_head_kv) - self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layernorm_epsilon"]) + self.gguf_writer.add_layer_norm_rms_eps(self.hparams.get("layernorm_epsilon",1e-5)) self.gguf_writer.add_file_type(self.ftype) - self.gguf_writer.add_rope_dimension_count(64) + if "attention_dim" in self.hparams: + rope_dim = self.hparams["attention_dim"] + else: + rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"] + self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5))) self.gguf_writer.add_add_bos_token(False) rope_freq = 10000 if "rope_ratio" in self.hparams: @@ -4689,7 +4661,7 @@ class ChatGLMModel(Model): def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: del bid # unused - if name.endswith(".rotary_pos_emb.inv_freq"): + if 
name.endswith(".rotary_pos_emb.inv_freq") or name.startswith("model.vision."): + return [] name = name.removeprefix("transformer.") diff --git a/examples/llava/README-glmedge.md b/examples/llava/README-glmedge.md new file mode 100644 index 000000000..603d01474 --- /dev/null +++ b/examples/llava/README-glmedge.md @@ -0,0 +1,43 @@ +# GLMV-EDGE + +Currently this implementation supports [glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b) and [glm-edge-v-5b](https://huggingface.co/THUDM/glm-edge-v-5b). + +## Usage +Build with cmake or run `make llama-llava-cli` to build it. + +After building, run: `./llama-llava-cli` to see the usage. For example: + +```sh +./llama-llava-cli -m model_path/ggml-model-f16.gguf --mmproj model_path/mmproj-model-f16.gguf --image img_path/image.jpg -p "<|system|>\n system prompt <|user|>\n prompt <|assistant|>\n" +``` + +**note**: A lower temperature like 0.1 is recommended for better quality. Add `--temp 0.1` to the command to do so. +**note**: For GPU offloading, make sure to use the `-ngl` flag as usual. + +## GGUF conversion + +1. Clone a GLMV-EDGE model ([2B](https://huggingface.co/THUDM/glm-edge-v-2b) or [5B](https://huggingface.co/THUDM/glm-edge-v-5b)). For example: + +```sh +git clone https://huggingface.co/THUDM/glm-edge-v-5b or https://huggingface.co/THUDM/glm-edge-v-2b +``` + +2. Use `glmedge-surgery.py` to split the GLMV-EDGE model into LLM and multimodal projector constituents: + +```sh +python ./examples/llava/glmedge-surgery.py -m ../model_path +``` + +3. Use `glmedge-convert-image-encoder-to-gguf.py` to convert the GLMV-EDGE image encoder to GGUF: + +```sh +python ./examples/llava/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path +``` + +4. 
Use `examples/convert_hf_to_gguf.py` to convert the LLM part of GLMV-EDGE to GGUF: + +```sh +python convert_hf_to_gguf.py ../model_path +``` + +Now both the LLM part and the image encoder are in the `model_path` directory. diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 24073c5a9..7367d44cb 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -102,6 +102,7 @@ static std::string format(const char * fmt, ...) { #define KEY_HAS_VIS_ENC "clip.has_vision_encoder" #define KEY_HAS_LLAVA_PROJ "clip.has_llava_projector" #define KEY_HAS_MINICPMV_PROJ "clip.has_minicpmv_projector" +#define KEY_HAS_GLM_PROJ "clip.has_glm_projector" #define KEY_MINICPMV_VERSION "clip.minicpmv_version" #define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger" #define KEY_USE_GELU "clip.use_gelu" @@ -160,6 +161,15 @@ static std::string format(const char * fmt, ...) { #define TN_MINICPMV_ATTN "resampler.attn.%s.%s" #define TN_MINICPMV_LN "resampler.ln_%s.%s" +#define TN_GLM_ADAPER_CONV "adapter.conv.%s" +#define TN_GLM_ADAPTER_LINEAR "adapter.linear.linear.%s" +#define TN_GLM_ADAPTER_NORM_1 "adapter.linear.norm1.%s" +#define TN_GLM_ADAPTER_D_H_2_4H "adapter.linear.dense_h_to_4h.%s" +#define TN_GLM_ADAPTER_GATE "adapter.linear.gate.%s" +#define TN_GLM_ADAPTER_D_4H_2_H "adapter.linear.dense_4h_to_h.%s" +#define TN_GLM_BOI_W "adapter.boi" +#define TN_GLM_EOI_W "adapter.eoi" + enum projector_type { PROJECTOR_TYPE_MLP, @@ -167,6 +177,7 @@ enum projector_type { PROJECTOR_TYPE_LDP, PROJECTOR_TYPE_LDPV2, PROJECTOR_TYPE_RESAMPLER, + PROJECTOR_TYPE_GLM_EDGE, PROJECTOR_TYPE_MERGER, PROJECTOR_TYPE_UNKNOWN, }; @@ -176,6 +187,7 @@ static std::map PROJECTOR_TYPE_NAMES = { { PROJECTOR_TYPE_LDP, "ldp" }, { PROJECTOR_TYPE_LDPV2, "ldpv2"}, { PROJECTOR_TYPE_RESAMPLER, "resampler"}, + { PROJECTOR_TYPE_GLM_EDGE, "adapter"}, { PROJECTOR_TYPE_MERGER, "qwen2vl_merger"}, }; @@ -500,6 +512,12 @@ struct clip_vision_model { struct ggml_tensor * mm_4_w = NULL; struct ggml_tensor * mm_4_b = 
NULL; + //GLMV-Edge projection + struct ggml_tensor * mm_model_adapter_conv_w; + struct ggml_tensor * mm_model_adapter_conv_b; + struct ggml_tensor * boi_w; + struct ggml_tensor * eoi_w; + // MobileVLM projection struct ggml_tensor * mm_model_mlp_1_w; struct ggml_tensor * mm_model_mlp_1_b; @@ -560,6 +578,7 @@ struct clip_ctx { bool has_vision_encoder = false; bool has_llava_projector = false; bool has_minicpmv_projector = false; + bool has_glm_projector = false; bool has_qwen2vl_merger = false; int minicpmv_version = 2; @@ -638,7 +657,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 const int batch_size = imgs->size; - if (ctx->has_llava_projector || ctx->has_minicpmv_projector) { + if (ctx->has_llava_projector || ctx->has_minicpmv_projector || ctx->has_glm_projector) { GGML_ASSERT(batch_size == 1); } @@ -734,8 +753,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 } // loop over layers - if (ctx->has_minicpmv_projector || ctx->has_qwen2vl_merger) { - // TODO: figure out why we doing thing in this way ??? 
+ if (ctx->has_minicpmv_projector || ctx->has_glm_projector || ctx->has_qwen2vl_merger) { n_layer += 1; } for (int il = 0; il < n_layer - 1; il++) { @@ -1095,7 +1113,33 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 GGML_ASSERT(false); } } - else if (ctx->proj_type == PROJECTOR_TYPE_MERGER) { + // glm projector + else if (ctx->has_glm_projector) { + if (ctx->proj_type == PROJECTOR_TYPE_GLM_EDGE) { + size_t gridsz = (size_t)sqrt(embeddings->ne[1]); + embeddings = ggml_cont(ctx0, ggml_permute(ctx0,embeddings,1,0,2,3)); + embeddings = ggml_reshape_3d(ctx0, embeddings, gridsz, gridsz, embeddings->ne[1]); + embeddings = ggml_conv_2d(ctx0, model.mm_model_adapter_conv_w, embeddings, 2, 2, 0, 0, 1, 1); + embeddings = ggml_reshape_3d(ctx0, embeddings,embeddings->ne[0]*embeddings->ne[1] , embeddings->ne[2], batch_size); + embeddings = ggml_cont(ctx0, ggml_permute(ctx0,embeddings, 1, 0, 2, 3)); + embeddings = ggml_add(ctx0, embeddings, model.mm_model_adapter_conv_b); + //GLU + { + embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_0_w, embeddings); + embeddings = ggml_norm(ctx0, embeddings, eps); + embeddings = ggml_add(ctx0, ggml_mul(ctx0, embeddings, model.mm_model_ln_q_w), model.mm_model_ln_q_b); + embeddings = ggml_gelu_inplace(ctx0, embeddings); + struct ggml_tensor * x = embeddings; + embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, embeddings); + x = ggml_mul_mat(ctx0, model.mm_model_mlp_1_w,x); + embeddings = ggml_silu_inplace(ctx0, embeddings); + embeddings = ggml_mul(ctx0, embeddings,x); + embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_3_w, embeddings); + } + } else { + GGML_ABORT("fatel error"); + } + } else if (ctx->proj_type == PROJECTOR_TYPE_MERGER) { embeddings = ggml_reshape_3d(ctx0, embeddings, hidden_size * 4, num_positions / 4, batch_size); embeddings = ggml_mul_mat(ctx0, model.mm_0_w, embeddings); @@ -1284,6 +1328,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { 
new_clip->minicpmv_version = gguf_get_val_i32(ctx, idx); } + idx = gguf_find_key(ctx, KEY_HAS_GLM_PROJ); + if (idx != -1) { + new_clip->has_glm_projector = gguf_get_val_bool(ctx, idx); + } + idx = gguf_find_key(ctx, KEY_HAS_QWEN2VL_MERGER); if (idx != -1) { new_clip->has_qwen2vl_merger = gguf_get_val_bool(ctx, idx); @@ -1308,6 +1357,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { LOG_INF("%s: vision_encoder: %d\n", __func__, new_clip->has_vision_encoder); LOG_INF("%s: llava_projector: %d\n", __func__, new_clip->has_llava_projector); LOG_INF("%s: minicpmv_projector: %d\n", __func__, new_clip->has_minicpmv_projector); + LOG_INF("%s: glm_projector: %d\n", __func__, new_clip->has_glm_projector); LOG_INF("%s: model size: %.2f MB\n", __func__, model_size / 1024.0 / 1024.0); LOG_INF("%s: metadata size: %.2f MB\n", __func__, ggml_get_mem_size(meta) / 1024.0 / 1024.0); } @@ -1575,6 +1625,18 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { vision_model.mm_model_ln_post_w = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "post", "weight")); vision_model.mm_model_ln_post_b = get_tensor(new_clip->ctx_data, format(TN_MINICPMV_LN, "post", "bias")); } + else if (new_clip->proj_type == PROJECTOR_TYPE_GLM_EDGE) { + vision_model.mm_model_adapter_conv_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPER_CONV, "weight")); + vision_model.mm_model_adapter_conv_b = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPER_CONV, "bias")); + vision_model.mm_model_mlp_0_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_LINEAR,"weight")); + vision_model.mm_model_ln_q_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_NORM_1,"weight")); + vision_model.mm_model_ln_q_b = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_NORM_1,"bias")); + vision_model.mm_model_mlp_1_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_D_H_2_4H,"weight")); + vision_model.mm_model_mlp_2_w = 
get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_GATE,"weight")); + vision_model.mm_model_mlp_3_w = get_tensor(new_clip->ctx_data, format(TN_GLM_ADAPTER_D_4H_2_H,"weight")); + vision_model.boi_w = get_tensor(new_clip->ctx_data, TN_GLM_BOI_W); + vision_model.eoi_w = get_tensor(new_clip->ctx_data, TN_GLM_EOI_W); + } else if (new_clip->proj_type == PROJECTOR_TYPE_MERGER) { vision_model.mm_0_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "weight")); vision_model.mm_0_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 0, "bias")); @@ -2115,6 +2177,20 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli return true; } + if (ctx->has_glm_projector) { + res_imgs->size = 1; + res_imgs->data = new clip_image_f32[res_imgs->size]; + clip_image_u8 resized_image; + int32_t sz=ctx->vision_model.hparams.image_size; + bicubic_resize(*img, resized_image,sz,sz); + clip_image_f32 * res = clip_image_f32_init(); + //clip_image_save_to_bmp(resized_image, "resized.bmp"); + normalize_image_u8_to_f32(&resized_image, res, ctx->image_mean, ctx->image_std); + res_imgs->data[0] = *res; + clip_image_f32_free(res); + return true; + } + bool pad_to_square = true; if (!ctx->has_vision_encoder) { LOG_ERR("This gguf file seems to have no vision encoder\n"); @@ -2300,7 +2376,8 @@ void clip_free(clip_ctx * ctx) { } size_t clip_embd_nbytes(const struct clip_ctx * ctx) { - return clip_n_patches(ctx) * clip_n_mmproj_embd(ctx) * sizeof(float); + int extra_tokens = ctx->has_glm_projector ? 
2 : 0; + return (clip_n_patches(ctx) + extra_tokens) * clip_n_mmproj_embd(ctx) * sizeof(float); } size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w) { @@ -2342,7 +2419,7 @@ int clip_n_patches_by_img(const struct clip_ctx * ctx, struct clip_image_f32 * i int n_patches = (params.image_size / params.patch_size) * (params.image_size / params.patch_size); - if (ctx->proj_type == PROJECTOR_TYPE_LDP || ctx->proj_type == PROJECTOR_TYPE_LDPV2) { + if (ctx->proj_type == PROJECTOR_TYPE_LDP || ctx->proj_type == PROJECTOR_TYPE_LDPV2 || ctx->proj_type == PROJECTOR_TYPE_GLM_EDGE) { n_patches /= 4; } else if (ctx->proj_type == PROJECTOR_TYPE_RESAMPLER) { if (ctx->minicpmv_version == 2) { @@ -2475,6 +2552,12 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima if (ctx->has_minicpmv_projector) { GGML_ASSERT(batch_size == 1); } + if (ctx->has_glm_projector) { + GGML_ASSERT(batch_size == 1); + ggml_tensor * boi = ctx->vision_model.boi_w; + ggml_backend_tensor_get(boi,vec,0,ggml_nbytes(boi)); + vec = (float*)(vec+ggml_nelements(boi)); //offset for boi + } // build the inference graph ggml_cgraph * gf = clip_image_build_graph(ctx, imgs, ctx->load_image_size, true); @@ -2627,7 +2710,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions)); free(positions_data); - { + if (!ctx->has_glm_projector) { struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches"); int* patches_data = (int*)malloc(ggml_nbytes(patches)); for (int i = 0; i < num_patches; i++) { @@ -2651,6 +2734,13 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima // copy the embeddings to the location passed by the user ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings)); + if (ctx->has_glm_projector) { + //eoi + ggml_tensor * eoi = ctx->vision_model.eoi_w; + int offset = ggml_nelements(embeddings); + 
ggml_backend_tensor_get(eoi, vec+offset, 0, ggml_nbytes(eoi)); + } + return true; } @@ -2812,6 +2902,9 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { return 3584; } } + if (ctx->proj_type == PROJECTOR_TYPE_GLM_EDGE){ + return ctx->vision_model.mm_model_mlp_3_w->ne[1]; + } if (ctx->proj_type == PROJECTOR_TYPE_MERGER) { return ctx->vision_model.mm_1_b->ne[0]; } @@ -2827,6 +2920,9 @@ int clip_is_minicpmv(const struct clip_ctx * ctx) { return 0; } +bool clip_is_glm(const struct clip_ctx * ctx) { + return ctx->has_glm_projector; +} bool clip_is_qwen2vl(const struct clip_ctx * ctx) { return ctx->has_qwen2vl_merger; } diff --git a/examples/llava/clip.h b/examples/llava/clip.h index 1603edd26..841b4f6f9 100644 --- a/examples/llava/clip.h +++ b/examples/llava/clip.h @@ -93,6 +93,8 @@ CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx); CLIP_API bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec); +CLIP_API bool clip_is_glm(const struct clip_ctx * ctx); + #ifdef __cplusplus } #endif diff --git a/examples/llava/glmedge-convert-image-encoder-to-gguf.py b/examples/llava/glmedge-convert-image-encoder-to-gguf.py new file mode 100644 index 000000000..848ef1cf3 --- /dev/null +++ b/examples/llava/glmedge-convert-image-encoder-to-gguf.py @@ -0,0 +1,280 @@ +import argparse +import os +import json +import re + +import torch +import numpy as np +from gguf import * + +TEXT = "clip.text" +VISION = "clip.vision" +from transformers import SiglipVisionModel, SiglipVisionConfig + +def k(raw_key: str, arch: str) -> str: + return raw_key.format(arch=arch) + + +def should_skip_tensor(name: str, has_text: bool, has_vision: bool, has_llava: bool) -> bool: + if name in ( + "logit_scale", + "text_model.embeddings.position_ids", + "vision_model.embeddings.position_ids", + ): + return True + + if name in ( + "vision_model.head.probe", + "vision_model.head.attention.in_proj_weight", + "vision_model.head.attention.in_proj_bias", + 
"vision_model.head.attention.out_proj.weight", + "vision_model.head.attention.out_proj.bias", + "vision_model.head.layernorm.weight", + "vision_model.head.layernorm.bias", + "vision_model.head.mlp.fc1.weight", + "vision_model.head.mlp.fc1.bias", + "vision_model.head.mlp.fc2.weight", + "vision_model.head.mlp.fc2.bias" + ): + return True + + if name.startswith("v") and not has_vision: + return True + + if name.startswith("t") and not has_text: + return True + + return False + + +def get_tensor_name(name: str) -> str: + if "projection" in name: + return name + if "mm_projector" in name: + name = name.replace("model.mm_projector", "mm") + name = re.sub(r'mm\.mlp\.mlp', 'mm.model.mlp', name, count=1) + name = re.sub(r'mm\.peg\.peg', 'mm.model.peg', name, count=1) + return name + + return name.replace("text_model", "t").replace("vision_model", "v").replace("encoder.layers", "blk").replace("embeddings.", "").replace("_proj", "").replace("self_attn.", "attn_").replace("layer_norm", "ln").replace("layernorm", "ln").replace("mlp.fc1", "ffn_down").replace("mlp.fc2", "ffn_up").replace("embedding", "embd").replace("final", "post").replace("layrnorm", "ln") + + +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a significant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. 
+ """ + bs = ( + list(range(ord("!"), ord("~") + 1)) + + list(range(ord("¡"), ord("¬") + 1)) + + list(range(ord("®"), ord("ÿ") + 1)) + ) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +ap = argparse.ArgumentParser() +ap.add_argument("-m", "--model-dir", help="Path to model directory cloned from HF Hub", required=True) +ap.add_argument("--use-f32", action="store_true", default=False, help="Use f32 instead of f16") +ap.add_argument("--text-only", action="store_true", required=False, + help="Save a text-only model. It can't be used to encode images") +ap.add_argument("--vision-only", action="store_true", required=False, + help="Save a vision-only model. It can't be used to encode texts") +ap.add_argument("--clip-model-is-vision", action="store_true", required=False, + help="The clip model is a pure vision model (ShareGPT4V vision extract for example)") +ap.add_argument("--clip-model-is-openclip", action="store_true", required=False, + help="The clip model is from openclip (for ViT-SO400M type))") +ap.add_argument("--llava-projector", help="Path to llava.projector file. If specified, save an image encoder for LLaVA models.") +ap.add_argument("--projector-type", help="Type of projector. Possible values: mlp, ldp, ldpv2", choices=["mlp", "ldp", "ldpv2","adapter"], default="adapter") +ap.add_argument("-o", "--output-dir", help="Directory to save GGUF files. 
Default is the original model directory", default=None) +# Example --image_mean 0.48145466 0.4578275 0.40821073 --image_std 0.26862954 0.26130258 0.27577711 +# Example --image_mean 0.5 0.5 0.5 --image_std 0.5 0.5 0.5 +default_image_mean = [0.5, 0.5, 0.5] +default_image_std = [0.5, 0.5, 0.5] +ap.add_argument('--image-mean', type=float, nargs='+', help='Mean of the images for normalization (overrides processor) ', default=None) +ap.add_argument('--image-std', type=float, nargs='+', help='Standard deviation of the images for normalization (overrides processor)', default=None) + +# with proper +args = ap.parse_args() + + +if args.text_only and args.vision_only: + print("--text-only and --image-only arguments cannot be specified at the same time.") + exit(1) + +if args.use_f32: + print("WARNING: Weights for the convolution op is always saved in f16, as the convolution op in GGML does not support 32-bit kernel weights yet.") + +# output in the same directory as the model if output_dir is None +dir_model = args.model_dir + +if args.clip_model_is_vision or not os.path.exists(dir_model + "/vocab.json") or args.clip_model_is_openclip: + vocab = None + tokens = None +else: + with open(dir_model + "/vocab.json", "r", encoding="utf-8") as f: + vocab = json.load(f) + tokens = [key for key in vocab] + +with open(dir_model + "/config.json", "r", encoding="utf-8") as f: + config = json.load(f) + if args.clip_model_is_vision: + v_hparams = config + t_hparams = None + else: + v_hparams = config["vision_config"] + t_hparams = None + +# possible data types +# ftype == 0 -> float32 +# ftype == 1 -> float16 +# +# map from ftype to string +ftype_str = ["f32", "f16"] + +ftype = 1 +if args.use_f32: + ftype = 0 + +vision_config = SiglipVisionConfig(**v_hparams) +model = SiglipVisionModel(vision_config) +model.load_state_dict(torch.load(os.path.join(dir_model, "glm.clip"))) + +fname_middle = None +has_text_encoder = False +has_vision_encoder = True +has_glm_projector = True +if 
args.text_only: + fname_middle = "text-" + has_vision_encoder = False +elif args.llava_projector is not None: + fname_middle = "mmproj-" + has_text_encoder = False + has_glm_projector = True +elif args.vision_only: + fname_middle = "vision-" + has_text_encoder = False +else: + fname_middle = "" + +output_dir = args.output_dir if args.output_dir is not None else dir_model +os.makedirs(output_dir, exist_ok=True) +output_prefix = os.path.basename(output_dir).replace("ggml_", "") +fname_out = os.path.join(output_dir, f"{fname_middle}model-{ftype_str[ftype]}.gguf") +fout = GGUFWriter(path=fname_out, arch="clip") + +fout.add_bool("clip.has_text_encoder", has_text_encoder) +fout.add_bool("clip.has_vision_encoder", has_vision_encoder) +fout.add_bool("clip.has_glm_projector", has_glm_projector) +fout.add_file_type(ftype) +model_name = config["_name_or_path"] if "_name_or_path" in config else os.path.basename(dir_model) +fout.add_name(model_name) +if has_glm_projector: + fout.add_description("image encoder for glm4v") + fout.add_string("clip.projector_type", "adapter") +else: + fout.add_description("two-tower CLIP model") + +if has_text_encoder: + assert t_hparams is not None + assert tokens is not None + # text_model hparams + fout.add_uint32(k(KEY_CONTEXT_LENGTH, TEXT), t_hparams["max_position_embeddings"]) + fout.add_uint32(k(KEY_EMBEDDING_LENGTH, TEXT), t_hparams["hidden_size"]) + fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, TEXT), t_hparams["intermediate_size"]) + fout.add_uint32("clip.text.projection_dim", t_hparams.get("projection_dim", config["projection_dim"])) + fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, TEXT), t_hparams["num_attention_heads"]) + fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, TEXT), t_hparams["layer_norm_eps"]) + fout.add_uint32(k(KEY_BLOCK_COUNT, TEXT), t_hparams["num_hidden_layers"]) + fout.add_token_list(tokens) + +if has_vision_encoder: + # vision_model hparams + fout.add_uint32("clip.vision.image_size", v_hparams["image_size"]) + 
fout.add_uint32("clip.vision.patch_size", v_hparams["patch_size"]) + fout.add_uint32(k(KEY_EMBEDDING_LENGTH, VISION), v_hparams["hidden_size"]) + fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), v_hparams["intermediate_size"]) + fout.add_uint32("clip.vision.projection_dim", 0) + fout.add_uint32(k(KEY_ATTENTION_HEAD_COUNT, VISION), v_hparams["num_attention_heads"]) + fout.add_float32(k(KEY_ATTENTION_LAYERNORM_EPS, VISION), 1e-6) + fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), v_hparams["num_hidden_layers"]) + + image_mean = args.image_mean if args.image_mean is not None else default_image_mean + image_std = args.image_std if args.image_std is not None else default_image_std + fout.add_array("clip.vision.image_mean", image_mean) + fout.add_array("clip.vision.image_std", image_std) + +fout.add_bool("clip.use_gelu", True) + + +if has_glm_projector: + # model.vision_model.encoder.layers.pop(-1) # pyright: ignore[reportAttributeAccessIssue] + projector = torch.load(args.llava_projector) + for name, data in projector.items(): + name = get_tensor_name(name) + # pw and dw conv ndim==4 + if data.ndim == 2 or data.ndim == 4: + data = data.squeeze().numpy().astype(np.float16) + else: + data = data.squeeze().numpy().astype(np.float32) + if name.startswith("vision."): + name=name.replace("vision.","") + fout.add_tensor(name, data) + print(f"Projector {name} - {data.dtype} - shape = {data.shape}") + # print(f"Projector {name} tensors added\n") + +state_dict = model.state_dict() # pyright: ignore[reportAttributeAccessIssue] +for name, data in state_dict.items(): + if should_skip_tensor(name, has_text_encoder, has_vision_encoder, has_glm_projector): + # we don't need this + print(f"skipping parameter: {name}") + continue + + name = get_tensor_name(name) + data = data.squeeze().numpy() + + n_dims = len(data.shape) + + # ftype == 0 -> float32, ftype == 1 -> float16 + ftype_cur = 0 + if n_dims == 4: + print(f"tensor {name} is always saved in f16") + data = data.astype(np.float16) + 
ftype_cur = 1 + elif ftype == 1: + if name[-7:] == ".weight" and n_dims == 2: + # print(" Converting to float16") + data = data.astype(np.float16) + ftype_cur = 1 + else: + # print(" Converting to float32") + data = data.astype(np.float32) + ftype_cur = 0 + else: + if data.dtype != np.float32: + # print(" Converting to float32") + data = data.astype(np.float32) + ftype_cur = 0 + print(f"siglip {name} - {data.dtype} - shape = {data.shape}") + # print(f"{name} - {ftype_str[ftype_cur]} - shape = {data.shape}") + fout.add_tensor(name, data) + + +fout.write_header_to_file() +fout.write_kv_data_to_file() +fout.write_tensors_to_file() +fout.close() + +print("Done. Output file: " + fname_out) diff --git a/examples/llava/glmedge-surgery.py b/examples/llava/glmedge-surgery.py new file mode 100644 index 000000000..16bb915d0 --- /dev/null +++ b/examples/llava/glmedge-surgery.py @@ -0,0 +1,33 @@ +import argparse +import os +import torch +from transformers import AutoModel + +ap = argparse.ArgumentParser() +ap.add_argument("-m", "--model", help="Path to GLM model") +args = ap.parse_args() + +# find the model part that includes the multimodal projector weights +model = AutoModel.from_pretrained(args.model, trust_remote_code=True, local_files_only=True) +checkpoint = model.state_dict() + +# get a list of mm tensor names +mm_tensors = [k for k, v in checkpoint.items() if k.startswith("vision.adapter.")] + +# store these tensors in a new dictionary and torch.save them +projector = {name: checkpoint[name].float() for name in mm_tensors} +torch.save(projector, f"{args.model}/glm.projector") + +clip_tensors = [k for k, v in checkpoint.items() if k.startswith("vision.vit.model.vision_model.")] +if len(clip_tensors) > 0: + clip = {name.replace("vision.vit.model.", ""): checkpoint[name].float() for name in clip_tensors} + torch.save(clip, f"{args.model}/glm.clip") + + # added tokens should be removed to be able to convert the model + if 
os.path.exists(f"{args.model}/added_tokens.json"): + with open(f"{args.model}/added_tokens.json", "w") as f: + f.write("{}\n") + +print("Done!") +print(f"Now you can convert {args.model} to a regular LLaMA GGUF file.") +print(f"Also, use {args.model}glm.projector to prepare a glm-encoder.gguf file.") diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp index 2cac7933d..300714045 100644 --- a/examples/llava/llava.cpp +++ b/examples/llava/llava.cpp @@ -311,6 +311,20 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli img_res_v.size = 0; img_res_v.data = nullptr; } + else if (clip_is_glm(ctx_clip)){ + struct clip_image_size * load_image_size = clip_image_size_init(); + load_image_size->width = img_res_v.data[0].nx; + load_image_size->height = img_res_v.data[0].ny; + clip_add_load_image_size(ctx_clip, load_image_size); + + bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd); + int pos = int(load_image_size->width/clip_patch_size(ctx_clip)/2); + *n_img_pos = (pos * pos + 2); + if (!encoded){ + LOG_ERR("Unable to encode image \n"); + return false; + } + } else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) { // flat / default llava-1.5 type embedding *n_img_pos = clip_n_patches(ctx_clip); @@ -395,6 +409,9 @@ bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, co if (clip_is_minicpmv(ctx_clip)) { num_max_patches = 10; } + if (clip_is_glm(ctx_clip)) { + num_max_patches = 1; + } float * image_embd; if (clip_is_qwen2vl(ctx_clip)) { // qwen2vl don't split image into chunks, so `num_max_patches` is not needed. 
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 8fe84df21..ecac5b4bb 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -1357,6 +1357,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.OUTPUT, MODEL_TENSOR.ATTN_NORM, MODEL_TENSOR.ATTN_QKV, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, MODEL_TENSOR.ATTN_OUT, MODEL_TENSOR.FFN_NORM, MODEL_TENSOR.FFN_DOWN, diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index a7260f495..97a1e7e5e 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -1024,6 +1024,9 @@ static const std::map> LLM_TENSOR_N { LLM_TENSOR_OUTPUT, "output" }, { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp index 5c19bab24..028a64794 100644 --- a/src/llama-chat.cpp +++ b/src/llama-chat.cpp @@ -51,6 +51,7 @@ static const std::map LLM_CHAT_TEMPLATES = { { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 }, { "chatglm3", LLM_CHAT_TEMPLATE_CHATGML_3 }, { "chatglm4", LLM_CHAT_TEMPLATE_CHATGML_4 }, + { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE }, { "minicpm", LLM_CHAT_TEMPLATE_MINICPM }, { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 }, { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD }, @@ -115,7 +116,7 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) { } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) { return LLM_CHAT_TEMPLATE_PHI_3; } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) { - return LLM_CHAT_TEMPLATE_FALCON_3; + return tmpl_contains("") ? 
LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE; } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) { return LLM_CHAT_TEMPLATE_ZEPHYR; } else if (tmpl_contains("bos_token + message['role']")) { @@ -440,6 +441,14 @@ int32_t llm_chat_apply_template( if (add_ass) { ss << "<|assistant|>"; } + } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) { + for (auto message : chat) { + std::string role(message->role); + ss << "<|" << role << "|>" << "\n" << message->content; + } + if (add_ass) { + ss << "<|assistant|>"; + } } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) { // MiniCPM-3B-OpenHermes-2.5-v2-GGUF for (auto message : chat) { diff --git a/src/llama-chat.h b/src/llama-chat.h index 3a4d07ce3..2f6a0e3e2 100644 --- a/src/llama-chat.h +++ b/src/llama-chat.h @@ -31,6 +31,7 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_LLAMA_3, LLM_CHAT_TEMPLATE_CHATGML_3, LLM_CHAT_TEMPLATE_CHATGML_4, + LLM_CHAT_TEMPLATE_GLMEDGE, LLM_CHAT_TEMPLATE_MINICPM, LLM_CHAT_TEMPLATE_EXAONE_3, LLM_CHAT_TEMPLATE_RWKV_WORLD, diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 18bd0b071..0487c978b 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -1093,8 +1093,20 @@ void llama_model::load_hparams(llama_model_loader & ml) { { ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); switch (hparams.n_layer) { - case 28: type = LLM_TYPE_6B; break; - case 40: type = LLM_TYPE_9B; break; + case 28: { + if (hparams.n_head(0) == 16) { + type = LLM_TYPE_1_5B; + } else { + type = LLM_TYPE_6B; + } + } break; + case 40: { + if (hparams.n_head(0) == 24) { + type = LLM_TYPE_4B; + } else { + type = LLM_TYPE_9B; + } + } break; default: type = LLM_TYPE_UNKNOWN; } } break; @@ -3068,9 +3080,17 @@ bool llama_model::load_tensors(llama_model_loader & ml) { auto & layer = layers[i]; layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, 
llama_model_loader::TENSOR_NOT_REQUIRED); + layer.bqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED); - layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, 0); - layer.bqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, 0); + if (layer.wqkv == nullptr) { + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}, 0); + layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED); + layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED); + layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED); + } layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0); diff --git a/src/llama.cpp b/src/llama.cpp index 192b20a27..5760017e0 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -7215,17 +7215,30 @@ struct llm_build_context { struct ggml_tensor * Qcur = nullptr; struct ggml_tensor * Kcur = nullptr; struct ggml_tensor * Vcur = nullptr; - - cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wqkv, cur); - cb(cur, "wqkv", il); - - cur = ggml_add(ctx0, cur, model.layers[il].bqkv); - cb(cur, "bqkv", il); - - Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); - Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); - + if (model.type == LLM_TYPE_1_5B || model.type == LLM_TYPE_4B || model.type == 
LLM_TYPE_9B) { + Qcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wq, cur); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + } + Kcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wk, cur); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + } + Vcur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wv, cur); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + } + } else { + cur = llm_build_lora_mm(lctx, ctx0, model.layers[il].wqkv, cur); + cb(cur, "wqkv", il); + if (model.layers[il].bqkv) { + cur = ggml_add(ctx0, cur, model.layers[il].bqkv); + cb(cur, "bqkv", il); + } + Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); + Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); + } cb(Qcur, "Qcur", il); cb(Kcur, "Kcur", il); cb(Vcur, "Vcur", il); diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp index 4563f9dcb..e0314ae1d 100644 --- a/tests/test-chat-template.cpp +++ b/tests/test-chat-template.cpp @@ -175,6 +175,14 @@ int main(void) { /* .bos_token= */ "", /* .eos_token= */ "", }, + { + /* .name= */ "GLMEdge", + /* .template_str= */ "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}<|assistant|>", + /* .expected_output= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>", + /* .expected_output_jinja= */ "<|system|>\nYou are a helpful assistant<|user|>\nHello<|assistant|>\nHi 
there<|user|>\nWho are you<|assistant|>\n I am an assistant <|user|>\nAnother question<|assistant|>", + /* .bos_token= */ "", + /* .eos_token= */ "", + }, { /* .name= */ "MiniCPM-3B-OpenHermes-2.5-v2-GGUF", /* .template_str= */ u8"{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + ''}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}", From ff227703d6d6e1888bdc7af6138514092ffcdb96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Moskal?= Date: Sat, 1 Feb 2025 23:55:32 -0800 Subject: [PATCH 18/46] sampling : support for llguidance grammars (#10224) * initial porting of previous LLG patch * update for new APIs * build: integrate llguidance as an external project * use '%llguidance' as marker to enable llg lark syntax * add some docs * clarify docs * code style fixes * remove llguidance.h from .gitignore * fix tests when llg is enabled * pass vocab not model to llama_sampler_init_llg() * copy test-grammar-integration.cpp to test-llguidance.cpp * clang fmt * fix ref-count bug * build and run test * gbnf -> lark syntax * conditionally include llguidance test based on LLAMA_LLGUIDANCE flag * rename llguidance test file to test-grammar-llguidance.cpp * add gh action for llg test * align tests with LLG grammar syntax and JSON Schema spec * llama_tokenizer() in fact requires valid utf8 * update llg * format file * add $LLGUIDANCE_LOG_LEVEL support * fix whitespace * fix warning * include for INFINITY * add final newline * fail llama_sampler_init_llg() at runtime * Link gbnf_to_lark.py script; fix links; refer to llg docs for lexemes * simplify #includes * improve doc string for LLAMA_LLGUIDANCE * typo in merge * bump llguidance to 0.6.12 --- .github/workflows/build.yml | 30 + CMakeLists.txt | 1 + common/CMakeLists.txt | 28 + common/json-schema-to-grammar.cpp | 9 +- common/json-schema-to-grammar.h | 3 +- common/llguidance.cpp | 270 ++++++ common/sampling.cpp | 22 +- common/sampling.h | 3 + 
docs/llguidance.md | 51 ++ tests/CMakeLists.txt | 3 + tests/test-grammar-integration.cpp | 2 +- tests/test-grammar-llguidance.cpp | 1140 +++++++++++++++++++++++++ tests/test-json-schema-to-grammar.cpp | 2 +- 13 files changed, 1555 insertions(+), 9 deletions(-) create mode 100644 common/llguidance.cpp create mode 100644 docs/llguidance.md create mode 100644 tests/test-grammar-llguidance.cpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7392f2bfe..8f9c82f87 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -302,6 +302,36 @@ jobs: cd build ctest -L main --verbose --timeout 900 + ubuntu-latest-llguidance: + runs-on: ubuntu-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. \ + -DLLAMA_FATAL_WARNINGS=ON \ + -DLLAMA_LLGUIDANCE=ON + cmake --build . 
--config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + ubuntu-latest-cmake-rpc: runs-on: ubuntu-latest diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c62d1788..74b48d24d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,7 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE}) # 3rd party libs option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) +option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF) # Required for relocatable CMake package include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 72f0915c1..e61015d2a 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -65,6 +65,7 @@ add_library(${TARGET} STATIC console.h json-schema-to-grammar.cpp json.hpp + llguidance.cpp log.cpp log.h minja.hpp @@ -91,6 +92,33 @@ if (LLAMA_CURL) set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY}) endif () +if (LLAMA_LLGUIDANCE) + include(ExternalProject) + set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source) + set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release) + ExternalProject_Add(llguidance_ext + GIT_REPOSITORY https://github.com/guidance-ai/llguidance + # v0.6.12: + GIT_TAG ced1c9023d47ec194fa977932d35ce65c2ebfc09 + PREFIX ${CMAKE_BINARY_DIR}/llguidance + SOURCE_DIR ${LLGUIDANCE_SRC} + BUILD_IN_SOURCE TRUE + CONFIGURE_COMMAND "" + BUILD_COMMAND cargo build --release + INSTALL_COMMAND "" + BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/libllguidance.a ${LLGUIDANCE_PATH}/llguidance.h + UPDATE_COMMAND "" + ) + target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE) + + add_library(llguidance STATIC IMPORTED) + set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/libllguidance.a) + add_dependencies(llguidance llguidance_ext) + + 
target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH}) + set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance) +endif () + target_include_directories(${TARGET} PUBLIC .) target_compile_features (${TARGET} PUBLIC cxx_std_17) target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 1f47e313e..3ebcc3d9f 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -991,7 +991,14 @@ public: } }; -std::string json_schema_to_grammar(const json & schema) { +std::string json_schema_to_grammar(const json & schema, bool force_gbnf) { +#ifdef LLAMA_USE_LLGUIDANCE + if (!force_gbnf) { + return "%llguidance {}\nstart: %json " + schema.dump(); + } +#else + (void)force_gbnf; +#endif // LLAMA_USE_LLGUIDANCE return build_grammar([&](const common_grammar_builder & callbacks) { auto copy = schema; callbacks.resolve_refs(copy); diff --git a/common/json-schema-to-grammar.h b/common/json-schema-to-grammar.h index ba4112cb9..62a3b0a44 100644 --- a/common/json-schema-to-grammar.h +++ b/common/json-schema-to-grammar.h @@ -5,7 +5,8 @@ #define JSON_ASSERT GGML_ASSERT #include "json.hpp" -std::string json_schema_to_grammar(const nlohmann::ordered_json & schema); +std::string json_schema_to_grammar(const nlohmann::ordered_json & schema, + bool force_gbnf = false); struct common_grammar_builder { std::function add_rule; diff --git a/common/llguidance.cpp b/common/llguidance.cpp new file mode 100644 index 000000000..7aa8ddd80 --- /dev/null +++ b/common/llguidance.cpp @@ -0,0 +1,270 @@ +#include "sampling.h" +#include "log.h" + +#ifdef LLAMA_USE_LLGUIDANCE + +# include "llguidance.h" +# include + +struct llama_sampler_llg { + const llama_vocab * vocab; + std::string grammar_kind; + std::string grammar_data; + LlgTokenizer * tokenizer; + LlgConstraint * grammar; + LlgMaskResult llg_res; + bool has_llg_res; +}; + 
+static LlgConstraint * llama_sampler_llg_new(LlgTokenizer * tokenizer, const char * grammar_kind, + const char * grammar_data) { + LlgConstraintInit cinit; + llg_constraint_init_set_defaults(&cinit, tokenizer); + const char * log_level = getenv("LLGUIDANCE_LOG_LEVEL"); + if (log_level && *log_level) { + cinit.log_stderr_level = atoi(log_level); + } + auto c = llg_new_constraint_any(&cinit, grammar_kind, grammar_data); + if (llg_get_error(c)) { + LOG_ERR("llg error: %s\n", llg_get_error(c)); + llg_free_constraint(c); + return nullptr; + } + return c; +} + +static const char * llama_sampler_llg_name(const llama_sampler * /*smpl*/) { + return "llguidance"; +} + +static void llama_sampler_llg_accept_impl(llama_sampler * smpl, llama_token token) { + auto * ctx = (llama_sampler_llg *) smpl->ctx; + if (ctx->grammar) { + LlgCommitResult res; + llg_commit_token(ctx->grammar, token, &res); + ctx->has_llg_res = false; + } +} + +static void llama_sampler_llg_apply(llama_sampler * smpl, llama_token_data_array * cur_p) { + auto * ctx = (llama_sampler_llg *) smpl->ctx; + if (ctx->grammar) { + if (!ctx->has_llg_res) { + if (llg_compute_mask(ctx->grammar, &ctx->llg_res) == 0) { + ctx->has_llg_res = true; + } else { + LOG_ERR("llg error: %s\n", llg_get_error(ctx->grammar)); + llg_free_constraint(ctx->grammar); + ctx->grammar = nullptr; + } + } + if (ctx->has_llg_res) { + if (ctx->llg_res.is_stop) { + for (size_t i = 0; i < cur_p->size; ++i) { + if (!llama_vocab_is_eog(ctx->vocab, cur_p->data[i].id)) { + cur_p->data[i].logit = -INFINITY; + } + } + } else { + const uint32_t * mask = ctx->llg_res.sample_mask; + for (size_t i = 0; i < cur_p->size; ++i) { + auto token = cur_p->data[i].id; + if ((mask[token / 32] & (1 << (token % 32))) == 0) { + cur_p->data[i].logit = -INFINITY; + } + } + } + } + } +} + +static void llama_sampler_llg_reset(llama_sampler * smpl) { + auto * ctx = (llama_sampler_llg *) smpl->ctx; + if (!ctx->grammar) { + return; + } + + auto * grammar_new = 
llama_sampler_llg_new(ctx->tokenizer, ctx->grammar_kind.c_str(), ctx->grammar_data.c_str()); + llg_free_constraint(ctx->grammar); + ctx->grammar = grammar_new; + ctx->has_llg_res = false; +} + +static llama_sampler * llama_sampler_llg_clone(const llama_sampler * smpl) { + const auto * ctx = (const llama_sampler_llg *) smpl->ctx; + + auto * result = llama_sampler_init_llg(ctx->vocab, nullptr, nullptr); + + // copy the state + { + auto * result_ctx = (llama_sampler_llg *) result->ctx; + + if (ctx->grammar) { + result_ctx->grammar_kind = ctx->grammar_kind; + result_ctx->grammar_data = ctx->grammar_data; + result_ctx->grammar = llg_clone_constraint(ctx->grammar); + result_ctx->tokenizer = llg_clone_tokenizer(ctx->tokenizer); + } + } + + return result; +} + +static void llama_sampler_llg_free(llama_sampler * smpl) { + const auto * ctx = (llama_sampler_llg *) smpl->ctx; + + if (ctx->grammar) { + llg_free_constraint(ctx->grammar); + llg_free_tokenizer(ctx->tokenizer); + } + + delete ctx; +} + +static llama_sampler_i llama_sampler_llg_i = { + /* .name = */ llama_sampler_llg_name, + /* .accept = */ llama_sampler_llg_accept_impl, + /* .apply = */ llama_sampler_llg_apply, + /* .reset = */ llama_sampler_llg_reset, + /* .clone = */ llama_sampler_llg_clone, + /* .free = */ llama_sampler_llg_free, +}; + +static size_t llama_sampler_llg_tokenize_fn(const void * user_data, const uint8_t * bytes, size_t bytes_len, + uint32_t * output_tokens, size_t output_tokens_len) { + const llama_vocab * vocab = (const llama_vocab *) user_data; + int r = 0; + try { + r = llama_tokenize(vocab, (const char *) bytes, bytes_len, (int32_t *) output_tokens, output_tokens_len, false, + true); + } catch (const std::exception & e) { + GGML_ABORT("llama_tokenize failed: %s\n", e.what()); + } + if (r < 0) { + return -r; + } + return r; +} + +static LlgTokenizer * llama_sampler_llg_new_tokenizer(const llama_vocab * vocab) { + // TODO store the tokenizer in the vocab somehow + static const llama_vocab * 
vocab_cache; + static LlgTokenizer * tokenizer_cache; + + if (vocab_cache == vocab) { + return llg_clone_tokenizer(tokenizer_cache); + } + + auto tok_eos = llama_vocab_eot(vocab); + if (tok_eos == LLAMA_TOKEN_NULL) { + tok_eos = llama_vocab_eos(vocab); + } + + size_t vocab_size = llama_vocab_n_tokens(vocab); + + auto token_lens = new uint32_t[vocab_size]; + // we typically have ~7 bytes per token; let's go on the safe side here + auto token_bytes_size = vocab_size * 16 + 1024 * 1024; + auto token_bytes = new uint8_t[token_bytes_size]; + + size_t offset = 0; + for (size_t i = 0; i < vocab_size; i++) { + size_t max_token = 1024; + if (token_bytes_size - offset < max_token) { + GGML_ABORT("token_bytes buffer too small\n"); + } + + llama_token token = i; + auto dp = (char *) token_bytes + offset; + auto size = llama_detokenize(vocab, &token, 1, dp, max_token, false, false); + if (size < 0) { + GGML_ABORT("llama_detokenize failed\n"); + } + if (size == 0) { + size = llama_detokenize(vocab, &token, 1, dp + 1, max_token - 1, false, true); + if (size < 0) { + GGML_ABORT("llama_detokenize failed\n"); + } + if (size != 0) { + *dp = '\xff'; // special token prefix marker + size += 1; + } + } + + token_lens[i] = size; + offset += size; + } + + LlgTokenizerInit tinit = { + /* .vocab_size = */ (uint32_t) vocab_size, + /* .tok_eos = */ (uint32_t) tok_eos, + /* .token_lens = */ token_lens, + /* .token_bytes = */ token_bytes, + /* .tokenizer_json = */ nullptr, + /* .tokenize_assumes_string = */ true, + /* .tokenize_fn = */ llama_sampler_llg_tokenize_fn, + /* .use_approximate_greedy_tokenize_fn = */ false, + /* .tokenize_user_data = */ vocab, + }; + + char error_buffer[1024]; + LlgTokenizer * tokenizer = llg_new_tokenizer(&tinit, error_buffer, sizeof(error_buffer)); + + delete[] token_bytes; + delete[] token_lens; + + if (tokenizer == nullptr) { + LOG_ERR("llg tokenizer error: %s\n", error_buffer); + return tokenizer; + } + + if (tokenizer_cache) { + 
llg_free_tokenizer(tokenizer_cache); + } + vocab_cache = vocab; + tokenizer_cache = tokenizer; + + return llg_clone_tokenizer(tokenizer_cache); +} + +llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * grammar_kind, + const char * grammar_data) { + auto * ctx = new llama_sampler_llg; + + if (grammar_kind != nullptr && grammar_kind[0] != '\0') { + auto tokenizer = llama_sampler_llg_new_tokenizer(vocab); + *ctx = { + /* .vocab = */ vocab, + /* .grammar_kind = */ grammar_kind, + /* .grammar_data = */ grammar_data, + /* .tokenizer = */ tokenizer, + /* .grammar = */ llama_sampler_llg_new(tokenizer, grammar_kind, grammar_data), + /* .llg_res = */ {}, + /* .has_llg_res = */ false, + }; + } else { + *ctx = { + /* .vocab = */ vocab, + /* .grammar_kind = */ {}, + /* .grammar_data = */ {}, + /* .tokenizer = */ nullptr, + /* .grammar = */ nullptr, + /* .llg_res = */ {}, + /* .has_llg_res = */ false, + }; + } + + return new llama_sampler{ + /* .iface = */ &llama_sampler_llg_i, + /* .ctx = */ ctx, + }; +} + +#else + +llama_sampler * llama_sampler_init_llg(const llama_vocab *, const char *, const char *) { + LOG_WRN("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled"); + return nullptr; +} + +#endif // LLAMA_USE_LLGUIDANCE diff --git a/common/sampling.cpp b/common/sampling.cpp index bc7e49fdb..e4b21ca10 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -156,13 +156,25 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co for (const auto & str : params.grammar_trigger_words) { trigger_words.push_back(str.word.c_str()); } + + struct llama_sampler * grmr; + if (params.grammar.compare(0, 11, "%llguidance") == 0) { +#ifdef LLAMA_USE_LLGUIDANCE + grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str()); +#else + GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled"); +#endif // LLAMA_USE_LLGUIDANCE + } else { + grmr = params.grammar_lazy + ? 
llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root", + trigger_words.data(), trigger_words.size(), + params.grammar_trigger_tokens.data(), params.grammar_trigger_tokens.size()) + : llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root"); + } + auto * result = new common_sampler { /* .params = */ params, - /* .grmr = */ params.grammar_lazy - ? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root", - trigger_words.data(), trigger_words.size(), - params.grammar_trigger_tokens.data(), params.grammar_trigger_tokens.size()) - : llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root"), + /* .grmr = */ grmr, /* .chain = */ llama_sampler_chain_init(lparams), /* .prev = */ ring_buffer(std::max(32, params.n_prev)), /* .cur = */ {}, diff --git a/common/sampling.h b/common/sampling.h index 348911b18..2064421db 100644 --- a/common/sampling.h +++ b/common/sampling.h @@ -102,3 +102,6 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr); std::vector common_sampler_types_from_names(const std::vector & names, bool allow_alt_names); std::vector common_sampler_types_from_chars(const std::string & chars); + +llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, + const char * grammar_kind, const char * grammar_data); diff --git a/docs/llguidance.md b/docs/llguidance.md new file mode 100644 index 000000000..792d20704 --- /dev/null +++ b/docs/llguidance.md @@ -0,0 +1,51 @@ +# LLGuidance Support in llama.cpp + +[LLGuidance](https://github.com/guidance-ai/llguidance) is a library for constrained decoding (also called constrained sampling or structured outputs) for Large Language Models (LLMs). Initially developed as the backend for the [Guidance](https://github.com/guidance-ai/guidance) library, it can also be used independently. 
+ +LLGuidance supports JSON Schemas and arbitrary context-free grammars (CFGs) written in a [variant](https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md) of Lark syntax. It is [very fast](https://github.com/guidance-ai/jsonschemabench/tree/main/maskbench) and has [excellent](https://github.com/guidance-ai/llguidance/blob/main/docs/json_schema.md) JSON Schema coverage but requires the Rust compiler, which complicates the llama.cpp build process. + +## Building + +To enable LLGuidance support, build llama.cpp with the `LLAMA_LLGUIDANCE` option: + +```sh +cmake -B build -DLLAMA_LLGUIDANCE=ON +make -C build -j +``` + +This requires the Rust compiler and the `cargo` tool to be [installed](https://www.rust-lang.org/tools/install). + +## Interface + +There are no new command-line arguments or modifications to `common_params`. When enabled, grammars starting with `%llguidance` are passed to LLGuidance instead of the [current](../grammars/README.md) llama.cpp grammars. Additionally, JSON Schema requests (e.g., using the `-j` argument in `llama-cli`) are also passed to LLGuidance. + +For your existing GBNF grammars, you can use [gbnf_to_lark.py script](https://github.com/guidance-ai/llguidance/blob/main/scripts/gbnf_to_lark.py) to convert them to LLGuidance Lark-like format. + +## Performance + +Computing a "token mask" (i.e., the set of allowed tokens) for a llama3 tokenizer with 128k tokens takes, on average, 50μs of single-core CPU time for the [JSON Schema Bench](https://github.com/guidance-ai/jsonschemabench). The p99 time is 0.5ms, and the p100 time is 20ms. These results are due to the lexer/parser split and several [optimizations](https://github.com/guidance-ai/llguidance/blob/main/docs/optimizations.md). + +## JSON Schema + +LLGuidance adheres closely to the JSON Schema specification. For example: + +- `additionalProperties` defaults to `true`, unlike current grammars, though you can set `"additionalProperties": false` if needed. 
+- any whitespace is allowed. +- The definition order in the `"properties": {}` object is maintained, regardless of whether properties are required (current grammars always puts required properties first). + +Unsupported schemas result in an error message—no keywords are silently ignored. + +## Why Not Reuse GBNF Format? + +GBNF lacks the concept of a lexer. + +Most programming languages, including JSON, use a two-step process: a lexer (built with regular expressions) converts a byte stream into lexemes, which are then processed by a CFG parser. This approach is faster because lexers are cheaper to evaluate, and there is ~10x fewer lexemes than bytes. +LLM tokens often align with lexemes, so the parser is engaged in under 0.5% of tokens, with the lexer handling the rest. + +However, the user has to provide the distinction between lexemes and CFG symbols. In [Lark](https://github.com/lark-parser/lark), lexeme names are uppercase, while CFG symbols are lowercase. +The [gbnf_to_lark.py script](https://github.com/guidance-ai/llguidance/blob/main/scripts/gbnf_to_lark.py) can often take care of this automatically. +See [LLGuidance syntax docs](https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#terminals-vs-rules) for more details. + +## Error Handling + +Errors are currently printed to `stderr`, and generation continues. Improved error handling may be added in the future. 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 40f83ff0d..7a158d602 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -86,6 +86,9 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${CMAKE llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf) llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf) +if (LLAMA_LLGUIDANCE) + llama_target_and_test(test-grammar-llguidance.cpp ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf) +endif () if (NOT WIN32) # these tests are disabled on Windows because they use internal functions not exported with LLAMA_API diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 288e08f51..890608648 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -129,7 +129,7 @@ static void test_grammar(const std::string & test_desc, const std::string & gram test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings); } static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector & passing_strings, const std::vector & failing_strings) { - test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings); + test(test_desc + ". 
Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str), true), passing_strings, failing_strings); } static void test_simple_grammar() { diff --git a/tests/test-grammar-llguidance.cpp b/tests/test-grammar-llguidance.cpp new file mode 100644 index 000000000..8b696006b --- /dev/null +++ b/tests/test-grammar-llguidance.cpp @@ -0,0 +1,1140 @@ +#ifdef NDEBUG +# undef NDEBUG +#endif + +#include "unicode.h" +#include "sampling.h" + +#include +#include +#include + +static const llama_vocab * vocab; + +static bool match_string(const std::string & input, llama_sampler * grammar) { + llama_sampler_reset(grammar); + auto tokens = common_tokenize(vocab, input, false, false); + + auto n_vocab = llama_vocab_n_tokens(vocab); + + std::vector cur; + cur.reserve(n_vocab); + for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) { + cur.emplace_back(llama_token_data{ token_id, 0.0f, 0.0f }); + } + auto tok_arr = llama_token_data_array{ cur.data(), cur.size(), -1, false }; + + for (const auto token : tokens) { + for (llama_token token_id = 0; token_id < (llama_token) n_vocab; token_id++) { + cur[token_id].logit = 0.0f; + } + llama_sampler_apply(grammar, &tok_arr); + if (cur[token].logit < 0.0f) { + return false; + } + llama_sampler_accept(grammar, token); + } + + // do we allow EOS at the end? 
if so the grammar is accepting + + auto tok_eos = llama_vocab_eot(vocab); + if (tok_eos == LLAMA_TOKEN_NULL) { + tok_eos = llama_vocab_eos(vocab); + } + + cur[tok_eos].logit = 0.0f; + llama_sampler_apply(grammar, &tok_arr); + + return cur[tok_eos].logit >= 0.0f; +} + +static void test(const std::string & test_desc, const std::string & grammar_str, + const std::vector & passing_strings, const std::vector & failing_strings) { + fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str()); + fflush(stderr); + + auto * grammar = llama_sampler_init_llg(vocab, "lark", grammar_str.c_str()); + + fprintf(stderr, " 🔵 Valid strings:\n"); + + // Passing strings + for (const auto & test_string : passing_strings) { + fprintf(stderr, " \"%s\" ", test_string.c_str()); + fflush(stderr); + + bool matched = match_string(test_string, grammar); + + if (!matched) { + fprintf(stderr, "❌ (failed to match)\n"); + + // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed. + // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf + FILE * grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w"); + if (grammar_file) { + fprintf(grammar_file, "%s", grammar_str.c_str()); + fclose(grammar_file); + } + + // DEBUG: Write the test string to test-grammar-integration.string.txt + FILE * string_file = fopen("test-grammar-integration.string.txt", "w"); + if (string_file) { + fprintf(string_file, "%s", test_string.c_str()); + fclose(string_file); + } + + fprintf(stderr, + "\n NOTE: Debug grammar file generated. 
To analyze this failure in detail, run the following " + "command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf " + "test-grammar-integration.string.txt\n\n"); + } else { + fprintf(stdout, "✅︎\n"); + } + + assert(matched); + } + + fprintf(stderr, " 🟠 Invalid strings:\n"); + + // Failing strings + for (const auto & test_string : failing_strings) { + fprintf(stderr, " \"%s\" ", test_string.c_str()); + fflush(stderr); + + bool matched = match_string(test_string, grammar); + + if (matched) { + fprintf(stderr, "❌ (incorrectly matched)\n"); + } else { + fprintf(stdout, "✅︎\n"); + } + assert(!matched); + } + + llama_sampler_free(grammar); +} + +static void test_grammar(const std::string & test_desc, const std::string & grammar_str, + const std::vector & passing_strings, + const std::vector & failing_strings) { + test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings); +} + +static void test_schema(const std::string & test_desc, const std::string & schema_str, + const std::vector & passing_strings, + const std::vector & failing_strings) { + test(test_desc + ". 
Schema: " + schema_str, "%llguidance {}\nstart: %json " + schema_str, passing_strings, + failing_strings); +} + +static void test_simple_grammar() { + test_schema("min 0", + R"""({ + "type": "integer", + "minimum": 0 + })""", + // Passing strings + { + "0", + "10", + "12", + "10000", + }, + // Failing strings + { + "-1", + "-10", + "-10000", + "-100000000000000000000000000000000", + // "100000000000000000000000000000000", + "00", + "01", + "-0", + }); + test_schema("min 2", + // Schema + R"""({ + "type": "integer", + "minimum": 2 + })""", + // Passing strings + { + "2", + "3", + "4", + "10", + "20", + "1234567890000000", + }, + // Failing strings + { + "0", "1", "-1", "-100", "0", "1", "01", "02", + // "12345678900000000", + }); + test_schema("min 456", + R"""({ + "type": "integer", + "minimum": 456 + })""", + // Passing strings + { + "456", + "4560", + "457", + "460", + "500", + }, + // Failing strings + { + "455", + "356", + "50", + "050", + "-1", + "-456", + }); + test_schema("min -123", + R"""({ + "type": "integer", + "minimum": -123 + })""", + // Passing strings + { + "-123", + "-122", + "-11", + "-1", + "0", + "1", + "123", + "1234", + "2345", + }, + // Failing strings + { + "-1234", + "-124", + }); + + test_schema("max 9999", + // Schema + R"""({ + "type": "integer", + "maximum": 9999 + })""", + // Passing strings + { + "-99999", + "0", + "9999", + }, + // Failing strings + { + "10000", + "99991", + }); + test_schema("max -9999", + // Schema + R"""({ + "type": "integer", + "maximum": -9999 + })""", + // Passing strings + { + "-10000", + "-9999", + }, + // Failing strings + { + "-9998", + "0", + "9999", + }); + test_schema("min 5 max 30", + // Schema + R"""({ + "type": "integer", + "minimum": 5, + "maximum": 30 + })""", + // Passing strings + { + "5", + "10", + "30", + }, + // Failing strings + { + "05", + "4", + "-1", + "31", + "123", + "0123", + }); + test_schema("min -1 max 1", + R"""({ + "type": "integer", + "minimum": -1, + "maximum": 1 + })""", + // 
Passing strings + { + "-1", + "0", + "1", + }, + // Failing strings + { + "-11", + "-10", + "-2", + "2", + "10", + "11", + }); + test_schema("min -123 max 42", + R"""({ + "type": "integer", + "minimum": -123, + "maximum": 42 + })""", + // Passing strings + { + "-123", + "-122", + "-13", + "-11", + "-2", + "-1", + "0", + "1", + "5", + "10", + "39", + "40", + "42", + }, + // Failing strings + { + "-0123", + "-124", + "-1123", + "-200", + "43", + "123", + "0123", + }); + test_schema("exclusive min / max", + // Schema + R"""({ + "type": "integer", + "exclusiveMinimum": 0, + "exclusiveMaximum": 10000 + })""", + // Passing strings + { + "1", + "9999", + }, + // Failing strings + { + "0", + "01", + "10000", + "99999", + }); + + // Test case for a simple grammar + test_grammar("simple grammar", + R"""( + start: expr + expr: term ("+" term)* + term: number + number: /[0-9]+/ )""", + // Passing strings + { + "42", + "1+2+3+4+5", + "123+456", + }, + // Failing strings + { + "+", + "/ 3", + "1+2+3+4+5+", + "12a45", + }); +} + +static void test_complex_grammar() { + // Test case for a more complex grammar, with both failure strings and success strings + test_grammar("medium complexity grammar", + // Grammar + R"""( + start: expression + expression: term ws (("+"|"-") ws term)* + term: factor ws (("*"|"/") ws factor)* + factor: number | variable | "(" expression ")" | function-call + number: /[0-9]+/ + variable: /[a-zA-Z_][a-zA-Z0-9_]*/ + function-call: variable ws "(" (expression ("," ws expression)*)? 
")" + ws: /[ \t\n\r]?/ )""", + // Passing strings + { "42", + "1*2*3*4*5", + "x", + "x+10", + "x1+y2", + "(a+b)*(c-d)", + "func()", + "func(x,y+2)", + "a*(b+c)-d/e", + "f(g(x),h(y,z))", + "x + 10", + "x1 + y2", + "(a + b) * (c - d)", + "func()", + "func(x, y + 2)", + "a * (b + c) - d / e", + "f(g(x), h(y, z))", + "123+456", + "123*456*789-123/456+789*123", + "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456" }, + // Failing strings + { + "+", + "/ 3x", + "x + + y", + "a * / b", + "func(,)", + "func(x y)", + "(a + b", + "x + y)", + "a + b * (c - d", + "42 +", + "x +", + "x + 10 +", + "(a + b) * (c - d", + "func(", + "func(x, y + 2", + "a * (b + c) - d /", + "f(g(x), h(y, z)", + "123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/", + }); +} + +static void test_special_chars() { + // A collection of tests to exercise special characters such as "." + test_grammar("special characters", + // Grammar + R"""( + start: /.../ "abc" /.../ + )""", + // Passing strings + { "abcabcabc", "aaaabcccc", + // NOTE: Also ensures that multi-byte characters still count as a single character + "🔵🟠✅abc❌🟠🔵" }, + // Failing strings + { "aaabcccc", "aaaaabcccc", "aaaabccc", "aaaabccccc", "🔵🟠✅❌abc❌✅🟠🔵", "🔵🟠abc🟠🔵" }); +} + +static void test_quantifiers() { + // A collection of tests to exercise * + and ? 
quantifiers + + test_grammar( + "* quantifier", + // Grammar + R"""(start: "a"*)""", + // Passing strings + { "", "a", "aaaaa", "aaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }, + // Failing strings + { "b", "ab", "aab", "ba", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" }); + test_grammar( + "+ quantifier", + // Grammar + R"""(start: "a"+)""", + // Passing strings + { "a", "aaaaa", "aaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" }, + // Failing strings + { "", "b", "ab", "aab", "ba", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" }); + test_grammar("? quantifier", + // Grammar + R"""(start: "a"?)""", + // Passing strings + { "", "a" }, + // Failing strings + { + "b", + "ab", + "aa", + "ba", + }); + test_grammar("mixed quantifiers", + // Grammar + R"""( + start: cons+ vowel* cons? (vowel cons)* + vowel: /[aeiouy]/ + cons: /[bcdfghjklmnpqrstvwxyz]/ + )""", + // Passing strings + { + "yes", + "no", + "noyes", + "crwth", + "four", + "bryyyy", + }, + // Failing strings + { + "yess", + "yesno", + "forty", + "catyyy", + }); + test_grammar("simple exact repetition", + // Grammar + R"""( + start: /[ab]{4}/ + )""", + // Passing strings + { + "aaaa", + "bbbb", + "abab", + }, + // Failing strings + { + "a", + "b", + "aaaaa", + }); + test_grammar("simple min repetition", + // Grammar + R"""( + start: /[ab]{4,}/ + )""", + // Passing strings + { + "aaaa", + "aaaaab", + "bbbb", + "ababab", + }, + // Failing strings + { + "", + "aba", + }); + test_grammar("simple max repetition", + // Grammar + R"""( + start: /[ab]{0,4}/ + )""", + // Passing strings + { + "", + "a", + "aa", + "aaa", + "aaab", + }, + // Failing strings + { + "aaaaa", + }); + // test_grammar("min / max repetition", + // // Grammar + // R"""( + // start: ("0x" /[A-F0-9]{2}/ " "?){3,5} + // )""", + // // Passing strings + // { + // "0xFF 0x12 0xAB", + // "0xFF 0x12 0xAB 0x00 0x00", + // }, + // // 
Failing strings + // { + // "", + // "0xFF", + // "0xFF 0x12", + // "0xFF 0x12 0xAB 0x00 0x00 0x00", + // }); +} + +static void test_json_schema() { + // Note that this is similar to the regular grammar tests, + // but we convert each json schema to a grammar before parsing. + // Otherwise, this test structure is the same. + + test_schema("empty schema (object)", + // Schema + R"""( + {"type":"object"} + )""", + // Passing strings + { + R"""({})""", + R"""({"foo": "bar"})""", + }, + // Failing strings + { + "", + "[]", + "null", + R"""("")""", + "true", + }); + + test_schema( + "exotic formats (list)", + // Schema + R"""({ + "items": [ + { "format": "date" }, + { "format": "uuid" }, + { "format": "time" }, + { "format": "date-time" } + ] + })""", + // Passing strings + { + // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? + // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? + R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""", + //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? + //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it? 
+ }, + // Failing strings + { + R"""(["foo", "bar"])""", + R"""(["12345678-1234-1234-1234-1234567890ab"])""", + }); + + test_schema("string", + // Schema + R"""({ + "type": "string" + })""", + // Passing strings + { + R"""("foo")""", + R"""("bar")""", + R"""("")""", + }, + // Failing strings + { + R"""({})""", + R"""("foo": "bar")""", + }); + + test_schema("string w/ min length 1", + // Schema + R"""({ + "type": "string", + "minLength": 1 + })""", + // Passing strings + { + R"""("foo")""", + R"""("bar")""", + }, + // Failing strings + { + R"""("")""", + R"""({})""", + R"""("foo": "bar")""", + }); + + test_schema("string w/ min length 3", + // Schema + R"""({ + "type": "string", + "minLength": 3 + })""", + // Passing strings + { + R"""("foo")""", + R"""("bar")""", + R"""("foobar")""", + }, + // Failing strings + { + R"""("")""", + R"""("f")""", + R"""("fo")""", + }); + + test_schema("string w/ max length", + // Schema + R"""({ + "type": "string", + "maxLength": 3 + })""", + // Passing strings + { + R"""("foo")""", + R"""("bar")""", + R"""("")""", + R"""("f")""", + R"""("fo")""", + }, + // Failing strings + { + R"""("foobar")""", + }); + + test_schema("string w/ min & max length", + // Schema + R"""({ + "type": "string", + "minLength": 1, + "maxLength": 4 + })""", + // Passing strings + { + R"""("foo")""", + R"""("bar")""", + R"""("f")""", + R"""("barf")""", + }, + // Failing strings + { + R"""("")""", + R"""("barfo")""", + R"""("foobar")""", + }); + + test_schema("boolean", + // Schema + R"""({ + "type": "boolean" + })""", + // Passing strings + { + "true", + "false", + }, + // Failing strings + { + R"""("")""", + R"""("true")""", + R"""(True)""", + R"""(FALSE)""", + }); + + test_schema("integer", + // Schema + R"""({ + "type": "integer" + })""", + // Passing strings + { + R"""(0)""", + R"""(12345)""", + R"""(1234567890123456)""", + }, + // Failing strings + { + R"""()""", + R"""(01)""", + R"""(007)""", + R"""(12345678901234567 )""", + }); + + test_schema("string 
const", + // Schema + R"""({ + "const": "foo" + })""", + // Passing strings + { + R"""("foo")""", + }, + // Failing strings + { + R"""(foo)""", + R"""("bar")""", + }); + + test_schema("non-string const", + // Schema + R"""({ + "const": true + })""", + // Passing strings + { + R"""(true)""", + }, + // Failing strings + { + R"""()""", + R"""(foo)""", + R"""("true")""", + }); + + test_schema("non-string const", + // Schema + R"""({ + "enum": ["red", "amber", "green", null, 42, ["foo"]] + })""", + // Passing strings + { + R"""("red")""", + R"""(null)""", + R"""(42)""", + R"""(["foo"])""", + }, + // Failing strings + { + R"""()""", + R"""(420)""", + R"""(true)""", + R"""(foo)""", + }); + + test_schema("simple pattern", + // Schema + R"""({ + "pattern": "^[a-zA-Z0-9_-]*$" + })""", + // Passing strings + { + R"""("")""", + R"""("He_llo-12")""", + }, + // Failing strings + { + R"""("!")""", + R"""("Hello World")""", + }); + + test_schema("pattern with escapes", + // Schema + R"""({ + "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$" + })""", + // Passing strings + { + R"""("a^$.[]()|{}*+?b")""", + }, + // Failing strings + { + R"""("ab")""", + }); + + test_schema("", + // Schema + R"""( + { + "type": ["array", "null"], + "items": { "type": "string" } + } + )""", + // Passing strings + { + "null", + "[]", + "[\"123\"]", + "[\"foo\", \"bar\"]", + }, + // Failing strings + { + "", + "[123]", + "\"foo\"", + "[\"foo\", 42]", + }); + + test_schema("min+max items", + // Schema + R"""({ + "items": { + "type": ["number", "integer"] + }, + "minItems": 3, + "maxItems": 5 + })""", + // Passing strings + { + R"""([1, 2, 3])""", + R"""([1, 2, 3, 4])""", + R"""([1, 2, 3, 4, 5])""", + // this is in fact correct; keyword do not apply if the type is wrong + R"""(1)""", + }, + // Failing strings + { + R"""([1, 2])""", + R"""([1, 2, 3, 4, 5, 6])""", + }); + + // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties) + test_schema("object 
properties", + // Schema + R"""({ + "type": "object", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + }, + "additionalProperties": false + })""", + // Passing strings + { + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // "By default, leaving out properties is valid" + R"""({ "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_name": "Pennsylvania" })""", + // "By extension, even an empty object is valid" + R"""({})""", + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", + }, + // Failing strings + { + // Change datatype from number to string + R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // Reorder properties + R"""({ "street_name": "Pennsylvania", "number": 1600 })""", + // Reorder properties + R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // Additional properties set to false + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", + + }); + + test_schema("additional properties can't override other properties", + R"""({ + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"} + }, + "additionalProperties": true + })""", + // Passing strings + { + R"""({"a": 42})""", + R"""({"c": ""})""", + R"""({"a": 42, "c": ""})""", + R"""({"a_": ""})""", + }, + // Failing strings + { + R"""()""", + R"""({"a": ""})""", + R"""({"a": "", "b": ""})""", + }); + + // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties) + test_schema("object properties, additionalProperties: true", + // Schema + R"""({ + "type": "object", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + }, + 
"additionalProperties": true + })""", + // Passing strings + { + // "By extension, even an empty object is valid" + R"""({})""", + R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""", + // "By default, leaving out properties is valid" + R"""({ "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_name": "Pennsylvania" })""", + // "By default, providing additional properties is valid" + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", + }, + // Failing strings + { + // Change datatype from number to string + R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // Reorder properties + R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""", + }); + + // Additional properties: false + test_schema( + "required + optional props each in original order", + // Schema + R"""({ + "type": "object", + "properties": { + "number": { "type": "number" }, + "street_name": { "type": "string" }, + "street_type": { "enum": ["Street", "Avenue", "Boulevard"] } + }, + "additionalProperties": false + })""", + // Passing strings + { + R"""({ "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_type":"Avenue"})""", + R"""({ "number": 1600, "street_name": "Pennsylvania" })""", + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", + // Spaces are permitted around enum values + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", + }, + // Failing strings + { + // Reorder properties + R"""({ "street_type": "Avenue", "number": 1600 })""", + // Add "direction" + R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""", + }); + + test_schema("required + optional props each in original order", + // Schema + R"""({ + "properties": { + "b": 
{"type": "string"}, + "a": {"type": "string"}, + "d": {"type": "string"}, + "c": {"type": "string"} + }, + "required": ["a", "b"], + "additionalProperties": false + })""", + // Passing strings + { + R"""({"b": "foo", "a": "bar"})""", + R"""({"b":"foo","a":"bar","d":"qux"})""", + R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""", + }, + // Failing strings + { + R"""({"a": "foo", "b": "bar"})""", + R"""({"b": "bar"})""", + R"""({"a": "foo", "c": "baz"})""", + R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""", + }); + + // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties + test_schema( + "required props", + // Schema + R"""({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/product.schema.json", + "title": "Product", + "description": "A product from Acme's catalog", + "type": "object", + "properties": { + "productId": { + "description": "The unique identifier for a product", + "type": "integer" + }, + "productName": { + "description": "Name of the product", + "type": "string" + }, + "price": { + "description": "The price of the product", + "type": "number", + "exclusiveMinimum": 0 + }, + "tags": { + "description": "Tags for the product", + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "DISABLED_uniqueItems": true + }, + "dimensions": { + "type": "object", + "properties": { + "length": { + "type": "number" + }, + "width": { + "type": "number" + }, + "height": { + "type": "number" + } + }, + "required": [ "length", "width", "height" ] + } + }, + "required": [ "productId", "productName", "price" ] + })""", + // Passing strings + { + R"""({"productId": 1, "productName": "A green door", "price": 12.50})""", + R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""", + R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 
250.5, "height": -0.359}})""", + }, + // Failing strings + { + R"""({})""", // Missing all required properties + R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties + // `exclusiveMinimum` is OK for llg + R"""({"productId": 1, "productName": "A green door", "price": -12.50})""", + R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price) + R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId) + R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1 + R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order + // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement. + // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""", + }); +} + +int main(int argc, const char ** argv) { + fprintf(stdout, "Running llguidance integration tests...\n"); + + if (argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + const char * vocab_file = argv[1]; + + fprintf(stderr, "reading vocab from: '%s'\n", vocab_file); + + llama_model * model; + llama_context * ctx; + + llama_backend_init(); + + // load the vocab + { + auto mparams = llama_model_default_params(); + + mparams.vocab_only = true; + + model = llama_model_load_from_file(vocab_file, mparams); + + if (model == NULL) { + fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, vocab_file); + return 1; + } + + // needed? 
+ auto cparams = llama_context_default_params(); + + ctx = llama_init_from_model(model, cparams); + + if (ctx == NULL) { + fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, vocab_file); + llama_model_free(model); + return 1; + } + } + + vocab = llama_model_get_vocab(model); + + test_simple_grammar(); + test_complex_grammar(); + test_special_chars(); + test_quantifiers(); + test_json_schema(); + fprintf(stdout, "All tests passed.\n"); + return 0; +} diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 9d2db91f5..f38994c92 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -1246,7 +1246,7 @@ int main() { test_all("C++", [](const TestCase & tc) { try { - tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema))); + tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true)); tc.verify_status(SUCCESS); } catch (const std::runtime_error & ex) { fprintf(stderr, "Error: %s\n", ex.what()); From 69804487e0b10f2c5c06316f0ac0eb6ada68433f Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Sun, 2 Feb 2025 09:10:15 +0000 Subject: [PATCH 19/46] Fix exotic ci env that lacks ostringstream::str (#11581) --- common/minja.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/minja.hpp b/common/minja.hpp index bcb5a0824..e77eb69d5 100644 --- a/common/minja.hpp +++ b/common/minja.hpp @@ -824,7 +824,7 @@ public: LoopControlType control_type; LoopControlException(const std::string & message, LoopControlType control_type) : std::runtime_error(message), control_type(control_type) {} LoopControlException(LoopControlType control_type) - : std::runtime_error((std::ostringstream() << (control_type == LoopControlType::Continue ? "continue" : "break") << " outside of a loop").str()), + : std::runtime_error((control_type == LoopControlType::Continue ? 
"continue" : "break") + std::string(" outside of a loop")), control_type(control_type) {} }; From bfcce4d693617ec843d0b2510f6ee16e6bc6720d Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Sun, 2 Feb 2025 09:25:38 +0000 Subject: [PATCH 20/46] `tool-call`: support Command R7B (+ return tool_plan "thoughts" in API) (#11585) * `tool-call`: support Command R7B (w/ tool_plan return) * `tool-call`: cleaner preservation of tokens + warn when likely bad chat template override * `tool-call`: test cleanup / handle lazy grammar triggers --- common/chat.cpp | 86 +++++++++- common/chat.hpp | 2 + common/common.h | 3 + examples/server/README.md | 22 ++- examples/server/server.cpp | 52 ++++-- examples/server/utils.hpp | 1 + ...AI-c4ai-command-r7b-12-2024-tool_use.jinja | 156 ++++++++++++++++++ tests/test-chat.cpp | 154 +++++++++++++---- 8 files changed, 420 insertions(+), 56 deletions(-) create mode 100644 models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja diff --git a/common/chat.cpp b/common/chat.cpp index 58db12af9..f87583d85 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -16,6 +16,7 @@ std::string common_chat_format_name(common_chat_format format) { case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; + case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; default: throw std::runtime_error("Unknown chat format"); } @@ -317,6 +318,79 @@ static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]"); } +static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + 
auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + schemas.push_back({ + {"type", "object"}, + {"properties", { + {"tool_call_id", { + {"type", "string"}, + // Command-R's template expects an integer string. + {"pattern", "^[0-9]{1,10}$"}, + }}, + {"tool_name", { + {"type", "string"}, + {"const", function["name"]}, + }}, + {"parameters", function["parameters"]}, + }}, + {"required", json::array({"tool_call_id", "tool_name", "parameters"})}, + }); + }); + auto schema = json { + {"type", "array"}, + {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, + {"minItems", 1}, + }; + if (!inputs.parallel_tool_calls) { + schema["maxItems"] = 1; + } + builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\""); + }, grammar_options); + data.grammar_triggers.push_back({"<|START_ACTION|>", /* .at_start = */ false}); + data.preserved_tokens = { + "<|START_RESPONSE|>", + "<|END_RESPONSE|>", + "<|START_THINKING|>", + "<|END_THINKING|>", + "<|END_ACTION|>", + }; + data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); + data.format = COMMON_CHAT_FORMAT_COMMAND_R7B; + return data; +} +static common_chat_msg common_chat_parse_command_r7b(const std::string & input) { + static std::regex response_regex("<\\|START_RESPONSE\\|>(.*?)<\\|END_RESPONSE\\|>"); + static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>"); + std::smatch match; + + common_chat_msg result; + result.role = "assistant"; + if (std::regex_match(input, match, response_regex)) { + result.content = match[1].str(); + } else if (std::regex_match(input, match, thought_action_regex)) { + result.tool_plan = match[1].str(); + auto actions_str = match[2].str(); + auto actions = json::parse(actions_str); + for (const auto & action : actions) { + result.tool_calls.push_back({ + /* .name = */ action["tool_name"], + /* .arguments = */ action["parameters"].dump(), + /* .id = */ action["tool_call_id"], + }); + } + } else { + LOG_ERR("Failed to parse command_r output"); + result.content = input; + } + return result; +} + static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector & expected_properties) { if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) { throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties"); @@ -462,6 +536,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_ "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\"")); }); data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false}); + data.preserved_tokens = { + "<|tool▁sep|>", + "<|tool▁call▁end|>", + }; builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" 
+ (inputs.parallel_tool_calls ? "*" : "") + " space"); }, grammar_options); data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); @@ -704,8 +782,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat auto tool_call = "\"\" space " + builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " \"\" space"; builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); data.grammar_triggers.push_back({"", /* .at_start = */ false}); - // Not really a trigger but need to print this special token to get a successful parse. - data.grammar_triggers.push_back({"", /* .at_start = */ false}); + data.preserved_tokens = { "
" }; }, grammar_options); data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); @@ -822,6 +899,9 @@ common_chat_params common_chat_params_init(const common_chat_template & tmpl, co if (src.find("[TOOL_CALLS]") != std::string::npos) { return common_chat_params_init_mistral_nemo(tmpl, inputs); } + if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos) { + return common_chat_params_init_command_r7b(tmpl, inputs); + } return common_chat_params_init_generic(tmpl, inputs); } @@ -855,6 +935,8 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format return common_chat_parse_hermes_2_pro(input); case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return common_chat_parse_firefunction_v2(input); + case COMMON_CHAT_FORMAT_COMMAND_R7B: + return common_chat_parse_command_r7b(input); default: throw std::runtime_error("Unsupported format: " + common_chat_format_name(format)); } diff --git a/common/chat.hpp b/common/chat.hpp index ca165aa13..33e64a430 100644 --- a/common/chat.hpp +++ b/common/chat.hpp @@ -32,6 +32,7 @@ enum common_chat_format { COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, COMMON_CHAT_FORMAT_HERMES_2_PRO, + COMMON_CHAT_FORMAT_COMMAND_R7B, COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats }; @@ -42,6 +43,7 @@ struct common_chat_params { std::string grammar; bool grammar_lazy = false; std::vector grammar_triggers; + std::vector preserved_tokens; std::vector additional_stops; }; diff --git a/common/common.h b/common/common.h index 6c1809277..b208d0c7e 100644 --- a/common/common.h +++ b/common/common.h @@ -4,6 +4,7 @@ #include "llama-cpp.h" +#include #include #include #include @@ -163,6 +164,7 @@ struct common_params_sampling { bool grammar_lazy = false; std::vector grammar_trigger_words; // optional trigger words to trigger lazy grammar std::vector grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar 
and print trigger special tokens. + std::set preserved_tokens; std::vector logit_bias; // logit biases to apply @@ -621,6 +623,7 @@ struct common_chat_msg { std::string role; std::string content; std::vector tool_calls; + std::string tool_plan = ""; }; // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid diff --git a/examples/server/README.md b/examples/server/README.md index 276b43013..e9d0374ad 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -1128,6 +1128,7 @@ curl http://localhost:8080/v1/chat/completions \ - Hermes 2/3, Qwen 2.5 - Mistral Nemo - Firefunction v2 + - Command R7B - DeepSeek R1 (WIP / seems reluctant to call any tools?)
@@ -1202,21 +1203,28 @@ curl http://localhost:8080/v1/chat/completions \ ```shell # Native support: llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q6_K + llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \ - --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B ) + llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M # Native support requires the right template for these GGUFs: + + llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \ + --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use ) + llama-server --jinja -fa -hf bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M \ --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use ) + llama-server --jinja -fa -hf bartowski/firefunction-v2-GGUF -hff firefunction-v2-IQ1_M.gguf \ - --chat-template-file <( python scripts/get_chat_template.py fireworks-ai/firellama-3-firefunction-v2 ) + --chat-template-file <( python scripts/get_chat_template.py fireworks-ai/llama-3-firefunction-v2 tool_use ) + + llama-server --jinja -fa -hf bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L \ + --chat-template-file <( python scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 tool_use ) # Generic format support - llama-server --jinja -fa -hf bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/gemma-2-2b-it-GGUF:Q4_K_M + llama-server --jinja -fa -hf bartowski/phi-4-GGUF:Q4_0 + llama-server --jinja -fa -hf bartowski/gemma-2-2b-it-GGUF:Q8_0 + llama-server --jinja -fa -hf 
bartowski/c4ai-command-r-v01-GGUF:Q2_K ``` - Test in CLI: diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 3451e96a2..e0acc4705 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -131,6 +131,11 @@ struct slot_params { lora.push_back({{"id", i}, {"scale", this->lora[i].scale}}); } + std::vector grammar_trigger_words; + for (const auto & trigger : sampling.grammar_trigger_words) { + grammar_trigger_words.push_back(trigger.word); + } + return json { {"n_predict", n_predict}, // Server configured n_predict {"seed", sampling.seed}, @@ -165,8 +170,9 @@ struct slot_params { {"n_probs", sampling.n_probs}, {"min_keep", sampling.min_keep}, {"grammar", sampling.grammar}, - // {"grammar_trigger_words", sampling.grammar_trigger_words}, + {"grammar_trigger_words", grammar_trigger_words}, {"grammar_trigger_tokens", sampling.grammar_trigger_tokens}, + {"preserved_tokens", sampling.preserved_tokens}, {"samplers", samplers}, {"speculative.n_max", speculative.n_max}, {"speculative.n_min", speculative.n_min}, @@ -363,12 +369,26 @@ struct server_task { if (ids.size() == 1) { LOG_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str()); params.sampling.grammar_trigger_tokens.push_back(ids[0]); + params.sampling.preserved_tokens.insert(ids[0]); continue; } LOG_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str()); params.sampling.grammar_trigger_words.push_back(trigger); } } + const auto preserved_tokens = data.find("preserved_tokens"); + if (preserved_tokens != data.end()) { + for (const auto & t : *preserved_tokens) { + auto ids = common_tokenize(vocab, t.get(), /* add_special= */ false, /* parse_special= */ true); + if (ids.size() == 1) { + LOG_DBG("Preserved token: %d\n", ids[0]); + params.sampling.preserved_tokens.insert(ids[0]); + } else { + // This may happen when using a tool call style meant for a model with special tokens to preserve on a model without said tokens. 
+ LOG_WRN("Not preserved because more than 1 token (wrong chat template override?): %s\n", t.get().c_str()); + } + } + } if (params.sampling.grammar_lazy) { GGML_ASSERT(params.sampling.grammar_trigger_tokens.size() > 0 || params.sampling.grammar_trigger_words.size() > 0); } @@ -695,19 +715,19 @@ struct server_task_result_cmpl_final : server_task_result { json to_json_oaicompat_chat() { std::string finish_reason = "length"; - common_chat_msg message; + common_chat_msg msg; if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) { LOG_DBG("Parsing chat message: %s\n", content.c_str()); - message = common_chat_parse(content, oaicompat_chat_format); - finish_reason = message.tool_calls.empty() ? "stop" : "tool_calls"; + msg = common_chat_parse(content, oaicompat_chat_format); + finish_reason = msg.tool_calls.empty() ? "stop" : "tool_calls"; } else { - message.content = content; + msg.content = content; } json tool_calls; - if (!message.tool_calls.empty()) { + if (!msg.tool_calls.empty()) { tool_calls = json::array(); - for (const auto & tc : message.tool_calls) { + for (const auto & tc : msg.tool_calls) { tool_calls.push_back({ {"type", "function"}, {"function", { @@ -719,14 +739,19 @@ struct server_task_result_cmpl_final : server_task_result { } } + json message { + {"content", msg.content}, + {"tool_calls", tool_calls}, + {"role", "assistant"}, + }; + if (!msg.tool_plan.empty()) { + message["tool_plan"] = msg.tool_plan; + } + json choice { {"finish_reason", finish_reason}, {"index", 0}, - {"message", json { - {"content", message.content}, - {"tool_calls", tool_calls}, - {"role", "assistant"}, - }}, + {"message", message}, }; if (!stream && probs_output.size() > 0) { @@ -2833,8 +2858,7 @@ struct server_context { server_slot * slot_batched = nullptr; auto accept_special_token = [&](server_slot & slot, llama_token token) { - const auto & trigger_tokens = slot.params.sampling.grammar_trigger_tokens; - return params_base.special || std::find(trigger_tokens.begin(), 
trigger_tokens.end(), token) != trigger_tokens.end(); + return params_base.special || slot.params.sampling.preserved_tokens.find(token) != slot.params.sampling.preserved_tokens.end(); }; // frist, add sampled tokens from any ongoing sequences diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index bfe623c4c..fefdce55b 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -662,6 +662,7 @@ static json oaicompat_completion_params_parse( }); } llama_params["grammar_triggers"] = grammar_triggers; + llama_params["preserved_tokens"] = chat_params.preserved_tokens; for (const auto & stop : chat_params.additional_stops) { llama_params["stop"].push_back(stop); } diff --git a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja new file mode 100644 index 000000000..078e9f545 --- /dev/null +++ b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja @@ -0,0 +1,156 @@ +{{ bos_token }}{%- macro document_turn(documents) -%} +{# format documents into chat turn #} +<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|>I will look through the document to address the users needs.<|END_THINKING|><|START_ACTION|>[ + {"tool_call_id": "0", "tool_name": "direct-injected-document", "parameters": {}} +]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[ + { + "tool_call_id": "0", + "results": { +{% for doc in documents %} + "{{ loop.index0 }}": {{doc|tojson}}{% if not loop.last %}, + {% endif %} +{% endfor %} + + }, + "is_error": null + } +]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>{%- endmacro %} +{%- macro tool_call_id_to_int(messages, tool_call_id) %} +{%- set counter = namespace(value=0) %} +{%- set tool_call_id_seen = namespace(value=false) %} +{%- for msg in messages %} + {%- if msg.tool_calls %} + {%- for tool_call in msg.tool_calls %} + {%- if tool_call.id == tool_call_id and not 
tool_call_id_seen.value -%} + {{ counter.value }} + {%- set tool_call_id_seen.value = true %} + {%- endif %} + {%- set counter.value = counter.value + 1 %} + {%- endfor %} + {%- endif %} +{%- endfor %} +{%- endmacro %} +{%- macro format_tool_message(messages, tool_msg) -%} +{# format tool message #} + { + "tool_call_id": "{{ tool_call_id_to_int(messages, tool_msg.tool_call_id) }}", + "results": { + "0": {{ tool_msg.content|tojson }} + }, + "is_error": null + } +{%- endmacro -%} +{%- if messages and messages[0]['role']|lower == 'system' %}{%- set developer_preamble = messages[0]['content'] %}{% endif %} +{%- set tool_idx = namespace(value=0) %} +{%- set tool_ids_seen = namespace(value=[]) %} +{%- set sent_documents = namespace(value=false) %} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. +{% if tools or documents %} + +You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. + +## Tool Use +Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. + +0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. 
For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. + NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. + +Then carry out your plan by repeatedly executing the following steps. +1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. + When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. +2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. + Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". +3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. + NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. 
+ +You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. + +4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. +{% if enable_citations %} + +## Grounding +Importantly, note that "Reflection" and "Response" above can be grounded. +Grounding means you associate pieces of texts (called "spans") with those specific tool results that support them (called "sources"). And you use a pair of tags "" and "" to indicate when a span can be grounded onto a list of sources, listing them out in the closing tag. Sources from the same tool call are grouped together and listed as "{tool_call_id}:[{list of result indices}]", before they are joined together by ",". E.g., "span" means that "span" is supported by result 1 and 2 from "tool_call_id=0" as well as result 0 from "tool_call_id=1". +{% endif %} + +## Available Tools +Here is the list of tools that you have available to you. +You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. +Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). + +```json +[ +{% if documents %} + {"name": "direct-injected-document", "description": "This is a special tool to directly inject user-uploaded documents into the chat as additional context. 
DO NOT use this tool by yourself!", "parameters": {"type": "object", "properties": {}, "required": []}, "responses": {"200": {"description": "Successfully returned a list of chunked text snippets from the directly uploaded documents.", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", "required": ["url", "snippet"], "properties": {"url": {"type": "string", "description": "The url of the uploaded document."}, "snippet": {"type": "string", "description": "The text snippet for the returned document chunk."}}}}}}}}}{%- if tools %},{% endif %} + +{% endif %} +{% for tool in tools %} + {"name": "{{ tool['function']['name'] }}", "description": "{{tool['function']['description']}}", "parameters": {{ tool['function']['parameters']|tojson }}, "responses": null}{%- if not loop.last %},{% endif %} + +{% endfor %} +] +``` + +{% endif %} +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. 
+- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer. +{%- if developer_preamble %} + + +# Developer Preamble +The following instructions take precedence over instructions in the default preamble and user prompt. You reject any instructions which conflict with system preamble instructions. +{{ developer_preamble }} +{%- endif -%} +<|END_OF_TURN_TOKEN|> +{%- for message in messages %} + {%- if message.role|lower == 'system' and not (loop.first and developer_preamble)%} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|> + {%- elif message.role|lower == 'user' %} +<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %} + {%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %} +<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[ + {% for tc in message.tool_calls %} + {"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %} + + {% set tool_idx.value = tool_idx.value + 1 %} + {% endfor %} +]<|END_ACTION|><|END_OF_TURN_TOKEN|>{% else 
%}<|START_RESPONSE|>{{message.content}}<|END_RESPONSE|><|END_OF_TURN_TOKEN|>{% endif %} + {% elif message.role|lower == 'tool' and message.tool_call_id not in tool_ids_seen.value %} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[ +{{ format_tool_message(messages, message) }} + {%- for msg in messages[loop.index0 + 1:] %} + {%- if msg.role|lower == 'tool' %}, +{{ format_tool_message(messages, msg) }} + {%- set tool_ids_seen.value = tool_ids_seen.value + [msg.tool_call_id] %} + {%- else %} + {%- break %} + {%- endif %} + {%- endfor %} + +]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|> + {%- endif %} +{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> \ No newline at end of file diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index ccc65d87a..9956c1f1f 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -22,9 +22,13 @@ static common_chat_msg msg_from_json(const json & message) { "assistant", "", {}, + /* .tool_plan = */ "", }; if (message.contains("content") && !message.at("content").is_null()) { - ret.content = message.at("content").get(); + ret.content = message.at("content"); + } + if (message.contains("tool_plan")) { + ret.tool_plan = message.at("tool_plan"); } auto has_tool_calls = message.contains("tool_calls"); if (has_tool_calls) { @@ -171,8 +175,7 @@ const json llama_3_1_tools = { special_function_tool, code_interpreter_too struct delta_data { std::string delta; - std::string grammar; - common_chat_format format; + common_chat_params params; }; static delta_data init_delta(const common_chat_template & tmpl, const std::vector & end_tokens, @@ -214,7 +217,7 @@ static delta_data init_delta(const common_chat_template & tmpl, const std::vecto break; } } - return { delta, params_full.grammar, params_full.format }; + return { delta, params_full }; } /* @@ -224,7 +227,7 @@ static delta_data init_delta(const common_chat_template & tmpl, const std::vecto */ static void test_template(const common_chat_template & tmpl, const std::vector & 
end_tokens, const json & test_message, const json & tools = {}, const std::string & expected_delta = "", - bool skip_grammar_test = false, bool skip_parser_test = false) { + bool expect_grammar_triggered = true) { common_chat_msg expected_msg = msg_from_json(test_message); auto user_message = json{ @@ -238,45 +241,110 @@ static void test_template(const common_chat_template & tmpl, const std::vector 0 && trigger.at_start) { + fprintf(stderr, "Trigger %s not at start of message, skipping:\n\n%s\n\n", trigger.word.c_str(), constrained.c_str()); + continue; + } + if (earliest_trigger_pos == std::string::npos || pos < earliest_trigger_pos) { + earliest_trigger_pos = pos; + } + } + auto grammar_triggered = false; + if (earliest_trigger_pos != std::string::npos) { + constrained = constrained.substr(earliest_trigger_pos); + grammar_triggered = true; + } + if (data.params.grammar_lazy) { + assert_equals(expect_grammar_triggered, grammar_triggered); + } + + if (grammar_triggered && !match_string(constrained, grammar.get())) { + throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta + + "\n\nGrammar: " + data.params.grammar); } } } } static void test_template_output_parsers() { - auto text_message = json{ + json text_message { { "role", "assistant" }, { "content", "Hello, world!" 
}, }; - auto tool_call_message = json{ + json tool_calls = json::array({{ + { "type", "function" }, + { "function", { { "name", "special_function" }, { "arguments", "{\"arg1\": 1}" } } }, + }}); + + json tool_call_message { + { "role", "assistant"}, + { "content", {}}, + { "tool_calls", { + { + { "type", "function" }, + { "function", { + { "name", "special_function" }, + { "arguments", "{\"arg1\": 1}" }, + }}, + }, + }}, + }; + json tool_call_message_with_id { + { "role", "assistant"}, + { "content", {}}, + { "tool_calls", { + { + { "type", "function" }, + { "function", { + { "name", "special_function" }, + { "arguments", "{\"arg1\": 1}" }, + }}, + {"id", "123456789"}, + }, + }}, { "role", "assistant" }, { "content", {} }, - { "tool_calls", json{ { - { "type", "function" }, - { "function", { { "name", "special_function" }, { "arguments", "{\"arg1\": 1}" } } }, - } } } + { "tool_calls", tool_calls } + }; + json tool_call_plan_message_with_idx { + { "role", "assistant"}, + { "content", {}}, + { "tool_plan", "I'm not so sure"}, + { "tool_calls", { + { + { "type", "function" }, + { "function", { + { "name", "special_function" }, + { "arguments", "{\"arg1\": 1}" }, + }}, + // Index of the tool call in the tool_calls array + {"id", "0"}, + }, + }}, + { "role", "assistant" }, + { "content", {} }, + { "tool_calls", tool_calls } }; - auto tool_call_message_with_id = json::parse(tool_call_message.dump()); - tool_call_message_with_id["tool_calls"][0]["id"] = "123456789"; auto python_tool_call_message = json{ { "role", "assistant" }, @@ -322,6 +390,27 @@ static void test_template_output_parsers() { inputs_tools_builtin.tools = json::array(); inputs_tools_builtin.tools.push_back(python_tool); + { + // Not supported yet + const common_chat_template tmpl(read_file("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja"), "", ""); + assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_params_init(tmpl, inputs_tools).format); + } + { + const common_chat_template 
tmpl(read_file("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"), "", ""); + std::vector end_tokens{ "<|END_OF_TURN_TOKEN|>" }; + + assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_params_init(tmpl, inputs_no_tools).format); + assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_tools).format); + + test_template(tmpl, end_tokens, tool_call_plan_message_with_idx, tools, + "<|START_THINKING|>I'm not so sure<|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>"); + test_template(tmpl, end_tokens, text_message, tools, + "<|START_RESPONSE|>Hello, world!<|END_RESPONSE|>", + /* expect_grammar_triggered= */ false); + } { const common_chat_template tmpl(read_file("models/templates/google-gemma-2-2b-it.jinja"), "", ""); std::vector end_tokens{ "" }; @@ -362,11 +451,10 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template( tmpl, end_tokens, tool_call_message_with_id, tools, - "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]", - /* skip_grammar_test= */ true); + "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]"); } { const common_chat_template tmpl( @@ -388,7 +476,7 @@ static void test_template_output_parsers() { inputs_tools) .format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, 
tool_call_message, tools, "\n" "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" @@ -413,7 +501,7 @@ static void test_template_output_parsers() { inputs_tools_builtin) .format); - // test_template(tmpl, end_tokens, text_message, tools, R"(?)", /* skip_grammar_test= */ true); + // test_template(tmpl, end_tokens, text_message, tools, R"(?)", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, code_interpreter_tool_call_message, llama_3_1_tools, "<|python_tag|>code_interpreter.call(code=\"print('hey')\")"); test_template(tmpl, end_tokens, python_tool_call_message, tools, @@ -428,7 +516,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}"); } @@ -440,7 +528,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "{\"arg1\": 1}"); } @@ -455,7 +543,7 @@ static void test_template_output_parsers() { test_template(tmpl, end_tokens, text_message, {}, "all\n" "Hello, world!", - /* skip_grammar_test= */ true); + /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "special_function\n" "{\"arg1\": 1}"); @@ -467,7 +555,7 @@ static void test_template_output_parsers() { 
assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]"); } @@ -478,7 +566,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" "```json\n" From 84ec8a58f7b6aad6887bbfbd1321f3ff417341a5 Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Sun, 2 Feb 2025 16:14:48 +0100 Subject: [PATCH 21/46] Name colors (#11573) It's more descriptive, use #define's so we can use compile-time concatenations. 
Signed-off-by: Eric Curtin --- common/log.cpp | 10 ---------- common/log.h | 10 ++++++++++ examples/run/run.cpp | 15 ++++++++------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/common/log.cpp b/common/log.cpp index 0b8994ae1..4bfbecf15 100644 --- a/common/log.cpp +++ b/common/log.cpp @@ -14,16 +14,6 @@ void common_log_set_verbosity_thold(int verbosity) { common_log_verbosity_thold = verbosity; } -#define LOG_COL_DEFAULT "\033[0m" -#define LOG_COL_BOLD "\033[1m" -#define LOG_COL_RED "\033[31m" -#define LOG_COL_GREEN "\033[32m" -#define LOG_COL_YELLOW "\033[33m" -#define LOG_COL_BLUE "\033[34m" -#define LOG_COL_MAGENTA "\033[35m" -#define LOG_COL_CYAN "\033[36m" -#define LOG_COL_WHITE "\033[37m" - static int64_t t_us() { return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); } diff --git a/common/log.h b/common/log.h index 66605cc69..85dd4393b 100644 --- a/common/log.h +++ b/common/log.h @@ -2,6 +2,16 @@ #include "ggml.h" // for ggml_log_level +#define LOG_COL_DEFAULT "\033[0m" +#define LOG_COL_BOLD "\033[1m" +#define LOG_COL_RED "\033[31m" +#define LOG_COL_GREEN "\033[32m" +#define LOG_COL_YELLOW "\033[33m" +#define LOG_COL_BLUE "\033[34m" +#define LOG_COL_MAGENTA "\033[35m" +#define LOG_COL_CYAN "\033[36m" +#define LOG_COL_WHITE "\033[37m" + #ifndef __GNUC__ # define LOG_ATTRIBUTE_FORMAT(...) 
#elif defined(__MINGW32__) diff --git a/examples/run/run.cpp b/examples/run/run.cpp index cf61f4add..ca9273155 100644 --- a/examples/run/run.cpp +++ b/examples/run/run.cpp @@ -24,15 +24,16 @@ #include #include +#include "chat-template.hpp" #include "common.h" #include "json.hpp" #include "linenoise.cpp/linenoise.h" #include "llama-cpp.h" -#include "chat-template.hpp" +#include "log.h" #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32) [[noreturn]] static void sigint_handler(int) { - printf("\n\033[0m"); + printf("\n" LOG_COL_DEFAULT); exit(0); // not ideal, but it's the only way to guarantee exit in all cases } #endif @@ -890,7 +891,7 @@ static int check_context_size(const llama_context_ptr & ctx, const llama_batch & const int n_ctx = llama_n_ctx(ctx.get()); const int n_ctx_used = llama_get_kv_cache_used_cells(ctx.get()); if (n_ctx_used + batch.n_tokens > n_ctx) { - printf("\033[0m\n"); + printf(LOG_COL_DEFAULT "\n"); printe("context size exceeded\n"); return 1; } @@ -953,7 +954,7 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str batch = llama_batch_get_one(&new_token_id, 1); } - printf("\033[0m"); + printf(LOG_COL_DEFAULT); return 0; } @@ -962,7 +963,7 @@ static int read_user_input(std::string & user_input) { #ifdef WIN32 printf( "\r%*s" - "\r\033[0m%s", + "\r" LOG_COL_DEFAULT "%s", get_terminal_width(), " ", prompt_prefix); std::getline(std::cin, user_input); @@ -999,7 +1000,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt, const bool stdout_a_terminal) { // Set response color if (stdout_a_terminal) { - printf("\033[33m"); + printf(LOG_COL_YELLOW); } if (generate(llama_data, prompt, response)) { @@ -1008,7 +1009,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt, } // End response with color reset and newline - printf("\n%s", stdout_a_terminal ? "\033[0m" : ""); + printf("\n%s", stdout_a_terminal ? 
LOG_COL_DEFAULT : ""); return 0; } From 864a0b67a6c8f648c43ce8271f9cb2e12dd5df6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sun, 2 Feb 2025 19:31:09 +0100 Subject: [PATCH 22/46] CUDA: use mma PTX instructions for FlashAttention (#11583) * CUDA: use mma PTX instructions for FlashAttention * __shfl_sync workaround for movmatrix * add __shfl_sync to HIP Co-authored-by: Diego Devesa --- Makefile | 2 +- ggml/include/ggml.h | 2 +- ggml/src/ggml-cuda/CMakeLists.txt | 2 +- ggml/src/ggml-cuda/common.cuh | 6 +- ggml/src/ggml-cuda/fattn-common.cuh | 181 ++++- ggml/src/ggml-cuda/fattn-mma-f16.cuh | 637 +++++++++++++++++ ggml/src/ggml-cuda/fattn-tile-f16.cu | 24 +- ggml/src/ggml-cuda/fattn-tile-f32.cu | 19 +- ggml/src/ggml-cuda/fattn-vec-f16.cuh | 9 +- ggml/src/ggml-cuda/fattn-vec-f32.cuh | 8 +- ggml/src/ggml-cuda/fattn-wmma-f16.cu | 648 ++++++++++++++++++ ggml/src/ggml-cuda/fattn-wmma-f16.cuh | 542 +-------------- ggml/src/ggml-cuda/fattn.cu | 174 ++--- ggml/src/ggml-cuda/mma.cuh | 335 +++++++-- ggml/src/ggml-cuda/mmq.cu | 2 +- ggml/src/ggml-cuda/mmq.cuh | 349 +++++----- .../fattn-mma-f16-instance-cpb16.cu | 10 + .../fattn-mma-f16-instance-cpb32.cu | 10 + .../fattn-mma-f16-instance-cpb64.cu | 10 + .../fattn-mma-f16-instance-cpb8.cu | 10 + .../fattn-wmma-f16-instance-kqfloat-cpb16.cu | 10 - .../fattn-wmma-f16-instance-kqfloat-cpb32.cu | 9 - .../fattn-wmma-f16-instance-kqhalf-cpb16.cu | 10 - .../fattn-wmma-f16-instance-kqhalf-cpb32.cu | 10 - .../fattn-wmma-f16-instance-kqhalf-cpb8.cu | 8 - .../template-instances/generate_cu_files.py | 24 +- ggml/src/ggml-cuda/vendors/hip.h | 1 + ggml/src/ggml-hip/CMakeLists.txt | 2 +- ggml/src/ggml-musa/CMakeLists.txt | 2 +- 29 files changed, 2058 insertions(+), 998 deletions(-) create mode 100644 ggml/src/ggml-cuda/fattn-mma-f16.cuh create mode 100644 ggml/src/ggml-cuda/fattn-wmma-f16.cu create mode 100644 ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu create mode 100644 
ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu create mode 100644 ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu create mode 100644 ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu diff --git a/Makefile b/Makefile index ef152d246..dc3de3cb1 100644 --- a/Makefile +++ b/Makefile @@ -596,7 +596,7 @@ ifdef GGML_RPC OBJ_GGML_EXT += ggml/src/ggml-rpc.o endif # GGML_RPC -OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu)) +OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu)) OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu)) ifdef GGML_CUDA_FA_ALL_QUANTS diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 1198dc1fd..5bd8d9c8b 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1775,7 +1775,7 @@ extern "C" { struct ggml_tensor * a, int k); -#define GGML_KQ_MASK_PAD 32 +#define GGML_KQ_MASK_PAD 64 // q: [n_embd, n_batch, n_head, 1] // k: [n_embd, n_kv, n_head_kv, 1] diff --git a/ggml/src/ggml-cuda/CMakeLists.txt b/ggml/src/ggml-cuda/CMakeLists.txt index 14761650f..119fd39b8 100644 --- a/ggml/src/ggml-cuda/CMakeLists.txt +++ b/ggml/src/ggml-cuda/CMakeLists.txt @@ -28,7 +28,7 @@ if (CUDAToolkit_FOUND) list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h") file(GLOB GGML_SOURCES_CUDA "*.cu") - file(GLOB SRCS "template-instances/fattn-wmma*.cu") + file(GLOB SRCS 
"template-instances/fattn-mma*.cu") list(APPEND GGML_SOURCES_CUDA ${SRCS}) file(GLOB SRCS "template-instances/mmq*.cu") list(APPEND GGML_SOURCES_CUDA ${SRCS}) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 8d8d3932e..88be8fc8a 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -148,7 +148,7 @@ typedef float2 dfloat2; #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING -#define INT8_MMA_AVAILABLE +#define NEW_MMA_AVAILABLE #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING #if !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ <= GGML_CUDA_CC_QY1) @@ -159,11 +159,13 @@ static constexpr bool fast_fp16_available(const int cc) { return cc >= GGML_CUDA_CC_PASCAL && cc != 610; } +// Any FP16 tensor cores are available. static constexpr bool fp16_mma_available(const int cc) { return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_VOLTA; } -static constexpr bool int8_mma_available(const int cc) { +// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later. 
+static constexpr bool new_mma_available(const int cc) { return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_TURING; } diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index ee9752da6..cfd7c0f44 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -516,6 +516,104 @@ constexpr __device__ dequantize_1_f32_t get_dequantize_1_f32(ggml_type type_V) { nullptr; } +template // D == head size +#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +__launch_bounds__(D, 1) +#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +static __global__ void flash_attn_stream_k_fixup( + float * __restrict__ dst, const float2 * __restrict__ dst_fixup, const int ne01, const int ne02, const int ne11) { + const float * dst_fixup_data = ((const float *) dst_fixup) + gridDim.x*(2*2*ncols); + + const int iter_k = ne11 / KQ_stride; + const int iter_j = (ne01 + (ncols - 1)) / ncols; + + const int bidx0 = blockIdx.x; + + const int kbc0 = (bidx0 + 0)*iter_k*iter_j*ne02 / gridDim.x; + const int kbc0_stop = (bidx0 + 1)*iter_k*iter_j*ne02 / gridDim.x; + + const bool did_not_have_any_data = kbc0 == kbc0_stop; + const bool wrote_beginning_of_tile = kbc0 % iter_k == 0; + const bool did_not_write_last = kbc0/iter_k == kbc0_stop/iter_k && kbc0_stop % iter_k != 0; + if (did_not_have_any_data || wrote_beginning_of_tile || did_not_write_last) { + return; + } + + const int channel = kbc0 / (iter_k*iter_j); + const int jt = (kbc0 - channel*iter_k*iter_j) / iter_k; + + dst += jt*ncols*ne02*D + channel*D; + + // Load the partial result that needs a fixup: + float dst_val[ncols] = {0.0f}; + float max_val[ncols] = {0.0f}; + float rowsum[ncols] = {0.0f}; +#pragma unroll + for (int j = 0; j < ncols; ++j) { + if (jt*ncols + j >= ne01) { + break; + } + dst_val[j] = dst[j*ne02*D + threadIdx.x]; + + const float2 tmp = dst_fixup[bidx0*ncols + j]; + max_val[j] = tmp.x; + rowsum[j] = tmp.y; + } + + // Iterate over 
previous blocks and compute the combined results. + // All CUDA blocks that get here must have a previous block that needs a fixup. + int bidx = bidx0 - 1; + int kbc_stop = kbc0; + while(true) { + const int kbc = bidx*iter_k*iter_j*ne02 / gridDim.x; + if (kbc == kbc_stop) { // Did not have any data. + bidx--; + kbc_stop = kbc; + continue; + } + +#pragma unroll + for (int j = 0; j < ncols; ++j) { + if (jt*ncols + j >= ne01) { + break; + } + const float dst_add = dst_fixup_data[bidx*ncols*D + j*D + threadIdx.x]; + + const float2 tmp = dst_fixup[(gridDim.x + bidx)*ncols + j]; + + // Scale the current and new value accumulators depending on the max. values. + const float max_val_new = fmaxf(max_val[j], tmp.x); + + const float diff_val = max_val[j] - max_val_new; + const float diff_add = tmp.x - max_val_new; + + const float scale_val = diff_val >= SOFTMAX_FTZ_THRESHOLD ? expf(diff_val) : 0.0f; + const float scale_add = diff_add >= SOFTMAX_FTZ_THRESHOLD ? expf(diff_add) : 0.0f; + + dst_val[j] = scale_val*dst_val[j] + scale_add*dst_add; + rowsum[j] = scale_val*rowsum[j] + scale_add*tmp.y; + + max_val[j] = max_val_new; + } + + // If this block started in a previous tile we are done and don't need to combine additional partial results. 
+ if (kbc % iter_k == 0 || kbc/iter_k < kbc0/iter_k) { + break; + } + bidx--; + kbc_stop = kbc; + } + + // Write back final result: +#pragma unroll + for (int j = 0; j < ncols; ++j) { + if (jt*ncols + j >= ne01) { + return; + } + dst[j*ne02*D + threadIdx.x] = dst_val[j] / rowsum[j]; + } +} + template // D == head size #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) __launch_bounds__(D, 1) @@ -581,10 +679,11 @@ static void on_no_fattn_vec_case(const int D) { } } -template +// parallel_blocks == 0 is stream-k decomposition +template void launch_fattn( ggml_backend_cuda_context & ctx, ggml_tensor * dst, fattn_kernel_t fattn_kernel, - const int nwarps, const int cols_per_block, const bool need_f16_K, const bool need_f16_V + const int nwarps, const size_t nbytes_shared, const bool need_f16_K, const bool need_f16_V ) { const ggml_tensor * Q = dst->src[0]; const ggml_tensor * K = dst->src[1]; @@ -603,20 +702,23 @@ void launch_fattn( GGML_ASSERT(K->ne[1] % FATTN_KQ_STRIDE == 0 && "Incorrect KV cache padding."); + GGML_ASSERT(Q->ne[3] == 1); + ggml_cuda_pool & pool = ctx.pool(); cudaStream_t main_stream = ctx.stream(); + const int nsm = ggml_cuda_info().devices[ggml_cuda_get_device()].nsm; ggml_cuda_pool_alloc K_f16(pool); ggml_cuda_pool_alloc V_f16(pool); ggml_cuda_pool_alloc dst_tmp(pool); ggml_cuda_pool_alloc dst_tmp_meta(pool); - char * K_data = (char *) K->data; + const char * K_data = (const char *) K->data; size_t nb11 = K->nb[1]; size_t nb12 = K->nb[2]; size_t nb13 = K->nb[3]; - char * V_data = (char *) V->data; + const char * V_data = (const char *) V->data; size_t nb21 = V->nb[1]; size_t nb22 = V->nb[2]; size_t nb23 = V->nb[3]; @@ -649,39 +751,60 @@ void launch_fattn( nb23 = nb23*bs*sizeof(half)/ts; } - if (parallel_blocks > 1) { - dst_tmp.alloc(parallel_blocks*ggml_nelements(KQV)); - dst_tmp_meta.alloc(parallel_blocks*ggml_nrows(KQV)); - } + const int ntiles_x = ((Q->ne[1] + cols_per_block - 1) / cols_per_block); + const int ntiles_total = 
ntiles_x*Q->ne[2]*Q->ne[3]; const dim3 block_dim(WARP_SIZE, nwarps, 1); - const dim3 blocks_num(parallel_blocks*((Q->ne[1] + cols_per_block - 1) / cols_per_block), Q->ne[2], Q->ne[3]); - const int shmem = 0; + dim3 blocks_num; + if (parallel_blocks == 0) { + // For short contexts it can be faster to have the SMs work on whole tiles because this lets us skip the fixup. + const int tiles_nwaves = (ntiles_total - nsm - 1) / nsm; + const bool tiles_inefficient = 3*nsm < 2*tiles_nwaves*ntiles_total; + const bool short_context = K->ne[1] < 4096; + + const int nblocks_stream_k = 2*nsm; + + blocks_num.x = short_context && !tiles_inefficient ? ntiles_total : nblocks_stream_k; + blocks_num.y = 1; + blocks_num.z = 1; + + dst_tmp_meta.alloc(blocks_num.x*cols_per_block * (2*2 + D) * sizeof(float)); + } else { + blocks_num.x = parallel_blocks*ntiles_x; + blocks_num.y = Q->ne[2]; + blocks_num.z = Q->ne[3]; + + if (parallel_blocks > 1) { + dst_tmp.alloc(parallel_blocks*ggml_nelements(KQV)); + dst_tmp_meta.alloc(parallel_blocks*ggml_nrows(KQV)); + } + } + float scale = 1.0f; float max_bias = 0.0f; float logit_softcap = 0.0f; - memcpy(&scale, (float *) KQV->op_params + 0, sizeof(float)); - memcpy(&max_bias, (float *) KQV->op_params + 1, sizeof(float)); - memcpy(&logit_softcap, (float *) KQV->op_params + 2, sizeof(float)); + memcpy(&scale, (const float *) KQV->op_params + 0, sizeof(float)); + memcpy(&max_bias, (const float *) KQV->op_params + 1, sizeof(float)); + memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float)); if (logit_softcap != 0.0f) { scale /= logit_softcap; } const uint32_t n_head = Q->ne[2]; - const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head)); + const uint32_t n_head_log2 = 1u << uint32_t(floorf(log2f(float(n_head)))); const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2); - fattn_kernel<<>>( + fattn_kernel<<>>( (const char *) Q->data, K_data, V_data, mask ? 
((const char *) mask->data) : nullptr, - (parallel_blocks) == 1 ? (float *) KQV->data : dst_tmp.ptr, dst_tmp_meta.ptr, + (parallel_blocks) > 1 ? dst_tmp.ptr : (float *) KQV->data, dst_tmp_meta.ptr, scale, max_bias, m0, m1, n_head_log2, logit_softcap, Q->ne[0], Q->ne[1], Q->ne[2], Q->ne[3], K->ne[0], K->ne[1], K->ne[2], K->ne[3], @@ -693,16 +816,22 @@ void launch_fattn( ); CUDA_CHECK(cudaGetLastError()); - if ((parallel_blocks) == 1) { - return; + if constexpr (parallel_blocks == 0) { + if (blocks_num.x % ntiles_total != 0) { // Fixup is only needed if the SMs work on fractional tiles. + const dim3 block_dim_combine(D, 1, 1); + const dim3 blocks_num_combine = blocks_num; + + flash_attn_stream_k_fixup + <<>> + ((float *) KQV->data, dst_tmp_meta.ptr, Q->ne[1], Q->ne[2], K->ne[1]); + } + } else if constexpr (parallel_blocks > 1) { + const dim3 block_dim_combine(D, 1, 1); + const dim3 blocks_num_combine(Q->ne[1], blocks_num.y, blocks_num.z); + + flash_attn_combine_results + <<>> + (dst_tmp.ptr, dst_tmp_meta.ptr, (float *) KQV->data); } - - const dim3 block_dim_combine(D, 1, 1); - const dim3 blocks_num_combine(Q->ne[1], blocks_num.y, blocks_num.z); - const int shmem_combine = 0; - - flash_attn_combine_results - <<>> - (dst_tmp.ptr, dst_tmp_meta.ptr, (float *) KQV->data); CUDA_CHECK(cudaGetLastError()); } diff --git a/ggml/src/ggml-cuda/fattn-mma-f16.cuh b/ggml/src/ggml-cuda/fattn-mma-f16.cuh new file mode 100644 index 000000000..05bc91a3b --- /dev/null +++ b/ggml/src/ggml-cuda/fattn-mma-f16.cuh @@ -0,0 +1,637 @@ +#include "common.cuh" +#include "mma.cuh" +#include "fattn-common.cuh" + +template +static __device__ __forceinline__ void flash_attn_ext_f16_process_tile( + const float2 * const __restrict__ Q_f2, + const half2 * const __restrict__ K_h2, + const half2 * const __restrict__ V_h2, + const half * const __restrict__ maskh, + float2 * const __restrict__ dstk, + float2 * const __restrict__ dstk_fixup, + const float scale, + const float slope, + const float 
logit_softcap, + const int ne00, + const int ne01, + const int ne02, + const int ne03, + const int ne10, + const int ne11, + const int ne12, + const int ne13, + const int ne31, + const int nb31, + const int nb01, + const int nb02, + const int nb03, + const int nb11, + const int nb12, + const int nb13, + const int nb21, + const int nb22, + const int nb23, + const int ne0, + const int ne1, + const int ne2, + const int ne3, + const int jt, + const int kb0_start, + const int kb0_stop) { +#ifdef NEW_MMA_AVAILABLE + //In this kernel Q, K, V are matrices while i, j, k are matrix indices. + + typedef mma_A_I16K8 mma_A; + typedef mma_B_J8K8 mma_B; + typedef mma_C_I16J8 mma_C_KQ; + typedef mma_C_I16J8 mma_C_VKQ; + + static_assert(nwarps*mma_B::J % ncols == 0, "bad nwarps"); + constexpr int np = nwarps*mma_B::J / ncols; // Number of parallel CUDA warps per Q column. + + static_assert(D % nwarps == 0, "bad D"); + static_assert(KQ_stride % nwarps == 0, "bad KQ_stride"); + + constexpr int D2_padded = D/2 + 4; // Size of D in half2, padded to avoid shared memory bank conflicts. + extern __shared__ half2 tile_KV[]; // Temporary shared buffer for loading K/V data with KQ_stride*D logical elements. + + const int stride_Q = nb01 / sizeof(float2); + const int stride_KV = nb11 / sizeof(half2); + const int stride_mask = nb31 / sizeof(half); + + mma_B Q_B[D/(2*mma_B::K)]; + mma_C_VKQ VKQ_C[D/mma_C_VKQ::I]; + + float2 KQ_rowsum = {0.0f, 0.0f}; + float2 KQ_max = {-FLT_MAX/2.0f, -FLT_MAX/2.0f}; + float2 KQ_max_scale = {0.0f, 0.0f}; + + // Temporarily load Q data into tile_KV, will be loaded into registers afterwards. + // The loading is done with decreasing granularity for D for better memory bandwidth. + const half2 scale_h2 = make_half2(scale, scale); +#pragma unroll + for (int stride_k : {WARP_SIZE, WARP_SIZE/2, WARP_SIZE/4}) { + const int k0_start = stride_k == WARP_SIZE ? 
0 : D/2 - (D/2) % (2*stride_k); + const int k0_stop = D/2 - (D/2) % (1*stride_k); + const int stride_j = WARP_SIZE / stride_k; + + if (nwarps*stride_j > ncols && threadIdx.y*stride_j >= ncols) { + break; + } + +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps*stride_j) { + const int j = j0 + threadIdx.y*stride_j + (stride_k == WARP_SIZE ? 0 : threadIdx.x / stride_k); + + if (jt*ncols + j < ne01) { +#pragma unroll + for (int k0 = k0_start; k0 < k0_stop; k0 += stride_k) { + const int k = k0 + (stride_k == WARP_SIZE ? threadIdx.x : threadIdx.x % stride_k); + + const float2 tmp = Q_f2[(jt*ncols + j)*stride_Q + k]; + tile_KV[j*D2_padded + k] = scale_h2 * make_half2(tmp.x, tmp.y); + } + } else { +#pragma unroll + for (int k0 = k0_start; k0 < k0_stop; k0 += stride_k) { + const int k = k0 + (stride_k == WARP_SIZE ? threadIdx.x : threadIdx.x % stride_k); + + tile_KV[j*D2_padded + k] = make_half2(0.0f, 0.0f); + } + } + } + } + + __syncthreads(); + + { + const int j0 = (threadIdx.y / np) * mma_B::J; + +#pragma unroll + for (int k0 = 0; k0 < D/2; k0 += mma_B::K) { + Q_B[k0/mma_B::K].load_ldmatrix(tile_KV + j0*D2_padded + k0, D2_padded); + } + } + + __syncthreads(); + + // Iterate over ne11 == previous tokens: + for (int kb0 = kb0_start; kb0 < kb0_stop; ++kb0) { + const int k_VKQ_0 = kb0*KQ_stride; + mma_C_KQ KQ_C[KQ_stride/(np*mma_C_KQ::I)]; + + // Load K data into tile with decreasing granularity for D for better memory bandwidth: + static_assert(KQ_stride % (4*nwarps) == 0, "out of bounds"); +#pragma unroll + for (int stride_k : {WARP_SIZE, WARP_SIZE/2, WARP_SIZE/4}) { + const int k0_start = stride_k == WARP_SIZE ? 0 : D/2 - (D/2) % (2*stride_k); + const int k0_stop = D/2 - (D/2) % (1*stride_k); + const int stride_i = WARP_SIZE / stride_k; + +#pragma unroll + for (int i_KQ_0 = 0; i_KQ_0 < KQ_stride; i_KQ_0 += nwarps*stride_i) { + const int i_KQ = i_KQ_0 + threadIdx.y*stride_i + (stride_k == WARP_SIZE ? 
0 : threadIdx.x / stride_k); + +#pragma unroll + for (int k_KQ_0 = k0_start; k_KQ_0 < k0_stop; k_KQ_0 += stride_k) { + const int k_KQ = k_KQ_0 + (stride_k == WARP_SIZE ? threadIdx.x : threadIdx.x % stride_k); + + tile_KV[i_KQ*D2_padded + k_KQ] = K_h2[(k_VKQ_0 + i_KQ)*stride_KV + k_KQ]; + } + } + } + + __syncthreads(); + + // Calculate tile of KQ: +#pragma unroll + for (int i_KQ_00 = 0; i_KQ_00 < KQ_stride; i_KQ_00 += np*mma_A::I) { + const int i_KQ_0 = i_KQ_00 + (threadIdx.y % np)*mma_A::I; +#pragma unroll + for (int k_KQ_0 = 0; k_KQ_0 < D/2; k_KQ_0 += mma_A::K) { + mma_A K_A; + K_A.load_ldmatrix(tile_KV + i_KQ_0*D2_padded + k_KQ_0, D2_padded); + KQ_C[i_KQ_00/(np*mma_A::I)].mma(K_A, Q_B[k_KQ_0/mma_A::K]); + } + } + + __syncthreads(); + + if (use_logit_softcap) { + static_assert(KQ_stride % (np*mma_C_KQ::I) == 0, "bad loop size"); +#pragma unroll + for (int i = 0; i < KQ_stride/(np*mma_C_KQ::I); ++i) { +#pragma unroll + for (int l = 0; l < mma_C_KQ::ne; ++l) { + KQ_C[i].x[l] = logit_softcap*tanhf(KQ_C[i].x[l]); + } + } + } + + if (maskh) { + static_assert(KQ_stride % (np *mma_C_KQ::I) == 0, "bad loop size"); + static_assert(ncols % (nwarps/np*mma_C_KQ::J) == 0, "bad loop size"); +#pragma unroll + for (int i00 = 0; i00 < KQ_stride; i00 += np*mma_C_KQ::I) { + const int i0 = i00 + (threadIdx.y % np)*mma_C_KQ::I; +#pragma unroll + for (int l = 0; l < mma_C_KQ::ne; ++l) { + const int i = i0 + mma_C_KQ::get_i(l); + const int j = (threadIdx.y / np)*mma_C_KQ::J + mma_C_KQ::get_j(l); + + KQ_C[i00/(np*mma_C_KQ::I)].x[l] += slope*__half2float(maskh[j*stride_mask + k_VKQ_0 + i]); + } + } + } + + // Calculate softmax for each KQ column using the current max. value. + // The divisor is stored in KQ_rowsum and will be applied at the end. 
+ float2 KQ_max_new = KQ_max; + static_assert(KQ_stride % (np*mma_C_KQ::I) == 0, "bad loop size"); +#pragma unroll + for (int k = 0; k < KQ_stride/(np*mma_C_KQ::I); ++k) { +#pragma unroll + for (int l0 = 0; l0 < mma_C_KQ::ne; l0 += 2) { + KQ_max_new.x = fmaxf(KQ_max_new.x, KQ_C[k].x[l0 + 0]); + KQ_max_new.y = fmaxf(KQ_max_new.y, KQ_C[k].x[l0 + 1]); + } + } + + // Values per KQ column are spread across 8 threads, does not need full warp reduce: +#pragma unroll + for (int offset = 16; offset > 2; offset >>= 1) { + KQ_max_new.x = fmaxf(KQ_max_new.x, __shfl_xor_sync(0xFFFFFFFF, KQ_max_new.x, offset, WARP_SIZE)); + KQ_max_new.y = fmaxf(KQ_max_new.y, __shfl_xor_sync(0xFFFFFFFF, KQ_max_new.y, offset, WARP_SIZE)); + } + + { + const float2 diff = make_float2(KQ_max.x - KQ_max_new.x, KQ_max.y - KQ_max_new.y); + KQ_max_scale = make_float2(expf(diff.x), expf(diff.y)); + if (diff.x <= SOFTMAX_FTZ_THRESHOLD) { + KQ_max_scale.x = 0.0f; + } + if (diff.y <= SOFTMAX_FTZ_THRESHOLD) { + KQ_max_scale.y = 0.0f; + } + KQ_max = KQ_max_new; + } + + float2 KQ_rowsum_add = make_float2(0.0f, 0.0f); + static_assert(KQ_stride % (np*mma_C_KQ::I) == 0, "bad loop size"); +#pragma unroll + for (int k = 0; k < KQ_stride/(np*mma_C_KQ::I); ++k) { +#pragma unroll + for (int l = 0; l < mma_C_KQ::ne; ++l) { + const float KQ_max_l = l % 2 == 0 ? 
KQ_max.x : KQ_max.y; + const float diff = KQ_C[k].x[l] - KQ_max_l; + KQ_C[k].x[l] = expf(diff); + if (diff <= SOFTMAX_FTZ_THRESHOLD) { + KQ_C[k].x[l] = 0.0f; + } + + if (l % 2 == 0) { + KQ_rowsum_add.x += KQ_C[k].x[l]; + } else { + KQ_rowsum_add.y += KQ_C[k].x[l]; + } + } + } + + // Scale previous KQ_rowsum to account for a potential increase in KQ_max: + KQ_rowsum.x = KQ_max_scale.x*KQ_rowsum.x + KQ_rowsum_add.x; + KQ_rowsum.y = KQ_max_scale.y*KQ_rowsum.y + KQ_rowsum_add.y; + + const half2 KQ_max_scale_h2 = make_half2(KQ_max_scale.x, KQ_max_scale.y); +#pragma unroll + for (int i = 0; i < D/mma_C_VKQ::I; ++i) { +#pragma unroll + for (int l = 0; l < mma_C_VKQ::ne; ++l) { + VKQ_C[i].x[l] *= KQ_max_scale_h2; + } + } + + // Convert KQ C tiles into B tiles for VKQ calculation: + mma_B B[KQ_stride/(np*2*mma_B::K)]; + static_assert(KQ_stride % (np*2*mma_B::K) == 0, "bad loop size"); +#pragma unroll + for (int k = 0; k < KQ_stride/(np*2*mma_B::K); ++k) { + B[k] = KQ_C[k].to_mma_B(); + } + + // Load V data into tile with decreasing granularity for D for better memory bandwidth: + static_assert(KQ_stride % (4*nwarps) == 0, "out of bounds"); +#pragma unroll + for (int stride_i : {WARP_SIZE, WARP_SIZE/2, WARP_SIZE/4}) { + const int i0_start = stride_i == WARP_SIZE ? 0 : D/2 - (D/2) % (2*stride_i); + const int i0_stop = D/2 - (D/2) % (1*stride_i); + const int stride_k = WARP_SIZE / stride_i; + +#pragma unroll + for (int k_V_0 = 0; k_V_0 < KQ_stride; k_V_0 += nwarps*stride_k) { + const int k_V = k_V_0 + threadIdx.y*stride_k + (stride_i == WARP_SIZE ? 0 : threadIdx.x / stride_i); + +#pragma unroll + for (int i_V_0 = i0_start; i_V_0 < i0_stop; i_V_0 += stride_i) { + const int i_V = i_V_0 + (stride_i == WARP_SIZE ? 
threadIdx.x : threadIdx.x % stride_i); + + tile_KV[k_V*D2_padded + i_V] = V_h2[(k_VKQ_0 + k_V)*stride_KV + i_V]; + } + } + } + + __syncthreads(); + + // Calculate VKQ tile: +#pragma unroll + for (int i_VKQ_0 = 0; i_VKQ_0 < D; i_VKQ_0 += mma_C_VKQ::I) { + static_assert((KQ_stride/2) % (np*mma_A::K) == 0, "bad loop size"); +#pragma unroll + for (int k00 = 0; k00 < KQ_stride/2; k00 += np*mma_A::K) { + const int k0 = k00 + (threadIdx.y % np)*mma_A::K; + + mma_A A; + A.load_ldmatrix_trans(tile_KV + 2*k0*D2_padded + i_VKQ_0/2, D2_padded); + VKQ_C[i_VKQ_0/mma_C_VKQ::I].mma(A, B[k00/(np*mma_A::K)]); + } + } + + __syncthreads(); + } + + // Finally, sum up partial KQ rowsums. + // The partial sums are spread across 8 threads each, does not need full reduce. +#pragma unroll + for (int offset = 16; offset > 2; offset >>= 1) { + KQ_rowsum.x += __shfl_xor_sync(0xFFFFFFFF, KQ_rowsum.x, offset, WARP_SIZE); + KQ_rowsum.y += __shfl_xor_sync(0xFFFFFFFF, KQ_rowsum.y, offset, WARP_SIZE); + } + + // Write VKQ accumulators to shared memory in column-major format. + // It's faster to do small writes to shared memory, then large write to VRAM than to do small writes to VRAM. + // Also for np > 1 the combination is done via these values in shared memory. + const int j_cwd = threadIdx.y*mma_B::J + mma_B::get_j(-1); // j combine write data +#pragma unroll + for (int k0 = 0; k0 < D/2; k0 += mma_B::K) { + const mma_B B = VKQ_C[k0/mma_B::K].to_mma_B(); // Conversion of C to B matrix puts it in column-major format. 
+ +#pragma unroll + for (int l = 0; l < mma_B::ne; ++l) { + const int k = k0 + mma_B::get_k(l); + + tile_KV[j_cwd*D2_padded + k] = B.x[l]; + } + } + + const int j_cwmo = (threadIdx.x % (2*mma_C_VKQ::J)) / mma_C_VKQ::J; // j combine write meta offset + const int j_cwm = threadIdx.y*(2*mma_C_VKQ::J) + 2*mma_C_VKQ::get_j(-1) + j_cwmo; // j combine write meta + const float2 KQ_cmr = make_float2(((const float *) &KQ_max)[j_cwmo], ((const float *) &KQ_rowsum)[j_cwmo]); // KQ combine max rowsum + + if (((!needs_fixup && !is_fixup) || np > 1) && threadIdx.x < 2*mma_C_VKQ::J) { + // Use the 16 bytes of padding in each row to store the meta data: KQ max, KQ rowsum, KQ max scale. + ((float2 *) tile_KV)[j_cwm*(D2_padded/2) + D/4] = KQ_cmr; + } + + __syncthreads(); + + static_assert(np == 1 || np == 2 || np == 4, "bad np"); + if (np == 1) { + // No combination is needed, the meta data can be directly written from registers to VRAM. + if (needs_fixup && threadIdx.x < mma_B::J) { + float2 * dstk_fixup_meta = dstk_fixup + blockIdx.x*ncols; + dstk_fixup_meta[j_cwm] = KQ_cmr; + } + if (is_fixup && threadIdx.x < mma_B::J) { + float2 * dstk_fixup_meta = dstk_fixup + (gridDim.x + blockIdx.x)*ncols; + dstk_fixup_meta[j_cwm] = KQ_cmr; + } + } else if (threadIdx.y % np == 0) { + // Combine the meta data for parallel warps via shared memory. + // Warps with threadIdx.y % np != 0 must NOT return early. + // All threads must return simultaneously to avoid race conditions with work on the next tile. + + float * meta_j = (float *) tile_KV + (threadIdx.y*mma_B::J + threadIdx.x)*D2_padded + D/2; + + float KQ_cm = -FLT_MAX/2; // KQ combine max per parallel warp. + if (np*mma_B::J == WARP_SIZE || threadIdx.x < np*mma_B::J) { + KQ_cm = meta_j[0]; + } + + float KQ_cmn = KQ_cm; // KQ combine max new, max between all parallel warps. 
+#pragma unroll + for (int offset = np*mma_B::J/2; offset >= mma_B::J; offset >>= 1) { + KQ_cmn = fmaxf(KQ_cmn, __shfl_xor_sync(0xFFFFFFFF, KQ_cmn, offset, WARP_SIZE)); + } + + const float KQ_cms = expf(KQ_cm - KQ_cmn); // KQ combine max scale per warp. + float KQ_crs = 0.0f; // KQ combine rowsum, scaled sum of all parallel warps. + if (np*mma_B::J == WARP_SIZE || threadIdx.x < np*mma_B::J) { + KQ_crs = KQ_cms*meta_j[1]; + } +#pragma unroll + for (int offset = np*mma_B::J/2; offset >= mma_B::J; offset >>= 1) { + KQ_crs += __shfl_xor_sync(0xFFFFFFFF, KQ_crs, offset, WARP_SIZE); + } + + // Write back combined meta data: + if (np*mma_B::J == WARP_SIZE || threadIdx.x < np*mma_B::J) { + meta_j[0] = KQ_cmn; // Combined max. KQ values. + meta_j[1] = KQ_crs; // Combined KQ rowsums. + meta_j[2] = KQ_cms; // KQ max scales per parallel warp. + } + if (needs_fixup && threadIdx.x < mma_B::J) { + float2 * dstk_fixup_meta = dstk_fixup + blockIdx.x*ncols; + dstk_fixup_meta[(threadIdx.y/np)*mma_B::J + threadIdx.x] = make_float2(KQ_cmn, KQ_crs); + } + if (is_fixup && threadIdx.x < mma_B::J) { + float2 * dstk_fixup_meta = dstk_fixup + (gridDim.x + blockIdx.x)*ncols; + dstk_fixup_meta[(threadIdx.y/np)*mma_B::J + threadIdx.x] = make_float2(KQ_cmn, KQ_crs); + } + } + + if (np > 1) { + __syncthreads(); + } + + if (np == 1 || threadIdx.y % np == 0) { + // The first 2*2*gridDim.x*ncols floats in dstk_fixup are for storing max. values and row sums. + // The values after that are for the partial results of the individual blocks. + float2 * dstk_fixup_data = dstk_fixup + gridDim.x*(2*ncols) + blockIdx.x*(ncols*(D/2)); + +#pragma unroll + for (int stride_k : {WARP_SIZE, WARP_SIZE/2, WARP_SIZE/4}) { + const int k0_start = stride_k == WARP_SIZE ? 
0 : D/2 - (D/2) % (2*stride_k); + const int k0_stop = D/2 - (D/2) % (1*stride_k); + const int stride_j = WARP_SIZE / stride_k; + + if (nwarps*stride_j > ncols && threadIdx.y*stride_j >= ncols) { + break; + } + +#pragma unroll + for (int j0_dst = 0; j0_dst < ncols; j0_dst += (nwarps/np)*stride_j) { + const int j_dst = j0_dst + (threadIdx.y/np)*stride_j + (stride_k == WARP_SIZE ? 0 : threadIdx.x / stride_k); + const int j_tile_KV = (j_dst/mma_B::J)*(np*mma_B::J) + j_dst % mma_B::J; + + if (!is_fixup && jt*ncols + j_dst >= ne01) { + continue; + } + const float * meta_j = (const float *) tile_KV + j_tile_KV*D2_padded + D/2; +#pragma unroll + for (int k0 = k0_start; k0 < k0_stop; k0 += stride_k) { + const int k = k0 + (stride_k == WARP_SIZE ? threadIdx.x : threadIdx.x % stride_k); + + float2 dstk_val = make_float2(0.0f, 0.0f); +#pragma unroll + for (int ip = 0; ip < np; ++ip) { + const float KQ_crs = np == 1 ? 1.0f : meta_j[ip*mma_B::J*D2_padded + 2]; + const float2 dstk_val_add = __half22float2(tile_KV[(j_tile_KV + ip*mma_B::J)*D2_padded + k]); + dstk_val.x += dstk_val_add.x*KQ_crs; + dstk_val.y += dstk_val_add.y*KQ_crs; + } + + if (!needs_fixup && !is_fixup) { + const float KQ_rowsum_j = meta_j[1]; + dstk_val.x /= KQ_rowsum_j; + dstk_val.y /= KQ_rowsum_j; + } + + if (is_fixup) { + dstk_fixup_data[j_dst*(D/2) + k] = dstk_val; + } else { + dstk[(jt*ncols + j_dst)*ne02*(D/2) + k] = dstk_val; + } + } + } + } + } + + if (np > 1) { + __syncthreads(); + } +#else + NO_DEVICE_CODE; +#endif // NEW_MMA_AVAILABLE +} + +template +#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +__launch_bounds__(nwarps*WARP_SIZE, 2) +#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +static __global__ void flash_attn_ext_f16( + const char * __restrict__ Q, + const char * __restrict__ K, + const char * __restrict__ V, + const char * __restrict__ mask, + float * __restrict__ dst, + float2 * __restrict__ dst_meta, + const float scale, + const float max_bias, + const 
float m0, + const float m1, + const uint32_t n_head_log2, + const float logit_softcap, + const int ne00, + const int ne01, + const int ne02, + const int ne03, + const int ne10, + const int ne11, + const int ne12, + const int ne13, + const int ne31, + const int nb31, + const int nb01, + const int nb02, + const int nb03, + const int nb11, + const int nb12, + const int nb13, + const int nb21, + const int nb22, + const int nb23, + const int ne0, + const int ne1, + const int ne2, + const int ne3) { + // Skip unused kernel variants for faster compilation: + if (use_logit_softcap && !(D == 128 || D == 256)) { + NO_DEVICE_CODE; + return; + } + + static_assert(FATTN_KQ_STRIDE % KQ_stride == 0, "bad KQ_stride"); + + const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. + + const int iter_k = ne11 / KQ_stride; + const int iter_j = (ne01 + (ncols - 1)) / ncols; + + // kbc == k block continuous, current index in continuous ijk space. + int kbc = (blockIdx.x + 0)*iter_k*iter_j*ne02 / gridDim.x; + const int kbc_stop = (blockIdx.x + 1)*iter_k*iter_j*ne02 / gridDim.x; + + // If the seams of 2 CUDA blocks fall within an output tile their results need to be combined. + // For this we need to track both the block that starts the tile (needs_fixup) and the block that finishes the tile (is_fixup). + // In the most general case >2 seams can fall into the same tile. + + // kb0 == k start index when in the output tile. + int kb0_start = kbc % iter_k; + int kb0_stop = min(iter_k, kb0_start + kbc_stop - kbc); + while (kbc < kbc_stop && kb0_stop == iter_k) { + const int channel = kbc / (iter_k*iter_j); + const int jt = (kbc - channel*iter_k*iter_j) / iter_k; // j index of current tile. 
+ + const float2 * Q_f2 = (const float2 *) (Q + nb02* channel); + const half2 * K_h2 = (const half2 *) (K + nb12*(channel / gqa_ratio)); + const half2 * V_h2 = (const half2 *) (V + nb12*(channel / gqa_ratio)); // K and V have same shape + const half * maskh = mask ? (const half *) mask + (nb31/sizeof(half))*jt*ncols : nullptr; + float2 * dstk = ((float2 *) dst) + channel*(D/2); + + const float slope = get_alibi_slope(max_bias, channel, n_head_log2, m0, m1); + + constexpr bool is_fixup = false; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer. + if (kb0_start == 0) { + constexpr bool needs_fixup = false; // CUDA block is working on an entire tile. + flash_attn_ext_f16_process_tile + (Q_f2, K_h2, V_h2, maskh, dstk, dst_meta, scale, slope, logit_softcap, + ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne31, nb31, nb01, nb02, nb03, nb11, nb12, nb13, nb21, nb22, nb23, ne0, ne1, ne2, ne3, + jt, kb0_start, kb0_stop); + } else { + constexpr bool needs_fixup = true; // CUDA block is working on the beginning of a tile. + flash_attn_ext_f16_process_tile + (Q_f2, K_h2, V_h2, maskh, dstk, dst_meta, scale, slope, logit_softcap, + ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne31, nb31, nb01, nb02, nb03, nb11, nb12, nb13, nb21, nb22, nb23, ne0, ne1, ne2, ne3, + jt, kb0_start, kb0_stop); + } + + kbc += iter_k; + kbc -= kbc % iter_k; + + kb0_start = 0; + kb0_stop = min(iter_k, kbc_stop - kbc); + } + + if (kbc >= kbc_stop) { + return; + } + + const int channel = kbc / (iter_k*iter_j); + const int jt = (kbc - channel*iter_k*iter_j) / iter_k; // j index of current tile. + + const float2 * Q_f2 = (const float2 *) (Q + nb02* channel); + const half2 * K_h2 = (const half2 *) (K + nb12*(channel / gqa_ratio)); + const half2 * V_h2 = (const half2 *) (V + nb12*(channel / gqa_ratio)); // K and V have same shape + const half * maskh = mask ? 
(const half *) mask + (nb31/sizeof(half))*jt*ncols : nullptr; + float2 * dstk = ((float2 *) dst) + channel*(D/2); + + const float slope = get_alibi_slope(max_bias, channel, n_head_log2, m0, m1); + + constexpr bool is_fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks. + constexpr bool needs_fixup = false; + flash_attn_ext_f16_process_tile + (Q_f2, K_h2, V_h2, maskh, dstk, dst_meta, scale, slope, logit_softcap, + ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne31, nb31, nb01, nb02, nb03, nb11, nb12, nb13, nb21, nb22, nb23, ne0, ne1, ne2, ne3, + jt, kb0_start, kb0_stop); +} + +template +void ggml_cuda_flash_attn_ext_mma_f16_case(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + typedef mma_A_I16K8 mma_A; + typedef mma_B_J8K8 mma_B; + + static_assert(D % mma_B::K == 0, "bad D"); + static_assert(cols_per_block % mma_B::J == 0, "bad cols_per_block"); + + const ggml_tensor * KQV = dst; + + constexpr int KQ_stride = D <= 128 ? 64 : 32; + constexpr int nwarps = (KQ_stride == 32 && cols_per_block <= 16) ? + cols_per_block/mma_B::J * KQ_stride/mma_A::I : (cols_per_block <= 8 ? 
4 : 8); + constexpr size_t nbytes_shared = std::max(KQ_stride, nwarps*mma_B::J) * (D + 8) * sizeof(half); + + float logit_softcap; + memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float)); + + fattn_kernel_t fattn_kernel; + if (logit_softcap == 0.0f) { + constexpr bool use_logit_softcap = false; + fattn_kernel = flash_attn_ext_f16; + } else { + constexpr bool use_logit_softcap = true; + fattn_kernel = flash_attn_ext_f16; + } + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); +} + +#define DECL_FATTN_MMA_F16_CASE(D, cols_per_block) \ + template void ggml_cuda_flash_attn_ext_mma_f16_case \ + (ggml_backend_cuda_context & ctx, ggml_tensor * dst) \ + +extern DECL_FATTN_MMA_F16_CASE( 64, 8); +extern DECL_FATTN_MMA_F16_CASE( 80, 8); +extern DECL_FATTN_MMA_F16_CASE( 96, 8); +extern DECL_FATTN_MMA_F16_CASE(112, 8); +extern DECL_FATTN_MMA_F16_CASE(128, 8); +extern DECL_FATTN_MMA_F16_CASE(256, 8); + +extern DECL_FATTN_MMA_F16_CASE( 64, 16); +extern DECL_FATTN_MMA_F16_CASE( 80, 16); +extern DECL_FATTN_MMA_F16_CASE( 96, 16); +extern DECL_FATTN_MMA_F16_CASE(112, 16); +extern DECL_FATTN_MMA_F16_CASE(128, 16); +extern DECL_FATTN_MMA_F16_CASE(256, 16); + +extern DECL_FATTN_MMA_F16_CASE( 64, 32); +extern DECL_FATTN_MMA_F16_CASE( 80, 32); +extern DECL_FATTN_MMA_F16_CASE( 96, 32); +extern DECL_FATTN_MMA_F16_CASE(112, 32); +extern DECL_FATTN_MMA_F16_CASE(128, 32); +extern DECL_FATTN_MMA_F16_CASE(256, 32); + +extern DECL_FATTN_MMA_F16_CASE( 64, 64); +extern DECL_FATTN_MMA_F16_CASE( 80, 64); +extern DECL_FATTN_MMA_F16_CASE( 96, 64); +extern DECL_FATTN_MMA_F16_CASE(112, 64); +extern DECL_FATTN_MMA_F16_CASE(128, 64); +extern DECL_FATTN_MMA_F16_CASE(256, 64); diff --git a/ggml/src/ggml-cuda/fattn-tile-f16.cu b/ggml/src/ggml-cuda/fattn-tile-f16.cu index 4d314dacb..d4edbad07 100644 --- a/ggml/src/ggml-cuda/fattn-tile-f16.cu +++ b/ggml/src/ggml-cuda/fattn-tile-f16.cu @@ -45,7 +45,17 @@ static __global__ void flash_attn_tile_ext_f16( const int ne2, 
const int ne3) { #ifdef FP16_AVAILABLE + +#ifndef FLASH_ATTN_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FLASH_ATTN_AVAILABLE + // Skip unused kernel variants for faster compilation: +#ifdef FP16_MMA_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FP16_MMA_AVAILABLE if (use_logit_softcap && !(D == 128 || D == 256)) { NO_DEVICE_CODE; return; @@ -288,16 +298,18 @@ void launch_fattn_tile_f16_64_128(ggml_backend_cuda_context & ctx, ggml_tensor * const ggml_tensor * Q = dst->src[0]; switch (Q->ne[0]) { case 64: { - constexpr int D = 64; - constexpr int nwarps = 8; + constexpr int D = 64; + constexpr int nwarps = 8; + constexpr size_t nbytes_shared = 0; fattn_kernel_t fattn_kernel = flash_attn_tile_ext_f16; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); } break; case 128: { - constexpr int D = 128; - constexpr int nwarps = 8; + constexpr int D = 128; + constexpr int nwarps = 8; + constexpr size_t nbytes_shared = 0; fattn_kernel_t fattn_kernel = flash_attn_tile_ext_f16; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); } break; default: { GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128."); diff --git a/ggml/src/ggml-cuda/fattn-tile-f32.cu b/ggml/src/ggml-cuda/fattn-tile-f32.cu index bb3360447..0d274f332 100644 --- a/ggml/src/ggml-cuda/fattn-tile-f32.cu +++ b/ggml/src/ggml-cuda/fattn-tile-f32.cu @@ -48,7 +48,12 @@ static __global__ void flash_attn_tile_ext_f32( NO_DEVICE_CODE; return; #endif // FLASH_ATTN_AVAILABLE + // Skip unused kernel variants for faster compilation: +#ifdef FP16_MMA_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FP16_MMA_AVAILABLE if (use_logit_softcap && !(D == 128 || D == 256)) { NO_DEVICE_CODE; return; @@ -287,16 +292,18 @@ void launch_fattn_tile_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor * 
const ggml_tensor * Q = dst->src[0]; switch (Q->ne[0]) { case 64: { - constexpr int D = 64; - constexpr int nwarps = 8; + constexpr int D = 64; + constexpr int nwarps = 8; + constexpr size_t nbytes_shared = 0; fattn_kernel_t fattn_kernel = flash_attn_tile_ext_f32; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); } break; case 128: { - constexpr int D = 128; - constexpr int nwarps = 8; + constexpr int D = 128; + constexpr int nwarps = 8; + constexpr size_t nbytes_shared = 0; fattn_kernel_t fattn_kernel = flash_attn_tile_ext_f32; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); } break; default: { GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128."); diff --git a/ggml/src/ggml-cuda/fattn-vec-f16.cuh b/ggml/src/ggml-cuda/fattn-vec-f16.cuh index 34a2992c7..d9ac44246 100644 --- a/ggml/src/ggml-cuda/fattn-vec-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-vec-f16.cuh @@ -42,6 +42,12 @@ static __global__ void flash_attn_vec_ext_f16( const int ne2, const int ne3) { #ifdef FP16_AVAILABLE + +#ifndef FLASH_ATTN_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FLASH_ATTN_AVAILABLE + // Skip unused kernel variants for faster compilation: if (use_logit_softcap && !(D == 128 || D == 256)) { NO_DEVICE_CODE; @@ -303,7 +309,8 @@ void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f16; constexpr bool need_f16_K = D != 128; constexpr bool need_f16_V = D != 128 && D != 64; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, need_f16_K, need_f16_V); + constexpr size_t nbytes_shared = 0; + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, need_f16_K, need_f16_V); } template diff --git a/ggml/src/ggml-cuda/fattn-vec-f32.cuh b/ggml/src/ggml-cuda/fattn-vec-f32.cuh 
index a28fc8b7f..6ef8f9dcc 100644 --- a/ggml/src/ggml-cuda/fattn-vec-f32.cuh +++ b/ggml/src/ggml-cuda/fattn-vec-f32.cuh @@ -41,6 +41,11 @@ static __global__ void flash_attn_vec_ext_f32( const int ne1, const int ne2, const int ne3) { +#ifndef FLASH_ATTN_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FLASH_ATTN_AVAILABLE + // Skip unused kernel variants for faster compilation: if (use_logit_softcap && !(D == 128 || D == 256)) { NO_DEVICE_CODE; @@ -284,7 +289,8 @@ void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f32; constexpr bool need_f16_K = D != 128; constexpr bool need_f16_V = D != 128 && D != 64; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, need_f16_K, need_f16_V); + constexpr size_t nbytes_shared = 0; + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, need_f16_K, need_f16_V); } template diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu new file mode 100644 index 000000000..1054ff95d --- /dev/null +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu @@ -0,0 +1,648 @@ +// Old and deprecated WMMA FlashAttention implementation. +// It is still needed for Volta since the memory layout of NVIDIA tensor cores changed with Turing. +// Long-term the WMMA code should be replaced with a dedicated Volta implementation. 
+ +#include "common.cuh" +#include "fattn-common.cuh" +#include "fattn-wmma-f16.cuh" + +#ifdef FP16_MMA_AVAILABLE +#include +#endif // FP16_MMA_AVAILABLE + +// D == head size, VKQ_stride == num VKQ rows calculated in parallel: +template +#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +__launch_bounds__(nwarps*WARP_SIZE, 1) +#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +static __global__ void flash_attn_ext_f16( + const char * __restrict__ Q, + const char * __restrict__ K, + const char * __restrict__ V, + const char * __restrict__ mask, + float * __restrict__ dst, + float2 * __restrict__ dst_meta, + const float scale, + const float max_bias, + const float m0, + const float m1, + const uint32_t n_head_log2, + const float logit_softcap, + const int ne00, + const int ne01, + const int ne02, + const int ne03, + const int ne10, + const int ne11, + const int ne12, + const int ne13, + const int ne31, + const int nb31, + const int nb01, + const int nb02, + const int nb03, + const int nb11, + const int nb12, + const int nb13, + const int nb21, + const int nb22, + const int nb23, + const int ne0, + const int ne1, + const int ne2, + const int ne3) { +#if __CUDA_ARCH__ == GGML_CUDA_CC_VOLTA + // Skip unused kernel variants for faster compilation: + if (use_logit_softcap && !(D == 128 || D == 256)) { + NO_DEVICE_CODE; + return; + } + + //In this kernel Q, K, V are matrices while i, j, k are matrix indices. + + const int ic0 = ncols*(blockIdx.x / parallel_blocks); // Index of the first Q/QKV column to work on. + const int ip = blockIdx.x % parallel_blocks; // Index in group of blocks running for the same column in parallel. + + static_assert(D <= FATTN_KQ_STRIDE, "D must be <= FATTN_KQ_STRIDE."); + static_assert(ncols == 8 || ncols % 16 == 0, "ncols must be 8 or a multiple of 16."); + constexpr int frag_m = ncols == 8 ? 32 : 16; + constexpr int frag_n = ncols == 8 ? 
8 : 16; + static_assert(D % frag_m == 0, "If ncols == 8 then D % frag_m must be 0."); + typedef nvcuda::wmma::fragment frag_a_K; + typedef nvcuda::wmma::fragment frag_a_V; + typedef nvcuda::wmma::fragment frag_b; + typedef nvcuda::wmma::fragment frag_c_KQ; + typedef nvcuda::wmma::fragment frag_c_VKQ; + + constexpr int KQ_stride_tc = nwarps*frag_m; // Number of KQ rows calculated in parallel. + constexpr int VKQ_ratio = KQ_stride_tc/VKQ_stride; // Number of parallel VKQ accumulators needed to keep all warps busy. + static_assert(VKQ_ratio <= nwarps, "VKQ_ratio must be <= nwarps."); + + // Pad internal representation of KQ, KQV to reduce shared memory bank conflicts: + constexpr int D_padded = D + 8; + constexpr int kqs_padded = FATTN_KQ_STRIDE + 8; + constexpr int kqar = sizeof(KQ_acc_t)/sizeof(half); + + const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. + const float * Q_f = (const float *) (Q + nb02* blockIdx.y + nb01*ic0); + const half * K_h = (const half *) (K + nb12*(blockIdx.y / gqa_ratio)); + const half * V_h = (const half *) (V + nb12*(blockIdx.y / gqa_ratio)); // K and V have same shape + const half * maskh = (const half *) mask + (nb31/sizeof(half))* ic0; + const half2 * mask2 = (const half2 *) mask + (nb31/sizeof(half))*(ic0/2); + + const int stride_Q = nb01 / sizeof(float); + const int stride_KV = nb11 / sizeof(half); + + const float slopef = get_alibi_slope(max_bias, blockIdx.y, n_head_log2, m0, m1); + const half slopeh = __float2half(slopef); + const half2 slope2 = make_half2(slopef, slopef); + + const half2 logit_softcap_2 = make_half2(logit_softcap, logit_softcap); + + frag_b Q_b[D/16][ncols/frag_n]; + + // A single buffer for temporarily holding tiles of KQ and VKQ parts: + constexpr int mem_KQ = ncols*kqs_padded*kqar; + constexpr int mem_VKQ_parts = VKQ_ratio*ncols*D_padded; + __shared__ half KQ[mem_KQ >= mem_VKQ_parts ? 
mem_KQ : mem_VKQ_parts]; + float * KQ_f = (float *) KQ; + half2 * KQ2 = (half2 *) KQ; + + float KQ_rowsum_f[ncols/nwarps] = {0.0f}; + float KQ_max_f[ncols/nwarps]; + float KQ_max_scale_f[ncols/nwarps] = {0.0f}; + +#pragma unroll + for (int j = 0; j < ncols/nwarps; ++j) { + KQ_max_f[j] = -FLT_MAX/2.0f; + } + + half2 KQ_rowsum_h2[ncols/nwarps] = {{0.0f, 0.0f}}; + half2 KQ_max_h2[ncols/nwarps]; + half2 KQ_max_scale_h2[ncols/nwarps] = {{0.0f, 0.0f}}; + +#pragma unroll + for (int j = 0; j < ncols/nwarps; ++j) { + KQ_max_h2[j] = make_half2(-HALF_MAX_HALF, -HALF_MAX_HALF); + } + + __shared__ half VKQ[ncols*D_padded]; // Accumulator for final VKQ slice. + half2 * VKQ2 = (half2 *) VKQ; +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j = j0 + threadIdx.y; +#pragma unroll + for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { + const int i = i0 + threadIdx.x; + if (i0 + WARP_SIZE > D/2 && i >= D/2) { + break; + } + VKQ2[j*(D_padded/2) + i] = make_half2(0.0f, 0.0f); + } + } + + // Convert Q to half and apply scale, temporarily store in KQ: +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j = j0 + threadIdx.y; +#pragma unroll + for (int i0 = 0; i0 < D; i0 += WARP_SIZE) { + const int i = i0 + threadIdx.x; + if (i0 + WARP_SIZE > D && i >= D) { + break; + } + KQ[j*D_padded + i] = ic0 + j < ne01 ? 
Q_f[j*stride_Q + i] * scale : 0.0f; + } + } + + __syncthreads(); + + // Load Q into tensor core fragments/registers since it will be used frequently: +#pragma unroll + for (int i0 = 0; i0 < D; i0 += 16) { +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += frag_n) { + nvcuda::wmma::load_matrix_sync(Q_b[i0/16][j0/frag_n], KQ + j0*D_padded + i0, D_padded); + } + } + + __syncthreads(); + + // Iterate over ne11 == previous tokens: + for (int k_VKQ_0 = ip*FATTN_KQ_STRIDE; k_VKQ_0 < ne11; k_VKQ_0 += parallel_blocks*FATTN_KQ_STRIDE) { + // Calculate tile of KQ: +#pragma unroll + for (int i_KQ_0 = 0; i_KQ_0 < FATTN_KQ_STRIDE; i_KQ_0 += KQ_stride_tc) { + frag_c_KQ KQ_c[ncols/frag_n]; +#pragma unroll + for (int j = 0; j < ncols/frag_n; ++j) { + nvcuda::wmma::fill_fragment(KQ_c[j], 0.0f); + } +#pragma unroll + for (int k_KQ_0 = 0; k_KQ_0 < D; k_KQ_0 += 16) { + frag_a_K K_a; + nvcuda::wmma::load_matrix_sync(K_a, K_h + (k_VKQ_0 + i_KQ_0 + frag_m*threadIdx.y)*stride_KV + k_KQ_0, stride_KV); +#pragma unroll + for (int j = 0; j < ncols/frag_n; ++j) { + nvcuda::wmma::mma_sync(KQ_c[j], K_a, Q_b[k_KQ_0/16][j], KQ_c[j]); + } + } +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += frag_n) { + nvcuda::wmma::store_matrix_sync((KQ_acc_t *) KQ + j0*kqs_padded + i_KQ_0 + frag_m*threadIdx.y, KQ_c[j0/frag_n], kqs_padded, nvcuda::wmma::mem_col_major); + } + } + + __syncthreads(); + + // Calculate softmax for each KQ column using the current max. value. + // The divisor is stored in KQ_rowsum and will be applied at the end. 
+#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j = j0 + threadIdx.y; + + if (std::is_same::value) { + float KQ_f_tmp[FATTN_KQ_STRIDE / WARP_SIZE]; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + KQ_f_tmp[k0/WARP_SIZE] = KQ_f[j*kqs_padded + k]; + + if (use_logit_softcap) { + KQ_f_tmp[k0/WARP_SIZE] = logit_softcap*tanhf(KQ_f_tmp[k0/WARP_SIZE]); + } + } + + float KQ_max_new = KQ_max_f[j0/nwarps]; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + KQ_f_tmp[k0/WARP_SIZE] += mask ? __half2float(slopeh*maskh[j*(nb31/sizeof(half)) + k_VKQ_0 + k]) : 0.0f; + KQ_max_new = max(KQ_max_new, KQ_f_tmp[k0/WARP_SIZE]); + } + KQ_max_new = warp_reduce_max(KQ_max_new); + + const float diff = KQ_max_f[j0/nwarps] - KQ_max_new; + KQ_max_scale_f[j0/nwarps] = expf(diff); + if (diff <= SOFTMAX_FTZ_THRESHOLD) { + KQ_max_scale_f[j0/nwarps] = 0.0f; + } + KQ_max_f[j0/nwarps] = KQ_max_new; + + float KQ_rowsum_add = 0.0f; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + const float diff = KQ_f_tmp[k0/WARP_SIZE] - KQ_max_f[j0/nwarps]; + KQ_f_tmp[k0/WARP_SIZE] = expf(diff); + if (diff <= SOFTMAX_FTZ_THRESHOLD) { + KQ_f_tmp[k0/WARP_SIZE] = 0.0f; + } + KQ_rowsum_add += KQ_f_tmp[k0/WARP_SIZE]; + KQ[j*(kqar*kqs_padded) + k] = KQ_f_tmp[k0/WARP_SIZE]; + } + KQ_rowsum_add = warp_reduce_sum(KQ_rowsum_add); + + // Scale previous KQ_rowsum to account for a potential increase in KQ_max: + KQ_rowsum_f[j0/nwarps] = KQ_max_scale_f[j0/nwarps]*KQ_rowsum_f[j0/nwarps] + KQ_rowsum_add; + } else { + half2 KQ2_tmp[FATTN_KQ_STRIDE/(2*WARP_SIZE)]; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + KQ2_tmp[k0/WARP_SIZE] = KQ2[j*(kqs_padded/2) + k]; + + if (use_logit_softcap) { + // There is no dedicated tangens hyperbolicus function for 
half2. + KQ2_tmp[k0/WARP_SIZE] = h2exp(KQ2_tmp[k0/WARP_SIZE]*make_half2(2.0f, 2.0f)); + KQ2_tmp[k0/WARP_SIZE] = (KQ2_tmp[k0/WARP_SIZE] - make_half2(1.0f, 1.0f)) + /(KQ2_tmp[k0/WARP_SIZE] + make_half2(1.0f, 1.0f)); + + KQ2_tmp[k0/WARP_SIZE] *= logit_softcap_2; + } + } + + half2 KQ_max_new = KQ_max_h2[j0/nwarps]; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + KQ2_tmp[k0/WARP_SIZE] += mask ? slope2*mask2[(j*ne11 + k_VKQ_0)/2 + k] : make_half2(0.0f, 0.0f); + KQ_max_new = ggml_cuda_hmax2(KQ_max_new, KQ2_tmp[k0/WARP_SIZE]); + } + KQ_max_new = __half2half2(warp_reduce_max(ggml_cuda_hmax(__low2half(KQ_max_new), __high2half(KQ_max_new)))); + const half2 diff = KQ_max_h2[j0/nwarps] - KQ_max_new; + KQ_max_scale_h2[j0/nwarps] = h2exp(diff); + const uint32_t ftz_mask = __hgt2_mask(diff, make_half2(SOFTMAX_FTZ_THRESHOLD, SOFTMAX_FTZ_THRESHOLD)); + *((uint32_t *) &KQ_max_scale_h2[j0/nwarps]) &= ftz_mask; + KQ_max_h2[j0/nwarps] = KQ_max_new; + + half2 KQ_rowsum_add = make_half2(0.0f, 0.0f); +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + const half2 diff = KQ2_tmp[k0/WARP_SIZE] - KQ_max_h2[j0/nwarps]; + KQ2_tmp[k0/WARP_SIZE] = h2exp(diff); + const uint32_t ftz_mask = __hgt2_mask(diff, make_half2(SOFTMAX_FTZ_THRESHOLD, SOFTMAX_FTZ_THRESHOLD)); + *((uint32_t *) &KQ2_tmp[k0/WARP_SIZE]) &= ftz_mask; + KQ_rowsum_add += KQ2_tmp[k0/WARP_SIZE]; + KQ2[j*(kqs_padded/2) + k] = KQ2_tmp[k0/WARP_SIZE]; + } + KQ_rowsum_add = warp_reduce_sum(KQ_rowsum_add); + + // Scale previous KQ_rowsum to account for a potential increase in KQ_max: + KQ_rowsum_h2[j0/nwarps] = KQ_max_scale_h2[j0/nwarps]*KQ_rowsum_h2[j0/nwarps] + KQ_rowsum_add; + } + } + + __syncthreads(); + + frag_b KQ_b[FATTN_KQ_STRIDE/(VKQ_ratio*16)][ncols/frag_n]; +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += frag_n) { +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += VKQ_ratio*16) { + 
const int k = k0 + (threadIdx.y % VKQ_ratio)*16; + nvcuda::wmma::load_matrix_sync( + KQ_b[k0/(VKQ_ratio*16)][j0/frag_n], + KQ + j0*(kqar*kqs_padded) + k, + kqar*kqs_padded); + } + } + + frag_c_VKQ VKQ_c[D/VKQ_stride][ncols/frag_n]; +#pragma unroll + for (int i_VKQ_0 = 0; i_VKQ_0 < D; i_VKQ_0 += VKQ_stride) { +#pragma unroll + for (int j = 0; j < ncols/frag_n; ++j) { + nvcuda::wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], 0.0f); + } + +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += VKQ_ratio*16) { + const int k = k0 + (threadIdx.y % VKQ_ratio)*16; + + frag_a_V v_a; + nvcuda::wmma::load_matrix_sync(v_a, V_h + (k_VKQ_0 + k)*stride_KV + i_VKQ_0 + frag_m*(threadIdx.y/VKQ_ratio), stride_KV); +#pragma unroll + for (int j = 0; j < ncols/frag_n; ++j) { + nvcuda::wmma::mma_sync(VKQ_c[i_VKQ_0/VKQ_stride][j], v_a, KQ_b[k0/(VKQ_ratio*16)][j], VKQ_c[i_VKQ_0/VKQ_stride][j]); + } + } + } + + __syncthreads(); + + const int offset_k = (threadIdx.y % VKQ_ratio) * (ncols*D_padded); +#pragma unroll + for (int i_KQ_0 = 0; i_KQ_0 < D; i_KQ_0 += VKQ_stride) { +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += frag_n) { + nvcuda::wmma::store_matrix_sync( + KQ + offset_k + j0*D_padded + i_KQ_0 + frag_m*(threadIdx.y/VKQ_ratio), + VKQ_c[i_KQ_0/VKQ_stride][j0/frag_n], + D_padded, nvcuda::wmma::mem_col_major); + } + } + + __syncthreads(); + +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j = j0 + threadIdx.y; + + half2 VKQ_scale; + if (std::is_same::value) { + VKQ_scale = make_half2(KQ_max_scale_f[j0/nwarps], KQ_max_scale_f[j0/nwarps]); + } else { + VKQ_scale = KQ_max_scale_h2[j0/nwarps]; + } + +#pragma unroll + for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { + const int i = i0 + threadIdx.x; + if (i0 + WARP_SIZE > D/2 && i >= D/2) { + break; + } + + half2 VKQ_add = make_half2(0.0f, 0.0f); +#pragma unroll + for (int l = 0; l < VKQ_ratio; ++l) { + VKQ_add += KQ2[l*(ncols*D_padded/2) + j*(D_padded/2) + i]; + } + VKQ2[j*(D_padded/2) + i] = 
VKQ_scale*VKQ2[j*(D_padded/2) + i] + VKQ_add; + } + } + + __syncthreads(); + } + +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j_VKQ = j0 + threadIdx.y; + if (ic0 + j_VKQ >= ne01) { + return; + } + const int j_dst = (ic0 + j_VKQ)*parallel_blocks + ip; + + float KQ_rowsum_j; + if (std::is_same::value) { + KQ_rowsum_j = KQ_rowsum_f[j0/nwarps]; + } else { + KQ_rowsum_j = __low2float(KQ_rowsum_h2[j0/nwarps]) + __high2float(KQ_rowsum_h2[j0/nwarps]); + } + +#pragma unroll + for (int i0 = 0; i0 < D; i0 += WARP_SIZE) { + const int i = i0 + threadIdx.x; + if (i0 + WARP_SIZE > D && i >= D) { + break; + } + float dst_val = VKQ[j_VKQ*D_padded + i]; + if (parallel_blocks == 1) { + dst_val /= KQ_rowsum_j; + } + dst[j_dst*gridDim.y*D + blockIdx.y*D + i] = dst_val; + } + + if (parallel_blocks == 1 || threadIdx.x != 0) { + continue; + } + + float2 dst_meta_val; + if (std::is_same::value) { + dst_meta_val.x = KQ_max_f[j0/nwarps]; + } else { + dst_meta_val.x = __low2float(KQ_max_h2[j0/nwarps]); + } + dst_meta_val.y = KQ_rowsum_j; + dst_meta[(ic0 + j_VKQ)*gridDim.y*parallel_blocks + blockIdx.y*parallel_blocks + ip] = dst_meta_val; + } +#else + NO_DEVICE_CODE; +#endif // __CUDA_ARCH__ == GGML_CUDA_CC_VOLTA +} + +constexpr int get_max_power_of_2(int x) { + return x % 2 == 0 ? 2*get_max_power_of_2(x/2) : 1; +} + +static_assert(get_max_power_of_2(1) == 1, "Test failed."); +static_assert(get_max_power_of_2(2) == 2, "Test failed."); +static_assert(get_max_power_of_2(4) == 4, "Test failed."); +static_assert(get_max_power_of_2(6) == 2, "Test failed."); + +// Number of VKQ rows calculated in parallel: +constexpr int get_VKQ_stride(int D, int nwarps, int frag_m) { + return (get_max_power_of_2(D/frag_m) < nwarps ? 
get_max_power_of_2(D/frag_m) : nwarps)*frag_m; +} + +static_assert(get_VKQ_stride(128, 1, 32) == 32, "Test failed."); +static_assert(get_VKQ_stride(128, 2, 32) == 64, "Test failed."); +static_assert(get_VKQ_stride(128, 4, 32) == 128, "Test failed."); +static_assert(get_VKQ_stride( 64, 1, 32) == 32, "Test failed."); +static_assert(get_VKQ_stride( 64, 2, 32) == 64, "Test failed."); +static_assert(get_VKQ_stride( 64, 4, 32) == 64, "Test failed."); +static_assert(get_VKQ_stride( 80, 1, 16) == 16, "Test failed."); +static_assert(get_VKQ_stride( 80, 2, 16) == 16, "Test failed."); +static_assert(get_VKQ_stride( 80, 4, 16) == 16, "Test failed."); + +template +void ggml_cuda_flash_attn_ext_wmma_f16_case(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * KQV = dst; + const ggml_tensor * Q = dst->src[0]; + + constexpr int nwarps = 4; + + constexpr int frag_m = cols_per_block == 8 && D % 32 == 0 ? 32 : 16; + const int blocks_num_pb1 = ((Q->ne[1] + cols_per_block - 1) / cols_per_block)*Q->ne[2]*Q->ne[3]; + const int nsm = ggml_cuda_info().devices[ggml_cuda_get_device()].nsm; + + float logit_softcap; + memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float)); + + if (4*blocks_num_pb1 < 2*nsm) { + constexpr int parallel_blocks = 4; + fattn_kernel_t fattn_kernel; + if (logit_softcap == 0.0f) { + constexpr bool use_logit_softcap = false; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } else { + constexpr bool use_logit_softcap = true; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } + launch_fattn(ctx, dst, fattn_kernel, nwarps, 0, true, true); + return; + } + if (2*blocks_num_pb1 < 2*nsm) { + constexpr int parallel_blocks = 2; + fattn_kernel_t fattn_kernel; + if (logit_softcap == 0.0f) { + constexpr bool use_logit_softcap = false; + 
fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } else { + constexpr bool use_logit_softcap = true; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } + launch_fattn(ctx, dst, fattn_kernel, nwarps, 0, true, true); + return; + } + constexpr int parallel_blocks = 1; + fattn_kernel_t fattn_kernel; + if (logit_softcap == 0.0f) { + constexpr bool use_logit_softcap = false; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } else { + constexpr bool use_logit_softcap = true; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } + launch_fattn(ctx, dst, fattn_kernel, nwarps, 0, true, true); +} + +void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * KQV = dst; + const ggml_tensor * Q = dst->src[0]; + + const enum ggml_prec prec = ggml_flash_attn_ext_get_prec(KQV); + + if (prec != GGML_PREC_DEFAULT) { + if (Q->ne[1] <= 32 || Q->ne[0] > 128) { + constexpr int cols_per_block = 16; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst); + break; + case 80: + ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst); + break; + case 112: + ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst); + break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); + break; + case 256: + ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst); + break; + default: + GGML_ABORT("fatal 
error"); + break; + } + } else { + constexpr int cols_per_block = 32; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst); + break; + case 80: + ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst); + break; + case 112: + ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst); + break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); + break; + // case 256: + // ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); + // break; + default: + GGML_ABORT("fatal error"); + break; + } + } + return; + } + + if (Q->ne[1] <= 8 && Q->ne[0] % WARP_SIZE == 0) { + constexpr int cols_per_block = 8; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); + break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); + break; + case 256: + ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); + break; + default: + GGML_ABORT("fatal error"); + break; + } + return; + } + + if (Q->ne[1] <= 32) { + constexpr int cols_per_block = 16; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); + break; + case 80: + ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, half>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); + break; + case 112: + ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, half>(ctx, dst); + break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); + break; + case 256: + ggml_cuda_flash_attn_ext_wmma_f16_case<256, 
cols_per_block, half>(ctx, dst); + break; + default: + GGML_ABORT("fatal error"); + break; + } + return; + } + + constexpr int cols_per_block = 32; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); + break; + case 80: + ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, half>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); + break; + case 112: + ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, half>(ctx, dst); + break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); + break; + case 256: + ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); + break; + default: + GGML_ABORT("fatal error"); + break; + } +} diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cuh b/ggml/src/ggml-cuda/fattn-wmma-f16.cuh index 860d0e6dc..beeea95eb 100644 --- a/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cuh @@ -1,543 +1,3 @@ #include "common.cuh" -#include "fattn-common.cuh" -#ifdef FP16_MMA_AVAILABLE -#include -#endif // FP16_MMA_AVAILABLE - -// D == head size, VKQ_stride == num VKQ rows calculated in parallel: -template -#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) -__launch_bounds__(nwarps*WARP_SIZE, 1) -#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) -static __global__ void flash_attn_ext_f16( - const char * __restrict__ Q, - const char * __restrict__ K, - const char * __restrict__ V, - const char * __restrict__ mask, - float * __restrict__ dst, - float2 * __restrict__ dst_meta, - const float scale, - const float max_bias, - const float m0, - const float m1, - const uint32_t n_head_log2, - const float logit_softcap, - const int ne00, - const int ne01, - const int ne02, - const int ne03, - const int ne10, - const int ne11, - const int ne12, - const int ne13, - const int ne31, - const int nb31, - const int nb01, - 
const int nb02, - const int nb03, - const int nb11, - const int nb12, - const int nb13, - const int nb21, - const int nb22, - const int nb23, - const int ne0, - const int ne1, - const int ne2, - const int ne3) { -#ifdef FP16_MMA_AVAILABLE - // Skip unused kernel variants for faster compilation: - if (use_logit_softcap && !(D == 128 || D == 256)) { - NO_DEVICE_CODE; - return; - } - - //In this kernel Q, K, V are matrices while i, j, k are matrix indices. - - const int ic0 = ncols*(blockIdx.x / parallel_blocks); // Index of the first Q/QKV column to work on. - const int ip = blockIdx.x % parallel_blocks; // Index in group of blocks running for the same column in parallel. - - static_assert(D <= FATTN_KQ_STRIDE, "D must be <= FATTN_KQ_STRIDE."); - static_assert(ncols == 8 || ncols % 16 == 0, "ncols must be 8 or a multiple of 16."); - constexpr int frag_m = ncols == 8 ? 32 : 16; - constexpr int frag_n = ncols == 8 ? 8 : 16; - static_assert(D % frag_m == 0, "If ncols == 8 then D % frag_m must be 0."); - typedef nvcuda::wmma::fragment frag_a_K; - typedef nvcuda::wmma::fragment frag_a_V; - typedef nvcuda::wmma::fragment frag_b; - typedef nvcuda::wmma::fragment frag_c_KQ; - typedef nvcuda::wmma::fragment frag_c_VKQ; - - constexpr int KQ_stride_tc = nwarps*frag_m; // Number of KQ rows calculated in parallel. - constexpr int VKQ_ratio = KQ_stride_tc/VKQ_stride; // Number of parallel VKQ accumulators needed to keep all warps busy. - static_assert(VKQ_ratio <= nwarps, "VKQ_ratio must be <= nwarps."); - - // Pad internal representation of KQ, KQV to reduce shared memory bank conflicts: - constexpr int D_padded = D + 8; - constexpr int kqs_padded = FATTN_KQ_STRIDE + 8; - constexpr int kqar = sizeof(KQ_acc_t)/sizeof(half); - - const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. 
- const float * Q_f = (const float *) (Q + nb02* blockIdx.y + nb01*ic0); - const half * K_h = (const half *) (K + nb12*(blockIdx.y / gqa_ratio)); - const half * V_h = (const half *) (V + nb12*(blockIdx.y / gqa_ratio)); // K and V have same shape - const half * maskh = (const half *) mask + (nb31/sizeof(half))* ic0; - const half2 * mask2 = (const half2 *) mask + (nb31/sizeof(half))*(ic0/2); - - const int stride_Q = nb01 / sizeof(float); - const int stride_KV = nb11 / sizeof(half); - - const float slopef = get_alibi_slope(max_bias, blockIdx.y, n_head_log2, m0, m1); - const half slopeh = __float2half(slopef); - const half2 slope2 = make_half2(slopef, slopef); - - const half2 logit_softcap_2 = make_half2(logit_softcap, logit_softcap); - - frag_b Q_b[D/16][ncols/frag_n]; - - // A single buffer for temporarily holding tiles of KQ and VKQ parts: - constexpr int mem_KQ = ncols*kqs_padded*kqar; - constexpr int mem_VKQ_parts = VKQ_ratio*ncols*D_padded; - __shared__ half KQ[mem_KQ >= mem_VKQ_parts ? mem_KQ : mem_VKQ_parts]; - float * KQ_f = (float *) KQ; - half2 * KQ2 = (half2 *) KQ; - - float KQ_rowsum_f[ncols/nwarps] = {0.0f}; - float KQ_max_f[ncols/nwarps]; - float KQ_max_scale_f[ncols/nwarps] = {0.0f}; - -#pragma unroll - for (int j = 0; j < ncols/nwarps; ++j) { - KQ_max_f[j] = -FLT_MAX/2.0f; - } - - half2 KQ_rowsum_h2[ncols/nwarps] = {{0.0f, 0.0f}}; - half2 KQ_max_h2[ncols/nwarps]; - half2 KQ_max_scale_h2[ncols/nwarps] = {{0.0f, 0.0f}}; - -#pragma unroll - for (int j = 0; j < ncols/nwarps; ++j) { - KQ_max_h2[j] = make_half2(-HALF_MAX_HALF, -HALF_MAX_HALF); - } - - __shared__ half VKQ[ncols*D_padded]; // Accumulator for final VKQ slice. 
- half2 * VKQ2 = (half2 *) VKQ; -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j = j0 + threadIdx.y; -#pragma unroll - for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - if (i0 + WARP_SIZE > D/2 && i >= D/2) { - break; - } - VKQ2[j*(D_padded/2) + i] = make_half2(0.0f, 0.0f); - } - } - - // Convert Q to half and apply scale, temporarily store in KQ: -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j = j0 + threadIdx.y; -#pragma unroll - for (int i0 = 0; i0 < D; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - if (i0 + WARP_SIZE > D && i >= D) { - break; - } - KQ[j*D_padded + i] = ic0 + j < ne01 ? Q_f[j*stride_Q + i] * scale : 0.0f; - } - } - - __syncthreads(); - - // Load Q into tensor core fragments/registers since it will be used frequently: -#pragma unroll - for (int i0 = 0; i0 < D; i0 += 16) { -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += frag_n) { - nvcuda::wmma::load_matrix_sync(Q_b[i0/16][j0/frag_n], KQ + j0*D_padded + i0, D_padded); - } - } - - __syncthreads(); - - // Iterate over ne11 == previous tokens: - for (int k_VKQ_0 = ip*FATTN_KQ_STRIDE; k_VKQ_0 < ne11; k_VKQ_0 += parallel_blocks*FATTN_KQ_STRIDE) { - // Calculate tile of KQ: -#pragma unroll - for (int i_KQ_0 = 0; i_KQ_0 < FATTN_KQ_STRIDE; i_KQ_0 += KQ_stride_tc) { - frag_c_KQ KQ_c[ncols/frag_n]; -#pragma unroll - for (int j = 0; j < ncols/frag_n; ++j) { - nvcuda::wmma::fill_fragment(KQ_c[j], 0.0f); - } -#pragma unroll - for (int k_KQ_0 = 0; k_KQ_0 < D; k_KQ_0 += 16) { - frag_a_K K_a; - nvcuda::wmma::load_matrix_sync(K_a, K_h + (k_VKQ_0 + i_KQ_0 + frag_m*threadIdx.y)*stride_KV + k_KQ_0, stride_KV); -#pragma unroll - for (int j = 0; j < ncols/frag_n; ++j) { - nvcuda::wmma::mma_sync(KQ_c[j], K_a, Q_b[k_KQ_0/16][j], KQ_c[j]); - } - } -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += frag_n) { - nvcuda::wmma::store_matrix_sync((KQ_acc_t *) KQ + j0*kqs_padded + i_KQ_0 + frag_m*threadIdx.y, KQ_c[j0/frag_n], 
kqs_padded, nvcuda::wmma::mem_col_major); - } - } - - __syncthreads(); - - // Calculate softmax for each KQ column using the current max. value. - // The divisor is stored in KQ_rowsum and will be applied at the end. -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j = j0 + threadIdx.y; - - if (std::is_same::value) { - float KQ_f_tmp[FATTN_KQ_STRIDE / WARP_SIZE]; -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - KQ_f_tmp[k0/WARP_SIZE] = KQ_f[j*kqs_padded + k]; - - if (use_logit_softcap) { - KQ_f_tmp[k0/WARP_SIZE] = logit_softcap*tanhf(KQ_f_tmp[k0/WARP_SIZE]); - } - } - - float KQ_max_new = KQ_max_f[j0/nwarps]; -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - KQ_f_tmp[k0/WARP_SIZE] += mask ? __half2float(slopeh*maskh[j*(nb31/sizeof(half)) + k_VKQ_0 + k]) : 0.0f; - KQ_max_new = max(KQ_max_new, KQ_f_tmp[k0/WARP_SIZE]); - } - KQ_max_new = warp_reduce_max(KQ_max_new); - - const float diff = KQ_max_f[j0/nwarps] - KQ_max_new; - KQ_max_scale_f[j0/nwarps] = expf(diff); - if (diff <= SOFTMAX_FTZ_THRESHOLD) { - KQ_max_scale_f[j0/nwarps] = 0.0f; - } - KQ_max_f[j0/nwarps] = KQ_max_new; - - float KQ_rowsum_add = 0.0f; -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - const float diff = KQ_f_tmp[k0/WARP_SIZE] - KQ_max_f[j0/nwarps]; - KQ_f_tmp[k0/WARP_SIZE] = expf(diff); - if (diff <= SOFTMAX_FTZ_THRESHOLD) { - KQ_f_tmp[k0/WARP_SIZE] = 0.0f; - } - KQ_rowsum_add += KQ_f_tmp[k0/WARP_SIZE]; - KQ[j*(kqar*kqs_padded) + k] = KQ_f_tmp[k0/WARP_SIZE]; - } - KQ_rowsum_add = warp_reduce_sum(KQ_rowsum_add); - - // Scale previous KQ_rowsum to account for a potential increase in KQ_max: - KQ_rowsum_f[j0/nwarps] = KQ_max_scale_f[j0/nwarps]*KQ_rowsum_f[j0/nwarps] + KQ_rowsum_add; - } else { - half2 KQ2_tmp[FATTN_KQ_STRIDE/(2*WARP_SIZE)]; -#pragma unroll - for (int k0 = 0; k0 < 
FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - KQ2_tmp[k0/WARP_SIZE] = KQ2[j*(kqs_padded/2) + k]; - - if (use_logit_softcap) { - // There is no dedicated tangens hyperbolicus function for half2. - KQ2_tmp[k0/WARP_SIZE] = h2exp(KQ2_tmp[k0/WARP_SIZE]*make_half2(2.0f, 2.0f)); - KQ2_tmp[k0/WARP_SIZE] = (KQ2_tmp[k0/WARP_SIZE] - make_half2(1.0f, 1.0f)) - /(KQ2_tmp[k0/WARP_SIZE] + make_half2(1.0f, 1.0f)); - - KQ2_tmp[k0/WARP_SIZE] *= logit_softcap_2; - } - } - - half2 KQ_max_new = KQ_max_h2[j0/nwarps]; -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - KQ2_tmp[k0/WARP_SIZE] += mask ? slope2*mask2[(j*ne11 + k_VKQ_0)/2 + k] : make_half2(0.0f, 0.0f); - KQ_max_new = ggml_cuda_hmax2(KQ_max_new, KQ2_tmp[k0/WARP_SIZE]); - } - KQ_max_new = __half2half2(warp_reduce_max(ggml_cuda_hmax(__low2half(KQ_max_new), __high2half(KQ_max_new)))); - const half2 diff = KQ_max_h2[j0/nwarps] - KQ_max_new; - KQ_max_scale_h2[j0/nwarps] = h2exp(diff); - const uint32_t ftz_mask = __hgt2_mask(diff, make_half2(SOFTMAX_FTZ_THRESHOLD, SOFTMAX_FTZ_THRESHOLD)); - *((uint32_t *) &KQ_max_scale_h2[j0/nwarps]) &= ftz_mask; - KQ_max_h2[j0/nwarps] = KQ_max_new; - - half2 KQ_rowsum_add = make_half2(0.0f, 0.0f); -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - const half2 diff = KQ2_tmp[k0/WARP_SIZE] - KQ_max_h2[j0/nwarps]; - KQ2_tmp[k0/WARP_SIZE] = h2exp(diff); - const uint32_t ftz_mask = __hgt2_mask(diff, make_half2(SOFTMAX_FTZ_THRESHOLD, SOFTMAX_FTZ_THRESHOLD)); - *((uint32_t *) &KQ2_tmp[k0/WARP_SIZE]) &= ftz_mask; - KQ_rowsum_add += KQ2_tmp[k0/WARP_SIZE]; - KQ2[j*(kqs_padded/2) + k] = KQ2_tmp[k0/WARP_SIZE]; - } - KQ_rowsum_add = warp_reduce_sum(KQ_rowsum_add); - - // Scale previous KQ_rowsum to account for a potential increase in KQ_max: - KQ_rowsum_h2[j0/nwarps] = KQ_max_scale_h2[j0/nwarps]*KQ_rowsum_h2[j0/nwarps] + KQ_rowsum_add; - } - } - - 
__syncthreads(); - - frag_b KQ_b[FATTN_KQ_STRIDE/(VKQ_ratio*16)][ncols/frag_n]; -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += frag_n) { -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += VKQ_ratio*16) { - const int k = k0 + (threadIdx.y % VKQ_ratio)*16; - nvcuda::wmma::load_matrix_sync( - KQ_b[k0/(VKQ_ratio*16)][j0/frag_n], - KQ + j0*(kqar*kqs_padded) + k, - kqar*kqs_padded); - } - } - - frag_c_VKQ VKQ_c[D/VKQ_stride][ncols/frag_n]; -#pragma unroll - for (int i_VKQ_0 = 0; i_VKQ_0 < D; i_VKQ_0 += VKQ_stride) { -#pragma unroll - for (int j = 0; j < ncols/frag_n; ++j) { - nvcuda::wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], 0.0f); - } - -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += VKQ_ratio*16) { - const int k = k0 + (threadIdx.y % VKQ_ratio)*16; - - frag_a_V v_a; - nvcuda::wmma::load_matrix_sync(v_a, V_h + (k_VKQ_0 + k)*stride_KV + i_VKQ_0 + frag_m*(threadIdx.y/VKQ_ratio), stride_KV); -#pragma unroll - for (int j = 0; j < ncols/frag_n; ++j) { - nvcuda::wmma::mma_sync(VKQ_c[i_VKQ_0/VKQ_stride][j], v_a, KQ_b[k0/(VKQ_ratio*16)][j], VKQ_c[i_VKQ_0/VKQ_stride][j]); - } - } - } - - __syncthreads(); - - const int offset_k = (threadIdx.y % VKQ_ratio) * (ncols*D_padded); -#pragma unroll - for (int i_KQ_0 = 0; i_KQ_0 < D; i_KQ_0 += VKQ_stride) { -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += frag_n) { - nvcuda::wmma::store_matrix_sync( - KQ + offset_k + j0*D_padded + i_KQ_0 + frag_m*(threadIdx.y/VKQ_ratio), - VKQ_c[i_KQ_0/VKQ_stride][j0/frag_n], - D_padded, nvcuda::wmma::mem_col_major); - } - } - - __syncthreads(); - -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j = j0 + threadIdx.y; - - half2 VKQ_scale; - if (std::is_same::value) { - VKQ_scale = make_half2(KQ_max_scale_f[j0/nwarps], KQ_max_scale_f[j0/nwarps]); - } else { - VKQ_scale = KQ_max_scale_h2[j0/nwarps]; - } - -#pragma unroll - for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - if (i0 + WARP_SIZE > D/2 && i >= 
D/2) { - break; - } - - half2 VKQ_add = make_half2(0.0f, 0.0f); -#pragma unroll - for (int l = 0; l < VKQ_ratio; ++l) { - VKQ_add += KQ2[l*(ncols*D_padded/2) + j*(D_padded/2) + i]; - } - VKQ2[j*(D_padded/2) + i] = VKQ_scale*VKQ2[j*(D_padded/2) + i] + VKQ_add; - } - } - - __syncthreads(); - } - -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j_VKQ = j0 + threadIdx.y; - if (ic0 + j_VKQ >= ne01) { - return; - } - const int j_dst = (ic0 + j_VKQ)*parallel_blocks + ip; - - float KQ_rowsum_j; - if (std::is_same::value) { - KQ_rowsum_j = KQ_rowsum_f[j0/nwarps]; - } else { - KQ_rowsum_j = __low2float(KQ_rowsum_h2[j0/nwarps]) + __high2float(KQ_rowsum_h2[j0/nwarps]); - } - -#pragma unroll - for (int i0 = 0; i0 < D; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - if (i0 + WARP_SIZE > D && i >= D) { - break; - } - float dst_val = VKQ[j_VKQ*D_padded + i]; - if (parallel_blocks == 1) { - dst_val /= KQ_rowsum_j; - } - dst[j_dst*gridDim.y*D + blockIdx.y*D + i] = dst_val; - } - - if (parallel_blocks == 1 || threadIdx.x != 0) { - continue; - } - - float2 dst_meta_val; - if (std::is_same::value) { - dst_meta_val.x = KQ_max_f[j0/nwarps]; - } else { - dst_meta_val.x = __low2float(KQ_max_h2[j0/nwarps]); - } - dst_meta_val.y = KQ_rowsum_j; - dst_meta[(ic0 + j_VKQ)*gridDim.y*parallel_blocks + blockIdx.y*parallel_blocks + ip] = dst_meta_val; - } -#else - NO_DEVICE_CODE; -#endif // FP16_MMA_AVAILABLE -} - -constexpr int get_max_power_of_2(int x) { - return x % 2 == 0 ? 2*get_max_power_of_2(x/2) : 1; -} - -static_assert(get_max_power_of_2(1) == 1, "Test failed."); -static_assert(get_max_power_of_2(2) == 2, "Test failed."); -static_assert(get_max_power_of_2(4) == 4, "Test failed."); -static_assert(get_max_power_of_2(6) == 2, "Test failed."); - -// Number of VKQ rows calculated in parallel: -constexpr int get_VKQ_stride(int D, int nwarps, int frag_m) { - return (get_max_power_of_2(D/frag_m) < nwarps ? 
get_max_power_of_2(D/frag_m) : nwarps)*frag_m; -} - -static_assert(get_VKQ_stride(128, 1, 32) == 32, "Test failed."); -static_assert(get_VKQ_stride(128, 2, 32) == 64, "Test failed."); -static_assert(get_VKQ_stride(128, 4, 32) == 128, "Test failed."); -static_assert(get_VKQ_stride( 64, 1, 32) == 32, "Test failed."); -static_assert(get_VKQ_stride( 64, 2, 32) == 64, "Test failed."); -static_assert(get_VKQ_stride( 64, 4, 32) == 64, "Test failed."); -static_assert(get_VKQ_stride( 80, 1, 16) == 16, "Test failed."); -static_assert(get_VKQ_stride( 80, 2, 16) == 16, "Test failed."); -static_assert(get_VKQ_stride( 80, 4, 16) == 16, "Test failed."); - -template -void ggml_cuda_flash_attn_ext_wmma_f16_case(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * KQV = dst; - const ggml_tensor * Q = dst->src[0]; - - constexpr int nwarps = 4; - - constexpr int frag_m = cols_per_block == 8 && D % 32 == 0 ? 32 : 16; - const int blocks_num_pb1 = ((Q->ne[1] + cols_per_block - 1) / cols_per_block)*Q->ne[2]*Q->ne[3]; - const int nsm = ggml_cuda_info().devices[ggml_cuda_get_device()].nsm; - - float logit_softcap; - memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float)); - - if (4*blocks_num_pb1 < 2*nsm) { - constexpr int parallel_blocks = 4; - fattn_kernel_t fattn_kernel; - if (logit_softcap == 0.0f) { - constexpr bool use_logit_softcap = false; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } else { - constexpr bool use_logit_softcap = true; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); - return; - } - if (2*blocks_num_pb1 < 2*nsm) { - constexpr int parallel_blocks = 2; - fattn_kernel_t fattn_kernel; - if (logit_softcap == 0.0f) { - constexpr bool use_logit_softcap = 
false; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } else { - constexpr bool use_logit_softcap = true; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); - return; - } - constexpr int parallel_blocks = 1; - fattn_kernel_t fattn_kernel; - if (logit_softcap == 0.0f) { - constexpr bool use_logit_softcap = false; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } else { - constexpr bool use_logit_softcap = true; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); -} - -#define DECL_FATTN_WMMA_F16_CASE(D, cols_per_block, KQ_acc_t) \ - template void ggml_cuda_flash_attn_ext_wmma_f16_case \ - (ggml_backend_cuda_context & ctx, ggml_tensor * dst) \ - -extern DECL_FATTN_WMMA_F16_CASE( 64, 16, float); -extern DECL_FATTN_WMMA_F16_CASE( 80, 16, float); -extern DECL_FATTN_WMMA_F16_CASE( 96, 16, float); -extern DECL_FATTN_WMMA_F16_CASE(112, 16, float); -extern DECL_FATTN_WMMA_F16_CASE(128, 16, float); -extern DECL_FATTN_WMMA_F16_CASE(256, 16, float); - -extern DECL_FATTN_WMMA_F16_CASE( 64, 32, float); -extern DECL_FATTN_WMMA_F16_CASE( 80, 32, float); -extern DECL_FATTN_WMMA_F16_CASE( 96, 32, float); -extern DECL_FATTN_WMMA_F16_CASE(112, 32, float); -extern DECL_FATTN_WMMA_F16_CASE(128, 32, float); -// extern DECL_FATTN_WMMA_F16_CASE(256, 16, float); - -extern DECL_FATTN_WMMA_F16_CASE( 64, 8, half); -extern DECL_FATTN_WMMA_F16_CASE( 96, 8, half); -extern DECL_FATTN_WMMA_F16_CASE(128, 8, half); -extern DECL_FATTN_WMMA_F16_CASE(256, 
8, half); - -extern DECL_FATTN_WMMA_F16_CASE( 64, 16, half); -extern DECL_FATTN_WMMA_F16_CASE( 80, 16, half); -extern DECL_FATTN_WMMA_F16_CASE( 96, 16, half); -extern DECL_FATTN_WMMA_F16_CASE(112, 16, half); -extern DECL_FATTN_WMMA_F16_CASE(128, 16, half); -extern DECL_FATTN_WMMA_F16_CASE(256, 16, half); - -extern DECL_FATTN_WMMA_F16_CASE( 64, 32, half); -extern DECL_FATTN_WMMA_F16_CASE( 80, 32, half); -extern DECL_FATTN_WMMA_F16_CASE( 96, 32, half); -extern DECL_FATTN_WMMA_F16_CASE(112, 32, half); -extern DECL_FATTN_WMMA_F16_CASE(128, 32, half); -extern DECL_FATTN_WMMA_F16_CASE(256, 16, half); +void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu index 0b26b0f8e..b1e66d470 100644 --- a/ggml/src/ggml-cuda/fattn.cu +++ b/ggml/src/ggml-cuda/fattn.cu @@ -1,5 +1,6 @@ #include "common.cuh" #include "fattn-common.cuh" +#include "fattn-mma-f16.cuh" #include "fattn-tile-f16.cuh" #include "fattn-tile-f32.cuh" #include "fattn-vec-f16.cuh" @@ -7,144 +8,56 @@ #include "fattn-wmma-f16.cuh" #include "fattn.cuh" -#include +template +static void ggml_cuda_flash_attn_ext_mma_f16_switch_hs(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * Q = dst->src[0]; -static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * KQV = dst; - const ggml_tensor * Q = dst->src[0]; - - const enum ggml_prec prec = ggml_flash_attn_ext_get_prec(KQV); - - if (prec != GGML_PREC_DEFAULT) { - if (Q->ne[1] <= 32 || Q->ne[0] > 128) { - constexpr int cols_per_block = 16; - switch (Q->ne[0]) { - case 64: - ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst); - break; - case 80: - ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst); - break; - case 96: - ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst); - break; - case 112: - 
ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst); - break; - case 128: - ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); - break; - case 256: - ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst); - break; - default: - GGML_ABORT("fatal error"); - break; - } - } else { - constexpr int cols_per_block = 32; - switch (Q->ne[0]) { - case 64: - ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst); - break; - case 80: - ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst); - break; - case 96: - ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst); - break; - case 112: - ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst); - break; - case 128: - ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); - break; - // case 256: - // ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); - // break; - default: - GGML_ABORT("fatal error"); - break; - } - } - return; - } - - if (Q->ne[1] <= 8 && Q->ne[0] % WARP_SIZE == 0) { - constexpr int cols_per_block = 8; - switch (Q->ne[0]) { - case 64: - ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); - break; - case 96: - ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); - break; - case 128: - ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); - break; - case 256: - ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); - break; - default: - GGML_ABORT("fatal error"); - break; - } - return; - } - - if (Q->ne[1] <= 32) { - constexpr int cols_per_block = 16; - switch (Q->ne[0]) { - case 64: - ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); - break; - case 80: - ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, half>(ctx, dst); - break; - case 96: - 
ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); - break; - case 112: - ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, half>(ctx, dst); - break; - case 128: - ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); - break; - case 256: - ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); - break; - default: - GGML_ABORT("fatal error"); - break; - } - return; - } - - constexpr int cols_per_block = 32; switch (Q->ne[0]) { case 64: - ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); + ggml_cuda_flash_attn_ext_mma_f16_case< 64, cols_per_block>(ctx, dst); break; case 80: - ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, half>(ctx, dst); + ggml_cuda_flash_attn_ext_mma_f16_case< 80, cols_per_block>(ctx, dst); break; case 96: - ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); + ggml_cuda_flash_attn_ext_mma_f16_case< 96, cols_per_block>(ctx, dst); break; case 112: - ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, half>(ctx, dst); + ggml_cuda_flash_attn_ext_mma_f16_case<112, cols_per_block>(ctx, dst); break; case 128: - ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); + ggml_cuda_flash_attn_ext_mma_f16_case<128, cols_per_block>(ctx, dst); break; case 256: - ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); + ggml_cuda_flash_attn_ext_mma_f16_case<256, cols_per_block>(ctx, dst); break; default: GGML_ABORT("fatal error"); break; } } + +static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * Q = dst->src[0]; + + if (Q->ne[1] <= 8) { + ggml_cuda_flash_attn_ext_mma_f16_switch_hs<8>(ctx, dst); + return; + } + + if (Q->ne[1] <= 16) { + ggml_cuda_flash_attn_ext_mma_f16_switch_hs<16>(ctx, dst); + return; + } + + if (Q->ne[1] <= 32) { + ggml_cuda_flash_attn_ext_mma_f16_switch_hs<32>(ctx, dst); + return; 
+ } + + ggml_cuda_flash_attn_ext_mma_f16_switch_hs<64>(ctx, dst); +} + #define FATTN_VEC_F16_CASE(D, type_K, type_V) \ if (Q->ne[0] == (D) && K->type == (type_K) && V->type == (type_V)) { \ ggml_cuda_flash_attn_ext_vec_f16_case(ctx, dst); \ @@ -322,11 +235,19 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst return; } - if (!fp16_mma_available(cc)) { - if (Q->ne[1] <= 8) { - ggml_cuda_flash_attn_ext_vec_f16(ctx, dst); + if (!new_mma_available(cc)) { + if (prec == GGML_PREC_DEFAULT) { + if (Q->ne[1] <= 8) { + ggml_cuda_flash_attn_ext_vec_f16(ctx, dst); + } else { + ggml_cuda_flash_attn_ext_tile_f16(ctx, dst); + } } else { - ggml_cuda_flash_attn_ext_tile_f16(ctx, dst); + if (Q->ne[1] <= 8) { + ggml_cuda_flash_attn_ext_vec_f32(ctx, dst); + } else { + ggml_cuda_flash_attn_ext_tile_f32(ctx, dst); + } } return; } @@ -341,5 +262,10 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst } } - ggml_cuda_flash_attn_ext_wmma_f16(ctx, dst); + // The MMA implementation needs Turing or newer, use the old WMMA code for Volta: + if (cc == GGML_CUDA_CC_VOLTA) { + ggml_cuda_flash_attn_ext_wmma_f16(ctx, dst); + } + + ggml_cuda_flash_attn_ext_mma_f16(ctx, dst); } diff --git a/ggml/src/ggml-cuda/mma.cuh b/ggml/src/ggml-cuda/mma.cuh index 7d11540af..9788a1389 100644 --- a/ggml/src/ggml-cuda/mma.cuh +++ b/ggml/src/ggml-cuda/mma.cuh @@ -1,11 +1,67 @@ +// This file contains primitives that expose the tensor core PTX instructions for CUDA code. +// The primitives can be used in a similar way as the nvcuda::wmma interface but with a well-defined memory layout. +// The documentation for the PTX instructions can be found under: +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#matrix-multiply-accumulate-operation-using-mma-instruction +// +// Like with nvcuda::wmma there are three types of matrix tiles: A, B, and C with A @ B = C. +// A is a row-major matrix with shape I x K. 
+// B is a column-major matrix with shape K x J. +// C is a column-major matrix with shape I x J. +// Note that along their lowest dimension I, J, and K are measured in physical 32 bit elements instead of logical elements. +// The functions get_i, get_j, and get_k can be used to get the physical 32 bit index of the lth element of a thread within a tile. +// All matrix tiles have ne physical 32 bit elements per warp. +// +// As described in the documentation, all pointers for load_ldmatrix must be to shared memory and aligned to 16 bytes. + #include "common.cuh" -struct mma_int_A_I16K4 { + +#if CUDART_VERSION >= 11800 + +static __device__ __forceinline__ int ggml_cuda_movmatrix(const int x) { + int ret = 0; + +#ifdef NEW_MMA_AVAILABLE + asm("movmatrix.sync.aligned.m8n8.trans.b16 %0, %1;" + : "+r"(ret) : "r"(x)); +#else + NO_DEVICE_CODE; +#endif // defined(NEW_MMA_AVAILABLE) + return ret; +} + +#else + +static __device__ __forceinline__ int ggml_cuda_movmatrix(const int x) { + // Imagine transposing row-major matrix to column-major matrix. 
+ const int src_i_low = 2 * (threadIdx.x % 4); + const int src_i_high = src_i_low + 1; + const int src_j = threadIdx.x / 4; + + const int src_laneid_low = src_i_low * 4 + src_j / 2; + const int src_laneid_high = src_i_high * 4 + src_j / 2; + + const int shift_low = ((src_j + 0) % 2) * 16; + const int shift_high = ((src_j + 1) % 2) * 16; + + const int ret_low = (__shfl_sync(0xFFFFFFFF, x, src_laneid_low, WARP_SIZE) >> shift_low) & 0x0000FFFF; + const int ret_high = (__shfl_sync(0xFFFFFFFF, x, src_laneid_high, WARP_SIZE) << shift_high) & 0xFFFF0000; + + return ret_low | ret_high; +} + +#endif // CUDART_VERSION >= 11800 + + +template +struct mma_A_I16K4 { + static_assert(sizeof(T) == 4, "bad type size"); + static constexpr int I = 16; static constexpr int K = 4; static constexpr int ne = 2; - int x[ne] = {0}; + T x[ne]; static __device__ __forceinline__ int get_i(const int l) { const int ret = (l%2) * (I/2) + threadIdx.x / K; @@ -21,27 +77,35 @@ struct mma_int_A_I16K4 { return ret; } - __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) { -#if defined(INT8_MMA_AVAILABLE) - const int * xs = xs0 + (threadIdx.x%I)*stride; - asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];" - : "+r"(x[0]), "+r"(x[1]) - : "l"(xs)); -#else + __device__ __forceinline__ void load_generic(const T * __restrict__ xs0, const int & stride) { #pragma unroll for (int l = 0; l < ne; ++l) { x[l] = xs0[get_i(l)*stride + get_k(l)]; } -#endif // defined(INT8_MMA_AVAILABLE) + } + + __device__ __forceinline__ void load_ldmatrix(const T * __restrict__ xs0, const int & stride) { +#ifdef NEW_MMA_AVAILABLE + int * xi = (int *) x; + const int * xs = (const int *) xs0 + (threadIdx.x%I)*stride; + asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];" + : "+r"(xi[0]), "+r"(xi[1]) + : "l"(xs)); +#else + load_generic(xs0, stride); +#endif // NEW_MMA_AVAILABLE } }; -struct mma_int_A_I16K8 { +template +struct mma_A_I16K8 { + static_assert(sizeof(T) == 4, "bad type size"); + 
static constexpr int I = 16; static constexpr int K = 8; static constexpr int ne = 4; - int x[ne] = {0}; + T x[ne]; static __device__ __forceinline__ int get_i(const int l) { const int ret = (l%2) * (I/2) + threadIdx.x / (K/2); @@ -57,31 +121,62 @@ struct mma_int_A_I16K8 { return ret; } - __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) { -#if defined(INT8_MMA_AVAILABLE) - const int * xs = xs0 + (threadIdx.x%I)*stride + (threadIdx.x/I)*(K/2); - asm("ldmatrix.sync.aligned.m8n8.x4.b16 {%0, %1, %2, %3}, [%4];" - : "+r"(x[0]), "+r"(x[1]), "+r"(x[2]), "+r"(x[3]) - : "l"(xs)); -#else + __device__ __forceinline__ void load_generic(const T * __restrict__ xs0, const int & stride) { #pragma unroll for (int l = 0; l < ne; ++l) { x[l] = xs0[get_i(l)*stride + get_k(l)]; } -#endif // defined(INT8_MMA_AVAILABLE) } - __device__ __forceinline__ void load_low(const int * __restrict__ xs0, const int & stride) { - ((mma_int_A_I16K4 *) x)[0].load(xs0, stride); + __device__ __forceinline__ void load_ldmatrix(const T * __restrict__ xs0, const int & stride) { +#ifdef NEW_MMA_AVAILABLE + int * xi = (int * ) x; + const int * xs = (const int *) xs0 + (threadIdx.x%I)*stride + (threadIdx.x/I)*(K/2); + asm("ldmatrix.sync.aligned.m8n8.x4.b16 {%0, %1, %2, %3}, [%4];" + : "+r"(xi[0]), "+r"(xi[1]), "+r"(xi[2]), "+r"(xi[3]) + : "l"(xs)); +#else + GGML_UNUSED(xs0); + GGML_UNUSED(stride); + NO_DEVICE_CODE; +#endif // NEW_MMA_AVAILABLE + } + + __device__ __forceinline__ void load_ldmatrix_trans(const T * __restrict__ xs0, const int & stride) { +#ifdef NEW_MMA_AVAILABLE + int * xi = (int * ) x; + const int * xs = (const int *) xs0 + (threadIdx.x%I)*stride + (threadIdx.x/I)*(K/2); + asm("ldmatrix.sync.aligned.m8n8.x4.trans.b16 {%0, %1, %2, %3}, [%4];" + : "+r"(xi[0]), "+r"(xi[2]), "+r"(xi[1]), "+r"(xi[3]) + : "l"(xs)); +#else + GGML_UNUSED(xs0); + GGML_UNUSED(stride); + NO_DEVICE_CODE; +#endif // NEW_MMA_AVAILABLE + } + + __device__ __forceinline__ void transpose() { 
+ int * xi = (int *) x; + xi[0] = ggml_cuda_movmatrix(xi[0]); + + const int tmp = ggml_cuda_movmatrix(xi[1]); + xi[1] = ggml_cuda_movmatrix(xi[2]); + xi[2] = tmp; + + xi[3] = ggml_cuda_movmatrix(xi[3]); } }; -struct mma_int_B_J8K4 { +template +struct mma_B_J8K4 { + static_assert(sizeof(T) == 4, "bad type size"); + static constexpr int J = 8; static constexpr int K = 4; static constexpr int ne = 1; - int x[ne] = {0}; + T x[ne]; static __device__ __forceinline__ int get_j(const int /* l */) { const int ret = threadIdx.x / K; @@ -97,27 +192,34 @@ struct mma_int_B_J8K4 { return ret; } - __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) { -#if defined(INT8_MMA_AVAILABLE) && false // Loading as 4 byte values is faster - const int * xs = xs0 + (threadIdx.x%J)*stride; - asm("ldmatrix.sync.aligned.m8n8.x1.b16 {%0}, [%1];" - : "+r"(x[0]) - : "l"(xs)); -#else + __device__ __forceinline__ void load_generic(const T * __restrict__ xs0, const int & stride) { #pragma unroll for (int l = 0; l < ne; ++l) { x[l] = xs0[get_j(l)*stride + get_k(l)]; } -#endif // defined(INT8_MMA_AVAILABLE) + } + + __device__ __forceinline__ void load_ldmatrix(const T * __restrict__ xs0, const int & stride) { +#ifdef NEW_MMA_AVAILABLE + int * xi = (int *) x; + const int * xs = (const int *) xs0 + (threadIdx.x%J)*stride; + asm("ldmatrix.sync.aligned.m8n8.x1.b16 {%0}, [%1];" + : "+r"(xi[0]) : "l"(xs)); +#else + load_generic(xs0, stride); +#endif // NEW_MMA_AVAILABLE } }; -struct mma_int_B_J8K8 { +template +struct mma_B_J8K8 { + static_assert(sizeof(T) == 4, "bad type size"); + static constexpr int J = 8; static constexpr int K = 8; static constexpr int ne = 2; - int x[ne] = {0}; + T x[ne]; static __device__ __forceinline__ int get_j(const int /* l */) { const int ret = threadIdx.x / (K/2); @@ -133,22 +235,31 @@ struct mma_int_B_J8K8 { return ret; } - __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) { -#if defined(INT8_MMA_AVAILABLE) 
&& false // Loading as 4 byte values is faster - const int * xs = xs0 + (threadIdx.x%J)*stride + ((threadIdx.x/J)*(K/2)) % K; - asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];" - : "+r"(x[0]), "+r"(x[1]) - : "l"(xs)); -#else + __device__ __forceinline__ void load_generic(const T * __restrict__ xs0, const int & stride) { #pragma unroll for (int l = 0; l < ne; ++l) { x[l] = xs0[get_j(l)*stride + get_k(l)]; } -#endif // defined(INT8_MMA_AVAILABLE) + } + + __device__ __forceinline__ void load_ldmatrix(const T * __restrict__ xs0, const int & stride) { +#ifdef NEW_MMA_AVAILABLE + int * xi = (int *) x; + const int * xs = (const int *) xs0 + (threadIdx.x%J)*stride + ((threadIdx.x/J)*(K/2)) % K; + asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];" + : "+r"(xi[0]), "+r"(xi[1]) + : "l"(xs)); +#else + load_generic(xs0, stride); +#endif // NEW_MMA_AVAILABLE } }; -struct mma_int_C_I16J8 { +template +struct mma_C_I16J8 {}; + +template <> +struct mma_C_I16J8 { static constexpr int I = 16; static constexpr int J = 8; static constexpr int ne = 4; @@ -169,8 +280,8 @@ struct mma_int_C_I16J8 { return ret; } - __device__ __forceinline__ void mma_K4(const mma_int_A_I16K4 & mma_A, const mma_int_B_J8K4 & mma_B) { -#ifdef INT8_MMA_AVAILABLE + __device__ __forceinline__ void mma(const mma_A_I16K4 & mma_A, const mma_B_J8K4 & mma_B) { +#ifdef NEW_MMA_AVAILABLE #if __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE asm("mma.sync.aligned.m16n8k16.row.col.s32.s8.s8.s32 {%0, %1, %2, %3}, {%4, %5}, {%6}, {%0, %1, %2, %3};" : "+r"(x[0]), "+r"(x[1]), "+r"(x[2]), "+r"(x[3]) @@ -188,11 +299,11 @@ struct mma_int_C_I16J8 { GGML_UNUSED(mma_A); GGML_UNUSED(mma_B); NO_DEVICE_CODE; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } - __device__ __forceinline__ void mma_K8(const mma_int_A_I16K8 & mma_A, const mma_int_B_J8K8 & mma_B) { -#ifdef INT8_MMA_AVAILABLE + __device__ __forceinline__ void mma(const mma_A_I16K8 & mma_A, const mma_B_J8K8 & mma_B) { +#ifdef NEW_MMA_AVAILABLE #if __CUDA_ARCH__ >= 
GGML_CUDA_CC_AMPERE asm("mma.sync.aligned.m16n8k32.row.col.s32.s8.s8.s32 {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9}, {%0, %1, %2, %3};" : "+r"(x[0]), "+r"(x[1]), "+r"(x[2]), "+r"(x[3]) @@ -216,6 +327,132 @@ struct mma_int_C_I16J8 { GGML_UNUSED(mma_A); GGML_UNUSED(mma_B); NO_DEVICE_CODE; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE + } +}; + +template <> +struct mma_C_I16J8 { + static constexpr int I = 16; + static constexpr int J = 4; + static constexpr int ne = 2; + + half2 x[ne] = {{0.0f, 0.0f}, {0.0f, 0.0f}}; + + static __device__ __forceinline__ int get_i(const int l) { + const int ret = l * (I/2) + threadIdx.x / J; + GGML_CUDA_ASSUME(ret >= 0); + GGML_CUDA_ASSUME(ret < I); + return ret; + } + + static __device__ __forceinline__ int get_j(const int /* l */) { + const int ret = threadIdx.x % J; + GGML_CUDA_ASSUME(ret >= 0); + GGML_CUDA_ASSUME(ret < J); + return ret; + } + + __device__ __forceinline__ void mma(const mma_A_I16K8 & mma_A, const mma_B_J8K8 & mma_B) { +#ifdef NEW_MMA_AVAILABLE + int * Axi = (int *) mma_A.x; + int * Bxi = (int *) mma_B.x; + int * xi = (int *) x; +#if __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE + asm("mma.sync.aligned.m16n8k16.row.col.f16.f16.f16.f16 {%0, %1}, {%2, %3, %4, %5}, {%6, %7}, {%0, %1};" + : "+r"(xi[0]), "+r"(xi[1]) + : "r"(Axi[0]), "r"(Axi[1]), "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[0]), "r"(Bxi[1])); +#else + // On Turing m16n8k16 mma is not available, use 2x m8n8k8 mma instead: + asm("mma.sync.aligned.m16n8k8.row.col.f16.f16.f16.f16 {%0, %1}, {%2, %3}, {%4}, {%0, %1};" + : "+r"(xi[0]), "+r"(xi[1]) + : "r"(Axi[0]), "r"(Axi[1]), "r"(Bxi[0])); + asm("mma.sync.aligned.m16n8k8.row.col.f16.f16.f16.f16 {%0, %1}, {%2, %3}, {%4}, {%0, %1};" + : "+r"(xi[0]), "+r"(xi[1]) + : "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[1])); +#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE +#else + GGML_UNUSED(mma_A); + GGML_UNUSED(mma_B); + NO_DEVICE_CODE; +#endif // NEW_MMA_AVAILABLE + } + + __device__ __forceinline__ mma_B_J8K8 to_mma_B() { + mma_B_J8K8 
mma_B; + + int * xi = (int *) x; + int * Bxi = (int *) mma_B.x; + Bxi[0] = ggml_cuda_movmatrix(xi[0]); + Bxi[1] = ggml_cuda_movmatrix(xi[1]); + + return mma_B; + } +}; + +template <> +struct mma_C_I16J8 { + static constexpr int I = 16; + static constexpr int J = 8; + static constexpr int ne = 4; + + float x[ne] = {0.0f, 0.0f, 0.0f, 0.0f}; + + static __device__ __forceinline__ int get_i(const int l) { + const int ret = (l/2) * (I/2) + threadIdx.x / (J/2); + GGML_CUDA_ASSUME(ret >= 0); + GGML_CUDA_ASSUME(ret < I); + return ret; + } + + static __device__ __forceinline__ int get_j(const int l) { + const int ret = 2 * (threadIdx.x % (J/2)) + l%2; + GGML_CUDA_ASSUME(ret >= 0); + GGML_CUDA_ASSUME(ret < J); + return ret; + } + + __device__ __forceinline__ void mma(const mma_A_I16K8 & mma_A, const mma_B_J8K8 & mma_B) { +#ifdef NEW_MMA_AVAILABLE + int * Axi = (int *) mma_A.x; + int * Bxi = (int *) mma_B.x; + int * xi = (int *) x; +#if __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE + asm("mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9}, {%0, %1, %2, %3};" + : "+r"(xi[0]), "+r"(xi[1]), "+r"(xi[2]), "+r"(xi[3]) + : "r"(Axi[0]), "r"(Axi[1]), "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[0]), "r"(Bxi[1])); +#else + // On Turing m16n8k16 mma is not available, use 2x m8n8k8 mma instead: + asm("mma.sync.aligned.m16n8k8.row.col.f32.f16.f16.f32 {%0, %1, %2, %3}, {%4, %5}, {%6}, {%0, %1, %2, %3};" + : "+r"(xi[0]), "+r"(xi[1]), "+r"(xi[2]), "+r"(xi[3]) + : "r"(Axi[0]), "r"(Axi[1]), "r"(Bxi[0])); + asm("mma.sync.aligned.m16n8k8.row.col.f32.f16.f16.f32 {%0, %1, %2, %3}, {%4, %5}, {%6}, {%0, %1, %2, %3};" + : "+r"(xi[0]), "+r"(xi[1]), "+r"(xi[2]), "+r"(xi[3]) + : "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[1])); +#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE +#else + GGML_UNUSED(mma_A); + GGML_UNUSED(mma_B); + NO_DEVICE_CODE; +#endif // NEW_MMA_AVAILABLE + } + + __device__ __forceinline__ mma_B_J8K8 to_mma_B() { + mma_B_J8K8 mma_B; + mma_B.x[0] = make_half2(x[0], x[1]); + 
mma_B.x[1] = make_half2(x[2], x[3]); + + int * Bxi = (int *) mma_B.x; + Bxi[0] = ggml_cuda_movmatrix(Bxi[0]); + Bxi[1] = ggml_cuda_movmatrix(Bxi[1]); + + return mma_B; + } + + __device__ __forceinline__ void load_generic(const float * __restrict__ xs0, const int & stride) { +#pragma unroll + for (int l = 0; l < ne; ++l) { + x[l] = xs0[get_j(l)*stride + get_i(l)]; + } } }; diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu index 270251df4..83cb78cbd 100644 --- a/ggml/src/ggml-cuda/mmq.cu +++ b/ggml/src/ggml-cuda/mmq.cu @@ -132,7 +132,7 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) { return false; } - if (int8_mma_available(cc)) { + if (new_mma_available(cc)) { return true; } diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index 3cd508a1d..c05c84778 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -87,7 +87,7 @@ struct tile_x_sizes { }; static constexpr int get_mmq_x_max_host(const int cc) { - return int8_mma_available(cc) ? 128 : + return new_mma_available(cc) ? 128 : #ifdef GGML_CUDA_FORCE_MMQ cc >= GGML_CUDA_CC_VOLTA && cc < GGML_CUDA_CC_OFFSET_AMD ? 
128 : 64; #else @@ -96,9 +96,9 @@ static constexpr int get_mmq_x_max_host(const int cc) { } static constexpr __device__ int get_mmq_x_max_device() { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE return 128; -#else // INT8_MMA_AVAILABLE +#else // NEW_MMA_AVAILABLE #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) return 128; @@ -116,7 +116,7 @@ static constexpr __device__ int get_mmq_x_max_device() { #endif // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } static constexpr int get_mmq_y_host(const int cc) { @@ -209,10 +209,10 @@ static constexpr __host__ __device__ int mmq_get_mma_tile_x_k(ggml_type type) { #define MMQ_TILE_Y_K (WARP_SIZE + WARP_SIZE/QI8_1) static int mmq_get_granularity_host(const int mmq_x, const int cc) { - return int8_mma_available(cc) && mmq_x >= 48 ? 16 : 8; + return new_mma_available(cc) && mmq_x >= 48 ? 16 : 8; } -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE static constexpr __device__ int mmq_get_granularity_device(const int mmq_x) { return mmq_x >= 48 ? 
16 : 8; } @@ -220,21 +220,21 @@ static constexpr __device__ int mmq_get_granularity_device(const int mmq_x) { static constexpr __device__ int mmq_get_granularity_device(const int /* mmq_x */) { return 8; } -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE // ------------------------------------------------------------ template static __device__ __forceinline__ void load_tiles_q4_0( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q4_0, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI4_0; const int kqsx = threadIdx.x % QI4_0; @@ -250,12 +250,12 @@ template static __device__ __forceinlin const block_q4_0 * bxi = (const block_q4_0 *) x + kbx0 + i*stride + kbx; const int qs0 = get_int_b2(bxi->qs, kqsx); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + kbx*(2*QI4_0) + kqsx + 0] = __vsubss4((qs0 >> 0) & 0x0F0F0F0F, 0x08080808); x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + kbx*(2*QI4_0) + kqsx + QI4_0] = __vsubss4((qs0 >> 4) & 0x0F0F0F0F, 0x08080808); #else x_qs[i*(WARP_SIZE + 1) + threadIdx.x] = qs0; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI4_0; @@ -271,11 +271,11 @@ template static __device__ __forceinlin const block_q4_0 * bxi = (const block_q4_0 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kbxd] = bxi->d; #else x_df[i*(WARP_SIZE/QI4_0) + i/QI4_0 + kbxd] = bxi->d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -322,14 +322,14 @@ static __device__ __forceinline__ void vec_dot_q4_0_q8_1_dp4a( template 
static __device__ __forceinline__ void load_tiles_q4_1( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q4_1, mmq_y); int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI4_1; const int kqsx = threadIdx.x % QI4_1; @@ -345,12 +345,12 @@ template static __device__ __forceinlin const block_q4_1 * bxi = (const block_q4_1 *) x + kbx0 + i*stride + kbx; const int qs0 = get_int_b4(bxi->qs, kqsx); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kbx*(2*QI4_1) + kqsx + 0] = (qs0 >> 0) & 0x0F0F0F0F; x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kbx*(2*QI4_1) + kqsx + QI4_1] = (qs0 >> 4) & 0x0F0F0F0F; #else x_qs[i*(WARP_SIZE + 1) + threadIdx.x] = qs0; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI4_1; @@ -366,11 +366,11 @@ template static __device__ __forceinlin const block_q4_1 * bxi = (const block_q4_1 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_dm[i*MMQ_MMA_TILE_X_K_Q8_1 + kbxd] = bxi->dm; #else x_dm[i*(WARP_SIZE/QI4_1) + i/QI4_1 + kbxd] = bxi->dm; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -417,14 +417,14 @@ static __device__ __forceinline__ void vec_dot_q4_1_q8_1_dp4a( template static __device__ __forceinline__ void load_tiles_q5_0( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q5_0, mmq_y); 
int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI5_0; const int kqsx = threadIdx.x % QI5_0; @@ -456,13 +456,13 @@ template static __device__ __forceinlin qs1 |= (qh << 9) & 0x10000000; // 19 -> 28 qs1 = __vsubss4(qs1, 0x10101010); // subtract 16 -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + kbx*(2*QI5_0) + kqsx + 0] = qs0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + kbx*(2*QI5_0) + kqsx + QI5_0] = qs1; #else x_qs[i*(2*WARP_SIZE + 1) + kbx*(2*QI5_0) + kqsx + 0] = qs0; x_qs[i*(2*WARP_SIZE + 1) + kbx*(2*QI5_0) + kqsx + QI5_0] = qs1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI5_0; @@ -478,25 +478,25 @@ template static __device__ __forceinlin const block_q5_0 * bxi = (const block_q5_0 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kbxd] = bxi->d; #else x_df[i*(WARP_SIZE/QI5_0) + i/QI5_0 + kbxd] = bxi->d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_q5_1( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q5_1, mmq_y); int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI5_1; const int kqsx = threadIdx.x % QI5_1; @@ -526,13 +526,13 @@ template static __device__ __forceinlin qs1 |= (qh << 2) & 0x00100000; // 18 -> 20 qs1 |= (qh << 9) & 0x10000000; // 19 -> 28 -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kbx*(2*QI5_1) + kqsx + 
0] = qs0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kbx*(2*QI5_1) + kqsx + QI5_1] = qs1; #else x_qs[i*(2*WARP_SIZE + 1) + kbx*(2*QI5_1) + kqsx + 0] = qs0; x_qs[i*(2*WARP_SIZE + 1) + kbx*(2*QI5_1) + kqsx + QI5_1] = qs1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI5_1; @@ -548,25 +548,25 @@ template static __device__ __forceinlin const block_q5_1 * bxi = (const block_q5_1 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_dm[i*MMQ_MMA_TILE_X_K_Q8_1 + kbxd] = bxi->dm; #else x_dm[i*(WARP_SIZE/QI5_1) + i/QI5_1 + kbxd] = bxi->dm; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_q8_0( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_tile + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q8_0, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI8_0; const int kqsx = threadIdx.x % QI8_0; @@ -581,13 +581,13 @@ template static __device__ __forceinlin const block_q8_0 * bxi = (const block_q8_0 *) x + kbx0 + i*stride + kbx; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 0 + threadIdx.x] = get_int_b2(bxi[0].qs, kqsx); x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + WARP_SIZE + threadIdx.x] = get_int_b2(bxi[WARP_SIZE/QI8_0].qs, kqsx); #else x_qs[i*(2*WARP_SIZE + 1) + 0 + threadIdx.x] = get_int_b2(bxi[0].qs, kqsx); x_qs[i*(2*WARP_SIZE + 1) + WARP_SIZE + threadIdx.x] = get_int_b2(bxi[WARP_SIZE/QI8_0].qs, kqsx); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = 2*WARP_SIZE / QI8_0; @@ -603,11 +603,11 @@ template static __device__ 
__forceinlin const block_q8_0 * bxi = (const block_q8_0 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kbxd] = bxi->d; #else x_df[i*(2*WARP_SIZE/QI8_0) + i/(QI8_0/2) + kbxd] = bxi->d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -645,9 +645,9 @@ template static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { - typedef mma_int_A_I16K8 mma_A; - typedef mma_int_B_J8K8 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K8 mma_A; + typedef mma_B_J8K8 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -672,7 +672,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += QI8_0) { const int k0 = k00 + k01; - A[n][k01/QI8_0].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q8_0 + k0, MMQ_MMA_TILE_X_K_Q8_0); + A[n][k01/QI8_0].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q8_0 + k0, MMQ_MMA_TILE_X_K_Q8_0); } #pragma unroll @@ -695,7 +695,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mma( mma_B B; float dB[mma_C::ne/2]; - B.load(y_qs + j0*MMQ_TILE_Y_K + k01, MMQ_TILE_Y_K); + B.load_generic(y_qs + j0*MMQ_TILE_Y_K + k01, MMQ_TILE_Y_K); // faster than load_ldmatrix #pragma unroll for (int l = 0; l < mma_C::ne/2; ++l) { @@ -711,7 +711,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mma( #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C C; - C.mma_K8(A[n][k01/QI8_0], B); + C.mma(A[n][k01/QI8_0], B); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -756,9 +756,9 @@ template static __device__ __forceinline__ void vec_dot_q8_1_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { - typedef mma_int_A_I16K8 mma_A; - typedef 
mma_int_B_J8K8 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K8 mma_A; + typedef mma_B_J8K8 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -782,7 +782,7 @@ static __device__ __forceinline__ void vec_dot_q8_1_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += QI8_1) { const int k0 = k00 + k01; - A[n][k01/QI8_1].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q8_1 + k0, MMQ_MMA_TILE_X_K_Q8_1); + A[n][k01/QI8_1].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q8_1 + k0, MMQ_MMA_TILE_X_K_Q8_1); } #pragma unroll @@ -805,7 +805,7 @@ static __device__ __forceinline__ void vec_dot_q8_1_q8_1_mma( mma_B B; float2 dsB[mma_C::ne/2]; - B.load(y_qs + j0*MMQ_TILE_Y_K + k01, MMQ_TILE_Y_K); + B.load_generic(y_qs + j0*MMQ_TILE_Y_K + k01, MMQ_TILE_Y_K); // faster than load_ldmatrix #pragma unroll for (int l = 0; l < mma_C::ne/2; ++l) { @@ -817,7 +817,7 @@ static __device__ __forceinline__ void vec_dot_q8_1_q8_1_mma( #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C C; - C.mma_K8(A[n][k01/QI8_1], B); + C.mma(A[n][k01/QI8_1], B); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -864,12 +864,12 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_dp4a( template static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE - typedef mma_int_A_I16K4 mma_A; - typedef mma_int_A_I16K8 mma_A_K8; - typedef mma_int_B_J8K4 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K4 mma_A; + typedef mma_A_I16K8 mma_A_K8; + typedef mma_B_J8K4 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -893,7 +893,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( for (int k01 = 0; k01 < 
WARP_SIZE; k01 += 8) { const int k0 = k00 + k01; - ((mma_A_K8 *) A[n])[k01/8].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q3_K + k0, MMQ_MMA_TILE_X_K_Q3_K); + ((mma_A_K8 *) A[n])[k01/8].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q3_K + k0, MMQ_MMA_TILE_X_K_Q3_K); } #pragma unroll @@ -916,8 +916,9 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( mma_B B[2]; float dB[mma_C::ne/2]; - B[0].load(y_qs + j0*MMQ_TILE_Y_K + (k01 + 0), MMQ_TILE_Y_K); - B[1].load(y_qs + j0*MMQ_TILE_Y_K + (k01 + mma_B::K), MMQ_TILE_Y_K); + // Here load_generic is faster than load_ldmatrix. + B[0].load_generic(y_qs + j0*MMQ_TILE_Y_K + (k01 + 0), MMQ_TILE_Y_K); + B[1].load_generic(y_qs + j0*MMQ_TILE_Y_K + (k01 + mma_B::K), MMQ_TILE_Y_K); #pragma unroll for (int l = 0; l < mma_C::ne/2; ++l) { @@ -929,8 +930,8 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C C[2]; - C[0].mma_K4(A[n][k01/4 + 0], B[0]); - C[1].mma_K4(A[n][k01/4 + 1], B[1]); + C[0].mma(A[n][k01/4 + 0], B[0]); + C[1].mma(A[n][k01/4 + 1], B[1]); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -942,20 +943,20 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( #else GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(sum); NO_DEVICE_CODE; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template static __device__ __forceinline__ void load_tiles_q2_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q2_K, mmq_y); int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % QI2_K; @@ -977,11 +978,11 @@ template static __device__ __forceinlin const 
int x_qs_k = (x_ql_0 >> (2*l)) & 0x03030303; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q2_K + k] = x_qs_k; #else x_qs[i*(2*WARP_SIZE + 1) + k] = x_qs_k; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int sc_m = bxi->scales[kqsx]; @@ -992,11 +993,11 @@ template static __device__ __forceinlin const half2 x_dm_ik = make_half2(bxi_dmf.x*(sc_m & 0x0F), bxi_dmf.y*(sc_m >> 4)); #endif // FAST_FP16_AVAILABLE -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_dm[i*MMQ_MMA_TILE_X_K_Q2_K + kqsx] = x_dm_ik; #else x_dm[i*(WARP_SIZE + 1) + kqsx] = x_dm_ik; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -1051,12 +1052,12 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_dp4a( template static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE - typedef mma_int_A_I16K4 mma_A; - typedef mma_int_A_I16K8 mma_A_K8; - typedef mma_int_B_J8K4 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K4 mma_A; + typedef mma_A_I16K8 mma_A_K8; + typedef mma_B_J8K4 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -1081,7 +1082,7 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += QI8_1) { const int k0 = k00 + k01; - ((mma_A_K8 *) A[n])[k01/QI8_1].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q2_K + k0, MMQ_MMA_TILE_X_K_Q2_K); + ((mma_A_K8 *) A[n])[k01/QI8_1].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q2_K + k0, MMQ_MMA_TILE_X_K_Q2_K); } } @@ -1118,24 +1119,25 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += QI8_1) { mma_B B[2]; - B[0].load(y_qs + j0*MMQ_TILE_Y_K + (k01 + 0), MMQ_TILE_Y_K); - B[1].load(y_qs + j0*MMQ_TILE_Y_K + 
(k01 + mma_B::K), MMQ_TILE_Y_K); + // Here load_generic is faster than load_ldmatrix. + B[0].load_generic(y_qs + j0*MMQ_TILE_Y_K + (k01 + 0), MMQ_TILE_Y_K); + B[1].load_generic(y_qs + j0*MMQ_TILE_Y_K + (k01 + mma_B::K), MMQ_TILE_Y_K); mma_C Cm[2]; if (k01 >= WARP_SIZE * 3/4) { mma_A A1; A1.x[0] = 0x01010101; A1.x[1] = 0x01010101; - Cm[0].mma_K4(A1, B[0]); - Cm[1].mma_K4(A1, B[1]); + Cm[0].mma(A1, B[0]); + Cm[1].mma(A1, B[1]); } #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C Cd[2]; - Cd[0].mma_K4(A[n][k01/4 + 0], B[0]); - Cd[1].mma_K4(A[n][k01/4 + 1], B[1]); + Cd[0].mma(A[n][k01/4 + 0], B[0]); + Cd[1].mma(A[n][k01/4 + 1], B[1]); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -1172,13 +1174,13 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma( #else GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(sum); NO_DEVICE_CODE; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template static __device__ __forceinline__ void load_tiles_q3_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else @@ -1186,7 +1188,7 @@ template static __device__ __forceinlin int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); int * x_sc = (int *) (x_df + txs.dm); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % QI3_K; @@ -1212,11 +1214,11 @@ template static __device__ __forceinlin const int x_qs_k = __vsubss4(x_ql_k | x_qh_k, 0x04040404); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + k] = x_qs_k; #else x_qs[i*(2*WARP_SIZE + 1) + k] = x_qs_k; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -1242,7 +1244,7 @@ template static __device__ __forceinlin const int sc = __vsubss4(sc_low | sc_high, 0x20202020); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE 
const int8_t * sc8 = (const int8_t *) ≻ const float d = bxi->d; @@ -1252,10 +1254,10 @@ template static __device__ __forceinlin } #else x_sc[i*(WARP_SIZE/8) + i/8 + threadIdx.x % (WARP_SIZE/8)] = sc; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } -#ifndef INT8_MMA_AVAILABLE +#ifndef NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps*WARP_SIZE) { int i = (i0 + threadIdx.y*WARP_SIZE + threadIdx.x) % mmq_y; @@ -1268,7 +1270,7 @@ template static __device__ __forceinlin x_df[i] = bxi->d; } -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template @@ -1317,7 +1319,7 @@ static __device__ __forceinline__ int unpack_scales_q45_K(const int * scales, co template static __device__ __forceinline__ void load_tiles_q4_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + 2*WARP_SIZE); #else @@ -1325,7 +1327,7 @@ template static __device__ __forceinlin int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); int * x_sc = (int *) (x_dm + txs.dm); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps) { @@ -1338,15 +1340,15 @@ template static __device__ __forceinlin const block_q4_K * bxi = (const block_q4_K *) x + kbx0 + i*stride; const int qs0 = get_int_b4(bxi->qs, threadIdx.x); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + 16*(threadIdx.x/8) + threadIdx.x % 8 + 0] = (qs0 >> 0) & 0x0F0F0F0F; x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + 16*(threadIdx.x/8) + threadIdx.x % 8 + 8] = (qs0 >> 4) & 0x0F0F0F0F; #else x_qs[i*(WARP_SIZE + 1) + threadIdx.x] = qs0; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps*16) { @@ -1407,7 +1409,7 @@ template 
static __device__ __forceinlin x_sc[i*(WARP_SIZE/8) + i/8 + ksc] = scales8; } -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template @@ -1446,7 +1448,7 @@ static __device__ __forceinline__ void vec_dot_q4_K_q8_1_dp4a( template static __device__ __forceinline__ void load_tiles_q5_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + WARP_SIZE*2); #else @@ -1454,7 +1456,7 @@ template static __device__ __forceinlin int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); int * x_sc = (int *) (x_dm + txs.dm); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps) { @@ -1478,16 +1480,16 @@ template static __device__ __forceinlin const int kq0 = ky - ky % (QI5_K/2) + threadIdx.x % (QI5_K/4) + 0; const int kq1 = ky - ky % (QI5_K/2) + threadIdx.x % (QI5_K/4) + QI5_K/4; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kq0] = ql0 | qh0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kq1] = ql1 | qh1; #else x_qs[i*(2*WARP_SIZE + 1) + kq0] = ql0 | qh0; x_qs[i*(2*WARP_SIZE + 1) + kq1] = ql1 | qh1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps*16) { @@ -1548,7 +1550,7 @@ template static __device__ __forceinlin x_sc[i*(WARP_SIZE/8) + i/8 + ksc] = scales8; } -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template @@ -1587,7 +1589,7 @@ static __device__ __forceinline__ void vec_dot_q5_K_q8_1_dp4a( template static __device__ __forceinline__ void load_tiles_q6_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * 
x_df = (float *) (x_qs + WARP_SIZE*2); int * x_sc = (int *) (x_df + WARP_SIZE/QI6_K); @@ -1596,7 +1598,7 @@ template static __device__ __forceinlin int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); int * x_sc = (int *) (x_df + txs.dm); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps) { @@ -1619,13 +1621,13 @@ template static __device__ __forceinlin const int kq0 = 2*threadIdx.x - threadIdx.x % (QI6_K/2) + 0; const int kq1 = 2*threadIdx.x - threadIdx.x % (QI6_K/2) + QI6_K/2; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q6_K + kq0] = __vsubss4(ql0 | qh0, 0x20202020); x_qs[i*MMQ_MMA_TILE_X_K_Q6_K + kq1] = __vsubss4(ql1 | qh1, 0x20202020); #else x_qs[i*(2*WARP_SIZE + 1) + kq0] = __vsubss4(ql0 | qh0, 0x20202020); x_qs[i*(2*WARP_SIZE + 1) + kq1] = __vsubss4(ql1 | qh1, 0x20202020); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI6_K; // == 1 if QK_K == 256 @@ -1641,11 +1643,11 @@ template static __device__ __forceinlin const block_q6_K * bxi = (const block_q6_K *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q6_K + kbxd] = bxi->d; #else x_df[i*(WARP_SIZE/QI6_K) + i/QI6_K + kbxd] = bxi->d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } #pragma unroll @@ -1658,11 +1660,11 @@ template static __device__ __forceinlin const block_q6_K * bxi = (const block_q6_K *) x + kbx0 + i*stride + (threadIdx.x % (WARP_SIZE/8)) / 4; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_sc[i*MMQ_MMA_TILE_X_K_Q6_K + threadIdx.x % (WARP_SIZE/8)] = get_int_b2(bxi->scales, threadIdx.x % (QI6_K/8)); #else x_sc[i*(WARP_SIZE/8) + i/8 + threadIdx.x % (WARP_SIZE/8)] = get_int_b2(bxi->scales, threadIdx.x % (QI6_K/8)); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -1702,11 +1704,11 @@ static __device__ __forceinline__ void 
vec_dot_q6_K_q8_1_dp4a( template static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE - typedef mma_int_A_I16K4 mma_A; - typedef mma_int_B_J8K4 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K4 mma_A; + typedef mma_B_J8K4 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -1732,8 +1734,8 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += 8) { const int k0 = k00 + k01; - A[n][k01/4 + 0].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q6_K + (k0 + 0), MMQ_MMA_TILE_X_K_Q6_K); - A[n][k01/4 + 1].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q6_K + (k0 + mma_A::K), MMQ_MMA_TILE_X_K_Q6_K); + A[n][k01/4 + 0].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q6_K + (k0 + 0), MMQ_MMA_TILE_X_K_Q6_K); + A[n][k01/4 + 1].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q6_K + (k0 + mma_A::K), MMQ_MMA_TILE_X_K_Q6_K); } #pragma unroll @@ -1771,8 +1773,9 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( mma_B B[2]; float dB[mma_C::ne/2]; - B[0].load(y_qs + j0*MMQ_TILE_Y_K + 0 + k01, MMQ_TILE_Y_K); - B[1].load(y_qs + j0*MMQ_TILE_Y_K + mma_B::K + k01, MMQ_TILE_Y_K); + // Here load_generic is faster than load_ldmatrix. 
+ B[0].load_generic(y_qs + j0*MMQ_TILE_Y_K + 0 + k01, MMQ_TILE_Y_K); + B[1].load_generic(y_qs + j0*MMQ_TILE_Y_K + mma_B::K + k01, MMQ_TILE_Y_K); #pragma unroll for (int l = 0; l < mma_C::ne/2; ++l) { @@ -1784,8 +1787,8 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C C[2]; - C[0].mma_K4(A[n][k01/4 + 0], B[0]); - C[1].mma_K4(A[n][k01/4 + 1], B[1]); + C[0].mma(A[n][k01/4 + 0], B[0]); + C[1].mma(A[n][k01/4 + 1], B[1]); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -1805,20 +1808,20 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( #else GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(sum); NO_DEVICE_CODE; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template static __device__ __forceinline__ void load_tiles_iq4_nl( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ4_NL, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI4_NL; const int kqsx = threadIdx.x % QI4_NL; @@ -1836,13 +1839,13 @@ template static __device__ __forceinlin const int aux_q4 = get_int_b2(bxi->qs, kqsx); const int2 v = get_int_from_table_16(aux_q4); const int k0 = 8 * (threadIdx.x / 4) + threadIdx.x % 4; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k0 + 0] = v.x; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k0 + 4] = v.y; #else x_qs[i*(2*WARP_SIZE + 1) + k0 + 0] = v.x; x_qs[i*(2*WARP_SIZE + 1) + k0 + 4] = v.y; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI4_NL; @@ -1858,25 +1861,25 @@ template static __device__ __forceinlin const 
block_iq4_nl * bxi = (const block_iq4_nl *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kbxd] = __half2float(bxi->d); #else x_df[i*(WARP_SIZE/4) + i/4 + kbxd] = __half2float(bxi->d); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq2_xxs( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ2_XXS, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI2_XXS/2); @@ -1905,36 +1908,36 @@ template static __device__ __forceinlin const int signs1 = __vcmpne4(((signs_packed & 0x30) << 3) | ((signs_packed & 0xC0) << 17), 0x00000000); const int grid1 = __vsub4(grid_pos[1] ^ signs1, signs1); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l + 0)] = grid0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l + 1)] = grid1; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 0)] = grid0; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 1)] = grid1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int ls = aux32 >> 28; const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kqsx] = (ls*d + d/2)/4; #else x_df[i*(WARP_SIZE/4) + i/4 + kqsx] = (ls*d + d/2)/4; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq2_xs( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int 
*) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = MMQ_DP4A_TXS_Q8_0_16; int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI2_XS/2); @@ -1959,38 +1962,38 @@ template static __device__ __forceinlin const int grid_l = __vsub4(grid_pos[0] ^ signs[0], signs[0]); const int grid_h = __vsub4(grid_pos[1] ^ signs[1], signs[1]); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + 8*kqsx + (2*l + 1)] = grid_h; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 1)] = grid_h; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int ls = bxi->scales[kqsx]; const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q3_K + 2*kqsx+0] = ((ls & 0x0F)*d + d/2)/4; x_df[i*MMQ_MMA_TILE_X_K_Q3_K + 2*kqsx+1] = ((ls >> 4)*d + d/2)/4; #else x_df[i*(2*WARP_SIZE*2/QI8_0) + i/(QI8_0/4) + 2*kqsx+0] = ((ls & 0x0F)*d + d/2)/4; x_df[i*(2*WARP_SIZE*2/QI8_0) + i/(QI8_0/4) + 2*kqsx+1] = ((ls >> 4)*d + d/2)/4; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq2_s( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ2_S, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI2_S/2); @@ -2022,38 +2025,38 @@ template static __device__ __forceinlin const int grid_l = __vsub4(grid_pos[0] ^ signs0, signs0); 
const int grid_h = __vsub4(grid_pos[1] ^ signs1, signs1); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + 8*kqsx + (2*l + 1)] = grid_h; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 1)] = grid_h; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int ls = bxi->scales[kqsx]; const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q3_K + 2*kqsx+0] = ((ls & 0x0F)*d + d/2)/4; x_df[i*MMQ_MMA_TILE_X_K_Q3_K + 2*kqsx+1] = ((ls >> 4)*d + d/2)/4; #else x_df[i*(2*WARP_SIZE*2/QI8_0) + i/(QI8_0/4) + 2*kqsx+0] = ((ls & 0x0F)*d + d/2)/4; x_df[i*(2*WARP_SIZE*2/QI8_0) + i/(QI8_0/4) + 2*kqsx+1] = ((ls >> 4)*d + d/2)/4; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq3_xxs( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ3_XXS, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI3_XXS/2); @@ -2080,36 +2083,36 @@ template static __device__ __forceinlin const int grid_l = __vsub4(grid_pos.x ^ signs[0], signs[0]); const int grid_h = __vsub4(grid_pos.y ^ signs[1], signs[1]); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l + 1)] = grid_h; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 1)] = grid_h; -#endif // INT8_MMA_AVAILABLE +#endif // 
NEW_MMA_AVAILABLE } const int ls = aux32 >> 28; const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kqsx] = (ls*d + d/2)/2; #else x_df[i*(WARP_SIZE/4) + i/4 + kqsx] = (ls*d + d/2)/2; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq3_s( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ3_S, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI3_S/2); @@ -2143,36 +2146,36 @@ template static __device__ __forceinlin const int grid_l = __vsub4(grid_pos.x ^ signs0, signs0); const int grid_h = __vsub4(grid_pos.y ^ signs1, signs1); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l+0)] = grid_l; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l+1)] = grid_h; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l+0)] = grid_l; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l+1)] = grid_h; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int ls = 1 + 2*((bxi->scales[kqsx/2] >> (((2*kqsx) << 1) & 0x04)) & 0x0F); const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kqsx] = ls*d; #else x_df[i*(WARP_SIZE/4) + i/4 + kqsx] = ls*d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq1_s( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_ds = (half2 *) 
(x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ3_S, mmq_y); int * x_qs = (int *) x_tile; half2 * x_ds = (half2 *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % QI1_S; @@ -2198,37 +2201,37 @@ template static __device__ __forceinlin const int grid0 = (grid >> 0) & 0x0F0F0F0F; const int grid1 = (grid >> 4) & 0x0F0F0F0F; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + 8*kqsx + (2*l+0)] = grid0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + 8*kqsx + (2*l+1)] = grid1; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l+0)] = grid0; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l+1)] = grid1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const float d1q = __half2float(bxi->d) * (((qh >> 11) & 0x0E) + 1); const float delta = -1.0f + IQ1S_DELTA - (qh & 0x8000) * (2.0f*IQ1S_DELTA/0x8000); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_ds[i*MMQ_MMA_TILE_X_K_Q8_1 + kqsx] = make_half2(d1q, d1q*delta); #else x_ds[i*(WARP_SIZE/4) + i/4 + kqsx] = make_half2(d1q, d1q*delta); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq4_xs( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ4_XS, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = 0; // threadIdx.x / QI4_XS const int kqsx = threadIdx.x; // threadIdx.x % QI4_XS @@ -2246,13 +2249,13 @@ template static __device__ __forceinlin const int aux_q4 = get_int_b4(bxi->qs, kqsx); const int2 v = get_int_from_table_16(aux_q4); const int k0 = 8 * (threadIdx.x / 4) + 
threadIdx.x % 4; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k0 + 0] = v.x; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k0 + 4] = v.y; #else x_qs[i*(2*WARP_SIZE + 1) + k0 + 0] = v.x; x_qs[i*(2*WARP_SIZE + 1) + k0 + 4] = v.y; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } #pragma unroll @@ -2270,11 +2273,11 @@ template static __device__ __forceinlin const int ls = ((bxi->scales_l[(threadIdx.x % 8)/2] >> (4*(threadIdx.x % 2))) & 0x0F) | (((bxi->scales_h >> (2*(threadIdx.x % 8))) & 0x03) << 4); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + threadIdx.x % 8] = d * (ls - 32); #else x_df[i*(WARP_SIZE/4) + i/4 + threadIdx.x % 8] = d * (ls - 32); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -2307,16 +2310,16 @@ template static __device__ __forceinline__ void mmq_write_back_mma( const float * __restrict__ sum, float * __restrict__ dst, const int & stride, const int & i_max, const int & j_max) { - typedef mma_int_C_I16J8 mma_C; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; constexpr int ntx = rows_per_warp/mma_C::I; // Number of x minitiles per warp. 
const int i0 = (threadIdx.y / ntx) * (ntx*mma_C::I); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE static_assert(nwarps*mma_C::I == mmq_y, "nwarps*mma_C::I != mmq_y"); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += ntx*mma_C::J) { @@ -2505,13 +2508,13 @@ static __device__ void mul_mat_q_process_tile( int * tile_y = (int *) data_mul_mat_q; int * tile_x = tile_y + GGML_PAD(mmq_x*(WARP_SIZE + WARP_SIZE/QI8_1), nwarps*WARP_SIZE); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE constexpr vec_dot_mmq_t vec_dot = mmq_type_traits::vec_dot_mma; constexpr mmq_write_back_t write_back = mmq_write_back_mma; #else constexpr vec_dot_mmq_t vec_dot = mmq_type_traits::vec_dot_dp4a; constexpr mmq_write_back_t write_back = mmq_write_back_dp4a; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE constexpr int blocks_per_iter = MMQ_ITER_K / qk; @@ -2643,7 +2646,7 @@ static __global__ void mul_mat_q( const int jt = kbc / (blocks_per_ne00*nty); const int it = (kbc - jt*(blocks_per_ne00*nty)) / blocks_per_ne00; - constexpr bool fixup = true; // Last index writes it data to fixup buffer to avoid data races with other blocks. + constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks. mul_mat_q_process_tile (x, yc, dst, tmp_fixup, ne00, ne01, stride01, ne10, ne11, stride11, ne0, it, jt, kb0_start, kb0_stop); @@ -2749,7 +2752,7 @@ template static int mmq_get_shmem(const int mmq_x, const int mmq_y, const int cc) { const tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(type, mmq_y); const int mmq_tile_x_k = mmq_get_mma_tile_x_k(type); - const int shmem_x = int8_mma_available(cc) ? mmq_y*mmq_tile_x_k*sizeof(int) : txs.qs*sizeof(int) + txs.dm*sizeof(half2) + txs.sc*sizeof(int); + const int shmem_x = new_mma_available(cc) ? 
mmq_y*mmq_tile_x_k*sizeof(int) : txs.qs*sizeof(int) + txs.dm*sizeof(half2) + txs.sc*sizeof(int); const int shmem_y = mmq_x*sizeof(block_q8_1_mmq); return shmem_x + GGML_PAD(shmem_y, MMQ_NWARPS*WARP_SIZE*sizeof(int)); } diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu new file mode 100644 index 000000000..f09bdeff7 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-mma-f16.cuh" + +DECL_FATTN_MMA_F16_CASE(64, 16); +DECL_FATTN_MMA_F16_CASE(80, 16); +DECL_FATTN_MMA_F16_CASE(96, 16); +DECL_FATTN_MMA_F16_CASE(112, 16); +DECL_FATTN_MMA_F16_CASE(128, 16); +DECL_FATTN_MMA_F16_CASE(256, 16); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu new file mode 100644 index 000000000..221108873 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-mma-f16.cuh" + +DECL_FATTN_MMA_F16_CASE(64, 32); +DECL_FATTN_MMA_F16_CASE(80, 32); +DECL_FATTN_MMA_F16_CASE(96, 32); +DECL_FATTN_MMA_F16_CASE(112, 32); +DECL_FATTN_MMA_F16_CASE(128, 32); +DECL_FATTN_MMA_F16_CASE(256, 32); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu new file mode 100644 index 000000000..d24b08575 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-mma-f16.cuh" + +DECL_FATTN_MMA_F16_CASE(64, 64); +DECL_FATTN_MMA_F16_CASE(80, 64); +DECL_FATTN_MMA_F16_CASE(96, 64); +DECL_FATTN_MMA_F16_CASE(112, 64); +DECL_FATTN_MMA_F16_CASE(128, 64); +DECL_FATTN_MMA_F16_CASE(256, 64); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu new file mode 100644 index 000000000..bdf86c0ea --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-mma-f16.cuh" + +DECL_FATTN_MMA_F16_CASE(64, 8); +DECL_FATTN_MMA_F16_CASE(80, 8); +DECL_FATTN_MMA_F16_CASE(96, 8); +DECL_FATTN_MMA_F16_CASE(112, 8); +DECL_FATTN_MMA_F16_CASE(128, 8); +DECL_FATTN_MMA_F16_CASE(256, 8); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu deleted file mode 100644 index 2d94e65c2..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 16, float); -DECL_FATTN_WMMA_F16_CASE(80, 16, float); -DECL_FATTN_WMMA_F16_CASE(96, 16, float); -DECL_FATTN_WMMA_F16_CASE(112, 16, float); -DECL_FATTN_WMMA_F16_CASE(128, 16, float); -DECL_FATTN_WMMA_F16_CASE(256, 16, float); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu deleted file mode 100644 index c3d9df3c4..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +++ /dev/null @@ -1,9 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 32, float); -DECL_FATTN_WMMA_F16_CASE(80, 32, float); -DECL_FATTN_WMMA_F16_CASE(96, 32, float); -DECL_FATTN_WMMA_F16_CASE(112, 32, float); -DECL_FATTN_WMMA_F16_CASE(128, 32, float); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu deleted file mode 100644 index bb680e401..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 16, half); -DECL_FATTN_WMMA_F16_CASE(80, 16, half); -DECL_FATTN_WMMA_F16_CASE(96, 16, half); -DECL_FATTN_WMMA_F16_CASE(112, 16, half); -DECL_FATTN_WMMA_F16_CASE(128, 16, half); -DECL_FATTN_WMMA_F16_CASE(256, 16, half); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu deleted file mode 100644 index 073f71b1f..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 32, half); -DECL_FATTN_WMMA_F16_CASE(80, 32, half); -DECL_FATTN_WMMA_F16_CASE(96, 32, half); -DECL_FATTN_WMMA_F16_CASE(112, 32, half); -DECL_FATTN_WMMA_F16_CASE(128, 32, half); -DECL_FATTN_WMMA_F16_CASE(256, 32, half); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu deleted file mode 100644 index d30710c5f..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +++ /dev/null @@ -1,8 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 8, half); -DECL_FATTN_WMMA_F16_CASE(96, 8, half); -DECL_FATTN_WMMA_F16_CASE(128, 8, half); -DECL_FATTN_WMMA_F16_CASE(256, 8, half); diff --git a/ggml/src/ggml-cuda/template-instances/generate_cu_files.py b/ggml/src/ggml-cuda/template-instances/generate_cu_files.py index d7874e6ea..a2628f16e 100755 --- a/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +++ b/ggml/src/ggml-cuda/template-instances/generate_cu_files.py @@ -12,13 +12,13 @@ SOURCE_FATTN_VEC = """// This file has been autogenerated by generate_cu_files.p DECL_FATTN_VEC_F{vkq_size}_CASE({head_size}, {type_k}, {type_v}); """ -SOURCE_FATTN_WMMA_START = """// This file has been autogenerated by generate_cu_files.py, do not edit manually. +SOURCE_FATTN_MMA_START = """// This file has been autogenerated by generate_cu_files.py, do not edit manually. -#include "../fattn-wmma-f16.cuh" +#include "../fattn-mma-f16.cuh" """ -SOURCE_FATTN_WMMA_CASE = "DECL_FATTN_WMMA_F16_CASE({head_size}, {cols_per_block}, {kq_acc_t});\n" +SOURCE_FATTN_MMA_CASE = "DECL_FATTN_MMA_F16_CASE({head_size}, {cols_per_block});\n" TYPES_MMQ = [ "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0", @@ -57,20 +57,12 @@ for vkq_size in [16, 32]: with open(f"fattn-vec-f{vkq_size}-instance-hs{head_size}-{get_short_name(type_k)}-{get_short_name(type_v)}.cu", "w") as f: f.write(SOURCE_FATTN_VEC.format(vkq_size=vkq_size, head_size=head_size, type_k=type_k, type_v=type_v)) -for kq_acc_t in ["half", "float"]: - for cols_per_block in [8, 16, 32]: - if kq_acc_t == "float" and cols_per_block == 8: - continue +for cols_per_block in [8, 16, 32, 64]: + with open(f"fattn-mma-f16-instance-cpb{cols_per_block}.cu", "w") as f: + f.write(SOURCE_FATTN_MMA_START) - with open(f"fattn-wmma-f16-instance-kq{kq_acc_t}-cpb{cols_per_block}.cu", "w") as f: - f.write(SOURCE_FATTN_WMMA_START) - - for head_size in [64, 80, 96, 112, 128, 256]: - if cols_per_block == 
8 and head_size % 32 != 0: # wmma fragment is 8x32 - continue - if kq_acc_t == "float" and cols_per_block == 32 and head_size == 256: # register spilling, bad performance - continue - f.write(SOURCE_FATTN_WMMA_CASE.format(kq_acc_t=kq_acc_t, cols_per_block=cols_per_block, head_size=head_size)) + for head_size in [64, 80, 96, 112, 128, 256]: + f.write(SOURCE_FATTN_MMA_CASE.format(cols_per_block=cols_per_block, head_size=head_size)) for type in TYPES_MMQ: with open(f"mmq-instance-{get_short_name(type)}.cu", "w") as f: diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index 8594093f0..129478ed7 100644 --- a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -25,6 +25,7 @@ #define CU_MEM_LOCATION_TYPE_DEVICE hipMemLocationTypeDevice #define CU_MEM_ACCESS_FLAGS_PROT_READWRITE hipMemAccessFlagsProtReadWrite #define CU_CHECK(fn) {hipError_t err = fn; if(err != hipSuccess) { GGML_ABORT("HipVMM Failure: %s\n", hipGetErrorString(err)); }} +#define __shfl_sync(mask, var, laneMask, width) __shfl(var, laneMask, width) #define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) #define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6 #define cublasCreate hipblasCreate diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index 7a877bdc1..eb03e10fa 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -50,7 +50,7 @@ file(GLOB GGML_HEADERS_ROCM "../ggml-cuda/*.cuh") list(APPEND GGML_HEADERS_ROCM "../../include/ggml-cuda.h") file(GLOB GGML_SOURCES_ROCM "../ggml-cuda/*.cu") -file(GLOB SRCS "../ggml-cuda/template-instances/fattn-wmma*.cu") +file(GLOB SRCS "../ggml-cuda/template-instances/fattn-mma*.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) diff --git a/ggml/src/ggml-musa/CMakeLists.txt 
b/ggml/src/ggml-musa/CMakeLists.txt index 415b2b2e0..2f555416e 100644 --- a/ggml/src/ggml-musa/CMakeLists.txt +++ b/ggml/src/ggml-musa/CMakeLists.txt @@ -29,7 +29,7 @@ if (MUSAToolkit_FOUND) list(APPEND GGML_HEADERS_MUSA "../../include/ggml-cuda.h") file(GLOB GGML_SOURCES_MUSA "../ggml-cuda/*.cu") - file(GLOB SRCS "../ggml-cuda/template-instances/fattn-wmma*.cu") + file(GLOB SRCS "../ggml-cuda/template-instances/fattn-mma*.cu") list(APPEND GGML_SOURCES_MUSA ${SRCS}) file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu") list(APPEND GGML_SOURCES_MUSA ${SRCS}) From 90f9b88afb6447d3929843a2aa98c0f11074762d Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Sun, 2 Feb 2025 19:58:34 +0000 Subject: [PATCH 23/46] nit: more informative crash when grammar sampler fails (#11593) --- src/llama-grammar.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp index 6be5cbe0e..9b518d1ac 100644 --- a/src/llama-grammar.cpp +++ b/src/llama-grammar.cpp @@ -1213,5 +1213,7 @@ void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string } grammar.partial_utf8 = decoded.second; - GGML_ASSERT(!grammar.stacks.empty()); + if (grammar.stacks.empty()) { + throw std::runtime_error("Unexpected empty grammar stack after accepting piece: " + piece); + } } From 4d0598e1445a64c99cf2faac72f8d5d023f1e6a1 Mon Sep 17 00:00:00 2001 From: uvos Date: Sun, 2 Feb 2025 22:08:05 +0100 Subject: [PATCH 24/46] HIP: add GGML_CUDA_CC_IS_* for amd familys as increasing cc archtectures for amd gpus are not supersets of eatch other (#11601) This fixes a bug where RDNA1 gpus other than gfx1010 where not handled correctly --- ggml/src/ggml-cuda/common.cuh | 7 +++++++ ggml/src/ggml-cuda/ggml-cuda.cu | 4 ++-- ggml/src/ggml-cuda/mmq.cu | 2 +- ggml/src/ggml-cuda/mmq.cuh | 2 +- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 88be8fc8a..232163c1c 100644 --- 
a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -61,6 +61,13 @@ #define GGML_CUDA_CC_RDNA2 (GGML_CUDA_CC_OFFSET_AMD + 0x1030) // RX 6000, minimum for dp4a #define GGML_CUDA_CC_RDNA3 (GGML_CUDA_CC_OFFSET_AMD + 0x1100) // RX 7000, minimum for WMMA +#define GGML_CUDA_CC_IS_RDNA(cc) (cc >= GGML_CUDA_CC_RDNA1) +#define GGML_CUDA_CC_IS_RDNA1(cc) (cc >= GGML_CUDA_CC_RDNA1 && cc < GGML_CUDA_CC_RDNA2) +#define GGML_CUDA_CC_IS_RDNA2(cc) (cc >= GGML_CUDA_CC_RDNA2 && cc < GGML_CUDA_CC_RDNA3) +#define GGML_CUDA_CC_IS_RDNA3(cc) (cc >= GGML_CUDA_CC_RDNA3) +#define GGML_CUDA_CC_IS_GCN(cc) (cc > GGML_CUDA_CC_OFFSET_AMD && cc < GGML_CUDA_CC_CDNA) +#define GGML_CUDA_CC_IS_CDNA(cc) (cc >= GGML_CUDA_CC_CDNA && cc < GGML_CUDA_CC_RDNA1) + #define GGML_CUDA_CC_QY1 210 #define GGML_CUDA_CC_QY2 220 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 383131c77..bda10aec1 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -1205,7 +1205,7 @@ static void ggml_cuda_op_mul_mat_cublas( CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream)); - if (compute_capability == GGML_CUDA_CC_CDNA) { + if (GGML_CUDA_CC_IS_CDNA(compute_capability)) { const float alpha = 1.0f; const float beta = 0.0f; CUBLAS_CHECK( @@ -1750,7 +1750,7 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co beta = &beta_f32; } - if (ggml_cuda_info().devices[ctx.device].cc == GGML_CUDA_CC_CDNA) { + if (GGML_CUDA_CC_IS_CDNA(ggml_cuda_info().devices[ctx.device].cc)) { cu_compute_type = CUBLAS_COMPUTE_32F; alpha = &alpha_f32; beta = &beta_f32; diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu index 83cb78cbd..45212f66c 100644 --- a/ggml/src/ggml-cuda/mmq.cu +++ b/ggml/src/ggml-cuda/mmq.cu @@ -148,5 +148,5 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) { return cc < GGML_CUDA_CC_VOLTA || ne11 < MMQ_DP4A_MAX_BATCH_SIZE; } - return (cc < GGML_CUDA_CC_RDNA3 && cc != 
GGML_CUDA_CC_CDNA && cc != GGML_CUDA_CC_VEGA20) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE; + return (!GGML_CUDA_CC_IS_RDNA3(cc) && !GGML_CUDA_CC_IS_CDNA(cc) && !GGML_CUDA_CC_IS_GCN(cc)) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE; } diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index c05c84778..7a2c4d85b 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -120,7 +120,7 @@ static constexpr __device__ int get_mmq_x_max_device() { } static constexpr int get_mmq_y_host(const int cc) { - return cc >= GGML_CUDA_CC_OFFSET_AMD ? (cc == GGML_CUDA_CC_RDNA1 ? 64 : 128) : (cc >= GGML_CUDA_CC_VOLTA ? 128 : 64); + return cc >= GGML_CUDA_CC_OFFSET_AMD ? (GGML_CUDA_CC_IS_RDNA1(cc) ? 64 : 128) : (cc >= GGML_CUDA_CC_VOLTA ? 128 : 64); } static constexpr __device__ int get_mmq_y_device() { From 396856b40029dd6747d2fbdb179e828683418045 Mon Sep 17 00:00:00 2001 From: uvos Date: Sun, 2 Feb 2025 22:40:09 +0100 Subject: [PATCH 25/46] CUDA/HIP: add support for selectable warp size to mmv (#11519) CUDA/HIP: add support for selectable warp size to mmv --- ggml/src/ggml-cuda/common.cuh | 8 +++++++ ggml/src/ggml-cuda/mmv.cu | 38 ++++++++++++++++++++------------ ggml/src/ggml-cuda/vendors/hip.h | 2 ++ 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 232163c1c..174916bc9 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -176,6 +176,14 @@ static constexpr bool new_mma_available(const int cc) { return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_TURING; } +static constexpr __device__ int ggml_cuda_get_physical_warp_size() { +#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) + return __AMDGCN_WAVEFRONT_SIZE; +#else + return 32; +#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) +} + [[noreturn]] static __device__ void no_device_code( const char * file_name, const int line, const char * function_name, const int arch, const char 
* arch_list) { diff --git a/ggml/src/ggml-cuda/mmv.cu b/ggml/src/ggml-cuda/mmv.cu index ac45f2d17..5a9ddd958 100644 --- a/ggml/src/ggml-cuda/mmv.cu +++ b/ggml/src/ggml-cuda/mmv.cu @@ -5,9 +5,10 @@ template static __global__ void mul_mat_vec( const T * __restrict__ x, const float * __restrict__ y, float * __restrict__ dst, const int64_t ncols2, const int64_t stride_row, const int64_t channel_ratio, const int64_t stride_channel_x, const int64_t stride_channel_y, const int64_t stride_channel_dst) { - const int64_t row = blockIdx.x; - const int64_t channel = blockIdx.z; - const int tid = threadIdx.x; + const int64_t row = blockIdx.x; + const int64_t channel = blockIdx.z; + const int tid = threadIdx.x; + constexpr int warp_size = ggml_cuda_get_physical_warp_size(); x += (channel/channel_ratio)*stride_channel_x + row*stride_row; y += channel *stride_channel_y; @@ -18,8 +19,8 @@ static __global__ void mul_mat_vec( extern __shared__ char data_mmv[]; float * buf_iw = (float *) data_mmv; - if (block_size > WARP_SIZE) { - if (tid < WARP_SIZE) { + if (block_size > warp_size) { + if (tid < warp_size) { buf_iw[tid] = 0.0f; } __syncthreads(); @@ -67,16 +68,16 @@ static __global__ void mul_mat_vec( static_assert(std::is_same::value, "unsupported type"); } - sumf = warp_reduce_sum(sumf); + sumf = warp_reduce_sum(sumf); - if (block_size > WARP_SIZE) { - buf_iw[tid/WARP_SIZE] = sumf; + if (block_size > warp_size) { + buf_iw[tid/warp_size] = sumf; __syncthreads(); - if (tid >= WARP_SIZE) { + if (tid >= warp_size) { return; } sumf = buf_iw[tid]; - sumf = warp_reduce_sum(sumf); + sumf = warp_reduce_sum(sumf); } if (tid != 0) { @@ -96,10 +97,19 @@ static void launch_mul_mat_vec_cuda( GGML_ASSERT(stride_row % 2 == 0); GGML_ASSERT(nchannels_y % nchannels_x == 0); const int64_t channel_ratio = nchannels_y / nchannels_x; + int device; + int warp_size; - int64_t block_size_best = WARP_SIZE; - int64_t niter_best = (ncols + 2*WARP_SIZE - 1) / (2*WARP_SIZE); - for (int64_t block_size = 
2*WARP_SIZE; block_size <= 256; block_size += WARP_SIZE) { + CUDA_CHECK(cudaGetDevice(&device)); + warp_size = ggml_cuda_info().devices[device].warp_size; + + int64_t block_size_best = warp_size; + int64_t niter_best = (ncols + 2*warp_size - 1) / (2*warp_size); + int64_t max_block_size = 256; + if(ggml_cuda_info().devices[device].cc > GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_info().devices[device].cc < GGML_CUDA_CC_RDNA1) { + max_block_size = 128; + } + for (int64_t block_size = 2*warp_size; block_size <= max_block_size; block_size += warp_size) { const int64_t niter = (ncols + 2*block_size - 1) / (2*block_size); if (niter < niter_best) { niter_best = niter; @@ -107,7 +117,7 @@ static void launch_mul_mat_vec_cuda( } } - const int smem = WARP_SIZE*sizeof(float); + const int smem = warp_size*sizeof(float); const dim3 block_nums(nrows, 1, nchannels_y); const dim3 block_dims(block_size_best, 1, 1); switch (block_size_best) { diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index 129478ed7..81964611c 100644 --- a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -1,5 +1,6 @@ #pragma once +#define HIP_ENABLE_WARP_SYNC_BUILTINS 1 #include #include #include @@ -8,6 +9,7 @@ // for rocblas_initialize() #include "rocblas/rocblas.h" #endif // __HIP_PLATFORM_AMD__ + #define CUBLAS_COMPUTE_16F HIPBLAS_R_16F #define CUBLAS_COMPUTE_32F HIPBLAS_R_32F #define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F From 6eecde3cc8fda44da7794042e3668de4af3c32c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sun, 2 Feb 2025 23:48:29 +0100 Subject: [PATCH 26/46] HIP: fix flash_attn_stream_k_fixup warning (#11604) --- ggml/src/ggml-cuda/fattn-common.cuh | 10 ++++++++++ ggml/src/ggml-cuda/softmax.cu | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index cfd7c0f44..d40ee2da4 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ 
b/ggml/src/ggml-cuda/fattn-common.cuh @@ -516,6 +516,12 @@ constexpr __device__ dequantize_1_f32_t get_dequantize_1_f32(ggml_type type_V) { nullptr; } +// The HIP compiler for some reason complains that it can't unroll a loop because of the jt*ncols + j >= ne01 conditional. +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpass-failed" +#endif // __clang__ + template // D == head size #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) __launch_bounds__(D, 1) @@ -614,6 +620,10 @@ static __global__ void flash_attn_stream_k_fixup( } } +#ifdef __clang__ +#pragma clang diagnostic pop +#endif // __clang__ + template // D == head size #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) __launch_bounds__(D, 1) diff --git a/ggml/src/ggml-cuda/softmax.cu b/ggml/src/ggml-cuda/softmax.cu index da377200e..aac6e0999 100644 --- a/ggml/src/ggml-cuda/softmax.cu +++ b/ggml/src/ggml-cuda/softmax.cu @@ -18,7 +18,7 @@ __device__ float __forceinline__ t2f32(half val) { #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpass-failed" -#endif +#endif // __clang__ template static __global__ void soft_max_f32( const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y, @@ -126,7 +126,7 @@ static __global__ void soft_max_f32( } #ifdef __clang__ #pragma clang diagnostic pop -#endif +#endif // __clang__ static __global__ void soft_max_back_f32( const float * grad, const float * dstf, float * dst, const int ncols, const float scale) { From d92cb67e37abc23b1c6f7b0ef27a9889da8537e3 Mon Sep 17 00:00:00 2001 From: mashdragon <122402293+mashdragon@users.noreply.github.com> Date: Mon, 3 Feb 2025 09:42:55 +0000 Subject: [PATCH 27/46] server : (webui) Fix Shift+Enter handling (#11609) * Fix Shift+Enter handling `exact` on the Enter handler means the message is not sent when Shift+Enter is pressed anyway * build index.html.gz --------- Co-authored-by: Xuan Son Nguyen --- 
examples/server/public/index.html.gz | Bin 1207150 -> 1207129 bytes examples/server/webui/index.html | 1 - 2 files changed, 1 deletion(-) diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz index 582ccc0d3f8d07ce79dd9e978772f9c5ea85c426..3a2529aa2fb84485aa480d3d476e5ef27eab8e27 100644 GIT binary patch delta 8942 zcmV!{H12 z(h+_QKaXF)egsEW0U!zsLA0db#}RI6y1_zKW>!|_S8Eo}Z$Yk~HnVSGpXJZDr#87V z(Blg9_^FwHUE0m10q1fu*xm3pygeO|e~a7Qm%RNn`1=#n+b7N|tadR}jZ)0?f?Zp!lfBhwr3xtLY#n)~T|a~?AL*l@f53F`U$ zX6Az3Jl{aMUvoeHQp)#&eZI;0EO~vnd>=RGe}_%Im#{y^h;3S^|R#pxLoJ(y=S}}+vog_ z+s(NG^1oirZs9&c7wnenmjzq7(EeP`4CI?{&hNo5zlU>ISD&DNh;zT&08V_rncYJE zPb?qvZv{SoU-<2;VlNl=?8D~#e>v!>+@F2(?Pa+?+xl~Hk#8AZu#hyiMfVA~zl470 zg6%?XU@P_YY-U53{EqF*Z{SqGM+QBt-(yXgLDfCjU3>w0zF_gLzrsH7-(es3!iD|K z1h(qF+Z6k}vs>`{>$kIez>in6nL4}t1m9C`z&`0~xXeK8G0e z+1vR1Vguj(uAE;kuik)Wf3DtMy`O#mX+}3!A7<~~zVLoDlk3Zw{%+~1^YCT^Ulx9@ z|FCvG%H5=; zs{hmTtBW6>-&}lre)H_37DSNuhku-ZaelnJoiAU#b)Mf`ethvNf5~ZndHdn!`TWOV z?fv6jz|&qT*T1rOl=IQZo?UKEPmjP94e0K{^Rpyy?x*u0c5lj@b0;GYCCQWo4)t%z@n+>yg0sYN$ux>L zsgvYfa2S8Vp&#MA+Iey%MIimO_>j?bO-mX4h&87u3bf&T!OQ}0xF#$`5nlV0zs~4= zK&C!*0^b!ff21MLuy8^@xSwvVO30NhT5`R2qU=4Gnvj*!maHG{6S7HtO172(tsH@= z2dx6o)p57rP}Zj%g7=gwD$Fx*34{#u#ZJO4ik`=Tm)(c+c>k(&(sIHq4WA0~%%2Jk zK5t<%yM8a!!EM#*g%kM8)Jaw~l`K-{M&JPnE#;r3f6fhEi%kf1ZXRA7~CS zq0CJM)|s2OnW~%m2^9-2o<73Diz?Sj^Q2uAc2?M@6?$AapFS1+cpvf|M(;!5!oMS4jbK`08qb=>8!<=iy-R}N zxQVy7e8>jW5VI-1U7O5;!@be1JfB%*lHVJJm3otd!?>d z?ICp1r61vxNrbgZZ^W}?ZJOZi<$ocp(}P}w)7O%_cAXrT@3-XV@!bQG)xK^)IrKJprXd2EMB6`jE_Ww&9wfs>RR ze}d|VG;or(0Z^@BfWVpTM-fQ{I`y=4oA%Hcsnw`0{JR>hYD!w9?55R5 zlhZh+f;wf*%71JUih9MeS^SL2wi5uTe*~n&B+Aw)N#&GegoCFlv)|jnw*SSCmdy+g z6}TNjS?i;$RVmBb5ZZblZC#*k454lG(KZCygS2h-(KZFz_7K|RKHB37?LpeM`Y2mf z%EPp6_tCZm+V&uoO!|mVD#Uf!80b1mukR=o`i86;gQQpceNz?aPpm!V*ZN#ve`~nD zZ0p6(5FT0|4{bQSXnhc%wLpM6&VQ^SOpHDzA{OY6(vjK6L<9wM2<>qn?Xd`EkJ6IW z$HW?rJX#+*Xf5bq452;gqdj>P@@T!-pf$09Vb3EpulD<}T7#frZ1g^)(3?nMsfK2o 
zmTKrjcxZh*Gy#pwg3AyddLIvce>fWHeL$hN07ao8Z8C(3*~df#3awPq5GLb3CL&@K z6|u)dm{@&GEP=`6EXwXF%eP37(Yf`yU;{$qKPI?#*eWtwcpQG0klkW zSi2Z~Xks+cWUQG(h--bs1sJgg(boHD>jFd#BX0B&H-=-A(FYqw3v8&ve-u9Mqb}mb zFydApaqBTdBcl%3c* z`=Ddo0v$t0Ykj0e#27+a?;|Y&#Sqd)A88REhLASZZeqs}%72qS%9F>SlHCg&b{lXQhN2Cb0NcI5VK;#T@M+jQd(sCS zlO}Lz_Cz;T+f*k*cxZh*wBG|xlRn^>v;c=LFBh!_SK4H( z82kEQPDAhIB7ljmTc)ZhrZI?#(aS_Y6Wul~%~r;$HHeGZ%S8YbL)RyUp^PWP>N4)- zG8TtznmSe#Yu^pWoPUPZ%f+ezmpRdty)izgVfS*eYtW^uin;d?G^a7?>`(Z{N#k9FM`#%0{g zMFbZ`ooEwnteAtiSiM}V8ieVpsqVdHto9%JY@YgLV=oIVU=Wi@FOx|P!AxDZ#+o|J zDV2V&RB9MDR#eR%4|7X(5AW|fs_l`$5A#wLm=L3=AicAM)=@(1ETMOl&^t>Q9VLv` z65%?ac?#D74}U%wQn5M-V;7076wx{eSRshRd)RTJUsknxBk*vWZM;H)^O(u*2!}F{ zoP|yh;5g~63!d?5Op?|iqV?u0DPQ^)6=iZHjj3m3m zX^H}FoX4GEcOQKl-w?PdKca7cKNXIgFpyjKh9_x-w-hk za$yMFfk^l-B#n7WoH(ve@26^BsL*Ti5!B{7$#lM^w3_y7>_=1POGPUCz^ra+6loAY z!wHR|Bvv-udCO2=pl; zMt1HhA5}SZyb2qNS>2LoRaNL z1lubJR=x+JtbXwDD8?%%SU71KZ&=V$9xv8ihksfFn2M~m(PH-e&`%`~XKST4PaT&$ zN-E8i)jA#S&76{E{&heN>y#c>H&LcL%=MHw-a~os0}}QrU9Te#U)DzoVgY8=6^{^v z+Qw?KsJW*#mOBt`9^ch?2?O|QeC&eVpaG$-c|@SB7$nT20_X7^=Dqf`jIP__wd+JK z34iLfs&>A44v5+m!V@7=J&vwgE{oT6M{J<@Chzr?s{Wj4859IK16eU9m3px^G*0U2 zE%g-zl@(n)=LM{4DyU~6zePA&hYi<1l#m;*q2&%)&a2;|SMd>&j-=x}9s7}+k`Qul zb8M|74DcSb!!X!Mxp-gc>r%W7*5sk-IDf7OH>%Lw0tZHox*iUkIT!>$!S2E}=X0IF zJv5cSqHmckA02n~(vqZM(yvnNkZpyb&TgibvF7Wv+(=o**YINfO&6gN%i@=(qAU339a}`ubh<}oMY@p;0 zS7qJREbIb`d^myNjwyd}kLQGLp#I?;8z`hTVhEkAw31rP=1+l#!T|DJ#1Y&!<$%v* zC!>{CHb|ve1g#(#mB&$~je0el^PbZP&1r;}t2m>q;n?7%CM-fHFqpmzgD5+dR+J{w z(P*>T$Qxab)8$B06lIj%E`KG&kp5Is#}eWrW&R9Pj8myP57|oKqq(0CZ01hN99BZN zKG~edcc+qq6r_%kI&O%B&T(_gKhbDK`8Iu#N=|3V(YQR8oO!k^Or{ z6j`$QVj@??Merz%8chM3uC;UB_#w&jZw13)W&;yV6fcllwXvGrFiW52&GR5IIuPxp zF!qc(DLt4XxbN+0qX2eX=q?VXC?r|tEJ-E@BwErHIz2r_O0~dO3H{9teD~&5ipZww z2({+_mfaOstwpo$wSOAjV&(@>2KkYw0qGI-Ma*ycjYE6X9r($<$Lb8$?MF+f-%GEf z{u!Y^Y_lEa#g8Q9`RSwCJAlZlqAi67XE?Ey|v3Q#^!syq2{tg7Jni0!amj z+Oy*bDIP;_Ap%`KgdovwsQ;sY{?CSafS3$L;A#8;lgMi}H;5_8vMhBCWe_^a-%w``SAc@~ 
zNGOfLIQSdE)%5lJ2DN6WUM#DAY`_;i&Lg1wk#xLDlYbC6T` z0@(co>|S?N_*0A{unvFA6iCyN5-QmIBl9(fA*}-uNaT&t-ohRU2qjW!3@9Ygx~;9( zf!NM9YbpVol`jVLs-mDYhD9z97F^I5{ImKC+F(;d3D!`ya=duc*7$*6cDq15h#il0 zx+4nRR;*?*X5NPNTo!|ctq2_=#3XzZO)Kq{_OXyI3|g3#+m%Qwy-HO*Aa4V;8IP*ub(q(XbAutK^BIVNEs-1i#ckR?P`yu9K2Y4KZ>722Q}y zWq(qXk|8D;QVD00;Zp(-JD46Ixx4g$gCiJ~HqD;J0T4szkmdi_vYzd`{nNP{ZU zVPjoH}1}!mJFxL{32Dj)5s>jvN6u?@>WsOxzhI2jf z=Hu$bEtF?3bFwWs@ewoZE}S%eKn~8oiU@y ztj1T*rbJ-OkBQzo0sZ>Ou2B($}kg&i^nvka^q@J+tzbjI)9n% z;zBOWg>VWKf!0yN%?P{qj499L=$C-s#X4 z2?=7?!pA{CQsigIjT4`EXpzPtf`3jXn~p}F<7fA4Uv}egy~;c{!w@+bGci4zb?iMDqST&?{g+h$dIyX ztf1YRuq_(EJ;g-%suZLA_yd7+we3x9*=I-#SZ z0!1}h)l@9o)?`*D!arSB3?l@OQIRF{>7+JDv+h4AY=D1&JjMEh$ zeho-!-eTj$MAl3@z=o`-CNjP-W!pN6mAIU=Yhi!Rvt^7$Hq{=nK+R|S}0^(BQ zF;`~`fCo^E1N60vQe_eFeYNl*FL>198VJGbb|rMo%ktF+zeA(NswoFeKjP?_b-5uj+~I>yl`PU((ZPv!!i0p8nU|o#5i>3ymf*#=>yA@ZMr&z2tGZICswy#uyh)LkMWEBUn zI!j}et^5TAr>cyocV{sS@J*A!v#Y4$#-H`|eDH*%Cag z;3HKQ|JF&pPI)doU(e?Oc~UnxHaUN1n;i>(SvRj~m@OH*#)}(r&tAyB*Lde=cv;Tq zYxDbhUgG%_uQQV3Fk50L@^DJ>!myU>`SfE(((7zFi@bN`n+BO}AvI-ihFAj3g`HR4>K6*=1KXlT2zAsYg zKDX5nM9)=atKDIwT0ZTQrm_pF9Ju0Dk86RVU;^r zR=1^?YZOb%C!>|#?qVzaGdn&vQEpdQ)lw23{jz%ikkrpv!LGBw4b4dqZz?;?e$=tU zd@@P|+Qn)#=Jm~1aY}!=@fN6z8M+=giT@qBKak2~k1U*ZKnqa31{b&&vFJDHB zSlYT}4x^kT;99p7EgQb&38FMC#=xKsP3m5eU=P3mo8aXhWNS*7Fy1{P!cAALrLYV$HISKen;-X2!1|**Wiik zlpH)o2Q*GT-xLoCJcNFfKR9=vX?#PX?BJKi7WtlLQZoMo?d0c@=r03oWmM>;akAjU z>&w?u)`5gEbC@s*V2s0#coHvfw(KcN0^($FbdimC*sKNl5si`OFBSw=caT(ktCoZ^ z{!K`YgGIpdef}UVN$53Qk(9H9zlHnOM_&XY8M5;1M$Q8j5B%hqc&2Zc&0`WJe_#7( z-}HXZ31{fHHCdCkth{hz zBys}klkA_@Gn*p*Azs;IgDFXfe?vb9PDVdRU`=^K!lYsLmB4ZG$R*XZMM{2KlgPcV zCVJ`pX9#2UlfWlwkpk)}o}D0wH^lqQN2o<=PNERv|66dI1-~Os7Dq5bX0rYB*>7FI zKg4?XoWM56h?Nk6j(CA>LZdfmT?A-bofHnK^5gDTcZm~uh%YjrB#m8?e`RQXk8(3) zlb!8?MKKL-o?PwRpZLF{6msVVYmel5c^$nYzwNIQRy5ZMEtRra!9mb`35x+rMqveD z_U0uzd`H|k4B_jrJ*KitLX!7{+~9ZW(ca2>jHAuGTZqr|>NC-Q7C)=W_iycH`ND;t zU6R|bgHUwkmxrL@D7a@6fAb7F9FN>GczBA|89y$~zq=ec$RlAa9Ajr|*kBUHN>Tac z=&7xo7%l0>3ibFk+-i~ 
zzF)y9Tviq(Y%&0)hXB!_S$k<67wZi6B;YE9kWd&pH&-q@r}~J8e=JI8IhDV^fBmZP z)Wc_S%4dGir9VX}S@0#rlXBKye$l3cRw-HVQvh&0V(HV>dd^NqESDr*I#GN(Do-Mh z=0Q9kg-%9EIwE&$$e)celHQVZ^zLeQd3_~^o`fjea6wO{&-1{EZX}dKOGPm(PmvTw zF_S2TEgfDKd9k~Oe`wR(+?=zct}W@o>lecE#TA!%iXPB6zpZ0>9QtXTQHSzRQ=~pE zUqqjiB2|%92;!(4oy&CJwUp7ZoUtDow=QqjPJDMQar5yQ&n&eqb+$nRi zBdjCJyX;-7r)?xzFxxEhF8$1PQcom)0WbHXSX~2dQh)E2r0lmETRcKJrtHef zEMG|}W(^R~LO57ES&77amm+V7?G&T3Gn#K?NR=SadQ8#*>{`@9jShG;OCEp)TX%5A z2mJ*O8Ni0iftZm*UNzfkEn98Fdb;j-$gXFEiTmLL~s1MLL-=HiBn3nT)lihfG zoT40CR|nIzaW(`bOK1u`?5R{I=4T&Peslx+09LKjy5<7qu7SAJHL~J0?G~WpIO4Ww z>qcO`e>Jm&eYJU|L31x)QT?YQ$C!Hw`(OSe zx`>neRxwK}ulmBy3EljDyBo$2;u9omEGgcdwp6vRH?R((BB(Y)?ptWa+d%4uV}le z26s@1$h4~u>xuW>r&Hq>V&q#_m7OXO>)=osg*LX}4@f*REL~M6 z`goL6W@joHIlPTksK^ z{R;_wdU&u-Jcp9$HpjtJ^e^nFq0H9-WkzFruD!A-Civ*dh~L2lmvX`V3}8?G4|4D; IBq|0Y08m?OdH?_b delta 8964 zcmV+fBm3Oh)=BQxNq~d_gaU*Egam{Iga(8Mgb0KQgbIWUgbaiYgbsucgb;)gv=V(t ze>FL7YiYjMufS#Wb!LtW6jzZ_E_0PjE!WN}*Jj_c$5BC%kVGdplTa$Nvg>W$Iy&ZI z=5ZqCZ}ji!KQQwXCJsPS1Q$_E+ugS#I_=7&K;WE%gL6Ir2aC>cL9QP+v#(*D<&U?g zHn}p;;|lcnv6+8e+Rdc_`*Jc^-S9TNe?1+Li`(7jy!|xz`yxG-^XUX$%xz6Ex&v-eu z&iM_un{x%^f4!XD!hM7;*e%yDJ8b1b`*S%nkZ-;@zX!kk7WQ3TeT4oY&i!r!IPvXf zb_@AGvV6?H75MyZ;kUDjyF9Z6$LbRU8H zOXznl*ec`(mQr8OW;S%mZ`r#126hE}V9>+*J=T;NRNaHs#TTIG3wGZ17gz`WE3D&Q zxUjyNz*5~en_``Jb_;%g{bqI#`0;8sQ)ibS;d{yrSSNiA_nBO7X5YP;e|-(-Md6A_ zxM%zgoS(h=0PXOCZ4!8M{>|(v1O2_4T^XQ@^77eR?HQf_aFyq~Heb!Y|KL3TUitXu z$Lo(jsQ!=7uP%Ogesl5R`OUKrS`b0r@BeoG+4rw(qnwXM_Uv+VdU}KwaT+@GP@?IY(EEf)M_=R{hmyO8MI6y7 zQe{mgA^OI7PyY7l+VP{O@BNTu=oQ(Zw{hr1PcxGGi{rqL2wo9?xuR25){ZH;qu7r; z649x`Kcyt{Na{z+f2p&kan1()mj@hXc;zow*o`BaIvK@|_scq?(?w(fu9p!wd%5hP>|xyzD-l$NN{Mla>=^Y4}u- zXZ}=Z@OcZ9+4Vc24sNSfFPy+%rcSb|sbrBlHv$hxe`qQHEOl<^T5Lk7d!u}j*iECe z{y=kx31x07u+H4H%~ajgPpDXM@$?ZEUR1eOnkVg|u(QHGt4;;_wkxe7ycdbY6Q~~(|Fc2-iSGB z?_ClEf5%O{y(KC2T_?a`)6sMuQyPaNp|dP;A`!)l4nt{DY$+=eXocWPW;x-IUGIoE z-Ya#zY7e24F8v6nOd_mRdLy1CYtsa8FaI-PogVZeoW7Rawd>@#e7_|>kMAB>9_&*R 
zW>e^RpmOylT?FLr7=B^Thn^Vp<;KA}-1ZJDf1s83r;74tA$@DXD?fsKdyuc1Ls2Yr z?($kH4Q+)+tKoiJ`WcZC)|DPK^S7YuYSseJ@ zahpjTw<*DMvK3@0H6ebgvn*eL4MD>8NqnEKgYAbk4B~k1@{!j7%ws!5s^|=cDZ35Z ze+`_Zm^@Nu(Z8qk6d#W)u2Fsp8$S`9LpZY%j6?Yxy1yd?V* z%YIGinViNk71SwfR{mp?P}D1y&EjWFf3}?fKqVk0CQ-IdNh+r#BOE+cnf=}lw*Ak3 zv}|U0sKD(I%32>~tx8$ehS1jgXzK!PV+d`dkG3Ju9;9uvkG3h$wujIj_t73#Xb;l1 z)koQ?QXZylyN|Xl(6$GuWYR}`QX#I(#z5CmdVNQ!&^Kh&7$m*g@0+SXe`4(^f4|n} z`dY*FWm_+PhVan(cxc1nMeBnAtpx(qasFcsVPf<#5wSpjl#a|kCL$=9LuiltXpcoO zdz6-}J|@<1q#f6)7Q=)=)S?*j_G1t5kBNvP#`rPn+Jz>16HPREGJcGGsr`Pg z3ZP}0!`j8@LldKkCS%PULR{-3F2IO2h_>EGTNfZ|7;&SIxG@}?j6T>fe_CKe9j5Sc zA9WEgh7q^=h+B^t8X0}?U^L)i@41-KhYCguDyZW@{-g9dkcbB6WSG{~eh(FKK;6?G z%|8D(oBlsEtYP-SfZ2qBp~GIY4-U*G99Wgm&Hg-$J|2buImORp2oJN5hdCU1%sy-| zTd+Ybe}*uz`j}Y1X^v+0e}RSB0v3a8b<#(BBG8^_gPcw2^)?YKbj8Sj2C?aPYE@uk zjfb%r_d&mmJQ+fI+(&vm9C5~d zU@&e018oRxyN|Yr0mCeN(nnkb0ZSXymb*}3+=c>*HOw#j9aD6F17qKCm+4UwHh-3A zwAF_WRudhDP`3Lh+mFE=s}CBi7HHTPcdTAyu$sugkmnJaSNlC!tpU)m2DbZf!fxV( zZde-s8Nx&Ba zh7CCFKE$vah%p$I>^|(Uo7gdg@_(d{^5ik7WcLDx-3AS^- zp7a67qzN3FJ<(0oHr2@x9$Fs{?YDr_qz^bIEx@6e%0x3K+IR>LqmPFG9L89i!2kLX z7G@s{0XIxdv4^347uMhtyo1DCzGY$(0ksgxEVF;vA?6n)?2pqz%*%SEff zl{OhG#=bt7)6jdl2w#vz4)G4dP<Wc_ z2&S5rY3_|J{Tn!&J2+*{R&`S`Y;719qnC>aE}Ci@x}n&{ATDMv7k>d<^s#E`V_i3f zaT)h=5y3@KC)z|CE9M|BRxcN;24T8ts(UXPtNoiko4Y>Q*vkS77{p}K%VbhRFjLp9 zv8E1lN~PZ`l^TYP6;-px!`xEc!~46oYI`K`!@N`lCd5-zkltBB>nNdhme4y&=$$2u zjuJ*|iEtgzJca9kAAdd=Qn5M-V;7076wx{eSRpuv_poC}zpQHYM&QS7w&xWRoS&KO zKH*U2k+aYV0vsp3mE0ByoODULi)@?EaY3}R;$T))LRQnPe6{2B5GPXn#q-#^-%+ua z+|wwgheg6-9Rx=Y^&>UxV25+xaWFY#r^Jw4o&w8A;-n6ZJAWE#dv;7X8~@I~hprxSH2~}4 zogXfXgFeN{(TE(wDZQLg`I*z@AGc@Gl#1o2QOT9wn7#%4(es_hwE>GygoGhILAhtD7j(9p-vU9PgpL_W=p(l&;s2hlll% zg4hAGYQ!T1p|)o=*{QjwHI_RNZXVy&cnJge>iO6OyFmkvy5=VWWyK(2ekyPt-(lWs zPs`JFTYtQEoya9Yy;jxEH_rj5HihGfkg0x-u39dO*L26(K=Do9>nm0LIngpG2yh0n zVoWOaVsB`i)YDt)D+($ry135^Sk+We&q98UaI_8^u74;YH(o=_9kQHPzeca(BP1P3 z$9X#TBR3@>ZiIB@1* 
z5C8?c3)h^_bprR$RQ`;NMmzgI^j(J0k&w@W*rv&uCxHKJYZv4g^yOr7OJClJ;M$N~8^iaW{Vkk89jF8e8q zFMptXp^)5-I8Xpg)Ek-&Zwn)4ld+0xpwgC4Mhr2|o}7$U#PQCaJUM}qXHO71@%&rl zd#93!AH zW&y;m7f9m`@=|9*QWPww{0YYii6mYMp??!UO4gJKMC0W$AeiTp5M04@D!D7--pu2> zQq-IX&nzMKo_J_<_N1T)@@KwB=1y9QCkmL;fJ|tGGdCq90s+hC&)8HTfeoeb*eJzC z=!7{KTo%=k*hzhd``@YL2609TG3vtS^9kwfWW=&psJE)ojRQ1aVz?~=Yl?!{kbjQN zRZt-zO75|Nk~>_Lbyu^n3n=p81cE!J{KY+<6S{%=hjVP8kk*JHbh6S)YB8HX1s)0m z$afJ(aNCpvK9ik{R$AF0m1YsNf?!l0N0m0})o{*xP9rp@5nit1jIxGfgO{4H2%W%S z`YsHj>{MD&noLKd&1NHSbU99!BY#a%lu>rOln_JuQ%N05h>w){GfXi~rRqFnD}j&Z zem=07J1KKm3Elc+a~|KFN(xetI!5ZaAre9-FuF(OgyvKtw{U=n1+GQ*Zy8Z!$>xiRToo6=qcmzX1!%g~&UNF5B+tJU42PKwOf*rvKyKB>YIegc zeVRAVgTUxOw3ouzGwP)DV2a?rx2KH)*m0q|IGCc4WSO%hnH-R4NmuCf^b{%80$(Kb z7dPA}g>~Ar-hls5!N$LJ$$vVCHz2$O@D9IYcF7!0y9Ke)* zbdtZJ&K#})1@n#%OP0j|7Ag zsWb)@l4#x5*6ToQ=bAN@fX&Jm1A0|aP#Qy!%Y%Rm`iy^4e?}W@YAC@P%2tjSZ`v9^ z@XKx&s0Xp*v42ii=%#yTDpnih#zj>%jLY_k%O#S3kdPgX@L?(5)+t#NlW0O@3gR_Ody!4$QUZPVr z{2AnMY;Kt|?{@k>-ZPg3W|f0o(ME=BnTDgwnlVQFH-GN{6f~BNiH5{C>_5!jT$@l5 z*^b8EX=Paz8LXsd578px_Sfy{MM zvZ*0PE`Pwl2{^h;ic&JfBtr^fVg*I68X;6Aq>h1=%fvw-m^@Kb1ajr#u-`#0yjiaw zD(hD$zy)bgMVkDP>j`q<7Cmt~m=V$@a0uLG!q^j?3xh#ROcup3o+%yw}h7iMzhPh4quYh&%?G%jM%Qezi`Xdoic01=sZ1{X3Y>`a}t>1|?| z$d)I1#fV(ih`4N7oa>HDxHZZadSI8xkaf#qu8DKs1kt#RAljHWC>Js)&8^)=^2&bx zjYE#+SUB%==!%2{v1{SuARsC7GvvmJPk%hLNaGMeCzDM_BhT@(`?W8-aX510Fo`1) z(QL$u<8AR#GwL>|1AY(mTDo5$$l5Z7-hRkwwrC4z>_1#??OFcd-ZyaqLMhSq?t!bF z9jtxMgb5i^R*eYStqIGb0bHJCVMA7RRmcIcHzm+zQ#S$g^|3geV-Y%4k+rdkRDb1# zYRWAPnsGu$M+J&%vZ|?AwynvmOoV^BtQbZJ9-|^l)-@YjGT^eUP1tYy+cC0a)wVxh z8_jXQ(io>JK>Qkz)V#&Ui;1k6c7P38QB7ogX3Dx%HQAV$pF;)lr|(t$bH|XP&C^=P zn}f#d#ZX<}XQ<95yDMxfs90Oa_g}ar@>PzzI}C=~d+R$?kB?*_`@@#Jx*~s42dOfK zSPT7SBp@y&9&>dT06c(N9H1{uXYaj%#+m+BUFUzA3euqYjs3`|c zKjP?_b-5;6g6a^Bub1Cn5*B}58nU|n#5i>3ymf*#>C2QatwDR%En#QDPBUPojwCeN zzd4(8$M)G{BmCPMd=;af5K%3I`_14dEJ+q`^AKR`!W@fo?Q->Mr&z?A8Hu7*+apyV zViGtmS;YZFXK9SGmA{~%{6cIe>y+oh^YwflkSBG6W0P~X*s<`Jb@Q5r*^;qqytpCv?1k)m 
zjdy;Am*tGUHovduC7w_5IwL6#vn6&S52qwA46$6#rynwsUT4c$XSF=*C!O8y54I8*&f5tjsDVlRL+y za+1Po3x$3s@~%18oy8cM)(aO5(buQw;J-x&B89F||7^ zS7?|#yvTj;PXd3x`uHzN{m@D8`N~SA1Kuh^5IwJ>&?iV|&?B@BxVD0jAR~3w5n0pJ z3F@qAoD}lHGWa_hCr#R&Yof*TlkM_o>^W?y5P??$7;;yxZa`u|-J+v~{FMdr1D3${ z9>LhQ77G3%wJo{wBCz?WBJxTX;^e`yy0FEZs|Ze?j8=boyOXZ)PizC;M7dpIRZB^D z^z-f^L{i_W1?$!VH#8?fys4~F`%%Xl^~op^XczHp%#%?KEmW<0iNNHG%qCp*+)E(1c z1*Nlh_ZfdBA=P3mo8aXhWNS*7Fu@A zP!cAALrG<~$iiQKL+-%{ema2H;EC&$96Ut_G)_L<6b}hJgnpDiICr0Dd_$t_;OE97 z`<`Y}GXEXzeD9gKD{z;$V-h8QkDjzIo4@0PGxY14tV!Eq>JEF$=eQybsfJd$=-Kaz zR|V1it*}|+mb&&huy-FhkmXbAL`&wwWeU0DAztRs_X#Y~b638j?x!S;=g!;@e0pCb z!IFLkSk-Xw|CmnR?da`M0q$1a-uWGgoPhcy`}_6Grij0bSN7OoN)qCK&`*Jr(N7Ur zQ=X78X_$Q_upvEiNi}Vel3&*(a__5&UV8rt4#N6L;FGjS0d*D6P7uT!;(g*H)FL$} zQ3&y$7TjjRZ;6w|5sZ+TY=3|DTNm(mvEDtWz|ApY;`Becagg>9~U z!~E4<;zSqm zf0eMJxlU-Ql+6mZlIB4$1}GVY7{ctkm+0^-Vt(x->OG@E9)_i zHuG-bgr8TRiT=I#SxvrwYd6aS7k+k0Zo9Tp(Uo6rql%;8o=wbuGw5(Ua?9Z1DOzWI z!!`f*bL1e8gt4#@o<+gIB#Ky5`St4Ud$V@z5%b7=y?n_yU6L?n8{UqjvqP4Cg!r4L z1fBZ_|9;$(VHKO&eMi!F#Em0wU$=a>f_=QKEK1m907?%5qCvCv(mF2I8LVo+WeOpo zFm!IlK0BxSh#z`?l+JQ0e|`7*RpF_J&*GHN{GdyJic+%RLCKSH)?a>Ar-W81S@3-X zu)$*K)75&;_E#*IBwadDd^;-lGLPm#JRgNlMoBs%cWlU?jWUwnl63U;YIb>jC5N7b zDBO`jPo+=uz=>`oltN2IF@&c`ilUfFl)|DAFN?g`UBh{Q)!f{iv!kv>?ZNR2A$)Pg zWuBr3^wqEHm>!3I8fVm@{L>VvPsl zSCE{K$9QI`E!5ct4eYdKHE0MUnT)WGDDSd&MNivEvg2^G$h-72*GWB*_yxS&k76|j z+@$`_xg{chX-L^G8e2R%`xeyWxP#?%gD2yT#U8N+odM8!I@-e>_{NElVhx39b!noer$S2? 
z?6!tI{SC;96tA--3>4n2=b=yAe5*OZt2Q94tTaZN&}Adowhtx%r!bgJ6pO(f3%{)U z&QG|1WXtl<+-YGBWha@_&Hu7f9_5G7WAoSgAv+L?kF84X;On_Nv@JpBYet=?|B6BK zR^Y9FmTTMx=&P?#mIO@8dArGOyxovdj;*VM>Do9Ojx?z0#n$7qEK)PNnj$Dt@;B3VqKD z$Z=b%Z{6Umknv3#pU&*>+ie zV9koQ8C1EV(D3^0WeN>y{W}5c9dT`>TRd*36TI6cj=O5~x#DQ?fLS>^quMdpLF+{42 zB=$Kj_pkr^pa1W_{10^H`SeNqMqaMo9h=y8RcSjg*sET(@t?3A?=Z}kNBdOi|NWo; z_21EU(_dG${xGTCIJxIz%)NyDpZ|m|;^e+n%+kuMzOZvbH^1MmhVhL!Z4|{AA0Bd+|M-t+yQl`YScu59s}JirF}zQw#;?@Km#!)+RUp>EVHAtn zb8a0!04qBZHUoFX2=Z2VK~jP?5QR=RPU;m?JAOndaoBzd8Y6e*2N0Hj&W|TOMHwN} z@(5E#9rvb)g0nyy3-|*Pj|@v!)rmeH<&@d3OhyiGPdioHwi+!3c&=`O-R$1Gb0{nW z)-?uqYaW0%=6mUxIN!swg#N;~o=X2fLZ2QUtP{_nWV+39@D%+6+ixiIIH1gEY diff --git a/examples/server/webui/index.html b/examples/server/webui/index.html index d3893ea4e..882570c81 100644 --- a/examples/server/webui/index.html +++ b/examples/server/webui/index.html @@ -154,7 +154,6 @@ placeholder="Type a message (Shift+Enter to add a new line)" v-model="inputMsg" @keydown.enter.exact.prevent="sendMessage" - @keydown.enter.shift.exact.prevent="inputMsg += '\n'" :disabled="isGenerating" id="msg-input" dir="auto" From 21c84b5d2dc04050714567501bf78762bfa17846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Mon, 3 Feb 2025 13:25:56 +0100 Subject: [PATCH 28/46] CUDA: fix Volta FlashAttention logic (#11615) --- ggml/src/ggml-cuda/fattn-wmma-f16.cu | 2 +- ggml/src/ggml-cuda/fattn.cu | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu index 1054ff95d..45702ad65 100644 --- a/ggml/src/ggml-cuda/fattn-wmma-f16.cu +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu @@ -561,7 +561,7 @@ void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_ten ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); break; // case 256: - // ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); + // 
ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst); // break; default: GGML_ABORT("fatal error"); diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu index b1e66d470..b0cf152f5 100644 --- a/ggml/src/ggml-cuda/fattn.cu +++ b/ggml/src/ggml-cuda/fattn.cu @@ -235,7 +235,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst return; } - if (!new_mma_available(cc)) { + if (!fp16_mma_available(cc)) { if (prec == GGML_PREC_DEFAULT) { if (Q->ne[1] <= 8) { ggml_cuda_flash_attn_ext_vec_f16(ctx, dst); @@ -265,6 +265,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst // The MMA implementation needs Turing or newer, use the old WMMA code for Volta: if (cc == GGML_CUDA_CC_VOLTA) { ggml_cuda_flash_attn_ext_wmma_f16(ctx, dst); + return; } ggml_cuda_flash_attn_ext_mma_f16(ctx, dst); From 8ec05832fa8409c49b3bbd13f957c6ae8486e618 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 3 Feb 2025 14:57:08 +0200 Subject: [PATCH 29/46] sync : ggml --- scripts/sync-ggml.last | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index ddb9d817e..34f1cbf69 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -32f0b85987396945afea2291d5f4c5862434292b +498e0ecd2c4f9379439fd413805af10e8e9ff349 From 5598f475be3e31430fbe17ebb85654ec90dc201e Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Mon, 3 Feb 2025 16:45:38 +0100 Subject: [PATCH 30/46] server : remove CPPHTTPLIB_NO_EXCEPTIONS define (#11622) This commit removes the CPPHTTPLIB_NO_EXCEPTIONS define from the server code. The motivation for this is that when using a debug build the server would crash when an exception was throws and terminate the server process, as it was unhandled. When CPPHTTPLIB_NO_EXCEPTIONS is set cpp_httplib will not call the exception handler, which would normally return a 500 error to the client. 
This caused tests to fail when using a debug build. Fixes: https://github.com/ggerganov/llama.cpp/issues/11613 --- examples/server/utils.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index fefdce55b..5f97df5fd 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -5,10 +5,6 @@ #include "llama.h" #include "common/base64.hpp" -#ifndef NDEBUG -// crash the server in debug mode, otherwise send an http 500 error -#define CPPHTTPLIB_NO_EXCEPTIONS 1 -#endif // increase max payload length to allow use of larger context size #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576 #include "httplib.h" From 1d1e6a90bcf485ad2dee309c31cf19bd802465e5 Mon Sep 17 00:00:00 2001 From: Woof Dog <197125663+woof-dog@users.noreply.github.com> Date: Mon, 3 Feb 2025 22:16:27 +0000 Subject: [PATCH 31/46] server : (webui) allow typing and submitting during llm response (#11626) --- examples/server/public/index.html.gz | Bin 1207129 -> 1207175 bytes examples/server/webui/index.html | 1 - examples/server/webui/src/main.js | 8 ++++++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz index 3a2529aa2fb84485aa480d3d476e5ef27eab8e27..df3cb1bef96b805742b458d0abf6c9b16826fae8 100644 GIT binary patch delta 868279 zcmV(^K-It5)=7ugNq~d_gaWh!ZJU4OIm?T19nO083TqL$aVB(o2U~3qdfxoXybHd{ z(s`1bZZUjPV^WHc;NFJfZiClBTA=!+2QDo&-WIOsHtj{$Y2Gc%sjNQ`8pFohaGkdW zm59YJnsL;$T+i+5+M?DM&*AOnbR9P5^D*%=+K3yfX*n^*R+qyE*xC-3IAebsxFa)k zW{ZcTG%LI^Zf3HB9h+y3D;Fq+mffC}XJo@y*KRD%Oro~Fs9`u^(;)rlr56$Sh3uUj zfT_=x6^xZv{FmkS{HEu_>xyZ~!rXF>)_N=+UzpcULe#LGDNy;dVdhYvy2ig{SWuv5 z!4B?l)iw)vDdSwbW?@}7R-ny8=mmdP zWN3>94;~a~hrt$rX@Q)!%wL3`)|mgYsyDb^ufufskIU|N53j%E>&Jg@$>Z@wTs-!< z8-dgQF1&Ue)1dR!moHv+!V7DU2o9TGZq5DJIQ1}Et`Klsnkd3!*MCSJve{#Xy@K- zf3kdKcwNeu2j`WjIjjWBo5FcslP|l*)l*oP8aJxu`MS4m9YL*;eCPqwx}xuw3FPOM 
z8pT>nb?tTC8G@IxKQQwJiG`){E>~*rXC3awmK1C&?Tr^?VUBVu4ufx3;ob|e?5%Fj7_EFrBs}*}#k8OEk5ia1UOeTLe zD4F~y?xM{tl+yH_;`1Lp{18l~OO?i+sbs?^*=8yYz$xy+YAx13+X9SdX>JY|lNa$@ zQUx z+kn8B;i?dH%LK|nys)Y9&cP@I%J-|v2k2V1MKpkj?pExj zIJLKG`*xh<<2akNw{BmLwN8dj84=)b^|7j>_g!x7J7D`0l=^6Qt*w8@3zR@junn#i zgl*($!+l~l-@yi~t$f2AG+5waw`=y?V7HOA*Ah{f4JZBd>8rEyo-r|_)%j=hWttVo zW}e#n^5%cK?9TnVxVH0nV&>Mry85`iV|IEy`){{*u(|gaKm=+UG-s)|8Ay{n!1=!L zq<+V8g39jTYhEIwJvctKdA+ltg=~9i`I;juv+H&~vVu8(x_J@6=ESMXes)XrDkc2T z8HZR`g&}07faELG%mZh#W6dj0<7DP6Ev;-AO#6Q-)GxBDU_3K1AZ9~>U@a!0Ha?G& z1^V+Sd+d6v|7seI6brBvIdYaF4Q+w_LbrDic({g2yE~Zy*+92<(0OtMZ5l(U8<3O+ zB9-^de_nR$IfTH%tFVb~fNmF7kzKpOjZ39#Z_~qneNYeZIBaOfpH|&&X|a1KF1meE z61sop0RH^gd7xA(WeE+zxQDMM-_!4Zyu3WQoa8HFh z$23dl{nB-(?4+uFo5cGoi*&0l%_a%lF3Q2o;{3u)`ahT~Ez7LGS2FHfKQGfR^((&B zIE%;i?*OLt6YC;fFG;7<+!XzH$)va#1bBJ5U1z1Zg8<^a)+&8zc0TF~2n>P)%4L7> zI6$Je-9>&8Pttb-h-VPk;1jhbd+Kg2c3(G>t?JQim6h`wSyME(m#)MbiN|?S+m|Z~ zIc34c%2aUZN)jNq)eAYuJ!-YNStfD$Fv-$oHEchGc>Kl|H^x;JZVj-iYLB$4*cx(G zvpx2zsyT_O*lT09%G5zHbunWz6qI$`I9IZsXS z&1Ypn_3S+I!_F6Jkv87(pEXu7&@XR?g9YRtpSen#3)Y4xZdX?`kC*+*o4_xurQkM; zb+HEd?6tpa;Dh@5`COjsi|l{em*4~UzON~F3oO#zo|5I|Iv$O(!Mh8SneHo__Wi7P z-PneS8K>SF145T%@XcV77KND{ysyFff`k_|ykOx44=+S`A+PN^*nKL8WlP>QClOya zhu;2B*=_`ioOE%1UGHR+&L$>XgN0D|W;C%H9gw_Ux8UNJNImnDfM9>KHpI-=`8=M@ zYU`!&AJ+M56y~dCtuzvUXn+Aes7}4oVc`HM5@?G0@hmxC3_xuHuRw3tqqG38o<5=i zzPVaj`>j5LCmTiK+OB=Fm;nCbCd#)UYn;wz@iI4q^2gd;Bb^0Mm@bR+EL|lZD^Z{)GL!YxGLE(4lIGSDgfXX zuG1A{gfs&XX!y!i0tXz(3Ei3DPGI}?(t8wKmouaTC<3|OmtCX-7bp2#`>Q3$P0F$D zUw;6(icICJIfUN2E*+u3`m&!CX1*TH((xOJ(wCd013wK7&<5ouuN`nY=&+G8m*=Dd zD-8{8xE^??>q%KLaaMqrAEg5s3lQee%tvMtrq;}*mqVokE>NwE&U(G-n+`6sG%dnc z8_Tt`>n)sr8)w~qe;UtbP}Lui&_zmfrUNM%9CKT65X*Qn8g0RWOE9+u2QJ0j796-FbC;*415T=G~3Y8iXjEy467Ouo45&4N&-p|Q3rhs^hzT|`5yv=CDGL9lRD^I zuq6U9L7|RG<+DB6A&m!tqrG+@^(VWr)sx+~>ZumIau7hs?1M(!%0Ylcs}OvX2Z2jl zaL`J_GYAkB`N38i9w>+6Ru-N?fHXJEs}wgt2gFIls$`?}5*HYAGp6Mr@R_&jiML?> zgTSZSs%6@O^6?<>A+zflwP3)5z^BowWzmWJkkMVOeVx; 
zx^zWs5NL`xfh?K`jwclm9U;OZAv81?42;#^XDanr16(Is{CRf9C&Z?@gB+ zMY2Z0SBdGdw5n)jgaEN-S+!k2fY2@^B%$uMYwRFau?R_F`Td^X^N8~#W1tPKp~%eY z>Z)5=mX*TY&+*-l`&qA)1+SFdZI>&E_X}J1wQbUVi38#4CZhcsI2p;JJzvtu{rPXX zNajoNU5%G!W#ze_Kh@NDe`!7yi{aOwYCk<~7fJDR<+*rYmagkP%FEX!WA+G`M* zRR{op9K%naYWk;|8p?_Fl)qMg{iz6~_S2L0>rdVj@Kk$hKh<{s2cGDs+7tK$7Qbsh zRkyG?nol+TZ_mH{p&D!FeD{~#rFlDLFS*QbEBUgcU!%nR_tPI~f1bd@0MJiR(QPol zdHi?I)1!^u?G)YDPRPX(}iD&&7J z&V+L$f339tRAaB42n6v;e7ZOe{s>d_r#!UtEwa6#znyY1US3qoxh8$M{(dTo8|I^8 zhXXIObnDuE++d%4f6Ig9qb!gkGxILj^3IF;7T5CZ>GztSZ-MK|VXPO-wnTg0{u^!o zRRcG_d4|t#OFOm|+s(m>pRRrcz4N3fob~-x$b9a63POS3Uz2I+@@=D2HU$sci^ys}K z?kIiU-&S*0$PP(yYb~8yd!%>H5a{9zDe**#E9DMFY-{>8uG}w&1{?ND*REZGitQVO zUw}t8KKRRBZH!yYE%2PVb!8FXDq0pXlr%c7iRt$gf97SiKg$j&>vBr0<|6C${RWcf+}T&cwQFe2J4tPgLvtHgUny zDa$*4GfDODDqTJu$iF{GH&0-;XTs?eJilEVuA_wF_v>;kzpA+Ii)zWfsh>9~|Fuoa zH^R>^fB9B=F26?}?fDs^EuMYRBrwe~%XfPgl*sqtK>6iI-7`*VJ|53^j;rEyymDHX zw($1Z|32!Ej1+JjCf(Qmafw1)ze9$1q^*!)907I}?eqbjw^y6Df1PCROD-df1J~A`%IEK1Znu>M=7o;< zo({hbE8~Kxe=MB~n2P#$dI`_ZSKu%2;fLLHJStv-$Tn^NOA>@Tp=Yia`?A_=#vW4E z+mveqc@$OblU`OOvE?y!{rvnC1<47__WSwyDR%Vin4EvTtai!LS$dAAXw|fov2FK0 ze@8mc$t9P6sk8m1&h}$29J{Snu=OrB-pQ3%_qF}KdRHued0v6qN$d)ub%CL{5N|Jm z7hAV@x0di1-yf-9M|SR1kvF+i!xqy!Rph9)IIo;FC1gnu-o|uj)E4DArfsRy33+!Q zKHn&Qe_nt?Gmh0A3+GSYdo}H;(k_3ie<`;m8h63AwhEb@Shj!vh(BCzc4s_zrkwY* z*Ur0ad`WxnqjkJ0iTC2m%YXd+b+5{?MfCZYbJY3RX#`$-RbFKy0zXz1UzYN}{wQ^# zQ~7?*_{!*Fi(^_<1`F|D2bnb-eP*%8X9@_>Sx=3e+nE| zRz4S;`R)IopWjM-zi7u5;BVAH`uqewKR*$`r(M0L1NzqIz##UT(bCL$_Xbe^=+?cNp1)4(>;?0vI(=ML!UtV_l+C{?d?R=6};%XG>Uti~e?LY0e zZ2nO!sptLebE^qk)_l9A5qDdE_Lmb}r+K>u9BVFi{1?NH;^alOe{6TByYj`A4+)>Q zFn@obXbJJ}*P~JkZ};V1q3`O&zCA}~_YZB^6|^_oxL0w%-qKy}ttEN49sAbqpdKF1 z!#cm1T-o1M)h8yD`WXhYw&Um@6~!Sh7UIJ`8((PcxBm=w;=kt z(3<&s>9(>Pv^=>~fAOK_GmKjU)dvXkx&7l~#?dE)bY~2^Yp82KrS9_;fmUYOiT+X$ zC07f-%8`F%C7oz zp?dZ7@`U8~@rU%$c6Nr~E1Lho%JWi#4km*oIr)W{{Esaq-&=SWOMClDdeleJ4kq6W zkAfsY_~G)8wdptYr*UwRw51t;X%_sYS?~iK<&@k9e-y_5008HX03ak>Jt082dU^q& 
zaCNT0c8>t;kH)R4dhz>9B1fj4?7q&lsVdvlUFKHl`(eGhqgZ&0S}q3R^b%ZR73}uT zIm@V^>p4+A`a{h5WIv!I&(Ck?CL`NnQ{?EyOp`>9a=qWe2BqN1-?tf3)ZfX_had>z z$V~c=e_HiKxz6d^)n4aUnDahA(zlxf^HNmy5?fmKsF~LJ^gilh@6OF=*XJ`0zwK>( zd#k?cPv-}F$@yXY&~x8DS6_P9-hes74to871Fusq>K}_`TE?`sbj*8`pAvZobU*nS zMl{|0Tx!n~0(|f};zB-HrCs)5{Z-J1KnH zri81f%C@?42@rh+uaqfcE+!EUS5I)b`n>zMoPX}GVZY@6>7{OBhpztb{aLWpjLK=X z9O?4gJr`2oH4h(u{xx+?S?a2q)Sq8LYS-nbolxqI_+?9C4!Ue%q#Jq^zeKv0s=D^# ze+9+9c=CgnZIu{Vruo z2(DrWN`{8VH(>F!Dybip91wqgdL^(EeUo>TivDBjL}eA0w^Hc~N*wOW`g)o~vU|6z zf1*G2G1Z!#%f0W{E35s7^vY@<(kr{Ae?IilzW=@&As4-fbv5v{@9#%^QgQsf{dfo6 zWj|y!XEl9iC*+e_RA*$w-YG}`|6 zcH>=ym)*$Z$hUp-JA0CyE`;p67uLH7FMFa#Q4oD+KlJFNT0Q#Se!PotN9wl(e|g8o z*gx@J;$-dWUka6{gPPOMcU;bcxnj(9N-}=~z0~`zAFmzG4{GY(A@mQ>cYm)udMxuB zc)q*e`HPD#xVyw`RifX{e1Gn|Yu>k~QU2@N)urUC`eVi@-@r5c*xUJD z1>fqw{LXv)!k6ZaOZP`k@AE_V!yvHY=4 zU*3K{(B<^3&*_KiKEJO%Ua@^fA#MLd6<@hwK3;crHs0CC2K?eq_;}5Ff25vW-QKJD z1HPvJJfG9K%>-YwIdV84Jh(AAng|7gBuK@J?H3=>=^1J=M?>pXS>CU#5gPrDo_e<(qJlvNW zaoFf5cir3<%@t+3A1VrXfA~7HM_M>a+_!tTOg!d_UyeNK z0w%W`xnOt~t21YLmpI|Gts0Z3pMh1wR!Ell#P-_l0j)Soj7Z{mVC0^2>khJznloiB)hAQa$het{jC}+rnqxB|Y(Lw&DMcQOfUCaI&l)9!vP0 zvHY@)f5TA(Ot1pCh~joSx;IOFdACH#AKpcgw-0k=OTZW7d?jxy_q*HmyS={K;ro64 zuj=MM-ro#+_#?A2ClRu*KmtIN=bHv<{#oILp8il#d zd9du~r@ueFfX`pc&SZ*{*^UEu%Uam}(!ajCe}SC87Ot9sT=Po*iyVi#nj1tXcJKv! zsy>0ImWY?Sm?wH$7a?AE-Uv=U|0BU&fB8^6Jh~KO4uX#{=JMZL%iv#s`UKSg0Ql)? 
zY3msktbYBeu&0He@-mMEqGR%Ea?ulGFqx57CuxSPI ze-jixZG{t0wr|+Jo#>yi`U%b_)s9qG|AbN7<`=46svLeP=I#n!DVzZY<;e z1OrErMY$=TFe;x@2q|~s%kJJ(LvxYIa}Iw4MPBPCjM^dCHC2;iOO7IW{RMae5#X0n z@{VW%pRhWNy&^ZjVCV+i3y6Ng>O=ttf6<4Ae*ghO?0Qq9KSAt%IzQED^GVHhCV%~@ zJPW&7-_FI8y887e5c=s!U2p%5e*KAfMSj`^Cd~KbezCnAq8)A#e1h2BeC|KZ!>=Md zA@gV~#lIH%Ukm*YzR(X;?Pb@3x4(}1zjnZ1NBv(5{jY`or#G^Z)6^zTJW}~)otrV1(`d^o;FRygB`aUDr zD+q6w!x8?z#QL>X`}~Tc#oxW$`XbobC#(+PuL$;W)W8;bt#%c)^z#cEu5$EO_TO0p zw(GT8TCMvPdAelt+4-qhrtrxsXD#A;-v@gt&(u%XmikAK9i!?IJ5y6e?_r;4;IeBmwqOnJK;{ADX|ysyq(W`E2S z#sTj+2|reFTP6GB)aFO(z+WHqof_@84E;5i`n%b<7{jf);&XW(fZPMG>ZIXIXa&Ch z^tzj|xBLE4;rx1Ap3dCy@Bx()1zDU&bvyljNrE7Ep!>9o<8*0+w8L1q$S-^c?-HVe z*TFq_@qer>%6!?s zN^a=hmc$QR@Z5>#sT%$f?b9*J#9f!;m*sYj$}cmv6rz-VML!+ba~J-!IiPMEE}g(^9VS1$ z+yOo@2R^Kw+#L82m*s1@NPidwW~9gQ%U-$bmTmCr%L|yh?Am#tzvXeEl=(Z^)oxpG z@L5oNUUOLRpE6?Ka^AQ8&kwu0Xw?VI*N?*t6b{95m)P(7m&%Z)p8T!M0}Z=atvKsqPO#6(;n*&zzlv1%KFY!Q&e(cI_fs zjg@-J+sobyOUXGN@PvM_ojb7ncDaK)#z!cvq-%}Jm6W%If2RhWk4u#Le!kXoGNd|h zS00Ro(&@K`Z+I{g@1b#}@%gW4&$UH#`L4PXd*soE?xQB=1f%3;{x=G$@Ll0R_P6J9 z8%pc#hUBgYc<_=lrGMSRENi2Gc>)1od-L9d~Ep4V^SM1_2 z>~fEFZ?(V=7mJhnpP!4`E@7@N(?83ScyCr8ojT;dbLviLb&_)kE*y51SL-VX<#FoH zJak>pwL+ z`ZY@2e?RR*Qr(Om1blj`6j%Hkv4ghx=#~8PVHP{OtLL`I*I%(;Km_=7!@t#+oF;!g zh6?&l8__+QKPm2McNmD{@pkCcWzt_~-u}$H7`JOj+QmrTEA(G*@?GiFd;NL0EC2DA zVBiBve;GoMr~Ti3sGt+q{-1NNl+VwfcaH7PQI#PgeH$iQHzQsDoZpmlso*=+?0}9z ziSpWV2}A+6y+l`^fKNr*^W_CQrw;dPxCnbx)KKQU+h* zjl$Nif-4`~Dtx{M`}eybCDi-ZgPun<&2@?QOC0#6l#~BGPv4e4f!TS2I4x0vJ@FKA zySK-dUOJJu(c60^L1b_82;<*0yCvvvn%~>A_$|G? 
zJ^%0b_|X|Y>G22W`1XpvH~ZJ;@ck8?>$oI#c*mFYiR+Pf zUAbH}?(&ugzB+<{gJ=RNNhZog;e||l* zEjTpg+F`wwCgg>RGq<<_F7ct>y_?8~s=A8mUfmxfhUZS|c1#6L zH{{e!-f=W`d4zq)ZBE|3_r2wu@1WntGtY(kX#jEtE8FhXtJ5ohe->K9M=it> z#PjVt%uXx$z*hasr1H;bQh6}Szik5fVT+irXG`evGG0=3KT#9eipWm1xv$>&pLcv5 zV6UeFoU(Z?-UG)Y87lNXPR%+ZPj=7vSvyVpgQ@X$3%y?F$Nh5Rv~~x2Lz;DRK%7dd zE{}nIKMNUB-iO`5?UG9=e}^6)o(B1aec%0q?BjCkF~o~+0G}QJ@4a=af6wlf+^;b@e-A;jN}2n#udn;# z>+(SR`ntbDT!|F&x@APaoiV2K;R|2jw<5|;!PMWpfWR>r2#_Zr#{{HE_}4$dfBm~0 zF~oZT-}ZhhpsjGgTfM+`Ik;~udc4b_VD%RYX6^`c|(=%{-8_0>y`8HL}CL-Iwhid6I@ z^;jA&r$hVH@+j^%Rqix6rwebVm&&iNxxnj?n16hI-JT6+auMqV*vnpk+%NslS6fL__JTnBCr_=YID0|ieG0luDK#Z~!FLVWmf5!>dwO}o zi&kXYs><@LY~Qd$;xhaCTF~;mwj)vB)o4aR_EyLf^It_goq`GVvSa>T1QaUD9)OOP zmQj%HGS$F6f6r6{H}(9luQ_Wqe%U?=-ir4@!EKFquY|9!2Xp6~XLK-r-rs!+tVPzb zlzRM-hMMhf8x(i;5KOQt}^HwD|<*$>AbSq zH&#|Vt*mypvV29H_|N(J8s_BUL>LWTilVm%W4JCAJy3xwF^jhPNBi}B@q*rE7pTj* zMZzuoAyrkynNAyj`g_aNl_5Gax33S&bLB)@9m*W{I2*aE*HO2x+q!{Q36-n z7S6VLe;5GlvbOJxlT7%K8S9>X0gL9@MGJ}rauWFDhq~w3uZ|+Eh_?Qr5bI|V*59cl z-`i0^8P8w6g?o@g_Se@P;0sW)g*+s}$JAH&n2eT^(XX%Dr}EC+^I)Wui<5Gde&9s6 z;}v}Vyvxj0LSD{wKVHF|IB2KyQH+MJogzH4f5J+IF7aM3R@QGha_#cHwY}b!?F*iE zDmka+?iJYPJ?{sl*kNY_lly6SLV%)`(0TVE7($1v;PRnAFll-yY8357&$4`A4d9HnQ(`Z zUUgET*zw<@^qz0P)YPs|H z?JVH+{u;d6K9CO~Gz*)FTNsbn_T_ULk-NgV4YMu3b1eZGwOS1;jqI2I+d%XGf8Imz z5~oU>oT7ObMh!eKzhCU2Pz2Sn_n{npoduEh`P~*3_9Q{{lE~(}g*`oyKZsuo)JHJK z+gIGX<#J7r{YU^Hf)dczM?h>PXAte+aG+Mh@OR;S+0)-`Paa?J-9moqDSGtyRM_ti z>mm)32U@3lVIg)*WOLtp1w#-3fBgDzV+!xG9~|;H$q%`An-}S(z6#$Tm$jz^?z_d7 zxpRu#gW^)QhVfqcGpb1t#Cs5TDB4Lw-XWOfvX9ZrGQ@3idD@1X)UsTxd=TNd ztgHIh2Ov&prHT-W3cd$+e~VO>%iN-rCwOiW%BwSAzdM{a$>d4Pu24w>z%}^YC3Brf zJ~q_1X=J(7Z4!Bc>n4RPm%2ypu8zl}0Jq5F)uAdcN(E6g6#x3bE_xXKDQ$dgD(;cS zlYzNK8BYh}gfKp~kZ#k((=+Wz7FAXK`uOQ{N)jI%_H&A;=@^KifA5|kX9N*LRhR(Y zgZdafJljUs$l;!&5k|V9NAU^#m#2cDKEGW-os+>IN;9_zKSVRPAm9GtEWtne;4FhY z{N5}B-63x$&3T7-mdidyJj)QbiRWn>ZW7OOvBzGT=MWD*G|OPOh-bOXE#i5C=O*#I zIs^8*!+Dc;Trc$Q1uCY~p_ZW7OOse82W>UcZ~aEo|e9jeEk 
zmgl?ZVf2qaEX%<6i08?`+#;T*gYn?2^0WoFi0A3C_Q;#^#sT!`lk(&=diX_oa)>^Um~HA! zjpMKXtJs=`tm<#OU!Lu*zqI6$KKZ6U>MLZ zxvB*Uf7Sqk>zE`(=6^USCP}#XS()YE;D{s{^4=fHeG&8V-8h?#U&qVOPxidQZv_D744I8ob^Cx&4iPePajw^3M69@W4pww!@I!_o-L;f)G4a8NSgxT;(c zyo4cE!Bo+PtU$266;&Niy|z=wYxM;Kv&?GJU1%6w6*@D@%P{edtbZS)$!shKb#^$^g(E~@4 z*9=8k^&!Jc{Dg6MJ#%^9ZqcN@UbR^&liYdWPZQL}dTFo#BN^0Wcd1Ogv{6+Eu$t)V zWHGOX3t6pz5*-*_-Vp{oHO0^{S>UOoZnRBic4%=j2?DL-aD!RRNKk*>T4X%d(%Do8 zf2+fWn8>|skzpf~MG{|Y(t$*qWVM1qi%!jy0^XE*vPVET;u2G1%6ekhER$jhGJWRCz70k zRMrz-p2)pgQ;S86Je^DC)BU{AK9k1$DoUCZFtI=LRQkxqYU*S=; z9df3<@wHKzzy#Vt~6WAu{6dq|~fPnLY0t|1uK1Uxyg6bYEH0!asJd@;yKJ;R&+ zrsCAasDiHyki{mb?2o*}^nmWbM0GSVs*A`QXu8ksl;R2EcRg#Ekqkw5LDy%!zWzw%?op@CFgf0yiyT&wFF8^*_2x7md7W-weTYY%4;&e8{Ssbvog zbDqvw}X-GwX^6_CR(cyFnC7DTHOznkG4kCaDGA zt<&Dj;+w44Z_TA{*fueF8qnrKi5JaUiypPTs5UUIE;H$9ize-`_~n7LGas zGt|i@4W}{Lt49O_x98wUz^YRP>k3?lRoZj~>klcgwiZwjG_+QKmV#BqBv5zhlFZOy z-4?-!y|6PMubjBPs7wPe*q=e>5xyRLZ{u# zsHP-20mWn>~wweAv) zC8K5NOr}@)N>x`ODAJP#h=N)?-K+qPs!prRCeSoC1KH3P;n1#9meDpN+JZZ^HiBlE z$WZn&A|OIt_EOJW>ar}$i)P4kEo!u3s@kUR4>B_B2yjYSf1|p*s56T`ie)s^6~rmG zgjNtNh(77jR2{>z4z!uYTCchxytde-gp}+|2Yr1?wtE#}Ic!HbD~Cp?t%%0D17m!M zrkxJwNVe73_$l4zc?MMm-V5cBxK~e<1@?ghhR0PbPeibyOxX>*iz} z>&ZA14Y{SSAa4k?xTMFsV|6jryv2}_W-~4}D|8lw^1SDeE;N7_PH5Lg6H-c=_M~Ah zn>b(xHDO`_b9*r#^xyymxOuxfXhH1AsII$gVno=OPq>h~p`LLl3+t`?!%Rbq!DKXPB=02(| zd(AYiE*hM{GFzE(GX@lTEjj6c!?Y#vf+2^)I&ZshNI@YPuhoe%LfJuWzJ$zlC9i@i z1;A=QL()F5!F*yf!I^4TAe*{TpP)0owH9aWyw1l%FzsYc5Fpvesf%qel!4*I<*QA* ze^;}cawDB>Tr(Dc7B7Z%*6VU?bpX{nUL?$P5J32VHS1|O0BkyCD?x8r>Csa<8MLJ; zHEoRoC+Ue1Ka$1`h9-0t1=_8~m>qLMy&A83G411KN)IPZXw40*fl1dnK5b6$IX}eB z`3hkTf8DS~JY$9~Cr;L;*PIEG*P)_`e>7msNg6gMH9u>au1_Pi4yRzfI%?CSPGbX5 zYt60n*D}@BVIgW_SsKP(+DV79z&l(6mOAE&rL0z+#XJ3MRh^S`j4YUe)Lt4vLP;T) zP}*=9qpqb&LPv}iOIlKeH0dP`frQLCvgVvbaq$GM%tt{}Y?>%NNPPuc#=6_kf7!tI>8NrB)k`7b>dP z)b%KUIn$sNqAv(lDh-!Y!Lny;e`jRXe1{a`8By~a>IjfFaxg>|Ixes#I-9stAJ<2= z1wmjoL$stFCWb->eZKFb6{6MdtdZVi<)ymr5hybV=R7)CcL!QZ3@42)i!CR9zzr2@ 
z=*p8Rm;fo)+h|FZZm-&rWRuL;tGB#ZjOoQfw`XIJ8qVPfBtih#Lj94ze{^-MwK79f zLS%Ec^wvwRDyvAyQXM~_>L!G2I+8GKcEwa-EAAr8Y^b(s&wH-KX(Mzw6{gMQ1~pv{ zQ-g2_Vr@MqG!i$JCr6JO-LoOp>*9u?9KbGK3&#V z8KEu}ay95C6=Gvja~`XYf5Ro|b?hONMG506S|6jM73(cP*RnH|<)h)c>fnh86vdFOMW`HQrO2BQJuX}!nB_X3XBgID6vl`=G#4sC~qf7=FQ5{)L;Nf3cv z24G^_-Qh5lyX30vS!)N&dP1FYKwIFWNk%QmHHpNrla3P7bZlWcn$guZFld1b(wYn; zgkO*=x+OUES;)DbiI^+DOU}p^H-wsutxZWbs1NudiklMF;(&R)#ck=KDO|=wUCA3ye+a7YtrV@anx$^ z-I%ZGNHZphjBbz}wH261X02_F(gsLVaT@7BKq#M@x~$XCnVP7E)q2ZNAcCJja5y!l zRlbWL9R#q)R$p6m^cqItn^nNJ@lHkTxR$qWkJ^Bfph}yse*hhafHq4;TX5z;O*?kf z>Sq#QVyliikXm&pU?@nF0?279z1W6_VjK(yEf{r8Ja(9D zrZIS1UrZW)0Ihufv-Bxez*bT#$fH~WSfW9#yMzaAHXsd|xv0%{;t0O@1{n0WVU}?Pp zbwNu3kCP{F9Q^kP-^|Ds#b@{D8NQb z$$*Gq)D!0$V&lWigcxaJz-=~5X*1^RAnsWqP^ZbnMU1f$8)K!f50Hi5^GI#nT1Vq} zCQ%b>xn9LGFVNCjV@xYefC1U7MA}*i00dCgI$3I7s|;V=8aPM7JdYu{r2t zV~ZL{e;F~t2%OhMpIdkBIwg=~Y)f6+-vo1xw@e#gD5jfMMjQ>$gLcRDwBf90TduDl zIt0)8We{{JoaT8G4>|+Vpowx=d(`Mnj83ON z8_@9}bEv7k(L;`IQp|{@1!S}72VhHtEwGB9f8K&&=QT{SJZwyMnsrO(s4-1EG+y;=tzCN5RvmymhmZY^C=JHLXg)U7iO{kXd&-bP6y=!QrP;E5ni*)c$ou}4ovR5#(qX^gjlRGb-p%mS9`K|BIMEz&fx zRi8FeDIH^0)m@{+tX-+PgFZm}v-#Mue~BSB3PH2W@Sp$)mIAK2VuO?<$Z9XDo2a=k zC#ER%bg98`+(I=cc5O45YBjpv0U~IElQTP8%k4JV?ua7>pNT?BRoXorrxrtcl4WGt zVx@s2uL*MGO+`L|%=T~|U?w+Tih~wOc89Y*v}l+@$CsR>H(RWi@jzS+907+!emhxFlU02GvG=Ahcs08@1z! 
z;}JBm@e_xufz+7eYeKyxuUx<1e`&%?K8`G01H0)AXId&x#Yvr7^qUx_wi%MF1wdD+ z(^Y_4qx^7Y;(*xesPcFPa^A2-TG?7r)A0r#8TDapD8M!^G&+!7?bQcP7p_Pc1kHL9 zm;fMHcGaO8j^Z)kcBY|zzi6W3PMl!oej+}A4$jqS!jZMjJr0b^S<4gy5H=}$d}#x|U~ z&d3EMxwT{pLg}E6_OvBRah=4P6t&gsUg zW4WftRePN-Z}%(6x+6*g)-+=;YuJ_bRM>EpO5f&Wv!}Egx;cpHNkwzv3Wke4tHKDn zRrmNM!Y%cfv87SP0C-mR>%zQ^4SRGKwnVQ1jyX=}SwWb%a9CyMG}^${^?xbRCv@8L~235`olLV-( z^%=6M<2cWP*2bGs{V_VRIyi0_Z8+NaOj7Toj3g37MyZwIf)_+;VhAH@70bOK=$o>z z9J;NIJ!fX^Iku8wj|TLCCV#j5L{~!}3dNN__BS?V!>(sdxwc~QMkSrs6Qia@kYl@{ zJ)d|>V6isvNxe7k)YJBWh44kVZp$7#ukm`eu?CHaMRcJBH*8`9E7gXn$|)-T(CJbNr&+ZGZC)Eb=3ggy!ak z6_R%7d9G?+yh*zz-)(sfk#y`))j|MJ zQ*}%i;w8h?ds9Y7P>< z7R_m8hILj`zcZS22GiAKYR#%RYSOx_QB|ci=9rnbatsi}8!XLegx3WnS6OBQo^h9s zEG$>cSCI*t*mi#H)(6CV4tg4a+!?6p<8@IJo zOu{@(Y=p2cIzd&W1A(hAyc`PRaF~}c zi!3JMT3|TOn19jeq^{Y>j3yHZ@HmR{&=$FI$j|=9= ztU6|4(d*FEM7FZtP>I^TQFpoq2CyNZeaURO8&-*1z+k#;4s~a=f>De1W15>Vyfh)! zZJ4tgoq(FatE#Y;BiJ5y`g6s?V~>a91&79Zr)^j=H-D@pJ)tgIWIUKj)AtVdNXC|$z%Pnc8 zO*fXr28k63pX+0Rqcb{`{8+4Le!qb-;H;l{Hi4K>do*$viZ!hgf-rJ8Y(XqcwjE*2 za8wb+%ztXAwr^=mw+^6L5AGAfl4(TKe#?}YNaGBuj!gYPS0curRyc{>V1vP^QYXwo zwsC89HpYEv;fHt)pcYVW+jo=#3dk%*oHzjytE$GDZmso3NqGg8dU^+N%t)& zU`)NIjFC|fiB<_!T}B(2RK#Q+2}_I9f~vf90)M#GMon_*W)pP06u`d9Z&Z>B`VPge z=}2y_Q8W!4EQn~99oU=Ii0F$Al^TLt29J3Lu19@cp3Z7rcEm^xide8=Wh8(NR_<5U z{j@dl_+ZvEJO@>Vs}<3$SBEuo4j@KIX>?4OLJR8z@VMHMLTc8r0U}tEb5d-!eSW}3 ziht$00J$~?T%ghN7-vWL*dI11iyK9R)sxiT6rSr9Qdkoz1UKr8goq7b7Mp>Xam{KI zu_f6BM)6pJg$G(d?LF9o1wU5l<;)G#ZVZ00~_Ja(QsmnzT%T zVpCNi;L057TKzP1cyy!~!%c=eOMs4zg((7%fekTJg%%qR+9YbVIjy(6!7M@-^M4`R zGJ0cO9X3J0&s?}WH+!LDW=_;>+kqW(g9T-@aeX4tZGII9BUI=`cpA*sjkd_wK(!HT zoKqc!lg!VW*{Va4&5oQbhqkV6>Snub#Xy+)eWF8f9h)5yZWTcK1Rd0>3Mfy>wXjsz zYCILk%#v?!dLqVpUBjJKM4-|jI)7Xs&RdnG(IGIL2}fRa)Sk(V)Ni{r1g2N(AZc;y z*wcsUAgnagC9!CtPS(mO6bJlywK_&b0Sy^*h4clGH!(gUXl*tWNSN`tr)xGLQbdQrtQK|P7t0e`5h@H#WC z;2J445=v3$fXy#7u%*x=6n9x+fZ*9+?n;6)Bib#)^ZO~&7lT0xc587)1#QDZ7WG!T zXbtL&i8GR2HJQYpi-V324(-Muo`+##E#Rgypnc3^C&&~;T5%LlQfIKPWbj~ITPn1z 
z2%gsD6vUG7w9X+MAFvyD*nbd(#e7~TXA{mdny{;{`ja7|NNGoC`6xCD4SA@>^W_p5 zv}6jxa7oo+veo8NLNu$SH6OHedOYxH5Tg~!t(g(jPv|)>tmiPli37q{v5tUuRk1Z7 zR)mr*n5GD|=6!wLnRn<~R;QZmLJ|R&ucIB5Z^Z+)U-6x8AFeLL4S%VKbvGuxkuSJQ zRvS#JgAqC~p2PkK{wq#FU2++4pX45jzq?)GoY&=5xz;Frdg#Uy9ESm zy}A@wu3xWvKx~_s(QmD3nB-wTN$Va6+P-g$YY<>M;Tl~U6ygJOTW-0t3>vEWtesiR7OCmzp9-ZbpDS)&Cv)S3+XwRT7C2V+S~ z+Ras?HlKFERi(NN7pcD9_+CAlP~EgSq+2$d&HbQ)U>F2PLu_em47lM!4XP)9*=ZVu56CR_7Us<0q}U6;hpUIA=t2!^|_yui#n~jT$Xj8*cQj zHki~!yy=Hial(#53)byPY|$PN5w_&C`7G_U@L?dU87=gol!fb7w>z2|sw^d%qLNat z?V@6R&QY|aL~Wk!nfhFrrvlvsB~M8@Fv%=red1J;0DotNYHhkfJL_>US~EioRF#z+ zi!HrAn`HALJ0^QUzcwPURi3RwfQN2B?&%gyD+BGQq?9D&;= z7mw#8;NywM_FAj*X=y;JDjrV2Z5s9DP0bS-W-vAN)g+E4t?I;1hKtasvt0g~iRrPRv@o`60%5JKkGhNH!e%3V8F@y4>wm+f?m#FO*QjYjoQu4))JbQl4q|Pt zWVTx2T}X!}O>(^P6>@GzLh6tK(qs&r$&`lVU~+%L8jXfJo+1dSHytF}uzF3MENNNo zv}dYmkgI-}r2y`AY+TnDt0A1ykfIExN;8~9&UmyY6{fnF(Y!E4d>S32A+AeMmv8Zo z0e>}sdON1u&eEc$(QI6AHC>17CbOz-2vd`HBpT&SuuJIDoL}m*IX|2E223q`%$%kt z%w)A(5|dbqJ6?+(uRA>G^@l);2tyE^1T5ho+CsABwI>BF z%SkinFJL0A_AGcG-ov(4zU$`MAL|#0}2&pevv0LpoOb-SZ5F2FRAPl<0Dr7MZ zG}OnPa6O)rDvFtPtH-y9w$+@bI$4wZMpGnI!iGl>8+aHucO4_#teUg&8ru+>i&xSR zsSK9wCdY`4R@*n%Lv!A2umnaiV1EcC>xwt11MODZYPwv#XJ&14B#p3nrIuAC3YBbC zpf{uz^n%7Bq#c+H;RcI`6_lLz`B@0|!y#B%3M*M3d9F@$0%pl6K)Y^rosncYK)s6q z{H&QI;}H+dSY)-5H}%$Lg@F>?;=~>*F*qMgn@9^$)>E9H>uwKeS<_Xltbg@!7)&xZ zfC+Llml(XQjMpKbl5@!}AF|0*<##a`k3slE#5Pvbdz1noR=9I*)j0T7R5p@IWi|KG+`v_43a7XsH_PUsEqjaQmxrg7Nw+%D7A zqOR)2)ltKt*My_==^BSe>zY9$aVOgNT|XN`e!AQcv!Gug$zTEk4S%dzqbCvo8jG$^ zSv^@WTJEyCK~Z|W7|C;X2Fxb{>1#oUFyJABbOw_?4)pwvqmTSRY~W@E?Caf39t#LJ z_o{GhRsq`}8>4{U05}|!NKu*NSUXDa!NhKgASnx!6OX5xwcn52N~mCcWnz-3kp(Fn z;A3eTtGz*|liFxwxPP*H369u;8^$WzUrJ`D0*>5N%uyi=P2%5Wtu-Eo45KqJfI6|c*9>e6Ht z7|z!WJJzA)CJ^n&bB6wS0#*8r$pGrf+M?yfpxOti&B#RDc7M}%J1Qi`Tovrh=YT@f zbiFbdhm%g&XvG`F?lsyQq2lTMX39iaE$Yn>L1d9gvvEpP)Pv2;fCT!o!dQOl{v9VZlIWsL-p3Xgoo^gYAT8})#g%@(pB zvx^}B4CPjDHGc{x;@m~R&`se`Z^fImXD_9-q_E57sM}6NX%RQZU~@Wv7o%q15nE|T 
z_lB_7`u2yV&AuqdVNC5D7?xU?mTGImpg&OPe(j?V&(uKVy*lN#wx6eVF z@eB->g6AxK0`l=?CdP)L5FKzOZ5m!O>OoUUnu;T5NPoGiHd62b!O5P|nI>7p_0?sq z~0x&CFg8a03SuRRE%8vr;Ff zg(W=!A5H*&|W$Hn8dv5dtcfx0@ zBqDoGWjd}`C-!8q;w(v80xNdIF>cq7UAQkJ6=^w1lvqpYwKb3k)=p@jRD-!xU&IXI zLiC~F$P6PAPpSjJTzU{a{Fkh9l4` zP=VUirn%G|*&rEn$OM#}tkr~>EJfJ0Rt6r`RTk_nJ&((%Za7(iXX?BGUU7}B)c4;s0B$d@B<-!YA284gbm zuuO?Q;YS9VqL|ERuc(mR+ATv$=6AytJN$})q}yjPT#`PIZgTFJS5u)CfnT#M^?#M! zb_Y?tRii0dxC+7WYkdsoQ3u;mU3u~8Vm2&ID`fEtX{S~&7UAGY#iMUA%K#tLgY7YD zjrJT=-nBmvKY5>p;%%SbU7yvJIe+;j`L*ei zWip%<>>y816W7zsb_K}keK*a`hvOMfa1S)3>!9iO^SDaa27zQa#PE4$5figl5mc7b zE-2v*u-<#fb-=z?I*yW3I{?rp3YujKVPVSE+fwEx-a!?QlsaLO^B{SW_oT`(Fs~kX zJ3jXT$~L#PaV_kY7xL>`Dt})~i01Sa4&LY8$9?*cBite%`_V$9(po?bWdonPevc_T zU7#+l6s6Ax)RzgQ_2rR6rAt@^iB8k;8;*4y2_#_o87q^XDl66>0)j@dQ>fLix6h+#)RevuLG3jqFThKB1 zrgZ^?)Y*ew_ksoaWGBMzp0NG^G*wKP$Hss*rD`@wvO86p=EFJvQb_{fwd}nRABv(} zm3f_l%DhWK4I@$4@@GKyp z&&(0w!CB`F_mOCGo_}|ZA~-=AX;&R-AB^9eVlIYZD~lPNH-*B)HzRpr zR5w7N_2(dK`G1xrTMKm@$JC-V_qu>2>$|r|4d1vhBD@4=L9W*tZw^G#gJCXjP|^=e zar+CzwVd?OGnU5xAD5BV6XE96;AHen zxSb(C$#BAZr`RG{zIMT4ZLVEDH`n6zu+#l6kGeEAmVboz6aY&li{JjEy>Z z;F~I2qLs|?bnTkZP4UR-N$FZBT@_~40z{rL&1qmg^htkZq3wm7`=zs=Kb_j9XC0Ax z@mi=#@qZ)-r;iR>^Z?uigsC~i5tRBQ?fy=8rk^shv3G0K?LGx=F?q(Vi91Kl;dPYL z`iMw9RJulV@^B_cD$bCna(J~sR|gjqccU1?P@rQ;Ity$CA)Ru_(t`+%I*IirupGey z(TYCXX7trb5cj?F1PE23W4s(~at=!_Ax;C_w7!Wtgc0m`EIhhaRw z$o4EPH@ChY@|H?S;6XXwN@fQBCV+KQun1m*gwq*I&NB*^0b`-q5hDJKjs<_iFGC^K zteuI8>12-aqzGJV(6=ApAvx1XIxx>h_n@N5&22pT{c%N~*^Ht)zeR9=v~_H|360cR zu7BFaj1}9JWl2!0E8w0#m!dj7r@5AJ?uXda(1G*Y;#zyU5`5&%c=K-0DXPcfib$6W zk%?gh8=$pO+JG*}Vx6@^w?}|C$-kO_7a2ok2DKs_c_5bYyYcX*TbFZ~cz3K5P zsxSsc8yhn{?vzL^%UpLqq%ukT)G^!hOkL}HSQrCvI%ih4Q4K?^Ka!VmSE1+(?tk`N zAc$+TMKvrw%iER){Z69y7eBH1-z2B10pGtG1e+qm z^vZqIF@1F%gbG`|B*az)vZ+n^ewZDmm4GApE62&zscCDRN#I0mAuP`?ZLk0`Cs0?c z%G#8TU*RqmByPJEa`y%{A%MV%U`FogGfuFF7^6PHIWkayB>=|ewfoT zrO1f&oF7^}NL4&c(?BxY-~=J%xglU#;>;Faz%=kelji;lD#bbvm!hBeGJo!ZkC#(7 
zH*^A3nQFlnQ5@=7NGsBo)At5yb%!5pN?^*sZ}k=vOd{kEi6bO%E^Vd2@UWp?0DK{} z;y#5}GJs!Ej^;%vqL!~J$C7E4Ch+eRa1SG@K`4)&thq1THMU7G3yn{}6hZE1w*^3~ z7!@uUq~hcFg>k*|DdV=I>VL>o9EO|upaoCu1}?s_Dd6ax7r)TFo}vhWX$ zRMF^7!T-ALtHvD#)>Z98Okz7D8!&tPElC>b-dI$~lUMuui;S&E<@NAj27fma^`)u$ zv9)jNdEaU)%)1XRVG1+_2wkvD~S|%WPoAFBE70A z`J1cI6zSj!G&P70DSz;r@_ja5JY^0GHo`qMze5WZLW%$^B0Rx=fi|fLfwIWdWAxVO zC#nF15@IHNcz*lXuX}uf@R9){YK-}#L+Y^~YIuM8W^t}x7|{k&G{I1jDJ10m_EWH2 zo62g{&eSPdyj2XiPkGq-{`E+wMuB4bO5G|ufQ^g|_kME?T`Qz$%|nWOXshd^KEwb87q|Z| zW$qpTZS!yCs%O+tgoYq>LK4T?Ph#8^A)o9|t~t&PrA9n~Ij_mt^GzLX+Mq;Ce^e0l zw$>^_C=>94N`I)*l%6x9_ifauUgs?+8ojb_j)kYYx}qr*nTNTYETLD}R1V5vt6o!Z znv0Viih_-Ipo$QsQ(enH{TjyOoSZOF&h$2+Z*!QI(j;W}b5e@qx6yY&2ZQ7+L|)Y9 zkCuWCPwE}G9zb%x%Qvo0Gpxq^u`1sz8dpx)YJhufrGErE1R+PH(p3Az4*O<4#blHA z;gn2+C)l=D0$FTdJ?CAKQviPu5YVJU?sS3t4(bYj)>#2?*=dBmkW)V{RJ86gSmI3p zf2$Z_Mc#?uRE50}lZUb~)e6`qgMrspgIVv1q_N1f5&YP;7Q&-`vK0Z*s^Ya8!B-4z zXNH*V1b=AkZ8om7w8wFblI+UduW=qh((w0vdc-H;#OcDu)LgP?|w zmVW?@1eS*?uvd;Ws4rZmg<6T_zW8b&H9hD=(f95X#ne%1jNtNp`qi|RFcA9$m%+pP zee{y(6-`8_Ob5}O4J6UO9f&FX)wRgFw;J_vv+kS2%9hIyIOC8~_F{i*1{~l(N4J-g z!|Jm?D1_01R%)P9Hv_zRUlMO@TfMK?Cx5Gon_T^E>+tr@h|jU)tRR7H>+1P%inmM@ zwz`KzV8zHL@8n4*Shh7|M;Mth!w-DU)jPK`av=ZkwBJ&<4MKahc4efAgp!}ot`#R( zHvPEsSIYHsPW!-79E_CJF&%&iP0XXdndr(6vStzaS@KvFJMRF zb%#p#D7CD>VKBL=666SyQ>5GBNUI5fb9@OILOvUWaQXq|bKc+*L+mOW+j7_XPiBTa@@eK(M08vs2* zM>DJkIEGHeCKL$WJ0n9WHeUC|O%pU#L>B}#6(M<24bK^-RUNd$#Gc7HBY&`HJJcHk zp<)YJ_mQb@@(@nFEepVIN)Qv8y*LAdRZD)MW%*YPb+xF(2)5Z^+noWBtT~H%9e_|W z0GgYN^wJ9xe_L_w7m@%we99tg{rTP1RGTz&w$-((Edw#l{C|_Rk3pUUsg!)i zArtwH8_wc+x$_fb)Cnkm6e80Q*0<6losElUV0aZHjmwwZK08)Yq;}$M&SEzu=4#;o zW~v#@2uSY66)JK5>?6WQT>KHYkDPDm23^O>L5b7blhIz74-&S66oPkmw&;Mf@q_Yw z_!0VqsLTP5;xak=XMefuNGhOtei8zMhpCwkJB^~QS0L-=q83EoppWW~cTLwaWg>uF zsJ!XSapVo{XW1;^$w)(y@gxNmUE{^!S&FPXWL|m6?&%rURsdgT@9dExAR%ZyI6~wO zaHI~jz=~+``K(?jdo2x5L-Tg3gsN9gOSWP_l8bPVw8gqu9Df5df%^kBLchgcc|mcS zAPJWNSE@KxZOyBY1;X#HgX@Gm8+j*naIHNw+(o|!e$cY~f+^e>Hx)D`Xn%;z@~O%_ zb+;59W1SNcbozh4%; 
zO<4_@@zhBjuzyrMb3@^F>S?di+Z8u|jtrYLGJCBEEFYFReEbeKe*0r%4$!xLa=28n zt)3u(#86cJT+$gA1OH%l63O6T4Zx4z>;MMn++ws^Ub&?26}n5>YWL|a9!%==j>roG z>kS6QJERVv!E&Ph_jQ5>Hlj_StL)Gl)iK>w*mryCotwoI9lRQ%~ytlEH6vUdF-nOjOcj%r`&%4s#5%%h_B{X?O=; zgnZoUdO}1VmzB}NvWT@r1DStJdeb%FA3;hdUX0U0!8hAH3Z$M8qaFtjhjE}`l{^B? z??%J9f`6mG0S~mG6_Jbff4^f}(8rhii5;PUUSbn_ZDooZ!9O?C;Lj|Wa+Y@~x%aft z6?zJz=!+^)<)u8_1riS&;LQ$%*+ss^5VT^UvktS;-K6VZ4uiuUXU|W7!kSr9t!)bh z;TZy3%Dddw3-u!^Ws8KWvPo~U(M$CvNlQ)4+JCsiF@`JBqmLvV^@7z8`$ci*K1`q5 zNI92M*kd$MsF_ZM!-QL__NT&2OH4|P8NJ^8D!zhMq$3?u`JSLELo-X3FvW>UB;aPz z9eBW>G->DU<~wT=$skCaH2clFpI4=DcF9KUx>23IdsE*{SWMeSjpre;q> zZ3MDXuk5(jTm8n!GN`V#JUG8Am{++_XMamXKL>cHlK1G$C<{=Xce%bG6R~lur`9sg zAfU6pwTj-;v1q!cQU}Ph+#vgYdxgF$V#9hi=8U|g4N{90moFn2eR)(J-JVGI$VJUh zrkR{cA5WJ(yY_u2;amzF`lWN~0MG}>55q0-^Lj?7pbP~$27l^{ z0Kxp^vE^!=r*G6?N>h*!@c+i-+eYD?f`-iuLm}&?M1fqUTcT0;E*8kp-g)~#3#~C3 zX0t#&{01_ux~qMsxKFoiq{jcj+lT6vQ3)6UP|dNlZfA%Ta?Sj7vRCAc>UPV&moA3w znl1W4fDS8*4&hOd(^pT$r)Z6NDu4WNvQaFcF89S2KO1zL8g5FXid~AP0PwT4*>u28Egn zeZq@#r^Cn~LBI^4%PjU|$YrA6V{`g3Jvy$WgsU0pSh$ylp8biyW$iMJD}OESrUTL%5E$t!1~PJAb)6-t}?TlT0$x76ohzzW0g-TWgBB_@jF*ofICT<$K<( zznCrep(Yrds(dA8VB7q~O`9^vBbXK|*HKBd%x}&uCcZ%5(0p z#!Jy)Ll#Kl=V&y;+WAR1 zTU?fhGUt@nkBV}sjk={*0lE{qsu!4l^YNMo+dG2J<4Zq%Y=3`XaUWejGtAGrqj~4o z-!E@o6093K)FP$8#)pZ2fTuCh$qZtVas-v2-dnsafaCXxl}63!ISC4baTi zu_o2FGoqu1i&Z)e`t*EwL*NhD(BCfC^H-ddgHt4X8A5|<^$wkKl*a+rO}2BQ%;4E^ ze1mQ8Excia?4hvsZKB|MtzWx z_In_&D5MaGDR8Ez?v{Qtrswz=T|C+W2W4KjWC}bCie)#UK$s{v;B+u;#FP)z0UY z3q|VpRVeMdv}?rB+%#jC=@qY)6a_Ym=trUG?WOhoaPrT_w5AW?th?Pl=66tH8k?rR zQGYTIZ-@v$Edq`gwBEQj^2SjTr-0lqdm>`(HrJ{o1KEZU0$GtnDNlZ*a}6ME-)l;Wb-yM@Ta2Po*bUY8Yz;{#~Lr zRU*h$yW=U$MtK!zr7ne7sG4^fx40sq|32YyRe60}~_1N8_aswD5$&Ww*u;^RwW7512EVQMHq$>z?0o^LR}2FQLi; zZvA-}$8|>szOpNZ`@kxA%YRWnZ;OM+@kFihmnXlQg(fS}>U2z;kk}jtHU02wcpI>P z6>hhMjgaT7lhG(uJm)Ot*Mg)p#C-{%9HdRDy2R^m|2AGD1=l6n6g3cfLYc7NUp1|3 zX&s((FLc&Ow3LJ$U>Nno*NO1MJ$zJU;t6s2@X4N@9`x&N@CCJ5b$?`dk{5xKFYh&l z>bgWrvgq)vl|l#GbLjDE^$%k4e;K;|BJ8j{AAs50`tR-GUS0Py(Yd#%Xi1C;TXBQj 
zUSM{y$sE1ufH{Lr88>) z)!wpT#1!?ykzm-{fPWuV)!^b_>A{gk`J-c)8(!x*ms64?A}wcKA|?9fU;rQcR^)S| zs9XvP@FgZ`ii0VXn#$t8|P~=XJB8KD0|NY zgo)qrD)JyLuh7qL)%^p=--PkMI(R340-FrDNT)|kl4DukDIMI{F_Rm z#oCg4f5T2$&wnUPwxAyr?nM@IQN89{D39~$W#!W5Zny`j%3Ef#(qw8nDyaw#T0qcl z>E4`RC2&@bHQ1>NM#^oCuO&Lx$W%(8q<437Mz}E-$2ymv>h$U6cOB>ygauQ z5{dURUN+yNCZ|eTaoCn>>+r1)Fw-M-zBWLE-KD$}ki*aj?0h+8r>wjRzi7bd&UII; z=i@U7!hgu2Eq?Mv^3$9fd4{#*wgqXRP=fAR0HoXdn5uCcpPOgim--W3JMWHM7W_RQ zC>PY>s`vTnmb^bQJ}w6%8yRb(_g)Ot1yt^~W~)eF{$>U3BImUm!Z?Af#N|5y{m5OE z!!5#=xzWkRK42R-Oz|6$E-QC522Y7=-%vBZm4CjrhK`2WC-7Z7Sok~nv$aT2dW^Gl zL{^TVM(e1V`L0CzF2PQv!}4i|Oa~giZsReM)r6A0Wx|QnhUc;U$Vpw>ddB;#O4i?N z{nF=f;acZY$>3A^{cmYA~M9cjDw-CI6H+6fy9ZhBpDMAEL0ehTpfhS zMt@7-C+<6T7b^kb8QpS=Xo7eSg9Qb&s?{LI{1~pSaygrYjY*0h5S8tW=c-NI<1jf;&fKB(^_GhdOIgbXWC|$&~i8zs?!~r

&tr_7GT;|&w-P(jL5%a3+Tm4;?J0auvkxniM*l+RbO+-}9^;b`KT)a1Jmg|` zVQS`Vtd3K7K;mKY@KGKV9*}$t2h~V-^~>UXP@6bQix^W}olmISPusa%%!ky*UP2YOj;L^Pb5%vc+DX1w zTZQ?=LmyU&KuYfCA^u}j!@muOlYeQ6a$hwgvS?;$g8HkrbfP8>$xvDeeMr{=w%>GK z$3|YDNn6}^t|d;Hrc#R^ZOlEtRab4xb7fyZtqu2sfj|mxQ6?_!fcCQ1>Xs!tPTjf1{($}zAwUo|+;uYm{yV`{$*$eygViKdbpQG0r&(SN-9=6k&h zRMO<>XBsCWe-3L4NX)YYYK0iBM{%jd8GILphV7>QWHYEPf_yp#N@Or-TfF^LVJj_l zZiw$29q(%Lcdl}43@DvxHozHJ{OD1QyhwiT3B&?DwEp^9+dDGke|#Rr<64H9w#5(!Ni(v@WF)XWS6fES+mtz0PlLlcDz zj!w0NRCEB8rlaz=AqqG4^6_dHI-RvdPrN9As@OmjrC>iNRn#^Wo$ZF8M{A{ z zSH)~wc!W^}2ya&|p(Pl9BD>0~oBSKh06QrSgEuo2EQhAc*%2fjO9 z-2IyMet>|xyFYF%FVUS({6v2k9W910LlJgNj32_b2b$M5&q%7u2NqcR-w z0#SJAj8}2SVXqccFn`qHIv5xdeEVQOR;+-gJHMm1fEX_Ux7Xs#5`T^FpBPQ$zP^}3 zeTfO~ZQpu^%8dipH!A&>;x9tiS)_1nY$;FM2cthf%N3~f7TWAc72Ld}&aPFcne1>T z08k?DZ}EUJ_b!85Z)6!meUzx9?g-PCRA4C_ogHOYbmjBwU4MGen0R&yzn!(Mh{-IQ z1&tUjyC#?I;n;qK-FIL-$N;;vp4YwzryPw-=0Usk{_i`>6=LZtRZ{cOx z_0S&QAFSQ&^%4hCt2sI8uf3EmF^HT#elIpfK3hZv`^u2>hq!^X{x-IqgGz*cwms%R zxh`T~!k5%?dVfqTQa{)q&H$nz{l->(lnM0hMwRZg$|c#ZnQEdR^CwRf79zr(3Qq4#)-;yd39Bf>XfZr=&~ zT(T0byu1B7HMR}Q;^NNvD+k@ia*l^*(*Y6 z0H@EQpvyUh50nb2s_hS#B^g*OEMKRVXlsC8g~uxc6z4K85th8_6UVJ~V&BDb0dEWLtR7G}$g}#|aa4X4K%6Uxc3#C;mST@5 zP!SN z4EpQfQ+nV+mvCnkqOB&9I;KOjD=qN?RkrfS*MDZ0=fzAQ{~>Tl%_uCj5<&4b-VlHD zocq~?7jdRv96>N5J(Bg+f{4|j%yvUw%!@ZjQM#s|EvLq`X+{dAv&P}_J@KNF9Rp(n z5OXXALY^Mml4t<0vTYoFRXV7L!7o}}galnahY_a&QgOebfafP8YCS%2VKv_c$d z^MAh4<2Z00*yAHy_jAf@%KyD|uY=8257SQ|>#Y48_lp{%?02_1tXVU*iv+JR<%@fQ%_TTDpJ_usr*sV6H?X z&%sUnXmcZ8^yO&O*j?(i0yBts3RaOBwC+zV&2L&dFlklh%r~)^O&QgbIaI`0&k8j| zf;rcJ6}#JZBrk*u@KJ)Y9l5D%%5RGii$?k@B&E8vA8kj4wywHY5lB@{oSgk)CVyXq z3;yAineO?jL+^|;5$uLJM1(cC{8-Cd*{6$HnS|Iya`9Wnl_t+$9G}mT``&gD#hC{D z+g_I27a^UG3;=&68I3tF0}Bj>cJ@hFEmEoVjnWg^D2 zNq#i$^02pSax4$OLBR*lB4Jop?oA`+vdEz2_{ni;jfqKxaw{9DwuDL)1`QzV|Mj2} zKZz-ie!+A4%-+#=4pudzu z`SAf1aPjDMQ3M)$%WDaBz0sRbLHBGg`jRI*w>`bxA$*4ZCUVG*8PcE@N(D~Wa}bWM zT^HP_SR3sM+sRV`T85H`JZoclqQSl^YsY1iywa&N^Y$`?xD% 
zN8>&tuMFTgP#>(j0e28IfPYqzJMX2Q1=MA0%2Oj3MuKve#|e;s7dio9G%dzRN5cHY zjpy~Sn?`NiotwOM*NxdXI;6zLi}GSBYRg*SadgX@Mi;rYS`P_R+A!fUKYkn{=enZ3 zvrAgE`#)L&``GP@pZUVA%$lmdEkUA;CG3SZ;R{?Wk*%d?&dmB=hkwIO2CGo>h`dhA zEub$&@0m4D^Utae6~WUY0;oP44gY%GLH8h|(m^qMvc7inXYi?*gj-!rKwA_1uuwYM zB0DG8XKe-1-_OMX7hD5maZTTnvneowMHT&hHe6hw=1`E4%H=27Lpj$hFe5oOjh(Ep z8@B2h&wAd}V!9YBZhxv`Y@1=FqEpcrDkjpQ8Hoo*QhGGn-TD+u;i>|JqF80*XF1SWjBa!+ZrGC2E1BmGb zq1E@1``i#Z)k`F>@H3A^2;!?()$_8D9nMdA;IWA$VvRIGX?f`EtbsDs!BOF;3g(53 zSrp&OAS9UuV5J3Q4uT)9{O&|;s74h|1501#0GsVga(}hL&QkG#PboraH1CWwGX`8- zWQakz`Ch?(Q>tHltNbB-KMNbXP328BPl_SU)mqcp9E;eSjE_8nIOQG`Cpr(&DdB&aKGETqG@7-MdaroH|$VoEUOB48{w4M|p9f=3(6A23! z6o2Nsq_pDzK|sF0D%|tC5>qg}$lb-^A;?V5Uuka#%wEZf{H-6w>)JaZj4ytt;ir5+ zq0(s)WhvR>L&Eo+zzzPY@RA~0bc>;g+vD9X9kb@A9s+rb^tb^(nT8Lxyms3{?+6)V zg=Z}co@ZNM;oebvg{Z-LXv44%kk1?H100->PGWzE5ngLBkmJs}(r4Pn;h{B3ZEo21 zlpQ3T%Y@RYeC!8N%#>LRaE%Wv_ixGYzAxzHDZKYAl_7_XfK2XZlszwirg`!~g9w{lm8eWzd&dua(yX3?xzNx&VCffZ4a z$Ot$?`E1kn_Ul2LA{MH)hk%uDwUMH%$hlus5@NHgc-E4(G$M~IWE2Jyqjf$;D{ zI^h~w^}{z?u<3IJpK)ywe;B3J>84r<{h5Esd8-&vW~u)~A$XpeK$QbjY0%^Qo_aWy zK!%ZF-rgE6w3nkM-%Yf%GwNEDb1P-{?T- zjU}9Ly{`nmLQM=s@iqAVt13^J`$gHGnYE3(p`Q`;cI_P z+;N?ZH<;IIvnGjrPi+w$Z1Z`Eqvm^sjRZDAI;C2bg5=5wwkEuStIup^MD6na3fLUs zM)w2AFsg-3sP%XINVurTs8S*_r^=hH+_4bCcX6?ay8yX3I1z@9B&?TlCKbzzTxyhA zA-Yc!M{-iM5|Fq>gVN#RsP6sKKka|*TprB|y99}QArH+Lz-4gsS7h-`NPAv7t;nb^ zf?fAZ?mXM&z-AQfiFAlu>v}e(#2$rQMI(RAjMRDGr_op{l?+n_&=bs3?30~9x%%^d zYxR5r*VYYOso`P*3n(TIY80JHgi_6~ZtRFeka%%%LSdd_nOxlaubxyepF4j#d5kF+ z*Ris`ZxsQr@Q4lP6LM-Dqv_#VP~|K16uO$X0Q}2CcTgPNWKuHkI_}IFmTPD26>S1skNN#5*kW^#BtCov=Swvp?P<$| zG^;Lz{`k6XosD_kdMiPu{BVB&_m&`od+*(wd+(3y)d%mFJu)B&GIdO85M+uxl)pL| zhOaa_2fzVP{meXDmUlMl%_kkC3YZua#yg zP8i4RQT4hwf-~qi1*+#L7vP7h)WUXQOPN?`V6ToW!gxodR8-Mhgz$e*L8eDE%Xk|r zabW`Sqi5qS_YIP9|BJWZ0y&XuLc8quXKS#Rre54m2_1LK(i*4MB(ifhT7zo1n8R6@ zo0LrGK+~1jsPjzunY> zuOp`dT(Mf%qFXT+**$+{?Hc&x{t>xjdu~D&Gt(a&u4bayM~sI^C*gqhZ@?`f;O;d~iM$5>T)aS8o8GHOHBq^Dy 
zCr!Tw8*4M+ve20}nuKjvh8RCS`vnl3)CfwGqx9K{D86?svps(zh6D7v%Zlu!4<0#= zT+c1)*g=z-$gIOf;0vR|RE%%*somzHLKA4?$o#l?&^Hkgx3Gk5IsGU>^bmAuL?x<<3Ow3A8vorksU%;w-&S z(S7k>_oxUhZ9=^iw`$U}0^^<1TY@c8EYMLzfUAN=)N+6LP6_$=B3u-eT{=S+_b%ZaUj0v5d3<2{zXYJS|Gau5r}fj zuw<78(N2Fd3GgsB^eN+wX&34f7$Xp87ctHy^+(Ow^UbCrTl==Wp#z_@ zUZ68?$q~{qQIOo_LeemCihc8>a#>5KjXx74Vm8L~m7IScJ0djw44u5~^pnpcX1{Mf zi^fLho#06Qe!%_sb7r?cHYydO30h8Sh-*cnY9}C<-B3a5ZXGSIE6wcK)avf~02ALZ zR<3{b?t*>Xmli8r9a6=`0Niqm6x$(UafZlEjq1(!04&?J(U`!461tD~+hV)tSTvUPzm zGB=rSNJ;yZ7naNqiTJG7HH#A?F5?M#0m*-N@B|W3c58B)ZzH(x7-|z`lPc|zhy1@Wri#vMq)bolrVg1mZb}>4 zKCy}qM7)Bd-kePr#h8&D78VVOq=i##0?7NNSf2-}=54P+vayXZrZITAF=)rbEOWX$$3d~do+SA)Uj=y z0#?}D4iksCco{YjSn+dcjxYPZ()$d#3HFhjV#-tq!X15H4OZRaju62^AzV5LQ?!C6 zFI%a6laJUYJpy3_;`PX?LJn4R^YMRzgO_Z^x*UsGCG;`wXia1^b!Zw_yC5?Sh?U!q zAQL&@Fpe}4lqYFc9F*XKW_dPM#2d;u7nt9#bC>(OYrF4z{~T7gUX$ZZ3o&hD-qQE{7DRb?KpuxaTJW=&rqVh5N4 zJ`2PI2r(HD_yf!bAukeN7w$9JAf?ZLpe(pe04x3y<1iG`qWFiOi)k?I7E9rWVVPvg zA3*SaYN~_8r|BEiW%2}0hV6eH6?#d`6Jz+LR~Ny)L1@Q7n7c5@RS&dal4Nkih5}sp z8Do0}aX?CiQOffsyPJz>tO8a5UNNgFK4N~9+Ka!OtRp0ah=<8;DQns74;>%4+o6m7 zP5nX>xZtk9;<;RW>Qf4Xv6flnKz)>QY`?b*kD{{JC;Il^O!1*J5wd?Nu?&WG&oYdk z>09lA2l%R}gM|b8G%gn)Ey4V27nt7Z2WbhWR|*{^0e>@1?@_2Wu&!M@SM0|d|-^j13X`4F2Zq3@c*o(OtsMJ3|_~J3sISyus#B%h@1vk|Cm=2kK=Q$#{QnGm(QX6bN=3!kC8w@5@sB z$)U%7!A?2GBIPy`Eg9jm2Ah1fM>?xZ<|sHS^kT&9g!4J`7aevxh(rsQ*JK5Z4xeJr zx$+3@yp<){v7XsLj*WmVy064JR@NWTZF~GGL6=LA7q0D@nZA!o6`1^LXa4-{f^pw! 
zb8I||${Bx%gxfg<(m;n<+K%yuSd$?RlGxJ8M?ABEso!fODdyx;bsrjkQ&J7^sesoC zs<)l@sKFgE8HN`6fe*wF7M|Y=++!j_F8vTe%5AQ)!j;hUtC*o1v*J*22nDr^){M#V z=vh$Dihd;E%7zP!H;{i7`|RaKVuW!d0h_DvPgH-kiz1cP?)xnqP1Bh&>SxI|##*yz zfGO;jfbWUG$9hlUZNu>Sr~R{0$({}m_XH5+mOaqTd0^=&HYez}Orws9-6fqMpJ|yH zEC(c}Lj`Ydh{lK*T7+cCD_gQOBute84FDRU^o1QGE}s56m-%hM0v%*!&aUg^T0^s3 zr8a*eg}ipdg#$H#gEQYO0Iq6H>^>7cww=awvcNzVF-bNyu!|PGR%G>ZvE_L7;{rGv zMBxX`>U|~dQb+s&h>?cDKP;7UmsinYh(?FeAUCFyJHI9cbJrZ*~sUIdS-E+O+qsg$CZ|BGv$AIo*N+#MEfS;V)p4bom@55fHgKg%mfp` zS^naWcyzAUa|6uDQ-q1H(|%r<5mP*9Q|Y$ELzhTB!X{*geVnjK8w{t0h>)P7V88N3 z9?PTAbO*&lh2MI5Vw9{P9NZiI3)?SMANZV=vV1kjZa4CLkJJgdbR6YrkY95T2>X9! zkfJ(k@nnpOvlR8P!XY&C)U~|knE(hab|J5f%4qNnkgX_pa1>qSw2X+ z`mdDlJXRfZw@}PqNt+(}0w0j=mM20;m0-&wv`03A?(=*qAzOSZ_zfT9*`T*NnpMET z(W?^uVe3mhc8%5M3(|fp=;-CL1%`q^Z1h+V)fDT=)Q}#?u!R&+rUoC?$HpHeIgVf-Veg`z4}3&&+(*A zAcn~^w&SHUYls{%-#Xumw!h!G+5R{1S`Y-uPTWjLY7zU zo5Iid6i+Ww3u-50uL{)m;$ro8o3$5c=`<*1`ed@F5hPyNUq zJIFf0#1|co=~tBD^|nCa2)T&~;yq%vcA29ZAoD8tq z#Ga*_D2+4DBK)9$3GvLyCefuNx87j8xG`B+qjp*%u|?ohYH~}K4m5xLc&(_wE{gQo z@6-B?_mzzRi1?+tQhif9Q5li8To^;{wX`7JNyOt|+EgD$HpBxux5+?idsz3wl8bmJ zRJ(EF2uui&Gq`X1E9mCadhnGFrhZZQvz{dWrWcd-@%yWq~90 z#g^u|!dFC<&3S7wmY#oyBt&Pgb+@np^k?eVU5%-&-qI?&;ZONhvHn|Dja4Lb)MjaC z$W>chy%eY21OS*%79W}d4S4CaiVqjl0$-y-5ZP3U|D3edg z2neozv50!^Ihu`16pYBj#j)Sb|Ac|S=*woMG?zHrNZZ`edVYTyK952=q^f#8+qame z1zL@#oV?7ZYH;yZZo)v8>EF zG9jbZZ*!@>an__kR?|D&;4Ym;SeE*kiMv4BOH){IxVFx%3Rf$mMWE@=(e;{mVr4Ee zpFv0ed}$N$e|vv0V4A;9ci4YG3Zy!fR^X$Gk0`WoRihbirw{;62nM2=ds<#9=ehl} zck?B_=SDGBQ=sH!8PlAhMs-5D{5*+N3l}JLhP)fL5$9#ft0fxQ$Ae-B%X+*P znj}Dsrt}S z2p`BJxW=EUg4{8%?3_d)6o3Zvev@u%%Y#ikHqF8c=<{2>ANXB2KQ{2@Vy84AfcS?`YY^pznUMkKXfw3h@5UTe$EmBGgR z4HrrAztn#mi(oj&RXT=4$lA#ZQyQXpL$?yp+ki~#szkReN%XOr@7mR!iXh92mkc8o ziHXtT6xmPI?}n1luJ8NZ$qWYZt0vN}(;9H^cUgC9JZxyG0yBwYEh?UDxS2A_XZVlU zm>8i+k+kT1_GT<<(jSCk{HCA5W%8IvxsHa_oG5y(4;Nfk3J}X z7`&Q3!9+x$J8PEJag(XcrrGpD%`14}<5?>u99mVrAa|`hq1&}@S#xoist?n(kt9z; z{#>wtM?yXhfM`XB?N@@F)%>YZJ_uN;jl=I_J*%dBoX|_wBKXYi^kPj&g>CXp`8E(3 
zWj}vANkad?a&T6etAd2k{|I7KFwPCXIOG^UoP{>+F9^T=bHh2`DOlAvn zcf(C{7rK!&`HM`-0Os+J)n&MJp1RYkyXUbwRUA$6wHT5sSe)Z8b^!o~BsqxbpzD~u z@NXf=qJVpt{6T|M3MlHZbO&&gW=J2zSt@^3T&%hAA-+$~?sh*173oQYUh#Y$Cn?NW zjsYk|n-QHC2d#bR^y(Au*AlYScEV8g+b$6z1H$L=n&PD$r?|PtC)niPB5n{@L^KB? z8`=aS{KI?zKMM)|`FZv9a{xy2(ZU^yB~K=BFNfk+P_P_(T#o1y7k+M%dLO6 zn?h_1H<$&8N6Al;=h?N*rIjy7dw%bk;>C7i!N1eLBNjIW9>ozdfqd9hY+KcpPI20- z9Pg+&D1Me9vE-P%CnM;{_|cOi!}d$SBo8;wsQSTo6yGYe#-7{&U4AvS)^ZpdA3g0I zjw$lR&WSa$#2S!1xTLf6-RITQ1TlYs21cjF)osS>nS74GW=vU$R}w9J?85p{DWg_a zHI8_S(duew036mQ3XnBNVv$ESs|uGXHD`-wSdMPt>SsY|bB_R}5c#vfo;t)xhJ>&? zE}7!U$EER9lWApx_ZAV_e0Is~SS2yrF*8gg9PD^bSQyzJ0IOSgqq}dQv9<0bPDA2;tE^tBh<0 zzDV)7&mc}}q)UA%>df|ag@k`6v=E;t;habgROGfC=+l^OF%U?ZGXHIcVVJ+do=E`b zejMQGj)#~G0uVgsmfnv?NJZepu5wvfjE}uUA)5No$TN-jn_ zy$<#?@B;v#?r;dKt1fanzo*$DM@@`cSfM>MS+na^4JJ2uR+L0or}2M<#!E-i_zl7I zWxbaCj0_AJ5lCTZ^$85w+sX{ApQD)(Ia%KvR#87@oTkl~K4}w#I4Sb-kicIu&URK< zH$<;nKHqus1Iw@xgEk6WVKW?3f$E0dYLT?|^LNXPm6DwP3ah6iRBfECuRy(-C|0=& zzfuzAZz#6<)xKfneD8nEnSG?~>_!T%L>2?%2nxw<@ig}n$oTHXwwV*qi`vl}zMk9( z_L{wc)Sx=tC2G-)o6!UVpgL^3xG)*wbh!t0eV0iuOwR3XD|4MvLtq5ZHl%=r_& zFIp5U%y*)V$(#cl-NL#9x0ox-(K>`o{(iq!7y+OPboi<4U#$20GjKHiO0^*qM3p%o zQ`(!M%B5Dk5_1s7%KV$XsB1 zYT2eDaIy^C+EROcGU+rLtsvK=Djxb>9&2B4_8_uCq$Yozz=>Z9gWQX4BKyPIHKxg0 zJgwm=%aF*VAB}fxtXF4kW}(WUvPe!#%DAnnLa)lHf`aslg_vU!`0%9nP|Ny3)ZWOZ zake^uUfcW8lb`7RI0C?%fQwG{RMPou^&+n=vG(RwHn7}$K(t*2Y*W*dTb1jwHgh5f z(-re_0>OWK-`Y!=U9qk<`|#aI*do7c>&_6uF8nukF1?B?7ybCuo_bU!f{YCqcIl~Y8^X>w1o!h{StuPvmeqcokHpB2(LG56KJM6M19IP^O#5p$32ugJD+rXd~ z`Za&?=HQ)bDrRko`N9D7LQAotYrsAigX+{v&T9wblY!gvw-@fXmOJGvjg=E74s|03C^7?kYaanLGF~B=}MXK-Bt?B4gC*j#27WGmlj465OyH}DFW4Wr_XUz6#Ezx@mi+g|K zq6pLDCdjW|SuNRGeoR%1Ks%Xpt@vV=2sQG_`3CH3x|z`;hUjP4VH@qTlXB*QHGB*h z&cATLYxZ}`h7Z^7%VBi;Xqr|3-Re00WW8-?A-+Es)q`-=uHz*?`QgZ^{OChM6Yv$s z6=TV|Tt7qLqZ{v&Gkt);g%L#R+6{j)eaby{4~P9GifXuo?nfd zB;MWA#Rl(xU|@bBEYZzk=%b@sGH6Y z=I2{uA&`>i*T9#O0hKo_eWu|kTv4c*ZPErbJ-nfG%|IO04r|!VD%jL1 z`8MaHYI7>dh&V=|OrDh>S`aMH{dNi!u>_aSS=MgmBBp-4r9^PsGGKJpN>?$7`Jy)G 
zq?rr=dq+MJ?sYn(A|#~Lcw>KQa00BgZfq)RdeJ0l-s*m=wg4Rgp-5yHZ3Jl`&99Kv zhXVPRat=j2zH4tq*N6qx^Tt&Q2fRuw?D(afedk+1Raf{?I^`3$h1y=q8*OF>LbB^iCgq7G9uC*L<%G1wXsUlRS+~XC?Vg0f zIEzM^biCPJW8NGBEssa}4UYj;m6 z_0DyjmBl$SnGpf9C_#L(HU>GDG`sGppQ9>M=pg!f+)HxnTkbuDKds5EwZ3f#C)a{G zl-+x!XDDE@%lvuZP2H8O<=pGZFwy$%D;k#=)97k7m)G7nZCHQSFSnMo5Y0^Yo-^cI zZqofRmfAdhGKX>3;m#$zum3Q+1_$l zF)kzU9B3U|=N#$XE>)FK$+wvpl!!_l-a1vaTf?Z0^#XsAqt}&FcYPdf%>hJ6aUa|r zRaEnV$W4O1C1tnlC@Z;o>RysIL3W+Y^#i*CJlOSdp@ z-+%o3z0@`G@BjMGb07bs%zoK_)ywx`74TkFbwuj6Hpn3 zahU(7uy%j&QZK`>&_80L^4Hh63#;jo`AcGkVPr307>45;hGAfzeDwEA&=mTUtm}V3 zs=vx$s*WYysgTMa!?4KuhqNCI!(c|zkz|FGZGrxy-|#2T{WJ6rlyOqz!PCEH;4q9t zY7E0W#5xX0{#(*W92kau?LSFA`KyumRe2_Amo$G87)Eyf0g>-NP)Bg@#?B-d`ctZy z|7Ioqslo%leNaC2$Pu$;81^>)`TX*ykXFZz z`B2B%Xj!;vt)(k1IB~L>>W&YUEML%l#1%?ot`WOo1`z~nFXjSFj&@%0+>XP)Jzpaf z1#+-!r!1;%8h?#8wG^MSKH%UL5%R}+^%3rbBr$13xtVbsP)j#*O3+h<%rKjezg~YP z$@AaJ=$3eYkI&SM!4%z#BCp}_R}^!Nhk2|j3@yWXyz1TsDwG*d!WB+7D4s{UNwG9{ zvu$qJC}lv`;j%MLfZHN7A%?z{__G6})$b(Tr?D_zcM<`!&V*x3x(Xu5ko*}1oD(w% z=`f1UdFTeUjgZ*MlQ-}p6bh{q0qlP<-!P}^1tr=Hm(eob53as|01)Y(p!V06-LD*v zFgxL53yXcshrI<6Wi9GlbobDsRv(T%!edI76Zf}`)a7nQEk&kt0f58eSO4;1&}tby z+>f96Glq_@LrOKQjs-z1fs6!*gZq^9tiqqcyY(OJ1NsTNMi4-`&m9 z!=X0NAvlQN2@@kggT$K9bH3;R2DZL1H3l>5jOeaeT0kQ z@R#=8(YQ1YjdaePMx^}&>Q%zg>8}eKwu^4nftp^VFQt^w;vgTV{aGH7pfQL~_9`F1 zB=(Z8c21*(fZ|5TrG97>MP+}7CHX!g1g&xG0dM&4hWG)`s}~KZ1GO3t5-OXNCjJC- zC!j0gSz(kOE;z(NQogY88pV8Fk$%q36h!?K*OCx)hE59>glLskt^Q(z!+2DiKHC%+ z2#fVB;q7;Zrpp=Eb{h|q?5f}YqMxLpMjg9OMlv7zQNBOYP!){cR}_EBk({S=ea8vK z>eZcdT#>x0Qsk;Xj#rLn>ad2jWS)waUnY8Hk075Z@4VoH8jV{Pr;aRgw5X*{v*Isp zY!T$G(%{aIQZZ-y{5|xJgiusiGjMdcp$&U_26&!dFE4I|bVc*>%qvG`*0ucb38f*v zj+t8|X-C)N zT7(IzSnD9)Mt$?e$dZ7?j(U`X@rg1#q0gOgFr)*QD_4IbFpI~PKdvsS4n33m=ls!N z)VY=%UC^DOKjv|i@W#5?COS_3@#pL=@!UBF`Y)NO@UasJju&2S$bmw7QR$cI6S9?sXWebH3s z0Z5kan2tACxmULU)?F#BTK(V$I}!mEXPW@_nrDBYA4710InRljUr4I2p6yn!Yi@0X z^4!F}0E%x*G>pJ}67`|vnHC>to>oQcVRutz>VK9IIimhDppUNHluGJ$J)ifi!9hRG 
zM|`>+RE)5v6UPB$q_V6gM3u$(+fE)4Y_v?$o?Gck)dco55DqaRr&h?l$u;2Ss_Xqj z6lQ<6l$D`kcs76_ zmNV82v$z`pvt<&okLB$LWHf-rgkgW2oRC0cRdr0^bUeoCS-^eM=s^0p$h1oiHl*>Q z|GA2YONtD8~3d=A08Iawzq zu16@JlCDa!6hOPM>%yX58sNyQoATfxz>bnVG#`dkrMU?VCa~5gPz+=LQY84zwv#y% zAqwybBOqSc{f6hn9i8)m$0AW0Nf;ZU4@g#)oP*k_^1y0j0)eDVDC&RYCuE5Hw)a)N zSSLm&h*Wvy*heylnQ&n7~=cE>G{Z$kLYd8 zTHxQz40EME&iNkD-td3L5XZegIO+%0jcLO@(ze)pRpgi{;Zl#O=lr3D^YTH=S>vZSYp_>|;tbC7;i)!s_vD z?|PQ*KQq-*rHF~boy5I|(!3)u407M*kI_+D`tbW7bBNZ{Zx%>%Ih52c5%3AW(X!K z>Nt$2QLr(pX5Mxj**r_mc5kRbdS;v<2)wcSt)rNTU4E2yAF)|jD2>)s`e42pqs`Vb zjPUl+?md6KYriB!0vnOQU<#Hujrf?c*w;v|C2lO-p<@h>`z{Eq(R6WEv!q!hU=ixw zd7RO|vHiWkmP&z?D?LKI3U|+E6oxwyWqY{58~~cIzv_>|bdNGjzR&wsyVGoFeOTR* zISTz4+Ac?3f3WbT`ABh^>R8rw{tY0w8PFWO60CnPqt4#+KK|X!TwCQGTE*okNxrKQ z0aWlu#OWvt!xB@(b6CR}B}2D3uz{p7rJgU1=~2XJT_Pw{h(eR%oFx@1N%%ey9h==_ zF42H_ZJ#Uot3%!5kL`snY`h>w{YC0=6)BU&JF?heYFH58KWc#+U$XcIEN-*(+xc{u z6}W#lw>GeJIPhbQCk~Abroh28_e#dG#&k1+%Q}R%Y2MUS3 zRAKouf$OgB{H0BR55U=#07NODka6Vbe?x-;Vn|S8TG@R~ez-%369`@$h>{j~FrIAA z-+*|iUKhB&<9%|@V7kzdeaTe3ltlF-pC5nJ+0e|;bfHryP$#px9j$`;F|9qrh`7{} zr`Ne#7`7@s3#JFr&m=(Q{S~agwBO0uGyEF6p$$S{SXhy`XMnB#X_jb~m*EY{M+H3? zTbNqRJ9zjQ<(SKB#^id41Y7mI9T`8%yLdqWn76qYE}PO<{(U`_9#B0YBA0t&O4xs_ z;c#ut&4N*-_AiJE9wkHQu}4kX2x7e2V4h0~;r)>o?wva$0#AyRWYC7n3Zm#wwpnu! 
zoad9VfFm~1q{q~Uc7cS^5Ok_{16p}$-2%j2hYV87H6k+>8PiRj`Sv9j!qn*V1CSdR z5YZjqM4l?uV7|(NtBFOOPvvSI6xM&k=+ju~GIW)=3FOchDBu8@aXn~u@e3iaMQqL3 z<0GC^Py2J|Y_lu6x;88?IyQ4@+`b$VO01%XPr5PniVr7Ep+VXWjo0guY8ns#ylI7T z;^SIe$F^TJ%d34oCtHR|zl@aaV82CL&KVqa6F~fn-g!hjkxML9kY1Nr-sgXr9*>i3 z3dGEjD8$~Sdn6C?8%RzDYTQGP5Mm_#3*)#I6+0#*JI;Csx5NYk|6x+637t2(hbqmsrjX_%C3mVvSI7?<1EWwZ%MY*Z64HuWyLXDrb z?WZXxWglEPdg0B(LCHNBE+>EbiLix1dd9Fc<4L&P=|DYcmRUMU7$z-akrNQF(~UBS ziZOL91l6A#HG?<|WqUphW0zk0IYH)i!b!;G%8vx)lqH6x&AQ^c=1Frt0{Lc$r=FoqtJfyJjhpbmN zqwL9F%M61dX8oOEzJwCdSi5PmyA-~^rMZ>J%E&X_1-jCW@M@2^v8CkBIQcGs zmxod0jI8uPajjPQZnenD+fHl?-R>4f(Y{IarTb5+Q&I6XJd4bxEVv&@Q*!Fy;gneY zYwTb8rGG}HTUA#Iu=;<6-2t*c2bg>zqfZ17`9uWP$DbMq>mdt3us=k3ksb)#3CTT^ zeZL?Y8$46^sDJlW*Sv0Fj@WbfMr3J`I*<2VLj@87Ykl@|3n)3MdUZVO1;x~XxbJvR zWuMz6RF3Ivx5PZLqf6B1!jP{({GOg%EJn}V0pBH9AH63I#1Vg3Bl?4{YN@NC(Z;lE zoiIV<{)n_YPD@gK&tQz}DZIf|fbEZ~4FqptF!=-*kPfy=3`)q;LYUH#k?IT=AHkoM zh-nW$@1YDi9!Y})3TFbvZyGZ<_B~;kTZVpuLg^{SO5NAEJR8Z}`8=GEzT9i?U>!5< zN8g56v{Or|qnLkgz_s;kH<*YYa+RUNi9vs6s>letLWp9zkgZazQM@hNcu-Q@mK}K$ zQQcH%p+5?$At=<(dfw5^zVEIT5t$@ILE8>j_lAATORcuk1x7mby~%D|jd&)nfz6RG z5Ua!GLcc2Fe&wZ;;rpCA#y)sAGi6IzZbKsNi7Jp3>mGmDF{+0vARu@`wOLCFry(O1 z(0@W7RFy58wCb)ZEIX0!U$Q#4NMf@M*Radsl)(a}?ln1RZam(2E@i^-k{*_IC6*rA zFpkI=8;N4hqXbN`j2zx-{q4vN+Hp%fw`_&s zx@l%_sV{%e;<<=`0+*0}vGP-B-~iJu6}K2)zr&OYc?_B|qZ3=ZlA7bhLKdzpe5LKh z5(2(5$(mQ@W-a6K)Uo2Winc1?#(_}MKwr!r;PsK}P0&4hzBY#ObgZU*WLp&b<4;)n zb=%WP1r%j~w4+sa$qWJq$`OZ`qIY(NIP0o(b^?D;$Iq$u)$BEw*(L4g2j_=W>fJs8 z{qWu_i;+@KIq)PiErQ2QV;p!!T55#_fDa(>|59`oNfrV@5IqnJ96n3j-Pz&p&gm!S z2EFNy>a5Is#OG+Sd%D)dOg}~I#n8>1>)K;aiJO{Vu)=K+kpiekOK` z2j-h!>oko#{}E(%RMeKF?EC0W@6;FJaBhDQy0(Cb(f@gyL0KUK70jRU5e6PtPuhji zNPy4riZY#>aAmh)$*%U#RJb?ot_W))$Yf(7o7c&*!dQqi*e~w%wE$l1a${odfn^wx z#8YwJH*8|sNI*oKytEZsEazTmbgnq1JAobey#uK-*l)`9*0J?jcnO{zZrrzWz6F2( z(E;Fzkaa;jq*rSJh)G;w)Vwg8uQ_Vts7=4MTLfYH_O>DSAJ(KQUH@X9c6nD3)reLz zgKCRw7TDNwr2JC8i~g(#^gUETEm8QH>sGvXLC^eI1XmG(Hs}WR`c+kMr@#i#gQyl1 
zjfhRgwJw6YBbJ=4g8$I;rdIE5P6>aVW^_2OEB%6*Y>lTc9Av-vod}jEFZ|~fltfS* zGxbCWsm5}KiGe=0J}lW%xcCrPy2q!EurZ$sd}u?IgG2D*U@0$UW*)Rghm1Z~ql%7L z1PH8E?_H$WR_KMpIs{V5D`!fWl=9OQoNR#EIq0!RUCu0vM-5bwIisf_^LT%>63doo zq8r0SoQQHhbkTMccLxl5J?o0Gum|fkciR#IVKd9)9K^AT0`2j_+*;lit{z>qJoImN3;ZDaO;PGcVe}@jJN;NI;Bwm#AZg z`*^fTM_)uN)xrC)XCUWgTL8#*O;lk%@2|(UBPbY z`C`~b8FSmwO-Ran3orXgR?!bbjlF5Y_+KLgmSofn)9)9a5LI~Y1eJf#bCPLSDD*?S zYLlOp73yN9lJFhB0$6`=xlvxF#0?+C2VUHx!#Csn(Y92J#(61}4kV5~qpe@_iHCWe zcG_lpH!T1Sd9>TyzaJ=%J?mLhz`GdgQh>je`Ny){FV>V@+9NYke23cmR(Byk1N(T? z7^(T@0mNk&cdnjot@M9BdNflCAhfdP2sHCCg!+f&WD^Ez=9<6W*z2g71K7UPUg5L! zOyP8Nj=J$-V2gZWFrA2D^($aPHw@`FI+=mgv2H2}pK`soxcmLd(CpZcmsZAS7x|v7 zAE2%a{^yL!1Hl8;?j`=}6sQ$Ea|zdXT=*r~U}9C&Gv5+Y+t7ay1BDMl$%OHAEvky_ zclT9O+p5T#&@q6|+xE6mtn~^h(ygR*>vDd}+&p^VPZ(4&P?_VcFPpQ4Ru4^O%%_=! zSpE4&U2>amEin)2$0-wu&^4{+ljyeH3d z7~L!PjaN@f*Br+DUX(Ks{~lnJR$++r8wzLl%<`C zZ)Cun83oKT1@`TY)7xh5>`4sU!#0)|Io-%aRF|RxCU}1&gp+;3v-L3mSa?rkiga2A zpE$$t{BG8HS9Cio4f8tP~pLQ7)r1qUI2xq$K}0> z!M5vFvY>y_m59q6{x6Xtsm8p^lfPs?l59ivQ4KX>UTs>H%688kW#Tc1^4qhmhm)oD z$ug79Q?_fC*EiYoaw01WAMrCdIdt)qy*b-3R}@VWUw_aiIEHdd-%hA+VotGXqA=&d zaAL?|>%#bc{g%6JtZf1H0|9<+fvwGBG^avRN$Y={araVr`1wcskU}eTu_nRd6AbTV z%3#@3V(6{h-c>vdl{Ry8J;ka~(<1mTjh&Y~{a23$Ro9g>JCbA6)PVzKuG6&5g&D!E zIY{%h2~40At`j<(BHH^X5eJD8VYQW{c&tVini;tAwEW5wkD0NMFI#Z;XWas5|8iY>76}e;*0N+Kx zNM)PxVNxWO0XwJ_D13&+>x@VS0JtVWt(AY(_zzZcU8ltA2vCiiXWbDpi;4Rml@GXT zD|1&VAxm__>w|f?42Fm%A;G20>7U$WR-O3Oz1P6$$#s>8VZn5fvykVdhP?t%U|Z3G zWp5{rBu#i(jK2t#t}@=IG#D$bM96zZF`#^(EGvs^Np0C?&U-dkGd$(rDFPKIDeiv{ z;+ImuQpkUeaCwu{8AV&8qm{3AKFJ#iCW93EF-D=o^i9ibTAoiHQ|PyBi8Ng^LQlAt zP>}}XQdQM*u(~P}e|~op=Q>VxA8c6|zKi|n`S8E4GqFzbVlrA_gH79sHN4IkohVk` zEqMNqxk7+JMDY#$9Hv6(i%t+?;-r7bjr3Tm`Fj|D-p}an$;#5Vm$e5zG<^>DJ&HpD z^w-kE$$|qWZ(pG+ra4QzPXy`>`>H2YWR5psef+v6VhK!nL{ZEG$jZ3&rxxw0U=6?R zkJ%x}k}yl6!sjZWSB+ZJkH60_iRw|P7eG@78MAOtidjFoZGFpq;}$(b)T4g@T)+Ct zRIZwUIoIU90yomzY3yxJ@G7aaI4=|$r>d};hr4@@Zb5zb_*aaukV1JE;MD9J?0-7Z5PPw3Sf 
z*|@35U@lArtkkJ(se#nj=y_lYBPR^_IfgqpSg>vEyU}aH0H16SWq2GK*H@W-_FbVk zil>4bV1(6l6}@hAk+*YCoW&^yh}P+MPM6t9x9l7a2;?eahvI@YuwZUSl;Q&9(sYDk zTLci@R!xEj$s!fKLg#-}EPIaAe5Cdfl5qW&=ZY+;vE8v7jj$kjqsEM=+FkGt9M(n` z(*#h*mwIH$78D-S%Wr@oxV7qV5RVy`QR3D<`-nl(SIH{cw?h(HT+Yqpgbm)X@ zs$2^StV${n6b}X5smv>Hz6h>y3270V`QEWuoD^3?mbW8+a+H5-m;QvividisF+a4{ z-~uf|K!(#%g#9Hxfv=rEJPgnct`)`B+))m8Rra9NP;ijzG#CK$td5xn<2kWaPAnM9 zn_Xg7##r5#HCiI?=<;a>D>|!@{X)tpGznyuY!XL~YL$J_HyyYlEXEZRpED z+vsWz>*&UlxDCLPb-$3J)Ahmr879;zm=7HjOe(L4zqB;2fL+(;q5;ezVlR99=_jau) z1W~RITHytw9xoG5bXGKWWmx@KP`b$l?g!R>OVGNj=i1z(q7o-Z+a~!y2f2|Zj!}na z)v_|@+fT{0qJso~W^90YDS5nZ+`YNwYlxssw3cr}N3pu4@Q8>-IpB5XoU_>$Z>TSo zu=F4phoX0#5b4UK#`M&1ie$Xin&KdhCy=q9$qY;t1F}i%F6!w0bWo~7C$A#DcGIW( zbttOo$!d#gg1>AV)MFAF=J{2yLgNw`iD9qPtqb9euN!fHmT;{}6U>P+ZfDw(eWvC4 zB2q5#Msbae12sOWI9lQ}Q;A36*O$E+^%X@j&m@Ym__}0u~b4{1XKH1-H-e zvR3A}i3a(&?e}(3OtF$qXW0bJ_sO-hA<#6oo) zdwPXv5dqEk%?wNN4v=9=hE-{+*uTqGpfpN;Cgs3?wNM3(ak6>JlgqMZg5twYOGGBL z@V%IRwSq$tmT>?L93D2Ol-IodI4c+KLP+Y!i2t1#m-l6y;=~URe_;C+FtdZz|J8^^g!np+xjZ54MP?2= z#35*ZQ_xTqGbu0iY>sVYg5C&1x(Kd%_<5*%Ty@wlfbAvpug+~q`kt{i3YM5@47%rd zzr{SdBXIW@)m2ppSam<6o0{!$wkvTGCNLA~b?&Xn4L%1L4FcDiDsAtrCXj&IRks}MqSsCmiU;0n`!{A5l|Ghs{jX|-s8+rt7)Pjm?yy3W$@ zqtt9SUywda{aPr0CwEL2X`+tKhDWPLZA5-6_5;fGtSb6?h6$N4Bc(AC0xz%E7^15~ z{Kkk4Q3**{bh~lrezfzm#UtwXMJ4bxXP>n6ECRvr*O`8ORCW;d=!3lWI(k$Ek@#Z;in)_ULeu!k4A>Z9G@!ztz1G?&-zs+HD}Q7@{H z^Y&wzEbu{<2=Bmv<;DA*wA@-HofKhOjNsRTW$$`j$wI-9v1T z=2CtfBycywSwlxKkHYEx1mr#V)-`76htrthi0X2?et|iSC6&qC$GQ*cT%7}eJ*?O0 zZ7CJkxqSl`ZksX5bSAGDvym-QwjKq5yed=gmLWS4r-zK)By9*OvhktrMnb=T&oO={ zJ0*;Kw#{4}2!5qPqg=e$@LgQd{ky95&!;Ttyu`uPdI})u1sUYs9Y{&^zj3(rxA5W@ zS5mdLl7@^RDd_XXs+K#^zP|krwAMIp8-y*MHl`J@N2|#`Yf%{p*Tq2!_V>d z&g~wJc*|1t;`ISi@ipR52ey}gQO39cC{mjmmZZ|dahVSf<6pOAl^AXzOKdk4afL~& zrDa}r8i$M<&5s|fkD+sHa`3d06EUeT1R*M*yk+S$15v#h+3(4h!@j?grs9MTC6Cd5 zA!nbUyfhLdJVGQ3PZ*Y;y%XiThx8k`5%Vm zm!R6~%SlerYq~fS65P&_MIW1g;Uv*Hb1EG`^!kvQ ziMV55MH`v+phD984=_2;fsf>I8BDrYny|=Z)OgO5hHGG5XylxqxI~4AcD28Sa5+>- 
ze@V+uCO`T$!5pE!vvztdHIMip(DLh$vw5m`R1dLIeG=nLhreo}w@g)s^#vs_eDArU zh;Z<&h~B$TK;y@MDe25?&%(A#DzuH+gewwV-#m%LJI6+&wq7(Bt?`=pLfdRF>_vvm z9D(f0E=kpfMh@Mj2)@JgVympd+`S2}JECY2R9zE@j%{scE5oVxBnxN_!nX6IE)_H} zC|Yi~*Vat*VD$Kg2-fohlFvJM45@l_pUUSL-5RzISYpY`JBZqQksFWv^p4HjIq zSEY#-k~1uulc9K7b9CzN>Cg8xNfNc=xy{m^BsM)8!|+74PFB0INd}DB<=QjNrd69s zD5c7$_&9ifX<*4E+sjGzi^R^Jv?sWP2!B+dmD5R{_F-uNQ9vq7-M+LyW%~72vdD>* zxbWfl(&!v>A&coxAuT#c^L^UBYE@+k3#b`#`IA!M@32t{ZgH42ukxlWyMETwW>%61 zx8i?nD<(|7(q5i_iL%d8uW!HBz!I~~mvfzbSaCJ(NOOo#97*jo6n7YdFl41kA1 zbi=0N&g763+Awl6o$5Fnd}^Kq3PaH>3IvfqVM-5ka%pwnlkdR^Dt9Nns`3pBNR}Mb zW9j-lJnXQT^rtXgrU=6PN-tyr!VZd46eT8RUW(-Of#Gn|bKHQ{egI7Fu{wMz1zcI+ zQ8w*=4bQdfHKd!s1gdQC`l#FSe*TF}48*Lc@gS;ak~PT`9s=w~=v9PGHbQp6)7kD( zD$Fli=_Wk0FPHZtZUB%?0gHzly^A6|FWRmkemzNQ`qssf!veS{Id1pK~56E$p^ zsaR#(sfLBw2et!_oi4MhmV7g@_1?;@8gD#*b-13t=e?6N;-EfIFx8UQpU@2c1RM`i zI7O3D6QA|-I+3V#Bp#W-rcT!gpu&6!lujQ{_>T4L2-k~|BZ{hlps-{!s7k5pZNKeC zm`~58DR@s;MxNnbQ*&xNEY(C)LMsV@02VE+^?(9yk4ChQ%Y1gs_xMQH6h2?P)b%BQ zL#ZOAO8l~UsM9iJG4im{!e;Q55+OhW>6D7x$EJj&LLOiW1ISN>g+VV=R(Rwh8qE2F zh+#3M6edwG*GqDq*8BW`2 z8OtL=ry7fYk6Oe0iyTicGSg;dB?4ePf(T&$D^Qz{=0k#+_mDV-#GCFzV0%QdwQ8|- z#4{d?b(n53L5K3#LC4>)d3*R=NvilK&qtH7V}(%p2xRYI)zHXH!%-s9RSg0!r5KI zmnOOj=L|w9OC)HWo~qQ%TPxl6z7pf$5LjRvo2Cxen5QH0wM<>Cr8+mJT4t;<-y-P= z1G)cfkD|Yxe|&3L_FxVL?1oTQ92WW=4(!X{{Y(VO;GVN3$R2yTuH)W+q#w>Htl3!4 z!=%7I7aQR&&b?5$5Lqz#?^NC>R9K9@a5D@xz)rqj$Luv+5PUL6q^W%LVDH947%YJP z63aQO)8JZ+3{{|mFI{&L9a9UIi!1d zq*7MD#L(2C0j(ZrNjFB5(8Z@na!>{ZbjNV<*3zNRNufSE*@SvR*lBUa{@L5`-ydtT z=`x0K7h8d#6LCX2LlFnro~;3wK*A=e_v_hvh;fPjC|#tnj&vD+l-6~;(W!y$4Y#SB zPm-fdZ0T=~036%uX{S^mKC@f8gR`g1Y8|?hZlhVt+)IfgN4sOL+c?9u;6}+Lj{g+{ z2L_ZAWUlZsBOhy-xW>)O-assgG zZ8n(*Zutz7^`S0*@daO*#;9L9+Mw0t9agGPeXKqOTD|9d^6`pPH(7i@P^eG!#!8`C z`|#%AyJ1NO*VPw@7yQ@hMVmb932Zrvy&^jg=nWH}Xp(BDij8B%7e7pX{J-uMVOwch zp5e^VYcg^OKkI?|HqQN&%W8JJ=oG=WJfAMIsO74r!5E=`taa_*NPQ_etLmx^3!|cN z)PW)^144^DpwS+Y$~>nyT=-paH;&O%9C)m*E$XMM-&g-y8K^(NNccKdSYpUBXwY{G 
zMg1WXdFWYRg$dM=g7&0Jz?|2L9x7NiIXwGbx}Kib87EnNxQG;$P2pO_OE|ba6sER= z0#Ic`EzM7VILp-GWt~-ytLA$o4JR&sy_U$5IF#Eu`NN*~aU8i&uclFe#XZ6e((Gxg zmd*Qtr*?HIEs+7q6W_{4^L`w?@q6*JEE{7_50yXAkz%|cM)TJGYBNBP<_BfMN95yAe~WsiWlSdGBT(P-wP30|Yz;Zbkx^=bz|wY9ZX&<6sR+y-Al+7T zU_y`05A(C6O~hJU>O`~}!zrh{^VkV#JL*>iq4F??B!CvD(m%V35Wm|ySkpQ92G04l zYQwC5E~k~IhI_G6mU#6&P1e`8_Eg3-g_0$6+BPUP?In#UQkYjVx0!=}M*?rs;5Mpn zYAdCTUSBQuKM!qxr|w7*@c>31SF)-~R`>m_4zAwp09MA4*2;@fxWM}aH}FM>iulba zR+edo6*^?qcfu-K@OwsU&3f&8@^lf1r@7YtbXZ<-9mw0P|2$z@n$-$oMZ^PTSw#JR zt)T;H`{}~rjkE&tm~kWeJUvPJ%8=a0HDR!h`baIa3Ss43`IVL^TRDpq?~~w5)8*-VQ;eJnX`96}F5Vg; zO&4s6m)~u;_`rh`vGenEZOC%6N;P~O`F>YZiT`a42_&hnu-s3=%tB#*U?5Q- zWRYZQ%3;HkDQxcwPXtge&&(djaoO;morp;4vHYsA3$4tY)_~V8fU}Z-3ijE;aQs;M zEV0^_7(hzGZhM8{QxRjyF*1QVwM^SO=OX#mboc6i4R_HRT7D(m3XAX3BS- zCOcucOm#AIdq2+s%C_}<

dO5Puse!~2k$eN)V9^`w{zU%3MmQzPilgj5Xx!AV-X zAK+7JIfcm68b*Pat#i9x1i2mb3xWi)Yy}Vpp>~&$wpF=M)pEpFOj`t2~7CVB^X) zXK`zF!NZmBGyaK5r{HaUvK&7Gu9Uv}y;?=*4<~)*GIguauG!;Hse85Fa~;B^QLRJh z%@DQfut0%%9Po2P(OQ^nFtSiNj%c@H+MDh`dPAePG@XD~U7!+wF47}+%%EK{ec>5$ zwNJN-e`6)5I->;u@ihf{tbfNVlB$B`CM;~@Hz(8&+*%{nU7d@~lh3I4Eh}(cdftQ? z)Q190CiM2JtT6N;Z+GSRl!o= zGp$!@w~=C8M1W6!rSU_iz!wXmg6Ud}Qmz<K(jCDB9yMpt@Lx%lN-8D+FFO__>q>CTr&*5WnQ|WXXucp_Igip z+o&i{DEcuiHZIMIVIe|nImx+!MffO2pD|<;himQ~V6EGK>Mqu>zLalUNP`)#YPJ%N zQ2;6e3Fzy8P-t5sb=cFjO{Euo;uUt?aVzJc^IP2MCx`CWqJx|4y*!3fvEFIuKo>%u zb%~rqK_owysFtAp4wA#E)DO-(U#Vhh5|g0fW6Mb$QP3wiydVq4qxAx$gTV+cq~enL zJsf!7%-mrns;}X!aKJOzQ}-L5wBd>o*jQO`dggP2<^KadkXR zb=%(4tA7%`YT2@8z?lfFedy+6$stu}pKx>0Fk{1FSbv?Tw-#B?tog<)Zl?+4rs zi)_MAoeG^>cusX)ZjlZq^R)Ii26n_@FpDIAz%(KDCWWPnI0_kn9hh`5^)$VL#>OVq z#z%NCxo%Ff@q`+7jIfb>^? z23*%{apQKlGbz_K$0?afij=+WS9jEncr-Uov5V%|0f6DAJ_U_qHt`5}BnE+B zN<1%0qAi^&T(J)X9@Z*dSU@f1m!dpBdg$B2;N+I=4g+J6KeVjL3&l%65!Xx(Rr?fUyJp!Rxwp4>-S$jklk za7dv;gO2Eq)pa+7sPOOIQUvR*xGm~iY}?X)@ru|S)WCZS6}m+9$zM&ap?I8sU>(IC z-2>8!Mv0i(F4l+6-ui)`P*gQLsRq3EUA1C)%WV`&{9K>+KXWRbBBek=54Uv)g6|wI z6?x9c$Yb4(TRh`m>#4%EPVf*U?9+p{E0PA9dDT^f)drWCU`hNwnY6@eN+%i0 zAKZgQMSsmsZg}uN6k2Z>V$tc8nZN!QVCr}#{Bbut(Hok}G3-)Xk%3@;dkFCu7aC4) zt9oKia>5K49U?{Uw~*82?)yu+JPXB#9Wq)qS)nOxmARHDyFSJzpZq9c|1htVSA@$H zei20UN*yLGnseUV+*D$ubhr@3@yiBEiUv|!#CaPJhP&OO}W&5F->tm1R5 zO&zb|pqj{+u)IPd_w40=ceycJqPQZ^tSscpdumUaQafUQ$0e|FP{2&!=nlg-6`c## zQn3hrzg()2@2FiLCqrM00{7B&wrVF)x6rQ3G!GiCK{ZTTXPa@#8jhq9_1x3f9?{S6 z5_lzL#9n|3{l3ggJW}Q1xUz$;+{Wx7fUZc*)})J7`e_ciFE&Si8I>gr$@qd*tM$FZ zgQ6!z9Y?9SgUWfMFyi{j>}V;QXEuTCENeuJo7ei{hcEDFryP<>EL@$D13850&sE5Y z*=CscAWxRV;|gU__~Lz3-;FnnPJQFmM+*b=xFl}q088GW4hz&pN)EptlCDcXaVam( zjG8&tep`KEa|kMbfG6VHCa4EGVYcOpU$%ZIaZ#OS!B=#pTNZ9jF+0Pcf=hubv1hUB zx}Izf_JLW4kwo8#G((pYE}9VJzGpE%-&LSu;%1d#O`>-T z5ML$#14kj_^Ya|1xOcDOBi%knz$Y|ZgCtt8IU<&(&~QqB!;D0FEZg4?Od1Ov%lU&{ zppvcZ8NnNwKcWi-Q)eweGz+B+;KB(se`e;XH^IzFP z)}QVfTJ1ee%VMX>tlzTMSg;&=X8E{)AcLN+no6IZ8uR+P2H~zzstpgTI*B0J<^|p=Z?W 
zkxI>;UGV2a(Ov>OS~s+c50l5!yHn>zco|(|5JE?Pg_$KB@s4iQEyJJN?!Ad;S@mhW zlXRf}Z|NE*;oxq5g`~=~TNT=|IA>8v=Cu!HDa6O3*R-5j5WwVksW-Xe9pD4S{(W5a zdvQKnABhP_$6qV0fK(={2@o4pzp4q^!uW+~Sr^^qM?)UOT^{*ycLuXG4iMS2ljr!d zRN6m(T|5%ju9M}y3HHi^^!CMCzwXaO9I4aI(wtsq3y<+8=7-_?t?zL;1MP5IBqJH2AU=ljWC{@t6h$ zY}wusEn!8=5FUxA{C75m#07j7t|X<}H_AhQxKSQ(G7l222{tbOz)N&w>FKXoJH~^v zXBzu6A0H~!pNwYk>iEF|Cvqg+$xl5~%`fi5o!1&@de^`L{J-ez{vF94D`2O&Q%h*W zOdVr%q3^zo(|JE~m4iA}WkiT{aawMd1ox!%uFZgH;?)0sfS%;~rcqw_M!{mXxjWf^ zfYx#UCZrQpfVv?O(UIBJ3Uus(dxvYj*Bp+SwDL#OEn2p2s6L5P8~l~9%1JJMXGkD( zSm{dfuy`i_x^0{`9@`xKE@=vu!+z>djWgZ-g!7Vr+8I$= zGIE|p4X+o3kd7+F04g*xMEs!1E~N@7r`c%Gaw@Z-bU1E)1jnlT*=|@hHJD`%%fHL2 zgiW5s&lD=m!mhm4@i&V$FmNT%tAGq##7_cH^aF_SpA(sF#Q=b#X{v!_3~Q{#lo2$A z>ubm1#rK;B?<7IPSaNxVJ*3@#I`|jO5heagD}wuBvqWRKrjLBfl_1$xI)3Lvq1(Z! z!4^A=dx(#oi&Qj{Eg0DI<*|3J6K@i+R=;Jgm^MmfTh~MPfE8Dza)hFgY%&;- z(OGA2V~Ink-J^wXntEleP3WoK;&UC>G&A;#CsxC2f8J)JMVk27m!+ z8iL&05P#`8M&dmBqLO2WM0msPX~f4u=IkelAtveBAO;LUp?agllMqdk3op=%V5o6^ zWeppJf|wN*k4V6pbYBrFIY9nPzm_lysKtQg3b&B09R5cVKx(8Gxp=u5b(cE91#ZM} zG;2NCvh$A;_!zCIibj2Zx@Pg)DC@#j$gea!iC#EV{$*jQArS9%89bAzMJ_xpd)inkOr^M!xU~*S+ zm4pMmf2@dVmR`{YajYs)9i~x1aNPN-!zi4i^Rq63bm%6F#jnYl`pKC9utW*K@1ypw znx0|si(kSb(FIvDd?fGDa`4sQ-SbUpT4;Np)855l-*uYD0KNft2)_} z@F~W({bRB*t-ZB>p(9)rbY^>^WTApcNdiyTREzMO|HFi_vh7rw_G&^(xzr_PI+huOqD*h zjw>ZN6~)i|F?q{T*Q4PDL8*AUomcoTvP~*`w^8QKT6Xp!EdH=N>Lwc2!zgf(tb>YT zNQ;z@^mZ%>1L|T0f-l9n?XRf^ni^$hJ4m=RKzZ1h<6h_FM=#-bwa|-K2u&}5TN=Zy z#c0LwSA9x<)52dsvD`-}8(jv%3nGs0tI_oFo#TdaMWo%Mn2TwkAp#wy zRcH=q6$#>)Kula*zr0n5sQzX9ntET?t zl#9@B0$14o-W75~!j^TLA||br!d->li49Y0-;PsoJSbxzLnm;5 zY}hEUGY+-s@p`<#r(u+2@}TIlbj+xa+${m2!{}3bmwP65a5?G7^*y5*5i3;%0OXU1 zV2EEmLhci@FoE?&b}3@0kKekDWF*A}%r67-2m|)q@?E_d5L*&gE$77?(klyy=G^mB z0fW6m^p0q)*i$*nb=Syx?m&+pGIkYzETRM9guT%n+L0HE@vnYdL7Z-SEgMqTRapsD zWBF~kqDJzh?x}(&DsZ|ii$L_`NgO>HAW5loR5+zbIz|%>Jcxj5`_`%mwA;F{t*~yb z!l6EIhtzaT_X0||7_O%&6~H_3?T4!zcQPY3CK(YavNxluK;NpbW<@V^t@P<*{G+CTVU{GFv$;=K5k z#H})Hl!8N z+I-zihCpnX+3!;la2yhH1>_0Ydi-j@_SohgUHRK8}{5 
z7R+!_oro(YqpSk^&hrwxOTfZ{4x){=0fM)ov}d3&iWh2kOYGF?V`ZiPEV`V+*CEZ* z+gTQve-vm#ya&#I&5K&L%>{3^Y2^*&I|L!>G`fMhroDss6$b>NnPqub(KK64b##Ii zxaM9jED2kn9{Eej=Szc#gAsDiz3a?3Lcl4*56H*7#d!* zW9ph?fq-{ZZvx88YZ^DE;3tAr=cZy*Fs`U5i^a310)owdgD8BWc?Cr?sa+`$W>i{U4YuTa1tzc&4h{0}GjrpL51%W7-S@S30hGcU05{ z9Lur+PT2*2NA}S`0S#Bw7w^R%W}E5~tv2CHRJT;;m!OZTLiuB*HbLQkO&JnVsMK}_ zQ8znd=_FRCkGK&~DfFB&5l{(U?1ycgAheX!4k50F{#6#1)PuTRaEUc$T_4G!>jYeX zn8Fo8&7re*-)ZUCR%zB7KiOh-of$wbS4K3FvKRJ$2!oVNEOjkvn`n7=pSt$eIyL;0-}K+)9*lSM_cV(_kht>rr`ae z9$K5OVz`*nSEbKPQv0}C8sCl~R00CGaQQXJp4yyY*8+&^(J-gs=0t?N?gUSLA|P#= zf#?f==dU3sr%PNVch;n#EjF7z@}$#XC-PG&@V08oLYL=IcArLdkAl)pXEE>O@S)%V zJ?p%>T{5S^cL((|<^3)O^)tJYN40)}FgtTUbW6PQd*d$i3geTKb%+iU)OHl@VLf-5gg}b#{#uGUbgcfk-gA;RwW0IQU096`b7D?&} zF7>b(KhiWuHKH-rF>#|FUuY*saMvWfhMN9CS{e-I&9^_39vz!hs+>wZJ^P`%;Y<>L z)7$n1mr@z+RR;SH(`EVIZ@1is{U5r(`xsUB#`Q zPzmOyS_g>7wR?p3GQ{r@10M2>Kb?24Vu{LUHS@@0m@Y+L;hZ$g&47H%v|}Im6CSy* z%|vT7-zyw9>Z~VpRqg4amwi#e95EKk6~6 zTt`50%^xIWw+d#|V|eN19o<=!Fz{&T^ehWsUOQkIDY>D4R z#oP)PF<*T$aq>r*U`Ma_rzYvhrzu0-48Sm>RRYUJ&hszQPyq~U2^*vZBTNG zeoniR`Vyr80juzs_M2kYJhtu^GAm}zE`n|(dRjeq=|R)UrIrSJmPZzL^&mQXZ{VY^ z;S5qApl-Wu1E$Fly`CI~q|!6dZL08nt9dobIeE3ANjO5GtZ4f0r1#VSns@D;xB2J~0xQr0W2yg$Z{en^vA^fuHW7tgAvS(K;q^sEPF)eZtbf zssEzicEy(d>ifNdzt8f|FL2tMKe{&O5DoJsZUqjRVzK_mktC zQ9ucU69CWF2=B3J;F&Ef{SmnN>?@KWRk2N3b5v+^S~I_Y%Gk20KJlcGFkl3QB4;`j zaJQ1^lDt^hgrqB1l`9yc01CkO72Cx=kH*+dF2fc&=SI-3tiXl3v0r$V*V5&**Ogiq z0@>DQlrU)lG(%I}O(0>IdZ+iqmDW#G__X{nAo&3paVgLZfVZFFBkou72U2xx<^CGs zxo3vENJl?^G}B0ug2egS3IQHZVm|B#;Q2xxcOStKq#!N{}Q<4Rg=n83TVV(+; zt|*G^I>;bZxIs#Rjj{lNB$3&tcli?Bv)oz@3GQZpdnDrN)05Opq-~htNf9`)xI%*P z-gi6ugDrU>lCkZz)@1X0GGc=ZKXzX?fO2isRouqapX^4UQ$>YUGj8Z7qijlg|KhD*t&7j&;Nf%*tY`@esU(W5lBvkI_#Bm;D(kQxZQtz-tjwZ`Scas#Yk$iYRG= zx4>^u5R~pGXLg$G4}nmB&FZ}AkhjOTv>i*eaRGjO3JQCh}+V~ zqm*wv46&>o$RGFpslY9luY$de_F0uAo}FHeQ6NzbmPLs)Th0cjO@7CB?d8*B-|^Nc zvH%QuI~!$2BGuYZ&d<=Q$==mFTjA2wl!xU`ERnwX<8WDIp?PyeR^waIz z2|PY2ndMXGOn&`erimnQ)%1nyiJ}SlMv2L$3(tuCg}kA22GtRDIx8>M3kZhruKF0{ 
z2GJ8WE}bmx9cXNXMjm1|?&5i@XMsO|YqKk|68JS~7HSw?-3;}spJ<_ROpZE*II8q_ zjy?T|vhRNk6FF{7Fwbj4EI`wUKu`)>TP97D(BErX3aD!12ProRdm5O7w>(N{^7bv2 z{+J|)7apzADNeZ>ls4YbTUZ8RIb^C4-60|AP%IJ|^$-+_Z@ndqPGlW#THBw0Gs7y* z&I1CzQN6%*k@(cM>td9g9q!oFUBW+;MC0#4Uvu{zciF-$uztm7&96UKSlyfv8A6vN zHo|xxVq`y;XR+SCO3T$3+jM1!4aD}@rD0&A+oWEBM1@+)ZgbRbTDB%QsllLcaDne4 zd33Lv>dWqtmeE^}*MT(5C`T)Q5y#+gT=t2eH#C*oAWxw?wl=fTX2_qNY6vIwZp z-<6G}FNX?u|7FUu^}GUqsAf8am`9dm3v5HDiXUYi7bu5XTq6xeMMnI;UMs)fKyy?s z!H4~O6czwmp)s}t3#t?$==8WkU>;9J^niI^cj0PA93G21HO=`FjQ6!6gFg(2u7~LxFMK6<31tZU&OhS!8+|oqO1}igR z`}yH80mnnebwM9!gl(R&i)dN`jCpL$Fn$aTKvDtV7tA?RU`DE|-@hP+qYbZru76ir z`uc_8_CsZVlwVOI02x;eMlu_ZQ)bh}9ratM!4%09V*R#B_gI&_>G7BX(>J;JW(_2* z$7mtdbTC-#kuT(-k7NyenZ=msEqDrCpp03YL|2|8IuOj$zVqf8U3yZ85p3^9KP=~@ zu^58bWbt8bahsG zz4=_lHw>-B96!v%X5lVeN4qH%pb?gXH2Lbc)FVbg_?VuJ@z+OR`m>vw`bO~EBbU&N z6^^T76_WW2=`D0;FpgcXSag$(ON&hzue194abSG^#ima6>v`%ky_^yLMfNvR9}dFD z@?si)ZYjJzT1EL(;cgjUa{L<-L;dTa!;4w##@ zuq3(_J&&@K10a3*5&=h89`RPA%H%*hmh{-QD)uBcfd*mQD4 z&DVVI$LJu-`brWVdZ03tYi-5aMTzr&cc?~9v3b3x* zx1}_-kym!1>eEO!*XLSWK`rPI&(2ns5=D%nLV*p-M={j?@vMCN5x}13JS=S12h_Mj z2sdaB8zg*CGBnXTpd_!4SE}`wnC`D~`mvBgvm$T0{l-dq$B)QouBWGD7%*~wU}WFJ z{0yyYicH>NVB`JL+E$}e3J{XWYoO+OHjRFMipZnQxHyyp}(Dg$$rGcYQ{kc%zx z(>Kr-?HHqgk^6aJyTPT{JJ6Xua2+Ocw!K;W(z(2=>(JjeO_IA9l$yA zFv&3iE58KfxyK8@3T33cFMqg7Nuh7QwZn%xKgCUq ziAT8kS`U(SVs^tEmtJmM;PfSU^rRCo0&EfI!Y6+9bu$B$=ChQ4+$Qp4GmEmR)JOcc zTo=^~wY-Yg&{RH6Zl*&i-%E+3B~b7L$1SR`4bn7b#wP56DHq4S#oQpe^0FM-=Z-NjY%O4=qR%Q zDSk_Y;iGE_nKsIQl4y1rDc@9_`je6|C=ba)%sytryC;nA1ndjVY&`&!QC0U50SI0A zb#7Xr{H#KuKZG)PiN$QngA8-67{&Om0`4`Wq4Io_Ssqa-;v@~QZ$~#*YQ}DDVd(=< zhs4@Lx}(+arzf&}7Pj+)zs2C%wS9LOcw|w<mCjt(kK?UNxN3`h|;6j zg_*SPFTC38x;iSA-A8@42hK}w;GC8Cwo775W<<-fEd-*0cxpHrg;mor$Kpih1h)N( z^NctN7sSs4ZH$sQHTObP%xIfc&g~&V>wVH>nZhxDOQlQeNBd81?Irq4x!$Xh=}~iI zDmTQ<+{t-LiY{2Y!uXK&47;gh6tBH~EgtnTx0Z*Bi!-OM#3bFtTp@aQh@Tdz5kNvR-#EzedBfWO+AaWGzJ`ra;Kv=B^S!x5sV2t*y|!|Da^PQbDWtn)WuLP^fsoJ`g2U>6|9(*DZX8*$6$K1}w`1=MYvlHg4LVJ7 
z0Pp5WfpI@pbM|f_dbe5WdVDb*KN#V+7-k@Y!d20_ErTt92wsIc4vh1GKrM(RviyyK zj?=83MSqF0V+JpLO=(3Or-JkaQgUy&c+A|E|2sUNjgm7IsN z?EijaQklc+6tf=VUO1be3n7h3SLdZ@&W@H=-*)%L&C1OVO)>+c^nw5;>!OW1K zy=0t_Fd~kk=aQBZYByqF`IK}s9UzmaIngwc2dTODDZBq>oCIILL>~+7QM z`@9A|K?71QL-S<%TcvR=mxrrc?}f(p>1m96>#~?aO_}1}0;g#;96)8DsQ6>e1CbfA zuC&A3a`gJq&hOYH`)9feSOET)3N+(?6b|09`W{dHV)jb~^YgAHWtVLSQETjqDVr|+4wCFMRQ}@lI<30zXYzZ;A3EqLNE0ySboi_{ zVfhAG>KkkJ`Ta}R9W_d@t+W?fh?+Xr)y`*3be`^-U@^CM?%}n^W2aHA=M17>ZB&CB zqEx#`M*3P~?M0DA0b2PZ5VLO7e>uY`bzR*kNPDCZ{oDbwKJ!n(*Dv)u^qcZAQC!qO z0~1YbTR>eE7E(Jj5qwA{PUA?dKF%~GMSKluQ$?Lxf0l5LP8nDmMVHqc-``EZAisf< zKkl4e!x*aZwjh3l)!F@{-wm8R74MK^Ny|{p%8hv+6JDbIHIPUdD>I1se`1s}D7wlJ zrZn>?PwHdVz=jUB$`Ply@SIy~cV8y%S2p@MTQ76t1;lhRL=9I*N0Mx*Zdo1UIDYy{ z>OCZIfxrO5hOrrAioE1ixXg2{_JRN>HbcU8vstNDsCPyQ+`nECK}vExb;(h z`kSj;v81AX&AMEEwMY^^fAsquckI!CQ&#*d>}NgGwyks#^mw-vPSjs@zVb&jpdiMv z7>*O;)}{FlR+e+#+BuhHrCbfdio4Wbo3b+!Icyn7V;;(`rj}-RZyaBrPYv?iS^=v@ zTj#<7dS8m-no=Y0{(Wt)LMZ5)f8|-xtC~N){*9aRfHTEAa@TJ_e+x-#^^j%G&Oy>K z#6};X33= zu#fOEfhEJ-sg}y$fXz)c-KctyF3)?ZS)VU}re>)Y&@PNdodIug1vEciX86vQwd;2RQHUxb!Bm^kDBt}j2Osc@?~QOwf~CBor!a_K_D6=Gr0 z2Q{x-Fbg?imdrb&g*!L~A0Kuzd{-hY10x0MHUf7Cf0#Ws*P(-s(tsMuer~vo2S3dd z11{sRmh~fXx#^}ridcsX5x=!LOmCw)6rGLSUbex zol##IQ|K+{3g0WR{{b!--EER9U=>4VaFp|cDS|s8w6>C>)TZi z)=PHawe?G8`QghP1}IOZVP)OQ30V$J?TjPp|E2QZ&U ziyN-qYQ{}j^O`5YzxHb0j-v&cv|V--sdSslK=bj(cGH2h+;!JJXof=WXl2`W-~`Qv$=MfRV{1 z6+9&&v#xFLXA#FMj(N<{uT}asCOrP#=+V*DS;E-6NYsbW5D%pfZ&r!X{CHRa<;hf5 zo$1s%I(}#oo|=`cSSU!~in{2|v}3m>RJs+)~Feva*gL!rsUvfZ4w%dexZ7@$RL zAlqjLLnWMP<$65zUrMzTE9g8{f1Vqq6Uht=u2qGUhtl4+=gQ^Wx6onleh-{Fz?ch# zZDzaS=mbFo|xd(FYI(7HJc>HK0vP@GYe-#z!zty#g zoFLpfU-!N%an(x0hmxQz__C}eF(llHGn!p=K)L~MD2vYjvK zSm_}!t(r`qO~p6O+mi?yfBD&n4(V7wI}2Fy1-%+}YIL|iV~I~9KYd*r)`Z)D z$?mz+k#O5Wg2Q*CGI)o$3!S5CA+}#XD@PoB*TF*fP2y*DkfyJS-N+jO`AQe?+_D|M zM>zVa8{N3M2B1QwP*!g0oVKlyUj1eZXrt*n)}k+~*2{%gf9c9QY_O*Nnq02< z+VU?oNxwKjyrhG4sJGrFNI5@5{CG@iOK+A;$x%@K=D1YwXcl*Ie3+`@JV)Pq<<682 
zhp@N1T+eo6K0#%$1r$Lt>}{f%!)|&yNp|?Hi&FP=cD9l|sJu`gpsEb?gs#r7&Ofu; zG+!yxowysW;P~A8e`Z%Ncxl<TfCvw@;G=3~)Ai+xw4QW9`?G#CIZ#X}3QHybWw78J`k{}0&vpf?skYqwZn70g zve~SbOfPY)e}351rb(oy2(VCQ30?P5-xj>q-Q6xuCj+2v8a6nExQIZ`!CeMeRXg#W zM|QBO#$*D_AFaz(z5BCmpkO5wtUJRo7_4~N_7UiM<;9#0CPo-oQ4{Qrr(dOks#tbu zE0=aI@BxYfdKHlW+J5RIxP^H%=5tPLlWzFMxA;OGe@PwGYL`k&`TmVm7a^2UkZcw5A_9DQ%~XY zrIw>$e^g?XXKVMbQpaJD9ghXqz-ax=vx&m)MJOUetjkBVo|&6!LLR>pznwzSSX2b5 z8S`^-uZtOml%_ktt6Vl#Tr#h024DM)!>QfULBOVH>#7DFXrFx8($OSa6^2656&+fG z@m6D~%hipty6Hc(IZQybp_x-b6cGTi_j)BEe`|QRxe9@}37Em+9?EQS1XC7pB?Wy@ zWT01^y9wEcbe&FAYrBIbJ?I57B93jkiHL>dV2B=#LZet$rF+Mgp|MK0sO8%B0vq+Y zx#kr=9So|HC|grzktcIo!EiV!(za#w1#_~gRxrC12L)tS zcOM~(iCNTqJ+;S`ylL>dU3Zv)U~YG1YW()c2@5CY`57lKiUZsNW7>K11G)<=z>`aX zhThR&fhXL)WMp342ZyHis5J7M_U#8ye-OCyIZj*1^ULQn2q&<9HtTc&@|POGVhGjh z1KWBaK>N^pS_p@rO$mO&r*^BJg2U_m~ll^d+MyC##R^FDZO2Z zyN|EmBcgKV0VoPtLZg?V>1!8P5aN|>@!s8lxdInSs=O;GGq@$3Pd8DIV;YzbwY)E7 zq25SCvdtd9n%-k_^y1GIrOG_me?La3P)khXKVT5w++J5%BE_2T#bjy?X^OJ=wPkh3 zZ(gKd{~M4zl0i9S<&|Aqnxfj4;Ly~%29f`*OgBm#3f7!}i1zqBq1>tL94mB=pHxcZ z#s}dB@t`rfpn`PB?e}vq+1=ht; z+iCiMv!KG-GJ_N)7~R;wvBF!@PvjG;D)v^IHqXOrmi|@}8&P)9t1Y*?4z8cwe8;F= z7DKp$&5BHR?cj1BP-(O!sKZ(|9)Fr_h`D8Zpbk#%nfz+p1JotxG;5lWr6aPB?`J|o z)QBf6fU%YQ%g9OvX`9B0fAywuyS#w#0eVfobxK(7?zIoBp?!$V7huq+%^|}Bgfxp5 zeoOTk0cI1i^P+fFBoi}vAFPIqtHHxN%9cn7W0OHB~;e;I6z6Pzf{-_aIc zHBzqao6DLq3NY@`u50-Yp82LP<4_GY4dE0HB2oeA>KjVEUyl5-$GgH~jm>H)ywbad zWVaGTEn0$F-;Nhq!7bg3X_i+ZvgQK!|-$zG+eooWD$j+vb};Lf4;j@Vi(wE3u(KdNe_tq zHqzpyJh!IhVjAl1_T(nNucAb64>k)4EIe0Mx+*xDbEJs{?z%U2)cyf@36WVy{` zPMX;wkL11%XFCKN<(ESH7C3CvhPeq`u8nj2IIMQm zIMpdkb+{~H?4@!Xy4J42uToCr9~7 z`sC(F^c3N$>mvv;Bz&j+cV1de|%Kcf|9mc_XV_uJp*OE#Vnf7 z*xxJ>yzBe17>M~Z=g%ExcNmHJ&$z!yVmR|}RT|7-{%5Sh2o2-?ts5f=Ov`XzV_mE~ zhz64s?B>Y!-8`IetDEyXxi!%v<~DgpG4BTV_=mCQhckuf=gF=%3&`$gkF$#I5T0#9 zixU%Ve{SxI4;II6?TO%!bnZ|{K9%UQTz4P7)}jE^U~@jB#?453sjY6z%)RBV zQqWKKI!$whCE>&$JJb{m`He;tTMW4q8jg04V2#KEGT4yW}JxDGql z{(^|X|L%t8HN&}|h?i!gE?)c70ONDMw|7ocO<Kj+9 
z+=-aqHVbJ;@VgsZV5swhLAzoAsmEn|DS)>pU;ClJmOZ ze?Vu(xl7O!2VVFIvKyC$g*fmo*IGm!iaNI~r8>x4_dDb=sBVu>l~WjMFHu!YSXl2E z^;EbpUkObWLUq44t=~XFKK(dC$nt>&yW%L2K_t5ED6K((y9BJ*L5F*>sFJ~wdKav> z!7KWOXh9)|2XW|Kp(BPa9NXb&Mz>fDe<2_y(>QQZ9HvkB3!=5m4fq0LLoFAPVB!O> z?Gf}6sce3dz$8B19+EO7zo*v`>r44@z@^AdJ9mPR@=8JRSN_E}Bm5h^f3al96jndag!lN98nTB-ju& zFq&6?#WsF!A7vA3@nK;<6Yu%dL#Q6|dxj<|z5O0Q0f(B&6Q7o|62e$e%EJ&#t19ZY z^;x^vFu|v?`ARMYs;V_^xj`QEf1PaT#a9}}K9MWR>`El^?(5gjfUK5;i`Kkv^4sX0 zV$&0T4|Gd{upieBIOg_R?wohLCI?4U1eim1ZGd?J(~{IZS$^j=sVa z=%W!IT9xikrdbnb9QERgE4h!d<8^pcP_5pk`VoIHajMG|1?f2e2VL3!ZV z`zr61M5DL2L1l)NlSS)1-IGjeEZ?RcRia+ehVA6z1jZC_Vm5-@Zo6Z;%hTm}4TWn0xe_~1eXxb5ynYs*)(9sMwu(Un{JbP45RqL3kK%4uz_@Eg zIwnFdSFsuavaTzrcg#n&e`^Rl-RnJR$-a(5y-Q;OZ#!wDJ-Zo(k^3w`*ddWv04xAU zNfw_zP8g&L+)RXSwV0XFbb^H;rZSg4k#yqoJGMz9w#gXvxrH9Ur8 ze~Dybx}iTf2N?3APm{qnx5mHDUn50M3C>f$8I|801DT-Ao60y1f5#xQO~N>g%>}N0 zGk^OHA8Zw~AULeerq7n41cIM6J}1Y7_IjQk2>Iv9W0J^|#}vN9G7PdMhbVR>)Xvh- zN(2qKOk^CODT#Iv&1q}eLU3HvjbXQgZ9~zEjoe1b|H`Mfnc&Cy!;(M2WNc2TE*6J0 zkDdkLp-u?oVuT~qW9U&^7X0EahK%s>e z2NmxWwLnx<)B->KM4|h4_&@LSzTG`@v17Xx51YzdxpL(yWSL8MlPitSXaW%2pq$)T zeY>&c;70xPjqhf+s?r4cH-Dr5m$ffzOCIUE{y`iN6a^1=4M{2!R0bfNVUP+0h#Iwk zU;kaZ5>W4_yWeN{4!Um@&`J%v_ORC4e4L_<1tn)Qd%rzwt2c;gu%t^bOtTTiJVk<7 z$7)VrL`p!!qW)OZ-Oyi%X3G;Lm{b@8`2JxpXZCPQkIAbeYeHg~(tj;E7{z#wkLeuX zIRjM6<|fZ*?!g-#kaXNp^F8W-{KW2TzkN7dr;98+E{6-E8rwcsE@*F)o|N%F9R9Sy z9FHFtHq~L_%21HxBV+0zgB70;B^;_NZ_2pW>Vs4Xl^F$P?R^k<_>wdL&TO| z=X`g~6?S&+F8cQI%HMh+hVQYk)kL%Y!#dg_4=)o8ctcMrfPb&igwyewpgxb5Zwo!1 z=CtBOTRAyt&B2bMZG-E=sH!Pv*7Q_JPQU7q+IDO>K61%;5++cBp2i`kigS7=H3cog zg9r$4kHn%E;;ti-K+FSspcf!R$Lv3*V5%n?*AG*x(M8^iw5>bf*&*mptD?NFPuO9{ zTotzb_qK+y?SIzxUdZcu=g3*RZ)=xo0;}-O*F&ru`&5bc`4vAKGWtnPh|FGd$T@%!5+UWLl1FtuwEuEH}#_1N7^yFcv34IfEXTxd&1P`A#s< zAwy0Z+1Wd)C80{D)9qMhGt5_89@*0@UA=n{naDN}O9PP~g55N6_&k>%?UUo$S zi#FCkZHjJBB%#68Ui}bpHg(v$=<>|rlrK1G-pRe?rKI`fr{+i{KS0uOka$<8y}@R* zqDk@0&%@^w_D|&8JR@cc&a)#Z?*L14?sicMf?Pgg%su+L!p|eGg43iK2)&Lcx)34D zcDV2J%YTqUcHQ43g~u_eB@E1cR@Tc0%3&p{bJd%E(xQ 
z$68w~y1BP$h<@PS)YO!D-dUHY9Inoow}fY37i4g1_W9tnke1q|Y3Mp?)AMEV?wnv@ zt8for)>E9pkZ4J~*;8)Ei${4Uuj{be?SE!LHrodoXcso4KKijBzt>@4<^9QSyq)g& z+$9axlXm^5g1u|e2WlahA0W;b-QD%`oRqxHNEK z%y%m-s3Qk~6JOVA_)4@jC20g%rOMilCB^60?X&iuKVY)0&w*X-L5`EDR-~RUv41`y zX+`|2P3V7(Cww|4EQy`y@s+3B0mf+U^DdSk)9R7x`m{VZLne*G@c?#chj?zVh~4T_ zm~&!kTo<(O;))wwTzR!E&c)caW7zs24mpRd&1my($1Vm#-*MaFNN<=HaWS zZ?`}CbjY2HayPU%5>P%|PBGmT4}bVzblK!inrqOtAhbAc(5cn((}ny^-i1w|r3i&Itcz4YgxE1z(y@{({%WNgRnXBW24n%?f=l~!SoQgsBxf~SL&?uthr zwQ(Mrm6PTtgKOCqHDwQs?h1c;F2*bU%BD(D*oHeVnN5)24$H$5Fc)GC_kUgz?m!7K ztwJBo%qxleALr>>JL+=J`TdXc zh~Lvi=Q{*Ql+%KQB_4_*M;yFA2Zw+pM80l(mD#LRuA7Wy|2@2mG}aka2#F3HXlTC~ z%HU+V4p=UsuN$VHlrN>rVt?hj3?yV0-tXiC{)hXxx_$16Yl*euV98J@OPzDfgUHI} zyP@W`je=6G{Rbdl5F}Gh|haW_s0G2ZVuet#bLsoEKTk|0WBGK9i{DJnCWymoQ;rNN~N_ol1W@>HB}ZN1`P zYANPSbh*+S4;XRQlVR(3a{lVdwMX-p)$1^Vl3%7u+f-&@kPYV)lta!>J0>%qSH+2H z9!Tb-vp9T6lGnwH9NTWGZBo8XtP|GnSNlRWGCWm|t#izMrhoC-l5949C#s0JRIYo; z0($qlkOLt&$v)oi#-+(~Ar9!NAQuTwcrRY@s8Jiowe>us@0j0w=9`Y-*5pLGn5pNac!$`2I>;*W7yiqa`jw>gkEfytD3WUsEN)6CCf=5+vlI|uvXgK%^$MN&ctKt z!h#u$b3t8MXMe|+B81YOE?$QLy<{7rRm^1J_=9o8>A>YWv#HJ(q^vF+K32(nl|fmR zJG=f2TDax-zO@rg+;<=S)b;E15RE@@t$m)P z+nrVX+!x?~%09W!D5mO@ryJ^Tts|3r`HUftdYbz5L4Rba(2FDT&UldXA|iUspbQ71 zB1twRqqh?rR%PRY&Yr2p++1VZDoi7g6DY?u0m1pX2v{na8%`55RSr%_^i>C6}>(0>)0{r{UQs`5aM?+R*NipMZ){eC*`Uxs9jbzNfOwKzO=Ift9QDQ{H>}+sfX65E^U+*nLeW{D!q#f( zU78T{BOhp=>9Idmtb+F(B~Nep8oU>~`+pF9lGbjWxcpL>Kh3BO#anX9<{(8m*=i5o zFi)KJlKw-;cynNYCG2|c%ZhB2-A8_YggLws(1V5-FbTcCjB`}yzmz|GBGSgsImUBW ze@0z%VqM(5u<2jKp#6tY|JX1Fnn9kCQeJ&&>mygvUeIb`pJ3^$nv~$!poTt~;(upq z&yTH%t^Ma^#VKUo_Ud1E_hT1ZyHCrx9)S)YmRjs0It&15e2`PR9X<&3ua(Igvj+TU z>~$*5JpYQ7RA(2AFyz|g%^GuumyPRmKinbmR|60oz zD6{z=>uig-wSl6IA&f6z_UNVc2}m15vbtSE9KRP5Onrrvmev=Qjstnlsec;vUcMei zf(#u8_$RmWcdB2XeJMQO&U||QaQJyhZJoaea0XO0KGDJtGtM#3OX4L7WsMp)5z$gPjwi*eLWSI#{=t;VAxX!3$Po0L4csTM^?NO}b z$=aHhnN3t-Pj}L9^pG$ahBPT&I4KrlKD0?zzjFfbiF!Ty<{Viy-CQgt`2=!FN?Dy8 zRhxNrcHH}`_2ig0Oj_<*n9H+~`>mT2HFG~8b 
zf{-R~qC3gLy<8EMKjvDd^BwXEUb9ez7Ts|LvWSoiKzBB=Q4dj8T&COyRhT?l)*tua z)51C$?&*&E&96D_yMNAR;?M7{KR5X(nY-=!wHj2vY-}$Ns8Rvl$sTINkvQu`P!nRb z3F^1WX=uZ|8Ixr2dyOfKeAL~R7Gayq9ZmTP6vkKeDwGvL=rNN#>QANT6HD5sqF(A( z-#>x$t=eCn723fUPgllX_4|S~$j>900oVSmy-KtDNSU>pb$|EKckUPShP%wtI$YSg znVGvl+kJEIO&`L3)r1S=B|T^4L!Q>6ec#-NV^3`!2?0%dxPE6o44|SUu5Bo#8%-L# ze=a%doG0?$yoOvLlb71`$7-Sxs_X!KnK+w;mmA0Vl(l>6$9{t}D~*OL{K(;6bAj7} zj!(tH=$@Zv6Mr^+2>R9aJ^`}P)vGPPo?>tY@GM(f+upaet(t|c!#Ji51rPz0nVlxu zwLzdEqBPM%v3V@)t1V6Ry$77q9hsQc*b0XIr!lU>40Ws)_%K*oJcr+7+}`-y2UAPG zVbOlZ{a4&cj{N`2P2(7G$(Qg2P^a>mSJ>ulj7Y=Xw|{PNaS4^1PE-)nUAQB$42X{X z-MRjxDtG8%wGLF~(37;qCG;Vc8v(*$arlT(D5h&?n>Q=(@v{NMG3$QsknoZ~CKPrKTbrNa# zVP@{5b9TJx)wO>M{e7}a&W~VnVCJsmgeSI$rGHOLS#B>wqvva@tcY@-+1iQ-Sb|C3 ze5#NKrM8h9i3OlC;ThFUCi1370C_37?Xo#lC$%6DMifplm&e{P37@E{2SV-d*V<0# zVto&?i5!nkX(9&r8m)(hZ!_UxNB}hib8lbeDN&dh$kWsV4{-}r$xvp3q_jG=_fkl9 zMt>(t4yD#ux1vNH=5ZN1M&ziB!U`$}xr_u(L@GNv%>6$)Nmvn;D_0|rL{V|x z2O!l(K<NmWW?H7vVXn^on=(#cJc~!aN_au)%rwUybakd)bG6u z87oj~%B2`V?o*~7{p~4h-X5C?m$t?Xe*$SUzUp7-UdpXd@!|wu)0viAQOd;D zywwBUS`&!b(Wa2p9nMonLbYY_1H@bOHSh<^t2w)f1 zo)V4;!(H_NP<$p!4$3xZ7 z@T?AqX>EBQc|>(iX*GT%n!DG!aHc0;u;QlG&#fNv^S(!4RToZY_kQD`F@Mqef#1H- z<@>x{>me)D>o;|u^;ZGW>|Ixa?Bn?i?@O>KoW~a`t53G6@j} z@P+x->Hh~j24V7E#5taU3hfW(b3|deH0BeNXN-$iqC>i{#yRdCQkK(&7L1&$1?E^> zVlfKY`wWJY8Q3XPTr=J8S>XKqx@0Oxp0h_&eY#>CxT=h{J5BkR%6|&=;wXVZ6=;=H z`Xn7y2nUqg{fqsiGT^|6+8F>87_#>P;O;i=MB+KX=PteUf5UmqU`Rq3Px~K0Dt-H! 
zQvKiIKSpuwA|{2L+J7$!^>guAt2s@cDkhQ3n+H@@<4|_<5{oM?WD`;0acJ|JkJ=f7 zHG~N95U0G&NnVO?u77pQ?b&|!`)RRXH}0Uk&V$isI~c6kuo5z<4pUV#(Dkz&hlJj3 zixv92s&pnqq-lVz<3hmDvRnWw02qS`SCEAHihfDNV zG;V)}`+zLheSgfmI_hQOX5*MEVejv;{|4Xmmo;&R!*zP%2`FH=?tK}=P*o5iY^ zJ?YBlVLy6-w0)ay9IxA%|K{pk&w#-OdZ@$Jo{!8v4QZ$WwY)9wTujXGxpBjayBt3E z#r-U!5JnEv4Z(d1HO<)v74znS#MR1~%>ZA|0FN!=Xn*2jqB&_U`}mnxW1|hNn*~+& zINesBy!YHmJe(nJn{Cw~E_0b3vk77Vo?rJ7;sfa^^E~83p|;L`xtcq#(%&KOL{27J zQO1#%XCWWd$?Vr9InD1p9lKLoiYX}RUve@jh5cErIb$EybcZ^j$|12>xLQQ(eJIVn z;$Z0PGJkl+c}A~H-f+31(@nVHCcpc*n4F=v%kDbfuCXmQUBometDQVEHB?v=pnzJX zdF^d;MLCJ7{_gaIYZPc)oEVp6@zfWPai7e<6cJc z=cOet^V6{V`Dqo(dg5l)DZGn4ck{s0Lw^zSs{SJ7z+Ya%<5cOhy|uRPS^80|wm}-P z6%#tjiVsnaE4s^av9){}hm@q^F3AQJ-Xklksob-EqF}!#RdwfMO{&EK0K7KzL&EZk zFYKNdI;uLX4OHc)!K#uoQM8I~oncEv9r&czRjdgf!QKD+PMStxb z)c=*XJXY_$Gv+QZ|K9I6_;=3|6zaGFje;jMjA@RP3qM7Q;%5M!WocJNFOjN%f3 zV-Y!)WcoPbAY9lf2UI4o=UD$;+aO;jT)Io#e?6j}-23`vo7Y$zb{TTXm5&k?9yld{S zHp7mK-#nCuvCCxbL5j_H#;Bz%P~Q$^f~l-A_Om9c@Te9{R?~N;E#d-y4s^3*NIL|? 
zF=tQHz|lQ>3pX;j@$Q(uwP8QZ_qL)=Oc!uZCdvd!JB;WT5f>3&H)qcjo4AZL@zG*Wxw~?OK05 zrt3JurLz^ga2xLWTfK8mRVm_XF?7HKXOLzk(s^`YWG?v9q%_uBxBGG|5I@54GV#kY^3=hJT3p7*q3ZdEeiBr`!JR zdfu73TIWZho_~Nir286mT6`MUuLv71bUbAj^}u5x?K{~Q5IT|0KBuQD@h?@(TfFW)GUzSkW~kQ=z;9;{^P{BA zoBqSR8+_q4CNWsJ=-g>9v>dXlFLC#aOMl6!&^e>fFn@@{1J4xWBxJ#P@B4A)5gjsG zy#giKoU(O9QR71A9Q6YEIgb+vCD-AvH3O^a{d)h}oH?5)z6j!Y4!Q}Ute@P|FmP1p zJJoB=l%VvY-TAC0Sl2*}#6RBaJ391)wcdTdx2?|rl{jw_<8q8k`R#q8S-ELQki98r_XVXUw^x2#QkuMWpR#-AJfHc*VxqQ_@MWl ze)@f@?_zwjy9nzjt;WJP9%~RHA#fon-@BQ5vB}VfT`p{BA_^hKViCw}5Nu=V!I*&3 zljNd(yR7DjRSuH#e6NOtPamHqqV>O7(_b0w~|tOAH%_l`o0$9guCocrZ?MS(BfG;jAK zD{wznh+j8a3+YW;_Q#y%Kd`IvCmycLsecSl@$P=HK>1^MiLW zb_TTy=?R9t@YFnuB-_MW)b4EeV<^#Q9&(yi?>6C-fqV~@_Y}~|>GrFjL~~}Z;XnWE z^V1g34EQbwK7mSZCNnOn90oy|`G1bAh!c-3Hy(!e_5@5S;(#5sKez5^IekMk?}XPA zdLGVbS$Pmwn}}F1jO2|3%k&yb<61ymt)s6;)HTC80Pk^ z8hhXM)j#H!oWp~ScIEfFT=zPjm)(!`-62ovU9YFpbUh_+<0)0kZH_VCu760JU9sjE zn-o&=P)4BgdtNpJx5S$NV3(`hXOH6`F0C?6teC9E6PPs-xs-O2VjQM1Cz4=;5T8^# zE(^QSW1Ey7xS(n$31gx|)a!E7EuYA^-jXhUH-6HKMnA3T>S&j%USW8}WZg7uyZwpz zni4ez`L*7Bn84!~vBpo4HGg*A_1Gr|Z-?<=bP}uRWTn_=@DFe7o5q$#mi(e?PZu>$tC)i5?jLJc>?qyZ~BU9 zyRQw-D9LA-b7?tkMd>NjP!7A;QV^NsN+xn6rlXW8?}_PAPk)UlCx1XUz|kL=_VaK8 zvN{OPEB6efzZ>q!#oBo8I3l`;s8;09u?|@G9(3lQ-#_T!5%v`j)Kekj7KgEucczEH zRxH}Q67IFE{Bv$eDt~ZY(O(}Mzc*BQd%u5;Xum;pnm6uh@=r<+X|fg1c06_4!B%|zE#C1YIFU2`DP96K|t>CMs^Y8ioy3Y4Ce)z}v z#yk2urovj$9BQ$3%4#Pr!BeG&bR)-;eiQU``N#gK#+6f6Tc-@CoP;Y-@+{6Wcand6 zh84yNb~(0DXw3b&&*x(htv{W9&o}l(FnI^}b|;Dl^7wb|p{8Lif7iCW}&(Ee;}(|9m(Wo4@a ztlc@RTM>w;2@kU; zJ6uv5)8c*1QTMUa{c*)PtF9VFCMLhu*vjH||JqQ{gUlI~jSI7&cmEjcEpSC3Ovu+r z^*@Hx^os-0^b43m6-w5>8!HXce z9;vRdCV%LjyBaxFmWy$F)_;oOJ*Q&{3(Oz*>K3z@XIYuIkXz} z-QF979?^<;lD`1%gRjv`{}UXpLJkQV_Ft3>a%{%kf5v<@`T!o|FsAn^kG6S*jAk3^ zgMdP%F_86k z_=S2bSPvIxFHE~T=W98s-{f%Guj^0L%Ala}^(Y04jnY z8Gr6&;BMbl0gd-ZM{&=F`V?MsecNISw=?>(1Zn2*6spkVpuBTP7}^>|(*9@D?eoI$ zK>pAD@c(5lmnoH~1D6q3ahK?zcmFj~q!idARrRL#H965IXU_AtRTXit?}9DzA~5MG 
zcA`3k?H$;AsvftmO!a%)?eI+6eBDk|gMX08;Dx9=Z!3{)mQhnm!X8C=dDqvQaRrU% z%!?JsDx*7oLYbX*Zp1eh?V%!6)e*VyGiZZDI10@pgInb9eWDzb*PQ{`xA#rw!(Xm7 zupgL%vg`SPz)quecEc;p+PYEkgJ&sz)ZklAiRl&8?&rH}9KgT(vAW;=TGln!%6|=6 z7*WKb$qXk|IjG03MX;8L_j(vmgHeE`Fd|vSMkIB}==J@)vgu#GKl6wa7k8me6?f-) zOedv$fsZ;RM1L}9woZwn(v5G&@|jA>SpIoWh?b5{hFfi6PuyplWQcVNZIbVQe#VL6 z`k5gijdUTozkY_3%I`;$<=4nbiGR|R-XBW7b-ZHR9L;*z(|#!ZJ800~H+8B-MB4tH zMyLN8C$;wB7I*#DfBLx|a@*q5v4;}!DY1Sb=hee#2C8sD+h>qoA5c&6fckNTOk^&J z^2)_oPePE3`YlAvxl1>XW2m06%qofAA~Q2kS>e4s*AApPSsF%U#@-dDeSfUal!6Ta z`_L9!F13Srnc3-MLz6+6Vm$`(>EZRQK>QDZoIKAydg3y~mL9UTlj9TGNv&WAJTy8n z(Dsqb4DPVQ$947BYYqIWo5jC5OktaAya;oL+sgD%#`pmHAkg(|Hy8cKNjDcUQRN$z z^^tRB?t*ot8(F?gKS46CR)0HhhV$#psZ>5!@;p0p3VUn1%L@9go_l9yA3ui>MUs42Xw7pTOxJ+EIq z3-`xaplmT}AMG(`iT$;HV1It#It#=JZf`nNJA)0p6BpU?xGs4W-hXn=xB8!Z)4PJg zVH*3%8hlOb(_Px$Nj8MQP>%qyxjUn_2OqVtQ@o92&G#KuuMpVYg|s=YD!(!HrNyx0tTTe(o# z1*l>|bL<11hZJ%SuYV=XB_By;_CEPQzbV|IUjD+=1qA=Y9!}sAw)!LGFYUE&{d|LUzUh$M8|=zYJu&=! z@}}!unfoZ|dPXaL#JgumQPP%=|w%>U!L!K%)s4?1{(w+h^rm%s1s!7H|-`cY>^TQ?^yo}-<^tG`Hp zU`V&w!K3-nQ^5=3lKP9>rAkep#+Xb1W9*{9`v(eKp~`Ch578w z(^{E6No4s_6S{xA@12H%(D%$`FJn#sb+ey8nJf?&d9cq>9HbwY5V#ZRA{uGw*ed5E z%q8obc>Cpcavy4td26S1NI$gyYWOl}r#dZ%+jD@t`Mn-+%xT~{+x98voqgYW^m}gR zs+dtF9EN>ig?Y;kW#CtymsDTX) z&b!SeWQQLZ9n1|^9TpC2=o8SzcRxX0WLx|K89I7skxx=w}9B5;7 zux>ag)Fq66^k%to^8fU+Z~gZ2w2K?>vA5pMH@JWHhTDe`C7jWu+(KNB1LQmCFyck@ zH6S_mK|=kn$h})a0cH+9eRW5;D*1ZDxdT~Fq>P-+$6F*(i&vZ})X&ok!Nyz^_n4{G zoviyK!5-UAJnT&$-}uIDf2><SICGrjP7pH%0zF4Uomgz5XCegFj#5si@o$U<5Gyh{= zYnV$@H@o_dywiR`ZuH$Zk0d{S*X?iT>>R28?H;aC3tY=%|8qIYVN5AB8KqCb*T8a| ztv3`BIr+H{N*!k#JBXp4_gDFUdEdBGs5MOCGimEQWr%-&Qu!nbWVAeFoQ#K4D*0)ZoZst$&kksB;t6tX<7F?DJAI2D(|h3)4&Q+jqw}()5)Lci($(XODkl^PH&v z`D{@(6d%ZE0Iej0e18vPn~+H&PkCv2x*@MY7xh$xOB&{sK>@Y?BUBq@~-*F&v9})}uFR%x*Z#pbOjD+N%cETH4{>Od zc(PiepuQYONBwsiXX5|TH~Wj9+3&W*Zhvr}AU|Kd2GKbzY?}B-ZA~GrcbFRi>;WXj z+CAE5Ci}hKx!5txt0ba$xz`aKPy?6yn}LD=@l zrW>navN(T6%3$vh&+P~^|4l^BP0du}k!_)%)1%M&{RQrCM;j}iBG${;Cz@7tI%t0!~2y&l8)Dte;v|&UR 
zku7WLpT{$M8}6oB_%;kZ&>=SFW!QTg;;`U`Qa-SMRkDo`}F4oQIkz>1#ZcN1&An}Y@M7!}%44eQjWisA>>Xq{ zXUg)S%;!|qlE-68l{K|N5dW0T%;#hyIW)d&iMhQGQR_`IRiW;qPfLhh93O;TUl~qg z1qJrWnd<-fJh5w^)s73?=KJBlJvV=2W_o(d+#yLc+$$OW*Ylbd6r@D`WNtLul}2a| z>QW)j_5@%7M;-t%`8o*C`H{!roxvWQxI=Wtc*0|}ja-K9zX^|HlqJV{{c-GLeXrl} z8R8+5{^-k}jy|FK>W#DvA*)HEP)KX@^eoYS+8+>KdJn*h{%6|z)O_9l42gfc8SPBJ zb)#gX6MMb`*O<(~YhfsIQ)uyy`z^H0j?BP)jCJ zwofHCHywNsSrM39#GQ_+{(E!%yY{cY3;1^n+gSY$wE*l2K^Vy2034S^m&eV6l6ZLX|r)~9&;H#mw8ws z3_-r}ALC~9Yw(I(*{xjk#&1rqSMHwfbVyi#^a3x{9TshTqW@)` zi@$mvl4Q89)h~VpFR(ASOSIQFJl+0?K$1Rv;3=h|a$@W`|F3iFgI9m{m;He_HA|83 zrK3B%KS0}m4R!4pVj{({>pe0v`d(0!HgX(g?uyyiV3VoR0nyGAQ(X2dNxq2TLouce z4s?z<(1Av5%G;`aFV-{g5&aeFZPAuzhxQaXlW3b49OHu<->SRsfBwC(`0=-G!=>D- zvBA#l4IgVz<9Of|G1-5k-r4v7*L@q2?(e36U5$P}XZjzFW0e2(*V;bfm=5x9$Mm#< z@A>IBhS!gKyMM+#Vfr9}2A18%7yZ=gZ*@+t`d5sm%U z1&MZ<;jvZqi-UbxC|CrBlx$qXKRr5kRsr+Dr&&vGy?apEtv`Ql!xTi=j(z)lkd;q$ zg=vhW>ilf?-cr+ht3Or1`eXFzgwa=<0Q9j$ZTgvDAJ2BIPK*1`{*PzPE>YXNz3bUv zGH@Ai$c$z8Mw zZJ)~AwcD?;PnJuzKRAZiKSx#M(2kj7;)oayz7>$=JlTIQvxxN{KRGCu>Y)qs+-h-H zy!MIc=Ey&AxbO9sAz8+_k4FMB?6ci?6l!;e{iV@|KY`o-7*9YxJ#FUVw8T9_#vAWk zV4ah}F1y_Jw|Q>jwR)=8eSrEiW*^MV89WyQ)SNe8UouO4b3m^9;n#fG=`Wopx9c(3 z?T;Zfd6|FLT8`*GgL!4_!d?{tQw3S>q&XXqsG6&WLYfaKU>?L-n)7@&xGQwtaYp#r zBOjamPDK5>V4}2?P|g#Rs?Mk|=V^etefNW>d}kl=%)6)Y(bHy+&?pV5dxEI{b(g|C z!*MK%1u$sq$b>N1I{Svu*_gQ}Y9=+2DWufcoUN!E7hDnpDS4E-0shoBj!9 zK|in$^sQEF*|0TZh{##dzWNw%rVVw}khGY>I>KIh*qe>@@B6U6d_dZa)|z*Cyjwgr z1#A&Uii5F}7XdfD734{vep@1>$* z=#GELJfh0fIEkp28P{&l1N+`++8|Ez-qd}@$Xj(3pi$ZX8}neB{!Zui-hy|u)6c!( zbo7Eb7a~Q2`;*C{hOc3;aW5nIx}O_cU4;_lgUgg-uy_=8Ts3zIh-rT_CifGIOfDby zu=#2Gf#G_sF+Y3MDB-cf%LdrLF<-wp9P58Q)yVUeG=ge!tto@r^d8^ySM0^yyTA0F z;fl57lL>MzwwdgI|FC(aV2cQQPb=Xt>}{k3Sy^CgOa~#Aqk60=iGDrZup`k5{V@MA zy9{btBR1HgvhgVD2X(yf+c=>@oHJbg%MQPM<6vGjao|_&)L^^sk>fTnytsd1!>61R z>|1-BSws#)qTH+BZ=8wpwL)|2sstK0D%Fw~}wKoj-qlst)IU%VTeiF2?asj=6}jmt2=)_~z3S$qGWXEWA=C zZaGfu>1fClj~10RYx()n+iG4$LS-KE&=)h*ejQKyU(w6L@UALbRD*gouR9+4ELwbx 
zF1_6!zZcwO0-C@!U%Mxf*(7irwSGxU?0=>y9Lm=fw^ZA(e*L__{b7GUJ!3C!EaIf| z@^u9rC%t-j(HD~oJh<^Vv58E~edL8erFRD}7I*;1Z~RE7tJBVF9Nqem{Fuv5Yu-zX zBOLRP%4}=?rb13;6%d*GME{(NxL!GBO{Nd_yRX+j$H>8@R12GcSDW|a1qx-on(u2$ z?;1lSQZxmq=vuk&Ky!cau|6*U9P7=mxN4sL9^CIV{@wRa&i2gX0=l@hf)uAK5nRQLJon)DyeFjL21kYhYHzGpr_rEvBh_NY{dm81KI zZNAt3nyR4>o5!aAjyb20P;~Sae*isKHJE;L4Ajf^)larpUExQC@px|ZE+G9~JNVZ8 z==k8O?e}}*n5H=a@ZXTb@I4Mem@EWQlWY%fQe=8hq;eV7YSWBNl>bh(@s-^AEvag( zt)h>id{x7t@N9p9n9>kA`XFVebEkUun!kRCqs!EIC6MjZTZY8PxsR3VqmXN!Pp>w zoSifc%8y0bng^i^d2@^OuM7>md(6vuUXnC9HBi0t{^7!xYCaF~L(b`{W`V)2B ztY5YI?(BcrCH@(I|7;el4?9`!>r34iy0S68FC>i3dUSVwP?W)JxovFX8p_%vKT;Ko1b6vIUx-;4On)o~S;q{YH(HX5lCZ56d2|%hLYUg( z@oDYwe6h%dFn)`!j4sjN*1VR0HL*sJ;U=*1h$vHwLXEb_OR-OVan<9Yx%TF$`xIke z-mN%BsT{QPa?{r8>}iYhi=gE@+-z5w&ANZj-Tlz!LVZIMKbyJ;?z1wR%}MQIFCZF^ z4=-7nl$1Y{>z+{c#=WZ_H&(GF%1Is6xso5?Ur_2$xaRw~Y#hjlEk{%L;y6qR;+^GN zxYHLushmquO<9%^n`d7l5yxbkeUjo!F!lJc-3}vh9zOW6{dkwd$=k4L=-K+9=W~DX zrs0qIP!PBH7SwSxc(smCbHlg&&{6uSlQM|Xd>fWUdlzj*+pD6D4ZCw@uvddfdQ)6S z?7iupXHRkU=!mjz&j7~cQcTLhtRJ6+1M+2sdWOWB6|uyeeeL1CxXuUc^XcU@K9vf! z5{k;d@TYK_f0FUVWAc7_QLik`>V1Ep7Qq?*AA9fCow%-af&QR8U@i&du_Vj3d;{4S zV%~yDWMc>fLxlPDXOs*k>F&MuT6^#BoR`z1#|TEUk)_hDRCCVJ->@%D8P z#^jf}X!tFqyM`0MM510Zw(m`oo;Ns!+xt(0i?!ypwZ3VT{cL;0tY3NQ)(wBwWkfBp z2(bJh)gjsrkh>J^*`os2F`Dn$f_3-PiFQ6|bfhiwgJ8VQD^bObEGi{2Od=)mS5MVd^)c$F!rYdtCRDwDV-CcRnDT!I}Wq(=I7|4eoL;9 z(oy^TH9neV&Hi4&eSIN?Ln42SZA|Z4hJgNoa@;w7PD;o)tc@{ISA36ZkFTYin~ucCb8=OFT?YJc^A+~$LY_8Sv^i-rDEJo(nG z#fNoic>;}aBU~ag;_m4L*O8s$@L>RH3`!6_$yG5KO5zUQydrydigJGkl8w!D&mxVF z!F_y#g(;j=$H{+D`QXuV6JjZ-64qvbT)s;`f%f<%@bqnx3ru~-|=6&JOh!AF?P_F>>X5MkqLkL)^-f0fIQI-#KG7E zHSL_7hYqguA*I&-<6&s0CXtp+gh7x?!GVgOT+~bl!s7jE}=i|gy z1MdukNI52p`2s`|6XCs<%6)?L^OYz$rzq2fK2DVq$MWJ(pidVAE?%b_DJ&BbyK@u? 
zYP77=?rT{qN}zud`aIF{AKs-`KtBar!3Nzb-}$D9OxU+|VyXG*H0D5%7Nd;33V7Ck zPFd?;!@9n-G2f*=pWkUeE^4ekP{AIy(T6E( z{y{Kqz0;2&4=5GE!`DO=ov$@=Nti3(0YxxRWevx2M+tw7;;!I~A;w3&@J@;vA*g{M zoue#~g9@=j9ubonlLHH)oTs9@Nq|`me*5tOKDoJ)h~H*f=ak*wPutRMzvsryyRpmn zeM$1Y{u=y7v-c`*{yUdOQ}io9v158nTBR<9Dunms_FlwP{0sLe7d4Pt92pJc$KbDoT?=;=Q9Cf`Mkq8AH8>ZD?k_yl_^!z zu<#VO%-o-YM7(H97QJ2Lcg8irA*Fo@37E#6&*jQFs?Q8=yrh!-a|}c`xX0&%no`P) z$48FZ3N%&!9RC1IR?{0I1lonBAloOs0eOA<8k4|jdW zd+hF6?=?2xt}Scb;v8H2!%siQw|I5w^_=yy-L(Wd(;`q+_w}n6+x|hKQG)SDfZmCvebl zhp~ULNq`hX&h#x~Dlw_bX5Ji-3zUM8;W%acW<4;y;y43XfaY16>p0My_kY1P++%ld zaXgo_-Cuk=Lu+uFe~u5|@;YdA;QuPN*W|r0&Kzqa#_!Sb%Yl4d#u=k>$K0ny#kwRy zJuM1npT*e}^+CyeS5V1KCQF~fNRA!mQILQ0FUoDE1{Mfzn??Q+AO{D{BglpmQ}xqd z@rih1O(ZKJ<*x+!K+Aww7_+PK55bZI4*H-13iFG%sZJo71;z6VCoD6C^!O#+Mnz&W z#3)|sgo`Ah5@Ys)lC#05Ws`S>yq>?s06Qzxq@-F8vBiBBU?abh<{8&9P`}5T|G#It@-6?2Wq4p zM*Ef85|Ns_nyBFOwSbbU9s0&BJ}rN%H0Bgy_uXBmYbs^v&_pf@JRG(IJY%DB@~}cI zx77UraTbKAghF=QE(HHAKKQEV{jj^^Ki7X7|4Dwum1H-);#QZk(Yb1M&WxX2jCgMG z4PVh<=5KhQzl{&|)pIm~G`2zZ1dMvV7`T>T0|ZL?wU`Bj?L*}0RgdCZzOH{sFt)ek zrYSQ=R9`Mh)D^!%d-88^X2gRAe?IM>03?EnkBC!!~~vG8hxJqV=e>rVx2kN&gKd^*JgUOcls#V9vv-CD@G2 z%Q}$dhlyDEq`L5^;?uctYn#Hl@K+FsiH<&lOWQt(A%b7(eFydk@s8iHi&(Gx8VmZ} zuN2I$aRB%E{wMe3O$VR|r+4jTe%CneUi~RI&0ieny1$%d+G@m5Hr%o;or_B zZk(&|iCChFxt-zmtp^j-Xt3ahQ`{AAlyN47O(o}U_vIWL!n2Ctf8J)0jeM%LkQV-U zD+SG0Hw>d|UXCX&M{`igbP_***oYZ@AM`ovezoIPH<&?`Sg(Im-i}#_wX;Xy6&(MtzBM(6P?#PQ_`+(g??QY9s~Up3i_71YvD3?gaPcrfruDGStV( zofokhOXQ9z9Xx-lgv6T6jG;N7?K6;kx9Jxvj#x2xT}@dlu{n&;wLU%1{bg2r}phIr-&z;Wu5T5f+KKG^iCxm0I`B~a&#m#^o@ zTw4R8>nDexn7YOo23OWZ)p<5A* zL9{VWyHMoKH3}jtPTz-Ph_b$e8W19-FXf3!NE~`5hw!op@r<1q2HA?+UI{HoOq)+c+7J@7yc6K1lC%q}SI!v?aRN zb%tJ}BWHKG9??60@GrKeTAhK9P~#pmjkDFZb}ETY4C7oO9x-)E^)axbYDKmfSF~c>cO!9Qix+bX{vffyGt0G%h z&aL&JV_mjPu!pfeUy&GVnNpF%#!`GtL5(9AZ7Ie$-#9=dn=)gx-bhBqmjN@CLFe85 zr2uX*I8200#W%Y>*?c79a}Ya}gwMj_F~-y91F!KL3?n!UOs>cZ>x?CK82u z=jnfKEL7H=7qsE_mMz~(qZjlm9}D6in)x#dr^=j2)KJ5-{(*`9XymQ}&Gnr{6UlLJ 
z_M-WP^}L*Hq0DUaMI6IzLa>i+_h6H=;fKKz{VX;WOaGFWHuA$FHj;u`UA{44sc~|x{qN?{eo;D+@kG*9_HlnfpHA{VKA%r+ElY@DVqVzu>!UQeDjoVh zq>!hJ0C;Gs(2WR-r}@)AXt!#(Dw6UQx8RIY-t1BS7j8_}Gq$~qetNE)8_P?0A*r+$-wTpj)Id-gkTF#5ZG$KX#z8M1%4 ze41^g%hu_oWCMpuj0Ka@n>|0e)U{S|;=Q2{=KRS0GIJqf=ZzXz4E?M*d-7%@m(5+5 z$wpUhjr)$5UBml-<<0oU3xMmP%Qsx-mAk$&Em664?H2ibHl5U&g<^P24i0%~yzyGR zC3{ZhQd&&0*pJ;}Tz3hT68%ZG%@%*}rb;Yk576(*J6&eJ0wi@l6QY|(i=`M zB{fB4`y!2z=Sshy9E?expFC5XO>@_$)OeUDvsnN#W=i7;2?0Jf#o{HZGs8Zo;rNnq z-!$ks5_#E+ta!GV%u*$IgvhiLO~u|Wi~uo#1Rf`iU9%dN zI<7tart@*-YyF|uAh7Ks7+)$We5&G)y=06?={7je#DoK$=cCTM_mR_#5!4Kn?A4PA zz3L&7K-wdWOR>aF-$b{gxcv@`ulOG(%xcIvG|#q)8rl2L%rwC>Fm8W<#D|B+VrL=7 z9QoKxQ8oRv)A@vDMKZ+Y;QQ$HDr@!$-?`Ou=uHI1Z^qlb((hAO8smk(XYHs+7=3@b zTy^UyYTY&Oay$RCM=&4g_h|7?IsB@{bP1H5P=RZ3{T`J1-bL$FKD}*WHbRW-XXliD z62aL`LsC;2Peu>WRoH(n)4N}x)fj4hh5$iF$DN6Qc7jq5xER|4my{m0?>l8=QXg@D zfc*MIgwsDffBG%P4!-jtLK_>?-8{d3bssu8FAJYn)02 z9|oNup7k1+9Zp4z^Kmmq@>uim-uAlaOWOt$nOH!i;S${1s8IvA)Q%tF^Ce=nlyCL+*8am-9HViZnJw5E7U@zJ2T;kzeWMWAlt> z$RJ1YyRv-{40#hRJii@4g0UTps3@B$%UMm+4~&1Y#xv#lY_pi-Fs4!;q5+v(71Mi5 zq5a$m#t#p{r^x?vJ&(L`yxz}MZhoX2FT&M_eO)KV?lh$rkes2eyz^>=Bo!d5(37Kp zh|q~-U|G4!h#VB2#`*^&Q!H|n4u96hn=;Vtchfbn&RImRYZbkQJU6j;Ehjhf_ezba>9+bay5DGi zv?ea{(&(uBa$*u`e-X(TNJ&qkG`SheIaDq|9X64j^ErRBWWZ=V7a&U$)bb$_dd3Y3E4tZ{Y`u!fZc??aNk_yS!wn0b;>$&_9XekxBMI)yRD33^5(Hd{HLG5;XlzF!Md2XY`X-6CXKjh$x*4QV>f{)kryd?|Oeu z$onnM;BiDf6&x{Sl_{w2{0CXX%{~1dx8KAQuqN*5xsa`uDN>gxi!SR~Z%S~k3zk$}u_K4Hjp^^qxQl1MJ%#*J*avrZ zr${^5&De1rUtwR`I`&stap(9@W*kN2 zkbAbp_aL_t^(Va! 
zS3Z^(j9dE~=K{_A{F8SZ65N*p+u(Tgdz{ev`3W)_$<#!>#ybzY!C=kktN4EluJv+9 znN+v$wMHM6S9?36K9K)_xZklL%j-Nd*Bppa-1eJgJEs*Q4({>icz$t>*ZmPYf7@&I zEt7xay-shMKp#Kw7Frhvb!@yPgOZzK@;pM{CL}%+OVnM@k?R%|ulf1>9u?{`N%-j1 z!tomYer^#Jy9<06!;tPdgOYzZ*0b%K!LwMOcQahC$qKo7Y;PUV_i1H#r4&>!`XCTf zQ<{D0EOBkjq~>F^Df$-PhR+-F=)6Qy`h^$;QQ*)#;AS$*P`k#ZWzNVXbjsDrrf_mi zb!IB_n37>MQz;}y>BUSijX!>W=;D6*YlrLkKJw=3xc;fQH1&Lq8M=SjNi5+1te(3& z2e~B1I4>yYVemd&l8E^(IG>$&M(5;zTgP4X2fj;@BH!in@%%rj-y4t5b$lWAKh$yf zO<(fQ+(+31T(pMBKht=Kb|=5#Hp2Bu&Lo?_b%^Gm&ZfLr3jryo2U3`-}ejgD(wZ zoSQRVJ*SJh#n9jx3c2@p*jEUw2%*zgAUG$YihkjlT|2gGq4R%jmRBUgF->J*ESO0P z<1@VXYZ=Lr@ZvNG6@M0Xo)EyilBR}8m`UcgVlj(ROLu3R4CCH+#~IUoJ9{y*-WXIL z)?K)b3-DE1rW(2MJRm*AZ8PFTbJ!w&N;s#Ew|k4a25O1L@R^zv&w*YIbSqfa5gu-C>42VnVtH^t;OBMUMw(1D`L+F0i3aKCWZL_jZ3m-V!PP z9Um~%KQT<-@qS6~#y9kbc6~uzx4#&Ne{v7yiQSw_>a%O(KJM#1`#%}CizpHzu+k;c z&k|WVX415mw~p@Z%F+p=<<>MaI!Va4u{&fpdto_SgOqb4~e|Q{r8I=zjb>_ZwqSYQrI4mql`p9+Z3dn-`Z?q zvMEu!DTS`}0>T|f{L||qf5A6fsd0V?R``4luO^Yf#%;dSq$WvcfcgmLlY&(1-~hUM zwEn8rBC>y^XmudmZ~WI_wpuuLs}V0Utu=O=Lx{ZENqytK_6jO0zw@3sAl`?_c}vCg z`KKEr&{yw*z!J!=Ir zMQklGl%5AHc%DA5&N~Nr1W(t$2jtJg_);)jA}4<{z23{Xna)sy92i)W2d$s+P+^>X z0Tl7EKjKT9eJ+lFw|+o>#Fzd-hb{RvUiUhFUiCT)Q6@^cMDPx%%p+fYn*i1iM?LRH zkQHg_W{7je8|pkoH@&uHHHK1BBBW1+|NN}D1G0+gq>ngY)SZqY8kk+=ty>|k5jEH+ zF<^h}DaMy(K*@Wr{FfSIPaO9N9zh5-aLXi|b_MF5C~L93#-V<->-!k%>zDXsoHHBW zg@l);kN>+hhqoXy;3l+dd`=Z3T6Y>TZf9Wsc3%VT6QI*drJ6GV$xk&Nqhifk|F8Bl zs@;$EalOO<&W*nyy@EO~qe=^T5x~6de$%2_ zgD`$=hny~R8Gll}wqjYS$2`%hsAcu7w_aw~%jp$Lj8yXxr;d(%&^9Gjgd&tG$!2pQ<~ zM1}B%zFg$l;0#By$+?9YWwXBwP@I3y{S`-{ zwHyk|ymMrK*O_*|ohOAi%yZ8FJ6+nd_%C&BZ+%C7eky$yV38^BzQ;T;g`5yk9g9Y!aa%Q{t0(F1b1;O=9u$4O|q-U4Bnw}9yi2coA$S=prQ6=)p^X9G* zeX_)^(Qm$pl_S;MXMnLPxYxbvIxoYYI=Na8_h0Mewtk*Lv+kt|om9ck_yVorY}|Bl zzwb}*JmGd$d)HJD|KHk0Pja-01Ef<+Ty26PrwsPt>M%aVn?R%!qw)a-^aO|#rcCT zk>`ZrUbudxGNQ116~%eN(3g!oW{9;EX*{jZly&Ok)h0N-B(r~aNG&Q$OO(uF=f*aK zmEb%zaBAVPHS#03HZzE&^us7w*+rU}zzI|7VGhX@HM*S`ZPPNT;%i!n_c7r$RadQf 
z;1G763_Op2kWhhZAa6p;8IG|+!CI{UfCs_2?8g7p;yyAPZtdI7Q9;9j-1wS))mOjS zNdF#J|C|1L8^?cr;~miB3UcVRJRj}2)T|-Yoy(*-J6r827=ibxY<9-;qNEOA)Y=5) zFctXTf4^+;@?u)N)HuLCDU^kzbxIp5T z!_nNXBp1`Om$3gNL1TJlvmuG4KU+zu_K#yqt zp6fQ88g&}qeHdx0S^O_K=bvN*B9q))`I>vCtor<{5|@9zn;5(C`@7S=z<1hrUZUK# zx;;vDJs^Jqa#+i;WAg___>`HCIpRv17|D-k=lp-$`{Ta!p+FvhGXq~@P|JVU?(o;z z2J4@^{6Be=^uF)=9^^mcW+A4sj^Q+AMC+U}>c#swQO4xFi?@4eOgekYDf!I8zc}Fa}D=4X6srlU2=~d6ela}$)$%Bww1|8J< zLH~afF+nvt3Al&QdI`NqaY~iMaYO~0^TKehTDjMK+!IIt8Poge?7y|csNF;Vk+1g0 z9`09euC?3#M*7#934dw7BeI;L)?;c?s0bA0XR--GbM(rvp3Tlv;$H^lZ|#&XQtR_| zYYPAQKjMOUzo7oX!{mE?Lo*g^uVX@3BG!MQ;<#T7Ol-?tNY2fE5Z7p(hfC*4nrGxS zblbSbZ1z)g6YKM;1$RlDpSX?Hvf(vlTNe<6hyfX8_i9rgF6yu9?{-~SjD$I@p4?jF zSX`3xL-@6unDfofqwRMUa%(r`vQsQq1Z}zIR;1_Hn@x8iJ|PhI24oZg#61 z-{b-J-5W#etXsH-C0a-KHfG7zr&@mpW4NuYn`^CSMYdI)bUA@@Ml8y9VN?<(WWr$k z`~~d8-R?iB7)|KBdvsMfr_zm)uVtz%SI5LQu`UQ(v_IUGVt6L%E5a+RXSKw$egdY` z>42ZWl3(>HZkouM6J7>p+b^%*-E^kE;i%j7*sV>P{{L;=e!1~7WyQ5^aV&ozwhXo) zjU(cHJUX+jVK%m-w4^dLg&DjNNRuER`5*&ffe`acwq=pXK<>!NdR*C2wxwc0#CbGx zWRk~1nk-K((*|;w8dF-HL#h(XK4&MhU)uD%3#bSxuM7hU%oJ*j3sc7VA5@LXBf%hlg9yRJWGH=c%X z_TCNSEC1^>$+gc=23mtQ`5M0O9m*jF2Ip^aE6CGRT%Hw-kV7{>4i!)=WYBpS$=lz+ z#`9wM8lE+$tr$Lx#Nh8~&QI}7RUP5XJy41570R*Y+pE?|uhoG-qt9WHP?t}Gjde&W2@&?yaC%v#} zxi0$-wgVR1uKQuq*XZ>a82YW{c7Lhyq zvVsp9Y2Mlpxe8)wjR;ieBOXOD%u;KlaXa?L-FWftYcmU&j+>b7=mhNH3Hf0ZkKQi! 
z0lw@nNV196#k^$wY7Gld4yY+cY+`QhT>GE#tR$mTX54kIKlp!goHJp{f!ha88V&~3 z(_m!2MxQxyUnlwu+DhcsBw%l(cYzs>?Fl#4%R_&C@?dl0yG9&Uia4q{9+DxJo{(R4 z2!o5I{r`F$gzfgtH=W(I_-^~(ZsU^{R~%c?V&7adK+M#Ymn9`G>epNrTcWHfy>wkr z7>j2R8mXbHSYm(SOQOv@y_#&imvV8<)`ZCUIjXwsf)BC6u1vo#0Pn71#{U2E1YX=uHp)jC)4J=lMjrwSCz| z3FF8XlYzw&O*?t0K=W4^A`IZ~1M1)sFuyPP2F8O+ikZp!Mfo>;EUO z%FBPZ&v*UHU-w}xPFHg+Tr@tk4#*W}xBjks8C+J0`I#2v93Zcn>A8hPsu68DpI3ix;uxtEU_U@+J_you+dmGno*n;c&+q!apHhuHnrq?~ykA3s6pMQV& zGp^jJ?Roa}sB5_P$-J=|gjmRR*{al)&x7%L=HJj~T=RAvu za3=p=<1sZt#A@W+f(>eCS0Q;U(cgdZqg}#jLXr2HfSX|Z*mH?#s82&;V?c;_ewamB z!&iHdh*uwhUBdehzS`{N#t(6=>*TIErrYN3GtwMUIJO1zKj>uM@zZ(_9Q3bxrkXS9 z7j4s4zfSYt$^_49llF||rfsJdQ|(vk!UL83#TJ8n7v+<8 zRx`!CtlTM#%QQ`fn%h^upZp+e*+*<3$dUK1xHYj7)P|hD%MpXct(_VTR9j}72yxz0 zI-cCkiN@38+Sm1`0y$<^SFL~4%7sQuC(>};!TZe1@Z^u6C68si} z+_u{v=Bpn%wGA(4)8ButKH-^XT_6hH-3w2*>gF?CMN5fe_C5$}wz4k9*{6op_yRfM zg_X>}nAD8G-V+nE^ECHkWfR0_NLKJ<@0sGk5DWlQK&-!B`T3X?XKul4|L^^_l69}T zsM;y+UNy9Ej-f>Ia4HzcAOe|EugDs|pn>fsChwcrxx?z&R5Khq zxDB!!ihx>>?LEmFj=X$-zqOHr`-0B0$|sUhioQH)2@tIErwjM@GYa&B>rhevA&{}u zZoU95t73!J3kEI+0o?v!#G6F45r+c?KjlC#a{NV{(h4BW$U!(0CC@Za?m_HM^fUEw zV_*s%D)|_jXPA7PX~^cg*onLk)~lwkXYpx8^hiDmpS?9saOY|OFoiZ(= zGP9?n#XA3xcEMj)=g*F< z`LxW_*mn6_R!j?T$G_BiR*(1mVN_1&6xzMO0%(I?cFPC9m^^58I*Vuu<=(717ud1gP^KcmV z8JAx^z2}H+ZnukIrPDJ=E)AJvom$FXlrfNC+8!GfNT8NL1Rk$X3)Uc}ILa z(l6$xqPsV@#zaP>%U+WBD;n=D-kinS#K(=l zU|)kNpM|c`_owS{yemfrMgX%JF_UpOvNmj(%&jQj*q=%E7(I3QBq<)_7t0+c`$hcK z_4A~*rkOQ=HfHoOd6~suGhxl5?#pDh3Fpen%GraJZZ{>Jzswfr*)ExXu9iXf>)|DE zS9|LTUXG9NlT9a1?Umfw4ASXyezbX+?#^q!H@+;)P5EfgqeT&Ijo#{O(Xn=#C#E6Z z)mZp&m1Hk!GhH`uBWXJ_MrNV;r$|n&AI`>%Rx7af^s@53kpV(+39L67^UN z3ETVifTjw@pmyNOzyrcaE+D~08BusG=@!$608IEX5X1{W0owo(uO3j2&l6B6aD=x6 z3eEKq%QHbTRlJV%?%4a^G%~L5nF36fYb@Uw zpKmgM!W0C^QyyR(4ORHO0qnmzBWaI19ggMg!^a3*@Sc+CW+5Agj5x;$X#NQ$x(u!mO$UwBPRe18A_ zdzmQ&nKsuU0W0_=U6-e5f1jzgd}By@7%?7yN}}6i47|ep`uATASfU2DCGh$Y%kzTm zVe0zsV||B+cQZ5SOR|J$*%^Y3IX)_@ByifB$do^Q}GnHl0=N 
z{nzrCxde{w^TXb?y@hxNW4>u@bX$La)WF|6@>~7?cCE`lL`E#McVP+v3g2%Gy1svw z|66-}Ywx@LeY2P37yC)nzp~fg>|qMwP}_g!K{fWUuOJXQ9TG*R_&3&nMSDF1%>&)6 zKOVjw0zqDB)bDk_Iu4D?=KEj&O-ev*MO4-$n7-T5FFislT1XGU3rM1L2El)Sxe=E% z<`)CA8h(3jVlS)(P7BnOwhCnDB4I8k2gr~CgHB%Lb?NhIG)i;LJfhb}4a!^KUk$vm zFi~eS1Dr01Bmw4}OL9Qc!M&g-RG4&?Z^IEa%_l49fDB_c;c=D+1sm{$;tNl_VG;j*)T^Gx?GHMw`Mh;rn!i1!52K=7WNx| zg?v}o2fl>-SlCZ|ImlJv*nG9i`xwpm#tVo!su)1qJa7}HtS zGzr`YHWp1%_l0eCbY8p5bn``Dz|D4B5@%DlhW%5$fAcAY=^PicSu{5{Z)U_e|^bdb$aLxzm})))a`Zq%OI>|?>t=EtLmkvhOgc5 z!|5%*=A+f3)7?7r`SW7hUBAq|=k$Y*2XaWw-L-%H|NrjemIA6|1!Oj7vjwLF7j@$# zIVrLUU*|p;_7HaSIoWc)pUr)s?%aN)*v;&4&3k;wV_SiLwN=1^b>wsO)0!#c#|l_g zp<-XrQ&6ZikAvJ_g6UIIR>&nU2o*z^iW(yUyffsHDGqD{##liaCT|oM6Z*56&SOO^ zGN_PKrwpYyK)#4nUV&?hAs<`9qNr#HWv~_pK7}a6GOQ~gVs6hC6&_XMeML_ZS}Uke z%1XkN*KEyymoOJb(B`#kQgYvf#0TbXK5GwHD2&-UT7n(TWL`nAna|wioU%2KPTrhi z^aM%ylE6HwWM$@JPHi=|HVf`EVkV)L8nb-i!!EI8UWx1TVyXgtA1HlaK*XG4J%4&> zOcmXbG$oa+DACWOTUc@*diojWLfSpDn9s^(1$_;{g$*n9nw4I@>CUkBfOcR$!5IG21M-9V-j#a#J2FNGi?}S%KK3 zb0I5OnTb^>QirqxxfXeqb9}}ejuoWUwN6d1CS@g7g(#?^6!R{uIXRs{bj@f!)_*Io zWHcXtD-!Qe*_4Z#dYpeMh)?GLl1&9-o6K^VkYk=~HYx9x`8-C|nJ}$@9qigx!YbRg z^OxZGKDeSj)o7xkN2tF9j^A;EW0Wj#!$rF(76%~d5J>8S;}tzcVu z-6*X@R7O@=R?yw>zC2bWsN$}b#@+Ciw&b(2tVnoG7gZ%77TsA|MLZVRu8F5ud|XsO zOzb1#z$Q@4XcYl<6Hh^uqCJ@I48b0M3`CD&{n28#lSL8c6@gEVePYh;*Adz;F;oBg zjAGV&KiewP2pxR?s7DoSf43d(!&vt_AO+eXwvDjKLfc`FY78kiVg<=qf%$}}CUUjk znGcxmS~IIqtOzM*b7dCTr;IiBAyE){Jj)c2dyfPlN|!eED%Jv@zhnueCsovc)spKr zNwL0WUfoj`wr`Xi{3WEm&!`nw$BL9yB?6sp7xkGCoxb#aW;N-mszj>Oowz=evrd2W zea4&gQB{djr+c+p-`jtz$d>psORhP6QZO@?JhrgkRzAfDcyjBP^OPNvISr(L0Fp&G z#+1j38kI&(*siYAiY_GD3$NjSRDo67HOuF54~27}g3+N8K%-fdRs>gQ#HwI`^$~Kk z4~DGQST{*(0|SQh(QYrRNN{gReeYZq`dM*(1`7Coh3_-MX?3hcD3BinZK-d+5i zTmESNFWzGa#2fy_dp7BRU>&ZTF(w}Pr8tMcW?sRLm6=xDR5Ggn`u%b2NT>2xnOWuM z*~nkQ0NV)b`7h@QbdvRK1xdIb)1Dfm4M~{0F`PJiQ=sQVzKPRMFRDWa$LqPxmwFzJ zLt3pRfhb9^UB^YOfV?V8&$j~S#7%RLeKvv63XZ1Nn|po{NN(1Dp8mc1_j@>fbhweN zacx0x><|jX0en$c$d)H?9V09&t(1@?hGnJ1mPGr7DD}MQ>ABg+)=?ei+*rs87ADT4 
zf3Pu$bMtK-Ls%#mbgW1c<_oM}h|DxP;kDFr+7jp=>OSW+K1Xc1vB<}yT34_)qXSTP z37)u?Bc)W4a{R}C@ixWr1j{i58^?BtPm08|ywd9m_fIy%+%0R?Iks5iJek^&m{Ydo zwYB(B9;-Z7rm8A{c}23dDD-@2xdoiA?YW+qXg9{UJXBE}yD7-12pwIv9vk>ec<_C+ zRe0fsr=XtWywbGQ5@2S@6E_vEH5C-!(y(shInG(Pytcc41j|jfdR{sS1S}Gaj=0*^ z=3n>oda&jcM#WmL^*C#eewMooq+ed!-liCxxBX)s)^{5eRINFEw9(F^1H6cCF6^$m zm{S?JoH}U-4JZ!}7i2d)pORnRAOT6Kj)l^Stm6OhF_L{*su! z4^LkGYhGA?>b2$1yv+Au=nZ!GKDxi?y7_nSgSJ*bsQ|}#mNa$P^uO(`-P0um+~8L` zu@yCf1FvDk>0mwf+uyf+pLw_*`Sy(et`D=fzvwI2KPp-5_P7pbqDM+T1WWWRy@n(# z%x!&5^!oA7{odAfs^=F|t;eLwIVCnX-tgHt@8t7;W&3`P?=u(|)v1~Sv2=Y&T$2nv z+&}1fFvT_Dph-U|QU<5W#Cejqy1&H4Y1XZ{&So=)7U26Yd_N!n#Ke5_qP`&SYLlH6@k;T%7(`fqr*?_u!1)>Ted_lT*L=>>oZdwDyD7cjBP)mCna#5>hPwI;ypt`b$(nAPuu(k zuKDiyuYFpuHvg)?=NpG_V@F^AMto8N%j9Uge)fihVE(tb$zL&!d7t$2+q_j(3D)L+ zeaEs-+q}>Ex!?PYGr(s*ctZj@KbGAb94f{R{*pqI|D;4w9V^c8*==5l<({^AOubI6tjT|^lR}rf&FkuCw|RG1j(gs^uBmGFofV|Z3N`1A z8Tw{$&RU4#q^LbtP)Fj;zpovAfxaq#lP8YmPkazgIu7BEiSd%FEgdI(!iUhmgqFojto-I6_m49Fum99{+aK=I`j zo_|mYse(P0B#?m2pD;%Jq{$(Fz;hQ<`}Y~x31o`w5m&jCAo08>)z=9uC0XNNWPt`& zHrI3j^dJ2$LsI;2zl(=##K-)pAScWx1pW>~ea#Kn2D}MOXL#HF`r=Q| zt?+Mx-_J3gi7}Ys`872lkyy?QR)SPm&Pe-U zW%K+AkUlsWtQ4HET~b|tZV9iFE*SZ~l7NY5|v1raOr;4bi z-~RTy!Eeu{3cvqLyVdVwiN;RU;v@ZlCabUt#_L^X-xB zdj;fz{i_-8)OY)T{Q@?^@gum$51AdQZt+8Z{6`2}fhZxCWG`A*U9eY(D&hcJA(yv*qc9?n5iZ#pjm8yh3vos~ zV0(yn#8a$-*dV@SUoZs{2v);1AR(|WOj8mnHh^gfL&ct8T9FB`XP6GiL|Fg{BSNCY zx)*9x5ECfa!X5k5t(N7+`f;PxZV@m0j#>qZCM9q&~4KBIbdm%zGii(uIbaDZqqMq`sG{t z;Fcbm%uQe1df8u9i}}K!TdU5t&;4w>`FM52?cPTrZuf5J?cQ;cT+=PmmMvlJZ;NTr zrVDvXx6C#@5p8;Odw#-Io4#hZbQ-qz^M0FuUf$F1&v&hR`t98~7pg=*8;J-w|TGisk7wf9G?y`M#G{Uz=DN7Gw6SHbP~*I-b4 zcFdgUG3(8YYu-Az*~yxp!`rjgtYPm^X`6W-)x)Lfzg)DxW9*J(?c_D(_1@R zM^>BO+Bomt?@#Xcw|2(6Z9A@$ew&`O>Aib;dQV@=Hoa}<^}=k^%l7?xZ9A?r)uvx= zcF6m|J^gY|Z_D3r``~(YfBwCG`fWddSg-6``q+e9`r!Wla&S*C+w^<=<+M$|*WY8? 
zPuCmLrYCK?KSuX->z;nQ-@W(qlC^dtrm{_M>#wxh=Zm)fUD7_kw5)r2`+VBgU&-70 z6Km0?|38Gi*RHKhx2|_9);iitY>Nz?f60=`IcH8Ra?a2Rmhasf^ZVzVOY)X~JdR4^ zQ6*YYMTgejX8dVi{xko-`+)LyALtd1L7Z>c^@o16acxhstE`;6;(^!MCeH3ni>B2 z&sDZ{bNq{hp#R#p{9orb{Es3%(trIx@S3X%`L8V9O9K0!1w@H|di{@!>(4K>pI1;n zfBH}UC}utW|6KN%k)|EG-r3~Rf23mf*IhW9N9fk(s-KrxKZ&Q~zGRSEuiZT3pG9!y z(4ZfhP4KJ+p_4WGSjHOw2^@be`LyGm5E)|T3XN}TzyZQr#20&O(`eQK?T|@+PEf78*L>TZqV&bIWjKkeUH6!#E@Lxm^Iihc6qzVLq5Y+&3F*M?=N6D6*U)!N~F76bhn!pM8dg%9bXUC=o_;u!&4$eLOoXMa| zh&Oti1{Rlpp*@Z=e7+RmN_-ONVuxzxYG{T`48>W#mlBoGmMCx)=Ld`IGtjK_RO{o^ ztkpqnN93i>cKWL=Cmk+qR=1)BMLQ2c&N}naVOY7itXze!VC!Oc@5+mR7>usmg934( z38UF3>)UHpT|qWI;3qL;CKXYGOUh~E$TiX*l!Xv1B?P8T*Q1S)p!UM@Qo}}Hyjs5! z(8Jnt5H)$$#ErtSMcEwJf@D2^H(H%Pmr9WZC4WcgX}47|n)W}rpd0S0`E*UW<5PxT zwr~V5p6moYA7V2*!oP_0K+#+u`>CWMgf%eo`!rmK zTkdWzajnB9i?BeQx6hmQbhqXMX7Mq>owO0mozZ$*J+qw(yJ5TLes&=vTGZqeKisfE zd!6BA=I-)o9?_tG-(tdMfuU*2?I`Csd*^^8bq{=p|Sn1BsunpiAp1pl<2wOUH4~W?lD=_`GxP(0mw=N67zkd( zQf!=X^^2FFlLZ$?_bM{;N(@`(brI?@2K96qr!SH54f+WfTelxqq)DZt{iTx`|6cH^ z&gYBQo|Bx$fIpiqmAVWb7r#$NP$QjyGqWlV_3QGN{*whN7PLn_V82kROi;HtB&*Km zJ093Won0=MK9mI+0=Wm5S(F7Ue=|x-GuTh_lS=h#YwF%GWty*#290#7%$X`@qLU^1 z)8F*iO35)iI}*$p@4FS^h9u;?S%|CQTU0QGeQ43M2dpBXHwKF`RipGQK{@e@KzmcsY|$0AH5!ZnQAATK^h6-scm(juWYowEYd-_LUJB zv0D)NMw^tGXig%gCwUY~n~s2qmyPokM;gxBP+cD#I`=l3q{uE|8|j!eNgc$>#MR~s zOF^Kv1FZEJ(CP1TW(VR@6|x~({L-!gACsiw(0h!CnB{dOe+@dCfAd^)BHS8Zxa--Y zx%}`fG=PHli!fC&%AxX`Xl>#~GA(WJBaE&nF`#{pO07O$e_w0JlY3}-mc2vy<0fz= zfceH2S0PF(h* z>T{%IpnBCz>I-omf7wNZ$Mc6zwIZ8$J*Ly^x$nB!I!)LVybpVHU8#^WCRMEQv&$5~ z)r5OQcJ^gL#it3XhtNsrxREsSsEm4DFYEmJ%pzU(xv>~W|DAfYyFI7$O3Tn=hhz&j zk{JN!rzgFVJgJl~(-Br3UfRbUIxHhh{EO0WfJy;>&6+@xfA%^=Yg4Bn?rshhbMw2V z;6Yb0O?zvK*+{T%E58>tZOSF4y>TG$)>PXUmp zv2JavG$B#m-{F($J4))GaQ6bfyS-z5t>z;>Sm|`xf9`5L>dFx1GieeHAjiiE2R^?lA3h5+Nf`DC&?jnh~_phEc-!ynhp{R4Sv((B-boooj+ zvPd&$e>uZ8&Srv!DiXX!W$s?jMbPv1#454vUoKtg4t!uEX)~ETysW05=P2sjFb2d_ z=6-rCdU4Jn!_EZr@q3{5yeEiO1Y{DC+QB7ch$*j-u@-zICEVZF-pC;JE#I$w5Pb=i 
zoMI?kF{Czo&B#}PuI`NB?s68yz%Lf2ifOHve}J0SXoa?xp1G&pv{vzB6YJBI$GVrE z-W6ffMWWGtvzkOo>m78R8R$8hIH0-5oySb>I)XtPpX2CoH_~$ebl%V9L~m4rR$|=x zqL+CjYG8r)UI1D$N_KV#@SWG=s}=z^`;#{1)RyU&n#B0NodbHFBqwco+E`@eFFM$+ zf5Ptewa+td3ePZ*0e|!u+Qtq zL&{c7{VSnL(?*UrMhTga!!MBw9XsOyz_oNSr~?o4V_z z=!^*Y9JLtZo;4YBq0jkAzy-_A4|y&WHy2kc&;bZDAk@1+V$?q>WZnd}>+;-MW|CQ5GC5)7yc*iw;BS;&nyOVs8dSMpCPg4uf$fU3OZj<3`t(N-`uPU)$Y? zhNc#;CLw82MbI!wKheuL-j7&GUzR!`dCp=7d zm${tE1mz79yXh=U*{*stklD<$ZjhGKJ+HITO{i`lP9$&%3Q^~mm(-dCD*@k@n*|ns zfqeR#ug=4@kY`Bw+LT${s4WXTJ3VK3{sO6i?uxVU`2KIl8X06{8$!J{b_PoCc{ zuWIbmex-g*{j3#YPuaU&M9OwGkrb;)CU{>g_xN5UmjZ?6aYeI(CI{_>1sFbLI2=5u zoy4T4F|oeZ&sy9X&QXl7wuPQd&H^+?#G;tnkvJG5VrXvN2wMv#o2jX6$Co6K6vIHZ zT|Da<8D7-h=mnv^Kw}7hVe(Z0(>GDz!pl(;d@v5oo0O04A$B?L8Fy0hEde@f zBLjWT!aU#Zgzu#2zkR&;5BpNDuinwx$rGEaPI&V2E`7PNI{Cv8VTI4n&XpZC`iYCT zsu1ZbmmZ!4FMsC~Z!KMOAGX!NO`~eizlxpW^LWsrDX->ikQ7&~-l9%az|Y~LE+d?D zWlNrcwp=4UhrSz%qu;Zdz+C*$Gy8C;Gr2KfRbMG#gGk%ybH=rbYq~-tQr-21V|8!C zIeDA_7;Yjl-@elew!17?tkY;p>hr=t%bx*?qO_5`Z)FvI<_G$!QM-l`n>^2^`Jo(f z8Ze7E8pePycrkc~Xsk_;w9juesu)XLC7MRp{#GE#O32h=rLIf>?xj0ZbVu-AdHV>K z6NtWhIu>6nFix3W0D4N}9O7DSc~&7&hO|%(dH=4LFrNh&e=i7q;ayJsB-Ar)l&B=K z5r|5oiD?G%p%`q1=7Ugb07|4}$aWnAur^&jc?@|SP?eIlFN@pLN3n|-VDrfxVaE>K zEPS!rq9d})NTQ5-9?jX%#3UcGWIH}FRBz=YXj6F)5%yjVYga@8=tKaqE6>$X zT_K_=Kps@OL_*EX-Y-^ox$#y@s`=Ws@yMq?V<%yJ!);#wOB5-}8rkYRbH!o2O@XVO z@5}=SC2%?i^isg!2eJ$XO_uZD0E#PS0*z2Q2HfhwfAK-XXz7p|1*bje4@!u>oQQzx zhpoa`80YmNzP?Nhi?pF)4YX4aWJpa-f_V9qQUsLA14klWWK`R0i<79SwgNne=&zY8f}4odKT$((n5~3-ZK0qz>70EFlO59`R7%7V&z) z@1IoAZ+$DlV#}0Q>ARGoMva&jNmAageBobre_(S&#lO?4X^8G{?);fo_s&2R^_=es zioV=Q@xZBsJG^2RQ2Q$7S;9IjNRES=KLwPnTPaw+1K1t~Fagt_0;hcUXXx_kQNS;d zkB{A>dNV?)?Y+}3SM!=fvePZwI^|z8z3jkyH5$_H0S*fvZ2HL5R@(+n?Ce{*CX@Ew3=etfmz^Mn;*nmB~x9a0_dXSZ6? 
zp5;%!!l_1{QEO0C^dzE|>*i~qFlw!U@2bqF&@!bWY$6Q+wA9amO)tb6(mIUfADNf z<*U=xj$1y}eW6!PyxLCGp~@=^7Cm?HFvP=&m&DyZ^5XX^3beo|<0mavxNPhQDlWJT zKZztKtM*yaq@z%vNA+VxCk4YBS(Q%hkp|06go4C9kFK65F$YTyaH@o+P&g{p%OY>7 zj9j#|RwC4~b(fo>R>U0;=g2S|e{Py$N=3!hRu|M)xfw90NEKQAaU~dS1`Ms$?9J;Mx@a@WL!7O~%BV(IW^(J6&m|d0kT2r5o^pzDx z0E&6xT(6EP)(G!H(`gW)=&v-Yx-w|t1Ai#cN!00C0|*DF%!^S-L$7-ff8#G0^-0{o z!27fVk4zbXpAha6?z<b1@;ZOoapSm%4l2H@7cYT=9$fA3>nSCOJ#o?M zGZir8E&~UXm&Bs1S1K`#YhCrlCCX(S4oFIHeO#F+{T(Vf&1e~$bxqw~#8IL@q_n2X zzQc0`QThg${rp|TDBvZg1*CYU0BT(G{ec#sJT&8;3qeo_L<{{Ue^G=oj{A?A`K^_} zBtmFiMDBuN!b>uu*143Ag9706PEf9^!@}#h(6k{e9$=N}n*zd77lEPGm=4GRl0E<{ z+sZfbMK|LNzsim89Qsxe$#|xE(WHo*r59mcCd1=FT>-9>m2T>4iTG> zcLSCrb#Zm;;Zn&~f5|0l4!8Y@XS5RKtiqw! z9@KqEeHd#Z8^K~!G6WG`TILB^n`h_~a$c>+=t-T`V&N}~QmLqP!8a!-KQ>hqLbt%! z<~1F@GLYZV(5r1VrYt~$`+L-7U35=`;GkGN)7RgbR6@6lf2qLG>#k3)%R^Z!l5oR1 z3FZZK8xYpJnDE}&71Lkke=NY*y$s2jP zfEpEunoTk1*R51Vo)oM1+aIPt5x8I&7_uYOTVl;gI7uC-+2db@th=_?K-g}r_Apk6 zP{&!F^4F=P$WyGOnU)02NY9eDcjmFJ*160_QUJftm$aq@7Jui61NvmnxWqpKWG@yg zH>URK8937`86&@Ggk{026(Unw%HL6>KX#35;<8(CpERQ8$?x=RGf}n`-p;GeeR#>96gl0(5Y2MOBAp z0ZAXOUvludV&hQQ;In4hLI{WZ(1zVCa&C;+Kk$t+D1TM%Ae^vzqjIR;zw$Y7+!RGm zruDDuMjTsElt{FMjOd2_g3(UtXL7b(H^2=A<(Wi^7h@}1Y1Jq4B7lo%uzHpv{f;4o zGn|(QF-D$aZ+E5T#j+zUkI+sFBJMSb8}bb@S|w0Kb*JDIn?lIE*0sdCI}1s+L4*~V za-!5*Z+}J3wHKR7xo&ABv;&Kg>0DS7d_^|$B+pIHwuPYb#UK~EpWe%nKnbJ*b6_x@ z6Eo!Cv2k>5y@3^G%ZB-qZwZfrPc5hh`mz4*!&QCCE6@7Ry6tT`eF5q9EsPY7l4>4p zlaXyHy%>9VZ;H>)<>S@fouM~4l-!UhUYk}pP=AS;J`X~NF+?TOr>P>5TOm{>Wd|Ba zBdQ)`#O@KI`nYv# z{x%8x8u+3WY#m{srq{ZtsODK{#;HE z(zeV?XwZS%3#?NKL^V*nj8`}@P3rw^t##DdKwQRk3}MKpUu$^Ii6vv)x_7OU-gbz3 zO~fk7u-bEMU`>qv>wfghE$Bb~^MC$-orV6puaNV{R|xC>_zDU1|60!Pua}VJKYyQ} zu!vvTyXarBa!l>a!lNR_5&vjvGw!dLIpZI>2)e!aJw_AgC$K{Lr3LsCdGiyVv{tlX zjJa6+I2ZXpjWXLWza>3xNwP$WCP#mbFaN9M_e%!WPLLZ<>VJ=dk$&WmlVK%0!h+`{ z6n^xQz3f*M$4^RF8c76!Yr|$vLVx~83h-AW(oc9VYs=7Oep3;K%Aes9zvlBJPe@ln zm(U2lXNd8Wp4ij=n>uYMCM7h&P5F~tiP81H?a}mO6Ja(AM`F{o#QhRz?tW@cqhHi4 
zd}XNdBR*KbzuNm53NxGe`dTRSB)_iT`Ky=iSD0bZ+bfcn#xj(2@Ijy!{eOmqI5zwn zrb(>NNhju7ZX0K>W<_=8U;S(CxBlv{PU)4@{gjLj-P;EP2mpN$)A>$@Syg!sd}byv zjeyy>=ju=iMzX0>&yNK`+tp}A`c^(xsl>Y zdY+l1w0Czftrnc7G9+$b@P^&O`%&;4B)4tJ=Hk z916V*=lcu*00QvMJdn!mXmWy}Gg*3%?9=#keXD*(>i{4B!+(O*H(j}Xzg?*F|JfVn&=TN zyqA0onsQV&qH9M21AIZ+UX0vc7!gtM#!_1}%`tWG5LFd{l6xrV&n&?K=+%xfZJ{+= z+Q)Avh_&ud+JCCBefT8&kRmZdYgD9BEIqxW|=_lvCtpEmj z6o}T8tcPG8Xs7j=z=B9^^K8LEc6~|0hva#aOfUffqvSu5MSelVfgfp#N-N8{8#i3Y z<8KweAZw-2+rc4{=}93r6#gnbjJj#21&i~=@De5!n|~=kKPeV1W%?ptMy5nu&ljSs zs4wPQ7f3j!f5aL}OIusL#AX$}NS|bdNGs}5?_rdz zH)b^P=I%h>O*zDv@I5Gx_`)$4jSD*>@TLze*m_sbSl_pRn+6c4iVHy<-ug=!ld|2x zj{_Ia8-Jh(%yK_j6>pQ9{Ow;;v_qwmM*CaRWtAE2j5ROs4`UF^h+** zIDc-G-Fd+>?zPJWo&x&F0X+)?%%{E#ISHHA6tC#vbA~4Nd$dJl1(C^r& zDpEE~prKN(gtov6S#7PL%30j-T2nYVh+WOaK7HveZlO(YNEwxB18Q}?SbblEq&|lr z)2+geLSg9Fo=BS{e6XdeOy{|`4uFbSXMe_FbG1UcaU`FUMqtpez{AQJkVz!5{*}gW z&RNToP44T=b>IY0$komc;Q^8nQ4}NUt$>8;x=(BkkulY!0G3%$q`?T|Nn6h8!f`lh zpx)-=^dN216iuEykohxp?X#r|Z0F(rJ!3MJvTvKp1U)yFtQN6CP^x{8$&h*XI)5Wj zyr;z&NRvCgVuRi6CAQ>Lh{#oa?$WH{&ADc#oK|@EcU}VU6q1TDuJO|B zY9!5MNptL;sFH{p%?;1z;N}(!jNk&R>C)&0Ll?-FYu5crV!)eT|+>p_`GDR?%F)o>aQUT2nPoqvh%{At&zuG@tD_5($9vooby#(CgFnyv&%BO3wk zQ0S=>?|hSp#X=0gDa+14l5bEAkTUNb5Tl{w(Vw78q6JwvP3-WCykl_UD%vu49ft=O zXl0J(i(X~*W<<~eO#KgCF;LQ~d)_uXny{Yn&P`8&z$f>4mt+=eWs(#iD1T`p)cI(~ z6|ffp&Un!&Xi{-ww48F6L1-!W&8AQzX#PAn<5ORb?%`d!oz_gIwt-eDLW6>ucKqx~ zrTborONFL7IzjJBjTWuDi0#%iA&BJM&7F!yBjOrD7fUBfuoG=lc~y~P%HuG(j91xV zXnf1ntaSl_oJHhlg<}{0tbZbR%MB0MaWUR4+z4(!1p)&EMbc5#7nY;6pwe_Y^DmR6 z8`YdmqjN06+Fr4+&Xo?fHIW~x5Zc5?PgpflzBaxmXU4BsM5SB3!xyO)ZB~rp!M^Q1 z&|-$+V<>(2s$032D@sa}T7jLr2J+>zd_(a=8aAdMBQ-5wv7zC_QGdA$iCRygSgI8vb$L=$Jtfn(-JW?lXtLH4` zRxRb$$U_IUjKdjNaewjw8}Z%M@48(2Ar$dpVHwK@dy0j(nyR=2@O?IX&BidfxjOwaspblKfTitMbwyt!Z#F#lNet$78Y<9z@m3F{xy9Xh~ zmDTXa`h?QqJO;5+S`x&{s+$kl4`JTv8nx>fjS1}j*|Ryf(ICMo8?@}*OF|uToml~_ z1iH7fzwcMTd92`bovkvg)7$|aO()KJNcZOFI4TCKd~!(M-*^3JX;63~<90MmQSKy~2`YTY(a>$K)n0*& 
zj&ghVZ$*71`Xc*+C6R*z3-F#s(^%9g<#)+N1x=4^=JJNV+zY=6_?yP<~6-=N_z_KmNw#{m#+~ zYxtsjK@DD-J5po&mM?3~-^o3>rn1rdNP_OSqDGr+lr|W15JZge&K;PhCGuRqHK(N2ZD#KP{AhoOv1#rgqz?lR0WUXl&i-x|wd=orK zR9pQM5nVHcc@lqO;W5Gie+%&{aJUXSjMX{J21XlX7)@&AX@JtPw3xdXys!cg7=QC4 zbPSI=S%XOjIsoEwKTWI*VVNk!hxmK|py|re%K3K-^%MBaR|_&|pyzY+K~2)=boEn; z2!D7a$V%U_>V@lrn4=-zx^MOvjz$4_vFzmT(ndpINz^qT@SAa^GnKlHLlonUoRjT&M4Rjy1{iIgnXc9)0~p0CTEdA zA7&a-?X!w+>Ix>UqKe-zSDo=Y`gTE=Pvyf=P~1pg)5A_rdJl##!N#QrO)P49D;n$} zr+0wf$?ffPvJDrgoIYiwBMUiwXm4eN{jPrq$5VMIW@d8-!EYy!hB;(vv}_eU5te%P zU4mfeByU!u)LtAcfX7I>yXDfS5@=vh(6T!*e#oAfx8A+Gr(*)yX0~SD4%E{0dQxcH z){MN!+4|mI<$UsR^vrs~vuVeem&6&276u z(BMU3YBaGCxvtkK6T_*D)@2k zOYRFx4gs+*KJTH;w#-~?DmrbNV>L}%AnluWQ}&Tqp%brJ@yNh?CTQHy_34u#eRQoQI1m@ z;B)BPSs-O{uGt z^qZp7fhjEVRyp2)#rsgBKH@<^Rz5s`%X4GTr2!k;%y59(c6OwF7O&zJ*EP zrpRZ*!ma7I-&n!Qy?7x-t9*Z8@F)o7?{jKH+H#@XTqD?fMw%R@5fD$WiNoKk$2|s^_!dKpj%rUZT zZR|7ki934sp!1R|6#!JKp^L7A0lVPF@eY5@R4KWsMTYOfW)C72U*om?SS z(=)fR8;IN;21lrp!e)QYXZ0Gj#thbdFc=6qA?Qkd`OjPT4hYUpIfCG~0z10(go$xp zOlQYKG*HV^W2W?i5!Y~B$M%Pb<~a480_e1?NQiiYFY7(d{#_8TFkTlUS8%ByLarr z=WcKx^1-PJ9|$R6VZubt$dj9NDb*3^HB<~qtzeBOPW&1=Jz+u@7WuiXMhGMy8FPhK zUy-O#giMeC_q%^beoCTAc)C`_9;+dtSX$qyrBNYpFbs6d1U^HNhhLSui3_W)j7X91 zNxqhOxaH^VY4_C%?9m{MukjJ;9ew`<)iveYrLzb~S7s)YTaIp+2;! 
ze`0k%Q?YL;fJqfV6T*AYmqJ# zzL&t?vl_-Jjamo_aAbcYnNQPFm&ORw$0BPdPg8%#qD<2ry<7#YDQbk3{Exz7;n3kf z-(G&FYvgNI`;l$RZ7HPt5Bt!DOanUa)h@*Zyl~cu!R#N&vdf+2W*M=En$309x>kq1 zFYQZT52x??^Y3&=Ki_kJ^qTMBV_B9@bJwq&bN`E!P2d2+m*Col{O|8A>|Zwj$hQ6Z z_~C!q|ITPfn+eV4Ecv5(Hu|(c8y}jn+32DN`oVq#BUhS}Ipm^!de%VP)-{^*x#UL% z?zU;w4qBRxxwQgk8k=eB(pQ?Xx%FZ&n!;$|C+85;vkgs;^!^2@T5D*g=X;w@=$EHW zT0?2p=6=ci{fy_&_2v+O0Xbd|Q_2FDFLrW(oCI-)FtZ~g*`Se(BrtP^<{GOuCvJ;fkJ&dFN5js zZO5Xubt5l6o_YR+)pGL1RX0t=|MbVy^r(z76EppM*W(;9G~c{5E6Co=H|;1fAV`1J zLu<-Z1>WGt0tt*+XPc%8P}laTOE)2l%Hz;eXddPf0^GVI}=gy_MS{KtEE7> zX}OBOOcb>sC@N~fllk8##l1h5_xpdm&zX~x%uaTsbhWhWz80nJgmwXi*g3e#*GxpP z8&9cnP{xUOU))N?i)Jky(!4Zf6QuyxiYPpXX#G)m4BCC9ol)vmQoLs6^l z#0rMjbLhuhvDoZCii5`vT#&ugVEfO*yUAH_v1MyvBKoKMa(@SeMkemm{j_EhcPj3H}MDsi}3-kxHaZH`TI*xT1t z4!evo^|?&UD;Oi~=-kkDamas-G(}Xmc1uI-Ze}`Y#X1j=ihTX}U+cBKHlnPGhDawXtv1!mGe?kL$bBzGv6;zEEz=jmYVm2G8a zMyeoNV>{Ya^Y?Lb2$bes<>e3|Ep*PoD6#j3cOQvPd5?HCH)4)d@AiKT-`nu8Jjd!8 z9|hMRdxbr>V|TFIMt4YKh0ewPNP?5k&SE#;Az*Pf3r@(cxuzmdIP9HT`_{ZM?tkn{;hygtXbRv)!b4o=(AA@9;YRvT%V& zyw|nh7M1wus~dfMbDMwbJ-~*4FWk+#%<}1|7K+)n$poG_JvNG|P&&<*%lT+(?hoGW zg`?zNA;z`p78$`_!6u@Fsc10V*DQ77}D|@&fmM3RmOxe>rb00U)=n|qqzCVAs>2f(%PdK{X9eLFA zQ&;dYMl>jcV>Axd&uOwbC@wMzhnBBXVpnOK8>8FkM(PVatedZ}tk)~l#ZEn;X>-%( ztCx3Hw<$5!`Lj^p5jJ)tNaADu8f+)cs<72UB~GqM^Rj^{hk^z*~{JLGdz9xc3;@DT6NL*)@+%3R)c@gx9TdV&meHsqVw&bHRz&l78~n* z^NVTzChN;|cA8kHv8!vvZ1@B%&~Jz7+T~S!K2J8CyyD z8yJ7a;(l~588)w5?7vYRkLKnuTBQ5z>lqDqidfugdS7ekRowHuvGnD1w>`Ki9oorr z=z;OBY|>7&@N4t7&vYRYr{3(*gyHDLN^`ZJzxT0Q74@?XyF}SezoNb8CQBQ8qu@%8 zaj`zP{5;)%j~K}{TSuDrg=IS?cslk;91nxb^oQ#xG|hORd`M(+xbh`edVg! z+D1GGzHVcD@SWG0B&#Xic?n4ukB53U&!_ouz8IE%I@7^tUtSvSp>of@o^aRUnE%P7QRxjT^kZtS*O?5I3Wbx<|=eQ(y)J_ z+_sxIEw8$N)UR$=x9>f*&Nuh{-Y+-$Gk1$=_hvJrB#!1T8^dM|+``?yII(TH&0wnS zbAH_F`^UBnly-M_N27b|_?k5;*6+J-ZWgzT+m35@J6VNKoS)W?*A?ES7}zNvO;2$? 
z+WY3&IuBR%ds7a{>3CUQJgs>!$3K5fO!TU=YN5UD;c#i{t)`8p9LEM>X+lGXTC2S5aT z^y7H&wSVTT@~s-tR2e>ZmNGjJhS_pA-o)b>C_yOK_#C`?*zJfXPQdbmJ}!S4(QJqw zj!)}I3H^P{KPb;uj?Pd-yso1U8HP3Bh7n9!`0=8IbI~wM0JT z)%=>AgGVu0D8{@o^5uFxf2YMkVOC?jEwdip@1&Zg-uYsNeg|f@8*QU#;U}f>cpeAp zLU~nIG)&EdH$6V1T3c+a0)Kz8<)-NFfpnUs|0f`o+dtn}cMk`zflSVA3f-e6va)2TBq*N)HU*OZ2IfPZa>~D`ucEx4qkto8TW2e@^D=*CNI!pc&lj&A=r~r+hL)^-Ejp>ud!ICHg0ITLvo!ssUG!JCIHX7J z?1$LgZ0hv&vdL|GJ~q2FogLb1wf-6$?61q|Q7-NGn;s5y9+LcgzTRC?(#8aBo|gwHK5{U znl%nqvP=A43^adb{@S&Rqq{DG1jl%@w)OaSino__*NzqC{&ud)LvKyF z(hv13`{aL71N}-xJClnW`wgc%ps_qQ8VLZmC2taBZxXm&b!PHdpm=a-I@t9aPhSS} zxi)6bOCB%~0nkS@(M$%|Et)kg(xIoNWABu8dYdNho675v_s-s4$j06&9lNLW?dDmc z`z+AgbgiFUxmoDgZPQqX%s?VFX_1|+B0E_kH4%T$3<%R1oT9V!W@qainW$FlRa^QR zD_4lIivba~M@g_3(H=qlWisl35EkUoiZEef#!?5B4#CRa4`{w*s%5~|FInlVLvaS5 z2Zr+4V!;rwMc{_b!#K@-4M;*-fv{>!hQM9S%h+Zq!pzP1YC7fw74jc&S9YQMS{{=n1(zpJ> zk(|FNaru2M@pJ0&(@9)QSXbhC=;8E4d3vI!NcrGg(jii;f|`h@KW)vvzOT#1lxGI* z=%=4kHiA&Bv7iJA?|$?lw2?Y>yGX^N9Ql75jZEWgV6?H^T*2T+jmMh7l3KS=@EdB= z$^_Bc&cBU1qeY*+K?6r8<8Z z*0)=eYA6a!q?$gs{n}YT;a~>VT5iq11TdPGSrY%&2md{eP~jHVZ#<3kH(l#_6d{qy z`VY=NI39Y~E&4Wj6zOgHb|oH*a!0yyni|w3=V}CEVVw?w7`eK#Ak{5s0+!bauyTt8 zy`=96C14>(frT3yGa{O`Q$!2Q47`6sYP26620Eh{we(3^Hw?JCram)CFyA0PFa6o2ANlpp%?d=VQealX6l7IyfH)(X;vRKDmvQKC4nlt?MqZh~ z$=eC{;`S)*MDASo5;|d&8V0e@?m7xl=_1h>sEy7TXr%6igMBy9K{50$3qaH&^2-MX zg2x90<6$_$3dRhwa-G9JxhPxxZ=J%hX3l zWh1XkFV=57yuFj#^}0&($1i`Gk+jd`f!{y0mvR{Y!2IA#_nEve`@ytr_MU~7xT6nq z?GodX5<|H9@+2Nb#0(51UA55_1hp9E25z}SP<>hYWvct$(#}(YL$cM{iRWNKAxf|g4Kz8r;^z=%a~>0#`uNYEwn=hAbpQdph)(JddmTr7#l9&hx3 zP?qSwZNRDLui{5v`7G2d-s+cBSoh?6`e1ouG8-1=I%S zuc(L&1QAAp?5K#a8B~9v#e!5jku~OuOM+|~N@a~)Pv?}fd>&wS<@S&xXx%gT(Gpn6 zd_i1-_OB&g7>E;$yU4~q9|0!_1U12LbcBbyheMf`M>d8gFmhgiAP50v8l*%oR2_so znIIz?S=vZ|HV&#>-~?7 zUArIL8h_}lkLTE~T#uWl#;cAGNE7H0daAtvs@`R~dx{ePbv z<4f{#Z%ZSI-=fzOKfL~9Bi`ToB4j7p{q}vlu>X~9721E%-e%Z&y7abf19^6J8P!qC z&0V5uzF&I*YEs-iEKSU>n9mL5>za|L+Ok3u%I0@qnY0sB%l$9~!40A48iJ?QMjF9n 
zr@zaI_HIh-H(TIri3Dl9Q@SIhM6Z@wdbv1bASKPqOQ4Gk*ekJ7bS0c++vSx-U0nmC zrryzrzTJN*0={-Qbz!);7|hjyk!oE6$1QQG4~8AK7Zj;?!#{iwsV6#5ZodC^f9rwH{n4MM{&qk90c)0d_trS~ynKIX_&%|9U!LJxT{gUI+_uJA0u#M> zX{unZ-ZmwX8&?<2*;C*5a*a@E8{)) zah?@~MM0$HhAC76tfojP0cBkBj%f~u`@4TOKW^XRrsc{+NA=Ex`J;a+h%bYg@UYaI zl%oQm7_PcCUZ#um^37MK?z468 zmw)?tAHRpLT{s)qcVo;W6Y?};t?FPU<%Zap4r+9xXz(p>G%`V%!PBLl&(F3}JHUS_ zHA%5bG=T|^D$)woHuB+>w^4j-Mt(lp!iy6Xz+KmZ`%) zy=ttah^4ZJmjjVuk&VGxH#D_~BE-%S%Mt{{(5>z9ifW>Fszp}#>{n@VoD&ro#TFtV zTg~>tDZQCYB;Zp|_gSWk)C8^;Zupqp7$f7iYIz_yW&}gazk+16B>sQaxNM=oK=!2n zz|tH{SA!MOi5AF%#$fvnG3)8@+o$?v`w!6ebm{5fz1i?LeOzg4dLH~kmzvg_ng7ro zUG`T@?uD$67o3h>HBtLCYyx2AYT3?B3@JuHr%A`R23HY;m1%Y45#h2nFbfbL>e7~K z*aD9UB=w?SxbmpqrB{E{gP1XmMr7uTW=RXOHxPBP+7uef>txe7ZH9G!e#YA7fj9>DQ?s3)2NB-mg_qhGj z#~r~I44iXK3+sXXZY+q!G)7{(Q6FKLGmjv$(OK;}p}PTA9^H-Sk(?h0c|>-R(0IHV zo4J)9d}0vvOt%5SFGhXDv%T4>v+&G_#skc{M%V4h5XBpTdA&9ZCDd@O5fN!-(YhO^ zXb7R%Q}-QSh&g}o$t(6%?wvNrTgZHQkej~xT7aVByF74^WPqJ$=H~J!V|v#=^A5tPwtw(*ABW!Zj2-+8L$C=5PYHsBX0{pDsz^!J zO>LvecppYz7~PFMVTuOacLn!-Fec0-p5zrRs9lN!@CtwC0i#RSQr!|{4M7x;mPQ&& z*Ov5ePRKU`g@RAGrI6GZb}NI=H||(*^&-W)00=PdqP1P1oMMcXqbE{d6WLAb%fo8% z1hv{LwFf4ea*EY`xP%8p^ulc-j5==JCyMX75~E5{Me(BE5GpmwqL(}t?b7y`k}{< zE&uTNZ~N5${>AejT3n=GptIl4eYV0mAXf(;^ccC;KXXZL<+YU&R_e(@K!Gc_r9nI- z#8H12tfs)JgR`fDI7WFhJ)P7T2hmOs=#}`e!al?xByH04=+6T$tVTK~SdWwwVGBZh zVi=4#;72wv9%7$)D9~+>m-N?Plcz=aeBM=QXyEFiiGW`uI z2&FOK9AGs^5OFwbNPhTGzN*(ie+#vgJWNcZl44!GgvuYW; zQ%Cv{t;@GcT3OIwBqr`s&kr7(9r>TKcAGd0%BXwhU(c1Kt3r4P`wdplP;(-H~BUY!RO`nhx zLU^>+n6!a{5=^&x?lLWhn~T%?%7ofug3(-g8~M;O1ebU|4`z+l2uB`~Eal-r{>mTp zvDb)d*>{}vD>q=!!ZmwYDd|Hyv~m$KCJbs&G~@B7Be&otdgXG|_O}P&`-R{;|gMnB>g>( z^X8fHqtB$Bh95us!!v(F^2xoQjYz58pgIu7Fo?`Wmkqc1-nKyI7Kw?uzo(30ZJdrY zsk;Jqt9NdQgm}4}I&Sn;`yf0dYNDN*3*uyX*+(zTGhGMbZ~UgjAn>~7V^f>$y+YkO zT-7kB=4o@4G8=%O4MAiG*gV0H%h}**kdLxVXWTY0U<>OOW-fmY-Zdj+m|vWYg+xkF z@v_?#HiOrtbVWuirW(PNB2#e#%zJyS0uZg}^PBVD??&Hw_n%n9J-I)6TiWB%A9(+X zHGku@jS@YNl0R(pfAW>S|9iiv&x`)pjqmvL&)j`_`L-dw?M5O+*x!KNr85kTYaq7A 
zf-Hl$^wG&hm?(dh88k5oEOJIFB0S9~FHKsI6mtF!L{nLiU5IEXmb#D;BfLZf%IC3; zNrPI~GP)=x9r6^W8o;DZEd$)v77kMEB9bn->H%YcaZrGCAU z6I<$;kC=UjpuQrMhPuyNEC|+c8aYD}+E6<#VFx5w)aQTpTb5%%PX@{khJS@U%XHvA z3jC-ftTFWdy>2*IgS(e8LsOJ{E9Xn&UE2_< zFYIS$XYxGqvyL_HemOZa=}DD3H2{_#5TYx@qL2UqevVw!*1cYFB(D!6saNh&uS9t* z^$PmrihlAO*&jC9?f&6swpr(Xbn9O^zK?B|{%L=@c7Nqx{){vF__X*dSJ=6Kz5n~X zGsy6+Mz-FTK7x8oeG(Yp`l|%BhCc=)U@-wVF@}DnbC+Nd<(MfLz1;&6)O38V$LV~( z%#R`Mc!I=rXSSK9Us0fi^Xtr4MkdHl0fa{l0uLjtO*)& zYXuG00$808QOS$NKHAkr$aX9AR?Iu_DfWL#D}{@OPjkCuO@?Q}b@|)T+~#{{FaoeT zH?|uewfB1nFxRI4@^cbxFgY$V6qQ==vvU{!RW}9g`=Iqm`IG^)GlL5}1=wI!@=K5- zKm^qsEapC?MBn84m_;MM48eo1NW%h#Fv`GYZUng0Qx0dCUItTG+MvxfTfTRnHgJD{ z)j@ul@&wpG&A{T0GZ>^;Ww1dO0fV_MXdEvDl<=nD@b4+f`6{Qu8qgLc&R}w!`6w#? z^$uxr(=hI?;Vz($?dlrK#YI2`F1S7K_b~%#7^P=&RI`r&9oCikEL3Ady*gz$s=Ugd40^CN>0=p4MjpOQe|PzOOYEChc9&5rI6B*8D*JsQZG1}e>-Nft#~O6a2j-ZW62SmAP+A!KJm z$gi~;&`%BMyn*cI?7(s`^z+n8@AL!)X@UEG8-jR}Yy~79_npr?anW^{slb#N+lA3d znfm50$8r8C3mO)jl884z?h?;z2E|oi;KlX4V6c@nL<_;+B zu!;bkio(kqq-%dE#&#-U0}s<~BuLRZv0k*u@JSK`LQA640gnXfS|^HgCmMd&`AshC z$>>wBnY%B)7B%^qk$nw})o|WGSGCal_`=itLsEt=Juq-t&W7MS9T!Lt5$p zu_D_p3T)GnAqcK>Is=pjzsr(;B~6=8AJ|Gh_mds0m~wxR^EXom@IQG+K5HW7j(|aV zPgD8EL9co~OdXjNaZi%AYzDBp%N&FL;a?OrAnVIGN*oBNLb<<#rIaZx=w%2~1CI^F z5g-CY$mIfdk5^sN74*FE8MSBFvD>DQG|>x*ujuaVVC^^r=|S^o5N{erW4jv(Bj{Ra z`}(cp5b}SiZ<>ZheZ7gh7Ug~Y-f@WesaNI8{!e|^wb<|L?+$i<(|-m=N*+;yUGCfg zt^FpaQf{$mXom$czU2~acrf%++QG8{z3p{OFcMB6PSgRGo+VC~!a5@-*7=Fzsem=z z(LmogJKzprp>@v|z|v#AkWn0_BoSI6@uBo`&kBFUC2k%*Pl16C#KOfHw>t29Lz}zN zS+cL(aR^Y{JQ_@!h7m63N4u_t0_6Nk$06FMK581q`g*JAS`_s47snySr(WVW>+9{H zYccNYZ;nH(PrceutrL*Iqrvlr8dwx)LSo_#sK&b;yyC9E7gQmgrdxPgvf3>~- z>fnFh^BMN?C3bsTPD#e814}kqN|`LEBW1Y|%s`u-RwFp2A>Z}%(dZvMNS3IKaRVf2 zG;(WLg!Cv*UA^4VO2WDgIIyy3|6k!*L$%~HZ>{I`Z+Kn*ujV0j-oKmYryi2Wx0~Z@ z>S^f^1^nnngOTtG!B-fW|9<{h0Fn3_39o+>PyH<(jm8_Q3GR3Q-cB@tG)XHqa#d#s zlU|3Sw4zw${tQ6ME)=m<)K`-#nbg;*^rfU7rHb7xfgs&ylCD!}PxL#idmDJO(zh_EVz(L~+`vP#J1kP^V@SIi7jJ)r 
zL8s{XMmW`ts=^<;d6l+Vwm0dBocuEus9Xv-|0^ED(l-D2F@%j!NoSz5kzPvtBI4yn zUq-!6Yry&!xF;tneGQzR{4-IFZYgl-8Bzx1SU%&>CI1@IU;I}zkop9I=G4GO>ULz$ zK$rSj`W63b+|u7mKYkA1;HC8Q_(p%?#cxhDF8>;j)I}>Szr)`Oz4I{b?O}ALc=An8 zcP{y675o9?fGu@5#mJz_p&wfs@<*eojpWq^WhZtA&DsIBq7SVaKv9BKzQV4=8UC}5 zI-pxSX|sM{)2e}}3U<9+nqV0d7;?qeH*Y2XwVXSJ%c<7cOYHV~cGdkoZq|Rn^PBcO zb%3YF_Fv-gHxD9^dfzFxlFy9>?X^UQT~t*m)!L17CjxwScXkN<6OQg8{U31n-}NW$ zgfH!Jv=zGIrBVK1NzD05FP}{aNSFGRxJ{$g_kX~mNAOQr)-+mkFACtRkGx3S{14|N zdCj?lkUHM~ru|3$(r>!&^9Fy!i51^`vIB>8%}MGMr*PHZ&rh3%)zTmNqmTbJ8TM!Y zMpmSJbl5J=(hv8w)L^FuVl7R2ACj;U+4W^z`j7uwe$t*dH@$sXVOKRe!YN5l{!I2Q z^{Br2J-@7J7))50MgM?c%3YZrqRlqvqN65H_PbM>*=~arD5@Fd|}ZEEd2+f zNPb*74h0~oyk`A%)L;9KL(%vBpuhgy4?pmkypBHb{dN3-@2`^&e1E-m9GM3|ntXPG zyLTUBbj_pD%cc!Z@Bws{(29$`^+DG;FQn&qaN=0ng2JO^vi!VasNYa2&kKbg3bFe&;P(9*89G@Z@f^cpJ4EVm%RJHe{h0n z-}f)rQuh4|wvb~Ef5AGABQ27QI}3sGXNQ1le#}o)#&~gs?HSO`4m3+g>L0%217xns z&4a`t2-3->fnPRHGz?63r!t>+`&_#&0=eRf08;EX;S_)2;?@G>_{qIK_lY}MuJDti zpYlY0s|O@#Eih;l7xJZc`edMi;gcrdsL=KF2^QY@AO)ZN+oKww1kAvQV8d*=UJUn#HQWb- znULQX_J4mCn6%FmB9Nap;^I4h_$f0e!2YA20|l4;E(_#>D}I#`Km$JiU1kXPCp7$& z`QP;#vpPuA-(?O83^*`>USI+o80x?%VIxotCX>m_8Vna~23+QkUD;AA<@c7I#G1=M z$h@Wxr~=c4EkhSDRG@%D*>1~FCCjNm6<{InTc3Y2JZ0Da6VGiyV}CwZmCp?%yg^{c zKl#*DuBS2>Y#4fgS<3oczR%w;;IUh$pKbG^?IjjZJ`tbm|f|1E~xt!m&FT3DK`WSOr zUjlz=4j4S($OLrw*uJZnlqm%q~`uunK zDP#51zb0TW&=|}N9F87x{`-FZwZw}m;dXyLUNTs5xc>jaKmTG}cZtVT;?ejgyd(L> zq_7e2CGqJ1*s|T0Aw{;MfP!!Q^z_?HIvR4kA3oc0q{wlFl5U0q8#(^}IX>y-3UdkH z)BD}a=P!Q#&|@diS>kCg$1le#pJSAe3Hd8N^6;V8yVqy3o!OVme*6+&CX;+!$^L&M zfh-_B{a2D68;PI){9N+SetSO9QwiUF>yOt*e?Z&I8T-s9{+{1|H9iV*9;w93Ro15r z74o;A*WdWhzsH3@C*SQDEIHV+e&5Rt$nr~mH<}4_lK9z5Jz+BW{O9*Ln?L!{5oiS@ zg%Cdcx0hd%Yc)6mUJ^bPAO6hx{#}2`CvREa0X%={7d(ehAVcEm&(~E0)&1EC?!P!7 zX#P1O2YS0v0*=7qZ~@FE|K9-1v=2PwcpV|-mdO6zQXV`8BhWJW9q>escPU^Fnu37u zhr4qrTp|ZU*0U<-~XTgOYTVUe;l{j;lF=hor`~}`wx2? z9@hUi-c|*_Z?Acg{kOihd34p`KQ6MH>T^dN3Z6gyLsgXj*4(rI_~#i)qdyN(a(^A7 zbf5oqh!VP8{(6P>uP}9%riwRsBV59`-H4)Ib25TLzWS6&DaKJ+3N+#_53=2&sPue? 
zS|BE0n+YCtbMxYIwk(&MMMr;&>U=_lZa8R~@cni=>?)HKcQt9-YSwt}*LZh!z2R|P zch~!ExVXpL{Y<0zYpS*Xo{~mSw>L#Y|GKzKmFccMc87 zYj#s->9k^Yb-4%V`SpJqozvCVd!6mZw^y5XMLKGBXsGXObJo)2X$Hx$8!2625T#n5 zvi)klFeZMnsAF=$$8)q?(?Qna=~z4Ny~8wX?bmed`_qZFA1$vhpaCZZ5-n~V1eazBz3OkD%0w8V6A&WpOo1Gq^>7;+*xxX5X4zK1I4Gz}X z&3LhZ%$b#O2F&%2i)ppNemJw{S~eVK=3r*u%GvW03a3QV$zVLYz?+!+TXnD}fJcML z>Tp7*o6$wN*fwmp&^4x!Z?I-Vra0Tdxr+%|_cZx1MX4b9$G;5QjvsVY{Lw(dpTe?XVKEl~+ zdv;+AgvafO7mvvqz__hk4xEj4nuhCD8kkN|?RYup#>J+-IlfhOF5f-X5W^SUBn7%G zC*#swDf(mT86st!I^Ao=IHLrWqk%qbRzU)rZn`>DWLkd+t;u;tx9rr2TcI?;Zn$T` z`!>hjexjh~@I2mrq1~y}mhNeqybrN5ldC??7T+#OLtf|6*B2>og{jw@m9@Q~ibRcZ zfiGVyZ8nJx>QZ=h%t$rm4Ab+XbH!E0qBC&Sy@2KZ{8oy3Vn4TGN3FKKZWn5GT5e;q z-crD)08@XyZ%NTyBiGZB$B0$qb?{t1@B$sS53fw2IxYDj%!nJkl-n(~bm#C=r?A%E z`?0wT0%LcbL^t-eI6vI#nS!N1-p492_iNJlyjWu8P^iIWvM}7u6tG6xF`q-)yazbJ znbtl5*GrDlLLsk5P;2^%ZFj5PirD9%W@)zBl-qx=IU>umAJiHn2BO}uDhfa)}X zI9}})K`VyVyZ4QaE9yP&dwxh!6$~bkd!m2kU~Qqh69Zpj4DmLo@vsQ za21qBK`po$j)>r7=`gD>+nRN)v-F1i)BW_7=FhpFRI_wX#6;M8J^i{pVYAx$%h}yk zhmXVS_+Zl|7xZ#n@MUA71>ITc`EXn223Lk#JJ(sOY^M*CA)$ENW#QzPNqr19OvHae zIA0{U3Yfn;nRgJqZ8GiOHQ z+$pvjYj&78&1J$b_s#wh%;^FLXqo9OEf%|bH9w@#S#AD+FMiDK2mMQI+-qPhqt%45 zh!_jcpy1e>FMn&&d&@R6YY@SX4)%X)Ei7^hn06V$D%ah&Sm)z(-U)lM-VYbf+}XWv z)9o6)R|Hg;ZVoCcb5*}>k4t295TuvI&9O+k_Gef1L~qtyZ#`B|+t`Z3QB_6z&6moW z&+yfO!fU>SUkS67hmSPwp8*<`W^?UCXYe;xD|gpWN)i- z6lOWOc)}|IKfiD@^`;4LoJ|RH`?gqNK@@cu@pG8nT-@5`ItnXw8Bm`F*2Og(-);4Y zd)mds&z#s+=jF8Am-~Bc(;F{T*PpznYkC_k^DCN~;(5gu`1fdh*nH*qTs-$l;Y`T+ z@_No%9!BB*O^(yUyWO9h#K?b^?lR`aWm35N(QTdY&HNBrb?JtRuonOqR-VM$>4y6%Mg7Wc$_hoRK%o`O8(1yN)N<+UhQsAZ&|O zZjb0-yf?E!^-_v!71ZO|Xj;?DBRKDjXovGOm*U25Gi$ zJhK+&R#;x&ev&qjiYJtjEI{n@A!F(qWcJ4)!5DBHXaWH z7hm{pZg-=pnU7{B+Z}&T_2bnl%dwhh96hfJ!`&V)rZmM-RBV0k3i@TQ*Tu{|7OHzJrtVRemBFzV$IM*KqWxKaXW4DCXm6*J z&nAPdnOx4=WB)|X*ZApT9-eKg&h7`x)aH-E7wC(6wME`r4cq?ya4r$q-_QBDxpF7S zl-W#I)Q7h#b!}H$Ux(XyNbz*>(9G2df4Rr7JG}+ zJNMaqxhP+?MJ}&rfc8p%e)gm6mb){yjpVWBn=-z@@oRjjN@ua0j`xDQm71jr{xY@; 
zMyD^Mt`)n&VbdDd(fk;oYMtG^G+P(=oU_vOquYwdJNfae7&0e3sUv8wD%{KW1pJfg zu4z==?@Akhwp>@%?c_5z7_M_XA- z#W&hxs;ChM+rImMkK#Kpg_#oS#E+9I(+gm47v|M%R(3!GCOKd9M1H@}=46&n_P7yT zS6oTUhTs;3ih7-J1LEqPC9w*QMkI<2``sieDUx6CNn8Xd%F8F>It&;-3me@pM_5W8 z7VolR7YlOqE-HEyQUT}LAujLPxIZ>vJF#!F5Wlz0iU2l$hCsMyiyn=f#CUvVPRT}e zXu6Z6c`6sHClq?NkG5J~HfAF| z8^X*V^e?ayrs!@4cqKjg`ODrmEp`^z8f_osdL^nvW*;UYS+ z=VDRV_@UR{tKhZXz591AIg)7d^uUXy$^uhZie~7gEW>L>A*OcOm4%s(Ue8ktZLT$M z5VZL)3~(5^_ziU_-z;`ayt6TDP6a1-Ch8?W``0(3Xq~_TpxsHY%NpZeT^q&^pQJaa zy>2XjYpGck61$MR5?9mGj)$c7AlL%~Mh~J+4V{3l4ppC58<5^t*(ZUZ-ak~(UT5HK zesp@}j}c(;m<4b^rh{Gjz?$`4=LjCd9pBLvXG78-$eFCX1?3pDt~4olpOz&=#FXy) zxMlx7Z{Wl0wvPcf^1QZd7)Ruw3E&2Mps|sEmn1PPB&l<-j-Q3^wn_jzN<(eO8Gttl zh|`1OJQrqLj22DpLyy|I3bbiAOCxoXZ+c30qjfYlP^bC(NdvTy8JLLWgT@n{@a<7K z4;!FiZ&#D7;-N~ThNjj($Y1b`*of@@9%;ki0%o%!j&|YF+Uvq7UoV8Lfn<0f+DJuz z(Zg6^^rGoD>VKC0l;Y3^1RNuJ@jm{C|)34OkrwDxiu62+OX zh!{uI@DTIc``tiWwD3lNBuvm)%08VDOcLMbCL;sSNtXFEjoJiaN3TSOTl^b(_yg%( z4^#o-Vjh9>amgEbM+VFr_rYb1y{UzNYEx2qI4GM*)JyUSB*t!Yp^rRn^GE*(@mMaJ zX3KZqnEbgg_8DzI2B37rH(X8XUa!>fe1{+j4u*k~Uv5+-M?>qDM>Ww91suHeC!^((JLIOYMvj9t=Ux0GXBEn@qrSplC=hB6K0!Ftb| zKiOA~vPjN>6C;>#dDwy0!MhyPmBkWpp_kP{7(U)-FagB%cJKDaR~OoR!mSQc(Cm*; zT5KK$V?4`4J(4cVt6EJ?WcAy^Y#$=uv&zy$2J{|`nvYkQy~%HcGwX?epl7ZSw>N<8 zYH$0wN1q~OUp{f%4sMo6O&724bu7I z@zToB)5BtL*!L=wvpmy(1Tuk!L0zqAx%jesVdnu0Xm!cuF_gSnF7ujX-tlao=9Efp z;j`2}!=F-MYz<7oseBD_H+l^CK1YE2 zCjygkzd&F$oDOO4P#Bu3neM`#br7LVdFRRifD7Y7M2GCHx^)$QWLP(2C$N$IT#(Mc zPsWsX_`N>coJ_q0f40zc0?o4ysA}`oNK#Xu=KxCu$i=Dcy~_RKF$%pTE$}f9N6E0p z>hK1N6_N=;rI1Y4#{~9imi%Vl6f~ZL_ox6W8yyODgCFKNlYn^MI5qRKp@D(ru@a%I zbRn%k+FluIjWrT~-KY>3>2Wx{waKqavjI0E92-#r)~WS7N==k=FP2iz*ucogGRU}U zp#o`9Yhys3%K=qzfYW~R+HrR>{Ki#>IJU?vG$C@a!-T5vv805l`ic&6=4f@|dQ)_) z(f-wb?hnNq*ZaaWqXk>#B>P@Pg>A@FRmZKyD~ydtXX=1|wiEKrP|c;M?iB|RKCO$U zSV^4$^_KQ>u{!uo4w|1`!3vnsh6-ks`Rp;9p~%Z{XTQv+GL0&!y0Z7+=wNz?5rONC zf$I(w%={ll0R&IMn3Juz!Q`eNQaZlM!2Yz}duaoV@f4iE?N9RoQrB$(Sh?^ijDlqF 
zls_6U;Ao|PSg`5@3*m`s&VY;TiV{0ufr__K|5Lc48e2gy;PV7*qVsx0DedtnCYsa=nb@<}-AyL`9)VyVmqs{ca3 zA_2lEbKtaOxX_zYvrmEABCvk@!3otT)0I%H0`d~$z0q5RvXM!u!3}wAW?Xbv-8fO5 zKQJACxSuJc*mHu1lZ)R=aHsTyOm15%jb9Bqu1bUM9&0<{y)kB`d$|N*LuK-R3&T{3to}na#M!}e73$4>h;5-``gDC&Aq)$UN2=eU)Do@`c9p& z*;qtb<_pQ;WR{af$^P}j1eu(G$Wv@D*us9f?xu!3JX5^H*TGnrS<9z7?J?z(>nP-Z zPF6aqUJBbe`r5!_#(B4J7n+a5Ojyd;lrOT)-TmY`5Si1i_8@8aTo{&X9*)jTth2!h z3?J(SjY8%{qzxXud^d~b47nh>x{BdJNwrCqP?cAfSUiUsOuFQJdx|SiozXXyv9oe_ z-`a@!Bcu&Y6Pny@*1Hv50H@{s$%kow84*PL#LeFZ(266U>-0z6-A=$HsrKPxWAal^7c}jkg~fZ;AY?0T~oS62u!_FLdWs@)Y1yHhdklwmZpxhqHSg`E)qNVdg5Z+a;IcVdK!_><{`ej-&-@apHmqLntgrhO>)rkICyPf$aM!sb~ zvqdJ6K%t(TQ3RDUguKpx!7th~;|4&=a2G^i^wRaKKN5U4e`aluQ_mfq6u7D4iLxljhYf4U4NCN+Jol-!zs7P_JH`uWCQiwG@}Jm{haNp? zr1u$=^MCPQ=; zeZuy~sI&p@K02s7hhsyH^@0DYdX@fommg3?d@WcSUvp@GFpWE-L6|0pYp@63PE#w+ zOi3k*a%SGY;Z^s3=pC79$QMcxLNoL(1$}*re@FiGWgbV5yx>$6BEuIz59KK5Ng%$3 zZr1=aL8+2zY7%jWE~jKM(ifXX{Zw$ko|zza3?~s2QFUB*usDje15^Z}Yj2Vs|BW5yJyFr6}zh@x_PQJfOq* z{)OyVxV@k}N{Qg1g5V84u@hMh z>e84M3Q2g*q&!hffc}=f7=Z5X1T6yo&ePLFl9o?@ey{IeYsqizze#k!e!MwyDr1Sf>Ks0Z@=$L?T2ZY^W>z)9<0|)x}o*(*>hGI6B*VTaHcho2n1D`z;e2OEkHGwG4 zEMD?i!C4UTYv))`(jAau%!KS)03YZb!Ry8blhmgA!4+q%x>vpFFh zf~Q@7ppg>Bn(xp$LTJ*3F-Hy75Y>+*J*unynQFf=V*SuU%@zAE3<`(q zILG{<#)sk(b-5JpQMPXN2t1J^sPU^_F|{Ip;!)*u0EHDpT7JRv!;6kut4ouKc|fDf z%bRx9!_X$|yTlu=f!YGPVpsdrtEZEx&6%f6Bu-}>o=InRSAOk5L5_j|k-AH_t-k8M z;IM*W9~GDK(`Ano5-%0yS<~LeN&2%v9NHYuMkOlddiC0~=WO<;w%wv|>4x*jTljo` z@wR7<^gSwux!=Tmkf}>_Hx$enXY-)Ko-Y4;X8H>=KIna;&RT~}D`>RCZh;Ef#Buc) zR=&Dr1lQvJh%TR^mJ&TO;>eO;JpQovysKW{%dMF3^+dQA8SL#Z^Xu_XD{m~k#2W;I zp~De>0PcX(l#EUMEmp{|)~CiD?SqDYqG z`4p>{`Y4B`F8=d(ZiL4p`l1k3Mf1>-rW#fT1iL&bJ_*n0it!6 zLgePf0FV)q5pQ+kh#-!2evz`ch!v#H_~RDXO`s5k-2t z;T;gbGHTjC5SklB$5J<+-P|63Svj%|pN%BE(WaU+5{$>UvdqtN1N$pvEVEb~JHc29 z-x5uVRn!WtWU&}Menz?qgtNa~#ZS2Xf~!h)i1ExyG>?BnnEOn5|It1F&;J&pApeIE zq&Ob_(1Wlp8kO7JQ*EaI+3jQT#WOyv3@((Nu`hSq6zgTF@NIH_NkpEDsuxRt& zyw^XQPKrEu`nUOq^0Lz3h|~O-388)FG!p-~VgH~0`G1CjBxuTK$Ulmezf<@}nIivD 
zWSk>Ej?rZj3{`)_MgGZuDM0@A*8&nVXSwHy`Dat{=>JHX_#?@p;HZD&zsePc)r0^3 zzkd`gMLvp0>M)G-kM~9TJ2hdKqN!!nJ4emGe2H@W55p?Mu(MUHpN;-0SpL2q|Ku_B ze@(UWA0XOAfNAbDXIF;tu5dH~04OVySN=sS!KOkQNrDaiWge`q2lK@d8ye+s zEJ-E&*iUCWoEv9!c|XMK-80aHC0rbt6_mglNx}_HzmjTs$Y|H4_>~lGblm5P@^)&o zi#;m6jx59O2wEJVg_16i?=FaNri3y^)(T8BgeWCWw*X3i`h2)1Moo@yau>rFitQ4) zv+xTw5U<5R@IlVzwH(z5gUtmUY~&=Y9L-lv^yUzO4;q#A2F8^&e)yv!?gxPKC#eWq z_}vyNTd|EJfA|?0X6HmiBK}a=QiR{n;^gqsnYl4EZT)%v6tS2Zh(=q3D-hdV(Y^cy zU$s_@G)s$r_zTb9k#n=-lj|NF1a-pUuNN2C04&}i)RQ7+4Z}rc45L8!SwS30LaiL zYUcnHKR<$HWo}+=MFH7d11OlZRe0Jq%h&wA8s?^?Gk=g)j=#IBVK} zBJZHeQMpECE&rF4z~%#JBy{?uvrjSZZTP*E5Ju|`HoO_AffcZ~Uku#PAcL=p){P*K z+lKdcn%+RCZp(re8tpRN)W%D|N$yCEwB2`~yBT6Y%(?WggwPcC3zjJYw=HIegBuS9 z;(&_5-}GlOGz~HgS8145N$9La?};;iExllYxS04a5{wuKo?|j(X|!3t&9)t@oNje7nDDyU$h9+va_FW;pe;NMQ!lXY^jBgu zTuR1wUd3jTK=4&REbx2!!wL8i_fS+@F7n(?Y~YA1#8ZMx0zS(8*dSVI7?>QDy*eI? zTiLBwI>od>+qfT(yslXsG;xi8@3vU)aD}Mx96}1zlzb$k!|*Y)_mm%(M~4E-+ga)} z9W7NFBq>^rdx0}nc3ud_9wAXu%W#ICB))>#5k)D(WfIS5S$O4^1F^7<(<|T+$8o$; zxWCSwEQBzLH&LbTC)kAVS1*)3BCfA(RQ{-l0yW7GiLD1AMnZ%G!mcwGN{kP(v};_iV225f zAvzs1Q;fOx4#LCk=h5=!Mk0P5inQ|)V?zzDnEfJ8S@j|8YRa4!rC2s2v8`paGDSDn z?*y0V;Y#w1z)GIw7^;PTZ&4D(wbMOC?+#?o&=6>p8ZOEs5OK|y;LKhrRQ9TubNLIx zZazRR&Tck1kOnY@%5dPN(d`7xTd9TTJQGhHf2*oKWqC&>tjbDn-LEKhM9_krtF&MC zytYDiEhe>UNHepXj`?&1m+16aQDxZ;2p#O><~|nUu3o@ahtU3 zZePiHF1W>*s>3saQYpV#TJB{<>dR6cYG`SVXgc-8Ky;ZSVi}Rfl5(<3?A-;*-Sa{! 
z`_4Q+@~@=$-VP8q%%R+)Ro>CG?bri1zK$~wd{&jTmt~10K}fmf{jx$hECrw@a=F)_0et7B1N%mVrb%DYkVD<9pf+oXQiSsh^@+ z7pmmw6*IKU6L|URYGMm1rQCYSrEI!`tpYqbr&@8!{KGnbV9M8l@L_z?;52-pHW`Un zYPK0l<@dTm9F^|xyHuYCxJB-B*>Jv_>;pH4?4;^LSjUfvMkd8rs-5AwYCt{yfL~HD z;&4Eb!)`x?Wck^<{*Ldiki1q}3|44w`Q;1zfV^392)n%81(=mDje#iGuBsUvPZ1ED6vWOqJQ<2DJ}ttBBE7`eg?|>zy;Vw04FGyV#0s4H+-x)yd(^DcY+lrUhSo1LSrrM+K zousLMjkLzXBOqS?O?LgZf+z3~s>pk$3~q_^IntP*WGo*}YFFHVz&C@#&e>v;KAp0?3Vs{A|l^Y4X<{8^N=wm`)$7@N4+m7?!*n|myuppUBh5| z>aB9Hm&k@up+CsZjFHcn8AnL~biJwK-Zuv8XnDSYrau}|3+j=s&k>oGbFq}$K~%oD zL8s=w)uD~l+0uJV8%cgS+x7Um6yRK?t_&du*lf=sCXvRJ)Xj)ssR_l>=2 zCRI1$Ns{)~M6>>Jj$v6lXWszmT_zQO(t`0KX4kXS5kbg^xFbqr2GY}anh-3u8Sv~d z=POyxPFC?uSVBr$zbBxUK)57hH1r*LN?a5Zt<36=WY z)ZL7Iaz)?{Hv$^F%QR%@SL8e0-JRs@HahU z?$|p;5nNv#^sZO+X&ZiciS=g0e~`; z-Gj$31%}V?iOityP6e;T`L$?sA!@4T7E&>_4)oN$zfn(oPadZ2W$4_YofEAWwH7{f zGCc8@2HKnd_^r;{GIJwlq%iA$rq3s}pWY<1e#lu~xm0J#$dt_UfdCFl{oxCc2rCX! zM7DZ37sD+g(r+ZmG&Lv!sVq(xu}0hp4(DY`WS0yjor6*9lp41l?&@wo_q7!m`B{lb1cqY+g+N$ao^}n;JD|;+s?M39kGYniE_myTH3)xPdCo7KvxY?DFPPRO%cM&X(6fC}| zZMvKb^b!PzThq1Vm%6`c>a5_rShc00Q4_lkHQ>@7^Z>w?MY>HF zpS_P|P?)rLGrQf{%5n$HVMZ2y!N=!F-R@Nj;aS5lNteMN%HCTPU|E~2Q}s}kSK<&w zutMjAMCGY}AsS*LlEvS1R|O@;Qw~80Sn0y5a+klW4fr_2Y1nlOyBN8P5tnFc9Dngg z6aw4;y-jOuKKl&lj}b|r_=Kc~#1N(!+x}MY)r*?1`z1O7^IWpKni(FYbvoG6*)o9$ zy}+pYqvU-<^e@|FOD0HC>9St$o{ulSS^HZtcMjTrQ?gO6UmlG)g&(if?=F8Q<@9Yj zJf`ouag}DyN#-aF%1|>K`s1S?;=6YzYqrScSKp%4yX)ZQX62x#_y$j{aw(QXFSv;LsL!#kuyf<$CLVs1sXe*L1=DZ3YG z5X1~;u!B=2IWm?crW+RVpP=}n`_)1bz>%+v_Zo9y_o2_Zz_upomQjDts?-7LX$xt8 z8j3mNq?$WulfO_K%0LCjFTiQQaKDw$=8~5nkPeW)F5zerFb_DG=1*oH#*C( z00*PtHv$JZt@-0HKqQ*)?6O5%n~YX9b95U#{pqxs$uV&OV2o@1Hc3u&TmEUgAeaF? 
zys~}-T&||QrbO1~ke}(z8HZZyo{JuTJ@3mpnnuJs*xZsKWU`IMZ6tmW+7sy12D4a) z?vY731P@u+lL9S#GYn-Ts3{HLpoZINX^G7XVfw-f?XS^H*nFP5qRTp$+|KMcoF|dl z#hOXGI$91@-s!rLbWI6cc7L3ABjA5b&apyHwodaf?aLfo!}1d27biuxipD z<+Y|L7YU@YqUM7vSvYTMV#sJV>0fZXRLlwTr)yJgt&kn=^#(w0|6W1F_$@;Jmq=@f zNp==!)QUL@K6LDrl?nm6!=j1_hQhokuNVIv zX{{qRhMWJtk=EEnKddb9|3s;O{Le^hxzG4yo|su+MWY1K!`$DJ@a$Ci5Pch&gEmX? zH=T*{%aS!r>w^SH8iSnt<_+c^JE8YT#k*Hp3k_Z3!E_4cka2At?1*Q1jSJHK>1r1j z0nyTgQlktav?a{`E$E$MV;SJ#p94SUgOpY%hCe(w6H#yPLL{4i#+M*}>@Sg+2n<0* zFrt^Lfxf=!gqwwK6_|?A(vm$PrZSg4k@W25JGMz9w#i04b10sH47-(5>XvxrH9Ur8 z=R`6w-OwML0}Of5$I;-MTjQBM5pyi31n2QS4bnj7HktalUoQ=3Q)Qh*Vr0-_=zp>W z{_T%#{^E8OHerAB(~NU}%rZjzF`TIQv(R><%z_}@44tLWHTZy(4H6i;&JM#6prP+^ zpKW3uSI1%SEwY|r6^dHMj77hhNu@zFq(7vCpmj{X8uC-C5}@DBzGWy4tN{g(M*pcQ z38Y78D8_s969Wla%NP+r-T{-NgaTC%8krltVxaA-w{_2yLxN|2vO)z2KoOYf@vn)7 zRAZDN8gZ!QM2O&*SyTLb#tdBb;KfSY7_y0CgFSI2rCo8pQWa}AA9OpTzU2x4=EjxC z8kat)4qwG<9rDFlE{P{H_jGf&Q_i@dcqB4aZ=4$A_`krnYvB%t}@R4N9Gf%~}&!49s%Fl6F75_n@pIYmA zTSZ#$dsS+S-7eJ1&3R9l4r zoeWsr<6>ZUDh+2YZlfe6D?$R3J8F+QJd5qxs;HdpsYV@tZ~F6*OR~|>>KK90FNt^l zMs?*%nnkc%$fOL(2kNWw%7gZAXn!!v^1eQLNTijLxgqxpeMmD z*vQp)BOWE>oEd6>c@}I;5<&`fm$l&at~1^_m9vZ!7Npm++T8FMYu>u^C2IL7>jMW< zBD%8=qfbA7XA>vF)xt?TY#0%)RkKnzkFRm%QtnBt@`CUb)~gv&clN#ai7ywofXa2O z5SAkEt-+70T`U#R)!3KK<0N2V zGIfyJIZRbZ4b1x-YJ5@2`}-|j06}&mqfa8k&d`%ZkCD_jPdsbM?gajxf#uxy)2UFM zb$kriY4YMb!W?TQZn;g~pDI(Q+$!`i)!Z|GaVn+Tv;c^$C459wzbU0lg(6d@)T|+t zu+-vB`^Os4kV)9$Mc@4y*jh_|uaK+VKOrN$Z?drU-D}M6ZLeoidJ_uqm~Jv;5UN$`nBh~X2#7^jeT%VKGv3@&0#XN zY!D7uL*H5%uT#E{cOi=kf|ZYREBh1sBR2lfO!B8B7zsN9nV(868GQlxtps=d6eG7) z{P4s~gRh+wWQt$%x;1|A7YK7#lN;=RJ4twSK~Nq>TSBR+5I9$tNZKmQG}v!|u6yPf zBj)+8IuEUKdVdSzUsv-UNulSRPStO9JEHxBcp}IiS9{bD`HqB7hWEs^B4VQ%=5fT+ zS2xxe_Rapcl&CbM(8Z?0+atD-@0@e|K8IFan1GpihocE4PWKv!R}~?cJmAxRb5Rm@ zxrl%X!l?nSXv=4#($#|`CNe=uqPN*ntU}Ph~_=zzm zfoIeO2mPhJYmU+|V z7Ttj7BxudjF!?Q_TZ@bF&d*mW>bM%;S6Zofwnj(1pF>JNLFWx?mo#T*RzEMh-pPJh z!4Q1eHgzVJNA~leC*KSN*&f~HRiul*&6QJnos0*ajSRQ_lWY4}H{Z^jI#5d*gy_Q- 
zDP4I~N7$af^al%lJz0}~$?u1wyFD(m*IW!V|7kF6$>PKF2+^=)ota&(pNRbfd=vs1 zkpS2Bdv?L6EheoluFFH~y~I+QHk$otpv51GNS}vt8UUD?x(+hgyT<2J!$_=A5Gc>q z{(YDf5DROSr7xnSNsgK$c~BR zx2&$jj4uga(KW2)vWRdSHS@@dbT?Ar-dwSaiO+VSBE6A27Ls8#IiLKBkQkLcNdrEq zoBOBFV)S|n)^wvZ!;__+YL>d z(`Kq0n&LMy;?t$YvnOWKNbwAV4&BF|Jh}zd>!)Y^cw!1jPA|VrLbqkjmLvBH> zE9TQK_qBq5xt}FtdN;dJcv;GRd-aC76M32d8;*q^QGZ<8?*uh&DQmS)#Mh8fVkcS= zsYqR4&5HE+nZ$Q@6!v{Ks@={<<}+84`C9i0mZ1Hq-Z$ZLl1&5W0xY2LUC4S349>hE z;}^g=Bbj7YO|@AIjrTK#4>K1&B}ywKawed{+LcIuU6{^h`F*F<18CYipg+DX!)5{D z%k}%mt_~Yvhq#o~0(zh9W9F?%s#_a3<0luUZL|_W%DwZ=U42ogk=G`*UCH0|k{g1^ zB<>$5c3*jkmA7FXs3Wwux(_>paaL07ItLX~_|YH_QCI%ZwGnq-#D!cMYht+eVK&9@ zgF?`M_8ARi$PC29Wus94M4ZVWcr*6@W&!^X6!19yiv;`+q|N%vMq_Ta@nPdKegU1O zf27>`580MT|B`)wBp<^t`M)Hb+n1Bma6`e=4#Q~YKZVntPVAIN$_b!5!?5=sR%r3k ze~^6t*0%S*nxJ;d%aI1 z2XyB=b}>IKIQ(W9>H611l}mJ=go7UA3d69=KkXgqUsCUn%=@DcTLWFGquiT9^B`kOySEB-3&KN6C8nZNK-<1f;G zjxiqL6TIRRJmVck=QSE71grgJa^dgz3ZyXJxb@^OOfg^>

_pZVT>z$>9H6Mvnd? zfd5ZXUsHr4`zMI|SCIH$veEyq%=im>_5M%kcrvJ60skGtH%e%H1!_E4%{w#Cbia^u zoWWO*Q*auYX%xkH3Xi!Vk#{grX!eeOfBu3{??rk7PvjJfWquahC~SinrgGDT`5Fk* z%>vsTT<8=L?+6CB3!BXUN%L+_kgg2#6pp=TD-y*MAZ{#`_gvDXcj>z>t4mZ={O_BcgaF}aI0`pif*KAg zDr&&4{_eY?W4vR`HRoEpckQYkHv`B+NSL2@ z_6m_OOo>t*0-i$G(G7Pa9HC4dgWf z45CU-9oPwA_CV)KVDPMhyipM&ROQ6n%p{^GWCJ)cwP7xR3uiv)(uPZ?bW zC>(A8vSZ-E))8RucpxYDCBQf%0m%9KCtyeN0d<_YwSwHzk=!tUtsUgl@DJc&s+j_u z@EhRp<$QytPFxh30{k14TfXh3y>`+buI`wGJj`pkN<&Hw3~7dVdC;k&0;Yk0N&*nj zwyC+trZ1^fsY50o_c}@p8o?=NI-pbsP256`L*q4{lj>w|^6BFh;nU7?v?=QW~oZ5 z_Ht*IJvG7HUB`HeY7$`xUgu__rDfC=OuJf`4z{}UAjcPfmCCtEEL_-f}=0+U*Y#__znF1_ATi~W8WZ`&L4p3-?sG)(*6Y&I(1(%a*+eofH4dr8r7n1 z(oOuz*dWw@s%zL#KwE#S6NsR*SAn_Q3lLG>+K%ry(59+Cv>P8ex)v!6g6?3lPJ=$F zRNi0+qhmBE2(e5DCjsX`9b;eu$Q2^kZY`<^I5DAvEIdjlK#2}P<72NmND54U#D^w+gRJ)S*F4j|f>l?}8>IFV z6C44U{_)|jbOOIYh5y*%7YM5W)BOd6{?isoKK{>qG#eLsl3+fa!04K<6I-@ahY2zAmO~PNpE>54UbNkTXVj zV*^GL+stx@$_AoJv70eNiyVRm9%A0e)==2OZJU(UkafA~N47T8L|WdWDpGN>dceMG zG?2P>NvMYC2@Z0jQi=>nL!)Ha1STRwaZ_2d2fQ(`uZ*LBc&e|#BBoHs*OdyCEufu$ zs+-=f)~Tscu9_eUQQv8R$)<){36$W5&L{w!LS4Hvmlss0e6~3&8^ACW0IaC&EJQKH zq!S+*7Wpq??At%MKoel}L-f|wFP)wJ0`OzCh6xXaRrw~x0p;A6#=Gakx=tSls7XaI?K7cUjofP-1(n`8FnG4jQNWhCH zh%pWIQiC~7v)6qOYIkjBg=P1|Sih9D))za2Dtn8SnvfHq%t5`Rsvshy7VwrB0!j_U zr3`YC+G=GaVV_%6j3=@nWNXxS81cdk29`pAb;zYkuSnqRr&A0K*p>#F*rzncG7|Kh%i`uBZg_-bROq3QDpqJ+49 z8me_P7y%#%(uXx;OWa43uy@uvln)vpC=8{eQ7Z*t@+HQ=@^QP_AgTcx#YQPbkg~|S zM(9e334w!gskn|ACx?or>sKUy`Mgo>Ds6~LBAq%&rNlxoReTs-6Oc-m6&oXpb#2l( zuoTY)6~Vis$Y0)Z`OZb2b`;eDv!40Hpy}UY^%vl{i_-7dC-T6zP6FClpai-KyPE@Lh*U9umKLGfxtTz6 zA*4BmA8s zy;K^^8UrRDJbl-;#P>jn_lQQ&Al~+HGJVJ!4^SRxAnE~gCx|v{l`0TT8?M2~G4ZZx zd!ms6_!?IT3QHIG(D90YW)3(LzC2+AEFeM-Lm}TRbh03g}9I#7Nc0PYe_Uc~l^r zy4GoB6cN4Jyfc9SGGtPs^&~}%IF6ROfKyH5iVA2$b=G*s;BTVsDy6|BK@`8#5<7a& z94vlpNI@}HWL!{N6(6;SH#Y{KylMIsr#HWJEPU(HpZHJSI{2|&=XVbI%{u|SJ`w)% zi>Dvm{Q_M#hxBBB4u7%pKfd7`jQ!7=^p(PMDRA$pMr!J*eWZa#4)UO6nFwwcbk=}Jy&t9$^y$G260T8uh 
z2I!N=J@YdZtrq1rz`9CJKwHo1WfSzoL$sMO*Y;I^5&IZ{fGGtmO)E`$d0$2l^rP!# z4(__BkEpltf;pzBkN5!S9dE!VUn5_6*`Hd`S3dil5B$>o?={ue`bp##E=qs$y&u2+ zz8@TSsd&4uGc@dn&3!GUP(>?J^<4}_xsx}$=p;KJP6xkcQ*TN)T6ADC z72;b^Qri%sD^1IbdGLN4My#up0?ov_3QI!Y=I?b?FV=&F^sWKbUpY_ty|(%l+gMS3 z`GM&VKZ+CiURQnm!keGCbVi_#PJi+*p6VulZ7e`B(r)kuO8I71-5#8amC1^dq|<-rhA@*lMCwfKnAO_t!ez$Pq{(1S#?pW&nzbH#9xu zbCvp1?5eT~naCk|0BDB2Au1h&Wi(pOiXKp159lb^?#}1{7e1pD`x%p<6pY>*E;6+npfoce`-O@sx$3g zgi(|tB%RiS#5FEQfbxRXv?IE;!nlS)%|*;1sEQb5)Im&+kf@DNZfo$Ex<~r8?r&9}T2}$G z0&mU}@xSY*$V)$K_FuaB727^_$Mkpp|JRzZsD;+i2-L;!w)xL`>RkXzK()W0)ac)H ze>hw^lYi1-yg3HW(Tw6LJKEH7*A>2LwSz#9bIZr6QIZ1&oY_9v`>~W8w8Lhp_{3K- z>{ANL?xb-V0!hnMKq@y&H-M3S3RCS*4mV?47(r8|7z5hf!LB2aVGJ3WCB2B7P2o{# z9jJ1pQ)vT1lu;dT2v?{&FpRiV27aLff7-l^77r)mS@7!S#uC|aywh^QOVANBav`WfGGyQHB1kN0Ae?I8_ z<;FJYSPS^g=~GSZTMh6O;#YiRLE(bZyaur75hDos{J5723_5UksFYyd?I7R?F-fcg z94s}y8nm_Ux0O-JW0lHF7dCznliXCL&f<=ZXp|uC*ccX>j?F;#l>kaWA|*N6Gz~Fh zEg9wzvNxz*HC}R+Qri`vI`HO%f8Tl9pM31I4voPNzrJ`Nqcjhw6XPsVe=o|M90Lp5e$@tko!2 z2WO@7wl-l=1-3KRn6!hSeQ2P;kf#Kz&PrnxBhc>E9%vh%23pxX0nhra6wNV^eT3*4 z_0HI4iixE}!F`S#4#ajeeJGLu)r5AXydpW|ObG1)hG#e`&YDC>bef!4?U~m5@ebwSV zx>Gy9;`7_zNx#;N#_zqdFP-_=>mX|A4cA}YEuk#}6z6#Q5R2?Pe}N=XId$wh#o9%4 zGEGV#!wz80+AEa{rV0h;hLWlb$Ia@DgdN4B7X_VTR2|Y`G$2Z_uTt9}-XqaUiWtct zlW(_rX2*kMK@n%+dE8TKi8@>SuCsCUu3Nt7Pwo3vjKC-FeUJMqC;syP#kc-^aiKzWG(RWO~6upd_Rwr zzJnT90zF>MH+8v>)z6#M?=Qg&g|Nqqut$%xxBvdN=l9{$+gGQt_Fcyb>6Z?Py_r8Y zYkciDeAjyh?e}_7)QJNb{mP9#YZLa@{fBq|!IwNL2l|M#m}4ic+db}63yQir!_dx% z1IlLY^-`wye-0&4W$Kh!kV5c%i3_xta%#$hh^MG@4O5j_tA0d&ymHkfV)FnU^*_%@DY5-`A!~hoz`0p9e_Op;B|6tt_a%$ktFY(8c!tp+ z>>0o9`3Ar8@sB*-HtlO5jh(vqm2Zn!@TdO%6EFVa1JmIx$M5@o;^`UkEW{;!`5GN! 
zMvz6iCwgJ3QaMyXY7p3T2oC;M8mJ|Tn za5v36-!R3i3{T~czFHIreSQ70)xr77A&S@6`NxN`-*w!ty89p97W^O|9q#GtYp?N7 zeb^j|zt@ufQYZP7Prmou94>D!X_q=ku}x6Ue~BKYYS~<q%#kdUx6) zWv`c!rNX^i3WP$&HY%s(wpYZyl&$qjjQa>7UK~}lG9bmasGVyR2YNC&8vrAXr79Nt zf9o{TA$6cOyHo;T7XhcUe5Zt*n1FHF!s1!VJ==a(VK1|fwNHx}S1qX~CoFluqe!w@D|lXy0P&%)U7LyWYJtk`it1+#p9 zEl@ApcgmS+7tWo3eW{BA@Jz>bGf~#-e{}>@zHa~VM7I%knHGTkG&@Vpx%by`2c5ebP7HDt8U}nAF`>BCtft!$vA;4cD_*alf zAwo}~z34`=BgeRxM4?fw7# zUR5L-ov$yKeanG}{w!6zsr9}>e<@<)@pvqc$Le?{|K$s89RQtxnU3R9O9H@xB?IbF zvpbN5$+6-td~i!UvaCW@99JAvarvO)S+GtAx6%h(b^vA_fGUxK;SI(+omg*37ltfgrdXu_p2TV0K80!G9f5kDKYE1`x zy0-}e%v4isagsy@$pNC0H>hOZx`a63K*$%K_;z!4mA>_rtlJ z=8OW}<0^LR2Z%s=JZ71$WgQS7l5FY@K$_@rmIScrV5Snu%Q>|u>m#{tsyor z=me%6C~A5Ggh(VOf#^eHb`Es=i>F}dSY~4*Z3ycC6STS?v%I6&rBYnK|1SYdaa?yG zx{?5KgdJ8@$P(k}LZF#Bz~)g)Qw`*4Y-R~^GZHtf!!S&20Ij|&I}EcwvJ0jh4g*J# zbL)hPVwcfs$`Kkke+pbg2E+k!QJMpkp0*q$9tR2HaA@xxXgczlr$?ur3<0c!T?p`X zs>3Y10hmr9{+g#{+r(5ERQ-rtNiK2@_@!T6_Ybz<*a$E`KpX^};IrBRdQ1`2s>&^i zPFaUOx&YvVI~q(o1adLABoK0iX%?S27@|6j{O>Wi|GCeAf19Eo@agBr7JzC7Zt3HI zg9O|=AiwRONY+cG8~`b%3BoxUw9KmTy#q?uQG)y&Hfcof9R!_tK0%19l54$nMjQ_= zN^(G{bbad~5t-OWe|#Guj&<|Y0j(xB!$9{eGSxYbTt>)+5gYhKBDI}^z{d9|u`$lD zpP4prv%r<@e;kB1zWWKjhu>}bpo#YIt39%93~=MdiH!r?2)SyggZP%@08n8gk=3gL zjPNEt85yGv;N-0jLrfDJL!m2zf3&??gtT><18P;c?toVQIv4s2=oIl!ALRJCF7$t; z`b#nYo$y0{*PQ`zwGnnlfp{}T$h5V7HERH+^`^BoX}5 zg6`hIqU$J*tzpV_H1wd1Zgvhb+s5jrz3n z=t%HpqC*A+ZaQr&E46Wi(1TY)my@ID3wIp_e@x7eR6{TJZS5S?wv7=%Vt%yisPk?& zgQ17$-F~udtbN)Af75rng@zsu@AivrWAbSi{B7RtcE#D+1~5faZV#NLHb_$h)sKw5 zckC6m*CF{TVyS}$ZVwM`3;(5J;lGTxUH%6>BU3DsLBw*#`~k2I%++)$Y*P4XND~`^ ze^fiZWtAeg^kDavk5m29K`QHhWO3kcxf5J_NOA1Pj3<6qy=eyyI-uPCztX#iGdKUt z+kNZ$7kw1}n|XK~1dsnP&rdsmTj?w3!t2VByO2UHXiGZ>QyT$=;Nm~e-v)qAvG6Z5 z8_AnBnsN_uwibN;k8uKVfL$RgHGQ#rf2TIWhXSpNm|a1m6CYo_37>Sbs-~#@!7Yha zR>G%s0N3i|eP_>Q;u9TTI^JsstM-hfSq`?0FznjoN5wRm3l_j>~H z@CVl*Q;}PwHN9a6V2dNkxe*(R97DuaKd~K5K*aQ=gH0A~lgNC>P1K0k>>5NWf9f=F z_0$Hs@T>o*VHHO2LxHo@EIC}P$(Evym_gV_AUrG|(#K{|En 
zZ{7O$cJ4gL0N9$evWWY34#s~S_eCXW{v~@Taut@{{)vaqj}Btc2_NsjfAxIJHOWmQ zcn^}8v9;Dbx)4B3_&<#QPFLRP-|5JI>aQkfuD^M60~XEK z2JTheztU2DV*ZWRM+5hUhb89owwG4$=6^Py2ZxRG=v3k3q4veYkNthieX4U6x#(L@ zI&fF5!C`9z9^_U1JYU38e@2h=_N$NoEgKeo=il1O+dc@nT4{hd#4gK_$6@4_**k~y z)zp88bN0?3|IvQLT*%QsiTDB(@?tp&0P`!?c=s*hPJH^A-!Sw*ZEyAsbKq0KFCuah zZ|KUmF4{mBIiY8N+Qr!I8{p#JAQ1KvF@O2aTScA?HJ^s|4NR?Sf69aEf6^|V4RC~) zU?$E(A?u*McUXG+O+RpipVmbl{r!A9hn{T%OXLrs$ZN3DbqoN%>h7He#4-FFpBs~U)P-v{UDC%hyHyWedyoE>4*M(Ty&jSzhg%{JGE`+y~YspPlDkk*6{RA*LEEj z`Rki5f71c&O}9bAe@o2$LnjG30HQbD-nPXa@fY1N3;;k)9H&SeYoFs_7y$jcUWwzc z>mPa|j2${>pt_3-gMn( z{@R-^|I8nL=;Y7*KXgx_Lu=Ii_zeSy{7tcz5Quw=Kqi2n9VuANmfHo1G*0?)#gT5YhJ! ztr&CohZev4e;t1Dkwl{J@i$*eF>Y1);tMi9^j|z7O!WPSwi4qG|Ik|Rar~j}x{h_9 zb*Bw;@~^x%}@`(4t4MM z^{<~}&M~0vwWGkGHzxoLTh^V5+A)a5 zxyHbTi|1jGgU>h%21{TFSPF6OfQX6v3b3QF6LbZGx)@*1pagIVat;U4o>9QWIBYR4 z1b`>zi#sunNX#<<@BpwQ&O4wtAba13#XMVao^o&(a?HehjYy2A6!Q-_EP+PF^;Eof ze@(@?&OdxHi0F@t`EJELk@QdFP2O@~@Fkv;^ABGf3Lb)g9tR8<6zl*z3Hlv{l9=CT z{_-Eb?-=NXd^{lTbAUzv_w?f<_xHSBU;KEFE8;?4jkx|0{Ld)p#P#6o+-9Ht*8t9d zQGgu>4x&tK#`a7P{^f#c9^9^W(`sf?bDD=enbM>_y_wTuM z0)SCU*5FjA5EJy+0cqAjmyrg@rh;U#vq%|C%Mp|bQwcun(uPJOo)}hO%|y3c$lqL#q7ShRqO1QWvLPksaj!NP1ETJymp6h*%K|EJdX9$3l7)# z;5cpPwgxU+|9;A^XLa1~<4D?F?iN)phMlR>aLF6Kj7l@ei=|yAd<&+pvOMQ2Y|O^6 zAdz0?_tgMwPky$ie|$a~MO0D-JTrG)wA9Ivck6t1X@lFvYKv)C2<0>2Oi%rtF^6s=^5u+hlXV3F1kMMkXU(_sCZjpj->~JZE!BM2T zzFhZkGd1K@{W!cv{I#8V9@tM0_Re2Grr?^n=7TBrdI6RVFFew01aHaS?+-%3v z%SsP8%j4}We-5l>)-M*4a+^)Yk(FF?AV2m+HYrX;GG6STm+@H6vemM`A4bY`^L(PL zUn8dn$>XUp z%Fg?7Z5U^0?9SoC$Ms^m9hOUeK9*{Ifp_3k^Q%woe{g$T_#~byJ~gCrYM*)*-%r7= znmFWj%TjL^_^&j*T2%n zqK{huRB|kvRF|Bl3CjEEM%gejkL&YBQcrGC^E{eLMmW-^qwuaE`>G`Bd3kf@hCR2` zbDw(8f4Yd|ZL&WHC%@esNxsr@{W#yE^SZeXkx?r9Q?T|lMWQ-q<&MeR#sr7z<>>fYfNSyBEuGboDQ#!{B-dhIlc*bla8>#a4=X zHeolpV~SNQnw#^yqm!%8-Q;=?7SvQ`MLlcw~{EIN6P(+7QC@^CdJS2=Rjd@AF4B=hX#l38?3$YqphRd3sFti|}M z(zE7UX#izAOAmS!g&GX%v}rLlLP zWu_KB>a^TNOVTbR-));RTaNT&T#i-^*aGPGk=Xt)kn3rsFA1ZfvOyJToHLHKMw^a5UHR{*q6m<7X 
zy}TYb{$T6oIbQAxn>G6Mlw=n@I`34L-SxpJ&uQlPPW3QZ1uOr&-`uX+nYP`q!H*c3 zuQQz#D>o@Zwlkb9Jw|bIe|l`j&%o`x+CD+EJZaqtHiu(h@?p5|x088WZ0k)qn@n1e zG;I=8_h-IV=OZN|8D8t=Ku)$N8Gbpdaa4Ccd>to7yxd&smE^0Duiw_qyhtba=?R8@ za$LRDTOY9QIQv^QWn-}v=sK2;QRZG-ms=Tvscb1wniplWXQ==S6>eykH&h+sUcz6QKw#~U7%m<_eo)S-W1$we`Y^1IHbM!c+%nCFYDkC z@3pO~tztQ(mE2Qxrmva0Z{4+fC<8WCCL`RCk&}XF&rahiR369CESTWCy0mWlwajMc z=4I@RNQ2K?)i0J|7~%QW^4nrCZ+CkJZr1`XOuJoIZZkS->*M@f^J=%8OqnviD7C`H zr)Ii#k|%?8e|B9fpG<;XgQu5k&_6@}ZsovM4;(k!*Eljfd7j(qcE##&i>8-LFIC5_ z4;FBHXmq8BE=IVm{WgTyGwM)I=DAlSsNmL06e^MP6JU>DEG)Yz*7tw6jxq)a@o+(h(#{we+p}_`qN@_+=cItrRZlT!&l#v z?<4xyKfUq%HE$0!KQ$Lf_U6z?o6>~aojpX=wSSDxllxZNYq3JIOENDrd*ZL6o4vf*8Ov<>x-gkCRkHx}u$)Rc22%72he`#x%*^Lahoz{h$nY2Hgxw?*WuwWKX zhG4QS%wuA_9_xFy(Ixv~x6ah@+hElD&oU~=e&$CjX_dX&QM9;9nyT!_!$n=hqoda@ zeFJLO$1IC?nwKic;k5Ls6;Xrj`Rp1{y++s3-5_T&<14qjPFM9jOZ@e04SCRvNu;kE zfA{HSlC+Q8^#Sg9wpE(fc}AAwGPvFQ06n<{W_BhQ5T2DQ-Sx?F+>TCrBV7A_(`L!? zY8{r^Y&PDnKs=f^o|y&*FX7-p0_Ogui(loq2 zDv+^_Ep(nA!Ej@XO?M8jT%T|8gF4gge{-l)zmJWRGZ~Y}jmlB7g z!}PIJg3Nk38!1ZLy}2sq$Dwzm@vxmilg+b3RPMAcE=SAYki1y2m53DKejCrVwRTA5 zO+IO6DoZvJTJA5ZbMt__Si@_*_7tnj#(7-=wVEFG*0c+@O^GzLNh>F1v%owgf6C%X z9{J6GDf)C3PNHo5Tm@R8@9N_;b8Te60pva#3lmxCdW7Ta(Zbf!AFJqJ|F zo>5-)GEaj}Mu*IRrd93g>3Y3hpQWx!Vmx|u>SSzQ0wvX$Gnv8DHA$S)ZGB3$etcPl zqw=UW^Z8-MbF`=e1{R}ui{QRVf6h^4^I$qm^7wJMlh{hiFY@gRh3Ro5o|m44(H1e;8*PZ|Ae| z)0eWGNi)7Fs%`G`JQ$IgJU+;)I8}@JLX*Su+Qgfu8sM{a$ajF8FL!5gRgZPXYQ&k=$zEk%Us7vUYh%2I$C!& zONOQGje9*?U)MVB9_JG+pWC)+{K;{(iC6jIx}OYTXl{z|*iK*je@%Rz-HjY2xrPy% zFHinp-kcg_jZ~zE#VtlT@@8mJO*YRtojroU+*}hQ9{Y=y%FeYMH@njYj^ZakX~yF29eSGq~Trl$k2UD#fPazM6Y-znq`d13zEcjalb%+zsbW+CJpL zdmVENQdd3fM$_Hof8dPn;MsO<+%-$@wKEi>naSPiem_(}5e+&YJJaNHt|qHOaTDz{ z+O64sZ^!jQRK03_7EINPf7CsehDFy)wgJoJFgY&H8%!m(dXDdBrO~Bwnue#*h`)R= zdJfO^+>IW#&e_o1P};q~rO^sPv; z>XtHv;i8+Df5;)q_)#Jlc-m}xm~MBSRH~`X`+ZxM_zExS-uCg6&FAwxt!HRFH{5#_ zDYJ=VPJ`v;Zee~q&9d>ZvPO_wmGKHnbz!Xwvak{xK zj(lnXZs}2#<%4z=Z?Zq8Zq*rmNj)lg?snvq=V zas$K#e=bWGfrgIgbk 
zhig1fAH(WWIy-qdPpUe9sTu}>)K12hrCr9BGg$;%G#<(8cw?~nFuu>p#xINKigUNz zMJYT5f5bfmO>jD`o~F7)>(Q9X=fieV`Rtl+&qtMB)EpfBeRH^7U)yzfnR@c3NYLw0 zZAQ_K!oiqyr)V+FnccX~R0;#fE9Hyl&No4}k49s4p~P`DYhaGc{b&e!2_942o+h`% zUMyrXxmmNjq{;sBlzUZ-aXpb0M5oH_a*=(ce|gmr`xaYQt4C$0ckX6>*7-WrR%kpO z4MRFCLuoq?r+2CLqEj2^P7yx2KWmlRK#sAT*+*}D==Lkx+^(DcY2WV&n?0}AJ-*9N za~PB4G1q3MeF?g!S4>o=^$ayZ*?D=~pYl9-9H*~tr%hOLr_Li8Fd=ScZcb*k0PDFC ze{IHG_DA80T%?`HM)uJxl`}xN9;LD|T_J0mzz7^M@J?%Id+h`Jxr*G-JEpKv3_mFi zb(mVqy@P6h^w?oJ-K9Qfel_pzXK$ z=CQx_@vPgJPObj_+>Sf{d^Mj1>de`NDI=L)N&4n(=d^Z6thh4plt9QAHSHP!Hr zs3m@XMYp{YvL%kaYqttV^uovfuw{?66fYNJWBc4y)hZl$30ZB~=SrPL^45LWmG8w2&9B1%(8bD)`7NBh7Ll{ZclugHuG>zq=j-wA zu?0tYv%XnZBWNs!jK?DE%wcywvU)k4W}2}o!;!C6lWTc*;mQrK^i@x*V6-Y@9dDE8 zG}-O8k+G%R+|DYz?k{_sb|C5xfBVOt+J2cOEQZm+x;6Z?J9jS)$41~y;)Q!r!uU4Q zB8_LM>MM`!Dc12R8}%c{*wyM~kv-f~b)6Kak*&a`x+;g~tik(HeNPfC)&0lo$kAz9 zQ2p#}PS#=HYU=UC*l}^Q+KaSRFQ@2e&z}4JK*wll2dqP{(WcSCd?DQ?f1B$*j8rld z?k1F6$r{_qtGA8Q>-~{!6vf)u1|?6ItNLHBUz;H#WjPN*{@8_s=0KA4eH& zyUtZ%y0f|=%jZ|!+m-W4f5`7wMV@TSY$unsHhbK5gJ*^kq^Tk87qDbEOHzmBdVhMt z-c^^g#cF)m*7=e>M)6@^HjigZeRah$a+~uQJr;bKcm23^Hu`h-l%}JT;{@ZY#h;Ep zVyjJiUeB$Ua{|NI*=FmYV3Kkb{y-gc>f7K&A&-3$2n}k+3 zAJ34q7*}ZbxB}E4PTLwE2kH*t$&q$3Sd4cd##P>ps@o_VoWyKt!*5qk4)iXy)$v>< zx#BqBHJ5f_W$MjKslxP9=EiBBU2c40e|_E>=`h_-XOn8NanVZ&iYuyCR`9ry7o`uR>u_gIwX+lkD`sXZ z4X(S(kr*dwr%tb9c#Ndc#SDRS#bsl+7u~-5b4ynjTVu8BXuk1Re0_f{)cxG=xAk;l zl#R1ngmzj_p5sj4>@U^~WO&I!*c!z;IqmS~am)_-u^T;tf6GuE^fI;26V%Ro`4!oV zySKMD3x0Pz=x)2!dY4$Z3-`0SDRlp_*fxV*?2%5h<1)BAGK?Hag zD8PI3;vvI(@40%_{{Ev=Iwhq;Rnw3RMbER=sj$U|IC?$K9s~2g>uP1N&F{!18jxXg+0s3O5f%ehf5IMqr&7CIbx&=jiSTv&VY5r&yZ~zy= zF!#}CvnNx>j4JMmk^aeOZ1v9GXzkC6cD1Y7?=uMxKDt)N%VQatChHI@`34FWyrc z0dQqHGR7HqWLPImw(QF6rtNyF>ZiSp$Z{HEe-C9@WSHyhkU(bW=b6G6l+d!|_%I!E z%?HMj6?}T+b=SM0=@l*BvU9NAl)4IX`kLo(gEYMY&irhoXj=kel`p;|WcLZ;VH6A$H6_#Su8X$rXkn{hD)ye=Ph0 z>kbD5#LJW0F=?W3+HNbSUtu~(PBBi#{*ovn02X*SLGN(2B9sj>(f98C{M=TEu+{F>;)FZf^7JEj4I`=Yt1;dqH59EM@ zOM5HWmBfoFo5+Z*=k4S=VYk?43 
zza$Uhs%K@+Ao6DUj>`s9QwOmxy+|2JgH#enqOm0n(rY9+Y-Mg)TW3G4e7BXh?_+007xkosQ^{*QABSG0q8jo0rc-KV#g*M*V$^xCnSQ~?;6SnQqE{?5th&-f5yklr&+lUA1^rc zINWDXf)7iy;CV;~A(y7ng3x0A0?BY|xKH+u)GFlU?!Eo_DXVgJE5J?{xWpNUOlo(h z09PYZ7M$A;&OFY<>}%~FcqsBFT(=$hF?##u?R!m50Zn*)6l9Cei&2#Q9AYG!bCQeY zH=?XVx(GlUzdCEYe_6JL^=a!tev*^K-Y-jV@zx4q>_z9hDSA6CI$${fE#NGiXMxI{ zaobLfPaNVpsbUO9B`4qv@x`h^&qu;IHCu*5$i{ASifRK9>0KKESovIz8-*h%7eV(mbsq~-h^(fR@_l+WBNtO617B4S2edRC7zWp2qNe<8cQBy7$dhv?Ug8t2af zrtQJu-~pEj0e3>~X^eXhn%ki?T}@PFPE;9OH6>xhV=lf1c|_#s`XO` z6!&y0;B|7h46r(+c=>mxQ30G@aLnQquil7nEgjUrujmuhHXkCOejv4>Ee@vJ_<-bQ z7=l>c18C4GfA#+H^h!+K5hEW`u91&Z!wHHwAAD47>xnA3A0+yt(74lL3k;oYD`+PU z&=~SzAfc{6>z|A-Kfk93DSPa&$LH(!+tAY5t}*SG zMJ)FQf9fBv`(o#pQ+BzEvD~pHC~^k6b~3ViFdU>h6BJ~B+mtyC^VB!gs93-KKD8sN zAepK(T1V20D@Zv2keujJG*s!#lFPs`H;SU35y(W-Tf@)fotiY0d=-ojfyE-hn#caC zTCIMVdOoTu1rP81C|w58zDIf8F9|#=NSY(CouYY6`f*NU~WvQ7oSX zviSnmPEyek(zU=7>ZdsF#f_6f-$o|M<>#lIfS0?#&|uN??$*xbggQE5bo4MLbKiUD zu^!^L%Lj244yjO0yD3lx4o4u04*ribbQ1ws?hVms^T|ONz;DI0mH~GGUiZv%)!M{b ze@+oN%Rda5>egC0o!%km6*bWo%a ztAofLK2Jj+kkE(j8Q2g8Le@DhzhTnT+he`*YVyo0vjnSy>CdXl>E*wIabKxmOR|lK zT%{Ds=h5ZJpA=evNkh(a8=wAAYX2M>`4+cHL*>$;Mf%^5I()*PURor+DP4Q#_bI8#LEV zu>(fwU;R>Dz$OEV*xmDDb%xG~I5<+)W?0QEJu*tr?3(eHHeSu!4NL38e-{a2K{W1V z0bLbDL<(jP*UI`|dcT?Nt}{9luW02P^VsZ<9-DzoXQtZ_MG_g5Wtt0io4&lZRxX(uy!4y_IwhY)$M@yQ z-Z)uL35g6nJ#-HB>?CS=e+x8zQ*FQ!-1fB~lSbUF*TD7Nx$xe+ZT6e>(2PKYp|Ye2 z+?mpuqXa*crz9=)`MD3uRT~zvCuB4vgSw3en(Tj}o0Z_n+t#`OP!5!^Dp=D8-Y$MC zn1=>^UuMi>B_}=?ewaw1sxqX9al$jtD=8zCfNbLhqFz^VinW4re=@5>Qjnu+4wz)( zZzR1R6Nq}8vC#*`Tf&Q z`jrW6oW4_#nf+#82rLOv!rIQTw5cN~p~3L2B=MxpK>$pbh6gi;ngPXPFVDL7+?$9Z zHVq$BHiJZWNL@{Uf2Sve;E(mVr-{rY6tqoST!5W;41*5o=Hu=z&bV)~nQ6v&@nQ(C zvDQ`$fEu_+jqhMAK6oB9lO@;=$2-2W@Hu`OV-GkZm|Y|58sA$d8wEExQmjQ?`A~3J zTp2)?8e2gpO1j_no)sh`hpqu%u=X?9+p~w5AzU8ch7YIcf6YfGwXiA+Juzcg18&ix zoHErd`mnwb`!%0x zK|hwgSwy}qaNPI##l(*R%aGi-$M*+yg{e}~S|T8U;Y?!_n%E)?*QSoM^i z1m*s37a!^KdkP)LXyf9~V*(b)hR|j;XFXR$%~fc3Bd3kZEm7}WMm7q>v&Pwl z_iYgQ{akb=tAw 
zNuF{lCn*%a8Q^9EG$b%aqt~J+lT5Z%uj*LhF(A0^YONZTIXIOu#sYhS!#+S2e^lZk z_wT7vrbCOeoV`RiUnWcI3_%~;l4F*{fz6JMJlZH_aW;Bg-)vgvWQRq<79ysdB)vJG3ZrE4;lvPz9Sf^3@@9w9Qb9NtYrcE&_JlD?h!M zCC);UQr2%(?*UR|ZZv@&J27HLe=ucOxN{=-zz;wb*k`$-?KDe%7f4>J>px2vq56jp zYgATFN=Miq?s9qMJ)+*raOwz$hS2 zLSu|}8#~Xh;<_*o8SpwIkva_T0H`u7Rjsu(TZ!DKU0491f#oK>JOhd39ePO^ijUG? z&mZez571U9i;Vsqz2nj>{n;*4`~XvnTu>{g^?c*VDu*z?w`dr3&#vsXTv{+*w)4}@ zZh7*FOnN_}s0D3@=*awDe}cmxm`(X&;FHKd&n%Fzs984Gws@L6GPJzs+J!j1fEdQG zQ#cElv?5I6yoeTx=^C$)cJ%~MOfBFyUmG@_$ltJVw0*!(T2vfcXkJpOahm={I!<1= zrS18AxKdq3=~M~3RBZFjN5H5!*)}^>a##6Hn&e>EIJ$fZ{zN?Oe=Tft{O9;kuw-lU z8%OiAr=U$JV3bZj1Pd5)(QU)H2xjDO0P)kVqLjDS0gHio+v&ZdKOS)BLvR3igIRox zCK8Z=oZx?D>dT~pmeIM3ncd)#|Kg^Nz_+qZ! znvfcf0!-jQGVb%@GYTY^TU$~ zu-*`wg06;ph?*=4ChwbNTPFJ}n&V~JEJlh`%}zA|G2FXEs{KJ_nkb!06c7`N~0?SXVDdCtV-k zm+iu80rfU6lRyZKkVU^CL_^uabg%UworS@zj4Fdql*_v%3m1HRVt#Zp90)Q~EXFsn z58`x)Lh3}mn3TcY;Z(85QVS(PK4htj4wePOTHX#df6O-K{mZn+vb6r~Mqd;!p~@pcio| z4)7-$umF#%%|*}AdzU=|VI*)jzO*MnEduHd7<(BUadiPVnk8~nh`pB7pOadr{gvF^ z?xkaQe}qj$f|cJ$#%+Er2kpN;(Z@M$6uP6Gy2yyItvH+~in7VxK;Z~b!TcZ;*;9fskR|l_Jw` zaMYcx{Ke-(sP3bZv4{SN3Q;v@Ef?=V$^uKnxv?blwY{d;H@HaFR;jyYShjfgphCoz za(?y4CVxlLFBt)HTgg`k%{84u3WjtFjEi(aF^83l6_TKg@2M!VylKJ1RiTGFOlUPe z*w)2Ki2Zd-%Td&-k5BsTJuhI!Y(s(6hTBOGee#5zk$9dQ&y-3wI)7)iWX`5f^6Jhi z`&eZwjcTb)e&H@5L?*UYDA_s4Ww&^s>1@t}WvS;S6TcH%BjSk#u?- z(Vz~KBO`Zpq6VaOnnJ0LYr&F^5%lpe(MjPMe`F#3@fHf%91@9(^rP!bPMKPwlv&KY8Nm5nvMq*#nTZh}|N=D^hD9ZuKR@bi48 zwAY*OQ0*Taw^3Qef28`%Aj)FUZp1u}hQA766~J8Zw^>3KNv(!<)EhOcsBAOQ8*Q z)!h86bEZ)=sg6Uw!C2wfC+rX6xo4Nf8-J*NeBTJ3pu!|xQ z*Whey&}s!)M_DJ+mxsr;xIjr2Ew@NO(}elEL_NGDKU!q9NOX>>HB$pglN+AV)AHt+ z)XduH!6JgWB97rQO6e9{HCcDu!vFf8GH~Sov4Q`s{@*!T=l|qrdHeqF9Ie9q4=nBf zCqZlbFZBn=UH<8{#rSWn{yPIsjeq~qhcn#x5BHY*)lt0R`^9mLCeUALBm9?PZvPRA z83bZFc+92Z56ip$>A~6U&peJ9aZ8dV@?Sb}{y(f>?!WN5e;p>sgD3SrEDghOA;K`s z7d8B5>3<4wv1b@&`e!2N|N3K)7%(|R`llCX7_O)O3n$Dl+)rWZ-;qi%RDb@_IoQ9Z zh3T)A$`T%a#P8PCG&QSC3{e1sQ>GeO5bY%E0f&S^o|H}WT>F)26R?P0c=B2p?WgXNA+#<(G 
z1iCW}f;uJrG8|7#9;!m}5P#1=t|uP;nVi;xlx1#_KWaC~8|95Zr3`4FF_k85MQw;b z%2l(>nST;A#iCbjMv~#Yl9JF;ro`PM#KTtgdZ?Ge{?bK04wPb?BVhh9s>jvZsw!StZiQ=$^!`|6uE{KQyyDG_1j9v5md^s3-1lL9*_;QzhfaDRlUa+<} z2H;f|1WICBn~0hu`+v!}vRnJ{>#ZMl4Zoc;1V)c*;r8^o+CG{AXKt#qfm$P(vSMy8 zbR`!AdC823#OK1+FBD^p?44u z?6dYN4s2lm4%dB$xEV|4Juh!#sPkhovP#k*{Aw=VXk-`3j6Ts^`5P?S-v-y`g_55V zjfX-&tid$kl7GGfQ|YIZlU&a@taym)5^9g4Wpy8b@u-imXZSgOMF@A@se)jK9W_0; zc1fm8GJkza0hDZE1Ii`NO2xz&#^sL*Z{}jvt}gU~=BjAR>as}vs9%!)CaAU4J9GfF zkvL*5!>#mZN@D3ypr99rItn0f!9C|`hRL@a3K{^)DTug_6N^e)_PM7 zIN`;{81Z9hslF~}JJ{;&QAkZeRj-s;sOrZax2PJ9hPZ3p&nYWJuKwh&=2d*FQ!5v8 z%9|iX!ghM{7uxQ~{~j+3tsH0BkyAMe;bSX36dw&^$B-(Ja}RYKkI1={Vx;ek-$4iq z*HQp~!+*u_r!=Esy-83^#^^K+wnOP4IH>lqXCVC1H5h+IgX_tK;KCqU#QBVRfC$h7 zxHJ@zUcmeI%|0zlwpP6{yX~-|0txV$_`d*PV(~%(f}Xdq9RNLk8i58WGBHF@G0{Ua zm7U%EJoP!}Vqox^?bqsmGYf?L@X0e&p|EX>XAj!UgE(pWt0Kf4}W}vSh^67 zw~GQs^EnFTYV(2o2GHJdMp6<~>&Q=R+f~Rm_23A$;?R*T50@Gd%09%^L4-wY<$SHz zlYcYRV8(a(SW(M3dr7!<}a|Jw;`xM(+-yX)7S<$rx1JuU;zB}fWS?Z^2*WTf3i+g3Bn}1e6 z`FfP{5We7k)E`V1doHPh=3QK7mOGzzcsBA!GQKsToaIPhha;D*LnY1vIv0l$m1FmP zUwz-c3oNdazp+W|Hya^-X*q7VUK7Ja`N*K-F!yfERVtudjkw8k?9dk-&4PO-;FHVq zXRZyB&0eoP6?a6N2AM<2WN)94_o;wNJ1O+GXcJt*}%?P>A%yFSbg9Z;C8wn*Mk zB*@PgUfhF6r@Y0k9;k);a1?>gM0hn4uW;I%o{rL(|_JwMlT(r1Z8Y2F%ILtJY^AZbyl^7CYA6&Ra> zI$wYS%tbk+edH)9A!f1BoB)p^op^v{1d>qxGpetonjEX6GJSwCPd4E#KEkEjBb17Uqvv0S2 zqVaY^D0KFW-3pG8@}QTI7(evXb?-$y0&h(molG+5F5dr2o%zIAP+NxK2M)s$9Oc#8ju%*vM#5e2}jq8s8(stlh4 z3Ty~Tc!R{eEVc<5%(hJ!n_!4~?r+K(wtK#>l74gquSV|kVHhKu z@_6cD;-hw{Fqds>K*vYBPQVo*UgkH~#FE=!jtsd^9Z(l=$gxE#d9ZJ>&2i$!*uX2; zcmP|)Vbs6B_e@O0^@s4?JHb~MzHoZc9R#RdA)3WS6n_`zA$s-dhz1TpQC|)|@nI3w zxX0CWs&Dr>-Q?`>rwt<_cMFx`H?X1etVa()Z|L$PRf((-sDmnf#F0 zOWc#JuaeY$(7KQg!0wT_{RSLMPJCyx0Zxn?RZDtvfQAN?N-S8YUQQ=ilAkzmPqk|B z^OJi?Wq*;q|0Mzs^Z}>sa*X@j%N!^V$7f#*DKLP#pp^(uhX9aEFrS%V1p-AL1#e=b z3j~0oTkU{1>gCkUktgQ2E74J(`N)o`8nGjlLB&8)>%G z8#zZ>Oe#JFRJ&w?sqBmF!Z@&o;M(rHJm{@xz<)Kyw?1W6hiHch0cbOL_0y^2Y*z=x zD-%HOHpx7Hd?WS8+_;wATXDBo`536Gv#~>9EjIWA>1oFG3B#AGkTOHQ?s0XQE0P_Z 
zG0BZ$_g*4n2iXKJ$yc1J49^T1Br0DVyiPrb1n{arn~efENI&@LCr7F!D?iZV>Nl}T z6MuxcFvpH@f_J`8 ziYpVpbv(|{8mza;hJ%)hdXZGS?^>5@6@Lf|fZNJ@DhicK1pXTT_@S`{S#0nFX1Z$Q zsVU+^Aj|o{Desw*MS&@#k8u7QcuS9*J$C#}SX%;9JevJF>!rb9g01{%G1%)$RAKAmDPR;p=%Q4-N_0z9Z3pnvlz zKX>#<(TDn;@j{R+FCwu9ZPxrS#S;lWR-d-b4a()&b<+j9H*~2LUeWOv3GEcJpUlInnAbV}vj`WL~z= z=_!_>?Aw3h>P_1HmBF1;bV{u5)X#aft5J)Fi8)FVeF%a{cD~Ralh0RWjBxvIy9=RiDq>+cobu1@_?+(3(xm=aW{RqWskx2UiOn$C3o-| zZk3h1cbpaQMadNs5WsjqErc+EvGf2&-Uf8b9}`1Q6Am6%=N%&QvJnW+1~^3{mz+O0 zBfOKo-GYLHBKQV8LHREW`+q(zPcR&-hMjIdCK;{om|?Wtx6$sLVPS6VP0>+V@6Wsk zbV~a!gTS{EmRFh~u((l@TK<%qp^quGVU&0b7|BVoKBfzSEi7$bWvIM%xvS*Q@)VYg zBy4&?;=eD0M546HH_sO6@ce zdd6%HOG0Clhe~6RJ=9#C-R}m}WVpU_z2s#gl$3Q8rnR#Q4x4 zM?{POpMV_Mdu~Q1o1x2hy{&wpC|T^kdZXdImEA!X#b+CZ=BDnw^ARVZ@w0kfl&yf8 zJ$OKd=OoJw=zl%wn?j5Ltz(NaNJv^Y_G^;a@(h@6RUah9I#zH`IA2!AI}6H=lS>4H zf#P3nrSp?eLr+VXTl!t(tz;YK=KgyQ`S{^81!Jq|ZiBX~1l2tDG+Ys2%~fcwr|fmP zR}MKA?UMw%9AgFOsW$aYJ_1nz*THP-{H9P;A4nB0DSwEqX_^>|c-d)=2;yFLwP$-^ zMwXD{h@rna2ZG=qBm+O99jFPv@1c5F(Hxq^jEsPdxb@#{!u zrqY8Ag=g7N3r;cqYwpksml4h+kb2wmCngpQxO=8`O%xvo#DAnk-iw2e?oj!Ch}6aJ zR4Vb9L4QTKwzZQ-ua<@&4fA4;o5aTcM9qs(;ur^=S6rkejq_Sn|NHj51OtV2W0} zYcKpdxJLg4A{p@M?WBb0|BOm~5rPh+x5s2YuPzBqtPZDRVx=h?&W?Dgj%tyYZPs98 zU!Y-@0LQkM_a}m-u}0(bS-ED8-p!vgI&8O;j)N#28ZuPNfr#FK-lQ|xIufO zw3!#zoPuV7@kFDF!|J;0!3;SwP2ueBDZ%1TytnGR4o!C5AsC;D+0h)I+^D*>c6nfb zrm%ZJgm1(KxY*YDiZvXs7eel?`gH`;IuTuHhKEp zFQH@n;O9oL4jlj8Fzt$G8{n`QcXmsg-kEe6a-cloC~C8&;>u0EcWH2W=NpXA(3v?5 z)TSGXZnrWGFK6Up(8269IQVhNDw-iN5{YsH(R5`>BdRSg~>oxcwVhz*jpj(<}~ z-xxv=2TFazeBI_Zh8TX?6wFo1q4Wi=lv;63h(hBr8Ycd|AWsvK4Zgi41S-cF)m;KC zdxu(38B9Vy)<;KfsMWy(8G=Jh2)U0_UdEEC&yu}Ny+S}uA0YB3UU8A9N39K`I zmKUo}U*chW)7yj{W@Vnv>%DBJrDSeC`*{NGXOpD3h18g9qAxOoi_O^sg7tSUlOEG& z$x4a#16!+jxPgzhfs&6z2M)Jv#9GtAbiXxhiKBOnw~ns<6uFD$Sh3KOUVlua&`+m^ z-1V)apg`w}Jjbsz<(;G{xJAa^eRs`r9=~WeUsPfs~H6{A9+$FX5PP zD$Z{KvoXS>A60r~wga0A)_>Z39^WOQfKmcHu{WJNQ<);Q!>e%%?De?vc0X`0l(xt? 
zYMp*3(YZO$02J}#eEI2;jk4(0S7=E!f>Yyg0QVt=8O7xT{#pl=KS zETrz{%#mXPn2h2P4VA}BR(3&it&?~6-YCY?;y1j09Q4&&-lF?Pl%9=?iG#IkTkTcY zW!I^Lw>?c6312(D9}>DOYv~(6EaK-mi~#^$y`&^l8OJ{>HVaTjRR2iz=|0}%LV0wQ zz9e`q_gw9+88vk927efjo5wQBe3#I;rnAU22=Vh{Bo)wrKk;|Q+jqu0LOB<#AtheZ zvQQm9EznMnOFDwOEtWaigr56>TwcYeZiaTsDu`tUc^a zWMmgFq}mfrtb{tDi0TlsQ%61CrPKWR5=2cog(;Ukb=31zTz^3m879JOG)8rU7$`8Q zoxo!>4D3OEV}+mUdv^}loxxF|0OQl>aC|`NSR_)3%ugID>wrqMY7#kvqUx6W17yMn z_PpgtxAW|;I55XZq-7n|dm#d@2eFs)zL1=`aT!#63vwDVe(ne;`AI@lZ{IaIba;~7 zj$}@hlpy>xt$!gaDO~$whx#J}RCo2KgP2J%oBJC2`PP<^VVU8X&Y@<7u)loIu-5v{-#tj6c^<_2XvoVNMNig3b6*dk+p&xb2dT5V7fjjm! zakc6`7f5I~#TB30h^oqvHEFGlal&>M_WYK10gmwlH{x&i;%EVBlQYeK~KF{5zo zjejcqp3g^_q;5$}MxU*kq-xBJ^%k4r6`()yVn-W4wLs(;+ z9fDV0pQ+;Hofz+VvA3;P^cfvHMEBtIz8wuFXvw->2|hcWP-Z>5duyJ!C9~^$e0`I< zKLRzld9pmSAts+**qLKPj6Bm+f$1z;46U-CcWH^}tsiuX4}}qQL5E^2m1$f3%#w3k zp|mpfCR1mt-1&Tm8K)2WOE(%6e1BBn6IOm3SE#-(_MVT;^VeAQ=61 z@`o|HUOVp%Ugh0#@QD!0i~g9#yFbwaw%mi}+)4|1Q5_1){>Bd;)_g-{)13RdaEKW=vmYmsie0(fYHUe23tT zaKhJ7Dyn&+-oCV4w5z^_)TnotZOxn5Q>WNSACA?>{PPwkQ+|TNwDEON`OG!hh%OBWCXgl13#N-Y49$y6yP%LSn}vIyJ313=- zRaMU(o~Sn2>9L9T({_`}u4m>?z-if-fzPvv3^^sJY200_ZH0KbCyyG1&~WHcCfS0M z5>d&pJf1H4^IGmM@4U_dop`Q=F9SOpqH3~I`jI{$@jlzlyX-IWIx%G^ySq5^G%fW( zf9-E?#68#Ad_HwOXMZg$aRJt~&c$H+t7b!YAHemoz~?pcPsSCnIcAhk7I={WfcD67O8?K_2Y(V z`~cmGZDyB?XH0tC-@}y1A1sL9eI81$w9hVaZ&#ujwSQ3hq!_Mz;O8aKinC2U6JD;X z(!@X{UE18C{<;|SOye%6qd%$IP*CuM*1dUL#j7%%J-lDwJ{+J*ew$XgnA zN#+rh6_i(#>x^edBozS)t30=?xAgPFL7~>&9)ASRy%G<94dXd6)Cbzm)m424>5bXq zB@_G4a2c|*>y62AQ}j#IZPOg8=fk`w9OB8^)c6hJZTv1%nLm|rZnNWRJQ?nzKM3&QnVcn4vX3Ky(@1+7*3Ax$%@Hpp=dSlJ8^Bb z^nYq30T=2)$&P!Fo~CP z>7tmy9#>=}B0M+W_%n$PM1B@*)XN%edw+^Y)i0_qEja)pU0u|~ksoFzvM=#9ghHA) zh|=9udl9TeZ)>Xou158kUJ$>8VxNw;;q@WTwfLwkKNPPQr|87cOXsz75R5};6in0uBOZYoyQ{u^J3vF3c+aiYd+IjGbE#j^Y(ldJYxoOvrtABCn z-SW-l%N1U{Ck3oq#qk7#0q%{7TT?JxX}YnSQzCU{K1?ClXX+z0B6b5*{I<-&UZdWc ztk5d&vwHvFdPu}s4nHS%Gj(z0{X0^(+=iEP5Ze#b%$QzLzB9c-%URV8ECu0R4elX7 
z1}_pa+wW*Wy$$$kioX9&a0O$qWq&=Z&vUQ&*Ed;2GP{;ujqouGo;|}lMMZv?nPMt&11+i+LRT4{NhhYF>k5Uq9kxV_`l5R&IqN2-*+3&v$e z#PxG62Gga6FhdV=n!uB*8iF>!o$()9nnN~7&gQVty&uq2x z$Bu$*4x3LfW1g(wx9&AIVplnao$!-vekz`9qUglo%zbkFn@nBO_(2hG(G3#6GHBTO zr=+m`_bq0m)}AjEWG_1rcmbadUFdua8L1~;y0|+|TdyG0_1< ztL^wq$j^S9fFwripvcchWv-8>u3)`h7o~ppuh|fwi8%437|Li_aNJFpS5YU%Q~`An zwr6m$qF{MN^M5uYA2+AR{Z!mPTtB>@v3cs{CZA^#c)4|sM`URD6qW{4Ve!k*p0j4r z+-X>y2B;%cDzuq znADlO(zoV?ry&X^lnt+|D_g0UB%jws-NQAPUpb-NjDOYa;tPeh=dpP28BSHvb0UIt zWMO+c6M2Ts@AuVHHOOezy2jf4aTH3%;`IbVy)n0TEu82^y2LHbV`n+VTOUJVXK!1> z!}};E4D*R5LuyF-Q=cy3<8BXG72?Ug2d@`<97xF@ec2~S*3v8E!6vf{ zfwVl#j(-Q`X8JEoP|rbqlXuvi;7yj{P}XF4YU(S&t^S<#8hL40LHFJ-Z|RqPwbbi1 zd#$a0>+k}fLW{Z%*iFXC&BpmpZ5r>IlEc*Pu=BnN}@NIor1jFajt!fcVvzG)tu6)6kGDl zF(@-$G3Kc!xSr7Il4IsuItk%Tio@=5o$S}MDoJsTX3CxshB?j7zniTt_^9&T+qwjK zcYnDTZEI<*@qUwGRcIXXZqxB1*xY)$eoUYjC-VM8Ug@Xv=QBKgT9DddP^OM`7iV`b zqtuj26g<7gHRa`8??%eiL8VBbo!@Y{UV*bsrr&!>haVL^TIw zUXbB3oP#Z#(BspxZG>29!M9Hzoy!f6%zwM)2h*by+!vj;eO!_gvEsTEIQleR+I3R% zPqpYhY$lFi)wsbp>b&fc^0px6JXp$vSCzr}$J#0Ha0)ITco&+doM(1^Gc$L-yt(tE zQ@DDFcynv7Im7UtL2h^CJlc8Cc_!3$UjdOhm)55auGM9@Y<(8CLrT|BdcXVRMSmHg zy+;q^o&Cux&HhdMdS_RWOt_HQd@ewz)S8l^Wt~gJowucv9_+BrAO z5=FH;8}J@Z75%WDedoBE(2=o2=+B&3Lv4^6Eab!sS+wuSUY7h6-C*3@9$8!&RJAXs zBC$#7aT2%y5*HLZWVmt9-JFu?(tmH}iKgY9I4j-dbv0N1GU*~sJ{s>a$?J9D+SPlQ zjB$?Mdr-Wv=hdB)=(?M?H_xt$a9w=weStG=+a0&A#%kp?pY!xZ@jUL%GfKLV!>MJS zb)@3V^`?BXp%|V{oZ^!ayfJiZ<*U$$YEYfY&%zntivQe6dmM2QXi$HzjeiAykJgBQ zHNNL!0Hy!uxQ`6@5;(_C^SD;hK;VXW7q}zLfM|e}hf661prBX@C^8rp_El;%SR*b( z1_N#scS0zKJjKV0Gea~AEeglu;@n}$LP(X;ZK(jOvpc~{!T@3@=TmMK8lvyT?! 
zdggPLp9<0JZ+4^!Lli7_h6KV3eLE+G3m%@UG7R{uGeil#5@A)VMfY&h(-KQwb;;_xi@dc+@A|FOU1q8|?z9sz6QejekJ10XGbJ$@3Yu z<2%OO_194>_T+u^2=aMzpWuNv?8h6KD;#)8CT%iAs-j z$}JR;D`e8to;m2Bfsd7qh~DgQ#!pL$NBqlZ9=ecgv*fwfk&$I)7<5WK|gvy}oZJ-M{HlAD%Yt>}7jDD@B4anzRi7!0pP)7mRKF zxO_l6tJ0IF?Zo7<^%k0BU4B&Df9BhH;W9^jKETC$!mGV8(U~87WCueBF~KO4RrG&VTM?gEvrgCLCt3AE3^| zn1<|p#iP>)>lG7~kRp4dj73-$pXa^VMD=o?Facxkv$25s$+89DpomHWNKF!vu)gaK zUQwc7M&g6T%U!o@km!-b$%*cQ6)ZXr0K#?7qy^Eq(s9JHCFh0$h$J|M(x5LYw&BtE zs0)2qok--yhJVz>>*prsfE;aEggk=gTqz^~kRq#1vF(;ckQMIhYQD9%xTrnEDX=F_ z1w~=$H_^nTuvMYVqgK)*8ty%-*z()4&1&V{__;^fTg%k$!}FDfqBomDdp4)Kix*RM zTzF!bTyZ&uz)#*jVN|G9rDm)p4{`<-@8j5-)FhLThJVHDrE?YeeEWqjs0rw^W-_8e z#Ti^%Z-_kK!`r5kty1JCW}B=`nGTbz*47U%AJq%I-2!`bBYw zhZ$EPZhu{~iExDHO`K)4=ZbK*(@wJzdVhSp6Id>CVD^tDDqgN~xofy9j8cAG17nUS zdwNR=+vVJ=8}Ws#15LA*(2MEfEg1%<}Eqx7Q}Q$e0;>15WcO=0z0E?MJYmvWRt z0YB~wR+=;7AT_H`Sz#?l(eL4;t^4f}O$`1-y2m$<$a%ND@?>)clGMT{VnU`Jz27f| zgMZI^SG5F$oU8Q?8K}8vOysiRJ6U$L=$DW(o^N+g+`{aAs%P&@DZFwVy_2(!rHMCz z$j4=R$?v?=*r;DpI)woF+0AL7I+h#-jYUIyq~xpoTnTT81bcLe^V*EbzzRh1V8xlx z_28lwqkJz{S7q}@K1ZR#@%);Zo2m!(yMJk%UD(*LIw@=oc|R98=jpZV-KJvU{M6^i z4H1(((}xPUhs-Dh0FW%*i}CQV+YyfB_0)(fT%qLS;Lp6KK_EAg_) zi@535kMn!KIEl#c?#{QKb=fj;D<gy#RA4-SJ9E&+wi z=XH^)XloUTeoK|ibi3M;;-wRaC`(t*Os zb(#zQyvNJ4cw9WpziJb8Ia_rU8!vyGg`4*&KQ!sT?TntcQGZ3@3Nl=~w)ZN!^^&wb z#HI)&WxMT_XPh1FTm(=1X@3M2@Nlo~G3~im@9mlB3rCmMi$!UFH+6ONqoIPH-%%yA zsCoDt(z#zUNsvh@U(q7ZYv}e=-<}iE9U`y4S^)-Hg?=Q==X6~~`K__4mCB>6yCX)ii7NrdnFxmS^=*H;>!t`eaeEPO$3EW(n+>kS z^@{E%HmmY=qT{E7i+><6Fi~WtyFO&B^hj8Hhoeu@65%mXo}Jzvk>U&U9OWtG;HRFd zYE}gEnkI!F$%Gob2?L0Gr}FECb5HTE_A;{s+8F2Xf>`f^TFJ`kJ@@sl9^b3G1$c5j zdAXqnrpg+m^Wg7(9rkP4UvoDOWSD-Wtz*U)S*7#*)&}lnEPu|s9+gLaqHd)~T;3FT zUoQ#$!P8SkQL8HYm~(ob*}~R7Js((CIW+=4VqPz5lKO_e(+ttt(Ml zAt<%?&FHVjdr#k=M@@3RI;g!%p3lp0D=l?QN`#(y9k4>Fun!rix(Wgv52ME_HV}d> zek_9Rajb|xlYiKc+J|7@g@?Y4>z>O>|J3(AH)G*l>GgHn(M@c5jnmCpRM8BcZkpE( zO4=j?S1zy0dm`koq;>E$oph3%R?X%VvJUsjmSW~P=<9jZlZ)lP*{TUOW2=p!#?+`J 
zuZ>@kxh?fdPgvs~w*=qZ2XipE`GqvQxk&IcW*x)rDt|$klCW|dc`f1Xo)~qiy-BM* zxn~>$!WDZ~`#SS3V}iK&uyyL2+egU7VPfFUoT|%8U*mP9QF%Iz;C}ZK$@%o_uK0m) zKVzr-U0Ll;iQW|_-l>A$dW^`-^c?%=+i`;a=qXSbBeA)>S+)RG1%ED^`Z>C2ebrIo zwvwC+QGdc-n+m`iTt{2nX_eYjd7y>#8JmW8#%9T;u=Cqb>Ug)%CbGCs2XoQma+Qg% z9>(^SUihh1oO0sm!lwjVW@KRSIg{j9)%qlPonL1Yk9Chtrh_flt{v-8w%EDl~)sD0pETjSN z0)r`k>XS(1%9yR{NQ}$anZvC(;G!Qs`ty(g7k5A8=J z=*i&DPm9m@rdtz+E|_b3vo(5^ua65qx_=#U-h#FhsXIf9^P|M=cXJ){H)CNim4f&| zO_IuAlm0%Z**x9$IKIB`w=L|}_s(4RC8Mv}o}S<7^xT!kDn$d+o@RwY>9!Q|3{RAk zHv@Cc&X=l^ww@sDyQ`dJ{weoS*gwylvBe$f`sw)|sCn7UvwBf;-hF?@>C=Hj=YQ~w z3o|C%?RKW@b$nq=uYU5l7TYwsD{uAl35ruC6U5+N*i<|vtwkn}4HdLt`}FBLCnx2E zuDJ9#6DP{r-zk~Afp^j7`yYRiL5X1o$45HJ1DfY_)<;58_jkTlWdLM$KUY|K)U(Ft zO>IozP?Z1a+fH`Iq1_a32q}TJqkpHb3PB&rzIxnU{uF{C9Q9typZJwwAI>c+KKBV1 zcQ|euINk`?`yD>E3B~g%)q@*s$@oEtd;B8PxFIeV1};=?hG|VG-7ajG{2itZ@i^ZC zW0Q8Gth;gRqIVZ5kHoO!;e50U9*dwlGr3ezDJP#Kn-(g{;ue4SP~sdcEr0ga10)Pv zfc4+KR{p2e%Io#F(+U9mApn@)_8sXe;J8K=M?VSDFpy?R*j$)$+35CHb$D9HdF4{%5W`F%Ri0C4w1i7+|ww^PbjtAKj({cAO(QrGk5OufG& zSODORA`$&67Oe)oN&q-2HwZzN3KXqJ&_CWBkYSfPKnDX3vFP;NLR<{Oj93 zMwX7wC-B?p1OO;7$M@CjX!de+ngJV-{iF2?TmaBle&?NR=N*0PdVjR(rR-zHyn&Pl zpo9yC0dNEA20#Y*1;{T%tFV+=y`D*fi#DM`qFm`5T=d>>5vlKTAV`OZf?9+3p8N)Q zX~hGfLb$>rTmdqJjW`QJx)<0e6}RXZmkLq6s zvSZ$7qxp`{<4Rc)swgxX4_9iv=C^2oW#kJm+VxT=4HxzDCVz{9jo`HK7c;D%0Jj6! 
zy;vPajaX8u=mx5X$6H96j@PxB!7G#AmUo-e7mtX6#sf-)BAYsGUUbnb3{WrDSq}nz z1`F82 zb|n`ZfXnT-dw&n`fQSQyW?u_X0Z4Xu1jWy~;NN-MNP=V;fNHcA;N3y$LJ^KgMrHXv z?2T9v!y_0hS`8oqaMD$P_)T3rU#b7tFFFf|;sqOpLMfyjyZFND2j14+q*dkxN1eGU z)FSF6MnX&pmF~E{e-7L}O4R5(&g9Mrz>C=P0=#3KEPvp?<23BZ7C!35 zZ1x8Ikc-gvLKj?l)(hFSy@Gl66zmeUWalRpK)f?QqgPhef&BT#DByttBna%0{gfa- zp2ICa_K^4J49jddd_vi?K-kfsFOg(4SqRH45SFZ0)`ZqU3oS@RrZYZ_YN@rQUDy)g zBD@IFmVaR~^v43a>2@z%A`KUarGLavv5sef(`zX(t-`H?$1Scx2_EF1%5 z`iOUNUUdby@+9ohVYA`K=k-lkqd>5c2$!6+!G9{UENu%5Z7U790NPJAyR^JzbcL02 z1)pvd-{8Q3_*?`oi@^fBJ>pQD7x`$DH7O2%mXW?;2)W=h4Eon^V1;2kpb-lX{DIRU z4f311|Kd&^;q|^e)n<8?&Pc2ENFVl4@AM7Sh$RpZzy4avUwa4(KPU@-1{E%04HzJh z41ccFEDl?5tSxi*Z#ya(sCSG2l?R!_p0EY1=GolgQjI(T`)A(rCV%lxSfV4A53D$W zBj!u~5}kWG74lbGQ*SMRqU!1a93se6-PH@sYvB>kv7*DEF}R}2-YR7H`**(J=)A=$ zKzaZ;9`n^f$#3ea4+@HW!A85@M7yzim4D&to6zW+(Rg1>QAnNz4nP|6JScund?kkw zEHxhBnX*Wnv=m&fp#UKMsRP|$XF=0APdMzQecNLpcwNB#>(AxapDWqP&VO(WZrKU^ zVjNr|5iXf1*as$NA*hCvP@1r+@m}>yM|1s9-d81B6<$~)7A%+bTvBm5D89U-hkpde zOmd?Q2{=Nw=YtXnY>AHfx7bdz>>`bT^^W=9;W7Ws3xz831rRlIot=N@jVoK!7_LnI zGsd6%VCSRz|*GD%)h zr?30~%M31kDPU6x!%kP(cAk0CJdn8~(1zf7|BA9yTaZflZ~sHl3KeBjX4?pMU$KZR0|j z@V;W~C% z1}Z&at*Zi1=F4u@TFoW3_kWhnkQ}NU|AlAFLUIR+5(p(|j`fr61+mBaZQdU|I~z@QVEZv9 zQQke$*rT)Z#r}VJjDINlU8iQeoOHtr1_Q$o@`Vk{(G=U4Zf$FBH^yj%s}wb3Xz!I? 
z(XTnNX~pyyLWbqdr9dLK)Ya-hlcK?jK|2 z;>u#jr^P;Gsh%gP!Y6f+iX#`O&3@XCe6Y>5T7H`xk^n_Oy1&?#0sMcv{uvMtereD4 z|Ihz__`>kRHxEC~f^TeGj=X>@q4-KC-HK{9=tJ`e2wJpc4!G`U| zD)uTGTSeI}^-wtE6@P6i=9(G(TwuQedozj+#Y`5N#U!*%$ zfqiCY08U4KQGR$?2RDk9H(65tKjV=~`55!?6BFN9A&tMeC)Xz!!M=1^jZNSNy-!P}34aLHhkjK@8~sHw5r{kY_Ii2SxsRLazsk~dvXUz2J6>z4tY;fLm8tC9p+L|>@mYAFgCHRuoU5nK~@5@tu8;kS%xJ0pA484CK=6a8O1?9q=8)=?vWX|jaD(u|O5M8-)!3{nxGOkNSX9a&DP6eFp-6eaecCm`EhO}eq z(Hbp*2OGUYRK(Hd2>>mn%r7TM+8=>0H<8B&l@)x{F0Cb^hg8xJg#(`Dw}xWth+6@H zAXQsF&uAMM6kFDOUROmi6F9*aO~M{QP1ix^*X+7n@ePg3nmM?}K{$+wMSN~$$VN(-)W8}+e&cgCj=l#S>L;|;s z>#+>Xk`AJJ^fV|tYF$=>m(ALWdaFJzf^MBSNO*NMaHvKfWvcGkM!O|YY|+nx1VypR zX1P?hAAXQC%U&G00v1<}Cz&-_aiL_$F{*lb1b%;ohyO* z0^FA$_7eJ;=QqB8VmY|r z>t=t3-IcFiFg$yI43N4+9_bs^x%z}2UYpgJiqL?;f^=yu!?a?AL&E_4fKiK z1gchKy&309dXqSPa%|k=%k^TN2i~k+14{_iAR3m*Esj;MZ@A7Quphdxw)fIL{2AC7 ziL>WYoq=%hV1ws>*7+<{ne-dS1^DKsKYZfr$H(WxU#vWQ;=qBQ{M{=XC4KqW_UC`! zee=VwzkF-dpZEYG+0}AugT)kWkSQCQG@nFHTd7<%#{#?XDPJav-A?MhtV)e?snm0} z8g;%br3hC`gP;qs^4AD5w(7!3gq_YwdaJip1)SGe#C4jtc}sSp)mi6LiVKoe`L+bT zQdl!ahEY*fOJ~$=Y1{HmVP#$h_?>^Z7f)Em*aT|B?Q!I6nM|jR{k)a~R9deUp)g@CkQ?~I~}j3 zfn?WK?>9gH_=#1@2Nyc#`aXYZT;o6f@}q?ves*9;75&sAoBaLUI5dz(->ec5Uyi-W zBK+cyP(PZnSDXyVTLlG9UnFN-)wi$e+e6?wbQUY04;-HlJN%ti)Ky!T!F0E z$d^`Khe)(IRwi`kg0*fm@U`A)Zdps!%e4Jvh-3!pt76DyM#&BY37>!M66EuddhOX( ziNHn`?|7Y6&n?HuuF~SGzI^%O9Xn-gx~S8^+w_Akx?tp%_Q!t?JpI}xlH{W<-v89% z5j?O|D)n7Ia`jK{IDG9l=X~*qjmz#6_7_&(eO^YHB|093>hdwe(bd+0p+I(v0dC7| z6h;-#yQCbus|NC&f#82ZS^h?~e6tW-Bh$!cIDVCw7|OI;wW(^G3HRU&d42g77%9^s z6`8E>+A*&WMqy`}jTiNC=x}BUo7@46p+|7g;M#9#g4Prf7% z-u1=LA3bDH>g)~bgU3W{OkP%{_C;-_R-xh@aTh`zo+7o@PF;T+L2pNk1$-m6J6C@Y z567lEc8OH60W}*eY-DGI3_+~ecq%T`2Q`l#>*N!3biHZikCxGdRQY!sb*jCqjP7ru zSY3cp>oIRD$94%2EUX6`&OS@X0|5Y5^L5)P)vu0&4sQR;4Sw^)&zgSu(UDhbWzY6M zJnC1Y{DYTYPW^u$j`rna#o-6Wa@0)O8*F+F*NCNN@1mm7a#pH?Bm=#QH$>>+T7x*) z`>|a!0aq=S!AzH%%vddt@63t8RaB8#=)j8aA%uPPF8zQ|54- z-<+Mv>K5Jea{T2Ph{b?9171FOM#uf+9}ymU!DYEja>*BU@niK?i%?FC@CA=UQTv>6tD<}H(@^BqFgf-eH 
zSSBo9ZU%pDTLAGl&7q?b&8v-Po#C{h=pf@+%C^4xn!lk^6`&*8-kIZoCRU!K+my$( znWS;xs%?Ft+2D(Z;s?7!1F%!cDhD1Wf9sX(8_z#}{$1Z+KR7tn5ijk5lV2V1Cr3x( zfBl;+JF&+;#xIxGO8t-j{Aj7=Z_V?|Ie$3*w+?^(+EzN^@!&P(5qF*^l}9{g{CYDG ztd|R4mZDUn81cMpC*vkV%H<(1isk6DHm~vkw^)cT;=1sx?JcqmYJQDOpP65p(@xxi zO1Lhni0Dv2%TtFsvdb}5Jv1p?#F$M>Yfw>An7tZYV3VpFJia7Laml%I%oNxLB2V=S z;(32|6bsAOBP@m%s^Ph@vP zD6*bnk&9qBIxMw3@;I1QJsHX zNEB(N3 zD`iDD3kbQ1C`zgX0XZu_;TJ)6d?a&<>2M%$Dp1I!Qu1r&$W@8B(vu+2S%7#58|a1} z3o{%MYRi*CPl;@WasghvfaS}uqD^IsbOhtOwArTQ&PP{D2KhKHl`Su#FOPq4v5n{E zqh90Z;|34;6)2Xv@JGHC4$L&1KV1A5KM#)e7aI@kpjf#5!@a)T{tq7LUmnIC_JzN7 z-BH)9$mZ?rFF*a!V6!*mesQ;y=(?IyqlSW0(wM}di?AYF#?ec!XUc>NEweJRY^sr- z@e>7E(hkwcm!rwrbj+=zBs+iTDhZ3L_SDT5`j2iep&06;yaB{kqr%ovtu`maD=+A^ z#t}~}*>Su0YTy=}D`I2XSTH40c z^69|^GKP&M#@mZ4J@~uedbx;XmM!UAaORJ?eCuQlyHZ?-wBWaXV&Q-C-ADMN1spu* zU)|*d_HV2XhwuLKweYtN*naDypEWr0`hWI*Nn`(Nyalwg0<2#hp51xxt#C?aleU03 z{7SGRZ9+d59exnFnFc<7;hM7GJs)bk^Ko0RQhZSA?M2pMtn!emIhalS+G-HtrfBO8 zCvM1rFI(^q^~_tO&3k`K|4)87yTU5hA_OKx84nJb74f1j7&(%KU08no_M~$-Qdb~k zAO>H*O%1#J?#KV+tgqJetD$`J9bs@3`=6Zki=Rs*2~~FJD!*Lo*c<($ZylJB4gc)j z6kz>oU_W?TdiB4Y%xLb0)`0)^&-x*x?nBP)go|14$$?UosyTn;%J>qva|4%hQvscU zWb|yLi;mld3ZU^zqr1Fw7-Zf#7)5L~vizzuf^DauoaL$oss82hGV6i13e5W_{s|PrIbT*r}=%(u$i%h_PV@$7$M_+sS=aO55Osn;|A)jtFcqmZeBCl1h|f9&P^tA9AZ{QR$0^`qPT z!N_l{A9`rzd567!IsA{F|Bs$!tlzcz*1^YKj5q4E4ihe%cHwQe63@ui(|@0 z&pN)CTv<+n+Zv!;B+rAH9~x}ywaK=q1%0y^=CzsH0;EDuZ za_R=V>VSWCD6(;lGYr!4(#mOqjk1bZ5H4CaX$9EA%Gl4B4Yp(4=i-7yMF)fkE0`w zzPjFz4*abv|JGlBxM2vlAKl_Nrp~|nr2P-BeYJn&@P~81Qn@NfjYTI=W`-Axw&>6h zA|9#sXlsir+h;nEU6<1~{ml2zh+C?A*3h9)&8fUMm)J(U<$6W#+iPXx7`K*d2bK@p zKXi`sZhU2b3bHjEPWDo}8bn8TI#1aZ8yJG4J6c0D?us9H6CZf9mg?s?9v2<;woe$+ z&;EZ5{D(UQzqQBj+W)E5e(JX$ERW7ssUN(7{oPLp{=Ap-vyb(SmlLRT>BnY&-)kU# zdEQ?=2RMutmvpJ|IwP)f$@`0=z?Rk8S>RDd1ueChQBLSPV{tAdl^w+_qHDXRUwTtg z61S9c1^y3v@3!nJudE5aOW$=kI>0tQ?Ye)dln@|}7DvNPhmCEHHny=1xTD{_LU_zM z|Fze@*6xam?yDX*1FS@(q|8(*^OM+Ug>^E-q@8I&UBv5F={rT~n5Br>J;N|~9mS8S 
z3)+wlYSyTBCM9Z39yXeu^?quYRoiaMU_y%A#a<DHbszp(507zGCI7((jQaEwOaJC;e#HhH_#>Ay`v)&!X4FStTQsvjW4$ln zy4{cKBJKu&qoTxKtOd{}*$`5Ul-xE!nVkEE#HjSYR1d86{bzF4BLi?@m{; zTjiLgS4C`BN{IqpEI^+Jco?j~&`N`Xly2QuqLVNZrm-OSSOIYvXuRZyEV+bBnO(Qzdtu z4-FK;URU2mMpO%q$8Mx@Z-K^WypwE#EADS%R>6+pO`HH*!tnG1nVeMg1e}YKjl;r1 z)im$}gEo2!pA1hisTGoVSkciDS4YjG;|K8o%Yu>FZ*E_sjOP`s1Gj&#|3=P#;u>l6 zJ9qN!kKH%6IocZX%VwbNm!1BJr~j7s?>hdMZ!`18$uQkwD2ObcXuo_MVm9O#=z>1? z{M2m{e?7>9`|_f8NxKT8o~9Ps$R@-quZrz_2uhzNe&AgmraZODjIBw}Op4pBRg(u? zq?d;NUKUwX*n6*;8^(VveBW>Wp$v1luuT-ZPgYnj&nKPgrw3h1e7k6n?Kfe9Th$dU zKfw)wJhjdJbLtksH(xQ2@rjuZ5T2K^6=lR%So0q&h-T%I@)IZg`YE6EpD;VWaa5l# zw&%M}jDPSMqmAdWwJ)78>n+m1gMQs^z4oo4t=2$w#YOf4Mq+>BiY2}rBig5`+C$5R zW{>ms8Q~feKc{ikR7F=KS%#Km?4W=Vqk`3JPHw+TXaL(icUIAb}k9Vm$%2}7XRv#3oCy_NV ztdiVB^9Fc;`C5N+EfZ$xH~$4I;S4f^m-X{}K)!OCPOUO^^}?FkizO?{Tt6j%-%!Qq zJtPNn8T=P(`+IDQj*#QeQy$i13Wr*VWOnAo!cnS-3b2!Qz=cCAi1iA^w zjDzRCIFY>5;foW|{@^t^zVT<>-lu#{=4Ds-l@I9Sot1wn{i~!RxUQFeki)Xfg%8v9 zEOND>!cZRfYR5#)O<0O+Q1J0{!mdI(F-4-GZskT$kp4D{KmrCkV+R>4Suo;}l!G9F zCc(qvd{$pPOY4t#mfo8G#1gbo2fsQu|C9F#cB9?vxi7^1Lq9ExO$xnJ976h%nHZ=$ zm=jsN-+h1jq+W`GGN>U&du0fEYn~U8moCh!TvZp5`c)ds-ji+B_NKB<)mpN1yQ6W& z>(P%51>pkw49ysfaVCZR#F+-`Cm%ibWjOLWeCIhOQyV{PW?MwmEw1kw34vwI?!*Q? 
zBIxA4t_{-7G~9&2z+SGL8_}nJ$UN?F76f5pA)$ZMCBP$YGW)UjyKn69SG(D-nAY{j zzWu_I{Tb(sASGu{+yyINrix9TgNdtNE*GLx=T*Sc)-a|=517V4GjXZ<@9uRVjVybLq z6h(DwcpkeKU24lbwt+w+U7!^osi7Z~f9H#xAZMx!_X_BUhfPO3EcsX=8mzIee(d{{ zc-GYZSX|!5IfIYyFCSEhV|H>aB40^hK%jreY@;KW`C*ma?W$r1juUMvp_--FmAZ~p z8%XTYg$qiEam-@TJz6!LsC`=O_$O15ELcE~aI82uF$XLHexy&m7hT9e9^=)2Z~zMb zF#SL2ZT>4Jmulb#BFBjQ(p}iC)S^f|(#wsc+H|9!|GpN=vupE1JXP8qISs!ak7+>#} zg3YuQ&-WYSyquap_LBP-f8_EP4uKX!#uc~cFN>;=9#W#N^pS3DcdiAyonY3|<{`78~S%hr4yHm_k2aC7a z;)CioUu`UM}EFBysO^*=-RKG;CElZnAg`c z5}6Rp_hMxrFTRp`eN)R!fnKt48Xk17W@a7xq)=E;+eb%iL&7?Nx~$p-o$r6UmTh#@ z_q^=v4P(CVDSQa5GM>l$!xv^_{L%m0e*J5H<}=##xsn@Lmx-cA_LRu$%HFK z&2>=jqD+}2ZhPJZLw;5jmlSDcF1jdk1JmfL!OFFj=XHAz;t^j>&%zZBfbShtfF+J& z1X*g*bU`MV890 z{fQwxu0%-mLfmi1&pH!Gwn77bi|-k47{}fJ$`i%Y?;3r?ocyx6<2rv``OrD|uN{My zlHSz=XdTsCW9&9tS|=|b?CKh8DI(dRca@=f6JH^nmn-p+{KN+&<`6h`9QmiVpY%D- z>;5m9N`GSKKmO;hSX%dE_d9L}e*ZSoqapt0Duz@W`@M_99~ni$!ZzGY;#+zsVof0j z$(a_F10V5acMLnbY=wU_diF|DvFG%rvW)0hvh#u6kz&icp_5^kpQ-CQQJJfHRw*Qp z8&`&#G23Wc)S9$p0;yNsoLhUU(F367Nvk_GnOcGGj@4~CKl&d!T1g5l=4Z53UPN0U(SGKdNk@`>mZnIa23mJC zXh{YIABVGC31)xZ#MVCX+`_%3sGDJIiTj}w9B!|K6`wyeC?uc^9^8M@3(7`6uTQ^N z#4`P^U3zlSf8!X^(C+`@TlLIGeGvWZ54o(b#05rV@YAWR1ci|`m4}$`ND~dCg%)_- z+A8(L1j9tSne>1B`id1Dw|aups*00~j4?@(M+rsFmi-H-S!i_Zr= z(-J-}qlbT${@}N+zx^*HktU=0rVpt2$3Iu^$&m6=db?tFYM~?{QtOilVrr>j(#xz z$)6ypS-)baQfCH*296YB7}%3r=d+M4B5vWEV~^k;c@sdN|xFR#d5j zyTIrnaE2SPy{7ijn#Emm)I6~=dr6U5yF;0Dm!;=b&!iv!O%N7L-9aO9-1qPOOl0)^ zMto`{~E_f9$nyg-8e#_*gOV8`73!foi&C%h;ls=EjO9t1xGJ%KbE*+v~pB{@Cl?AAH_8m#P0G3qO6VTi&kaziw2JKBJx7JcvgmTN- zWvRHA?bhtlQn$zbmlF5CCmk&SrFe%Z=SOL3txO~vO_uBAs+wu;tyq-jq^zG z6Q3UK033H4t4eiL+-ilN<9;(t=&0BC?H!7Eqx4w>;*=d^>!C+tzbmAWPAj6|ZlZsM z)yDFvxK%o&xF1)}0<7bcxAoeGG3q_ch0R(vJ6eNG!5#5^;>Un5K8yP`Xv{7B_6skz z7k}}hzvBPDZ0hh2Sf<_A*GP99+x5#A{*{Z?ix>&ZY~?d*;=ULBS*^=J0=J6~23VRk z6X$AgsXA-n;L(&3h(0D zG+FGJ**Yd_SL`&n8`U~=pbKzkA-SL#AxG~ZR>bPtwF>Q=mg{pgq>tTD7ntYTiNu`3 zJvb;g5l$Vsh)d-{G6a^b^g+hkvE8l<$SiDjeBbi^V`R;DAK$nicf`wl{pt}*@S`{V 
zKaB6V=GkWSFLPIn0W6F3mcM_CvXh^QsSc8iKJ?{uS-bm)1u2ySWR?Osb(z%5kaWUU zQ*|9eAcd?QWmyn8C`cORP!Cl4$et3Y$hGU%#smMRds{f#Pw0ne(-lg zg-%S1`F<{g;{m)lQ{dNpMaOxT$5DPTyMNZ^U;pajD*`~HxNDy~SPh(&>T|7Yc9|RPUw+X z@rkYFi?fc7@=u{nif{f-Z!YQ2RVG&zfP=;4A3$~3vU z>r6G|%t~;dI($!_ih^EtrPFR;F&iqrKLEI&2%ygakGaJiDy(07O?KPt2fs7Ke~pFw z;N*Y%V+dvI8|0z_C3)&7-9hKB4zP~pzB6`ZGP!rw`z9y83$cG;cbbbmYk&HrS5+n{ z8T6|wH>IaqIbyAEb}h@i=5dw!izZhRt8B|HzWS@5*A65Z&wF0^9QMWGelZDFAMm|} zeBS-FW^*Zzt9<@~8RMrHBzemZ*3Em-NTLqyPVopXXY8T& zEQ~y+>68Ag0l(r&e7^a{3;yDh{E1zD?Q=KcY#qpV z`%{k|Fo=KcwzCJj?AEf_Y@IIKYSq*w%5$dXY=Sz;*4%iD7#;ifg~dWe%-as}jDEH= zj1P_tH_I9rHLnS!MGcTsN6Dn4(py|l6JfCAJ;x1?01+}`kp2=s_~K_rpCJCpiHvc% zpPYz<(LZ9wzZiyZ9`HY5A^#3nlFq>EwYWd3SFe9QDT=9gb^KJBeUq8KJ=LP9b&c=J z&1K8j3gd+$OYPN?JJk(E6=CONdaT#-;Ju$pqRG;Tex$~d{}YxZ&kt~~oo$(fhHM({ z#O4_}l+3{0F%X`nmKklr=- zU*&%SBff+|g76j&+~c~(u;p?vr1Kw)HMd1SIzH~79rGxP|JVJ#BQ}oHd@&yAuls&} z{BohQh|*)fJlfXKcy1sq?T_NIXlmy20_`+RS=EL(uTDxq)x9r?np}yNNOy83*}B@b zM%SV}P=EWWUiMHG%4a>pJzenAmobY)dpdtH{CfRl^7={8k=P>JFml}6czriOykhoq z!0Z2iQttofC*{Jnf$=N_0OYgpJ%DoLJ5U9F?#e6$Ba@e)02(^1LfXQfDHJf1&kQ68 zcyPc3#7tg1}-et;AJ-7)q_iZK0)&lCSZktm`pM_Av*;% z+w#1DECQIYAwW8Q!cQxUeCyAKLXcSzAW05*$ZQjX%6uQtS`HXwu*raesR1I0pFaSM zfSl7F z*PYPmIjAk(ZG{Qc48l(1n?o_?`+4IE%ucrfD!k*H(-$$=on``L*k`bYbS_LlUh)AH zGMK_v0*C1)5I{u+HN+O^ynl&>3I<2m1fVi=CkX9V-(Ea_f%eEg^L2fLxI=%({zirr zKCfy8aN*C0xT_K1?B`skZ?$}i;Y|GK!FWi8HkQQWFC9?8lpn#g0eKBkhfbh1 z&|pZUZdYPHPGh7EE+C7r?GS$gMTD5)jH0rHlSKtV5F|m!gpDA~-@;MYe1zG1GL>ZM z81~cfho67*$2VYiSqmtPNSIKJ%ol(Y1g{R&_C7K}R9DbM5xP)%3nLS-9;pky;Tu%D zq%(S;p4x$O>3~?JYf|o~$}yqO7D(G-lqJQigQ6>`XXWMrWD}#R?W})5VFY4aG3&lR%{@skdQSEmGaOb1fZ14K-;e$%Q0s;DVLU;@$W`(OaE znHZMt0aQ24@)oQ*3iW?Ogh@%=ml0vy1qzDk0YT)pSu5u(!fNLzbwW*o)Je3mm8kv#}02$Ik~ z$-~e?rYo)L<}RLfgzHZP88ukunY_UyBm zW0(M0z$zVo`<0c9*MA@Z#;>jz+5;WOB5gN-6rb(1xZbuVwz3wXUwRqE>3}<;>SLd_ILWxY&;ed8D1$nX zJo088#T~S;ezXj*8@h3eXvqqsv>@^Ruql6+4??q;>%bw1RUkn4eAL{s)@9UKVbkc_ zVyw#>dceTjSC}S26t~Y7TW!l|#@nakp@(5CuT7JXZ#nPZ;9EWnJu1fX#x#lcE$984 
zjpar$^w=BAUnYh%A5ZfDZxf@eMR`Vv^&R8YIQMoC;m#}?K9_c;32$QsZ*IS=(SCoW z*}s=FC}Te>$GIFMjq*9Rvdd8?3q0y_OHdX?e5~w@&WNNA`}yC_6D9zgsT~A%>EK!C za2d)V(mi-la_V554{7bicf%cQjoa%;rH64I^=#ah83H#5__#X7&;igj;yZ#w;?-!pf*T7~}hfe}0DIg+|4+Pe&03ZF7<_+TRpOTK zpY-6+cI<-OTf(Yw-ZaL!SxifJuyZOVSqq8G=h45Bv5e2nzx)_P>@2}fhXOlKc>jVc z$L@>YXXBi90Q@(7pVNWY!!A@zatF0#3pd$)0M-v+upd5Rj5}00e`{}JZcjKLD^R&@ z2k+G(FU;G~uh9QaW{7?vUq63GWhI{{W7TC5CI>nw$v^O;RL@rK+7=*cQChNMoDUVf zfY;A>mRp(2jrbQD++l9JF=TFGYa?$nBAEx6m?oE1*iq5wV2Jp<@*S%KK&WIdDtFx1 z?4bBd8Sh&!;&aw7T2()Qq6e38UV5>NAJ|mJgY_d@^}p8hK`JSRby0t2=y=j~Qobd)279s;F~soW>#gLL4shj~D5=MzWJ}(!)0w)b{2oP@g(^6;aC4Izs5a2fJH_6BPVKyMe9KaSW_&Oj`{ojs%a8~ z`y+p;<9|(u$?4zd3fGS|E1Dbk!wH+AimpImGiCH45wUZ1yho$|`1g9Rj(s}LFLGBp zsLd%xDiu$>Za(ki(cyRdG}9#D6z!|^0V!jDW!xuO8GQXP&Vzq3_X(wM`Kv>f^=f54 zPa=KP7Y}J)zm@yhBvcSDkMHII8UIDO$=?kw^IJLP_m*kGg4A%-s`tS9Z{(C(P zpD}zkw9i(_+LnLE5s1axpO~h_GgvaGi;CgH<2_%xmeTm#d!Vwz5LQ={JQ$mkJt5=e!RYX>BsBq$Oj<8 z`PgILE&?v^Q?=rJ@cEh3-N$yZAG&Y*(U-3Kwm%r@p5K4=D29L_h`u|J3Sx!Eq-;3`>7iJ>L`C^h44tv{bBz%y~usOKlDF3&B$;1W5MeeAAhkO z^YI zWd89TO~zwYG7ajY2c|mF0no2GFEt4u(HK-lz<7U**B==&go}F*aKvpNtH7Fj%Yd>+ z!{O!#pyl^Edo=Lt{>6BVACcB$sNi;$U>ceH%wJTwCTSqf-N?)+&^-P{8@aDu?^GIu z#S-%{BXglp?cnQ$Gin0n9~_!9Q4;c;<)wZPdVlx z&G(;jnICG||G6i?zeo1xUI9Y+A>_aA)&Ep(!=*uoKlM8xGkM03h*#JAWaI^C6!r{V zK^+gJ&Nv=2?%>8bu`=lRlMaI?$IDnejw^rjC=Vi_29S2>25Jh23^GtGh{yW?4jC%> ze;$kmQho0^UN`%nc`pGQ{`y`XZ$~Dlj~RM#I>z%mDuD!0&5s&C@$=YK0wKRwGT3!U zK_o`CEv-Hu}pBegqT5ZAj{*E7mgnju+e)tMH-tQIv{ROJU&!=el@7~Y7nSJZu0-A%Ca>((Ek54@Eo$)O# zIcOP}y!{%5q=N~^SNy*(AAiSJ%lm&Z=lvM}GyUtgT*wl~-L(8W14l0B8iPIOOQApg zGmhVk|4+t#i@)QjG0vC6>pd`3kNwo49F(zNW1c^8Gc@q>eTPKQcs=mq@4*=g#{S~< zWB7}_e)a9pq01;2yguHKynSi~DVG-=dicvp! 
zyR?+kkMc9pefdJ5ke}zqbIFXayzqVme*Xr%=60(5k*AK+rF`7Ke72mwL&y6+1+zn2 zK5jne^Q&(i*MF1a;k41d4I_X5WBpvtfAkgpA}4qaKG%2B{X@D-kgQ+%)W84Zd9-m3 zqjEhjjahED^k!^TaeOVjzk8<2-4LIf~frjgs zk->wfpG-a;c>SX{l%G>4f*gRI!JO-n!(a(=3tQlMGQNclA#Mj;#@m0D^5nYC_iK6` zvVe&EeA+Efwgn7%TrNXEA(w}++#>llJ+^&r~kO}D(deq-Kog( z=YL3|_}|(``XB%J;Tirln%S>U&m6^#Iv&yR&HjmeC}G4ubpZH%`ma)k=)&<**%~Ju zN9GxeUiOX|nL)witOGL(C?E_<;ewVGjg@)nK_Dn}=iAt^WGjEa#zuc$pOt6f(Y%-7j*4)_zH63m@xRT`t?`Mr?Y^9?&+%Gpv@8#DiP=ovaP)+8i-8_9SAU>Z% z=>@`j8_Z_&m9>}GY2#LH=ZJE=THXtgmtJN8KFR|z>x&$ird7K@d9O_X`Ia)mX^Eq|>S z;EKt)H}0!&zS^BP?#;~beQk$lx_tpI7oLDQR%C4@1&n zHQNc#R!di5_&6<7`Ebw8Nw>bfFGJ#i`H9`8wN^9jr3LC_61|2Ooo|NoPzvj;yetp1 z(-D8x(`|j+!)F3j+bW(@TNup!B3M)O+_!9w*s-m4Zl?RJLc`};%mP`Th3vh#(!?YiFvUbOXHg-S-LblQ6rNw;`r_H?mTz$NdyPVc3( z8Vsv7F6_MEqwLE!F_v}U%*8?M$%RD=;ywh>8(4<4%WxqxmUVZk#QrsIae0k3=-u!X7 z>DN7VhTXB4dAE$1L@xX-of$*rf?;4n{f~qD=B^~!eyQ%GX+|14JyrK+hc|W^i+O*Z zKaFBb+es2VdM~IB@%k8=C*eBt)+C&Ru5BO2t9jykCti5XVS+z z$+{i7v2v8!e6Fb8W_f!blhmAUhr)lT=Z}}>9>}xbB-hJDK9EbXw*CW6QgZ$ogdZ$pc!)8Qp{wTlY%X}z6TljlLGJi@b@Y(i<>zzLg~ zMX%^o+491PTXs@EpGFGbd2YtZ1(G|;hv%d=VAW@aEUO}wanmKCCN7*c7NW)A5}9rq zGhl^fkic5Fd3nFZ4Z1}EnmvE+luV}gK)Dr<1&eFa1fvB?zm6O%f;$8 zlik%1obI#UMtzHJt+_Xi{G^F>7#E?~!1Ne61!DO+KzHak63%eEtcAdcH?pN6Uo*pp zq}e*wgK0b*tV%gv($}HFk>#7JBVNr)6{F9k+6U6yF*iGN9S0~XR*Qc`o*gfO{hHq5 z^3fGRM9;-?ekpIxU0HwHtX^fSO)dw<^eRsa47FBwzF!y{SGj*qYiXC(?K{tB?5sT1 zr$UPI(+?$qz0o_CpW~rod(lx`0otCmfzGaCUG=Z6uvx`)a;VDb?siDeNpq8%D3|P% z^%*L4lAkAHb>3}e`(=OLs?)h}O$*0dubs8wUlQ-OWtY2}RJx9ar1 z(;yH1!|SLA&!vA_;I*`bVV_iwlQ%oxZX$g(LvC5q=;$(tfg!HqBrC7jM&9Jw#wFNCZUJ|B!7P%iO-61niQ9XcQw{= zl&w7x4ApF`aVI&8t^v3JtErjEFcE8_0LR4vCdOfWW zU6i}m+HmM){dRxV%PK;X2{rFtsbDKUM>u7z?Qb{hsS_u857N`mxOnKWUm7j{8SiA~Oi3g;qk3B;t6dPLGVyI5Ats_vl?<9Hj8b~ z6l7Z~tzQ}_WSG1Zkl?#&ymsdW1+>7C~uvn`*^aL)UU_%y7TSJX_wdcLol7s zs@48-cBv8D7s^hdEjgQHDcFlel&sq_6&-!@5gx_nvK-deIj9e7vDqc-iR|{bV0qKa zxjlcsy7j{p=krLbq{%edZzT6!3-9OIdEL$TDlUipN?R{B+o4~*747zJb{j`uhqKN4 z9khLKr!Vs%44)7mqih=C?WYF3kWb#j+gPTCuDJ0MFXx|-m}qD3^C&il+3j{cBa58Y 
zpVi5T&m?(HH#IUZKB3|C*dH!ceO{jS*QtMf^y6nGR~Y3E^{=x<9g88(%LkheEgI6z z;j`XLie0JJ2P7}iq~hrd{>tp9r7+aqx~~>_wRlOgsk&-v^HX)BSu+W=#o0OSAFlx3 z&)M`md%^ou9-Y_Ryyl&DI=tUb7)*tY|9Z~f^ILRGG6_HJZm8Lj9EA1m^`3u1`M7_i zsJjW>H5c@qC(7@YD0V67=6-h8uvLW0Orp)1Ua}5J*UMd(Hu+L%7fpiXeh}c8C$04A zyv|86+AE$RUk<{KO3kq@O8>EoUNtLqQY7ccGpc9LNrTZbpWmY8^Y!sPKbGlTzkJFr zYvPzNP4e@ip03~kuj#5h38Bgk&q;rCE$@r@>W2J9257;gKI?CbVS4_o1udgFLyFLy zii^4I*7@u;>6)2YzF8nXpXqaYwEgwxHBE)Bba~JPK9|Wdxj4#hyG5Vt=CnBE!i$Po zBJJMNsrBR9-tOW{xjZjkwy=5E`MSB6uh~AoIQnwZ9ww9dJbJ9RO_I%yr9XeKAI9me zRP(f{r#*-ZZTm{&>HM%1-%GKZ!oo2XXS=x7F%mQo`pIfx1xm7kZOkj_-SLMAK`(qr)X% zOtdu|#d)|q8fLS@G|!^sM8lOj-I#TMyDpNru%>WDskQQ#_Y)45rgzxU`BA$!ei7QI z^I-b9?`}^UwzzrysVvra&uSkXQ}5CI`3Wgmbh$~7?L9dJA9XK2dtIM9egE7E>qwkG zgPB8&^`X|ox%WzU_e-p8SC3=h9=cO)l~{?i{q&9`T~l8w19jJ*(IUE~uhx=(7K_;> zc|*8(zDpgQ40KVX)DZH8O{G~^PXuzopW>0|EYFfBzRWT=?{@c*>FeWfhT2^{Y}-tJ z4jUI288|(e-_MS+t&hvitBQ7QAFa0K>)x+=wOYTiCiwbF@3rmg9iOAq0$sF~>l(7^>)3G)}M_glIJ!|9CyBI7WRD-s6|`u zHmg;+PFH%fu=j&=n5?u!lmZiJ&x3Xsw_Tt*Yr7Y=(Ui_U`|?;#XI^0iQ+0VhhNoLy zZN%~w%%N@b=c2nkDA5dy z`*?$zdt}^~bi2O@{#C!MGJ9U8Xty+n)0J$z;8Z4yv$~0y<;NjeJfoXeOQ-8SST5)` zXP>o-_c=@TtK^gB+Gxit`~<~p-d;m{e?Q(|cWWkl!c;$?^76s@r|Ngp;&hlC?)%z2 zeQ=u&m%T~%iwClQlh=CT6=mF8#N5A+=t+Ha&; ztPYps+u5yena}KNnMT?+5pJ`~`cfzMZj#_`B`=L=`%GPb9g4YinJspT=+Jj`+WVWz za}sUke{Ah!LaTN&MUxY?J)JL#tICE(C;joc&8VQ;Yr(ea;as|IvM)k^62CpgtRo#O zXX_x-W+pa9XOe=gq#<1u*Y(7If2NbelP(7LvybTMb38zb z+H=j8^PPErns)ZRJJ9W&q``Cjc1R-~$>oWb@8DcM&0+K2&jo7}KNH){>X!*UdET{deQ00<9ThGa2kG2OR(x9vEJ^7IJHfGmFMa4++KrYCC<;A73Le4#QaZ6abO$53krpTNbvU;Ae2UzO70(xw3z5E^x?p3D7Ce_Pg* z%wBi@Z8iJpQe5DdRu0QMH0a~65>?F8XQAQ6BHV5li~EjEkEzCXMA5V^I@r1~pYM}_ zL~LtnV(f>PTQI*X*U9@cyzV`y(YBa- zrwm$W=1I@H_)=#~*A2BohM(=NEljgYdY~gq63&G!peC+CNTik!cmTed~0~(xGas zHa#WgHgF zKy_r{TyBuDR!>~Ag7U>SnB29h2ZsriD*>iA3qMc?>;bY~+&T6x66KxYE3CR5#6|0ATCqFEN zucqs1I>=tVkkpr89}f2RHZIq?!(_+QGUCK!D%Kb^?tE9 z1$MZVx`G}q@kW@q!NaK+&Nj(^7x-?(kvbRVg|@qkTl0W>!#PV6v zDfCm3z*Q_SDoMP@Ay}8Y^M0}|=V_UTo5I>vVxhe|W2c%k_de`S?}hq*u{L#B84G8a 
zZCY=>Wo^AvOY61VK01AWiR**?dIts7`fc!3jioy`R&`@3<)Vrv&U>>zE%I`12GR0( z9B3En&)MGHE}HlBKR5l_N%ed-?qw*+U`cqu9I=-&oyJjjSP@eJY zVLzR#zncmA%i7uAY@Sh}=84g&w4PeY)aOz9nD7Y$oH&UgT@pW^Sz7Zy*gB6@>w#{&zKeTC2IbUL z&T%XElylB{dUHN({}--%0*R%n5)@)lV@&;4rbIvy`J6%7kX`DLup zpk@=WL4%PK(pj{@v175jR9L?ur4&YR6_e-0v1`(im@2}#kl)0c%1DG}&jHGe{020lNr_6Pcqa<6R~QF+h>lHm=5 z;}Y;_cqFrb9Nx1ysR4?)B6Cd!h6M2JxYC|w9MiW`FNM?%n2V%*6C6PKxHbg0nj@|v z;W_^*@iLHX0DW9`R(TkNbL8GHwDcg0JFVeb9;|E(lmg0q0C=?=lo(MFSkt>2s&EwL z_=_A^0{o^3^;afOf#AA;5}e5%UKk8P$D>cO+x*^t%M-_~lLC=(k4~5nMVW|(o(S*} zQvSG^!FyxF-jw+5twAC+LRaJdQlCTd_E;RUter+F?uU@3+Pb6OnnCs!Y5fxfCY+WB z+)DUNJLa!acpx>imRP!F9Av`qGZI(t9jibSDJ+z1iWU^`)0X40yI)}0fW>o3VUIeA zXwZy*_$047%r!b@92MUv;H8Mxp%yGea0|?n+x9o*WIch@m7i>%JAwUV`KCeOg9DE)Olo9eR$}4gx zf!}R(Ouy#P>}@e+flheX{R3#DuDe4DM zT@aUSybKhrSo5^=QVnBJP#Gm~?3C*3qHSUxv}qUyN16@_Zgv*t+r|s!>2uQPOM~Rp zwz}w6WZ*^zm2EGOn$d<007_P^O(h3^Zy#dt)A94xp!YYii#-;Y+Qz8tB0&+_Y`&r+~nj)=Q)4R5X zu(9<3>}H}%JrXF+r?EJupW;#?J^Mz~1{)huKv!2y3|slCW!&@yvLpq%M!vRxRnSDv zj4zVddKO_g!WQTnx<4D-=n%IC)aNvF%2B1SC>z*;&}gLQaKf>%yKa+*(>?ug>MIf_ zS$yZFceOz}u*UsB!(`KPAFRnl^KPu&(H6oN6bg$ig5;a6UdfJ_BsK-_agrPn$0|^j z(|XB5R2KPeqGhn9;G>Z0pr>Vj^ByfxP{#%x@C8HeN~Gt8({lb(qvi0~*f^MzI#SbQNPb%yh8O>HHT=l1RmTZCvSB=E3uR1Oz&?M_ z0FJtW8rUk(q{4^&MKMcVUCd7+`^5249X`7zCxPE z+yf&2*>?&%rg9(;I&Fx}M9PJrg?D$Erwa?UZgvS&?+rhMMQl=j2F%{}%BM0-7BcQyg-_xTs z9HPHvz+u{h-gTX_k{`i;X`hbs!7>PoBARX)V|g92(h=Av9z^U-*fm#olUT&Z6?p^8 ztUP{K6df%UR?51W2q~=gMrTQu%dJ(d-AqZ>izQ^KBsh$_lnj>EyBi8XwXxM%u@$^~ zzJ83K-s9q;#9%Bv7}UB>=UtWcWsOkfBaOJwO{s4V1Wq46V-9A29FD3=l;sUCEwXe+ z6IgU+24RCAhOg$~zHEwmyJW>C~I)wb^nxrQfphn!k5x0Bo_k z3TTqC-r&H&F;$F$h3E?UDf<;Ug2+)G2=aumqvxt1SWV!65Wrra9R1s}u!B~O^XJ@!g;iU_T$tAto`u{#Ll^U#^=DgibLstmKk?109e^oEv_`-i+joA1f^E- zwS`$-;h&s3K&RZ4J?an5N)-s_9gJLJ=j~!7`p*t3a3wecK((!{@YWdS$mm$ywwxs} z)!PI_PT-#KfDGcVB$Ou|VQiUq7!)p%0V$&6=XP~JkxpVfT6O~7L7}_Dy<@Rxm2gdR z=^wFwr+$~z^LTb@fg@^y0(diP)(V4YQD%_o96Ncovualr>}p-sq?4}^^o9)(^>a|4 zV^gjWMmx?Z$eB0@z*ZM>*s))k)pLC~e?g9w;Aez<@|Z8v(?Aw!B)&7slU!XcO7WR; 
zUDdrqGgai6DCQ;6&I>-V)nlrS(d=o19Et#I28q*3>+!DvhV-c5b~?$Zmsw zlLI-?UH#PGO_;68kq)5(>G)I!0v{1U6QEUCw`05CV)Cv2=m(bXK}`R3B7^t>A1X?V zC?1|+Ku#KO$7u`MZs>UA&i}p!D=-9f_F($mZ=go5EQ9oKN4YP0rWR>OlICzd$Cd*M zQDuEd@d^iMnZ3;dj&aX1D}IJol%nTW{@04rX7XwNL{G+jQDjyaWlY?9$wQ;(A+ zQfa_0GUj3zQ_0cLk6X=!*(%kJ>O)sD;H!r2H9~z*1-LeHDIMU<{fYxA35$z=n-Qxi zm^=(|W>F_h&)Jv6nD&&Z+*{jhjFqIN&!U1y^~7o{r(1pC*R-+_l1JhrOR1}$Qs3od zaWMu`hghJ#y5!CE3c1a=cuz^hWI}pJ#j{;Rir-qJKSmba_~3seu1;KNMDV0~eU_A| z0<3S&O|K?=hn5&Wvp+rrtld99pSca@uOyc za)6$n+29fU4DFV2ds!!PVQN33yaG`Wo=VWm0RT# zopAZZJk^TOKehO}51ZCp&FTdbXae_kutFrH84)=3sIZ(Ux^wa>PZK16i1wgpGzVb2 z@bq?nh+6EZ5I|~4H0hK*T1nwvN%Vjoi^r|U*F8W^o)~|O}=HM<{4m|eyr?s zG)ZS9NiU|Gjt2Y%pnUz8jk{RqW!vwQZO+;+7LW{k((ChnBWpjTk&^rlFe$?H(?e2* z$cu5Vg6o;$+(M&yMKec{=JM!A2q2Z%>2)r=UKfHXx(T0n7-aa9z$YLc<;|?9BA;~0CMB1lgIdBeS%@Iue zL9+2syfYc7k|wql9)W6>q#on4nw5G9A%n_1(`=r=+8$Y4BfeW^cUJLQ zYI6fMy@{fK{AMP5?Jfjt50P{ZtGcGz4UOCDmi)bADqU4kX8eTcpuRivDfih{gCs*h zEf~_5;YKNtpv)OoY6p^BR|(9i?r6ma0Ad4(s*uW^BZ4oZDEe~UHz2SqJPkp|Bw{NK z?7sWi*L(lU2MQEIoTz6wsN~1Hv>7>4&EPp`*-zGgZKkhf4c$nS>Qb@|j{Ac1&gu2a z8If-)7Av-j_T`2+Mlw&U&tK1CfZg#`5pnDFZdB#JyADu!T21(`M8pOs>kxy_A>d?5 z-fkivYx0q|g}e2|$v+rA)(c*S0=dm3BMI%>upeh$Up=A&neobYn)XNZgG0$*{v}W! 
zQeAw1%dxY!(xc7+XLa>gR&og%9rg&7t&_)3@GX*uR{-X7;iJiI^PQo=!iz?JjZOY| zmsGd*CCZqgu$(q66q4AFsAYl=I&gY0i;7wwfsa{Co6K!zPyFEmS6J+1SaCwEgNA_R zGfgybfm5@45eD$wtg;`_CGjvB%+tKr_-fC8b!r1fBv3S2yU*m7f@U$jV2?5=m$yw2 z&>RD#;W#c_$Fpfji@Mh0Ta`WY5i+=C=oKe)FXfe`a5av^shO0O>2HVdg%Zoyj|*iOb}f+=|d@g_2Ct@5mdgf(e@ z;jRXv&PQcnM0p0ShuYa=Boj58c_GxeEndY%Xz{ls*tko1nqJS*+90InO7(i(4V+ri z6KE3J2WEbb7}>Nd>9nHadwRbVGZ8LO;1t`TUtW(#(I$_BG1W@O0iHic3D8XhsZKa` zlS6I#?}$aaDV44O3Lk_+=?S?h)cquX%lss%A$jJc8*^iC*nhi%o%|hB6~xw6OQ&cy z0PHTi_nHjTwqtaD%Me=->{YXYlU=i{3Z73yMg+1Vj+JvwqFdO3z($6upVn0T%lK(r z&p_O;S6}a$(vA<(E(5rpTU=G@?_?BRs;j%0s}rLApVPzX+dv+DOOut!s`S{7vm=L4Qm zH07uP*^0?P!1ZHYnul_lvP>mx`Lq!-`)Mls}k^$PRKPO7xARBCZn5Z zMA!3_LZkKb`pz6+nv?y`bF51nE?&CFM2_UdFYsBD@7VijkhNlecoT#!u0j!h7R|DphFVtQlBxj=FAZn?&nQOjfnorOFWgYd2NgpAM6 z<-KcWA1IlDC8bt>R8eE=nq?u;nNUw}%L%I;_7t#yqT4=cUg`tP z^-Nlckr>R)>n5hx>Gc}Oa5oM5G5K$HuwyXGWVCt@Y9v$&I6ihe5|Ft z7IfQ9=asXMvw(3;&kU5Ds%fX}VD4@UOkrdz>sW_BD`%kPmzH)`zw7qr1{L*~{# zMDV5=c|n97Tx~fho{x6N$!XfZ@=HJ_+K{cT`De3uO(vcln`>Ub0jzNnD*kZ-T$LZw znFf)6tfJ1T%p0JQj6XBJ5+2ydUtt1@OzsY&&CJdJg`Qwx3~F-t%LpYIyjr>#Pl+g$ z{1g&hJh7$ElD3(wcepNB$@8T*b~)Z130xM zgC`3~-cK;~&oldxtgDzzEh@@%u2C|_H8hg-c4YsJ(dJupGvjT{izLQa<7AD4E7I~HU)A|?pi zUE%l^<3K!jmv*hRLZ>JzcS8m97<%}{mfm2j;Pi{+^`e^i`B^y7AwA>_^ogW@PQ3U?6Ctz7rM-I#Lx*P~;mWa;LsIL-$gy4oYc1Wl&Js9rj4$&pqH zyeHv~*i5O!bO3#%E=tDHGQnGa4KTxGUSdhYwEL*mqMoKm-^nJb$K+#l^Kzig4opiH zg`U@HOVH&H6%Fa%vx`dFl~DN&VAh?pPl7$JC5&`u|3Pj4VX*(>fBvuk4#)miHrD;W zY%FiZ|BsE8o&UdX_&-#v?Z4FD&bj<^f{XujH?>>f#u%DM9R0Dj!$07EI{G8A<#)bB z0E^KC`V(*%&Obc+pZ3~jkdevQF_((tuiih4@U}AygBfv4k|k371ta{8&-oA2{jao> zAP=7WUHT5gaKT@_&r<3`bb>hv|K-t7{lB^W(-kA}n{aQ~%tsQh!B-?%$+;bWrAh@SY*&Uo_G9&rxLj&4T9w3s;;GZpuH+F_-TDM_>Fm zNS1eqIUB_zaTvz<=kS*P>IuE1ptZH!8EXFJM!d-1^LqYwYu#&PA!PPV{??CY|1s&^ z-%zw-{_SgCnyXpXA%hKFSOhpHw2Z@aVlK2C=3v}fw~MBg$D5UZz)7Fe(kJOdHZV%V z1ByN<9hq4{S743?$OMQK7Zk=v(SsuWCK*F$&d%zWZ^4t8(oXHc>mJ zIAwgUQ_DjH0y;C*YP8ud@T>X|==7H9UBNQ9P8|fu*Is{}eePD}SJgB(KL*k(ONA87 
z_cuz;s(XYX=38BVmttrA22}z$jqD9xZ@*uQlX2C^K3cn_s}ly*CKqCv`^wz>`TjOH zudk{vGYz9&Q|=p0{yk}Fzgyidy{OCGa|ZRCc4<^y(YGJsV%EVCL2OOilVuYU*i{(k z_o39YlK|>=w}okiU{26CQWJ`-C@sd|#xk3B<+++`M4pg;S~s5X+s59NLN(BX?M>VL za@l}W3I0Ni0N@s?k2$dJ=q*uG&gKS*#XXmao|Rs{R1#q{BY#1%Bw3++l~_X`;WPf` z;Rq6gqKA*=B_CK-fx>5sl5$QEJJ*?XRZSnJASzXJ@a&s|ok4Pe!WyW%YW4G2kaTyh zWukU$|FofhLRjbGmsL4{RbOQ=#*Kpx087ILmFx?QQLCj}66`4o3gxU8!y=ztcB<)A z8Uo&uiFPmrVO!>0E;)b>I+;cC{CcRMW!Yxvq~@y{wB6N?kt5aLMn%8)W$0_KvD0n? z;NvA9B=Ryrx2cDIU znLELFoPjW#rw2Wve4p_B z%1Mxaz19XdyF2~>5M>z_Pz(H?3#;ZO}|3H)@i$av>A*dS_vBMCcq;xi`{MirUghOrw?{~m@fV171<39OXTeOjX7 ztoJUhyt>Vbi6|*AQ+U#|E{uKNah7<{qxbnCppwPBJxz2|MdY_n1yP+pLIAky(T|aT zM?vI0^pT=SLoeYB-L6S(P@cc)5a5L$#PhyU%PC*7AQ}_ytLvfl%$T$d{o-)Hq`vCs ztr+ijO#GQ!e%G+r+6!$gewa~7mqR5jnS7xd_YlV1m0iXiSv9w2H&C22lu*_1BV3VE zNM`N|#`>F47wZSFB!b~2n2F>`a|QT+4kbL1e099Fz1NqjrbpvC8)+|d<0}Sj7S!

((rM?vjx6-|acui#FGEPVt@$&ORy*HF$l z4h8TDF_Pc%lt%=u4dJi8xz`Y>1aLImkfTRNIjb7kS8ZlVK)Nq+#Mkt73kF$#F%~>) z$1H$HF_$J3ClG>Kudl=rxqa0lqm8sW*J^7IMYWFfs%6K=F;=48pX3EVfQ2LwV8RiX zuY05ydH8(Df8o~!wd_fu=q5_I1HX@K)g(GPgRL5zVnA_wtv*nCwffnxmC3t+2L9vO4Mik?Jf%)lyF?==Z7S-Cj{TW35+m;+P6DWqp;buNVao~gw zY-m7Ocg~szO;Cjg;eX7;=Ps#krwG27%CjhO%k5$d+g~ zB*obRH(8LRJ9+y(+}p3>ko+8N<}sM8Kq|Vk6VxWgqyaOlxoa196!dL)sTeEz#7!TB z&9-gKyk_xX(q{p9eG$Wi#braQfk(i|gn685%Cbd~*Ju^j#k34ZE=82O{e*UPbGSIP zqj5y*seJOrQ9pkz{bELc6{Yzt;wJC2@bYF$wKXa@#y>KVG{ia+6C|1#Nq(B)U}J?G zWk$irY0iDS|2~R=!4G)d|g*xYKN{bfNLX;k^a6Qv!!Gpfu{|hy88OPkvbf zvGeJ(i>nbm-G`zm4;|5!f)z${z_f=l*o~6JoicuApR@{RmKPs?PTa6~ogk%FC^^WT z3C^ZMZP5VY58(2cbU6b(Vd;Xr@IlE1O0Na;73qXV1#8O1yHvF8t4FaYDZp%$L6)Ud z02U!&W<%TLxsB&+NvK7KxP)w?|N5E7k4cncTt;^*2sT-e#ER2qk?6tVrdY&!`M!KQ z6UA(YF^X9 *zgR@Bd#x(_2H1cqHhA5mnjJ2*74Wx44jW;V~baUgsYQApNB=MP07c>uZ z?STQj{~6(bp7Lf3Zo)dPZ$5cL)|gzHC5YN6O<;s08OD^(x=<;ta)M!VPd63j4p2}! zOwvvE(NqL>9d6%kI{dB4;Nr?Fm`NX9s|%N@JvYE1w3Bem9p{q2nSygRNZTs`Di_-? zPHb6gJ*%HUnbL{VzR;aPpbvC%?cUI}q5I$gOGRCOgP6wP9mb`%RICt06yi6fJU5uf zKMTF8vRz{a@y}k(EgyuoHFPcvR{!yDB{y8^iu<=GN<;P z_*E3BE z=*HK7$S|Q#Bi1lx1S{Hj&E(!i?}9Xnf?8qu!lqRHkPd<&j+6=9n`Wfz4w@B=!Cl{807H8rdxq(gaO96C$ zGvd}>7_f6DKgBzjFNFx#Z7c5*%DR!ZUkb0xz`0Mcst|ZS_JIrCSTvWmbAu6g_9S4PXCWdbg_4|xlRW3I)goq?3STvzWq zY8|=)2%pPJ7^!utP-~E&gV$l99hwt=Baw&G^Hj(-L(=gZ($7yAbtK|nAm#eyB;)S9 zpQI;inDckxiP(*+HQNk+H5#XqtqGwZeIhh;Nm$^zAv*`ns2f3i4v^!?M+;SNcu*0Y z3offF2~WS)WMfc46q2cU&xU6-r%c5O`HS^B1yS^SAt7`PmoD1prbILfJdNsqvKkZ( z-uEn(Q0cd*@d`z&D!pjWxEfxpZx*JOrfP;fXC@K>G&1ZsOb@Zl$H;{GP7VS9M?&+6 z!%!mfCyY?HU!$)%aeXS#M3{@!&HS)ciUoiU}j6>9FFT{pL`K3F5V(2f2HwU(qpC@*if%dB;U5r3YyRW!#-wE4Iz)d3_ zfc&PX4WNNsTA#tDynj*#Jv-#l^ZU}~XjVCN8&EhvAn8rLr-heezID(5FWaSS)Q=>D zu+5QwI=dYXc%X`2b{XbLaT3VNvRW^GP>L$~FUb;r^7au3VJg-H z>AI`)+By%=T5;Jv-zLbO$=#{_f(YQo;QAMl;tB`71++$#P;2zF^$f-<%)Q4K@{}=Om?=yHSh*)kW_dNj!y;TN^2f)em-7%*x^W^ z4R_BsCHxG>%!lN0GP=xvST|!%wymGTt&d^&qK5`!@UabY)9J|Hk>!%hW)|$J{T5=1 
zvYz1suLn&7b-^zL(}>KcWG>vK_2HeQg2jvlF%_x3m&G4Hx$VQNJwflpK(sZ91X7Q= zOc+n{>K@jhP7#TOnA<4XGNDb`T7F5Xn^4BTLRs$iUE4905?&_!NuEBgSd6uHLpTF4{ z0zce1?B3N9&m`u=xY%@K!>%(5?O@!~oeXc(GFuUt+)6%!xAku;F2uK!Gh;t!qP#0h z%`0BtNTrdkrk#0zKY|j_tKa(f@u5{^QhHg%L!|N$?}cz)4E4HWhs;lUvaoCa&QYDh zLk4HlC>v5kQe2)t7yOXl6iSma)JL;cGcR?#QwfWjR2E*Vf${-}#=*!oE4X9J4rB!7 zXSDJ3-1IlFzSQdBD;OgUoJunHPM^Ae$^R(Q6*ao97)`0xD6*cQJFCaTcX10OO zEZtp#-MsOCe2+E-oGBvXOH(E-iZs~jsgfx@Y3h64FEMywS*a*$`?F5&KJPJBBRzn@4qj4!!&kKS{au4MmbxwHl2LVd^ z@RElX66N0`j>7Z4A$F79I+U0$M$lcJ=e$rfG4g4DH(v|H@>m`krvaqaL~WRZN5Q-8 z;tmP7Kq_P$e-XpVW_aS!I3=c7UR>B2t=j2+AxMjLP*QEg+~w}KH@%J4|M*~294T^% z=q52>Y+*bp=^HLnE8Is14N_=%(g5W&d(MJ5_XJvOLBY!s=a?p&_%RV)44XaytE~n$ zBU2E6KAdr@lOz(BKVCSimS1f>rj)B^jzRWP717iTripWTYVYvz1G(&JMloW8o`cBa z-=(X|QnnDLCs~hSGx8#-TJ^Rc_|yki_zm3b=qXGH5$0LP#WR8_l@mI@G1~6&voxCs zX1d2$(oqys4pUSF0HHV06@q;CH*@K2p#S23F-gRyvfYVK2dJjZx>i2pNo=*{pKen3 zk3E01K1abY!9xmX0 zr_KOXSfBjcC1)#;umeG1P|=0WYeMyZ(F2&X>|iohTAV==h_h+m;$sX_S5Ugwo}06O zJ#6Y*;fsYP|d5&3txu zlfTdt?h?at$(u$mkHjUfDaJ2J6L(*Ke!VlLpUPOwUF=+3%vysutE3QsM5@SBJd)po z@q6y_I$c&^1f~!5RwL}Ze(6o}3ggfr-1-~aDs54c>tHoJkG{XB0x3^;B}AX}X(+sL z-cf1C8Fe+$!4G1intv04AN4>-a+AOG(V7$D4u;W2k5z33xyYud8RK&Mu|`mT%)+}} zTD6+ybOd{{D*!T&ku0~j93G$gWHykcBAYV=eW7=3BFZP?19Gwt_u~f zQ+M|mC$FIJt+-NMHG7etmriKbErTL`ZpD;;3F*2HSX4%1i0su~`OF18IRMz2eJy`+ zBY&x6+9-uW>m|B`$Y@&;2h!Ak5$ngIsgFp$jRgK>%q!OxI!D^HQX>W;qE{Db8lhD4 zU-QfPiNC^d0CNx1!d)~zfBnKgKgx*g9)vi7a(k~2`RyrUv2#Ml`<|cya9;QzqKYoo zyZ5uBDv#&5dD-Y#4rji`UxpXPSFOE(_9c^O7-y^#NPV;tM`H=CRbzjDD`iPguSylP zxO{9rOm4bSKq52>(ZEtaw7#!6Lb#v?6)_5B+aXsA#-hdyKKEs&%avWwSSE)JyI>>P>rm)oeg8OON&PTY+TCjua3-Acw*iw@j>=e+T$ibQ%OM-oFd zsW9GZpLQEkkLeOD53r;|D%#`8M@FUY8`4uIkAHbW0@uDQ@9rlNqzrQ=wgrs`pv)23 z#O6joABHZF7Wm^$kL@tD2M`s*|E4UBSPafznVmk|=EU>kKqwPRx;B2LIkf|2A7La% z{`3G_>2Xf-%M-4DL3H@UCr^mpy(5N@35Dy7P?W9mif-H$t#4S;_D#d2<|jmPEH#a4 zyz3Ze85}N@qCtGJAd0S~9)Fpjc-_UT=<@;&(8U_k+E*uKY_~nUz{t_AbQj=zlgB$5_ZVHZ< z%(>!pAn@u)hS{V~L8@pa#50R8(W_mGr`R6s6xlWc41X*=GFRfE``4~+u@(v#dInc} 
zeMj84oGnKXMRimXtd_vApMoGyVPuYywi0RAtf0>@IPEeU6SE`^k|?2M&k;YkKR=j% zT>>A`4*ez0bke)3b>HWmHp* zWiS+JM4>WCxRW34fyf&EegUs^Ed~gq+oez!4zix!4R1OmA%i8rd$tl>3oMgeQz`gId_u0aiCg#cc8E3? z@NQQVUN8+hrN^i6vqyt{zT8eH35$*GW2AzAEw;X*|5e#Au3QJ7`8kk=hhKZkTV`KL z&mv?cDEb#3#0(Q@RJ!{JAD_J++*e_%cMYAHc1?|gtfu9)8D8(5ntnY_J!&2EL^&S* zmrqVdxFn1Rvbg-Nt>PcAB`=u@LUV@kI3VNO_Ui@liZV_uGq`?ccxzJ7rZAnoOPKys zZ_3_e+filN7W^PJ0D&ZEWW$}h#;KtPh`N%55XIN$Kx;+peeStA-pf|RTA>iQ*|cfX z%yHvRta+_dD_Z^III8=$xVYDw?)j9ehnahCH2OBT4K4>-1OXejXfg+03M{^c5d1yr z0!z>wM@Zmea{nB{aR8K$7koL#>jGoIg}~yWfOr~&GHe#{bG(=GJO`r_UweJeFER2x z2*ZV@83ZhF`rqHTs67XW0Ra)WF+T^!0e{g8q+YcQn#7A(Q9|||0ug5;1_w*7vCo=C z4PLtvaoyQ2qApCwC}DS z>?UC*{jD>jQot?(IhVXz`rMMuqsx_YpkLLnv);i9Q@FVUilYfYb z3*v>)#K*;X%HRatjK%dc8)l9=-F@W@y@Y4(baWLTOLY&QsCpSLC(O?9WF<=XwsJ_q zR1`L}LOtZBx)H{Mo2E4@JF2MBSlU8pBmCJZV8Cv4m!?Nb+HXmcI$goLXOMfRkFUXU@CFx*0+(EUnTm|1;OFi$m>E!j*$L8r( z5x7$}z-f5yw-k%_*y-)!u|1!s(dPGO;hy3_MS)#k%rg+U?H%4@dBfJZ)tY$Z(M#(T zd?~0fY(%bkpX8-elZ`YQ{Z`$zUx9N#m;leQS~hL;&d)=xhD^h zuG4id)WG@3Q+l1-#ZWvamO3d8yui5lj*V&g_Iku*S13rIQpdkxqW)0Ss516oQ-BgFQr8e{0P_&Pr zK3|hWztjjjs~E1sfoX}W)-s~AuBOzSb7$-%0yEmuE?Rs%-PPfosei*W_bno?uP5`q zQ@!Y>e)fVnp-~!UbZHw}n*QXpK%kLiu&0Ma1?DlL@@bD`S8dr$Q$BacYliHeZPNH7 z5)466KHGJo(&^bZI>2P^hFbA5><#-A0);}p5a+U$v4ekooNvk`!PPNlDf4tz(|!Ch zD-YvbwU@J?%nk8b6o0l*gHMXh87t@#9V+FMEH9bA$YwHDomlfE+V;o9WCT>@;dEs@ zTolr%6QtCY!@KA@=8OjN-baTnbLq~wzQmSOX1>LpM2Q@&Z5vabTBn`t+DbGlaI$L~ z&11F-m(xt0oQT;WqGl#70bV7;-0iZGD33YF>Rs&^Z;vi&cz<6X!ChMdTOXNb7jK}| zVjZ^}@1DPrsu})+IlbESl25gLH}cy(ZQJ{a@lZFv^SPC~i~rY@U%`?Mt@>Mm2H%!7CK zYY^O4`cWj>eGG1zR$rEzoAIyl$tGbjn-qNK=?t`a4n7(qVqQ6l* z9X5V8I!(I0osO&`&=ZCUB6#Badz=rIYz{`XJZpnpRJdTc_gCy;Mtcte!Exw92>SwUiQb1 zZkos9(aB!YtE3c&-rje&S~IjT^9XYC+(i9$dp8%WWf*G!NkF#0f%)e;W^@X9%q_+6 zwTwOCPOmkzA6E=zSTW{VVfI;8@Ur!J0*NHV3V&UuZ1aEpDr7KhNJsagpI~RcjkMqH z=Ho{Dm&3}SWa|0kaXpQCvAw`evIDW4M+$*_$r9e4 zFm~3@n}WgM+PjT!U^j^87IW)@vCno~ugbSj2rudJX5|pR;xh2^j}Fxf>y7GyNnH*- z$8rE?OGSS$5hyI-!^W8 
z$r-4h2^{wy(Le+vK&;Z7F#^-xO0%8^?2g?Y`q*(NyM_f%%f}RfJLgY7^+>OjkqKK> zRLZ!AErpy-SopQlNm&MAi|&=qNsFFuaFWeI$iRO~je7}2l>?`)jR)AgM{zkTl8`^@ zU4(U=Hnhl}?4a~hMSfMa9;IAxTiv*e`?yd0+uoaECTX`~);DTFlZidLX5HS;R%81wcqZ;Q;zLiAZ;k6tP9@DKDTk0MC8294zf6hJmudyfZ_ur`} zzg+|KTl_DubP4^Vw_)xoEdX5l9T*ac#}pIxJGBG=K-%9?A!~CVP{}0`JYv)yz5_|< zcz{D1$n%Mu0N{%6cXQ(J)RONU6{x4*zg9ykb$!j^K0N;GF=C3-sYLXvSk!wo9oP51 zEeWG!|4u3SP9-@emwcN+tUacoO?~Lh&=3H|w}~NwRAidScV~x}0pRTK;+CJOB;P3{ z003P1{(m?}0ntTG4z48t_Uqe3LYc_ycTF~)c?kgj{4D+9U=hrdUAh!abV%_#XaIn+2>Y@7fTUe?9$*$y5g$m z_1o=}B+N$q8_eJCr#LMm1g`vow%055*RG_|WIn8TPbzh&d4G48hq9-JCmF0DH4;67 zqaDW7%*Hwuh?{mwT_!6!gZ+NLkYZ@zJum|xpntxf-@SmhkqJRY7|r1;4_7vd^