From 69804487e0b10f2c5c06316f0ac0eb6ada68433f Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Sun, 2 Feb 2025 09:10:15 +0000 Subject: [PATCH 01/36] Fix exotic ci env that lacks ostringstream::str (#11581) --- common/minja.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/minja.hpp b/common/minja.hpp index bcb5a0824..e77eb69d5 100644 --- a/common/minja.hpp +++ b/common/minja.hpp @@ -824,7 +824,7 @@ public: LoopControlType control_type; LoopControlException(const std::string & message, LoopControlType control_type) : std::runtime_error(message), control_type(control_type) {} LoopControlException(LoopControlType control_type) - : std::runtime_error((std::ostringstream() << (control_type == LoopControlType::Continue ? "continue" : "break") << " outside of a loop").str()), + : std::runtime_error((control_type == LoopControlType::Continue ? "continue" : "break") + std::string(" outside of a loop")), control_type(control_type) {} }; From bfcce4d693617ec843d0b2510f6ee16e6bc6720d Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Sun, 2 Feb 2025 09:25:38 +0000 Subject: [PATCH 02/36] `tool-call`: support Command R7B (+ return tool_plan "thoughts" in API) (#11585) * `tool-call`: support Command R7B (w/ tool_plan return) * `tool-call`: cleaner preservation of tokens + warn when likely bad chat template override * `tool-call`: test cleanup / handle lazy grammar triggers --- common/chat.cpp | 86 +++++++++- common/chat.hpp | 2 + common/common.h | 3 + examples/server/README.md | 22 ++- examples/server/server.cpp | 52 ++++-- examples/server/utils.hpp | 1 + ...AI-c4ai-command-r7b-12-2024-tool_use.jinja | 156 ++++++++++++++++++ tests/test-chat.cpp | 154 +++++++++++++---- 8 files changed, 420 insertions(+), 56 deletions(-) create mode 100644 models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja diff --git a/common/chat.cpp b/common/chat.cpp index 58db12af9..f87583d85 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -16,6 +16,7 @@ std::string common_chat_format_name(common_chat_format format) { case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; + case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; default: throw std::runtime_error("Unknown chat format"); } @@ -317,6 +318,79 @@ static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]"); } +static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { + common_chat_params data; + data.grammar_lazy = inputs.tool_choice != "required"; + data.grammar = build_grammar([&](const common_grammar_builder & builder) { + auto schemas = json::array(); + foreach_function(inputs.tools, [&](const json & tool) { + const auto & function = tool["function"]; + schemas.push_back({ + {"type", "object"}, + {"properties", { + {"tool_call_id", { + {"type", "string"}, + // Command-R's template expects an integer string. + {"pattern", "^[0-9]{1,10}$"}, + }}, + {"tool_name", { + {"type", "string"}, + {"const", function["name"]}, + }}, + {"parameters", function["parameters"]}, + }}, + {"required", json::array({"tool_call_id", "tool_name", "parameters"})}, + }); + }); + auto schema = json { + {"type", "array"}, + {"items", schemas.size() == 1 ? 
schemas[0] : json {{"anyOf", schemas}}},
+            {"minItems", 1},
+        };
+        if (!inputs.parallel_tool_calls) {
+            schema["maxItems"] = 1;
+        }
+        builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
+    }, grammar_options);
+    data.grammar_triggers.push_back({"<|START_ACTION|>", /* .at_start = */ false});
+    data.preserved_tokens = {
+        "<|START_RESPONSE|>",
+        "<|END_RESPONSE|>",
+        "<|START_THINKING|>",
+        "<|END_THINKING|>",
+        "<|END_ACTION|>",
+    };
+    data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
+    data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
+    return data;
+}
+static common_chat_msg common_chat_parse_command_r7b(const std::string & input) {
+    static std::regex response_regex("<\\|START_RESPONSE\\|>(.*?)<\\|END_RESPONSE\\|>");
+    static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
+    std::smatch match;
+
+    common_chat_msg result;
+    result.role = "assistant";
+    if (std::regex_match(input, match, response_regex)) {
+        result.content = match[1].str();
+    } else if (std::regex_match(input, match, thought_action_regex)) {
+        result.tool_plan = match[1].str();
+        auto actions_str = match[2].str();
+        auto actions = json::parse(actions_str);
+        for (const auto & action : actions) {
+            result.tool_calls.push_back({
+                /* .name = */ action["tool_name"],
+                /* .arguments = */ action["parameters"].dump(),
+                /* .id = */ action["tool_call_id"],
+            });
+        }
+    } else {
+        LOG_ERR("Failed to parse command_r output");
+        result.content = input;
+    }
+    return result;
+}
+
 static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
     if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
         throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
     }
@@ -462,6 +536,10 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
                 "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
         });
         data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
+        data.preserved_tokens = {
+            "<|tool▁sep|>",
+            "<|tool▁call▁end|>",
+        };
         builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " space");
     }, grammar_options);
     data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
@@ -704,8 +782,7 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
         auto tool_call = "\"<tool_call>\" space " + builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " \"</tool_call>\" space";
         builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
         data.grammar_triggers.push_back({"<tool_call>", /* .at_start = */ false});
-        // Not really a trigger but need to print this special token to get a successful parse.
-        data.grammar_triggers.push_back({"</tool_call>", /* .at_start = */ false});
+        data.preserved_tokens = { "</tool_call>" };
    }, grammar_options);
    data.prompt = tmpl.apply(inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt);
@@ -822,6 +899,9 @@ common_chat_params common_chat_params_init(const common_chat_template & tmpl, co
     if (src.find("[TOOL_CALLS]") != std::string::npos) {
         return common_chat_params_init_mistral_nemo(tmpl, inputs);
     }
+    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos) {
+        return common_chat_params_init_command_r7b(tmpl, inputs);
+    }
     return common_chat_params_init_generic(tmpl, inputs);
 }
 
@@ -855,6 +935,8 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
             return common_chat_parse_hermes_2_pro(input);
         case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
             return common_chat_parse_firefunction_v2(input);
+        case COMMON_CHAT_FORMAT_COMMAND_R7B:
+            return common_chat_parse_command_r7b(input);
         default:
             throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
     }
diff --git a/common/chat.hpp b/common/chat.hpp
index ca165aa13..33e64a430 100644
--- a/common/chat.hpp
+++ b/common/chat.hpp
@@ -32,6 +32,7 @@ enum common_chat_format {
     COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
     COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
     COMMON_CHAT_FORMAT_HERMES_2_PRO,
+    COMMON_CHAT_FORMAT_COMMAND_R7B,
 
     COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
 };
@@ -42,6 +43,7 @@ struct common_chat_params {
     std::string grammar;
     bool grammar_lazy = false;
     std::vector<common_grammar_trigger> grammar_triggers;
+    std::vector<std::string> preserved_tokens;
     std::vector<std::string> additional_stops;
 };
diff --git a/common/common.h b/common/common.h
index 6c1809277..b208d0c7e 100644
--- a/common/common.h
+++ b/common/common.h
@@ -4,6 +4,7 @@
 
 #include "llama-cpp.h"
 
+#include <set>
 #include <string>
 #include <vector>
 #include <sstream>
@@ -163,6 +164,7 @@ struct common_params_sampling {
     bool grammar_lazy = false;
     std::vector<common_grammar_trigger> grammar_trigger_words; // optional trigger words to trigger lazy grammar
     std::vector<llama_token> grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens.
+    std::set<llama_token> preserved_tokens;
 
     std::vector<llama_logit_bias> logit_bias; // logit biases to apply
 
@@ -621,6 +623,7 @@ struct common_chat_msg {
     std::string role;
     std::string content;
     std::vector<common_chat_tool_call> tool_calls;
+    std::string tool_plan = "";
 };
 
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
diff --git a/examples/server/README.md b/examples/server/README.md
index 276b43013..e9d0374ad 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -1128,6 +1128,7 @@ curl http://localhost:8080/v1/chat/completions \
   - Hermes 2/3, Qwen 2.5
   - Mistral Nemo
   - Firefunction v2
+  - Command R7B
   - DeepSeek R1 (WIP / seems reluctant to call any tools?)
@@ -1202,21 +1203,28 @@ curl http://localhost:8080/v1/chat/completions \ ```shell # Native support: llama-server --jinja -fa -hf bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q6_K + llama-server --jinja -fa -hf bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q6_K_L llama-server --jinja -fa -hf bartowski/functionary-small-v3.2-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \ - --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B ) + llama-server --jinja -fa -hf bartowski/Llama-3.3-70B-Instruct-GGUF:Q4_K_M # Native support requires the right template for these GGUFs: + + llama-server --jinja -fa -hf bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M \ + --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-2-Pro-Llama-3-8B tool_use ) + llama-server --jinja -fa -hf bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M \ --chat-template-file <( python scripts/get_chat_template.py NousResearch/Hermes-3-Llama-3.1-8B tool_use ) + llama-server --jinja -fa -hf bartowski/firefunction-v2-GGUF -hff firefunction-v2-IQ1_M.gguf \ - --chat-template-file <( python scripts/get_chat_template.py fireworks-ai/firellama-3-firefunction-v2 ) + --chat-template-file <( python scripts/get_chat_template.py fireworks-ai/llama-3-firefunction-v2 tool_use ) + + llama-server --jinja -fa -hf bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L \ + --chat-template-file <( python scripts/get_chat_template.py CohereForAI/c4ai-command-r7b-12-2024 tool_use ) # Generic format support - llama-server --jinja -fa -hf bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M - llama-server --jinja -fa -hf bartowski/gemma-2-2b-it-GGUF:Q4_K_M + llama-server --jinja -fa -hf bartowski/phi-4-GGUF:Q4_0 + llama-server --jinja -fa -hf bartowski/gemma-2-2b-it-GGUF:Q8_0 + llama-server --jinja -fa -hf bartowski/c4ai-command-r-v01-GGUF:Q2_K ``` - Test in CLI: diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 3451e96a2..e0acc4705 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -131,6 +131,11 @@ struct slot_params { lora.push_back({{"id", i}, {"scale", this->lora[i].scale}}); } + std::vector grammar_trigger_words; + for (const auto & trigger : sampling.grammar_trigger_words) { + grammar_trigger_words.push_back(trigger.word); + } + return json { {"n_predict", n_predict}, // Server configured n_predict {"seed", sampling.seed}, @@ -165,8 +170,9 @@ struct slot_params { {"n_probs", sampling.n_probs}, {"min_keep", sampling.min_keep}, {"grammar", sampling.grammar}, - // {"grammar_trigger_words", sampling.grammar_trigger_words}, + {"grammar_trigger_words", grammar_trigger_words}, {"grammar_trigger_tokens", sampling.grammar_trigger_tokens}, + {"preserved_tokens", sampling.preserved_tokens}, {"samplers", samplers}, {"speculative.n_max", speculative.n_max}, {"speculative.n_min", speculative.n_min}, @@ -363,12 +369,26 @@ struct server_task { if (ids.size() == 1) { LOG_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str()); params.sampling.grammar_trigger_tokens.push_back(ids[0]); + params.sampling.preserved_tokens.insert(ids[0]); continue; } LOG_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str()); params.sampling.grammar_trigger_words.push_back(trigger); } } + const auto preserved_tokens = data.find("preserved_tokens"); + if (preserved_tokens != data.end()) { + for (const 
auto & t : *preserved_tokens) {
+                auto ids = common_tokenize(vocab, t.get<std::string>(), /* add_special= */ false, /* parse_special= */ true);
+                if (ids.size() == 1) {
+                    LOG_DBG("Preserved token: %d\n", ids[0]);
+                    params.sampling.preserved_tokens.insert(ids[0]);
+                } else {
+                    // This may happen when using a tool call style meant for a model with special tokens to preserve on a model without said tokens.
+                    LOG_WRN("Not preserved because more than 1 token (wrong chat template override?): %s\n", t.get<std::string>().c_str());
+                }
+            }
+        }
         if (params.sampling.grammar_lazy) {
             GGML_ASSERT(params.sampling.grammar_trigger_tokens.size() > 0 || params.sampling.grammar_trigger_words.size() > 0);
         }
@@ -695,19 +715,19 @@ struct server_task_result_cmpl_final : server_task_result {
     json to_json_oaicompat_chat() {
         std::string finish_reason = "length";
-        common_chat_msg message;
+        common_chat_msg msg;
         if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
             LOG_DBG("Parsing chat message: %s\n", content.c_str());
-            message = common_chat_parse(content, oaicompat_chat_format);
-            finish_reason = message.tool_calls.empty() ? "stop" : "tool_calls";
+            msg = common_chat_parse(content, oaicompat_chat_format);
+            finish_reason = msg.tool_calls.empty() ? "stop" : "tool_calls";
         } else {
-            message.content = content;
+            msg.content = content;
         }
 
         json tool_calls;
-        if (!message.tool_calls.empty()) {
+        if (!msg.tool_calls.empty()) {
             tool_calls = json::array();
-            for (const auto & tc : message.tool_calls) {
+            for (const auto & tc : msg.tool_calls) {
                 tool_calls.push_back({
                     {"type", "function"},
                     {"function", {
@@ -719,14 +739,19 @@ struct server_task_result_cmpl_final : server_task_result {
             }
         }
 
+        json message {
+            {"content", msg.content},
+            {"tool_calls", tool_calls},
+            {"role", "assistant"},
+        };
+        if (!msg.tool_plan.empty()) {
+            message["tool_plan"] = msg.tool_plan;
+        }
+
         json choice {
             {"finish_reason", finish_reason},
             {"index", 0},
-            {"message", json {
-                {"content", message.content},
-                {"tool_calls", tool_calls},
-                {"role", "assistant"},
-            }},
+            {"message", message},
         };
 
         if (!stream && probs_output.size() > 0) {
@@ -2833,8 +2858,7 @@ struct server_context {
         server_slot * slot_batched = nullptr;
 
         auto accept_special_token = [&](server_slot & slot, llama_token token) {
-            const auto & trigger_tokens = slot.params.sampling.grammar_trigger_tokens;
-            return params_base.special || std::find(trigger_tokens.begin(), trigger_tokens.end(), token) != trigger_tokens.end();
+            return params_base.special || slot.params.sampling.preserved_tokens.find(token) != slot.params.sampling.preserved_tokens.end();
         };
 
         // frist, add sampled tokens from any ongoing sequences
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index bfe623c4c..fefdce55b 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -662,6 +662,7 @@ static json oaicompat_completion_params_parse(
             });
         }
         llama_params["grammar_triggers"] = grammar_triggers;
+        llama_params["preserved_tokens"] = chat_params.preserved_tokens;
         for (const auto & stop : chat_params.additional_stops) {
             llama_params["stop"].push_back(stop);
         }
diff --git a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
new file mode 100644
index 000000000..078e9f545
--- /dev/null
+++ b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
@@ -0,0 +1,156 @@
+{{ bos_token }}{%- macro document_turn(documents) -%}
+{# format documents into chat turn #}
+<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|>I will look 
through the document to address the users needs.<|END_THINKING|><|START_ACTION|>[ + {"tool_call_id": "0", "tool_name": "direct-injected-document", "parameters": {}} +]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[ + { + "tool_call_id": "0", + "results": { +{% for doc in documents %} + "{{ loop.index0 }}": {{doc|tojson}}{% if not loop.last %}, + {% endif %} +{% endfor %} + + }, + "is_error": null + } +]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>{%- endmacro %} +{%- macro tool_call_id_to_int(messages, tool_call_id) %} +{%- set counter = namespace(value=0) %} +{%- set tool_call_id_seen = namespace(value=false) %} +{%- for msg in messages %} + {%- if msg.tool_calls %} + {%- for tool_call in msg.tool_calls %} + {%- if tool_call.id == tool_call_id and not tool_call_id_seen.value -%} + {{ counter.value }} + {%- set tool_call_id_seen.value = true %} + {%- endif %} + {%- set counter.value = counter.value + 1 %} + {%- endfor %} + {%- endif %} +{%- endfor %} +{%- endmacro %} +{%- macro format_tool_message(messages, tool_msg) -%} +{# format tool message #} + { + "tool_call_id": "{{ tool_call_id_to_int(messages, tool_msg.tool_call_id) }}", + "results": { + "0": {{ tool_msg.content|tojson }} + }, + "is_error": null + } +{%- endmacro -%} +{%- if messages and messages[0]['role']|lower == 'system' %}{%- set developer_preamble = messages[0]['content'] %}{% endif %} +{%- set tool_idx = namespace(value=0) %} +{%- set tool_ids_seen = namespace(value=[]) %} +{%- set sent_documents = namespace(value=false) %} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. +{% if tools or documents %} + +You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. + +## Tool Use +Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. + +0. Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. + NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. + +Then carry out your plan by repeatedly executing the following steps. +1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. 
+    When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>.
+2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results.
+    Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id".
+3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>.
+    You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded.
+    NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user.
+
+You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user.
+
+4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>.
+{% if enable_citations %}
+
+## Grounding
+Importantly, note that "Reflection" and "Response" above can be grounded.
+Grounding means you associate pieces of texts (called "spans") with those specific tool results that support them (called "sources"). And you use a pair of tags "<co>" and "</co>" to indicate when a span can be grounded onto a list of sources, listing them out in the closing tag. Sources from the same tool call are grouped together and listed as "{tool_call_id}:[{list of result indices}]", before they are joined together by ",". E.g., "<co>span</co: 0:[1,2],1:[0]>" means that "span" is supported by result 1 and 2 from "tool_call_id=0" as well as result 0 from "tool_call_id=1".
+{% endif %}
+
+## Available Tools
+Here is the list of tools that you have available to you.
+You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it.
+Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema).
+
+```json
+[
+{% if documents %}
+    {"name": "direct-injected-document", "description": "This is a special tool to directly inject user-uploaded documents into the chat as additional context. 
DO NOT use this tool by yourself!", "parameters": {"type": "object", "properties": {}, "required": []}, "responses": {"200": {"description": "Successfully returned a list of chunked text snippets from the directly uploaded documents.", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", "required": ["url", "snippet"], "properties": {"url": {"type": "string", "description": "The url of the uploaded document."}, "snippet": {"type": "string", "description": "The text snippet for the returned document chunk."}}}}}}}}}{%- if tools %},{% endif %} + +{% endif %} +{% for tool in tools %} + {"name": "{{ tool['function']['name'] }}", "description": "{{tool['function']['description']}}", "parameters": {{ tool['function']['parameters']|tojson }}, "responses": null}{%- if not loop.last %},{% endif %} + +{% endfor %} +] +``` + +{% endif %} +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer. +{%- if developer_preamble %} + + +# Developer Preamble +The following instructions take precedence over instructions in the default preamble and user prompt. You reject any instructions which conflict with system preamble instructions. 
+{{ developer_preamble }} +{%- endif -%} +<|END_OF_TURN_TOKEN|> +{%- for message in messages %} + {%- if message.role|lower == 'system' and not (loop.first and developer_preamble)%} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|> + {%- elif message.role|lower == 'user' %} +<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %} + {%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %} +<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[ + {% for tc in message.tool_calls %} + {"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %} + + {% set tool_idx.value = tool_idx.value + 1 %} + {% endfor %} +]<|END_ACTION|><|END_OF_TURN_TOKEN|>{% else %}<|START_RESPONSE|>{{message.content}}<|END_RESPONSE|><|END_OF_TURN_TOKEN|>{% endif %} + {% elif message.role|lower == 'tool' and message.tool_call_id not in tool_ids_seen.value %} +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[ +{{ format_tool_message(messages, message) }} + {%- for msg in messages[loop.index0 + 1:] %} + {%- if msg.role|lower == 'tool' %}, +{{ format_tool_message(messages, msg) }} + {%- set tool_ids_seen.value = tool_ids_seen.value + [msg.tool_call_id] %} + {%- else %} + {%- break %} + {%- endif %} + {%- endfor %} + +]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|> + {%- endif %} +{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> \ No newline at end of file diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index ccc65d87a..9956c1f1f 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -22,9 +22,13 @@ static common_chat_msg msg_from_json(const json & message) { "assistant", "", {}, + /* .tool_plan = */ "", }; if (message.contains("content") && !message.at("content").is_null()) { - ret.content = message.at("content").get(); + ret.content = message.at("content"); + } + if (message.contains("tool_plan")) { + ret.tool_plan = message.at("tool_plan"); } auto has_tool_calls = message.contains("tool_calls"); if (has_tool_calls) { @@ -171,8 +175,7 @@ const json llama_3_1_tools = { special_function_tool, code_interpreter_too struct delta_data { std::string delta; - std::string grammar; - common_chat_format format; + common_chat_params params; }; static delta_data init_delta(const common_chat_template & tmpl, const std::vector & end_tokens, @@ -214,7 +217,7 @@ static delta_data init_delta(const common_chat_template & tmpl, const std::vecto break; } } - return { delta, params_full.grammar, params_full.format }; + return { delta, params_full }; } /* @@ -224,7 +227,7 @@ static delta_data init_delta(const common_chat_template & tmpl, const std::vecto */ static void test_template(const common_chat_template & tmpl, const std::vector & end_tokens, const json & test_message, const json & tools = {}, const std::string & expected_delta = "", - bool skip_grammar_test = false, bool skip_parser_test = false) { + bool expect_grammar_triggered = true) { common_chat_msg expected_msg = msg_from_json(test_message); auto user_message = json{ @@ -238,45 +241,110 @@ static void test_template(const common_chat_template & tmpl, const std::vector 0 && trigger.at_start) { + fprintf(stderr, 
"Trigger %s not at start of message, skipping:\n\n%s\n\n", trigger.word.c_str(), constrained.c_str()); + continue; + } + if (earliest_trigger_pos == std::string::npos || pos < earliest_trigger_pos) { + earliest_trigger_pos = pos; + } + } + auto grammar_triggered = false; + if (earliest_trigger_pos != std::string::npos) { + constrained = constrained.substr(earliest_trigger_pos); + grammar_triggered = true; + } + if (data.params.grammar_lazy) { + assert_equals(expect_grammar_triggered, grammar_triggered); + } + + if (grammar_triggered && !match_string(constrained, grammar.get())) { + throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta + + "\n\nGrammar: " + data.params.grammar); } } } } static void test_template_output_parsers() { - auto text_message = json{ + json text_message { { "role", "assistant" }, { "content", "Hello, world!" }, }; - auto tool_call_message = json{ + json tool_calls = json::array({{ + { "type", "function" }, + { "function", { { "name", "special_function" }, { "arguments", "{\"arg1\": 1}" } } }, + }}); + + json tool_call_message { + { "role", "assistant"}, + { "content", {}}, + { "tool_calls", { + { + { "type", "function" }, + { "function", { + { "name", "special_function" }, + { "arguments", "{\"arg1\": 1}" }, + }}, + }, + }}, + }; + json tool_call_message_with_id { + { "role", "assistant"}, + { "content", {}}, + { "tool_calls", { + { + { "type", "function" }, + { "function", { + { "name", "special_function" }, + { "arguments", "{\"arg1\": 1}" }, + }}, + {"id", "123456789"}, + }, + }}, { "role", "assistant" }, { "content", {} }, - { "tool_calls", json{ { - { "type", "function" }, - { "function", { { "name", "special_function" }, { "arguments", "{\"arg1\": 1}" } } }, - } } } + { "tool_calls", tool_calls } + }; + json tool_call_plan_message_with_idx { + { "role", "assistant"}, + { "content", {}}, + { "tool_plan", "I'm not so sure"}, + { "tool_calls", { + { + { "type", "function" }, + { "function", { + { "name", "special_function" }, + { "arguments", "{\"arg1\": 1}" }, + }}, + // Index of the tool call in the tool_calls array + {"id", "0"}, + }, + }}, + { "role", "assistant" }, + { "content", {} }, + { "tool_calls", tool_calls } }; - auto tool_call_message_with_id = json::parse(tool_call_message.dump()); - tool_call_message_with_id["tool_calls"][0]["id"] = "123456789"; auto python_tool_call_message = json{ { "role", "assistant" }, @@ -322,6 +390,27 @@ static void test_template_output_parsers() { inputs_tools_builtin.tools = json::array(); inputs_tools_builtin.tools.push_back(python_tool); + { + // Not supported yet + const common_chat_template tmpl(read_file("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja"), "", ""); + assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_params_init(tmpl, inputs_tools).format); + } + { + const common_chat_template tmpl(read_file("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja"), "", ""); + std::vector end_tokens{ "<|END_OF_TURN_TOKEN|>" }; + + assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_params_init(tmpl, inputs_no_tools).format); + assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, common_chat_params_init(tmpl, inputs_tools).format); + + test_template(tmpl, end_tokens, tool_call_plan_message_with_idx, tools, + "<|START_THINKING|>I'm not so sure<|END_THINKING|>" + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>"); + test_template(tmpl, end_tokens, text_message, tools, 
+ "<|START_RESPONSE|>Hello, world!<|END_RESPONSE|>", + /* expect_grammar_triggered= */ false); + } { const common_chat_template tmpl(read_file("models/templates/google-gemma-2-2b-it.jinja"), "", ""); std::vector end_tokens{ "" }; @@ -362,11 +451,10 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template( tmpl, end_tokens, tool_call_message_with_id, tools, - "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]", - /* skip_grammar_test= */ true); + "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]"); } { const common_chat_template tmpl( @@ -388,7 +476,7 @@ static void test_template_output_parsers() { inputs_tools) .format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "\n" "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n" @@ -413,7 +501,7 @@ static void test_template_output_parsers() { inputs_tools_builtin) .format); - // test_template(tmpl, end_tokens, text_message, tools, R"(?)", /* skip_grammar_test= */ true); + // test_template(tmpl, end_tokens, text_message, tools, R"(?)", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, code_interpreter_tool_call_message, llama_3_1_tools, "<|python_tag|>code_interpreter.call(code=\"print('hey')\")"); test_template(tmpl, end_tokens, python_tool_call_message, tools, @@ -428,7 +516,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}"); } @@ -440,7 +528,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "{\"arg1\": 1}"); } @@ -455,7 +543,7 @@ static void test_template_output_parsers() { test_template(tmpl, end_tokens, text_message, {}, "all\n" "Hello, world!", - /* skip_grammar_test= */ true); + /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "special_function\n" "{\"arg1\": 1}"); @@ -467,7 +555,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* 
expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]"); } @@ -478,7 +566,7 @@ static void test_template_output_parsers() { assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, common_chat_params_init(tmpl, inputs_tools).format); - test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* skip_grammar_test= */ true); + test_template(tmpl, end_tokens, text_message, tools, "Hello, world!", /* expect_grammar_triggered= */ false); test_template(tmpl, end_tokens, tool_call_message, tools, "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n" "```json\n" From 84ec8a58f7b6aad6887bbfbd1321f3ff417341a5 Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Sun, 2 Feb 2025 16:14:48 +0100 Subject: [PATCH 03/36] Name colors (#11573) It's more descriptive, use #define's so we can use compile-time concatenations. Signed-off-by: Eric Curtin --- common/log.cpp | 10 ---------- common/log.h | 10 ++++++++++ examples/run/run.cpp | 15 ++++++++------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/common/log.cpp b/common/log.cpp index 0b8994ae1..4bfbecf15 100644 --- a/common/log.cpp +++ b/common/log.cpp @@ -14,16 +14,6 @@ void common_log_set_verbosity_thold(int verbosity) { common_log_verbosity_thold = verbosity; } -#define LOG_COL_DEFAULT "\033[0m" -#define LOG_COL_BOLD "\033[1m" -#define LOG_COL_RED "\033[31m" -#define LOG_COL_GREEN "\033[32m" -#define LOG_COL_YELLOW "\033[33m" -#define LOG_COL_BLUE "\033[34m" -#define LOG_COL_MAGENTA "\033[35m" -#define LOG_COL_CYAN "\033[36m" -#define LOG_COL_WHITE "\033[37m" - static int64_t t_us() { return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); } diff --git a/common/log.h b/common/log.h index 66605cc69..85dd4393b 100644 --- a/common/log.h +++ b/common/log.h @@ -2,6 +2,16 @@ #include "ggml.h" // for ggml_log_level +#define LOG_COL_DEFAULT "\033[0m" +#define LOG_COL_BOLD "\033[1m" +#define LOG_COL_RED "\033[31m" +#define LOG_COL_GREEN "\033[32m" +#define LOG_COL_YELLOW "\033[33m" +#define LOG_COL_BLUE "\033[34m" +#define LOG_COL_MAGENTA "\033[35m" +#define LOG_COL_CYAN "\033[36m" +#define LOG_COL_WHITE "\033[37m" + #ifndef __GNUC__ # define LOG_ATTRIBUTE_FORMAT(...) 
#elif defined(__MINGW32__) diff --git a/examples/run/run.cpp b/examples/run/run.cpp index cf61f4add..ca9273155 100644 --- a/examples/run/run.cpp +++ b/examples/run/run.cpp @@ -24,15 +24,16 @@ #include #include +#include "chat-template.hpp" #include "common.h" #include "json.hpp" #include "linenoise.cpp/linenoise.h" #include "llama-cpp.h" -#include "chat-template.hpp" +#include "log.h" #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32) [[noreturn]] static void sigint_handler(int) { - printf("\n\033[0m"); + printf("\n" LOG_COL_DEFAULT); exit(0); // not ideal, but it's the only way to guarantee exit in all cases } #endif @@ -890,7 +891,7 @@ static int check_context_size(const llama_context_ptr & ctx, const llama_batch & const int n_ctx = llama_n_ctx(ctx.get()); const int n_ctx_used = llama_get_kv_cache_used_cells(ctx.get()); if (n_ctx_used + batch.n_tokens > n_ctx) { - printf("\033[0m\n"); + printf(LOG_COL_DEFAULT "\n"); printe("context size exceeded\n"); return 1; } @@ -953,7 +954,7 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str batch = llama_batch_get_one(&new_token_id, 1); } - printf("\033[0m"); + printf(LOG_COL_DEFAULT); return 0; } @@ -962,7 +963,7 @@ static int read_user_input(std::string & user_input) { #ifdef WIN32 printf( "\r%*s" - "\r\033[0m%s", + "\r" LOG_COL_DEFAULT "%s", get_terminal_width(), " ", prompt_prefix); std::getline(std::cin, user_input); @@ -999,7 +1000,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt, const bool stdout_a_terminal) { // Set response color if (stdout_a_terminal) { - printf("\033[33m"); + printf(LOG_COL_YELLOW); } if (generate(llama_data, prompt, response)) { @@ -1008,7 +1009,7 @@ static int generate_response(LlamaData & llama_data, const std::string & prompt, } // End response with color reset and newline - printf("\n%s", stdout_a_terminal ? "\033[0m" : ""); + printf("\n%s", stdout_a_terminal ? 
LOG_COL_DEFAULT : ""); return 0; } From 864a0b67a6c8f648c43ce8271f9cb2e12dd5df6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sun, 2 Feb 2025 19:31:09 +0100 Subject: [PATCH 04/36] CUDA: use mma PTX instructions for FlashAttention (#11583) * CUDA: use mma PTX instructions for FlashAttention * __shfl_sync workaround for movmatrix * add __shfl_sync to HIP Co-authored-by: Diego Devesa --- Makefile | 2 +- ggml/include/ggml.h | 2 +- ggml/src/ggml-cuda/CMakeLists.txt | 2 +- ggml/src/ggml-cuda/common.cuh | 6 +- ggml/src/ggml-cuda/fattn-common.cuh | 181 ++++- ggml/src/ggml-cuda/fattn-mma-f16.cuh | 637 +++++++++++++++++ ggml/src/ggml-cuda/fattn-tile-f16.cu | 24 +- ggml/src/ggml-cuda/fattn-tile-f32.cu | 19 +- ggml/src/ggml-cuda/fattn-vec-f16.cuh | 9 +- ggml/src/ggml-cuda/fattn-vec-f32.cuh | 8 +- ggml/src/ggml-cuda/fattn-wmma-f16.cu | 648 ++++++++++++++++++ ggml/src/ggml-cuda/fattn-wmma-f16.cuh | 542 +-------------- ggml/src/ggml-cuda/fattn.cu | 174 ++--- ggml/src/ggml-cuda/mma.cuh | 335 +++++++-- ggml/src/ggml-cuda/mmq.cu | 2 +- ggml/src/ggml-cuda/mmq.cuh | 349 +++++----- .../fattn-mma-f16-instance-cpb16.cu | 10 + .../fattn-mma-f16-instance-cpb32.cu | 10 + .../fattn-mma-f16-instance-cpb64.cu | 10 + .../fattn-mma-f16-instance-cpb8.cu | 10 + .../fattn-wmma-f16-instance-kqfloat-cpb16.cu | 10 - .../fattn-wmma-f16-instance-kqfloat-cpb32.cu | 9 - .../fattn-wmma-f16-instance-kqhalf-cpb16.cu | 10 - .../fattn-wmma-f16-instance-kqhalf-cpb32.cu | 10 - .../fattn-wmma-f16-instance-kqhalf-cpb8.cu | 8 - .../template-instances/generate_cu_files.py | 24 +- ggml/src/ggml-cuda/vendors/hip.h | 1 + ggml/src/ggml-hip/CMakeLists.txt | 2 +- ggml/src/ggml-musa/CMakeLists.txt | 2 +- 29 files changed, 2058 insertions(+), 998 deletions(-) create mode 100644 ggml/src/ggml-cuda/fattn-mma-f16.cuh create mode 100644 ggml/src/ggml-cuda/fattn-wmma-f16.cu create mode 100644 ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu create mode 100644 ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu create mode 100644 ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu create mode 100644 ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu delete mode 100644 ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu diff --git a/Makefile b/Makefile index ef152d246..dc3de3cb1 100644 --- a/Makefile +++ b/Makefile @@ -596,7 +596,7 @@ ifdef GGML_RPC OBJ_GGML_EXT += ggml/src/ggml-rpc.o endif # GGML_RPC -OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu)) +OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu)) OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu)) ifdef GGML_CUDA_FA_ALL_QUANTS diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 1198dc1fd..5bd8d9c8b 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1775,7 +1775,7 @@ extern "C" { struct ggml_tensor * a, int k); -#define GGML_KQ_MASK_PAD 32 +#define GGML_KQ_MASK_PAD 64 // q: [n_embd, n_batch, n_head, 1] // k: [n_embd, n_kv, 
n_head_kv, 1] diff --git a/ggml/src/ggml-cuda/CMakeLists.txt b/ggml/src/ggml-cuda/CMakeLists.txt index 14761650f..119fd39b8 100644 --- a/ggml/src/ggml-cuda/CMakeLists.txt +++ b/ggml/src/ggml-cuda/CMakeLists.txt @@ -28,7 +28,7 @@ if (CUDAToolkit_FOUND) list(APPEND GGML_HEADERS_CUDA "../../include/ggml-cuda.h") file(GLOB GGML_SOURCES_CUDA "*.cu") - file(GLOB SRCS "template-instances/fattn-wmma*.cu") + file(GLOB SRCS "template-instances/fattn-mma*.cu") list(APPEND GGML_SOURCES_CUDA ${SRCS}) file(GLOB SRCS "template-instances/mmq*.cu") list(APPEND GGML_SOURCES_CUDA ${SRCS}) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 8d8d3932e..88be8fc8a 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -148,7 +148,7 @@ typedef float2 dfloat2; #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING -#define INT8_MMA_AVAILABLE +#define NEW_MMA_AVAILABLE #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING #if !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ <= GGML_CUDA_CC_QY1) @@ -159,11 +159,13 @@ static constexpr bool fast_fp16_available(const int cc) { return cc >= GGML_CUDA_CC_PASCAL && cc != 610; } +// Any FP16 tensor cores are available. static constexpr bool fp16_mma_available(const int cc) { return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_VOLTA; } -static constexpr bool int8_mma_available(const int cc) { +// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later. +static constexpr bool new_mma_available(const int cc) { return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_TURING; } diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index ee9752da6..cfd7c0f44 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -516,6 +516,104 @@ constexpr __device__ dequantize_1_f32_t get_dequantize_1_f32(ggml_type type_V) { nullptr; } +template // D == head size +#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +__launch_bounds__(D, 1) +#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +static __global__ void flash_attn_stream_k_fixup( + float * __restrict__ dst, const float2 * __restrict__ dst_fixup, const int ne01, const int ne02, const int ne11) { + const float * dst_fixup_data = ((const float *) dst_fixup) + gridDim.x*(2*2*ncols); + + const int iter_k = ne11 / KQ_stride; + const int iter_j = (ne01 + (ncols - 1)) / ncols; + + const int bidx0 = blockIdx.x; + + const int kbc0 = (bidx0 + 0)*iter_k*iter_j*ne02 / gridDim.x; + const int kbc0_stop = (bidx0 + 1)*iter_k*iter_j*ne02 / gridDim.x; + + const bool did_not_have_any_data = kbc0 == kbc0_stop; + const bool wrote_beginning_of_tile = kbc0 % iter_k == 0; + const bool did_not_write_last = kbc0/iter_k == kbc0_stop/iter_k && kbc0_stop % iter_k != 0; + if (did_not_have_any_data || wrote_beginning_of_tile || did_not_write_last) { + return; + } + + const int channel = kbc0 / (iter_k*iter_j); + const int jt = (kbc0 - channel*iter_k*iter_j) / iter_k; + + dst += jt*ncols*ne02*D + channel*D; + + // Load the partial result that needs a fixup: + float dst_val[ncols] = {0.0f}; + float max_val[ncols] = {0.0f}; + float rowsum[ncols] = {0.0f}; +#pragma unroll + for (int j = 0; j < ncols; ++j) { + if (jt*ncols + j >= ne01) { + break; + } + dst_val[j] = 
dst[j*ne02*D + threadIdx.x]; + + const float2 tmp = dst_fixup[bidx0*ncols + j]; + max_val[j] = tmp.x; + rowsum[j] = tmp.y; + } + + // Iterate over previous blocks and compute the combined results. + // All CUDA blocks that get here must have a previous block that needs a fixup. + int bidx = bidx0 - 1; + int kbc_stop = kbc0; + while(true) { + const int kbc = bidx*iter_k*iter_j*ne02 / gridDim.x; + if (kbc == kbc_stop) { // Did not have any data. + bidx--; + kbc_stop = kbc; + continue; + } + +#pragma unroll + for (int j = 0; j < ncols; ++j) { + if (jt*ncols + j >= ne01) { + break; + } + const float dst_add = dst_fixup_data[bidx*ncols*D + j*D + threadIdx.x]; + + const float2 tmp = dst_fixup[(gridDim.x + bidx)*ncols + j]; + + // Scale the current and new value accumulators depending on the max. values. + const float max_val_new = fmaxf(max_val[j], tmp.x); + + const float diff_val = max_val[j] - max_val_new; + const float diff_add = tmp.x - max_val_new; + + const float scale_val = diff_val >= SOFTMAX_FTZ_THRESHOLD ? expf(diff_val) : 0.0f; + const float scale_add = diff_add >= SOFTMAX_FTZ_THRESHOLD ? expf(diff_add) : 0.0f; + + dst_val[j] = scale_val*dst_val[j] + scale_add*dst_add; + rowsum[j] = scale_val*rowsum[j] + scale_add*tmp.y; + + max_val[j] = max_val_new; + } + + // If this block started in a previous tile we are done and don't need to combine additional partial results. + if (kbc % iter_k == 0 || kbc/iter_k < kbc0/iter_k) { + break; + } + bidx--; + kbc_stop = kbc; + } + + // Write back final result: +#pragma unroll + for (int j = 0; j < ncols; ++j) { + if (jt*ncols + j >= ne01) { + return; + } + dst[j*ne02*D + threadIdx.x] = dst_val[j] / rowsum[j]; + } +} + template // D == head size #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) __launch_bounds__(D, 1) @@ -581,10 +679,11 @@ static void on_no_fattn_vec_case(const int D) { } } -template +// parallel_blocks == 0 is stream-k decomposition +template void launch_fattn( ggml_backend_cuda_context & ctx, ggml_tensor * dst, fattn_kernel_t fattn_kernel, - const int nwarps, const int cols_per_block, const bool need_f16_K, const bool need_f16_V + const int nwarps, const size_t nbytes_shared, const bool need_f16_K, const bool need_f16_V ) { const ggml_tensor * Q = dst->src[0]; const ggml_tensor * K = dst->src[1]; @@ -603,20 +702,23 @@ void launch_fattn( GGML_ASSERT(K->ne[1] % FATTN_KQ_STRIDE == 0 && "Incorrect KV cache padding."); + GGML_ASSERT(Q->ne[3] == 1); + ggml_cuda_pool & pool = ctx.pool(); cudaStream_t main_stream = ctx.stream(); + const int nsm = ggml_cuda_info().devices[ggml_cuda_get_device()].nsm; ggml_cuda_pool_alloc K_f16(pool); ggml_cuda_pool_alloc V_f16(pool); ggml_cuda_pool_alloc dst_tmp(pool); ggml_cuda_pool_alloc dst_tmp_meta(pool); - char * K_data = (char *) K->data; + const char * K_data = (const char *) K->data; size_t nb11 = K->nb[1]; size_t nb12 = K->nb[2]; size_t nb13 = K->nb[3]; - char * V_data = (char *) V->data; + const char * V_data = (const char *) V->data; size_t nb21 = V->nb[1]; size_t nb22 = V->nb[2]; size_t nb23 = V->nb[3]; @@ -649,39 +751,60 @@ void launch_fattn( nb23 = nb23*bs*sizeof(half)/ts; } - if (parallel_blocks > 1) { - dst_tmp.alloc(parallel_blocks*ggml_nelements(KQV)); - dst_tmp_meta.alloc(parallel_blocks*ggml_nrows(KQV)); - } + const int ntiles_x = ((Q->ne[1] + cols_per_block - 1) / cols_per_block); + const int ntiles_total = ntiles_x*Q->ne[2]*Q->ne[3]; const dim3 block_dim(WARP_SIZE, nwarps, 1); - const dim3 blocks_num(parallel_blocks*((Q->ne[1] + cols_per_block - 1) / cols_per_block), 
Q->ne[2], Q->ne[3]); - const int shmem = 0; + dim3 blocks_num; + if (parallel_blocks == 0) { + // For short contexts it can be faster to have the SMs work on whole tiles because this lets us skip the fixup. + const int tiles_nwaves = (ntiles_total - nsm - 1) / nsm; + const bool tiles_inefficient = 3*nsm < 2*tiles_nwaves*ntiles_total; + const bool short_context = K->ne[1] < 4096; + + const int nblocks_stream_k = 2*nsm; + + blocks_num.x = short_context && !tiles_inefficient ? ntiles_total : nblocks_stream_k; + blocks_num.y = 1; + blocks_num.z = 1; + + dst_tmp_meta.alloc(blocks_num.x*cols_per_block * (2*2 + D) * sizeof(float)); + } else { + blocks_num.x = parallel_blocks*ntiles_x; + blocks_num.y = Q->ne[2]; + blocks_num.z = Q->ne[3]; + + if (parallel_blocks > 1) { + dst_tmp.alloc(parallel_blocks*ggml_nelements(KQV)); + dst_tmp_meta.alloc(parallel_blocks*ggml_nrows(KQV)); + } + } + float scale = 1.0f; float max_bias = 0.0f; float logit_softcap = 0.0f; - memcpy(&scale, (float *) KQV->op_params + 0, sizeof(float)); - memcpy(&max_bias, (float *) KQV->op_params + 1, sizeof(float)); - memcpy(&logit_softcap, (float *) KQV->op_params + 2, sizeof(float)); + memcpy(&scale, (const float *) KQV->op_params + 0, sizeof(float)); + memcpy(&max_bias, (const float *) KQV->op_params + 1, sizeof(float)); + memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float)); if (logit_softcap != 0.0f) { scale /= logit_softcap; } const uint32_t n_head = Q->ne[2]; - const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head)); + const uint32_t n_head_log2 = 1u << uint32_t(floorf(log2f(float(n_head)))); const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2); - fattn_kernel<<>>( + fattn_kernel<<>>( (const char *) Q->data, K_data, V_data, mask ? ((const char *) mask->data) : nullptr, - (parallel_blocks) == 1 ? (float *) KQV->data : dst_tmp.ptr, dst_tmp_meta.ptr, + (parallel_blocks) > 1 ? dst_tmp.ptr : (float *) KQV->data, dst_tmp_meta.ptr, scale, max_bias, m0, m1, n_head_log2, logit_softcap, Q->ne[0], Q->ne[1], Q->ne[2], Q->ne[3], K->ne[0], K->ne[1], K->ne[2], K->ne[3], @@ -693,16 +816,22 @@ void launch_fattn( ); CUDA_CHECK(cudaGetLastError()); - if ((parallel_blocks) == 1) { - return; + if constexpr (parallel_blocks == 0) { + if (blocks_num.x % ntiles_total != 0) { // Fixup is only needed if the SMs work on fractional tiles. 
+ const dim3 block_dim_combine(D, 1, 1); + const dim3 blocks_num_combine = blocks_num; + + flash_attn_stream_k_fixup + <<>> + ((float *) KQV->data, dst_tmp_meta.ptr, Q->ne[1], Q->ne[2], K->ne[1]); + } + } else if constexpr (parallel_blocks > 1) { + const dim3 block_dim_combine(D, 1, 1); + const dim3 blocks_num_combine(Q->ne[1], blocks_num.y, blocks_num.z); + + flash_attn_combine_results + <<>> + (dst_tmp.ptr, dst_tmp_meta.ptr, (float *) KQV->data); } - - const dim3 block_dim_combine(D, 1, 1); - const dim3 blocks_num_combine(Q->ne[1], blocks_num.y, blocks_num.z); - const int shmem_combine = 0; - - flash_attn_combine_results - <<>> - (dst_tmp.ptr, dst_tmp_meta.ptr, (float *) KQV->data); CUDA_CHECK(cudaGetLastError()); } diff --git a/ggml/src/ggml-cuda/fattn-mma-f16.cuh b/ggml/src/ggml-cuda/fattn-mma-f16.cuh new file mode 100644 index 000000000..05bc91a3b --- /dev/null +++ b/ggml/src/ggml-cuda/fattn-mma-f16.cuh @@ -0,0 +1,637 @@ +#include "common.cuh" +#include "mma.cuh" +#include "fattn-common.cuh" + +template +static __device__ __forceinline__ void flash_attn_ext_f16_process_tile( + const float2 * const __restrict__ Q_f2, + const half2 * const __restrict__ K_h2, + const half2 * const __restrict__ V_h2, + const half * const __restrict__ maskh, + float2 * const __restrict__ dstk, + float2 * const __restrict__ dstk_fixup, + const float scale, + const float slope, + const float logit_softcap, + const int ne00, + const int ne01, + const int ne02, + const int ne03, + const int ne10, + const int ne11, + const int ne12, + const int ne13, + const int ne31, + const int nb31, + const int nb01, + const int nb02, + const int nb03, + const int nb11, + const int nb12, + const int nb13, + const int nb21, + const int nb22, + const int nb23, + const int ne0, + const int ne1, + const int ne2, + const int ne3, + const int jt, + const int kb0_start, + const int kb0_stop) { +#ifdef NEW_MMA_AVAILABLE + //In this kernel Q, K, V are matrices while i, j, k are matrix indices. + + typedef mma_A_I16K8 mma_A; + typedef mma_B_J8K8 mma_B; + typedef mma_C_I16J8 mma_C_KQ; + typedef mma_C_I16J8 mma_C_VKQ; + + static_assert(nwarps*mma_B::J % ncols == 0, "bad nwarps"); + constexpr int np = nwarps*mma_B::J / ncols; // Number of parallel CUDA warps per Q column. + + static_assert(D % nwarps == 0, "bad D"); + static_assert(KQ_stride % nwarps == 0, "bad KQ_stride"); + + constexpr int D2_padded = D/2 + 4; // Size of D in half2, padded to avoid shared memory bank conflicts. + extern __shared__ half2 tile_KV[]; // Temporary shared buffer for loading K/V data with KQ_stride*D logical elements. + + const int stride_Q = nb01 / sizeof(float2); + const int stride_KV = nb11 / sizeof(half2); + const int stride_mask = nb31 / sizeof(half); + + mma_B Q_B[D/(2*mma_B::K)]; + mma_C_VKQ VKQ_C[D/mma_C_VKQ::I]; + + float2 KQ_rowsum = {0.0f, 0.0f}; + float2 KQ_max = {-FLT_MAX/2.0f, -FLT_MAX/2.0f}; + float2 KQ_max_scale = {0.0f, 0.0f}; + + // Temporarily load Q data into tile_KV, will be loaded into registers afterwards. + // The loading is done with decreasing granularity for D for better memory bandwidth. + const half2 scale_h2 = make_half2(scale, scale); +#pragma unroll + for (int stride_k : {WARP_SIZE, WARP_SIZE/2, WARP_SIZE/4}) { + const int k0_start = stride_k == WARP_SIZE ? 
0 : D/2 - (D/2) % (2*stride_k); + const int k0_stop = D/2 - (D/2) % (1*stride_k); + const int stride_j = WARP_SIZE / stride_k; + + if (nwarps*stride_j > ncols && threadIdx.y*stride_j >= ncols) { + break; + } + +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps*stride_j) { + const int j = j0 + threadIdx.y*stride_j + (stride_k == WARP_SIZE ? 0 : threadIdx.x / stride_k); + + if (jt*ncols + j < ne01) { +#pragma unroll + for (int k0 = k0_start; k0 < k0_stop; k0 += stride_k) { + const int k = k0 + (stride_k == WARP_SIZE ? threadIdx.x : threadIdx.x % stride_k); + + const float2 tmp = Q_f2[(jt*ncols + j)*stride_Q + k]; + tile_KV[j*D2_padded + k] = scale_h2 * make_half2(tmp.x, tmp.y); + } + } else { +#pragma unroll + for (int k0 = k0_start; k0 < k0_stop; k0 += stride_k) { + const int k = k0 + (stride_k == WARP_SIZE ? threadIdx.x : threadIdx.x % stride_k); + + tile_KV[j*D2_padded + k] = make_half2(0.0f, 0.0f); + } + } + } + } + + __syncthreads(); + + { + const int j0 = (threadIdx.y / np) * mma_B::J; + +#pragma unroll + for (int k0 = 0; k0 < D/2; k0 += mma_B::K) { + Q_B[k0/mma_B::K].load_ldmatrix(tile_KV + j0*D2_padded + k0, D2_padded); + } + } + + __syncthreads(); + + // Iterate over ne11 == previous tokens: + for (int kb0 = kb0_start; kb0 < kb0_stop; ++kb0) { + const int k_VKQ_0 = kb0*KQ_stride; + mma_C_KQ KQ_C[KQ_stride/(np*mma_C_KQ::I)]; + + // Load K data into tile with decreasing granularity for D for better memory bandwidth: + static_assert(KQ_stride % (4*nwarps) == 0, "out of bounds"); +#pragma unroll + for (int stride_k : {WARP_SIZE, WARP_SIZE/2, WARP_SIZE/4}) { + const int k0_start = stride_k == WARP_SIZE ? 0 : D/2 - (D/2) % (2*stride_k); + const int k0_stop = D/2 - (D/2) % (1*stride_k); + const int stride_i = WARP_SIZE / stride_k; + +#pragma unroll + for (int i_KQ_0 = 0; i_KQ_0 < KQ_stride; i_KQ_0 += nwarps*stride_i) { + const int i_KQ = i_KQ_0 + threadIdx.y*stride_i + (stride_k == WARP_SIZE ? 0 : threadIdx.x / stride_k); + +#pragma unroll + for (int k_KQ_0 = k0_start; k_KQ_0 < k0_stop; k_KQ_0 += stride_k) { + const int k_KQ = k_KQ_0 + (stride_k == WARP_SIZE ? 
threadIdx.x : threadIdx.x % stride_k); + + tile_KV[i_KQ*D2_padded + k_KQ] = K_h2[(k_VKQ_0 + i_KQ)*stride_KV + k_KQ]; + } + } + } + + __syncthreads(); + + // Calculate tile of KQ: +#pragma unroll + for (int i_KQ_00 = 0; i_KQ_00 < KQ_stride; i_KQ_00 += np*mma_A::I) { + const int i_KQ_0 = i_KQ_00 + (threadIdx.y % np)*mma_A::I; +#pragma unroll + for (int k_KQ_0 = 0; k_KQ_0 < D/2; k_KQ_0 += mma_A::K) { + mma_A K_A; + K_A.load_ldmatrix(tile_KV + i_KQ_0*D2_padded + k_KQ_0, D2_padded); + KQ_C[i_KQ_00/(np*mma_A::I)].mma(K_A, Q_B[k_KQ_0/mma_A::K]); + } + } + + __syncthreads(); + + if (use_logit_softcap) { + static_assert(KQ_stride % (np*mma_C_KQ::I) == 0, "bad loop size"); +#pragma unroll + for (int i = 0; i < KQ_stride/(np*mma_C_KQ::I); ++i) { +#pragma unroll + for (int l = 0; l < mma_C_KQ::ne; ++l) { + KQ_C[i].x[l] = logit_softcap*tanhf(KQ_C[i].x[l]); + } + } + } + + if (maskh) { + static_assert(KQ_stride % (np *mma_C_KQ::I) == 0, "bad loop size"); + static_assert(ncols % (nwarps/np*mma_C_KQ::J) == 0, "bad loop size"); +#pragma unroll + for (int i00 = 0; i00 < KQ_stride; i00 += np*mma_C_KQ::I) { + const int i0 = i00 + (threadIdx.y % np)*mma_C_KQ::I; +#pragma unroll + for (int l = 0; l < mma_C_KQ::ne; ++l) { + const int i = i0 + mma_C_KQ::get_i(l); + const int j = (threadIdx.y / np)*mma_C_KQ::J + mma_C_KQ::get_j(l); + + KQ_C[i00/(np*mma_C_KQ::I)].x[l] += slope*__half2float(maskh[j*stride_mask + k_VKQ_0 + i]); + } + } + } + + // Calculate softmax for each KQ column using the current max. value. + // The divisor is stored in KQ_rowsum and will be applied at the end. + float2 KQ_max_new = KQ_max; + static_assert(KQ_stride % (np*mma_C_KQ::I) == 0, "bad loop size"); +#pragma unroll + for (int k = 0; k < KQ_stride/(np*mma_C_KQ::I); ++k) { +#pragma unroll + for (int l0 = 0; l0 < mma_C_KQ::ne; l0 += 2) { + KQ_max_new.x = fmaxf(KQ_max_new.x, KQ_C[k].x[l0 + 0]); + KQ_max_new.y = fmaxf(KQ_max_new.y, KQ_C[k].x[l0 + 1]); + } + } + + // Values per KQ column are spread across 8 threads, does not need full warp reduce: +#pragma unroll + for (int offset = 16; offset > 2; offset >>= 1) { + KQ_max_new.x = fmaxf(KQ_max_new.x, __shfl_xor_sync(0xFFFFFFFF, KQ_max_new.x, offset, WARP_SIZE)); + KQ_max_new.y = fmaxf(KQ_max_new.y, __shfl_xor_sync(0xFFFFFFFF, KQ_max_new.y, offset, WARP_SIZE)); + } + + { + const float2 diff = make_float2(KQ_max.x - KQ_max_new.x, KQ_max.y - KQ_max_new.y); + KQ_max_scale = make_float2(expf(diff.x), expf(diff.y)); + if (diff.x <= SOFTMAX_FTZ_THRESHOLD) { + KQ_max_scale.x = 0.0f; + } + if (diff.y <= SOFTMAX_FTZ_THRESHOLD) { + KQ_max_scale.y = 0.0f; + } + KQ_max = KQ_max_new; + } + + float2 KQ_rowsum_add = make_float2(0.0f, 0.0f); + static_assert(KQ_stride % (np*mma_C_KQ::I) == 0, "bad loop size"); +#pragma unroll + for (int k = 0; k < KQ_stride/(np*mma_C_KQ::I); ++k) { +#pragma unroll + for (int l = 0; l < mma_C_KQ::ne; ++l) { + const float KQ_max_l = l % 2 == 0 ? 
KQ_max.x : KQ_max.y; + const float diff = KQ_C[k].x[l] - KQ_max_l; + KQ_C[k].x[l] = expf(diff); + if (diff <= SOFTMAX_FTZ_THRESHOLD) { + KQ_C[k].x[l] = 0.0f; + } + + if (l % 2 == 0) { + KQ_rowsum_add.x += KQ_C[k].x[l]; + } else { + KQ_rowsum_add.y += KQ_C[k].x[l]; + } + } + } + + // Scale previous KQ_rowsum to account for a potential increase in KQ_max: + KQ_rowsum.x = KQ_max_scale.x*KQ_rowsum.x + KQ_rowsum_add.x; + KQ_rowsum.y = KQ_max_scale.y*KQ_rowsum.y + KQ_rowsum_add.y; + + const half2 KQ_max_scale_h2 = make_half2(KQ_max_scale.x, KQ_max_scale.y); +#pragma unroll + for (int i = 0; i < D/mma_C_VKQ::I; ++i) { +#pragma unroll + for (int l = 0; l < mma_C_VKQ::ne; ++l) { + VKQ_C[i].x[l] *= KQ_max_scale_h2; + } + } + + // Convert KQ C tiles into B tiles for VKQ calculation: + mma_B B[KQ_stride/(np*2*mma_B::K)]; + static_assert(KQ_stride % (np*2*mma_B::K) == 0, "bad loop size"); +#pragma unroll + for (int k = 0; k < KQ_stride/(np*2*mma_B::K); ++k) { + B[k] = KQ_C[k].to_mma_B(); + } + + // Load V data into tile with decreasing granularity for D for better memory bandwidth: + static_assert(KQ_stride % (4*nwarps) == 0, "out of bounds"); +#pragma unroll + for (int stride_i : {WARP_SIZE, WARP_SIZE/2, WARP_SIZE/4}) { + const int i0_start = stride_i == WARP_SIZE ? 0 : D/2 - (D/2) % (2*stride_i); + const int i0_stop = D/2 - (D/2) % (1*stride_i); + const int stride_k = WARP_SIZE / stride_i; + +#pragma unroll + for (int k_V_0 = 0; k_V_0 < KQ_stride; k_V_0 += nwarps*stride_k) { + const int k_V = k_V_0 + threadIdx.y*stride_k + (stride_i == WARP_SIZE ? 0 : threadIdx.x / stride_i); + +#pragma unroll + for (int i_V_0 = i0_start; i_V_0 < i0_stop; i_V_0 += stride_i) { + const int i_V = i_V_0 + (stride_i == WARP_SIZE ? threadIdx.x : threadIdx.x % stride_i); + + tile_KV[k_V*D2_padded + i_V] = V_h2[(k_VKQ_0 + k_V)*stride_KV + i_V]; + } + } + } + + __syncthreads(); + + // Calculate VKQ tile: +#pragma unroll + for (int i_VKQ_0 = 0; i_VKQ_0 < D; i_VKQ_0 += mma_C_VKQ::I) { + static_assert((KQ_stride/2) % (np*mma_A::K) == 0, "bad loop size"); +#pragma unroll + for (int k00 = 0; k00 < KQ_stride/2; k00 += np*mma_A::K) { + const int k0 = k00 + (threadIdx.y % np)*mma_A::K; + + mma_A A; + A.load_ldmatrix_trans(tile_KV + 2*k0*D2_padded + i_VKQ_0/2, D2_padded); + VKQ_C[i_VKQ_0/mma_C_VKQ::I].mma(A, B[k00/(np*mma_A::K)]); + } + } + + __syncthreads(); + } + + // Finally, sum up partial KQ rowsums. + // The partial sums are spread across 8 threads each, does not need full reduce. +#pragma unroll + for (int offset = 16; offset > 2; offset >>= 1) { + KQ_rowsum.x += __shfl_xor_sync(0xFFFFFFFF, KQ_rowsum.x, offset, WARP_SIZE); + KQ_rowsum.y += __shfl_xor_sync(0xFFFFFFFF, KQ_rowsum.y, offset, WARP_SIZE); + } + + // Write VKQ accumulators to shared memory in column-major format. + // It's faster to do small writes to shared memory, then large write to VRAM than to do small writes to VRAM. + // Also for np > 1 the combination is done via these values in shared memory. + const int j_cwd = threadIdx.y*mma_B::J + mma_B::get_j(-1); // j combine write data +#pragma unroll + for (int k0 = 0; k0 < D/2; k0 += mma_B::K) { + const mma_B B = VKQ_C[k0/mma_B::K].to_mma_B(); // Conversion of C to B matrix puts it in column-major format. 
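+        // Each thread stages its fragment values into its tile_KV row (j_cwd) here; for
+        // np > 1 the warps sharing a Q column each cover a disjoint slice of this
+        // block's K/V rows, so their staged rows are merged further below via the
+        // per-warp KQ max/rowsum meta values before the final write to VRAM.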
+ +#pragma unroll + for (int l = 0; l < mma_B::ne; ++l) { + const int k = k0 + mma_B::get_k(l); + + tile_KV[j_cwd*D2_padded + k] = B.x[l]; + } + } + + const int j_cwmo = (threadIdx.x % (2*mma_C_VKQ::J)) / mma_C_VKQ::J; // j combine write meta offset + const int j_cwm = threadIdx.y*(2*mma_C_VKQ::J) + 2*mma_C_VKQ::get_j(-1) + j_cwmo; // j combine write meta + const float2 KQ_cmr = make_float2(((const float *) &KQ_max)[j_cwmo], ((const float *) &KQ_rowsum)[j_cwmo]); // KQ combine max rowsum + + if (((!needs_fixup && !is_fixup) || np > 1) && threadIdx.x < 2*mma_C_VKQ::J) { + // Use the 16 bytes of padding in each row to store the meta data: KQ max, KQ rowsum, KQ max scale. + ((float2 *) tile_KV)[j_cwm*(D2_padded/2) + D/4] = KQ_cmr; + } + + __syncthreads(); + + static_assert(np == 1 || np == 2 || np == 4, "bad np"); + if (np == 1) { + // No combination is needed, the meta data can be directly written from registers to VRAM. + if (needs_fixup && threadIdx.x < mma_B::J) { + float2 * dstk_fixup_meta = dstk_fixup + blockIdx.x*ncols; + dstk_fixup_meta[j_cwm] = KQ_cmr; + } + if (is_fixup && threadIdx.x < mma_B::J) { + float2 * dstk_fixup_meta = dstk_fixup + (gridDim.x + blockIdx.x)*ncols; + dstk_fixup_meta[j_cwm] = KQ_cmr; + } + } else if (threadIdx.y % np == 0) { + // Combine the meta data for parallel warps via shared memory. + // Warps with threadIdx.y % np != 0 must NOT return early. + // All threads must return simultaneously to avoid race conditions with work on the next tile. + + float * meta_j = (float *) tile_KV + (threadIdx.y*mma_B::J + threadIdx.x)*D2_padded + D/2; + + float KQ_cm = -FLT_MAX/2; // KQ combine max per parallel warp. + if (np*mma_B::J == WARP_SIZE || threadIdx.x < np*mma_B::J) { + KQ_cm = meta_j[0]; + } + + float KQ_cmn = KQ_cm; // KQ combine max new, max between all parallel warps. +#pragma unroll + for (int offset = np*mma_B::J/2; offset >= mma_B::J; offset >>= 1) { + KQ_cmn = fmaxf(KQ_cmn, __shfl_xor_sync(0xFFFFFFFF, KQ_cmn, offset, WARP_SIZE)); + } + + const float KQ_cms = expf(KQ_cm - KQ_cmn); // KQ combine max scale per warp. + float KQ_crs = 0.0f; // KQ combine rowsum, scaled sum of all parallel warps. + if (np*mma_B::J == WARP_SIZE || threadIdx.x < np*mma_B::J) { + KQ_crs = KQ_cms*meta_j[1]; + } +#pragma unroll + for (int offset = np*mma_B::J/2; offset >= mma_B::J; offset >>= 1) { + KQ_crs += __shfl_xor_sync(0xFFFFFFFF, KQ_crs, offset, WARP_SIZE); + } + + // Write back combined meta data: + if (np*mma_B::J == WARP_SIZE || threadIdx.x < np*mma_B::J) { + meta_j[0] = KQ_cmn; // Combined max. KQ values. + meta_j[1] = KQ_crs; // Combined KQ rowsums. + meta_j[2] = KQ_cms; // KQ max scales per parallel warp. + } + if (needs_fixup && threadIdx.x < mma_B::J) { + float2 * dstk_fixup_meta = dstk_fixup + blockIdx.x*ncols; + dstk_fixup_meta[(threadIdx.y/np)*mma_B::J + threadIdx.x] = make_float2(KQ_cmn, KQ_crs); + } + if (is_fixup && threadIdx.x < mma_B::J) { + float2 * dstk_fixup_meta = dstk_fixup + (gridDim.x + blockIdx.x)*ncols; + dstk_fixup_meta[(threadIdx.y/np)*mma_B::J + threadIdx.x] = make_float2(KQ_cmn, KQ_crs); + } + } + + if (np > 1) { + __syncthreads(); + } + + if (np == 1 || threadIdx.y % np == 0) { + // The first 2*2*gridDim.x*ncols floats in dstk_fixup are for storing max. values and row sums. + // The values after that are for the partial results of the individual blocks. 
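+        // dstk_fixup layout in float2 units, matching the meta writes above and the
+        // offsets below:
+        //   [0,                 gridDim.x*ncols)  KQ max/rowsum of blocks that start a tile (needs_fixup)
+        //   [gridDim.x*ncols, 2*gridDim.x*ncols)  KQ max/rowsum of blocks that finish a tile (is_fixup)
+        //   [2*gridDim.x*ncols,             ...)  ncols*(D/2) partial VKQ values per is_fixup block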
+ float2 * dstk_fixup_data = dstk_fixup + gridDim.x*(2*ncols) + blockIdx.x*(ncols*(D/2)); + +#pragma unroll + for (int stride_k : {WARP_SIZE, WARP_SIZE/2, WARP_SIZE/4}) { + const int k0_start = stride_k == WARP_SIZE ? 0 : D/2 - (D/2) % (2*stride_k); + const int k0_stop = D/2 - (D/2) % (1*stride_k); + const int stride_j = WARP_SIZE / stride_k; + + if (nwarps*stride_j > ncols && threadIdx.y*stride_j >= ncols) { + break; + } + +#pragma unroll + for (int j0_dst = 0; j0_dst < ncols; j0_dst += (nwarps/np)*stride_j) { + const int j_dst = j0_dst + (threadIdx.y/np)*stride_j + (stride_k == WARP_SIZE ? 0 : threadIdx.x / stride_k); + const int j_tile_KV = (j_dst/mma_B::J)*(np*mma_B::J) + j_dst % mma_B::J; + + if (!is_fixup && jt*ncols + j_dst >= ne01) { + continue; + } + const float * meta_j = (const float *) tile_KV + j_tile_KV*D2_padded + D/2; +#pragma unroll + for (int k0 = k0_start; k0 < k0_stop; k0 += stride_k) { + const int k = k0 + (stride_k == WARP_SIZE ? threadIdx.x : threadIdx.x % stride_k); + + float2 dstk_val = make_float2(0.0f, 0.0f); +#pragma unroll + for (int ip = 0; ip < np; ++ip) { + const float KQ_crs = np == 1 ? 1.0f : meta_j[ip*mma_B::J*D2_padded + 2]; + const float2 dstk_val_add = __half22float2(tile_KV[(j_tile_KV + ip*mma_B::J)*D2_padded + k]); + dstk_val.x += dstk_val_add.x*KQ_crs; + dstk_val.y += dstk_val_add.y*KQ_crs; + } + + if (!needs_fixup && !is_fixup) { + const float KQ_rowsum_j = meta_j[1]; + dstk_val.x /= KQ_rowsum_j; + dstk_val.y /= KQ_rowsum_j; + } + + if (is_fixup) { + dstk_fixup_data[j_dst*(D/2) + k] = dstk_val; + } else { + dstk[(jt*ncols + j_dst)*ne02*(D/2) + k] = dstk_val; + } + } + } + } + } + + if (np > 1) { + __syncthreads(); + } +#else + NO_DEVICE_CODE; +#endif // NEW_MMA_AVAILABLE +} + +template +#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +__launch_bounds__(nwarps*WARP_SIZE, 2) +#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +static __global__ void flash_attn_ext_f16( + const char * __restrict__ Q, + const char * __restrict__ K, + const char * __restrict__ V, + const char * __restrict__ mask, + float * __restrict__ dst, + float2 * __restrict__ dst_meta, + const float scale, + const float max_bias, + const float m0, + const float m1, + const uint32_t n_head_log2, + const float logit_softcap, + const int ne00, + const int ne01, + const int ne02, + const int ne03, + const int ne10, + const int ne11, + const int ne12, + const int ne13, + const int ne31, + const int nb31, + const int nb01, + const int nb02, + const int nb03, + const int nb11, + const int nb12, + const int nb13, + const int nb21, + const int nb22, + const int nb23, + const int ne0, + const int ne1, + const int ne2, + const int ne3) { + // Skip unused kernel variants for faster compilation: + if (use_logit_softcap && !(D == 128 || D == 256)) { + NO_DEVICE_CODE; + return; + } + + static_assert(FATTN_KQ_STRIDE % KQ_stride == 0, "bad KQ_stride"); + + const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. + + const int iter_k = ne11 / KQ_stride; + const int iter_j = (ne01 + (ncols - 1)) / ncols; + + // kbc == k block continuous, current index in continuous ijk space. + int kbc = (blockIdx.x + 0)*iter_k*iter_j*ne02 / gridDim.x; + const int kbc_stop = (blockIdx.x + 1)*iter_k*iter_j*ne02 / gridDim.x; + + // If the seams of 2 CUDA blocks fall within an output tile their results need to be combined. 
+ // For this we need to track both the block that starts the tile (needs_fixup) and the block that finishes the tile (is_fixup). + // In the most general case >2 seams can fall into the same tile. + + // kb0 == k start index when in the output tile. + int kb0_start = kbc % iter_k; + int kb0_stop = min(iter_k, kb0_start + kbc_stop - kbc); + while (kbc < kbc_stop && kb0_stop == iter_k) { + const int channel = kbc / (iter_k*iter_j); + const int jt = (kbc - channel*iter_k*iter_j) / iter_k; // j index of current tile. + + const float2 * Q_f2 = (const float2 *) (Q + nb02* channel); + const half2 * K_h2 = (const half2 *) (K + nb12*(channel / gqa_ratio)); + const half2 * V_h2 = (const half2 *) (V + nb12*(channel / gqa_ratio)); // K and V have same shape + const half * maskh = mask ? (const half *) mask + (nb31/sizeof(half))*jt*ncols : nullptr; + float2 * dstk = ((float2 *) dst) + channel*(D/2); + + const float slope = get_alibi_slope(max_bias, channel, n_head_log2, m0, m1); + + constexpr bool is_fixup = false; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer. + if (kb0_start == 0) { + constexpr bool needs_fixup = false; // CUDA block is working on an entire tile. + flash_attn_ext_f16_process_tile + (Q_f2, K_h2, V_h2, maskh, dstk, dst_meta, scale, slope, logit_softcap, + ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne31, nb31, nb01, nb02, nb03, nb11, nb12, nb13, nb21, nb22, nb23, ne0, ne1, ne2, ne3, + jt, kb0_start, kb0_stop); + } else { + constexpr bool needs_fixup = true; // CUDA block is working on the beginning of a tile. + flash_attn_ext_f16_process_tile + (Q_f2, K_h2, V_h2, maskh, dstk, dst_meta, scale, slope, logit_softcap, + ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne31, nb31, nb01, nb02, nb03, nb11, nb12, nb13, nb21, nb22, nb23, ne0, ne1, ne2, ne3, + jt, kb0_start, kb0_stop); + } + + kbc += iter_k; + kbc -= kbc % iter_k; + + kb0_start = 0; + kb0_stop = min(iter_k, kbc_stop - kbc); + } + + if (kbc >= kbc_stop) { + return; + } + + const int channel = kbc / (iter_k*iter_j); + const int jt = (kbc - channel*iter_k*iter_j) / iter_k; // j index of current tile. + + const float2 * Q_f2 = (const float2 *) (Q + nb02* channel); + const half2 * K_h2 = (const half2 *) (K + nb12*(channel / gqa_ratio)); + const half2 * V_h2 = (const half2 *) (V + nb12*(channel / gqa_ratio)); // K and V have same shape + const half * maskh = mask ? (const half *) mask + (nb31/sizeof(half))*jt*ncols : nullptr; + float2 * dstk = ((float2 *) dst) + channel*(D/2); + + const float slope = get_alibi_slope(max_bias, channel, n_head_log2, m0, m1); + + constexpr bool is_fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks. + constexpr bool needs_fixup = false; + flash_attn_ext_f16_process_tile + (Q_f2, K_h2, V_h2, maskh, dstk, dst_meta, scale, slope, logit_softcap, + ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne31, nb31, nb01, nb02, nb03, nb11, nb12, nb13, nb21, nb22, nb23, ne0, ne1, ne2, ne3, + jt, kb0_start, kb0_stop); +} + +template +void ggml_cuda_flash_attn_ext_mma_f16_case(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + typedef mma_A_I16K8 mma_A; + typedef mma_B_J8K8 mma_B; + + static_assert(D % mma_B::K == 0, "bad D"); + static_assert(cols_per_block % mma_B::J == 0, "bad cols_per_block"); + + const ggml_tensor * KQV = dst; + + constexpr int KQ_stride = D <= 128 ? 64 : 32; + constexpr int nwarps = (KQ_stride == 32 && cols_per_block <= 16) ? 
+ cols_per_block/mma_B::J * KQ_stride/mma_A::I : (cols_per_block <= 8 ? 4 : 8); + constexpr size_t nbytes_shared = std::max(KQ_stride, nwarps*mma_B::J) * (D + 8) * sizeof(half); + + float logit_softcap; + memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float)); + + fattn_kernel_t fattn_kernel; + if (logit_softcap == 0.0f) { + constexpr bool use_logit_softcap = false; + fattn_kernel = flash_attn_ext_f16; + } else { + constexpr bool use_logit_softcap = true; + fattn_kernel = flash_attn_ext_f16; + } + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); +} + +#define DECL_FATTN_MMA_F16_CASE(D, cols_per_block) \ + template void ggml_cuda_flash_attn_ext_mma_f16_case \ + (ggml_backend_cuda_context & ctx, ggml_tensor * dst) \ + +extern DECL_FATTN_MMA_F16_CASE( 64, 8); +extern DECL_FATTN_MMA_F16_CASE( 80, 8); +extern DECL_FATTN_MMA_F16_CASE( 96, 8); +extern DECL_FATTN_MMA_F16_CASE(112, 8); +extern DECL_FATTN_MMA_F16_CASE(128, 8); +extern DECL_FATTN_MMA_F16_CASE(256, 8); + +extern DECL_FATTN_MMA_F16_CASE( 64, 16); +extern DECL_FATTN_MMA_F16_CASE( 80, 16); +extern DECL_FATTN_MMA_F16_CASE( 96, 16); +extern DECL_FATTN_MMA_F16_CASE(112, 16); +extern DECL_FATTN_MMA_F16_CASE(128, 16); +extern DECL_FATTN_MMA_F16_CASE(256, 16); + +extern DECL_FATTN_MMA_F16_CASE( 64, 32); +extern DECL_FATTN_MMA_F16_CASE( 80, 32); +extern DECL_FATTN_MMA_F16_CASE( 96, 32); +extern DECL_FATTN_MMA_F16_CASE(112, 32); +extern DECL_FATTN_MMA_F16_CASE(128, 32); +extern DECL_FATTN_MMA_F16_CASE(256, 32); + +extern DECL_FATTN_MMA_F16_CASE( 64, 64); +extern DECL_FATTN_MMA_F16_CASE( 80, 64); +extern DECL_FATTN_MMA_F16_CASE( 96, 64); +extern DECL_FATTN_MMA_F16_CASE(112, 64); +extern DECL_FATTN_MMA_F16_CASE(128, 64); +extern DECL_FATTN_MMA_F16_CASE(256, 64); diff --git a/ggml/src/ggml-cuda/fattn-tile-f16.cu b/ggml/src/ggml-cuda/fattn-tile-f16.cu index 4d314dacb..d4edbad07 100644 --- a/ggml/src/ggml-cuda/fattn-tile-f16.cu +++ b/ggml/src/ggml-cuda/fattn-tile-f16.cu @@ -45,7 +45,17 @@ static __global__ void flash_attn_tile_ext_f16( const int ne2, const int ne3) { #ifdef FP16_AVAILABLE + +#ifndef FLASH_ATTN_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FLASH_ATTN_AVAILABLE + // Skip unused kernel variants for faster compilation: +#ifdef FP16_MMA_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FP16_MMA_AVAILABLE if (use_logit_softcap && !(D == 128 || D == 256)) { NO_DEVICE_CODE; return; @@ -288,16 +298,18 @@ void launch_fattn_tile_f16_64_128(ggml_backend_cuda_context & ctx, ggml_tensor * const ggml_tensor * Q = dst->src[0]; switch (Q->ne[0]) { case 64: { - constexpr int D = 64; - constexpr int nwarps = 8; + constexpr int D = 64; + constexpr int nwarps = 8; + constexpr size_t nbytes_shared = 0; fattn_kernel_t fattn_kernel = flash_attn_tile_ext_f16; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); } break; case 128: { - constexpr int D = 128; - constexpr int nwarps = 8; + constexpr int D = 128; + constexpr int nwarps = 8; + constexpr size_t nbytes_shared = 0; fattn_kernel_t fattn_kernel = flash_attn_tile_ext_f16; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); } break; default: { GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128."); diff --git a/ggml/src/ggml-cuda/fattn-tile-f32.cu b/ggml/src/ggml-cuda/fattn-tile-f32.cu index bb3360447..0d274f332 100644 --- 
a/ggml/src/ggml-cuda/fattn-tile-f32.cu +++ b/ggml/src/ggml-cuda/fattn-tile-f32.cu @@ -48,7 +48,12 @@ static __global__ void flash_attn_tile_ext_f32( NO_DEVICE_CODE; return; #endif // FLASH_ATTN_AVAILABLE + // Skip unused kernel variants for faster compilation: +#ifdef FP16_MMA_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FP16_MMA_AVAILABLE if (use_logit_softcap && !(D == 128 || D == 256)) { NO_DEVICE_CODE; return; @@ -287,16 +292,18 @@ void launch_fattn_tile_f32_64_128(ggml_backend_cuda_context & ctx, ggml_tensor * const ggml_tensor * Q = dst->src[0]; switch (Q->ne[0]) { case 64: { - constexpr int D = 64; - constexpr int nwarps = 8; + constexpr int D = 64; + constexpr int nwarps = 8; + constexpr size_t nbytes_shared = 0; fattn_kernel_t fattn_kernel = flash_attn_tile_ext_f32; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); } break; case 128: { - constexpr int D = 128; - constexpr int nwarps = 8; + constexpr int D = 128; + constexpr int nwarps = 8; + constexpr size_t nbytes_shared = 0; fattn_kernel_t fattn_kernel = flash_attn_tile_ext_f32; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, true, true); } break; default: { GGML_ABORT("FlashAttention without tensor cores only supports head sizes 64 and 128."); diff --git a/ggml/src/ggml-cuda/fattn-vec-f16.cuh b/ggml/src/ggml-cuda/fattn-vec-f16.cuh index 34a2992c7..d9ac44246 100644 --- a/ggml/src/ggml-cuda/fattn-vec-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-vec-f16.cuh @@ -42,6 +42,12 @@ static __global__ void flash_attn_vec_ext_f16( const int ne2, const int ne3) { #ifdef FP16_AVAILABLE + +#ifndef FLASH_ATTN_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FLASH_ATTN_AVAILABLE + // Skip unused kernel variants for faster compilation: if (use_logit_softcap && !(D == 128 || D == 256)) { NO_DEVICE_CODE; @@ -303,7 +309,8 @@ void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f16; constexpr bool need_f16_K = D != 128; constexpr bool need_f16_V = D != 128 && D != 64; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, need_f16_K, need_f16_V); + constexpr size_t nbytes_shared = 0; + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, need_f16_K, need_f16_V); } template diff --git a/ggml/src/ggml-cuda/fattn-vec-f32.cuh b/ggml/src/ggml-cuda/fattn-vec-f32.cuh index a28fc8b7f..6ef8f9dcc 100644 --- a/ggml/src/ggml-cuda/fattn-vec-f32.cuh +++ b/ggml/src/ggml-cuda/fattn-vec-f32.cuh @@ -41,6 +41,11 @@ static __global__ void flash_attn_vec_ext_f32( const int ne1, const int ne2, const int ne3) { +#ifndef FLASH_ATTN_AVAILABLE + NO_DEVICE_CODE; + return; +#endif // FLASH_ATTN_AVAILABLE + // Skip unused kernel variants for faster compilation: if (use_logit_softcap && !(D == 128 || D == 256)) { NO_DEVICE_CODE; @@ -284,7 +289,8 @@ void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, fattn_kernel_t fattn_kernel = flash_attn_vec_ext_f32; constexpr bool need_f16_K = D != 128; constexpr bool need_f16_V = D != 128 && D != 64; - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, need_f16_K, need_f16_V); + constexpr size_t nbytes_shared = 0; + launch_fattn(ctx, dst, fattn_kernel, nwarps, nbytes_shared, need_f16_K, need_f16_V); } template diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu new file mode 100644 index 
000000000..1054ff95d --- /dev/null +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu @@ -0,0 +1,648 @@ +// Old and deprecated WMMA FlashAttention implementation. +// It is still needed for Volta since the memory layout of NVIDIA tensor cores changed with Turing. +// Long-term the WMMA code should be replaced with a dedicated Volta implementation. + +#include "common.cuh" +#include "fattn-common.cuh" +#include "fattn-wmma-f16.cuh" + +#ifdef FP16_MMA_AVAILABLE +#include +#endif // FP16_MMA_AVAILABLE + +// D == head size, VKQ_stride == num VKQ rows calculated in parallel: +template +#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +__launch_bounds__(nwarps*WARP_SIZE, 1) +#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) +static __global__ void flash_attn_ext_f16( + const char * __restrict__ Q, + const char * __restrict__ K, + const char * __restrict__ V, + const char * __restrict__ mask, + float * __restrict__ dst, + float2 * __restrict__ dst_meta, + const float scale, + const float max_bias, + const float m0, + const float m1, + const uint32_t n_head_log2, + const float logit_softcap, + const int ne00, + const int ne01, + const int ne02, + const int ne03, + const int ne10, + const int ne11, + const int ne12, + const int ne13, + const int ne31, + const int nb31, + const int nb01, + const int nb02, + const int nb03, + const int nb11, + const int nb12, + const int nb13, + const int nb21, + const int nb22, + const int nb23, + const int ne0, + const int ne1, + const int ne2, + const int ne3) { +#if __CUDA_ARCH__ == GGML_CUDA_CC_VOLTA + // Skip unused kernel variants for faster compilation: + if (use_logit_softcap && !(D == 128 || D == 256)) { + NO_DEVICE_CODE; + return; + } + + //In this kernel Q, K, V are matrices while i, j, k are matrix indices. + + const int ic0 = ncols*(blockIdx.x / parallel_blocks); // Index of the first Q/QKV column to work on. + const int ip = blockIdx.x % parallel_blocks; // Index in group of blocks running for the same column in parallel. + + static_assert(D <= FATTN_KQ_STRIDE, "D must be <= FATTN_KQ_STRIDE."); + static_assert(ncols == 8 || ncols % 16 == 0, "ncols must be 8 or a multiple of 16."); + constexpr int frag_m = ncols == 8 ? 32 : 16; + constexpr int frag_n = ncols == 8 ? 8 : 16; + static_assert(D % frag_m == 0, "If ncols == 8 then D % frag_m must be 0."); + typedef nvcuda::wmma::fragment frag_a_K; + typedef nvcuda::wmma::fragment frag_a_V; + typedef nvcuda::wmma::fragment frag_b; + typedef nvcuda::wmma::fragment frag_c_KQ; + typedef nvcuda::wmma::fragment frag_c_VKQ; + + constexpr int KQ_stride_tc = nwarps*frag_m; // Number of KQ rows calculated in parallel. + constexpr int VKQ_ratio = KQ_stride_tc/VKQ_stride; // Number of parallel VKQ accumulators needed to keep all warps busy. + static_assert(VKQ_ratio <= nwarps, "VKQ_ratio must be <= nwarps."); + + // Pad internal representation of KQ, KQV to reduce shared memory bank conflicts: + constexpr int D_padded = D + 8; + constexpr int kqs_padded = FATTN_KQ_STRIDE + 8; + constexpr int kqar = sizeof(KQ_acc_t)/sizeof(half); + + const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. 
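+    // E.g. with ne02 == 32 Q heads and ne12 == 8 K/V heads, gqa_ratio == 4, so the four
+    // Q heads with blockIdx.y == 0..3 all read K/V head 0 via the blockIdx.y / gqa_ratio
+    // indexing below.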
+ const float * Q_f = (const float *) (Q + nb02* blockIdx.y + nb01*ic0); + const half * K_h = (const half *) (K + nb12*(blockIdx.y / gqa_ratio)); + const half * V_h = (const half *) (V + nb12*(blockIdx.y / gqa_ratio)); // K and V have same shape + const half * maskh = (const half *) mask + (nb31/sizeof(half))* ic0; + const half2 * mask2 = (const half2 *) mask + (nb31/sizeof(half))*(ic0/2); + + const int stride_Q = nb01 / sizeof(float); + const int stride_KV = nb11 / sizeof(half); + + const float slopef = get_alibi_slope(max_bias, blockIdx.y, n_head_log2, m0, m1); + const half slopeh = __float2half(slopef); + const half2 slope2 = make_half2(slopef, slopef); + + const half2 logit_softcap_2 = make_half2(logit_softcap, logit_softcap); + + frag_b Q_b[D/16][ncols/frag_n]; + + // A single buffer for temporarily holding tiles of KQ and VKQ parts: + constexpr int mem_KQ = ncols*kqs_padded*kqar; + constexpr int mem_VKQ_parts = VKQ_ratio*ncols*D_padded; + __shared__ half KQ[mem_KQ >= mem_VKQ_parts ? mem_KQ : mem_VKQ_parts]; + float * KQ_f = (float *) KQ; + half2 * KQ2 = (half2 *) KQ; + + float KQ_rowsum_f[ncols/nwarps] = {0.0f}; + float KQ_max_f[ncols/nwarps]; + float KQ_max_scale_f[ncols/nwarps] = {0.0f}; + +#pragma unroll + for (int j = 0; j < ncols/nwarps; ++j) { + KQ_max_f[j] = -FLT_MAX/2.0f; + } + + half2 KQ_rowsum_h2[ncols/nwarps] = {{0.0f, 0.0f}}; + half2 KQ_max_h2[ncols/nwarps]; + half2 KQ_max_scale_h2[ncols/nwarps] = {{0.0f, 0.0f}}; + +#pragma unroll + for (int j = 0; j < ncols/nwarps; ++j) { + KQ_max_h2[j] = make_half2(-HALF_MAX_HALF, -HALF_MAX_HALF); + } + + __shared__ half VKQ[ncols*D_padded]; // Accumulator for final VKQ slice. + half2 * VKQ2 = (half2 *) VKQ; +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j = j0 + threadIdx.y; +#pragma unroll + for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { + const int i = i0 + threadIdx.x; + if (i0 + WARP_SIZE > D/2 && i >= D/2) { + break; + } + VKQ2[j*(D_padded/2) + i] = make_half2(0.0f, 0.0f); + } + } + + // Convert Q to half and apply scale, temporarily store in KQ: +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j = j0 + threadIdx.y; +#pragma unroll + for (int i0 = 0; i0 < D; i0 += WARP_SIZE) { + const int i = i0 + threadIdx.x; + if (i0 + WARP_SIZE > D && i >= D) { + break; + } + KQ[j*D_padded + i] = ic0 + j < ne01 ? 
Q_f[j*stride_Q + i] * scale : 0.0f; + } + } + + __syncthreads(); + + // Load Q into tensor core fragments/registers since it will be used frequently: +#pragma unroll + for (int i0 = 0; i0 < D; i0 += 16) { +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += frag_n) { + nvcuda::wmma::load_matrix_sync(Q_b[i0/16][j0/frag_n], KQ + j0*D_padded + i0, D_padded); + } + } + + __syncthreads(); + + // Iterate over ne11 == previous tokens: + for (int k_VKQ_0 = ip*FATTN_KQ_STRIDE; k_VKQ_0 < ne11; k_VKQ_0 += parallel_blocks*FATTN_KQ_STRIDE) { + // Calculate tile of KQ: +#pragma unroll + for (int i_KQ_0 = 0; i_KQ_0 < FATTN_KQ_STRIDE; i_KQ_0 += KQ_stride_tc) { + frag_c_KQ KQ_c[ncols/frag_n]; +#pragma unroll + for (int j = 0; j < ncols/frag_n; ++j) { + nvcuda::wmma::fill_fragment(KQ_c[j], 0.0f); + } +#pragma unroll + for (int k_KQ_0 = 0; k_KQ_0 < D; k_KQ_0 += 16) { + frag_a_K K_a; + nvcuda::wmma::load_matrix_sync(K_a, K_h + (k_VKQ_0 + i_KQ_0 + frag_m*threadIdx.y)*stride_KV + k_KQ_0, stride_KV); +#pragma unroll + for (int j = 0; j < ncols/frag_n; ++j) { + nvcuda::wmma::mma_sync(KQ_c[j], K_a, Q_b[k_KQ_0/16][j], KQ_c[j]); + } + } +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += frag_n) { + nvcuda::wmma::store_matrix_sync((KQ_acc_t *) KQ + j0*kqs_padded + i_KQ_0 + frag_m*threadIdx.y, KQ_c[j0/frag_n], kqs_padded, nvcuda::wmma::mem_col_major); + } + } + + __syncthreads(); + + // Calculate softmax for each KQ column using the current max. value. + // The divisor is stored in KQ_rowsum and will be applied at the end. +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j = j0 + threadIdx.y; + + if (std::is_same::value) { + float KQ_f_tmp[FATTN_KQ_STRIDE / WARP_SIZE]; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + KQ_f_tmp[k0/WARP_SIZE] = KQ_f[j*kqs_padded + k]; + + if (use_logit_softcap) { + KQ_f_tmp[k0/WARP_SIZE] = logit_softcap*tanhf(KQ_f_tmp[k0/WARP_SIZE]); + } + } + + float KQ_max_new = KQ_max_f[j0/nwarps]; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + KQ_f_tmp[k0/WARP_SIZE] += mask ? __half2float(slopeh*maskh[j*(nb31/sizeof(half)) + k_VKQ_0 + k]) : 0.0f; + KQ_max_new = max(KQ_max_new, KQ_f_tmp[k0/WARP_SIZE]); + } + KQ_max_new = warp_reduce_max(KQ_max_new); + + const float diff = KQ_max_f[j0/nwarps] - KQ_max_new; + KQ_max_scale_f[j0/nwarps] = expf(diff); + if (diff <= SOFTMAX_FTZ_THRESHOLD) { + KQ_max_scale_f[j0/nwarps] = 0.0f; + } + KQ_max_f[j0/nwarps] = KQ_max_new; + + float KQ_rowsum_add = 0.0f; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + const float diff = KQ_f_tmp[k0/WARP_SIZE] - KQ_max_f[j0/nwarps]; + KQ_f_tmp[k0/WARP_SIZE] = expf(diff); + if (diff <= SOFTMAX_FTZ_THRESHOLD) { + KQ_f_tmp[k0/WARP_SIZE] = 0.0f; + } + KQ_rowsum_add += KQ_f_tmp[k0/WARP_SIZE]; + KQ[j*(kqar*kqs_padded) + k] = KQ_f_tmp[k0/WARP_SIZE]; + } + KQ_rowsum_add = warp_reduce_sum(KQ_rowsum_add); + + // Scale previous KQ_rowsum to account for a potential increase in KQ_max: + KQ_rowsum_f[j0/nwarps] = KQ_max_scale_f[j0/nwarps]*KQ_rowsum_f[j0/nwarps] + KQ_rowsum_add; + } else { + half2 KQ2_tmp[FATTN_KQ_STRIDE/(2*WARP_SIZE)]; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + KQ2_tmp[k0/WARP_SIZE] = KQ2[j*(kqs_padded/2) + k]; + + if (use_logit_softcap) { + // There is no dedicated tangens hyperbolicus function for half2. 
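+                        // tanh(x) == (exp(2x) - 1) / (exp(2x) + 1); the three half2
+                        // operations below evaluate exactly this identity via h2exp,
+                        // e.g. x == 0 -> (1 - 1)/(1 + 1) == 0, matching tanhf(0.0f).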
+ KQ2_tmp[k0/WARP_SIZE] = h2exp(KQ2_tmp[k0/WARP_SIZE]*make_half2(2.0f, 2.0f)); + KQ2_tmp[k0/WARP_SIZE] = (KQ2_tmp[k0/WARP_SIZE] - make_half2(1.0f, 1.0f)) + /(KQ2_tmp[k0/WARP_SIZE] + make_half2(1.0f, 1.0f)); + + KQ2_tmp[k0/WARP_SIZE] *= logit_softcap_2; + } + } + + half2 KQ_max_new = KQ_max_h2[j0/nwarps]; +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + KQ2_tmp[k0/WARP_SIZE] += mask ? slope2*mask2[(j*ne11 + k_VKQ_0)/2 + k] : make_half2(0.0f, 0.0f); + KQ_max_new = ggml_cuda_hmax2(KQ_max_new, KQ2_tmp[k0/WARP_SIZE]); + } + KQ_max_new = __half2half2(warp_reduce_max(ggml_cuda_hmax(__low2half(KQ_max_new), __high2half(KQ_max_new)))); + const half2 diff = KQ_max_h2[j0/nwarps] - KQ_max_new; + KQ_max_scale_h2[j0/nwarps] = h2exp(diff); + const uint32_t ftz_mask = __hgt2_mask(diff, make_half2(SOFTMAX_FTZ_THRESHOLD, SOFTMAX_FTZ_THRESHOLD)); + *((uint32_t *) &KQ_max_scale_h2[j0/nwarps]) &= ftz_mask; + KQ_max_h2[j0/nwarps] = KQ_max_new; + + half2 KQ_rowsum_add = make_half2(0.0f, 0.0f); +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { + const int k = k0 + threadIdx.x; + + const half2 diff = KQ2_tmp[k0/WARP_SIZE] - KQ_max_h2[j0/nwarps]; + KQ2_tmp[k0/WARP_SIZE] = h2exp(diff); + const uint32_t ftz_mask = __hgt2_mask(diff, make_half2(SOFTMAX_FTZ_THRESHOLD, SOFTMAX_FTZ_THRESHOLD)); + *((uint32_t *) &KQ2_tmp[k0/WARP_SIZE]) &= ftz_mask; + KQ_rowsum_add += KQ2_tmp[k0/WARP_SIZE]; + KQ2[j*(kqs_padded/2) + k] = KQ2_tmp[k0/WARP_SIZE]; + } + KQ_rowsum_add = warp_reduce_sum(KQ_rowsum_add); + + // Scale previous KQ_rowsum to account for a potential increase in KQ_max: + KQ_rowsum_h2[j0/nwarps] = KQ_max_scale_h2[j0/nwarps]*KQ_rowsum_h2[j0/nwarps] + KQ_rowsum_add; + } + } + + __syncthreads(); + + frag_b KQ_b[FATTN_KQ_STRIDE/(VKQ_ratio*16)][ncols/frag_n]; +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += frag_n) { +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += VKQ_ratio*16) { + const int k = k0 + (threadIdx.y % VKQ_ratio)*16; + nvcuda::wmma::load_matrix_sync( + KQ_b[k0/(VKQ_ratio*16)][j0/frag_n], + KQ + j0*(kqar*kqs_padded) + k, + kqar*kqs_padded); + } + } + + frag_c_VKQ VKQ_c[D/VKQ_stride][ncols/frag_n]; +#pragma unroll + for (int i_VKQ_0 = 0; i_VKQ_0 < D; i_VKQ_0 += VKQ_stride) { +#pragma unroll + for (int j = 0; j < ncols/frag_n; ++j) { + nvcuda::wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], 0.0f); + } + +#pragma unroll + for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += VKQ_ratio*16) { + const int k = k0 + (threadIdx.y % VKQ_ratio)*16; + + frag_a_V v_a; + nvcuda::wmma::load_matrix_sync(v_a, V_h + (k_VKQ_0 + k)*stride_KV + i_VKQ_0 + frag_m*(threadIdx.y/VKQ_ratio), stride_KV); +#pragma unroll + for (int j = 0; j < ncols/frag_n; ++j) { + nvcuda::wmma::mma_sync(VKQ_c[i_VKQ_0/VKQ_stride][j], v_a, KQ_b[k0/(VKQ_ratio*16)][j], VKQ_c[i_VKQ_0/VKQ_stride][j]); + } + } + } + + __syncthreads(); + + const int offset_k = (threadIdx.y % VKQ_ratio) * (ncols*D_padded); +#pragma unroll + for (int i_KQ_0 = 0; i_KQ_0 < D; i_KQ_0 += VKQ_stride) { +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += frag_n) { + nvcuda::wmma::store_matrix_sync( + KQ + offset_k + j0*D_padded + i_KQ_0 + frag_m*(threadIdx.y/VKQ_ratio), + VKQ_c[i_KQ_0/VKQ_stride][j0/frag_n], + D_padded, nvcuda::wmma::mem_col_major); + } + } + + __syncthreads(); + +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j = j0 + threadIdx.y; + + half2 VKQ_scale; + if (std::is_same::value) { + VKQ_scale = make_half2(KQ_max_scale_f[j0/nwarps], 
KQ_max_scale_f[j0/nwarps]); + } else { + VKQ_scale = KQ_max_scale_h2[j0/nwarps]; + } + +#pragma unroll + for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { + const int i = i0 + threadIdx.x; + if (i0 + WARP_SIZE > D/2 && i >= D/2) { + break; + } + + half2 VKQ_add = make_half2(0.0f, 0.0f); +#pragma unroll + for (int l = 0; l < VKQ_ratio; ++l) { + VKQ_add += KQ2[l*(ncols*D_padded/2) + j*(D_padded/2) + i]; + } + VKQ2[j*(D_padded/2) + i] = VKQ_scale*VKQ2[j*(D_padded/2) + i] + VKQ_add; + } + } + + __syncthreads(); + } + +#pragma unroll + for (int j0 = 0; j0 < ncols; j0 += nwarps) { + const int j_VKQ = j0 + threadIdx.y; + if (ic0 + j_VKQ >= ne01) { + return; + } + const int j_dst = (ic0 + j_VKQ)*parallel_blocks + ip; + + float KQ_rowsum_j; + if (std::is_same::value) { + KQ_rowsum_j = KQ_rowsum_f[j0/nwarps]; + } else { + KQ_rowsum_j = __low2float(KQ_rowsum_h2[j0/nwarps]) + __high2float(KQ_rowsum_h2[j0/nwarps]); + } + +#pragma unroll + for (int i0 = 0; i0 < D; i0 += WARP_SIZE) { + const int i = i0 + threadIdx.x; + if (i0 + WARP_SIZE > D && i >= D) { + break; + } + float dst_val = VKQ[j_VKQ*D_padded + i]; + if (parallel_blocks == 1) { + dst_val /= KQ_rowsum_j; + } + dst[j_dst*gridDim.y*D + blockIdx.y*D + i] = dst_val; + } + + if (parallel_blocks == 1 || threadIdx.x != 0) { + continue; + } + + float2 dst_meta_val; + if (std::is_same::value) { + dst_meta_val.x = KQ_max_f[j0/nwarps]; + } else { + dst_meta_val.x = __low2float(KQ_max_h2[j0/nwarps]); + } + dst_meta_val.y = KQ_rowsum_j; + dst_meta[(ic0 + j_VKQ)*gridDim.y*parallel_blocks + blockIdx.y*parallel_blocks + ip] = dst_meta_val; + } +#else + NO_DEVICE_CODE; +#endif // __CUDA_ARCH__ == GGML_CUDA_CC_VOLTA +} + +constexpr int get_max_power_of_2(int x) { + return x % 2 == 0 ? 2*get_max_power_of_2(x/2) : 1; +} + +static_assert(get_max_power_of_2(1) == 1, "Test failed."); +static_assert(get_max_power_of_2(2) == 2, "Test failed."); +static_assert(get_max_power_of_2(4) == 4, "Test failed."); +static_assert(get_max_power_of_2(6) == 2, "Test failed."); + +// Number of VKQ rows calculated in parallel: +constexpr int get_VKQ_stride(int D, int nwarps, int frag_m) { + return (get_max_power_of_2(D/frag_m) < nwarps ? get_max_power_of_2(D/frag_m) : nwarps)*frag_m; +} + +static_assert(get_VKQ_stride(128, 1, 32) == 32, "Test failed."); +static_assert(get_VKQ_stride(128, 2, 32) == 64, "Test failed."); +static_assert(get_VKQ_stride(128, 4, 32) == 128, "Test failed."); +static_assert(get_VKQ_stride( 64, 1, 32) == 32, "Test failed."); +static_assert(get_VKQ_stride( 64, 2, 32) == 64, "Test failed."); +static_assert(get_VKQ_stride( 64, 4, 32) == 64, "Test failed."); +static_assert(get_VKQ_stride( 80, 1, 16) == 16, "Test failed."); +static_assert(get_VKQ_stride( 80, 2, 16) == 16, "Test failed."); +static_assert(get_VKQ_stride( 80, 4, 16) == 16, "Test failed."); + +template +void ggml_cuda_flash_attn_ext_wmma_f16_case(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * KQV = dst; + const ggml_tensor * Q = dst->src[0]; + + constexpr int nwarps = 4; + + constexpr int frag_m = cols_per_block == 8 && D % 32 == 0 ? 
32 : 16; + const int blocks_num_pb1 = ((Q->ne[1] + cols_per_block - 1) / cols_per_block)*Q->ne[2]*Q->ne[3]; + const int nsm = ggml_cuda_info().devices[ggml_cuda_get_device()].nsm; + + float logit_softcap; + memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float)); + + if (4*blocks_num_pb1 < 2*nsm) { + constexpr int parallel_blocks = 4; + fattn_kernel_t fattn_kernel; + if (logit_softcap == 0.0f) { + constexpr bool use_logit_softcap = false; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } else { + constexpr bool use_logit_softcap = true; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } + launch_fattn(ctx, dst, fattn_kernel, nwarps, 0, true, true); + return; + } + if (2*blocks_num_pb1 < 2*nsm) { + constexpr int parallel_blocks = 2; + fattn_kernel_t fattn_kernel; + if (logit_softcap == 0.0f) { + constexpr bool use_logit_softcap = false; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } else { + constexpr bool use_logit_softcap = true; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } + launch_fattn(ctx, dst, fattn_kernel, nwarps, 0, true, true); + return; + } + constexpr int parallel_blocks = 1; + fattn_kernel_t fattn_kernel; + if (logit_softcap == 0.0f) { + constexpr bool use_logit_softcap = false; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } else { + constexpr bool use_logit_softcap = true; + fattn_kernel = flash_attn_ext_f16< + D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; + } + launch_fattn(ctx, dst, fattn_kernel, nwarps, 0, true, true); +} + +void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * KQV = dst; + const ggml_tensor * Q = dst->src[0]; + + const enum ggml_prec prec = ggml_flash_attn_ext_get_prec(KQV); + + if (prec != GGML_PREC_DEFAULT) { + if (Q->ne[1] <= 32 || Q->ne[0] > 128) { + constexpr int cols_per_block = 16; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst); + break; + case 80: + ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst); + break; + case 112: + ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst); + break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); + break; + case 256: + ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst); + break; + default: + GGML_ABORT("fatal error"); + break; + } + } else { + constexpr int cols_per_block = 32; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst); + break; + case 80: + ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst); + break; + case 112: + ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst); 
+ break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); + break; + // case 256: + // ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); + // break; + default: + GGML_ABORT("fatal error"); + break; + } + } + return; + } + + if (Q->ne[1] <= 8 && Q->ne[0] % WARP_SIZE == 0) { + constexpr int cols_per_block = 8; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); + break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); + break; + case 256: + ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); + break; + default: + GGML_ABORT("fatal error"); + break; + } + return; + } + + if (Q->ne[1] <= 32) { + constexpr int cols_per_block = 16; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); + break; + case 80: + ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, half>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); + break; + case 112: + ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, half>(ctx, dst); + break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); + break; + case 256: + ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); + break; + default: + GGML_ABORT("fatal error"); + break; + } + return; + } + + constexpr int cols_per_block = 32; + switch (Q->ne[0]) { + case 64: + ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); + break; + case 80: + ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, half>(ctx, dst); + break; + case 96: + ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); + break; + case 112: + ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, half>(ctx, dst); + break; + case 128: + ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); + break; + case 256: + ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); + break; + default: + GGML_ABORT("fatal error"); + break; + } +} diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cuh b/ggml/src/ggml-cuda/fattn-wmma-f16.cuh index 860d0e6dc..beeea95eb 100644 --- a/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cuh @@ -1,543 +1,3 @@ #include "common.cuh" -#include "fattn-common.cuh" -#ifdef FP16_MMA_AVAILABLE -#include -#endif // FP16_MMA_AVAILABLE - -// D == head size, VKQ_stride == num VKQ rows calculated in parallel: -template -#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) -__launch_bounds__(nwarps*WARP_SIZE, 1) -#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) -static __global__ void flash_attn_ext_f16( - const char * __restrict__ Q, - const char * __restrict__ K, - const char * __restrict__ V, - const char * __restrict__ mask, - float * __restrict__ dst, - float2 * __restrict__ dst_meta, - const float scale, - const float max_bias, - const float m0, - const float m1, - const uint32_t n_head_log2, - const float logit_softcap, - const int ne00, - const int ne01, - const int ne02, - const int ne03, - const int ne10, - const int ne11, - const int ne12, - const int ne13, - const int ne31, - const int nb31, - const int nb01, - const int nb02, - const int nb03, - const int 
nb11, - const int nb12, - const int nb13, - const int nb21, - const int nb22, - const int nb23, - const int ne0, - const int ne1, - const int ne2, - const int ne3) { -#ifdef FP16_MMA_AVAILABLE - // Skip unused kernel variants for faster compilation: - if (use_logit_softcap && !(D == 128 || D == 256)) { - NO_DEVICE_CODE; - return; - } - - //In this kernel Q, K, V are matrices while i, j, k are matrix indices. - - const int ic0 = ncols*(blockIdx.x / parallel_blocks); // Index of the first Q/QKV column to work on. - const int ip = blockIdx.x % parallel_blocks; // Index in group of blocks running for the same column in parallel. - - static_assert(D <= FATTN_KQ_STRIDE, "D must be <= FATTN_KQ_STRIDE."); - static_assert(ncols == 8 || ncols % 16 == 0, "ncols must be 8 or a multiple of 16."); - constexpr int frag_m = ncols == 8 ? 32 : 16; - constexpr int frag_n = ncols == 8 ? 8 : 16; - static_assert(D % frag_m == 0, "If ncols == 8 then D % frag_m must be 0."); - typedef nvcuda::wmma::fragment frag_a_K; - typedef nvcuda::wmma::fragment frag_a_V; - typedef nvcuda::wmma::fragment frag_b; - typedef nvcuda::wmma::fragment frag_c_KQ; - typedef nvcuda::wmma::fragment frag_c_VKQ; - - constexpr int KQ_stride_tc = nwarps*frag_m; // Number of KQ rows calculated in parallel. - constexpr int VKQ_ratio = KQ_stride_tc/VKQ_stride; // Number of parallel VKQ accumulators needed to keep all warps busy. - static_assert(VKQ_ratio <= nwarps, "VKQ_ratio must be <= nwarps."); - - // Pad internal representation of KQ, KQV to reduce shared memory bank conflicts: - constexpr int D_padded = D + 8; - constexpr int kqs_padded = FATTN_KQ_STRIDE + 8; - constexpr int kqar = sizeof(KQ_acc_t)/sizeof(half); - - const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. - const float * Q_f = (const float *) (Q + nb02* blockIdx.y + nb01*ic0); - const half * K_h = (const half *) (K + nb12*(blockIdx.y / gqa_ratio)); - const half * V_h = (const half *) (V + nb12*(blockIdx.y / gqa_ratio)); // K and V have same shape - const half * maskh = (const half *) mask + (nb31/sizeof(half))* ic0; - const half2 * mask2 = (const half2 *) mask + (nb31/sizeof(half))*(ic0/2); - - const int stride_Q = nb01 / sizeof(float); - const int stride_KV = nb11 / sizeof(half); - - const float slopef = get_alibi_slope(max_bias, blockIdx.y, n_head_log2, m0, m1); - const half slopeh = __float2half(slopef); - const half2 slope2 = make_half2(slopef, slopef); - - const half2 logit_softcap_2 = make_half2(logit_softcap, logit_softcap); - - frag_b Q_b[D/16][ncols/frag_n]; - - // A single buffer for temporarily holding tiles of KQ and VKQ parts: - constexpr int mem_KQ = ncols*kqs_padded*kqar; - constexpr int mem_VKQ_parts = VKQ_ratio*ncols*D_padded; - __shared__ half KQ[mem_KQ >= mem_VKQ_parts ? mem_KQ : mem_VKQ_parts]; - float * KQ_f = (float *) KQ; - half2 * KQ2 = (half2 *) KQ; - - float KQ_rowsum_f[ncols/nwarps] = {0.0f}; - float KQ_max_f[ncols/nwarps]; - float KQ_max_scale_f[ncols/nwarps] = {0.0f}; - -#pragma unroll - for (int j = 0; j < ncols/nwarps; ++j) { - KQ_max_f[j] = -FLT_MAX/2.0f; - } - - half2 KQ_rowsum_h2[ncols/nwarps] = {{0.0f, 0.0f}}; - half2 KQ_max_h2[ncols/nwarps]; - half2 KQ_max_scale_h2[ncols/nwarps] = {{0.0f, 0.0f}}; - -#pragma unroll - for (int j = 0; j < ncols/nwarps; ++j) { - KQ_max_h2[j] = make_half2(-HALF_MAX_HALF, -HALF_MAX_HALF); - } - - __shared__ half VKQ[ncols*D_padded]; // Accumulator for final VKQ slice. 
- half2 * VKQ2 = (half2 *) VKQ; -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j = j0 + threadIdx.y; -#pragma unroll - for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - if (i0 + WARP_SIZE > D/2 && i >= D/2) { - break; - } - VKQ2[j*(D_padded/2) + i] = make_half2(0.0f, 0.0f); - } - } - - // Convert Q to half and apply scale, temporarily store in KQ: -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j = j0 + threadIdx.y; -#pragma unroll - for (int i0 = 0; i0 < D; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - if (i0 + WARP_SIZE > D && i >= D) { - break; - } - KQ[j*D_padded + i] = ic0 + j < ne01 ? Q_f[j*stride_Q + i] * scale : 0.0f; - } - } - - __syncthreads(); - - // Load Q into tensor core fragments/registers since it will be used frequently: -#pragma unroll - for (int i0 = 0; i0 < D; i0 += 16) { -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += frag_n) { - nvcuda::wmma::load_matrix_sync(Q_b[i0/16][j0/frag_n], KQ + j0*D_padded + i0, D_padded); - } - } - - __syncthreads(); - - // Iterate over ne11 == previous tokens: - for (int k_VKQ_0 = ip*FATTN_KQ_STRIDE; k_VKQ_0 < ne11; k_VKQ_0 += parallel_blocks*FATTN_KQ_STRIDE) { - // Calculate tile of KQ: -#pragma unroll - for (int i_KQ_0 = 0; i_KQ_0 < FATTN_KQ_STRIDE; i_KQ_0 += KQ_stride_tc) { - frag_c_KQ KQ_c[ncols/frag_n]; -#pragma unroll - for (int j = 0; j < ncols/frag_n; ++j) { - nvcuda::wmma::fill_fragment(KQ_c[j], 0.0f); - } -#pragma unroll - for (int k_KQ_0 = 0; k_KQ_0 < D; k_KQ_0 += 16) { - frag_a_K K_a; - nvcuda::wmma::load_matrix_sync(K_a, K_h + (k_VKQ_0 + i_KQ_0 + frag_m*threadIdx.y)*stride_KV + k_KQ_0, stride_KV); -#pragma unroll - for (int j = 0; j < ncols/frag_n; ++j) { - nvcuda::wmma::mma_sync(KQ_c[j], K_a, Q_b[k_KQ_0/16][j], KQ_c[j]); - } - } -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += frag_n) { - nvcuda::wmma::store_matrix_sync((KQ_acc_t *) KQ + j0*kqs_padded + i_KQ_0 + frag_m*threadIdx.y, KQ_c[j0/frag_n], kqs_padded, nvcuda::wmma::mem_col_major); - } - } - - __syncthreads(); - - // Calculate softmax for each KQ column using the current max. value. - // The divisor is stored in KQ_rowsum and will be applied at the end. -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j = j0 + threadIdx.y; - - if (std::is_same::value) { - float KQ_f_tmp[FATTN_KQ_STRIDE / WARP_SIZE]; -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - KQ_f_tmp[k0/WARP_SIZE] = KQ_f[j*kqs_padded + k]; - - if (use_logit_softcap) { - KQ_f_tmp[k0/WARP_SIZE] = logit_softcap*tanhf(KQ_f_tmp[k0/WARP_SIZE]); - } - } - - float KQ_max_new = KQ_max_f[j0/nwarps]; -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - KQ_f_tmp[k0/WARP_SIZE] += mask ? 
__half2float(slopeh*maskh[j*(nb31/sizeof(half)) + k_VKQ_0 + k]) : 0.0f; - KQ_max_new = max(KQ_max_new, KQ_f_tmp[k0/WARP_SIZE]); - } - KQ_max_new = warp_reduce_max(KQ_max_new); - - const float diff = KQ_max_f[j0/nwarps] - KQ_max_new; - KQ_max_scale_f[j0/nwarps] = expf(diff); - if (diff <= SOFTMAX_FTZ_THRESHOLD) { - KQ_max_scale_f[j0/nwarps] = 0.0f; - } - KQ_max_f[j0/nwarps] = KQ_max_new; - - float KQ_rowsum_add = 0.0f; -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - const float diff = KQ_f_tmp[k0/WARP_SIZE] - KQ_max_f[j0/nwarps]; - KQ_f_tmp[k0/WARP_SIZE] = expf(diff); - if (diff <= SOFTMAX_FTZ_THRESHOLD) { - KQ_f_tmp[k0/WARP_SIZE] = 0.0f; - } - KQ_rowsum_add += KQ_f_tmp[k0/WARP_SIZE]; - KQ[j*(kqar*kqs_padded) + k] = KQ_f_tmp[k0/WARP_SIZE]; - } - KQ_rowsum_add = warp_reduce_sum(KQ_rowsum_add); - - // Scale previous KQ_rowsum to account for a potential increase in KQ_max: - KQ_rowsum_f[j0/nwarps] = KQ_max_scale_f[j0/nwarps]*KQ_rowsum_f[j0/nwarps] + KQ_rowsum_add; - } else { - half2 KQ2_tmp[FATTN_KQ_STRIDE/(2*WARP_SIZE)]; -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - KQ2_tmp[k0/WARP_SIZE] = KQ2[j*(kqs_padded/2) + k]; - - if (use_logit_softcap) { - // There is no dedicated tangens hyperbolicus function for half2. - KQ2_tmp[k0/WARP_SIZE] = h2exp(KQ2_tmp[k0/WARP_SIZE]*make_half2(2.0f, 2.0f)); - KQ2_tmp[k0/WARP_SIZE] = (KQ2_tmp[k0/WARP_SIZE] - make_half2(1.0f, 1.0f)) - /(KQ2_tmp[k0/WARP_SIZE] + make_half2(1.0f, 1.0f)); - - KQ2_tmp[k0/WARP_SIZE] *= logit_softcap_2; - } - } - - half2 KQ_max_new = KQ_max_h2[j0/nwarps]; -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - KQ2_tmp[k0/WARP_SIZE] += mask ? 
slope2*mask2[(j*ne11 + k_VKQ_0)/2 + k] : make_half2(0.0f, 0.0f); - KQ_max_new = ggml_cuda_hmax2(KQ_max_new, KQ2_tmp[k0/WARP_SIZE]); - } - KQ_max_new = __half2half2(warp_reduce_max(ggml_cuda_hmax(__low2half(KQ_max_new), __high2half(KQ_max_new)))); - const half2 diff = KQ_max_h2[j0/nwarps] - KQ_max_new; - KQ_max_scale_h2[j0/nwarps] = h2exp(diff); - const uint32_t ftz_mask = __hgt2_mask(diff, make_half2(SOFTMAX_FTZ_THRESHOLD, SOFTMAX_FTZ_THRESHOLD)); - *((uint32_t *) &KQ_max_scale_h2[j0/nwarps]) &= ftz_mask; - KQ_max_h2[j0/nwarps] = KQ_max_new; - - half2 KQ_rowsum_add = make_half2(0.0f, 0.0f); -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE/2; k0 += WARP_SIZE) { - const int k = k0 + threadIdx.x; - - const half2 diff = KQ2_tmp[k0/WARP_SIZE] - KQ_max_h2[j0/nwarps]; - KQ2_tmp[k0/WARP_SIZE] = h2exp(diff); - const uint32_t ftz_mask = __hgt2_mask(diff, make_half2(SOFTMAX_FTZ_THRESHOLD, SOFTMAX_FTZ_THRESHOLD)); - *((uint32_t *) &KQ2_tmp[k0/WARP_SIZE]) &= ftz_mask; - KQ_rowsum_add += KQ2_tmp[k0/WARP_SIZE]; - KQ2[j*(kqs_padded/2) + k] = KQ2_tmp[k0/WARP_SIZE]; - } - KQ_rowsum_add = warp_reduce_sum(KQ_rowsum_add); - - // Scale previous KQ_rowsum to account for a potential increase in KQ_max: - KQ_rowsum_h2[j0/nwarps] = KQ_max_scale_h2[j0/nwarps]*KQ_rowsum_h2[j0/nwarps] + KQ_rowsum_add; - } - } - - __syncthreads(); - - frag_b KQ_b[FATTN_KQ_STRIDE/(VKQ_ratio*16)][ncols/frag_n]; -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += frag_n) { -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += VKQ_ratio*16) { - const int k = k0 + (threadIdx.y % VKQ_ratio)*16; - nvcuda::wmma::load_matrix_sync( - KQ_b[k0/(VKQ_ratio*16)][j0/frag_n], - KQ + j0*(kqar*kqs_padded) + k, - kqar*kqs_padded); - } - } - - frag_c_VKQ VKQ_c[D/VKQ_stride][ncols/frag_n]; -#pragma unroll - for (int i_VKQ_0 = 0; i_VKQ_0 < D; i_VKQ_0 += VKQ_stride) { -#pragma unroll - for (int j = 0; j < ncols/frag_n; ++j) { - nvcuda::wmma::fill_fragment(VKQ_c[i_VKQ_0/VKQ_stride][j], 0.0f); - } - -#pragma unroll - for (int k0 = 0; k0 < FATTN_KQ_STRIDE; k0 += VKQ_ratio*16) { - const int k = k0 + (threadIdx.y % VKQ_ratio)*16; - - frag_a_V v_a; - nvcuda::wmma::load_matrix_sync(v_a, V_h + (k_VKQ_0 + k)*stride_KV + i_VKQ_0 + frag_m*(threadIdx.y/VKQ_ratio), stride_KV); -#pragma unroll - for (int j = 0; j < ncols/frag_n; ++j) { - nvcuda::wmma::mma_sync(VKQ_c[i_VKQ_0/VKQ_stride][j], v_a, KQ_b[k0/(VKQ_ratio*16)][j], VKQ_c[i_VKQ_0/VKQ_stride][j]); - } - } - } - - __syncthreads(); - - const int offset_k = (threadIdx.y % VKQ_ratio) * (ncols*D_padded); -#pragma unroll - for (int i_KQ_0 = 0; i_KQ_0 < D; i_KQ_0 += VKQ_stride) { -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += frag_n) { - nvcuda::wmma::store_matrix_sync( - KQ + offset_k + j0*D_padded + i_KQ_0 + frag_m*(threadIdx.y/VKQ_ratio), - VKQ_c[i_KQ_0/VKQ_stride][j0/frag_n], - D_padded, nvcuda::wmma::mem_col_major); - } - } - - __syncthreads(); - -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j = j0 + threadIdx.y; - - half2 VKQ_scale; - if (std::is_same::value) { - VKQ_scale = make_half2(KQ_max_scale_f[j0/nwarps], KQ_max_scale_f[j0/nwarps]); - } else { - VKQ_scale = KQ_max_scale_h2[j0/nwarps]; - } - -#pragma unroll - for (int i0 = 0; i0 < D/2; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - if (i0 + WARP_SIZE > D/2 && i >= D/2) { - break; - } - - half2 VKQ_add = make_half2(0.0f, 0.0f); -#pragma unroll - for (int l = 0; l < VKQ_ratio; ++l) { - VKQ_add += KQ2[l*(ncols*D_padded/2) + j*(D_padded/2) + i]; - } - VKQ2[j*(D_padded/2) + i] = VKQ_scale*VKQ2[j*(D_padded/2) 
+ i] + VKQ_add; - } - } - - __syncthreads(); - } - -#pragma unroll - for (int j0 = 0; j0 < ncols; j0 += nwarps) { - const int j_VKQ = j0 + threadIdx.y; - if (ic0 + j_VKQ >= ne01) { - return; - } - const int j_dst = (ic0 + j_VKQ)*parallel_blocks + ip; - - float KQ_rowsum_j; - if (std::is_same::value) { - KQ_rowsum_j = KQ_rowsum_f[j0/nwarps]; - } else { - KQ_rowsum_j = __low2float(KQ_rowsum_h2[j0/nwarps]) + __high2float(KQ_rowsum_h2[j0/nwarps]); - } - -#pragma unroll - for (int i0 = 0; i0 < D; i0 += WARP_SIZE) { - const int i = i0 + threadIdx.x; - if (i0 + WARP_SIZE > D && i >= D) { - break; - } - float dst_val = VKQ[j_VKQ*D_padded + i]; - if (parallel_blocks == 1) { - dst_val /= KQ_rowsum_j; - } - dst[j_dst*gridDim.y*D + blockIdx.y*D + i] = dst_val; - } - - if (parallel_blocks == 1 || threadIdx.x != 0) { - continue; - } - - float2 dst_meta_val; - if (std::is_same::value) { - dst_meta_val.x = KQ_max_f[j0/nwarps]; - } else { - dst_meta_val.x = __low2float(KQ_max_h2[j0/nwarps]); - } - dst_meta_val.y = KQ_rowsum_j; - dst_meta[(ic0 + j_VKQ)*gridDim.y*parallel_blocks + blockIdx.y*parallel_blocks + ip] = dst_meta_val; - } -#else - NO_DEVICE_CODE; -#endif // FP16_MMA_AVAILABLE -} - -constexpr int get_max_power_of_2(int x) { - return x % 2 == 0 ? 2*get_max_power_of_2(x/2) : 1; -} - -static_assert(get_max_power_of_2(1) == 1, "Test failed."); -static_assert(get_max_power_of_2(2) == 2, "Test failed."); -static_assert(get_max_power_of_2(4) == 4, "Test failed."); -static_assert(get_max_power_of_2(6) == 2, "Test failed."); - -// Number of VKQ rows calculated in parallel: -constexpr int get_VKQ_stride(int D, int nwarps, int frag_m) { - return (get_max_power_of_2(D/frag_m) < nwarps ? get_max_power_of_2(D/frag_m) : nwarps)*frag_m; -} - -static_assert(get_VKQ_stride(128, 1, 32) == 32, "Test failed."); -static_assert(get_VKQ_stride(128, 2, 32) == 64, "Test failed."); -static_assert(get_VKQ_stride(128, 4, 32) == 128, "Test failed."); -static_assert(get_VKQ_stride( 64, 1, 32) == 32, "Test failed."); -static_assert(get_VKQ_stride( 64, 2, 32) == 64, "Test failed."); -static_assert(get_VKQ_stride( 64, 4, 32) == 64, "Test failed."); -static_assert(get_VKQ_stride( 80, 1, 16) == 16, "Test failed."); -static_assert(get_VKQ_stride( 80, 2, 16) == 16, "Test failed."); -static_assert(get_VKQ_stride( 80, 4, 16) == 16, "Test failed."); - -template -void ggml_cuda_flash_attn_ext_wmma_f16_case(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * KQV = dst; - const ggml_tensor * Q = dst->src[0]; - - constexpr int nwarps = 4; - - constexpr int frag_m = cols_per_block == 8 && D % 32 == 0 ? 
32 : 16; - const int blocks_num_pb1 = ((Q->ne[1] + cols_per_block - 1) / cols_per_block)*Q->ne[2]*Q->ne[3]; - const int nsm = ggml_cuda_info().devices[ggml_cuda_get_device()].nsm; - - float logit_softcap; - memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float)); - - if (4*blocks_num_pb1 < 2*nsm) { - constexpr int parallel_blocks = 4; - fattn_kernel_t fattn_kernel; - if (logit_softcap == 0.0f) { - constexpr bool use_logit_softcap = false; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } else { - constexpr bool use_logit_softcap = true; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); - return; - } - if (2*blocks_num_pb1 < 2*nsm) { - constexpr int parallel_blocks = 2; - fattn_kernel_t fattn_kernel; - if (logit_softcap == 0.0f) { - constexpr bool use_logit_softcap = false; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } else { - constexpr bool use_logit_softcap = true; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); - return; - } - constexpr int parallel_blocks = 1; - fattn_kernel_t fattn_kernel; - if (logit_softcap == 0.0f) { - constexpr bool use_logit_softcap = false; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } else { - constexpr bool use_logit_softcap = true; - fattn_kernel = flash_attn_ext_f16< - D, cols_per_block, nwarps, get_VKQ_stride(D, nwarps, frag_m), parallel_blocks, KQ_acc_t, use_logit_softcap>; - } - launch_fattn(ctx, dst, fattn_kernel, nwarps, cols_per_block, true, true); -} - -#define DECL_FATTN_WMMA_F16_CASE(D, cols_per_block, KQ_acc_t) \ - template void ggml_cuda_flash_attn_ext_wmma_f16_case \ - (ggml_backend_cuda_context & ctx, ggml_tensor * dst) \ - -extern DECL_FATTN_WMMA_F16_CASE( 64, 16, float); -extern DECL_FATTN_WMMA_F16_CASE( 80, 16, float); -extern DECL_FATTN_WMMA_F16_CASE( 96, 16, float); -extern DECL_FATTN_WMMA_F16_CASE(112, 16, float); -extern DECL_FATTN_WMMA_F16_CASE(128, 16, float); -extern DECL_FATTN_WMMA_F16_CASE(256, 16, float); - -extern DECL_FATTN_WMMA_F16_CASE( 64, 32, float); -extern DECL_FATTN_WMMA_F16_CASE( 80, 32, float); -extern DECL_FATTN_WMMA_F16_CASE( 96, 32, float); -extern DECL_FATTN_WMMA_F16_CASE(112, 32, float); -extern DECL_FATTN_WMMA_F16_CASE(128, 32, float); -// extern DECL_FATTN_WMMA_F16_CASE(256, 16, float); - -extern DECL_FATTN_WMMA_F16_CASE( 64, 8, half); -extern DECL_FATTN_WMMA_F16_CASE( 96, 8, half); -extern DECL_FATTN_WMMA_F16_CASE(128, 8, half); -extern DECL_FATTN_WMMA_F16_CASE(256, 8, half); - -extern DECL_FATTN_WMMA_F16_CASE( 64, 16, half); -extern DECL_FATTN_WMMA_F16_CASE( 80, 16, half); -extern DECL_FATTN_WMMA_F16_CASE( 96, 16, half); -extern DECL_FATTN_WMMA_F16_CASE(112, 16, half); -extern DECL_FATTN_WMMA_F16_CASE(128, 16, half); -extern DECL_FATTN_WMMA_F16_CASE(256, 16, half); - -extern DECL_FATTN_WMMA_F16_CASE( 64, 32, half); -extern DECL_FATTN_WMMA_F16_CASE( 80, 32, half); -extern DECL_FATTN_WMMA_F16_CASE( 96, 32, half); -extern 
DECL_FATTN_WMMA_F16_CASE(112, 32, half); -extern DECL_FATTN_WMMA_F16_CASE(128, 32, half); -extern DECL_FATTN_WMMA_F16_CASE(256, 16, half); +void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu index 0b26b0f8e..b1e66d470 100644 --- a/ggml/src/ggml-cuda/fattn.cu +++ b/ggml/src/ggml-cuda/fattn.cu @@ -1,5 +1,6 @@ #include "common.cuh" #include "fattn-common.cuh" +#include "fattn-mma-f16.cuh" #include "fattn-tile-f16.cuh" #include "fattn-tile-f32.cuh" #include "fattn-vec-f16.cuh" @@ -7,144 +8,56 @@ #include "fattn-wmma-f16.cuh" #include "fattn.cuh" -#include +template +static void ggml_cuda_flash_attn_ext_mma_f16_switch_hs(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * Q = dst->src[0]; -static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { - const ggml_tensor * KQV = dst; - const ggml_tensor * Q = dst->src[0]; - - const enum ggml_prec prec = ggml_flash_attn_ext_get_prec(KQV); - - if (prec != GGML_PREC_DEFAULT) { - if (Q->ne[1] <= 32 || Q->ne[0] > 128) { - constexpr int cols_per_block = 16; - switch (Q->ne[0]) { - case 64: - ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst); - break; - case 80: - ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst); - break; - case 96: - ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst); - break; - case 112: - ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst); - break; - case 128: - ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); - break; - case 256: - ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst); - break; - default: - GGML_ABORT("fatal error"); - break; - } - } else { - constexpr int cols_per_block = 32; - switch (Q->ne[0]) { - case 64: - ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, float>(ctx, dst); - break; - case 80: - ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, float>(ctx, dst); - break; - case 96: - ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, float>(ctx, dst); - break; - case 112: - ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, float>(ctx, dst); - break; - case 128: - ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); - break; - // case 256: - // ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst); - // break; - default: - GGML_ABORT("fatal error"); - break; - } - } - return; - } - - if (Q->ne[1] <= 8 && Q->ne[0] % WARP_SIZE == 0) { - constexpr int cols_per_block = 8; - switch (Q->ne[0]) { - case 64: - ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); - break; - case 96: - ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst); - break; - case 128: - ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst); - break; - case 256: - ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst); - break; - default: - GGML_ABORT("fatal error"); - break; - } - return; - } - - if (Q->ne[1] <= 32) { - constexpr int cols_per_block = 16; - switch (Q->ne[0]) { - case 64: - ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst); - break; - case 80: - ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, half>(ctx, dst); - break; - case 96: - ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, 
half>(ctx, dst);
-                break;
-            case 112:
-                ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, half>(ctx, dst);
-                break;
-            case 128:
-                ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst);
-                break;
-            case 256:
-                ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
-                break;
-            default:
-                GGML_ABORT("fatal error");
-                break;
-        }
-        return;
-    }
-
-    constexpr int cols_per_block = 32;
     switch (Q->ne[0]) {
         case 64:
-            ggml_cuda_flash_attn_ext_wmma_f16_case< 64, cols_per_block, half>(ctx, dst);
+            ggml_cuda_flash_attn_ext_mma_f16_case< 64, cols_per_block>(ctx, dst);
             break;
         case 80:
-            ggml_cuda_flash_attn_ext_wmma_f16_case< 80, cols_per_block, half>(ctx, dst);
+            ggml_cuda_flash_attn_ext_mma_f16_case< 80, cols_per_block>(ctx, dst);
             break;
         case 96:
-            ggml_cuda_flash_attn_ext_wmma_f16_case< 96, cols_per_block, half>(ctx, dst);
+            ggml_cuda_flash_attn_ext_mma_f16_case< 96, cols_per_block>(ctx, dst);
             break;
         case 112:
-            ggml_cuda_flash_attn_ext_wmma_f16_case<112, cols_per_block, half>(ctx, dst);
+            ggml_cuda_flash_attn_ext_mma_f16_case<112, cols_per_block>(ctx, dst);
             break;
         case 128:
-            ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, half>(ctx, dst);
+            ggml_cuda_flash_attn_ext_mma_f16_case<128, cols_per_block>(ctx, dst);
             break;
         case 256:
-            ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, half>(ctx, dst);
+            ggml_cuda_flash_attn_ext_mma_f16_case<256, cols_per_block>(ctx, dst);
             break;
         default:
             GGML_ABORT("fatal error");
             break;
     }
 }
+
+static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    const ggml_tensor * Q = dst->src[0];
+
+    if (Q->ne[1] <= 8) {
+        ggml_cuda_flash_attn_ext_mma_f16_switch_hs<8>(ctx, dst);
+        return;
+    }
+
+    if (Q->ne[1] <= 16) {
+        ggml_cuda_flash_attn_ext_mma_f16_switch_hs<16>(ctx, dst);
+        return;
+    }
+
+    if (Q->ne[1] <= 32) {
+        ggml_cuda_flash_attn_ext_mma_f16_switch_hs<32>(ctx, dst);
+        return;
+    }
+
+    ggml_cuda_flash_attn_ext_mma_f16_switch_hs<64>(ctx, dst);
+}
+
 #define FATTN_VEC_F16_CASE(D, type_K, type_V)                               \
     if (Q->ne[0] == (D) && K->type == (type_K) && V->type == (type_V)) {    \
         ggml_cuda_flash_attn_ext_vec_f16_case<D, type_K, type_V>(ctx, dst); \
@@ -322,11 +235,19 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
         return;
     }
 
-    if (!fp16_mma_available(cc)) {
-        if (Q->ne[1] <= 8) {
-            ggml_cuda_flash_attn_ext_vec_f16(ctx, dst);
+    if (!new_mma_available(cc)) {
+        if (prec == GGML_PREC_DEFAULT) {
+            if (Q->ne[1] <= 8) {
+                ggml_cuda_flash_attn_ext_vec_f16(ctx, dst);
+            } else {
+                ggml_cuda_flash_attn_ext_tile_f16(ctx, dst);
+            }
         } else {
-            ggml_cuda_flash_attn_ext_tile_f16(ctx, dst);
+            if (Q->ne[1] <= 8) {
+                ggml_cuda_flash_attn_ext_vec_f32(ctx, dst);
+            } else {
+                ggml_cuda_flash_attn_ext_tile_f32(ctx, dst);
+            }
         }
         return;
     }
@@ -341,5 +262,11 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
         }
     }
 
-    ggml_cuda_flash_attn_ext_wmma_f16(ctx, dst);
+    // The MMA implementation needs Turing or newer, use the old WMMA code for Volta:
+    if (cc == GGML_CUDA_CC_VOLTA) {
+        ggml_cuda_flash_attn_ext_wmma_f16(ctx, dst);
+        return;
+    }
+
+    ggml_cuda_flash_attn_ext_mma_f16(ctx, dst);
 }
diff --git a/ggml/src/ggml-cuda/mma.cuh b/ggml/src/ggml-cuda/mma.cuh
index 7d11540af..9788a1389 100644
--- a/ggml/src/ggml-cuda/mma.cuh
+++ b/ggml/src/ggml-cuda/mma.cuh
@@ -1,11 +1,67 @@
+// This file contains primitives that expose the tensor core PTX instructions for CUDA code.
+// The primitives can be used in a similar way as the nvcuda::wmma interface but with a well-defined memory layout.
+// The documentation for the PTX instructions can be found under:
+//   https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#matrix-multiply-accumulate-operation-using-mma-instruction
+//
+// Like with nvcuda::wmma there are three types of matrix tiles: A, B, and C with A @ B = C.
+// A is a row-major matrix with shape I x K.
+// B is a column-major matrix with shape K x J.
+// C is a column-major matrix with shape I x J.
+// Note that along their lowest dimension I, J, and K are measured in physical 32 bit elements instead of logical elements.
+// The functions get_i, get_j, and get_k can be used to get the physical 32 bit index of the lth element of a thread within a tile.
+// All matrix tiles have ne physical 32 bit elements per warp.
+//
+// As described in the documentation, all pointers for load_ldmatrix must be to shared memory and aligned to 16 bytes.
+
 #include "common.cuh"
 
-struct mma_int_A_I16K4 {
+
+#if CUDART_VERSION >= 11800
+
+static __device__ __forceinline__ int ggml_cuda_movmatrix(const int x) {
+    int ret = 0;
+
+#ifdef NEW_MMA_AVAILABLE
+    asm("movmatrix.sync.aligned.m8n8.trans.b16 %0, %1;"
+        : "+r"(ret) : "r"(x));
+#else
+    NO_DEVICE_CODE;
+#endif // NEW_MMA_AVAILABLE
+    return ret;
+}
+
+#else
+
+static __device__ __forceinline__ int ggml_cuda_movmatrix(const int x) {
+    // Imagine transposing a row-major matrix to a column-major matrix.
+    const int src_i_low  = 2 * (threadIdx.x % 4);
+    const int src_i_high = src_i_low + 1;
+    const int src_j      = threadIdx.x / 4;
+
+    const int src_laneid_low  = src_i_low  * 4 + src_j / 2;
+    const int src_laneid_high = src_i_high * 4 + src_j / 2;
+
+    const int shift_low  = ((src_j + 0) % 2) * 16;
+    const int shift_high = ((src_j + 1) % 2) * 16;
+
+    const int ret_low  = (__shfl_sync(0xFFFFFFFF, x, src_laneid_low,  WARP_SIZE) >> shift_low)  & 0x0000FFFF;
+    const int ret_high = (__shfl_sync(0xFFFFFFFF, x, src_laneid_high, WARP_SIZE) << shift_high) & 0xFFFF0000;
+
+    return ret_low | ret_high;
+}
+
+#endif // CUDART_VERSION >= 11800
+
+
+template <typename T>
+struct mma_A_I16K4 {
+    static_assert(sizeof(T) == 4, "bad type size");
+
     static constexpr int I  = 16;
     static constexpr int K  = 4;
     static constexpr int ne = 2;
 
-    int x[ne] = {0};
+    T x[ne];
 
     static __device__ __forceinline__ int get_i(const int l) {
         const int ret = (l%2) * (I/2) + threadIdx.x / K;
@@ -21,27 +77,35 @@ struct mma_int_A_I16K4 {
         return ret;
     }
 
-    __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
-#if defined(INT8_MMA_AVAILABLE)
-        const int * xs = xs0 + (threadIdx.x%I)*stride;
-        asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];"
-            : "+r"(x[0]), "+r"(x[1])
-            : "l"(xs));
-#else
+    __device__ __forceinline__ void load_generic(const T * __restrict__ xs0, const int & stride) {
 #pragma unroll
         for (int l = 0; l < ne; ++l) {
             x[l] = xs0[get_i(l)*stride + get_k(l)];
         }
-#endif // defined(INT8_MMA_AVAILABLE)
+    }
+
+    __device__ __forceinline__ void load_ldmatrix(const T * __restrict__ xs0, const int & stride) {
+#ifdef NEW_MMA_AVAILABLE
+        int * xi = (int *) x;
+        const int * xs = (const int *) xs0 + (threadIdx.x%I)*stride;
+        asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];"
+            : "+r"(xi[0]), "+r"(xi[1])
+            : "l"(xs));
+#else
+        load_generic(xs0, stride);
+#endif // NEW_MMA_AVAILABLE
     }
 };
 
-struct mma_int_A_I16K8 {
+template <typename T>
+struct mma_A_I16K8 {
+    static_assert(sizeof(T) == 4, "bad type size");
+
     static constexpr int I  = 16;
     static constexpr int K  = 8;
     static constexpr int ne = 4;
 
-    int x[ne] = {0};
+    T x[ne];
 
     static __device__ __forceinline__ int get_i(const int l) {
         const int ret = (l%2) * (I/2) + threadIdx.x / (K/2);
@@ -57,31 +121,62 @@ struct mma_int_A_I16K8 {
         return ret;
     }
 
-    __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
-#if defined(INT8_MMA_AVAILABLE)
-        const int * xs = xs0 + (threadIdx.x%I)*stride + (threadIdx.x/I)*(K/2);
-        asm("ldmatrix.sync.aligned.m8n8.x4.b16 {%0, %1, %2, %3}, [%4];"
-            : "+r"(x[0]), "+r"(x[1]), "+r"(x[2]), "+r"(x[3])
-            : "l"(xs));
-#else
+    __device__ __forceinline__ void load_generic(const T * __restrict__ xs0, const int & stride) {
 #pragma unroll
         for (int l = 0; l < ne; ++l) {
             x[l] = xs0[get_i(l)*stride + get_k(l)];
         }
-#endif // defined(INT8_MMA_AVAILABLE)
     }
 
-    __device__ __forceinline__ void load_low(const int * __restrict__ xs0, const int & stride) {
-        ((mma_int_A_I16K4 *) x)[0].load(xs0, stride);
+    __device__ __forceinline__ void load_ldmatrix(const T * __restrict__ xs0, const int & stride) {
+#ifdef NEW_MMA_AVAILABLE
+        int * xi = (int *) x;
+        const int * xs = (const int *) xs0 + (threadIdx.x%I)*stride + (threadIdx.x/I)*(K/2);
+        asm("ldmatrix.sync.aligned.m8n8.x4.b16 {%0, %1, %2, %3}, [%4];"
+            : "+r"(xi[0]), "+r"(xi[1]), "+r"(xi[2]), "+r"(xi[3])
+            : "l"(xs));
+#else
+        GGML_UNUSED(xs0);
+        GGML_UNUSED(stride);
+        NO_DEVICE_CODE;
+#endif // NEW_MMA_AVAILABLE
+    }
+
+    __device__ __forceinline__ void load_ldmatrix_trans(const T * __restrict__ xs0, const int & stride) {
+#ifdef NEW_MMA_AVAILABLE
+        int * xi = (int *) x;
+        const int * xs = (const int *) xs0 + (threadIdx.x%I)*stride + (threadIdx.x/I)*(K/2);
+        asm("ldmatrix.sync.aligned.m8n8.x4.trans.b16 {%0, %1, %2, %3}, [%4];"
+            : "+r"(xi[0]), "+r"(xi[2]), "+r"(xi[1]), "+r"(xi[3])
+            : "l"(xs));
+#else
+        GGML_UNUSED(xs0);
+        GGML_UNUSED(stride);
+        NO_DEVICE_CODE;
+#endif // NEW_MMA_AVAILABLE
+    }
+
+    __device__ __forceinline__ void transpose() {
+        int * xi = (int *) x;
+        xi[0] = ggml_cuda_movmatrix(xi[0]);
+
+        const int tmp = ggml_cuda_movmatrix(xi[1]);
+        xi[1] = ggml_cuda_movmatrix(xi[2]);
+        xi[2] = tmp;
+
+        xi[3] = ggml_cuda_movmatrix(xi[3]);
     }
 };
 
-struct mma_int_B_J8K4 {
+template <typename T>
+struct mma_B_J8K4 {
+    static_assert(sizeof(T) == 4, "bad type size");
+
     static constexpr int J  = 8;
     static constexpr int K  = 4;
     static constexpr int ne = 1;
 
-    int x[ne] = {0};
+    T x[ne];
 
     static __device__ __forceinline__ int get_j(const int /* l */) {
         const int ret = threadIdx.x / K;
@@ -97,27 +192,34 @@ struct mma_int_B_J8K4 {
         return ret;
     }
 
-    __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
-#if defined(INT8_MMA_AVAILABLE) && false // Loading as 4 byte values is faster
-        const int * xs = xs0 + (threadIdx.x%J)*stride;
-        asm("ldmatrix.sync.aligned.m8n8.x1.b16 {%0}, [%1];"
-            : "+r"(x[0])
-            : "l"(xs));
-#else
+    __device__ __forceinline__ void load_generic(const T * __restrict__ xs0, const int & stride) {
 #pragma unroll
         for (int l = 0; l < ne; ++l) {
             x[l] = xs0[get_j(l)*stride + get_k(l)];
         }
-#endif // defined(INT8_MMA_AVAILABLE)
+    }
+
+    __device__ __forceinline__ void load_ldmatrix(const T * __restrict__ xs0, const int & stride) {
+#ifdef NEW_MMA_AVAILABLE
+        int * xi = (int *) x;
+        const int * xs = (const int *) xs0 + (threadIdx.x%J)*stride;
+        asm("ldmatrix.sync.aligned.m8n8.x1.b16 {%0}, [%1];"
+            : "+r"(xi[0]) : "l"(xs));
+#else
+        load_generic(xs0, stride);
+#endif // NEW_MMA_AVAILABLE
     }
 };
 
-struct mma_int_B_J8K8 {
+template <typename T>
+struct mma_B_J8K8 {
+    static_assert(sizeof(T) == 4, "bad type size");
+
     static constexpr int J  = 8;
     static constexpr int K  = 8;
     static constexpr int ne = 2;
 
-    int x[ne] = {0};
+    T x[ne];
 
     static __device__ __forceinline__ int get_j(const int /* l */) {
         const int ret = threadIdx.x / (K/2);
@@ -133,22 +235,31 @@ struct mma_int_B_J8K8 {
         return ret;
     }
 
-    __device__ __forceinline__ void load(const int * __restrict__ xs0, const int & stride) {
-#if defined(INT8_MMA_AVAILABLE) && false // Loading as 4 byte values is faster
-        const int * xs = xs0 + (threadIdx.x%J)*stride + ((threadIdx.x/J)*(K/2)) % K;
-        asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];"
-            : "+r"(x[0]), "+r"(x[1])
-            : "l"(xs));
-#else
+    __device__ __forceinline__ void load_generic(const T * __restrict__ xs0, const int & stride) {
 #pragma unroll
         for (int l = 0; l < ne; ++l) {
            x[l] = xs0[get_j(l)*stride + get_k(l)];
         }
-#endif // defined(INT8_MMA_AVAILABLE)
+    }
+
+    __device__ __forceinline__ void load_ldmatrix(const T * __restrict__ xs0, const int & stride) {
+#ifdef NEW_MMA_AVAILABLE
+        int * xi = (int *) x;
+        const int * xs = (const int *) xs0 + (threadIdx.x%J)*stride + ((threadIdx.x/J)*(K/2)) % K;
+        asm("ldmatrix.sync.aligned.m8n8.x2.b16 {%0, %1}, [%2];"
+            : "+r"(xi[0]), "+r"(xi[1])
+            : "l"(xs));
+#else
+        load_generic(xs0, stride);
+#endif // NEW_MMA_AVAILABLE
     }
 };
 
-struct mma_int_C_I16J8 {
+template <typename T>
+struct mma_C_I16J8 {};
+
+template <>
+struct mma_C_I16J8<int> {
     static constexpr int I  = 16;
     static constexpr int J  = 8;
     static constexpr int ne = 4;
@@ -169,8 +280,8 @@
         return ret;
     }
 
-    __device__ __forceinline__ void mma_K4(const mma_int_A_I16K4 & mma_A, const mma_int_B_J8K4 & mma_B) {
-#ifdef INT8_MMA_AVAILABLE
+    __device__ __forceinline__ void mma(const mma_A_I16K4<int> & mma_A, const mma_B_J8K4<int> & mma_B) {
+#ifdef NEW_MMA_AVAILABLE
 #if __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
         asm("mma.sync.aligned.m16n8k16.row.col.s32.s8.s8.s32 {%0, %1, %2, %3}, {%4, %5}, {%6}, {%0, %1, %2, %3};"
             : "+r"(x[0]), "+r"(x[1]), "+r"(x[2]), "+r"(x[3])
@@ -188,11 +299,11 @@ struct mma_int_C_I16J8 {
         GGML_UNUSED(mma_A);
         GGML_UNUSED(mma_B);
         NO_DEVICE_CODE;
-#endif // INT8_MMA_AVAILABLE
+#endif // NEW_MMA_AVAILABLE
     }
 
-    __device__ __forceinline__ void mma_K8(const mma_int_A_I16K8 & mma_A, const mma_int_B_J8K8 & mma_B) {
-#ifdef INT8_MMA_AVAILABLE
+    __device__ __forceinline__ void mma(const mma_A_I16K8<int> & mma_A, const mma_B_J8K8<int> & mma_B) {
+#ifdef NEW_MMA_AVAILABLE
 #if __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
         asm("mma.sync.aligned.m16n8k32.row.col.s32.s8.s8.s32 {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9}, {%0, %1, %2, %3};"
             : "+r"(x[0]), "+r"(x[1]), "+r"(x[2]), "+r"(x[3])
@@ -216,6 +327,132 @@ struct mma_int_C_I16J8 {
         GGML_UNUSED(mma_A);
         GGML_UNUSED(mma_B);
         NO_DEVICE_CODE;
-#endif // INT8_MMA_AVAILABLE
+#endif // NEW_MMA_AVAILABLE
+    }
+};
+
+template <>
+struct mma_C_I16J8<half2> {
+    static constexpr int I  = 16;
+    static constexpr int J  = 4;
+    static constexpr int ne = 2;
+
+    half2 x[ne] = {{0.0f, 0.0f}, {0.0f, 0.0f}};
+
+    static __device__ __forceinline__ int get_i(const int l) {
+        const int ret = l * (I/2) + threadIdx.x / J;
+        GGML_CUDA_ASSUME(ret >= 0);
+        GGML_CUDA_ASSUME(ret <  I);
+        return ret;
+    }
+
+    static __device__ __forceinline__ int get_j(const int /* l */) {
+        const int ret = threadIdx.x % J;
+        GGML_CUDA_ASSUME(ret >= 0);
+        GGML_CUDA_ASSUME(ret <  J);
+        return ret;
+    }
+
+    __device__ __forceinline__ void mma(const mma_A_I16K8<half2> & mma_A, const mma_B_J8K8<half2> & mma_B) {
+#ifdef NEW_MMA_AVAILABLE
+        int * Axi = (int *) mma_A.x;
+        int * Bxi = (int *) mma_B.x;
+        int * xi  = (int *) x;
+#if __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
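+        // On Ampere a single m16n8k16 instruction covers the whole 16x8 tile,
+        // accumulating C += A*B in f16; the half2 fragments are aliased to
+        // 32 bit registers to satisfy the "r" constraints of the PTX instruction.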
asm("mma.sync.aligned.m16n8k16.row.col.f16.f16.f16.f16 {%0, %1}, {%2, %3, %4, %5}, {%6, %7}, {%0, %1};" + : "+r"(xi[0]), "+r"(xi[1]) + : "r"(Axi[0]), "r"(Axi[1]), "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[0]), "r"(Bxi[1])); +#else + // On Turing m16n8k16 mma is not available, use 2x m8n8k8 mma instead: + asm("mma.sync.aligned.m16n8k8.row.col.f16.f16.f16.f16 {%0, %1}, {%2, %3}, {%4}, {%0, %1};" + : "+r"(xi[0]), "+r"(xi[1]) + : "r"(Axi[0]), "r"(Axi[1]), "r"(Bxi[0])); + asm("mma.sync.aligned.m16n8k8.row.col.f16.f16.f16.f16 {%0, %1}, {%2, %3}, {%4}, {%0, %1};" + : "+r"(xi[0]), "+r"(xi[1]) + : "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[1])); +#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE +#else + GGML_UNUSED(mma_A); + GGML_UNUSED(mma_B); + NO_DEVICE_CODE; +#endif // NEW_MMA_AVAILABLE + } + + __device__ __forceinline__ mma_B_J8K8 to_mma_B() { + mma_B_J8K8 mma_B; + + int * xi = (int *) x; + int * Bxi = (int *) mma_B.x; + Bxi[0] = ggml_cuda_movmatrix(xi[0]); + Bxi[1] = ggml_cuda_movmatrix(xi[1]); + + return mma_B; + } +}; + +template <> +struct mma_C_I16J8 { + static constexpr int I = 16; + static constexpr int J = 8; + static constexpr int ne = 4; + + float x[ne] = {0.0f, 0.0f, 0.0f, 0.0f}; + + static __device__ __forceinline__ int get_i(const int l) { + const int ret = (l/2) * (I/2) + threadIdx.x / (J/2); + GGML_CUDA_ASSUME(ret >= 0); + GGML_CUDA_ASSUME(ret < I); + return ret; + } + + static __device__ __forceinline__ int get_j(const int l) { + const int ret = 2 * (threadIdx.x % (J/2)) + l%2; + GGML_CUDA_ASSUME(ret >= 0); + GGML_CUDA_ASSUME(ret < J); + return ret; + } + + __device__ __forceinline__ void mma(const mma_A_I16K8 & mma_A, const mma_B_J8K8 & mma_B) { +#ifdef NEW_MMA_AVAILABLE + int * Axi = (int *) mma_A.x; + int * Bxi = (int *) mma_B.x; + int * xi = (int *) x; +#if __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE + asm("mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 {%0, %1, %2, %3}, {%4, %5, %6, %7}, {%8, %9}, {%0, %1, %2, %3};" + : "+r"(xi[0]), "+r"(xi[1]), "+r"(xi[2]), "+r"(xi[3]) + : "r"(Axi[0]), "r"(Axi[1]), "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[0]), "r"(Bxi[1])); +#else + // On Turing m16n8k16 mma is not available, use 2x m8n8k8 mma instead: + asm("mma.sync.aligned.m16n8k8.row.col.f32.f16.f16.f32 {%0, %1, %2, %3}, {%4, %5}, {%6}, {%0, %1, %2, %3};" + : "+r"(xi[0]), "+r"(xi[1]), "+r"(xi[2]), "+r"(xi[3]) + : "r"(Axi[0]), "r"(Axi[1]), "r"(Bxi[0])); + asm("mma.sync.aligned.m16n8k8.row.col.f32.f16.f16.f32 {%0, %1, %2, %3}, {%4, %5}, {%6}, {%0, %1, %2, %3};" + : "+r"(xi[0]), "+r"(xi[1]), "+r"(xi[2]), "+r"(xi[3]) + : "r"(Axi[2]), "r"(Axi[3]), "r"(Bxi[1])); +#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE +#else + GGML_UNUSED(mma_A); + GGML_UNUSED(mma_B); + NO_DEVICE_CODE; +#endif // NEW_MMA_AVAILABLE + } + + __device__ __forceinline__ mma_B_J8K8 to_mma_B() { + mma_B_J8K8 mma_B; + mma_B.x[0] = make_half2(x[0], x[1]); + mma_B.x[1] = make_half2(x[2], x[3]); + + int * Bxi = (int *) mma_B.x; + Bxi[0] = ggml_cuda_movmatrix(Bxi[0]); + Bxi[1] = ggml_cuda_movmatrix(Bxi[1]); + + return mma_B; + } + + __device__ __forceinline__ void load_generic(const float * __restrict__ xs0, const int & stride) { +#pragma unroll + for (int l = 0; l < ne; ++l) { + x[l] = xs0[get_j(l)*stride + get_i(l)]; + } } }; diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu index 270251df4..83cb78cbd 100644 --- a/ggml/src/ggml-cuda/mmq.cu +++ b/ggml/src/ggml-cuda/mmq.cu @@ -132,7 +132,7 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) { return false; } - if (int8_mma_available(cc)) { + if 
(new_mma_available(cc)) { return true; } diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index 3cd508a1d..c05c84778 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -87,7 +87,7 @@ struct tile_x_sizes { }; static constexpr int get_mmq_x_max_host(const int cc) { - return int8_mma_available(cc) ? 128 : + return new_mma_available(cc) ? 128 : #ifdef GGML_CUDA_FORCE_MMQ cc >= GGML_CUDA_CC_VOLTA && cc < GGML_CUDA_CC_OFFSET_AMD ? 128 : 64; #else @@ -96,9 +96,9 @@ static constexpr int get_mmq_x_max_host(const int cc) { } static constexpr __device__ int get_mmq_x_max_device() { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE return 128; -#else // INT8_MMA_AVAILABLE +#else // NEW_MMA_AVAILABLE #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) return 128; @@ -116,7 +116,7 @@ static constexpr __device__ int get_mmq_x_max_device() { #endif // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } static constexpr int get_mmq_y_host(const int cc) { @@ -209,10 +209,10 @@ static constexpr __host__ __device__ int mmq_get_mma_tile_x_k(ggml_type type) { #define MMQ_TILE_Y_K (WARP_SIZE + WARP_SIZE/QI8_1) static int mmq_get_granularity_host(const int mmq_x, const int cc) { - return int8_mma_available(cc) && mmq_x >= 48 ? 16 : 8; + return new_mma_available(cc) && mmq_x >= 48 ? 16 : 8; } -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE static constexpr __device__ int mmq_get_granularity_device(const int mmq_x) { return mmq_x >= 48 ? 16 : 8; } @@ -220,21 +220,21 @@ static constexpr __device__ int mmq_get_granularity_device(const int mmq_x) { static constexpr __device__ int mmq_get_granularity_device(const int /* mmq_x */) { return 8; } -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE // ------------------------------------------------------------ template static __device__ __forceinline__ void load_tiles_q4_0( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q4_0, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI4_0; const int kqsx = threadIdx.x % QI4_0; @@ -250,12 +250,12 @@ template static __device__ __forceinlin const block_q4_0 * bxi = (const block_q4_0 *) x + kbx0 + i*stride + kbx; const int qs0 = get_int_b2(bxi->qs, kqsx); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + kbx*(2*QI4_0) + kqsx + 0] = __vsubss4((qs0 >> 0) & 0x0F0F0F0F, 0x08080808); x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + kbx*(2*QI4_0) + kqsx + QI4_0] = __vsubss4((qs0 >> 4) & 0x0F0F0F0F, 0x08080808); #else x_qs[i*(WARP_SIZE + 1) + threadIdx.x] = qs0; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI4_0; @@ -271,11 +271,11 @@ template static __device__ __forceinlin const block_q4_0 * bxi = (const block_q4_0 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kbxd] = bxi->d; #else x_df[i*(WARP_SIZE/QI4_0) + i/QI4_0 + kbxd] = bxi->d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -322,14 +322,14 @@ static __device__ 
__forceinline__ void vec_dot_q4_0_q8_1_dp4a( template static __device__ __forceinline__ void load_tiles_q4_1( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q4_1, mmq_y); int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI4_1; const int kqsx = threadIdx.x % QI4_1; @@ -345,12 +345,12 @@ template static __device__ __forceinlin const block_q4_1 * bxi = (const block_q4_1 *) x + kbx0 + i*stride + kbx; const int qs0 = get_int_b4(bxi->qs, kqsx); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kbx*(2*QI4_1) + kqsx + 0] = (qs0 >> 0) & 0x0F0F0F0F; x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kbx*(2*QI4_1) + kqsx + QI4_1] = (qs0 >> 4) & 0x0F0F0F0F; #else x_qs[i*(WARP_SIZE + 1) + threadIdx.x] = qs0; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI4_1; @@ -366,11 +366,11 @@ template static __device__ __forceinlin const block_q4_1 * bxi = (const block_q4_1 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_dm[i*MMQ_MMA_TILE_X_K_Q8_1 + kbxd] = bxi->dm; #else x_dm[i*(WARP_SIZE/QI4_1) + i/QI4_1 + kbxd] = bxi->dm; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -417,14 +417,14 @@ static __device__ __forceinline__ void vec_dot_q4_1_q8_1_dp4a( template static __device__ __forceinline__ void load_tiles_q5_0( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q5_0, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI5_0; const int kqsx = threadIdx.x % QI5_0; @@ -456,13 +456,13 @@ template static __device__ __forceinlin qs1 |= (qh << 9) & 0x10000000; // 19 -> 28 qs1 = __vsubss4(qs1, 0x10101010); // subtract 16 -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + kbx*(2*QI5_0) + kqsx + 0] = qs0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + kbx*(2*QI5_0) + kqsx + QI5_0] = qs1; #else x_qs[i*(2*WARP_SIZE + 1) + kbx*(2*QI5_0) + kqsx + 0] = qs0; x_qs[i*(2*WARP_SIZE + 1) + kbx*(2*QI5_0) + kqsx + QI5_0] = qs1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI5_0; @@ -478,25 +478,25 @@ template static __device__ __forceinlin const block_q5_0 * bxi = (const block_q5_0 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kbxd] = bxi->d; #else x_df[i*(WARP_SIZE/QI5_0) + i/QI5_0 + kbxd] = bxi->d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_q5_1( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = 
mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q5_1, mmq_y); int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI5_1; const int kqsx = threadIdx.x % QI5_1; @@ -526,13 +526,13 @@ template static __device__ __forceinlin qs1 |= (qh << 2) & 0x00100000; // 18 -> 20 qs1 |= (qh << 9) & 0x10000000; // 19 -> 28 -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kbx*(2*QI5_1) + kqsx + 0] = qs0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kbx*(2*QI5_1) + kqsx + QI5_1] = qs1; #else x_qs[i*(2*WARP_SIZE + 1) + kbx*(2*QI5_1) + kqsx + 0] = qs0; x_qs[i*(2*WARP_SIZE + 1) + kbx*(2*QI5_1) + kqsx + QI5_1] = qs1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI5_1; @@ -548,25 +548,25 @@ template static __device__ __forceinlin const block_q5_1 * bxi = (const block_q5_1 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_dm[i*MMQ_MMA_TILE_X_K_Q8_1 + kbxd] = bxi->dm; #else x_dm[i*(WARP_SIZE/QI5_1) + i/QI5_1 + kbxd] = bxi->dm; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_q8_0( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_tile + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q8_0, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI8_0; const int kqsx = threadIdx.x % QI8_0; @@ -581,13 +581,13 @@ template static __device__ __forceinlin const block_q8_0 * bxi = (const block_q8_0 *) x + kbx0 + i*stride + kbx; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 0 + threadIdx.x] = get_int_b2(bxi[0].qs, kqsx); x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + WARP_SIZE + threadIdx.x] = get_int_b2(bxi[WARP_SIZE/QI8_0].qs, kqsx); #else x_qs[i*(2*WARP_SIZE + 1) + 0 + threadIdx.x] = get_int_b2(bxi[0].qs, kqsx); x_qs[i*(2*WARP_SIZE + 1) + WARP_SIZE + threadIdx.x] = get_int_b2(bxi[WARP_SIZE/QI8_0].qs, kqsx); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = 2*WARP_SIZE / QI8_0; @@ -603,11 +603,11 @@ template static __device__ __forceinlin const block_q8_0 * bxi = (const block_q8_0 *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kbxd] = bxi->d; #else x_df[i*(2*WARP_SIZE/QI8_0) + i/(QI8_0/2) + kbxd] = bxi->d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -645,9 +645,9 @@ template static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { - typedef mma_int_A_I16K8 mma_A; - typedef mma_int_B_J8K8 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K8 mma_A; + typedef mma_B_J8K8 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -672,7 +672,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += QI8_0) { const int k0 = k00 + k01; - A[n][k01/QI8_0].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q8_0 + k0, 
MMQ_MMA_TILE_X_K_Q8_0); + A[n][k01/QI8_0].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q8_0 + k0, MMQ_MMA_TILE_X_K_Q8_0); } #pragma unroll @@ -695,7 +695,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mma( mma_B B; float dB[mma_C::ne/2]; - B.load(y_qs + j0*MMQ_TILE_Y_K + k01, MMQ_TILE_Y_K); + B.load_generic(y_qs + j0*MMQ_TILE_Y_K + k01, MMQ_TILE_Y_K); // faster than load_ldmatrix #pragma unroll for (int l = 0; l < mma_C::ne/2; ++l) { @@ -711,7 +711,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_q8_1_mma( #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C C; - C.mma_K8(A[n][k01/QI8_0], B); + C.mma(A[n][k01/QI8_0], B); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -756,9 +756,9 @@ template static __device__ __forceinline__ void vec_dot_q8_1_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { - typedef mma_int_A_I16K8 mma_A; - typedef mma_int_B_J8K8 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K8 mma_A; + typedef mma_B_J8K8 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -782,7 +782,7 @@ static __device__ __forceinline__ void vec_dot_q8_1_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += QI8_1) { const int k0 = k00 + k01; - A[n][k01/QI8_1].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q8_1 + k0, MMQ_MMA_TILE_X_K_Q8_1); + A[n][k01/QI8_1].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q8_1 + k0, MMQ_MMA_TILE_X_K_Q8_1); } #pragma unroll @@ -805,7 +805,7 @@ static __device__ __forceinline__ void vec_dot_q8_1_q8_1_mma( mma_B B; float2 dsB[mma_C::ne/2]; - B.load(y_qs + j0*MMQ_TILE_Y_K + k01, MMQ_TILE_Y_K); + B.load_generic(y_qs + j0*MMQ_TILE_Y_K + k01, MMQ_TILE_Y_K); // faster than load_ldmatrix #pragma unroll for (int l = 0; l < mma_C::ne/2; ++l) { @@ -817,7 +817,7 @@ static __device__ __forceinline__ void vec_dot_q8_1_q8_1_mma( #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C C; - C.mma_K8(A[n][k01/QI8_1], B); + C.mma(A[n][k01/QI8_1], B); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -864,12 +864,12 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_dp4a( template static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE - typedef mma_int_A_I16K4 mma_A; - typedef mma_int_A_I16K8 mma_A_K8; - typedef mma_int_B_J8K4 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K4 mma_A; + typedef mma_A_I16K8 mma_A_K8; + typedef mma_B_J8K4 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -893,7 +893,7 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += 8) { const int k0 = k00 + k01; - ((mma_A_K8 *) A[n])[k01/8].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q3_K + k0, MMQ_MMA_TILE_X_K_Q3_K); + ((mma_A_K8 *) A[n])[k01/8].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q3_K + k0, MMQ_MMA_TILE_X_K_Q3_K); } #pragma unroll @@ -916,8 +916,9 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( mma_B B[2]; float dB[mma_C::ne/2]; - B[0].load(y_qs + j0*MMQ_TILE_Y_K + (k01 + 0), MMQ_TILE_Y_K); - B[1].load(y_qs + j0*MMQ_TILE_Y_K + (k01 + mma_B::K), MMQ_TILE_Y_K); + // Here load_generic is faster than load_ldmatrix. 
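+            // Presumably the plain per-thread 4 byte loads already coalesce well
+            // for this B tile shape, so the extra ldmatrix shuffle does not pay off.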
+ B[0].load_generic(y_qs + j0*MMQ_TILE_Y_K + (k01 + 0), MMQ_TILE_Y_K); + B[1].load_generic(y_qs + j0*MMQ_TILE_Y_K + (k01 + mma_B::K), MMQ_TILE_Y_K); #pragma unroll for (int l = 0; l < mma_C::ne/2; ++l) { @@ -929,8 +930,8 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C C[2]; - C[0].mma_K4(A[n][k01/4 + 0], B[0]); - C[1].mma_K4(A[n][k01/4 + 1], B[1]); + C[0].mma(A[n][k01/4 + 0], B[0]); + C[1].mma(A[n][k01/4 + 1], B[1]); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -942,20 +943,20 @@ static __device__ __forceinline__ void vec_dot_q8_0_16_q8_1_mma( #else GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(sum); NO_DEVICE_CODE; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template static __device__ __forceinline__ void load_tiles_q2_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + 2*WARP_SIZE); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_Q2_K, mmq_y); int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % QI2_K; @@ -977,11 +978,11 @@ template static __device__ __forceinlin const int x_qs_k = (x_ql_0 >> (2*l)) & 0x03030303; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q2_K + k] = x_qs_k; #else x_qs[i*(2*WARP_SIZE + 1) + k] = x_qs_k; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int sc_m = bxi->scales[kqsx]; @@ -992,11 +993,11 @@ template static __device__ __forceinlin const half2 x_dm_ik = make_half2(bxi_dmf.x*(sc_m & 0x0F), bxi_dmf.y*(sc_m >> 4)); #endif // FAST_FP16_AVAILABLE -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_dm[i*MMQ_MMA_TILE_X_K_Q2_K + kqsx] = x_dm_ik; #else x_dm[i*(WARP_SIZE + 1) + kqsx] = x_dm_ik; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -1051,12 +1052,12 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_dp4a( template static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE - typedef mma_int_A_I16K4 mma_A; - typedef mma_int_A_I16K8 mma_A_K8; - typedef mma_int_B_J8K4 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K4 mma_A; + typedef mma_A_I16K8 mma_A_K8; + typedef mma_B_J8K4 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -1081,7 +1082,7 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += QI8_1) { const int k0 = k00 + k01; - ((mma_A_K8 *) A[n])[k01/QI8_1].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q2_K + k0, MMQ_MMA_TILE_X_K_Q2_K); + ((mma_A_K8 *) A[n])[k01/QI8_1].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q2_K + k0, MMQ_MMA_TILE_X_K_Q2_K); } } @@ -1118,24 +1119,25 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += QI8_1) { mma_B B[2]; - B[0].load(y_qs + j0*MMQ_TILE_Y_K + (k01 + 0), MMQ_TILE_Y_K); - B[1].load(y_qs + j0*MMQ_TILE_Y_K + (k01 + mma_B::K), MMQ_TILE_Y_K); + // Here load_generic is faster than load_ldmatrix. 
+ B[0].load_generic(y_qs + j0*MMQ_TILE_Y_K + (k01 + 0), MMQ_TILE_Y_K); + B[1].load_generic(y_qs + j0*MMQ_TILE_Y_K + (k01 + mma_B::K), MMQ_TILE_Y_K); mma_C Cm[2]; if (k01 >= WARP_SIZE * 3/4) { mma_A A1; A1.x[0] = 0x01010101; A1.x[1] = 0x01010101; - Cm[0].mma_K4(A1, B[0]); - Cm[1].mma_K4(A1, B[1]); + Cm[0].mma(A1, B[0]); + Cm[1].mma(A1, B[1]); } #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C Cd[2]; - Cd[0].mma_K4(A[n][k01/4 + 0], B[0]); - Cd[1].mma_K4(A[n][k01/4 + 1], B[1]); + Cd[0].mma(A[n][k01/4 + 0], B[0]); + Cd[1].mma(A[n][k01/4 + 1], B[1]); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -1172,13 +1174,13 @@ static __device__ __forceinline__ void vec_dot_q2_K_q8_1_mma( #else GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(sum); NO_DEVICE_CODE; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template static __device__ __forceinline__ void load_tiles_q3_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else @@ -1186,7 +1188,7 @@ template static __device__ __forceinlin int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); int * x_sc = (int *) (x_df + txs.dm); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % QI3_K; @@ -1212,11 +1214,11 @@ template static __device__ __forceinlin const int x_qs_k = __vsubss4(x_ql_k | x_qh_k, 0x04040404); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + k] = x_qs_k; #else x_qs[i*(2*WARP_SIZE + 1) + k] = x_qs_k; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -1242,7 +1244,7 @@ template static __device__ __forceinlin const int sc = __vsubss4(sc_low | sc_high, 0x20202020); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE const int8_t * sc8 = (const int8_t *) ≻ const float d = bxi->d; @@ -1252,10 +1254,10 @@ template static __device__ __forceinlin } #else x_sc[i*(WARP_SIZE/8) + i/8 + threadIdx.x % (WARP_SIZE/8)] = sc; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } -#ifndef INT8_MMA_AVAILABLE +#ifndef NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps*WARP_SIZE) { int i = (i0 + threadIdx.y*WARP_SIZE + threadIdx.x) % mmq_y; @@ -1268,7 +1270,7 @@ template static __device__ __forceinlin x_df[i] = bxi->d; } -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template @@ -1317,7 +1319,7 @@ static __device__ __forceinline__ int unpack_scales_q45_K(const int * scales, co template static __device__ __forceinline__ void load_tiles_q4_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + 2*WARP_SIZE); #else @@ -1325,7 +1327,7 @@ template static __device__ __forceinlin int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); int * x_sc = (int *) (x_dm + txs.dm); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps) { @@ -1338,15 +1340,15 @@ template static __device__ __forceinlin const block_q4_K * bxi = (const block_q4_K *) x + kbx0 + i*stride; const int qs0 = get_int_b4(bxi->qs, threadIdx.x); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + 16*(threadIdx.x/8) + threadIdx.x % 8 + 0] = (qs0 >> 0) & 0x0F0F0F0F; 
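         // The high nibbles land 8 ints further along, so each 16 int group holds
         // the low halves of 8 values followed by the corresponding high halves: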
x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + 16*(threadIdx.x/8) + threadIdx.x % 8 + 8] = (qs0 >> 4) & 0x0F0F0F0F; #else x_qs[i*(WARP_SIZE + 1) + threadIdx.x] = qs0; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps*16) { @@ -1407,7 +1409,7 @@ template static __device__ __forceinlin x_sc[i*(WARP_SIZE/8) + i/8 + ksc] = scales8; } -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template @@ -1446,7 +1448,7 @@ static __device__ __forceinline__ void vec_dot_q4_K_q8_1_dp4a( template static __device__ __forceinline__ void load_tiles_q5_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + WARP_SIZE*2); #else @@ -1454,7 +1456,7 @@ template static __device__ __forceinlin int * x_qs = (int *) x_tile; half2 * x_dm = (half2 *) (x_qs + txs.qs); int * x_sc = (int *) (x_dm + txs.dm); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps) { @@ -1478,16 +1480,16 @@ template static __device__ __forceinlin const int kq0 = ky - ky % (QI5_K/2) + threadIdx.x % (QI5_K/4) + 0; const int kq1 = ky - ky % (QI5_K/2) + threadIdx.x % (QI5_K/4) + QI5_K/4; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kq0] = ql0 | qh0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + kq1] = ql1 | qh1; #else x_qs[i*(2*WARP_SIZE + 1) + kq0] = ql0 | qh0; x_qs[i*(2*WARP_SIZE + 1) + kq1] = ql1 | qh1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps*16) { @@ -1548,7 +1550,7 @@ template static __device__ __forceinlin x_sc[i*(WARP_SIZE/8) + i/8 + ksc] = scales8; } -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template @@ -1587,7 +1589,7 @@ static __device__ __forceinline__ void vec_dot_q5_K_q8_1_dp4a( template static __device__ __forceinline__ void load_tiles_q6_K( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); int * x_sc = (int *) (x_df + WARP_SIZE/QI6_K); @@ -1596,7 +1598,7 @@ template static __device__ __forceinlin int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); int * x_sc = (int *) (x_df + txs.dm); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE #pragma unroll for (int i0 = 0; i0 < mmq_y; i0 += nwarps) { @@ -1619,13 +1621,13 @@ template static __device__ __forceinlin const int kq0 = 2*threadIdx.x - threadIdx.x % (QI6_K/2) + 0; const int kq1 = 2*threadIdx.x - threadIdx.x % (QI6_K/2) + QI6_K/2; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q6_K + kq0] = __vsubss4(ql0 | qh0, 0x20202020); x_qs[i*MMQ_MMA_TILE_X_K_Q6_K + kq1] = __vsubss4(ql1 | qh1, 0x20202020); #else x_qs[i*(2*WARP_SIZE + 1) + kq0] = __vsubss4(ql0 | qh0, 0x20202020); x_qs[i*(2*WARP_SIZE + 1) + kq1] = __vsubss4(ql1 | qh1, 0x20202020); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI6_K; // == 1 if QK_K == 256 @@ -1641,11 +1643,11 @@ template static __device__ __forceinlin const block_q6_K * bxi = (const block_q6_K *) x + kbx0 + i*stride + kbxd; -#ifdef 
INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q6_K + kbxd] = bxi->d; #else x_df[i*(WARP_SIZE/QI6_K) + i/QI6_K + kbxd] = bxi->d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } #pragma unroll @@ -1658,11 +1660,11 @@ template static __device__ __forceinlin const block_q6_K * bxi = (const block_q6_K *) x + kbx0 + i*stride + (threadIdx.x % (WARP_SIZE/8)) / 4; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_sc[i*MMQ_MMA_TILE_X_K_Q6_K + threadIdx.x % (WARP_SIZE/8)] = get_int_b2(bxi->scales, threadIdx.x % (QI6_K/8)); #else x_sc[i*(WARP_SIZE/8) + i/8 + threadIdx.x % (WARP_SIZE/8)] = get_int_b2(bxi->scales, threadIdx.x % (QI6_K/8)); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -1702,11 +1704,11 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_dp4a( template static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( const int * __restrict__ x, const int * __restrict__ y, float * __restrict__ sum, const int & k00) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE - typedef mma_int_A_I16K4 mma_A; - typedef mma_int_B_J8K4 mma_B; - typedef mma_int_C_I16J8 mma_C; + typedef mma_A_I16K4 mma_A; + typedef mma_B_J8K4 mma_B; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; @@ -1732,8 +1734,8 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( for (int k01 = 0; k01 < WARP_SIZE; k01 += 8) { const int k0 = k00 + k01; - A[n][k01/4 + 0].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q6_K + (k0 + 0), MMQ_MMA_TILE_X_K_Q6_K); - A[n][k01/4 + 1].load(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q6_K + (k0 + mma_A::K), MMQ_MMA_TILE_X_K_Q6_K); + A[n][k01/4 + 0].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q6_K + (k0 + 0), MMQ_MMA_TILE_X_K_Q6_K); + A[n][k01/4 + 1].load_ldmatrix(x_qs + (i0 + n*mma_A::I)*MMQ_MMA_TILE_X_K_Q6_K + (k0 + mma_A::K), MMQ_MMA_TILE_X_K_Q6_K); } #pragma unroll @@ -1771,8 +1773,9 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( mma_B B[2]; float dB[mma_C::ne/2]; - B[0].load(y_qs + j0*MMQ_TILE_Y_K + 0 + k01, MMQ_TILE_Y_K); - B[1].load(y_qs + j0*MMQ_TILE_Y_K + mma_B::K + k01, MMQ_TILE_Y_K); + // Here load_generic is faster than load_ldmatrix. 
+ B[0].load_generic(y_qs + j0*MMQ_TILE_Y_K + 0 + k01, MMQ_TILE_Y_K); + B[1].load_generic(y_qs + j0*MMQ_TILE_Y_K + mma_B::K + k01, MMQ_TILE_Y_K); #pragma unroll for (int l = 0; l < mma_C::ne/2; ++l) { @@ -1784,8 +1787,8 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( #pragma unroll for (int n = 0; n < ntx; ++n) { mma_C C[2]; - C[0].mma_K4(A[n][k01/4 + 0], B[0]); - C[1].mma_K4(A[n][k01/4 + 1], B[1]); + C[0].mma(A[n][k01/4 + 0], B[0]); + C[1].mma(A[n][k01/4 + 1], B[1]); #pragma unroll for (int l = 0; l < mma_C::ne; ++l) { @@ -1805,20 +1808,20 @@ static __device__ __forceinline__ void vec_dot_q6_K_q8_1_mma( #else GGML_UNUSED(x); GGML_UNUSED(y); GGML_UNUSED(sum); NO_DEVICE_CODE; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } template static __device__ __forceinline__ void load_tiles_iq4_nl( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ4_NL, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = threadIdx.x / QI4_NL; const int kqsx = threadIdx.x % QI4_NL; @@ -1836,13 +1839,13 @@ template static __device__ __forceinlin const int aux_q4 = get_int_b2(bxi->qs, kqsx); const int2 v = get_int_from_table_16(aux_q4); const int k0 = 8 * (threadIdx.x / 4) + threadIdx.x % 4; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k0 + 0] = v.x; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k0 + 4] = v.y; #else x_qs[i*(2*WARP_SIZE + 1) + k0 + 0] = v.x; x_qs[i*(2*WARP_SIZE + 1) + k0 + 4] = v.y; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int blocks_per_tile_x_row = WARP_SIZE / QI4_NL; @@ -1858,25 +1861,25 @@ template static __device__ __forceinlin const block_iq4_nl * bxi = (const block_iq4_nl *) x + kbx0 + i*stride + kbxd; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kbxd] = __half2float(bxi->d); #else x_df[i*(WARP_SIZE/4) + i/4 + kbxd] = __half2float(bxi->d); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq2_xxs( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ2_XXS, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI2_XXS/2); @@ -1905,36 +1908,36 @@ template static __device__ __forceinlin const int signs1 = __vcmpne4(((signs_packed & 0x30) << 3) | ((signs_packed & 0xC0) << 17), 0x00000000); const int grid1 = __vsub4(grid_pos[1] ^ signs1, signs1); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l + 0)] = grid0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l + 1)] = grid1; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 0)] = grid0; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 1)] = grid1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int ls = aux32 >> 28; const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef 
NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kqsx] = (ls*d + d/2)/4; #else x_df[i*(WARP_SIZE/4) + i/4 + kqsx] = (ls*d + d/2)/4; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq2_xs( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = MMQ_DP4A_TXS_Q8_0_16; int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI2_XS/2); @@ -1959,38 +1962,38 @@ template static __device__ __forceinlin const int grid_l = __vsub4(grid_pos[0] ^ signs[0], signs[0]); const int grid_h = __vsub4(grid_pos[1] ^ signs[1], signs[1]); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + 8*kqsx + (2*l + 1)] = grid_h; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 1)] = grid_h; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int ls = bxi->scales[kqsx]; const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q3_K + 2*kqsx+0] = ((ls & 0x0F)*d + d/2)/4; x_df[i*MMQ_MMA_TILE_X_K_Q3_K + 2*kqsx+1] = ((ls >> 4)*d + d/2)/4; #else x_df[i*(2*WARP_SIZE*2/QI8_0) + i/(QI8_0/4) + 2*kqsx+0] = ((ls & 0x0F)*d + d/2)/4; x_df[i*(2*WARP_SIZE*2/QI8_0) + i/(QI8_0/4) + 2*kqsx+1] = ((ls >> 4)*d + d/2)/4; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq2_s( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ2_S, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI2_S/2); @@ -2022,38 +2025,38 @@ template static __device__ __forceinlin const int grid_l = __vsub4(grid_pos[0] ^ signs0, signs0); const int grid_h = __vsub4(grid_pos[1] ^ signs1, signs1); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*MMQ_MMA_TILE_X_K_Q3_K + 8*kqsx + (2*l + 1)] = grid_h; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 1)] = grid_h; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int ls = bxi->scales[kqsx]; const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q3_K + 2*kqsx+0] = ((ls & 0x0F)*d + d/2)/4; x_df[i*MMQ_MMA_TILE_X_K_Q3_K + 2*kqsx+1] = ((ls >> 4)*d + d/2)/4; #else x_df[i*(2*WARP_SIZE*2/QI8_0) + i/(QI8_0/4) + 2*kqsx+0] = ((ls & 0x0F)*d + d/2)/4; x_df[i*(2*WARP_SIZE*2/QI8_0) + i/(QI8_0/4) + 2*kqsx+1] = ((ls >> 4)*d + d/2)/4; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq3_xxs( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef 
NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ3_XXS, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI3_XXS/2); @@ -2080,36 +2083,36 @@ template static __device__ __forceinlin const int grid_l = __vsub4(grid_pos.x ^ signs[0], signs[0]); const int grid_h = __vsub4(grid_pos.y ^ signs[1], signs[1]); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l + 1)] = grid_h; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 0)] = grid_l; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l + 1)] = grid_h; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int ls = aux32 >> 28; const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kqsx] = (ls*d + d/2)/2; #else x_df[i*(WARP_SIZE/4) + i/4 + kqsx] = (ls*d + d/2)/2; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq3_s( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ3_S, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % (QI3_S/2); @@ -2143,36 +2146,36 @@ template static __device__ __forceinlin const int grid_l = __vsub4(grid_pos.x ^ signs0, signs0); const int grid_h = __vsub4(grid_pos.y ^ signs1, signs1); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l+0)] = grid_l; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + 8*kqsx + (2*l+1)] = grid_h; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l+0)] = grid_l; x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l+1)] = grid_h; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const int ls = 1 + 2*((bxi->scales[kqsx/2] >> (((2*kqsx) << 1) & 0x04)) & 0x0F); const float d = bxi->d; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + kqsx] = ls*d; #else x_df[i*(WARP_SIZE/4) + i/4 + kqsx] = ls*d; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq1_s( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; half2 * x_ds = (half2 *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ3_S, mmq_y); int * x_qs = (int *) x_tile; half2 * x_ds = (half2 *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kqsx = threadIdx.x % QI1_S; @@ -2198,37 +2201,37 @@ template static __device__ __forceinlin const int grid0 = (grid >> 0) & 0x0F0F0F0F; const int grid1 = (grid >> 4) & 0x0F0F0F0F; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + 8*kqsx + (2*l+0)] = grid0; x_qs[i*MMQ_MMA_TILE_X_K_Q8_1 + 8*kqsx + (2*l+1)] = grid1; #else x_qs[i*(2*WARP_SIZE + 1) + 8*kqsx + (2*l+0)] = grid0; x_qs[i*(2*WARP_SIZE 
+ 1) + 8*kqsx + (2*l+1)] = grid1; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } const float d1q = __half2float(bxi->d) * (((qh >> 11) & 0x0E) + 1); const float delta = -1.0f + IQ1S_DELTA - (qh & 0x8000) * (2.0f*IQ1S_DELTA/0x8000); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_ds[i*MMQ_MMA_TILE_X_K_Q8_1 + kqsx] = make_half2(d1q, d1q*delta); #else x_ds[i*(WARP_SIZE/4) + i/4 + kqsx] = make_half2(d1q, d1q*delta); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } template static __device__ __forceinline__ void load_tiles_iq4_xs( const char * __restrict__ x, int * __restrict__ x_tile, const int & kbx0, const int & i_max, const int & stride) { -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + WARP_SIZE*2); #else constexpr tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(GGML_TYPE_IQ4_XS, mmq_y); int * x_qs = (int *) x_tile; float * x_df = (float *) (x_qs + txs.qs); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE const int kbx = 0; // threadIdx.x / QI4_XS const int kqsx = threadIdx.x; // threadIdx.x % QI4_XS @@ -2246,13 +2249,13 @@ template static __device__ __forceinlin const int aux_q4 = get_int_b4(bxi->qs, kqsx); const int2 v = get_int_from_table_16(aux_q4); const int k0 = 8 * (threadIdx.x / 4) + threadIdx.x % 4; -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k0 + 0] = v.x; x_qs[i*MMQ_MMA_TILE_X_K_Q8_0 + k0 + 4] = v.y; #else x_qs[i*(2*WARP_SIZE + 1) + k0 + 0] = v.x; x_qs[i*(2*WARP_SIZE + 1) + k0 + 4] = v.y; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } #pragma unroll @@ -2270,11 +2273,11 @@ template static __device__ __forceinlin const int ls = ((bxi->scales_l[(threadIdx.x % 8)/2] >> (4*(threadIdx.x % 2))) & 0x0F) | (((bxi->scales_h >> (2*(threadIdx.x % 8))) & 0x03) << 4); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE x_df[i*MMQ_MMA_TILE_X_K_Q8_0 + threadIdx.x % 8] = d * (ls - 32); #else x_df[i*(WARP_SIZE/4) + i/4 + threadIdx.x % 8] = d * (ls - 32); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE } } @@ -2307,16 +2310,16 @@ template static __device__ __forceinline__ void mmq_write_back_mma( const float * __restrict__ sum, float * __restrict__ dst, const int & stride, const int & i_max, const int & j_max) { - typedef mma_int_C_I16J8 mma_C; + typedef mma_C_I16J8 mma_C; constexpr int granularity = mmq_get_granularity_device(mmq_x); constexpr int rows_per_warp = 2 * granularity; constexpr int ntx = rows_per_warp/mma_C::I; // Number of x minitiles per warp. 
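// Warps are grouped in sets of ntx; i0 below is the first output row owned by this warp's group.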
const int i0 = (threadIdx.y / ntx) * (ntx*mma_C::I); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE static_assert(nwarps*mma_C::I == mmq_y, "nwarps*mma_C::I != mmq_y"); -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE #pragma unroll for (int j0 = 0; j0 < mmq_x; j0 += ntx*mma_C::J) { @@ -2505,13 +2508,13 @@ static __device__ void mul_mat_q_process_tile( int * tile_y = (int *) data_mul_mat_q; int * tile_x = tile_y + GGML_PAD(mmq_x*(WARP_SIZE + WARP_SIZE/QI8_1), nwarps*WARP_SIZE); -#ifdef INT8_MMA_AVAILABLE +#ifdef NEW_MMA_AVAILABLE constexpr vec_dot_mmq_t vec_dot = mmq_type_traits::vec_dot_mma; constexpr mmq_write_back_t write_back = mmq_write_back_mma; #else constexpr vec_dot_mmq_t vec_dot = mmq_type_traits::vec_dot_dp4a; constexpr mmq_write_back_t write_back = mmq_write_back_dp4a; -#endif // INT8_MMA_AVAILABLE +#endif // NEW_MMA_AVAILABLE constexpr int blocks_per_iter = MMQ_ITER_K / qk; @@ -2643,7 +2646,7 @@ static __global__ void mul_mat_q( const int jt = kbc / (blocks_per_ne00*nty); const int it = (kbc - jt*(blocks_per_ne00*nty)) / blocks_per_ne00; - constexpr bool fixup = true; // Last index writes it data to fixup buffer to avoid data races with other blocks. + constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks. mul_mat_q_process_tile (x, yc, dst, tmp_fixup, ne00, ne01, stride01, ne10, ne11, stride11, ne0, it, jt, kb0_start, kb0_stop); @@ -2749,7 +2752,7 @@ template static int mmq_get_shmem(const int mmq_x, const int mmq_y, const int cc) { const tile_x_sizes txs = mmq_get_dp4a_tile_x_sizes(type, mmq_y); const int mmq_tile_x_k = mmq_get_mma_tile_x_k(type); - const int shmem_x = int8_mma_available(cc) ? mmq_y*mmq_tile_x_k*sizeof(int) : txs.qs*sizeof(int) + txs.dm*sizeof(half2) + txs.sc*sizeof(int); + const int shmem_x = new_mma_available(cc) ? mmq_y*mmq_tile_x_k*sizeof(int) : txs.qs*sizeof(int) + txs.dm*sizeof(half2) + txs.sc*sizeof(int); const int shmem_y = mmq_x*sizeof(block_q8_1_mmq); return shmem_x + GGML_PAD(shmem_y, MMQ_NWARPS*WARP_SIZE*sizeof(int)); } diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu new file mode 100644 index 000000000..f09bdeff7 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb16.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-mma-f16.cuh" + +DECL_FATTN_MMA_F16_CASE(64, 16); +DECL_FATTN_MMA_F16_CASE(80, 16); +DECL_FATTN_MMA_F16_CASE(96, 16); +DECL_FATTN_MMA_F16_CASE(112, 16); +DECL_FATTN_MMA_F16_CASE(128, 16); +DECL_FATTN_MMA_F16_CASE(256, 16); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu new file mode 100644 index 000000000..221108873 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb32.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
+ +#include "../fattn-mma-f16.cuh" + +DECL_FATTN_MMA_F16_CASE(64, 32); +DECL_FATTN_MMA_F16_CASE(80, 32); +DECL_FATTN_MMA_F16_CASE(96, 32); +DECL_FATTN_MMA_F16_CASE(112, 32); +DECL_FATTN_MMA_F16_CASE(128, 32); +DECL_FATTN_MMA_F16_CASE(256, 32); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu new file mode 100644 index 000000000..d24b08575 --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb64.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-mma-f16.cuh" + +DECL_FATTN_MMA_F16_CASE(64, 64); +DECL_FATTN_MMA_F16_CASE(80, 64); +DECL_FATTN_MMA_F16_CASE(96, 64); +DECL_FATTN_MMA_F16_CASE(112, 64); +DECL_FATTN_MMA_F16_CASE(128, 64); +DECL_FATTN_MMA_F16_CASE(256, 64); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu new file mode 100644 index 000000000..bdf86c0ea --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-cpb8.cu @@ -0,0 +1,10 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-mma-f16.cuh" + +DECL_FATTN_MMA_F16_CASE(64, 8); +DECL_FATTN_MMA_F16_CASE(80, 8); +DECL_FATTN_MMA_F16_CASE(96, 8); +DECL_FATTN_MMA_F16_CASE(112, 8); +DECL_FATTN_MMA_F16_CASE(128, 8); +DECL_FATTN_MMA_F16_CASE(256, 8); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu deleted file mode 100644 index 2d94e65c2..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 16, float); -DECL_FATTN_WMMA_F16_CASE(80, 16, float); -DECL_FATTN_WMMA_F16_CASE(96, 16, float); -DECL_FATTN_WMMA_F16_CASE(112, 16, float); -DECL_FATTN_WMMA_F16_CASE(128, 16, float); -DECL_FATTN_WMMA_F16_CASE(256, 16, float); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu deleted file mode 100644 index c3d9df3c4..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +++ /dev/null @@ -1,9 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 32, float); -DECL_FATTN_WMMA_F16_CASE(80, 32, float); -DECL_FATTN_WMMA_F16_CASE(96, 32, float); -DECL_FATTN_WMMA_F16_CASE(112, 32, float); -DECL_FATTN_WMMA_F16_CASE(128, 32, float); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu deleted file mode 100644 index bb680e401..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
- -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 16, half); -DECL_FATTN_WMMA_F16_CASE(80, 16, half); -DECL_FATTN_WMMA_F16_CASE(96, 16, half); -DECL_FATTN_WMMA_F16_CASE(112, 16, half); -DECL_FATTN_WMMA_F16_CASE(128, 16, half); -DECL_FATTN_WMMA_F16_CASE(256, 16, half); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu deleted file mode 100644 index 073f71b1f..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 32, half); -DECL_FATTN_WMMA_F16_CASE(80, 32, half); -DECL_FATTN_WMMA_F16_CASE(96, 32, half); -DECL_FATTN_WMMA_F16_CASE(112, 32, half); -DECL_FATTN_WMMA_F16_CASE(128, 32, half); -DECL_FATTN_WMMA_F16_CASE(256, 32, half); diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu deleted file mode 100644 index d30710c5f..000000000 --- a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +++ /dev/null @@ -1,8 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 8, half); -DECL_FATTN_WMMA_F16_CASE(96, 8, half); -DECL_FATTN_WMMA_F16_CASE(128, 8, half); -DECL_FATTN_WMMA_F16_CASE(256, 8, half); diff --git a/ggml/src/ggml-cuda/template-instances/generate_cu_files.py b/ggml/src/ggml-cuda/template-instances/generate_cu_files.py index d7874e6ea..a2628f16e 100755 --- a/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +++ b/ggml/src/ggml-cuda/template-instances/generate_cu_files.py @@ -12,13 +12,13 @@ SOURCE_FATTN_VEC = """// This file has been autogenerated by generate_cu_files.p DECL_FATTN_VEC_F{vkq_size}_CASE({head_size}, {type_k}, {type_v}); """ -SOURCE_FATTN_WMMA_START = """// This file has been autogenerated by generate_cu_files.py, do not edit manually. +SOURCE_FATTN_MMA_START = """// This file has been autogenerated by generate_cu_files.py, do not edit manually. 
-#include "../fattn-wmma-f16.cuh" +#include "../fattn-mma-f16.cuh" """ -SOURCE_FATTN_WMMA_CASE = "DECL_FATTN_WMMA_F16_CASE({head_size}, {cols_per_block}, {kq_acc_t});\n" +SOURCE_FATTN_MMA_CASE = "DECL_FATTN_MMA_F16_CASE({head_size}, {cols_per_block});\n" TYPES_MMQ = [ "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0", @@ -57,20 +57,12 @@ for vkq_size in [16, 32]: with open(f"fattn-vec-f{vkq_size}-instance-hs{head_size}-{get_short_name(type_k)}-{get_short_name(type_v)}.cu", "w") as f: f.write(SOURCE_FATTN_VEC.format(vkq_size=vkq_size, head_size=head_size, type_k=type_k, type_v=type_v)) -for kq_acc_t in ["half", "float"]: - for cols_per_block in [8, 16, 32]: - if kq_acc_t == "float" and cols_per_block == 8: - continue +for cols_per_block in [8, 16, 32, 64]: + with open(f"fattn-mma-f16-instance-cpb{cols_per_block}.cu", "w") as f: + f.write(SOURCE_FATTN_MMA_START) - with open(f"fattn-wmma-f16-instance-kq{kq_acc_t}-cpb{cols_per_block}.cu", "w") as f: - f.write(SOURCE_FATTN_WMMA_START) - - for head_size in [64, 80, 96, 112, 128, 256]: - if cols_per_block == 8 and head_size % 32 != 0: # wmma fragment is 8x32 - continue - if kq_acc_t == "float" and cols_per_block == 32 and head_size == 256: # register spilling, bad performance - continue - f.write(SOURCE_FATTN_WMMA_CASE.format(kq_acc_t=kq_acc_t, cols_per_block=cols_per_block, head_size=head_size)) + for head_size in [64, 80, 96, 112, 128, 256]: + f.write(SOURCE_FATTN_MMA_CASE.format(cols_per_block=cols_per_block, head_size=head_size)) for type in TYPES_MMQ: with open(f"mmq-instance-{get_short_name(type)}.cu", "w") as f: diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index 8594093f0..129478ed7 100644 --- a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -25,6 +25,7 @@ #define CU_MEM_LOCATION_TYPE_DEVICE hipMemLocationTypeDevice #define CU_MEM_ACCESS_FLAGS_PROT_READWRITE hipMemAccessFlagsProtReadWrite #define CU_CHECK(fn) {hipError_t err = fn; if(err != hipSuccess) { GGML_ABORT("HipVMM Failure: %s\n", hipGetErrorString(err)); }} +#define __shfl_sync(mask, var, laneMask, width) __shfl(var, laneMask, width) #define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) #define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6 #define cublasCreate hipblasCreate diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index 7a877bdc1..eb03e10fa 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -50,7 +50,7 @@ file(GLOB GGML_HEADERS_ROCM "../ggml-cuda/*.cuh") list(APPEND GGML_HEADERS_ROCM "../../include/ggml-cuda.h") file(GLOB GGML_SOURCES_ROCM "../ggml-cuda/*.cu") -file(GLOB SRCS "../ggml-cuda/template-instances/fattn-wmma*.cu") +file(GLOB SRCS "../ggml-cuda/template-instances/fattn-mma*.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu") list(APPEND GGML_SOURCES_ROCM ${SRCS}) diff --git a/ggml/src/ggml-musa/CMakeLists.txt b/ggml/src/ggml-musa/CMakeLists.txt index 415b2b2e0..2f555416e 100644 --- a/ggml/src/ggml-musa/CMakeLists.txt +++ b/ggml/src/ggml-musa/CMakeLists.txt @@ -29,7 +29,7 @@ if (MUSAToolkit_FOUND) list(APPEND GGML_HEADERS_MUSA "../../include/ggml-cuda.h") file(GLOB GGML_SOURCES_MUSA "../ggml-cuda/*.cu") - file(GLOB SRCS "../ggml-cuda/template-instances/fattn-wmma*.cu") + file(GLOB SRCS "../ggml-cuda/template-instances/fattn-mma*.cu") list(APPEND GGML_SOURCES_MUSA ${SRCS}) 
file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu")
list(APPEND GGML_SOURCES_MUSA ${SRCS})

From 90f9b88afb6447d3929843a2aa98c0f11074762d Mon Sep 17 00:00:00 2001
From: Olivier Chafik
Date: Sun, 2 Feb 2025 19:58:34 +0000
Subject: [PATCH 05/36] nit: more informative crash when grammar sampler fails
 (#11593)

---
 src/llama-grammar.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index 6be5cbe0e..9b518d1ac 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -1213,5 +1213,7 @@ void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string
 }
 grammar.partial_utf8 = decoded.second;

-    GGML_ASSERT(!grammar.stacks.empty());
+    if (grammar.stacks.empty()) {
+        throw std::runtime_error("Unexpected empty grammar stack after accepting piece: " + piece);
+    }
 }

From 4d0598e1445a64c99cf2faac72f8d5d023f1e6a1 Mon Sep 17 00:00:00 2001
From: uvos
Date: Sun, 2 Feb 2025 22:08:05 +0100
Subject: [PATCH 06/36] HIP: add GGML_CUDA_CC_IS_* for AMD families, as
 increasing cc architectures for AMD GPUs are not supersets of each other
 (#11601)

This fixes a bug where RDNA1 GPUs other than gfx1010 were not handled
correctly
---
 ggml/src/ggml-cuda/common.cuh   | 7 +++++++
 ggml/src/ggml-cuda/ggml-cuda.cu | 4 ++--
 ggml/src/ggml-cuda/mmq.cu       | 2 +-
 ggml/src/ggml-cuda/mmq.cuh      | 2 +-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
index 88be8fc8a..232163c1c 100644
--- a/ggml/src/ggml-cuda/common.cuh
+++ b/ggml/src/ggml-cuda/common.cuh
@@ -61,6 +61,13 @@
 #define GGML_CUDA_CC_RDNA2 (GGML_CUDA_CC_OFFSET_AMD + 0x1030) // RX 6000, minimum for dp4a
 #define GGML_CUDA_CC_RDNA3 (GGML_CUDA_CC_OFFSET_AMD + 0x1100) // RX 7000, minimum for WMMA

+#define GGML_CUDA_CC_IS_RDNA(cc)  (cc >= GGML_CUDA_CC_RDNA1)
+#define GGML_CUDA_CC_IS_RDNA1(cc) (cc >= GGML_CUDA_CC_RDNA1 && cc < GGML_CUDA_CC_RDNA2)
+#define GGML_CUDA_CC_IS_RDNA2(cc) (cc >= GGML_CUDA_CC_RDNA2 && cc < GGML_CUDA_CC_RDNA3)
+#define GGML_CUDA_CC_IS_RDNA3(cc) (cc >= GGML_CUDA_CC_RDNA3)
+#define GGML_CUDA_CC_IS_GCN(cc)   (cc > GGML_CUDA_CC_OFFSET_AMD && cc < GGML_CUDA_CC_CDNA)
+#define GGML_CUDA_CC_IS_CDNA(cc)  (cc >= GGML_CUDA_CC_CDNA && cc < GGML_CUDA_CC_RDNA1)
+
 #define GGML_CUDA_CC_QY1 210
 #define GGML_CUDA_CC_QY2 220

diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
index 383131c77..bda10aec1 100644
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -1205,7 +1205,7 @@ static void ggml_cuda_op_mul_mat_cublas(
 CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream));

-    if (compute_capability == GGML_CUDA_CC_CDNA) {
+    if (GGML_CUDA_CC_IS_CDNA(compute_capability)) {
 const float alpha = 1.0f;
 const float beta = 0.0f;
 CUBLAS_CHECK(
@@ -1750,7 +1750,7 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co
 beta = &beta_f32;
 }

-    if (ggml_cuda_info().devices[ctx.device].cc == GGML_CUDA_CC_CDNA) {
+    if (GGML_CUDA_CC_IS_CDNA(ggml_cuda_info().devices[ctx.device].cc)) {
 cu_compute_type = CUBLAS_COMPUTE_32F;
 alpha = &alpha_f32;
 beta = &beta_f32;
diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu
index 83cb78cbd..45212f66c 100644
--- a/ggml/src/ggml-cuda/mmq.cu
+++ b/ggml/src/ggml-cuda/mmq.cu
@@ -148,5 +148,5 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
 return cc < GGML_CUDA_CC_VOLTA || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
 }

-    return (cc < GGML_CUDA_CC_RDNA3 && cc != GGML_CUDA_CC_CDNA && cc !=
GGML_CUDA_CC_VEGA20) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE; + return (!GGML_CUDA_CC_IS_RDNA3(cc) && !GGML_CUDA_CC_IS_CDNA(cc) && !GGML_CUDA_CC_IS_GCN(cc)) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE; } diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index c05c84778..7a2c4d85b 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -120,7 +120,7 @@ static constexpr __device__ int get_mmq_x_max_device() { } static constexpr int get_mmq_y_host(const int cc) { - return cc >= GGML_CUDA_CC_OFFSET_AMD ? (cc == GGML_CUDA_CC_RDNA1 ? 64 : 128) : (cc >= GGML_CUDA_CC_VOLTA ? 128 : 64); + return cc >= GGML_CUDA_CC_OFFSET_AMD ? (GGML_CUDA_CC_IS_RDNA1(cc) ? 64 : 128) : (cc >= GGML_CUDA_CC_VOLTA ? 128 : 64); } static constexpr __device__ int get_mmq_y_device() { From 396856b40029dd6747d2fbdb179e828683418045 Mon Sep 17 00:00:00 2001 From: uvos Date: Sun, 2 Feb 2025 22:40:09 +0100 Subject: [PATCH 07/36] CUDA/HIP: add support for selectable warp size to mmv (#11519) CUDA/HIP: add support for selectable warp size to mmv --- ggml/src/ggml-cuda/common.cuh | 8 +++++++ ggml/src/ggml-cuda/mmv.cu | 38 ++++++++++++++++++++------------ ggml/src/ggml-cuda/vendors/hip.h | 2 ++ 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 232163c1c..174916bc9 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -176,6 +176,14 @@ static constexpr bool new_mma_available(const int cc) { return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_TURING; } +static constexpr __device__ int ggml_cuda_get_physical_warp_size() { +#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) + return __AMDGCN_WAVEFRONT_SIZE; +#else + return 32; +#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) +} + [[noreturn]] static __device__ void no_device_code( const char * file_name, const int line, const char * function_name, const int arch, const char * arch_list) { diff --git a/ggml/src/ggml-cuda/mmv.cu b/ggml/src/ggml-cuda/mmv.cu index ac45f2d17..5a9ddd958 100644 --- a/ggml/src/ggml-cuda/mmv.cu +++ b/ggml/src/ggml-cuda/mmv.cu @@ -5,9 +5,10 @@ template static __global__ void mul_mat_vec( const T * __restrict__ x, const float * __restrict__ y, float * __restrict__ dst, const int64_t ncols2, const int64_t stride_row, const int64_t channel_ratio, const int64_t stride_channel_x, const int64_t stride_channel_y, const int64_t stride_channel_dst) { - const int64_t row = blockIdx.x; - const int64_t channel = blockIdx.z; - const int tid = threadIdx.x; + const int64_t row = blockIdx.x; + const int64_t channel = blockIdx.z; + const int tid = threadIdx.x; + constexpr int warp_size = ggml_cuda_get_physical_warp_size(); x += (channel/channel_ratio)*stride_channel_x + row*stride_row; y += channel *stride_channel_y; @@ -18,8 +19,8 @@ static __global__ void mul_mat_vec( extern __shared__ char data_mmv[]; float * buf_iw = (float *) data_mmv; - if (block_size > WARP_SIZE) { - if (tid < WARP_SIZE) { + if (block_size > warp_size) { + if (tid < warp_size) { buf_iw[tid] = 0.0f; } __syncthreads(); @@ -67,16 +68,16 @@ static __global__ void mul_mat_vec( static_assert(std::is_same::value, "unsupported type"); } - sumf = warp_reduce_sum(sumf); + sumf = warp_reduce_sum(sumf); - if (block_size > WARP_SIZE) { - buf_iw[tid/WARP_SIZE] = sumf; + if (block_size > warp_size) { + buf_iw[tid/warp_size] = sumf; __syncthreads(); - if (tid >= WARP_SIZE) { + if (tid >= warp_size) { return; } sumf = buf_iw[tid]; - sumf = warp_reduce_sum(sumf); 
+ sumf = warp_reduce_sum(sumf); } if (tid != 0) { @@ -96,10 +97,19 @@ static void launch_mul_mat_vec_cuda( GGML_ASSERT(stride_row % 2 == 0); GGML_ASSERT(nchannels_y % nchannels_x == 0); const int64_t channel_ratio = nchannels_y / nchannels_x; + int device; + int warp_size; - int64_t block_size_best = WARP_SIZE; - int64_t niter_best = (ncols + 2*WARP_SIZE - 1) / (2*WARP_SIZE); - for (int64_t block_size = 2*WARP_SIZE; block_size <= 256; block_size += WARP_SIZE) { + CUDA_CHECK(cudaGetDevice(&device)); + warp_size = ggml_cuda_info().devices[device].warp_size; + + int64_t block_size_best = warp_size; + int64_t niter_best = (ncols + 2*warp_size - 1) / (2*warp_size); + int64_t max_block_size = 256; + if(ggml_cuda_info().devices[device].cc > GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_info().devices[device].cc < GGML_CUDA_CC_RDNA1) { + max_block_size = 128; + } + for (int64_t block_size = 2*warp_size; block_size <= max_block_size; block_size += warp_size) { const int64_t niter = (ncols + 2*block_size - 1) / (2*block_size); if (niter < niter_best) { niter_best = niter; @@ -107,7 +117,7 @@ static void launch_mul_mat_vec_cuda( } } - const int smem = WARP_SIZE*sizeof(float); + const int smem = warp_size*sizeof(float); const dim3 block_nums(nrows, 1, nchannels_y); const dim3 block_dims(block_size_best, 1, 1); switch (block_size_best) { diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index 129478ed7..81964611c 100644 --- a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -1,5 +1,6 @@ #pragma once +#define HIP_ENABLE_WARP_SYNC_BUILTINS 1 #include #include #include @@ -8,6 +9,7 @@ // for rocblas_initialize() #include "rocblas/rocblas.h" #endif // __HIP_PLATFORM_AMD__ + #define CUBLAS_COMPUTE_16F HIPBLAS_R_16F #define CUBLAS_COMPUTE_32F HIPBLAS_R_32F #define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F From 6eecde3cc8fda44da7794042e3668de4af3c32c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sun, 2 Feb 2025 23:48:29 +0100 Subject: [PATCH 08/36] HIP: fix flash_attn_stream_k_fixup warning (#11604) --- ggml/src/ggml-cuda/fattn-common.cuh | 10 ++++++++++ ggml/src/ggml-cuda/softmax.cu | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index cfd7c0f44..d40ee2da4 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -516,6 +516,12 @@ constexpr __device__ dequantize_1_f32_t get_dequantize_1_f32(ggml_type type_V) { nullptr; } +// The HIP compiler for some reason complains that it can't unroll a loop because of the jt*ncols + j >= ne01 conditional. 
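+// The clang pragmas added below silence that -Wpass-failed diagnostic just around flash_attn_stream_k_fixup.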
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
+
 template  // D == head size
 #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
 __launch_bounds__(D, 1)
@@ -614,6 +620,10 @@ static __global__ void flash_attn_stream_k_fixup(
 }
 }

+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
+
 template  // D == head size
 #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
 __launch_bounds__(D, 1)
diff --git a/ggml/src/ggml-cuda/softmax.cu b/ggml/src/ggml-cuda/softmax.cu
index da377200e..aac6e0999 100644
--- a/ggml/src/ggml-cuda/softmax.cu
+++ b/ggml/src/ggml-cuda/softmax.cu
@@ -18,7 +18,7 @@ __device__ float __forceinline__ t2f32(half val) {
 #ifdef __clang__
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wpass-failed"
-#endif
+#endif // __clang__
 template static __global__ void soft_max_f32(
 const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y,
@@ -126,7 +126,7 @@ static __global__ void soft_max_f32(
 }
 #ifdef __clang__
 #pragma clang diagnostic pop
-#endif
+#endif // __clang__

 static __global__ void soft_max_back_f32(
 const float * grad, const float * dstf, float * dst, const int ncols, const float scale) {

From d92cb67e37abc23b1c6f7b0ef27a9889da8537e3 Mon Sep 17 00:00:00 2001
From: mashdragon <122402293+mashdragon@users.noreply.github.com>
Date: Mon, 3 Feb 2025 09:42:55 +0000
Subject: [PATCH 09/36] server : (webui) Fix Shift+Enter handling (#11609)

* Fix Shift+Enter handling

`exact` on the Enter handler means the message is not sent when
Shift+Enter is pressed anyway

* build index.html.gz

---------

Co-authored-by: Xuan Son Nguyen
---
 examples/server/public/index.html.gz | Bin 1207150 -> 1207129 bytes
 examples/server/webui/index.html     |   1 -
 2 files changed, 1 deletion(-)

diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz
index 582ccc0d3f8d07ce79dd9e978772f9c5ea85c426..3a2529aa2fb84485aa480d3d476e5ef27eab8e27 100644
GIT binary patch
delta 8942
[base85-encoded binary delta omitted]

delta 8964
[base85-encoded binary delta omitted]

diff --git a/examples/server/webui/index.html b/examples/server/webui/index.html
index d3893ea4e..882570c81 100644
--- a/examples/server/webui/index.html
+++ b/examples/server/webui/index.html
@@ -154,7 +154,6 @@
 placeholder="Type a message (Shift+Enter to add a new line)"
 v-model="inputMsg"
 @keydown.enter.exact.prevent="sendMessage"
-              @keydown.enter.shift.exact.prevent="inputMsg += '\n'"
 :disabled="isGenerating"
 id="msg-input"
 dir="auto"

From 21c84b5d2dc04050714567501bf78762bfa17846 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?=
Date: Mon, 3 Feb 2025 13:25:56 +0100
Subject: [PATCH 10/36] CUDA: fix Volta FlashAttention logic (#11615)

---
 ggml/src/ggml-cuda/fattn-wmma-f16.cu | 2 +-
 ggml/src/ggml-cuda/fattn.cu          | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu
index 1054ff95d..45702ad65 100644
--- a/ggml/src/ggml-cuda/fattn-wmma-f16.cu
+++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu
@@ -561,7 +561,7 @@ void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_ten
 ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst);
 break;
 // case 256:
-    //     ggml_cuda_flash_attn_ext_wmma_f16_case<128, cols_per_block, float>(ctx, dst);
+    //     ggml_cuda_flash_attn_ext_wmma_f16_case<256, cols_per_block, float>(ctx, dst);
 //     break;
 default:
 GGML_ABORT("fatal error");
diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu
index b1e66d470..b0cf152f5 100644
--- a/ggml/src/ggml-cuda/fattn.cu
+++ b/ggml/src/ggml-cuda/fattn.cu
@@ -235,7 +235,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
 return;
 }

-    if (!new_mma_available(cc)) {
+    if (!fp16_mma_available(cc)) {
 if (prec == GGML_PREC_DEFAULT) {
 if (Q->ne[1] <= 8) {
 ggml_cuda_flash_attn_ext_vec_f16(ctx, dst);
@@ -265,6 +265,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
 // The MMA implementation needs Turing or newer, use the old WMMA code for Volta:
 if (cc == GGML_CUDA_CC_VOLTA) {
 ggml_cuda_flash_attn_ext_wmma_f16(ctx, dst);
+        return;
 }

 ggml_cuda_flash_attn_ext_mma_f16(ctx, dst);
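The order of these checks is the point of the fix: Volta passes fp16_mma_available() (it has WMMA tensor cores) but new_mma_available() requires Turing or newer, so the Volta branch must return instead of falling through into the new MMA kernel. A minimal sketch of the corrected gating, assuming only the helpers named in the diffs above; run_vec_fallback() is a placeholder, not a real llama.cpp function:

    // Sketch only: mirrors the control flow of ggml_cuda_flash_attn_ext after this patch.
    static void flash_attn_dispatch(ggml_backend_cuda_context & ctx, ggml_tensor * dst, const int cc) {
        if (!fp16_mma_available(cc)) {
            run_vec_fallback(ctx, dst); // placeholder for the f16/f32 vec kernels selected above
            return;
        }
        if (cc == GGML_CUDA_CC_VOLTA) {
            ggml_cuda_flash_attn_ext_wmma_f16(ctx, dst);
            return; // the return added by this patch: without it Volta would also run the MMA path
        }
        ggml_cuda_flash_attn_ext_mma_f16(ctx, dst); // needs Turing or newer (new_mma_available)
    }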
From 8ec05832fa8409c49b3bbd13f957c6ae8486e618 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 3 Feb 2025 14:57:08 +0200
Subject: [PATCH 11/36] sync : ggml

---
 scripts/sync-ggml.last | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last
index ddb9d817e..34f1cbf69 100644
--- a/scripts/sync-ggml.last
+++ b/scripts/sync-ggml.last
@@ -1 +1 @@
-32f0b85987396945afea2291d5f4c5862434292b
+498e0ecd2c4f9379439fd413805af10e8e9ff349

From 5598f475be3e31430fbe17ebb85654ec90dc201e Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Mon, 3 Feb 2025 16:45:38 +0100
Subject: [PATCH 12/36] server : remove CPPHTTPLIB_NO_EXCEPTIONS define (#11622)

This commit removes the CPPHTTPLIB_NO_EXCEPTIONS define from the server
code.

The motivation for this is that when using a debug build the server
would crash when an exception was thrown and terminate the server
process, as it was unhandled. When CPPHTTPLIB_NO_EXCEPTIONS is set,
cpp_httplib will not call the exception handler, which would normally
return a 500 error to the client. This caused tests to fail when using
a debug build.

Fixes: https://github.com/ggerganov/llama.cpp/issues/11613
---
 examples/server/utils.hpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index fefdce55b..5f97df5fd 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -5,10 +5,6 @@
 #include "llama.h"
 #include "common/base64.hpp"

-#ifndef NDEBUG
-// crash the server in debug mode, otherwise send an http 500 error
-#define CPPHTTPLIB_NO_EXCEPTIONS 1
-#endif
 // increase max payload length to allow use of larger context size
 #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
 #include "httplib.h"

From 1d1e6a90bcf485ad2dee309c31cf19bd802465e5 Mon Sep 17 00:00:00 2001
From: Woof Dog <197125663+woof-dog@users.noreply.github.com>
Date: Mon, 3 Feb 2025 22:16:27 +0000
Subject: [PATCH 13/36] server : (webui) allow typing and submitting during llm
 response (#11626)

---
 examples/server/public/index.html.gz | Bin 1207129 -> 1207175 bytes
 examples/server/webui/index.html     |   1 -
 examples/server/webui/src/main.js    |   8 ++++++++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz
index 3a2529aa2fb84485aa480d3d476e5ef27eab8e27..df3cb1bef96b805742b458d0abf6c9b16826fae8 100644
GIT binary patch
delta 868279
[base85-encoded binary delta omitted]
zjpMKXtJs=`tm<#OU!Lu*zqI6$KKZ6U>MLZ zxvB*Uf7Sqk>zE`(=6^USCP}#XS()YE;D{s{^4=fHeG&8V-8h?#U&qVOPxidQZv_D744I8ob^Cx&4iPePajw^3M69@W4pww!@I!_o-L;f)G4a8NSgxT;(c zyo4cE!Bo+PtU$266;&Niy|z=wYxM;Kv&?GJU1%6w6*@D@%P{edtbZS)$!shKb#^$^g(E~@4 z*9=8k^&!Jc{Dg6MJ#%^9ZqcN@UbR^&liYdWPZQL}dTFo#BN^0Wcd1Ogv{6+Eu$t)V zWHGOX3t6pz5*-*_-Vp{oHO0^{S>UOoZnRBic4%=j2?DL-aD!RRNKk*>T4X%d(%Do8 zf2+fWn8>|skzpf~MG{|Y(t$*qWVM1qi%!jy0^XE*vPVET;u2G1%6ekhER$jhGJWRCz70k zRMrz-p2)pgQ;S86Je^DC)BU{AK9k1$DoUCZFtI=LRQkxqYU*S=; z9df3<@wHKzzy#Vt~6WAu{6dq|~fPnLY0t|1uK1Uxyg6bYEH0!asJd@;yKJ;R&+ zrsCAasDiHyki{mb?2o*}^nmWbM0GSVs*A`QXu8ksl;R2EcRg#Ekqkw5LDy%!zWzw%?op@CFgf0yiyT&wFF8^*_2x7md7W-weTYY%4;&e8{Ssbvog zbDqvw}X-GwX^6_CR(cyFnC7DTHOznkG4kCaDGA zt<&Dj;+w44Z_TA{*fueF8qnrKi5JaUiypPTs5UUIE;H$9ize-`_~n7LGas zGt|i@4W}{Lt49O_x98wUz^YRP>k3?lRoZj~>klcgwiZwjG_+QKmV#BqBv5zhlFZOy z-4?-!y|6PMubjBPs7wPe*q=e>5xyRLZ{u# zsHP-20mWn>~wweAv) zC8K5NOr}@)N>x`ODAJP#h=N)?-K+qPs!prRCeSoC1KH3P;n1#9meDpN+JZZ^HiBlE z$WZn&A|OIt_EOJW>ar}$i)P4kEo!u3s@kUR4>B_B2yjYSf1|p*s56T`ie)s^6~rmG zgjNtNh(77jR2{>z4z!uYTCchxytde-gp}+|2Yr1?wtE#}Ic!HbD~Cp?t%%0D17m!M zrkxJwNVe73_$l4zc?MMm-V5cBxK~e<1@?ghhR0PbPeibyOxX>*iz} z>&ZA14Y{SSAa4k?xTMFsV|6jryv2}_W-~4}D|8lw^1SDeE;N7_PH5Lg6H-c=_M~Ah zn>b(xHDO`_b9*r#^xyymxOuxfXhH1AsII$gVno=OPq>h~p`LLl3+t`?!%Rbq!DKXPB=02(| zd(AYiE*hM{GFzE(GX@lTEjj6c!?Y#vf+2^)I&ZshNI@YPuhoe%LfJuWzJ$zlC9i@i z1;A=QL()F5!F*yf!I^4TAe*{TpP)0owH9aWyw1l%FzsYc5Fpvesf%qel!4*I<*QA* ze^;}cawDB>Tr(Dc7B7Z%*6VU?bpX{nUL?$P5J32VHS1|O0BkyCD?x8r>Csa<8MLJ; zHEoRoC+Ue1Ka$1`h9-0t1=_8~m>qLMy&A83G411KN)IPZXw40*fl1dnK5b6$IX}eB z`3hkTf8DS~JY$9~Cr;L;*PIEG*P)_`e>7msNg6gMH9u>au1_Pi4yRzfI%?CSPGbX5 zYt60n*D}@BVIgW_SsKP(+DV79z&l(6mOAE&rL0z+#XJ3MRh^S`j4YUe)Lt4vLP;T) zP}*=9qpqb&LPv}iOIlKeH0dP`frQLCvgVvbaq$GM%tt{}Y?>%NNPPuc#=6_kf7!tI>8NrB)k`7b>dP z)b%KUIn$sNqAv(lDh-!Y!Lny;e`jRXe1{a`8By~a>IjfFaxg>|Ixes#I-9stAJ<2= z1wmjoL$stFCWb->eZKFb6{6MdtdZVi<)ymr5hybV=R7)CcL!QZ3@42)i!CR9zzr2@ z=*p8Rm;fo)+h|FZZm-&rWRuL;tGB#ZjOoQfw`XIJ8qVPfBtih#Lj94ze{^-MwK79f zLS%Ec^wvwRDyvAyQXM~_>L!G2I+8GKcEwa-EAAr8Y^b(s&wH-KX(Mzw6{gMQ1~pv{ zQ-g2_Vr@MqG!i$JCr6JO-LoOp>*9u?9KbGK3&#V z8KEu}ay95C6=Gvja~`XYf5Ro|b?hONMG506S|6jM73(cP*RnH|<)h)c>fnh86vdFOMW`HQrO2BQJuX}!nB_X3XBgID6vl`=G#4sC~qf7=FQ5{)L;Nf3cv z24G^_-Qh5lyX30vS!)N&dP1FYKwIFWNk%QmHHpNrla3P7bZlWcn$guZFld1b(wYn; zgkO*=x+OUES;)DbiI^+DOU}p^H-wsutxZWbs1NudiklMF;(&R)#ck=KDO|=wUCA3ye+a7YtrV@anx$^ z-I%ZGNHZphjBbz}wH261X02_F(gsLVaT@7BKq#M@x~$XCnVP7E)q2ZNAcCJja5y!l zRlbWL9R#q)R$p6m^cqItn^nNJ@lHkTxR$qWkJ^Bfph}yse*hhafHq4;TX5z;O*?kf z>Sq#QVyliikXm&pU?@nF0?279z1W6_VjK(yEf{r8Ja(9D zrZIS1UrZW)0Ihufv-Bxez*bT#$fH~WSfW9#yMzaAHXsd|xv0%{;t0O@1{n0WVU}?Pp zbwNu3kCP{F9Q^kP-^|Ds#b@{D8NQb z$$*Gq)D!0$V&lWigcxaJz-=~5X*1^RAnsWqP^ZbnMU1f$8)K!f50Hi5^GI#nT1Vq} zCQ%b>xn9LGFVNCjV@xYefC1U7MA}*i00dCgI$3I7s|;V=8aPM7JdYu{r2t zV~ZL{e;F~t2%OhMpIdkBIwg=~Y)f6+-vo1xw@e#gD5jfMMjQ>$gLcRDwBf90TduDl zIt0)8We{{JoaT8G4>|+Vpowx=d(`Mnj83ON z8_@9}bEv7k(L;`IQp|{@1!S}72VhHtEwGB9f8K&&=QT{SJZwyMnsrO(s4-1EG+y;=tzCN5RvmymhmZY^C=JHLXg)U7iO{kXd&-bP6y=!QrP;E5ni*)c$ou}4ovR5#(qX^gjlRGb-p%mS9`K|BIMEz&fx zRi8FeDIH^0)m@{+tX-+PgFZm}v-#Mue~BSB3PH2W@Sp$)mIAK2VuO?<$Z9XDo2a=k zC#ER%bg98`+(I=cc5O45YBjpv0U~IElQTP8%k4JV?ua7>pNT?BRoXorrxrtcl4WGt zVx@s2uL*MGO+`L|%=T~|U?w+Tih~wOc89Y*v}l+@$CsR>H(RWi@jzS+907+!emhxFlU02GvG=Ahcs08@1z! 
z;}JBm@e_xufz+7eYeKyxuUx<1e`&%?K8`G01H0)AXId&x#Yvr7^qUx_wi%MF1wdD+ z(^Y_4qx^7Y;(*xesPcFPa^A2-TG?7r)A0r#8TDapD8M!^G&+!7?bQcP7p_Pc1kHL9 zm;fMHcGaO8j^Z)kcBY|zzi6W3PMl!oej+}A4$jqS!jZMjJr0b^S<4gy5H=}$d}#x|U~ z&d3EMxwT{pLg}E6_OvBRah=4P6t&gsUg zW4WftRePN-Z}%(6x+6*g)-+=;YuJ_bRM>EpO5f&Wv!}Egx;cpHNkwzv3Wke4tHKDn zRrmNM!Y%cfv87SP0C-mR>%zQ^4SRGKwnVQ1jyX=}SwWb%a9CyMG}^${^?xbRCv@8L~235`olLV-( z^%=6M<2cWP*2bGs{V_VRIyi0_Z8+NaOj7Toj3g37MyZwIf)_+;VhAH@70bOK=$o>z z9J;NIJ!fX^Iku8wj|TLCCV#j5L{~!}3dNN__BS?V!>(sdxwc~QMkSrs6Qia@kYl@{ zJ)d|>V6isvNxe7k)YJBWh44kVZp$7#ukm`eu?CHaMRcJBH*8`9E7gXn$|)-T(CJbNr&+ZGZC)Eb=3ggy!ak z6_R%7d9G?+yh*zz-)(sfk#y`))j|MJ zQ*}%i;w8h?ds9Y7P>< z7R_m8hILj`zcZS22GiAKYR#%RYSOx_QB|ci=9rnbatsi}8!XLegx3WnS6OBQo^h9s zEG$>cSCI*t*mi#H)(6CV4tg4a+!?6p<8@IJo zOu{@(Y=p2cIzd&W1A(hAyc`PRaF~}c zi!3JMT3|TOn19jeq^{Y>j3yHZ@HmR{&=$FI$j|=9= ztU6|4(d*FEM7FZtP>I^TQFpoq2CyNZeaURO8&-*1z+k#;4s~a=f>De1W15>Vyfh)! zZJ4tgoq(FatE#Y;BiJ5y`g6s?V~>a91&79Zr)^j=H-D@pJ)tgIWIUKj)AtVdNXC|$z%Pnc8 zO*fXr28k63pX+0Rqcb{`{8+4Le!qb-;H;l{Hi4K>do*$viZ!hgf-rJ8Y(XqcwjE*2 za8wb+%ztXAwr^=mw+^6L5AGAfl4(TKe#?}YNaGBuj!gYPS0curRyc{>V1vP^QYXwo zwsC89HpYEv;fHt)pcYVW+jo=#3dk%*oHzjytE$GDZmso3NqGg8dU^+N%t)& zU`)NIjFC|fiB<_!T}B(2RK#Q+2}_I9f~vf90)M#GMon_*W)pP06u`d9Z&Z>B`VPge z=}2y_Q8W!4EQn~99oU=Ii0F$Al^TLt29J3Lu19@cp3Z7rcEm^xide8=Wh8(NR_<5U z{j@dl_+ZvEJO@>Vs}<3$SBEuo4j@KIX>?4OLJR8z@VMHMLTc8r0U}tEb5d-!eSW}3 ziht$00J$~?T%ghN7-vWL*dI11iyK9R)sxiT6rSr9Qdkoz1UKr8goq7b7Mp>Xam{KI zu_f6BM)6pJg$G(d?LF9o1wU5l<;)G#ZVZ00~_Ja(QsmnzT%T zVpCNi;L057TKzP1cyy!~!%c=eOMs4zg((7%fekTJg%%qR+9YbVIjy(6!7M@-^M4`R zGJ0cO9X3J0&s?}WH+!LDW=_;>+kqW(g9T-@aeX4tZGII9BUI=`cpA*sjkd_wK(!HT zoKqc!lg!VW*{Va4&5oQbhqkV6>Snub#Xy+)eWF8f9h)5yZWTcK1Rd0>3Mfy>wXjsz zYCILk%#v?!dLqVpUBjJKM4-|jI)7Xs&RdnG(IGIL2}fRa)Sk(V)Ni{r1g2N(AZc;y z*wcsUAgnagC9!CtPS(mO6bJlywK_&b0Sy^*h4clGH!(gUXl*tWNSN`tr)xGLQbdQrtQK|P7t0e`5h@H#WC z;2J445=v3$fXy#7u%*x=6n9x+fZ*9+?n;6)Bib#)^ZO~&7lT0xc587)1#QDZ7WG!T zXbtL&i8GR2HJQYpi-V324(-Muo`+##E#Rgypnc3^C&&~;T5%LlQfIKPWbj~ITPn1z z2%gsD6vUG7w9X+MAFvyD*nbd(#e7~TXA{mdny{;{`ja7|NNGoC`6xCD4SA@>^W_p5 zv}6jxa7oo+veo8NLNu$SH6OHedOYxH5Tg~!t(g(jPv|)>tmiPli37q{v5tUuRk1Z7 zR)mr*n5GD|=6!wLnRn<~R;QZmLJ|R&ucIB5Z^Z+)U-6x8AFeLL4S%VKbvGuxkuSJQ zRvS#JgAqC~p2PkK{wq#FU2++4pX45jzq?)GoY&=5xz;Frdg#Uy9ESm zy}A@wu3xWvKx~_s(QmD3nB-wTN$Va6+P-g$YY<>M;Tl~U6ygJOTW-0t3>vEWtesiR7OCmzp9-ZbpDS)&Cv)S3+XwRT7C2V+S~ z+Ras?HlKFERi(NN7pcD9_+CAlP~EgSq+2$d&HbQ)U>F2PLu_em47lM!4XP)9*=ZVu56CR_7Us<0q}U6;hpUIA=t2!^|_yui#n~jT$Xj8*cQj zHki~!yy=Hial(#53)byPY|$PN5w_&C`7G_U@L?dU87=gol!fb7w>z2|sw^d%qLNat z?V@6R&QY|aL~Wk!nfhFrrvlvsB~M8@Fv%=red1J;0DotNYHhkfJL_>US~EioRF#z+ zi!HrAn`HALJ0^QUzcwPURi3RwfQN2B?&%gyD+BGQq?9D&;= z7mw#8;NywM_FAj*X=y;JDjrV2Z5s9DP0bS-W-vAN)g+E4t?I;1hKtasvt0g~iRrPRv@o`60%5JKkGhNH!e%3V8F@y4>wm+f?m#FO*QjYjoQu4))JbQl4q|Pt zWVTx2T}X!}O>(^P6>@GzLh6tK(qs&r$&`lVU~+%L8jXfJo+1dSHytF}uzF3MENNNo zv}dYmkgI-}r2y`AY+TnDt0A1ykfIExN;8~9&UmyY6{fnF(Y!E4d>S32A+AeMmv8Zo z0e>}sdON1u&eEc$(QI6AHC>17CbOz-2vd`HBpT&SuuJIDoL}m*IX|2E223q`%$%kt z%w)A(5|dbqJ6?+(uRA>G^@l);2tyE^1T5ho+CsABwI>BF z%SkinFJL0A_AGcG-ov(4zU$`MAL|#0}2&pevv0LpoOb-SZ5F2FRAPl<0Dr7MZ zG}OnPa6O)rDvFtPtH-y9w$+@bI$4wZMpGnI!iGl>8+aHucO4_#teUg&8ru+>i&xSR zsSK9wCdY`4R@*n%Lv!A2umnaiV1EcC>xwt11MODZYPwv#XJ&14B#p3nrIuAC3YBbC zpf{uz^n%7Bq#c+H;RcI`6_lLz`B@0|!y#B%3M*M3d9F@$0%pl6K)Y^rosncYK)s6q z{H&QI;}H+dSY)-5H}%$Lg@F>?;=~>*F*qMgn@9^$)>E9H>uwKeS<_Xltbg@!7)&xZ zfC+Llml(XQjMpKbl5@!}AF|0*<##a`k3slE#5Pvbdz1noR=9I*)j0T7R5p@IWi|KG+`v_43a7XsH_PUsEqjaQmxrg7Nw+%D7A zqOR)2)ltKt*My_==^BSe>zY9$aVOgNT|XN`e!AQcv!Gug$zTEk4S%dzqbCvo8jG$^ zSv^@WTJEyCK~Z|W7|C;X2Fxb{>1#oUFyJABbOw_?4)pwvqmTSRY~W@E?Caf39t#LJ 
z_o{GhRsq`}8>4{U05}|!NKu*NSUXDa!NhKgASnx!6OX5xwcn52N~mCcWnz-3kp(Fn z;A3eTtGz*|liFxwxPP*H369u;8^$WzUrJ`D0*>5N%uyi=P2%5Wtu-Eo45KqJfI6|c*9>e6Ht z7|z!WJJzA)CJ^n&bB6wS0#*8r$pGrf+M?yfpxOti&B#RDc7M}%J1Qi`Tovrh=YT@f zbiFbdhm%g&XvG`F?lsyQq2lTMX39iaE$Yn>L1d9gvvEpP)Pv2;fCT!o!dQOl{v9VZlIWsL-p3Xgoo^gYAT8})#g%@(pB zvx^}B4CPjDHGc{x;@m~R&`se`Z^fImXD_9-q_E57sM}6NX%RQZU~@Wv7o%q15nE|T z_lB_7`u2yV&AuqdVNC5D7?xU?mTGImpg&OPe(j?V&(uKVy*lN#wx6eVF z@eB->g6AxK0`l=?CdP)L5FKzOZ5m!O>OoUUnu;T5NPoGiHd62b!O5P|nI>7p_0?sq z~0x&CFg8a03SuRRE%8vr;Ff zg(W=!A5H*&|W$Hn8dv5dtcfx0@ zBqDoGWjd}`C-!8q;w(v80xNdIF>cq7UAQkJ6=^w1lvqpYwKb3k)=p@jRD-!xU&IXI zLiC~F$P6PAPpSjJTzU{a{Fkh9l4` zP=VUirn%G|*&rEn$OM#}tkr~>EJfJ0Rt6r`RTk_nJ&((%Za7(iXX?BGUU7}B)c4;s0B$d@B<-!YA284gbm zuuO?Q;YS9VqL|ERuc(mR+ATv$=6AytJN$})q}yjPT#`PIZgTFJS5u)CfnT#M^?#M! zb_Y?tRii0dxC+7WYkdsoQ3u;mU3u~8Vm2&ID`fEtX{S~&7UAGY#iMUA%K#tLgY7YD zjrJT=-nBmvKY5>p;%%SbU7yvJIe+;j`L*ei zWip%<>>y816W7zsb_K}keK*a`hvOMfa1S)3>!9iO^SDaa27zQa#PE4$5figl5mc7b zE-2v*u-<#fb-=z?I*yW3I{?rp3YujKVPVSE+fwEx-a!?QlsaLO^B{SW_oT`(Fs~kX zJ3jXT$~L#PaV_kY7xL>`Dt})~i01Sa4&LY8$9?*cBite%`_V$9(po?bWdonPevc_T zU7#+l6s6Ax)RzgQ_2rR6rAt@^iB8k;8;*4y2_#_o87q^XDl66>0)j@dQ>fLix6h+#)RevuLG3jqFThKB1 zrgZ^?)Y*ew_ksoaWGBMzp0NG^G*wKP$Hss*rD`@wvO86p=EFJvQb_{fwd}nRABv(} zm3f_l%DhWK4I@$4@@GKyp z&&(0w!CB`F_mOCGo_}|ZA~-=AX;&R-AB^9eVlIYZD~lPNH-*B)HzRpr zR5w7N_2(dK`G1xrTMKm@$JC-V_qu>2>$|r|4d1vhBD@4=L9W*tZw^G#gJCXjP|^=e zar+CzwVd?OGnU5xAD5BV6XE96;AHen zxSb(C$#BAZr`RG{zIMT4ZLVEDH`n6zu+#l6kGeEAmVboz6aY&li{JjEy>Z z;F~I2qLs|?bnTkZP4UR-N$FZBT@_~40z{rL&1qmg^htkZq3wm7`=zs=Kb_j9XC0Ax z@mi=#@qZ)-r;iR>^Z?uigsC~i5tRBQ?fy=8rk^shv3G0K?LGx=F?q(Vi91Kl;dPYL z`iMw9RJulV@^B_cD$bCna(J~sR|gjqccU1?P@rQ;Ity$CA)Ru_(t`+%I*IirupGey z(TYCXX7trb5cj?F1PE23W4s(~at=!_Ax;C_w7!Wtgc0m`EIhhaRw z$o4EPH@ChY@|H?S;6XXwN@fQBCV+KQun1m*gwq*I&NB*^0b`-q5hDJKjs<_iFGC^K zteuI8>12-aqzGJV(6=ApAvx1XIxx>h_n@N5&22pT{c%N~*^Ht)zeR9=v~_H|360cR zu7BFaj1}9JWl2!0E8w0#m!dj7r@5AJ?uXda(1G*Y;#zyU5`5&%c=K-0DXPcfib$6W zk%?gh8=$pO+JG*}Vx6@^w?}|C$-kO_7a2ok2DKs_c_5bYyYcX*TbFZ~cz3K5P zsxSsc8yhn{?vzL^%UpLqq%ukT)G^!hOkL}HSQrCvI%ih4Q4K?^Ka!VmSE1+(?tk`N zAc$+TMKvrw%iER){Z69y7eBH1-z2B10pGtG1e+qm z^vZqIF@1F%gbG`|B*az)vZ+n^ewZDmm4GApE62&zscCDRN#I0mAuP`?ZLk0`Cs0?c z%G#8TU*RqmByPJEa`y%{A%MV%U`FogGfuFF7^6PHIWkayB>=|ewfoT zrO1f&oF7^}NL4&c(?BxY-~=J%xglU#;>;Faz%=kelji;lD#bbvm!hBeGJo!ZkC#(7 zH*^A3nQFlnQ5@=7NGsBo)At5yb%!5pN?^*sZ}k=vOd{kEi6bO%E^Vd2@UWp?0DK{} z;y#5}GJs!Ej^;%vqL!~J$C7E4Ch+eRa1SG@K`4)&thq1THMU7G3yn{}6hZE1w*^3~ z7!@uUq~hcFg>k*|DdV=I>VL>o9EO|upaoCu1}?s_Dd6ax7r)TFo}vhWX$ zRMF^7!T-ALtHvD#)>Z98Okz7D8!&tPElC>b-dI$~lUMuui;S&E<@NAj27fma^`)u$ zv9)jNdEaU)%)1XRVG1+_2wkvD~S|%WPoAFBE70A z`J1cI6zSj!G&P70DSz;r@_ja5JY^0GHo`qMze5WZLW%$^B0Rx=fi|fLfwIWdWAxVO zC#nF15@IHNcz*lXuX}uf@R9){YK-}#L+Y^~YIuM8W^t}x7|{k&G{I1jDJ10m_EWH2 zo62g{&eSPdyj2XiPkGq-{`E+wMuB4bO5G|ufQ^g|_kME?T`Qz$%|nWOXshd^KEwb87q|Z| zW$qpTZS!yCs%O+tgoYq>LK4T?Ph#8^A)o9|t~t&PrA9n~Ij_mt^GzLX+Mq;Ce^e0l zw$>^_C=>94N`I)*l%6x9_ifauUgs?+8ojb_j)kYYx}qr*nTNTYETLD}R1V5vt6o!Z znv0Viih_-Ipo$QsQ(enH{TjyOoSZOF&h$2+Z*!QI(j;W}b5e@qx6yY&2ZQ7+L|)Y9 zkCuWCPwE}G9zb%x%Qvo0Gpxq^u`1sz8dpx)YJhufrGErE1R+PH(p3Az4*O<4#blHA z;gn2+C)l=D0$FTdJ?CAKQviPu5YVJU?sS3t4(bYj)>#2?*=dBmkW)V{RJ86gSmI3p zf2$Z_Mc#?uRE50}lZUb~)e6`qgMrspgIVv1q_N1f5&YP;7Q&-`vK0Z*s^Ya8!B-4z zXNH*V1b=AkZ8om7w8wFblI+UduW=qh((w0vdc-H;#OcDu)LgP?|w zmVW?@1eS*?uvd;Ws4rZmg<6T_zW8b&H9hD=(f95X#ne%1jNtNp`qi|RFcA9$m%+pP zee{y(6-`8_Ob5}O4J6UO9f&FX)wRgFw;J_vv+kS2%9hIyIOC8~_F{i*1{~l(N4J-g z!|Jm?D1_01R%)P9Hv_zRUlMO@TfMK?Cx5Gon_T^E>+tr@h|jU)tRR7H>+1P%inmM@ zwz`KzV8zHL@8n4*Shh7|M;Mth!w-DU)jPK`av=ZkwBJ&<4MKahc4efAgp!}ot`#R( 
zHvPEsSIYHsPW!-79E_CJF&%&iP0XXdndr(6vStzaS@KvFJMRF zb%#p#D7CD>VKBL=666SyQ>5GBNUI5fb9@OILOvUWaQXq|bKc+*L+mOW+j7_XPiBTa@@eK(M08vs2* zM>DJkIEGHeCKL$WJ0n9WHeUC|O%pU#L>B}#6(M<24bK^-RUNd$#Gc7HBY&`HJJcHk zp<)YJ_mQb@@(@nFEepVIN)Qv8y*LAdRZD)MW%*YPb+xF(2)5Z^+noWBtT~H%9e_|W z0GgYN^wJ9xe_L_w7m@%we99tg{rTP1RGTz&w$-((Edw#l{C|_Rk3pUUsg!)i zArtwH8_wc+x$_fb)Cnkm6e80Q*0<6losElUV0aZHjmwwZK08)Yq;}$M&SEzu=4#;o zW~v#@2uSY66)JK5>?6WQT>KHYkDPDm23^O>L5b7blhIz74-&S66oPkmw&;Mf@q_Yw z_!0VqsLTP5;xak=XMefuNGhOtei8zMhpCwkJB^~QS0L-=q83EoppWW~cTLwaWg>uF zsJ!XSapVo{XW1;^$w)(y@gxNmUE{^!S&FPXWL|m6?&%rURsdgT@9dExAR%ZyI6~wO zaHI~jz=~+``K(?jdo2x5L-Tg3gsN9gOSWP_l8bPVw8gqu9Df5df%^kBLchgcc|mcS zAPJWNSE@KxZOyBY1;X#HgX@Gm8+j*naIHNw+(o|!e$cY~f+^e>Hx)D`Xn%;z@~O%_ zb+;59W1SNcbozh4%; zO<4_@@zhBjuzyrMb3@^F>S?di+Z8u|jtrYLGJCBEEFYFReEbeKe*0r%4$!xLa=28n zt)3u(#86cJT+$gA1OH%l63O6T4Zx4z>;MMn++ws^Ub&?26}n5>YWL|a9!%==j>roG z>kS6QJERVv!E&Ph_jQ5>Hlj_StL)Gl)iK>w*mryCotwoI9lRQ%~ytlEH6vUdF-nOjOcj%r`&%4s#5%%h_B{X?O=; zgnZoUdO}1VmzB}NvWT@r1DStJdeb%FA3;hdUX0U0!8hAH3Z$M8qaFtjhjE}`l{^B? z??%J9f`6mG0S~mG6_Jbff4^f}(8rhii5;PUUSbn_ZDooZ!9O?C;Lj|Wa+Y@~x%aft z6?zJz=!+^)<)u8_1riS&;LQ$%*+ss^5VT^UvktS;-K6VZ4uiuUXU|W7!kSr9t!)bh z;TZy3%Dddw3-u!^Ws8KWvPo~U(M$CvNlQ)4+JCsiF@`JBqmLvV^@7z8`$ci*K1`q5 zNI92M*kd$MsF_ZM!-QL__NT&2OH4|P8NJ^8D!zhMq$3?u`JSLELo-X3FvW>UB;aPz z9eBW>G->DU<~wT=$skCaH2clFpI4=DcF9KUx>23IdsE*{SWMeSjpre;q> zZ3MDXuk5(jTm8n!GN`V#JUG8Am{++_XMamXKL>cHlK1G$C<{=Xce%bG6R~lur`9sg zAfU6pwTj-;v1q!cQU}Ph+#vgYdxgF$V#9hi=8U|g4N{90moFn2eR)(J-JVGI$VJUh zrkR{cA5WJ(yY_u2;amzF`lWN~0MG}>55q0-^Lj?7pbP~$27l^{ z0Kxp^vE^!=r*G6?N>h*!@c+i-+eYD?f`-iuLm}&?M1fqUTcT0;E*8kp-g)~#3#~C3 zX0t#&{01_ux~qMsxKFoiq{jcj+lT6vQ3)6UP|dNlZfA%Ta?Sj7vRCAc>UPV&moA3w znl1W4fDS8*4&hOd(^pT$r)Z6NDu4WNvQaFcF89S2KO1zL8g5FXid~AP0PwT4*>u28Egn zeZq@#r^Cn~LBI^4%PjU|$YrA6V{`g3Jvy$WgsU0pSh$ylp8biyW$iMJD}OESrUTL%5E$t!1~PJAb)6-t}?TlT0$x76ohzzW0g-TWgBB_@jF*ofICT<$K<( zznCrep(Yrds(dA8VB7q~O`9^vBbXK|*HKBd%x}&uCcZ%5(0p z#!Jy)Ll#Kl=V&y;+WAR1 zTU?fhGUt@nkBV}sjk={*0lE{qsu!4l^YNMo+dG2J<4Zq%Y=3`XaUWejGtAGrqj~4o z-!E@o6093K)FP$8#)pZ2fTuCh$qZtVas-v2-dnsafaCXxl}63!ISC4baTi zu_o2FGoqu1i&Z)e`t*EwL*NhD(BCfC^H-ddgHt4X8A5|<^$wkKl*a+rO}2BQ%;4E^ ze1mQ8Excia?4hvsZKB|MtzWx z_In_&D5MaGDR8Ez?v{Qtrswz=T|C+W2W4KjWC}bCie)#UK$s{v;B+u;#FP)z0UY z3q|VpRVeMdv}?rB+%#jC=@qY)6a_Ym=trUG?WOhoaPrT_w5AW?th?Pl=66tH8k?rR zQGYTIZ-@v$Edq`gwBEQj^2SjTr-0lqdm>`(HrJ{o1KEZU0$GtnDNlZ*a}6ME-)l;Wb-yM@Ta2Po*bUY8Yz;{#~Lr zRU*h$yW=U$MtK!zr7ne7sG4^fx40sq|32YyRe60}~_1N8_aswD5$&Ww*u;^RwW7512EVQMHq$>z?0o^LR}2FQLi; zZvA-}$8|>szOpNZ`@kxA%YRWnZ;OM+@kFihmnXlQg(fS}>U2z;kk}jtHU02wcpI>P z6>hhMjgaT7lhG(uJm)Ot*Mg)p#C-{%9HdRDy2R^m|2AGD1=l6n6g3cfLYc7NUp1|3 zX&s((FLc&Ow3LJ$U>Nno*NO1MJ$zJU;t6s2@X4N@9`x&N@CCJ5b$?`dk{5xKFYh&l z>bgWrvgq)vl|l#GbLjDE^$%k4e;K;|BJ8j{AAs50`tR-GUS0Py(Yd#%Xi1C;TXBQj zUSM{y$sE1ufH{Lr88>) z)!wpT#1!?ykzm-{fPWuV)!^b_>A{gk`J-c)8(!x*ms64?A}wcKA|?9fU;rQcR^)S| zs9XvP@FgZ`ii0VXn#$t8|P~=XJB8KD0|NY zgo)qrD)JyLuh7qL)%^p=--PkMI(R340-FrDNT)|kl4DukDIMI{F_Rm z#oCg4f5T2$&wnUPwxAyr?nM@IQN89{D39~$W#!W5Zny`j%3Ef#(qw8nDyaw#T0qcl z>E4`RC2&@bHQ1>NM#^oCuO&Lx$W%(8q<437Mz}E-$2ymv>h$U6cOB>ygauQ z5{dURUN+yNCZ|eTaoCn>>+r1)Fw-M-zBWLE-KD$}ki*aj?0h+8r>wjRzi7bd&UII; z=i@U7!hgu2Eq?Mv^3$9fd4{#*wgqXRP=fAR0HoXdn5uCcpPOgim--W3JMWHM7W_RQ zC>PY>s`vTnmb^bQJ}w6%8yRb(_g)Ot1yt^~W~)eF{$>U3BImUm!Z?Af#N|5y{m5OE z!!5#=xzWkRK42R-Oz|6$E-QC522Y7=-%vBZm4CjrhK`2WC-7Z7Sok~nv$aT2dW^Gl zL{^TVM(e1V`L0CzF2PQv!}4i|Oa~giZsReM)r6A0Wx|QnhUc;U$Vpw>ddB;#O4i?N z{nF=f;acZY$>3A^{cmYA~M9cjDw-CI6H+6fy9ZhBpDMAEL0ehTpfhS zMt@7-C+<6T7b^kb8QpS=Xo7eSg9Qb&s?{LI{1~pSaygrYjY*0h5S8tW=c-NI<1jf;&fKB(^_GhdOIgbXWC|$&~i8zs?!~r

&tr_7GT;|&w-P(jL5%a3+Tm4;?J0auvkxniM*l+RbO+-}9^;b`KT)a1Jmg|` zVQS`Vtd3K7K;mKY@KGKV9*}$t2h~V-^~>UXP@6bQix^W}olmISPusa%%!ky*UP2YOj;L^Pb5%vc+DX1w zTZQ?=LmyU&KuYfCA^u}j!@muOlYeQ6a$hwgvS?;$g8HkrbfP8>$xvDeeMr{=w%>GK z$3|YDNn6}^t|d;Hrc#R^ZOlEtRab4xb7fyZtqu2sfj|mxQ6?_!fcCQ1>Xs!tPTjf1{($}zAwUo|+;uYm{yV`{$*$eygViKdbpQG0r&(SN-9=6k&h zRMO<>XBsCWe-3L4NX)YYYK0iBM{%jd8GILphV7>QWHYEPf_yp#N@Or-TfF^LVJj_l zZiw$29q(%Lcdl}43@DvxHozHJ{OD1QyhwiT3B&?DwEp^9+dDGke|#Rr<64H9w#5(!Ni(v@WF)XWS6fES+mtz0PlLlcDz zj!w0NRCEB8rlaz=AqqG4^6_dHI-RvdPrN9As@OmjrC>iNRn#^Wo$ZF8M{A{ z zSH)~wc!W^}2ya&|p(Pl9BD>0~oBSKh06QrSgEuo2EQhAc*%2fjO9 z-2IyMet>|xyFYF%FVUS({6v2k9W910LlJgNj32_b2b$M5&q%7u2NqcR-w z0#SJAj8}2SVXqccFn`qHIv5xdeEVQOR;+-gJHMm1fEX_Ux7Xs#5`T^FpBPQ$zP^}3 zeTfO~ZQpu^%8dipH!A&>;x9tiS)_1nY$;FM2cthf%N3~f7TWAc72Ld}&aPFcne1>T z08k?DZ}EUJ_b!85Z)6!meUzx9?g-PCRA4C_ogHOYbmjBwU4MGen0R&yzn!(Mh{-IQ z1&tUjyC#?I;n;qK-FIL-$N;;vp4YwzryPw-=0Usk{_i`>6=LZtRZ{cOx z_0S&QAFSQ&^%4hCt2sI8uf3EmF^HT#elIpfK3hZv`^u2>hq!^X{x-IqgGz*cwms%R zxh`T~!k5%?dVfqTQa{)q&H$nz{l->(lnM0hMwRZg$|c#ZnQEdR^CwRf79zr(3Qq4#)-;yd39Bf>XfZr=&~ zT(T0byu1B7HMR}Q;^NNvD+k@ia*l^*(*Y6 z0H@EQpvyUh50nb2s_hS#B^g*OEMKRVXlsC8g~uxc6z4K85th8_6UVJ~V&BDb0dEWLtR7G}$g}#|aa4X4K%6Uxc3#C;mST@5 zP!SN z4EpQfQ+nV+mvCnkqOB&9I;KOjD=qN?RkrfS*MDZ0=fzAQ{~>Tl%_uCj5<&4b-VlHD zocq~?7jdRv96>N5J(Bg+f{4|j%yvUw%!@ZjQM#s|EvLq`X+{dAv&P}_J@KNF9Rp(n z5OXXALY^Mml4t<0vTYoFRXV7L!7o}}galnahY_a&QgOebfafP8YCS%2VKv_c$d z^MAh4<2Z00*yAHy_jAf@%KyD|uY=8257SQ|>#Y48_lp{%?02_1tXVU*iv+JR<%@fQ%_TTDpJ_usr*sV6H?X z&%sUnXmcZ8^yO&O*j?(i0yBts3RaOBwC+zV&2L&dFlklh%r~)^O&QgbIaI`0&k8j| zf;rcJ6}#JZBrk*u@KJ)Y9l5D%%5RGii$?k@B&E8vA8kj4wywHY5lB@{oSgk)CVyXq z3;yAineO?jL+^|;5$uLJM1(cC{8-Cd*{6$HnS|Iya`9Wnl_t+$9G}mT``&gD#hC{D z+g_I27a^UG3;=&68I3tF0}Bj>cJ@hFEmEoVjnWg^D2 zNq#i$^02pSax4$OLBR*lB4Jop?oA`+vdEz2_{ni;jfqKxaw{9DwuDL)1`QzV|Mj2} zKZz-ie!+A4%-+#=4pudzu z`SAf1aPjDMQ3M)$%WDaBz0sRbLHBGg`jRI*w>`bxA$*4ZCUVG*8PcE@N(D~Wa}bWM zT^HP_SR3sM+sRV`T85H`JZoclqQSl^YsY1iywa&N^Y$`?xD% zN8>&tuMFTgP#>(j0e28IfPYqzJMX2Q1=MA0%2Oj3MuKve#|e;s7dio9G%dzRN5cHY zjpy~Sn?`NiotwOM*NxdXI;6zLi}GSBYRg*SadgX@Mi;rYS`P_R+A!fUKYkn{=enZ3 zvrAgE`#)L&``GP@pZUVA%$lmdEkUA;CG3SZ;R{?Wk*%d?&dmB=hkwIO2CGo>h`dhA zEub$&@0m4D^Utae6~WUY0;oP44gY%GLH8h|(m^qMvc7inXYi?*gj-!rKwA_1uuwYM zB0DG8XKe-1-_OMX7hD5maZTTnvneowMHT&hHe6hw=1`E4%H=27Lpj$hFe5oOjh(Ep z8@B2h&wAd}V!9YBZhxv`Y@1=FqEpcrDkjpQ8Hoo*QhGGn-TD+u;i>|JqF80*XF1SWjBa!+ZrGC2E1BmGb zq1E@1``i#Z)k`F>@H3A^2;!?()$_8D9nMdA;IWA$VvRIGX?f`EtbsDs!BOF;3g(53 zSrp&OAS9UuV5J3Q4uT)9{O&|;s74h|1501#0GsVga(}hL&QkG#PboraH1CWwGX`8- zWQakz`Ch?(Q>tHltNbB-KMNbXP328BPl_SU)mqcp9E;eSjE_8nIOQG`Cpr(&DdB&aKGETqG@7-MdaroH|$VoEUOB48{w4M|p9f=3(6A23! 
z6o2Nsq_pDzK|sF0D%|tC5>qg}$lb-^A;?V5Uuka#%wEZf{H-6w>)JaZj4ytt;ir5+ zq0(s)WhvR>L&Eo+zzzPY@RA~0bc>;g+vD9X9kb@A9s+rb^tb^(nT8Lxyms3{?+6)V zg=Z}co@ZNM;oebvg{Z-LXv44%kk1?H100->PGWzE5ngLBkmJs}(r4Pn;h{B3ZEo21 zlpQ3T%Y@RYeC!8N%#>LRaE%Wv_ixGYzAxzHDZKYAl_7_XfK2XZlszwirg`!~g9w{lm8eWzd&dua(yX3?xzNx&VCffZ4a z$Ot$?`E1kn_Ul2LA{MH)hk%uDwUMH%$hlus5@NHgc-E4(G$M~IWE2Jyqjf$;D{ zI^h~w^}{z?u<3IJpK)ywe;B3J>84r<{h5Esd8-&vW~u)~A$XpeK$QbjY0%^Qo_aWy zK!%ZF-rgE6w3nkM-%Yf%GwNEDb1P-{?T- zjU}9Ly{`nmLQM=s@iqAVt13^J`$gHGnYE3(p`Q`;cI_P z+;N?ZH<;IIvnGjrPi+w$Z1Z`Eqvm^sjRZDAI;C2bg5=5wwkEuStIup^MD6na3fLUs zM)w2AFsg-3sP%XINVurTs8S*_r^=hH+_4bCcX6?ay8yX3I1z@9B&?TlCKbzzTxyhA zA-Yc!M{-iM5|Fq>gVN#RsP6sKKka|*TprB|y99}QArH+Lz-4gsS7h-`NPAv7t;nb^ zf?fAZ?mXM&z-AQfiFAlu>v}e(#2$rQMI(RAjMRDGr_op{l?+n_&=bs3?30~9x%%^d zYxR5r*VYYOso`P*3n(TIY80JHgi_6~ZtRFeka%%%LSdd_nOxlaubxyepF4j#d5kF+ z*Ris`ZxsQr@Q4lP6LM-Dqv_#VP~|K16uO$X0Q}2CcTgPNWKuHkI_}IFmTPD26>S1skNN#5*kW^#BtCov=Swvp?P<$| zG^;Lz{`k6XosD_kdMiPu{BVB&_m&`od+*(wd+(3y)d%mFJu)B&GIdO85M+uxl)pL| zhOaa_2fzVP{meXDmUlMl%_kkC3YZua#yg zP8i4RQT4hwf-~qi1*+#L7vP7h)WUXQOPN?`V6ToW!gxodR8-Mhgz$e*L8eDE%Xk|r zabW`Sqi5qS_YIP9|BJWZ0y&XuLc8quXKS#Rre54m2_1LK(i*4MB(ifhT7zo1n8R6@ zo0LrGK+~1jsPjzunY> zuOp`dT(Mf%qFXT+**$+{?Hc&x{t>xjdu~D&Gt(a&u4bayM~sI^C*gqhZ@?`f;O;d~iM$5>T)aS8o8GHOHBq^Dy zCr!Tw8*4M+ve20}nuKjvh8RCS`vnl3)CfwGqx9K{D86?svps(zh6D7v%Zlu!4<0#= zT+c1)*g=z-$gIOf;0vR|RE%%*somzHLKA4?$o#l?&^Hkgx3Gk5IsGU>^bmAuL?x<<3Ow3A8vorksU%;w-&S z(S7k>_oxUhZ9=^iw`$U}0^^<1TY@c8EYMLzfUAN=)N+6LP6_$=B3u-eT{=S+_b%ZaUj0v5d3<2{zXYJS|Gau5r}fj zuw<78(N2Fd3GgsB^eN+wX&34f7$Xp87ctHy^+(Ow^UbCrTl==Wp#z_@ zUZ68?$q~{qQIOo_LeemCihc8>a#>5KjXx74Vm8L~m7IScJ0djw44u5~^pnpcX1{Mf zi^fLho#06Qe!%_sb7r?cHYydO30h8Sh-*cnY9}C<-B3a5ZXGSIE6wcK)avf~02ALZ zR<3{b?t*>Xmli8r9a6=`0Niqm6x$(UafZlEjq1(!04&?J(U`!461tD~+hV)tSTvUPzm zGB=rSNJ;yZ7naNqiTJG7HH#A?F5?M#0m*-N@B|W3c58B)ZzH(x7-|z`lPc|zhy1@Wri#vMq)bolrVg1mZb}>4 zKCy}qM7)Bd-kePr#h8&D78VVOq=i##0?7NNSf2-}=54P+vayXZrZITAF=)rbEOWX$$3d~do+SA)Uj=y z0#?}D4iksCco{YjSn+dcjxYPZ()$d#3HFhjV#-tq!X15H4OZRaju62^AzV5LQ?!C6 zFI%a6laJUYJpy3_;`PX?LJn4R^YMRzgO_Z^x*UsGCG;`wXia1^b!Zw_yC5?Sh?U!q zAQL&@Fpe}4lqYFc9F*XKW_dPM#2d;u7nt9#bC>(OYrF4z{~T7gUX$ZZ3o&hD-qQE{7DRb?KpuxaTJW=&rqVh5N4 zJ`2PI2r(HD_yf!bAukeN7w$9JAf?ZLpe(pe04x3y<1iG`qWFiOi)k?I7E9rWVVPvg zA3*SaYN~_8r|BEiW%2}0hV6eH6?#d`6Jz+LR~Ny)L1@Q7n7c5@RS&dal4Nkih5}sp z8Do0}aX?CiQOffsyPJz>tO8a5UNNgFK4N~9+Ka!OtRp0ah=<8;DQns74;>%4+o6m7 zP5nX>xZtk9;<;RW>Qf4Xv6flnKz)>QY`?b*kD{{JC;Il^O!1*J5wd?Nu?&WG&oYdk z>09lA2l%R}gM|b8G%gn)Ey4V27nt7Z2WbhWR|*{^0e>@1?@_2Wu&!M@SM0|d|-^j13X`4F2Zq3@c*o(OtsMJ3|_~J3sISyus#B%h@1vk|Cm=2kK=Q$#{QnGm(QX6bN=3!kC8w@5@sB z$)U%7!A?2GBIPy`Eg9jm2Ah1fM>?xZ<|sHS^kT&9g!4J`7aevxh(rsQ*JK5Z4xeJr zx$+3@yp<){v7XsLj*WmVy064JR@NWTZF~GGL6=LA7q0D@nZA!o6`1^LXa4-{f^pw! zb8I||${Bx%gxfg<(m;n<+K%yuSd$?RlGxJ8M?ABEso!fODdyx;bsrjkQ&J7^sesoC zs<)l@sKFgE8HN`6fe*wF7M|Y=++!j_F8vTe%5AQ)!j;hUtC*o1v*J*22nDr^){M#V z=vh$Dihd;E%7zP!H;{i7`|RaKVuW!d0h_DvPgH-kiz1cP?)xnqP1Bh&>SxI|##*yz zfGO;jfbWUG$9hlUZNu>Sr~R{0$({}m_XH5+mOaqTd0^=&HYez}Orws9-6fqMpJ|yH zEC(c}Lj`Ydh{lK*T7+cCD_gQOBute84FDRU^o1QGE}s56m-%hM0v%*!&aUg^T0^s3 zr8a*eg}ipdg#$H#gEQYO0Iq6H>^>7cww=awvcNzVF-bNyu!|PGR%G>ZvE_L7;{rGv zMBxX`>U|~dQb+s&h>?cDKP;7UmsinYh(?FeAUCFyJHI9cbJrZ*~sUIdS-E+O+qsg$CZ|BGv$AIo*N+#MEfS;V)p4bom@55fHgKg%mfp` zS^naWcyzAUa|6uDQ-q1H(|%r<5mP*9Q|Y$ELzhTB!X{*geVnjK8w{t0h>)P7V88N3 z9?PTAbO*&lh2MI5Vw9{P9NZiI3)?SMANZV=vV1kjZa4CLkJJgdbR6YrkY95T2>X9! 
zkfJ(k@nnpOvlR8P!XY&C)U~|knE(hab|J5f%4qNnkgX_pa1>qSw2X+ z`mdDlJXRfZw@}PqNt+(}0w0j=mM20;m0-&wv`03A?(=*qAzOSZ_zfT9*`T*NnpMET z(W?^uVe3mhc8%5M3(|fp=;-CL1%`q^Z1h+V)fDT=)Q}#?u!R&+rUoC?$HpHeIgVf-Veg`z4}3&&+(*A zAcn~^w&SHUYls{%-#Xumw!h!G+5R{1S`Y-uPTWjLY7zU zo5Iid6i+Ww3u-50uL{)m;$ro8o3$5c=`<*1`ed@F5hPyNUq zJIFf0#1|co=~tBD^|nCa2)T&~;yq%vcA29ZAoD8tq z#Ga*_D2+4DBK)9$3GvLyCefuNx87j8xG`B+qjp*%u|?ohYH~}K4m5xLc&(_wE{gQo z@6-B?_mzzRi1?+tQhif9Q5li8To^;{wX`7JNyOt|+EgD$HpBxux5+?idsz3wl8bmJ zRJ(EF2uui&Gq`X1E9mCadhnGFrhZZQvz{dWrWcd-@%yWq~90 z#g^u|!dFC<&3S7wmY#oyBt&Pgb+@np^k?eVU5%-&-qI?&;ZONhvHn|Dja4Lb)MjaC z$W>chy%eY21OS*%79W}d4S4CaiVqjl0$-y-5ZP3U|D3edg z2neozv50!^Ihu`16pYBj#j)Sb|Ac|S=*woMG?zHrNZZ`edVYTyK952=q^f#8+qame z1zL@#oV?7ZYH;yZZo)v8>EF zG9jbZZ*!@>an__kR?|D&;4Ym;SeE*kiMv4BOH){IxVFx%3Rf$mMWE@=(e;{mVr4Ee zpFv0ed}$N$e|vv0V4A;9ci4YG3Zy!fR^X$Gk0`WoRihbirw{;62nM2=ds<#9=ehl} zck?B_=SDGBQ=sH!8PlAhMs-5D{5*+N3l}JLhP)fL5$9#ft0fxQ$Ae-B%X+*P znj}Dsrt}S z2p`BJxW=EUg4{8%?3_d)6o3Zvev@u%%Y#ikHqF8c=<{2>ANXB2KQ{2@Vy84AfcS?`YY^pznUMkKXfw3h@5UTe$EmBGgR z4HrrAztn#mi(oj&RXT=4$lA#ZQyQXpL$?yp+ki~#szkReN%XOr@7mR!iXh92mkc8o ziHXtT6xmPI?}n1luJ8NZ$qWYZt0vN}(;9H^cUgC9JZxyG0yBwYEh?UDxS2A_XZVlU zm>8i+k+kT1_GT<<(jSCk{HCA5W%8IvxsHa_oG5y(4;Nfk3J}X z7`&Q3!9+x$J8PEJag(XcrrGpD%`14}<5?>u99mVrAa|`hq1&}@S#xoist?n(kt9z; z{#>wtM?yXhfM`XB?N@@F)%>YZJ_uN;jl=I_J*%dBoX|_wBKXYi^kPj&g>CXp`8E(3 zWj}vANkad?a&T6etAd2k{|I7KFwPCXIOG^UoP{>+F9^T=bHh2`DOlAvn zcf(C{7rK!&`HM`-0Os+J)n&MJp1RYkyXUbwRUA$6wHT5sSe)Z8b^!o~BsqxbpzD~u z@NXf=qJVpt{6T|M3MlHZbO&&gW=J2zSt@^3T&%hAA-+$~?sh*173oQYUh#Y$Cn?NW zjsYk|n-QHC2d#bR^y(Au*AlYScEV8g+b$6z1H$L=n&PD$r?|PtC)niPB5n{@L^KB? z8`=aS{KI?zKMM)|`FZv9a{xy2(ZU^yB~K=BFNfk+P_P_(T#o1y7k+M%dLO6 zn?h_1H<$&8N6Al;=h?N*rIjy7dw%bk;>C7i!N1eLBNjIW9>ozdfqd9hY+KcpPI20- z9Pg+&D1Me9vE-P%CnM;{_|cOi!}d$SBo8;wsQSTo6yGYe#-7{&U4AvS)^ZpdA3g0I zjw$lR&WSa$#2S!1xTLf6-RITQ1TlYs21cjF)osS>nS74GW=vU$R}w9J?85p{DWg_a zHI8_S(duew036mQ3XnBNVv$ESs|uGXHD`-wSdMPt>SsY|bB_R}5c#vfo;t)xhJ>&? 
zE}7!U$EER9lWApx_ZAV_e0Is~SS2yrF*8gg9PD^bSQyzJ0IOSgqq}dQv9<0bPDA2;tE^tBh<0 zzDV)7&mc}}q)UA%>df|ag@k`6v=E;t;habgROGfC=+l^OF%U?ZGXHIcVVJ+do=E`b zejMQGj)#~G0uVgsmfnv?NJZepu5wvfjE}uUA)5No$TN-jn_ zy$<#?@B;v#?r;dKt1fanzo*$DM@@`cSfM>MS+na^4JJ2uR+L0or}2M<#!E-i_zl7I zWxbaCj0_AJ5lCTZ^$85w+sX{ApQD)(Ia%KvR#87@oTkl~K4}w#I4Sb-kicIu&URK< zH$<;nKHqus1Iw@xgEk6WVKW?3f$E0dYLT?|^LNXPm6DwP3ah6iRBfECuRy(-C|0=& zzfuzAZz#6<)xKfneD8nEnSG?~>_!T%L>2?%2nxw<@ig}n$oTHXwwV*qi`vl}zMk9( z_L{wc)Sx=tC2G-)o6!UVpgL^3xG)*wbh!t0eV0iuOwR3XD|4MvLtq5ZHl%=r_& zFIp5U%y*)V$(#cl-NL#9x0ox-(K>`o{(iq!7y+OPboi<4U#$20GjKHiO0^*qM3p%o zQ`(!M%B5Dk5_1s7%KV$XsB1 zYT2eDaIy^C+EROcGU+rLtsvK=Djxb>9&2B4_8_uCq$Yozz=>Z9gWQX4BKyPIHKxg0 zJgwm=%aF*VAB}fxtXF4kW}(WUvPe!#%DAnnLa)lHf`aslg_vU!`0%9nP|Ny3)ZWOZ zake^uUfcW8lb`7RI0C?%fQwG{RMPou^&+n=vG(RwHn7}$K(t*2Y*W*dTb1jwHgh5f z(-re_0>OWK-`Y!=U9qk<`|#aI*do7c>&_6uF8nukF1?B?7ybCuo_bU!f{YCqcIl~Y8^X>w1o!h{StuPvmeqcokHpB2(LG56KJM6M19IP^O#5p$32ugJD+rXd~ z`Za&?=HQ)bDrRko`N9D7LQAotYrsAigX+{v&T9wblY!gvw-@fXmOJGvjg=E74s|03C^7?kYaanLGF~B=}MXK-Bt?B4gC*j#27WGmlj465OyH}DFW4Wr_XUz6#Ezx@mi+g|K zq6pLDCdjW|SuNRGeoR%1Ks%Xpt@vV=2sQG_`3CH3x|z`;hUjP4VH@qTlXB*QHGB*h z&cATLYxZ}`h7Z^7%VBi;Xqr|3-Re00WW8-?A-+Es)q`-=uHz*?`QgZ^{OChM6Yv$s z6=TV|Tt7qLqZ{v&Gkt);g%L#R+6{j)eaby{4~P9GifXuo?nfd zB;MWA#Rl(xU|@bBEYZzk=%b@sGH6Y z=I2{uA&`>i*T9#O0hKo_eWu|kTv4c*ZPErbJ-nfG%|IO04r|!VD%jL1 z`8MaHYI7>dh&V=|OrDh>S`aMH{dNi!u>_aSS=MgmBBp-4r9^PsGGKJpN>?$7`Jy)G zq?rr=dq+MJ?sYn(A|#~Lcw>KQa00BgZfq)RdeJ0l-s*m=wg4Rgp-5yHZ3Jl`&99Kv zhXVPRat=j2zH4tq*N6qx^Tt&Q2fRuw?D(afedk+1Raf{?I^`3$h1y=q8*OF>LbB^iCgq7G9uC*L<%G1wXsUlRS+~XC?Vg0f zIEzM^biCPJW8NGBEssa}4UYj;m6 z_0DyjmBl$SnGpf9C_#L(HU>GDG`sGppQ9>M=pg!f+)HxnTkbuDKds5EwZ3f#C)a{G zl-+x!XDDE@%lvuZP2H8O<=pGZFwy$%D;k#=)97k7m)G7nZCHQSFSnMo5Y0^Yo-^cI zZqofRmfAdhGKX>3;m#$zum3Q+1_$l zF)kzU9B3U|=N#$XE>)FK$+wvpl!!_l-a1vaTf?Z0^#XsAqt}&FcYPdf%>hJ6aUa|r zRaEnV$W4O1C1tnlC@Z;o>RysIL3W+Y^#i*CJlOSdp@ z-+%o3z0@`G@BjMGb07bs%zoK_)ywx`74TkFbwuj6Hpn3 zahU(7uy%j&QZK`>&_80L^4Hh63#;jo`AcGkVPr307>45;hGAfzeDwEA&=mTUtm}V3 zs=vx$s*WYysgTMa!?4KuhqNCI!(c|zkz|FGZGrxy-|#2T{WJ6rlyOqz!PCEH;4q9t zY7E0W#5xX0{#(*W92kau?LSFA`KyumRe2_Amo$G87)Eyf0g>-NP)Bg@#?B-d`ctZy z|7Ioqslo%leNaC2$Pu$;81^>)`TX*ykXFZz z`B2B%Xj!;vt)(k1IB~L>>W&YUEML%l#1%?ot`WOo1`z~nFXjSFj&@%0+>XP)Jzpaf z1#+-!r!1;%8h?#8wG^MSKH%UL5%R}+^%3rbBr$13xtVbsP)j#*O3+h<%rKjezg~YP z$@AaJ=$3eYkI&SM!4%z#BCp}_R}^!Nhk2|j3@yWXyz1TsDwG*d!WB+7D4s{UNwG9{ zvu$qJC}lv`;j%MLfZHN7A%?z{__G6})$b(Tr?D_zcM<`!&V*x3x(Xu5ko*}1oD(w% z=`f1UdFTeUjgZ*MlQ-}p6bh{q0qlP<-!P}^1tr=Hm(eob53as|01)Y(p!V06-LD*v zFgxL53yXcshrI<6Wi9GlbobDsRv(T%!edI76Zf}`)a7nQEk&kt0f58eSO4;1&}tby z+>f96Glq_@LrOKQjs-z1fs6!*gZq^9tiqqcyY(OJ1NsTNMi4-`&m9 z!=X0NAvlQN2@@kggT$K9bH3;R2DZL1H3l>5jOeaeT0kQ z@R#=8(YQ1YjdaePMx^}&>Q%zg>8}eKwu^4nftp^VFQt^w;vgTV{aGH7pfQL~_9`F1 zB=(Z8c21*(fZ|5TrG97>MP+}7CHX!g1g&xG0dM&4hWG)`s}~KZ1GO3t5-OXNCjJC- zC!j0gSz(kOE;z(NQogY88pV8Fk$%q36h!?K*OCx)hE59>glLskt^Q(z!+2DiKHC%+ z2#fVB;q7;Zrpp=Eb{h|q?5f}YqMxLpMjg9OMlv7zQNBOYP!){cR}_EBk({S=ea8vK z>eZcdT#>x0Qsk;Xj#rLn>ad2jWS)waUnY8Hk075Z@4VoH8jV{Pr;aRgw5X*{v*Isp zY!T$G(%{aIQZZ-y{5|xJgiusiGjMdcp$&U_26&!dFE4I|bVc*>%qvG`*0ucb38f*v zj+t8|X-C)N zT7(IzSnD9)Mt$?e$dZ7?j(U`X@rg1#q0gOgFr)*QD_4IbFpI~PKdvsS4n33m=ls!N z)VY=%UC^DOKjv|i@W#5?COS_3@#pL=@!UBF`Y)NO@UasJju&2S$bmw7QR$cI6S9?sXWebH3s z0Z5kan2tACxmULU)?F#BTK(V$I}!mEXPW@_nrDBYA4710InRljUr4I2p6yn!Yi@0X z^4!F}0E%x*G>pJ}67`|vnHC>to>oQcVRutz>VK9IIimhDppUNHluGJ$J)ifi!9hRG zM|`>+RE)5v6UPB$q_V6gM3u$(+fE)4Y_v?$o?Gck)dco55DqaRr&h?l$u;2Ss_Xqj z6lQ<6l$D`kcs76_ zmNV82v$z`pvt<&okLB$LWHf-rgkgW2oRC0cRdr0^bUeoCS-^eM=s^0p$h1oiHl*>Q z|GA2YONtD8~3d=A08Iawzq 
zu16@JlCDa!6hOPM>%yX58sNyQoATfxz>bnVG#`dkrMU?VCa~5gPz+=LQY84zwv#y% zAqwybBOqSc{f6hn9i8)m$0AW0Nf;ZU4@g#)oP*k_^1y0j0)eDVDC&RYCuE5Hw)a)N zSSLm&h*Wvy*heylnQ&n7~=cE>G{Z$kLYd8 zTHxQz40EME&iNkD-td3L5XZegIO+%0jcLO@(ze)pRpgi{;Zl#O=lr3D^YTH=S>vZSYp_>|;tbC7;i)!s_vD z?|PQ*KQq-*rHF~boy5I|(!3)u407M*kI_+D`tbW7bBNZ{Zx%>%Ih52c5%3AW(X!K z>Nt$2QLr(pX5Mxj**r_mc5kRbdS;v<2)wcSt)rNTU4E2yAF)|jD2>)s`e42pqs`Vb zjPUl+?md6KYriB!0vnOQU<#Hujrf?c*w;v|C2lO-p<@h>`z{Eq(R6WEv!q!hU=ixw zd7RO|vHiWkmP&z?D?LKI3U|+E6oxwyWqY{58~~cIzv_>|bdNGjzR&wsyVGoFeOTR* zISTz4+Ac?3f3WbT`ABh^>R8rw{tY0w8PFWO60CnPqt4#+KK|X!TwCQGTE*okNxrKQ z0aWlu#OWvt!xB@(b6CR}B}2D3uz{p7rJgU1=~2XJT_Pw{h(eR%oFx@1N%%ey9h==_ zF42H_ZJ#Uot3%!5kL`snY`h>w{YC0=6)BU&JF?heYFH58KWc#+U$XcIEN-*(+xc{u z6}W#lw>GeJIPhbQCk~Abroh28_e#dG#&k1+%Q}R%Y2MUS3 zRAKouf$OgB{H0BR55U=#07NODka6Vbe?x-;Vn|S8TG@R~ez-%369`@$h>{j~FrIAA z-+*|iUKhB&<9%|@V7kzdeaTe3ltlF-pC5nJ+0e|;bfHryP$#px9j$`;F|9qrh`7{} zr`Ne#7`7@s3#JFr&m=(Q{S~agwBO0uGyEF6p$$S{SXhy`XMnB#X_jb~m*EY{M+H3? zTbNqRJ9zjQ<(SKB#^id41Y7mI9T`8%yLdqWn76qYE}PO<{(U`_9#B0YBA0t&O4xs_ z;c#ut&4N*-_AiJE9wkHQu}4kX2x7e2V4h0~;r)>o?wva$0#AyRWYC7n3Zm#wwpnu! zoad9VfFm~1q{q~Uc7cS^5Ok_{16p}$-2%j2hYV87H6k+>8PiRj`Sv9j!qn*V1CSdR z5YZjqM4l?uV7|(NtBFOOPvvSI6xM&k=+ju~GIW)=3FOchDBu8@aXn~u@e3iaMQqL3 z<0GC^Py2J|Y_lu6x;88?IyQ4@+`b$VO01%XPr5PniVr7Ep+VXWjo0guY8ns#ylI7T z;^SIe$F^TJ%d34oCtHR|zl@aaV82CL&KVqa6F~fn-g!hjkxML9kY1Nr-sgXr9*>i3 z3dGEjD8$~Sdn6C?8%RzDYTQGP5Mm_#3*)#I6+0#*JI;Csx5NYk|6x+637t2(hbqmsrjX_%C3mVvSI7?<1EWwZ%MY*Z64HuWyLXDrb z?WZXxWglEPdg0B(LCHNBE+>EbiLix1dd9Fc<4L&P=|DYcmRUMU7$z-akrNQF(~UBS ziZOL91l6A#HG?<|WqUphW0zk0IYH)i!b!;G%8vx)lqH6x&AQ^c=1Frt0{Lc$r=FoqtJfyJjhpbmN zqwL9F%M61dX8oOEzJwCdSi5PmyA-~^rMZ>J%E&X_1-jCW@M@2^v8CkBIQcGs zmxod0jI8uPajjPQZnenD+fHl?-R>4f(Y{IarTb5+Q&I6XJd4bxEVv&@Q*!Fy;gneY zYwTb8rGG}HTUA#Iu=;<6-2t*c2bg>zqfZ17`9uWP$DbMq>mdt3us=k3ksb)#3CTT^ zeZL?Y8$46^sDJlW*Sv0Fj@WbfMr3J`I*<2VLj@87Ykl@|3n)3MdUZVO1;x~XxbJvR zWuMz6RF3Ivx5PZLqf6B1!jP{({GOg%EJn}V0pBH9AH63I#1Vg3Bl?4{YN@NC(Z;lE zoiIV<{)n_YPD@gK&tQz}DZIf|fbEZ~4FqptF!=-*kPfy=3`)q;LYUH#k?IT=AHkoM zh-nW$@1YDi9!Y})3TFbvZyGZ<_B~;kTZVpuLg^{SO5NAEJR8Z}`8=GEzT9i?U>!5< zN8g56v{Or|qnLkgz_s;kH<*YYa+RUNi9vs6s>letLWp9zkgZazQM@hNcu-Q@mK}K$ zQQcH%p+5?$At=<(dfw5^zVEIT5t$@ILE8>j_lAATORcuk1x7mby~%D|jd&)nfz6RG z5Ua!GLcc2Fe&wZ;;rpCA#y)sAGi6IzZbKsNi7Jp3>mGmDF{+0vARu@`wOLCFry(O1 z(0@W7RFy58wCb)ZEIX0!U$Q#4NMf@M*Radsl)(a}?ln1RZam(2E@i^-k{*_IC6*rA zFpkI=8;N4hqXbN`j2zx-{q4vN+Hp%fw`_&s zx@l%_sV{%e;<<=`0+*0}vGP-B-~iJu6}K2)zr&OYc?_B|qZ3=ZlA7bhLKdzpe5LKh z5(2(5$(mQ@W-a6K)Uo2Winc1?#(_}MKwr!r;PsK}P0&4hzBY#ObgZU*WLp&b<4;)n zb=%WP1r%j~w4+sa$qWJq$`OZ`qIY(NIP0o(b^?D;$Iq$u)$BEw*(L4g2j_=W>fJs8 z{qWu_i;+@KIq)PiErQ2QV;p!!T55#_fDa(>|59`oNfrV@5IqnJ96n3j-Pz&p&gm!S z2EFNy>a5Is#OG+Sd%D)dOg}~I#n8>1>)K;aiJO{Vu)=K+kpiekOK` z2j-h!>oko#{}E(%RMeKF?EC0W@6;FJaBhDQy0(Cb(f@gyL0KUK70jRU5e6PtPuhji zNPy4riZY#>aAmh)$*%U#RJb?ot_W))$Yf(7o7c&*!dQqi*e~w%wE$l1a${odfn^wx z#8YwJH*8|sNI*oKytEZsEazTmbgnq1JAobey#uK-*l)`9*0J?jcnO{zZrrzWz6F2( z(E;Fzkaa;jq*rSJh)G;w)Vwg8uQ_Vts7=4MTLfYH_O>DSAJ(KQUH@X9c6nD3)reLz zgKCRw7TDNwr2JC8i~g(#^gUETEm8QH>sGvXLC^eI1XmG(Hs}WR`c+kMr@#i#gQyl1 zjfhRgwJw6YBbJ=4g8$I;rdIE5P6>aVW^_2OEB%6*Y>lTc9Av-vod}jEFZ|~fltfS* zGxbCWsm5}KiGe=0J}lW%xcCrPy2q!EurZ$sd}u?IgG2D*U@0$UW*)Rghm1Z~ql%7L z1PH8E?_H$WR_KMpIs{V5D`!fWl=9OQoNR#EIq0!RUCu0vM-5bwIisf_^LT%>63doo zq8r0SoQQHhbkTMccLxl5J?o0Gum|fkciR#IVKd9)9K^AT0`2j_+*;lit{z>qJoImN3;ZDaO;PGcVe}@jJN;NI;Bwm#AZg z`*^fTM_)uN)xrC)XCUWgTL8#*O;lk%@2|(UBPbY z`C`~b8FSmwO-Ran3orXgR?!bbjlF5Y_+KLgmSofn)9)9a5LI~Y1eJf#bCPLSDD*?S zYLlOp73yN9lJFhB0$6`=xlvxF#0?+C2VUHx!#Csn(Y92J#(61}4kV5~qpe@_iHCWe 
zcG_lpH!T1Sd9>TyzaJ=%J?mLhz`GdgQh>je`Ny){FV>V@+9NYke23cmR(Byk1N(T? z7^(T@0mNk&cdnjot@M9BdNflCAhfdP2sHCCg!+f&WD^Ez=9<6W*z2g71K7UPUg5L! zOyP8Nj=J$-V2gZWFrA2D^($aPHw@`FI+=mgv2H2}pK`soxcmLd(CpZcmsZAS7x|v7 zAE2%a{^yL!1Hl8;?j`=}6sQ$Ea|zdXT=*r~U}9C&Gv5+Y+t7ay1BDMl$%OHAEvky_ zclT9O+p5T#&@q6|+xE6mtn~^h(ygR*>vDd}+&p^VPZ(4&P?_VcFPpQ4Ru4^O%%_=! zSpE4&U2>amEin)2$0-wu&^4{+ljyeH3d z7~L!PjaN@f*Br+DUX(Ks{~lnJR$++r8wzLl%<`C zZ)Cun83oKT1@`TY)7xh5>`4sU!#0)|Io-%aRF|RxCU}1&gp+;3v-L3mSa?rkiga2A zpE$$t{BG8HS9Cio4f8tP~pLQ7)r1qUI2xq$K}0> z!M5vFvY>y_m59q6{x6Xtsm8p^lfPs?l59ivQ4KX>UTs>H%688kW#Tc1^4qhmhm)oD z$ug79Q?_fC*EiYoaw01WAMrCdIdt)qy*b-3R}@VWUw_aiIEHdd-%hA+VotGXqA=&d zaAL?|>%#bc{g%6JtZf1H0|9<+fvwGBG^avRN$Y={araVr`1wcskU}eTu_nRd6AbTV z%3#@3V(6{h-c>vdl{Ry8J;ka~(<1mTjh&Y~{a23$Ro9g>JCbA6)PVzKuG6&5g&D!E zIY{%h2~40At`j<(BHH^X5eJD8VYQW{c&tVini;tAwEW5wkD0NMFI#Z;XWas5|8iY>76}e;*0N+Kx zNM)PxVNxWO0XwJ_D13&+>x@VS0JtVWt(AY(_zzZcU8ltA2vCiiXWbDpi;4Rml@GXT zD|1&VAxm__>w|f?42Fm%A;G20>7U$WR-O3Oz1P6$$#s>8VZn5fvykVdhP?t%U|Z3G zWp5{rBu#i(jK2t#t}@=IG#D$bM96zZF`#^(EGvs^Np0C?&U-dkGd$(rDFPKIDeiv{ z;+ImuQpkUeaCwu{8AV&8qm{3AKFJ#iCW93EF-D=o^i9ibTAoiHQ|PyBi8Ng^LQlAt zP>}}XQdQM*u(~P}e|~op=Q>VxA8c6|zKi|n`S8E4GqFzbVlrA_gH79sHN4IkohVk` zEqMNqxk7+JMDY#$9Hv6(i%t+?;-r7bjr3Tm`Fj|D-p}an$;#5Vm$e5zG<^>DJ&HpD z^w-kE$$|qWZ(pG+ra4QzPXy`>`>H2YWR5psef+v6VhK!nL{ZEG$jZ3&rxxw0U=6?R zkJ%x}k}yl6!sjZWSB+ZJkH60_iRw|P7eG@78MAOtidjFoZGFpq;}$(b)T4g@T)+Ct zRIZwUIoIU90yomzY3yxJ@G7aaI4=|$r>d};hr4@@Zb5zb_*aaukV1JE;MD9J?0-7Z5PPw3Sf z*|@35U@lArtkkJ(se#nj=y_lYBPR^_IfgqpSg>vEyU}aH0H16SWq2GK*H@W-_FbVk zil>4bV1(6l6}@hAk+*YCoW&^yh}P+MPM6t9x9l7a2;?eahvI@YuwZUSl;Q&9(sYDk zTLci@R!xEj$s!fKLg#-}EPIaAe5Cdfl5qW&=ZY+;vE8v7jj$kjqsEM=+FkGt9M(n` z(*#h*mwIH$78D-S%Wr@oxV7qV5RVy`QR3D<`-nl(SIH{cw?h(HT+Yqpgbm)X@ zs$2^StV${n6b}X5smv>Hz6h>y3270V`QEWuoD^3?mbW8+a+H5-m;QvividisF+a4{ z-~uf|K!(#%g#9Hxfv=rEJPgnct`)`B+))m8Rra9NP;ijzG#CK$td5xn<2kWaPAnM9 zn_Xg7##r5#HCiI?=<;a>D>|!@{X)tpGznyuY!XL~YL$J_HyyYlEXEZRpED z+vsWz>*&UlxDCLPb-$3J)Ahmr879;zm=7HjOe(L4zqB;2fL+(;q5;ezVlR99=_jau) z1W~RITHytw9xoG5bXGKWWmx@KP`b$l?g!R>OVGNj=i1z(q7o-Z+a~!y2f2|Zj!}na z)v_|@+fT{0qJso~W^90YDS5nZ+`YNwYlxssw3cr}N3pu4@Q8>-IpB5XoU_>$Z>TSo zu=F4phoX0#5b4UK#`M&1ie$Xin&KdhCy=q9$qY;t1F}i%F6!w0bWo~7C$A#DcGIW( zbttOo$!d#gg1>AV)MFAF=J{2yLgNw`iD9qPtqb9euN!fHmT;{}6U>P+ZfDw(eWvC4 zB2q5#Msbae12sOWI9lQ}Q;A36*O$E+^%X@j&m@Ym__}0u~b4{1XKH1-H-e zvR3A}i3a(&?e}(3OtF$qXW0bJ_sO-hA<#6oo) zdwPXv5dqEk%?wNN4v=9=hE-{+*uTqGpfpN;Cgs3?wNM3(ak6>JlgqMZg5twYOGGBL z@V%IRwSq$tmT>?L93D2Ol-IodI4c+KLP+Y!i2t1#m-l6y;=~URe_;C+FtdZz|J8^^g!np+xjZ54MP?2= z#35*ZQ_xTqGbu0iY>sVYg5C&1x(Kd%_<5*%Ty@wlfbAvpug+~q`kt{i3YM5@47%rd zzr{SdBXIW@)m2ppSam<6o0{!$wkvTGCNLA~b?&Xn4L%1L4FcDiDsAtrCXj&IRks}MqSsCmiU;0n`!{A5l|Ghs{jX|-s8+rt7)Pjm?yy3W$@ zqtt9SUywda{aPr0CwEL2X`+tKhDWPLZA5-6_5;fGtSb6?h6$N4Bc(AC0xz%E7^15~ z{Kkk4Q3**{bh~lrezfzm#UtwXMJ4bxXP>n6ECRvr*O`8ORCW;d=!3lWI(k$Ek@#Z;in)_ULeu!k4A>Z9G@!ztz1G?&-zs+HD}Q7@{H z^Y&wzEbu{<2=Bmv<;DA*wA@-HofKhOjNsRTW$$`j$wI-9v1T z=2CtfBycywSwlxKkHYEx1mr#V)-`76htrthi0X2?et|iSC6&qC$GQ*cT%7}eJ*?O0 zZ7CJkxqSl`ZksX5bSAGDvym-QwjKq5yed=gmLWS4r-zK)By9*OvhktrMnb=T&oO={ zJ0*;Kw#{4}2!5qPqg=e$@LgQd{ky95&!;Ttyu`uPdI})u1sUYs9Y{&^zj3(rxA5W@ zS5mdLl7@^RDd_XXs+K#^zP|krwAMIp8-y*MHl`J@N2|#`Yf%{p*Tq2!_V>d z&g~wJc*|1t;`ISi@ipR52ey}gQO39cC{mjmmZZ|dahVSf<6pOAl^AXzOKdk4afL~& zrDa}r8i$M<&5s|fkD+sHa`3d06EUeT1R*M*yk+S$15v#h+3(4h!@j?grs9MTC6Cd5 zA!nbUyfhLdJVGQ3PZ*Y;y%XiThx8k`5%Vm zm!R6~%SlerYq~fS65P&_MIW1g;Uv*Hb1EG`^!kvQ ziMV55MH`v+phD984=_2;fsf>I8BDrYny|=Z)OgO5hHGG5XylxqxI~4AcD28Sa5+>- ze@V+uCO`T$!5pE!vvztdHIMip(DLh$vw5m`R1dLIeG=nLhreo}w@g)s^#vs_eDArU 
zh;Z<&h~B$TK;y@MDe25?&%(A#DzuH+gewwV-#m%LJI6+&wq7(Bt?`=pLfdRF>_vvm z9D(f0E=kpfMh@Mj2)@JgVympd+`S2}JECY2R9zE@j%{scE5oVxBnxN_!nX6IE)_H} zC|Yi~*Vat*VD$Kg2-fohlFvJM45@l_pUUSL-5RzISYpY`JBZqQksFWv^p4HjIq zSEY#-k~1uulc9K7b9CzN>Cg8xNfNc=xy{m^BsM)8!|+74PFB0INd}DB<=QjNrd69s zD5c7$_&9ifX<*4E+sjGzi^R^Jv?sWP2!B+dmD5R{_F-uNQ9vq7-M+LyW%~72vdD>* zxbWfl(&!v>A&coxAuT#c^L^UBYE@+k3#b`#`IA!M@32t{ZgH42ukxlWyMETwW>%61 zx8i?nD<(|7(q5i_iL%d8uW!HBz!I~~mvfzbSaCJ(NOOo#97*jo6n7YdFl41kA1 zbi=0N&g763+Awl6o$5Fnd}^Kq3PaH>3IvfqVM-5ka%pwnlkdR^Dt9Nns`3pBNR}Mb zW9j-lJnXQT^rtXgrU=6PN-tyr!VZd46eT8RUW(-Of#Gn|bKHQ{egI7Fu{wMz1zcI+ zQ8w*=4bQdfHKd!s1gdQC`l#FSe*TF}48*Lc@gS;ak~PT`9s=w~=v9PGHbQp6)7kD( zD$Fli=_Wk0FPHZtZUB%?0gHzly^A6|FWRmkemzNQ`qssf!veS{Id1pK~56E$p^ zsaR#(sfLBw2et!_oi4MhmV7g@_1?;@8gD#*b-13t=e?6N;-EfIFx8UQpU@2c1RM`i zI7O3D6QA|-I+3V#Bp#W-rcT!gpu&6!lujQ{_>T4L2-k~|BZ{hlps-{!s7k5pZNKeC zm`~58DR@s;MxNnbQ*&xNEY(C)LMsV@02VE+^?(9yk4ChQ%Y1gs_xMQH6h2?P)b%BQ zL#ZOAO8l~UsM9iJG4im{!e;Q55+OhW>6D7x$EJj&LLOiW1ISN>g+VV=R(Rwh8qE2F zh+#3M6edwG*GqDq*8BW`2 z8OtL=ry7fYk6Oe0iyTicGSg;dB?4ePf(T&$D^Qz{=0k#+_mDV-#GCFzV0%QdwQ8|- z#4{d?b(n53L5K3#LC4>)d3*R=NvilK&qtH7V}(%p2xRYI)zHXH!%-s9RSg0!r5KI zmnOOj=L|w9OC)HWo~qQ%TPxl6z7pf$5LjRvo2Cxen5QH0wM<>Cr8+mJT4t;<-y-P= z1G)cfkD|Yxe|&3L_FxVL?1oTQ92WW=4(!X{{Y(VO;GVN3$R2yTuH)W+q#w>Htl3!4 z!=%7I7aQR&&b?5$5Lqz#?^NC>R9K9@a5D@xz)rqj$Luv+5PUL6q^W%LVDH947%YJP z63aQO)8JZ+3{{|mFI{&L9a9UIi!1d zq*7MD#L(2C0j(ZrNjFB5(8Z@na!>{ZbjNV<*3zNRNufSE*@SvR*lBUa{@L5`-ydtT z=`x0K7h8d#6LCX2LlFnro~;3wK*A=e_v_hvh;fPjC|#tnj&vD+l-6~;(W!y$4Y#SB zPm-fdZ0T=~036%uX{S^mKC@f8gR`g1Y8|?hZlhVt+)IfgN4sOL+c?9u;6}+Lj{g+{ z2L_ZAWUlZsBOhy-xW>)O-assgG zZ8n(*Zutz7^`S0*@daO*#;9L9+Mw0t9agGPeXKqOTD|9d^6`pPH(7i@P^eG!#!8`C z`|#%AyJ1NO*VPw@7yQ@hMVmb932Zrvy&^jg=nWH}Xp(BDij8B%7e7pX{J-uMVOwch zp5e^VYcg^OKkI?|HqQN&%W8JJ=oG=WJfAMIsO74r!5E=`taa_*NPQ_etLmx^3!|cN z)PW)^144^DpwS+Y$~>nyT=-paH;&O%9C)m*E$XMM-&g-y8K^(NNccKdSYpUBXwY{G zMg1WXdFWYRg$dM=g7&0Jz?|2L9x7NiIXwGbx}Kib87EnNxQG;$P2pO_OE|ba6sER= z0#Ic`EzM7VILp-GWt~-ytLA$o4JR&sy_U$5IF#Eu`NN*~aU8i&uclFe#XZ6e((Gxg zmd*Qtr*?HIEs+7q6W_{4^L`w?@q6*JEE{7_50yXAkz%|cM)TJGYBNBP<_BfMN95yAe~WsiWlSdGBT(P-wP30|Yz;Zbkx^=bz|wY9ZX&<6sR+y-Al+7T zU_y`05A(C6O~hJU>O`~}!zrh{^VkV#JL*>iq4F??B!CvD(m%V35Wm|ySkpQ92G04l zYQwC5E~k~IhI_G6mU#6&P1e`8_Eg3-g_0$6+BPUP?In#UQkYjVx0!=}M*?rs;5Mpn zYAdCTUSBQuKM!qxr|w7*@c>31SF)-~R`>m_4zAwp09MA4*2;@fxWM}aH}FM>iulba zR+edo6*^?qcfu-K@OwsU&3f&8@^lf1r@7YtbXZ<-9mw0P|2$z@n$-$oMZ^PTSw#JR zt)T;H`{}~rjkE&tm~kWeJUvPJ%8=a0HDR!h`baIa3Ss43`IVL^TRDpq?~~w5)8*-VQ;eJnX`96}F5Vg; zO&4s6m)~u;_`rh`vGenEZOC%6N;P~O`F>YZiT`a42_&hnu-s3=%tB#*U?5Q- zWRYZQ%3;HkDQxcwPXtge&&(djaoO;morp;4vHYsA3$4tY)_~V8fU}Z-3ijE;aQs;M zEV0^_7(hzGZhM8{QxRjyF*1QVwM^SO=OX#mboc6i4R_HRT7D(m3XAX3BS- zCOcucOm#AIdq2+s%C_}<

dO5Puse!~2k$eN)V9^`w{zU%3MmQzPilgj5Xx!AV-X zAK+7JIfcm68b*Pat#i9x1i2mb3xWi)Yy}Vpp>~&$wpF=M)pEpFOj`t2~7CVB^X) zXK`zF!NZmBGyaK5r{HaUvK&7Gu9Uv}y;?=*4<~)*GIguauG!;Hse85Fa~;B^QLRJh z%@DQfut0%%9Po2P(OQ^nFtSiNj%c@H+MDh`dPAePG@XD~U7!+wF47}+%%EK{ec>5$ zwNJN-e`6)5I->;u@ihf{tbfNVlB$B`CM;~@Hz(8&+*%{nU7d@~lh3I4Eh}(cdftQ? z)Q190CiM2JtT6N;Z+GSRl!o= zGp$!@w~=C8M1W6!rSU_iz!wXmg6Ud}Qmz<K(jCDB9yMpt@Lx%lN-8D+FFO__>q>CTr&*5WnQ|WXXucp_Igip z+o&i{DEcuiHZIMIVIe|nImx+!MffO2pD|<;himQ~V6EGK>Mqu>zLalUNP`)#YPJ%N zQ2;6e3Fzy8P-t5sb=cFjO{Euo;uUt?aVzJc^IP2MCx`CWqJx|4y*!3fvEFIuKo>%u zb%~rqK_owysFtAp4wA#E)DO-(U#Vhh5|g0fW6Mb$QP3wiydVq4qxAx$gTV+cq~enL zJsf!7%-mrns;}X!aKJOzQ}-L5wBd>o*jQO`dggP2<^KadkXR zb=%(4tA7%`YT2@8z?lfFedy+6$stu}pKx>0Fk{1FSbv?Tw-#B?tog<)Zl?+4rs zi)_MAoeG^>cusX)ZjlZq^R)Ii26n_@FpDIAz%(KDCWWPnI0_kn9hh`5^)$VL#>OVq z#z%NCxo%Ff@q`+7jIfb>^? z23*%{apQKlGbz_K$0?afij=+WS9jEncr-Uov5V%|0f6DAJ_U_qHt`5}BnE+B zN<1%0qAi^&T(J)X9@Z*dSU@f1m!dpBdg$B2;N+I=4g+J6KeVjL3&l%65!Xx(Rr?fUyJp!Rxwp4>-S$jklk za7dv;gO2Eq)pa+7sPOOIQUvR*xGm~iY}?X)@ru|S)WCZS6}m+9$zM&ap?I8sU>(IC z-2>8!Mv0i(F4l+6-ui)`P*gQLsRq3EUA1C)%WV`&{9K>+KXWRbBBek=54Uv)g6|wI z6?x9c$Yb4(TRh`m>#4%EPVf*U?9+p{E0PA9dDT^f)drWCU`hNwnY6@eN+%i0 zAKZgQMSsmsZg}uN6k2Z>V$tc8nZN!QVCr}#{Bbut(Hok}G3-)Xk%3@;dkFCu7aC4) zt9oKia>5K49U?{Uw~*82?)yu+JPXB#9Wq)qS)nOxmARHDyFSJzpZq9c|1htVSA@$H zei20UN*yLGnseUV+*D$ubhr@3@yiBEiUv|!#CaPJhP&OO}W&5F->tm1R5 zO&zb|pqj{+u)IPd_w40=ceycJqPQZ^tSscpdumUaQafUQ$0e|FP{2&!=nlg-6`c## zQn3hrzg()2@2FiLCqrM00{7B&wrVF)x6rQ3G!GiCK{ZTTXPa@#8jhq9_1x3f9?{S6 z5_lzL#9n|3{l3ggJW}Q1xUz$;+{Wx7fUZc*)})J7`e_ciFE&Si8I>gr$@qd*tM$FZ zgQ6!z9Y?9SgUWfMFyi{j>}V;QXEuTCENeuJo7ei{hcEDFryP<>EL@$D13850&sE5Y z*=CscAWxRV;|gU__~Lz3-;FnnPJQFmM+*b=xFl}q088GW4hz&pN)EptlCDcXaVam( zjG8&tep`KEa|kMbfG6VHCa4EGVYcOpU$%ZIaZ#OS!B=#pTNZ9jF+0Pcf=hubv1hUB zx}Izf_JLW4kwo8#G((pYE}9VJzGpE%-&LSu;%1d#O`>-T z5ML$#14kj_^Ya|1xOcDOBi%knz$Y|ZgCtt8IU<&(&~QqB!;D0FEZg4?Od1Ov%lU&{ zppvcZ8NnNwKcWi-Q)eweGz+B+;KB(se`e;XH^IzFP z)}QVfTJ1ee%VMX>tlzTMSg;&=X8E{)AcLN+no6IZ8uR+P2H~zzstpgTI*B0J<^|p=Z?W zkxI>;UGV2a(Ov>OS~s+c50l5!yHn>zco|(|5JE?Pg_$KB@s4iQEyJJN?!Ad;S@mhW zlXRf}Z|NE*;oxq5g`~=~TNT=|IA>8v=Cu!HDa6O3*R-5j5WwVksW-Xe9pD4S{(W5a zdvQKnABhP_$6qV0fK(={2@o4pzp4q^!uW+~Sr^^qM?)UOT^{*ycLuXG4iMS2ljr!d zRN6m(T|5%ju9M}y3HHi^^!CMCzwXaO9I4aI(wtsq3y<+8=7-_?t?zL;1MP5IBqJH2AU=ljWC{@t6h$ zY}wusEn!8=5FUxA{C75m#07j7t|X<}H_AhQxKSQ(G7l222{tbOz)N&w>FKXoJH~^v zXBzu6A0H~!pNwYk>iEF|Cvqg+$xl5~%`fi5o!1&@de^`L{J-ez{vF94D`2O&Q%h*W zOdVr%q3^zo(|JE~m4iA}WkiT{aawMd1ox!%uFZgH;?)0sfS%;~rcqw_M!{mXxjWf^ zfYx#UCZrQpfVv?O(UIBJ3Uus(dxvYj*Bp+SwDL#OEn2p2s6L5P8~l~9%1JJMXGkD( zSm{dfuy`i_x^0{`9@`xKE@=vu!+z>djWgZ-g!7Vr+8I$= zGIE|p4X+o3kd7+F04g*xMEs!1E~N@7r`c%Gaw@Z-bU1E)1jnlT*=|@hHJD`%%fHL2 zgiW5s&lD=m!mhm4@i&V$FmNT%tAGq##7_cH^aF_SpA(sF#Q=b#X{v!_3~Q{#lo2$A z>ubm1#rK;B?<7IPSaNxVJ*3@#I`|jO5heagD}wuBvqWRKrjLBfl_1$xI)3Lvq1(Z! 
z!4^A=dx(#oi&Qj{Eg0DI<*|3J6K@i+R=;Jgm^MmfTh~MPfE8Dza)hFgY%&;- z(OGA2V~Ink-J^wXntEleP3WoK;&UC>G&A;#CsxC2f8J)JMVk27m!+ z8iL&05P#`8M&dmBqLO2WM0msPX~f4u=IkelAtveBAO;LUp?agllMqdk3op=%V5o6^ zWeppJf|wN*k4V6pbYBrFIY9nPzm_lysKtQg3b&B09R5cVKx(8Gxp=u5b(cE91#ZM} zG;2NCvh$A;_!zCIibj2Zx@Pg)DC@#j$gea!iC#EV{$*jQArS9%89bAzMJ_xpd)inkOr^M!xU~*S+ zm4pMmf2@dVmR`{YajYs)9i~x1aNPN-!zi4i^Rq63bm%6F#jnYl`pKC9utW*K@1ypw znx0|si(kSb(FIvDd?fGDa`4sQ-SbUpT4;Np)855l-*uYD0KNft2)_} z@F~W({bRB*t-ZB>p(9)rbY^>^WTApcNdiyTREzMO|HFi_vh7rw_G&^(xzr_PI+huOqD*h zjw>ZN6~)i|F?q{T*Q4PDL8*AUomcoTvP~*`w^8QKT6Xp!EdH=N>Lwc2!zgf(tb>YT zNQ;z@^mZ%>1L|T0f-l9n?XRf^ni^$hJ4m=RKzZ1h<6h_FM=#-bwa|-K2u&}5TN=Zy z#c0LwSA9x<)52dsvD`-}8(jv%3nGs0tI_oFo#TdaMWo%Mn2TwkAp#wy zRcH=q6$#>)Kula*zr0n5sQzX9ntET?t zl#9@B0$14o-W75~!j^TLA||br!d->li49Y0-;PsoJSbxzLnm;5 zY}hEUGY+-s@p`<#r(u+2@}TIlbj+xa+${m2!{}3bmwP65a5?G7^*y5*5i3;%0OXU1 zV2EEmLhci@FoE?&b}3@0kKekDWF*A}%r67-2m|)q@?E_d5L*&gE$77?(klyy=G^mB z0fW6m^p0q)*i$*nb=Syx?m&+pGIkYzETRM9guT%n+L0HE@vnYdL7Z-SEgMqTRapsD zWBF~kqDJzh?x}(&DsZ|ii$L_`NgO>HAW5loR5+zbIz|%>Jcxj5`_`%mwA;F{t*~yb z!l6EIhtzaT_X0||7_O%&6~H_3?T4!zcQPY3CK(YavNxluK;NpbW<@V^t@P<*{G+CTVU{GFv$;=K5k z#H})Hl!8N z+I-zihCpnX+3!;la2yhH1>_0Ydi-j@_SohgUHRK8}{5 z7R+!_oro(YqpSk^&hrwxOTfZ{4x){=0fM)ov}d3&iWh2kOYGF?V`ZiPEV`V+*CEZ* z+gTQve-vm#ya&#I&5K&L%>{3^Y2^*&I|L!>G`fMhroDss6$b>NnPqub(KK64b##Ii zxaM9jED2kn9{Eej=Szc#gAsDiz3a?3Lcl4*56H*7#d!* zW9ph?fq-{ZZvx88YZ^DE;3tAr=cZy*Fs`U5i^a310)owdgD8BWc?Cr?sa+`$W>i{U4YuTa1tzc&4h{0}GjrpL51%W7-S@S30hGcU05{ z9Lur+PT2*2NA}S`0S#Bw7w^R%W}E5~tv2CHRJT;;m!OZTLiuB*HbLQkO&JnVsMK}_ zQ8znd=_FRCkGK&~DfFB&5l{(U?1ycgAheX!4k50F{#6#1)PuTRaEUc$T_4G!>jYeX zn8Fo8&7re*-)ZUCR%zB7KiOh-of$wbS4K3FvKRJ$2!oVNEOjkvn`n7=pSt$eIyL;0-}K+)9*lSM_cV(_kht>rr`ae z9$K5OVz`*nSEbKPQv0}C8sCl~R00CGaQQXJp4yyY*8+&^(J-gs=0t?N?gUSLA|P#= zf#?f==dU3sr%PNVch;n#EjF7z@}$#XC-PG&@V08oLYL=IcArLdkAl)pXEE>O@S)%V zJ?p%>T{5S^cL((|<^3)O^)tJYN40)}FgtTUbW6PQd*d$i3geTKb%+iU)OHl@VLf-5gg}b#{#uGUbgcfk-gA;RwW0IQU096`b7D?&} zF7>b(KhiWuHKH-rF>#|FUuY*saMvWfhMN9CS{e-I&9^_39vz!hs+>wZJ^P`%;Y<>L z)7$n1mr@z+RR;SH(`EVIZ@1is{U5r(`xsUB#`Q zPzmOyS_g>7wR?p3GQ{r@10M2>Kb?24Vu{LUHS@@0m@Y+L;hZ$g&47H%v|}Im6CSy* z%|vT7-zyw9>Z~VpRqg4amwi#e95EKk6~6 zTt`50%^xIWw+d#|V|eN19o<=!Fz{&T^ehWsUOQkIDY>D4R z#oP)PF<*T$aq>r*U`Ma_rzYvhrzu0-48Sm>RRYUJ&hszQPyq~U2^*vZBTNG zeoniR`Vyr80juzs_M2kYJhtu^GAm}zE`n|(dRjeq=|R)UrIrSJmPZzL^&mQXZ{VY^ z;S5qApl-Wu1E$Fly`CI~q|!6dZL08nt9dobIeE3ANjO5GtZ4f0r1#VSns@D;xB2J~0xQr0W2yg$Z{en^vA^fuHW7tgAvS(K;q^sEPF)eZtbf zssEzicEy(d>ifNdzt8f|FL2tMKe{&O5DoJsZUqjRVzK_mktC zQ9ucU69CWF2=B3J;F&Ef{SmnN>?@KWRk2N3b5v+^S~I_Y%Gk20KJlcGFkl3QB4;`j zaJQ1^lDt^hgrqB1l`9yc01CkO72Cx=kH*+dF2fc&=SI-3tiXl3v0r$V*V5&**Ogiq z0@>DQlrU)lG(%I}O(0>IdZ+iqmDW#G__X{nAo&3paVgLZfVZFFBkou72U2xx<^CGs zxo3vENJl?^G}B0ug2egS3IQHZVm|B#;Q2xxcOStKq#!N{}Q<4Rg=n83TVV(+; zt|*G^I>;bZxIs#Rjj{lNB$3&tcli?Bv)oz@3GQZpdnDrN)05Opq-~htNf9`)xI%*P z-gi6ugDrU>lCkZz)@1X0GGc=ZKXzX?fO2isRouqapX^4UQ$>YUGj8Z7qijlg|KhD*t&7j&;Nf%*tY`@esU(W5lBvkI_#Bm;D(kQxZQtz-tjwZ`Scas#Yk$iYRG= zx4>^u5R~pGXLg$G4}nmB&FZ}AkhjOTv>i*eaRGjO3JQCh}+V~ zqm*wv46&>o$RGFpslY9luY$de_F0uAo}FHeQ6NzbmPLs)Th0cjO@7CB?d8*B-|^Nc zvH%QuI~!$2BGuYZ&d<=Q$==mFTjA2wl!xU`ERnwX<8WDIp?PyeR^waIz z2|PY2ndMXGOn&`erimnQ)%1nyiJ}SlMv2L$3(tuCg}kA22GtRDIx8>M3kZhruKF0{ z2GJ8WE}bmx9cXNXMjm1|?&5i@XMsO|YqKk|68JS~7HSw?-3;}spJ<_ROpZE*II8q_ zjy?T|vhRNk6FF{7Fwbj4EI`wUKu`)>TP97D(BErX3aD!12ProRdm5O7w>(N{^7bv2 z{+J|)7apzADNeZ>ls4YbTUZ8RIb^C4-60|AP%IJ|^$-+_Z@ndqPGlW#THBw0Gs7y* z&I1CzQN6%*k@(cM>td9g9q!oFUBW+;MC0#4Uvu{zciF-$uztm7&96UKSlyfv8A6vN zHo|xxVq`y;XR+SCO3T$3+jM1!4aD}@rD0&A+oWEBM1@+)ZgbRbTDB%QsllLcaDne4 
zd33Lv>dWqtmeE^}*MT(5C`T)Q5y#+gT=t2eH#C*oAWxw?wl=fTX2_qNY6vIwZp z-<6G}FNX?u|7FUu^}GUqsAf8am`9dm3v5HDiXUYi7bu5XTq6xeMMnI;UMs)fKyy?s z!H4~O6czwmp)s}t3#t?$==8WkU>;9J^niI^cj0PA93G21HO=`FjQ6!6gFg(2u7~LxFMK6<31tZU&OhS!8+|oqO1}igR z`}yH80mnnebwM9!gl(R&i)dN`jCpL$Fn$aTKvDtV7tA?RU`DE|-@hP+qYbZru76ir z`uc_8_CsZVlwVOI02x;eMlu_ZQ)bh}9ratM!4%09V*R#B_gI&_>G7BX(>J;JW(_2* z$7mtdbTC-#kuT(-k7NyenZ=msEqDrCpp03YL|2|8IuOj$zVqf8U3yZ85p3^9KP=~@ zu^58bWbt8bahsG zz4=_lHw>-B96!v%X5lVeN4qH%pb?gXH2Lbc)FVbg_?VuJ@z+OR`m>vw`bO~EBbU&N z6^^T76_WW2=`D0;FpgcXSag$(ON&hzue194abSG^#ima6>v`%ky_^yLMfNvR9}dFD z@?si)ZYjJzT1EL(;cgjUa{L<-L;dTa!;4w##@ zuq3(_J&&@K10a3*5&=h89`RPA%H%*hmh{-QD)uBcfd*mQD4 z&DVVI$LJu-`brWVdZ03tYi-5aMTzr&cc?~9v3b3x* zx1}_-kym!1>eEO!*XLSWK`rPI&(2ns5=D%nLV*p-M={j?@vMCN5x}13JS=S12h_Mj z2sdaB8zg*CGBnXTpd_!4SE}`wnC`D~`mvBgvm$T0{l-dq$B)QouBWGD7%*~wU}WFJ z{0yyYicH>NVB`JL+E$}e3J{XWYoO+OHjRFMipZnQxHyyp}(Dg$$rGcYQ{kc%zx z(>Kr-?HHqgk^6aJyTPT{JJ6Xua2+Ocw!K;W(z(2=>(JjeO_IA9l$yA zFv&3iE58KfxyK8@3T33cFMqg7Nuh7QwZn%xKgCUq ziAT8kS`U(SVs^tEmtJmM;PfSU^rRCo0&EfI!Y6+9bu$B$=ChQ4+$Qp4GmEmR)JOcc zTo=^~wY-Yg&{RH6Zl*&i-%E+3B~b7L$1SR`4bn7b#wP56DHq4S#oQpe^0FM-=Z-NjY%O4=qR%Q zDSk_Y;iGE_nKsIQl4y1rDc@9_`je6|C=ba)%sytryC;nA1ndjVY&`&!QC0U50SI0A zb#7Xr{H#KuKZG)PiN$QngA8-67{&Om0`4`Wq4Io_Ssqa-;v@~QZ$~#*YQ}DDVd(=< zhs4@Lx}(+arzf&}7Pj+)zs2C%wS9LOcw|w<mCjt(kK?UNxN3`h|;6j zg_*SPFTC38x;iSA-A8@42hK}w;GC8Cwo775W<<-fEd-*0cxpHrg;mor$Kpih1h)N( z^NctN7sSs4ZH$sQHTObP%xIfc&g~&V>wVH>nZhxDOQlQeNBd81?Irq4x!$Xh=}~iI zDmTQ<+{t-LiY{2Y!uXK&47;gh6tBH~EgtnTx0Z*Bi!-OM#3bFtTp@aQh@Tdz5kNvR-#EzedBfWO+AaWGzJ`ra;Kv=B^S!x5sV2t*y|!|Da^PQbDWtn)WuLP^fsoJ`g2U>6|9(*DZX8*$6$K1}w`1=MYvlHg4LVJ7 z0Pp5WfpI@pbM|f_dbe5WdVDb*KN#V+7-k@Y!d20_ErTt92wsIc4vh1GKrM(RviyyK zj?=83MSqF0V+JpLO=(3Or-JkaQgUy&c+A|E|2sUNjgm7IsN z?EijaQklc+6tf=VUO1be3n7h3SLdZ@&W@H=-*)%L&C1OVO)>+c^nw5;>!OW1K zy=0t_Fd~kk=aQBZYByqF`IK}s9UzmaIngwc2dTODDZBq>oCIILL>~+7QM z`@9A|K?71QL-S<%TcvR=mxrrc?}f(p>1m96>#~?aO_}1}0;g#;96)8DsQ6>e1CbfA zuC&A3a`gJq&hOYH`)9feSOET)3N+(?6b|09`W{dHV)jb~^YgAHWtVLSQETjqDVr|+4wCFMRQ}@lI<30zXYzZ;A3EqLNE0ySboi_{ zVfhAG>KkkJ`Ta}R9W_d@t+W?fh?+Xr)y`*3be`^-U@^CM?%}n^W2aHA=M17>ZB&CB zqEx#`M*3P~?M0DA0b2PZ5VLO7e>uY`bzR*kNPDCZ{oDbwKJ!n(*Dv)u^qcZAQC!qO z0~1YbTR>eE7E(Jj5qwA{PUA?dKF%~GMSKluQ$?Lxf0l5LP8nDmMVHqc-``EZAisf< zKkl4e!x*aZwjh3l)!F@{-wm8R74MK^Ny|{p%8hv+6JDbIHIPUdD>I1se`1s}D7wlJ zrZn>?PwHdVz=jUB$`Ply@SIy~cV8y%S2p@MTQ76t1;lhRL=9I*N0Mx*Zdo1UIDYy{ z>OCZIfxrO5hOrrAioE1ixXg2{_JRN>HbcU8vstNDsCPyQ+`nECK}vExb;(h z`kSj;v81AX&AMEEwMY^^fAsquckI!CQ&#*d>}NgGwyks#^mw-vPSjs@zVb&jpdiMv z7>*O;)}{FlR+e+#+BuhHrCbfdio4Wbo3b+!Icyn7V;;(`rj}-RZyaBrPYv?iS^=v@ zTj#<7dS8m-no=Y0{(Wt)LMZ5)f8|-xtC~N){*9aRfHTEAa@TJ_e+x-#^^j%G&Oy>K z#6};X33= zu#fOEfhEJ-sg}y$fXz)c-KctyF3)?ZS)VU}re>)Y&@PNdodIug1vEciX86vQwd;2RQHUxb!Bm^kDBt}j2Osc@?~QOwf~CBor!a_K_D6=Gr0 z2Q{x-Fbg?imdrb&g*!L~A0Kuzd{-hY10x0MHUf7Cf0#Ws*P(-s(tsMuer~vo2S3dd z11{sRmh~fXx#^}ridcsX5x=!LOmCw)6rGLSUbex zol##IQ|K+{3g0WR{{b!--EER9U=>4VaFp|cDS|s8w6>C>)TZi z)=PHawe?G8`QghP1}IOZVP)OQ30V$J?TjPp|E2QZ&U ziyN-qYQ{}j^O`5YzxHb0j-v&cv|V--sdSslK=bj(cGH2h+;!JJXof=WXl2`W-~`Qv$=MfRV{1 z6+9&&v#xFLXA#FMj(N<{uT}asCOrP#=+V*DS;E-6NYsbW5D%pfZ&r!X{CHRa<;hf5 zo$1s%I(}#oo|=`cSSU!~in{2|v}3m>RJs+)~Feva*gL!rsUvfZ4w%dexZ7@$RL zAlqjLLnWMP<$65zUrMzTE9g8{f1Vqq6Uht=u2qGUhtl4+=gQ^Wx6onleh-{Fz?ch# zZDzaS=mbFo|xd(FYI(7HJc>HK0vP@GYe-#z!zty#g zoFLpfU-!N%an(x0hmxQz__C}eF(llHGn!p=K)L~MD2vYjvK zSm_}!t(r`qO~p6O+mi?yfBD&n4(V7wI}2Fy1-%+}YIL|iV~I~9KYd*r)`Z)D z$?mz+k#O5Wg2Q*CGI)o$3!S5CA+}#XD@PoB*TF*fP2y*DkfyJS-N+jO`AQe?+_D|M zM>zVa8{N3M2B1QwP*!g0oVKlyUj1eZXrt*n)}k+~*2{%gf9c9QY_O*Nnq02< 
z+VU?oNxwKjyrhG4sJGrFNI5@5{CG@iOK+A;$x%@K=D1YwXcl*Ie3+`@JV)Pq<<682 zhp@N1T+eo6K0#%$1r$Lt>}{f%!)|&yNp|?Hi&FP=cD9l|sJu`gpsEb?gs#r7&Ofu; zG+!yxowysW;P~A8e`Z%Ncxl<TfCvw@;G=3~)Ai+xw4QW9`?G#CIZ#X}3QHybWw78J`k{}0&vpf?skYqwZn70g zve~SbOfPY)e}351rb(oy2(VCQ30?P5-xj>q-Q6xuCj+2v8a6nExQIZ`!CeMeRXg#W zM|QBO#$*D_AFaz(z5BCmpkO5wtUJRo7_4~N_7UiM<;9#0CPo-oQ4{Qrr(dOks#tbu zE0=aI@BxYfdKHlW+J5RIxP^H%=5tPLlWzFMxA;OGe@PwGYL`k&`TmVm7a^2UkZcw5A_9DQ%~XY zrIw>$e^g?XXKVMbQpaJD9ghXqz-ax=vx&m)MJOUetjkBVo|&6!LLR>pznwzSSX2b5 z8S`^-uZtOml%_ktt6Vl#Tr#h024DM)!>QfULBOVH>#7DFXrFx8($OSa6^2656&+fG z@m6D~%hipty6Hc(IZQybp_x-b6cGTi_j)BEe`|QRxe9@}37Em+9?EQS1XC7pB?Wy@ zWT01^y9wEcbe&FAYrBIbJ?I57B93jkiHL>dV2B=#LZet$rF+Mgp|MK0sO8%B0vq+Y zx#kr=9So|HC|grzktcIo!EiV!(za#w1#_~gRxrC12L)tS zcOM~(iCNTqJ+;S`ylL>dU3Zv)U~YG1YW()c2@5CY`57lKiUZsNW7>K11G)<=z>`aX zhThR&fhXL)WMp342ZyHis5J7M_U#8ye-OCyIZj*1^ULQn2q&<9HtTc&@|POGVhGjh z1KWBaK>N^pS_p@rO$mO&r*^BJg2U_m~ll^d+MyC##R^FDZO2Z zyN|EmBcgKV0VoPtLZg?V>1!8P5aN|>@!s8lxdInSs=O;GGq@$3Pd8DIV;YzbwY)E7 zq25SCvdtd9n%-k_^y1GIrOG_me?La3P)khXKVT5w++J5%BE_2T#bjy?X^OJ=wPkh3 zZ(gKd{~M4zl0i9S<&|Aqnxfj4;Ly~%29f`*OgBm#3f7!}i1zqBq1>tL94mB=pHxcZ z#s}dB@t`rfpn`PB?e}vq+1=ht; z+iCiMv!KG-GJ_N)7~R;wvBF!@PvjG;D)v^IHqXOrmi|@}8&P)9t1Y*?4z8cwe8;F= z7DKp$&5BHR?cj1BP-(O!sKZ(|9)Fr_h`D8Zpbk#%nfz+p1JotxG;5lWr6aPB?`J|o z)QBf6fU%YQ%g9OvX`9B0fAywuyS#w#0eVfobxK(7?zIoBp?!$V7huq+%^|}Bgfxp5 zeoOTk0cI1i^P+fFBoi}vAFPIqtHHxN%9cn7W0OHB~;e;I6z6Pzf{-_aIc zHBzqao6DLq3NY@`u50-Yp82LP<4_GY4dE0HB2oeA>KjVEUyl5-$GgH~jm>H)ywbad zWVaGTEn0$F-;Nhq!7bg3X_i+ZvgQK!|-$zG+eooWD$j+vb};Lf4;j@Vi(wE3u(KdNe_tq zHqzpyJh!IhVjAl1_T(nNucAb64>k)4EIe0Mx+*xDbEJs{?z%U2)cyf@36WVy{` zPMX;wkL11%XFCKN<(ESH7C3CvhPeq`u8nj2IIMQm zIMpdkb+{~H?4@!Xy4J42uToCr9~7 z`sC(F^c3N$>mvv;Bz&j+cV1de|%Kcf|9mc_XV_uJp*OE#Vnf7 z*xxJ>yzBe17>M~Z=g%ExcNmHJ&$z!yVmR|}RT|7-{%5Sh2o2-?ts5f=Ov`XzV_mE~ zhz64s?B>Y!-8`IetDEyXxi!%v<~DgpG4BTV_=mCQhckuf=gF=%3&`$gkF$#I5T0#9 zixU%Ve{SxI4;II6?TO%!bnZ|{K9%UQTz4P7)}jE^U~@jB#?453sjY6z%)RBV zQqWKKI!$whCE>&$JJb{m`He;tTMW4q8jg04V2#KEGT4yW}JxDGql z{(^|X|L%t8HN&}|h?i!gE?)c70ONDMw|7ocO<Kj+9 z+=-aqHVbJ;@VgsZV5swhLAzoAsmEn|DS)>pU;ClJmOZ ze?Vu(xl7O!2VVFIvKyC$g*fmo*IGm!iaNI~r8>x4_dDb=sBVu>l~WjMFHu!YSXl2E z^;EbpUkObWLUq44t=~XFKK(dC$nt>&yW%L2K_t5ED6K((y9BJ*L5F*>sFJ~wdKav> z!7KWOXh9)|2XW|Kp(BPa9NXb&Mz>fDe<2_y(>QQZ9HvkB3!=5m4fq0LLoFAPVB!O> z?Gf}6sce3dz$8B19+EO7zo*v`>r44@z@^AdJ9mPR@=8JRSN_E}Bm5h^f3al96jndag!lN98nTB-ju& zFq&6?#WsF!A7vA3@nK;<6Yu%dL#Q6|dxj<|z5O0Q0f(B&6Q7o|62e$e%EJ&#t19ZY z^;x^vFu|v?`ARMYs;V_^xj`QEf1PaT#a9}}K9MWR>`El^?(5gjfUK5;i`Kkv^4sX0 zV$&0T4|Gd{upieBIOg_R?wohLCI?4U1eim1ZGd?J(~{IZS$^j=sVa z=%W!IT9xikrdbnb9QERgE4h!d<8^pcP_5pk`VoIHajMG|1?f2e2VL3!ZV z`zr61M5DL2L1l)NlSS)1-IGjeEZ?RcRia+ehVA6z1jZC_Vm5-@Zo6Z;%hTm}4TWn0xe_~1eXxb5ynYs*)(9sMwu(Un{JbP45RqL3kK%4uz_@Eg zIwnFdSFsuavaTzrcg#n&e`^Rl-RnJR$-a(5y-Q;OZ#!wDJ-Zo(k^3w`*ddWv04xAU zNfw_zP8g&L+)RXSwV0XFbb^H;rZSg4k#yqoJGMz9w#gXvxrH9Ur8 ze~Dybx}iTf2N?3APm{qnx5mHDUn50M3C>f$8I|801DT-Ao60y1f5#xQO~N>g%>}N0 zGk^OHA8Zw~AULeerq7n41cIM6J}1Y7_IjQk2>Iv9W0J^|#}vN9G7PdMhbVR>)Xvh- zN(2qKOk^CODT#Iv&1q}eLU3HvjbXQgZ9~zEjoe1b|H`Mfnc&Cy!;(M2WNc2TE*6J0 zkDdkLp-u?oVuT~qW9U&^7X0EahK%s>e z2NmxWwLnx<)B->KM4|h4_&@LSzTG`@v17Xx51YzdxpL(yWSL8MlPitSXaW%2pq$)T zeY>&c;70xPjqhf+s?r4cH-Dr5m$ffzOCIUE{y`iN6a^1=4M{2!R0bfNVUP+0h#Iwk zU;kaZ5>W4_yWeN{4!Um@&`J%v_ORC4e4L_<1tn)Qd%rzwt2c;gu%t^bOtTTiJVk<7 z$7)VrL`p!!qW)OZ-Oyi%X3G;Lm{b@8`2JxpXZCPQkIAbeYeHg~(tj;E7{z#wkLeuX zIRjM6<|fZ*?!g-#kaXNp^F8W-{KW2TzkN7dr;98+E{6-E8rwcsE@*F)o|N%F9R9Sy z9FHFtHq~L_%21HxBV+0zgB70;B^;_NZ_2pW>Vs4Xl^F$P?R^k<_>wdL&TO| z=X`g~6?S&+F8cQI%HMh+hVQYk)kL%Y!#dg_4=)o8ctcMrfPb&igwyewpgxb5Zwo!1 
[GIT binary patch payload omitted: base85-encoded contents of a binary file added by this patch series; not human-readable]
zKX>#<(TDn;@j{R+FCwu9ZPxrS#S;lWR-d-b4a()&b<+j9H*~2LUeWOv3GEcJpUlInnAbV}vj`WL~z= z=_!_>?Aw3h>P_1HmBF1;bV{u5)X#aft5J)Fi8)FVeF%a{cD~Ralh0RWjBxvIy9=RiDq>+cobu1@_?+(3(xm=aW{RqWskx2UiOn$C3o-| zZk3h1cbpaQMadNs5WsjqErc+EvGf2&-Uf8b9}`1Q6Am6%=N%&QvJnW+1~^3{mz+O0 zBfOKo-GYLHBKQV8LHREW`+q(zPcR&-hMjIdCK;{om|?Wtx6$sLVPS6VP0>+V@6Wsk zbV~a!gTS{EmRFh~u((l@TK<%qp^quGVU&0b7|BVoKBfzSEi7$bWvIM%xvS*Q@)VYg zBy4&?;=eD0M546HH_sO6@ce zdd6%HOG0Clhe~6RJ=9#C-R}m}WVpU_z2s#gl$3Q8rnR#Q4x4 zM?{POpMV_Mdu~Q1o1x2hy{&wpC|T^kdZXdImEA!X#b+CZ=BDnw^ARVZ@w0kfl&yf8 zJ$OKd=OoJw=zl%wn?j5Ltz(NaNJv^Y_G^;a@(h@6RUah9I#zH`IA2!AI}6H=lS>4H zf#P3nrSp?eLr+VXTl!t(tz;YK=KgyQ`S{^81!Jq|ZiBX~1l2tDG+Ys2%~fcwr|fmP zR}MKA?UMw%9AgFOsW$aYJ_1nz*THP-{H9P;A4nB0DSwEqX_^>|c-d)=2;yFLwP$-^ zMwXD{h@rna2ZG=qBm+O99jFPv@1c5F(Hxq^jEsPdxb@#{!u zrqY8Ag=g7N3r;cqYwpksml4h+kb2wmCngpQxO=8`O%xvo#DAnk-iw2e?oj!Ch}6aJ zR4Vb9L4QTKwzZQ-ua<@&4fA4;o5aTcM9qs(;ur^=S6rkejq_Sn|NHj51OtV2W0} zYcKpdxJLg4A{p@M?WBb0|BOm~5rPh+x5s2YuPzBqtPZDRVx=h?&W?Dgj%tyYZPs98 zU!Y-@0LQkM_a}m-u}0(bS-ED8-p!vgI&8O;j)N#28ZuPNfr#FK-lQ|xIufO zw3!#zoPuV7@kFDF!|J;0!3;SwP2ueBDZ%1TytnGR4o!C5AsC;D+0h)I+^D*>c6nfb zrm%ZJgm1(KxY*YDiZvXs7eel?`gH`;IuTuHhKEp zFQH@n;O9oL4jlj8Fzt$G8{n`QcXmsg-kEe6a-cloC~C8&;>u0EcWH2W=NpXA(3v?5 z)TSGXZnrWGFK6Up(8269IQVhNDw-iN5{YsH(R5`>BdRSg~>oxcwVhz*jpj(<}~ z-xxv=2TFazeBI_Zh8TX?6wFo1q4Wi=lv;63h(hBr8Ycd|AWsvK4Zgi41S-cF)m;KC zdxu(38B9Vy)<;KfsMWy(8G=Jh2)U0_UdEEC&yu}Ny+S}uA0YB3UU8A9N39K`I zmKUo}U*chW)7yj{W@Vnv>%DBJrDSeC`*{NGXOpD3h18g9qAxOoi_O^sg7tSUlOEG& z$x4a#16!+jxPgzhfs&6z2M)Jv#9GtAbiXxhiKBOnw~ns<6uFD$Sh3KOUVlua&`+m^ z-1V)apg`w}Jjbsz<(;G{xJAa^eRs`r9=~WeUsPfs~H6{A9+$FX5PP zD$Z{KvoXS>A60r~wga0A)_>Z39^WOQfKmcHu{WJNQ<);Q!>e%%?De?vc0X`0l(xt? zYMp*3(YZO$02J}#eEI2;jk4(0S7=E!f>Yyg0QVt=8O7xT{#pl=KS zETrz{%#mXPn2h2P4VA}BR(3&it&?~6-YCY?;y1j09Q4&&-lF?Pl%9=?iG#IkTkTcY zW!I^Lw>?c6312(D9}>DOYv~(6EaK-mi~#^$y`&^l8OJ{>HVaTjRR2iz=|0}%LV0wQ zz9e`q_gw9+88vk927efjo5wQBe3#I;rnAU22=Vh{Bo)wrKk;|Q+jqu0LOB<#AtheZ zvQQm9EznMnOFDwOEtWaigr56>TwcYeZiaTsDu`tUc^a zWMmgFq}mfrtb{tDi0TlsQ%61CrPKWR5=2cog(;Ukb=31zTz^3m879JOG)8rU7$`8Q zoxo!>4D3OEV}+mUdv^}loxxF|0OQl>aC|`NSR_)3%ugID>wrqMY7#kvqUx6W17yMn z_PpgtxAW|;I55XZq-7n|dm#d@2eFs)zL1=`aT!#63vwDVe(ne;`AI@lZ{IaIba;~7 zj$}@hlpy>xt$!gaDO~$whx#J}RCo2KgP2J%oBJC2`PP<^VVU8X&Y@<7u)loIu-5v{-#tj6c^<_2XvoVNMNig3b6*dk+p&xb2dT5V7fjjm! 
zakc6`7f5I~#TB30h^oqvHEFGlal&>M_WYK10gmwlH{x&i;%EVBlQYeK~KF{5zo zjejcqp3g^_q;5$}MxU*kq-xBJ^%k4r6`()yVn-W4wLs(;+ z9fDV0pQ+;Hofz+VvA3;P^cfvHMEBtIz8wuFXvw->2|hcWP-Z>5duyJ!C9~^$e0`I< zKLRzld9pmSAts+**qLKPj6Bm+f$1z;46U-CcWH^}tsiuX4}}qQL5E^2m1$f3%#w3k zp|mpfCR1mt-1&Tm8K)2WOE(%6e1BBn6IOm3SE#-(_MVT;^VeAQ=61 z@`o|HUOVp%Ugh0#@QD!0i~g9#yFbwaw%mi}+)4|1Q5_1){>Bd;)_g-{)13RdaEKW=vmYmsie0(fYHUe23tT zaKhJ7Dyn&+-oCV4w5z^_)TnotZOxn5Q>WNSACA?>{PPwkQ+|TNwDEON`OG!hh%OBWCXgl13#N-Y49$y6yP%LSn}vIyJ313=- zRaMU(o~Sn2>9L9T({_`}u4m>?z-if-fzPvv3^^sJY200_ZH0KbCyyG1&~WHcCfS0M z5>d&pJf1H4^IGmM@4U_dop`Q=F9SOpqH3~I`jI{$@jlzlyX-IWIx%G^ySq5^G%fW( zf9-E?#68#Ad_HwOXMZg$aRJt~&c$H+t7b!YAHemoz~?pcPsSCnIcAhk7I={WfcD67O8?K_2Y(V z`~cmGZDyB?XH0tC-@}y1A1sL9eI81$w9hVaZ&#ujwSQ3hq!_Mz;O8aKinC2U6JD;X z(!@X{UE18C{<;|SOye%6qd%$IP*CuM*1dUL#j7%%J-lDwJ{+J*ew$XgnA zN#+rh6_i(#>x^edBozS)t30=?xAgPFL7~>&9)ASRy%G<94dXd6)Cbzm)m424>5bXq zB@_G4a2c|*>y62AQ}j#IZPOg8=fk`w9OB8^)c6hJZTv1%nLm|rZnNWRJQ?nzKM3&QnVcn4vX3Ky(@1+7*3Ax$%@Hpp=dSlJ8^Bb z^nYq30T=2)$&P!Fo~CP z>7tmy9#>=}B0M+W_%n$PM1B@*)XN%edw+^Y)i0_qEja)pU0u|~ksoFzvM=#9ghHA) zh|=9udl9TeZ)>Xou158kUJ$>8VxNw;;q@WTwfLwkKNPPQr|87cOXsz75R5};6in0uBOZYoyQ{u^J3vF3c+aiYd+IjGbE#j^Y(ldJYxoOvrtABCn z-SW-l%N1U{Ck3oq#qk7#0q%{7TT?JxX}YnSQzCU{K1?ClXX+z0B6b5*{I<-&UZdWc ztk5d&vwHvFdPu}s4nHS%Gj(z0{X0^(+=iEP5Ze#b%$QzLzB9c-%URV8ECu0R4elX7 z1}_pa+wW*Wy$$$kioX9&a0O$qWq&=Z&vUQ&*Ed;2GP{;ujqouGo;|}lMMZv?nPMt&11+i+LRT4{NhhYF>k5Uq9kxV_`l5R&IqN2-*+3&v$e z#PxG62Gga6FhdV=n!uB*8iF>!o$()9nnN~7&gQVty&uq2x z$Bu$*4x3LfW1g(wx9&AIVplnao$!-vekz`9qUglo%zbkFn@nBO_(2hG(G3#6GHBTO zr=+m`_bq0m)}AjEWG_1rcmbadUFdua8L1~;y0|+|TdyG0_1< ztL^wq$j^S9fFwripvcchWv-8>u3)`h7o~ppuh|fwi8%437|Li_aNJFpS5YU%Q~`An zwr6m$qF{MN^M5uYA2+AR{Z!mPTtB>@v3cs{CZA^#c)4|sM`URD6qW{4Ve!k*p0j4r z+-X>y2B;%cDzuq znADlO(zoV?ry&X^lnt+|D_g0UB%jws-NQAPUpb-NjDOYa;tPeh=dpP28BSHvb0UIt zWMO+c6M2Ts@AuVHHOOezy2jf4aTH3%;`IbVy)n0TEu82^y2LHbV`n+VTOUJVXK!1> z!}};E4D*R5LuyF-Q=cy3<8BXG72?Ug2d@`<97xF@ec2~S*3v8E!6vf{ zfwVl#j(-Q`X8JEoP|rbqlXuvi;7yj{P}XF4YU(S&t^S<#8hL40LHFJ-Z|RqPwbbi1 zd#$a0>+k}fLW{Z%*iFXC&BpmpZ5r>IlEc*Pu=BnN}@NIor1jFajt!fcVvzG)tu6)6kGDl zF(@-$G3Kc!xSr7Il4IsuItk%Tio@=5o$S}MDoJsTX3CxshB?j7zniTt_^9&T+qwjK zcYnDTZEI<*@qUwGRcIXXZqxB1*xY)$eoUYjC-VM8Ug@Xv=QBKgT9DddP^OM`7iV`b zqtuj26g<7gHRa`8??%eiL8VBbo!@Y{UV*bsrr&!>haVL^TIw zUXbB3oP#Z#(BspxZG>29!M9Hzoy!f6%zwM)2h*by+!vj;eO!_gvEsTEIQleR+I3R% zPqpYhY$lFi)wsbp>b&fc^0px6JXp$vSCzr}$J#0Ha0)ITco&+doM(1^Gc$L-yt(tE zQ@DDFcynv7Im7UtL2h^CJlc8Cc_!3$UjdOhm)55auGM9@Y<(8CLrT|BdcXVRMSmHg zy+;q^o&Cux&HhdMdS_RWOt_HQd@ewz)S8l^Wt~gJowucv9_+BrAO z5=FH;8}J@Z75%WDedoBE(2=o2=+B&3Lv4^6Eab!sS+wuSUY7h6-C*3@9$8!&RJAXs zBC$#7aT2%y5*HLZWVmt9-JFu?(tmH}iKgY9I4j-dbv0N1GU*~sJ{s>a$?J9D+SPlQ zjB$?Mdr-Wv=hdB)=(?M?H_xt$a9w=weStG=+a0&A#%kp?pY!xZ@jUL%GfKLV!>MJS zb)@3V^`?BXp%|V{oZ^!ayfJiZ<*U$$YEYfY&%zntivQe6dmM2QXi$HzjeiAykJgBQ zHNNL!0Hy!uxQ`6@5;(_C^SD;hK;VXW7q}zLfM|e}hf661prBX@C^8rp_El;%SR*b( z1_N#scS0zKJjKV0Gea~AEeglu;@n}$LP(X;ZK(jOvpc~{!T@3@=TmMK8lvyT?! 
zdggPLp9<0JZ+4^!Lli7_h6KV3eLE+G3m%@UG7R{uGeil#5@A)VMfY&h(-KQwb;;_xi@dc+@A|FOU1q8|?z9sz6QejekJ10XGbJ$@3Yu z<2%OO_194>_T+u^2=aMzpWuNv?8h6KD;#)8CT%iAs-j z$}JR;D`e8to;m2Bfsd7qh~DgQ#!pL$NBqlZ9=ecgv*fwfk&$I)7<5WK|gvy}oZJ-M{HlAD%Yt>}7jDD@B4anzRi7!0pP)7mRKF zxO_l6tJ0IF?Zo7<^%k0BU4B&Df9BhH;W9^jKETC$!mGV8(U~87WCueBF~KO4RrG&VTM?gEvrgCLCt3AE3^| zn1<|p#iP>)>lG7~kRp4dj73-$pXa^VMD=o?Facxkv$25s$+89DpomHWNKF!vu)gaK zUQwc7M&g6T%U!o@km!-b$%*cQ6)ZXr0K#?7qy^Eq(s9JHCFh0$h$J|M(x5LYw&BtE zs0)2qok--yhJVz>>*prsfE;aEggk=gTqz^~kRq#1vF(;ckQMIhYQD9%xTrnEDX=F_ z1w~=$H_^nTuvMYVqgK)*8ty%-*z()4&1&V{__;^fTg%k$!}FDfqBomDdp4)Kix*RM zTzF!bTyZ&uz)#*jVN|G9rDm)p4{`<-@8j5-)FhLThJVHDrE?YeeEWqjs0rw^W-_8e z#Ti^%Z-_kK!`r5kty1JCW}B=`nGTbz*47U%AJq%I-2!`bBYw zhZ$EPZhu{~iExDHO`K)4=ZbK*(@wJzdVhSp6Id>CVD^tDDqgN~xofy9j8cAG17nUS zdwNR=+vVJ=8}Ws#15LA*(2MEfEg1%<}Eqx7Q}Q$e0;>15WcO=0z0E?MJYmvWRt z0YB~wR+=;7AT_H`Sz#?l(eL4;t^4f}O$`1-y2m$<$a%ND@?>)clGMT{VnU`Jz27f| zgMZI^SG5F$oU8Q?8K}8vOysiRJ6U$L=$DW(o^N+g+`{aAs%P&@DZFwVy_2(!rHMCz z$j4=R$?v?=*r;DpI)woF+0AL7I+h#-jYUIyq~xpoTnTT81bcLe^V*EbzzRh1V8xlx z_28lwqkJz{S7q}@K1ZR#@%);Zo2m!(yMJk%UD(*LIw@=oc|R98=jpZV-KJvU{M6^i z4H1(((}xPUhs-Dh0FW%*i}CQV+YyfB_0)(fT%qLS;Lp6KK_EAg_) zi@535kMn!KIEl#c?#{QKb=fj;D<gy#RA4-SJ9E&+wi z=XH^)XloUTeoK|ibi3M;;-wRaC`(t*Os zb(#zQyvNJ4cw9WpziJb8Ia_rU8!vyGg`4*&KQ!sT?TntcQGZ3@3Nl=~w)ZN!^^&wb z#HI)&WxMT_XPh1FTm(=1X@3M2@Nlo~G3~im@9mlB3rCmMi$!UFH+6ONqoIPH-%%yA zsCoDt(z#zUNsvh@U(q7ZYv}e=-<}iE9U`y4S^)-Hg?=Q==X6~~`K__4mCB>6yCX)ii7NrdnFxmS^=*H;>!t`eaeEPO$3EW(n+>kS z^@{E%HmmY=qT{E7i+><6Fi~WtyFO&B^hj8Hhoeu@65%mXo}Jzvk>U&U9OWtG;HRFd zYE}gEnkI!F$%Gob2?L0Gr}FECb5HTE_A;{s+8F2Xf>`f^TFJ`kJ@@sl9^b3G1$c5j zdAXqnrpg+m^Wg7(9rkP4UvoDOWSD-Wtz*U)S*7#*)&}lnEPu|s9+gLaqHd)~T;3FT zUoQ#$!P8SkQL8HYm~(ob*}~R7Js((CIW+=4VqPz5lKO_e(+ttt(Ml zAt<%?&FHVjdr#k=M@@3RI;g!%p3lp0D=l?QN`#(y9k4>Fun!rix(Wgv52ME_HV}d> zek_9Rajb|xlYiKc+J|7@g@?Y4>z>O>|J3(AH)G*l>GgHn(M@c5jnmCpRM8BcZkpE( zO4=j?S1zy0dm`koq;>E$oph3%R?X%VvJUsjmSW~P=<9jZlZ)lP*{TUOW2=p!#?+`J zuZ>@kxh?fdPgvs~w*=qZ2XipE`GqvQxk&IcW*x)rDt|$klCW|dc`f1Xo)~qiy-BM* zxn~>$!WDZ~`#SS3V}iK&uyyL2+egU7VPfFUoT|%8U*mP9QF%Iz;C}ZK$@%o_uK0m) zKVzr-U0Ll;iQW|_-l>A$dW^`-^c?%=+i`;a=qXSbBeA)>S+)RG1%ED^`Z>C2ebrIo zwvwC+QGdc-n+m`iTt{2nX_eYjd7y>#8JmW8#%9T;u=Cqb>Ug)%CbGCs2XoQma+Qg% z9>(^SUihh1oO0sm!lwjVW@KRSIg{j9)%qlPonL1Yk9Chtrh_flt{v-8w%EDl~)sD0pETjSN z0)r`k>XS(1%9yR{NQ}$anZvC(;G!Qs`ty(g7k5A8=J z=*i&DPm9m@rdtz+E|_b3vo(5^ua65qx_=#U-h#FhsXIf9^P|M=cXJ){H)CNim4f&| zO_IuAlm0%Z**x9$IKIB`w=L|}_s(4RC8Mv}o}S<7^xT!kDn$d+o@RwY>9!Q|3{RAk zHv@Cc&X=l^ww@sDyQ`dJ{weoS*gwylvBe$f`sw)|sCn7UvwBf;-hF?@>C=Hj=YQ~w z3o|C%?RKW@b$nq=uYU5l7TYwsD{uAl35ruC6U5+N*i<|vtwkn}4HdLt`}FBLCnx2E zuDJ9#6DP{r-zk~Afp^j7`yYRiL5X1o$45HJ1DfY_)<;58_jkTlWdLM$KUY|K)U(Ft zO>IozP?Z1a+fH`Iq1_a32q}TJqkpHb3PB&rzIxnU{uF{C9Q9typZJwwAI>c+KKBV1 zcQ|euINk`?`yD>E3B~g%)q@*s$@oEtd;B8PxFIeV1};=?hG|VG-7ajG{2itZ@i^ZC zW0Q8Gth;gRqIVZ5kHoO!;e50U9*dwlGr3ezDJP#Kn-(g{;ue4SP~sdcEr0ga10)Pv zfc4+KR{p2e%Io#F(+U9mApn@)_8sXe;J8K=M?VSDFpy?R*j$)$+35CHb$D9HdF4{%5W`F%Ri0C4w1i7+|ww^PbjtAKj({cAO(QrGk5OufG& zSODORA`$&67Oe)oN&q-2HwZzN3KXqJ&_CWBkYSfPKnDX3vFP;NLR<{Oj93 zMwX7wC-B?p1OO;7$M@CjX!de+ngJV-{iF2?TmaBle&?NR=N*0PdVjR(rR-zHyn&Pl zpo9yC0dNEA20#Y*1;{T%tFV+=y`D*fi#DM`qFm`5T=d>>5vlKTAV`OZf?9+3p8N)Q zX~hGfLb$>rTmdqJjW`QJx)<0e6}RXZmkLq6s zvSZ$7qxp`{<4Rc)swgxX4_9iv=C^2oW#kJm+VxT=4HxzDCVz{9jo`HK7c;D%0Jj6! 
zy;vPajaX8u=mx5X$6H96j@PxB!7G#AmUo-e7mtX6#sf-)BAYsGUUbnb3{WrDSq}nz z1`F82 zb|n`ZfXnT-dw&n`fQSQyW?u_X0Z4Xu1jWy~;NN-MNP=V;fNHcA;N3y$LJ^KgMrHXv z?2T9v!y_0hS`8oqaMD$P_)T3rU#b7tFFFf|;sqOpLMfyjyZFND2j14+q*dkxN1eGU z)FSF6MnX&pmF~E{e-7L}O4R5(&g9Mrz>C=P0=#3KEPvp?<23BZ7C!35 zZ1x8Ikc-gvLKj?l)(hFSy@Gl66zmeUWalRpK)f?QqgPhef&BT#DByttBna%0{gfa- zp2ICa_K^4J49jddd_vi?K-kfsFOg(4SqRH45SFZ0)`ZqU3oS@RrZYZ_YN@rQUDy)g zBD@IFmVaR~^v43a>2@z%A`KUarGLavv5sef(`zX(t-`H?$1Scx2_EF1%5 z`iOUNUUdby@+9ohVYA`K=k-lkqd>5c2$!6+!G9{UENu%5Z7U790NPJAyR^JzbcL02 z1)pvd-{8Q3_*?`oi@^fBJ>pQD7x`$DH7O2%mXW?;2)W=h4Eon^V1;2kpb-lX{DIRU z4f311|Kd&^;q|^e)n<8?&Pc2ENFVl4@AM7Sh$RpZzy4avUwa4(KPU@-1{E%04HzJh z41ccFEDl?5tSxi*Z#ya(sCSG2l?R!_p0EY1=GolgQjI(T`)A(rCV%lxSfV4A53D$W zBj!u~5}kWG74lbGQ*SMRqU!1a93se6-PH@sYvB>kv7*DEF}R}2-YR7H`**(J=)A=$ zKzaZ;9`n^f$#3ea4+@HW!A85@M7yzim4D&to6zW+(Rg1>QAnNz4nP|6JScund?kkw zEHxhBnX*Wnv=m&fp#UKMsRP|$XF=0APdMzQecNLpcwNB#>(AxapDWqP&VO(WZrKU^ zVjNr|5iXf1*as$NA*hCvP@1r+@m}>yM|1s9-d81B6<$~)7A%+bTvBm5D89U-hkpde zOmd?Q2{=Nw=YtXnY>AHfx7bdz>>`bT^^W=9;W7Ws3xz831rRlIot=N@jVoK!7_LnI zGsd6%VCSRz|*GD%)h zr?30~%M31kDPU6x!%kP(cAk0CJdn8~(1zf7|BA9yTaZflZ~sHl3KeBjX4?pMU$KZR0|j z@V;W~C% z1}Z&at*Zi1=F4u@TFoW3_kWhnkQ}NU|AlAFLUIR+5(p(|j`fr61+mBaZQdU|I~z@QVEZv9 zQQke$*rT)Z#r}VJjDINlU8iQeoOHtr1_Q$o@`Vk{(G=U4Zf$FBH^yj%s}wb3Xz!I? z(XTnNX~pyyLWbqdr9dLK)Ya-hlcK?jK|2 z;>u#jr^P;Gsh%gP!Y6f+iX#`O&3@XCe6Y>5T7H`xk^n_Oy1&?#0sMcv{uvMtereD4 z|Ihz__`>kRHxEC~f^TeGj=X>@q4-KC-HK{9=tJ`e2wJpc4!G`U| zD)uTGTSeI}^-wtE6@P6i=9(G(TwuQedozj+#Y`5N#U!*%$ zfqiCY08U4KQGR$?2RDk9H(65tKjV=~`55!?6BFN9A&tMeC)Xz!!M=1^jZNSNy-!P}34aLHhkjK@8~sHw5r{kY_Ii2SxsRLazsk~dvXUz2J6>z4tY;fLm8tC9p+L|>@mYAFgCHRuoU5nK~@5@tu8;kS%xJ0pA484CK=6a8O1?9q=8)=?vWX|jaD(u|O5M8-)!3{nxGOkNSX9a&DP6eFp-6eaecCm`EhO}eq z(Hbp*2OGUYRK(Hd2>>mn%r7TM+8=>0H<8B&l@)x{F0Cb^hg8xJg#(`Dw}xWth+6@H zAXQsF&uAMM6kFDOUROmi6F9*aO~M{QP1ix^*X+7n@ePg3nmM?}K{$+wMSN~$$VN(-)W8}+e&cgCj=l#S>L;|;s z>#+>Xk`AJJ^fV|tYF$=>m(ALWdaFJzf^MBSNO*NMaHvKfWvcGkM!O|YY|+nx1VypR zX1P?hAAXQC%U&G00v1<}Cz&-_aiL_$F{*lb1b%;ohyO* z0^FA$_7eJ;=QqB8VmY|r z>t=t3-IcFiFg$yI43N4+9_bs^x%z}2UYpgJiqL?;f^=yu!?a?AL&E_4fKiK z1gchKy&309dXqSPa%|k=%k^TN2i~k+14{_iAR3m*Esj;MZ@A7Quphdxw)fIL{2AC7 ziL>WYoq=%hV1ws>*7+<{ne-dS1^DKsKYZfr$H(WxU#vWQ;=qBQ{M{=XC4KqW_UC`! 
zee=VwzkF-dpZEYG+0}AugT)kWkSQCQG@nFHTd7<%#{#?XDPJav-A?MhtV)e?snm0} z8g;%br3hC`gP;qs^4AD5w(7!3gq_YwdaJip1)SGe#C4jtc}sSp)mi6LiVKoe`L+bT zQdl!ahEY*fOJ~$=Y1{HmVP#$h_?>^Z7f)Em*aT|B?Q!I6nM|jR{k)a~R9deUp)g@CkQ?~I~}j3 zfn?WK?>9gH_=#1@2Nyc#`aXYZT;o6f@}q?ves*9;75&sAoBaLUI5dz(->ec5Uyi-W zBK+cyP(PZnSDXyVTLlG9UnFN-)wi$e+e6?wbQUY04;-HlJN%ti)Ky!T!F0E z$d^`Khe)(IRwi`kg0*fm@U`A)Zdps!%e4Jvh-3!pt76DyM#&BY37>!M66EuddhOX( ziNHn`?|7Y6&n?HuuF~SGzI^%O9Xn-gx~S8^+w_Akx?tp%_Q!t?JpI}xlH{W<-v89% z5j?O|D)n7Ia`jK{IDG9l=X~*qjmz#6_7_&(eO^YHB|093>hdwe(bd+0p+I(v0dC7| z6h;-#yQCbus|NC&f#82ZS^h?~e6tW-Bh$!cIDVCw7|OI;wW(^G3HRU&d42g77%9^s z6`8E>+A*&WMqy`}jTiNC=x}BUo7@46p+|7g;M#9#g4Prf7% z-u1=LA3bDH>g)~bgU3W{OkP%{_C;-_R-xh@aTh`zo+7o@PF;T+L2pNk1$-m6J6C@Y z567lEc8OH60W}*eY-DGI3_+~ecq%T`2Q`l#>*N!3biHZikCxGdRQY!sb*jCqjP7ru zSY3cp>oIRD$94%2EUX6`&OS@X0|5Y5^L5)P)vu0&4sQR;4Sw^)&zgSu(UDhbWzY6M zJnC1Y{DYTYPW^u$j`rna#o-6Wa@0)O8*F+F*NCNN@1mm7a#pH?Bm=#QH$>>+T7x*) z`>|a!0aq=S!AzH%%vddt@63t8RaB8#=)j8aA%uPPF8zQ|54- z-<+Mv>K5Jea{T2Ph{b?9171FOM#uf+9}ymU!DYEja>*BU@niK?i%?FC@CA=UQTv>6tD<}H(@^BqFgf-eH zSSBo9ZU%pDTLAGl&7q?b&8v-Po#C{h=pf@+%C^4xn!lk^6`&*8-kIZoCRU!K+my$( znWS;xs%?Ft+2D(Z;s?7!1F%!cDhD1Wf9sX(8_z#}{$1Z+KR7tn5ijk5lV2V1Cr3x( zfBl;+JF&+;#xIxGO8t-j{Aj7=Z_V?|Ie$3*w+?^(+EzN^@!&P(5qF*^l}9{g{CYDG ztd|R4mZDUn81cMpC*vkV%H<(1isk6DHm~vkw^)cT;=1sx?JcqmYJQDOpP65p(@xxi zO1Lhni0Dv2%TtFsvdb}5Jv1p?#F$M>Yfw>An7tZYV3VpFJia7Laml%I%oNxLB2V=S z;(32|6bsAOBP@m%s^Ph@vP zD6*bnk&9qBIxMw3@;I1QJsHX zNEB(N3 zD`iDD3kbQ1C`zgX0XZu_;TJ)6d?a&<>2M%$Dp1I!Qu1r&$W@8B(vu+2S%7#58|a1} z3o{%MYRi*CPl;@WasghvfaS}uqD^IsbOhtOwArTQ&PP{D2KhKHl`Su#FOPq4v5n{E zqh90Z;|34;6)2Xv@JGHC4$L&1KV1A5KM#)e7aI@kpjf#5!@a)T{tq7LUmnIC_JzN7 z-BH)9$mZ?rFF*a!V6!*mesQ;y=(?IyqlSW0(wM}di?AYF#?ec!XUc>NEweJRY^sr- z@e>7E(hkwcm!rwrbj+=zBs+iTDhZ3L_SDT5`j2iep&06;yaB{kqr%ovtu`maD=+A^ z#t}~}*>Su0YTy=}D`I2XSTH40c z^69|^GKP&M#@mZ4J@~uedbx;XmM!UAaORJ?eCuQlyHZ?-wBWaXV&Q-C-ADMN1spu* zU)|*d_HV2XhwuLKweYtN*naDypEWr0`hWI*Nn`(Nyalwg0<2#hp51xxt#C?aleU03 z{7SGRZ9+d59exnFnFc<7;hM7GJs)bk^Ko0RQhZSA?M2pMtn!emIhalS+G-HtrfBO8 zCvM1rFI(^q^~_tO&3k`K|4)87yTU5hA_OKx84nJb74f1j7&(%KU08no_M~$-Qdb~k zAO>H*O%1#J?#KV+tgqJetD$`J9bs@3`=6Zki=Rs*2~~FJD!*Lo*c<($ZylJB4gc)j z6kz>oU_W?TdiB4Y%xLb0)`0)^&-x*x?nBP)go|14$$?UosyTn;%J>qva|4%hQvscU zWb|yLi;mld3ZU^zqr1Fw7-Zf#7)5L~vizzuf^DauoaL$oss82hGV6i13e5W_{s|PrIbT*r}=%(u$i%h_PV@$7$M_+sS=aO55Osn;|A)jtFcqmZeBCl1h|f9&P^tA9AZ{QR$0^`qPT z!N_l{A9`rzd567!IsA{F|Bs$!tlzcz*1^YKj5q4E4ihe%cHwQe63@ui(|@0 z&pN)CTv<+n+Zv!;B+rAH9~x}ywaK=q1%0y^=CzsH0;EDuZ za_R=V>VSWCD6(;lGYr!4(#mOqjk1bZ5H4CaX$9EA%Gl4B4Yp(4=i-7yMF)fkE0`w zzPjFz4*abv|JGlBxM2vlAKl_Nrp~|nr2P-BeYJn&@P~81Qn@NfjYTI=W`-Axw&>6h zA|9#sXlsir+h;nEU6<1~{ml2zh+C?A*3h9)&8fUMm)J(U<$6W#+iPXx7`K*d2bK@p zKXi`sZhU2b3bHjEPWDo}8bn8TI#1aZ8yJG4J6c0D?us9H6CZf9mg?s?9v2<;woe$+ z&;EZ5{D(UQzqQBj+W)E5e(JX$ERW7ssUN(7{oPLp{=Ap-vyb(SmlLRT>BnY&-)kU# zdEQ?=2RMutmvpJ|IwP)f$@`0=z?Rk8S>RDd1ueChQBLSPV{tAdl^w+_qHDXRUwTtg z61S9c1^y3v@3!nJudE5aOW$=kI>0tQ?Ye)dln@|}7DvNPhmCEHHny=1xTD{_LU_zM z|Fze@*6xam?yDX*1FS@(q|8(*^OM+Ug>^E-q@8I&UBv5F={rT~n5Br>J;N|~9mS8S z3)+wlYSyTBCM9Z39yXeu^?quYRoiaMU_y%A#a<DHbszp(507zGCI7((jQaEwOaJC;e#HhH_#>Ay`v)&!X4FStTQsvjW4$ln zy4{cKBJKu&qoTxKtOd{}*$`5Ul-xE!nVkEE#HjSYR1d86{bzF4BLi?@m{; zTjiLgS4C`BN{IqpEI^+Jco?j~&`N`Xly2QuqLVNZrm-OSSOIYvXuRZyEV+bBnO(Qzdtu z4-FK;URU2mMpO%q$8Mx@Z-K^WypwE#EADS%R>6+pO`HH*!tnG1nVeMg1e}YKjl;r1 z)im$}gEo2!pA1hisTGoVSkciDS4YjG;|K8o%Yu>FZ*E_sjOP`s1Gj&#|3=P#;u>l6 zJ9qN!kKH%6IocZX%VwbNm!1BJr~j7s?>hdMZ!`18$uQkwD2ObcXuo_MVm9O#=z>1? z{M2m{e?7>9`|_f8NxKT8o~9Ps$R@-quZrz_2uhzNe&AgmraZODjIBw}Op4pBRg(u? 
zq?d;NUKUwX*n6*;8^(VveBW>Wp$v1luuT-ZPgYnj&nKPgrw3h1e7k6n?Kfe9Th$dU zKfw)wJhjdJbLtksH(xQ2@rjuZ5T2K^6=lR%So0q&h-T%I@)IZg`YE6EpD;VWaa5l# zw&%M}jDPSMqmAdWwJ)78>n+m1gMQs^z4oo4t=2$w#YOf4Mq+>BiY2}rBig5`+C$5R zW{>ms8Q~feKc{ikR7F=KS%#Km?4W=Vqk`3JPHw+TXaL(icUIAb}k9Vm$%2}7XRv#3oCy_NV ztdiVB^9Fc;`C5N+EfZ$xH~$4I;S4f^m-X{}K)!OCPOUO^^}?FkizO?{Tt6j%-%!Qq zJtPNn8T=P(`+IDQj*#QeQy$i13Wr*VWOnAo!cnS-3b2!Qz=cCAi1iA^w zjDzRCIFY>5;foW|{@^t^zVT<>-lu#{=4Ds-l@I9Sot1wn{i~!RxUQFeki)Xfg%8v9 zEOND>!cZRfYR5#)O<0O+Q1J0{!mdI(F-4-GZskT$kp4D{KmrCkV+R>4Suo;}l!G9F zCc(qvd{$pPOY4t#mfo8G#1gbo2fsQu|C9F#cB9?vxi7^1Lq9ExO$xnJ976h%nHZ=$ zm=jsN-+h1jq+W`GGN>U&du0fEYn~U8moCh!TvZp5`c)ds-ji+B_NKB<)mpN1yQ6W& z>(P%51>pkw49ysfaVCZR#F+-`Cm%ibWjOLWeCIhOQyV{PW?MwmEw1kw34vwI?!*Q? zBIxA4t_{-7G~9&2z+SGL8_}nJ$UN?F76f5pA)$ZMCBP$YGW)UjyKn69SG(D-nAY{j zzWu_I{Tb(sASGu{+yyINrix9TgNdtNE*GLx=T*Sc)-a|=517V4GjXZ<@9uRVjVybLq z6h(DwcpkeKU24lbwt+w+U7!^osi7Z~f9H#xAZMx!_X_BUhfPO3EcsX=8mzIee(d{{ zc-GYZSX|!5IfIYyFCSEhV|H>aB40^hK%jreY@;KW`C*ma?W$r1juUMvp_--FmAZ~p z8%XTYg$qiEam-@TJz6!LsC`=O_$O15ELcE~aI82uF$XLHexy&m7hT9e9^=)2Z~zMb zF#SL2ZT>4Jmulb#BFBjQ(p}iC)S^f|(#wsc+H|9!|GpN=vupE1JXP8qISs!ak7+>#} zg3YuQ&-WYSyquap_LBP-f8_EP4uKX!#uc~cFN>;=9#W#N^pS3DcdiAyonY3|<{`78~S%hr4yHm_k2aC7a z;)CioUu`UM}EFBysO^*=-RKG;CElZnAg`c z5}6Rp_hMxrFTRp`eN)R!fnKt48Xk17W@a7xq)=E;+eb%iL&7?Nx~$p-o$r6UmTh#@ z_q^=v4P(CVDSQa5GM>l$!xv^_{L%m0e*J5H<}=##xsn@Lmx-cA_LRu$%HFK z&2>=jqD+}2ZhPJZLw;5jmlSDcF1jdk1JmfL!OFFj=XHAz;t^j>&%zZBfbShtfF+J& z1X*g*bU`MV890 z{fQwxu0%-mLfmi1&pH!Gwn77bi|-k47{}fJ$`i%Y?;3r?ocyx6<2rv``OrD|uN{My zlHSz=XdTsCW9&9tS|=|b?CKh8DI(dRca@=f6JH^nmn-p+{KN+&<`6h`9QmiVpY%D- z>;5m9N`GSKKmO;hSX%dE_d9L}e*ZSoqapt0Duz@W`@M_99~ni$!ZzGY;#+zsVof0j z$(a_F10V5acMLnbY=wU_diF|DvFG%rvW)0hvh#u6kz&icp_5^kpQ-CQQJJfHRw*Qp z8&`&#G23Wc)S9$p0;yNsoLhUU(F367Nvk_GnOcGGj@4~CKl&d!T1g5l=4Z53UPN0U(SGKdNk@`>mZnIa23mJC zXh{YIABVGC31)xZ#MVCX+`_%3sGDJIiTj}w9B!|K6`wyeC?uc^9^8M@3(7`6uTQ^N z#4`P^U3zlSf8!X^(C+`@TlLIGeGvWZ54o(b#05rV@YAWR1ci|`m4}$`ND~dCg%)_- z+A8(L1j9tSne>1B`id1Dw|aups*00~j4?@(M+rsFmi-H-S!i_Zr= z(-J-}qlbT${@}N+zx^*HktU=0rVpt2$3Iu^$&m6=db?tFYM~?{QtOilVrr>j(#xz z$)6ypS-)baQfCH*296YB7}%3r=d+M4B5vWEV~^k;c@sdN|xFR#d5j zyTIrnaE2SPy{7ijn#Emm)I6~=dr6U5yF;0Dm!;=b&!iv!O%N7L-9aO9-1qPOOl0)^ zMto`{~E_f9$nyg-8e#_*gOV8`73!foi&C%h;ls=EjO9t1xGJ%KbE*+v~pB{@Cl?AAH_8m#P0G3qO6VTi&kaziw2JKBJx7JcvgmTN- zWvRHA?bhtlQn$zbmlF5CCmk&SrFe%Z=SOL3txO~vO_uBAs+wu;tyq-jq^zG z6Q3UK033H4t4eiL+-ilN<9;(t=&0BC?H!7Eqx4w>;*=d^>!C+tzbmAWPAj6|ZlZsM z)yDFvxK%o&xF1)}0<7bcxAoeGG3q_ch0R(vJ6eNG!5#5^;>Un5K8yP`Xv{7B_6skz z7k}}hzvBPDZ0hh2Sf<_A*GP99+x5#A{*{Z?ix>&ZY~?d*;=ULBS*^=J0=J6~23VRk z6X$AgsXA-n;L(&3h(0D zG+FGJ**Yd_SL`&n8`U~=pbKzkA-SL#AxG~ZR>bPtwF>Q=mg{pgq>tTD7ntYTiNu`3 zJvb;g5l$Vsh)d-{G6a^b^g+hkvE8l<$SiDjeBbi^V`R;DAK$nicf`wl{pt}*@S`{V zKaB6V=GkWSFLPIn0W6F3mcM_CvXh^QsSc8iKJ?{uS-bm)1u2ySWR?Osb(z%5kaWUU zQ*|9eAcd?QWmyn8C`cORP!Cl4$et3Y$hGU%#smMRds{f#Pw0ne(-lg zg-%S1`F<{g;{m)lQ{dNpMaOxT$5DPTyMNZ^U;pajD*`~HxNDy~SPh(&>T|7Yc9|RPUw+X z@rkYFi?fc7@=u{nif{f-Z!YQ2RVG&zfP=;4A3$~3vU z>r6G|%t~;dI($!_ih^EtrPFR;F&iqrKLEI&2%ygakGaJiDy(07O?KPt2fs7Ke~pFw z;N*Y%V+dvI8|0z_C3)&7-9hKB4zP~pzB6`ZGP!rw`z9y83$cG;cbbbmYk&HrS5+n{ z8T6|wH>IaqIbyAEb}h@i=5dw!izZhRt8B|HzWS@5*A65Z&wF0^9QMWGelZDFAMm|} zeBS-FW^*Zzt9<@~8RMrHBzemZ*3Em-NTLqyPVopXXY8T& zEQ~y+>68Ag0l(r&e7^a{3;yDh{E1zD?Q=KcY#qpV z`%{k|Fo=KcwzCJj?AEf_Y@IIKYSq*w%5$dXY=Sz;*4%iD7#;ifg~dWe%-as}jDEH= zj1P_tH_I9rHLnS!MGcTsN6Dn4(py|l6JfCAJ;x1?01+}`kp2=s_~K_rpCJCpiHvc% zpPYz<(LZ9wzZiyZ9`HY5A^#3nlFq>EwYWd3SFe9QDT=9gb^KJBeUq8KJ=LP9b&c=J z&1K8j3gd+$OYPN?JJk(E6=CONdaT#-;Ju$pqRG;Tex$~d{}YxZ&kt~~oo$(fhHM({ z#O4_}l+3{0F%X`nmKklr=- 
zU*&%SBff+|g76j&+~c~(u;p?vr1Kw)HMd1SIzH~79rGxP|JVJ#BQ}oHd@&yAuls&} z{BohQh|*)fJlfXKcy1sq?T_NIXlmy20_`+RS=EL(uTDxq)x9r?np}yNNOy83*}B@b zM%SV}P=EWWUiMHG%4a>pJzenAmobY)dpdtH{CfRl^7={8k=P>JFml}6czriOykhoq z!0Z2iQttofC*{Jnf$=N_0OYgpJ%DoLJ5U9F?#e6$Ba@e)02(^1LfXQfDHJf1&kQ68 zcyPc3#7tg1}-et;AJ-7)q_iZK0)&lCSZktm`pM_Av*;% z+w#1DECQIYAwW8Q!cQxUeCyAKLXcSzAW05*$ZQjX%6uQtS`HXwu*raesR1I0pFaSM zfSl7F z*PYPmIjAk(ZG{Qc48l(1n?o_?`+4IE%ucrfD!k*H(-$$=on``L*k`bYbS_LlUh)AH zGMK_v0*C1)5I{u+HN+O^ynl&>3I<2m1fVi=CkX9V-(Ea_f%eEg^L2fLxI=%({zirr zKCfy8aN*C0xT_K1?B`skZ?$}i;Y|GK!FWi8HkQQWFC9?8lpn#g0eKBkhfbh1 z&|pZUZdYPHPGh7EE+C7r?GS$gMTD5)jH0rHlSKtV5F|m!gpDA~-@;MYe1zG1GL>ZM z81~cfho67*$2VYiSqmtPNSIKJ%ol(Y1g{R&_C7K}R9DbM5xP)%3nLS-9;pky;Tu%D zq%(S;p4x$O>3~?JYf|o~$}yqO7D(G-lqJQigQ6>`XXWMrWD}#R?W})5VFY4aG3&lR%{@skdQSEmGaOb1fZ14K-;e$%Q0s;DVLU;@$W`(OaE znHZMt0aQ24@)oQ*3iW?Ogh@%=ml0vy1qzDk0YT)pSu5u(!fNLzbwW*o)Je3mm8kv#}02$Ik~ z$-~e?rYo)L<}RLfgzHZP88ukunY_UyBm zW0(M0z$zVo`<0c9*MA@Z#;>jz+5;WOB5gN-6rb(1xZbuVwz3wXUwRqE>3}<;>SLd_ILWxY&;ed8D1$nX zJo088#T~S;ezXj*8@h3eXvqqsv>@^Ruql6+4??q;>%bw1RUkn4eAL{s)@9UKVbkc_ zVyw#>dceTjSC}S26t~Y7TW!l|#@nakp@(5CuT7JXZ#nPZ;9EWnJu1fX#x#lcE$984 zjpar$^w=BAUnYh%A5ZfDZxf@eMR`Vv^&R8YIQMoC;m#}?K9_c;32$QsZ*IS=(SCoW z*}s=FC}Te>$GIFMjq*9Rvdd8?3q0y_OHdX?e5~w@&WNNA`}yC_6D9zgsT~A%>EK!C za2d)V(mi-la_V554{7bicf%cQjoa%;rH64I^=#ah83H#5__#X7&;igj;yZ#w;?-!pf*T7~}hfe}0DIg+|4+Pe&03ZF7<_+TRpOTK zpY-6+cI<-OTf(Yw-ZaL!SxifJuyZOVSqq8G=h45Bv5e2nzx)_P>@2}fhXOlKc>jVc z$L@>YXXBi90Q@(7pVNWY!!A@zatF0#3pd$)0M-v+upd5Rj5}00e`{}JZcjKLD^R&@ z2k+G(FU;G~uh9QaW{7?vUq63GWhI{{W7TC5CI>nw$v^O;RL@rK+7=*cQChNMoDUVf zfY;A>mRp(2jrbQD++l9JF=TFGYa?$nBAEx6m?oE1*iq5wV2Jp<@*S%KK&WIdDtFx1 z?4bBd8Sh&!;&aw7T2()Qq6e38UV5>NAJ|mJgY_d@^}p8hK`JSRby0t2=y=j~Qobd)279s;F~soW>#gLL4shj~D5=MzWJ}(!)0w)b{2oP@g(^6;aC4Izs5a2fJH_6BPVKyMe9KaSW_&Oj`{ojs%a8~ z`y+p;<9|(u$?4zd3fGS|E1Dbk!wH+AimpImGiCH45wUZ1yho$|`1g9Rj(s}LFLGBp zsLd%xDiu$>Za(ki(cyRdG}9#D6z!|^0V!jDW!xuO8GQXP&Vzq3_X(wM`Kv>f^=f54 zPa=KP7Y}J)zm@yhBvcSDkMHII8UIDO$=?kw^IJLP_m*kGg4A%-s`tS9Z{(C(P zpD}zkw9i(_+LnLE5s1axpO~h_GgvaGi;CgH<2_%xmeTm#d!Vwz5LQ={JQ$mkJt5=e!RYX>BsBq$Oj<8 z`PgILE&?v^Q?=rJ@cEh3-N$yZAG&Y*(U-3Kwm%r@p5K4=D29L_h`u|J3Sx!Eq-;3`>7iJ>L`C^h44tv{bBz%y~usOKlDF3&B$;1W5MeeAAhkO z^YI zWd89TO~zwYG7ajY2c|mF0no2GFEt4u(HK-lz<7U**B==&go}F*aKvpNtH7Fj%Yd>+ z!{O!#pyl^Edo=Lt{>6BVACcB$sNi;$U>ceH%wJTwCTSqf-N?)+&^-P{8@aDu?^GIu z#S-%{BXglp?cnQ$Gin0n9~_!9Q4;c;<)wZPdVlx z&G(;jnICG||G6i?zeo1xUI9Y+A>_aA)&Ep(!=*uoKlM8xGkM03h*#JAWaI^C6!r{V zK^+gJ&Nv=2?%>8bu`=lRlMaI?$IDnejw^rjC=Vi_29S2>25Jh23^GtGh{yW?4jC%> ze;$kmQho0^UN`%nc`pGQ{`y`XZ$~Dlj~RM#I>z%mDuD!0&5s&C@$=YK0wKRwGT3!U zK_o`CEv-Hu}pBegqT5ZAj{*E7mgnju+e)tMH-tQIv{ROJU&!=el@7~Y7nSJZu0-A%Ca>((Ek54@Eo$)O# zIcOP}y!{%5q=N~^SNy*(AAiSJ%lm&Z=lvM}GyUtgT*wl~-L(8W14l0B8iPIOOQApg zGmhVk|4+t#i@)QjG0vC6>pd`3kNwo49F(zNW1c^8Gc@q>eTPKQcs=mq@4*=g#{S~< zWB7}_e)a9pq01;2yguHKynSi~DVG-=dicvp! 
zyR?+kkMc9pefdJ5ke}zqbIFXayzqVme*Xr%=60(5k*AK+rF`7Ke72mwL&y6+1+zn2 zK5jne^Q&(i*MF1a;k41d4I_X5WBpvtfAkgpA}4qaKG%2B{X@D-kgQ+%)W84Zd9-m3 zqjEhjjahED^k!^TaeOVjzk8<2-4LIf~frjgs zk->wfpG-a;c>SX{l%G>4f*gRI!JO-n!(a(=3tQlMGQNclA#Mj;#@m0D^5nYC_iK6` zvVe&EeA+Efwgn7%TrNXEA(w}++#>llJ+^&r~kO}D(deq-Kog( z=YL3|_}|(``XB%J;Tirln%S>U&m6^#Iv&yR&HjmeC}G4ubpZH%`ma)k=)&<**%~Ju zN9GxeUiOX|nL)witOGL(C?E_<;ewVGjg@)nK_Dn}=iAt^WGjEa#zuc$pOt6f(Y%-7j*4)_zH63m@xRT`t?`Mr?Y^9?&+%Gpv@8#DiP=ovaP)+8i-8_9SAU>Z% z=>@`j8_Z_&m9>}GY2#LH=ZJE=THXtgmtJN8KFR|z>x&$ird7K@d9O_X`Ia)mX^Eq|>S z;EKt)H}0!&zS^BP?#;~beQk$lx_tpI7oLDQR%C4@1&n zHQNc#R!di5_&6<7`Ebw8Nw>bfFGJ#i`H9`8wN^9jr3LC_61|2Ooo|NoPzvj;yetp1 z(-D8x(`|j+!)F3j+bW(@TNup!B3M)O+_!9w*s-m4Zl?RJLc`};%mP`Th3vh#(!?YiFvUbOXHg-S-LblQ6rNw;`r_H?mTz$NdyPVc3( z8Vsv7F6_MEqwLE!F_v}U%*8?M$%RD=;ywh>8(4<4%WxqxmUVZk#QrsIae0k3=-u!X7 z>DN7VhTXB4dAE$1L@xX-of$*rf?;4n{f~qD=B^~!eyQ%GX+|14JyrK+hc|W^i+O*Z zKaFBb+es2VdM~IB@%k8=C*eBt)+C&Ru5BO2t9jykCti5XVS+z z$+{i7v2v8!e6Fb8W_f!blhmAUhr)lT=Z}}>9>}xbB-hJDK9EbXw*CW6QgZ$ogdZ$pc!)8Qp{wTlY%X}z6TljlLGJi@b@Y(i<>zzLg~ zMX%^o+491PTXs@EpGFGbd2YtZ1(G|;hv%d=VAW@aEUO}wanmKCCN7*c7NW)A5}9rq zGhl^fkic5Fd3nFZ4Z1}EnmvE+luV}gK)Dr<1&eFa1fvB?zm6O%f;$8 zlik%1obI#UMtzHJt+_Xi{G^F>7#E?~!1Ne61!DO+KzHak63%eEtcAdcH?pN6Uo*pp zq}e*wgK0b*tV%gv($}HFk>#7JBVNr)6{F9k+6U6yF*iGN9S0~XR*Qc`o*gfO{hHq5 z^3fGRM9;-?ekpIxU0HwHtX^fSO)dw<^eRsa47FBwzF!y{SGj*qYiXC(?K{tB?5sT1 zr$UPI(+?$qz0o_CpW~rod(lx`0otCmfzGaCUG=Z6uvx`)a;VDb?siDeNpq8%D3|P% z^%*L4lAkAHb>3}e`(=OLs?)h}O$*0dubs8wUlQ-OWtY2}RJx9ar1 z(;yH1!|SLA&!vA_;I*`bVV_iwlQ%oxZX$g(LvC5q=;$(tfg!HqBrC7jM&9Jw#wFNCZUJ|B!7P%iO-61niQ9XcQw{= zl&w7x4ApF`aVI&8t^v3JtErjEFcE8_0LR4vCdOfWW zU6i}m+HmM){dRxV%PK;X2{rFtsbDKUM>u7z?Qb{hsS_u857N`mxOnKWUm7j{8SiA~Oi3g;qk3B;t6dPLGVyI5Ats_vl?<9Hj8b~ z6l7Z~tzQ}_WSG1Zkl?#&ymsdW1+>7C~uvn`*^aL)UU_%y7TSJX_wdcLol7s zs@48-cBv8D7s^hdEjgQHDcFlel&sq_6&-!@5gx_nvK-deIj9e7vDqc-iR|{bV0qKa zxjlcsy7j{p=krLbq{%edZzT6!3-9OIdEL$TDlUipN?R{B+o4~*747zJb{j`uhqKN4 z9khLKr!Vs%44)7mqih=C?WYF3kWb#j+gPTCuDJ0MFXx|-m}qD3^C&il+3j{cBa58Y zpVi5T&m?(HH#IUZKB3|C*dH!ceO{jS*QtMf^y6nGR~Y3E^{=x<9g88(%LkheEgI6z z;j`XLie0JJ2P7}iq~hrd{>tp9r7+aqx~~>_wRlOgsk&-v^HX)BSu+W=#o0OSAFlx3 z&)M`md%^ou9-Y_Ryyl&DI=tUb7)*tY|9Z~f^ILRGG6_HJZm8Lj9EA1m^`3u1`M7_i zsJjW>H5c@qC(7@YD0V67=6-h8uvLW0Orp)1Ua}5J*UMd(Hu+L%7fpiXeh}c8C$04A zyv|86+AE$RUk<{KO3kq@O8>EoUNtLqQY7ccGpc9LNrTZbpWmY8^Y!sPKbGlTzkJFr zYvPzNP4e@ip03~kuj#5h38Bgk&q;rCE$@r@>W2J9257;gKI?CbVS4_o1udgFLyFLy zii^4I*7@u;>6)2YzF8nXpXqaYwEgwxHBE)Bba~JPK9|Wdxj4#hyG5Vt=CnBE!i$Po zBJJMNsrBR9-tOW{xjZjkwy=5E`MSB6uh~AoIQnwZ9ww9dJbJ9RO_I%yr9XeKAI9me zRP(f{r#*-ZZTm{&>HM%1-%GKZ!oo2XXS=x7F%mQo`pIfx1xm7kZOkj_-SLMAK`(qr)X% zOtdu|#d)|q8fLS@G|!^sM8lOj-I#TMyDpNru%>WDskQQ#_Y)45rgzxU`BA$!ei7QI z^I-b9?`}^UwzzrysVvra&uSkXQ}5CI`3Wgmbh$~7?L9dJA9XK2dtIM9egE7E>qwkG zgPB8&^`X|ox%WzU_e-p8SC3=h9=cO)l~{?i{q&9`T~l8w19jJ*(IUE~uhx=(7K_;> zc|*8(zDpgQ40KVX)DZH8O{G~^PXuzopW>0|EYFfBzRWT=?{@c*>FeWfhT2^{Y}-tJ z4jUI288|(e-_MS+t&hvitBQ7QAFa0K>)x+=wOYTiCiwbF@3rmg9iOAq0$sF~>l(7^>)3G)}M_glIJ!|9CyBI7WRD-s6|`u zHmg;+PFH%fu=j&=n5?u!lmZiJ&x3Xsw_Tt*Yr7Y=(Ui_U`|?;#XI^0iQ+0VhhNoLy zZN%~w%%N@b=c2nkDA5dy z`*?$zdt}^~bi2O@{#C!MGJ9U8Xty+n)0J$z;8Z4yv$~0y<;NjeJfoXeOQ-8SST5)` zXP>o-_c=@TtK^gB+Gxit`~<~p-d;m{e?Q(|cWWkl!c;$?^76s@r|Ngp;&hlC?)%z2 zeQ=u&m%T~%iwClQlh=CT6=mF8#N5A+=t+Ha&; ztPYps+u5yena}KNnMT?+5pJ`~`cfzMZj#_`B`=L=`%GPb9g4YinJspT=+Jj`+WVWz za}sUke{Ah!LaTN&MUxY?J)JL#tICE(C;joc&8VQ;Yr(ea;as|IvM)k^62CpgtRo#O zXX_x-W+pa9XOe=gq#<1u*Y(7If2NbelP(7LvybTMb38zb 
z+H=j8^PPErns)ZRJJ9W&q``Cjc1R-~$>oWb@8DcM&0+K2&jo7}KNH){>X!*UdET{deQ00<9ThGa2kG2OR(x9vEJ^7IJHfGmFMa4++KrYCC<;A73Le4#QaZ6abO$53krpTNbvU;Ae2UzO70(xw3z5E^x?p3D7Ce_Pg* z%wBi@Z8iJpQe5DdRu0QMH0a~65>?F8XQAQ6BHV5li~EjEkEzCXMA5V^I@r1~pYM}_ zL~LtnV(f>PTQI*X*U9@cyzV`y(YBa- zrwm$W=1I@H_)=#~*A2BohM(=NEljgYdY~gq63&G!peC+CNTik!cmTed~0~(xGas zHa#WgHgF zKy_r{TyBuDR!>~Ag7U>SnB29h2ZsriD*>iA3qMc?>;bY~+&T6x66KxYE3CR5#6|0ATCqFEN zucqs1I>=tVkkpr89}f2RHZIq?!(_+QGUCK!D%Kb^?tE9 z1$MZVx`G}q@kW@q!NaK+&Nj(^7x-?(kvbRVg|@qkTl0W>!#PV6v zDfCm3z*Q_SDoMP@Ay}8Y^M0}|=V_UTo5I>vVxhe|W2c%k_de`S?}hq*u{L#B84G8a zZCY=>Wo^AvOY61VK01AWiR**?dIts7`fc!3jioy`R&`@3<)Vrv&U>>zE%I`12GR0( z9B3En&)MGHE}HlBKR5l_N%ed-?qw*+U`cqu9I=-&oyJjjSP@eJY zVLzR#zncmA%i7uAY@Sh}=84g&w4PeY)aOz9nD7Y$oH&UgT@pW^Sz7Zy*gB6@>w#{&zKeTC2IbUL z&T%XElylB{dUHN({}--%0*R%n5)@)lV@&;4rbIvy`J6%7kX`DLup zpk@=WL4%PK(pj{@v175jR9L?ur4&YR6_e-0v1`(im@2}#kl)0c%1DG}&jHGe{020lNr_6Pcqa<6R~QF+h>lHm=5 z;}Y;_cqFrb9Nx1ysR4?)B6Cd!h6M2JxYC|w9MiW`FNM?%n2V%*6C6PKxHbg0nj@|v z;W_^*@iLHX0DW9`R(TkNbL8GHwDcg0JFVeb9;|E(lmg0q0C=?=lo(MFSkt>2s&EwL z_=_A^0{o^3^;afOf#AA;5}e5%UKk8P$D>cO+x*^t%M-_~lLC=(k4~5nMVW|(o(S*} zQvSG^!FyxF-jw+5twAC+LRaJdQlCTd_E;RUter+F?uU@3+Pb6OnnCs!Y5fxfCY+WB z+)DUNJLa!acpx>imRP!F9Av`qGZI(t9jibSDJ+z1iWU^`)0X40yI)}0fW>o3VUIeA zXwZy*_$047%r!b@92MUv;H8Mxp%yGea0|?n+x9o*WIch@m7i>%JAwUV`KCeOg9DE)Olo9eR$}4gx zf!}R(Ouy#P>}@e+flheX{R3#DuDe4DM zT@aUSybKhrSo5^=QVnBJP#Gm~?3C*3qHSUxv}qUyN16@_Zgv*t+r|s!>2uQPOM~Rp zwz}w6WZ*^zm2EGOn$d<007_P^O(h3^Zy#dt)A94xp!YYii#-;Y+Qz8tB0&+_Y`&r+~nj)=Q)4R5X zu(9<3>}H}%JrXF+r?EJupW;#?J^Mz~1{)huKv!2y3|slCW!&@yvLpq%M!vRxRnSDv zj4zVddKO_g!WQTnx<4D-=n%IC)aNvF%2B1SC>z*;&}gLQaKf>%yKa+*(>?ug>MIf_ zS$yZFceOz}u*UsB!(`KPAFRnl^KPu&(H6oN6bg$ig5;a6UdfJ_BsK-_agrPn$0|^j z(|XB5R2KPeqGhn9;G>Z0pr>Vj^ByfxP{#%x@C8HeN~Gt8({lb(qvi0~*f^MzI#SbQNPb%yh8O>HHT=l1RmTZCvSB=E3uR1Oz&?M_ z0FJtW8rUk(q{4^&MKMcVUCd7+`^5249X`7zCxPE z+yf&2*>?&%rg9(;I&Fx}M9PJrg?D$Erwa?UZgvS&?+rhMMQl=j2F%{}%BM0-7BcQyg-_xTs z9HPHvz+u{h-gTX_k{`i;X`hbs!7>PoBARX)V|g92(h=Av9z^U-*fm#olUT&Z6?p^8 ztUP{K6df%UR?51W2q~=gMrTQu%dJ(d-AqZ>izQ^KBsh$_lnj>EyBi8XwXxM%u@$^~ zzJ83K-s9q;#9%Bv7}UB>=UtWcWsOkfBaOJwO{s4V1Wq46V-9A29FD3=l;sUCEwXe+ z6IgU+24RCAhOg$~zHEwmyJW>C~I)wb^nxrQfphn!k5x0Bo_k z3TTqC-r&H&F;$F$h3E?UDf<;Ug2+)G2=aumqvxt1SWV!65Wrra9R1s}u!B~O^XJ@!g;iU_T$tAto`u{#Ll^U#^=DgibLstmKk?109e^oEv_`-i+joA1f^E- zwS`$-;h&s3K&RZ4J?an5N)-s_9gJLJ=j~!7`p*t3a3wecK((!{@YWdS$mm$ywwxs} z)!PI_PT-#KfDGcVB$Ou|VQiUq7!)p%0V$&6=XP~JkxpVfT6O~7L7}_Dy<@Rxm2gdR z=^wFwr+$~z^LTb@fg@^y0(diP)(V4YQD%_o96Ncovualr>}p-sq?4}^^o9)(^>a|4 zV^gjWMmx?Z$eB0@z*ZM>*s))k)pLC~e?g9w;Aez<@|Z8v(?Aw!B)&7slU!XcO7WR; zUDdrqGgai6DCQ;6&I>-V)nlrS(d=o19Et#I28q*3>+!DvhV-c5b~?$Zmsw zlLI-?UH#PGO_;68kq)5(>G)I!0v{1U6QEUCw`05CV)Cv2=m(bXK}`R3B7^t>A1X?V zC?1|+Ku#KO$7u`MZs>UA&i}p!D=-9f_F($mZ=go5EQ9oKN4YP0rWR>OlICzd$Cd*M zQDuEd@d^iMnZ3;dj&aX1D}IJol%nTW{@04rX7XwNL{G+jQDjyaWlY?9$wQ;(A+ zQfa_0GUj3zQ_0cLk6X=!*(%kJ>O)sD;H!r2H9~z*1-LeHDIMU<{fYxA35$z=n-Qxi zm^=(|W>F_h&)Jv6nD&&Z+*{jhjFqIN&!U1y^~7o{r(1pC*R-+_l1JhrOR1}$Qs3od zaWMu`hghJ#y5!CE3c1a=cuz^hWI}pJ#j{;Rir-qJKSmba_~3seu1;KNMDV0~eU_A| z0<3S&O|K?=hn5&Wvp+rrtld99pSca@uOyc za)6$n+29fU4DFV2ds!!PVQN33yaG`Wo=VWm0RT# zopAZZJk^TOKehO}51ZCp&FTdbXae_kutFrH84)=3sIZ(Ux^wa>PZK16i1wgpGzVb2 z@bq?nh+6EZ5I|~4H0hK*T1nwvN%Vjoi^r|U*F8W^o)~|O}=HM<{4m|eyr?s zG)ZS9NiU|Gjt2Y%pnUz8jk{RqW!vwQZO+;+7LW{k((ChnBWpjTk&^rlFe$?H(?e2* z$cu5Vg6o;$+(M&yMKec{=JM!A2q2Z%>2)r=UKfHXx(T0n7-aa9z$YLc<;|?9BA;~0CMB1lgIdBeS%@Iue zL9+2syfYc7k|wql9)W6>q#on4nw5G9A%n_1(`=r=+8$Y4BfeW^cUJLQ zYI6fMy@{fK{AMP5?Jfjt50P{ZtGcGz4UOCDmi)bADqU4kX8eTcpuRivDfih{gCs*h 
zEf~_5;YKNtpv)OoY6p^BR|(9i?r6ma0Ad4(s*uW^BZ4oZDEe~UHz2SqJPkp|Bw{NK z?7sWi*L(lU2MQEIoTz6wsN~1Hv>7>4&EPp`*-zGgZKkhf4c$nS>Qb@|j{Ac1&gu2a z8If-)7Av-j_T`2+Mlw&U&tK1CfZg#`5pnDFZdB#JyADu!T21(`M8pOs>kxy_A>d?5 z-fkivYx0q|g}e2|$v+rA)(c*S0=dm3BMI%>upeh$Up=A&neobYn)XNZgG0$*{v}W! zQeAw1%dxY!(xc7+XLa>gR&og%9rg&7t&_)3@GX*uR{-X7;iJiI^PQo=!iz?JjZOY| zmsGd*CCZqgu$(q66q4AFsAYl=I&gY0i;7wwfsa{Co6K!zPyFEmS6J+1SaCwEgNA_R zGfgybfm5@45eD$wtg;`_CGjvB%+tKr_-fC8b!r1fBv3S2yU*m7f@U$jV2?5=m$yw2 z&>RD#;W#c_$Fpfji@Mh0Ta`WY5i+=C=oKe)FXfe`a5av^shO0O>2HVdg%Zoyj|*iOb}f+=|d@g_2Ct@5mdgf(e@ z;jRXv&PQcnM0p0ShuYa=Boj58c_GxeEndY%Xz{ls*tko1nqJS*+90InO7(i(4V+ri z6KE3J2WEbb7}>Nd>9nHadwRbVGZ8LO;1t`TUtW(#(I$_BG1W@O0iHic3D8XhsZKa` zlS6I#?}$aaDV44O3Lk_+=?S?h)cquX%lss%A$jJc8*^iC*nhi%o%|hB6~xw6OQ&cy z0PHTi_nHjTwqtaD%Me=->{YXYlU=i{3Z73yMg+1Vj+JvwqFdO3z($6upVn0T%lK(r z&p_O;S6}a$(vA<(E(5rpTU=G@?_?BRs;j%0s}rLApVPzX+dv+DOOut!s`S{7vm=L4Qm zH07uP*^0?P!1ZHYnul_lvP>mx`Lq!-`)Mls}k^$PRKPO7xARBCZn5Z zMA!3_LZkKb`pz6+nv?y`bF51nE?&CFM2_UdFYsBD@7VijkhNlecoT#!u0j!h7R|DphFVtQlBxj=FAZn?&nQOjfnorOFWgYd2NgpAM6 z<-KcWA1IlDC8bt>R8eE=nq?u;nNUw}%L%I;_7t#yqT4=cUg`tP z^-Nlckr>R)>n5hx>Gc}Oa5oM5G5K$HuwyXGWVCt@Y9v$&I6ihe5|Ft z7IfQ9=asXMvw(3;&kU5Ds%fX}VD4@UOkrdz>sW_BD`%kPmzH)`zw7qr1{L*~{# zMDV5=c|n97Tx~fho{x6N$!XfZ@=HJ_+K{cT`De3uO(vcln`>Ub0jzNnD*kZ-T$LZw znFf)6tfJ1T%p0JQj6XBJ5+2ydUtt1@OzsY&&CJdJg`Qwx3~F-t%LpYIyjr>#Pl+g$ z{1g&hJh7$ElD3(wcepNB$@8T*b~)Z130xM zgC`3~-cK;~&oldxtgDzzEh@@%u2C|_H8hg-c4YsJ(dJupGvjT{izLQa<7AD4E7I~HU)A|?pi zUE%l^<3K!jmv*hRLZ>JzcS8m97<%}{mfm2j;Pi{+^`e^i`B^y7AwA>_^ogW@PQ3U?6Ctz7rM-I#Lx*P~;mWa;LsIL-$gy4oYc1Wl&Js9rj4$&pqH zyeHv~*i5O!bO3#%E=tDHGQnGa4KTxGUSdhYwEL*mqMoKm-^nJb$K+#l^Kzig4opiH zg`U@HOVH&H6%Fa%vx`dFl~DN&VAh?pPl7$JC5&`u|3Pj4VX*(>fBvuk4#)miHrD;W zY%FiZ|BsE8o&UdX_&-#v?Z4FD&bj<^f{XujH?>>f#u%DM9R0Dj!$07EI{G8A<#)bB z0E^KC`V(*%&Obc+pZ3~jkdevQF_((tuiih4@U}AygBfv4k|k371ta{8&-oA2{jao> zAP=7WUHT5gaKT@_&r<3`bb>hv|K-t7{lB^W(-kA}n{aQ~%tsQh!B-?%$+;bWrAh@SY*&Uo_G9&rxLj&4T9w3s;;GZpuH+F_-TDM_>Fm zNS1eqIUB_zaTvz<=kS*P>IuE1ptZH!8EXFJM!d-1^LqYwYu#&PA!PPV{??CY|1s&^ z-%zw-{_SgCnyXpXA%hKFSOhpHw2Z@aVlK2C=3v}fw~MBg$D5UZz)7Fe(kJOdHZV%V z1ByN<9hq4{S743?$OMQK7Zk=v(SsuWCK*F$&d%zWZ^4t8(oXHc>mJ zIAwgUQ_DjH0y;C*YP8ud@T>X|==7H9UBNQ9P8|fu*Is{}eePD}SJgB(KL*k(ONA87 z_cuz;s(XYX=38BVmttrA22}z$jqD9xZ@*uQlX2C^K3cn_s}ly*CKqCv`^wz>`TjOH zudk{vGYz9&Q|=p0{yk}Fzgyidy{OCGa|ZRCc4<^y(YGJsV%EVCL2OOilVuYU*i{(k z_o39YlK|>=w}okiU{26CQWJ`-C@sd|#xk3B<+++`M4pg;S~s5X+s59NLN(BX?M>VL za@l}W3I0Ni0N@s?k2$dJ=q*uG&gKS*#XXmao|Rs{R1#q{BY#1%Bw3++l~_X`;WPf` z;Rq6gqKA*=B_CK-fx>5sl5$QEJJ*?XRZSnJASzXJ@a&s|ok4Pe!WyW%YW4G2kaTyh zWukU$|FofhLRjbGmsL4{RbOQ=#*Kpx087ILmFx?QQLCj}66`4o3gxU8!y=ztcB<)A z8Uo&uiFPmrVO!>0E;)b>I+;cC{CcRMW!Yxvq~@y{wB6N?kt5aLMn%8)W$0_KvD0n? z;NvA9B=Ryrx2cDIU znLELFoPjW#rw2Wve4p_B z%1Mxaz19XdyF2~>5M>z_Pz(H?3#;ZO}|3H)@i$av>A*dS_vBMCcq;xi`{MirUghOrw?{~m@fV171<39OXTeOjX7 ztoJUhyt>Vbi6|*AQ+U#|E{uKNah7<{qxbnCppwPBJxz2|MdY_n1yP+pLIAky(T|aT zM?vI0^pT=SLoeYB-L6S(P@cc)5a5L$#PhyU%PC*7AQ}_ytLvfl%$T$d{o-)Hq`vCs ztr+ijO#GQ!e%G+r+6!$gewa~7mqR5jnS7xd_YlV1m0iXiSv9w2H&C22lu*_1BV3VE zNM`N|#`>F47wZSFB!b~2n2F>`a|QT+4kbL1e099Fz1NqjrbpvC8)+|d<0}Sj7S!

((rM?vjx6-|acui#FGEPVt@$&ORy*HF$l z4h8TDF_Pc%lt%=u4dJi8xz`Y>1aLImkfTRNIjb7kS8ZlVK)Nq+#Mkt73kF$#F%~>) z$1H$HF_$J3ClG>Kudl=rxqa0lqm8sW*J^7IMYWFfs%6K=F;=48pX3EVfQ2LwV8RiX zuY05ydH8(Df8o~!wd_fu=q5_I1HX@K)g(GPgRL5zVnA_wtv*nCwffnxmC3t+2L9vO4Mik?Jf%)lyF?==Z7S-Cj{TW35+m;+P6DWqp;buNVao~gw zY-m7Ocg~szO;Cjg;eX7;=Ps#krwG27%CjhO%k5$d+g~ zB*obRH(8LRJ9+y(+}p3>ko+8N<}sM8Kq|Vk6VxWgqyaOlxoa196!dL)sTeEz#7!TB z&9-gKyk_xX(q{p9eG$Wi#braQfk(i|gn685%Cbd~*Ju^j#k34ZE=82O{e*UPbGSIP zqj5y*seJOrQ9pkz{bELc6{Yzt;wJC2@bYF$wKXa@#y>KVG{ia+6C|1#Nq(B)U}J?G zWk$irY0iDS|2~R=!4G)d|g*xYKN{bfNLX;k^a6Qv!!Gpfu{|hy88OPkvbf zvGeJ(i>nbm-G`zm4;|5!f)z${z_f=l*o~6JoicuApR@{RmKPs?PTa6~ogk%FC^^WT z3C^ZMZP5VY58(2cbU6b(Vd;Xr@IlE1O0Na;73qXV1#8O1yHvF8t4FaYDZp%$L6)Ud z02U!&W<%TLxsB&+NvK7KxP)w?|N5E7k4cncTt;^*2sT-e#ER2qk?6tVrdY&!`M!KQ z6UA(YF^X9 *zgR@Bd#x(_2H1cqHhA5mnjJ2*74Wx44jW;V~baUgsYQApNB=MP07c>uZ z?STQj{~6(bp7Lf3Zo)dPZ$5cL)|gzHC5YN6O<;s08OD^(x=<;ta)M!VPd63j4p2}! zOwvvE(NqL>9d6%kI{dB4;Nr?Fm`NX9s|%N@JvYE1w3Bem9p{q2nSygRNZTs`Di_-? zPHb6gJ*%HUnbL{VzR;aPpbvC%?cUI}q5I$gOGRCOgP6wP9mb`%RICt06yi6fJU5uf zKMTF8vRz{a@y}k(EgyuoHFPcvR{!yDB{y8^iu<=GN<;P z_*E3BE z=*HK7$S|Q#Bi1lx1S{Hj&E(!i?}9Xnf?8qu!lqRHkPd<&j+6=9n`Wfz4w@B=!Cl{807H8rdxq(gaO96C$ zGvd}>7_f6DKgBzjFNFx#Z7c5*%DR!ZUkb0xz`0Mcst|ZS_JIrCSTvWmbAu6g_9S4PXCWdbg_4|xlRW3I)goq?3STvzWq zY8|=)2%pPJ7^!utP-~E&gV$l99hwt=Baw&G^Hj(-L(=gZ($7yAbtK|nAm#eyB;)S9 zpQI;inDckxiP(*+HQNk+H5#XqtqGwZeIhh;Nm$^zAv*`ns2f3i4v^!?M+;SNcu*0Y z3offF2~WS)WMfc46q2cU&xU6-r%c5O`HS^B1yS^SAt7`PmoD1prbILfJdNsqvKkZ( z-uEn(Q0cd*@d`z&D!pjWxEfxpZx*JOrfP;fXC@K>G&1ZsOb@Zl$H;{GP7VS9M?&+6 z!%!mfCyY?HU!$)%aeXS#M3{@!&HS)ciUoiU}j6>9FFT{pL`K3F5V(2f2HwU(qpC@*if%dB;U5r3YyRW!#-wE4Iz)d3_ zfc&PX4WNNsTA#tDynj*#Jv-#l^ZU}~XjVCN8&EhvAn8rLr-heezID(5FWaSS)Q=>D zu+5QwI=dYXc%X`2b{XbLaT3VNvRW^GP>L$~FUb;r^7au3VJg-H z>AI`)+By%=T5;Jv-zLbO$=#{_f(YQo;QAMl;tB`71++$#P;2zF^$f-<%)Q4K@{}=Om?=yHSh*)kW_dNj!y;TN^2f)em-7%*x^W^ z4R_BsCHxG>%!lN0GP=xvST|!%wymGTt&d^&qK5`!@UabY)9J|Hk>!%hW)|$J{T5=1 zvYz1suLn&7b-^zL(}>KcWG>vK_2HeQg2jvlF%_x3m&G4Hx$VQNJwflpK(sZ91X7Q= zOc+n{>K@jhP7#TOnA<4XGNDb`T7F5Xn^4BTLRs$iUE4905?&_!NuEBgSd6uHLpTF4{ z0zce1?B3N9&m`u=xY%@K!>%(5?O@!~oeXc(GFuUt+)6%!xAku;F2uK!Gh;t!qP#0h z%`0BtNTrdkrk#0zKY|j_tKa(f@u5{^QhHg%L!|N$?}cz)4E4HWhs;lUvaoCa&QYDh zLk4HlC>v5kQe2)t7yOXl6iSma)JL;cGcR?#QwfWjR2E*Vf${-}#=*!oE4X9J4rB!7 zXSDJ3-1IlFzSQdBD;OgUoJunHPM^Ae$^R(Q6*ao97)`0xD6*cQJFCaTcX10OO zEZtp#-MsOCe2+E-oGBvXOH(E-iZs~jsgfx@Y3h64FEMywS*a*$`?F5&KJPJBBRzn@4qj4!!&kKS{au4MmbxwHl2LVd^ z@RElX66N0`j>7Z4A$F79I+U0$M$lcJ=e$rfG4g4DH(v|H@>m`krvaqaL~WRZN5Q-8 z;tmP7Kq_P$e-XpVW_aS!I3=c7UR>B2t=j2+AxMjLP*QEg+~w}KH@%J4|M*~294T^% z=q52>Y+*bp=^HLnE8Is14N_=%(g5W&d(MJ5_XJvOLBY!s=a?p&_%RV)44XaytE~n$ zBU2E6KAdr@lOz(BKVCSimS1f>rj)B^jzRWP717iTripWTYVYvz1G(&JMloW8o`cBa z-=(X|QnnDLCs~hSGx8#-TJ^Rc_|yki_zm3b=qXGH5$0LP#WR8_l@mI@G1~6&voxCs zX1d2$(oqys4pUSF0HHV06@q;CH*@K2p#S23F-gRyvfYVK2dJjZx>i2pNo=*{pKen3 zk3E01K1abY!9xmX0 zr_KOXSfBjcC1)#;umeG1P|=0WYeMyZ(F2&X>|iohTAV==h_h+m;$sX_S5Ugwo}06O zJ#6Y*;fsYP|d5&3txu zlfTdt?h?at$(u$mkHjUfDaJ2J6L(*Ke!VlLpUPOwUF=+3%vysutE3QsM5@SBJd)po z@q6y_I$c&^1f~!5RwL}Ze(6o}3ggfr-1-~aDs54c>tHoJkG{XB0x3^;B}AX}X(+sL z-cf1C8Fe+$!4G1intv04AN4>-a+AOG(V7$D4u;W2k5z33xyYud8RK&Mu|`mT%)+}} zTD6+ybOd{{D*!T&ku0~j93G$gWHykcBAYV=eW7=3BFZP?19Gwt_u~f zQ+M|mC$FIJt+-NMHG7etmriKbErTL`ZpD;;3F*2HSX4%1i0su~`OF18IRMz2eJy`+ zBY&x6+9-uW>m|B`$Y@&;2h!Ak5$ngIsgFp$jRgK>%q!OxI!D^HQX>W;qE{Db8lhD4 zU-QfPiNC^d0CNx1!d)~zfBnKgKgx*g9)vi7a(k~2`RyrUv2#Ml`<|cya9;QzqKYoo zyZ5uBDv#&5dD-Y#4rji`UxpXPSFOE(_9c^O7-y^#NPV;tM`H=CRbzjDD`iPguSylP 
zxO{9rOm4bSKq52>(ZEtaw7#!6Lb#v?6)_5B+aXsA#-hdyKKEs&%avWwSSE)JyI>>P>rm)oeg8OON&PTY+TCjua3-Acw*iw@j>=e+T$ibQ%OM-oFd zsW9GZpLQEkkLeOD53r;|D%#`8M@FUY8`4uIkAHbW0@uDQ@9rlNqzrQ=wgrs`pv)23 z#O6joABHZF7Wm^$kL@tD2M`s*|E4UBSPafznVmk|=EU>kKqwPRx;B2LIkf|2A7La% z{`3G_>2Xf-%M-4DL3H@UCr^mpy(5N@35Dy7P?W9mif-H$t#4S;_D#d2<|jmPEH#a4 zyz3Ze85}N@qCtGJAd0S~9)Fpjc-_UT=<@;&(8U_k+E*uKY_~nUz{t_AbQj=zlgB$5_ZVHZ< z%(>!pAn@u)hS{V~L8@pa#50R8(W_mGr`R6s6xlWc41X*=GFRfE``4~+u@(v#dInc} zeMj84oGnKXMRimXtd_vApMoGyVPuYywi0RAtf0>@IPEeU6SE`^k|?2M&k;YkKR=j% zT>>A`4*ez0bke)3b>HWmHp* zWiS+JM4>WCxRW34fyf&EegUs^Ed~gq+oez!4zix!4R1OmA%i8rd$tl>3oMgeQz`gId_u0aiCg#cc8E3? z@NQQVUN8+hrN^i6vqyt{zT8eH35$*GW2AzAEw;X*|5e#Au3QJ7`8kk=hhKZkTV`KL z&mv?cDEb#3#0(Q@RJ!{JAD_J++*e_%cMYAHc1?|gtfu9)8D8(5ntnY_J!&2EL^&S* zmrqVdxFn1Rvbg-Nt>PcAB`=u@LUV@kI3VNO_Ui@liZV_uGq`?ccxzJ7rZAnoOPKys zZ_3_e+filN7W^PJ0D&ZEWW$}h#;KtPh`N%55XIN$Kx;+peeStA-pf|RTA>iQ*|cfX z%yHvRta+_dD_Z^III8=$xVYDw?)j9ehnahCH2OBT4K4>-1OXejXfg+03M{^c5d1yr z0!z>wM@Zmea{nB{aR8K$7koL#>jGoIg}~yWfOr~&GHe#{bG(=GJO`r_UweJeFER2x z2*ZV@83ZhF`rqHTs67XW0Ra)WF+T^!0e{g8q+YcQn#7A(Q9|||0ug5;1_w*7vCo=C z4PLtvaoyQ2qApCwC}DS z>?UC*{jD>jQot?(IhVXz`rMMuqsx_YpkLLnv);i9Q@FVUilYfYb z3*v>)#K*;X%HRatjK%dc8)l9=-F@W@y@Y4(baWLTOLY&QsCpSLC(O?9WF<=XwsJ_q zR1`L}LOtZBx)H{Mo2E4@JF2MBSlU8pBmCJZV8Cv4m!?Nb+HXmcI$goLXOMfRkFUXU@CFx*0+(EUnTm|1;OFi$m>E!j*$L8r( z5x7$}z-f5yw-k%_*y-)!u|1!s(dPGO;hy3_MS)#k%rg+U?H%4@dBfJZ)tY$Z(M#(T zd?~0fY(%bkpX8-elZ`YQ{Z`$zUx9N#m;leQS~hL;&d)=xhD^h zuG4id)WG@3Q+l1-#ZWvamO3d8yui5lj*V&g_Iku*S13rIQpdkxqW)0Ss516oQ-BgFQr8e{0P_&Pr zK3|hWztjjjs~E1sfoX}W)-s~AuBOzSb7$-%0yEmuE?Rs%-PPfosei*W_bno?uP5`q zQ@!Y>e)fVnp-~!UbZHw}n*QXpK%kLiu&0Ma1?DlL@@bD`S8dr$Q$BacYliHeZPNH7 z5)466KHGJo(&^bZI>2P^hFbA5><#-A0);}p5a+U$v4ekooNvk`!PPNlDf4tz(|!Ch zD-YvbwU@J?%nk8b6o0l*gHMXh87t@#9V+FMEH9bA$YwHDomlfE+V;o9WCT>@;dEs@ zTolr%6QtCY!@KA@=8OjN-baTnbLq~wzQmSOX1>LpM2Q@&Z5vabTBn`t+DbGlaI$L~ z&11F-m(xt0oQT;WqGl#70bV7;-0iZGD33YF>Rs&^Z;vi&cz<6X!ChMdTOXNb7jK}| zVjZ^}@1DPrsu})+IlbESl25gLH}cy(ZQJ{a@lZFv^SPC~i~rY@U%`?Mt@>Mm2H%!7CK zYY^O4`cWj>eGG1zR$rEzoAIyl$tGbjn-qNK=?t`a4n7(qVqQ6l* z9X5V8I!(I0osO&`&=ZCUB6#Badz=rIYz{`XJZpnpRJdTc_gCy;Mtcte!Exw92>SwUiQb1 zZkos9(aB!YtE3c&-rje&S~IjT^9XYC+(i9$dp8%WWf*G!NkF#0f%)e;W^@X9%q_+6 zwTwOCPOmkzA6E=zSTW{VVfI;8@Ur!J0*NHV3V&UuZ1aEpDr7KhNJsagpI~RcjkMqH z=Ho{Dm&3}SWa|0kaXpQCvAw`evIDW4M+$*_$r9e4 zFm~3@n}WgM+PjT!U^j^87IW)@vCno~ugbSj2rudJX5|pR;xh2^j}Fxf>y7GyNnH*- z$8rE?OGSS$5hyI-!^W8 z$r-4h2^{wy(Le+vK&;Z7F#^-xO0%8^?2g?Y`q*(NyM_f%%f}RfJLgY7^+>OjkqKK> zRLZ!AErpy-SopQlNm&MAi|&=qNsFFuaFWeI$iRO~je7}2l>?`)jR)AgM{zkTl8`^@ zU4(U=Hnhl}?4a~hMSfMa9;IAxTiv*e`?yd0+uoaECTX`~);DTFlZidLX5HS;R%81wcqZ;Q;zLiAZ;k6tP9@DKDTk0MC8294zf6hJmudyfZ_ur`} zzg+|KTl_DubP4^Vw_)xoEdX5l9T*ac#}pIxJGBG=K-%9?A!~CVP{}0`JYv)yz5_|< zcz{D1$n%Mu0N{%6cXQ(J)RONU6{x4*zg9ykb$!j^K0N;GF=C3-sYLXvSk!wo9oP51 zEeWG!|4u3SP9-@emwcN+tUacoO?~Lh&=3H|w}~NwRAidScV~x}0pRTK;+CJOB;P3{ z003P1{(m?}0ntTG4z48t_Uqe3LYc_ycTF~)c?kgj{4D+9U=hrdUAh!abV%_#XaIn+2>Y@7fTUe?9$*$y5g$m z_1o=}B+N$q8_eJCr#LMm1g`vow%055*RG_|WIn8TPbzh&d4G48hq9-JCmF0DH4;67 zqaDW7%*Hwuh?{mwT_!6!gZ+NLkYZ@zJum|xpntxf-@SmhkqJRY7|r1;4_7vd^

From 902368a06b915b860236cfc97ff885b2aceae256 Mon Sep 17 00:00:00 2001 From: Charles Duffy Date: Wed, 5 Feb 2025 19:52:31 -0600 Subject: [PATCH 33/36] metal : avoid breaking build when metal API predates TARGET_OS_VISION (#11690) Avoids breakage in nix flake build introduced by b0569130c5e9c671152c913d82803b7c2f014ff9 --- ggml/src/ggml-metal/ggml-metal.m | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m index c63dbad20..944d90af3 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -19,6 +19,10 @@ // max number of MTLCommandBuffer used to submit a graph for processing #define GGML_METAL_MAX_COMMAND_BUFFERS 8 +#ifndef TARGET_OS_VISION +#define TARGET_OS_VISION 0 +#endif + // create residency sets only on macOS >= 15.0 #if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 || \ TARGET_OS_IOS && __IPHONE_OS_VERSION_MAX_ALLOWED >= 180000 || \ From 1b598b30581bad59e5af86c94362f9a30f261fac Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Thu, 6 Feb 2025 00:02:18 -0600 Subject: [PATCH 34/36] vulkan: use smaller combined allocations to avoid fragmentation (#11551) --- ggml/src/ggml-alloc.c | 14 +------------- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c index 9a3bf9f29..7244a9cbb 100644 --- a/ggml/src/ggml-alloc.c +++ b/ggml/src/ggml-alloc.c @@ -989,19 +989,7 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment); } - if (this_size > max_size) { - GGML_LOG_ERROR("%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n", - __func__, t->name, - ggml_backend_buft_name(buft), - this_size, max_size); - for (size_t i = 0; i < n_buffers; i++) { - ggml_backend_buffer_free(buffers[i]); - } - free(buffers); - return NULL; - } - - if ((cur_buf_size + this_size) > max_size) { + if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) { // allocate tensors in the current buffer if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) { return NULL; diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 48ac489a6..2e1bcf691 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -156,6 +156,7 @@ struct vk_device_struct { vk::PhysicalDeviceProperties properties; std::string name; uint64_t max_memory_allocation_size; + uint64_t suballocation_block_size; bool fp16; bool pipeline_robustness; vk::Device device; @@ -2269,6 +2270,7 @@ static vk_device ggml_vk_get_device(size_t idx) { device->physical_device.getProperties2(&props2); device->properties = props2.properties; + device->vendor_id = device->properties.vendorID; const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE"); @@ -2280,7 +2282,20 @@ static vk_device ggml_vk_get_device(size_t idx) { device->max_memory_allocation_size = props3.maxMemoryAllocationSize; } - device->vendor_id = device->properties.vendorID; + const char* GGML_VK_SUBALLOCATION_BLOCK_SIZE = getenv("GGML_VK_SUBALLOCATION_BLOCK_SIZE"); + + if (GGML_VK_SUBALLOCATION_BLOCK_SIZE != nullptr) { + device->suballocation_block_size = std::stoul(GGML_VK_SUBALLOCATION_BLOCK_SIZE); +#if defined(_WIN32) + } else if (device->vendor_id == VK_VENDOR_ID_NVIDIA) { + // Limit batching of 
allocations to 1GB by default to avoid fragmentation issues + device->suballocation_block_size = 1024*1024*1024; +#endif + } else { + device->suballocation_block_size = device->max_memory_allocation_size; + } + device->suballocation_block_size = std::min(device->suballocation_block_size, device->max_memory_allocation_size); + device->subgroup_size = subgroup_props.subgroupSize; device->uma = device->properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu; if (sm_builtins) { @@ -7561,7 +7576,7 @@ static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_buffer_type static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { ggml_backend_vk_buffer_type_context * ctx = (ggml_backend_vk_buffer_type_context *) buft->context; - return ctx->device->max_memory_allocation_size; + return ctx->device->suballocation_block_size; } static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { From 8a7e3bf17aa5a8412854787746c92a28623a8925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20O?= Date: Thu, 6 Feb 2025 07:09:59 +0100 Subject: [PATCH 35/36] vulkan: initial support for IQ4_XS quantization (#11501) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 25 ++++++++++++ .../vulkan-shaders/copy_from_quant.comp | 2 +- .../vulkan-shaders/copy_to_quant.comp | 2 +- .../vulkan-shaders/dequant_funcs.comp | 38 ++++++++++++++++++- .../vulkan-shaders/dequant_funcs_cm2.comp | 23 +++++++++++ .../vulkan-shaders/dequant_iq4_xs.comp | 34 +++++++++++++++++ .../vulkan-shaders/flash_attn_cm2.comp | 2 +- .../vulkan-shaders/get_rows_quant.comp | 2 +- .../vulkan-shaders/mul_mat_vec.comp | 2 +- .../ggml-vulkan/vulkan-shaders/mul_mm.comp | 21 +++++++++- .../vulkan-shaders/mul_mm_cm2.comp | 2 +- .../src/ggml-vulkan/vulkan-shaders/types.comp | 28 +++++++++++--- .../vulkan-shaders/vulkan-shaders-gen.cpp | 1 + 13 files changed, 169 insertions(+), 13 deletions(-) create mode 100644 ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 2e1bcf691..1c99ebe2e 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -1622,6 +1622,7 @@ static void ggml_vk_load_shaders(vk_device& device) { //CREATE_FA(GGML_TYPE_IQ2_S, iq2_s) //CREATE_FA(GGML_TYPE_IQ3_XXS, iq3_xxs) //CREATE_FA(GGML_TYPE_IQ3_S, iq3_s) + //CREATE_FA(GGML_TYPE_IQ4_XS, iq4_xs) CREATE_FA(GGML_TYPE_IQ4_NL, iq4_nl) #undef CREATE_FA @@ -1655,6 +1656,7 @@ static void ggml_vk_load_shaders(vk_device& device) { CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ2_S].f16acc, matmul_iq2_s_f16, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f16, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ3_S].f16acc, matmul_iq3_s_f16, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) + CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ4_XS].f16acc, matmul_iq4_xs_f16, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ4_NL].f16acc, matmul_iq4_nl_f16, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) CREATE_MM2(pipeline_matmul_id_f16, matmul_id_f16, wg_denoms, warptile, vk_mat_mat_id_push_constants, 4) @@ -1673,6 +1675,7 @@ static void ggml_vk_load_shaders(vk_device& 
device) { CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc, matmul_id_iq2_s_f16, , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f16, , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc, matmul_id_iq3_s_f16, , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) + CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f16acc, matmul_id_iq4_xs_f16, , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc, matmul_id_iq4_nl_f16, , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) #undef CREATE_MM #undef CREATE_MM2 @@ -1726,6 +1729,7 @@ static void ggml_vk_load_shaders(vk_device& device) { CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f16acc, matmul_iq2_s_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f16acc, matmul_iq3_s_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); + CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_XS].f16acc, matmul_iq4_xs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc, matmul_iq4_nl_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); } else { CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc, matmul_q4_0_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); @@ -1744,6 +1748,7 @@ static void ggml_vk_load_shaders(vk_device& device) { CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f16acc, matmul_iq2_s_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f16acc, matmul_iq3_s_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); + CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_XS].f16acc, matmul_iq4_xs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc, matmul_iq4_nl_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); } @@ -1770,6 +1775,7 @@ static void ggml_vk_load_shaders(vk_device& device) { CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc, matmul_id_iq2_s_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc, matmul_id_iq3_s_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); + CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f16acc, matmul_id_iq4_xs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc, matmul_id_iq4_nl_f32, _f16acc, mmq_wg_denoms, warptile_mmq, 
vk_mat_mat_id_push_constants, 4, _id);
         } else {
             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc, matmul_id_q4_0_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
@@ -1788,6 +1794,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc, matmul_id_iq2_s_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc, matmul_id_iq3_s_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
+        CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f16acc, matmul_id_iq4_xs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc, matmul_id_iq4_nl_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
     }
 }
@@ -1837,6 +1844,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f16acc, matmul_iq2_s_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f16acc, matmul_iq3_s_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
+        CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_XS].f16acc, matmul_iq4_xs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc, matmul_iq4_nl_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
 
         // If there's not enough shared memory for row_ids and the result tile, don't create these pipelines.
@@ -1861,6 +1869,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc, matmul_id_iq2_s_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc, matmul_id_iq3_s_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
+        CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f16acc, matmul_id_iq4_xs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc, matmul_id_iq4_nl_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
     }
 #undef CREATE_MM2
@@ -1902,6 +1911,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f32acc, matmul_iq2_s_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f32acc, matmul_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f32acc, matmul_iq3_s_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
+        CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_XS].f32acc, matmul_iq4_xs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f32acc, matmul_iq4_nl_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, );
 
         // If there's not enough shared memory for row_ids and the result tile, don't create these pipelines.
@@ -1926,6 +1936,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f32acc, matmul_id_iq2_s_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f32acc, matmul_id_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f32acc, matmul_id_iq3_s_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
+        CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f32acc, matmul_id_iq4_xs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f32acc, matmul_id_iq4_nl_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id);
     }
 #undef CREATE_MM
@@ -1962,6 +1973,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ2_S][i], "mul_mat_vec_iq2_s_f32_f32_"+std::to_string(i+1), mul_mat_vec_iq2_s_f32_f32_len, mul_mat_vec_iq2_s_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true);
         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ3_XXS][i], "mul_mat_vec_iq3_xxs_f32_f32_"+std::to_string(i+1), mul_mat_vec_iq3_xxs_f32_f32_len, mul_mat_vec_iq3_xxs_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true);
         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ3_S][i], "mul_mat_vec_iq3_s_f32_f32_"+std::to_string(i+1), mul_mat_vec_iq3_s_f32_f32_len, mul_mat_vec_iq3_s_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true);
+        ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ4_XS][i], "mul_mat_vec_iq4_xs_f32_f32_"+std::to_string(i+1), mul_mat_vec_iq4_xs_f32_f32_len, mul_mat_vec_iq4_xs_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true);
         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ4_NL][i], "mul_mat_vec_iq4_nl_f32_f32_"+std::to_string(i+1), mul_mat_vec_iq4_nl_f32_f32_len, mul_mat_vec_iq4_nl_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq, 1, 1}, {subgroup_size_16, 2*rm_stdq, i+1}, 1, true);
 
         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_F32 ][i], "mul_mat_vec_f32_f16_f32_"+std::to_string(i+1), mul_mat_vec_f32_f16_f32_len, mul_mat_vec_f32_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2, i+1}, 1);
@@ -1981,6 +1993,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ2_S][i], "mul_mat_vec_iq2_s_f16_f32_"+std::to_string(i+1), mul_mat_vec_iq2_s_f16_f32_len, mul_mat_vec_iq2_s_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true);
         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ3_XXS][i], "mul_mat_vec_iq3_xxs_f16_f32_"+std::to_string(i+1), mul_mat_vec_iq3_xxs_f16_f32_len, mul_mat_vec_iq3_xxs_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true);
         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ3_S][i], "mul_mat_vec_iq3_s_f16_f32_"+std::to_string(i+1), mul_mat_vec_iq3_s_f16_f32_len, mul_mat_vec_iq3_s_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true);
+        ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ4_XS][i], "mul_mat_vec_iq4_xs_f16_f32_"+std::to_string(i+1), mul_mat_vec_iq4_xs_f16_f32_len, mul_mat_vec_iq4_xs_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true);
         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ4_NL][i], "mul_mat_vec_iq4_nl_f16_f32_"+std::to_string(i+1), mul_mat_vec_iq4_nl_f16_f32_len, mul_mat_vec_iq4_nl_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq, 1, 1}, {subgroup_size_16, 2*rm_stdq, i+1}, 1, true);
     }
@@ -2001,6 +2014,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ2_S], "mul_mat_vec_id_iq2_s_f32", mul_mat_vec_id_iq2_s_f32_len, mul_mat_vec_id_iq2_s_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true);
     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ3_XXS], "mul_mat_vec_id_iq3_xxs_f32", mul_mat_vec_id_iq3_xxs_f32_len, mul_mat_vec_id_iq3_xxs_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true);
     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ3_S], "mul_mat_vec_id_iq3_s_f32", mul_mat_vec_id_iq3_s_f32_len, mul_mat_vec_id_iq3_s_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true);
+    ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ4_XS], "mul_mat_vec_id_iq4_xs_f32", mul_mat_vec_id_iq4_xs_f32_len, mul_mat_vec_id_iq4_xs_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true);
     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ4_NL], "mul_mat_vec_id_iq4_nl_f32", mul_mat_vec_id_iq4_nl_f32_len, mul_mat_vec_id_iq4_nl_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {2*rm_stdq, 1, 1}, {subgroup_size_16, 2*rm_stdq}, 1, true);
 
     // dequant shaders
@@ -2020,6 +2034,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ2_S], "dequant_iq2_s", dequant_iq2_s_len, dequant_iq2_s_data, "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ3_XXS], "dequant_iq3_xxs", dequant_iq3_xxs_len, dequant_iq3_xxs_data, "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ3_S], "dequant_iq3_s", dequant_iq3_s_len, dequant_iq3_s_data, "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ4_XS], "dequant_iq4_xs", dequant_iq4_xs_len, dequant_iq4_xs_data, "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ4_NL], "dequant_iq4_nl", dequant_iq4_nl_len, dequant_iq4_nl_data, "main", 2, 5 * sizeof(uint32_t), {256 * 16, 1, 1}, {}, 1);
 
     // get_rows
@@ -2035,6 +2050,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ2_S], "get_rows_iq2_s", get_rows_iq2_s_len, get_rows_iq2_s_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ3_XXS], "get_rows_iq3_xxs", get_rows_iq3_xxs_len, get_rows_iq3_xxs_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ3_S], "get_rows_iq3_s", get_rows_iq3_s_len, get_rows_iq3_s_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ4_XS], "get_rows_iq4_xs", get_rows_iq4_xs_len, get_rows_iq4_xs_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ4_NL], "get_rows_iq4_nl", get_rows_iq4_nl_len, get_rows_iq4_nl_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
 
     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_F32 ], "get_rows_f32_f32", get_rows_f32_f32_len, get_rows_f32_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), { 512, 1, 1}, {}, 1);
@@ -2049,6 +2065,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ2_S], "get_rows_iq2_s_f32", get_rows_iq2_s_f32_len, get_rows_iq2_s_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ3_XXS], "get_rows_iq3_xxs_f32", get_rows_iq3_xxs_f32_len, get_rows_iq3_xxs_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ3_S], "get_rows_iq3_s_f32", get_rows_iq3_s_f32_len, get_rows_iq3_s_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
+    ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ4_XS], "get_rows_iq4_xs_f32", get_rows_iq4_xs_f32_len, get_rows_iq4_xs_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ4_NL], "get_rows_iq4_nl_f32", get_rows_iq4_nl_f32_len, get_rows_iq4_nl_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1);
 
     ggml_vk_create_pipeline(device, device->pipeline_matmul_split_k_reduce, "split_k_reduce", split_k_reduce_len, split_k_reduce_data, "main", 2, 2 * sizeof(uint32_t), {256 * 4, 1, 1}, {}, 1);
@@ -2995,6 +3012,7 @@ static vk_pipeline ggml_vk_get_to_fp16(ggml_backend_vk_context * ctx, ggml_type
         case GGML_TYPE_IQ2_S:
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ4_XS:
         case GGML_TYPE_IQ4_NL:
             break;
         default:
@@ -3048,6 +3066,7 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_pipeline(ggml_backend_vk_conte
         case GGML_TYPE_IQ2_S:
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ4_XS:
         case GGML_TYPE_IQ4_NL:
             break;
         default:
@@ -3084,6 +3103,7 @@ static vk_pipeline ggml_vk_get_dequantize_mul_mat_vec(ggml_backend_vk_context *
         case GGML_TYPE_IQ2_S:
         case GGML_TYPE_IQ3_XXS:
        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ4_XS:
         case GGML_TYPE_IQ4_NL:
             break;
         default:
@@ -3132,6 +3152,7 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_id_pipeline(ggml_backend_vk_co
         case GGML_TYPE_IQ2_S:
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ4_XS:
         case GGML_TYPE_IQ4_NL:
             break;
         default:
@@ -3163,6 +3184,7 @@ static vk_pipeline ggml_vk_get_dequantize_mul_mat_vec_id(ggml_backend_vk_context
         case GGML_TYPE_IQ2_S:
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ4_XS:
         case GGML_TYPE_IQ4_NL:
             break;
         default:
@@ -8037,6 +8059,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
                 case GGML_TYPE_IQ2_S:
                 case GGML_TYPE_IQ3_XXS:
                 case GGML_TYPE_IQ3_S:
+                case GGML_TYPE_IQ4_XS:
                 case GGML_TYPE_IQ4_NL:
                     break;
                 default:
@@ -8110,6 +8133,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
                 //case GGML_TYPE_IQ2_S:
                 //case GGML_TYPE_IQ3_XXS:
                 //case GGML_TYPE_IQ3_S:
+                //case GGML_TYPE_IQ4_XS:
                 case GGML_TYPE_IQ4_NL:
                     break;
                 default:
@@ -8132,6 +8156,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
             case GGML_TYPE_IQ2_S:
             case GGML_TYPE_IQ3_XXS:
            case GGML_TYPE_IQ3_S:
+            case GGML_TYPE_IQ4_XS:
            case GGML_TYPE_IQ4_NL:
                 return true;
             default:
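The ggml-vulkan.cpp hunks above all repeat one recipe: register an IQ4_XS pipeline next to each existing i-quant pipeline, then add GGML_TYPE_IQ4_XS to every capability switch. A minimal C++ sketch of that recurring switch pattern, with illustrative names only (supports_type and quant_type are not the backend's real API):

    // Each capability switch in ggml-vulkan.cpp acts as a whitelist: a quant
    // type missing from any one of them is not routed to the Vulkan pipelines.
    enum class quant_type { iq3_s, iq4_xs, iq4_nl, other };

    static bool supports_type(quant_type t) {
        switch (t) {
            case quant_type::iq3_s:
            case quant_type::iq4_xs:   // the one-line addition each hunk makes
            case quant_type::iq4_nl:
                return true;
            default:
                return false;
        }
    }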
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp b/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp
index aeae5400d..9c9fe9626 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp
@@ -12,7 +12,7 @@ layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 #endif
 
 void main() {
-#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
+#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
     init_iq_shmem(gl_WorkGroupSize);
     if (gl_LocalInvocationIndex.x != 0) {
         return;
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp b/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp
index d4b068e61..660811086 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp
@@ -217,7 +217,7 @@ void quantize(uint dst_idx, uint src_idx)
 #endif
 
 void main() {
-#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
+#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
     init_iq_shmem(gl_WorkGroupSize);
     if (gl_LocalInvocationIndex.x != 0) {
         return;
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
index ee6877531..ecfdbfaa8 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp
@@ -304,6 +304,42 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
 }
 #endif
 
+#if defined(DATA_A_IQ4_XS)
+vec2 dequantize(uint ib, uint iqs, uint a_offset) {
+    const uint ib32 = iqs / 32;
+    const uint iq = 16 * ib32 + (iqs % 16);
+
+    const uint sl = (data_a[a_offset + ib].scales_l[ib32/2] >> (4 * (ib32 & 1))) & 0xF;
+    const uint sh = (data_a[a_offset + ib].scales_h >> (2 * ib32)) & 3;
+    const uint qshift = (iqs & 16) >> 2;
+    u8vec2 qs = u8vec2(data_a[a_offset + ib].qs[iq], data_a[a_offset + ib].qs[iq + 1]);
+    qs = (qs >> qshift) & uint8_t(0xF);
+
+    const float dl = float(int(sl | (sh << 4)) - 32);
+    return dl * vec2(kvalues_iq4nl[qs.x], kvalues_iq4nl[qs.y]);
+}
+vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
+    const uint ib32 = iqs / 32;
+    const uint iq = 16 * ib32 + (iqs % 16);
+
+    const uint sl = (data_a[a_offset + ib].scales_l[ib32/2] >> (4 * (ib32 & 1))) & 0xF;
+    const uint sh = (data_a[a_offset + ib].scales_h >> (2 * ib32)) & 3;
+    const uint qshift = (iqs & 16) >> 2;
+    u8vec4 qs = u8vec4(
+        data_a[a_offset + ib].qs[iq + 0],
+        data_a[a_offset + ib].qs[iq + 1],
+        data_a[a_offset + ib].qs[iq + 2],
+        data_a[a_offset + ib].qs[iq + 3]
+    );
+    qs = (qs >> qshift) & uint8_t(0xF);
+
+    const float dl = float(int(sl | (sh << 4)) - 32);
+    return dl * vec4(
+        kvalues_iq4nl[qs.x], kvalues_iq4nl[qs.y],
+        kvalues_iq4nl[qs.z], kvalues_iq4nl[qs.w]);
+}
+#endif
+
 #if defined(DATA_A_IQ4_NL)
 vec2 dequantize(uint ib, uint iqs, uint a_offset) {
     const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
@@ -321,7 +357,7 @@ vec2 get_dm(uint ib, uint a_offset) {
 }
 #endif
 
-#if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
+#if defined(DATA_A_Q4_0) || defined(DATA_A_Q5_0) || defined(DATA_A_Q8_0) || defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
 vec2 get_dm(uint ib, uint a_offset) {
     return vec2(float(data_a[a_offset + ib].d), 0);
 }
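The two GLSL helpers above decode IQ4_XS values two and four at a time. The same arithmetic written out scalar in C++, as a hedged reference (block_iq4_xs_ref and dequant_iq4_xs_ref are illustrative names; the real ggml block stores d as fp16):

    #include <cstdint>

    // kvalues_iq4nl is the shared non-linear codebook used by IQ4_NL and IQ4_XS.
    static const int8_t kvalues_iq4nl[16] = {
        -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
    };

    struct block_iq4_xs_ref {
        float    d;            // super-block scale (fp16 in the real layout)
        uint16_t scales_h;     // 2 high bits of each subblock scale
        uint8_t  scales_l[4];  // 4 low bits of each subblock scale, two per byte
        uint8_t  qs[128];      // 256 4-bit codebook indices
    };

    // Dequantize element i (0..255) of one 256-value block.
    static float dequant_iq4_xs_ref(const block_iq4_xs_ref & b, int i) {
        const int ib32 = i / 32;  // subblock 0..7
        const int sl   = (b.scales_l[ib32 / 2] >> (4 * (ib32 & 1))) & 0xF;
        const int sh   = (b.scales_h >> (2 * ib32)) & 3;
        const float dl = b.d * float((sl | (sh << 4)) - 32);  // 6-bit scale, bias -32
        const int j    = i % 32;
        const uint8_t byte = b.qs[16 * ib32 + (j % 16)];
        const int q    = (j < 16) ? (byte & 0xF) : (byte >> 4);  // low nibble first
        return dl * kvalues_iq4nl[q];
    }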
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
index 974efd3f9..78c3bddf2 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
@@ -454,6 +454,27 @@ float16_t dequantFuncIQ3_S(const in decodeBufIQ3_S bl, const in uint blockCoords
 }
 #endif
 
+#if defined(DATA_A_IQ4_XS)
+layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ4_XS {
+    block_iq4_xs block;
+};
+
+float16_t dequantFuncIQ4_XS(const in decodeBufIQ4_XS bl, const in uint blockCoords[2], const in uint coordInBlock[2])
+{
+    const float16_t d = bl.block.d;
+    const uint idx = coordInBlock[1];
+
+    const uint ib32 = (idx & 0xE0) >> 5; // 0..7
+
+    const uint sl = (bl.block.scales_l[ib32/2] >> (4 * (ib32 & 1))) & 0xF;
+    const uint sh = ((bl.block.scales_h) >> (2 * ib32)) & 3;
+    const uint qshift = (idx & 16) >> 2;
+    const uint q = (bl.block.qs[16 * ib32 + (idx % 16)] >> qshift) & 0xF;
+
+    float16_t ret = d * float16_t(int(sl | (sh << 4)) - 32) * float16_t(kvalues_iq4nl[q]);
+    return ret;
+}
+#endif
 
 #if defined(DATA_A_IQ4_NL)
 layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ4_NL {
@@ -504,6 +525,8 @@ float16_t dequantFuncIQ4_NL(const in decodeBufIQ4_NL bl, const in uint blockCoor
 #define dequantFuncA dequantFuncIQ3_XXS
 #elif defined(DATA_A_IQ3_S)
 #define dequantFuncA dequantFuncIQ3_S
+#elif defined(DATA_A_IQ4_XS)
+#define dequantFuncA dequantFuncIQ4_XS
 #elif defined(DATA_A_IQ4_NL)
 #define dequantFuncA dequantFuncIQ4_NL
 #endif
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp
new file mode 100644
index 000000000..f930852a4
--- /dev/null
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp
@@ -0,0 +1,34 @@
+#version 450
+
+#include "dequant_head.comp"
+
+layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
+
+layout (binding = 0) readonly buffer A {block_iq4_xs data_a[];};
+layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
+
+void main() {
+    // Each thread handles 1 subblock (1 scale and 32 quantized values)
+    const uint ib = gl_WorkGroupID.x * 32 + gl_LocalInvocationID.x / 8;
+
+    init_iq_shmem(gl_WorkGroupSize);
+
+    if (ib >= p.nel / 256) {
+        return;
+    }
+
+    const uint ib32 = gl_LocalInvocationID.x % 8;
+
+    const float d = float(data_a[ib].d);
+    // Scales are 6 bits
+    const uint scale = ((data_a[ib].scales_l[ib32/2] >> (4 * (ib32 & 1))) & 0xF)
+                     | (((data_a[ib].scales_h >> (2 * ib32)) & 3) << 4);
+    const float dl = d * (int(scale) - 32);
+
+    const uint b_idx = 256 * ib + 32 * ib32;
+    const uint q_idx = 16 * ib32;
+    [[unroll]] for (uint l = 0; l < 16; ++l) {
+        data_b[b_idx + l +  0] = D_TYPE(dl * kvalues_iq4nl[data_a[ib].qs[q_idx + l] & 0xF]);
+        data_b[b_idx + l + 16] = D_TYPE(dl * kvalues_iq4nl[data_a[ib].qs[q_idx + l] >> 4]);
+    }
+}
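A note on the dispatch implied by this shader: with local_size_x = 256 and eight threads per 256-value block (one per subblock), each workgroup dequantizes 32 blocks, which matches the {256 * 32, 1, 1} element count the pipeline is registered with above. A sketch of the workgroup-count math under those assumptions (iq4_xs_workgroups is an illustrative helper, not backend code):

    #include <cstdint>

    // Workgroups needed to dequantize nel IQ4_XS-quantized values.
    static uint32_t iq4_xs_workgroups(uint64_t nel) {
        const uint64_t blocks        = nel / 256;  // 256 values per block
        const uint64_t blocks_per_wg = 256 / 8;    // 8 threads per block -> 32 blocks
        return (uint32_t)((blocks + blocks_per_wg - 1) / blocks_per_wg);
    }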
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp
index 043a53023..ba88ce79a 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp
@@ -104,7 +104,7 @@ ACC_TYPE Max(const in uint32_t row, const in uint32_t col, const in ACC_TYPE ele
 #endif
 
 void main() {
-#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
+#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
     init_iq_shmem(gl_WorkGroupSize);
 #endif
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp b/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp
index 09dc43d8d..c16a2a9f6 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp
@@ -12,7 +12,7 @@ void main() {
     const uint i11 = (gl_GlobalInvocationID.z)/p.ne12;
     const uint i12 = (gl_GlobalInvocationID.z)%p.ne12;
 
-#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
+#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
     init_iq_shmem(gl_WorkGroupSize);
 #endif
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
index 48156e7ba..d7e99727d 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp
@@ -133,7 +133,7 @@ void compute_outputs(const uint32_t first_row, const uint32_t num_rows) {
 void main() {
     const uint first_row = NUM_ROWS * (gl_WorkGroupID.x + gl_NumWorkGroups.x * gl_WorkGroupID.z);
 
-#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
+#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
     init_iq_shmem(gl_WorkGroupSize);
 #endif
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp
index d0559aac8..33b2234e7 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp
@@ -95,7 +95,7 @@ shared ACC_TYPE coopmat_stage[TM * TN * NUM_WARPS];
 #endif
 
 void main() {
-#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
+#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
     init_iq_shmem(gl_WorkGroupSize);
 #endif
@@ -547,6 +547,25 @@ void main() {
             const uint32_t grid = iq3s_grid[qs | ((qh << (8 - (iqs % 8))) & 256)] >> (16 * (idx % 2));
             const vec2 v = db * vec2(sign01) * vec2(unpack8(grid).xy);
 
+            buf_a[buf_idx    ] = FLOAT_TYPE(v.x);
+            buf_a[buf_idx + 1] = FLOAT_TYPE(v.y);
+#elif defined(DATA_A_IQ4_XS)
+            const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a;
+            const uint buf_idx = (loadc_a + l) * SHMEM_STRIDE + loadr_a * LOAD_VEC_A;
+
+            const uint ib = idx / 128;             // 2 values per idx
+            const uint ib32 = (idx % 128) / 16;    // 0..7
+            const uint iq = 16 * ib32 + 2 * (idx % 8);
+
+            const uint sl = (data_a[ib].scales_l[ib32/2] >> (4 * (ib32 & 1))) & 0xF;
+            const uint sh = ((data_a[ib].scales_h) >> (2 * ib32)) & 3;
+            const uint qshift = (idx & 8) >> 1;
+            u8vec2 qs = u8vec2(data_a[ib].qs[iq], data_a[ib].qs[iq + 1]);
+            qs = (qs >> qshift) & uint8_t(0xF);
+
+            const float d = float(data_a[ib].d);
+            const vec2 v = d * float(int(sl | (sh << 4)) - 32) * vec2(kvalues_iq4nl[qs.x], kvalues_iq4nl[qs.y]);
+
             buf_a[buf_idx    ] = FLOAT_TYPE(v.x);
             buf_a[buf_idx + 1] = FLOAT_TYPE(v.y);
 #elif defined(DATA_A_IQ4_NL)
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp
index 27c5d68b3..7e29bbfec 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp
@@ -106,7 +106,7 @@ D_TYPE perElemOpD(const in uint32_t r, const in uint32_t c, const in D_TYPE elem
 #endif
 
 void main() {
-#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_NL)
+#if defined(DATA_A_IQ2_XXS) || defined(DATA_A_IQ2_XS) || defined(DATA_A_IQ2_S) || defined(DATA_A_IQ3_XXS) || defined(DATA_A_IQ3_S) || defined(DATA_A_IQ4_XS) || defined(DATA_A_IQ4_NL)
     init_iq_shmem(gl_WorkGroupSize);
 #endif
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/types.comp b/ggml/src/ggml-vulkan/vulkan-shaders/types.comp
index 9e56a3530..db643a54c 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/types.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/types.comp
@@ -1026,6 +1026,23 @@ void init_iq_shmem(uvec3 wgsize)
 #define A_TYPE_PACKED16 block_iq3_s_packed16
 #endif
 
+#define QUANT_K_IQ4_XS 256
+#define QUANT_R_IQ4_XS 1
+
+struct block_iq4_xs
+{
+    float16_t d;
+    uint16_t scales_h;
+    uint8_t scales_l[QUANT_K_IQ4_XS/64];
+    uint8_t qs[QUANT_K_IQ4_XS/2];
+};
+
+#if defined(DATA_A_IQ4_XS)
+#define QUANT_K QUANT_K_IQ4_XS
+#define QUANT_R QUANT_R_IQ4_XS
+#define A_TYPE block_iq4_xs
+#endif
+
 #define QUANT_K_IQ4_NL 32
 #define QUANT_R_IQ4_NL 2
 
@@ -1042,7 +1059,13 @@ struct block_iq4_nl_packed16
 };
 
 #if defined(DATA_A_IQ4_NL)
+#define QUANT_K QUANT_K_IQ4_NL
+#define QUANT_R QUANT_R_IQ4_NL
+#define A_TYPE block_iq4_nl
+#define A_TYPE_PACKED16 block_iq4_nl_packed16
+#endif
+
+#if defined(DATA_A_IQ4_NL) || defined(DATA_A_IQ4_XS)
 const int8_t kvalues_iq4nl_const[16] = {
     int8_t(-127), int8_t(-104), int8_t(-83), int8_t(-65), int8_t(-49), int8_t(-35), int8_t(-22), int8_t(-10),
     int8_t(1), int8_t(13), int8_t(25), int8_t(38), int8_t(53), int8_t(69), int8_t(89), int8_t(113)
@@ -1058,11 +1081,6 @@ void init_iq_shmem(uvec3 wgsize)
     }
     barrier();
 }
-
-#define QUANT_K QUANT_K_IQ4_NL
-#define QUANT_R QUANT_R_IQ4_NL
-#define A_TYPE block_iq4_nl
-#define A_TYPE_PACKED16 block_iq4_nl_packed16
 #endif
 
 #endif // !defined(GGML_TYPES_COMP)
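The block_iq4_xs declaration above pins down the storage cost: 2 bytes of fp16 scale, 2 bytes of high scale bits, 4 bytes of low scale bits, and 128 bytes of nibbles per 256 weights, i.e. 136 bytes, or 4.25 bits per weight. A hedged C++ mirror of that layout with a size check (block_iq4_xs_layout is an illustrative name; the real ggml struct uses ggml_half for d):

    #include <cstdint>

    #pragma pack(push, 1)
    struct block_iq4_xs_layout {
        uint16_t d;            // fp16 bits of the super-block scale
        uint16_t scales_h;     // 8 x 2-bit high scale bits
        uint8_t  scales_l[4];  // 8 x 4-bit low scale bits
        uint8_t  qs[128];      // 256 x 4-bit indices into kvalues_iq4nl
    };
    #pragma pack(pop)

    static_assert(sizeof(block_iq4_xs_layout) == 136,
                  "IQ4_XS: 136 bytes per 256 weights = 4.25 bits per weight");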
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
index 93ddbfadc..77e7e1148 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -60,6 +60,7 @@ const std::vector<std::string> type_names = {
     "iq2_s",
     "iq3_xxs",
     "iq3_s",
+    "iq4_xs",
     "iq4_nl"
 };

From 2c6c8df56d8a3edd657b9a295e95d469a37f0044 Mon Sep 17 00:00:00 2001
From: Jeff Bolz
Date: Thu, 6 Feb 2025 00:15:30 -0600
Subject: [PATCH 36/36] vulkan: optimize coopmat2 iq2/iq3 callbacks (#11521)

* vulkan: optimize coopmat2 iq2/iq3 callbacks

* build: trigger CI on GLSL compute shader changes
---
 .github/workflows/build.yml                        |  4 +-
 .../vulkan-shaders/dequant_funcs_cm2.comp          | 79 +++++++++----------
 2 files changed, 40 insertions(+), 43 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 8f9c82f87..6841ba589 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -10,10 +10,10 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
index 78c3bddf2..0eba37420 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
@@ -323,15 +323,16 @@ float16_t dequantFuncIQ2_XXS(const in decodeBufIQ2_XXS bl, const in uint blockCo
     const uint8_t qs = bl.block.qs[iqs];
     const uint signscale = pack32(u16vec2(bl16.block.qs[4*ib32+2], bl16.block.qs[4*ib32+3]));
 
-    const float16_t dscale = bl.block.d * 0.25hf * (0.5hf + float16_t(signscale >> 28));
+    const float dscale = float(bl.block.d) * 0.25 * (0.5 + float(signscale >> 28));
     uint sign = bitfieldExtract(signscale, 7 * int(ib8), 7);
     sign |= bitCount(sign) << 7;
 
-    const uint8_t g = unpack8(iq2xxs_grid[qs][(idx & 4) >> 2])[idx & 3];
+    uint g2 = iq2xxs_grid[qs][(idx & 4) >> 2];
+    g2 >>= (idx & 2) * 8;
+    const vec2 g = vec2(unpack8(g2));
 
-    float16_t ret = dscale * float16_t(g) * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
-
-    return ret;
+    vec2 ret = dscale * g * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
+    return float16_t(ret[idx & 1]);
 }
 #endif
@@ -350,14 +351,16 @@ float16_t dequantFuncIQ2_XS(const in decodeBufIQ2_XS bl, const in uint blockCoor
     const uint iqs = (idx & 0xF8) >> 3; // 0..63
     const uint16_t qs = bl.block.qs[iqs];
-    const float16_t dscale = bl.block.d * 0.25hf * (0.5hf + float16_t((bl.block.scales[is] >> sshift) & 0xF));
+    const float dscale = float(bl.block.d) * 0.25 * (0.5 + float((bl.block.scales[is] >> sshift) & 0xF));
     uint sign = uint(qs >> 9);
     sign |= bitCount(sign) << 7;
-    const uint8_t g = unpack8(iq2xs_grid[qs & 0x1FF][(idx & 4) >> 2])[idx & 3];
+    uint g2 = iq2xs_grid[qs & 0x1FF][(idx & 4) >> 2];
+    g2 >>= (idx & 2) * 8;
+    const vec2 g = vec2(unpack8(g2));
 
-    float16_t ret = dscale * float16_t(g) * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
-    return ret;
+    vec2 ret = dscale * g * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
+    return float16_t(ret[idx & 1]);
 }
 #endif
@@ -369,24 +372,23 @@ layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ2
 float16_t dequantFuncIQ2_S(const in decodeBufIQ2_S bl, const in uint blockCoords[2], const in uint coordInBlock[2])
 {
     uint idx = coordInBlock[1];
-    uint lsb = idx & 1;
-    idx /= 2;
 
-    const uint ib8 = (idx % 128) / 4; // 0..31
-    const uint ib32 = ib8 / 4; // 0..7
+    const uint ib32 = (idx & 0xE0) >> 5; // 0..7
+    const uint ib8 = (idx & 0xF8) >> 3;  // 0..31
+    const uint qhshift = 2 * (ib8 % 4);
 
-    const uint scale = (bl.block.scales[ib32] >> (2 * (ib8 & 2))) & 0xf;
+    const uint scale = (bl.block.scales[ib32] >> ((idx & 0x10) >> 2)) & 0xf;
     const uint qs = bl.block.qs[ib8];
     const uint qh = bl.block.qh[ib32];
-    const uint qhshift = 2 * (ib8 % 4);
-    const uint sign = bl.block.qs[QUANT_K / 8 + ib8] >> (2 * (idx % 4));
+    const uint sign = bl.block.qs[QUANT_K / 8 + ib8] >> (idx & 0x6);
 
     const float d = float(bl.block.d);
     const float db = d * 0.25 * (0.5 + scale);
-    const i8vec2 sign01 = i8vec2(1 - (2 & i8vec2(int8_t(sign << 1), int8_t(sign))));
-    const uint16_t grid = unpack16(iq2s_grid[qs | ((qh << (8 - qhshift)) & 0x300)][(idx & 2) >> 1])[idx & 1];
-    const vec2 v = db * vec2(sign01) * vec2(unpack8(grid));
-    return float16_t(v[lsb]);
+    const ivec2 sign01 = 1 - (2 & ivec2(sign << 1, sign));
+    uint g2 = iq2s_grid[qs | ((qh << (8 - qhshift)) & 0x300)][(idx & 4) >> 2];
+    g2 >>= (idx & 2) * 8;
+    const vec2 v = db * vec2(sign01) * vec2(unpack8(g2));
+    return float16_t(v[idx & 1]);
 }
 #endif
@@ -401,28 +403,25 @@ layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3
 float16_t dequantFuncIQ3_XXS(const in decodeBufIQ3_XXS bl, const in uint blockCoords[2], const in uint coordInBlock[2])
 {
+    decodeBufIQ3_XXS_packed16 bl16 = decodeBufIQ3_XXS_packed16(bl);
     uint idx = coordInBlock[1];
-    uint lsb = idx & 1;
-    idx /= 2;
 
-    const uint iqs = (idx % 128) / 2; // 0..63
-    const uint is = QUANT_K / 4 + 4 * (iqs / 8); // 8 values
+    const uint iqs = (idx & 0xFC) >> 2;           // 0..63
+    const uint is = QUANT_K / 4 + ((idx & 0xE0) >> 3);// 8 values
 
     const float d = float(bl.block.d);
     const uint qs = bl.block.qs[iqs];
-    const uint signs = pack32(u8vec4(
-        bl.block.qs[is+0],
-        bl.block.qs[is+1],
-        bl.block.qs[is+2],
-        bl.block.qs[is+3]
+    const uint signs = pack32(u16vec2(
+        bl16.block.qs[is/2+0],
+        bl16.block.qs[is/2+1]
     ));
     const float db = d * 0.5 * (0.5 + (signs >> 28));
     const uint32_t sign7 = bitfieldExtract(signs, 7 * (int(iqs / 2) % 4), 7);
-    const uint sign = (sign7 | (bitCount(sign7) << 7)) >> (2 * (idx % 4));
-    const i8vec2 sign01 = i8vec2(1 - (2 & i8vec2(int8_t(sign << 1), int8_t(sign))));
-    const uint grid = iq3xxs_grid[qs] >> (16 * (idx & 1));
+    const uint sign = (sign7 | (bitCount(sign7) << 7)) >> (idx & 0x6);
+    const ivec2 sign01 = ivec2(1 - (2 & ivec2(sign << 1, sign)));
+    const uint grid = iq3xxs_grid[qs] >> (16 * ((idx & 2) >> 1));
     const vec2 v = db * vec2(sign01) * vec2(unpack8(grid).xy);
-    return float16_t(v[lsb]);
+    return float16_t(v[idx & 1]);
 }
 #endif
@@ -434,23 +433,21 @@ layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3
 float16_t dequantFuncIQ3_S(const in decodeBufIQ3_S bl, const in uint blockCoords[2], const in uint coordInBlock[2])
 {
     uint idx = coordInBlock[1];
-    uint lsb = idx & 1;
-    idx /= 2;
 
-    const uint iqs = (idx % 128) / 2; // 0..63
-    const uint iqh = iqs / 8;
+    const uint iqs = (idx & 0xFC) >> 2; // 0..63
+    const uint iqh = (idx & 0xE0) >> 5;
 
     const float d = float(bl.block.d);
     const uint qs = bl.block.qs[iqs];
     const uint qh = bl.block.qh[iqh];
-    const int8_t sign = int8_t(bl.block.signs[iqs / 2] >> (2 * (idx % 4)));
+    const int8_t sign = int8_t(bl.block.signs[iqs / 2] >> (idx & 0x6));
     const uint scale = bl.block.scales[iqs / 16];
-    const i8vec2 sign01 = i8vec2(1 - (2 & i8vec2(sign << 1, sign)));
+    const ivec2 sign01 = ivec2(1 - (2 & ivec2(sign << 1, sign)));
     const float db = d * (1 + 2 * ((scale >> (4 * (iqh & 1))) & 0xf));
-    const uint32_t grid = iq3s_grid[qs | ((qh << (8 - (iqs % 8))) & 256)] >> (16 * (idx % 2));
+    const uint32_t grid = iq3s_grid[qs | ((qh << (8 - (iqs % 8))) & 256)] >> ((idx & 2) << 3);
     const vec2 v = db * vec2(sign01) * vec2(unpack8(grid).xy);
-    return float16_t(v[lsb]);
+    return float16_t(v[idx & 1]);
 }
 #endif
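One detail worth spelling out from the callbacks above: the IQ2/IQ3 formats store only 7 of each group's 8 sign bits, and `sign |= bitCount(sign) << 7` reconstructs the eighth as the parity of the other seven (only bit 7 of the shifted popcount is ever tested). A small C++ sketch of that reconstruction, with expand_signs as an illustrative name:

    #include <bit>
    #include <cstdint>

    // Expand a 7-bit sign field to 8 bits; the 8th sign is the parity of the
    // first seven, mirroring `sign7 | (bitCount(sign7) << 7)` in the shaders.
    static uint32_t expand_signs(uint32_t sign7) {
        return sign7 | ((std::popcount(sign7) & 1u) << 7);
    }

The rewritten callbacks also drop the old per-call `idx /= 2` split: each invocation now shifts both bytes of the 32-bit grid word into a vec2 and selects the lane with `idx & 1`, so adjacent elements share most of the decode work.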