From 2960eae847f8dbde23be6d170a61bcf44ebf32de Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Sun, 10 Mar 2024 11:17:43 -0400 Subject: [PATCH 01/48] grammar : verify parsed state (#5950) --- common/grammar-parser.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp index bf89a96f3..2a1301569 100644 --- a/common/grammar-parser.cpp +++ b/common/grammar-parser.cpp @@ -278,6 +278,22 @@ namespace grammar_parser { while (*pos) { pos = parse_rule(state, pos); } + // Validate the state to ensure that all rules are defined + for (const auto & rule : state.rules) { + for (const auto & elem : rule) { + if (elem.type == LLAMA_GRETYPE_RULE_REF) { + // Ensure that the rule at that location exists + if (elem.value >= state.rules.size() || state.rules[elem.value].empty()) { + // Get the name of the rule that is missing + for (const auto & kv : state.symbol_ids) { + if (kv.second == elem.value) { + throw std::runtime_error("Undefined rule identifier '" + kv.first + "'"); + } + } + } + } + } + } return state; } catch (const std::exception & err) { fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what()); From bcebd7dbf62fd7b293d5ed089023e4e733269c71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?DAN=E2=84=A2?= Date: Sun, 10 Mar 2024 11:56:30 -0400 Subject: [PATCH 02/48] llama : add support for GritLM (#5959) * add gritlm example * gritlm results match * tabs to spaces * comment out debug printing * rebase to new embed * gritlm embeddings are back babeee * add to gitignore * allow to toggle embedding mode * Clean-up GritLM sample code. * Fix types. * Flush stdout and output ending newline if streaming. * mostly style fixes; correct KQ_mask comment * add causal_attn flag to llama_cparams * gritml : minor * llama : minor --------- Co-authored-by: Douglas Hanley Co-authored-by: Georgi Gerganov --- .gitignore | 1 + Makefile | 6 +- examples/CMakeLists.txt | 1 + examples/gritlm/CMakeLists.txt | 5 + examples/gritlm/gritlm.cpp | 229 +++++++++++++++++++++++++++++++++ llama.cpp | 25 +++- llama.h | 4 + 7 files changed, 267 insertions(+), 4 deletions(-) create mode 100644 examples/gritlm/CMakeLists.txt create mode 100644 examples/gritlm/gritlm.cpp diff --git a/.gitignore b/.gitignore index 62b6b8b1a..d28f4d1b8 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ models-mnt /embedding /gguf /gguf-llama-simple +/gritlm /imatrix /infill /libllama.so diff --git a/Makefile b/Makefile index d809b8b3b..6492da8a0 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ BUILD_TARGETS = \ main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \ simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \ - speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey tests/test-c.o + speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o # Binaries only useful for tests TEST_TARGETS = \ @@ -724,6 +724,10 @@ embedding: examples/embedding/embedding.cpp ggml.o llama.o $(C $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) +gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 653abc73a..e762cf8b9 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -20,6 +20,7 @@ else() add_subdirectory(convert-llama2c-to-ggml) add_subdirectory(embedding) add_subdirectory(finetune) + add_subdirectory(gritlm) add_subdirectory(infill) add_subdirectory(llama-bench) add_subdirectory(llava) diff --git a/examples/gritlm/CMakeLists.txt b/examples/gritlm/CMakeLists.txt new file mode 100644 index 000000000..ac4a5ae79 --- /dev/null +++ b/examples/gritlm/CMakeLists.txt @@ -0,0 +1,5 @@ +set(TARGET gritlm) +add_executable(${TARGET} gritlm.cpp) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/gritlm/gritlm.cpp b/examples/gritlm/gritlm.cpp new file mode 100644 index 000000000..3d4b085d6 --- /dev/null +++ b/examples/gritlm/gritlm.cpp @@ -0,0 +1,229 @@ +#include "common.h" +#include "llama.h" + +#include +#include + +// #define GRIT_DEBUG + +static float dot_product(const std::vector & v1, const std::vector & v2) { + float dot = 0.0f; + for (uint64_t i = 0; i < v1.size(); ++i) { + dot += v1[i] * v2[i]; + } + return dot; +} + +static float norm(const std::vector & v) { + return std::sqrt(dot_product(v, v)); +} + +static float cosine_similarity(const std::vector & v1, const std::vector & v2) { + return dot_product(v1, v2) / (norm(v1) * norm(v2)); +} + +static std::vector> encode(llama_context * ctx, const std::vector & sentences, const std::string & instruction) { + std::vector> result; + + const llama_model * mdl = llama_get_model(ctx); + + llama_batch batch = llama_batch_init(llama_n_batch(ctx), 0, 1); + + for (uint64_t i = 0; i < sentences.size(); i++) { + llama_batch_clear(batch); + + const std::string input_string = instruction + sentences[i]; + + std::vector inputs = llama_tokenize(mdl, input_string, true, false); + + const int32_t n_toks = inputs.size(); + + // GritLM seems to have EOS = "" + // https://github.com/ContextualAI/gritlm/blob/92025b16534712b31b3c4aaaf069350e222bd5f8/gritlm/gritlm.py#L18 + // inputs.push_back(llama_token_eos(mdl)); + + // we want to ignore instruction tokens for mean pooling + const int32_t n_inst = llama_tokenize(mdl, instruction, true, false).size(); + +#ifdef GRIT_DEBUG + // debug tokens - should be matching as referenced in the GritLM sample + std::for_each(inputs.begin(), inputs.end(), [&ctx](llama_token t) { + std::printf("[%u:%s]", t, llama_token_to_piece(ctx, t).c_str()); + }); + std::printf("\n"); +#endif + + // add input to batch (this increments n_tokens) + for (int32_t j = 0; j < n_toks; j++) { + llama_batch_add(batch, inputs[j], j, { 0 }, j >= n_inst); + } + + // clear previous kv_cache values (irrelevant for embeddings) + llama_kv_cache_clear(ctx); + llama_set_causal_attn(ctx, false); + + // run model + llama_decode(ctx, batch); + + // get embedding dimensions + uint64_t n_embd = llama_n_embd(mdl); + + // allocate embedding output + std::vector emb_unorm(n_embd, 0.0f); + + // sum up all token embeddings + for (int32_t k = n_inst; k < n_toks; k++) { + float * emb = llama_get_embeddings_ith(ctx, k); + for (uint64_t j = 0; j < n_embd; j++) { + emb_unorm[j] += emb[j]; + } + } + + // divide by number of tokens (mean pooling) + { + const uint64_t n_sent = n_toks - n_inst; + + for (uint64_t j = 0; j < n_embd; j++) { + emb_unorm[j] /= n_sent; + } + } + + std::vector emb_norm(emb_unorm.size()); + llama_embd_normalize(emb_unorm.data(), emb_norm.data(), n_embd); + result.push_back(emb_norm); + +#ifdef GRIT_DEBUG + // print out emb_norm + std::printf("embedding %ld: ", i); + for (uint64_t j = 0; j < n_embd; j++) { + std::printf("%.5f ", emb_norm[j]); + } + std::printf("\n\n"); +#endif + } + + llama_batch_free(batch); + + return result; +} + +static std::string generate(llama_context * ctx, const std::string & prompt, bool stream) { + std::string result; + + const llama_model * mdl = llama_get_model(ctx); + llama_token eos_token = llama_token_eos(mdl); + + llama_kv_cache_clear(ctx); + llama_set_causal_attn(ctx, true); + llama_batch bat = llama_batch_init(llama_n_batch(ctx), 0, 1); + + std::vector inputs = llama_tokenize(mdl, prompt, false, true); + int32_t i_current_token = 0; + + while (true) { + llama_batch_clear(bat); + auto n_inputs = (int32_t)inputs.size(); + for (int32_t i = 0; i < n_inputs; i++) { + llama_batch_add(bat, inputs[i], i_current_token++, { 0 }, i == n_inputs - 1); + } + inputs.clear(); + + llama_decode(ctx, bat); + auto logits = llama_get_logits_ith(ctx, bat.n_tokens - 1); + + auto candidates = std::vector(llama_n_vocab(mdl)); + auto n_candidates = (int32_t)candidates.size(); + for (int32_t token = 0; token < n_candidates; token++) { + candidates[token] = llama_token_data{ token, logits[token], 0.0f }; + } + auto candidates_p = llama_token_data_array{ candidates.data(), candidates.size(), false }; + + llama_token token = llama_sample_token_greedy(ctx, &candidates_p); + if (token == eos_token) { + break; + } + + std::string piece = llama_token_to_piece(ctx, token); + if (stream) { + std::printf("%s", piece.c_str()); + std::fflush(stdout); + } + + inputs.push_back(token); + + result += piece; + } + + if (stream) { + std::printf("\n"); + } + + llama_batch_free(bat); + + return result; +} + +static std::string gritlm_instruction(const std::string & instruction) { + return !instruction.empty() ? "<|user|>\n" + instruction + "\n<|embed|>\n" : "<|embed|>\n"; +} + +int main(int argc, char * argv[]) { + gpt_params params; + if (!gpt_params_parse(argc, argv, params)) { + return 1; + } + + llama_model_params mparams = llama_model_params_from_gpt_params(params); + llama_context_params cparams = llama_context_params_from_gpt_params(params); + + llama_backend_init(); + + llama_model * mdl = llama_load_model_from_file(params.model.c_str(), mparams); + + // create new context - set to embedding mode + cparams.embeddings = true; + llama_context * ctx = llama_new_context_with_model(mdl, cparams); + + // ### Embedding/Representation ### + // samples taken from: https://github.com/ContextualAI/gritlm#basic + { + const std::string instruction = "Given a scientific paper title, retrieve the paper's abstract"; + + const std::vector queries = { + "Bitcoin: A Peer-to-Peer Electronic Cash System", + "Generative Representational Instruction Tuning", + }; + + const std::vector documents = { + "A purely peer-to-peer version of electronic cash would allow online payments to be sent directly from one party to another without going through a financial institution. Digital signatures provide part of the solution, but the main benefits are lost if a trusted third party is still required to prevent double-spending. We propose a solution to the double-spending problem using a peer-to-peer network. The network timestamps transactions by hashing them into an ongoing chain of hash-based proof-of-work, forming a record that cannot be changed without redoing the proof-of-work. The longest chain not only serves as proof of the sequence of events witnessed, but proof that it came from the largest pool of CPU power. As long as a majority of CPU power is controlled by nodes that are not cooperating to attack the network, they'll generate the longest chain and outpace attackers. The network itself requires minimal structure. Messages are broadcast on a best effort basis, and nodes can leave and rejoin the network at will, accepting the longest proof-of-work chain as proof of what happened while they were gone.", + "All text-based language problems can be reduced to either generation or embedding. Current models only perform well at one or the other. We introduce generative representational instruction tuning (GRIT) whereby a large language model is trained to handle both generative and embedding tasks by distinguishing between them through instructions. Compared to other open models, our resulting GritLM 7B sets a new state of the art on the Massive Text Embedding Benchmark (MTEB) and outperforms all models up to its size on a range of generative tasks. By scaling up further, GritLM 8X7B outperforms all open generative language models that we tried while still being among the best embedding models. Notably, we find that GRIT matches training on only generative or embedding data, thus we can unify both at no performance loss. Among other benefits, the unification via GRIT speeds up Retrieval-Augmented Generation (RAG) by > 60% for long documents, by no longer requiring separate retrieval and generation models. Models, code, etc. are freely available at https://github.com/ContextualAI/gritlm.", + }; + + // No need to add instruction for retrieval documents + const std::vector> d_rep = encode(ctx, documents, gritlm_instruction("")); + const std::vector> q_rep = encode(ctx, queries, gritlm_instruction(instruction)); + + const float cosine_sim_q0_d0 = cosine_similarity(q_rep[0], d_rep[0]); + const float cosine_sim_q0_d1 = cosine_similarity(q_rep[0], d_rep[1]); + const float cosine_sim_q1_d0 = cosine_similarity(q_rep[1], d_rep[0]); + const float cosine_sim_q1_d1 = cosine_similarity(q_rep[1], d_rep[1]); + + std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[0].c_str(), cosine_sim_q0_d0); + std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[1].c_str(), cosine_sim_q0_d1); + std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[1].c_str(), documents[0].c_str(), cosine_sim_q1_d0); + std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[1].c_str(), documents[1].c_str(), cosine_sim_q1_d1); + } + + // ### Generation ### + // GritLM models are not finetuned with system prompts, as you can just include system-like instructions together with your user instruction + { + const std::string prompt = "<|user|>\nPlease write me a poem about my recent hike of Mt. Fuji at midnight in the style of Shakespeare.\n<|assistant|>\n"; + std::string response = generate(ctx, prompt, true); + } + + llama_free(ctx); + llama_free_model(mdl); + llama_backend_free(); + + return 0; +} diff --git a/llama.cpp b/llama.cpp index b19616e8f..249442166 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1744,6 +1744,7 @@ struct llama_cparams { float defrag_thold; bool embeddings; + bool causal_attn; bool offload_kqv; enum llama_pooling_type pooling_type; @@ -3939,6 +3940,7 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) { LLAMA_LOG_INFO("%s: n_ff = %u\n", __func__, hparams.n_ff); LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert); LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used); + LLAMA_LOG_INFO("%s: causal attm = %d\n", __func__, hparams.causal_attn); LLAMA_LOG_INFO("%s: pooling type = %d\n", __func__, hparams.pooling_type); LLAMA_LOG_INFO("%s: rope type = %d\n", __func__, hparams.rope_type); LLAMA_LOG_INFO("%s: rope scaling = %s\n", __func__, rope_scaling_type); @@ -8532,7 +8534,13 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { ggml_backend_tensor_set(lctx.inp_pos, batch.pos, 0, n_tokens*ggml_element_size(lctx.inp_pos)); } - if (hparams.causal_attn) { + GGML_ASSERT( + (hparams.causal_attn || !cparams.causal_attn) && + "non-causal attention with generative models is not supported" + ); + + // NOTE: hparams.causal_attn indicates the model is capable of generation and uses the kv cache. + if (cparams.causal_attn) { const int64_t n_kv = kv_self.n; const int64_t n_tokens = batch.n_tokens; @@ -8560,8 +8568,9 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { } } } else { - // non-causal attention attends only the tokens within the batch (i.e. the KV cache is not used) + // when using kv cache, the mask needs to match the kv cache size const int64_t n_tokens = batch.n_tokens; + const int64_t n_stride = hparams.causal_attn ? kv_self.n : n_tokens; assert(ggml_backend_buffer_is_host(lctx.inp_KQ_mask->buffer)); @@ -8580,7 +8589,11 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { } } - data[h*(n_tokens*n_tokens) + j*n_tokens + i] = f; + data[h*(n_tokens*n_tokens) + j*n_stride + i] = f; + } + + for (int i = n_tokens; i < n_stride; ++i) { + data[h*(n_tokens*n_tokens) + j*n_stride + i] = -INFINITY; } } } @@ -12733,6 +12746,8 @@ struct llama_context * llama_new_context_with_model( cparams.yarn_ext_factor = rope_scaling_type == LLAMA_ROPE_SCALING_TYPE_YARN ? 1.0f : 0.0f; } + cparams.causal_attn = hparams.causal_attn; + if (cparams.pooling_type == LLAMA_POOLING_TYPE_UNSPECIFIED) { if (hparams.pooling_type == LLAMA_POOLING_TYPE_UNSPECIFIED) { cparams.pooling_type = LLAMA_POOLING_TYPE_NONE; @@ -13767,6 +13782,10 @@ void llama_set_abort_callback(struct llama_context * ctx, bool (*abort_callback) ctx->abort_callback_data = abort_callback_data; } +void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn) { + ctx->cparams.causal_attn = causal_attn; +} + struct llama_batch llama_batch_get_one( llama_token * tokens, int32_t n_tokens, diff --git a/llama.h b/llama.h index 7a107c7f3..c8e05aadd 100644 --- a/llama.h +++ b/llama.h @@ -643,6 +643,10 @@ extern "C" { // n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens) LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch); + // Set whether to use causal attention or not + // If set to true, the model will only attend to the past tokens + LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn); + // Set abort callback LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data); From fa8a809a9119411bc9c3f00026550d0343f4d9b7 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Sun, 10 Mar 2024 18:17:47 +0100 Subject: [PATCH 03/48] server: ci: windows build and tests (#5968) * server: ci: windows build and tests * server: ci: remove tmp push branch * server: ci: EOF EOL * Use builti Co-authored-by: Jared Van Bortel * server: tests: server graceful shutdown, then kill, then hard kill * server: tests: remove python2 unicode string * server: tests: remove wrong comment on server starting, close_fds is always true * server: tests: server kill, if pid exists * server: tests: remove dependency to killall * server: tests: ci windows: pid exists better handling --------- Co-authored-by: Jared Van Bortel --- .github/workflows/server.yml | 46 ++++- examples/server/tests/features/environment.py | 66 +++++-- examples/server/tests/features/server.feature | 2 +- examples/server/tests/features/steps/steps.py | 176 ++++++++++-------- 4 files changed, 191 insertions(+), 99 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index f9aeefaa8..e385e03f3 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -47,6 +47,8 @@ jobs: - name: Clone id: checkout uses: actions/checkout@v3 + with: + fetch-depth: 0 - name: Dependencies id: depends @@ -58,7 +60,6 @@ jobs: cmake \ python3-pip \ wget \ - psmisc \ language-pack-en - name: Build @@ -90,3 +91,46 @@ jobs: run: | cd examples/server/tests PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow + + + server-windows: + runs-on: windows-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ; + cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server + + - name: Python setup + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Tests dependencies + id: test_dependencies + run: | + pip install -r examples/server/tests/requirements.txt + + - name: Tests + id: server_integration_tests + run: | + cd examples/server/tests + behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp + + - name: Slow tests + id: server_integration_tests_slow + if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }} + run: | + cd examples/server/tests + behave.exe --stop --no-skipped --no-capture --tags slow diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py index 9fd330db6..8ad987e1b 100644 --- a/examples/server/tests/features/environment.py +++ b/examples/server/tests/features/environment.py @@ -1,9 +1,10 @@ +import errno import os import socket import subprocess import time from contextlib import closing -from signal import SIGKILL +import signal def before_scenario(context, scenario): @@ -29,44 +30,71 @@ def after_scenario(context, scenario): for line in f: print(line) if not is_server_listening(context.server_fqdn, context.server_port): - print("\x1b[33;101mERROR: Server stopped listening\x1b[0m") + print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n") if not pid_exists(context.server_process.pid): assert False, f"Server not running pid={context.server_process.pid} ..." - print(f"stopping server pid={context.server_process.pid} ...") - context.server_process.kill() + server_graceful_shutdown(context) + # Wait few for socket to free up time.sleep(0.05) attempts = 0 - while is_server_listening(context.server_fqdn, context.server_port): - print(f"stopping server pid={context.server_process.pid} ...") - os.kill(context.server_process.pid, SIGKILL) + while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port): + server_kill(context) time.sleep(0.1) attempts += 1 if attempts > 5: - print(f"Server dangling exits, killing all {context.server_path} ...") - process = subprocess.run(['killall', '-9', context.server_path], - stderr=subprocess.PIPE, - universal_newlines=True) - print(process) + server_kill_hard(context) + + +def server_graceful_shutdown(context): + print(f"shutting down server pid={context.server_process.pid} ...\n") + if os.name == 'nt': + os.kill(context.server_process.pid, signal.CTRL_C_EVENT) + else: + os.kill(context.server_process.pid, signal.SIGINT) + + +def server_kill(context): + print(f"killing server pid={context.server_process.pid} ...\n") + context.server_process.kill() + + +def server_kill_hard(context): + pid = context.server_process.pid + path = context.server_path + + print(f"Server dangling exits, hard killing force {pid}={path}...\n") + if os.name == 'nt': + process = subprocess.check_output(['taskkill', '/F', '/pid', str(pid)]).decode() + print(process) + else: + os.kill(-pid, signal.SIGKILL) def is_server_listening(server_fqdn, server_port): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: result = sock.connect_ex((server_fqdn, server_port)) - return result == 0 + _is_server_listening = result == 0 + if _is_server_listening: + print(f"server is listening on {server_fqdn}:{server_port}...\n") + return _is_server_listening def pid_exists(pid): """Check whether pid exists in the current process table.""" - import errno if pid < 0: return False - try: - os.kill(pid, 0) - except OSError as e: - return e.errno == errno.EPERM + if os.name == 'nt': + output = subprocess.check_output(['TASKLIST', '/FI', f'pid eq {pid}']).decode() + print(output) + return "No tasks are running" not in output else: - return True + try: + os.kill(pid, 0) + except OSError as e: + return e.errno == errno.EPERM + else: + return True diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature index aa132fa34..5014f326d 100644 --- a/examples/server/tests/features/server.feature +++ b/examples/server/tests/features/server.feature @@ -47,7 +47,7 @@ Feature: llama.cpp server Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. """ And a completion request with no api error - Then 64 tokens are predicted matching fun|Annaks|popcorns + Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry And the completion is truncated And 109 prompt tokens are processed diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 142048509..4e81a255a 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -18,7 +18,7 @@ from huggingface_hub import hf_hub_download from prometheus_client import parser -@step(u"a server listening on {server_fqdn}:{server_port}") +@step("a server listening on {server_fqdn}:{server_port}") def step_server_config(context, server_fqdn, server_port): context.server_fqdn = server_fqdn context.server_port = int(server_port) @@ -57,24 +57,24 @@ def step_server_config(context, server_fqdn, server_port): context.prompts = [] -@step(u'a model file {hf_file} from HF repo {hf_repo}') +@step('a model file {hf_file} from HF repo {hf_repo}') def step_download_hf_model(context, hf_file, hf_repo): context.model_file = hf_hub_download(repo_id=hf_repo, filename=hf_file) if context.debug: print(f"model file: {context.model_file}\n") -@step(u'a model alias {model_alias}') +@step('a model alias {model_alias}') def step_model_alias(context, model_alias): context.model_alias = model_alias -@step(u'{seed:d} as server seed') +@step('{seed:d} as server seed') def step_seed(context, seed): context.server_seed = seed -@step(u'{ngl:d} GPU offloaded layers') +@step('{ngl:d} GPU offloaded layers') def step_n_gpu_layer(context, ngl): if 'N_GPU_LAYERS' in os.environ: new_ngl = int(os.environ['N_GPU_LAYERS']) @@ -84,37 +84,37 @@ def step_n_gpu_layer(context, ngl): context.n_gpu_layer = ngl -@step(u'{n_ctx:d} KV cache size') +@step('{n_ctx:d} KV cache size') def step_n_ctx(context, n_ctx): context.n_ctx = n_ctx -@step(u'{n_slots:d} slots') +@step('{n_slots:d} slots') def step_n_slots(context, n_slots): context.n_slots = n_slots -@step(u'{n_predict:d} server max tokens to predict') +@step('{n_predict:d} server max tokens to predict') def step_server_n_predict(context, n_predict): context.n_server_predict = n_predict -@step(u'continuous batching') +@step('continuous batching') def step_server_continuous_batching(context): context.server_continuous_batching = True -@step(u'embeddings extraction') +@step('embeddings extraction') def step_server_embeddings(context): context.server_embeddings = True -@step(u'prometheus compatible metrics exposed') +@step('prometheus compatible metrics exposed') def step_server_metrics(context): context.server_metrics = True -@step(u"the server is starting") +@step("the server is starting") def step_start_server(context): start_server_background(context) attempts = 0 @@ -131,7 +131,7 @@ def step_start_server(context): time.sleep(0.1) -@step(u"the server is {expecting_status}") +@step("the server is {expecting_status}") @async_run_until_complete async def step_wait_for_the_server_to_be_started(context, expecting_status): match expecting_status: @@ -160,7 +160,7 @@ async def step_wait_for_the_server_to_be_started(context, expecting_status): assert False, "unknown status" -@step(u'all slots are {expected_slot_status_string}') +@step('all slots are {expected_slot_status_string}') @async_run_until_complete async def step_all_slots_status(context, expected_slot_status_string): match expected_slot_status_string: @@ -176,7 +176,7 @@ async def step_all_slots_status(context, expected_slot_status_string): await request_slots_status(context, expected_slots) -@step(u'a completion request with {api_error} api error') +@step('a completion request with {api_error} api error') @async_run_until_complete async def step_request_completion(context, api_error): expect_api_error = api_error == 'raised' @@ -194,133 +194,133 @@ async def step_request_completion(context, api_error): assert completion == 401, f"completion must be an 401 status code: {completion}" -@step(u'{predicted_n:d} tokens are predicted matching {re_content}') +@step('{predicted_n:d} tokens are predicted matching {re_content}') def step_n_tokens_predicted_with_content(context, predicted_n, re_content): context.completion = context.tasks_result.pop() assert_n_tokens_predicted(context.completion, predicted_n, re_content) -@step(u'{predicted_n:d} tokens are predicted') +@step('{predicted_n:d} tokens are predicted') def step_n_tokens_predicted(context, predicted_n): context.completion = context.tasks_result.pop() assert_n_tokens_predicted(context.completion, predicted_n) -@step(u'the completion is truncated') +@step('the completion is truncated') def step_assert_completion_truncated(context): step_assert_completion_truncated(context, '') -@step(u'the completion is {truncated} truncated') +@step('the completion is {truncated} truncated') def step_assert_completion_truncated(context, truncated): truncated = truncated != "not" assert context.completion['truncated'] == truncated, f'{context.completion}' -@step(u'{n_prompt:d} prompt tokens are processed') +@step('{n_prompt:d} prompt tokens are processed') def step_impl(context, n_prompt): assert n_prompt < 0 or n_prompt == context.completion['timings']['prompt_n'], f"n_prompt={context.completion['timings']['prompt_n']}" -@step(u'a user prompt {user_prompt}') +@step('a user prompt {user_prompt}') def step_user_prompt(context, user_prompt): context.prompts.append(user_prompt) context.n_prompts = len(context.prompts) -@step(u'a system prompt {system_prompt}') +@step('a system prompt {system_prompt}') def step_system_prompt(context, system_prompt): context.system_prompt = system_prompt -@step(u'a model {model}') +@step('a model {model}') def step_model(context, model): context.model = model -@step(u'{max_tokens:d} max tokens to predict') +@step('{max_tokens:d} max tokens to predict') def step_max_tokens(context, max_tokens): context.n_predict = max_tokens -@step(u'streaming is {enable_streaming}') +@step('streaming is {enable_streaming}') def step_streaming(context, enable_streaming): context.enable_streaming = enable_streaming == 'enabled' -@step(u'a user api key {user_api_key}') +@step('a user api key {user_api_key}') def step_user_api_key(context, user_api_key): context.user_api_key = user_api_key -@step(u'no user api key') +@step('no user api key') def step_no_user_api_key(context): context.user_api_key = None -@step(u'a user api key ') +@step('a user api key ') def step_no_user_api_key_space(context): context.user_api_key = None -@step(u'a server api key {server_api_key}') +@step('a server api key {server_api_key}') def step_server_api_key(context, server_api_key): context.server_api_key = server_api_key -@step(u'{n_junk:d} as number of junk') +@step('{n_junk:d} as number of junk') def step_n_junk(context, n_junk): context.n_junk = n_junk -@step(u'{n_batch:d} as batch size') +@step('{n_batch:d} as batch size') def step_n_batch(context, n_batch): context.n_batch = n_batch -@step(u'{seed:d} as seed') +@step('{seed:d} as seed') def step_seed(context, seed): context.seed = seed -@step(u'a prefix prompt') +@step('a prefix prompt') def step_prompt_prefix(context): - context.prompt_prefix = context.text + context.prompt_prefix = context_text(context) -@step(u'a junk suffix prompt') +@step('a junk suffix prompt') def step_prompt_junk_suffix(context): - context.prompt_junk_suffix = context.text + context.prompt_junk_suffix = context_text(context) -@step(u'a suffix prompt') +@step('a suffix prompt') def step_prompt_suffix(context): - context.prompt_suffix = context.text + context.prompt_suffix = context_text(context) -@step(u'{n_ga:d} group attention factor' - u' to extend context size through self-extend') +@step('{n_ga:d} group attention factor' + ' to extend context size through self-extend') def step_impl(context, n_ga): context.n_ga = n_ga -@step(u'{n_ga_w:d} group attention width to extend context size through self-extend') +@step('{n_ga_w:d} group attention width to extend context size through self-extend') def step_impl(context, n_ga_w): context.n_ga_w = n_ga_w -@step(u'a passkey prompt template') +@step('a passkey prompt template') def step_prompt_passkey(context): - context.prompt_passkey = context.text + context.prompt_passkey = context_text(context) -@step(u'{n_prompts:d} fixed prompts') +@step('{n_prompts:d} fixed prompts') def step_fixed_prompts(context, n_prompts): context.prompts.extend([str(0)*(context.n_batch if context.n_batch is not None else 512) for i in range(n_prompts)]) context.n_prompts = n_prompts -@step(u'a "{passkey}" passkey challenge prompt with the passkey inserted every {i_pos:d} junk') +@step('a "{passkey}" passkey challenge prompt with the passkey inserted every {i_pos:d} junk') def step_prompt_passkey(context, passkey, i_pos): prompt = "" for i in range(context.n_junk): @@ -334,7 +334,7 @@ def step_prompt_passkey(context, passkey, i_pos): context.n_prompts = len(context.prompts) -@step(u'an OAI compatible chat completions request with {api_error} api error') +@step('an OAI compatible chat completions request with {api_error} api error') @async_run_until_complete async def step_oai_chat_completions(context, api_error): if context.debug: @@ -369,19 +369,19 @@ async def step_oai_chat_completions(context, api_error): print(f"Completion response: {completion}") -@step(u'a prompt') +@step('a prompt') def step_a_prompt(context): - context.prompts.append(context.text) + context.prompts.append(context_text(context)) context.n_prompts = len(context.prompts) -@step(u'a prompt {prompt}') +@step('a prompt {prompt}') def step_a_prompt_prompt(context, prompt): context.prompts.append(prompt) context.n_prompts = len(context.prompts) -@step(u'concurrent completion requests') +@step('concurrent completion requests') @async_run_until_complete() async def step_concurrent_completion_requests(context): await concurrent_requests(context, @@ -397,7 +397,7 @@ async def step_concurrent_completion_requests(context): 'user_api_key') else None) -@step(u'concurrent OAI completions requests') +@step('concurrent OAI completions requests') @async_run_until_complete async def step_oai_chat_completions(context): await concurrent_requests(context, oai_chat_completions, @@ -417,7 +417,7 @@ async def step_oai_chat_completions(context): if hasattr(context, 'user_api_key') else None) -@step(u'concurrent OAI completions requests no v1') +@step('concurrent OAI completions requests no v1') @async_run_until_complete async def step_oai_chat_completions(context): await concurrent_requests(context, oai_chat_completions, @@ -440,13 +440,13 @@ async def step_oai_chat_completions(context): if hasattr(context, 'user_api_key') else None) -@step(u'all prompts are predicted') +@step('all prompts are predicted') @async_run_until_complete async def step_all_prompts_are_predicted(context): await all_prompts_are_predicted(context) -@step(u'all prompts are predicted with {n_expected_predicted:d} tokens') +@step('all prompts are predicted with {n_expected_predicted:d} tokens') @async_run_until_complete async def step_all_prompts_are_predicted_with_n_tokens(context, n_expected_predicted): await all_prompts_are_predicted(context, n_expected_predicted) @@ -460,14 +460,14 @@ async def all_prompts_are_predicted(context, expected_predicted_n=None): assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests" -@step(u'embeddings are computed for') +@step('embeddings are computed for') @async_run_until_complete async def step_compute_embedding(context): context.n_prompts = 1 - context.embeddings = await request_embedding(context.text, base_url=context.base_url) + context.embeddings = await request_embedding(context_text(context), base_url=context.base_url) -@step(u'all embeddings are the same') +@step('all embeddings are the same') @async_run_until_complete async def step_all_embeddings_are_the_same(context): n_embedding_requests = await gather_tasks_results(context) @@ -491,7 +491,8 @@ async def step_all_embeddings_are_the_same(context): print(f"{msg}\n") assert np.isclose(similarity, 1.0, rtol=1e-05, atol=1e-08, equal_nan=False), msg -@step(u'embeddings are generated') + +@step('embeddings are generated') def step_assert_embeddings(context): assert context.n_prompts == len(context.embeddings), (f"unexpected response:\n" f"context.n_prompts={context.n_prompts}\n" @@ -500,17 +501,17 @@ def step_assert_embeddings(context): assert_embeddings(embedding) -@step(u'an OAI compatible embeddings computation request for') +@step('an OAI compatible embeddings computation request for') @async_run_until_complete async def step_oai_compute_embeddings(context): context.n_prompts = 1 - context.embeddings = await request_oai_embeddings(context.text, + context.embeddings = await request_oai_embeddings(context_text(context), base_url=context.base_url, user_api_key=context.user_api_key, model=context.model) -@step(u'an OAI compatible embeddings computation request for multiple inputs') +@step('an OAI compatible embeddings computation request for multiple inputs') @async_run_until_complete async def step_oai_compute_embeddings_multiple_inputs(context): context.embeddings = await request_oai_embeddings(context.prompts, @@ -520,7 +521,7 @@ async def step_oai_compute_embeddings_multiple_inputs(context): context.prompts.clear() -@step(u'concurrent embedding requests') +@step('concurrent embedding requests') @async_run_until_complete() async def step_concurrent_embedding_requests(context): await concurrent_requests(context, @@ -529,7 +530,7 @@ async def step_concurrent_embedding_requests(context): base_url=context.base_url) -@step(u'concurrent OAI embedding requests') +@step('concurrent OAI embedding requests') @async_run_until_complete() async def step_concurrent_oai_embedding_requests(context): await concurrent_requests(context, @@ -540,7 +541,7 @@ async def step_concurrent_oai_embedding_requests(context): model=context.model) -@step(u'all embeddings are generated') +@step('all embeddings are generated') @async_run_until_complete() async def all_embeddings_are_generated(context): n_embedding_requests = await gather_tasks_results(context) @@ -549,10 +550,10 @@ async def all_embeddings_are_generated(context): assert_embeddings(context.tasks_result.pop().pop()) -@step(u'tokenizing') +@step('tokenizing') @async_run_until_complete async def step_tokenize(context): - context.tokenized_text = context.text + context.tokenized_text = context_text(context) async with aiohttp.ClientSession() as session: async with session.post(f'{context.base_url}/tokenize', json={ @@ -563,7 +564,7 @@ async def step_tokenize(context): context.tokens = tokenize_json['tokens'] -@step(u'tokens can be detokenize') +@step('tokens can be detokenize') @async_run_until_complete async def step_detokenize(context): assert len(context.tokens) > 0 @@ -578,7 +579,7 @@ async def step_detokenize(context): assert context.tokenized_text == detokenize_json['content'].strip() -@step(u'an OPTIONS request is sent from {origin}') +@step('an OPTIONS request is sent from {origin}') @async_run_until_complete async def step_options_request(context, origin): async with aiohttp.ClientSession() as session: @@ -589,12 +590,12 @@ async def step_options_request(context, origin): context.options_response = response -@step(u'CORS header {cors_header} is set to {cors_header_value}') +@step('CORS header {cors_header} is set to {cors_header_value}') def step_check_options_header_value(context, cors_header, cors_header_value): assert context.options_response.headers[cors_header] == cors_header_value -@step(u'prometheus metrics are exposed') +@step('prometheus metrics are exposed') @async_run_until_complete async def step_prometheus_metrics_exported(context): async with aiohttp.ClientSession() as session: @@ -616,14 +617,14 @@ async def step_prometheus_metrics_exported(context): assert metric_exported, "No metrics exported" -@step(u'metric {metric_name} is {metric_value:d}') +@step('metric {metric_name} is {metric_value:d}') def step_assert_metric_value(context, metric_name, metric_value): if metric_name not in context.metrics: assert False, f"no metric {metric_name} in {context.metrics.keys()}" assert context.metrics[metric_name].samples[0].value == metric_value, f"metric: {context.metrics[metric_name]}" -@step(u'available models') +@step('available models') def step_available_models(context): # openai client always expects an api_key openai.api_key = context.user_api_key if context.user_api_key is not None else 'nope' @@ -631,14 +632,14 @@ def step_available_models(context): context.models = openai.Model.list().data -@step(u'{n_model:d} models are supported') +@step('{n_model:d} models are supported') def step_supported_models(context, n_model): if context.debug: print("server models available:", context.models) assert len(context.models) == n_model -@step(u'model {i_model:d} is {param} {preposition} {param_value}') +@step('model {i_model:d} is {param} {preposition} {param_value}') def step_supported_models(context, i_model, param, preposition, param_value): assert i_model < len(context.models) model = context.models[i_model] @@ -1007,12 +1008,22 @@ async def completions_seed(context): else context.server_seed if hasattr(context, 'server_seed') else None +def context_text(context): + return context.text.replace('\r', '') + + def start_server_background(context): - context.server_path = '../../../build/bin/server' + if os.name == 'nt': + context.server_path = '../../../build/bin/Release/server.exe' + else: + context.server_path = '../../../build/bin/server' if 'LLAMA_SERVER_BIN_PATH' in os.environ: context.server_path = os.environ['LLAMA_SERVER_BIN_PATH'] + server_listen_addr = context.server_fqdn + if os.name == 'nt': + server_listen_addr = '0.0.0.0' server_args = [ - '--host', context.server_fqdn, + '--host', server_listen_addr, '--port', context.server_port, '--model', context.model_file ] @@ -1045,7 +1056,16 @@ def start_server_background(context): if 'SERVER_LOG_FORMAT_JSON' not in os.environ: server_args.extend(['--log-format', "text"]) print(f"starting server with: {context.server_path} {server_args}\n") + flags = 0 + if 'nt' == os.name: + flags |= subprocess.DETACHED_PROCESS + flags |= subprocess.CREATE_NEW_PROCESS_GROUP + flags |= subprocess.CREATE_NO_WINDOW + + pkwargs = { + 'creationflags': flags, + } context.server_process = subprocess.Popen( [str(arg) for arg in [context.server_path, *server_args]], - close_fds=True) - print(f"server pid={context.server_process.pid}") + **pkwargs) + print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}") From bf47a5eefc669fdba71af096942af999bc1167d4 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 10 Mar 2024 20:09:24 +0200 Subject: [PATCH 04/48] ggml : remove __constant__ specifier for CUDA tables (#5940) --- ggml-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml-common.h b/ggml-common.h index 4b6d248b6..84981212c 100644 --- a/ggml-common.h +++ b/ggml-common.h @@ -17,7 +17,7 @@ #elif defined(GGML_COMMON_IMPL_CUDA) #include -#define GGML_TABLE_BEGIN(type, name, size) static const __device__ __constant__ type name[size] = { +#define GGML_TABLE_BEGIN(type, name, size) static const __device__ type name[size] = { #define GGML_TABLE_END() }; #define GGML_COMMON_IMPL From df4dc3e7cb43162862f339525638ba4febcb6158 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 8 Mar 2024 23:45:07 +0200 Subject: [PATCH 05/48] ggml : try fix 32-bit arm compat (whisper/1938) * ggml : try fix 32-bit arm compat * ggml : fix cont --- ggml-quants.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 807c5e391..42d8a5d80 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -9009,8 +9009,8 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void * static const uint8_t k_mask2[16] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,}; - const uint8x16x2_t mask1 = vld1q_u8_x2(k_mask1); - const uint8x16_t mask2 = vld1q_u8(k_mask2); + const ggml_uint8x16x2_t mask1 = ggml_vld1q_u8_x2(k_mask1); + const uint8x16_t mask2 = vld1q_u8(k_mask2); const uint8x16_t m1 = vdupq_n_u8(1); const int32x4_t vzero = vdupq_n_s32(0); @@ -9354,11 +9354,12 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void * static const int16_t k_shift[8] = {8, 7, 6, 5, 4, 3, 2, 1}; - const uint8x16x2_t mask1 = vld1q_u8_x2(k_mask1); - const uint8x16_t mask2 = vld1q_u8(k_mask2); - const int16x8_t hshift = vld1q_s16(k_shift); - const uint16x8_t m256 = vdupq_n_u16(256); - const uint8x16_t m1 = vdupq_n_u8(1); + const ggml_uint8x16x2_t mask1 = ggml_vld1q_u8_x2(k_mask1); + const uint8x16_t mask2 = vld1q_u8(k_mask2); + + const int16x8_t hshift = vld1q_s16(k_shift); + const uint16x8_t m256 = vdupq_n_u16(256); + const uint8x16_t m1 = vdupq_n_u8(1); uint8x16x2_t vs; ggml_int8x16x4_t q3s; From b838b53ad6de2e53f23ddf8f3ad5e6891cc3dd05 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 10 Mar 2024 20:10:46 +0200 Subject: [PATCH 06/48] sync : ggml --- scripts/sync-ggml.last | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index afe68077d..7c30162e2 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -8695910a39102609073d0e099aa7c97d6bcb3bf9 +43a6d4af1971ee2912ff7bc2404011ff327b6a60 From d9f65c97c3dc3aa6fa27470b8c6e69b437ec1a27 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 10 Mar 2024 20:58:26 +0200 Subject: [PATCH 07/48] readme : update hot topics --- README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e3ec0817a..98fdc6808 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,6 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ -> [!IMPORTANT] -> **Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962** -> -> Vote for which quantization type provides better responses, all other parameters being the same. - ### Recent API changes - [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_max_seq()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328 @@ -21,6 +16,8 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) ### Hot topics +- Looking for contributions to add Deepseek support: https://github.com/ggerganov/llama.cpp/issues/5981 +- Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962 - Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328 ---- From 7ab7b733bb48250b2df26c12b00256ef42c76932 Mon Sep 17 00:00:00 2001 From: Dean Date: Mon, 11 Mar 2024 04:03:17 +0800 Subject: [PATCH 08/48] android : fix utf8 decoding error (#5935) * examples: fix utf8 decoding error some models have a tokenizer that decodes an id into an incomplete utf8 sequence, need to validate and wait for next token one example would be: https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_0.gguf and and an example of the token is 18137 * android : minor --------- Co-authored-by: zhangfuwen Co-authored-by: Georgi Gerganov --- .../app/src/main/cpp/llama-android.cpp | 53 ++++++++++++++++++- .../src/main/java/com/example/llama/Llm.kt | 4 +- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/examples/llama.android/app/src/main/cpp/llama-android.cpp b/examples/llama.android/app/src/main/cpp/llama-android.cpp index 2beb1e0d5..ce8ab3b70 100644 --- a/examples/llama.android/app/src/main/cpp/llama-android.cpp +++ b/examples/llama.android/app/src/main/cpp/llama-android.cpp @@ -33,6 +33,45 @@ jclass la_int_var; jmethodID la_int_var_value; jmethodID la_int_var_inc; +std::string cached_token_chars; + +bool is_valid_utf8(const char * string) { + if (!string) { + return true; + } + + const unsigned char * bytes = (const unsigned char *)string; + int num; + + while (*bytes != 0x00) { + if ((*bytes & 0x80) == 0x00) { + // U+0000 to U+007F + num = 1; + } else if ((*bytes & 0xE0) == 0xC0) { + // U+0080 to U+07FF + num = 2; + } else if ((*bytes & 0xF0) == 0xE0) { + // U+0800 to U+FFFF + num = 3; + } else if ((*bytes & 0xF8) == 0xF0) { + // U+10000 to U+10FFFF + num = 4; + } else { + return false; + } + + bytes += 1; + for (int i = 1; i < num; ++i) { + if ((*bytes & 0xC0) != 0x80) { + return false; + } + bytes += 1; + } + } + + return true; +} + static void log_callback(ggml_log_level level, const char * fmt, void * data) { if (level == GGML_LOG_LEVEL_ERROR) __android_log_print(ANDROID_LOG_ERROR, TAG, fmt, data); else if (level == GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, fmt, data); @@ -295,6 +334,8 @@ Java_com_example_llama_Llm_completion_1init( jint n_len ) { + cached_token_chars.clear(); + const auto text = env->GetStringUTFChars(jtext, 0); const auto context = reinterpret_cast(context_pointer); const auto batch = reinterpret_cast(batch_pointer); @@ -372,8 +413,16 @@ Java_com_example_llama_Llm_completion_1loop( } auto new_token_chars = llama_token_to_piece(context, new_token_id); - LOGi("new_token_chars: `%s`", new_token_chars.c_str()); - auto new_token = env->NewStringUTF(new_token_chars.c_str()); + cached_token_chars += new_token_chars; + + jstring new_token = nullptr; + if (is_valid_utf8(cached_token_chars.c_str())) { + new_token = env->NewStringUTF(cached_token_chars.c_str()); + LOGi("cached: %s, new_token_chars: `%s`, id: %d", cached_token_chars.c_str(), new_token_chars.c_str(), new_token_id); + cached_token_chars.clear(); + } else { + new_token = env->NewStringUTF(""); + } llama_batch_clear(*batch); llama_batch_add(*batch, new_token_id, n_cur, { 0 }, true); diff --git a/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt b/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt index 5f3270372..d86afee37 100644 --- a/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt +++ b/examples/llama.android/app/src/main/java/com/example/llama/Llm.kt @@ -71,7 +71,7 @@ class Llm { batch: Long, nLen: Int, ncur: IntVar - ): String + ): String? private external fun kv_cache_clear(context: Long) @@ -115,7 +115,7 @@ class Llm { val ncur = IntVar(completion_init(state.context, state.batch, message, nlen)) while (ncur.value <= nlen) { val str = completion_loop(state.context, state.batch, nlen, ncur) - if (str.isEmpty()) { + if (str == null) { break } emit(str) From bb6d00bbf98476215596b9df3870b5504ef5a29a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 10 Mar 2024 23:12:48 +0200 Subject: [PATCH 09/48] metal : move mm_id indices to shared mem (#5982) --- ggml-metal.m | 6 +++--- ggml-metal.metal | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ggml-metal.m b/ggml-metal.m index 00df22838..3cf80de7b 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -1642,8 +1642,8 @@ static enum ggml_status ggml_metal_graph_compute( // TODO: make this more general GGML_ASSERT(n_as <= 8); - // max size of the src1ids array in the kernel stack - GGML_ASSERT(ne11 <= 512); + // max size of the src1ids array in the kernel shared buffer + GGML_ASSERT(ne11 <= 4096); const int64_t ne20 = src2 ? src2->ne[0] : 0; const int64_t ne21 = src2 ? src2->ne[1] : 0; @@ -1741,7 +1741,7 @@ static enum ggml_status ggml_metal_graph_compute( [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j]; } - [encoder setThreadgroupMemoryLength:8192 atIndex:0]; + [encoder setThreadgroupMemoryLength:GGML_PAD(8192 + 2*ne11, 16) atIndex:0]; [encoder dispatchThreadgroups:MTLSizeMake((ne11 + 31)/32, (ne21 + 63)/64, n_as*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(128, 1, 1)]; } else { diff --git a/ggml-metal.metal b/ggml-metal.metal index 6ebbbd195..50185ae4d 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -5386,7 +5386,7 @@ template Date: Mon, 11 Mar 2024 01:13:57 +0000 Subject: [PATCH 10/48] [SYCL] Add support for SYCL Nvidia target (#5738) * Add support for nvidia target in CMake * Update sycl read-me for Nvidia target * Fix errors --- CMakeLists.txt | 14 +++++++++++++- README-sycl.md | 26 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9309ca6bb..00a26391a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -116,6 +116,7 @@ option(LLAMA_MPI "llama: use MPI" option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) option(LLAMA_SYCL "llama: use SYCL" OFF) option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device") option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) @@ -534,6 +535,10 @@ if (LLAMA_HIPBLAS) endif() if (LLAMA_SYCL) + if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$") + message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA") + endif() + if ( NOT DEFINED ENV{ONEAPI_ROOT}) message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh") endif() @@ -555,6 +560,9 @@ if (LLAMA_SYCL) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib") + if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") + endif() set(GGML_HEADERS_SYCL ggml-sycl.h) set(GGML_SOURCES_SYCL ggml-sycl.cpp) @@ -562,7 +570,11 @@ if (LLAMA_SYCL) if (WIN32) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib) else() - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) + if (LLAMA_SYCL_TARGET STREQUAL "INTEL") + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) + elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl) + endif() endif() endif() diff --git a/README-sycl.md b/README-sycl.md index 85eb16f2b..9359a9490 100644 --- a/README-sycl.md +++ b/README-sycl.md @@ -73,6 +73,29 @@ For iGPU, please make sure the shared memory from host memory is enough. For lla For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, recommend the device memory is 4GB+. +## Nvidia GPU + +### Verified + +|Intel GPU| Status | Verified Model| +|-|-|-| +|Ampere Series| Support| A100| + +### oneMKL + +The current oneMKL release does not contain the oneMKL cuBlas backend. +As a result for Nvidia GPU's oneMKL must be built from source. + +``` +git clone https://github.com/oneapi-src/oneMKL +cd oneMKL +mkdir build +cd build +cmake -G Ninja .. -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON +ninja +// Add paths as necessary +``` + ## Docker Note: @@ -186,6 +209,9 @@ source /opt/intel/oneapi/setvars.sh # Or, for FP32: cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +# For Nvidia GPUs +cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx + # Build example/main only #cmake --build . --config Release --target main From ef3ced26a3817d92890b97b83acaeb018ade02d0 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Mon, 11 Mar 2024 10:27:56 +0530 Subject: [PATCH 11/48] [SYCL] Add q3_s and q1_s (#5886) * Add q3_s and q1_s * fix compilation * fix build * fix build * fix build * enable ops * rm macro * increase grid space --- ggml-sycl.cpp | 378 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 375 insertions(+), 3 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 6fafa5282..6d5684582 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -3494,6 +3494,31 @@ typedef struct dpct_type_block_iq3_xxs { } block_iq3_xxs; static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding"); +#define QR3_XS 8 +#define QI3_XS (QK_K / (4*QR3_XS)) +#if QK_K == 64 +#define IQ3S_N_SCALE 2 +#else +#define IQ3S_N_SCALE QK_K/64 +#endif +typedef struct { + sycl::half d; + uint8_t qs[QK_K/4]; + uint8_t qh[QK_K/32]; + uint8_t signs[QK_K/8]; + uint8_t scales[IQ3S_N_SCALE]; +} block_iq3_s; +static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding"); + +#define QR1_S 8 +#define QI1_S (QK_K / (4*QR1_S)) +typedef struct { + sycl::half d; + uint8_t qs[QK_K/8]; + uint8_t scales[QK_K/16]; +} block_iq1_s; +static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); + #define WARP_SIZE 32 #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses @@ -4833,6 +4858,62 @@ static void dequantize_block_iq3_xxs(const void * __restrict__ vx, dst_t * __res } +template +static void dequantize_block_iq3_s(const void * __restrict__ vx, dst_t * __restrict__ yy, + const sycl::nd_item<3> &item_ct1, + const uint32_t *iq3s_grid, + const uint8_t *ksigns_iq2xs, + const uint8_t *kmask_iq2xs) { + + const int i = item_ct1.get_group(2); + const block_iq3_s * x = (const block_iq3_s *) vx; + + const int tid = item_ct1.get_local_id(2); +#if QK_K == 256 + const int il = tid/8; // 0...3 + const int ib = tid%8; // 0...7 + dst_t * y = yy + i*QK_K + 32*ib + 8*il; + const uint8_t * qs = x[i].qs + 8*ib; + const uint8_t * grid1 = (const uint8_t *)(iq3s_grid + qs[2*il+0]); + const uint8_t * grid2 = (const uint8_t *)(iq3s_grid + qs[2*il+1]); + const float d = (float)x[i].d * (1 + 2*((x[i].scales[ib/2] >> 4*(ib%2)) & 0xf)); + const uint8_t signs = x[i].signs[4*ib + il]; + for (int j = 0; j < 4; ++j) { + y[j+0] = d * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f); + y[j+4] = d * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f); + } +#else + assert(false); +#endif + +} + +template +static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restrict__ yy, + const sycl::nd_item<3> &item_ct1, + const uint64_t *iq1s_grid, + const uint8_t *ksigns_iq2xs, + const uint8_t *kmask_iq2xs) { + + const int i = item_ct1.get_group(2); + const block_iq1_s * x = (const block_iq1_s *) vx; + + const int tid = item_ct1.get_local_id(2); +#if QK_K == 256 + const int il = tid/8; // 0...3 + const int ib = tid%8; // 0...7 + dst_t * y = yy + i*QK_K + 32*ib + 8*il; + const int i8 = 4*ib+il; + uint8_t h = x[i].scales[i8/2] >> 4*(i8%2); + const int8_t * grid = (const int8_t *)(iq1s_grid + (x[i].qs[i8] | ((h & 8) << 5))); + const float d = (float)x[i].d * (2*(h & 7) + 1); + for (int j = 0; j < 8; ++j) y[j] = d * grid[j]; +#else + assert(false); +#endif + +} + /* DPCT1110:4: The total declared local variable size in device function dequantize_mul_mat_vec_q2_k exceeds 128 bytes and may cause high register @@ -7679,6 +7760,76 @@ vec_dot_iq3_xxs_q8_1(const void *__restrict__ vbq, #endif } +static __dpct_inline__ float +vec_dot_iq3_s_q8_1(const void *__restrict__ vbq, + const block_q8_1 *__restrict__ bq8_1, const int &iqs, + const uint32_t *iq3s_grid, const uint64_t *ksigns64) { +#if DPCT_COMPATIBILITY_TEMP >= \ + MIN_CC_DP4A // lowest compute capability for integer intrinsics +#if QK_K == 256 + const block_iq3_s * bq2 = (const block_iq3_s *) vbq; + + const int ib32 = iqs; + const uint8_t * qs = bq2->qs + 8*ib32; + const int8_t * q8 = bq8_1[ib32].qs; + int sumi = 0; + for (int l = 0; l < 4; ++l) { + const uint32_t * grid1 = iq3s_grid + (qs[2*l+0] | ((bq2->qh[ib32] << (8 - 2*l)) & 256)); + const uint32_t * grid2 = iq3s_grid + (qs[2*l+1] | ((bq2->qh[ib32] << (7 - 2*l)) & 256)); + uint32_t signs0 = dpct::vectorized_binary( + ((bq2->signs[4*ib32+l] & 0xf) * 0x01010101) & 0x08040201, 0x08040201, std::equal_to<>()); + uint32_t signs1 = dpct::vectorized_binary( + ((bq2->signs[4*ib32+l] >> 4) * 0x01010101) & 0x08040201, 0x08040201, std::equal_to<>()); + const int grid_l = dpct::vectorized_binary( + grid1[0] ^ signs0, signs0, std::minus<>()); + const int grid_h = dpct::vectorized_binary( + grid2[0] ^ signs1, signs1, std::minus<>()); + sumi = dpct::dp4a(grid_l, *((int *)q8 + 0), sumi); + sumi = dpct::dp4a(grid_h, *((int *)q8 + 1), sumi); + q8 += 8; + } + const float d = (float)bq2->d * (1 + 2*((bq2->scales[ib32/2] >> 4*(ib32%2)) & 0xf)) * bq8_1[ib32].ds[0]; + return d * sumi; +#else + assert(false); + return 0.f; +#endif +#else + assert(false); + return 0.f; +#endif +} + +static __dpct_inline__ float +vec_dot_iq1_s_q8_1(const void *__restrict__ vbq, + const block_q8_1 *__restrict__ bq8_1, const int &iqs, + const uint64_t *iq1s_grid, const uint64_t *ksigns64) { +#if QK_K == 256 + const block_iq1_s * bq1 = (const block_iq1_s *) vbq; + + const int ib32 = iqs; + int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0; + const uint8_t h1 = bq1->scales[2*ib32+0]; + const uint8_t h2 = bq1->scales[2*ib32+1]; + const int * q8 = (const int *)bq8_1[ib32].qs; + const int * grid1 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5))); + const int * grid2 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+1] | ((h1 & 0x80) << 1))); + const int * grid3 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+2] | ((h2 & 0x08) << 5))); + const int * grid4 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+3] | ((h2 & 0x80) << 1))); + for (int j = 0; j < 2; ++j) { + sumi1 = dpct::dp4a(q8[j+0], grid1[j], sumi1); + sumi2 = dpct::dp4a(q8[j+2], grid2[j], sumi2); + sumi3 = dpct::dp4a(q8[j+4], grid3[j], sumi3); + sumi4 = dpct::dp4a(q8[j+6], grid4[j], sumi4); + } + const float d = (float)bq1->d * bq8_1[ib32].ds[0]; + return d * (sumi1 * (2*(h1 & 7) + 1) + sumi2 * (2*((h1 >> 4) & 7) + 1) + + sumi3 * (2*(h2 & 7) + 1) + sumi4 * (2*((h2 >> 4) & 7) + 1)); +#else + assert(false); + return 0.f; +#endif +} template +static void mul_mat_vec_q_iq3_s_q8_1(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows, + const sycl::nd_item<3> &item_ct1, + const uint32_t *iq3s_grid_ptr, const uint64_t *ksigns64_ptr ) { + const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) + + item_ct1.get_local_id(1); + + if (row >= nrows) { + return; + } + + const int blocks_per_row = ncols / qk; + const int blocks_per_warp = vdr * WARP_SIZE / qi; + +// partial sum for each thread + float tmp = 0.0f; + + const block_q_t * x = (const block_q_t *) vx; + const block_q8_1 * y = (const block_q8_1 *) vy; + + for (int i = item_ct1.get_local_id(2) / (qi / vdr); i < blocks_per_row; + i += blocks_per_warp) { + const int ibx = row*blocks_per_row + i; // x block index + + const int iby = i * (qk/QK8_1); // y block index that aligns with ibx + + const int iqs = + vdr * + (item_ct1.get_local_id(2) % + (qi / vdr)); // x block quant index when casting the quants to int + + tmp += vec_dot_iq3_s_q8_1(&x[ibx], &y[iby], iqs, iq3s_grid_ptr, ksigns64_ptr); + } + + // sum up partial sums and write back result +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += + dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), tmp, mask); + } + + if (item_ct1.get_local_id(2) == 0) { + dst[row] = tmp; + } +} + +template +static void mul_mat_vec_q_iq1_s_q8_1(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows, + const sycl::nd_item<3> &item_ct1, + const uint64_t *iq1s_grid_ptr, const uint64_t *ksigns64_ptr ) { + const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) + + item_ct1.get_local_id(1); + + if (row >= nrows) { + return; + } + + const int blocks_per_row = ncols / qk; + const int blocks_per_warp = vdr * WARP_SIZE / qi; + +// partial sum for each thread + float tmp = 0.0f; + + const block_q_t * x = (const block_q_t *) vx; + const block_q8_1 * y = (const block_q8_1 *) vy; + + for (int i = item_ct1.get_local_id(2) / (qi / vdr); i < blocks_per_row; + i += blocks_per_warp) { + const int ibx = row*blocks_per_row + i; // x block index + + const int iby = i * (qk/QK8_1); // y block index that aligns with ibx + + const int iqs = + vdr * + (item_ct1.get_local_id(2) % + (qi / vdr)); // x block quant index when casting the quants to int + + tmp += vec_dot_iq1_s_q8_1(&x[ibx], &y[iby], iqs, iq1s_grid_ptr, ksigns64_ptr); + } + + // sum up partial sums and write back result +#pragma unroll + for (int mask = 16; mask > 0; mask >>= 1) { + tmp += + dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), tmp, mask); + } + + if (item_ct1.get_local_id(2) == 0) { + dst[row] = tmp; + } +} + template static void dequantize_mul_mat_vec(const void * __restrict__ vx, const dfloat * __restrict__ y, float * __restrict__ dst, const int ncols, const int nrows, const sycl::nd_item<3> &item_ct1) { @@ -10129,6 +10372,64 @@ static void dequantize_row_iq3_xxs_sycl(const void *vx, dst_t *y, const int k, } } +template +static void dequantize_row_iq3_s_sycl(const void *vx, dst_t *y, const int k, + dpct::queue_ptr stream) { + const int nb = k / QK_K; + { + iq3s_grid.init(*stream); + ksigns_iq2xs.init(*stream); + kmask_iq2xs.init(*stream); + + dpct::has_capability_or_fail(stream->get_device(), + {sycl::aspect::fp16}); + + stream->submit([&](sycl::handler &cgh) { + auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr(); + auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr(); + auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr(); + + cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * + sycl::range<3>(1, 1, 32), + sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { + dequantize_block_iq3_s( + vx, y, item_ct1, iq3s_grid_ptr_ct1, + ksigns_iq2xs_ptr_ct1, kmask_iq2xs_ptr_ct1); + }); + }); + } +} + +template +static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k, + dpct::queue_ptr stream) { + const int nb = k / QK_K; + { + iq1s_grid.init(*stream); + ksigns_iq2xs.init(*stream); + kmask_iq2xs.init(*stream); + + dpct::has_capability_or_fail(stream->get_device(), + {sycl::aspect::fp16}); + + stream->submit([&](sycl::handler &cgh) { + auto iq1s_grid_ptr_ct1 = iq1s_grid.get_ptr(); + auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr(); + auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr(); + + cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * + sycl::range<3>(1, 1, 32), + sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { + dequantize_block_iq1_s( + vx, y, item_ct1, iq1s_grid_ptr_ct1, + ksigns_iq2xs_ptr_ct1, kmask_iq2xs_ptr_ct1); + }); + }); + } +} + template static void convert_unary_sycl(const void *__restrict__ vx, dst_t *__restrict__ y, const int k, @@ -10179,6 +10480,10 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try { return dequantize_row_iq2_xs_sycl; case GGML_TYPE_IQ3_XXS: return dequantize_row_iq3_xxs_sycl; + case GGML_TYPE_IQ3_S: + return dequantize_row_iq3_s_sycl; + case GGML_TYPE_IQ1_S: + return dequantize_row_iq1_s_sycl; case GGML_TYPE_F32: return convert_unary_sycl; default: @@ -10219,6 +10524,10 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) { return dequantize_row_iq2_xs_sycl; case GGML_TYPE_IQ3_XXS: return dequantize_row_iq3_xxs_sycl; + case GGML_TYPE_IQ3_S: + return dequantize_row_iq3_s_sycl; + case GGML_TYPE_IQ1_S: + return dequantize_row_iq1_s_sycl; case GGML_TYPE_F16: return convert_unary_sycl; default: @@ -10808,6 +11117,61 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy, } } +static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy, + float *dst, const int ncols, + const int nrows, + dpct::queue_ptr stream) { + GGML_ASSERT(ncols % QK_K == 0); + const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; + const sycl::range<3> block_nums(1, 1, block_num_y); + const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); + { + iq3s_grid.init(*stream); + ksigns64.init(*stream); + + stream->submit([&](sycl::handler &cgh) { + auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr(); + auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); + + cgh.parallel_for( + sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) + [[intel::reqd_sub_group_size(32)]] { + mul_mat_vec_q_iq3_s_q8_1( + vx, vy, dst, ncols, nrows, item_ct1, + iq3s_grid_ptr_ct1, ksigns64_ptr_ct1); + }); + }); + } +} + +static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy, + float *dst, const int ncols, + const int nrows, + dpct::queue_ptr stream) { + GGML_ASSERT(ncols % QK_K == 0); + const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y; + const sycl::range<3> block_nums(1, 1, block_num_y); + const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); + { + iq1s_grid.init(*stream); + ksigns64.init(*stream); + + stream->submit([&](sycl::handler &cgh) { + auto iq1s_grid_ptr_ct1 = iq1s_grid.get_ptr(); + auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); + + cgh.parallel_for( + sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) + [[intel::reqd_sub_group_size(32)]] { + mul_mat_vec_q_iq1_s_q8_1( + vx, vy, dst, ncols, nrows, item_ct1, + iq1s_grid_ptr_ct1, ksigns64_ptr_ct1); + }); + }); + } +} static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy, float *dst, const int ncols_x, @@ -13556,8 +13920,11 @@ static int64_t get_row_rounding(ggml_type type, const std::array= VER_GEN9 ? 128 : 64; + case GGML_TYPE_IQ3_S: + return max_compute_capability >= VER_GEN9 ? 128 : 64; case GGML_TYPE_Q6_K: return 64; default: @@ -13618,6 +13985,12 @@ inline void ggml_sycl_op_mul_mat_vec_q( case GGML_TYPE_IQ3_XXS: mul_mat_vec_iq3_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); break; + case GGML_TYPE_IQ3_S: + mul_mat_vec_iq3_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; + case GGML_TYPE_IQ1_S: + mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); + break; default: GGML_ASSERT(false); break; @@ -16963,9 +17336,8 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons return false; } ggml_type a_type = a->type; - if (a_type == GGML_TYPE_IQ2_XXS || a_type == GGML_TYPE_IQ2_XS || a_type == GGML_TYPE_IQ3_XXS || - a_type == GGML_TYPE_IQ1_S || a_type == GGML_TYPE_IQ4_NL || a_type == GGML_TYPE_IQ3_S || - a_type == GGML_TYPE_IQ2_S || a_type == GGML_TYPE_IQ4_XS) { + if (a_type == GGML_TYPE_IQ4_NL || a_type == GGML_TYPE_IQ2_S || + a_type == GGML_TYPE_IQ4_XS) { return false; } return true; From be858f620508385ad12d0e5e862010e666ca729c Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Mon, 11 Mar 2024 07:51:49 +0100 Subject: [PATCH 12/48] Better 1.5 bit quantization (#5971) * Trying blocvks of 16 for IQ1_S - seems slightly better * iq1s_blocks16: Adjust scale fudge factor to 1.125 * iq1s_blocks16: going to blocks of 32 with 2048 lattice points, so same bpw. This is even better than blocks of 16. Should I try blocks of 64? But to keep the same bpw, when I go to 4096 lattice points, I need to remove blocks alltogether and just have superblocks of 256 weights. * iq1s_blocks16: Use 2* as sigma2 in weight adjustment * iq1s_blocks16: scalar and AVX2 dot products * iq1s_blocks16: CUDA dot product * iq1s_blocks16: Metal works, Neon does not Metal works but TG is dog slow (35 t/s). PP is OKish (493 t/s). Not seeing the bug in the Neon implementation for now. * iq1s_blocks16: fixed Neon * iq1s_blocks16: very slightly faster TG on Metal Still pathetic at 37 t/s * iq1s_blocks16: speedup Metal by packing codebook into uint32_t's * Formatting * iq1s_blocks16: uint32_t codebook is also better in CUDA TG-128 is now 204 t/s up from 194 t/s. PP-512 is 5890 t/s, so significantly better than other quants * iq1s_blocks16: slightly faster Neon dot product * iq1s_blocks16: faster AVX2 dot product * iq1s_blocks16: adjust to ggml-common.h --------- Co-authored-by: Iwan Kawrakow --- ggml-common.h | 903 ++++++++++++++++++++++++++++++++++++++++------- ggml-cuda.cu | 62 ++-- ggml-metal.metal | 66 ++-- ggml-quants.c | 510 +++++++++++++++----------- ggml-quants.h | 4 +- 5 files changed, 1152 insertions(+), 393 deletions(-) diff --git a/ggml-common.h b/ggml-common.h index 84981212c..5dd918081 100644 --- a/ggml-common.h +++ b/ggml-common.h @@ -644,136 +644,781 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512) 0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101, GGML_TABLE_END() -#define NGRID_IQ2XXS 512 -GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ2XXS) - 0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000, - 0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01, - 0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100, - 0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00, - 0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101, - 0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100, - 0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00, - 0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff, - 0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000, - 0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000, - 0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001, - 0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff, - 0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01, - 0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001, - 0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00, - 0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001, - 0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100, - 0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000, - 0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000, - 0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000, - 0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff, - 0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff, - 0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01, - 0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100, - 0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff, - 0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000, - 0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101, - 0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff, - 0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff, - 0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001, - 0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01, - 0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101, - 0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100, - 0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00, - 0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001, - 0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff, - 0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000, - 0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000, - 0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100, - 0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100, - 0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01, - 0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff, - 0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101, - 0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000, - 0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff, - 0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000, - 0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff, - 0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00, - 0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101, - 0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000, - 0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000, - 0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000, - 0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100, - 0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000, - 0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001, - 0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff, - 0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000, - 0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000, - 0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000, - 0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000, - 0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff, - 0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000, - 0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001, +#define NGRID_IQ1S 2048 +#if defined(GGML_COMMON_IMPL_C) +GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ1S) + 0xffffffffffffffff, 0xffffffffffffff01, 0xffffffffffff0000, 0xffffffffffff01ff, + 0xffffffffffff0101, 0xffffffffff00ff00, 0xffffffffff000000, 0xffffffffff01ffff, + 0xffffffffff01ff01, 0xffffffffff0101ff, 0xffffffffff010101, 0xffffffff00ff0000, + 0xffffffff0000ff00, 0xffffffff000000ff, 0xffffffff00000001, 0xffffffff00010000, + 0xffffffff01ffffff, 0xffffffff01ffff01, 0xffffffff01ff01ff, 0xffffffff01ff0101, + 0xffffffff01000000, 0xffffffff0101ffff, 0xffffffff0101ff01, 0xffffffff010101ff, + 0xffffffff01010101, 0xffffff00ffff00ff, 0xffffff00ffff0000, 0xffffff00ff00ff00, + 0xffffff00ff0000ff, 0xffffff00ff000001, 0xffffff00ff000100, 0xffffff00ff000101, + 0xffffff00ff010000, 0xffffff0000ffff00, 0xffffff0000ff0001, 0xffffff0000ff0100, + 0xffffff000000ff01, 0xffffff0000000000, 0xffffff0000000101, 0xffffff000001ff00, + 0xffffff00000100ff, 0xffffff0000010001, 0xffffff00000101ff, 0xffffff0001ff0000, + 0xffffff000100ff00, 0xffffff00010000ff, 0xffffff0001000001, 0xffffff0001010000, + 0xffffff01ffffffff, 0xffffff01ffffff01, 0xffffff01ffff01ff, 0xffffff01ffff0101, + 0xffffff01ff000000, 0xffffff01ff01ffff, 0xffffff01ff01ff01, 0xffffff01ff0101ff, + 0xffffff01ff010101, 0xffffff0100ff0000, 0xffffff010000ff00, 0xffffff0100000100, + 0xffffff01000100ff, 0xffffff0100010100, 0xffffff0101ffffff, 0xffffff0101ffff01, + 0xffffff0101ff01ff, 0xffffff0101ff0101, 0xffffff010100ff00, 0xffffff0101000000, + 0xffffff0101000100, 0xffffff010101ffff, 0xffffff010101ff01, 0xffffff01010101ff, + 0xffffff0101010101, 0xffff00ffff00ff00, 0xffff00ffff0000ff, 0xffff00ffff000001, + 0xffff00ffff010000, 0xffff00ff00ffff00, 0xffff00ff00ff0100, 0xffff00ff00000000, + 0xffff00ff00000101, 0xffff00ff000100ff, 0xffff00ff00010000, 0xffff00ff0100ff00, + 0xffff00ff01000100, 0xffff00ff01010000, 0xffff0000ffffff00, 0xffff0000ffff00ff, + 0xffff0000ffff0000, 0xffff0000ffff0001, 0xffff0000ff000000, 0xffff0000ff0001ff, + 0xffff0000ff000101, 0xffff0000ff010100, 0xffff000000ffffff, 0xffff000000ff0000, + 0xffff000000ff0101, 0xffff00000000ffff, 0xffff00000000ff00, 0xffff0000000000ff, + 0xffff000000000000, 0xffff000000000001, 0xffff000000000100, 0xffff00000001ffff, + 0xffff00000001ff01, 0xffff000000010000, 0xffff0000000101ff, 0xffff000000010101, + 0xffff000001ffff00, 0xffff00000100ff00, 0xffff000001000000, 0xffff0000010001ff, + 0xffff000001000101, 0xffff00000101ff00, 0xffff0000010100ff, 0xffff000001010000, + 0xffff000001010001, 0xffff000001010100, 0xffff0001ff0000ff, 0xffff0001ff000100, + 0xffff000100ffff00, 0xffff000100ff00ff, 0xffff00010000ffff, 0xffff00010000ff01, + 0xffff000100000000, 0xffff0001000001ff, 0xffff00010001ffff, 0xffff00010001ff00, + 0xffff000100010001, 0xffff000100010100, 0xffff000101ff0000, 0xffff00010100ff00, + 0xffff0001010000ff, 0xffff000101000100, 0xffff01ffffffffff, 0xffff01ffffffff01, + 0xffff01ffffff01ff, 0xffff01ffffff0101, 0xffff01ffff000000, 0xffff01ffff01ffff, + 0xffff01ffff01ff01, 0xffff01ffff0101ff, 0xffff01ffff010101, 0xffff01ff00ff0000, + 0xffff01ff0000ff00, 0xffff01ff00000001, 0xffff01ff00010000, 0xffff01ff01ffffff, + 0xffff01ff01ffff01, 0xffff01ff01ff01ff, 0xffff01ff01ff0101, 0xffff01ff01000000, + 0xffff01ff0101ffff, 0xffff01ff0101ff01, 0xffff01ff010101ff, 0xffff01ff01010101, + 0xffff0100ffff0000, 0xffff0100ff00ff00, 0xffff0100ff0000ff, 0xffff0100ff000100, + 0xffff0100ff0100ff, 0xffff0100ff010000, 0xffff010000ffff00, 0xffff01000000ffff, + 0xffff01000000ff00, 0xffff010000000000, 0xffff01000001ff00, 0xffff0100000100ff, + 0xffff010000010100, 0xffff01000100ff00, 0xffff0100010000ff, 0xffff010001000001, + 0xffff010001000100, 0xffff010001010000, 0xffff0101ffffffff, 0xffff0101ffffff01, + 0xffff0101ffff01ff, 0xffff0101ffff0101, 0xffff0101ff000000, 0xffff0101ff01ffff, + 0xffff0101ff01ff01, 0xffff0101ff0101ff, 0xffff0101ff010101, 0xffff010100ff0000, + 0xffff01010000ff00, 0xffff010100000100, 0xffff01010001ff00, 0xffff010100010000, + 0xffff010101ffffff, 0xffff010101ffff01, 0xffff010101ff0000, 0xffff010101ff01ff, + 0xffff010101ff0101, 0xffff010101000000, 0xffff01010101ffff, 0xffff01010101ff01, + 0xffff0101010101ff, 0xffff010101010101, 0xff00ffffff00ffff, 0xff00ffffff00ff00, + 0xff00ffffff0000ff, 0xff00ffffff000100, 0xff00ffffff0100ff, 0xff00ffffff010000, + 0xff00ffff00ffff00, 0xff00ffff00ff00ff, 0xff00ffff0000ffff, 0xff00ffff00000000, + 0xff00ffff000001ff, 0xff00ffff0001ff00, 0xff00ffff000100ff, 0xff00ffff00010000, + 0xff00ffff00010100, 0xff00ffff0100ff00, 0xff00ffff010000ff, 0xff00ffff01000001, + 0xff00ffff0101ff00, 0xff00ffff01010000, 0xff00ff00ffffff00, 0xff00ff00ffff00ff, + 0xff00ff00ffff0001, 0xff00ff00ffff0100, 0xff00ff00ff00ffff, 0xff00ff00ff00ff01, + 0xff00ff00ff000000, 0xff00ff00ff0001ff, 0xff00ff00ff01ff00, 0xff00ff00ff0100ff, + 0xff00ff00ff010100, 0xff00ff0000ff0000, 0xff00ff0000ff0101, 0xff00ff000000ffff, + 0xff00ff000000ff00, 0xff00ff000000ff01, 0xff00ff00000000ff, 0xff00ff0000000000, + 0xff00ff0000000001, 0xff00ff0000000100, 0xff00ff000001ffff, 0xff00ff0000010000, + 0xff00ff0001ff00ff, 0xff00ff000100ff01, 0xff00ff0001000000, 0xff00ff000101ff00, + 0xff00ff00010100ff, 0xff00ff01ff00ff00, 0xff00ff01ff0000ff, 0xff00ff01ff000001, + 0xff00ff01ff010000, 0xff00ff0100ffffff, 0xff00ff0100ff0001, 0xff00ff0100ff0100, + 0xff00ff010000ff01, 0xff00ff0100000000, 0xff00ff01000001ff, 0xff00ff0100000101, + 0xff00ff01000100ff, 0xff00ff0100010001, 0xff00ff0101ff0000, 0xff00ff010100ff00, + 0xff00ff01010000ff, 0xff00ff0101000001, 0xff00ff0101010000, 0xff0000ffffffff00, + 0xff0000ffffff0001, 0xff0000ffffff0100, 0xff0000ffff0000ff, 0xff0000ffff000000, + 0xff0000ffff0001ff, 0xff0000ffff000100, 0xff0000ffff01ff00, 0xff0000ffff010001, + 0xff0000ff00ffff00, 0xff0000ff00ff0000, 0xff0000ff00ff0001, 0xff0000ff00ff01ff, + 0xff0000ff00ff0101, 0xff0000ff0000ff00, 0xff0000ff000000ff, 0xff0000ff00000000, + 0xff0000ff00000001, 0xff0000ff00000100, 0xff0000ff0001ff01, 0xff0000ff00010000, + 0xff0000ff000101ff, 0xff0000ff01ff00ff, 0xff0000ff01ff0100, 0xff0000ff0100ffff, + 0xff0000ff010000ff, 0xff0000ff01000000, 0xff0000ff010001ff, 0xff0000ff01000100, + 0xff0000ff01000101, 0xff0000ff0101ff00, 0xff0000ff010100ff, 0xff0000ff01010000, + 0xff0000ff01010100, 0xff000000ffffff01, 0xff000000ffff0000, 0xff000000ffff0101, + 0xff000000ff00ff00, 0xff000000ff0000ff, 0xff000000ff000000, 0xff000000ff000001, + 0xff000000ff000100, 0xff000000ff01ffff, 0xff000000ff01ff01, 0xff000000ff010000, + 0xff000000ff0101ff, 0xff000000ff010101, 0xff00000000ffff00, 0xff00000000ff00ff, + 0xff00000000ff0000, 0xff00000000ff0001, 0xff0000000000ff00, 0xff0000000000ff01, + 0xff000000000000ff, 0xff00000000000000, 0xff00000000000001, 0xff00000000000100, + 0xff00000000000101, 0xff0000000001ff00, 0xff000000000100ff, 0xff00000000010000, + 0xff00000000010001, 0xff00000000010100, 0xff00000001ffffff, 0xff00000001ffff01, + 0xff00000001ff00ff, 0xff00000001ff0000, 0xff00000001ff01ff, 0xff00000001ff0101, + 0xff0000000100ffff, 0xff0000000100ff00, 0xff000000010000ff, 0xff00000001000000, + 0xff00000001000001, 0xff00000001000100, 0xff00000001000101, 0xff0000000101ffff, + 0xff0000000101ff01, 0xff00000001010000, 0xff000001ffffff00, 0xff000001ffff00ff, + 0xff000001ffff0000, 0xff000001ffff0001, 0xff000001ff000000, 0xff000001ff000001, + 0xff000001ff0001ff, 0xff000001ff000101, 0xff000001ff01ff00, 0xff000001ff010001, + 0xff00000100ffffff, 0xff00000100ffff01, 0xff00000100ff00ff, 0xff00000100ff0000, + 0xff00000100ff01ff, 0xff00000100ff0101, 0xff0000010000ff00, 0xff00000100000000, + 0xff00000100000001, 0xff000001000001ff, 0xff00000100000100, 0xff0000010001ff00, + 0xff000001000100ff, 0xff00000100010000, 0xff000001000101ff, 0xff00000100010100, + 0xff00000100010101, 0xff00000101ff0001, 0xff00000101ff0101, 0xff0000010100ff01, + 0xff00000101000000, 0xff000001010100ff, 0xff00000101010100, 0xff0001ffff00ff00, + 0xff0001ffff000001, 0xff0001ffff010000, 0xff0001ff00ffff00, 0xff0001ff00ff00ff, + 0xff0001ff00ff0001, 0xff0001ff00ff0100, 0xff0001ff0000ffff, 0xff0001ff00000000, + 0xff0001ff000001ff, 0xff0001ff00000101, 0xff0001ff0001ffff, 0xff0001ff0001ff00, + 0xff0001ff000100ff, 0xff0001ff00010001, 0xff0001ff00010100, 0xff0001ff01ff0000, + 0xff0001ff0100ff00, 0xff0001ff010000ff, 0xff0001ff01010000, 0xff000100ff00ffff, + 0xff000100ff00ff01, 0xff000100ff000000, 0xff000100ff000101, 0xff000100ff01ff00, + 0xff000100ff010000, 0xff00010000ffff01, 0xff00010000ff00ff, 0xff00010000ff0000, + 0xff00010000ff01ff, 0xff0001000000ff00, 0xff000100000000ff, 0xff00010000000000, + 0xff00010000000001, 0xff00010000000100, 0xff00010000000101, 0xff0001000001ffff, + 0xff00010000010000, 0xff00010000010101, 0xff00010001ff0100, 0xff0001000100ff00, + 0xff0001000100ff01, 0xff00010001000000, 0xff000100010001ff, 0xff0001000101ff00, + 0xff00010001010001, 0xff00010001010100, 0xff000101ffff0100, 0xff000101ff000001, + 0xff000101ff0100ff, 0xff000101ff010001, 0xff00010100ff00ff, 0xff00010100ff0001, + 0xff00010100ff0100, 0xff0001010000ffff, 0xff0001010000ff01, 0xff00010100000000, + 0xff000101000001ff, 0xff0001010001ff00, 0xff00010100010001, 0xff00010100010100, + 0xff00010101ff0000, 0xff0001010100ff00, 0xff00010101000001, 0xff00010101000101, + 0xff01ffffffffffff, 0xff01ffffffffff01, 0xff01ffffffff01ff, 0xff01ffffffff0101, + 0xff01ffffff000000, 0xff01ffffff01ffff, 0xff01ffffff01ff01, 0xff01ffffff010000, + 0xff01ffffff0101ff, 0xff01ffffff010101, 0xff01ffff00ff0000, 0xff01ffff0000ff00, + 0xff01ffff00000100, 0xff01ffff0001ff00, 0xff01ffff00010000, 0xff01ffff01ffffff, + 0xff01ffff01ffff01, 0xff01ffff01ff01ff, 0xff01ffff01ff0101, 0xff01ffff01000000, + 0xff01ffff0101ffff, 0xff01ffff0101ff01, 0xff01ffff01010000, 0xff01ffff010101ff, + 0xff01ffff01010101, 0xff01ff00ffff0000, 0xff01ff00ff00ff00, 0xff01ff00ff0000ff, + 0xff01ff00ff000100, 0xff01ff00ff010000, 0xff01ff0000ffff01, 0xff01ff0000ff00ff, + 0xff01ff0000ff0100, 0xff01ff0000000000, 0xff01ff00000001ff, 0xff01ff0000000101, + 0xff01ff000001ff00, 0xff01ff00000100ff, 0xff01ff0000010000, 0xff01ff0000010001, + 0xff01ff0001ff0000, 0xff01ff000100ffff, 0xff01ff0001000001, 0xff01ff0001000100, + 0xff01ff0001010000, 0xff01ff01ffffff00, 0xff01ff01ffff01ff, 0xff01ff01ffff0101, + 0xff01ff01ff00ff00, 0xff01ff01ff000000, 0xff01ff01ff01ffff, 0xff01ff01ff01ff01, + 0xff01ff01ff0101ff, 0xff01ff01ff010101, 0xff01ff0100ff0000, 0xff01ff010000ff00, + 0xff01ff0100000001, 0xff01ff0100000100, 0xff01ff0100010000, 0xff01ff0101ffff00, + 0xff01ff0101ff01ff, 0xff01ff0101ff0101, 0xff01ff010100ff00, 0xff01ff0101000000, + 0xff01ff010101ffff, 0xff01ff010101ff01, 0xff01ff01010101ff, 0xff01ff0101010101, + 0xff0100ffffff0000, 0xff0100ffff0000ff, 0xff0100ffff000001, 0xff0100ffff000100, + 0xff0100ffff010000, 0xff0100ff00ff00ff, 0xff0100ff00ff0000, 0xff0100ff00ff0001, + 0xff0100ff00ff0100, 0xff0100ff0000ff01, 0xff0100ff00000000, 0xff0100ff000001ff, + 0xff0100ff00000101, 0xff0100ff00010001, 0xff0100ff01ff0000, 0xff0100ff0100ff00, + 0xff0100ff010000ff, 0xff0100ff01000100, 0xff0100ff0101ff00, 0xff0100ff01010000, + 0xff010000ffff0100, 0xff010000ff000000, 0xff010000ff01ff00, 0xff010000ff010100, + 0xff01000000ffffff, 0xff01000000ff0000, 0xff01000000ff01ff, 0xff0100000000ff00, + 0xff010000000000ff, 0xff01000000000000, 0xff01000000000100, 0xff0100000001ff01, + 0xff01000000010000, 0xff010000000101ff, 0xff01000001ff0100, 0xff0100000100ffff, + 0xff010000010000ff, 0xff01000001000000, 0xff010000010001ff, 0xff01000001000101, + 0xff0100000101ff00, 0xff010000010100ff, 0xff01000001010001, 0xff01000001010100, + 0xff010001ffff0000, 0xff010001ff00ffff, 0xff010001ff00ff01, 0xff010001ff000100, + 0xff010001ff010000, 0xff01000100ffff00, 0xff01000100ff0100, 0xff01000100000000, + 0xff0100010001ffff, 0xff0100010001ff00, 0xff01000100010100, 0xff01000101ff00ff, + 0xff01000101ff0001, 0xff0100010100ffff, 0xff01000101000101, 0xff0101ffffffffff, + 0xff0101ffffffff01, 0xff0101ffffff01ff, 0xff0101ffffff0101, 0xff0101ffff000000, + 0xff0101ffff01ffff, 0xff0101ffff01ff01, 0xff0101ffff0101ff, 0xff0101ffff010101, + 0xff0101ff00ff0000, 0xff0101ff0000ff00, 0xff0101ff000000ff, 0xff0101ff00010000, + 0xff0101ff01ffffff, 0xff0101ff01ffff01, 0xff0101ff01ff01ff, 0xff0101ff01ff0101, + 0xff0101ff0101ffff, 0xff0101ff0101ff01, 0xff0101ff010101ff, 0xff0101ff01010101, + 0xff010100ffff0100, 0xff010100ff00ff00, 0xff010100ff0000ff, 0xff010100ff000100, + 0xff010100ff010000, 0xff01010000ff0001, 0xff01010000ff0100, 0xff0101000000ff01, + 0xff01010000000000, 0xff0101000001ff00, 0xff010100000100ff, 0xff01010000010001, + 0xff01010000010100, 0xff01010001ff0000, 0xff0101000100ffff, 0xff01010001000001, + 0xff01010001000100, 0xff010100010100ff, 0xff01010001010000, 0xff010101ffffffff, + 0xff010101ffffff01, 0xff010101ffff01ff, 0xff010101ffff0101, 0xff010101ff01ffff, + 0xff010101ff01ff01, 0xff010101ff0101ff, 0xff010101ff010101, 0xff01010100ff0000, + 0xff0101010000ff00, 0xff01010100000001, 0xff01010100000100, 0xff01010100010000, + 0xff01010101ffffff, 0xff01010101ffff01, 0xff01010101ff01ff, 0xff01010101ff0101, + 0xff01010101000000, 0xff0101010101ffff, 0xff0101010101ff01, 0xff010101010101ff, + 0xff01010101010101, 0x00ffffffffff0000, 0x00ffffffff00ff00, 0x00ffffffff000001, + 0x00ffffffff010000, 0x00ffffff00ff0100, 0x00ffffff0000ff01, 0x00ffffff00000000, + 0x00ffffff000001ff, 0x00ffffff00000101, 0x00ffffff0001ff00, 0x00ffffff000100ff, + 0x00ffffff00010001, 0x00ffffff010000ff, 0x00ffffff01000100, 0x00ffffff0101ff00, + 0x00ffffff01010001, 0x00ffff00ffffffff, 0x00ffff00ffffff00, 0x00ffff00ffff00ff, + 0x00ffff00ffff0001, 0x00ffff00ffff0100, 0x00ffff00ff00ff01, 0x00ffff00ff000000, + 0x00ffff00ff000001, 0x00ffff00ff0001ff, 0x00ffff00ff000101, 0x00ffff00ff01ff00, + 0x00ffff00ff010001, 0x00ffff00ff010100, 0x00ffff0000ff0000, 0x00ffff0000ff01ff, + 0x00ffff0000ff0101, 0x00ffff000000ff00, 0x00ffff00000000ff, 0x00ffff0000000000, + 0x00ffff0000000001, 0x00ffff0000000100, 0x00ffff0000000101, 0x00ffff0000010000, + 0x00ffff00000101ff, 0x00ffff0000010101, 0x00ffff0001ffff00, 0x00ffff0001ff00ff, + 0x00ffff0001ff0001, 0x00ffff000100ffff, 0x00ffff000100ff01, 0x00ffff0001000000, + 0x00ffff000101ffff, 0x00ffff000101ff00, 0x00ffff000101ff01, 0x00ffff01ffff0000, + 0x00ffff01ff00ff00, 0x00ffff01ff0000ff, 0x00ffff01ff000001, 0x00ffff01ff010000, + 0x00ffff0100ffff00, 0x00ffff010000ff01, 0x00ffff0100000000, 0x00ffff0100000101, + 0x00ffff01000100ff, 0x00ffff0100010100, 0x00ffff0101ff0100, 0x00ffff01010000ff, + 0x00ffff0101010000, 0x00ff00ffffffff00, 0x00ff00ffff000000, 0x00ff00ffff000100, + 0x00ff00ffff010100, 0x00ff00ff00ff0000, 0x00ff00ff00ff01ff, 0x00ff00ff00ff0101, + 0x00ff00ff0000ff00, 0x00ff00ff000000ff, 0x00ff00ff00000000, 0x00ff00ff00000001, + 0x00ff00ff0001ff00, 0x00ff00ff0001ff01, 0x00ff00ff00010000, 0x00ff00ff000101ff, + 0x00ff00ff00010101, 0x00ff00ff01ffff00, 0x00ff00ff01ff0001, 0x00ff00ff01ff0100, + 0x00ff00ff0100ffff, 0x00ff00ff0100ff01, 0x00ff00ff01000000, 0x00ff00ff0101ffff, + 0x00ff00ff0101ff00, 0x00ff00ff01010100, 0x00ff0000ffffff00, 0x00ff0000ffffff01, + 0x00ff0000ffff0000, 0x00ff0000ffff0101, 0x00ff0000ff00ff00, 0x00ff0000ff0000ff, + 0x00ff0000ff000000, 0x00ff0000ff000001, 0x00ff0000ff000100, 0x00ff0000ff01ffff, + 0x00ff0000ff010000, 0x00ff0000ff010101, 0x00ff000000ffff00, 0x00ff000000ff00ff, + 0x00ff000000ff0000, 0x00ff000000ff0001, 0x00ff000000ff0100, 0x00ff00000000ffff, + 0x00ff00000000ff00, 0x00ff0000000000ff, 0x00ff000000000000, 0x00ff000000000001, + 0x00ff0000000001ff, 0x00ff000000000100, 0x00ff00000001ff00, 0x00ff0000000100ff, + 0x00ff000000010000, 0x00ff000000010001, 0x00ff000000010100, 0x00ff000001ffff01, + 0x00ff000001ff00ff, 0x00ff000001ff0000, 0x00ff000001ff01ff, 0x00ff00000100ff00, + 0x00ff0000010000ff, 0x00ff000001000000, 0x00ff000001000001, 0x00ff000001000100, + 0x00ff000001000101, 0x00ff000001010000, 0x00ff0000010101ff, 0x00ff000001010101, + 0x00ff0001ffffff00, 0x00ff0001ffff0000, 0x00ff0001ffff0100, 0x00ff0001ff0000ff, + 0x00ff0001ff000000, 0x00ff0001ff0001ff, 0x00ff0001ff000101, 0x00ff0001ff01ff00, + 0x00ff0001ff0100ff, 0x00ff0001ff010100, 0x00ff000100ffffff, 0x00ff000100ffff01, + 0x00ff000100ff0000, 0x00ff000100ff01ff, 0x00ff00010000ffff, 0x00ff00010000ff00, + 0x00ff00010000ff01, 0x00ff000100000000, 0x00ff000100000001, 0x00ff000100000100, + 0x00ff00010001ff01, 0x00ff000100010000, 0x00ff0001000101ff, 0x00ff000101ffff00, + 0x00ff000101ff0000, 0x00ff000101ff0101, 0x00ff0001010000ff, 0x00ff000101000000, + 0x00ff00010101ff00, 0x00ff0001010100ff, 0x00ff000101010001, 0x00ff01ffffff0000, + 0x00ff01ffff00ff00, 0x00ff01ffff000000, 0x00ff01ffff000101, 0x00ff01ffff010000, + 0x00ff01ff00ffff01, 0x00ff01ff00ff0100, 0x00ff01ff0000ffff, 0x00ff01ff00000000, + 0x00ff01ff000001ff, 0x00ff01ff0001ff00, 0x00ff01ff000100ff, 0x00ff01ff00010001, + 0x00ff01ff00010100, 0x00ff01ff01ff0000, 0x00ff01ff0100ff00, 0x00ff01ff010000ff, + 0x00ff01ff01000001, 0x00ff01ff01000100, 0x00ff01ff01010000, 0x00ff0100ffffff00, + 0x00ff0100ffff0000, 0x00ff0100ffff0001, 0x00ff0100ffff0101, 0x00ff0100ff00ffff, + 0x00ff0100ff0000ff, 0x00ff0100ff000000, 0x00ff0100ff0001ff, 0x00ff0100ff01ff00, + 0x00ff0100ff0100ff, 0x00ff0100ff010001, 0x00ff010000ffffff, 0x00ff010000ff0000, + 0x00ff010000ff0101, 0x00ff01000000ff00, 0x00ff01000000ff01, 0x00ff0100000000ff, + 0x00ff010000000000, 0x00ff010000000001, 0x00ff010000000100, 0x00ff01000001ffff, + 0x00ff01000001ff01, 0x00ff010000010000, 0x00ff010000010001, 0x00ff010000010101, + 0x00ff010001ff0001, 0x00ff010001ff0100, 0x00ff01000100ff01, 0x00ff010001000000, + 0x00ff010001000001, 0x00ff0100010001ff, 0x00ff01000101ff00, 0x00ff0100010100ff, + 0x00ff010001010001, 0x00ff010001010100, 0x00ff0101ff000001, 0x00ff010100ff00ff, + 0x00ff010100ff0001, 0x00ff010100ff0100, 0x00ff010100000000, 0x00ff0101000001ff, + 0x00ff010100000101, 0x00ff0101000100ff, 0x00ff010100010100, 0x00ff0101010000ff, + 0x00ff010101010000, 0x0000ffffffffff00, 0x0000ffffffff00ff, 0x0000ffffffff0000, + 0x0000ffffffff0001, 0x0000ffffffff0100, 0x0000ffffff00ff01, 0x0000ffffff000000, + 0x0000ffffff000101, 0x0000ffffff01ff00, 0x0000ffffff0100ff, 0x0000ffffff010100, + 0x0000ffff00ffffff, 0x0000ffff00ff0000, 0x0000ffff00ff01ff, 0x0000ffff0000ff00, + 0x0000ffff000000ff, 0x0000ffff00000000, 0x0000ffff00000001, 0x0000ffff00000100, + 0x0000ffff00010000, 0x0000ffff000101ff, 0x0000ffff01ff0001, 0x0000ffff01ff0100, + 0x0000ffff01000000, 0x0000ffff010001ff, 0x0000ffff0101ffff, 0x0000ffff0101ff00, + 0x0000ffff01010001, 0x0000ffff01010100, 0x0000ff00ffff0000, 0x0000ff00ffff01ff, + 0x0000ff00ffff0100, 0x0000ff00ffff0101, 0x0000ff00ff00ff00, 0x0000ff00ff0000ff, + 0x0000ff00ff000000, 0x0000ff00ff000001, 0x0000ff00ff0001ff, 0x0000ff00ff000100, + 0x0000ff00ff01ffff, 0x0000ff00ff010000, 0x0000ff00ff010001, 0x0000ff00ff0101ff, + 0x0000ff00ff010101, 0x0000ff0000ffff00, 0x0000ff0000ff00ff, 0x0000ff0000ff0000, + 0x0000ff0000ff0001, 0x0000ff0000ff0100, 0x0000ff000000ffff, 0x0000ff000000ff00, + 0x0000ff000000ff01, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001, + 0x0000ff00000001ff, 0x0000ff0000000100, 0x0000ff0000000101, 0x0000ff000001ff00, + 0x0000ff00000100ff, 0x0000ff0000010000, 0x0000ff0000010001, 0x0000ff0000010100, + 0x0000ff0001ffff01, 0x0000ff0001ff0000, 0x0000ff000100ff00, 0x0000ff00010000ff, + 0x0000ff0001000000, 0x0000ff0001000001, 0x0000ff0001000100, 0x0000ff000101ffff, + 0x0000ff0001010000, 0x0000ff0001010101, 0x0000ff01ffffff00, 0x0000ff01ffff0001, + 0x0000ff01ff00ff01, 0x0000ff01ff000000, 0x0000ff01ff000101, 0x0000ff01ff01ff00, + 0x0000ff01ff0100ff, 0x0000ff0100ffff01, 0x0000ff0100ff0000, 0x0000ff0100ff0101, + 0x0000ff010000ff00, 0x0000ff01000000ff, 0x0000ff0100000000, 0x0000ff0100000001, + 0x0000ff0100000100, 0x0000ff010001ff01, 0x0000ff0100010000, 0x0000ff0101ff0000, + 0x0000ff010100ffff, 0x0000ff010100ff01, 0x0000ff0101000000, 0x0000ff0101000100, + 0x0000ff0101000101, 0x0000ff01010100ff, 0x000000ffffff00ff, 0x000000ffffff0000, + 0x000000ffff00ff00, 0x000000ffff0000ff, 0x000000ffff000000, 0x000000ffff000001, + 0x000000ffff0001ff, 0x000000ffff000100, 0x000000ffff01ff00, 0x000000ffff010000, + 0x000000ffff0101ff, 0x000000ffff010101, 0x000000ff00ffff00, 0x000000ff00ff00ff, + 0x000000ff00ff0000, 0x000000ff00ff0001, 0x000000ff00ff0100, 0x000000ff00ff0101, + 0x000000ff0000ffff, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000, + 0x000000ff00000001, 0x000000ff000001ff, 0x000000ff00000100, 0x000000ff00000101, + 0x000000ff0001ff00, 0x000000ff0001ff01, 0x000000ff000100ff, 0x000000ff00010000, + 0x000000ff00010001, 0x000000ff00010100, 0x000000ff01ffffff, 0x000000ff01ff01ff, + 0x000000ff01ff0101, 0x000000ff0100ff00, 0x000000ff010000ff, 0x000000ff01000000, + 0x000000ff01000001, 0x000000ff01000100, 0x000000ff0101ff00, 0x000000ff010100ff, + 0x000000ff01010000, 0x000000ff01010101, 0x00000000ffffff00, 0x00000000ffffff01, + 0x00000000ffff00ff, 0x00000000ffff0000, 0x00000000ffff0001, 0x00000000ffff0100, + 0x00000000ff00ffff, 0x00000000ff00ff00, 0x00000000ff00ff01, 0x00000000ff0000ff, + 0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff000101, + 0x00000000ff01ff00, 0x00000000ff0100ff, 0x00000000ff010000, 0x00000000ff010001, + 0x00000000ff010100, 0x0000000000ffffff, 0x0000000000ffff00, 0x0000000000ffff01, + 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001, 0x0000000000ff01ff, 0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01, - 0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100, - 0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000, - 0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00, - 0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100, - 0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000, - 0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001, - 0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00, - 0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff, - 0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100, - 0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff, - 0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000, - 0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff, - 0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff, - 0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00, - 0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001, - 0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001, - 0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01, - 0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000, - 0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101, - 0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00, - 0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100, - 0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101, - 0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101, - 0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000, - 0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff, - 0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff, - 0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101, - 0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff, - 0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101, - 0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001, - 0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff, - 0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff, - 0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01, - 0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff, - 0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100, - 0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001, - 0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00, - 0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff, - 0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff, - 0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000, - 0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000, - 0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101, - 0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001, - 0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000, - 0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101, - 0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000, - 0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001, - 0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000, - 0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100, - 0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000, - 0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000, - 0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100, - 0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff, - 0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff, - 0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00, - 0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101, - 0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000, - 0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00, - 0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000, - 0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff, - 0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101, - 0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff, - 0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00, - 0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff, + 0x00000000000000ff, 0x0000000000000000, 0x0000000000000001, 0x00000000000001ff, + 0x0000000000000100, 0x0000000000000101, 0x000000000001ffff, 0x000000000001ff00, + 0x00000000000100ff, 0x0000000000010000, 0x0000000000010001, 0x00000000000101ff, + 0x0000000000010100, 0x0000000000010101, 0x0000000001ffff00, 0x0000000001ff00ff, + 0x0000000001ff0000, 0x0000000001ff0100, 0x0000000001ff0101, 0x000000000100ffff, + 0x000000000100ff00, 0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, + 0x00000000010001ff, 0x0000000001000100, 0x000000000101ff00, 0x00000000010100ff, + 0x0000000001010000, 0x0000000001010001, 0x0000000001010100, 0x00000001ffffffff, + 0x00000001ffffff00, 0x00000001ffffff01, 0x00000001ffff00ff, 0x00000001ffff0001, + 0x00000001ffff01ff, 0x00000001ffff0100, 0x00000001ff00ff00, 0x00000001ff0000ff, + 0x00000001ff000000, 0x00000001ff0001ff, 0x00000001ff000100, 0x00000001ff01ffff, + 0x00000001ff01ff00, 0x00000001ff01ff01, 0x00000001ff0100ff, 0x00000001ff010000, + 0x00000001ff010001, 0x00000001ff0101ff, 0x00000001ff010100, 0x0000000100ffff00, + 0x0000000100ff0000, 0x0000000100ff0001, 0x0000000100ff01ff, 0x0000000100ff0100, + 0x0000000100ff0101, 0x000000010000ffff, 0x000000010000ff00, 0x000000010000ff01, + 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001, 0x00000001000001ff, + 0x0000000100000100, 0x0000000100000101, 0x000000010001ff00, 0x00000001000100ff, + 0x0000000100010000, 0x0000000100010100, 0x0000000101ffff01, 0x0000000101ff0000, + 0x0000000101ff0001, 0x0000000101ff01ff, 0x0000000101ff0100, 0x0000000101ff0101, + 0x000000010100ff00, 0x0000000101000000, 0x0000000101000101, 0x000000010101ff01, + 0x0000000101010000, 0x0000000101010001, 0x00000001010101ff, 0x0000000101010100, + 0x000001ffffff00ff, 0x000001ffffff0000, 0x000001ffffff0001, 0x000001ffffff0100, + 0x000001ffff00ffff, 0x000001ffff000000, 0x000001ffff0001ff, 0x000001ffff01ff00, + 0x000001ffff010101, 0x000001ff00ff0000, 0x000001ff00ff01ff, 0x000001ff00ff0101, + 0x000001ff0000ff00, 0x000001ff000000ff, 0x000001ff00000000, 0x000001ff00000001, + 0x000001ff000001ff, 0x000001ff00000100, 0x000001ff0001ffff, 0x000001ff0001ff01, + 0x000001ff000100ff, 0x000001ff00010000, 0x000001ff01ffff01, 0x000001ff01ff0100, + 0x000001ff0100ffff, 0x000001ff0100ff01, 0x000001ff01000000, 0x000001ff010001ff, + 0x000001ff0101ff00, 0x000001ff01010100, 0x00000100ffffff00, 0x00000100ffffff01, + 0x00000100ffff0000, 0x00000100ffff0101, 0x00000100ff00ff00, 0x00000100ff0000ff, + 0x00000100ff000000, 0x00000100ff000001, 0x00000100ff000100, 0x00000100ff010000, + 0x0000010000ffff00, 0x0000010000ff00ff, 0x0000010000ff0000, 0x0000010000ff0001, + 0x0000010000ff0100, 0x000001000000ffff, 0x000001000000ff00, 0x000001000000ff01, + 0x00000100000000ff, 0x0000010000000000, 0x0000010000000001, 0x00000100000001ff, + 0x0000010000000100, 0x0000010000000101, 0x000001000001ff00, 0x00000100000100ff, + 0x0000010000010000, 0x0000010000010001, 0x0000010000010100, 0x0000010001ffff00, + 0x0000010001ff0000, 0x0000010001ff0100, 0x000001000100ff00, 0x00000100010000ff, + 0x0000010001000000, 0x0000010001000001, 0x00000100010001ff, 0x0000010001000100, + 0x0000010001010000, 0x00000101ffff00ff, 0x00000101ffff01ff, 0x00000101ff000000, + 0x00000101ff000101, 0x00000101ff01ffff, 0x00000101ff010000, 0x00000101ff010001, + 0x00000101ff010100, 0x0000010100ff0000, 0x0000010100ff01ff, 0x0000010100ff0100, + 0x000001010000ff00, 0x0000010100000000, 0x0000010100000001, 0x00000101000001ff, + 0x0000010100000100, 0x000001010001ff01, 0x0000010100010000, 0x00000101000101ff, + 0x0000010100010101, 0x0000010101ffff00, 0x0000010101ff0101, 0x000001010100ff01, + 0x0000010101000000, 0x0000010101000001, 0x00000101010001ff, 0x0000010101000101, + 0x000001010101ff00, 0x0001ffffffff0000, 0x0001ffffff0000ff, 0x0001ffffff000001, + 0x0001ffffff000100, 0x0001ffffff010000, 0x0001ffff00ff00ff, 0x0001ffff0000ffff, + 0x0001ffff00000000, 0x0001ffff00000001, 0x0001ffff000001ff, 0x0001ffff00000101, + 0x0001ffff0001ff00, 0x0001ffff000100ff, 0x0001ffff00010001, 0x0001ffff00010100, + 0x0001ffff01ffff00, 0x0001ffff01000001, 0x0001ffff01010000, 0x0001ff00ffffff00, + 0x0001ff00ffff00ff, 0x0001ff00ffff0001, 0x0001ff00ffff0100, 0x0001ff00ff00ff01, + 0x0001ff00ff000000, 0x0001ff00ff01ff00, 0x0001ff00ff01ff01, 0x0001ff00ff010001, + 0x0001ff00ff010100, 0x0001ff0000ff0000, 0x0001ff0000ff0100, 0x0001ff000000ff00, + 0x0001ff0000000000, 0x0001ff0000000001, 0x0001ff0000000100, 0x0001ff0000010000, + 0x0001ff0000010001, 0x0001ff0000010101, 0x0001ff0001ff00ff, 0x0001ff0001ff0101, + 0x0001ff000100ff01, 0x0001ff0001000000, 0x0001ff000101ff00, 0x0001ff0001010001, + 0x0001ff0001010100, 0x0001ff01ff00ff00, 0x0001ff01ff000001, 0x0001ff01ff000100, + 0x0001ff0100ffffff, 0x0001ff0100ffff00, 0x0001ff0100ff0001, 0x0001ff0100000000, + 0x0001ff0100000001, 0x0001ff01000001ff, 0x0001ff010001ffff, 0x0001ff0101ff0000, + 0x0001ff010100ff00, 0x0001ff0101000001, 0x0001ff0101010000, 0x000100ffff00ff00, + 0x000100ffff00ff01, 0x000100ffff000000, 0x000100ffff000001, 0x000100ffff000101, + 0x000100ffff01ff00, 0x000100ffff010001, 0x000100ffff010100, 0x000100ff00ffffff, + 0x000100ff00ffff01, 0x000100ff00ff0000, 0x000100ff00ff01ff, 0x000100ff00ff0101, + 0x000100ff0000ff00, 0x000100ff000000ff, 0x000100ff00000000, 0x000100ff00000001, + 0x000100ff00000100, 0x000100ff00000101, 0x000100ff0001ffff, 0x000100ff0001ff01, + 0x000100ff00010000, 0x000100ff01ff00ff, 0x000100ff01ff0000, 0x000100ff01ff0100, + 0x000100ff0100ffff, 0x000100ff0100ff01, 0x000100ff010000ff, 0x000100ff01000000, + 0x000100ff01000001, 0x000100ff010001ff, 0x000100ff01000101, 0x000100ff0101ff00, + 0x000100ff010100ff, 0x000100ff01010100, 0x00010000ffff0000, 0x00010000ffff01ff, + 0x00010000ffff0101, 0x00010000ff00ff00, 0x00010000ff000000, 0x00010000ff000001, + 0x00010000ff000100, 0x0001000000ff00ff, 0x0001000000ff0000, 0x0001000000ff0001, + 0x0001000000ff0100, 0x000100000000ffff, 0x000100000000ff00, 0x00010000000000ff, + 0x0001000000000000, 0x0001000000000001, 0x0001000000000100, 0x000100000001ff00, + 0x00010000000100ff, 0x0001000000010000, 0x0001000000010001, 0x0001000000010100, + 0x0001000001ff0001, 0x0001000001ff0100, 0x0001000001ff0101, 0x000100000100ff00, + 0x0001000001000000, 0x0001000001000001, 0x0001000001000100, 0x0001000001000101, + 0x000100000101ff01, 0x0001000001010000, 0x0001000001010001, 0x00010000010101ff, + 0x00010001ffffff01, 0x00010001ffff0100, 0x00010001ff000000, 0x00010001ff01ffff, + 0x00010001ff010001, 0x00010001ff0101ff, 0x00010001ff010100, 0x0001000100ffffff, + 0x0001000100ff0000, 0x0001000100ff01ff, 0x0001000100ff0101, 0x000100010000ff00, + 0x00010001000000ff, 0x0001000100000000, 0x0001000100000001, 0x00010001000001ff, + 0x0001000100000101, 0x000100010001ffff, 0x0001000100010000, 0x00010001000101ff, + 0x0001000101ffffff, 0x0001000101ffff01, 0x0001000101ff0000, 0x0001000101ff0101, + 0x00010001010000ff, 0x0001000101000001, 0x00010001010001ff, 0x0001000101000100, + 0x000100010101ffff, 0x00010001010100ff, 0x0001000101010001, 0x0001000101010101, + 0x000101ffff000001, 0x000101ffff000100, 0x000101ffff010000, 0x000101ff00ffff00, + 0x000101ff0000ff01, 0x000101ff00000000, 0x000101ff00000101, 0x000101ff0001ff00, + 0x000101ff00010100, 0x000101ff01ff0000, 0x000101ff0100ff00, 0x000101ff010001ff, + 0x000101ff01010001, 0x00010100ffffff00, 0x00010100ffff00ff, 0x00010100ff00ffff, + 0x00010100ff000000, 0x00010100ff01ff00, 0x00010100ff0100ff, 0x00010100ff010001, + 0x00010100ff010100, 0x0001010000ffffff, 0x0001010000ffff00, 0x0001010000ff0000, + 0x0001010000ff0001, 0x0001010000ff01ff, 0x000101000000ff00, 0x00010100000000ff, + 0x0001010000000000, 0x0001010000000001, 0x0001010000000100, 0x000101000001ffff, + 0x0001010000010000, 0x0001010000010101, 0x0001010001ffff01, 0x0001010001ff00ff, + 0x0001010001ff0101, 0x0001010001000000, 0x000101000101ff00, 0x00010100010100ff, + 0x0001010001010000, 0x0001010001010100, 0x00010101ff00ff00, 0x00010101ff000001, + 0x00010101ff0001ff, 0x0001010100ffff00, 0x0001010100ff00ff, 0x0001010100ff0100, + 0x000101010000ffff, 0x0001010100000000, 0x00010101000001ff, 0x0001010100000101, + 0x00010101000100ff, 0x0001010100010000, 0x0001010100010100, 0x0001010101ff0001, + 0x00010101010000ff, 0x00010101010001ff, 0x0001010101000101, 0x0001010101010001, + 0x01ffffffffffffff, 0x01ffffffffffff01, 0x01ffffffffff01ff, 0x01ffffffffff0101, + 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff, 0x01ffffffff010101, + 0x01ffffff00ff0000, 0x01ffffff0000ffff, 0x01ffffff0000ff00, 0x01ffffff000000ff, + 0x01ffffff00000001, 0x01ffffff00000100, 0x01ffffff00010000, 0x01ffffff01ffffff, + 0x01ffffff01ffff01, 0x01ffffff01ff01ff, 0x01ffffff01ff0101, 0x01ffffff01000000, + 0x01ffffff0101ffff, 0x01ffffff0101ff01, 0x01ffffff010101ff, 0x01ffffff01010101, + 0x01ffff00ffff0000, 0x01ffff00ff00ff00, 0x01ffff00ff0000ff, 0x01ffff00ff000001, + 0x01ffff00ff000100, 0x01ffff00ff010000, 0x01ffff0000ffff00, 0x01ffff0000ff00ff, + 0x01ffff0000ff0100, 0x01ffff000000ffff, 0x01ffff000000ff01, 0x01ffff0000000000, + 0x01ffff0000000001, 0x01ffff00000001ff, 0x01ffff0000000100, 0x01ffff00000100ff, + 0x01ffff0000010001, 0x01ffff0000010100, 0x01ffff0001ff0000, 0x01ffff0001ff0100, + 0x01ffff00010000ff, 0x01ffff0001000001, 0x01ffff0001000100, 0x01ffff0001010000, + 0x01ffff01ffffffff, 0x01ffff01ffffff01, 0x01ffff01ffff01ff, 0x01ffff01ffff0101, + 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff01ff01, 0x01ffff01ff0101ff, + 0x01ffff01ff010101, 0x01ffff010000ff00, 0x01ffff01000000ff, 0x01ffff0100000100, + 0x01ffff0100010000, 0x01ffff0101ffffff, 0x01ffff0101ffff01, 0x01ffff0101ff01ff, + 0x01ffff0101ff0101, 0x01ffff0101000000, 0x01ffff010101ffff, 0x01ffff010101ff01, + 0x01ffff01010101ff, 0x01ffff0101010101, 0x01ff00ffff0000ff, 0x01ff00ffff000100, + 0x01ff00ff00ffff00, 0x01ff00ff00ff00ff, 0x01ff00ff0000ff00, 0x01ff00ff00000000, + 0x01ff00ff00000101, 0x01ff00ff0001ff00, 0x01ff00ff000100ff, 0x01ff00ff00010100, + 0x01ff00ff010000ff, 0x01ff00ff01000100, 0x01ff0000ffffff00, 0x01ff0000ffff0100, + 0x01ff0000ff00ff01, 0x01ff0000ff000000, 0x01ff0000ff000101, 0x01ff0000ff010001, + 0x01ff0000ff010100, 0x01ff000000ffffff, 0x01ff000000ffff00, 0x01ff000000ff0000, + 0x01ff000000ff01ff, 0x01ff00000000ff00, 0x01ff0000000000ff, 0x01ff000000000000, + 0x01ff000000000001, 0x01ff000000000100, 0x01ff000000000101, 0x01ff000000010000, + 0x01ff000000010001, 0x01ff0000000101ff, 0x01ff000000010101, 0x01ff000001ffff00, + 0x01ff000001ff00ff, 0x01ff000001ff0001, 0x01ff000001ff0100, 0x01ff00000100ffff, + 0x01ff00000100ff01, 0x01ff000001000000, 0x01ff0000010001ff, 0x01ff000001010001, + 0x01ff0001ff00ff00, 0x01ff0001ff000001, 0x01ff0001ff000100, 0x01ff0001ff010000, + 0x01ff000100ffff00, 0x01ff000100ff00ff, 0x01ff000100ff0100, 0x01ff000100ff0101, + 0x01ff00010000ffff, 0x01ff000100000000, 0x01ff000100000100, 0x01ff000100000101, + 0x01ff00010001ff00, 0x01ff000100010001, 0x01ff000100010101, 0x01ff000101ff0000, + 0x01ff00010100ff00, 0x01ff000101000101, 0x01ff0001010100ff, 0x01ff01ffffffffff, + 0x01ff01ffffffff01, 0x01ff01ffffff01ff, 0x01ff01ffffff0101, 0x01ff01ffff000000, + 0x01ff01ffff01ffff, 0x01ff01ffff01ff01, 0x01ff01ffff0101ff, 0x01ff01ffff010101, + 0x01ff01ff00ffff00, 0x01ff01ff00ff0000, 0x01ff01ff0000ff00, 0x01ff01ff000000ff, + 0x01ff01ff00000100, 0x01ff01ff00010000, 0x01ff01ff00010100, 0x01ff01ff01ffffff, + 0x01ff01ff01ffff01, 0x01ff01ff01ff01ff, 0x01ff01ff01ff0101, 0x01ff01ff01000000, + 0x01ff01ff0101ffff, 0x01ff01ff0101ff01, 0x01ff01ff010101ff, 0x01ff01ff01010101, + 0x01ff0100ffff0000, 0x01ff0100ffff0001, 0x01ff0100ff00ff00, 0x01ff0100ff0000ff, + 0x01ff0100ff000001, 0x01ff0100ff010000, 0x01ff010000ffff00, 0x01ff010000ff00ff, + 0x01ff010000ff0001, 0x01ff010000ff0100, 0x01ff01000000ffff, 0x01ff01000000ff01, + 0x01ff010000000000, 0x01ff010000000101, 0x01ff01000001ff00, 0x01ff0100000100ff, + 0x01ff010001ff0000, 0x01ff010001000001, 0x01ff010001000100, 0x01ff010001010000, + 0x01ff0101ffffffff, 0x01ff0101ffffff01, 0x01ff0101ffff01ff, 0x01ff0101ffff0101, + 0x01ff0101ff000000, 0x01ff0101ff01ffff, 0x01ff0101ff01ff01, 0x01ff0101ff0101ff, + 0x01ff0101ff010101, 0x01ff010100ff0000, 0x01ff01010000ff00, 0x01ff0101000000ff, + 0x01ff010100000001, 0x01ff010101ffffff, 0x01ff010101ffff01, 0x01ff010101ff01ff, + 0x01ff010101ff0101, 0x01ff010101000000, 0x01ff01010101ffff, 0x01ff01010101ff01, + 0x01ff0101010101ff, 0x01ff010101010101, 0x0100ffffffff0000, 0x0100ffffff00ff00, + 0x0100ffffff000001, 0x0100ffffff0001ff, 0x0100ffffff000100, 0x0100ffffff010000, + 0x0100ffff00ffff00, 0x0100ffff00ff0001, 0x0100ffff00ff0100, 0x0100ffff00000000, + 0x0100ffff000001ff, 0x0100ffff00000101, 0x0100ffff00010100, 0x0100ffff00010101, + 0x0100ffff01ff0000, 0x0100ffff0100ff00, 0x0100ffff010000ff, 0x0100ffff01000001, + 0x0100ffff01000100, 0x0100ffff01010000, 0x0100ff00ffffff00, 0x0100ff00ffff00ff, + 0x0100ff00ffff0001, 0x0100ff00ffff0100, 0x0100ff00ff00ffff, 0x0100ff00ff000000, + 0x0100ff00ff0001ff, 0x0100ff00ff000101, 0x0100ff00ff01ff00, 0x0100ff00ff0100ff, + 0x0100ff00ff010001, 0x0100ff00ff010100, 0x0100ff0000ffffff, 0x0100ff0000ff0000, + 0x0100ff000000ffff, 0x0100ff000000ff00, 0x0100ff00000000ff, 0x0100ff0000000000, + 0x0100ff0000000001, 0x0100ff0000000100, 0x0100ff000001ff01, 0x0100ff0000010000, + 0x0100ff0001ff00ff, 0x0100ff0001ff0001, 0x0100ff000100ff01, 0x0100ff0001000000, + 0x0100ff00010001ff, 0x0100ff000101ff00, 0x0100ff00010100ff, 0x0100ff0001010001, + 0x0100ff0001010100, 0x0100ff01ffff0000, 0x0100ff01ff00ff00, 0x0100ff01ff0000ff, + 0x0100ff01ff000100, 0x0100ff01ff010000, 0x0100ff0100ff00ff, 0x0100ff0100ff0001, + 0x0100ff0100ff0100, 0x0100ff010000ffff, 0x0100ff010000ff01, 0x0100ff0100000000, + 0x0100ff01000001ff, 0x0100ff0100010001, 0x0100ff0100010100, 0x0100ff0101ff0000, + 0x0100ff01010000ff, 0x0100ff0101000001, 0x0100ff0101010100, 0x010000ffffffff00, + 0x010000ffffff00ff, 0x010000ffffff0001, 0x010000ffff00ffff, 0x010000ffff000000, + 0x010000ffff0001ff, 0x010000ffff010001, 0x010000ff00ffffff, 0x010000ff00ff0101, + 0x010000ff0000ff00, 0x010000ff000000ff, 0x010000ff00000000, 0x010000ff00000001, + 0x010000ff000001ff, 0x010000ff00000100, 0x010000ff0001ffff, 0x010000ff0001ff00, + 0x010000ff0001ff01, 0x010000ff00010000, 0x010000ff01ff00ff, 0x010000ff01ff0001, + 0x010000ff0100ff01, 0x010000ff010000ff, 0x010000ff01000000, 0x010000ff010001ff, + 0x010000ff0101ff00, 0x010000ff01010100, 0x01000000ffffffff, 0x01000000ffff0000, + 0x01000000ffff01ff, 0x01000000ffff0101, 0x01000000ff00ffff, 0x01000000ff00ff00, + 0x01000000ff0000ff, 0x01000000ff000000, 0x01000000ff000001, 0x01000000ff000100, + 0x01000000ff01ff00, 0x01000000ff010000, 0x01000000ff010100, 0x01000000ff010101, + 0x0100000000ffff00, 0x0100000000ff00ff, 0x0100000000ff0000, 0x0100000000ff0001, + 0x0100000000ff0100, 0x010000000000ffff, 0x010000000000ff00, 0x010000000000ff01, + 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001, 0x01000000000001ff, + 0x0100000000000100, 0x0100000000000101, 0x010000000001ff00, 0x01000000000100ff, + 0x0100000000010000, 0x0100000000010001, 0x0100000000010100, 0x0100000001ffff00, + 0x0100000001ff0000, 0x0100000001ff01ff, 0x010000000100ff00, 0x010000000100ff01, + 0x01000000010000ff, 0x0100000001000000, 0x0100000001000001, 0x0100000001000100, + 0x0100000001000101, 0x010000000101ffff, 0x010000000101ff01, 0x0100000001010000, + 0x01000000010101ff, 0x0100000001010101, 0x01000001ffffff00, 0x01000001ffff00ff, + 0x01000001ff00ffff, 0x01000001ff000000, 0x01000001ff000100, 0x01000001ff01ffff, + 0x01000001ff010001, 0x01000001ff010100, 0x0100000100ff0000, 0x0100000100ff01ff, + 0x0100000100ff0100, 0x010000010000ff00, 0x010000010000ff01, 0x0100000100000000, + 0x0100000100000001, 0x0100000100000100, 0x0100000100010000, 0x01000001000101ff, + 0x0100000101ffff01, 0x0100000101ff00ff, 0x0100000101ff0100, 0x0100000101ff0101, + 0x010000010100ff01, 0x01000001010000ff, 0x0100000101000000, 0x01000001010100ff, + 0x0100000101010001, 0x0100000101010100, 0x010001ffffff0000, 0x010001ffff000001, + 0x010001ffff000100, 0x010001ffff010000, 0x010001ff00ffff00, 0x010001ff00ff0001, + 0x010001ff0000ffff, 0x010001ff0000ff01, 0x010001ff00000000, 0x010001ff00000001, + 0x010001ff00000101, 0x010001ff000100ff, 0x010001ff00010000, 0x010001ff01ff0000, + 0x010001ff0100ff00, 0x010001ff01000001, 0x010001ff01000100, 0x010001ff01010000, + 0x01000100ffff00ff, 0x01000100ffff0001, 0x01000100ffff0100, 0x01000100ff00ffff, + 0x01000100ff00ff01, 0x01000100ff000000, 0x01000100ff0001ff, 0x01000100ff000101, + 0x01000100ff01ffff, 0x01000100ff01ff00, 0x01000100ff0100ff, 0x01000100ff010001, + 0x0100010000ffffff, 0x0100010000ffff01, 0x0100010000ff0000, 0x0100010000ff01ff, + 0x0100010000ff0101, 0x010001000000ff00, 0x01000100000000ff, 0x0100010000000000, + 0x0100010000000001, 0x0100010000000100, 0x010001000001ff01, 0x0100010000010000, + 0x0100010000010001, 0x0100010000010101, 0x0100010001ffff00, 0x0100010001ff00ff, + 0x010001000100ffff, 0x010001000100ff01, 0x0100010001000000, 0x0100010001000101, + 0x010001000101ff00, 0x0100010001010001, 0x01000101ffff0000, 0x01000101ff000000, + 0x01000101ff010000, 0x0100010100ff00ff, 0x0100010100ff0001, 0x0100010100ff0100, + 0x010001010000ffff, 0x0100010100000000, 0x01000101000001ff, 0x010001010001ff00, + 0x0100010101ff0000, 0x010001010100ff00, 0x01000101010000ff, 0x0100010101000000, + 0x0100010101000001, 0x0101ffffffffffff, 0x0101ffffffffff01, 0x0101ffffffff01ff, + 0x0101ffffffff0101, 0x0101ffffff000000, 0x0101ffffff01ffff, 0x0101ffffff01ff01, + 0x0101ffffff0101ff, 0x0101ffffff010101, 0x0101ffff00ff0000, 0x0101ffff0000ff00, + 0x0101ffff000000ff, 0x0101ffff00000001, 0x0101ffff00000100, 0x0101ffff01ffffff, + 0x0101ffff01ffff01, 0x0101ffff01ff01ff, 0x0101ffff01ff0101, 0x0101ffff01000000, + 0x0101ffff0101ffff, 0x0101ffff0101ff01, 0x0101ffff010101ff, 0x0101ffff01010101, + 0x0101ff00ffff0000, 0x0101ff00ffff0100, 0x0101ff00ff00ff00, 0x0101ff00ff0000ff, + 0x0101ff00ff000001, 0x0101ff00ff000100, 0x0101ff00ff000101, 0x0101ff0000ff0001, + 0x0101ff0000ff0100, 0x0101ff000000ff00, 0x0101ff0000000000, 0x0101ff00000001ff, + 0x0101ff0000000101, 0x0101ff000001ff00, 0x0101ff00000100ff, 0x0101ff0001ff0000, + 0x0101ff000100ffff, 0x0101ff000100ff01, 0x0101ff0001000001, 0x0101ff0001000100, + 0x0101ff01ffffff01, 0x0101ff01ffff01ff, 0x0101ff01ffff0101, 0x0101ff01ff00ffff, + 0x0101ff01ff000100, 0x0101ff01ff01ff01, 0x0101ff01ff0101ff, 0x0101ff01ff010101, + 0x0101ff0100ff0000, 0x0101ff010000ff00, 0x0101ff0100000001, 0x0101ff0100000100, + 0x0101ff0100010000, 0x0101ff0101ffffff, 0x0101ff0101ffff01, 0x0101ff0101ff01ff, + 0x0101ff0101ff0101, 0x0101ff0101000000, 0x0101ff010101ffff, 0x0101ff010101ff01, + 0x0101ff01010101ff, 0x0101ff0101010101, 0x010100ffff000100, 0x010100ffff010000, + 0x010100ff00ffff00, 0x010100ff00ff00ff, 0x010100ff0000ffff, 0x010100ff000000ff, + 0x010100ff00000000, 0x010100ff000001ff, 0x010100ff00000101, 0x010100ff0001ff00, + 0x010100ff00010000, 0x010100ff00010001, 0x010100ff000101ff, 0x010100ff00010100, + 0x010100ff01ff0000, 0x01010000ffff0001, 0x01010000ffff0100, 0x01010000ff00ffff, + 0x01010000ff00ff01, 0x01010000ff000000, 0x01010000ff0001ff, 0x01010000ff010001, + 0x01010000ff010100, 0x0101000000ffff01, 0x0101000000ff0000, 0x010100000000ff00, + 0x01010000000000ff, 0x0101000000000000, 0x0101000000000001, 0x0101000000000100, + 0x0101000000010000, 0x0101000000010101, 0x0101000001ffff00, 0x0101000001ff00ff, + 0x0101000001ff0000, 0x0101000001ff0001, 0x0101000001ff0100, 0x010100000100ff01, + 0x0101000001000000, 0x01010000010001ff, 0x01010001ffff0000, 0x01010001ff00ff00, + 0x01010001ff000001, 0x01010001ff000101, 0x01010001ff01ff00, 0x01010001ff010000, + 0x0101000100ff00ff, 0x0101000100ff0001, 0x0101000100ff0101, 0x010100010000ff01, + 0x0101000100000000, 0x0101000100000001, 0x01010001000001ff, 0x010100010001ffff, + 0x010100010001ff01, 0x0101000101ff0001, 0x010100010100ffff, 0x0101000101000000, + 0x0101000101000001, 0x0101000101000100, 0x010100010101ff00, 0x01010001010100ff, + 0x0101000101010001, 0x010101ffffffffff, 0x010101ffffffff01, 0x010101ffffff01ff, + 0x010101ffffff0101, 0x010101ffff01ffff, 0x010101ffff01ff01, 0x010101ffff0101ff, + 0x010101ffff010101, 0x010101ff0000ff00, 0x010101ff000000ff, 0x010101ff00000001, + 0x010101ff00000100, 0x010101ff01ffffff, 0x010101ff01ffff01, 0x010101ff01ff01ff, + 0x010101ff01ff0101, 0x010101ff01000000, 0x010101ff0101ffff, 0x010101ff0101ff01, + 0x010101ff010101ff, 0x010101ff01010101, 0x01010100ffff0000, 0x01010100ff0000ff, + 0x01010100ff000100, 0x01010100ff01ff00, 0x01010100ff010000, 0x0101010000ffff00, + 0x010101000000ffff, 0x0101010000000000, 0x0101010000000101, 0x010101000001ff00, + 0x0101010000010001, 0x0101010000010100, 0x010101000100ffff, 0x0101010001000001, + 0x01010101ffffffff, 0x01010101ffffff01, 0x01010101ffff01ff, 0x01010101ffff0101, + 0x01010101ff01ffff, 0x01010101ff01ff01, 0x01010101ff0101ff, 0x01010101ff010101, + 0x010101010000ff00, 0x01010101000000ff, 0x0101010100000001, 0x0101010101ffffff, + 0x0101010101ffff01, 0x0101010101ff01ff, 0x0101010101ff0101, 0x0101010101000000, + 0x010101010101ffff, 0x010101010101ff01, 0x01010101010101ff, 0x0101010101010101, GGML_TABLE_END() +#else +GGML_TABLE_BEGIN(uint32_t, iq1s_grid_gpu, NGRID_IQ1S) + 0x00000000, 0x00000002, 0x00000101, 0x00000200, 0x00000202, 0x00010001, 0x00010101, 0x00020000, + 0x00020002, 0x00020200, 0x00020202, 0x01000101, 0x01010001, 0x01010100, 0x01010102, 0x01020101, + 0x02000000, 0x02000002, 0x02000200, 0x02000202, 0x02010101, 0x02020000, 0x02020002, 0x02020200, + 0x02020202, 0x00000110, 0x00000111, 0x00010011, 0x00010110, 0x00010112, 0x00010211, 0x00010212, + 0x00020111, 0x01000011, 0x01000112, 0x01000211, 0x01010012, 0x01010111, 0x01010212, 0x01020011, + 0x01020110, 0x01020112, 0x01020210, 0x02000111, 0x02010011, 0x02010110, 0x02010112, 0x02020111, + 0x00000020, 0x00000022, 0x00000220, 0x00000222, 0x00010121, 0x00020020, 0x00020022, 0x00020220, + 0x00020222, 0x01000121, 0x01010021, 0x01010221, 0x01020120, 0x01020221, 0x02000020, 0x02000022, + 0x02000220, 0x02000222, 0x02010021, 0x02010121, 0x02010221, 0x02020020, 0x02020022, 0x02020220, + 0x02020222, 0x00011001, 0x00011100, 0x00011102, 0x00021101, 0x01001001, 0x01001201, 0x01011101, + 0x01011202, 0x01021100, 0x01021101, 0x02011001, 0x02011201, 0x02021101, 0x00001011, 0x00001110, + 0x00001111, 0x00001112, 0x00011111, 0x00011210, 0x00011212, 0x00021211, 0x01001010, 0x01001111, + 0x01001212, 0x01011010, 0x01011011, 0x01011110, 0x01011111, 0x01011112, 0x01011211, 0x01021010, + 0x01021012, 0x01021111, 0x01021210, 0x01021212, 0x02001011, 0x02011011, 0x02011111, 0x02011210, + 0x02011212, 0x02021011, 0x02021110, 0x02021111, 0x02021112, 0x02021211, 0x00011120, 0x00011221, + 0x01001021, 0x01001120, 0x01011020, 0x01011022, 0x01011121, 0x01011220, 0x01021020, 0x01021021, + 0x01021122, 0x01021221, 0x02001121, 0x02011021, 0x02011120, 0x02011221, 0x00002000, 0x00002002, + 0x00002200, 0x00002202, 0x00012101, 0x00022000, 0x00022002, 0x00022200, 0x00022202, 0x01002101, + 0x01012001, 0x01012102, 0x01022101, 0x02002000, 0x02002002, 0x02002200, 0x02002202, 0x02012101, + 0x02022000, 0x02022002, 0x02022200, 0x02022202, 0x00002111, 0x00012011, 0x00012110, 0x00012211, + 0x00022110, 0x00022111, 0x01002011, 0x01012010, 0x01012011, 0x01012111, 0x01022011, 0x01022110, + 0x01022211, 0x02012011, 0x02012110, 0x02012112, 0x02012211, 0x02022111, 0x00002020, 0x00002022, + 0x00002220, 0x00002222, 0x00012121, 0x00022020, 0x00022022, 0x00022220, 0x00022222, 0x01002121, + 0x01012021, 0x01012221, 0x01022021, 0x01022121, 0x02002020, 0x02002022, 0x02002121, 0x02002220, + 0x02002222, 0x02012121, 0x02022020, 0x02022022, 0x02022220, 0x02022222, 0x00110000, 0x00110001, + 0x00110100, 0x00110201, 0x00120100, 0x00120101, 0x01100001, 0x01100100, 0x01110000, 0x01110101, + 0x01110200, 0x01120001, 0x01120100, 0x01120101, 0x01120201, 0x02110001, 0x02110100, 0x02110102, + 0x02120001, 0x02120101, 0x00100011, 0x00100110, 0x00100112, 0x00100211, 0x00110010, 0x00110012, + 0x00110111, 0x00110210, 0x00120011, 0x00120110, 0x00120211, 0x01100111, 0x01100212, 0x01110010, + 0x01110011, 0x01110012, 0x01110110, 0x01110111, 0x01110112, 0x01110211, 0x01120010, 0x01120111, + 0x02100110, 0x02110012, 0x02110111, 0x02120011, 0x02120110, 0x00110021, 0x00110120, 0x00110122, + 0x00120121, 0x01100020, 0x01100122, 0x01100221, 0x01110022, 0x01110121, 0x01110220, 0x01110222, + 0x01120120, 0x01120122, 0x02100121, 0x02110021, 0x02110120, 0x02110122, 0x02120121, 0x00101001, + 0x00101102, 0x00101201, 0x00111100, 0x00111101, 0x00111200, 0x00111201, 0x00121001, 0x00121102, + 0x01101001, 0x01101101, 0x01101102, 0x01101200, 0x01101202, 0x01111001, 0x01111100, 0x01111101, + 0x01111102, 0x01111201, 0x01121002, 0x01121101, 0x01121200, 0x02101100, 0x02101201, 0x02111000, + 0x02111100, 0x02111101, 0x02111200, 0x02111201, 0x02111202, 0x02121001, 0x02121100, 0x02121101, + 0x02121201, 0x00101012, 0x00101111, 0x00101212, 0x00111011, 0x00111110, 0x00111111, 0x00111112, + 0x00111211, 0x00121010, 0x00121012, 0x00121111, 0x00121210, 0x00121212, 0x01101011, 0x01101110, + 0x01101111, 0x01101112, 0x01111011, 0x01111012, 0x01111110, 0x01111111, 0x01111112, 0x01111211, + 0x01111212, 0x01121011, 0x01121110, 0x01121111, 0x01121112, 0x01121211, 0x02101010, 0x02101012, + 0x02101110, 0x02101111, 0x02101210, 0x02101212, 0x02111010, 0x02111011, 0x02111110, 0x02111111, + 0x02111112, 0x02111211, 0x02111212, 0x02121010, 0x02121012, 0x02121111, 0x00101021, 0x00101120, + 0x00101121, 0x00101122, 0x00111121, 0x00111122, 0x00111220, 0x00111222, 0x00121021, 0x00121122, + 0x01101020, 0x01101022, 0x01101120, 0x01101121, 0x01101220, 0x01101222, 0x01111021, 0x01111121, + 0x01111122, 0x01111220, 0x01111221, 0x01121021, 0x01121120, 0x01121121, 0x01121220, 0x01121221, + 0x01121222, 0x02101122, 0x02101222, 0x02111022, 0x02111121, 0x02121120, 0x02121221, 0x00112001, + 0x00112102, 0x00122101, 0x01102001, 0x01102100, 0x01102102, 0x01102201, 0x01112000, 0x01112101, + 0x01112200, 0x01112202, 0x01122000, 0x01122001, 0x01122100, 0x01122102, 0x01122201, 0x02102101, + 0x02112001, 0x02112100, 0x02122101, 0x00112010, 0x00112012, 0x00112111, 0x00112212, 0x00122011, + 0x00122111, 0x01102012, 0x01102110, 0x01102111, 0x01102210, 0x01112011, 0x01112110, 0x01112111, + 0x01112112, 0x01112211, 0x01112212, 0x01122010, 0x01122111, 0x01122212, 0x02102211, 0x02112011, + 0x02112012, 0x02112111, 0x02112210, 0x02122011, 0x02122112, 0x02122211, 0x00102221, 0x00112122, + 0x00122120, 0x00122122, 0x01102120, 0x01102122, 0x01102221, 0x01112020, 0x01112022, 0x01112121, + 0x01112220, 0x01122021, 0x01122122, 0x01122221, 0x02102121, 0x02112021, 0x02112122, 0x02112222, + 0x00200000, 0x00200002, 0x00200200, 0x00200202, 0x00210101, 0x00220000, 0x00220002, 0x00220101, + 0x00220200, 0x00220202, 0x01200101, 0x01210001, 0x01210201, 0x01220001, 0x01220101, 0x02200000, + 0x02200002, 0x02200200, 0x02200202, 0x02210101, 0x02220000, 0x02220002, 0x02220101, 0x02220200, + 0x02220202, 0x00200111, 0x00210011, 0x00210110, 0x00210211, 0x00220111, 0x01200012, 0x01200110, + 0x01200211, 0x01210111, 0x01210210, 0x01210212, 0x01220011, 0x01220110, 0x01220111, 0x01220112, + 0x02200111, 0x02210010, 0x02210112, 0x02210211, 0x02220111, 0x00200021, 0x00200220, 0x00200222, + 0x00210021, 0x00210121, 0x00220020, 0x00220022, 0x00220220, 0x00220222, 0x01200121, 0x01210021, + 0x01210122, 0x01210221, 0x01220121, 0x02200021, 0x02200220, 0x02200222, 0x02210021, 0x02210121, + 0x02220020, 0x02220022, 0x02220220, 0x02220222, 0x00201101, 0x00211100, 0x00211102, 0x00211201, + 0x00221101, 0x01201100, 0x01201101, 0x01201102, 0x01201201, 0x01211002, 0x01211101, 0x01211200, + 0x01211202, 0x01221102, 0x02201101, 0x02211001, 0x02211100, 0x02211201, 0x02221001, 0x02221101, + 0x00201211, 0x00211111, 0x00221011, 0x00221211, 0x01201010, 0x01201111, 0x01201210, 0x01211011, + 0x01211110, 0x01211111, 0x01211211, 0x01221012, 0x01221111, 0x01221210, 0x02201211, 0x02211010, + 0x02211110, 0x02211111, 0x02211210, 0x02211212, 0x02221011, 0x02221110, 0x02221112, 0x02221211, + 0x00201121, 0x00211020, 0x00211022, 0x00211221, 0x00221121, 0x01201021, 0x01201221, 0x01211121, + 0x01221020, 0x01221021, 0x01221221, 0x02201120, 0x02201122, 0x02211020, 0x02211222, 0x00202000, + 0x00202002, 0x00202200, 0x00202202, 0x00212101, 0x00222000, 0x00222002, 0x00222200, 0x00222202, + 0x01202101, 0x01212001, 0x01212100, 0x01222101, 0x02202000, 0x02202002, 0x02202200, 0x02202202, + 0x02222000, 0x02222002, 0x02222200, 0x02222202, 0x00202211, 0x00212011, 0x00212110, 0x00212211, + 0x00222111, 0x01202112, 0x01202211, 0x01212012, 0x01212111, 0x01222011, 0x01222110, 0x01222112, + 0x01222211, 0x02202111, 0x02212010, 0x02212112, 0x02212211, 0x02222110, 0x02222111, 0x00202020, + 0x00202022, 0x00202220, 0x00202222, 0x00222020, 0x00222022, 0x00222220, 0x00222222, 0x01202121, + 0x01212021, 0x01212122, 0x01212221, 0x01222121, 0x02202020, 0x02202022, 0x02202220, 0x02202222, + 0x02212121, 0x02222020, 0x02222022, 0x02222220, 0x02222222, 0x10000101, 0x10010001, 0x10010102, + 0x10020101, 0x11000201, 0x11010002, 0x11010101, 0x11010200, 0x11010202, 0x11020001, 0x11020100, + 0x11020102, 0x12010100, 0x12010201, 0x12020001, 0x12020102, 0x10000010, 0x10000011, 0x10000110, + 0x10000112, 0x10000211, 0x10010012, 0x10010111, 0x10010112, 0x10010210, 0x10010212, 0x10020011, + 0x10020112, 0x10020211, 0x11000111, 0x11000210, 0x11000212, 0x11010011, 0x11010110, 0x11010111, + 0x11010112, 0x11010211, 0x11010212, 0x11020111, 0x11020210, 0x11020212, 0x12000011, 0x12000110, + 0x12000112, 0x12010010, 0x12010012, 0x12010111, 0x12020010, 0x12020011, 0x12020012, 0x10000121, + 0x10010021, 0x10010120, 0x10010122, 0x10020121, 0x11000021, 0x11010022, 0x11010121, 0x11010222, + 0x11020120, 0x11020221, 0x12000221, 0x12010120, 0x12020121, 0x10001001, 0x10011101, 0x10011201, + 0x10021201, 0x11001101, 0x11001200, 0x11001202, 0x11011001, 0x11011100, 0x11011101, 0x11011102, + 0x11021001, 0x11021002, 0x11021101, 0x11021200, 0x11021202, 0x12001001, 0x12001102, 0x12001201, + 0x12011000, 0x12011002, 0x12011101, 0x12021000, 0x12021001, 0x12021201, 0x10001011, 0x10001012, + 0x10001111, 0x10001212, 0x10011011, 0x10011110, 0x10011111, 0x10011112, 0x10011211, 0x10021010, + 0x10021111, 0x10021212, 0x11001011, 0x11001110, 0x11001111, 0x11001112, 0x11001211, 0x11011010, + 0x11011011, 0x11011110, 0x11011111, 0x11011112, 0x11011210, 0x11011211, 0x11021011, 0x11021110, + 0x11021111, 0x11021112, 0x11021211, 0x12001012, 0x12001110, 0x12001111, 0x12001210, 0x12011011, + 0x12011110, 0x12011111, 0x12011112, 0x12011211, 0x12011212, 0x12021111, 0x12021210, 0x12021212, + 0x10001021, 0x10001121, 0x10001221, 0x10011120, 0x10011121, 0x10011220, 0x10011222, 0x10021021, + 0x10021120, 0x10021221, 0x11001020, 0x11001022, 0x11001121, 0x11001220, 0x11011020, 0x11011021, + 0x11011022, 0x11011121, 0x11011122, 0x11011221, 0x11021022, 0x11021121, 0x11021220, 0x12001021, + 0x12001121, 0x12001222, 0x12011120, 0x12011121, 0x12021021, 0x12021120, 0x12021122, 0x10002101, + 0x10012001, 0x10012101, 0x10012202, 0x10022101, 0x11002002, 0x11002201, 0x11012000, 0x11012101, + 0x11012200, 0x11022001, 0x11022100, 0x11022102, 0x11022201, 0x12002101, 0x12012001, 0x12012100, + 0x12012102, 0x12012201, 0x12022101, 0x10002011, 0x10002111, 0x10002112, 0x10002212, 0x10012010, + 0x10012110, 0x10012111, 0x10012210, 0x10022011, 0x10022110, 0x10022112, 0x11002010, 0x11002111, + 0x11002212, 0x11012011, 0x11012012, 0x11012110, 0x11012111, 0x11012112, 0x11012211, 0x11022010, + 0x11022012, 0x11022111, 0x11022112, 0x11022212, 0x12002112, 0x12002211, 0x12012012, 0x12012111, + 0x12012112, 0x12012210, 0x12022011, 0x12022110, 0x12022112, 0x12022211, 0x10012122, 0x11002120, + 0x11002122, 0x11002221, 0x11012121, 0x11012220, 0x11012222, 0x11022120, 0x11022221, 0x12012120, + 0x12022121, 0x10100001, 0x10100100, 0x10100101, 0x10100102, 0x10100201, 0x10110002, 0x10110101, + 0x10110202, 0x10120001, 0x10120100, 0x10120201, 0x11100000, 0x11100101, 0x11100200, 0x11110001, + 0x11110100, 0x11110101, 0x11110102, 0x11110201, 0x11120101, 0x11120200, 0x12100102, 0x12100201, + 0x12110101, 0x12110200, 0x12120000, 0x12120001, 0x12120102, 0x12120201, 0x10100111, 0x10100210, + 0x10100211, 0x10100212, 0x10110011, 0x10110110, 0x10110111, 0x10110112, 0x10110210, 0x10110211, + 0x10120010, 0x10120111, 0x10120112, 0x10120210, 0x10120212, 0x11100011, 0x11100110, 0x11100111, + 0x11100112, 0x11100211, 0x11110010, 0x11110011, 0x11110012, 0x11110110, 0x11110111, 0x11110112, + 0x11110210, 0x11110211, 0x11110212, 0x11120011, 0x11120110, 0x11120111, 0x11120112, 0x11120211, + 0x12100012, 0x12100111, 0x12110011, 0x12110110, 0x12110111, 0x12110112, 0x12110211, 0x12120010, + 0x12120111, 0x12120212, 0x10100021, 0x10100122, 0x10110022, 0x10110121, 0x10110222, 0x10120021, + 0x10120120, 0x11100022, 0x11100121, 0x11100222, 0x11110021, 0x11110120, 0x11110121, 0x11110122, + 0x11110221, 0x11120022, 0x11120121, 0x12100121, 0x12110020, 0x12110022, 0x12110121, 0x12110221, + 0x12110222, 0x12120120, 0x10101100, 0x10101101, 0x10111001, 0x10111100, 0x10111101, 0x10111102, + 0x10111200, 0x10111201, 0x10121001, 0x10121101, 0x10121200, 0x10121202, 0x11101001, 0x11101100, + 0x11101101, 0x11101102, 0x11101201, 0x11101202, 0x11111000, 0x11111001, 0x11111100, 0x11111101, + 0x11111102, 0x11111200, 0x11111201, 0x11111202, 0x11121001, 0x11121002, 0x11121100, 0x11121101, + 0x11121102, 0x11121201, 0x12101000, 0x12101200, 0x12101202, 0x12111001, 0x12111100, 0x12111101, + 0x12111102, 0x12111201, 0x12121001, 0x12121100, 0x12121101, 0x12121202, 0x10101011, 0x10101012, + 0x10101110, 0x10101111, 0x10101112, 0x10101211, 0x10111010, 0x10111011, 0x10111012, 0x10111110, + 0x10111111, 0x10111112, 0x10111211, 0x10111212, 0x10121011, 0x10121110, 0x10121111, 0x10121112, + 0x10121211, 0x11101010, 0x11101011, 0x11101012, 0x11101110, 0x11101111, 0x11101112, 0x11101210, + 0x11101211, 0x11111010, 0x11111011, 0x11111012, 0x11111110, 0x11111111, 0x11111112, 0x11111210, + 0x11111211, 0x11111212, 0x11121010, 0x11121011, 0x11121110, 0x11121111, 0x11121112, 0x11121210, + 0x11121211, 0x11121212, 0x12101011, 0x12101110, 0x12101111, 0x12101211, 0x12101212, 0x12111010, + 0x12111011, 0x12111110, 0x12111111, 0x12111112, 0x12111210, 0x12111211, 0x12121011, 0x12121110, + 0x12121111, 0x12121112, 0x12121211, 0x10101020, 0x10101021, 0x10101022, 0x10101120, 0x10101122, + 0x10101220, 0x10101221, 0x10111021, 0x10111120, 0x10111121, 0x10111220, 0x10111221, 0x10121020, + 0x10121021, 0x10121022, 0x10121120, 0x10121121, 0x10121122, 0x10121220, 0x10121221, 0x11101021, + 0x11101121, 0x11101122, 0x11101220, 0x11101221, 0x11101222, 0x11111020, 0x11111021, 0x11111022, + 0x11111120, 0x11111121, 0x11111122, 0x11111220, 0x11111221, 0x11111222, 0x11121021, 0x11121120, + 0x11121121, 0x11121221, 0x12101022, 0x12101121, 0x12101122, 0x12101220, 0x12101221, 0x12101222, + 0x12111021, 0x12111121, 0x12111222, 0x12121022, 0x12121121, 0x12121122, 0x12121220, 0x12121221, + 0x10102100, 0x10102101, 0x10102102, 0x10102201, 0x10112000, 0x10112101, 0x10112200, 0x10122001, + 0x10122202, 0x11102101, 0x11102200, 0x11102202, 0x11112001, 0x11112100, 0x11112101, 0x11112102, + 0x11112200, 0x11112201, 0x11122000, 0x11122002, 0x11122100, 0x11122101, 0x12102002, 0x12102201, + 0x12112000, 0x12112002, 0x12112101, 0x12112200, 0x12122001, 0x12122201, 0x10102011, 0x10102012, + 0x10102111, 0x10102212, 0x10112011, 0x10112110, 0x10112111, 0x10112112, 0x10112211, 0x10122111, + 0x11102011, 0x11102110, 0x11102111, 0x11102112, 0x11102211, 0x11112010, 0x11112011, 0x11112012, + 0x11112110, 0x11112111, 0x11112112, 0x11112210, 0x11112211, 0x11112212, 0x11122011, 0x11122110, + 0x11122111, 0x11122112, 0x11122211, 0x12102011, 0x12102111, 0x12102211, 0x12112011, 0x12112110, + 0x12112111, 0x12112112, 0x12112210, 0x12112211, 0x12122111, 0x10102120, 0x10102220, 0x10112121, + 0x10112222, 0x10122020, 0x10122121, 0x10122122, 0x10122221, 0x11102121, 0x11102220, 0x11102221, + 0x11112021, 0x11112121, 0x11112122, 0x11112220, 0x11112221, 0x11122022, 0x11122121, 0x11122220, + 0x11122222, 0x12102021, 0x12102222, 0x12112022, 0x12112121, 0x12112122, 0x12112220, 0x12112222, + 0x12122021, 0x10200101, 0x10210100, 0x10210102, 0x10210201, 0x10220101, 0x11200100, 0x11210000, + 0x11210101, 0x11210102, 0x11210200, 0x11210202, 0x11220001, 0x11220100, 0x11220102, 0x11220201, + 0x12200001, 0x12210102, 0x12220101, 0x10200011, 0x10200110, 0x10200112, 0x10200211, 0x10210012, + 0x10210111, 0x10220011, 0x10220012, 0x10220112, 0x10220211, 0x11200111, 0x11200211, 0x11210011, + 0x11210111, 0x11210112, 0x11210211, 0x11220111, 0x11220112, 0x11220212, 0x12200110, 0x12200212, + 0x12210012, 0x12210111, 0x12220011, 0x12220112, 0x12220211, 0x10210021, 0x10210122, 0x10210221, + 0x11200020, 0x11200021, 0x11200122, 0x11210121, 0x11210122, 0x11210220, 0x11220020, 0x12200121, + 0x12210021, 0x12210122, 0x12220121, 0x10211001, 0x10211002, 0x10211101, 0x10211102, 0x10211202, + 0x10221001, 0x10221102, 0x10221201, 0x11201000, 0x11201002, 0x11201101, 0x11201200, 0x11201202, + 0x11211001, 0x11211100, 0x11211101, 0x11211102, 0x11211201, 0x11211202, 0x11221000, 0x11221002, + 0x11221101, 0x12201100, 0x12201101, 0x12201201, 0x12211000, 0x12211002, 0x12211100, 0x12211101, + 0x12211102, 0x12211200, 0x12211202, 0x12221001, 0x12221100, 0x12221201, 0x10201111, 0x10201210, + 0x10201212, 0x10211011, 0x10211111, 0x10211112, 0x10211211, 0x11201110, 0x11201111, 0x11201112, + 0x11201211, 0x11211010, 0x11211011, 0x11211110, 0x11211111, 0x11211112, 0x11211211, 0x11221011, + 0x11221110, 0x11221111, 0x11221112, 0x11221211, 0x12201112, 0x12201211, 0x12201212, 0x12211011, + 0x12211111, 0x12211112, 0x12211211, 0x12211212, 0x12221012, 0x12221111, 0x12221112, 0x12221210, + 0x10201022, 0x10201221, 0x10211121, 0x10221020, 0x10221122, 0x10221220, 0x10221221, 0x11201020, + 0x11201121, 0x11201220, 0x11201222, 0x11211021, 0x11211120, 0x11211121, 0x11211122, 0x11211220, + 0x11211222, 0x11221020, 0x11221121, 0x11221220, 0x12201020, 0x12201022, 0x12201121, 0x12201222, + 0x12211120, 0x12211122, 0x12211220, 0x12211221, 0x12221020, 0x12221120, 0x12221122, 0x12221222, + 0x10212102, 0x10212201, 0x10222101, 0x11202001, 0x11212002, 0x11212101, 0x11212202, 0x11222001, + 0x11222201, 0x12202101, 0x12212001, 0x12212200, 0x12222102, 0x10202011, 0x10202110, 0x10212010, + 0x10212111, 0x10222011, 0x10222110, 0x10222112, 0x10222211, 0x11202010, 0x11202011, 0x11202111, + 0x11202112, 0x11202210, 0x11212011, 0x11212110, 0x11212111, 0x11212112, 0x11212211, 0x11222010, + 0x11222111, 0x11222212, 0x12202012, 0x12202110, 0x12202212, 0x12212111, 0x12222011, 0x12222110, + 0x12222111, 0x12222211, 0x10212021, 0x10212122, 0x10212220, 0x11202021, 0x11202120, 0x11202221, + 0x11212020, 0x11212121, 0x11212220, 0x11212222, 0x11222120, 0x11222121, 0x11222221, 0x12202122, + 0x12212120, 0x12212220, 0x12212222, 0x12222122, 0x20000000, 0x20000002, 0x20000200, 0x20000202, + 0x20020000, 0x20020002, 0x20020200, 0x20020202, 0x21000101, 0x21010000, 0x21010001, 0x21010100, + 0x21010102, 0x21010201, 0x21020101, 0x22000000, 0x22000002, 0x22000200, 0x22000202, 0x22010101, + 0x22020000, 0x22020002, 0x22020200, 0x22020202, 0x20000111, 0x20010011, 0x20010110, 0x20010112, + 0x20010211, 0x20020111, 0x21000011, 0x21000110, 0x21000211, 0x21010010, 0x21010012, 0x21010111, + 0x21010112, 0x21010210, 0x21010211, 0x21020110, 0x21020112, 0x21020211, 0x22000111, 0x22000211, + 0x22010110, 0x22010112, 0x22010211, 0x22020111, 0x20000020, 0x20000022, 0x20000220, 0x20000222, + 0x20010121, 0x20020020, 0x20020022, 0x20020220, 0x20020222, 0x21010021, 0x21010120, 0x21010221, + 0x21020121, 0x22000020, 0x22000022, 0x22000220, 0x22000222, 0x22010121, 0x22020020, 0x22020022, + 0x22020220, 0x22020222, 0x20011100, 0x20011201, 0x21001001, 0x21001100, 0x21011001, 0x21011101, + 0x21011202, 0x21021001, 0x21021100, 0x21021201, 0x22011100, 0x22011201, 0x20001011, 0x20001211, + 0x20011012, 0x20011111, 0x20011212, 0x20021112, 0x20021211, 0x21001010, 0x21001011, 0x21001111, + 0x21001210, 0x21011011, 0x21011110, 0x21011111, 0x21011112, 0x21011211, 0x21011212, 0x21021111, + 0x21021112, 0x21021210, 0x21021212, 0x22001011, 0x22001110, 0x22001112, 0x22001211, 0x22011010, + 0x22011012, 0x22011111, 0x22011210, 0x22021112, 0x20011021, 0x20011122, 0x20011221, 0x20021121, + 0x21001021, 0x21001120, 0x21001221, 0x21001222, 0x21011020, 0x21011121, 0x21011221, 0x21011222, + 0x21021021, 0x21021122, 0x21021222, 0x22001121, 0x22011021, 0x22011222, 0x22021120, 0x20002000, + 0x20002002, 0x20002200, 0x20002202, 0x20012101, 0x20022000, 0x20022002, 0x20022200, 0x20022202, + 0x21002001, 0x21002101, 0x21012001, 0x21012100, 0x21012201, 0x21022101, 0x21022201, 0x22002000, + 0x22002002, 0x22002200, 0x22002202, 0x22012101, 0x22022000, 0x22022002, 0x22022200, 0x22022202, + 0x20002111, 0x20002112, 0x20012011, 0x20012110, 0x20012112, 0x20022111, 0x21002011, 0x21002110, + 0x21002112, 0x21002211, 0x21012010, 0x21012012, 0x21012111, 0x21012212, 0x21022011, 0x21022110, + 0x22002111, 0x22012112, 0x22012211, 0x22022111, 0x20002020, 0x20002022, 0x20002220, 0x20002222, + 0x20012121, 0x20022020, 0x20022022, 0x20022220, 0x20022222, 0x21002121, 0x21012021, 0x21012120, + 0x21012122, 0x22002020, 0x22002022, 0x22002220, 0x22002222, 0x22012121, 0x22022020, 0x22022022, + 0x22022220, 0x22022222, 0x20100101, 0x20110001, 0x20110102, 0x20110200, 0x20110201, 0x20120101, + 0x21100001, 0x21100102, 0x21100201, 0x21110101, 0x21110200, 0x21110202, 0x21120201, 0x21120202, + 0x22100101, 0x22110001, 0x22110100, 0x22110102, 0x22110201, 0x22120101, 0x20100011, 0x20100110, + 0x20100112, 0x20100211, 0x20110010, 0x20110111, 0x20110210, 0x20110212, 0x20120011, 0x20120110, + 0x20120112, 0x20120211, 0x21100010, 0x21100111, 0x21110010, 0x21110011, 0x21110110, 0x21110111, + 0x21110112, 0x21110211, 0x21120012, 0x21120111, 0x22100110, 0x22100112, 0x22110012, 0x22110111, + 0x22110210, 0x22120011, 0x22120110, 0x22120112, 0x22120211, 0x20100121, 0x20110021, 0x20110120, + 0x20110221, 0x20120121, 0x21100120, 0x21100122, 0x21100221, 0x21110020, 0x21110022, 0x21110121, + 0x21110220, 0x21120122, 0x21120221, 0x22100121, 0x22110120, 0x22110122, 0x22120221, 0x20101001, + 0x20101100, 0x20101102, 0x20111000, 0x20111101, 0x20111200, 0x20121102, 0x21101000, 0x21101202, + 0x21111001, 0x21111100, 0x21111101, 0x21111102, 0x21111200, 0x21111201, 0x21121000, 0x21121001, + 0x21121002, 0x21121101, 0x22101100, 0x22101102, 0x22111002, 0x22111100, 0x22111101, 0x22111200, + 0x22121001, 0x22121201, 0x20101010, 0x20101111, 0x20101210, 0x20101212, 0x20111010, 0x20111011, + 0x20111110, 0x20111111, 0x20111112, 0x20111211, 0x20121011, 0x20121111, 0x20121211, 0x20121212, + 0x21101011, 0x21101110, 0x21101111, 0x21101112, 0x21101211, 0x21111010, 0x21111011, 0x21111012, + 0x21111110, 0x21111111, 0x21111112, 0x21111210, 0x21111211, 0x21111212, 0x21121011, 0x21121110, + 0x21121111, 0x21121112, 0x21121211, 0x22101011, 0x22101111, 0x22101210, 0x22111011, 0x22111012, + 0x22111110, 0x22111111, 0x22111112, 0x22111211, 0x22111212, 0x22121010, 0x22121012, 0x22121111, + 0x22121210, 0x22121212, 0x20101021, 0x20101120, 0x20111020, 0x20111121, 0x20111221, 0x20121020, + 0x20121122, 0x20121221, 0x21101121, 0x21101220, 0x21101221, 0x21111021, 0x21111022, 0x21111121, + 0x21111122, 0x21111221, 0x21121121, 0x21121220, 0x22101022, 0x22101120, 0x22101221, 0x22101222, + 0x22111022, 0x22111120, 0x22111121, 0x22121120, 0x22121122, 0x22121221, 0x20102101, 0x20112102, + 0x20112201, 0x20122101, 0x21102001, 0x21102102, 0x21112000, 0x21112002, 0x21112101, 0x21112102, + 0x21112202, 0x21122100, 0x21122101, 0x22102101, 0x22112001, 0x22112102, 0x22112201, 0x22122101, + 0x20102110, 0x20102112, 0x20102211, 0x20112010, 0x20112012, 0x20112111, 0x20112210, 0x20112212, + 0x20122010, 0x20122011, 0x20122110, 0x20122112, 0x21102010, 0x21102012, 0x21102111, 0x21102210, + 0x21102212, 0x21112011, 0x21112110, 0x21112111, 0x21112112, 0x21112211, 0x21122012, 0x21122111, + 0x21122112, 0x21122212, 0x22102011, 0x22102110, 0x22112010, 0x22112012, 0x22112111, 0x22112212, + 0x22122011, 0x22122112, 0x20102121, 0x20112121, 0x20122121, 0x21102120, 0x21102122, 0x21102221, + 0x21112020, 0x21112121, 0x21112220, 0x21122021, 0x22102121, 0x22112021, 0x22112120, 0x22112121, + 0x22112122, 0x20200000, 0x20200002, 0x20200200, 0x20200202, 0x20210101, 0x20220000, 0x20220002, + 0x20220200, 0x20220202, 0x21200101, 0x21210001, 0x21210100, 0x21210102, 0x21210201, 0x22200000, + 0x22200002, 0x22200200, 0x22200202, 0x22210101, 0x22220000, 0x22220002, 0x22220200, 0x22220202, + 0x20200111, 0x20200211, 0x20210011, 0x20210110, 0x20210112, 0x20210211, 0x20210212, 0x21200112, + 0x21200211, 0x21210011, 0x21210111, 0x21210210, 0x21210212, 0x21220011, 0x21220110, 0x22200111, + 0x22210010, 0x22210012, 0x22210112, 0x22210211, 0x20200022, 0x20200220, 0x20200222, 0x20210020, + 0x20210221, 0x20220022, 0x20220220, 0x20220222, 0x21200121, 0x21210021, 0x21210122, 0x21210221, + 0x21220121, 0x22200020, 0x22200022, 0x22200220, 0x22200222, 0x22210121, 0x22220020, 0x22220022, + 0x22220220, 0x22220222, 0x20211201, 0x20221101, 0x21201001, 0x21201100, 0x21211000, 0x21211100, + 0x21211101, 0x21211200, 0x21211202, 0x21221001, 0x21221101, 0x21221102, 0x21221200, 0x21221201, + 0x22201101, 0x20201112, 0x20201211, 0x20211010, 0x20211012, 0x20211111, 0x20211210, 0x20221112, + 0x20221211, 0x21201012, 0x21201111, 0x21211011, 0x21211110, 0x21211111, 0x21211112, 0x21211211, + 0x21221111, 0x21221212, 0x22201011, 0x22201110, 0x22201111, 0x22201112, 0x22201211, 0x22211012, + 0x22211111, 0x22211210, 0x20201121, 0x20211021, 0x20211122, 0x20211222, 0x20221021, 0x20221121, + 0x21201120, 0x21201122, 0x21201222, 0x21211022, 0x21211121, 0x21211122, 0x21211220, 0x21221020, + 0x21221022, 0x22201122, 0x22211020, 0x22211121, 0x22211122, 0x22211221, 0x22221021, 0x22221120, + 0x22221122, 0x20202000, 0x20202002, 0x20202200, 0x20202202, 0x20222000, 0x20222002, 0x20222200, + 0x20222202, 0x21212001, 0x21212100, 0x21212102, 0x21212201, 0x22202000, 0x22202002, 0x22202200, + 0x22202202, 0x22212101, 0x22222000, 0x22222002, 0x22222200, 0x22222202, 0x20202111, 0x20212110, + 0x20212211, 0x20222011, 0x20222111, 0x21202011, 0x21212010, 0x21212111, 0x21212212, 0x21222011, + 0x21222112, 0x21222211, 0x22212010, 0x22212112, 0x20202020, 0x20202022, 0x20202220, 0x20202222, + 0x20222020, 0x20222022, 0x20222220, 0x20222222, 0x21212021, 0x21212120, 0x21212122, 0x22202020, + 0x22202022, 0x22202220, 0x22202222, 0x22212121, 0x22222020, 0x22222022, 0x22222220, 0x22222222, +GGML_TABLE_END() +#endif #endif // GGML_COMMON_IMPL diff --git a/ggml-cuda.cu b/ggml-cuda.cu index c207ff87a..d2945d3c2 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -565,8 +565,8 @@ static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N #define QI1_S (QK_K / (4*QR1_S)) typedef struct { half d; - uint8_t qs[QK_K/8]; - uint8_t scales[QK_K/16]; + uint8_t qs[QK_K/8]; + uint16_t qh[QK_K/32]; } block_iq1_s; static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); @@ -1722,11 +1722,22 @@ static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_ const int il = tid/8; // 0...3 const int ib = tid%8; // 0...7 dst_t * y = yy + i*QK_K + 32*ib + 8*il; - const int i8 = 4*ib+il; - uint8_t h = x[i].scales[i8/2] >> 4*(i8%2); - const int8_t * grid = (const int8_t *)(iq1s_grid + (x[i].qs[i8] | ((h & 8) << 5))); - const float d = (float)x[i].d * (2*(h & 7) + 1); - for (int j = 0; j < 8; ++j) y[j] = d * grid[j]; + const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 0xf) + 1); +#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics + int grid32[2]; const int8_t * q = (const int8_t *)grid32; + grid32[0] = *((const int *)(iq1s_grid_gpu + (x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)))); + grid32[1] = __vsub4((grid32[0] >> 4) & 0x0f0f0f0f, 0x01010101); + grid32[0] = __vsub4(grid32[0] & 0x0f0f0f0f, 0x01010101); + for (int j = 0; j < 8; ++j) { + y[j] = d * q[j]; + } +#else + const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8))); + for (int j = 0; j < 4; ++j) { + y[j+0] = d * ((grid[j] & 0xf) - 1); + y[j+4] = d * ((grid[j] >> 4) - 1); + } +#endif #else assert(false); #endif @@ -4538,44 +4549,33 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1( #endif } - static __device__ __forceinline__ float vec_dot_iq1_s_q8_1( const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) { #if QK_K == 256 const block_iq1_s * bq1 = (const block_iq1_s *) vbq; const int ib32 = iqs; - int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0; - const uint8_t h1 = bq1->scales[2*ib32+0]; - const uint8_t h2 = bq1->scales[2*ib32+1]; + int sumi = 0; #if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics const int * q8 = (const int *)bq8_1[ib32].qs; - const int * grid1 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5))); - const int * grid2 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+1] | ((h1 & 0x80) << 1))); - const int * grid3 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+2] | ((h2 & 0x08) << 5))); - const int * grid4 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+3] | ((h2 & 0x80) << 1))); - for (int j = 0; j < 2; ++j) { - sumi1 = __dp4a(q8[j+0], grid1[j], sumi1); - sumi2 = __dp4a(q8[j+2], grid2[j], sumi2); - sumi3 = __dp4a(q8[j+4], grid3[j], sumi3); - sumi4 = __dp4a(q8[j+6], grid4[j], sumi4); + for (int l = 0; l < 4; ++l) { + const int * grid = (const int *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8))); + int grid0 = __vsub4(grid[0] & 0x0f0f0f0f, 0x01010101); + int grid1 = __vsub4((grid[0] >> 4) & 0x0f0f0f0f, 0x01010101); + sumi = __dp4a(q8[2*l+1], grid1, __dp4a(q8[2*l+0], grid0, sumi)); } #else const int8_t * q8 = bq8_1[ib32].qs; - const int8_t * grid1 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5))); - const int8_t * grid2 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+1] | ((h1 & 0x80) << 1))); - const int8_t * grid3 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+2] | ((h2 & 0x08) << 5))); - const int8_t * grid4 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+3] | ((h2 & 0x80) << 1))); - for (int j = 0; j < 8; ++j) { - sumi1 += q8[j+ 0] * grid1[j]; - sumi2 += q8[j+ 8] * grid2[j]; - sumi3 += q8[j+16] * grid3[j]; - sumi4 += q8[j+24] * grid4[j]; + for (int l = 0; l < 4; ++l) { + const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8))); + for (int j = 0; j < 4; ++j) { + sumi += q8[j] * ((grid[j] & 0xf) - 1) + q8[j+4] * ((grid[j] >> 4) - 1); + } + q8 += 8; } #endif const float d = (float)bq1->d * __low2float(bq8_1[ib32].ds); - return d * (sumi1 * (2*(h1 & 7) + 1) + sumi2 * (2*((h1 >> 4) & 7) + 1) + - sumi3 * (2*(h2 & 7) + 1) + sumi4 * (2*((h2 >> 4) & 7) + 1)); + return d * sumi * (2*(bq1->qh[ib32] >> 12) + 1); #else assert(false); return 0.f; diff --git a/ggml-metal.metal b/ggml-metal.metal index 50185ae4d..912822ee6 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -2595,8 +2595,8 @@ typedef struct { typedef struct { half d; - uint8_t qs[QK_K/8]; - uint8_t scales[QK_K/16]; + uint8_t qs[QK_K/8]; + uint16_t qh[QK_K/32]; } block_iq1_s; // Non-linear quants @@ -4338,48 +4338,53 @@ void kernel_mul_mv_iq1_s_f32_impl( device const block_iq1_s * x = (device const block_iq1_s *) src0 + ib_row + offset0; device const float * y = (device const float *) src1 + r1*ne10 + im*ne00*ne1; - float yl[16]; + float yl[32]; float sumf[N_DST]={0.f}, all_sum; const int nb32 = nb * (QK_K / 32); - const int ix = tiisg/2; - const int il = tiisg%2; + const int ix = tiisg; - device const float * y4 = y + 32 * ix + 16 * il; + device const float * y4 = y + 32 * ix; - for (int ib32 = ix; ib32 < nb32; ib32 += 16) { + for (int ib32 = ix; ib32 < nb32; ib32 += 32) { - for (int i = 0; i < 16; ++i) { + float sumy = 0; + for (int i = 0; i < 32; ++i) { yl[i] = y4[i]; + sumy += yl[i]; } const int ibl = ib32 / (QK_K / 32); const int ib = ib32 % (QK_K / 32); device const block_iq1_s * xr = x + ibl; - device const uint8_t * qs = xr->qs + 4 * ib + 2 * il; - device const uint8_t * sc = xr->scales + 2 * ib + il; - device const half * dh = &xr->d; + device const uint8_t * qs = xr->qs + 4 * ib; + device const uint16_t * qh = xr->qh + ib; + device const half * dh = &xr->d; for (int row = 0; row < N_DST; row++) { - constant int8_t * grid1 = (constant int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5))); - constant int8_t * grid2 = (constant int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1))); + constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | ((qh[0] << 8) & 0x700))); + constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | ((qh[0] << 5) & 0x700))); + constant uint8_t * grid3 = (constant uint8_t *)(iq1s_grid_gpu + (qs[2] | ((qh[0] << 2) & 0x700))); + constant uint8_t * grid4 = (constant uint8_t *)(iq1s_grid_gpu + (qs[3] | ((qh[0] >> 1) & 0x700))); - float2 sum = {0}; - for (int j = 0; j < 8; ++j) { - sum[0] += yl[j+ 0] * grid1[j]; - sum[1] += yl[j+ 8] * grid2[j]; + float sum = 0; + for (int j = 0; j < 4; ++j) { + sum += yl[j+ 0] * (grid1[j] & 0xf) + yl[j+ 4] * (grid1[j] >> 4) + + yl[j+ 8] * (grid2[j] & 0xf) + yl[j+12] * (grid2[j] >> 4) + + yl[j+16] * (grid3[j] & 0xf) + yl[j+20] * (grid3[j] >> 4) + + yl[j+24] * (grid4[j] & 0xf) + yl[j+28] * (grid4[j] >> 4); } - sumf[row] += (float)dh[0] * (sum[0] * (2*(sc[0] & 7) + 1) + sum[1] * (2*((sc[0] >> 4) & 7) + 1)); + sumf[row] += (float)dh[0] * (sum - sumy) * (2*(qh[0] >> 12) + 1); dh += nb*sizeof(block_iq1_s)/2; qs += nb*sizeof(block_iq1_s); - sc += nb*sizeof(block_iq1_s); + qh += nb*sizeof(block_iq1_s)/2; } - y4 += 16 * 32; + y4 += 32 * 32; } for (int row = 0; row < N_DST; ++row) { @@ -5066,16 +5071,19 @@ void dequantize_iq2_s(device const block_iq2_s * xb, short il, thread type4x4 & template void dequantize_iq1_s(device const block_iq1_s * xb, short il, thread type4x4 & reg) { // il is 0...15 for QK_K = 256 => index of block of 32 is il/2 + const int ib32 = il/2; + il = il%2; const float d = xb->d; - device const uint8_t * qs = xb->qs + 2*il; - device const uint8_t * sc = xb->scales + il; - const float dl1 = d * (2*(sc[0] & 7) + 1); - const float dl2 = d * (2*((sc[0] >> 4) & 7) + 1); - constant int8_t * grid1 = (constant int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5))); - constant int8_t * grid2 = (constant int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1))); - for (int i = 0; i < 8; ++i) { - reg[i/4+0][i%4] = dl1 * grid1[i]; - reg[i/4+2][i%4] = dl2 * grid2[i]; + device const uint8_t * qs = xb->qs + 4*ib32 + 2*il; + device const uint16_t * qh = xb->qh; + const float dl = d * (2*(qh[ib32] >> 12) + 1); + constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | (((qh[ib32] >> (6*il+0)) & 7) << 8))); + constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | (((qh[ib32] >> (6*il+3)) & 7) << 8))); + for (int i = 0; i < 4; ++i) { + reg[0][i] = dl * (grid1[i] & 0xf) - dl; + reg[1][i] = dl * (grid1[i] >> 4) - dl; + reg[2][i] = dl * (grid2[i] & 0xf) - dl; + reg[3][i] = dl * (grid2[i] >> 4) - dl; } } diff --git a/ggml-quants.c b/ggml-quants.c index 42d8a5d80..f9a3d9fd2 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -3449,39 +3449,22 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in assert(k % QK_K == 0); const int nb = k / QK_K; - float db[4]; - uint16_t idx[4]; - //const int8_t * grid[4]; - for (int i = 0; i < nb; i++) { const float d = GGML_FP16_TO_FP32(x[i].d); - const uint8_t * sc = x[i].scales; - const uint8_t * qs = x[i].qs; + const uint8_t * qs = x[i].qs; + const uint16_t * qh = x[i].qh; - for (int i8 = 0; i8 < QK_K/8; i8 += 4) { - idx[0] = qs[0] | ((sc[0] & 0x08) << 5); - idx[1] = qs[1] | ((sc[0] & 0x80) << 1); - idx[2] = qs[2] | ((sc[1] & 0x08) << 5); - idx[3] = qs[3] | ((sc[1] & 0x80) << 1); - //grid[0] = (const int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5))); - //grid[1] = (const int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1))); - //grid[2] = (const int8_t *)(iq1s_grid + (qs[2] | ((sc[1] & 0x08) << 5))); - //grid[3] = (const int8_t *)(iq1s_grid + (qs[3] | ((sc[1] & 0x80) << 1))); - db[0] = d * (2*(sc[0] & 7) + 1); - db[1] = d * (2*((sc[0] >> 4) & 7) + 1); - db[2] = d * (2*(sc[1] & 7) + 1); - db[3] = d * (2*((sc[1] >> 4) & 7) + 1); + for (int ib = 0; ib < QK_K/32; ++ib) { + const float dl = d * (2*(qh[ib] >> 12) + 1); for (int l = 0; l < 4; ++l) { - const int8_t * grid = (const int8_t *)(iq1s_grid + idx[l]); + const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8))); for (int j = 0; j < 8; ++j) { - //y[j] = db[l] * grid[l][j]; - y[j] = db[l] * grid[j]; + y[j] = dl * grid[j]; } y += 8; } qs += 4; - sc += 2; } } } @@ -9587,113 +9570,72 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void const int nb = n / QK_K; - // TODO: implement for QK_K = 64 -#if defined __ARM_NEON && QK_K == 256 +#if defined __ARM_NEON - const uint8x16_t m8 = vdupq_n_u8(0x08); - const uint8x16_t m7 = vdupq_n_u8(0x07); - const uint8x16_t m1 = vdupq_n_u8(0x01); - const int32x4_t vzero = vdupq_n_s32(0); - - uint16_t gindex[8]; - uint16x8x2_t vindex; - int8x16x4_t q1b; + ggml_int8x16x4_t q1b; ggml_int8x16x4_t q8b; - uint16x8x4_t scales; - int32x4x2_t sumi; - int32x4x2_t dotq; float sumf = 0; for (int i = 0; i < nb; ++i) { - const int8_t * q8 = y[i].qs; - const uint8_t * qs = x[i].qs; - const uint8_t * sc = x[i].scales; + const int8_t * q8 = y[i].qs; + const uint8_t * qs = x[i].qs; + const uint16_t * qh = x[i].qh; - sumi.val[0] = sumi.val[1] = vzero; + int sumi1 = 0, sumi2 = 0; - for (int i128 = 0; i128 < QK_K/128; ++i128) { - const uint8x16_t ql = vld1q_u8(qs); qs += 16; - const uint8x8_t tm1 = vld1_u8 (sc); sc += 8; - const uint8x8_t tm2 = vshr_n_u8(tm1, 4); - const uint8x16_t qh = vcombine_u8(vzip1_u8(tm1, tm2), vzip2_u8(tm1, tm2)); - const uint8x16_t hbit = vandq_u8(qh, m8); - vindex.val[0] = vorrq_u16(vmovl_u8(vget_low_u8 (ql)), vshlq_n_u16(vmovl_u8(vget_low_u8 (hbit)), 5)); - vindex.val[1] = vorrq_u16(vmovl_u8(vget_high_u8(ql)), vshlq_n_u16(vmovl_u8(vget_high_u8(hbit)), 5)); - const uint8x16_t scales8 = vorrq_u8(vshlq_n_u8(vandq_u8(qh, m7), 1), m1); - scales.val[0] = vmovl_u8(vget_low_u8 (scales8)); - scales.val[1] = vmovl_u8(vget_high_u8 (scales8)); + for (int ib = 0; ib < QK_K/32; ib += 2) { - for (int l = 0; l < 2; ++l) { - vst1q_u16(gindex+0, vindex.val[l]); - q1b.val[0] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[0])), vld1_s8((const void *)(iq1s_grid+gindex[1]))); - q1b.val[1] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[2])), vld1_s8((const void *)(iq1s_grid+gindex[3]))); - q1b.val[2] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[4])), vld1_s8((const void *)(iq1s_grid+gindex[5]))); - q1b.val[3] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[6])), vld1_s8((const void *)(iq1s_grid+gindex[7]))); - q8b = ggml_vld1q_s8_x4(q8); q8 += 64; + q1b.val[0] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[0] | ((qh[ib+0] << 8) & 0x700)))), + vld1_s8((const int8_t *)(iq1s_grid + (qs[1] | ((qh[ib+0] << 5) & 0x700))))); + q1b.val[1] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[2] | ((qh[ib+0] << 2) & 0x700)))), + vld1_s8((const int8_t *)(iq1s_grid + (qs[3] | ((qh[ib+0] >> 1) & 0x700))))); + q1b.val[2] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[4] | ((qh[ib+1] << 8) & 0x700)))), + vld1_s8((const int8_t *)(iq1s_grid + (qs[5] | ((qh[ib+1] << 5) & 0x700))))); + q1b.val[3] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[6] | ((qh[ib+1] << 2) & 0x700)))), + vld1_s8((const int8_t *)(iq1s_grid + (qs[7] | ((qh[ib+1] >> 1) & 0x700))))); + qs += 8; - dotq.val[0] = vpaddq_s32(ggml_vdotq_s32(vzero, q1b.val[0], q8b.val[0]), ggml_vdotq_s32(vzero, q1b.val[1], q8b.val[1])); - dotq.val[1] = vpaddq_s32(ggml_vdotq_s32(vzero, q1b.val[2], q8b.val[2]), ggml_vdotq_s32(vzero, q1b.val[3], q8b.val[3])); + q8b = ggml_vld1q_s8_x4(q8); q8 += 64; + + const int32x4_t p1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[0], q8b.val[0]), q1b.val[1], q8b.val[1]); + const int32x4_t p2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[2], q8b.val[2]), q1b.val[3], q8b.val[3]); + + sumi1 += vaddvq_s32(p1) * (2*(qh[ib+0] >> 12) + 1); + sumi2 += vaddvq_s32(p2) * (2*(qh[ib+1] >> 12) + 1); - sumi.val[0] = vmlaq_s32(sumi.val[0], dotq.val[0], vreinterpretq_s32_u32(vmovl_u16(vget_low_u16 (scales.val[l])))); - sumi.val[1] = vmlaq_s32(sumi.val[1], dotq.val[1], vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(scales.val[l])))); - } } - sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * vaddvq_s32(vaddq_s32(sumi.val[0], sumi.val[1])); + sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * (sumi1 + sumi2); } *s = sumf; - // TODO: implement for QK_K = 64 -#elif defined __AVX2__ && QK_K == 256 - - const __m128i m8 = _mm_set1_epi8(0x08); - const __m128i m7 = _mm_set1_epi8(0x07); - const __m128i m1 = _mm_set1_epi8(0x01); - const __m128i shuffle_h = _mm_set_epi8(15, 7, 14, 6, 13, 5, 12, 4, 11, 3, 10, 2, 9, 1, 8, 0); - const __m128i shuffle_s[4] = { - _mm_set_epi32(0x03030303, 0x02020202, 0x01010101, 0x00000000), - _mm_set_epi32(0x07070707, 0x06060606, 0x05050505, 0x04040404), - _mm_set_epi32(0x0b0b0b0b, 0x0a0a0a0a, 0x09090909, 0x08080808), - _mm_set_epi32(0x0f0f0f0f, 0x0e0e0e0e, 0x0d0d0d0d, 0x0c0c0c0c) - }; - - uint64_t aux64; - - typedef union m256i_uint16 { - __m256i reg; - uint16_t s[16]; - } m256i_uint16_t; - - m256i_uint16_t v_gindex; +#elif defined __AVX2__ __m256 accum = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const int8_t * q8 = y[i].qs; - const uint8_t * qs = x[i].qs; - const uint8_t * sc = x[i].scales; + const int8_t * q8 = y[i].qs; + const uint8_t * qs = x[i].qs; + const uint16_t * qh = x[i].qh; __m256i sumi = _mm256_setzero_si256(); - for (int i128 = 0; i128 < QK_K/128; ++i128) { - const __m128i ql = _mm_loadu_si128((const __m128i*)qs); qs += 16; - memcpy(&aux64, sc, 8); sc += 8; - const __m128i qh = _mm_shuffle_epi8(_mm_set_epi64x(aux64 >> 4, aux64), shuffle_h); - const __m256i hbit = _mm256_cvtepu8_epi16(_mm_and_si128(qh, m8)); - v_gindex.reg = _mm256_or_si256(_mm256_cvtepu8_epi16(ql), _mm256_slli_epi16(hbit, 5)); - const __m128i scales = _mm_or_si128(_mm_slli_epi16(_mm_and_si128(qh, m7), 1), m1); + for (int ib = 0; ib < QK_K/32; ib += 2) { + const __m256i q1b_1 = _mm256_set_epi64x(iq1s_grid[qs[3] | ((qh[ib+0] >> 1) & 0x700)], iq1s_grid[qs[2] | ((qh[ib+0] << 2) & 0x700)], + iq1s_grid[qs[1] | ((qh[ib+0] << 5) & 0x700)], iq1s_grid[qs[0] | ((qh[ib+0] << 8) & 0x700)]); + const __m256i q1b_2 = _mm256_set_epi64x(iq1s_grid[qs[7] | ((qh[ib+1] >> 1) & 0x700)], iq1s_grid[qs[6] | ((qh[ib+1] << 2) & 0x700)], + iq1s_grid[qs[5] | ((qh[ib+1] << 5) & 0x700)], iq1s_grid[qs[4] | ((qh[ib+1] << 8) & 0x700)]); + qs += 8; + const __m256i q8b_1 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; + const __m256i q8b_2 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; - for (int i32 = 0; i32 < 4; ++i32) { - const __m256i q8b = _mm256_loadu_si256((const __m256i*)q8); q8 += 32; - const __m256i q1b = _mm256_set_epi64x(iq1s_grid[v_gindex.s[4*i32+3]], iq1s_grid[v_gindex.s[4*i32+2]], - iq1s_grid[v_gindex.s[4*i32+1]], iq1s_grid[v_gindex.s[4*i32+0]]); - const __m256i dot = mul_add_epi8(q1b, q8b); - const __m256i s16 = _mm256_cvtepi8_epi16(_mm_shuffle_epi8(scales, shuffle_s[i32])); - const __m256i p = _mm256_madd_epi16(s16, dot); - sumi = _mm256_add_epi32(sumi, p); - } + const __m256i dot1 = mul_add_epi8(q1b_1, q8b_1); + const __m256i dot2 = mul_add_epi8(q1b_2, q8b_2); + const __m256i p1 = _mm256_madd_epi16(dot1, _mm256_set1_epi16(2*(qh[ib+0] >> 12) + 1)); + const __m256i p2 = _mm256_madd_epi16(dot2, _mm256_set1_epi16(2*(qh[ib+1] >> 12) + 1)); + sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p1, p2)); } accum = _mm256_fmadd_ps(_mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(x[i].d)), _mm256_cvtepi32_ps(sumi), accum); @@ -9704,35 +9646,26 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void #else - int db[4]; - uint16_t idx[4]; - float sumf = 0; - for (int i = 0; i < nb; ++i) { + for (int i = 0; i < nb; i++) { - const int8_t * q8 = y[i].qs; - const uint8_t * qs = x[i].qs; - const uint8_t * sc = x[i].scales; + const int8_t * q8 = y[i].qs; + const uint8_t * qs = x[i].qs; + const uint16_t * qh = x[i].qh; int sumi = 0; - for (int i32 = 0; i32 < QK_K/32; ++i32) { - idx[0] = qs[0] | ((sc[0] & 0x08) << 5); - idx[1] = qs[1] | ((sc[0] & 0x80) << 1); - idx[2] = qs[2] | ((sc[1] & 0x08) << 5); - idx[3] = qs[3] | ((sc[1] & 0x80) << 1); - db[0] = (2*(sc[0] & 7) + 1); - db[1] = (2*((sc[0] >> 4) & 7) + 1); - db[2] = (2*(sc[1] & 7) + 1); - db[3] = (2*((sc[1] >> 4) & 7) + 1); + for (int ib = 0; ib < QK_K/32; ++ib) { + const int ls = 2*(qh[ib] >> 12) + 1; + int lsum = 0; for (int l = 0; l < 4; ++l) { - const int8_t * grid = (const int8_t *)(iq1s_grid + idx[l]); - int suml = 0; - for (int j = 0; j < 8; ++j) suml += q8[j] * grid[j]; - sumi += db[l] * suml; + const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8))); + for (int j = 0; j < 8; ++j) { + lsum += q8[j] * grid[j]; + } q8 += 8; } + sumi += ls * lsum; qs += 4; - sc += 2; } sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * sumi; @@ -9996,7 +9929,7 @@ static inline int iq2_grid_size(enum ggml_type type) { GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ2_S); return type == GGML_TYPE_IQ2_XXS ? 256 : type == GGML_TYPE_IQ2_XS ? 512 : - type == GGML_TYPE_IQ1_S ? 512 : 1024; + type == GGML_TYPE_IQ1_S ? NGRID_IQ1S : 1024; } static int iq2_compare_func(const void * left, const void * right) { @@ -10063,39 +9996,135 @@ void iq2xs_init_impl(enum ggml_type type) { 40962, 40968, 40970, 40992, 41002, 41120, 41297, 41305, 41382, 41472, 41474, 41480, 41514, 41600, 41632, 42048, 42133, 42597, 42648, 43018, 43040, 43042, 43048, 43168, 43176, 43268, 43396, 43398, 43560, 43562, 43665, 43690, }; - static const uint16_t kgrid_1bit_512[512] = { - 10, 33, 41, 85, 132, 134, 160, 162, 277, 337, 340, 345, 357, 405, 516, 545, - 553, 598, 641, 650, 681, 1042, 1044, 1097, 1169, 1176, 1320, 1345, 1365, 1378, 1434, 1444, - 1545, 1617, 1642, 1685, 2053, 2080, 2089, 2133, 2176, 2182, 2208, 2214, 2306, 2384, 2393, 2440, - 2453, 2581, 2664, 2690, 2721, 4117, 4161, 4182, 4184, 4261, 4357, 4369, 4372, 4377, 4390, 4422, - 4432, 4437, 4449, 4457, 4485, 4497, 4505, 4629, 4677, 4696, 4774, 5205, 5217, 5225, 5386, 5397, - 5409, 5445, 5457, 5460, 5461, 5462, 5465, 5472, 5477, 5525, 5545, 5650, 5668, 5717, 5729, 5769, - 5777, 6212, 6234, 6244, 6293, 6424, 6482, 6485, 6502, 6505, 6529, 6538, 6565, 6656, 6682, 6788, - 6806, 6820, 8218, 8224, 8226, 8232, 8277, 8326, 8354, 8469, 8521, 8530, 8549, 8596, 8737, 8794, - 9221, 9253, 9348, 9369, 9380, 9474, 9557, 9633, 9732, 9753, 9793, 9830, 9862, 9880, 10240, 10272, - 10282, 10321, 10406, 10517, 10530, 10566, 10585, 10645, 10896, 16466, 16468, 16473, 16485, 16646, 16660, 16665, - 16725, 16793, 16806, 16914, 16969, 16977, 16996, 17028, 17057, 17408, 17416, 17434, 17493, 17512, 17578, 17685, - 17696, 17733, 17745, 17748, 17749, 17750, 17753, 17765, 17794, 17813, 17946, 17984, 18005, 18072, 18453, 18529, - 18569, 18722, 18756, 18762, 18773, 18794, 18833, 18853, 18945, 19026, 19033, 19077, 20489, 20497, 20500, 20517, - 20565, 20586, 20610, 20633, 20757, 20769, 20776, 20805, 20817, 20820, 20821, 20822, 20825, 20837, 20864, 20872, - 20885, 20896, 21002, 21029, 21077, 21146, 21510, 21525, 21573, 21585, 21588, 21589, 21590, 21593, 21605, 21653, - 21665, 21765, 21777, 21780, 21781, 21782, 21785, 21797, 21825, 21828, 21829, 21830, 21833, 21840, 21841, 21842, - 21844, 21846, 21848, 21849, 21850, 21857, 21860, 21861, 21862, 21865, 21893, 21905, 21908, 21909, 21910, 21913, - 21925, 22024, 22037, 22085, 22097, 22100, 22101, 22102, 22105, 22117, 22165, 22545, 22566, 22568, 22594, 22608, - 22613, 22676, 22697, 22793, 22805, 22853, 22865, 22868, 22869, 22870, 22873, 22885, 22933, 22946, 23046, 23072, - 23125, 23209, 24597, 24640, 24665, 24673, 24725, 24833, 24840, 24869, 24917, 24934, 24965, 25001, 25108, 25110, - 25152, 25184, 25192, 25234, 25616, 25618, 25625, 25685, 25704, 25738, 25744, 25770, 25877, 25897, 25925, 25937, - 25940, 25941, 25942, 25945, 25957, 25986, 26005, 26186, 26197, 26276, 26632, 26634, 26725, 26757, 26770, 26885, - 26965, 26976, 26986, 27032, 27153, 27174, 27200, 27208, 27240, 27269, 27282, 27290, 32778, 32800, 32802, 32808, - 32810, 32853, 32904, 32922, 32930, 32932, 33105, 33110, 33112, 33125, 33157, 33280, 33288, 33301, 33312, 33320, - 33424, 33797, 33829, 33858, 34068, 34133, 34146, 34176, 34217, 34306, 34342, 34441, 34454, 34468, 34832, 34918, - 34965, 34984, 35094, 35137, 35161, 35208, 35232, 35332, 35338, 35368, 35429, 36932, 36934, 36953, 37009, 37125, - 37136, 37138, 37145, 37157, 37205, 37220, 37258, 37290, 37444, 37446, 37465, 37478, 37525, 37905, 37968, 37973, - 38040, 38054, 38145, 38154, 38165, 38180, 38186, 38213, 38225, 38228, 38229, 38230, 38233, 38245, 38293, 38485, - 38504, 38530, 38938, 38985, 38993, 39012, 39040, 39173, 39192, 39253, 39265, 39301, 39316, 39322, 39442, 39497, - 39504, 39590, 40970, 40984, 40992, 41002, 41045, 41120, 41128, 41237, 41289, 41297, 41317, 41364, 41366, 41514, - 41557, 41633, 41989, 42021, 42056, 42068, 42074, 42113, 42242, 42265, 42274, 42325, 42340, 42402, 42501, 42512, - 42533, 42624, 42632, 42666, 43040, 43093, 43106, 43168, 43176, 43264, 43286, 43345, 43429, 43590, 43618, 43680, + static const uint16_t kgrid_1bit_2048[NGRID_IQ1S] = { + 0, 2, 5, 8, 10, 17, 21, 32, 34, 40, 42, 69, 81, 84, 86, 101, + 128, 130, 136, 138, 149, 160, 162, 168, 170, 260, 261, 273, 276, 278, 281, 282, + 293, 321, 326, 329, 338, 341, 346, 353, 356, 358, 360, 389, 401, 404, 406, 421, + 512, 514, 520, 522, 533, 544, 546, 552, 554, 581, 593, 601, 612, 617, 640, 642, + 648, 650, 657, 661, 665, 672, 674, 680, 682, 1041, 1044, 1046, 1061, 1089, 1097, 1109, + 1114, 1124, 1125, 1169, 1177, 1189, 1281, 1284, 1285, 1286, 1301, 1304, 1306, 1321, 1344, 1349, + 1354, 1360, 1361, 1364, 1365, 1366, 1369, 1376, 1378, 1381, 1384, 1386, 1409, 1425, 1429, 1432, + 1434, 1441, 1444, 1445, 1446, 1449, 1556, 1561, 1601, 1604, 1616, 1618, 1621, 1624, 1632, 1633, + 1638, 1641, 1669, 1681, 1684, 1689, 2048, 2050, 2056, 2058, 2069, 2080, 2082, 2088, 2090, 2117, + 2129, 2134, 2149, 2176, 2178, 2184, 2186, 2197, 2208, 2210, 2216, 2218, 2309, 2321, 2324, 2329, + 2340, 2341, 2369, 2384, 2385, 2389, 2401, 2404, 2409, 2449, 2452, 2454, 2457, 2469, 2560, 2562, + 2568, 2570, 2581, 2592, 2594, 2600, 2602, 2629, 2641, 2649, 2657, 2661, 2688, 2690, 2693, 2696, + 2698, 2709, 2720, 2722, 2728, 2730, 4112, 4113, 4116, 4121, 4132, 4133, 4161, 4164, 4176, 4181, + 4184, 4193, 4196, 4197, 4201, 4241, 4244, 4246, 4257, 4261, 4353, 4356, 4358, 4361, 4368, 4370, + 4373, 4376, 4385, 4388, 4393, 4421, 4426, 4432, 4433, 4434, 4436, 4437, 4438, 4441, 4448, 4453, + 4484, 4498, 4501, 4513, 4516, 4625, 4628, 4630, 4645, 4672, 4678, 4681, 4690, 4693, 4696, 4698, + 4708, 4710, 4741, 4753, 4756, 4758, 4773, 5121, 5126, 5129, 5140, 5141, 5144, 5145, 5153, 5158, + 5185, 5189, 5190, 5192, 5194, 5201, 5204, 5205, 5206, 5209, 5218, 5221, 5224, 5252, 5257, 5264, + 5268, 5269, 5272, 5273, 5274, 5281, 5284, 5285, 5289, 5378, 5381, 5386, 5393, 5396, 5397, 5398, + 5401, 5408, 5410, 5413, 5416, 5418, 5441, 5444, 5445, 5446, 5457, 5458, 5460, 5461, 5462, 5465, + 5466, 5473, 5476, 5477, 5478, 5481, 5504, 5506, 5508, 5509, 5512, 5514, 5520, 5521, 5524, 5525, + 5526, 5529, 5530, 5536, 5538, 5541, 5633, 5636, 5637, 5638, 5653, 5654, 5656, 5658, 5665, 5670, + 5696, 5698, 5700, 5701, 5704, 5706, 5713, 5717, 5718, 5720, 5721, 5729, 5732, 5733, 5736, 5737, + 5738, 5766, 5770, 5778, 5781, 5796, 5801, 6161, 6166, 6181, 6209, 6212, 6214, 6217, 6224, 6229, + 6232, 6234, 6240, 6241, 6244, 6246, 6249, 6277, 6289, 6292, 6309, 6416, 6418, 6421, 6426, 6433, + 6437, 6466, 6468, 6469, 6472, 6481, 6484, 6485, 6486, 6489, 6490, 6496, 6501, 6506, 6537, 6545, + 6546, 6549, 6552, 6561, 6566, 6569, 6665, 6678, 6692, 6694, 6724, 6726, 6729, 6736, 6738, 6741, + 6744, 6753, 6758, 6761, 6789, 6801, 6806, 6810, 8192, 8194, 8200, 8202, 8213, 8224, 8226, 8229, + 8232, 8234, 8261, 8273, 8281, 8289, 8293, 8320, 8322, 8328, 8330, 8341, 8352, 8354, 8357, 8360, + 8362, 8453, 8465, 8468, 8473, 8485, 8514, 8516, 8521, 8533, 8536, 8538, 8545, 8548, 8549, 8550, + 8581, 8592, 8598, 8601, 8613, 8705, 8712, 8714, 8721, 8725, 8736, 8738, 8744, 8746, 8773, 8785, + 8790, 8793, 8805, 8833, 8840, 8842, 8849, 8853, 8864, 8866, 8872, 8874, 9221, 9236, 9238, 9241, + 9253, 9284, 9285, 9286, 9289, 9298, 9301, 9304, 9306, 9318, 9349, 9361, 9364, 9369, 9377, 9381, + 9481, 9493, 9505, 9513, 9536, 9541, 9544, 9553, 9556, 9557, 9561, 9570, 9573, 9576, 9609, 9616, + 9620, 9621, 9624, 9626, 9633, 9636, 9638, 9641, 9733, 9744, 9746, 9753, 9765, 9793, 9801, 9813, + 9824, 9825, 9833, 9860, 9862, 9872, 9882, 10240, 10242, 10248, 10250, 10261, 10272, 10274, 10280, 10282, + 10309, 10321, 10324, 10341, 10368, 10370, 10376, 10378, 10400, 10402, 10408, 10410, 10505, 10513, 10516, 10521, + 10533, 10566, 10569, 10578, 10581, 10593, 10596, 10598, 10601, 10629, 10640, 10646, 10649, 10660, 10661, 10752, + 10754, 10760, 10762, 10784, 10786, 10792, 10794, 10821, 10833, 10838, 10841, 10853, 10880, 10882, 10888, 10890, + 10901, 10912, 10914, 10920, 10922, 16389, 16401, 16406, 16421, 16457, 16466, 16469, 16472, 16474, 16481, 16484, + 16486, 16532, 16537, 16545, 16550, 16640, 16641, 16644, 16646, 16649, 16658, 16661, 16662, 16664, 16666, 16673, + 16678, 16681, 16709, 16712, 16714, 16721, 16724, 16725, 16726, 16729, 16730, 16741, 16744, 16746, 16769, 16772, + 16774, 16784, 16786, 16789, 16800, 16801, 16802, 16901, 16913, 16916, 16918, 16933, 16961, 16978, 16981, 16986, + 16996, 17001, 17033, 17044, 17061, 17409, 17429, 17433, 17449, 17477, 17480, 17482, 17489, 17492, 17493, 17494, + 17505, 17506, 17509, 17512, 17514, 17537, 17542, 17545, 17552, 17554, 17557, 17568, 17569, 17577, 17665, 17666, + 17669, 17674, 17681, 17684, 17685, 17686, 17689, 17696, 17701, 17706, 17729, 17732, 17733, 17734, 17737, 17744, + 17745, 17748, 17749, 17750, 17752, 17753, 17761, 17764, 17765, 17766, 17769, 17794, 17796, 17797, 17800, 17809, + 17812, 17813, 17814, 17817, 17818, 17829, 17832, 17834, 17921, 17925, 17929, 17940, 17941, 17944, 17946, 17953, + 17956, 17961, 17984, 17986, 17989, 17992, 18000, 18001, 18002, 18005, 18006, 18009, 18018, 18021, 18024, 18049, + 18053, 18058, 18068, 18069, 18081, 18084, 18086, 18437, 18449, 18453, 18458, 18469, 18498, 18505, 18512, 18517, + 18520, 18529, 18532, 18534, 18537, 18565, 18577, 18580, 18582, 18585, 18597, 18689, 18693, 18694, 18698, 18704, + 18708, 18709, 18712, 18721, 18724, 18726, 18752, 18757, 18762, 18769, 18770, 18772, 18773, 18774, 18777, 18784, + 18786, 18789, 18790, 18794, 18822, 18825, 18834, 18837, 18838, 18840, 18849, 18852, 18854, 18857, 18966, 19012, + 19014, 19017, 19029, 19032, 19034, 19044, 19049, 19092, 19109, 20481, 20484, 20485, 20486, 20489, 20498, 20501, + 20506, 20513, 20516, 20521, 20544, 20549, 20552, 20561, 20564, 20565, 20566, 20569, 20581, 20584, 20614, 20617, + 20629, 20632, 20640, 20641, 20646, 20649, 20741, 20744, 20745, 20746, 20753, 20756, 20757, 20758, 20760, 20761, + 20768, 20773, 20774, 20776, 20778, 20801, 20804, 20805, 20806, 20809, 20816, 20817, 20818, 20820, 20821, 20822, + 20824, 20825, 20826, 20833, 20836, 20837, 20838, 20841, 20866, 20869, 20881, 20884, 20885, 20886, 20889, 20896, + 20901, 20906, 20993, 20998, 21010, 21013, 21018, 21025, 21028, 21058, 21061, 21066, 21073, 21076, 21077, 21078, + 21081, 21090, 21093, 21125, 21136, 21138, 21141, 21145, 21146, 21156, 21508, 21509, 21521, 21524, 21525, 21526, + 21528, 21529, 21537, 21541, 21544, 21546, 21569, 21572, 21573, 21574, 21577, 21578, 21584, 21585, 21588, 21589, + 21590, 21592, 21593, 21594, 21601, 21602, 21604, 21605, 21606, 21609, 21632, 21640, 21642, 21649, 21652, 21653, + 21654, 21657, 21665, 21668, 21669, 21674, 21761, 21762, 21764, 21765, 21766, 21769, 21776, 21777, 21778, 21780, + 21781, 21782, 21785, 21786, 21793, 21796, 21797, 21798, 21801, 21824, 21825, 21826, 21828, 21829, 21830, 21832, + 21833, 21840, 21841, 21842, 21844, 21845, 21846, 21848, 21849, 21850, 21856, 21857, 21860, 21861, 21862, 21864, + 21865, 21866, 21889, 21892, 21893, 21897, 21898, 21904, 21905, 21908, 21909, 21910, 21912, 21913, 21921, 21924, + 21925, 21926, 21929, 22016, 22017, 22018, 22020, 22022, 22024, 22025, 22033, 22036, 22037, 22040, 22041, 22048, + 22049, 22050, 22052, 22053, 22054, 22056, 22057, 22081, 22085, 22086, 22088, 22089, 22090, 22096, 22097, 22098, + 22100, 22101, 22102, 22104, 22105, 22106, 22113, 22116, 22117, 22121, 22146, 22149, 22150, 22152, 22153, 22154, + 22161, 22165, 22170, 22178, 22181, 22182, 22184, 22185, 22532, 22533, 22534, 22537, 22544, 22549, 22552, 22561, + 22570, 22597, 22600, 22602, 22609, 22612, 22613, 22614, 22616, 22617, 22624, 22626, 22628, 22629, 22658, 22665, + 22672, 22674, 22677, 22680, 22689, 22697, 22785, 22786, 22789, 22794, 22801, 22804, 22805, 22806, 22809, 22821, + 22849, 22852, 22853, 22854, 22857, 22864, 22865, 22866, 22868, 22869, 22870, 22872, 22873, 22874, 22881, 22884, + 22885, 22886, 22889, 22913, 22917, 22921, 22929, 22932, 22933, 22934, 22936, 22937, 22949, 23044, 23048, 23061, + 23066, 23072, 23077, 23078, 23081, 23109, 23112, 23113, 23121, 23125, 23126, 23128, 23129, 23138, 23141, 23144, + 23146, 23169, 23178, 23186, 23189, 23190, 23192, 23194, 23201, 24581, 24596, 24598, 24601, 24613, 24644, 24656, + 24661, 24662, 24664, 24666, 24673, 24676, 24678, 24681, 24705, 24726, 24741, 24833, 24836, 24838, 24841, 24850, + 24853, 24865, 24866, 24870, 24873, 24901, 24905, 24913, 24917, 24918, 24921, 24933, 24934, 24938, 24964, 24970, + 24978, 24981, 24993, 24998, 25001, 25105, 25110, 25113, 25152, 25153, 25158, 25173, 25174, 25176, 25184, 25221, + 25233, 25238, 25253, 25617, 25618, 25621, 25622, 25626, 25633, 25638, 25641, 25664, 25666, 25669, 25672, 25674, + 25681, 25684, 25685, 25686, 25689, 25690, 25696, 25698, 25701, 25732, 25733, 25737, 25744, 25746, 25748, 25749, + 25750, 25752, 25754, 25761, 25764, 25769, 25861, 25864, 25866, 25873, 25877, 25878, 25881, 25924, 25925, 25926, + 25929, 25936, 25937, 25940, 25941, 25942, 25945, 25953, 25956, 25957, 25958, 25961, 25990, 25993, 25994, 26001, + 26005, 26006, 26009, 26010, 26018, 26021, 26022, 26024, 26114, 26121, 26133, 26144, 26150, 26152, 26153, 26176, + 26181, 26184, 26186, 26193, 26196, 26197, 26198, 26200, 26202, 26208, 26213, 26216, 26240, 26242, 26245, 26250, + 26260, 26262, 26264, 26265, 26272, 26276, 26278, 26282, 26646, 26649, 26661, 26689, 26706, 26709, 26714, 26721, + 26729, 26757, 26769, 26776, 26790, 26881, 26884, 26896, 26901, 26913, 26916, 26918, 26921, 26944, 26945, 26949, + 26950, 26952, 26961, 26964, 26965, 26966, 26969, 26976, 26981, 26986, 27010, 27012, 27018, 27029, 27041, 27044, + 27045, 27049, 27153, 27158, 27160, 27201, 27204, 27209, 27216, 27221, 27224, 27226, 27236, 27237, 27241, 27270, + 27284, 27288, 27290, 27302, 32768, 32770, 32776, 32778, 32800, 32802, 32808, 32810, 32837, 32848, 32849, 32852, + 32854, 32857, 32869, 32896, 32898, 32904, 32906, 32917, 32928, 32930, 32936, 32938, 33029, 33041, 33044, 33046, + 33049, 33061, 33089, 33092, 33097, 33104, 33106, 33109, 33110, 33112, 33113, 33124, 33126, 33129, 33157, 33161, + 33172, 33174, 33177, 33189, 33280, 33282, 33288, 33290, 33301, 33312, 33314, 33320, 33322, 33361, 33364, 33369, + 33381, 33408, 33410, 33416, 33418, 33429, 33440, 33442, 33448, 33450, 33812, 33817, 33857, 33860, 33873, 33877, + 33882, 33889, 33892, 33897, 33940, 33945, 34049, 34057, 34066, 34069, 34074, 34086, 34089, 34112, 34113, 34117, + 34120, 34129, 34132, 34133, 34134, 34137, 34138, 34149, 34150, 34152, 34154, 34177, 34180, 34182, 34185, 34192, + 34194, 34197, 34200, 34214, 34321, 34326, 34329, 34341, 34369, 34372, 34377, 34378, 34384, 34389, 34393, 34394, + 34401, 34406, 34410, 34437, 34449, 34458, 34468, 34816, 34818, 34824, 34826, 34837, 34848, 34850, 34856, 34858, + 34881, 34885, 34897, 34900, 34905, 34917, 34921, 34944, 34946, 34952, 34954, 34965, 34976, 34978, 34984, 34986, + 35077, 35078, 35089, 35092, 35094, 35109, 35137, 35140, 35142, 35145, 35152, 35154, 35157, 35162, 35169, 35172, + 35205, 35222, 35225, 35237, 35328, 35330, 35336, 35338, 35349, 35360, 35362, 35368, 35370, 35397, 35409, 35412, + 35414, 35456, 35458, 35464, 35466, 35477, 35488, 35490, 35496, 35498, 36869, 36881, 36886, 36888, 36889, 36901, + 36929, 36934, 36937, 36949, 36952, 36954, 36969, 36970, 36997, 37009, 37012, 37014, 37017, 37029, 37121, 37124, + 37126, 37129, 37136, 37141, 37144, 37146, 37153, 37156, 37158, 37161, 37184, 37189, 37200, 37201, 37204, 37205, + 37206, 37209, 37218, 37221, 37252, 37254, 37266, 37269, 37272, 37281, 37284, 37286, 37289, 37381, 37393, 37396, + 37401, 37413, 37444, 37446, 37449, 37456, 37458, 37461, 37464, 37478, 37481, 37509, 37524, 37526, 37545, 37889, + 37892, 37894, 37904, 37909, 37912, 37926, 37952, 37962, 37969, 37972, 37973, 37974, 37976, 37977, 37984, 37985, + 37986, 37989, 38020, 38022, 38034, 38036, 38037, 38040, 38049, 38057, 38144, 38149, 38152, 38154, 38160, 38161, + 38164, 38165, 38166, 38169, 38177, 38181, 38185, 38186, 38209, 38212, 38213, 38214, 38217, 38224, 38225, 38226, + 38228, 38229, 38230, 38232, 38233, 38234, 38241, 38244, 38245, 38246, 38249, 38273, 38277, 38280, 38289, 38290, + 38292, 38293, 38294, 38297, 38298, 38304, 38306, 38309, 38312, 38314, 38401, 38404, 38416, 38421, 38425, 38432, + 38438, 38441, 38469, 38472, 38473, 38481, 38482, 38485, 38486, 38489, 38501, 38504, 38530, 38532, 38537, 38538, + 38546, 38548, 38549, 38564, 38566, 38569, 38917, 38934, 38937, 38949, 38977, 38982, 38992, 38994, 38997, 38998, + 39002, 39012, 39013, 39045, 39057, 39062, 39065, 39077, 39172, 39174, 39177, 39184, 39186, 39189, 39192, 39194, + 39200, 39201, 39204, 39206, 39232, 39234, 39237, 39240, 39242, 39249, 39252, 39253, 39254, 39257, 39266, 39269, + 39270, 39274, 39297, 39300, 39312, 39314, 39317, 39322, 39329, 39334, 39429, 39445, 39461, 39492, 39494, 39497, + 39504, 39509, 39512, 39521, 39557, 39569, 39572, 39573, 39574, 40960, 40962, 40968, 40970, 40981, 40992, 40994, + 41000, 41002, 41029, 41041, 41044, 41046, 41049, 41088, 41090, 41096, 41098, 41109, 41120, 41122, 41128, 41130, + 41221, 41225, 41233, 41236, 41238, 41241, 41242, 41286, 41289, 41297, 41301, 41304, 41306, 41313, 41316, 41349, + 41360, 41362, 41366, 41369, 41474, 41480, 41482, 41488, 41497, 41506, 41512, 41514, 41541, 41553, 41558, 41561, + 41573, 41600, 41602, 41608, 41610, 41621, 41632, 41634, 41640, 41642, 42009, 42021, 42049, 42052, 42064, 42068, + 42069, 42072, 42074, 42081, 42085, 42086, 42088, 42089, 42117, 42246, 42249, 42256, 42258, 42261, 42264, 42278, + 42281, 42306, 42309, 42321, 42324, 42325, 42326, 42329, 42341, 42346, 42369, 42372, 42373, 42374, 42377, 42386, + 42389, 42392, 42501, 42513, 42518, 42522, 42529, 42533, 42564, 42566, 42570, 42578, 42581, 42582, 42584, 42592, + 42594, 42630, 42640, 42645, 42646, 42649, 42657, 42660, 42662, 43008, 43010, 43016, 43018, 43040, 43042, 43048, + 43050, 43089, 43092, 43094, 43097, 43136, 43138, 43144, 43146, 43157, 43168, 43170, 43176, 43178, 43269, 43284, + 43289, 43297, 43301, 43329, 43344, 43349, 43354, 43361, 43366, 43369, 43408, 43414, 43520, 43522, 43528, 43530, + 43552, 43554, 43560, 43562, 43601, 43604, 43606, 43648, 43650, 43656, 43658, 43669, 43680, 43682, 43688, 43690, }; static const uint16_t kgrid_2bit_1024[1024] = { 0, 2, 5, 8, 10, 17, 20, 22, 25, 32, 34, 37, 40, 65, 68, 70, @@ -10169,7 +10198,7 @@ void iq2xs_init_impl(enum ggml_type type) { const int nwant = type == GGML_TYPE_IQ1_S ? 3 : type == GGML_TYPE_IQ2_S ? 1 : 2; const uint16_t * kgrid = type == GGML_TYPE_IQ2_XXS ? kgrid_2bit_256 : type == GGML_TYPE_IQ2_XS ? kgrid_2bit_512 : - type == GGML_TYPE_IQ1_S ? kgrid_1bit_512 : kgrid_2bit_1024; + type == GGML_TYPE_IQ1_S ? kgrid_1bit_2048 : kgrid_2bit_1024; uint64_t * kgrid_q2xs; int * kmap_q2xs; uint16_t * kneighbors_q2xs; @@ -11408,12 +11437,70 @@ static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const u return grid_index; } +static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const uint64_t * restrict grid, + const float * restrict xval, const float * restrict weight, float scale, int8_t * restrict L, int ngrid) { + int num_neighbors = neighbours[0]; + GGML_ASSERT(num_neighbors > 0); + float best_score = FLT_MAX; + int grid_index = -1; + for (int j = 1; j <= num_neighbors; ++j) { + const int8_t * pg = (const int8_t *)(grid + neighbours[j]); + float d2 = 0; + for (int i = 0; i < 8; ++i) { + float q = (pg[i] - 3)/2; + float w = weight[i]; + float diff = scale*q - xval[i]; + d2 += w*diff*diff; + } + if (d2 < best_score) { + best_score = d2; + grid_index = neighbours[j]; + } + } + if (grid_index < 0) { + for (int i = 0; i < ngrid; ++i) { + const int8_t * grid_i = (const int8_t *)(grid + i); + float d2 = 0; + for (int j = 0; j < 8; ++j) { + float w = weight[j]; + float q = (grid_i[j] - 3)/2; + float diff = scale*q - xval[i]; + d2 += w*diff*diff; + } + if (d2 < best_score) { + best_score = d2; + grid_index = i; + } + } + } + if (grid_index < 0) { + printf("Oops, did not find grid point\n"); + printf("Have %d neighbours\n", num_neighbors); + for (int j = 1; j <= num_neighbors; ++j) { + const int8_t * pg = (const int8_t *)(grid + neighbours[j]); + float sumqx = 0, sumq2 = 0; + for (int i = 0; i < 8; ++i) { + float q = (pg[i] - 3)/2; + float w = weight[i]; + sumqx += w*q*xval[i]; + sumq2 += w*q*q; + } + printf(" neighbour %d: sumqx = %g sumq2 = %g\n", j, (double)sumqx, (double)sumq2); + } + } + GGML_ASSERT(grid_index >= 0); + const int8_t * pg = (const int8_t *)(grid + grid_index); + for (int i = 0; i < 8; ++i) L[i] = (pg[i] - 1)/2; + return grid_index; +} + static int iq1_sort_helper(const void * left, const void * right) { const float * l = left; const float * r = right; return *l < *r ? -1 : *l > *r ? 1 : 0; } +#define IQ1S_BLOCK_SIZE 32 static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int n, const float * restrict quant_weights) { const int gindex = iq2_data_index(GGML_TYPE_IQ1_S); @@ -11432,37 +11519,37 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy block_iq1_s * y = vy; - float scales[QK_K/8]; - float weight[8]; - int8_t L[8]; - float sumx[9]; - float sumw[9]; - float pairs[16]; + float scales[QK_K/IQ1S_BLOCK_SIZE]; + float weight[IQ1S_BLOCK_SIZE]; + int8_t L[IQ1S_BLOCK_SIZE]; + float sumx[IQ1S_BLOCK_SIZE+1]; + float sumw[IQ1S_BLOCK_SIZE+1]; + float pairs[2*IQ1S_BLOCK_SIZE]; int * idx = (int *)(pairs + 1); - uint8_t hbit[QK_K/8]; + uint16_t index[IQ1S_BLOCK_SIZE/8]; for (int ibl = 0; ibl < nbl; ++ibl) { y[ibl].d = GGML_FP32_TO_FP16(0.f); memset(y[ibl].qs, 0, QK_K/8); - memset(y[ibl].scales, 0, QK_K/16); + memset(y[ibl].qh, 0, QK_K/16); float max_scale = 0; const float * xbl = x + QK_K*ibl; float sumx2 = 0; for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i]; - float sigma2 = sumx2/QK_K; + float sigma2 = 2*sumx2/QK_K; - for (int ib = 0; ib < QK_K/8; ++ib) { - const float * xb = xbl + 8*ib; - const float * qw = quant_weights + QK_K*ibl + 8*ib; - for (int i = 0; i < 8; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]); + for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ++ib) { + const float * xb = xbl + IQ1S_BLOCK_SIZE*ib; + const float * qw = quant_weights + QK_K*ibl + IQ1S_BLOCK_SIZE*ib; + for (int i = 0; i < IQ1S_BLOCK_SIZE; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]); float max = fabsf(xb[0]); - for (int i = 1; i < 8; ++i) max = MAX(max, fabsf(xb[i])); + for (int i = 1; i < IQ1S_BLOCK_SIZE; ++i) max = MAX(max, fabsf(xb[i])); if (!max) { scales[ib] = 0; - memset(L, 1, 8); + memset(L, 1, IQ1S_BLOCK_SIZE); continue; } // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem. @@ -11471,14 +11558,14 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and // Wi = sum[weight[j], j = 0...i], and use these to quckly get get the optimum scale // for each possible and score for each split. - for (int j = 0; j < 8; ++j) { + for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) { pairs[2*j] = xb[j]; idx[2*j] = j; } - qsort(pairs, 8, 2*sizeof(float), iq1_sort_helper); + qsort(pairs, IQ1S_BLOCK_SIZE, 2*sizeof(float), iq1_sort_helper); { sumx[0] = sumw[0] = 0; - for (int j = 0; j < 8; ++j) { + for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) { int i = idx[2*j]; sumx[j+1] = sumx[j] + weight[i]*xb[i]; sumw[j+1] = sumw[j] + weight[i]; @@ -11486,10 +11573,10 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy } float best_score = 0, scale = max; int besti1 = 0, besti2 = 0; - for (int i1 = 0; i1 <= 8; ++i1) { - for (int i2 = i1; i2 <= 8; ++i2) { - float sumqx = -(sumx[i1] - sumx[0]) + (sumx[8] - sumx[i2]); - float sumq2 = (sumw[i1] - sumw[0]) + (sumw[8] - sumw[i2]); + for (int i1 = 0; i1 <= IQ1S_BLOCK_SIZE; ++i1) { + for (int i2 = i1; i2 <= IQ1S_BLOCK_SIZE; ++i2) { + float sumqx = -(sumx[i1] - sumx[0]) + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2]); + float sumq2 = (sumw[i1] - sumw[0]) + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2]); if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) { scale = sumqx/sumq2; best_score = scale*sumqx; besti1 = i1; besti2 = i2; @@ -11498,23 +11585,43 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy } for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0; for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1; - for (int j = besti2; j < 8; ++j) L[idx[2*j]] = 2; + for (int j = besti2; j < IQ1S_BLOCK_SIZE; ++j) L[idx[2*j]] = 2; if (scale < 0) { - for (int j = 0; j < 8; ++j) L[j] = 2 - L[j]; + for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) L[j] = 2 - L[j]; scale = -scale; } - // Now we check if the solution found above corresponds to a grid point and, if not, use a neighbouring - // grid point that minimizes SSD. - uint16_t u = 0; - for (int j = 0; j < 8; ++j) u |= (L[j] << 2*j); - int grid_index = kmap_q2xs[u]; - if (grid_index < 0) { - const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1; - grid_index = iq1_find_best_neighbour(neighbours, kgrid_q2xs, xb, weight, &scale, L, NGRID_IQ2XXS); - GGML_ASSERT(grid_index >= 0); + bool all_on_grid = true; + for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) { + uint16_t u = 0; + for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j); + int grid_index = kmap_q2xs[u]; + if (grid_index < 0) { + all_on_grid = false; + const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1; + grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, L + 8*k, NGRID_IQ1S); + GGML_ASSERT(grid_index >= 0); + } + index[k] = grid_index; } - y[ibl].qs[ib] = grid_index & 255; - hbit[ib] = grid_index >> 8; + if (!all_on_grid) { + float sumqx = 0, sumq2 = 0; + for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) { + const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]); + for (int j = 0; j < 8; ++j) { + float w = weight[8*k + j]; + float q = (pg[j] - 3)/2; + sumqx += w*q*xb[8*k+j]; + sumq2 += w*q*q; + } + } + if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2; + } + uint16_t h = 0; + for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) { + y[ibl].qs[(IQ1S_BLOCK_SIZE/8)*ib + k] = index[k] & 255; + h |= (index[k] >> 8) << 3*k; + } + y[ibl].qh[ib] = h; GGML_ASSERT(scale >= 0); scales[ib] = scale; max_scale = MAX(max_scale, scale); @@ -11525,14 +11632,13 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy continue; } - float d = max_scale/15; - y[ibl].d = GGML_FP32_TO_FP16(d*1.085f); // 1.085f is another fudge factor. Don't ask me why it is needed. + float d = max_scale/31; + y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.085f is another fudge factor. Don't ask me why it is needed. float id = 1/d; - for (int ib = 0; ib < QK_K/8; ++ib) { + for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ++ib) { int l = nearest_int(0.5f*(id*scales[ib]-1)); - l = MAX(0, MIN(7, l)); - if (hbit[ib]) l |= 8; - y[ibl].scales[ib/2] |= (l << 4*(ib%2)); + l = MAX(0, MIN(15, l)); + y[ibl].qh[ib] |= (l << 12); } } } diff --git a/ggml-quants.h b/ggml-quants.h index 47dd52856..74aabf415 100644 --- a/ggml-quants.h +++ b/ggml-quants.h @@ -217,8 +217,8 @@ static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N typedef struct { ggml_fp16_t d; - uint8_t qs[QK_K/8]; - uint8_t scales[QK_K/16]; + uint8_t qs[QK_K/8]; + uint16_t qh[QK_K/32]; } block_iq1_s; static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); From ee35600b9061b1ea0c4ea87fce6844297632b2a8 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 11 Mar 2024 09:56:47 +0200 Subject: [PATCH 13/48] llama : fix F16/F32 downcast + improve names (#5980) --- llama.cpp | 67 +++++++++++++++++++++++++++++-------------------------- llama.h | 2 +- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/llama.cpp b/llama.cpp index 249442166..110e509cc 100644 --- a/llama.cpp +++ b/llama.cpp @@ -11636,7 +11636,7 @@ static void llama_tensor_dequantize_internal( workers.clear(); } -static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) { +static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) { const std::string name = ggml_get_name(tensor); // TODO: avoid hardcoded tensor names - use the TN_* constants @@ -11951,40 +11951,40 @@ static int32_t llama_tensor_quantize_internal(enum ggml_type new_type, const flo } static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) { - ggml_type quantized_type; + ggml_type default_type; llama_ftype ftype = params->ftype; switch (params->ftype) { - case LLAMA_FTYPE_MOSTLY_Q4_0: quantized_type = GGML_TYPE_Q4_0; break; - case LLAMA_FTYPE_MOSTLY_Q4_1: quantized_type = GGML_TYPE_Q4_1; break; - case LLAMA_FTYPE_MOSTLY_Q5_0: quantized_type = GGML_TYPE_Q5_0; break; - case LLAMA_FTYPE_MOSTLY_Q5_1: quantized_type = GGML_TYPE_Q5_1; break; - case LLAMA_FTYPE_MOSTLY_Q8_0: quantized_type = GGML_TYPE_Q8_0; break; - case LLAMA_FTYPE_MOSTLY_F16: quantized_type = GGML_TYPE_F16; break; - case LLAMA_FTYPE_ALL_F32: quantized_type = GGML_TYPE_F32; break; + case LLAMA_FTYPE_MOSTLY_Q4_0: default_type = GGML_TYPE_Q4_0; break; + case LLAMA_FTYPE_MOSTLY_Q4_1: default_type = GGML_TYPE_Q4_1; break; + case LLAMA_FTYPE_MOSTLY_Q5_0: default_type = GGML_TYPE_Q5_0; break; + case LLAMA_FTYPE_MOSTLY_Q5_1: default_type = GGML_TYPE_Q5_1; break; + case LLAMA_FTYPE_MOSTLY_Q8_0: default_type = GGML_TYPE_Q8_0; break; + case LLAMA_FTYPE_MOSTLY_F16: default_type = GGML_TYPE_F16; break; + case LLAMA_FTYPE_ALL_F32: default_type = GGML_TYPE_F32; break; // K-quants case LLAMA_FTYPE_MOSTLY_Q2_K_S: - case LLAMA_FTYPE_MOSTLY_Q2_K: quantized_type = GGML_TYPE_Q2_K; break; - case LLAMA_FTYPE_MOSTLY_IQ3_XS: quantized_type = GGML_TYPE_IQ3_S; break; + case LLAMA_FTYPE_MOSTLY_Q2_K: default_type = GGML_TYPE_Q2_K; break; + case LLAMA_FTYPE_MOSTLY_IQ3_XS: default_type = GGML_TYPE_IQ3_S; break; case LLAMA_FTYPE_MOSTLY_Q3_K_S: case LLAMA_FTYPE_MOSTLY_Q3_K_M: - case LLAMA_FTYPE_MOSTLY_Q3_K_L: quantized_type = GGML_TYPE_Q3_K; break; + case LLAMA_FTYPE_MOSTLY_Q3_K_L: default_type = GGML_TYPE_Q3_K; break; case LLAMA_FTYPE_MOSTLY_Q4_K_S: - case LLAMA_FTYPE_MOSTLY_Q4_K_M: quantized_type = GGML_TYPE_Q4_K; break; + case LLAMA_FTYPE_MOSTLY_Q4_K_M: default_type = GGML_TYPE_Q4_K; break; case LLAMA_FTYPE_MOSTLY_Q5_K_S: - case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break; - case LLAMA_FTYPE_MOSTLY_Q6_K: quantized_type = GGML_TYPE_Q6_K; break; - case LLAMA_FTYPE_MOSTLY_IQ2_XXS: quantized_type = GGML_TYPE_IQ2_XXS; break; - case LLAMA_FTYPE_MOSTLY_IQ2_XS: quantized_type = GGML_TYPE_IQ2_XS; break; - case LLAMA_FTYPE_MOSTLY_IQ2_S: quantized_type = GGML_TYPE_IQ2_XS; break; - case LLAMA_FTYPE_MOSTLY_IQ2_M: quantized_type = GGML_TYPE_IQ2_S; break; - case LLAMA_FTYPE_MOSTLY_IQ3_XXS: quantized_type = GGML_TYPE_IQ3_XXS; break; - case LLAMA_FTYPE_MOSTLY_IQ1_S: quantized_type = GGML_TYPE_IQ1_S; break; - case LLAMA_FTYPE_MOSTLY_IQ4_NL: quantized_type = GGML_TYPE_IQ4_NL; break; - case LLAMA_FTYPE_MOSTLY_IQ4_XS: quantized_type = GGML_TYPE_IQ4_XS; break; - case LLAMA_FTYPE_MOSTLY_IQ3_S: quantized_type = GGML_TYPE_IQ3_S; break; - case LLAMA_FTYPE_MOSTLY_IQ3_M: quantized_type = GGML_TYPE_IQ3_S; break; + case LLAMA_FTYPE_MOSTLY_Q5_K_M: default_type = GGML_TYPE_Q5_K; break; + case LLAMA_FTYPE_MOSTLY_Q6_K: default_type = GGML_TYPE_Q6_K; break; + case LLAMA_FTYPE_MOSTLY_IQ2_XXS: default_type = GGML_TYPE_IQ2_XXS; break; + case LLAMA_FTYPE_MOSTLY_IQ2_XS: default_type = GGML_TYPE_IQ2_XS; break; + case LLAMA_FTYPE_MOSTLY_IQ2_S: default_type = GGML_TYPE_IQ2_XS; break; + case LLAMA_FTYPE_MOSTLY_IQ2_M: default_type = GGML_TYPE_IQ2_S; break; + case LLAMA_FTYPE_MOSTLY_IQ3_XXS: default_type = GGML_TYPE_IQ3_XXS; break; + case LLAMA_FTYPE_MOSTLY_IQ1_S: default_type = GGML_TYPE_IQ1_S; break; + case LLAMA_FTYPE_MOSTLY_IQ4_NL: default_type = GGML_TYPE_IQ4_NL; break; + case LLAMA_FTYPE_MOSTLY_IQ4_XS: default_type = GGML_TYPE_IQ4_XS; break; + case LLAMA_FTYPE_MOSTLY_IQ3_S: default_type = GGML_TYPE_IQ3_S; break; + case LLAMA_FTYPE_MOSTLY_IQ3_M: default_type = GGML_TYPE_IQ3_S; break; default: throw std::runtime_error(format("invalid output file type %d\n", ftype)); } @@ -12125,23 +12125,26 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s // do not quantize Mamba's small yet 2D weights // NOTE: can't use LLM_TN here because the layer number is not known quantize &= name.find("ssm_conv1d.weight") == std::string::npos; - quantize &= name.find("ssm_x.weight") == std::string::npos; - quantize &= name.find("ssm_dt.weight") == std::string::npos; + quantize &= name.find("ssm_x.weight") == std::string::npos; + quantize &= name.find("ssm_dt.weight") == std::string::npos; enum ggml_type new_type; void * new_data; size_t new_size; if (quantize) { - new_type = quantized_type; - if (!params->pure) { - new_type = get_k_quant_type(qs, new_type, tensor, ftype); + new_type = default_type; + + // get more optimal quantization type based on the tensor shape, layer, etc. + if (!params->pure && ggml_is_quantized(default_type)) { + new_type = llama_tensor_get_type(qs, new_type, tensor, ftype); } // If we've decided to quantize to the same type the tensor is already // in then there's nothing to do. quantize = tensor->type != new_type; } + if (!quantize) { new_type = tensor->type; new_data = tensor->data; @@ -12187,7 +12190,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s f32_data = (float *) f32_conv_buf.data(); } - LLAMA_LOG_INFO("quantizing to %s .. ", ggml_type_name(new_type)); + LLAMA_LOG_INFO("converting to %s .. ", ggml_type_name(new_type)); fflush(stdout); if (work.size() < nelements * 4) { @@ -12235,7 +12238,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s LLAMA_LOG_INFO("%s: quant size = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0); if (qs.n_fallback > 0) { - LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) incompatible with k-quants and required fallback quantization\n", + LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) required fallback quantization\n", __func__, qs.n_fallback, qs.n_k_quantized + qs.n_fallback); } } diff --git a/llama.h b/llama.h index c8e05aadd..ccf65ca4e 100644 --- a/llama.h +++ b/llama.h @@ -278,7 +278,7 @@ extern "C" { bool allow_requantize; // allow quantizing non-f32/f16 tensors bool quantize_output_tensor; // quantize output.weight bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored - bool pure; // disable k-quant mixtures and quantize all tensors to the same type + bool pure; // quantize all tensors to the default type void * imatrix; // pointer to importance matrix data } llama_model_quantize_params; From ecab1c75de68de7c41c254e2ae170d3b07bee6d4 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Mon, 11 Mar 2024 10:00:08 +0200 Subject: [PATCH 14/48] cmake : fix subdir for `LLAMA_METAL_EMBED_LIBRARY` (#5985) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 00a26391a..60b592bf5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -208,7 +208,7 @@ if (LLAMA_METAL) enable_language(ASM) add_compile_definitions(GGML_METAL_EMBED_LIBRARY) - set(METALLIB_SOURCE "${CMAKE_SOURCE_DIR}/ggml-metal.metal") + set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated") set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-metallib.s") From 332bdfd7980718abf664bfa5460f2288a3314984 Mon Sep 17 00:00:00 2001 From: Minsoo Cheong <54794500+mscheong01@users.noreply.github.com> Date: Mon, 11 Mar 2024 17:09:32 +0900 Subject: [PATCH 15/48] server : maintain chat completion id for streaming responses (#5988) * server: maintain chat completion id for streaming responses * Update examples/server/utils.hpp * Update examples/server/utils.hpp --------- Co-authored-by: Georgi Gerganov --- examples/server/server.cpp | 7 ++++--- examples/server/utils.hpp | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index c7d3ed01b..3951507aa 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -3195,11 +3195,12 @@ int main(int argc, char ** argv) { ctx_server.queue_results.add_waiting_task_id(id_task); ctx_server.request_completion(id_task, -1, data, false, false); + const auto completion_id = gen_chatcmplid(); if (!json_value(data, "stream", false)) { server_task_result result = ctx_server.queue_results.recv(id_task); if (!result.error && result.stop) { - json result_oai = format_final_response_oaicompat(data, result.data); + json result_oai = format_final_response_oaicompat(data, result.data, completion_id); res.set_content(result_oai.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8"); } else { @@ -3208,11 +3209,11 @@ int main(int argc, char ** argv) { } ctx_server.queue_results.remove_waiting_task_id(id_task); } else { - const auto chunked_content_provider = [id_task, &ctx_server](size_t, httplib::DataSink & sink) { + const auto chunked_content_provider = [id_task, &ctx_server, completion_id](size_t, httplib::DataSink & sink) { while (true) { server_task_result result = ctx_server.queue_results.recv(id_task); if (!result.error) { - std::vector result_array = format_partial_response_oaicompat(result.data); + std::vector result_array = format_partial_response_oaicompat(result.data, completion_id); for (auto it = result_array.begin(); it != result_array.end(); ++it) { if (!it->empty()) { diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index df0a27782..f27af81e9 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -378,7 +378,7 @@ static json oaicompat_completion_params_parse( return llama_params; } -static json format_final_response_oaicompat(const json & request, json result, bool streaming = false) { +static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) { bool stopped_word = result.count("stopped_word") != 0; bool stopped_eos = json_value(result, "stopped_eos", false); int num_tokens_predicted = json_value(result, "tokens_predicted", 0); @@ -412,7 +412,7 @@ static json format_final_response_oaicompat(const json & request, json result, b {"prompt_tokens", num_prompt_tokens}, {"total_tokens", num_tokens_predicted + num_prompt_tokens} }}, - {"id", gen_chatcmplid()} + {"id", completion_id} }; if (server_verbose) { @@ -427,7 +427,7 @@ static json format_final_response_oaicompat(const json & request, json result, b } // return value is vector as there is one case where we might need to generate two responses -static std::vector format_partial_response_oaicompat(json result) { +static std::vector format_partial_response_oaicompat(json result, const std::string & completion_id) { if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) { return std::vector({result}); } @@ -471,7 +471,7 @@ static std::vector format_partial_response_oaicompat(json result) { {"role", "assistant"} }}}})}, {"created", t}, - {"id", gen_chatcmplid()}, + {"id", completion_id}, {"model", modelname}, {"object", "chat.completion.chunk"}}; @@ -482,7 +482,7 @@ static std::vector format_partial_response_oaicompat(json result) { {"content", content}}} }})}, {"created", t}, - {"id", gen_chatcmplid()}, + {"id", completion_id}, {"model", modelname}, {"object", "chat.completion.chunk"}}; @@ -509,7 +509,7 @@ static std::vector format_partial_response_oaicompat(json result) { json ret = json { {"choices", choices}, {"created", t}, - {"id", gen_chatcmplid()}, + {"id", completion_id}, {"model", modelname}, {"object", "chat.completion.chunk"} }; From 3202361c5b1ba15e695b31209567ef42c22c5c32 Mon Sep 17 00:00:00 2001 From: Michael Podvitskiy Date: Mon, 11 Mar 2024 10:28:51 +0100 Subject: [PATCH 16/48] ggml, ci : Windows ARM runner and build fixes (#5979) * windows arm ci * fix `error C2078: too many initializers` with ggml_vld1q_u32 macro for MSVC ARM64 * fix `warning C4146: unary minus operator applied to unsigned type, result still unsigned` * fix `error C2065: '__fp16': undeclared identifier` --- .github/workflows/build.yml | 4 +++- ggml-impl.h | 8 ++++++-- ggml-quants.c | 16 ++++++++-------- ggml.c | 4 ++-- llama.cpp | 4 ++-- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9144f9266..d39cd6bc3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -425,6 +425,8 @@ jobs: defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON' - build: 'vulkan' defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON' + - build: 'arm64' + defines: '-A ARM64 -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON' steps: - name: Clone @@ -520,7 +522,7 @@ jobs: - name: Test id: cmake_test # not all machines have native AVX-512 - if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} + if: ${{ matrix.build != 'arm64' && matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} run: | cd build ctest -L main -C Release --verbose --timeout 900 diff --git a/ggml-impl.h b/ggml-impl.h index c5637e4d4..e68b72877 100644 --- a/ggml-impl.h +++ b/ggml-impl.h @@ -53,26 +53,30 @@ extern "C" { // #include +typedef __fp16 ggml_fp16_internal_t; + #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { - __fp16 tmp; + ggml_fp16_internal_t tmp; memcpy(&tmp, &h, sizeof(ggml_fp16_t)); return (float)tmp; } static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { ggml_fp16_t res; - __fp16 tmp = f; + ggml_fp16_internal_t tmp = f; memcpy(&res, &tmp, sizeof(ggml_fp16_t)); return res; } #else +typedef uint16_t ggml_fp16_internal_t; + #ifdef __wasm_simd128__ #include #else diff --git a/ggml-quants.c b/ggml-quants.c index f9a3d9fd2..86b0764cb 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -9374,15 +9374,15 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void * const uint8x16_t idx_l = vld1q_u8(qs); qs += 16; idx.vec_index = vorrq_u16(vmovl_u8(vget_low_u8 (idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+0]), hshift), m256)); - const uint32x4_t aux32x4_0 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]], - iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]}; - const uint32x4_t aux32x4_1 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]], - iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]}; + const uint32x4_t aux32x4_0 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]], + iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]); + const uint32x4_t aux32x4_1 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]], + iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]); idx.vec_index = vorrq_u16(vmovl_u8(vget_high_u8(idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+1]), hshift), m256)); - const uint32x4_t aux32x4_2 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]], - iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]}; - const uint32x4_t aux32x4_3 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]], - iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]}; + const uint32x4_t aux32x4_2 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]], + iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]); + const uint32x4_t aux32x4_3 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]], + iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]); vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16))); diff --git a/ggml.c b/ggml.c index 80efa6f2a..9a7bd1d8c 100644 --- a/ggml.c +++ b/ggml.c @@ -857,7 +857,7 @@ inline static float vaddvq_f32(float32x4_t v) { #define GGML_F16x8 float16x8_t #define GGML_F16x8_ZERO vdupq_n_f16(0.0f) #define GGML_F16x8_SET1(x) vdupq_n_f16(x) - #define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x)) + #define GGML_F16x8_LOAD(x) vld1q_f16((const ggml_fp16_internal_t *)(x)) #define GGML_F16x8_STORE vst1q_f16 #define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c) #define GGML_F16x8_ADD vaddq_f16 @@ -900,7 +900,7 @@ inline static float vaddvq_f32(float32x4_t v) { #define GGML_F32Cx4 float32x4_t #define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f) #define GGML_F32Cx4_SET1(x) vdupq_n_f32(x) - #define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x))) + #define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const ggml_fp16_internal_t *)(x))) #define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y)) #define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c) #define GGML_F32Cx4_ADD vaddq_f32 diff --git a/llama.cpp b/llama.cpp index 110e509cc..f61ea791b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -13980,7 +13980,7 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token } else if (llama_is_user_defined_token(model->vocab, token)) { std::string result = model->vocab.id_to_token[token].text; if (length < (int) result.length()) { - return -result.length(); + return -(int) result.length(); } memcpy(buf, result.c_str(), result.length()); return result.length(); @@ -14015,7 +14015,7 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token } else if (llama_is_user_defined_token(model->vocab, token)) { std::string result = model->vocab.id_to_token[token].text; if (length < (int) result.length()) { - return -result.length(); + return -(int) result.length(); } memcpy(buf, result.c_str(), result.length()); return result.length(); From caa106d4e05a0ab94225c220b81f9e2cd522339b Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 11 Mar 2024 10:56:41 +0100 Subject: [PATCH 17/48] Server: format error to json (#5961) * server: format error to json * server: do not crash on grammar error * fix api key test case * revert limit max n_predict * small fix * correct coding style * update completion.js * launch_slot_with_task * update docs * update_slots * update webui * update readme --- examples/server/README.md | 46 +- examples/server/completion.js.hpp | 469 +++++++++--------- examples/server/public/completion.js | 20 +- examples/server/server.cpp | 159 +++--- examples/server/tests/features/steps/steps.py | 2 +- examples/server/utils.hpp | 51 ++ 6 files changed, 430 insertions(+), 317 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index 23606b32a..376739051 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -556,9 +556,51 @@ Run with bash: bash chat.sh ``` -### API like OAI +### OAI-like API -The HTTP server supports OAI-like API +The HTTP server supports OAI-like API: https://github.com/openai/openai-openapi + +### API errors + +Server returns error in the same format as OAI: https://github.com/openai/openai-openapi + +Example of an error: + +```json +{ + "error": { + "code": 401, + "message": "Invalid API Key", + "type": "authentication_error" + } +} +``` + +Apart from error types supported by OAI, we also have custom types that are specific to functionalities of llama.cpp: + +**When /metrics or /slots endpoint is disabled** + +```json +{ + "error": { + "code": 501, + "message": "This server does not support metrics endpoint.", + "type": "not_supported_error" + } +} +``` + +**When the server receives invalid grammar via */completions endpoint** + +```json +{ + "error": { + "code": 400, + "message": "Failed to parse grammar", + "type": "invalid_request_error" + } +} +``` ### Extending or building alternative Web Front End diff --git a/examples/server/completion.js.hpp b/examples/server/completion.js.hpp index f5e696e17..10734091e 100644 --- a/examples/server/completion.js.hpp +++ b/examples/server/completion.js.hpp @@ -231,255 +231,256 @@ unsigned char completion_js[] = { 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, - 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28, - 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, - 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, - 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, - 0x65, 0x6e, 0x74, 0x2e, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, - 0x28, 0x27, 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, - 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x29, 0x20, 0x7b, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72, + 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, + 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x4a, 0x53, 0x4f, + 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75, + 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, + 0x65, 0x2e, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, 0x28, 0x27, + 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, 0x69, 0x6c, + 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x54, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, 0x62, 0x65, 0x20, 0x63, 0x61, 0x75, 0x67, 0x68, 0x74, 0x20, 0x62, 0x79, 0x20, 0x75, 0x70, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x65, 0x72, 0x73, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, - 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x27, - 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, 0x69, 0x6c, - 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x65, 0x6c, - 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, - 0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, - 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, - 0x72, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, - 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, - 0x74, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72, + 0x6f, 0x77, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, 0x72, 0x72, 0x6f, 0x72, + 0x28, 0x27, 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, + 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x3b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x7d, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, + 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x5b, 0x24, 0x7b, 0x72, + 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, + 0x63, 0x6f, 0x64, 0x65, 0x7d, 0x20, 0x2d, 0x20, 0x24, 0x7b, 0x72, 0x65, + 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x74, + 0x79, 0x70, 0x65, 0x7d, 0x5d, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73, + 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x6d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x28, 0x65, 0x29, + 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, + 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, 0x61, 0x6d, + 0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, + 0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x7d, 0x60, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, - 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, - 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, - 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, - 0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, - 0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, - 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, - 0x72, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, - 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, - 0x74, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, - 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61, 0x6d, 0x65, - 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72, 0x74, 0x45, - 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x20, - 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20, 0x65, 0x29, - 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20, 0x20, 0x7d, - 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x7b, - 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, - 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, 0x29, 0x3b, - 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, - 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, - 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, 0x6c, - 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, - 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20, 0x74, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x79, 0x6f, - 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x73, 0x63, 0x72, - 0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, - 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f, 0x2f, - 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f, 0x72, - 0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, - 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20, 0x66, - 0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, - 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f, 0x0a, - 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, - 0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, - 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, - 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, - 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 0x45, 0x76, - 0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x28, - 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, 0x28, - 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, - 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, - 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28, 0x63, - 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x2e, - 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, - 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, - 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, - 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, - 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b, - 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x20, - 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x63, - 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, - 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x29, - 0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, - 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, - 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, - 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, - 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, - 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, - 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, - 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, - 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, - 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, - 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, - 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, + 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20, + 0x63, 0x61, 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61, + 0x6d, 0x65, 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72, + 0x74, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, + 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d, + 0x61, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20, + 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20, + 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79, + 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, + 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, + 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65, + 0x74, 0x75, 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, + 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, + 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, + 0x72, 0x6e, 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20, + 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, + 0x79, 0x6f, 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x73, + 0x63, 0x72, 0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, + 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, + 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, + 0x6f, 0x72, 0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, + 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, + 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, + 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, + 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, + 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, + 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, + 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, + 0x72, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, + 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, + 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, + 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, + 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, + 0x6c, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, + 0x2f, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, + 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, + 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, + 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, + 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, + 0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, + 0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, + 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, + 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, + 0x20, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, + 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, + 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, + 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, + 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, + 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, + 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, + 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, + 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, + 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, + 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, + 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, + 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, + 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, + 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, + 0x6e, 0x74, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, + 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, + 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, + 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, + 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, + 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, + 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, + 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, + 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, + 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, + 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, + 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, + 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, + 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, + 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, + 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, + 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, - 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, + 0x28, 0x22, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, - 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29, 0x29, - 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, - 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, - 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, - 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, - 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65, 0x6e, - 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, - 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, - 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, - 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, - 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, - 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, - 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, - 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, - 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, - 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, - 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, - 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, - 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, - 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e, - 0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, - 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, - 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f, 0x6e, - 0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, - 0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, - 0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x28, - 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, - 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x3b, - 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, - 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, - 0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20, - 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, - 0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6d, - 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e, - 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20, 0x6e, - 0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x73, - 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f, 0x0a, - 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, - 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c, - 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, - 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, - 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d, 0x3e, - 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, - 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, - 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, - 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a, 0x2f, - 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, - 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, - 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, - 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f, 0x6d, - 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, - 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, - 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, - 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, - 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, - 0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, - 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, - 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, - 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, - 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x50, - 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, - 0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20, 0x72, - 0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, - 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, + 0x69, 0x6e, 0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, + 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, + 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, + 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, + 0x6f, 0x6e, 0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, + 0x69, 0x6c, 0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, + 0x74, 0x20, 0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, + 0x29, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, + 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, + 0x6c, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, + 0x75, 0x72, 0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, + 0x65, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, + 0x76, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, + 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, + 0x74, 0x2e, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, + 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, + 0x20, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, + 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, + 0x3a, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, + 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, + 0x6e, 0x28, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, + 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, + 0x72, 0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, + 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, + 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, + 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, + 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, + 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, + 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, + 0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, + 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, + 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, + 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, + 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, + 0x3d, 0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, + 0x20, 0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, + 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, + 0x20, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, + 0x6e, 0x63, 0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, + 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, + 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, + 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, + 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, + 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, + 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, + 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, + 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, + 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, + 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, + 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, + 0x6c, 0x76, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, + 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, + 0x68, 0x20, 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, + 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, + 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, + 0x0a, 0x2f, 0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, + 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, + 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, + 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, + 0x65, 0x74, 0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, + 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, + 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, + 0x6c, 0x62, 0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, - 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, - 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, - 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, - 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, - 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, - 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, - 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, - 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20, - 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, - 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a, 0x2f, - 0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72, 0x65, - 0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a, 0x65, - 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, - 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, - 0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x70, - 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, - 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, - 0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, - 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, - 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, - 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72, 0x61, - 0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, - 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, - 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29, 0x20, - 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x61, - 0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a, 0x20, - 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65, 0x74, - 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20, 0x69, - 0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65, - 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68, 0x69, - 0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c, 0x20, - 0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x20, - 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x20, - 0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73, - 0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, - 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, - 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20, - 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, - 0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65, 0x6e, - 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, - 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x20, - 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, - 0x68, 0x28, 0x22, 0x2f, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x22, 0x29, 0x2e, - 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20, 0x72, 0x2e, - 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, - 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, - 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x70, - 0x72, 0x6f, 0x70, 0x73, 0x2e, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, - 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, - 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x20, 0x20, - 0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67, + 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, + 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, + 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, + 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, + 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, + 0x62, 0x61, 0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, + 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, + 0x65, 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, + 0x20, 0x69, 0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, + 0x68, 0x65, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, + 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, + 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, + 0x67, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x20, 0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, + 0x20, 0x73, 0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, + 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, + 0x6d, 0x61, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, + 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, + 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, - 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a + 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, + 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x70, + 0x73, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65, + 0x74, 0x63, 0x68, 0x28, 0x22, 0x2f, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x22, + 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20, + 0x72, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, + 0x20, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x2e, 0x64, 0x65, 0x66, 0x61, 0x75, + 0x6c, 0x74, 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, + 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, + 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, + 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a }; -unsigned int completion_js_len = 5782; +unsigned int completion_js_len = 5796; diff --git a/examples/server/public/completion.js b/examples/server/public/completion.js index ab38a7b40..835ce6e68 100644 --- a/examples/server/public/completion.js +++ b/examples/server/public/completion.js @@ -96,18 +96,18 @@ export async function* llama(prompt, params = {}, config = {}) { } } if (result.error) { - result.error = JSON.parse(result.error); - if (result.error.content.includes('slot unavailable')) { - // Throw an error to be caught by upstream callers - throw new Error('slot unavailable'); - } else { - console.error(`llama.cpp error: ${result.error.content}`); + try { + result.error = JSON.parse(result.error); + if (result.error.message.includes('slot unavailable')) { + // Throw an error to be caught by upstream callers + throw new Error('slot unavailable'); + } else { + console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`); + } + } catch(e) { + console.error(`llama.cpp error ${result.error}`) } } - if (result.error) { - result.error = JSON.parse(result.error); - console.error(`llama.cpp error: ${result.error.content}`); - } } } } diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 3951507aa..b63a6f243 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -396,7 +396,7 @@ struct server_queue { // callback functions std::function callback_new_task; std::function callback_finish_multitask; - std::function callback_run_slots; + std::function callback_update_slots; // Add a new task to the end of the queue int post(server_task task) { @@ -435,8 +435,8 @@ struct server_queue { } // Register the function to be called when all slots data is ready to be processed - void on_run_slots(std::function callback) { - callback_run_slots = std::move(callback); + void on_update_slots(std::function callback) { + callback_update_slots = std::move(callback); } // Call when the state of one slot is changed @@ -461,7 +461,7 @@ struct server_queue { * - Wait until a new task arrives * - Process the task (i.e. maybe copy data into slot) * - Check if multitask is finished - * - Run all slots + * - Update all slots */ void start_loop() { running = true; @@ -499,9 +499,9 @@ struct server_queue { } // all tasks in the current loop is processed, slots data is now ready - LOG_VERBOSE("callback_run_slots", {}); + LOG_VERBOSE("callback_update_slots", {}); - callback_run_slots(); + callback_update_slots(); LOG_VERBOSE("wait for new task", {}); { @@ -805,9 +805,10 @@ struct server_context { return last_used; } - bool launch_slot_with_data(server_slot & slot, json data) const { + bool launch_slot_with_task(server_slot & slot, const server_task & task) { slot_params default_params; llama_sampling_params default_sparams; + auto & data = task.data; if (data.count("__oaicompat") != 0) { slot.oaicompat = true; @@ -864,10 +865,15 @@ struct server_context { { const auto & prompt = data.find("prompt"); if (prompt == data.end()) { - slot.prompt = ""; + send_error(task, "Either \"prompt\" or \"messages\" must be provided", ERROR_TYPE_INVALID_REQUEST); + return false; } else { slot.prompt = *prompt; } + if (slot.prompt.is_array() && slot.prompt.size() == 0) { + send_error(task, "\"prompt\" cannot be an empty array", ERROR_TYPE_INVALID_REQUEST); + return false; + } } // penalize user-provided tokens @@ -926,6 +932,7 @@ struct server_context { if (logit_bias != data.end() && logit_bias->is_array()) { const int n_vocab = llama_n_vocab(model); for (const auto & el : *logit_bias) { + // TODO: we may want to throw errors here, in case "el" is incorrect if (el.is_array() && el.size() == 2) { float bias; if (el[1].is_number()) { @@ -985,6 +992,11 @@ struct server_context { llama_sampling_free(slot.ctx_sampling); } slot.ctx_sampling = llama_sampling_init(slot.sparams); + if (slot.ctx_sampling == nullptr) { + // for now, the only error that may happen here is invalid grammar + send_error(task, "Failed to parse grammar", ERROR_TYPE_INVALID_REQUEST); + return false; + } llama_set_rng_seed(ctx, slot.params.seed); } @@ -1226,15 +1238,23 @@ struct server_context { }; } - void send_error(const server_task & task, const std::string & error) { - LOG_TEE("task %i - error: %s\n", task.id, error.c_str()); + void send_error(const server_task & task, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) { + send_error(task.id, task.id_multi, error, type); + } + + void send_error(const server_slot & slot, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) { + send_error(slot.id_task, slot.id_multi, error, type); + } + + void send_error(const int id_task, const int id_multi, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) { + LOG_TEE("task %i - error: %s\n", id_task, error.c_str()); server_task_result res; - res.id = task.id; - res.id_multi = task.id_multi; + res.id = id_task; + res.id_multi = id_multi; res.stop = false; res.error = true; - res.data = { { "content", error } }; + res.data = format_error_response(error, type); queue_results.send(res); } @@ -1468,9 +1488,8 @@ struct server_context { slot->infill = task.infill; slot->embedding = task.embedding; - if (!launch_slot_with_data(*slot, task.data)) { - // send error result - send_error(task, "internal_error"); + if (!launch_slot_with_task(*slot, task)) { + LOG_ERROR("error while launching slot", task.data); break; } } break; @@ -1587,7 +1606,7 @@ struct server_context { queue_results.send(result); } - bool update_slots() { + void update_slots() { if (system_need_update) { system_prompt_update(); } @@ -1630,7 +1649,7 @@ struct server_context { kv_cache_clear(); } - return true; + return; } } @@ -1975,8 +1994,7 @@ struct server_context { if (batch.n_tokens == 0) { LOG_VERBOSE("no tokens to decode", {}); - - return true; + return; } LOG_VERBOSE("decoding batch", { @@ -2033,7 +2051,13 @@ struct server_context { if (n_batch == 1 || ret < 0) { // if you get here, it means the KV cache is full - try increasing it via the context size LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret); - return false; + for (auto & slot : slots) { + slot.state = SLOT_STATE_PROCESSING; + slot.command = SLOT_COMMAND_NONE; + slot.release(); + send_error(slot, "Input prompt is too big compared to KV size. Please try increasing KV size."); + } + break; // break loop of n_batch } LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2); @@ -2042,12 +2066,12 @@ struct server_context { n_batch /= 2; i -= n_batch; - continue; + continue; // continue loop of n_batch } for (auto & slot : slots) { if (slot.state != SLOT_STATE_PROCESSING || slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) { - continue; + continue; // continue loop of slots } // prompt evaluated for embedding @@ -2055,7 +2079,7 @@ struct server_context { send_embedding(slot, batch_view); slot.release(); slot.i_batch = -1; - continue; + continue; // continue loop of slots } completion_token_output result; @@ -2097,9 +2121,7 @@ struct server_context { } } - LOG_VERBOSE("slots updated", {}); - - return true; + LOG_VERBOSE("run slots completed", {}); } json model_meta() const { @@ -2745,32 +2767,32 @@ int main(int argc, char ** argv) { svr->set_logger(log_server_request); - svr->set_exception_handler([](const httplib::Request &, httplib::Response & res, std::exception_ptr ep) { - const char fmt[] = "500 Internal Server Error\n%s"; + auto res_error = [](httplib::Response & res, json error_data) { + json final_response {{"error", error_data}}; + res.set_content(final_response.dump(), "application/json; charset=utf-8"); + res.status = json_value(error_data, "code", 500); + }; - char buf[BUFSIZ]; + svr->set_exception_handler([&res_error](const httplib::Request &, httplib::Response & res, std::exception_ptr ep) { + std::string message; try { std::rethrow_exception(std::move(ep)); - } catch (std::exception &e) { - snprintf(buf, sizeof(buf), fmt, e.what()); + } catch (std::exception & e) { + message = e.what(); } catch (...) { - snprintf(buf, sizeof(buf), fmt, "Unknown Exception"); + message = "Unknown Exception"; } - res.set_content(buf, "text/plain; charset=utf-8"); - res.status = 500; + json formatted_error = format_error_response(message, ERROR_TYPE_SERVER); + LOG_VERBOSE("Got exception", formatted_error); + res_error(res, formatted_error); }); - svr->set_error_handler([](const httplib::Request &, httplib::Response & res) { - if (res.status == 401) { - res.set_content("Unauthorized", "text/plain; charset=utf-8"); - } - if (res.status == 400) { - res.set_content("Invalid request", "text/plain; charset=utf-8"); - } + svr->set_error_handler([&res_error](const httplib::Request &, httplib::Response & res) { if (res.status == 404) { - res.set_content("File Not Found", "text/plain; charset=utf-8"); + res_error(res, format_error_response("File Not Found", ERROR_TYPE_NOT_FOUND)); } + // for other error codes, we skip processing here because it's already done by res_error() }); // set timeouts and change hostname and port @@ -2835,7 +2857,7 @@ int main(int argc, char ** argv) { // Middlewares // - auto middleware_validate_api_key = [&sparams](const httplib::Request & req, httplib::Response & res) { + auto middleware_validate_api_key = [&sparams, &res_error](const httplib::Request & req, httplib::Response & res) { // TODO: should we apply API key to all endpoints, including "/health" and "/models"? static const std::set protected_endpoints = { "/props", @@ -2876,8 +2898,7 @@ int main(int argc, char ** argv) { // API key is invalid or not provided // TODO: make another middleware for CORS related logic res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); - res.set_content("Unauthorized: Invalid API Key", "text/plain; charset=utf-8"); - res.status = 401; // Unauthorized + res_error(res, format_error_response("Invalid API Key", ERROR_TYPE_AUTHENTICATION)); LOG_WARNING("Unauthorized: Invalid API Key", {}); @@ -2940,21 +2961,18 @@ int main(int argc, char ** argv) { } case SERVER_STATE_LOADING_MODEL: { - res.set_content(R"({"status": "loading model"})", "application/json"); - res.status = 503; // HTTP Service Unavailable + res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE)); } break; case SERVER_STATE_ERROR: { - res.set_content(R"({"status": "error", "error": "Model failed to load"})", "application/json"); - res.status = 500; // HTTP Internal Server Error + res_error(res, format_error_response("Model failed to load", ERROR_TYPE_SERVER)); } break; } }; const auto handle_slots = [&](const httplib::Request &, httplib::Response & res) { if (!sparams.slots_endpoint) { - res.status = 501; - res.set_content("This server does not support slots endpoint.", "text/plain; charset=utf-8"); + res_error(res, format_error_response("This server does not support slots endpoint.", ERROR_TYPE_NOT_SUPPORTED)); return; } @@ -2978,8 +2996,7 @@ int main(int argc, char ** argv) { const auto handle_metrics = [&](const httplib::Request &, httplib::Response & res) { if (!sparams.metrics_endpoint) { - res.status = 501; - res.set_content("This server does not support metrics endpoint.", "text/plain; charset=utf-8"); + res_error(res, format_error_response("This server does not support metrics endpoint.", ERROR_TYPE_NOT_SUPPORTED)); return; } @@ -3090,7 +3107,7 @@ int main(int argc, char ** argv) { res.set_content(data.dump(), "application/json; charset=utf-8"); }; - const auto handle_completions = [&ctx_server](const httplib::Request & req, httplib::Response & res) { + const auto handle_completions = [&ctx_server, &res_error](const httplib::Request & req, httplib::Response & res) { res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); json data = json::parse(req.body); @@ -3105,8 +3122,7 @@ int main(int argc, char ** argv) { if (!result.error && result.stop) { res.set_content(result.data.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8"); } else { - res.status = 500; - res.set_content(result.data["content"], "text/plain; charset=utf-8"); + res_error(res, result.data); } ctx_server.queue_results.remove_waiting_task_id(id_task); @@ -3186,7 +3202,7 @@ int main(int argc, char ** argv) { res.set_content(models.dump(), "application/json; charset=utf-8"); }; - const auto handle_chat_completions = [&ctx_server, &sparams](const httplib::Request & req, httplib::Response & res) { + const auto handle_chat_completions = [&ctx_server, &sparams, &res_error](const httplib::Request & req, httplib::Response & res) { res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); json data = oaicompat_completion_params_parse(ctx_server.model, json::parse(req.body), sparams.chat_template); @@ -3204,8 +3220,7 @@ int main(int argc, char ** argv) { res.set_content(result_oai.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8"); } else { - res.status = 500; - res.set_content(result.data["content"], "text/plain; charset=utf-8"); + res_error(res, result.data); } ctx_server.queue_results.remove_waiting_task_id(id_task); } else { @@ -3259,7 +3274,7 @@ int main(int argc, char ** argv) { } }; - const auto handle_infill = [&ctx_server](const httplib::Request & req, httplib::Response & res) { + const auto handle_infill = [&ctx_server, &res_error](const httplib::Request & req, httplib::Response & res) { res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); json data = json::parse(req.body); @@ -3274,8 +3289,7 @@ int main(int argc, char ** argv) { if (!result.error && result.stop) { res.set_content(result.data.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8"); } else { - res.status = 404; - res.set_content(result.data["content"], "text/plain; charset=utf-8"); + res_error(res, result.data); } ctx_server.queue_results.remove_waiting_task_id(id_task); @@ -3346,7 +3360,7 @@ int main(int argc, char ** argv) { return res.set_content(data.dump(), "application/json; charset=utf-8"); }; - const auto handle_embeddings = [¶ms, &ctx_server](const httplib::Request & req, httplib::Response & res) { + const auto handle_embeddings = [¶ms, &ctx_server, &res_error](const httplib::Request & req, httplib::Response & res) { res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); if (!params.embedding) { res.status = 501; @@ -3375,8 +3389,8 @@ int main(int argc, char ** argv) { std::string content = body["content"]; prompts.push_back(content); } else { - // TODO @ngxson : should return an error here - prompts.push_back(""); + res_error(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST)); + return; } // process all prompts @@ -3392,9 +3406,14 @@ int main(int argc, char ** argv) { // get the result server_task_result result = ctx_server.queue_results.recv(id_task); ctx_server.queue_results.remove_waiting_task_id(id_task); - - // append to the responses - responses.push_back(result.data); + if (!result.error) { + // append to the responses + responses.push_back(result.data); + } else { + // error received, ignore everything else + res_error(res, result.data); + return; + } } // write JSON response @@ -3488,7 +3507,7 @@ int main(int argc, char ** argv) { &server_context::process_single_task, &ctx_server, std::placeholders::_1)); ctx_server.queue_tasks.on_finish_multitask(std::bind( &server_context::on_finish_multitask, &ctx_server, std::placeholders::_1)); - ctx_server.queue_tasks.on_run_slots(std::bind( + ctx_server.queue_tasks.on_update_slots(std::bind( &server_context::update_slots, &ctx_server)); ctx_server.queue_results.on_multitask_update(std::bind( &server_queue::update_multitask, diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 4e81a255a..98c2b6174 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -801,7 +801,7 @@ async def oai_chat_completions(user_prompt, stream=enable_streaming, seed=seed ) - except openai.error.APIError as e: + except openai.error.AuthenticationError as e: if expect_api_error is not None and expect_api_error: return 401 else: diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index f27af81e9..48aeef4eb 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -14,6 +14,17 @@ using json = nlohmann::json; +// https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11 +enum error_type { + ERROR_TYPE_INVALID_REQUEST, + ERROR_TYPE_AUTHENTICATION, + ERROR_TYPE_SERVER, + ERROR_TYPE_NOT_FOUND, + ERROR_TYPE_PERMISSION, + ERROR_TYPE_UNAVAILABLE, // custom error + ERROR_TYPE_NOT_SUPPORTED, // custom error +}; + extern bool server_verbose; extern bool server_log_json; @@ -542,3 +553,43 @@ static json format_detokenized_response(const std::string & content) { {"content", content} }; } + +static json format_error_response(const std::string & message, const enum error_type type) { + std::string type_str; + int code = 500; + switch (type) { + case ERROR_TYPE_INVALID_REQUEST: + type_str = "invalid_request_error"; + code = 400; + break; + case ERROR_TYPE_AUTHENTICATION: + type_str = "authentication_error"; + code = 401; + break; + case ERROR_TYPE_NOT_FOUND: + type_str = "not_found_error"; + code = 404; + break; + case ERROR_TYPE_SERVER: + type_str = "server_error"; + code = 500; + break; + case ERROR_TYPE_PERMISSION: + type_str = "permission_error"; + code = 403; + break; + case ERROR_TYPE_NOT_SUPPORTED: + type_str = "not_supported_error"; + code = 501; + break; + case ERROR_TYPE_UNAVAILABLE: + type_str = "unavailable_error"; + code = 503; + break; + } + return json { + {"code", code}, + {"message", message}, + {"type", type_str}, + }; +} From 828defefb66fc8a25404f5de845897145bf34061 Mon Sep 17 00:00:00 2001 From: Jakub N Date: Mon, 11 Mar 2024 14:40:42 +0100 Subject: [PATCH 18/48] Update server docker image URLs (#5997) --- examples/server/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index 376739051..8f8454aff 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -123,10 +123,10 @@ You can consume the endpoints with Postman or NodeJS with axios library. You can ### Docker ```bash -docker run -p 8080:8080 -v /path/to/models:/models ggerganov/llama.cpp:server -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 +docker run -p 8080:8080 -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:server -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 # or, with CUDA: -docker run -p 8080:8080 -v /path/to/models:/models --gpus all ggerganov/llama.cpp:server-cuda -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 --n-gpu-layers 99 +docker run -p 8080:8080 -v /path/to/models:/models --gpus all ghcr.io/ggerganov/llama.cpp:server-cuda -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 --n-gpu-layers 99 ``` ## Testing with CURL From 83796e62bc9f6caae6228168e359890f51e60fee Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 11 Mar 2024 17:47:47 +0200 Subject: [PATCH 19/48] llama : refactor unicode stuff (#5992) * llama : refactor unicode stuff ggml-ci * unicode : names * make : fix c++ compiler * unicode : names * unicode : straighten tables * zig : fix build * unicode : put nfd normalization behind API ggml-ci * swift : fix build * unicode : add BOM * unicode : add ggml-ci * unicode : pass as cpts as const ref --- CMakeLists.txt | 2 + Makefile | 7 +- Package.swift | 1 + build.zig | 15 +- llama.cpp | 87 +- tests/test-tokenizer-1-bpe.cpp | 4 +- tests/test-tokenizer-1-llama.cpp | 2 +- unicode.cpp | 1672 ++++++++++++++++++++++++++++++ unicode.h | 790 +------------- 9 files changed, 1744 insertions(+), 836 deletions(-) create mode 100644 unicode.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 60b592bf5..7ab13cbd5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1141,6 +1141,8 @@ endif() add_library(llama llama.cpp llama.h + unicode.h + unicode.cpp ) target_include_directories(llama PUBLIC .) diff --git a/Makefile b/Makefile index 6492da8a0..c8fd3f5c5 100644 --- a/Makefile +++ b/Makefile @@ -633,9 +633,12 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h $(CC) $(CFLAGS) -c $< -o $@ -OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o +unicode.o: unicode.cpp unicode.h + $(CXX) $(CXXFLAGS) -c $< -o $@ -llama.o: llama.cpp ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h +OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o + +llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h $(CXX) $(CXXFLAGS) -c $< -o $@ COMMON_H_DEPS = common/common.h common/sampling.h common/log.h diff --git a/Package.swift b/Package.swift index b24c9204a..4696ae5e4 100644 --- a/Package.swift +++ b/Package.swift @@ -31,6 +31,7 @@ let package = Package( sources: [ "ggml.c", "llama.cpp", + "unicode.cpp", "ggml-alloc.c", "ggml-backend.c", "ggml-quants.c", diff --git a/build.zig b/build.zig index c0af454dc..90609359b 100644 --- a/build.zig +++ b/build.zig @@ -115,6 +115,7 @@ pub fn build(b: *std.build.Builder) !void { const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c"); const ggml_backend = make.obj("ggml-backend", "ggml-backend.c"); const ggml_quants = make.obj("ggml-quants", "ggml-quants.c"); + const unicode = make.obj("unicode", "unicode.cpp"); const llama = make.obj("llama", "llama.cpp"); const buildinfo = make.obj("common", "common/build-info.cpp"); const common = make.obj("common", "common/common.cpp"); @@ -125,14 +126,14 @@ pub fn build(b: *std.build.Builder) !void { const clip = make.obj("clip", "examples/llava/clip.cpp"); const llava = make.obj("llava", "examples/llava/llava.cpp"); - _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, console, grammar_parser }); - _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo }); - _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo }); - _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo }); - _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train }); - _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train }); + _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, sampling, console, grammar_parser }); + _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo }); + _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo }); + _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo }); + _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, train }); + _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, train }); - const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip, llava }); + const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, sampling, grammar_parser, clip, llava }); if (server.target.isWindows()) { server.linkSystemLibrary("ws2_32"); } diff --git a/llama.cpp b/llama.cpp index f61ea791b..6aa88ce54 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3703,7 +3703,7 @@ static void llm_load_vocab( for (int i = 0; i < n_merges; i++) { const std::string word = gguf_get_arr_str(ctx, merges_keyidx, i); - GGML_ASSERT(codepoints_from_utf8(word).size() > 0); + GGML_ASSERT(unicode_cpts_from_utf8(word).size() > 0); std::string first; std::string second; @@ -3748,7 +3748,7 @@ static void llm_load_vocab( for (uint32_t i = 0; i < n_vocab; i++) { std::string word = gguf_get_arr_str(ctx, token_idx, i); - GGML_ASSERT(codepoints_from_utf8(word).size() > 0); + GGML_ASSERT(unicode_cpts_from_utf8(word).size() > 0); vocab.token_to_id[word] = i; @@ -9340,7 +9340,7 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) { } case LLAMA_VOCAB_TYPE_BPE: { GGML_ASSERT(false); - return unicode_to_bytes_bpe(token_data.text); + return unicode_utf8_to_byte(token_data.text); } case LLAMA_VOCAB_TYPE_WPM: { GGML_ASSERT(false); @@ -9365,7 +9365,7 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) { } case LLAMA_VOCAB_TYPE_WPM: case LLAMA_VOCAB_TYPE_BPE: { - return vocab.token_to_id.at(bytes_to_unicode_bpe(ch)); + return vocab.token_to_id.at(unicode_byte_to_utf8(ch)); } default: GGML_ASSERT(false); @@ -9705,9 +9705,9 @@ private: bpe_words.reserve(text.size()); bpe_encoded_words.reserve(text.size()); - auto cps = codepoints_from_utf8(text); - for (size_t i = 0; i < cps.size(); ++i) - text_utf.emplace_back(codepoint_to_utf8(cps[i])); + const auto cpts = unicode_cpts_from_utf8(text); + for (size_t i = 0; i < cpts.size(); ++i) + text_utf.emplace_back(unicode_cpt_to_utf8(cpts[i])); for (int i = 0; i < (int)text_utf.size(); i++) { const std::string & utf_char = text_utf[i]; @@ -9757,40 +9757,40 @@ private: } if (!split_condition && !collecting) { - if (codepoint_type(utf_char) == CODEPOINT_TYPE_LETTER || (!token.size() && utf_char == " " && codepoint_type(utf_char_next) == CODEPOINT_TYPE_LETTER)) { + if (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_LETTER || (!token.size() && utf_char == " " && unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_LETTER)) { collecting_letter = true; collecting = true; } - else if (codepoint_type(utf_char) == CODEPOINT_TYPE_DIGIT || (!token.size() && utf_char == " " && codepoint_type(utf_char_next) == CODEPOINT_TYPE_DIGIT)) { + else if (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_DIGIT || (!token.size() && utf_char == " " && unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_DIGIT)) { collecting_numeric = true; collecting = true; } else if ( - ((codepoint_type(utf_char) != CODEPOINT_TYPE_LETTER && codepoint_type(utf_char) != CODEPOINT_TYPE_DIGIT) && (codepoint_type(utf_char) != CODEPOINT_TYPE_WHITESPACE)) || - (!token.size() && utf_char == " " && codepoint_type(utf_char_next) != CODEPOINT_TYPE_LETTER && codepoint_type(utf_char_next) != CODEPOINT_TYPE_DIGIT && codepoint_type(utf_char_next) != CODEPOINT_TYPE_WHITESPACE) + ((unicode_cpt_type(utf_char) != CODEPOINT_TYPE_LETTER && unicode_cpt_type(utf_char) != CODEPOINT_TYPE_DIGIT) && (unicode_cpt_type(utf_char) != CODEPOINT_TYPE_WHITESPACE)) || + (!token.size() && utf_char == " " && unicode_cpt_type(utf_char_next) != CODEPOINT_TYPE_LETTER && unicode_cpt_type(utf_char_next) != CODEPOINT_TYPE_DIGIT && unicode_cpt_type(utf_char_next) != CODEPOINT_TYPE_WHITESPACE) ) { collecting_special = true; collecting = true; } - else if (codepoint_type(utf_char) == CODEPOINT_TYPE_WHITESPACE && codepoint_type(utf_char_next) == CODEPOINT_TYPE_WHITESPACE) { + else if (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_WHITESPACE && unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_WHITESPACE) { collecting_whitespace_lookahead = true; collecting = true; } - else if (codepoint_type(utf_char) == CODEPOINT_TYPE_WHITESPACE) { + else if (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_WHITESPACE) { split_condition = true; } } else if (!split_condition && collecting) { - if (collecting_letter && codepoint_type(utf_char) != CODEPOINT_TYPE_LETTER) { + if (collecting_letter && unicode_cpt_type(utf_char) != CODEPOINT_TYPE_LETTER) { split_condition = true; } - else if (collecting_numeric && codepoint_type(utf_char) != CODEPOINT_TYPE_DIGIT) { + else if (collecting_numeric && unicode_cpt_type(utf_char) != CODEPOINT_TYPE_DIGIT) { split_condition = true; } - else if (collecting_special && (codepoint_type(utf_char) == CODEPOINT_TYPE_LETTER || codepoint_type(utf_char) == CODEPOINT_TYPE_DIGIT || codepoint_type(utf_char) == CODEPOINT_TYPE_WHITESPACE)) { + else if (collecting_special && (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_LETTER || unicode_cpt_type(utf_char) == CODEPOINT_TYPE_DIGIT || unicode_cpt_type(utf_char) == CODEPOINT_TYPE_WHITESPACE)) { split_condition = true; } - else if (collecting_whitespace_lookahead && (codepoint_type(utf_char_next) == CODEPOINT_TYPE_LETTER || codepoint_type(utf_char_next) == CODEPOINT_TYPE_DIGIT)) { + else if (collecting_whitespace_lookahead && (unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_LETTER || unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_DIGIT)) { split_condition = true; } } @@ -9819,7 +9819,7 @@ private: for (std::string & word : bpe_words) { std::string encoded_token = ""; for (char & c : word) { - encoded_token += bytes_to_unicode_bpe(c); + encoded_token += unicode_byte_to_utf8(c); } bpe_encoded_words.emplace_back(encoded_token); } @@ -9893,25 +9893,13 @@ struct llm_tokenizer_wpm { } std::vector preprocess(const std::string & text) { - // normalalization form D - std::vector codepoints = codepoints_from_utf8(text); - std::vector nfd_codepoints; - for (uint32_t code : codepoints) { - auto it = nfd_map.equal_range(code); - if (it.first != it.second) { - for (auto jt = it.first; jt != it.second; jt++) { - nfd_codepoints.push_back(jt->second); - } - } else { - nfd_codepoints.push_back(code); - } - } + std::vector cpts_nfd = unicode_cpts_normalize_nfd(unicode_cpts_from_utf8(text)); // strip accents, strip control, uniformize whitespace, // to lowercase, pad chinese characters, pad punctuation std::string new_str = ""; - for (uint32_t code : nfd_codepoints) { - int type = codepoint_type(code); + for (uint32_t code : cpts_nfd) { + int type = unicode_cpt_type(code); if (type == CODEPOINT_TYPE_ACCENT_MARK || type == CODEPOINT_TYPE_CONTROL) { continue; } @@ -9919,7 +9907,7 @@ struct llm_tokenizer_wpm { if (type == CODEPOINT_TYPE_WHITESPACE) { code = ' '; } - std::string s = codepoint_to_utf8(code); + std::string s = unicode_cpt_to_utf8(code); if (type == CODEPOINT_TYPE_PUNCTUATION || is_ascii_punct(code) || is_chinese_char(code)) { new_str += " "; new_str += s; @@ -9939,8 +9927,7 @@ struct llm_tokenizer_wpm { if (r > l) words.push_back(new_str.substr(l, (r - l))); l = r + 1; r = l; - } - else { + } else { r += 1; } } @@ -9964,17 +9951,17 @@ struct llm_tokenizer_wpm { return code < 256 && ispunct(code); } - bool is_chinese_char(uint32_t codepoint) { - if ((codepoint >= 0x4E00 && codepoint <= 0x9FFF) || - (codepoint >= 0x3400 && codepoint <= 0x4DBF) || - (codepoint >= 0x20000 && codepoint <= 0x2A6DF) || - (codepoint >= 0x2A700 && codepoint <= 0x2B73F) || - (codepoint >= 0x2B740 && codepoint <= 0x2B81F) || - (codepoint >= 0x2B920 && codepoint <= 0x2CEAF) || // this should be 0x2B820 but in hf rust code it is 0x2B920 - (codepoint >= 0xF900 && codepoint <= 0xFAFF) || - (codepoint >= 0x2F800 && codepoint <= 0x2FA1F) || - (codepoint >= 0x3000 && codepoint <= 0x303F) || - (codepoint >= 0xFF00 && codepoint <= 0xFFEF)) { + bool is_chinese_char(uint32_t cpt) { + if ((cpt >= 0x4E00 && cpt <= 0x9FFF) || + (cpt >= 0x3400 && cpt <= 0x4DBF) || + (cpt >= 0x20000 && cpt <= 0x2A6DF) || + (cpt >= 0x2A700 && cpt <= 0x2B73F) || + (cpt >= 0x2B740 && cpt <= 0x2B81F) || + (cpt >= 0x2B920 && cpt <= 0x2CEAF) || // this should be 0x2B820 but in hf rust code it is 0x2B920 + (cpt >= 0xF900 && cpt <= 0xFAFF) || + (cpt >= 0x2F800 && cpt <= 0x2FA1F) || + (cpt >= 0x3000 && cpt <= 0x303F) || + (cpt >= 0xFF00 && cpt <= 0xFFEF)) { return true; // NOLINT } return false; @@ -13953,9 +13940,9 @@ int32_t llama_tokenize( static std::string llama_decode_text(const std::string & text) { std::string decoded_text; - auto unicode_sequences = codepoints_from_utf8(text); - for (auto& unicode_sequence : unicode_sequences) { - decoded_text += unicode_to_bytes_bpe(codepoint_to_utf8(unicode_sequence)); + auto unicode_sequences = unicode_cpts_from_utf8(text); + for (auto & unicode_sequence : unicode_sequences) { + decoded_text += unicode_utf8_to_byte(unicode_cpt_to_utf8(unicode_sequence)); } return decoded_text; diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp index 3596ce55a..a0e2caf94 100644 --- a/tests/test-tokenizer-1-bpe.cpp +++ b/tests/test-tokenizer-1-bpe.cpp @@ -64,7 +64,7 @@ int main(int argc, char **argv) { for (int i = 0; i < n_vocab; ++i) { std::string str = llama_detokenize_bpe(ctx, std::vector(1, i)); try { - auto cps = codepoints_from_utf8(str); + auto cps = unicode_cpts_from_utf8(str); std::vector tokens = llama_tokenize(ctx, str, false); std::string check = llama_detokenize_bpe(ctx, tokens); if (check != str) { @@ -97,7 +97,7 @@ int main(int argc, char **argv) { continue; } - std::string str = codepoint_to_utf8(cp); + std::string str = unicode_cpt_to_utf8(cp); std::vector tokens = llama_tokenize(ctx, str, false); std::string check = llama_detokenize_bpe(ctx, tokens); if (cp != 9601 && str != check) { diff --git a/tests/test-tokenizer-1-llama.cpp b/tests/test-tokenizer-1-llama.cpp index 9333f8686..8caf0b24e 100644 --- a/tests/test-tokenizer-1-llama.cpp +++ b/tests/test-tokenizer-1-llama.cpp @@ -85,7 +85,7 @@ int main(int argc, char **argv) { continue; } - std::string str = codepoint_to_utf8(cp); + std::string str = unicode_cpt_to_utf8(cp); std::vector tokens = llama_tokenize(ctx, str, false); std::string check = llama_detokenize_spm(ctx, tokens); if (cp != 9601 && str != check) { diff --git a/unicode.cpp b/unicode.cpp new file mode 100644 index 000000000..7fce6fb34 --- /dev/null +++ b/unicode.cpp @@ -0,0 +1,1672 @@ +#include "unicode.h" + +#include +#include +#include +#include +#include +#include + +static const std::vector> unicode_ranges_digit = { +{0x00000030, 0x00000039}, {0x000000B2, 0x000000B3}, {0x000000B9, 0x000000B9}, {0x00000660, 0x00000669}, +{0x000006F0, 0x000006F9}, {0x000007C0, 0x000007C9}, {0x00000966, 0x0000096F}, {0x000009E6, 0x000009EF}, +{0x00000A66, 0x00000A6F}, {0x00000AE6, 0x00000AEF}, {0x00000B66, 0x00000B6F}, {0x00000BE6, 0x00000BEF}, +{0x00000C66, 0x00000C6F}, {0x00000CE6, 0x00000CEF}, {0x00000D66, 0x00000D6F}, {0x00000DE6, 0x00000DEF}, +{0x00000E50, 0x00000E59}, {0x00000ED0, 0x00000ED9}, {0x00000F20, 0x00000F29}, {0x00001040, 0x00001049}, +{0x00001090, 0x00001099}, {0x00001369, 0x00001371}, {0x000017E0, 0x000017E9}, {0x00001810, 0x00001819}, +{0x00001946, 0x0000194F}, {0x000019D0, 0x000019DA}, {0x00001A80, 0x00001A89}, {0x00001A90, 0x00001A99}, +{0x00001B50, 0x00001B59}, {0x00001BB0, 0x00001BB9}, {0x00001C40, 0x00001C49}, {0x00001C50, 0x00001C59}, +{0x00002070, 0x00002070}, {0x00002074, 0x00002079}, {0x00002080, 0x00002089}, {0x00002460, 0x00002468}, +{0x00002474, 0x0000247C}, {0x00002488, 0x00002490}, {0x000024EA, 0x000024EA}, {0x000024F5, 0x000024FD}, +{0x000024FF, 0x000024FF}, {0x00002776, 0x0000277E}, {0x00002780, 0x00002788}, {0x0000278A, 0x00002792}, +{0x0000A620, 0x0000A629}, {0x0000A8D0, 0x0000A8D9}, {0x0000A900, 0x0000A909}, {0x0000A9D0, 0x0000A9D9}, +{0x0000A9F0, 0x0000A9F9}, {0x0000AA50, 0x0000AA59}, {0x0000ABF0, 0x0000ABF9}, {0x0000FF10, 0x0000FF19}, +{0x000104A0, 0x000104A9}, {0x00010A40, 0x00010A43}, {0x00010D30, 0x00010D39}, {0x00010E60, 0x00010E68}, +{0x00011052, 0x0001105A}, {0x00011066, 0x0001106F}, {0x000110F0, 0x000110F9}, {0x00011136, 0x0001113F}, +{0x000111D0, 0x000111D9}, {0x000112F0, 0x000112F9}, {0x00011450, 0x00011459}, {0x000114D0, 0x000114D9}, +{0x00011650, 0x00011659}, {0x000116C0, 0x000116C9}, {0x00011730, 0x00011739}, {0x000118E0, 0x000118E9}, +{0x00011950, 0x00011959}, {0x00011C50, 0x00011C59}, {0x00011D50, 0x00011D59}, {0x00011DA0, 0x00011DA9}, +{0x00016A60, 0x00016A69}, {0x00016B50, 0x00016B59}, {0x0001D7CE, 0x0001D7FF}, {0x0001E140, 0x0001E149}, +{0x0001E2F0, 0x0001E2F9}, {0x0001E950, 0x0001E959}, {0x0001F100, 0x0001F10A}, {0x0001FBF0, 0x0001FBF9}, +}; + +static const std::vector> unicode_ranges_letter = { +{0x00000041, 0x0000005A}, {0x00000061, 0x0000007A}, {0x000000AA, 0x000000AA}, {0x000000B5, 0x000000B5}, +{0x000000BA, 0x000000BA}, {0x000000C0, 0x000000D6}, {0x000000D8, 0x000000F6}, {0x000000F8, 0x000002C1}, +{0x000002C6, 0x000002D1}, {0x000002E0, 0x000002E4}, {0x000002EC, 0x000002EC}, {0x000002EE, 0x000002EE}, +{0x00000370, 0x00000374}, {0x00000376, 0x00000377}, {0x0000037A, 0x0000037D}, {0x0000037F, 0x0000037F}, +{0x00000386, 0x00000386}, {0x00000388, 0x0000038A}, {0x0000038C, 0x0000038C}, {0x0000038E, 0x000003A1}, +{0x000003A3, 0x000003F5}, {0x000003F7, 0x00000481}, {0x0000048A, 0x0000052F}, {0x00000531, 0x00000556}, +{0x00000559, 0x00000559}, {0x00000560, 0x00000588}, {0x000005D0, 0x000005EA}, {0x000005EF, 0x000005F2}, +{0x00000620, 0x0000064A}, {0x0000066E, 0x0000066F}, {0x00000671, 0x000006D3}, {0x000006D5, 0x000006D5}, +{0x000006E5, 0x000006E6}, {0x000006EE, 0x000006EF}, {0x000006FA, 0x000006FC}, {0x000006FF, 0x000006FF}, +{0x00000710, 0x00000710}, {0x00000712, 0x0000072F}, {0x0000074D, 0x000007A5}, {0x000007B1, 0x000007B1}, +{0x000007CA, 0x000007EA}, {0x000007F4, 0x000007F5}, {0x000007FA, 0x000007FA}, {0x00000800, 0x00000815}, +{0x0000081A, 0x0000081A}, {0x00000824, 0x00000824}, {0x00000828, 0x00000828}, {0x00000840, 0x00000858}, +{0x00000860, 0x0000086A}, {0x000008A0, 0x000008B4}, {0x000008B6, 0x000008C7}, {0x00000904, 0x00000939}, +{0x0000093D, 0x0000093D}, {0x00000950, 0x00000950}, {0x00000958, 0x00000961}, {0x00000971, 0x00000980}, +{0x00000985, 0x0000098C}, {0x0000098F, 0x00000990}, {0x00000993, 0x000009A8}, {0x000009AA, 0x000009B0}, +{0x000009B2, 0x000009B2}, {0x000009B6, 0x000009B9}, {0x000009BD, 0x000009BD}, {0x000009CE, 0x000009CE}, +{0x000009DC, 0x000009DD}, {0x000009DF, 0x000009E1}, {0x000009F0, 0x000009F1}, {0x000009FC, 0x000009FC}, +{0x00000A05, 0x00000A0A}, {0x00000A0F, 0x00000A10}, {0x00000A13, 0x00000A28}, {0x00000A2A, 0x00000A30}, +{0x00000A32, 0x00000A33}, {0x00000A35, 0x00000A36}, {0x00000A38, 0x00000A39}, {0x00000A59, 0x00000A5C}, +{0x00000A5E, 0x00000A5E}, {0x00000A72, 0x00000A74}, {0x00000A85, 0x00000A8D}, {0x00000A8F, 0x00000A91}, +{0x00000A93, 0x00000AA8}, {0x00000AAA, 0x00000AB0}, {0x00000AB2, 0x00000AB3}, {0x00000AB5, 0x00000AB9}, +{0x00000ABD, 0x00000ABD}, {0x00000AD0, 0x00000AD0}, {0x00000AE0, 0x00000AE1}, {0x00000AF9, 0x00000AF9}, +{0x00000B05, 0x00000B0C}, {0x00000B0F, 0x00000B10}, {0x00000B13, 0x00000B28}, {0x00000B2A, 0x00000B30}, +{0x00000B32, 0x00000B33}, {0x00000B35, 0x00000B39}, {0x00000B3D, 0x00000B3D}, {0x00000B5C, 0x00000B5D}, +{0x00000B5F, 0x00000B61}, {0x00000B71, 0x00000B71}, {0x00000B83, 0x00000B83}, {0x00000B85, 0x00000B8A}, +{0x00000B8E, 0x00000B90}, {0x00000B92, 0x00000B95}, {0x00000B99, 0x00000B9A}, {0x00000B9C, 0x00000B9C}, +{0x00000B9E, 0x00000B9F}, {0x00000BA3, 0x00000BA4}, {0x00000BA8, 0x00000BAA}, {0x00000BAE, 0x00000BB9}, +{0x00000BD0, 0x00000BD0}, {0x00000C05, 0x00000C0C}, {0x00000C0E, 0x00000C10}, {0x00000C12, 0x00000C28}, +{0x00000C2A, 0x00000C39}, {0x00000C3D, 0x00000C3D}, {0x00000C58, 0x00000C5A}, {0x00000C60, 0x00000C61}, +{0x00000C80, 0x00000C80}, {0x00000C85, 0x00000C8C}, {0x00000C8E, 0x00000C90}, {0x00000C92, 0x00000CA8}, +{0x00000CAA, 0x00000CB3}, {0x00000CB5, 0x00000CB9}, {0x00000CBD, 0x00000CBD}, {0x00000CDE, 0x00000CDE}, +{0x00000CE0, 0x00000CE1}, {0x00000CF1, 0x00000CF2}, {0x00000D04, 0x00000D0C}, {0x00000D0E, 0x00000D10}, +{0x00000D12, 0x00000D3A}, {0x00000D3D, 0x00000D3D}, {0x00000D4E, 0x00000D4E}, {0x00000D54, 0x00000D56}, +{0x00000D5F, 0x00000D61}, {0x00000D7A, 0x00000D7F}, {0x00000D85, 0x00000D96}, {0x00000D9A, 0x00000DB1}, +{0x00000DB3, 0x00000DBB}, {0x00000DBD, 0x00000DBD}, {0x00000DC0, 0x00000DC6}, {0x00000E01, 0x00000E30}, +{0x00000E32, 0x00000E33}, {0x00000E40, 0x00000E46}, {0x00000E81, 0x00000E82}, {0x00000E84, 0x00000E84}, +{0x00000E86, 0x00000E8A}, {0x00000E8C, 0x00000EA3}, {0x00000EA5, 0x00000EA5}, {0x00000EA7, 0x00000EB0}, +{0x00000EB2, 0x00000EB3}, {0x00000EBD, 0x00000EBD}, {0x00000EC0, 0x00000EC4}, {0x00000EC6, 0x00000EC6}, +{0x00000EDC, 0x00000EDF}, {0x00000F00, 0x00000F00}, {0x00000F40, 0x00000F47}, {0x00000F49, 0x00000F6C}, +{0x00000F88, 0x00000F8C}, {0x00001000, 0x0000102A}, {0x0000103F, 0x0000103F}, {0x00001050, 0x00001055}, +{0x0000105A, 0x0000105D}, {0x00001061, 0x00001061}, {0x00001065, 0x00001066}, {0x0000106E, 0x00001070}, +{0x00001075, 0x00001081}, {0x0000108E, 0x0000108E}, {0x000010A0, 0x000010C5}, {0x000010C7, 0x000010C7}, +{0x000010CD, 0x000010CD}, {0x000010D0, 0x000010FA}, {0x000010FC, 0x00001248}, {0x0000124A, 0x0000124D}, +{0x00001250, 0x00001256}, {0x00001258, 0x00001258}, {0x0000125A, 0x0000125D}, {0x00001260, 0x00001288}, +{0x0000128A, 0x0000128D}, {0x00001290, 0x000012B0}, {0x000012B2, 0x000012B5}, {0x000012B8, 0x000012BE}, +{0x000012C0, 0x000012C0}, {0x000012C2, 0x000012C5}, {0x000012C8, 0x000012D6}, {0x000012D8, 0x00001310}, +{0x00001312, 0x00001315}, {0x00001318, 0x0000135A}, {0x00001380, 0x0000138F}, {0x000013A0, 0x000013F5}, +{0x000013F8, 0x000013FD}, {0x00001401, 0x0000166C}, {0x0000166F, 0x0000167F}, {0x00001681, 0x0000169A}, +{0x000016A0, 0x000016EA}, {0x000016F1, 0x000016F8}, {0x00001700, 0x0000170C}, {0x0000170E, 0x00001711}, +{0x00001720, 0x00001731}, {0x00001740, 0x00001751}, {0x00001760, 0x0000176C}, {0x0000176E, 0x00001770}, +{0x00001780, 0x000017B3}, {0x000017D7, 0x000017D7}, {0x000017DC, 0x000017DC}, {0x00001820, 0x00001878}, +{0x00001880, 0x00001884}, {0x00001887, 0x000018A8}, {0x000018AA, 0x000018AA}, {0x000018B0, 0x000018F5}, +{0x00001900, 0x0000191E}, {0x00001950, 0x0000196D}, {0x00001970, 0x00001974}, {0x00001980, 0x000019AB}, +{0x000019B0, 0x000019C9}, {0x00001A00, 0x00001A16}, {0x00001A20, 0x00001A54}, {0x00001AA7, 0x00001AA7}, +{0x00001B05, 0x00001B33}, {0x00001B45, 0x00001B4B}, {0x00001B83, 0x00001BA0}, {0x00001BAE, 0x00001BAF}, +{0x00001BBA, 0x00001BE5}, {0x00001C00, 0x00001C23}, {0x00001C4D, 0x00001C4F}, {0x00001C5A, 0x00001C7D}, +{0x00001C80, 0x00001C88}, {0x00001C90, 0x00001CBA}, {0x00001CBD, 0x00001CBF}, {0x00001CE9, 0x00001CEC}, +{0x00001CEE, 0x00001CF3}, {0x00001CF5, 0x00001CF6}, {0x00001CFA, 0x00001CFA}, {0x00001D00, 0x00001DBF}, +{0x00001E00, 0x00001F15}, {0x00001F18, 0x00001F1D}, {0x00001F20, 0x00001F45}, {0x00001F48, 0x00001F4D}, +{0x00001F50, 0x00001F57}, {0x00001F59, 0x00001F59}, {0x00001F5B, 0x00001F5B}, {0x00001F5D, 0x00001F5D}, +{0x00001F5F, 0x00001F7D}, {0x00001F80, 0x00001FB4}, {0x00001FB6, 0x00001FBC}, {0x00001FBE, 0x00001FBE}, +{0x00001FC2, 0x00001FC4}, {0x00001FC6, 0x00001FCC}, {0x00001FD0, 0x00001FD3}, {0x00001FD6, 0x00001FDB}, +{0x00001FE0, 0x00001FEC}, {0x00001FF2, 0x00001FF4}, {0x00001FF6, 0x00001FFC}, {0x00002071, 0x00002071}, +{0x0000207F, 0x0000207F}, {0x00002090, 0x0000209C}, {0x00002102, 0x00002102}, {0x00002107, 0x00002107}, +{0x0000210A, 0x00002113}, {0x00002115, 0x00002115}, {0x00002119, 0x0000211D}, {0x00002124, 0x00002124}, +{0x00002126, 0x00002126}, {0x00002128, 0x00002128}, {0x0000212A, 0x0000212D}, {0x0000212F, 0x00002139}, +{0x0000213C, 0x0000213F}, {0x00002145, 0x00002149}, {0x0000214E, 0x0000214E}, {0x00002183, 0x00002184}, +{0x00002C00, 0x00002C2E}, {0x00002C30, 0x00002C5E}, {0x00002C60, 0x00002CE4}, {0x00002CEB, 0x00002CEE}, +{0x00002CF2, 0x00002CF3}, {0x00002D00, 0x00002D25}, {0x00002D27, 0x00002D27}, {0x00002D2D, 0x00002D2D}, +{0x00002D30, 0x00002D67}, {0x00002D6F, 0x00002D6F}, {0x00002D80, 0x00002D96}, {0x00002DA0, 0x00002DA6}, +{0x00002DA8, 0x00002DAE}, {0x00002DB0, 0x00002DB6}, {0x00002DB8, 0x00002DBE}, {0x00002DC0, 0x00002DC6}, +{0x00002DC8, 0x00002DCE}, {0x00002DD0, 0x00002DD6}, {0x00002DD8, 0x00002DDE}, {0x00002E2F, 0x00002E2F}, +{0x00003005, 0x00003006}, {0x00003031, 0x00003035}, {0x0000303B, 0x0000303C}, {0x00003041, 0x00003096}, +{0x0000309D, 0x0000309F}, {0x000030A1, 0x000030FA}, {0x000030FC, 0x000030FF}, {0x00003105, 0x0000312F}, +{0x00003131, 0x0000318E}, {0x000031A0, 0x000031BF}, {0x000031F0, 0x000031FF}, {0x00003400, 0x00004DBF}, +{0x00004E00, 0x00009FFC}, {0x0000A000, 0x0000A48C}, {0x0000A4D0, 0x0000A4FD}, {0x0000A500, 0x0000A60C}, +{0x0000A610, 0x0000A61F}, {0x0000A62A, 0x0000A62B}, {0x0000A640, 0x0000A66E}, {0x0000A67F, 0x0000A69D}, +{0x0000A6A0, 0x0000A6E5}, {0x0000A717, 0x0000A71F}, {0x0000A722, 0x0000A788}, {0x0000A78B, 0x0000A7BF}, +{0x0000A7C2, 0x0000A7CA}, {0x0000A7F5, 0x0000A801}, {0x0000A803, 0x0000A805}, {0x0000A807, 0x0000A80A}, +{0x0000A80C, 0x0000A822}, {0x0000A840, 0x0000A873}, {0x0000A882, 0x0000A8B3}, {0x0000A8F2, 0x0000A8F7}, +{0x0000A8FB, 0x0000A8FB}, {0x0000A8FD, 0x0000A8FE}, {0x0000A90A, 0x0000A925}, {0x0000A930, 0x0000A946}, +{0x0000A960, 0x0000A97C}, {0x0000A984, 0x0000A9B2}, {0x0000A9CF, 0x0000A9CF}, {0x0000A9E0, 0x0000A9E4}, +{0x0000A9E6, 0x0000A9EF}, {0x0000A9FA, 0x0000A9FE}, {0x0000AA00, 0x0000AA28}, {0x0000AA40, 0x0000AA42}, +{0x0000AA44, 0x0000AA4B}, {0x0000AA60, 0x0000AA76}, {0x0000AA7A, 0x0000AA7A}, {0x0000AA7E, 0x0000AAAF}, +{0x0000AAB1, 0x0000AAB1}, {0x0000AAB5, 0x0000AAB6}, {0x0000AAB9, 0x0000AABD}, {0x0000AAC0, 0x0000AAC0}, +{0x0000AAC2, 0x0000AAC2}, {0x0000AADB, 0x0000AADD}, {0x0000AAE0, 0x0000AAEA}, {0x0000AAF2, 0x0000AAF4}, +{0x0000AB01, 0x0000AB06}, {0x0000AB09, 0x0000AB0E}, {0x0000AB11, 0x0000AB16}, {0x0000AB20, 0x0000AB26}, +{0x0000AB28, 0x0000AB2E}, {0x0000AB30, 0x0000AB5A}, {0x0000AB5C, 0x0000AB69}, {0x0000AB70, 0x0000ABE2}, +{0x0000AC00, 0x0000D7A3}, {0x0000D7B0, 0x0000D7C6}, {0x0000D7CB, 0x0000D7FB}, {0x0000F900, 0x0000FA6D}, +{0x0000FA70, 0x0000FAD9}, {0x0000FB00, 0x0000FB06}, {0x0000FB13, 0x0000FB17}, {0x0000FB1D, 0x0000FB1D}, +{0x0000FB1F, 0x0000FB28}, {0x0000FB2A, 0x0000FB36}, {0x0000FB38, 0x0000FB3C}, {0x0000FB3E, 0x0000FB3E}, +{0x0000FB40, 0x0000FB41}, {0x0000FB43, 0x0000FB44}, {0x0000FB46, 0x0000FBB1}, {0x0000FBD3, 0x0000FD3D}, +{0x0000FD50, 0x0000FD8F}, {0x0000FD92, 0x0000FDC7}, {0x0000FDF0, 0x0000FDFB}, {0x0000FE70, 0x0000FE74}, +{0x0000FE76, 0x0000FEFC}, {0x0000FF21, 0x0000FF3A}, {0x0000FF41, 0x0000FF5A}, {0x0000FF66, 0x0000FFBE}, +{0x0000FFC2, 0x0000FFC7}, {0x0000FFCA, 0x0000FFCF}, {0x0000FFD2, 0x0000FFD7}, {0x0000FFDA, 0x0000FFDC}, +{0x00010000, 0x0001000B}, {0x0001000D, 0x00010026}, {0x00010028, 0x0001003A}, {0x0001003C, 0x0001003D}, +{0x0001003F, 0x0001004D}, {0x00010050, 0x0001005D}, {0x00010080, 0x000100FA}, {0x00010280, 0x0001029C}, +{0x000102A0, 0x000102D0}, {0x00010300, 0x0001031F}, {0x0001032D, 0x00010340}, {0x00010342, 0x00010349}, +{0x00010350, 0x00010375}, {0x00010380, 0x0001039D}, {0x000103A0, 0x000103C3}, {0x000103C8, 0x000103CF}, +{0x00010400, 0x0001049D}, {0x000104B0, 0x000104D3}, {0x000104D8, 0x000104FB}, {0x00010500, 0x00010527}, +{0x00010530, 0x00010563}, {0x00010600, 0x00010736}, {0x00010740, 0x00010755}, {0x00010760, 0x00010767}, +{0x00010800, 0x00010805}, {0x00010808, 0x00010808}, {0x0001080A, 0x00010835}, {0x00010837, 0x00010838}, +{0x0001083C, 0x0001083C}, {0x0001083F, 0x00010855}, {0x00010860, 0x00010876}, {0x00010880, 0x0001089E}, +{0x000108E0, 0x000108F2}, {0x000108F4, 0x000108F5}, {0x00010900, 0x00010915}, {0x00010920, 0x00010939}, +{0x00010980, 0x000109B7}, {0x000109BE, 0x000109BF}, {0x00010A00, 0x00010A00}, {0x00010A10, 0x00010A13}, +{0x00010A15, 0x00010A17}, {0x00010A19, 0x00010A35}, {0x00010A60, 0x00010A7C}, {0x00010A80, 0x00010A9C}, +{0x00010AC0, 0x00010AC7}, {0x00010AC9, 0x00010AE4}, {0x00010B00, 0x00010B35}, {0x00010B40, 0x00010B55}, +{0x00010B60, 0x00010B72}, {0x00010B80, 0x00010B91}, {0x00010C00, 0x00010C48}, {0x00010C80, 0x00010CB2}, +{0x00010CC0, 0x00010CF2}, {0x00010D00, 0x00010D23}, {0x00010E80, 0x00010EA9}, {0x00010EB0, 0x00010EB1}, +{0x00010F00, 0x00010F1C}, {0x00010F27, 0x00010F27}, {0x00010F30, 0x00010F45}, {0x00010FB0, 0x00010FC4}, +{0x00010FE0, 0x00010FF6}, {0x00011003, 0x00011037}, {0x00011083, 0x000110AF}, {0x000110D0, 0x000110E8}, +{0x00011103, 0x00011126}, {0x00011144, 0x00011144}, {0x00011147, 0x00011147}, {0x00011150, 0x00011172}, +{0x00011176, 0x00011176}, {0x00011183, 0x000111B2}, {0x000111C1, 0x000111C4}, {0x000111DA, 0x000111DA}, +{0x000111DC, 0x000111DC}, {0x00011200, 0x00011211}, {0x00011213, 0x0001122B}, {0x00011280, 0x00011286}, +{0x00011288, 0x00011288}, {0x0001128A, 0x0001128D}, {0x0001128F, 0x0001129D}, {0x0001129F, 0x000112A8}, +{0x000112B0, 0x000112DE}, {0x00011305, 0x0001130C}, {0x0001130F, 0x00011310}, {0x00011313, 0x00011328}, +{0x0001132A, 0x00011330}, {0x00011332, 0x00011333}, {0x00011335, 0x00011339}, {0x0001133D, 0x0001133D}, +{0x00011350, 0x00011350}, {0x0001135D, 0x00011361}, {0x00011400, 0x00011434}, {0x00011447, 0x0001144A}, +{0x0001145F, 0x00011461}, {0x00011480, 0x000114AF}, {0x000114C4, 0x000114C5}, {0x000114C7, 0x000114C7}, +{0x00011580, 0x000115AE}, {0x000115D8, 0x000115DB}, {0x00011600, 0x0001162F}, {0x00011644, 0x00011644}, +{0x00011680, 0x000116AA}, {0x000116B8, 0x000116B8}, {0x00011700, 0x0001171A}, {0x00011800, 0x0001182B}, +{0x000118A0, 0x000118DF}, {0x000118FF, 0x00011906}, {0x00011909, 0x00011909}, {0x0001190C, 0x00011913}, +{0x00011915, 0x00011916}, {0x00011918, 0x0001192F}, {0x0001193F, 0x0001193F}, {0x00011941, 0x00011941}, +{0x000119A0, 0x000119A7}, {0x000119AA, 0x000119D0}, {0x000119E1, 0x000119E1}, {0x000119E3, 0x000119E3}, +{0x00011A00, 0x00011A00}, {0x00011A0B, 0x00011A32}, {0x00011A3A, 0x00011A3A}, {0x00011A50, 0x00011A50}, +{0x00011A5C, 0x00011A89}, {0x00011A9D, 0x00011A9D}, {0x00011AC0, 0x00011AF8}, {0x00011C00, 0x00011C08}, +{0x00011C0A, 0x00011C2E}, {0x00011C40, 0x00011C40}, {0x00011C72, 0x00011C8F}, {0x00011D00, 0x00011D06}, +{0x00011D08, 0x00011D09}, {0x00011D0B, 0x00011D30}, {0x00011D46, 0x00011D46}, {0x00011D60, 0x00011D65}, +{0x00011D67, 0x00011D68}, {0x00011D6A, 0x00011D89}, {0x00011D98, 0x00011D98}, {0x00011EE0, 0x00011EF2}, +{0x00011FB0, 0x00011FB0}, {0x00012000, 0x00012399}, {0x00012480, 0x00012543}, {0x00013000, 0x0001342E}, +{0x00014400, 0x00014646}, {0x00016800, 0x00016A38}, {0x00016A40, 0x00016A5E}, {0x00016AD0, 0x00016AED}, +{0x00016B00, 0x00016B2F}, {0x00016B40, 0x00016B43}, {0x00016B63, 0x00016B77}, {0x00016B7D, 0x00016B8F}, +{0x00016E40, 0x00016E7F}, {0x00016F00, 0x00016F4A}, {0x00016F50, 0x00016F50}, {0x00016F93, 0x00016F9F}, +{0x00016FE0, 0x00016FE1}, {0x00016FE3, 0x00016FE3}, {0x00017000, 0x000187F7}, {0x00018800, 0x00018CD5}, +{0x00018D00, 0x00018D08}, {0x0001B000, 0x0001B11E}, {0x0001B150, 0x0001B152}, {0x0001B164, 0x0001B167}, +{0x0001B170, 0x0001B2FB}, {0x0001BC00, 0x0001BC6A}, {0x0001BC70, 0x0001BC7C}, {0x0001BC80, 0x0001BC88}, +{0x0001BC90, 0x0001BC99}, {0x0001D400, 0x0001D454}, {0x0001D456, 0x0001D49C}, {0x0001D49E, 0x0001D49F}, +{0x0001D4A2, 0x0001D4A2}, {0x0001D4A5, 0x0001D4A6}, {0x0001D4A9, 0x0001D4AC}, {0x0001D4AE, 0x0001D4B9}, +{0x0001D4BB, 0x0001D4BB}, {0x0001D4BD, 0x0001D4C3}, {0x0001D4C5, 0x0001D505}, {0x0001D507, 0x0001D50A}, +{0x0001D50D, 0x0001D514}, {0x0001D516, 0x0001D51C}, {0x0001D51E, 0x0001D539}, {0x0001D53B, 0x0001D53E}, +{0x0001D540, 0x0001D544}, {0x0001D546, 0x0001D546}, {0x0001D54A, 0x0001D550}, {0x0001D552, 0x0001D6A5}, +{0x0001D6A8, 0x0001D6C0}, {0x0001D6C2, 0x0001D6DA}, {0x0001D6DC, 0x0001D6FA}, {0x0001D6FC, 0x0001D714}, +{0x0001D716, 0x0001D734}, {0x0001D736, 0x0001D74E}, {0x0001D750, 0x0001D76E}, {0x0001D770, 0x0001D788}, +{0x0001D78A, 0x0001D7A8}, {0x0001D7AA, 0x0001D7C2}, {0x0001D7C4, 0x0001D7CB}, {0x0001E100, 0x0001E12C}, +{0x0001E137, 0x0001E13D}, {0x0001E14E, 0x0001E14E}, {0x0001E2C0, 0x0001E2EB}, {0x0001E800, 0x0001E8C4}, +{0x0001E900, 0x0001E943}, {0x0001E94B, 0x0001E94B}, {0x0001EE00, 0x0001EE03}, {0x0001EE05, 0x0001EE1F}, +{0x0001EE21, 0x0001EE22}, {0x0001EE24, 0x0001EE24}, {0x0001EE27, 0x0001EE27}, {0x0001EE29, 0x0001EE32}, +{0x0001EE34, 0x0001EE37}, {0x0001EE39, 0x0001EE39}, {0x0001EE3B, 0x0001EE3B}, {0x0001EE42, 0x0001EE42}, +{0x0001EE47, 0x0001EE47}, {0x0001EE49, 0x0001EE49}, {0x0001EE4B, 0x0001EE4B}, {0x0001EE4D, 0x0001EE4F}, +{0x0001EE51, 0x0001EE52}, {0x0001EE54, 0x0001EE54}, {0x0001EE57, 0x0001EE57}, {0x0001EE59, 0x0001EE59}, +{0x0001EE5B, 0x0001EE5B}, {0x0001EE5D, 0x0001EE5D}, {0x0001EE5F, 0x0001EE5F}, {0x0001EE61, 0x0001EE62}, +{0x0001EE64, 0x0001EE64}, {0x0001EE67, 0x0001EE6A}, {0x0001EE6C, 0x0001EE72}, {0x0001EE74, 0x0001EE77}, +{0x0001EE79, 0x0001EE7C}, {0x0001EE7E, 0x0001EE7E}, {0x0001EE80, 0x0001EE89}, {0x0001EE8B, 0x0001EE9B}, +{0x0001EEA1, 0x0001EEA3}, {0x0001EEA5, 0x0001EEA9}, {0x0001EEAB, 0x0001EEBB}, {0x00020000, 0x0002A6DD}, +{0x0002A700, 0x0002B734}, {0x0002B740, 0x0002B81D}, {0x0002B820, 0x0002CEA1}, {0x0002CEB0, 0x0002EBE0}, +{0x0002F800, 0x0002FA1D}, {0x00030000, 0x0003134A}, +}; + +static const std::vector> unicode_ranges_whitespace = { +{0x00000009, 0x0000000D}, {0x0000001C, 0x00000020}, {0x00000085, 0x00000085}, {0x000000A0, 0x000000A0}, +{0x00001680, 0x00001680}, {0x00002000, 0x0000200A}, {0x00002028, 0x00002029}, {0x0000202F, 0x0000202F}, +{0x0000205F, 0x0000205F}, {0x00003000, 0x00003000}, +}; + +static const std::vector> unicode_ranges_accent_mark = { +{0x00000300, 0x0000036F}, {0x00000483, 0x00000489}, {0x00000591, 0x000005BD}, {0x000005BF, 0x000005BF}, +{0x000005C1, 0x000005C2}, {0x000005C4, 0x000005C5}, {0x000005C7, 0x000005C7}, {0x00000610, 0x0000061A}, +{0x0000064B, 0x0000065F}, {0x00000670, 0x00000670}, {0x000006D6, 0x000006DC}, {0x000006DF, 0x000006E4}, +{0x000006E7, 0x000006E8}, {0x000006EA, 0x000006ED}, {0x00000711, 0x00000711}, {0x00000730, 0x0000074A}, +{0x000007A6, 0x000007B0}, {0x000007EB, 0x000007F3}, {0x000007FD, 0x000007FD}, {0x00000816, 0x00000819}, +{0x0000081B, 0x00000823}, {0x00000825, 0x00000827}, {0x00000829, 0x0000082D}, {0x00000859, 0x0000085B}, +{0x000008D3, 0x000008E1}, {0x000008E3, 0x00000903}, {0x0000093A, 0x0000093C}, {0x0000093E, 0x0000094F}, +{0x00000951, 0x00000957}, {0x00000962, 0x00000963}, {0x00000981, 0x00000983}, {0x000009BC, 0x000009BC}, +{0x000009BE, 0x000009C4}, {0x000009C7, 0x000009C8}, {0x000009CB, 0x000009CD}, {0x000009D7, 0x000009D7}, +{0x000009E2, 0x000009E3}, {0x000009FE, 0x000009FE}, {0x00000A01, 0x00000A03}, {0x00000A3C, 0x00000A3C}, +{0x00000A3E, 0x00000A42}, {0x00000A47, 0x00000A48}, {0x00000A4B, 0x00000A4D}, {0x00000A51, 0x00000A51}, +{0x00000A70, 0x00000A71}, {0x00000A75, 0x00000A75}, {0x00000A81, 0x00000A83}, {0x00000ABC, 0x00000ABC}, +{0x00000ABE, 0x00000AC5}, {0x00000AC7, 0x00000AC9}, {0x00000ACB, 0x00000ACD}, {0x00000AE2, 0x00000AE3}, +{0x00000AFA, 0x00000AFF}, {0x00000B01, 0x00000B03}, {0x00000B3C, 0x00000B3C}, {0x00000B3E, 0x00000B44}, +{0x00000B47, 0x00000B48}, {0x00000B4B, 0x00000B4D}, {0x00000B55, 0x00000B57}, {0x00000B62, 0x00000B63}, +{0x00000B82, 0x00000B82}, {0x00000BBE, 0x00000BC2}, {0x00000BC6, 0x00000BC8}, {0x00000BCA, 0x00000BCD}, +{0x00000BD7, 0x00000BD7}, {0x00000C00, 0x00000C04}, {0x00000C3E, 0x00000C44}, {0x00000C46, 0x00000C48}, +{0x00000C4A, 0x00000C4D}, {0x00000C55, 0x00000C56}, {0x00000C62, 0x00000C63}, {0x00000C81, 0x00000C83}, +{0x00000CBC, 0x00000CBC}, {0x00000CBE, 0x00000CC4}, {0x00000CC6, 0x00000CC8}, {0x00000CCA, 0x00000CCD}, +{0x00000CD5, 0x00000CD6}, {0x00000CE2, 0x00000CE3}, {0x00000D00, 0x00000D03}, {0x00000D3B, 0x00000D3C}, +{0x00000D3E, 0x00000D44}, {0x00000D46, 0x00000D48}, {0x00000D4A, 0x00000D4D}, {0x00000D57, 0x00000D57}, +{0x00000D62, 0x00000D63}, {0x00000D81, 0x00000D83}, {0x00000DCA, 0x00000DCA}, {0x00000DCF, 0x00000DD4}, +{0x00000DD6, 0x00000DD6}, {0x00000DD8, 0x00000DDF}, {0x00000DF2, 0x00000DF3}, {0x00000E31, 0x00000E31}, +{0x00000E34, 0x00000E3A}, {0x00000E47, 0x00000E4E}, {0x00000EB1, 0x00000EB1}, {0x00000EB4, 0x00000EBC}, +{0x00000EC8, 0x00000ECD}, {0x00000F18, 0x00000F19}, {0x00000F35, 0x00000F35}, {0x00000F37, 0x00000F37}, +{0x00000F39, 0x00000F39}, {0x00000F3E, 0x00000F3F}, {0x00000F71, 0x00000F84}, {0x00000F86, 0x00000F87}, +{0x00000F8D, 0x00000F97}, {0x00000F99, 0x00000FBC}, {0x00000FC6, 0x00000FC6}, {0x0000102B, 0x0000103E}, +{0x00001056, 0x00001059}, {0x0000105E, 0x00001060}, {0x00001062, 0x00001064}, {0x00001067, 0x0000106D}, +{0x00001071, 0x00001074}, {0x00001082, 0x0000108D}, {0x0000108F, 0x0000108F}, {0x0000109A, 0x0000109D}, +{0x0000135D, 0x0000135F}, {0x00001712, 0x00001714}, {0x00001732, 0x00001734}, {0x00001752, 0x00001753}, +{0x00001772, 0x00001773}, {0x000017B4, 0x000017D3}, {0x000017DD, 0x000017DD}, {0x0000180B, 0x0000180D}, +{0x00001885, 0x00001886}, {0x000018A9, 0x000018A9}, {0x00001920, 0x0000192B}, {0x00001930, 0x0000193B}, +{0x00001A17, 0x00001A1B}, {0x00001A55, 0x00001A5E}, {0x00001A60, 0x00001A7C}, {0x00001A7F, 0x00001A7F}, +{0x00001AB0, 0x00001AC0}, {0x00001B00, 0x00001B04}, {0x00001B34, 0x00001B44}, {0x00001B6B, 0x00001B73}, +{0x00001B80, 0x00001B82}, {0x00001BA1, 0x00001BAD}, {0x00001BE6, 0x00001BF3}, {0x00001C24, 0x00001C37}, +{0x00001CD0, 0x00001CD2}, {0x00001CD4, 0x00001CE8}, {0x00001CED, 0x00001CED}, {0x00001CF4, 0x00001CF4}, +{0x00001CF7, 0x00001CF9}, {0x00001DC0, 0x00001DF9}, {0x00001DFB, 0x00001DFF}, {0x000020D0, 0x000020F0}, +{0x00002CEF, 0x00002CF1}, {0x00002D7F, 0x00002D7F}, {0x00002DE0, 0x00002DFF}, {0x0000302A, 0x0000302F}, +{0x00003099, 0x0000309A}, {0x0000A66F, 0x0000A672}, {0x0000A674, 0x0000A67D}, {0x0000A69E, 0x0000A69F}, +{0x0000A6F0, 0x0000A6F1}, {0x0000A802, 0x0000A802}, {0x0000A806, 0x0000A806}, {0x0000A80B, 0x0000A80B}, +{0x0000A823, 0x0000A827}, {0x0000A82C, 0x0000A82C}, {0x0000A880, 0x0000A881}, {0x0000A8B4, 0x0000A8C5}, +{0x0000A8E0, 0x0000A8F1}, {0x0000A8FF, 0x0000A8FF}, {0x0000A926, 0x0000A92D}, {0x0000A947, 0x0000A953}, +{0x0000A980, 0x0000A983}, {0x0000A9B3, 0x0000A9C0}, {0x0000A9E5, 0x0000A9E5}, {0x0000AA29, 0x0000AA36}, +{0x0000AA43, 0x0000AA43}, {0x0000AA4C, 0x0000AA4D}, {0x0000AA7B, 0x0000AA7D}, {0x0000AAB0, 0x0000AAB0}, +{0x0000AAB2, 0x0000AAB4}, {0x0000AAB7, 0x0000AAB8}, {0x0000AABE, 0x0000AABF}, {0x0000AAC1, 0x0000AAC1}, +{0x0000AAEB, 0x0000AAEF}, {0x0000AAF5, 0x0000AAF6}, {0x0000ABE3, 0x0000ABEA}, {0x0000ABEC, 0x0000ABED}, +{0x0000FB1E, 0x0000FB1E}, {0x0000FE00, 0x0000FE0F}, {0x0000FE20, 0x0000FE2F}, {0x000101FD, 0x000101FD}, +{0x000102E0, 0x000102E0}, {0x00010376, 0x0001037A}, {0x00010A01, 0x00010A03}, {0x00010A05, 0x00010A06}, +{0x00010A0C, 0x00010A0F}, {0x00010A38, 0x00010A3A}, {0x00010A3F, 0x00010A3F}, {0x00010AE5, 0x00010AE6}, +{0x00010D24, 0x00010D27}, {0x00010EAB, 0x00010EAC}, {0x00010F46, 0x00010F50}, {0x00011000, 0x00011002}, +{0x00011038, 0x00011046}, {0x0001107F, 0x00011082}, {0x000110B0, 0x000110BA}, {0x00011100, 0x00011102}, +{0x00011127, 0x00011134}, {0x00011145, 0x00011146}, {0x00011173, 0x00011173}, {0x00011180, 0x00011182}, +{0x000111B3, 0x000111C0}, {0x000111C9, 0x000111CC}, {0x000111CE, 0x000111CF}, {0x0001122C, 0x00011237}, +{0x0001123E, 0x0001123E}, {0x000112DF, 0x000112EA}, {0x00011300, 0x00011303}, {0x0001133B, 0x0001133C}, +{0x0001133E, 0x00011344}, {0x00011347, 0x00011348}, {0x0001134B, 0x0001134D}, {0x00011357, 0x00011357}, +{0x00011362, 0x00011363}, {0x00011366, 0x0001136C}, {0x00011370, 0x00011374}, {0x00011435, 0x00011446}, +{0x0001145E, 0x0001145E}, {0x000114B0, 0x000114C3}, {0x000115AF, 0x000115B5}, {0x000115B8, 0x000115C0}, +{0x000115DC, 0x000115DD}, {0x00011630, 0x00011640}, {0x000116AB, 0x000116B7}, {0x0001171D, 0x0001172B}, +{0x0001182C, 0x0001183A}, {0x00011930, 0x00011935}, {0x00011937, 0x00011938}, {0x0001193B, 0x0001193E}, +{0x00011940, 0x00011940}, {0x00011942, 0x00011943}, {0x000119D1, 0x000119D7}, {0x000119DA, 0x000119E0}, +{0x000119E4, 0x000119E4}, {0x00011A01, 0x00011A0A}, {0x00011A33, 0x00011A39}, {0x00011A3B, 0x00011A3E}, +{0x00011A47, 0x00011A47}, {0x00011A51, 0x00011A5B}, {0x00011A8A, 0x00011A99}, {0x00011C2F, 0x00011C36}, +{0x00011C38, 0x00011C3F}, {0x00011C92, 0x00011CA7}, {0x00011CA9, 0x00011CB6}, {0x00011D31, 0x00011D36}, +{0x00011D3A, 0x00011D3A}, {0x00011D3C, 0x00011D3D}, {0x00011D3F, 0x00011D45}, {0x00011D47, 0x00011D47}, +{0x00011D8A, 0x00011D8E}, {0x00011D90, 0x00011D91}, {0x00011D93, 0x00011D97}, {0x00011EF3, 0x00011EF6}, +{0x00016AF0, 0x00016AF4}, {0x00016B30, 0x00016B36}, {0x00016F4F, 0x00016F4F}, {0x00016F51, 0x00016F87}, +{0x00016F8F, 0x00016F92}, {0x00016FE4, 0x00016FE4}, {0x00016FF0, 0x00016FF1}, {0x0001BC9D, 0x0001BC9E}, +{0x0001D165, 0x0001D169}, {0x0001D16D, 0x0001D172}, {0x0001D17B, 0x0001D182}, {0x0001D185, 0x0001D18B}, +{0x0001D1AA, 0x0001D1AD}, {0x0001D242, 0x0001D244}, {0x0001DA00, 0x0001DA36}, {0x0001DA3B, 0x0001DA6C}, +{0x0001DA75, 0x0001DA75}, {0x0001DA84, 0x0001DA84}, {0x0001DA9B, 0x0001DA9F}, {0x0001DAA1, 0x0001DAAF}, +{0x0001E000, 0x0001E006}, {0x0001E008, 0x0001E018}, {0x0001E01B, 0x0001E021}, {0x0001E023, 0x0001E024}, +{0x0001E026, 0x0001E02A}, {0x0001E130, 0x0001E136}, {0x0001E2EC, 0x0001E2EF}, {0x0001E8D0, 0x0001E8D6}, +{0x0001E944, 0x0001E94A}, {0x000E0100, 0x000E01EF}, +}; + +static const std::vector> unicode_ranges_punctuation = { +{0x00000021, 0x00000023}, {0x00000025, 0x0000002A}, {0x0000002C, 0x0000002F}, {0x0000003A, 0x0000003B}, +{0x0000003F, 0x00000040}, {0x0000005B, 0x0000005D}, {0x0000005F, 0x0000005F}, {0x0000007B, 0x0000007B}, +{0x0000007D, 0x0000007D}, {0x000000A1, 0x000000A1}, {0x000000A7, 0x000000A7}, {0x000000AB, 0x000000AB}, +{0x000000B6, 0x000000B7}, {0x000000BB, 0x000000BB}, {0x000000BF, 0x000000BF}, {0x0000037E, 0x0000037E}, +{0x00000387, 0x00000387}, {0x0000055A, 0x0000055F}, {0x00000589, 0x0000058A}, {0x000005BE, 0x000005BE}, +{0x000005C0, 0x000005C0}, {0x000005C3, 0x000005C3}, {0x000005C6, 0x000005C6}, {0x000005F3, 0x000005F4}, +{0x00000609, 0x0000060A}, {0x0000060C, 0x0000060D}, {0x0000061B, 0x0000061B}, {0x0000061E, 0x0000061F}, +{0x0000066A, 0x0000066D}, {0x000006D4, 0x000006D4}, {0x00000700, 0x0000070D}, {0x000007F7, 0x000007F9}, +{0x00000830, 0x0000083E}, {0x0000085E, 0x0000085E}, {0x00000964, 0x00000965}, {0x00000970, 0x00000970}, +{0x000009FD, 0x000009FD}, {0x00000A76, 0x00000A76}, {0x00000AF0, 0x00000AF0}, {0x00000C77, 0x00000C77}, +{0x00000C84, 0x00000C84}, {0x00000DF4, 0x00000DF4}, {0x00000E4F, 0x00000E4F}, {0x00000E5A, 0x00000E5B}, +{0x00000F04, 0x00000F12}, {0x00000F14, 0x00000F14}, {0x00000F3A, 0x00000F3D}, {0x00000F85, 0x00000F85}, +{0x00000FD0, 0x00000FD4}, {0x00000FD9, 0x00000FDA}, {0x0000104A, 0x0000104F}, {0x000010FB, 0x000010FB}, +{0x00001360, 0x00001368}, {0x00001400, 0x00001400}, {0x0000166E, 0x0000166E}, {0x0000169B, 0x0000169C}, +{0x000016EB, 0x000016ED}, {0x00001735, 0x00001736}, {0x000017D4, 0x000017D6}, {0x000017D8, 0x000017DA}, +{0x00001800, 0x0000180A}, {0x00001944, 0x00001945}, {0x00001A1E, 0x00001A1F}, {0x00001AA0, 0x00001AA6}, +{0x00001AA8, 0x00001AAD}, {0x00001B5A, 0x00001B60}, {0x00001BFC, 0x00001BFF}, {0x00001C3B, 0x00001C3F}, +{0x00001C7E, 0x00001C7F}, {0x00001CC0, 0x00001CC7}, {0x00001CD3, 0x00001CD3}, {0x00002010, 0x00002027}, +{0x00002030, 0x00002043}, {0x00002045, 0x00002051}, {0x00002053, 0x0000205E}, {0x0000207D, 0x0000207E}, +{0x0000208D, 0x0000208E}, {0x00002308, 0x0000230B}, {0x00002329, 0x0000232A}, {0x00002768, 0x00002775}, +{0x000027C5, 0x000027C6}, {0x000027E6, 0x000027EF}, {0x00002983, 0x00002998}, {0x000029D8, 0x000029DB}, +{0x000029FC, 0x000029FD}, {0x00002CF9, 0x00002CFC}, {0x00002CFE, 0x00002CFF}, {0x00002D70, 0x00002D70}, +{0x00002E00, 0x00002E2E}, {0x00002E30, 0x00002E4F}, {0x00002E52, 0x00002E52}, {0x00003001, 0x00003003}, +{0x00003008, 0x00003011}, {0x00003014, 0x0000301F}, {0x00003030, 0x00003030}, {0x0000303D, 0x0000303D}, +{0x000030A0, 0x000030A0}, {0x000030FB, 0x000030FB}, {0x0000A4FE, 0x0000A4FF}, {0x0000A60D, 0x0000A60F}, +{0x0000A673, 0x0000A673}, {0x0000A67E, 0x0000A67E}, {0x0000A6F2, 0x0000A6F7}, {0x0000A874, 0x0000A877}, +{0x0000A8CE, 0x0000A8CF}, {0x0000A8F8, 0x0000A8FA}, {0x0000A8FC, 0x0000A8FC}, {0x0000A92E, 0x0000A92F}, +{0x0000A95F, 0x0000A95F}, {0x0000A9C1, 0x0000A9CD}, {0x0000A9DE, 0x0000A9DF}, {0x0000AA5C, 0x0000AA5F}, +{0x0000AADE, 0x0000AADF}, {0x0000AAF0, 0x0000AAF1}, {0x0000ABEB, 0x0000ABEB}, {0x0000FD3E, 0x0000FD3F}, +{0x0000FE10, 0x0000FE19}, {0x0000FE30, 0x0000FE52}, {0x0000FE54, 0x0000FE61}, {0x0000FE63, 0x0000FE63}, +{0x0000FE68, 0x0000FE68}, {0x0000FE6A, 0x0000FE6B}, {0x0000FF01, 0x0000FF03}, {0x0000FF05, 0x0000FF0A}, +{0x0000FF0C, 0x0000FF0F}, {0x0000FF1A, 0x0000FF1B}, {0x0000FF1F, 0x0000FF20}, {0x0000FF3B, 0x0000FF3D}, +{0x0000FF3F, 0x0000FF3F}, {0x0000FF5B, 0x0000FF5B}, {0x0000FF5D, 0x0000FF5D}, {0x0000FF5F, 0x0000FF65}, +{0x00010100, 0x00010102}, {0x0001039F, 0x0001039F}, {0x000103D0, 0x000103D0}, {0x0001056F, 0x0001056F}, +{0x00010857, 0x00010857}, {0x0001091F, 0x0001091F}, {0x0001093F, 0x0001093F}, {0x00010A50, 0x00010A58}, +{0x00010A7F, 0x00010A7F}, {0x00010AF0, 0x00010AF6}, {0x00010B39, 0x00010B3F}, {0x00010B99, 0x00010B9C}, +{0x00010EAD, 0x00010EAD}, {0x00010F55, 0x00010F59}, {0x00011047, 0x0001104D}, {0x000110BB, 0x000110BC}, +{0x000110BE, 0x000110C1}, {0x00011140, 0x00011143}, {0x00011174, 0x00011175}, {0x000111C5, 0x000111C8}, +{0x000111CD, 0x000111CD}, {0x000111DB, 0x000111DB}, {0x000111DD, 0x000111DF}, {0x00011238, 0x0001123D}, +{0x000112A9, 0x000112A9}, {0x0001144B, 0x0001144F}, {0x0001145A, 0x0001145B}, {0x0001145D, 0x0001145D}, +{0x000114C6, 0x000114C6}, {0x000115C1, 0x000115D7}, {0x00011641, 0x00011643}, {0x00011660, 0x0001166C}, +{0x0001173C, 0x0001173E}, {0x0001183B, 0x0001183B}, {0x00011944, 0x00011946}, {0x000119E2, 0x000119E2}, +{0x00011A3F, 0x00011A46}, {0x00011A9A, 0x00011A9C}, {0x00011A9E, 0x00011AA2}, {0x00011C41, 0x00011C45}, +{0x00011C70, 0x00011C71}, {0x00011EF7, 0x00011EF8}, {0x00011FFF, 0x00011FFF}, {0x00012470, 0x00012474}, +{0x00016A6E, 0x00016A6F}, {0x00016AF5, 0x00016AF5}, {0x00016B37, 0x00016B3B}, {0x00016B44, 0x00016B44}, +{0x00016E97, 0x00016E9A}, {0x00016FE2, 0x00016FE2}, {0x0001BC9F, 0x0001BC9F}, {0x0001DA87, 0x0001DA8B}, +{0x0001E95E, 0x0001E95F}, +}; + +static const std::vector> unicode_ranges_symbol = { +{0x00000024, 0x00000024}, {0x0000002B, 0x0000002B}, {0x0000003C, 0x0000003E}, {0x0000005E, 0x0000005E}, +{0x00000060, 0x00000060}, {0x0000007C, 0x0000007C}, {0x0000007E, 0x0000007E}, {0x000000A2, 0x000000A6}, +{0x000000A8, 0x000000A9}, {0x000000AC, 0x000000AC}, {0x000000AE, 0x000000B1}, {0x000000B4, 0x000000B4}, +{0x000000B8, 0x000000B8}, {0x000000D7, 0x000000D7}, {0x000000F7, 0x000000F7}, {0x000002C2, 0x000002C5}, +{0x000002D2, 0x000002DF}, {0x000002E5, 0x000002EB}, {0x000002ED, 0x000002ED}, {0x000002EF, 0x000002FF}, +{0x00000375, 0x00000375}, {0x00000384, 0x00000385}, {0x000003F6, 0x000003F6}, {0x00000482, 0x00000482}, +{0x0000058D, 0x0000058F}, {0x00000606, 0x00000608}, {0x0000060B, 0x0000060B}, {0x0000060E, 0x0000060F}, +{0x000006DE, 0x000006DE}, {0x000006E9, 0x000006E9}, {0x000006FD, 0x000006FE}, {0x000007F6, 0x000007F6}, +{0x000007FE, 0x000007FF}, {0x000009F2, 0x000009F3}, {0x000009FA, 0x000009FB}, {0x00000AF1, 0x00000AF1}, +{0x00000B70, 0x00000B70}, {0x00000BF3, 0x00000BFA}, {0x00000C7F, 0x00000C7F}, {0x00000D4F, 0x00000D4F}, +{0x00000D79, 0x00000D79}, {0x00000E3F, 0x00000E3F}, {0x00000F01, 0x00000F03}, {0x00000F13, 0x00000F13}, +{0x00000F15, 0x00000F17}, {0x00000F1A, 0x00000F1F}, {0x00000F34, 0x00000F34}, {0x00000F36, 0x00000F36}, +{0x00000F38, 0x00000F38}, {0x00000FBE, 0x00000FC5}, {0x00000FC7, 0x00000FCC}, {0x00000FCE, 0x00000FCF}, +{0x00000FD5, 0x00000FD8}, {0x0000109E, 0x0000109F}, {0x00001390, 0x00001399}, {0x0000166D, 0x0000166D}, +{0x000017DB, 0x000017DB}, {0x00001940, 0x00001940}, {0x000019DE, 0x000019FF}, {0x00001B61, 0x00001B6A}, +{0x00001B74, 0x00001B7C}, {0x00001FBD, 0x00001FBD}, {0x00001FBF, 0x00001FC1}, {0x00001FCD, 0x00001FCF}, +{0x00001FDD, 0x00001FDF}, {0x00001FED, 0x00001FEF}, {0x00001FFD, 0x00001FFE}, {0x00002044, 0x00002044}, +{0x00002052, 0x00002052}, {0x0000207A, 0x0000207C}, {0x0000208A, 0x0000208C}, {0x000020A0, 0x000020BF}, +{0x00002100, 0x00002101}, {0x00002103, 0x00002106}, {0x00002108, 0x00002109}, {0x00002114, 0x00002114}, +{0x00002116, 0x00002118}, {0x0000211E, 0x00002123}, {0x00002125, 0x00002125}, {0x00002127, 0x00002127}, +{0x00002129, 0x00002129}, {0x0000212E, 0x0000212E}, {0x0000213A, 0x0000213B}, {0x00002140, 0x00002144}, +{0x0000214A, 0x0000214D}, {0x0000214F, 0x0000214F}, {0x0000218A, 0x0000218B}, {0x00002190, 0x00002307}, +{0x0000230C, 0x00002328}, {0x0000232B, 0x00002426}, {0x00002440, 0x0000244A}, {0x0000249C, 0x000024E9}, +{0x00002500, 0x00002767}, {0x00002794, 0x000027C4}, {0x000027C7, 0x000027E5}, {0x000027F0, 0x00002982}, +{0x00002999, 0x000029D7}, {0x000029DC, 0x000029FB}, {0x000029FE, 0x00002B73}, {0x00002B76, 0x00002B95}, +{0x00002B97, 0x00002BFF}, {0x00002CE5, 0x00002CEA}, {0x00002E50, 0x00002E51}, {0x00002E80, 0x00002E99}, +{0x00002E9B, 0x00002EF3}, {0x00002F00, 0x00002FD5}, {0x00002FF0, 0x00002FFB}, {0x00003004, 0x00003004}, +{0x00003012, 0x00003013}, {0x00003020, 0x00003020}, {0x00003036, 0x00003037}, {0x0000303E, 0x0000303F}, +{0x0000309B, 0x0000309C}, {0x00003190, 0x00003191}, {0x00003196, 0x0000319F}, {0x000031C0, 0x000031E3}, +{0x00003200, 0x0000321E}, {0x0000322A, 0x00003247}, {0x00003250, 0x00003250}, {0x00003260, 0x0000327F}, +{0x0000328A, 0x000032B0}, {0x000032C0, 0x000033FF}, {0x00004DC0, 0x00004DFF}, {0x0000A490, 0x0000A4C6}, +{0x0000A700, 0x0000A716}, {0x0000A720, 0x0000A721}, {0x0000A789, 0x0000A78A}, {0x0000A828, 0x0000A82B}, +{0x0000A836, 0x0000A839}, {0x0000AA77, 0x0000AA79}, {0x0000AB5B, 0x0000AB5B}, {0x0000AB6A, 0x0000AB6B}, +{0x0000FB29, 0x0000FB29}, {0x0000FBB2, 0x0000FBC1}, {0x0000FDFC, 0x0000FDFD}, {0x0000FE62, 0x0000FE62}, +{0x0000FE64, 0x0000FE66}, {0x0000FE69, 0x0000FE69}, {0x0000FF04, 0x0000FF04}, {0x0000FF0B, 0x0000FF0B}, +{0x0000FF1C, 0x0000FF1E}, {0x0000FF3E, 0x0000FF3E}, {0x0000FF40, 0x0000FF40}, {0x0000FF5C, 0x0000FF5C}, +{0x0000FF5E, 0x0000FF5E}, {0x0000FFE0, 0x0000FFE6}, {0x0000FFE8, 0x0000FFEE}, {0x0000FFFC, 0x0000FFFD}, +{0x00010137, 0x0001013F}, {0x00010179, 0x00010189}, {0x0001018C, 0x0001018E}, {0x00010190, 0x0001019C}, +{0x000101A0, 0x000101A0}, {0x000101D0, 0x000101FC}, {0x00010877, 0x00010878}, {0x00010AC8, 0x00010AC8}, +{0x0001173F, 0x0001173F}, {0x00011FD5, 0x00011FF1}, {0x00016B3C, 0x00016B3F}, {0x00016B45, 0x00016B45}, +{0x0001BC9C, 0x0001BC9C}, {0x0001D000, 0x0001D0F5}, {0x0001D100, 0x0001D126}, {0x0001D129, 0x0001D164}, +{0x0001D16A, 0x0001D16C}, {0x0001D183, 0x0001D184}, {0x0001D18C, 0x0001D1A9}, {0x0001D1AE, 0x0001D1E8}, +{0x0001D200, 0x0001D241}, {0x0001D245, 0x0001D245}, {0x0001D300, 0x0001D356}, {0x0001D6C1, 0x0001D6C1}, +{0x0001D6DB, 0x0001D6DB}, {0x0001D6FB, 0x0001D6FB}, {0x0001D715, 0x0001D715}, {0x0001D735, 0x0001D735}, +{0x0001D74F, 0x0001D74F}, {0x0001D76F, 0x0001D76F}, {0x0001D789, 0x0001D789}, {0x0001D7A9, 0x0001D7A9}, +{0x0001D7C3, 0x0001D7C3}, {0x0001D800, 0x0001D9FF}, {0x0001DA37, 0x0001DA3A}, {0x0001DA6D, 0x0001DA74}, +{0x0001DA76, 0x0001DA83}, {0x0001DA85, 0x0001DA86}, {0x0001E14F, 0x0001E14F}, {0x0001E2FF, 0x0001E2FF}, +{0x0001ECAC, 0x0001ECAC}, {0x0001ECB0, 0x0001ECB0}, {0x0001ED2E, 0x0001ED2E}, {0x0001EEF0, 0x0001EEF1}, +{0x0001F000, 0x0001F02B}, {0x0001F030, 0x0001F093}, {0x0001F0A0, 0x0001F0AE}, {0x0001F0B1, 0x0001F0BF}, +{0x0001F0C1, 0x0001F0CF}, {0x0001F0D1, 0x0001F0F5}, {0x0001F10D, 0x0001F1AD}, {0x0001F1E6, 0x0001F202}, +{0x0001F210, 0x0001F23B}, {0x0001F240, 0x0001F248}, {0x0001F250, 0x0001F251}, {0x0001F260, 0x0001F265}, +{0x0001F300, 0x0001F6D7}, {0x0001F6E0, 0x0001F6EC}, {0x0001F6F0, 0x0001F6FC}, {0x0001F700, 0x0001F773}, +{0x0001F780, 0x0001F7D8}, {0x0001F7E0, 0x0001F7EB}, {0x0001F800, 0x0001F80B}, {0x0001F810, 0x0001F847}, +{0x0001F850, 0x0001F859}, {0x0001F860, 0x0001F887}, {0x0001F890, 0x0001F8AD}, {0x0001F8B0, 0x0001F8B1}, +{0x0001F900, 0x0001F978}, {0x0001F97A, 0x0001F9CB}, {0x0001F9CD, 0x0001FA53}, {0x0001FA60, 0x0001FA6D}, +{0x0001FA70, 0x0001FA74}, {0x0001FA78, 0x0001FA7A}, {0x0001FA80, 0x0001FA86}, {0x0001FA90, 0x0001FAA8}, +{0x0001FAB0, 0x0001FAB6}, {0x0001FAC0, 0x0001FAC2}, {0x0001FAD0, 0x0001FAD6}, {0x0001FB00, 0x0001FB92}, +{0x0001FB94, 0x0001FBCA}, +}; + +static const std::vector> unicode_ranges_control = { +{0x00000000, 0x00000008}, {0x0000000E, 0x0000001B}, {0x0000007F, 0x00000084}, {0x00000086, 0x0000009F}, +{0x000000AD, 0x000000AD}, {0x00000378, 0x00000379}, {0x00000380, 0x00000383}, {0x0000038B, 0x0000038B}, +{0x0000038D, 0x0000038D}, {0x000003A2, 0x000003A2}, {0x00000530, 0x00000530}, {0x00000557, 0x00000558}, +{0x0000058B, 0x0000058C}, {0x00000590, 0x00000590}, {0x000005C8, 0x000005CF}, {0x000005EB, 0x000005EE}, +{0x000005F5, 0x00000605}, {0x0000061C, 0x0000061D}, {0x000006DD, 0x000006DD}, {0x0000070E, 0x0000070F}, +{0x0000074B, 0x0000074C}, {0x000007B2, 0x000007BF}, {0x000007FB, 0x000007FC}, {0x0000082E, 0x0000082F}, +{0x0000083F, 0x0000083F}, {0x0000085C, 0x0000085D}, {0x0000085F, 0x0000085F}, {0x0000086B, 0x0000089F}, +{0x000008B5, 0x000008B5}, {0x000008C8, 0x000008D2}, {0x000008E2, 0x000008E2}, {0x00000984, 0x00000984}, +{0x0000098D, 0x0000098E}, {0x00000991, 0x00000992}, {0x000009A9, 0x000009A9}, {0x000009B1, 0x000009B1}, +{0x000009B3, 0x000009B5}, {0x000009BA, 0x000009BB}, {0x000009C5, 0x000009C6}, {0x000009C9, 0x000009CA}, +{0x000009CF, 0x000009D6}, {0x000009D8, 0x000009DB}, {0x000009DE, 0x000009DE}, {0x000009E4, 0x000009E5}, +{0x000009FF, 0x00000A00}, {0x00000A04, 0x00000A04}, {0x00000A0B, 0x00000A0E}, {0x00000A11, 0x00000A12}, +{0x00000A29, 0x00000A29}, {0x00000A31, 0x00000A31}, {0x00000A34, 0x00000A34}, {0x00000A37, 0x00000A37}, +{0x00000A3A, 0x00000A3B}, {0x00000A3D, 0x00000A3D}, {0x00000A43, 0x00000A46}, {0x00000A49, 0x00000A4A}, +{0x00000A4E, 0x00000A50}, {0x00000A52, 0x00000A58}, {0x00000A5D, 0x00000A5D}, {0x00000A5F, 0x00000A65}, +{0x00000A77, 0x00000A80}, {0x00000A84, 0x00000A84}, {0x00000A8E, 0x00000A8E}, {0x00000A92, 0x00000A92}, +{0x00000AA9, 0x00000AA9}, {0x00000AB1, 0x00000AB1}, {0x00000AB4, 0x00000AB4}, {0x00000ABA, 0x00000ABB}, +{0x00000AC6, 0x00000AC6}, {0x00000ACA, 0x00000ACA}, {0x00000ACE, 0x00000ACF}, {0x00000AD1, 0x00000ADF}, +{0x00000AE4, 0x00000AE5}, {0x00000AF2, 0x00000AF8}, {0x00000B00, 0x00000B00}, {0x00000B04, 0x00000B04}, +{0x00000B0D, 0x00000B0E}, {0x00000B11, 0x00000B12}, {0x00000B29, 0x00000B29}, {0x00000B31, 0x00000B31}, +{0x00000B34, 0x00000B34}, {0x00000B3A, 0x00000B3B}, {0x00000B45, 0x00000B46}, {0x00000B49, 0x00000B4A}, +{0x00000B4E, 0x00000B54}, {0x00000B58, 0x00000B5B}, {0x00000B5E, 0x00000B5E}, {0x00000B64, 0x00000B65}, +{0x00000B78, 0x00000B81}, {0x00000B84, 0x00000B84}, {0x00000B8B, 0x00000B8D}, {0x00000B91, 0x00000B91}, +{0x00000B96, 0x00000B98}, {0x00000B9B, 0x00000B9B}, {0x00000B9D, 0x00000B9D}, {0x00000BA0, 0x00000BA2}, +{0x00000BA5, 0x00000BA7}, {0x00000BAB, 0x00000BAD}, {0x00000BBA, 0x00000BBD}, {0x00000BC3, 0x00000BC5}, +{0x00000BC9, 0x00000BC9}, {0x00000BCE, 0x00000BCF}, {0x00000BD1, 0x00000BD6}, {0x00000BD8, 0x00000BE5}, +{0x00000BFB, 0x00000BFF}, {0x00000C0D, 0x00000C0D}, {0x00000C11, 0x00000C11}, {0x00000C29, 0x00000C29}, +{0x00000C3A, 0x00000C3C}, {0x00000C45, 0x00000C45}, {0x00000C49, 0x00000C49}, {0x00000C4E, 0x00000C54}, +{0x00000C57, 0x00000C57}, {0x00000C5B, 0x00000C5F}, {0x00000C64, 0x00000C65}, {0x00000C70, 0x00000C76}, +{0x00000C8D, 0x00000C8D}, {0x00000C91, 0x00000C91}, {0x00000CA9, 0x00000CA9}, {0x00000CB4, 0x00000CB4}, +{0x00000CBA, 0x00000CBB}, {0x00000CC5, 0x00000CC5}, {0x00000CC9, 0x00000CC9}, {0x00000CCE, 0x00000CD4}, +{0x00000CD7, 0x00000CDD}, {0x00000CDF, 0x00000CDF}, {0x00000CE4, 0x00000CE5}, {0x00000CF0, 0x00000CF0}, +{0x00000CF3, 0x00000CFF}, {0x00000D0D, 0x00000D0D}, {0x00000D11, 0x00000D11}, {0x00000D45, 0x00000D45}, +{0x00000D49, 0x00000D49}, {0x00000D50, 0x00000D53}, {0x00000D64, 0x00000D65}, {0x00000D80, 0x00000D80}, +{0x00000D84, 0x00000D84}, {0x00000D97, 0x00000D99}, {0x00000DB2, 0x00000DB2}, {0x00000DBC, 0x00000DBC}, +{0x00000DBE, 0x00000DBF}, {0x00000DC7, 0x00000DC9}, {0x00000DCB, 0x00000DCE}, {0x00000DD5, 0x00000DD5}, +{0x00000DD7, 0x00000DD7}, {0x00000DE0, 0x00000DE5}, {0x00000DF0, 0x00000DF1}, {0x00000DF5, 0x00000E00}, +{0x00000E3B, 0x00000E3E}, {0x00000E5C, 0x00000E80}, {0x00000E83, 0x00000E83}, {0x00000E85, 0x00000E85}, +{0x00000E8B, 0x00000E8B}, {0x00000EA4, 0x00000EA4}, {0x00000EA6, 0x00000EA6}, {0x00000EBE, 0x00000EBF}, +{0x00000EC5, 0x00000EC5}, {0x00000EC7, 0x00000EC7}, {0x00000ECE, 0x00000ECF}, {0x00000EDA, 0x00000EDB}, +{0x00000EE0, 0x00000EFF}, {0x00000F48, 0x00000F48}, {0x00000F6D, 0x00000F70}, {0x00000F98, 0x00000F98}, +{0x00000FBD, 0x00000FBD}, {0x00000FCD, 0x00000FCD}, {0x00000FDB, 0x00000FFF}, {0x000010C6, 0x000010C6}, +{0x000010C8, 0x000010CC}, {0x000010CE, 0x000010CF}, {0x00001249, 0x00001249}, {0x0000124E, 0x0000124F}, +{0x00001257, 0x00001257}, {0x00001259, 0x00001259}, {0x0000125E, 0x0000125F}, {0x00001289, 0x00001289}, +{0x0000128E, 0x0000128F}, {0x000012B1, 0x000012B1}, {0x000012B6, 0x000012B7}, {0x000012BF, 0x000012BF}, +{0x000012C1, 0x000012C1}, {0x000012C6, 0x000012C7}, {0x000012D7, 0x000012D7}, {0x00001311, 0x00001311}, +{0x00001316, 0x00001317}, {0x0000135B, 0x0000135C}, {0x0000137D, 0x0000137F}, {0x0000139A, 0x0000139F}, +{0x000013F6, 0x000013F7}, {0x000013FE, 0x000013FF}, {0x0000169D, 0x0000169F}, {0x000016F9, 0x000016FF}, +{0x0000170D, 0x0000170D}, {0x00001715, 0x0000171F}, {0x00001737, 0x0000173F}, {0x00001754, 0x0000175F}, +{0x0000176D, 0x0000176D}, {0x00001771, 0x00001771}, {0x00001774, 0x0000177F}, {0x000017DE, 0x000017DF}, +{0x000017EA, 0x000017EF}, {0x000017FA, 0x000017FF}, {0x0000180E, 0x0000180F}, {0x0000181A, 0x0000181F}, +{0x00001879, 0x0000187F}, {0x000018AB, 0x000018AF}, {0x000018F6, 0x000018FF}, {0x0000191F, 0x0000191F}, +{0x0000192C, 0x0000192F}, {0x0000193C, 0x0000193F}, {0x00001941, 0x00001943}, {0x0000196E, 0x0000196F}, +{0x00001975, 0x0000197F}, {0x000019AC, 0x000019AF}, {0x000019CA, 0x000019CF}, {0x000019DB, 0x000019DD}, +{0x00001A1C, 0x00001A1D}, {0x00001A5F, 0x00001A5F}, {0x00001A7D, 0x00001A7E}, {0x00001A8A, 0x00001A8F}, +{0x00001A9A, 0x00001A9F}, {0x00001AAE, 0x00001AAF}, {0x00001AC1, 0x00001AFF}, {0x00001B4C, 0x00001B4F}, +{0x00001B7D, 0x00001B7F}, {0x00001BF4, 0x00001BFB}, {0x00001C38, 0x00001C3A}, {0x00001C4A, 0x00001C4C}, +{0x00001C89, 0x00001C8F}, {0x00001CBB, 0x00001CBC}, {0x00001CC8, 0x00001CCF}, {0x00001CFB, 0x00001CFF}, +{0x00001DFA, 0x00001DFA}, {0x00001F16, 0x00001F17}, {0x00001F1E, 0x00001F1F}, {0x00001F46, 0x00001F47}, +{0x00001F4E, 0x00001F4F}, {0x00001F58, 0x00001F58}, {0x00001F5A, 0x00001F5A}, {0x00001F5C, 0x00001F5C}, +{0x00001F5E, 0x00001F5E}, {0x00001F7E, 0x00001F7F}, {0x00001FB5, 0x00001FB5}, {0x00001FC5, 0x00001FC5}, +{0x00001FD4, 0x00001FD5}, {0x00001FDC, 0x00001FDC}, {0x00001FF0, 0x00001FF1}, {0x00001FF5, 0x00001FF5}, +{0x00001FFF, 0x00001FFF}, {0x0000200B, 0x0000200F}, {0x0000202A, 0x0000202E}, {0x00002060, 0x0000206F}, +{0x00002072, 0x00002073}, {0x0000208F, 0x0000208F}, {0x0000209D, 0x0000209F}, {0x000020C0, 0x000020CF}, +{0x000020F1, 0x000020FF}, {0x0000218C, 0x0000218F}, {0x00002427, 0x0000243F}, {0x0000244B, 0x0000245F}, +{0x00002B74, 0x00002B75}, {0x00002B96, 0x00002B96}, {0x00002C2F, 0x00002C2F}, {0x00002C5F, 0x00002C5F}, +{0x00002CF4, 0x00002CF8}, {0x00002D26, 0x00002D26}, {0x00002D28, 0x00002D2C}, {0x00002D2E, 0x00002D2F}, +{0x00002D68, 0x00002D6E}, {0x00002D71, 0x00002D7E}, {0x00002D97, 0x00002D9F}, {0x00002DA7, 0x00002DA7}, +{0x00002DAF, 0x00002DAF}, {0x00002DB7, 0x00002DB7}, {0x00002DBF, 0x00002DBF}, {0x00002DC7, 0x00002DC7}, +{0x00002DCF, 0x00002DCF}, {0x00002DD7, 0x00002DD7}, {0x00002DDF, 0x00002DDF}, {0x00002E53, 0x00002E7F}, +{0x00002E9A, 0x00002E9A}, {0x00002EF4, 0x00002EFF}, {0x00002FD6, 0x00002FEF}, {0x00002FFC, 0x00002FFF}, +{0x00003040, 0x00003040}, {0x00003097, 0x00003098}, {0x00003100, 0x00003104}, {0x00003130, 0x00003130}, +{0x0000318F, 0x0000318F}, {0x000031E4, 0x000031EF}, {0x0000321F, 0x0000321F}, {0x00009FFD, 0x00009FFF}, +{0x0000A48D, 0x0000A48F}, {0x0000A4C7, 0x0000A4CF}, {0x0000A62C, 0x0000A63F}, {0x0000A6F8, 0x0000A6FF}, +{0x0000A7C0, 0x0000A7C1}, {0x0000A7CB, 0x0000A7F4}, {0x0000A82D, 0x0000A82F}, {0x0000A83A, 0x0000A83F}, +{0x0000A878, 0x0000A87F}, {0x0000A8C6, 0x0000A8CD}, {0x0000A8DA, 0x0000A8DF}, {0x0000A954, 0x0000A95E}, +{0x0000A97D, 0x0000A97F}, {0x0000A9CE, 0x0000A9CE}, {0x0000A9DA, 0x0000A9DD}, {0x0000A9FF, 0x0000A9FF}, +{0x0000AA37, 0x0000AA3F}, {0x0000AA4E, 0x0000AA4F}, {0x0000AA5A, 0x0000AA5B}, {0x0000AAC3, 0x0000AADA}, +{0x0000AAF7, 0x0000AB00}, {0x0000AB07, 0x0000AB08}, {0x0000AB0F, 0x0000AB10}, {0x0000AB17, 0x0000AB1F}, +{0x0000AB27, 0x0000AB27}, {0x0000AB2F, 0x0000AB2F}, {0x0000AB6C, 0x0000AB6F}, {0x0000ABEE, 0x0000ABEF}, +{0x0000ABFA, 0x0000ABFF}, {0x0000D7A4, 0x0000D7AF}, {0x0000D7C7, 0x0000D7CA}, {0x0000D7FC, 0x0000F8FF}, +{0x0000FA6E, 0x0000FA6F}, {0x0000FADA, 0x0000FAFF}, {0x0000FB07, 0x0000FB12}, {0x0000FB18, 0x0000FB1C}, +{0x0000FB37, 0x0000FB37}, {0x0000FB3D, 0x0000FB3D}, {0x0000FB3F, 0x0000FB3F}, {0x0000FB42, 0x0000FB42}, +{0x0000FB45, 0x0000FB45}, {0x0000FBC2, 0x0000FBD2}, {0x0000FD40, 0x0000FD4F}, {0x0000FD90, 0x0000FD91}, +{0x0000FDC8, 0x0000FDEF}, {0x0000FDFE, 0x0000FDFF}, {0x0000FE1A, 0x0000FE1F}, {0x0000FE53, 0x0000FE53}, +{0x0000FE67, 0x0000FE67}, {0x0000FE6C, 0x0000FE6F}, {0x0000FE75, 0x0000FE75}, {0x0000FEFD, 0x0000FF00}, +{0x0000FFBF, 0x0000FFC1}, {0x0000FFC8, 0x0000FFC9}, {0x0000FFD0, 0x0000FFD1}, {0x0000FFD8, 0x0000FFD9}, +{0x0000FFDD, 0x0000FFDF}, {0x0000FFE7, 0x0000FFE7}, {0x0000FFEF, 0x0000FFFB}, {0x0000FFFE, 0x0000FFFF}, +{0x0001000C, 0x0001000C}, {0x00010027, 0x00010027}, {0x0001003B, 0x0001003B}, {0x0001003E, 0x0001003E}, +{0x0001004E, 0x0001004F}, {0x0001005E, 0x0001007F}, {0x000100FB, 0x000100FF}, {0x00010103, 0x00010106}, +{0x00010134, 0x00010136}, {0x0001018F, 0x0001018F}, {0x0001019D, 0x0001019F}, {0x000101A1, 0x000101CF}, +{0x000101FE, 0x0001027F}, {0x0001029D, 0x0001029F}, {0x000102D1, 0x000102DF}, {0x000102FC, 0x000102FF}, +{0x00010324, 0x0001032C}, {0x0001034B, 0x0001034F}, {0x0001037B, 0x0001037F}, {0x0001039E, 0x0001039E}, +{0x000103C4, 0x000103C7}, {0x000103D6, 0x000103FF}, {0x0001049E, 0x0001049F}, {0x000104AA, 0x000104AF}, +{0x000104D4, 0x000104D7}, {0x000104FC, 0x000104FF}, {0x00010528, 0x0001052F}, {0x00010564, 0x0001056E}, +{0x00010570, 0x000105FF}, {0x00010737, 0x0001073F}, {0x00010756, 0x0001075F}, {0x00010768, 0x000107FF}, +{0x00010806, 0x00010807}, {0x00010809, 0x00010809}, {0x00010836, 0x00010836}, {0x00010839, 0x0001083B}, +{0x0001083D, 0x0001083E}, {0x00010856, 0x00010856}, {0x0001089F, 0x000108A6}, {0x000108B0, 0x000108DF}, +{0x000108F3, 0x000108F3}, {0x000108F6, 0x000108FA}, {0x0001091C, 0x0001091E}, {0x0001093A, 0x0001093E}, +{0x00010940, 0x0001097F}, {0x000109B8, 0x000109BB}, {0x000109D0, 0x000109D1}, {0x00010A04, 0x00010A04}, +{0x00010A07, 0x00010A0B}, {0x00010A14, 0x00010A14}, {0x00010A18, 0x00010A18}, {0x00010A36, 0x00010A37}, +{0x00010A3B, 0x00010A3E}, {0x00010A49, 0x00010A4F}, {0x00010A59, 0x00010A5F}, {0x00010AA0, 0x00010ABF}, +{0x00010AE7, 0x00010AEA}, {0x00010AF7, 0x00010AFF}, {0x00010B36, 0x00010B38}, {0x00010B56, 0x00010B57}, +{0x00010B73, 0x00010B77}, {0x00010B92, 0x00010B98}, {0x00010B9D, 0x00010BA8}, {0x00010BB0, 0x00010BFF}, +{0x00010C49, 0x00010C7F}, {0x00010CB3, 0x00010CBF}, {0x00010CF3, 0x00010CF9}, {0x00010D28, 0x00010D2F}, +{0x00010D3A, 0x00010E5F}, {0x00010E7F, 0x00010E7F}, {0x00010EAA, 0x00010EAA}, {0x00010EAE, 0x00010EAF}, +{0x00010EB2, 0x00010EFF}, {0x00010F28, 0x00010F2F}, {0x00010F5A, 0x00010FAF}, {0x00010FCC, 0x00010FDF}, +{0x00010FF7, 0x00010FFF}, {0x0001104E, 0x00011051}, {0x00011070, 0x0001107E}, {0x000110BD, 0x000110BD}, +{0x000110C2, 0x000110CF}, {0x000110E9, 0x000110EF}, {0x000110FA, 0x000110FF}, {0x00011135, 0x00011135}, +{0x00011148, 0x0001114F}, {0x00011177, 0x0001117F}, {0x000111E0, 0x000111E0}, {0x000111F5, 0x000111FF}, +{0x00011212, 0x00011212}, {0x0001123F, 0x0001127F}, {0x00011287, 0x00011287}, {0x00011289, 0x00011289}, +{0x0001128E, 0x0001128E}, {0x0001129E, 0x0001129E}, {0x000112AA, 0x000112AF}, {0x000112EB, 0x000112EF}, +{0x000112FA, 0x000112FF}, {0x00011304, 0x00011304}, {0x0001130D, 0x0001130E}, {0x00011311, 0x00011312}, +{0x00011329, 0x00011329}, {0x00011331, 0x00011331}, {0x00011334, 0x00011334}, {0x0001133A, 0x0001133A}, +{0x00011345, 0x00011346}, {0x00011349, 0x0001134A}, {0x0001134E, 0x0001134F}, {0x00011351, 0x00011356}, +{0x00011358, 0x0001135C}, {0x00011364, 0x00011365}, {0x0001136D, 0x0001136F}, {0x00011375, 0x000113FF}, +{0x0001145C, 0x0001145C}, {0x00011462, 0x0001147F}, {0x000114C8, 0x000114CF}, {0x000114DA, 0x0001157F}, +{0x000115B6, 0x000115B7}, {0x000115DE, 0x000115FF}, {0x00011645, 0x0001164F}, {0x0001165A, 0x0001165F}, +{0x0001166D, 0x0001167F}, {0x000116B9, 0x000116BF}, {0x000116CA, 0x000116FF}, {0x0001171B, 0x0001171C}, +{0x0001172C, 0x0001172F}, {0x00011740, 0x000117FF}, {0x0001183C, 0x0001189F}, {0x000118F3, 0x000118FE}, +{0x00011907, 0x00011908}, {0x0001190A, 0x0001190B}, {0x00011914, 0x00011914}, {0x00011917, 0x00011917}, +{0x00011936, 0x00011936}, {0x00011939, 0x0001193A}, {0x00011947, 0x0001194F}, {0x0001195A, 0x0001199F}, +{0x000119A8, 0x000119A9}, {0x000119D8, 0x000119D9}, {0x000119E5, 0x000119FF}, {0x00011A48, 0x00011A4F}, +{0x00011AA3, 0x00011ABF}, {0x00011AF9, 0x00011BFF}, {0x00011C09, 0x00011C09}, {0x00011C37, 0x00011C37}, +{0x00011C46, 0x00011C4F}, {0x00011C6D, 0x00011C6F}, {0x00011C90, 0x00011C91}, {0x00011CA8, 0x00011CA8}, +{0x00011CB7, 0x00011CFF}, {0x00011D07, 0x00011D07}, {0x00011D0A, 0x00011D0A}, {0x00011D37, 0x00011D39}, +{0x00011D3B, 0x00011D3B}, {0x00011D3E, 0x00011D3E}, {0x00011D48, 0x00011D4F}, {0x00011D5A, 0x00011D5F}, +{0x00011D66, 0x00011D66}, {0x00011D69, 0x00011D69}, {0x00011D8F, 0x00011D8F}, {0x00011D92, 0x00011D92}, +{0x00011D99, 0x00011D9F}, {0x00011DAA, 0x00011EDF}, {0x00011EF9, 0x00011FAF}, {0x00011FB1, 0x00011FBF}, +{0x00011FF2, 0x00011FFE}, {0x0001239A, 0x000123FF}, {0x0001246F, 0x0001246F}, {0x00012475, 0x0001247F}, +{0x00012544, 0x00012FFF}, {0x0001342F, 0x000143FF}, {0x00014647, 0x000167FF}, {0x00016A39, 0x00016A3F}, +{0x00016A5F, 0x00016A5F}, {0x00016A6A, 0x00016A6D}, {0x00016A70, 0x00016ACF}, {0x00016AEE, 0x00016AEF}, +{0x00016AF6, 0x00016AFF}, {0x00016B46, 0x00016B4F}, {0x00016B5A, 0x00016B5A}, {0x00016B62, 0x00016B62}, +{0x00016B78, 0x00016B7C}, {0x00016B90, 0x00016E3F}, {0x00016E9B, 0x00016EFF}, {0x00016F4B, 0x00016F4E}, +{0x00016F88, 0x00016F8E}, {0x00016FA0, 0x00016FDF}, {0x00016FE5, 0x00016FEF}, {0x00016FF2, 0x00016FFF}, +{0x000187F8, 0x000187FF}, {0x00018CD6, 0x00018CFF}, {0x00018D09, 0x0001AFFF}, {0x0001B11F, 0x0001B14F}, +{0x0001B153, 0x0001B163}, {0x0001B168, 0x0001B16F}, {0x0001B2FC, 0x0001BBFF}, {0x0001BC6B, 0x0001BC6F}, +{0x0001BC7D, 0x0001BC7F}, {0x0001BC89, 0x0001BC8F}, {0x0001BC9A, 0x0001BC9B}, {0x0001BCA0, 0x0001CFFF}, +{0x0001D0F6, 0x0001D0FF}, {0x0001D127, 0x0001D128}, {0x0001D173, 0x0001D17A}, {0x0001D1E9, 0x0001D1FF}, +{0x0001D246, 0x0001D2DF}, {0x0001D2F4, 0x0001D2FF}, {0x0001D357, 0x0001D35F}, {0x0001D379, 0x0001D3FF}, +{0x0001D455, 0x0001D455}, {0x0001D49D, 0x0001D49D}, {0x0001D4A0, 0x0001D4A1}, {0x0001D4A3, 0x0001D4A4}, +{0x0001D4A7, 0x0001D4A8}, {0x0001D4AD, 0x0001D4AD}, {0x0001D4BA, 0x0001D4BA}, {0x0001D4BC, 0x0001D4BC}, +{0x0001D4C4, 0x0001D4C4}, {0x0001D506, 0x0001D506}, {0x0001D50B, 0x0001D50C}, {0x0001D515, 0x0001D515}, +{0x0001D51D, 0x0001D51D}, {0x0001D53A, 0x0001D53A}, {0x0001D53F, 0x0001D53F}, {0x0001D545, 0x0001D545}, +{0x0001D547, 0x0001D549}, {0x0001D551, 0x0001D551}, {0x0001D6A6, 0x0001D6A7}, {0x0001D7CC, 0x0001D7CD}, +{0x0001DA8C, 0x0001DA9A}, {0x0001DAA0, 0x0001DAA0}, {0x0001DAB0, 0x0001DFFF}, {0x0001E007, 0x0001E007}, +{0x0001E019, 0x0001E01A}, {0x0001E022, 0x0001E022}, {0x0001E025, 0x0001E025}, {0x0001E02B, 0x0001E0FF}, +{0x0001E12D, 0x0001E12F}, {0x0001E13E, 0x0001E13F}, {0x0001E14A, 0x0001E14D}, {0x0001E150, 0x0001E2BF}, +{0x0001E2FA, 0x0001E2FE}, {0x0001E300, 0x0001E7FF}, {0x0001E8C5, 0x0001E8C6}, {0x0001E8D7, 0x0001E8FF}, +{0x0001E94C, 0x0001E94F}, {0x0001E95A, 0x0001E95D}, {0x0001E960, 0x0001EC70}, {0x0001ECB5, 0x0001ED00}, +{0x0001ED3E, 0x0001EDFF}, {0x0001EE04, 0x0001EE04}, {0x0001EE20, 0x0001EE20}, {0x0001EE23, 0x0001EE23}, +{0x0001EE25, 0x0001EE26}, {0x0001EE28, 0x0001EE28}, {0x0001EE33, 0x0001EE33}, {0x0001EE38, 0x0001EE38}, +{0x0001EE3A, 0x0001EE3A}, {0x0001EE3C, 0x0001EE41}, {0x0001EE43, 0x0001EE46}, {0x0001EE48, 0x0001EE48}, +{0x0001EE4A, 0x0001EE4A}, {0x0001EE4C, 0x0001EE4C}, {0x0001EE50, 0x0001EE50}, {0x0001EE53, 0x0001EE53}, +{0x0001EE55, 0x0001EE56}, {0x0001EE58, 0x0001EE58}, {0x0001EE5A, 0x0001EE5A}, {0x0001EE5C, 0x0001EE5C}, +{0x0001EE5E, 0x0001EE5E}, {0x0001EE60, 0x0001EE60}, {0x0001EE63, 0x0001EE63}, {0x0001EE65, 0x0001EE66}, +{0x0001EE6B, 0x0001EE6B}, {0x0001EE73, 0x0001EE73}, {0x0001EE78, 0x0001EE78}, {0x0001EE7D, 0x0001EE7D}, +{0x0001EE7F, 0x0001EE7F}, {0x0001EE8A, 0x0001EE8A}, {0x0001EE9C, 0x0001EEA0}, {0x0001EEA4, 0x0001EEA4}, +{0x0001EEAA, 0x0001EEAA}, {0x0001EEBC, 0x0001EEEF}, {0x0001EEF2, 0x0001EFFF}, {0x0001F02C, 0x0001F02F}, +{0x0001F094, 0x0001F09F}, {0x0001F0AF, 0x0001F0B0}, {0x0001F0C0, 0x0001F0C0}, {0x0001F0D0, 0x0001F0D0}, +{0x0001F0F6, 0x0001F0FF}, {0x0001F1AE, 0x0001F1E5}, {0x0001F203, 0x0001F20F}, {0x0001F23C, 0x0001F23F}, +{0x0001F249, 0x0001F24F}, {0x0001F252, 0x0001F25F}, {0x0001F266, 0x0001F2FF}, {0x0001F6D8, 0x0001F6DF}, +{0x0001F6ED, 0x0001F6EF}, {0x0001F6FD, 0x0001F6FF}, {0x0001F774, 0x0001F77F}, {0x0001F7D9, 0x0001F7DF}, +{0x0001F7EC, 0x0001F7FF}, {0x0001F80C, 0x0001F80F}, {0x0001F848, 0x0001F84F}, {0x0001F85A, 0x0001F85F}, +{0x0001F888, 0x0001F88F}, {0x0001F8AE, 0x0001F8AF}, {0x0001F8B2, 0x0001F8FF}, {0x0001F979, 0x0001F979}, +{0x0001F9CC, 0x0001F9CC}, {0x0001FA54, 0x0001FA5F}, {0x0001FA6E, 0x0001FA6F}, {0x0001FA75, 0x0001FA77}, +{0x0001FA7B, 0x0001FA7F}, {0x0001FA87, 0x0001FA8F}, {0x0001FAA9, 0x0001FAAF}, {0x0001FAB7, 0x0001FABF}, +{0x0001FAC3, 0x0001FACF}, {0x0001FAD7, 0x0001FAFF}, {0x0001FB93, 0x0001FB93}, {0x0001FBCB, 0x0001FBEF}, +{0x0001FBFA, 0x0001FFFF}, {0x0002A6DE, 0x0002A6FF}, {0x0002B735, 0x0002B73F}, {0x0002B81E, 0x0002B81F}, +{0x0002CEA2, 0x0002CEAF}, {0x0002EBE1, 0x0002F7FF}, {0x0002FA1E, 0x0002FFFF}, {0x0003134B, 0x000E00FF}, +{0x000E01F0, 0x0010FFFF}, +}; + +static const std::multimap unicode_map_nfd = { +{0x000000C0, 0x00000041}, {0x000000C0, 0x00000300}, {0x000000C1, 0x00000041}, {0x000000C1, 0x00000301}, +{0x000000C2, 0x00000041}, {0x000000C2, 0x00000302}, {0x000000C3, 0x00000041}, {0x000000C3, 0x00000303}, +{0x000000C4, 0x00000041}, {0x000000C4, 0x00000308}, {0x000000C5, 0x00000041}, {0x000000C5, 0x0000030A}, +{0x000000C7, 0x00000043}, {0x000000C7, 0x00000327}, {0x000000C8, 0x00000045}, {0x000000C8, 0x00000300}, +{0x000000C9, 0x00000045}, {0x000000C9, 0x00000301}, {0x000000CA, 0x00000045}, {0x000000CA, 0x00000302}, +{0x000000CB, 0x00000045}, {0x000000CB, 0x00000308}, {0x000000CC, 0x00000049}, {0x000000CC, 0x00000300}, +{0x000000CD, 0x00000049}, {0x000000CD, 0x00000301}, {0x000000CE, 0x00000049}, {0x000000CE, 0x00000302}, +{0x000000CF, 0x00000049}, {0x000000CF, 0x00000308}, {0x000000D1, 0x0000004E}, {0x000000D1, 0x00000303}, +{0x000000D2, 0x0000004F}, {0x000000D2, 0x00000300}, {0x000000D3, 0x0000004F}, {0x000000D3, 0x00000301}, +{0x000000D4, 0x0000004F}, {0x000000D4, 0x00000302}, {0x000000D5, 0x0000004F}, {0x000000D5, 0x00000303}, +{0x000000D6, 0x0000004F}, {0x000000D6, 0x00000308}, {0x000000D9, 0x00000055}, {0x000000D9, 0x00000300}, +{0x000000DA, 0x00000055}, {0x000000DA, 0x00000301}, {0x000000DB, 0x00000055}, {0x000000DB, 0x00000302}, +{0x000000DC, 0x00000055}, {0x000000DC, 0x00000308}, {0x000000DD, 0x00000059}, {0x000000DD, 0x00000301}, +{0x000000E0, 0x00000061}, {0x000000E0, 0x00000300}, {0x000000E1, 0x00000061}, {0x000000E1, 0x00000301}, +{0x000000E2, 0x00000061}, {0x000000E2, 0x00000302}, {0x000000E3, 0x00000061}, {0x000000E3, 0x00000303}, +{0x000000E4, 0x00000061}, {0x000000E4, 0x00000308}, {0x000000E5, 0x00000061}, {0x000000E5, 0x0000030A}, +{0x000000E7, 0x00000063}, {0x000000E7, 0x00000327}, {0x000000E8, 0x00000065}, {0x000000E8, 0x00000300}, +{0x000000E9, 0x00000065}, {0x000000E9, 0x00000301}, {0x000000EA, 0x00000065}, {0x000000EA, 0x00000302}, +{0x000000EB, 0x00000065}, {0x000000EB, 0x00000308}, {0x000000EC, 0x00000069}, {0x000000EC, 0x00000300}, +{0x000000ED, 0x00000069}, {0x000000ED, 0x00000301}, {0x000000EE, 0x00000069}, {0x000000EE, 0x00000302}, +{0x000000EF, 0x00000069}, {0x000000EF, 0x00000308}, {0x000000F1, 0x0000006E}, {0x000000F1, 0x00000303}, +{0x000000F2, 0x0000006F}, {0x000000F2, 0x00000300}, {0x000000F3, 0x0000006F}, {0x000000F3, 0x00000301}, +{0x000000F4, 0x0000006F}, {0x000000F4, 0x00000302}, {0x000000F5, 0x0000006F}, {0x000000F5, 0x00000303}, +{0x000000F6, 0x0000006F}, {0x000000F6, 0x00000308}, {0x000000F9, 0x00000075}, {0x000000F9, 0x00000300}, +{0x000000FA, 0x00000075}, {0x000000FA, 0x00000301}, {0x000000FB, 0x00000075}, {0x000000FB, 0x00000302}, +{0x000000FC, 0x00000075}, {0x000000FC, 0x00000308}, {0x000000FD, 0x00000079}, {0x000000FD, 0x00000301}, +{0x000000FF, 0x00000079}, {0x000000FF, 0x00000308}, {0x00000100, 0x00000041}, {0x00000100, 0x00000304}, +{0x00000101, 0x00000061}, {0x00000101, 0x00000304}, {0x00000102, 0x00000041}, {0x00000102, 0x00000306}, +{0x00000103, 0x00000061}, {0x00000103, 0x00000306}, {0x00000104, 0x00000041}, {0x00000104, 0x00000328}, +{0x00000105, 0x00000061}, {0x00000105, 0x00000328}, {0x00000106, 0x00000043}, {0x00000106, 0x00000301}, +{0x00000107, 0x00000063}, {0x00000107, 0x00000301}, {0x00000108, 0x00000043}, {0x00000108, 0x00000302}, +{0x00000109, 0x00000063}, {0x00000109, 0x00000302}, {0x0000010A, 0x00000043}, {0x0000010A, 0x00000307}, +{0x0000010B, 0x00000063}, {0x0000010B, 0x00000307}, {0x0000010C, 0x00000043}, {0x0000010C, 0x0000030C}, +{0x0000010D, 0x00000063}, {0x0000010D, 0x0000030C}, {0x0000010E, 0x00000044}, {0x0000010E, 0x0000030C}, +{0x0000010F, 0x00000064}, {0x0000010F, 0x0000030C}, {0x00000112, 0x00000045}, {0x00000112, 0x00000304}, +{0x00000113, 0x00000065}, {0x00000113, 0x00000304}, {0x00000114, 0x00000045}, {0x00000114, 0x00000306}, +{0x00000115, 0x00000065}, {0x00000115, 0x00000306}, {0x00000116, 0x00000045}, {0x00000116, 0x00000307}, +{0x00000117, 0x00000065}, {0x00000117, 0x00000307}, {0x00000118, 0x00000045}, {0x00000118, 0x00000328}, +{0x00000119, 0x00000065}, {0x00000119, 0x00000328}, {0x0000011A, 0x00000045}, {0x0000011A, 0x0000030C}, +{0x0000011B, 0x00000065}, {0x0000011B, 0x0000030C}, {0x0000011C, 0x00000047}, {0x0000011C, 0x00000302}, +{0x0000011D, 0x00000067}, {0x0000011D, 0x00000302}, {0x0000011E, 0x00000047}, {0x0000011E, 0x00000306}, +{0x0000011F, 0x00000067}, {0x0000011F, 0x00000306}, {0x00000120, 0x00000047}, {0x00000120, 0x00000307}, +{0x00000121, 0x00000067}, {0x00000121, 0x00000307}, {0x00000122, 0x00000047}, {0x00000122, 0x00000327}, +{0x00000123, 0x00000067}, {0x00000123, 0x00000327}, {0x00000124, 0x00000048}, {0x00000124, 0x00000302}, +{0x00000125, 0x00000068}, {0x00000125, 0x00000302}, {0x00000128, 0x00000049}, {0x00000128, 0x00000303}, +{0x00000129, 0x00000069}, {0x00000129, 0x00000303}, {0x0000012A, 0x00000049}, {0x0000012A, 0x00000304}, +{0x0000012B, 0x00000069}, {0x0000012B, 0x00000304}, {0x0000012C, 0x00000049}, {0x0000012C, 0x00000306}, +{0x0000012D, 0x00000069}, {0x0000012D, 0x00000306}, {0x0000012E, 0x00000049}, {0x0000012E, 0x00000328}, +{0x0000012F, 0x00000069}, {0x0000012F, 0x00000328}, {0x00000130, 0x00000049}, {0x00000130, 0x00000307}, +{0x00000134, 0x0000004A}, {0x00000134, 0x00000302}, {0x00000135, 0x0000006A}, {0x00000135, 0x00000302}, +{0x00000136, 0x0000004B}, {0x00000136, 0x00000327}, {0x00000137, 0x0000006B}, {0x00000137, 0x00000327}, +{0x00000139, 0x0000004C}, {0x00000139, 0x00000301}, {0x0000013A, 0x0000006C}, {0x0000013A, 0x00000301}, +{0x0000013B, 0x0000004C}, {0x0000013B, 0x00000327}, {0x0000013C, 0x0000006C}, {0x0000013C, 0x00000327}, +{0x0000013D, 0x0000004C}, {0x0000013D, 0x0000030C}, {0x0000013E, 0x0000006C}, {0x0000013E, 0x0000030C}, +{0x00000143, 0x0000004E}, {0x00000143, 0x00000301}, {0x00000144, 0x0000006E}, {0x00000144, 0x00000301}, +{0x00000145, 0x0000004E}, {0x00000145, 0x00000327}, {0x00000146, 0x0000006E}, {0x00000146, 0x00000327}, +{0x00000147, 0x0000004E}, {0x00000147, 0x0000030C}, {0x00000148, 0x0000006E}, {0x00000148, 0x0000030C}, +{0x0000014C, 0x0000004F}, {0x0000014C, 0x00000304}, {0x0000014D, 0x0000006F}, {0x0000014D, 0x00000304}, +{0x0000014E, 0x0000004F}, {0x0000014E, 0x00000306}, {0x0000014F, 0x0000006F}, {0x0000014F, 0x00000306}, +{0x00000150, 0x0000004F}, {0x00000150, 0x0000030B}, {0x00000151, 0x0000006F}, {0x00000151, 0x0000030B}, +{0x00000154, 0x00000052}, {0x00000154, 0x00000301}, {0x00000155, 0x00000072}, {0x00000155, 0x00000301}, +{0x00000156, 0x00000052}, {0x00000156, 0x00000327}, {0x00000157, 0x00000072}, {0x00000157, 0x00000327}, +{0x00000158, 0x00000052}, {0x00000158, 0x0000030C}, {0x00000159, 0x00000072}, {0x00000159, 0x0000030C}, +{0x0000015A, 0x00000053}, {0x0000015A, 0x00000301}, {0x0000015B, 0x00000073}, {0x0000015B, 0x00000301}, +{0x0000015C, 0x00000053}, {0x0000015C, 0x00000302}, {0x0000015D, 0x00000073}, {0x0000015D, 0x00000302}, +{0x0000015E, 0x00000053}, {0x0000015E, 0x00000327}, {0x0000015F, 0x00000073}, {0x0000015F, 0x00000327}, +{0x00000160, 0x00000053}, {0x00000160, 0x0000030C}, {0x00000161, 0x00000073}, {0x00000161, 0x0000030C}, +{0x00000162, 0x00000054}, {0x00000162, 0x00000327}, {0x00000163, 0x00000074}, {0x00000163, 0x00000327}, +{0x00000164, 0x00000054}, {0x00000164, 0x0000030C}, {0x00000165, 0x00000074}, {0x00000165, 0x0000030C}, +{0x00000168, 0x00000055}, {0x00000168, 0x00000303}, {0x00000169, 0x00000075}, {0x00000169, 0x00000303}, +{0x0000016A, 0x00000055}, {0x0000016A, 0x00000304}, {0x0000016B, 0x00000075}, {0x0000016B, 0x00000304}, +{0x0000016C, 0x00000055}, {0x0000016C, 0x00000306}, {0x0000016D, 0x00000075}, {0x0000016D, 0x00000306}, +{0x0000016E, 0x00000055}, {0x0000016E, 0x0000030A}, {0x0000016F, 0x00000075}, {0x0000016F, 0x0000030A}, +{0x00000170, 0x00000055}, {0x00000170, 0x0000030B}, {0x00000171, 0x00000075}, {0x00000171, 0x0000030B}, +{0x00000172, 0x00000055}, {0x00000172, 0x00000328}, {0x00000173, 0x00000075}, {0x00000173, 0x00000328}, +{0x00000174, 0x00000057}, {0x00000174, 0x00000302}, {0x00000175, 0x00000077}, {0x00000175, 0x00000302}, +{0x00000176, 0x00000059}, {0x00000176, 0x00000302}, {0x00000177, 0x00000079}, {0x00000177, 0x00000302}, +{0x00000178, 0x00000059}, {0x00000178, 0x00000308}, {0x00000179, 0x0000005A}, {0x00000179, 0x00000301}, +{0x0000017A, 0x0000007A}, {0x0000017A, 0x00000301}, {0x0000017B, 0x0000005A}, {0x0000017B, 0x00000307}, +{0x0000017C, 0x0000007A}, {0x0000017C, 0x00000307}, {0x0000017D, 0x0000005A}, {0x0000017D, 0x0000030C}, +{0x0000017E, 0x0000007A}, {0x0000017E, 0x0000030C}, {0x000001A0, 0x0000004F}, {0x000001A0, 0x0000031B}, +{0x000001A1, 0x0000006F}, {0x000001A1, 0x0000031B}, {0x000001AF, 0x00000055}, {0x000001AF, 0x0000031B}, +{0x000001B0, 0x00000075}, {0x000001B0, 0x0000031B}, {0x000001CD, 0x00000041}, {0x000001CD, 0x0000030C}, +{0x000001CE, 0x00000061}, {0x000001CE, 0x0000030C}, {0x000001CF, 0x00000049}, {0x000001CF, 0x0000030C}, +{0x000001D0, 0x00000069}, {0x000001D0, 0x0000030C}, {0x000001D1, 0x0000004F}, {0x000001D1, 0x0000030C}, +{0x000001D2, 0x0000006F}, {0x000001D2, 0x0000030C}, {0x000001D3, 0x00000055}, {0x000001D3, 0x0000030C}, +{0x000001D4, 0x00000075}, {0x000001D4, 0x0000030C}, {0x000001D5, 0x00000055}, {0x000001D5, 0x00000308}, +{0x000001D5, 0x00000304}, {0x000001D6, 0x00000075}, {0x000001D6, 0x00000308}, {0x000001D6, 0x00000304}, +{0x000001D7, 0x00000055}, {0x000001D7, 0x00000308}, {0x000001D7, 0x00000301}, {0x000001D8, 0x00000075}, +{0x000001D8, 0x00000308}, {0x000001D8, 0x00000301}, {0x000001D9, 0x00000055}, {0x000001D9, 0x00000308}, +{0x000001D9, 0x0000030C}, {0x000001DA, 0x00000075}, {0x000001DA, 0x00000308}, {0x000001DA, 0x0000030C}, +{0x000001DB, 0x00000055}, {0x000001DB, 0x00000308}, {0x000001DB, 0x00000300}, {0x000001DC, 0x00000075}, +{0x000001DC, 0x00000308}, {0x000001DC, 0x00000300}, {0x000001DE, 0x00000041}, {0x000001DE, 0x00000308}, +{0x000001DE, 0x00000304}, {0x000001DF, 0x00000061}, {0x000001DF, 0x00000308}, {0x000001DF, 0x00000304}, +{0x000001E0, 0x00000041}, {0x000001E0, 0x00000307}, {0x000001E0, 0x00000304}, {0x000001E1, 0x00000061}, +{0x000001E1, 0x00000307}, {0x000001E1, 0x00000304}, {0x000001E2, 0x000000C6}, {0x000001E2, 0x00000304}, +{0x000001E3, 0x000000E6}, {0x000001E3, 0x00000304}, {0x000001E6, 0x00000047}, {0x000001E6, 0x0000030C}, +{0x000001E7, 0x00000067}, {0x000001E7, 0x0000030C}, {0x000001E8, 0x0000004B}, {0x000001E8, 0x0000030C}, +{0x000001E9, 0x0000006B}, {0x000001E9, 0x0000030C}, {0x000001EA, 0x0000004F}, {0x000001EA, 0x00000328}, +{0x000001EB, 0x0000006F}, {0x000001EB, 0x00000328}, {0x000001EC, 0x0000004F}, {0x000001EC, 0x00000328}, +{0x000001EC, 0x00000304}, {0x000001ED, 0x0000006F}, {0x000001ED, 0x00000328}, {0x000001ED, 0x00000304}, +{0x000001EE, 0x000001B7}, {0x000001EE, 0x0000030C}, {0x000001EF, 0x00000292}, {0x000001EF, 0x0000030C}, +{0x000001F0, 0x0000006A}, {0x000001F0, 0x0000030C}, {0x000001F4, 0x00000047}, {0x000001F4, 0x00000301}, +{0x000001F5, 0x00000067}, {0x000001F5, 0x00000301}, {0x000001F8, 0x0000004E}, {0x000001F8, 0x00000300}, +{0x000001F9, 0x0000006E}, {0x000001F9, 0x00000300}, {0x000001FA, 0x00000041}, {0x000001FA, 0x0000030A}, +{0x000001FA, 0x00000301}, {0x000001FB, 0x00000061}, {0x000001FB, 0x0000030A}, {0x000001FB, 0x00000301}, +{0x000001FC, 0x000000C6}, {0x000001FC, 0x00000301}, {0x000001FD, 0x000000E6}, {0x000001FD, 0x00000301}, +{0x000001FE, 0x000000D8}, {0x000001FE, 0x00000301}, {0x000001FF, 0x000000F8}, {0x000001FF, 0x00000301}, +{0x00000200, 0x00000041}, {0x00000200, 0x0000030F}, {0x00000201, 0x00000061}, {0x00000201, 0x0000030F}, +{0x00000202, 0x00000041}, {0x00000202, 0x00000311}, {0x00000203, 0x00000061}, {0x00000203, 0x00000311}, +{0x00000204, 0x00000045}, {0x00000204, 0x0000030F}, {0x00000205, 0x00000065}, {0x00000205, 0x0000030F}, +{0x00000206, 0x00000045}, {0x00000206, 0x00000311}, {0x00000207, 0x00000065}, {0x00000207, 0x00000311}, +{0x00000208, 0x00000049}, {0x00000208, 0x0000030F}, {0x00000209, 0x00000069}, {0x00000209, 0x0000030F}, +{0x0000020A, 0x00000049}, {0x0000020A, 0x00000311}, {0x0000020B, 0x00000069}, {0x0000020B, 0x00000311}, +{0x0000020C, 0x0000004F}, {0x0000020C, 0x0000030F}, {0x0000020D, 0x0000006F}, {0x0000020D, 0x0000030F}, +{0x0000020E, 0x0000004F}, {0x0000020E, 0x00000311}, {0x0000020F, 0x0000006F}, {0x0000020F, 0x00000311}, +{0x00000210, 0x00000052}, {0x00000210, 0x0000030F}, {0x00000211, 0x00000072}, {0x00000211, 0x0000030F}, +{0x00000212, 0x00000052}, {0x00000212, 0x00000311}, {0x00000213, 0x00000072}, {0x00000213, 0x00000311}, +{0x00000214, 0x00000055}, {0x00000214, 0x0000030F}, {0x00000215, 0x00000075}, {0x00000215, 0x0000030F}, +{0x00000216, 0x00000055}, {0x00000216, 0x00000311}, {0x00000217, 0x00000075}, {0x00000217, 0x00000311}, +{0x00000218, 0x00000053}, {0x00000218, 0x00000326}, {0x00000219, 0x00000073}, {0x00000219, 0x00000326}, +{0x0000021A, 0x00000054}, {0x0000021A, 0x00000326}, {0x0000021B, 0x00000074}, {0x0000021B, 0x00000326}, +{0x0000021E, 0x00000048}, {0x0000021E, 0x0000030C}, {0x0000021F, 0x00000068}, {0x0000021F, 0x0000030C}, +{0x00000226, 0x00000041}, {0x00000226, 0x00000307}, {0x00000227, 0x00000061}, {0x00000227, 0x00000307}, +{0x00000228, 0x00000045}, {0x00000228, 0x00000327}, {0x00000229, 0x00000065}, {0x00000229, 0x00000327}, +{0x0000022A, 0x0000004F}, {0x0000022A, 0x00000308}, {0x0000022A, 0x00000304}, {0x0000022B, 0x0000006F}, +{0x0000022B, 0x00000308}, {0x0000022B, 0x00000304}, {0x0000022C, 0x0000004F}, {0x0000022C, 0x00000303}, +{0x0000022C, 0x00000304}, {0x0000022D, 0x0000006F}, {0x0000022D, 0x00000303}, {0x0000022D, 0x00000304}, +{0x0000022E, 0x0000004F}, {0x0000022E, 0x00000307}, {0x0000022F, 0x0000006F}, {0x0000022F, 0x00000307}, +{0x00000230, 0x0000004F}, {0x00000230, 0x00000307}, {0x00000230, 0x00000304}, {0x00000231, 0x0000006F}, +{0x00000231, 0x00000307}, {0x00000231, 0x00000304}, {0x00000232, 0x00000059}, {0x00000232, 0x00000304}, +{0x00000233, 0x00000079}, {0x00000233, 0x00000304}, {0x00000340, 0x00000300}, {0x00000341, 0x00000301}, +{0x00000343, 0x00000313}, {0x00000344, 0x00000308}, {0x00000344, 0x00000301}, {0x00000374, 0x000002B9}, +{0x0000037E, 0x0000003B}, {0x00000385, 0x000000A8}, {0x00000385, 0x00000301}, {0x00000386, 0x00000391}, +{0x00000386, 0x00000301}, {0x00000387, 0x000000B7}, {0x00000388, 0x00000395}, {0x00000388, 0x00000301}, +{0x00000389, 0x00000397}, {0x00000389, 0x00000301}, {0x0000038A, 0x00000399}, {0x0000038A, 0x00000301}, +{0x0000038C, 0x0000039F}, {0x0000038C, 0x00000301}, {0x0000038E, 0x000003A5}, {0x0000038E, 0x00000301}, +{0x0000038F, 0x000003A9}, {0x0000038F, 0x00000301}, {0x00000390, 0x000003B9}, {0x00000390, 0x00000308}, +{0x00000390, 0x00000301}, {0x000003AA, 0x00000399}, {0x000003AA, 0x00000308}, {0x000003AB, 0x000003A5}, +{0x000003AB, 0x00000308}, {0x000003AC, 0x000003B1}, {0x000003AC, 0x00000301}, {0x000003AD, 0x000003B5}, +{0x000003AD, 0x00000301}, {0x000003AE, 0x000003B7}, {0x000003AE, 0x00000301}, {0x000003AF, 0x000003B9}, +{0x000003AF, 0x00000301}, {0x000003B0, 0x000003C5}, {0x000003B0, 0x00000308}, {0x000003B0, 0x00000301}, +{0x000003CA, 0x000003B9}, {0x000003CA, 0x00000308}, {0x000003CB, 0x000003C5}, {0x000003CB, 0x00000308}, +{0x000003CC, 0x000003BF}, {0x000003CC, 0x00000301}, {0x000003CD, 0x000003C5}, {0x000003CD, 0x00000301}, +{0x000003CE, 0x000003C9}, {0x000003CE, 0x00000301}, {0x000003D3, 0x000003D2}, {0x000003D3, 0x00000301}, +{0x000003D4, 0x000003D2}, {0x000003D4, 0x00000308}, {0x00000400, 0x00000415}, {0x00000400, 0x00000300}, +{0x00000401, 0x00000415}, {0x00000401, 0x00000308}, {0x00000403, 0x00000413}, {0x00000403, 0x00000301}, +{0x00000407, 0x00000406}, {0x00000407, 0x00000308}, {0x0000040C, 0x0000041A}, {0x0000040C, 0x00000301}, +{0x0000040D, 0x00000418}, {0x0000040D, 0x00000300}, {0x0000040E, 0x00000423}, {0x0000040E, 0x00000306}, +{0x00000419, 0x00000418}, {0x00000419, 0x00000306}, {0x00000439, 0x00000438}, {0x00000439, 0x00000306}, +{0x00000450, 0x00000435}, {0x00000450, 0x00000300}, {0x00000451, 0x00000435}, {0x00000451, 0x00000308}, +{0x00000453, 0x00000433}, {0x00000453, 0x00000301}, {0x00000457, 0x00000456}, {0x00000457, 0x00000308}, +{0x0000045C, 0x0000043A}, {0x0000045C, 0x00000301}, {0x0000045D, 0x00000438}, {0x0000045D, 0x00000300}, +{0x0000045E, 0x00000443}, {0x0000045E, 0x00000306}, {0x00000476, 0x00000474}, {0x00000476, 0x0000030F}, +{0x00000477, 0x00000475}, {0x00000477, 0x0000030F}, {0x000004C1, 0x00000416}, {0x000004C1, 0x00000306}, +{0x000004C2, 0x00000436}, {0x000004C2, 0x00000306}, {0x000004D0, 0x00000410}, {0x000004D0, 0x00000306}, +{0x000004D1, 0x00000430}, {0x000004D1, 0x00000306}, {0x000004D2, 0x00000410}, {0x000004D2, 0x00000308}, +{0x000004D3, 0x00000430}, {0x000004D3, 0x00000308}, {0x000004D6, 0x00000415}, {0x000004D6, 0x00000306}, +{0x000004D7, 0x00000435}, {0x000004D7, 0x00000306}, {0x000004DA, 0x000004D8}, {0x000004DA, 0x00000308}, +{0x000004DB, 0x000004D9}, {0x000004DB, 0x00000308}, {0x000004DC, 0x00000416}, {0x000004DC, 0x00000308}, +{0x000004DD, 0x00000436}, {0x000004DD, 0x00000308}, {0x000004DE, 0x00000417}, {0x000004DE, 0x00000308}, +{0x000004DF, 0x00000437}, {0x000004DF, 0x00000308}, {0x000004E2, 0x00000418}, {0x000004E2, 0x00000304}, +{0x000004E3, 0x00000438}, {0x000004E3, 0x00000304}, {0x000004E4, 0x00000418}, {0x000004E4, 0x00000308}, +{0x000004E5, 0x00000438}, {0x000004E5, 0x00000308}, {0x000004E6, 0x0000041E}, {0x000004E6, 0x00000308}, +{0x000004E7, 0x0000043E}, {0x000004E7, 0x00000308}, {0x000004EA, 0x000004E8}, {0x000004EA, 0x00000308}, +{0x000004EB, 0x000004E9}, {0x000004EB, 0x00000308}, {0x000004EC, 0x0000042D}, {0x000004EC, 0x00000308}, +{0x000004ED, 0x0000044D}, {0x000004ED, 0x00000308}, {0x000004EE, 0x00000423}, {0x000004EE, 0x00000304}, +{0x000004EF, 0x00000443}, {0x000004EF, 0x00000304}, {0x000004F0, 0x00000423}, {0x000004F0, 0x00000308}, +{0x000004F1, 0x00000443}, {0x000004F1, 0x00000308}, {0x000004F2, 0x00000423}, {0x000004F2, 0x0000030B}, +{0x000004F3, 0x00000443}, {0x000004F3, 0x0000030B}, {0x000004F4, 0x00000427}, {0x000004F4, 0x00000308}, +{0x000004F5, 0x00000447}, {0x000004F5, 0x00000308}, {0x000004F8, 0x0000042B}, {0x000004F8, 0x00000308}, +{0x000004F9, 0x0000044B}, {0x000004F9, 0x00000308}, {0x00000622, 0x00000627}, {0x00000622, 0x00000653}, +{0x00000623, 0x00000627}, {0x00000623, 0x00000654}, {0x00000624, 0x00000648}, {0x00000624, 0x00000654}, +{0x00000625, 0x00000627}, {0x00000625, 0x00000655}, {0x00000626, 0x0000064A}, {0x00000626, 0x00000654}, +{0x000006C0, 0x000006D5}, {0x000006C0, 0x00000654}, {0x000006C2, 0x000006C1}, {0x000006C2, 0x00000654}, +{0x000006D3, 0x000006D2}, {0x000006D3, 0x00000654}, {0x00000929, 0x00000928}, {0x00000929, 0x0000093C}, +{0x00000931, 0x00000930}, {0x00000931, 0x0000093C}, {0x00000934, 0x00000933}, {0x00000934, 0x0000093C}, +{0x00000958, 0x00000915}, {0x00000958, 0x0000093C}, {0x00000959, 0x00000916}, {0x00000959, 0x0000093C}, +{0x0000095A, 0x00000917}, {0x0000095A, 0x0000093C}, {0x0000095B, 0x0000091C}, {0x0000095B, 0x0000093C}, +{0x0000095C, 0x00000921}, {0x0000095C, 0x0000093C}, {0x0000095D, 0x00000922}, {0x0000095D, 0x0000093C}, +{0x0000095E, 0x0000092B}, {0x0000095E, 0x0000093C}, {0x0000095F, 0x0000092F}, {0x0000095F, 0x0000093C}, +{0x000009CB, 0x000009C7}, {0x000009CB, 0x000009BE}, {0x000009CC, 0x000009C7}, {0x000009CC, 0x000009D7}, +{0x000009DC, 0x000009A1}, {0x000009DC, 0x000009BC}, {0x000009DD, 0x000009A2}, {0x000009DD, 0x000009BC}, +{0x000009DF, 0x000009AF}, {0x000009DF, 0x000009BC}, {0x00000A33, 0x00000A32}, {0x00000A33, 0x00000A3C}, +{0x00000A36, 0x00000A38}, {0x00000A36, 0x00000A3C}, {0x00000A59, 0x00000A16}, {0x00000A59, 0x00000A3C}, +{0x00000A5A, 0x00000A17}, {0x00000A5A, 0x00000A3C}, {0x00000A5B, 0x00000A1C}, {0x00000A5B, 0x00000A3C}, +{0x00000A5E, 0x00000A2B}, {0x00000A5E, 0x00000A3C}, {0x00000B48, 0x00000B47}, {0x00000B48, 0x00000B56}, +{0x00000B4B, 0x00000B47}, {0x00000B4B, 0x00000B3E}, {0x00000B4C, 0x00000B47}, {0x00000B4C, 0x00000B57}, +{0x00000B5C, 0x00000B21}, {0x00000B5C, 0x00000B3C}, {0x00000B5D, 0x00000B22}, {0x00000B5D, 0x00000B3C}, +{0x00000B94, 0x00000B92}, {0x00000B94, 0x00000BD7}, {0x00000BCA, 0x00000BC6}, {0x00000BCA, 0x00000BBE}, +{0x00000BCB, 0x00000BC7}, {0x00000BCB, 0x00000BBE}, {0x00000BCC, 0x00000BC6}, {0x00000BCC, 0x00000BD7}, +{0x00000C48, 0x00000C46}, {0x00000C48, 0x00000C56}, {0x00000CC0, 0x00000CBF}, {0x00000CC0, 0x00000CD5}, +{0x00000CC7, 0x00000CC6}, {0x00000CC7, 0x00000CD5}, {0x00000CC8, 0x00000CC6}, {0x00000CC8, 0x00000CD6}, +{0x00000CCA, 0x00000CC6}, {0x00000CCA, 0x00000CC2}, {0x00000CCB, 0x00000CC6}, {0x00000CCB, 0x00000CC2}, +{0x00000CCB, 0x00000CD5}, {0x00000D4A, 0x00000D46}, {0x00000D4A, 0x00000D3E}, {0x00000D4B, 0x00000D47}, +{0x00000D4B, 0x00000D3E}, {0x00000D4C, 0x00000D46}, {0x00000D4C, 0x00000D57}, {0x00000DDA, 0x00000DD9}, +{0x00000DDA, 0x00000DCA}, {0x00000DDC, 0x00000DD9}, {0x00000DDC, 0x00000DCF}, {0x00000DDD, 0x00000DD9}, +{0x00000DDD, 0x00000DCF}, {0x00000DDD, 0x00000DCA}, {0x00000DDE, 0x00000DD9}, {0x00000DDE, 0x00000DDF}, +{0x00000F43, 0x00000F42}, {0x00000F43, 0x00000FB7}, {0x00000F4D, 0x00000F4C}, {0x00000F4D, 0x00000FB7}, +{0x00000F52, 0x00000F51}, {0x00000F52, 0x00000FB7}, {0x00000F57, 0x00000F56}, {0x00000F57, 0x00000FB7}, +{0x00000F5C, 0x00000F5B}, {0x00000F5C, 0x00000FB7}, {0x00000F69, 0x00000F40}, {0x00000F69, 0x00000FB5}, +{0x00000F73, 0x00000F71}, {0x00000F73, 0x00000F72}, {0x00000F75, 0x00000F71}, {0x00000F75, 0x00000F74}, +{0x00000F76, 0x00000FB2}, {0x00000F76, 0x00000F80}, {0x00000F78, 0x00000FB3}, {0x00000F78, 0x00000F80}, +{0x00000F81, 0x00000F71}, {0x00000F81, 0x00000F80}, {0x00000F93, 0x00000F92}, {0x00000F93, 0x00000FB7}, +{0x00000F9D, 0x00000F9C}, {0x00000F9D, 0x00000FB7}, {0x00000FA2, 0x00000FA1}, {0x00000FA2, 0x00000FB7}, +{0x00000FA7, 0x00000FA6}, {0x00000FA7, 0x00000FB7}, {0x00000FAC, 0x00000FAB}, {0x00000FAC, 0x00000FB7}, +{0x00000FB9, 0x00000F90}, {0x00000FB9, 0x00000FB5}, {0x00001026, 0x00001025}, {0x00001026, 0x0000102E}, +{0x00001B06, 0x00001B05}, {0x00001B06, 0x00001B35}, {0x00001B08, 0x00001B07}, {0x00001B08, 0x00001B35}, +{0x00001B0A, 0x00001B09}, {0x00001B0A, 0x00001B35}, {0x00001B0C, 0x00001B0B}, {0x00001B0C, 0x00001B35}, +{0x00001B0E, 0x00001B0D}, {0x00001B0E, 0x00001B35}, {0x00001B12, 0x00001B11}, {0x00001B12, 0x00001B35}, +{0x00001B3B, 0x00001B3A}, {0x00001B3B, 0x00001B35}, {0x00001B3D, 0x00001B3C}, {0x00001B3D, 0x00001B35}, +{0x00001B40, 0x00001B3E}, {0x00001B40, 0x00001B35}, {0x00001B41, 0x00001B3F}, {0x00001B41, 0x00001B35}, +{0x00001B43, 0x00001B42}, {0x00001B43, 0x00001B35}, {0x00001E00, 0x00000041}, {0x00001E00, 0x00000325}, +{0x00001E01, 0x00000061}, {0x00001E01, 0x00000325}, {0x00001E02, 0x00000042}, {0x00001E02, 0x00000307}, +{0x00001E03, 0x00000062}, {0x00001E03, 0x00000307}, {0x00001E04, 0x00000042}, {0x00001E04, 0x00000323}, +{0x00001E05, 0x00000062}, {0x00001E05, 0x00000323}, {0x00001E06, 0x00000042}, {0x00001E06, 0x00000331}, +{0x00001E07, 0x00000062}, {0x00001E07, 0x00000331}, {0x00001E08, 0x00000043}, {0x00001E08, 0x00000327}, +{0x00001E08, 0x00000301}, {0x00001E09, 0x00000063}, {0x00001E09, 0x00000327}, {0x00001E09, 0x00000301}, +{0x00001E0A, 0x00000044}, {0x00001E0A, 0x00000307}, {0x00001E0B, 0x00000064}, {0x00001E0B, 0x00000307}, +{0x00001E0C, 0x00000044}, {0x00001E0C, 0x00000323}, {0x00001E0D, 0x00000064}, {0x00001E0D, 0x00000323}, +{0x00001E0E, 0x00000044}, {0x00001E0E, 0x00000331}, {0x00001E0F, 0x00000064}, {0x00001E0F, 0x00000331}, +{0x00001E10, 0x00000044}, {0x00001E10, 0x00000327}, {0x00001E11, 0x00000064}, {0x00001E11, 0x00000327}, +{0x00001E12, 0x00000044}, {0x00001E12, 0x0000032D}, {0x00001E13, 0x00000064}, {0x00001E13, 0x0000032D}, +{0x00001E14, 0x00000045}, {0x00001E14, 0x00000304}, {0x00001E14, 0x00000300}, {0x00001E15, 0x00000065}, +{0x00001E15, 0x00000304}, {0x00001E15, 0x00000300}, {0x00001E16, 0x00000045}, {0x00001E16, 0x00000304}, +{0x00001E16, 0x00000301}, {0x00001E17, 0x00000065}, {0x00001E17, 0x00000304}, {0x00001E17, 0x00000301}, +{0x00001E18, 0x00000045}, {0x00001E18, 0x0000032D}, {0x00001E19, 0x00000065}, {0x00001E19, 0x0000032D}, +{0x00001E1A, 0x00000045}, {0x00001E1A, 0x00000330}, {0x00001E1B, 0x00000065}, {0x00001E1B, 0x00000330}, +{0x00001E1C, 0x00000045}, {0x00001E1C, 0x00000327}, {0x00001E1C, 0x00000306}, {0x00001E1D, 0x00000065}, +{0x00001E1D, 0x00000327}, {0x00001E1D, 0x00000306}, {0x00001E1E, 0x00000046}, {0x00001E1E, 0x00000307}, +{0x00001E1F, 0x00000066}, {0x00001E1F, 0x00000307}, {0x00001E20, 0x00000047}, {0x00001E20, 0x00000304}, +{0x00001E21, 0x00000067}, {0x00001E21, 0x00000304}, {0x00001E22, 0x00000048}, {0x00001E22, 0x00000307}, +{0x00001E23, 0x00000068}, {0x00001E23, 0x00000307}, {0x00001E24, 0x00000048}, {0x00001E24, 0x00000323}, +{0x00001E25, 0x00000068}, {0x00001E25, 0x00000323}, {0x00001E26, 0x00000048}, {0x00001E26, 0x00000308}, +{0x00001E27, 0x00000068}, {0x00001E27, 0x00000308}, {0x00001E28, 0x00000048}, {0x00001E28, 0x00000327}, +{0x00001E29, 0x00000068}, {0x00001E29, 0x00000327}, {0x00001E2A, 0x00000048}, {0x00001E2A, 0x0000032E}, +{0x00001E2B, 0x00000068}, {0x00001E2B, 0x0000032E}, {0x00001E2C, 0x00000049}, {0x00001E2C, 0x00000330}, +{0x00001E2D, 0x00000069}, {0x00001E2D, 0x00000330}, {0x00001E2E, 0x00000049}, {0x00001E2E, 0x00000308}, +{0x00001E2E, 0x00000301}, {0x00001E2F, 0x00000069}, {0x00001E2F, 0x00000308}, {0x00001E2F, 0x00000301}, +{0x00001E30, 0x0000004B}, {0x00001E30, 0x00000301}, {0x00001E31, 0x0000006B}, {0x00001E31, 0x00000301}, +{0x00001E32, 0x0000004B}, {0x00001E32, 0x00000323}, {0x00001E33, 0x0000006B}, {0x00001E33, 0x00000323}, +{0x00001E34, 0x0000004B}, {0x00001E34, 0x00000331}, {0x00001E35, 0x0000006B}, {0x00001E35, 0x00000331}, +{0x00001E36, 0x0000004C}, {0x00001E36, 0x00000323}, {0x00001E37, 0x0000006C}, {0x00001E37, 0x00000323}, +{0x00001E38, 0x0000004C}, {0x00001E38, 0x00000323}, {0x00001E38, 0x00000304}, {0x00001E39, 0x0000006C}, +{0x00001E39, 0x00000323}, {0x00001E39, 0x00000304}, {0x00001E3A, 0x0000004C}, {0x00001E3A, 0x00000331}, +{0x00001E3B, 0x0000006C}, {0x00001E3B, 0x00000331}, {0x00001E3C, 0x0000004C}, {0x00001E3C, 0x0000032D}, +{0x00001E3D, 0x0000006C}, {0x00001E3D, 0x0000032D}, {0x00001E3E, 0x0000004D}, {0x00001E3E, 0x00000301}, +{0x00001E3F, 0x0000006D}, {0x00001E3F, 0x00000301}, {0x00001E40, 0x0000004D}, {0x00001E40, 0x00000307}, +{0x00001E41, 0x0000006D}, {0x00001E41, 0x00000307}, {0x00001E42, 0x0000004D}, {0x00001E42, 0x00000323}, +{0x00001E43, 0x0000006D}, {0x00001E43, 0x00000323}, {0x00001E44, 0x0000004E}, {0x00001E44, 0x00000307}, +{0x00001E45, 0x0000006E}, {0x00001E45, 0x00000307}, {0x00001E46, 0x0000004E}, {0x00001E46, 0x00000323}, +{0x00001E47, 0x0000006E}, {0x00001E47, 0x00000323}, {0x00001E48, 0x0000004E}, {0x00001E48, 0x00000331}, +{0x00001E49, 0x0000006E}, {0x00001E49, 0x00000331}, {0x00001E4A, 0x0000004E}, {0x00001E4A, 0x0000032D}, +{0x00001E4B, 0x0000006E}, {0x00001E4B, 0x0000032D}, {0x00001E4C, 0x0000004F}, {0x00001E4C, 0x00000303}, +{0x00001E4C, 0x00000301}, {0x00001E4D, 0x0000006F}, {0x00001E4D, 0x00000303}, {0x00001E4D, 0x00000301}, +{0x00001E4E, 0x0000004F}, {0x00001E4E, 0x00000303}, {0x00001E4E, 0x00000308}, {0x00001E4F, 0x0000006F}, +{0x00001E4F, 0x00000303}, {0x00001E4F, 0x00000308}, {0x00001E50, 0x0000004F}, {0x00001E50, 0x00000304}, +{0x00001E50, 0x00000300}, {0x00001E51, 0x0000006F}, {0x00001E51, 0x00000304}, {0x00001E51, 0x00000300}, +{0x00001E52, 0x0000004F}, {0x00001E52, 0x00000304}, {0x00001E52, 0x00000301}, {0x00001E53, 0x0000006F}, +{0x00001E53, 0x00000304}, {0x00001E53, 0x00000301}, {0x00001E54, 0x00000050}, {0x00001E54, 0x00000301}, +{0x00001E55, 0x00000070}, {0x00001E55, 0x00000301}, {0x00001E56, 0x00000050}, {0x00001E56, 0x00000307}, +{0x00001E57, 0x00000070}, {0x00001E57, 0x00000307}, {0x00001E58, 0x00000052}, {0x00001E58, 0x00000307}, +{0x00001E59, 0x00000072}, {0x00001E59, 0x00000307}, {0x00001E5A, 0x00000052}, {0x00001E5A, 0x00000323}, +{0x00001E5B, 0x00000072}, {0x00001E5B, 0x00000323}, {0x00001E5C, 0x00000052}, {0x00001E5C, 0x00000323}, +{0x00001E5C, 0x00000304}, {0x00001E5D, 0x00000072}, {0x00001E5D, 0x00000323}, {0x00001E5D, 0x00000304}, +{0x00001E5E, 0x00000052}, {0x00001E5E, 0x00000331}, {0x00001E5F, 0x00000072}, {0x00001E5F, 0x00000331}, +{0x00001E60, 0x00000053}, {0x00001E60, 0x00000307}, {0x00001E61, 0x00000073}, {0x00001E61, 0x00000307}, +{0x00001E62, 0x00000053}, {0x00001E62, 0x00000323}, {0x00001E63, 0x00000073}, {0x00001E63, 0x00000323}, +{0x00001E64, 0x00000053}, {0x00001E64, 0x00000301}, {0x00001E64, 0x00000307}, {0x00001E65, 0x00000073}, +{0x00001E65, 0x00000301}, {0x00001E65, 0x00000307}, {0x00001E66, 0x00000053}, {0x00001E66, 0x0000030C}, +{0x00001E66, 0x00000307}, {0x00001E67, 0x00000073}, {0x00001E67, 0x0000030C}, {0x00001E67, 0x00000307}, +{0x00001E68, 0x00000053}, {0x00001E68, 0x00000323}, {0x00001E68, 0x00000307}, {0x00001E69, 0x00000073}, +{0x00001E69, 0x00000323}, {0x00001E69, 0x00000307}, {0x00001E6A, 0x00000054}, {0x00001E6A, 0x00000307}, +{0x00001E6B, 0x00000074}, {0x00001E6B, 0x00000307}, {0x00001E6C, 0x00000054}, {0x00001E6C, 0x00000323}, +{0x00001E6D, 0x00000074}, {0x00001E6D, 0x00000323}, {0x00001E6E, 0x00000054}, {0x00001E6E, 0x00000331}, +{0x00001E6F, 0x00000074}, {0x00001E6F, 0x00000331}, {0x00001E70, 0x00000054}, {0x00001E70, 0x0000032D}, +{0x00001E71, 0x00000074}, {0x00001E71, 0x0000032D}, {0x00001E72, 0x00000055}, {0x00001E72, 0x00000324}, +{0x00001E73, 0x00000075}, {0x00001E73, 0x00000324}, {0x00001E74, 0x00000055}, {0x00001E74, 0x00000330}, +{0x00001E75, 0x00000075}, {0x00001E75, 0x00000330}, {0x00001E76, 0x00000055}, {0x00001E76, 0x0000032D}, +{0x00001E77, 0x00000075}, {0x00001E77, 0x0000032D}, {0x00001E78, 0x00000055}, {0x00001E78, 0x00000303}, +{0x00001E78, 0x00000301}, {0x00001E79, 0x00000075}, {0x00001E79, 0x00000303}, {0x00001E79, 0x00000301}, +{0x00001E7A, 0x00000055}, {0x00001E7A, 0x00000304}, {0x00001E7A, 0x00000308}, {0x00001E7B, 0x00000075}, +{0x00001E7B, 0x00000304}, {0x00001E7B, 0x00000308}, {0x00001E7C, 0x00000056}, {0x00001E7C, 0x00000303}, +{0x00001E7D, 0x00000076}, {0x00001E7D, 0x00000303}, {0x00001E7E, 0x00000056}, {0x00001E7E, 0x00000323}, +{0x00001E7F, 0x00000076}, {0x00001E7F, 0x00000323}, {0x00001E80, 0x00000057}, {0x00001E80, 0x00000300}, +{0x00001E81, 0x00000077}, {0x00001E81, 0x00000300}, {0x00001E82, 0x00000057}, {0x00001E82, 0x00000301}, +{0x00001E83, 0x00000077}, {0x00001E83, 0x00000301}, {0x00001E84, 0x00000057}, {0x00001E84, 0x00000308}, +{0x00001E85, 0x00000077}, {0x00001E85, 0x00000308}, {0x00001E86, 0x00000057}, {0x00001E86, 0x00000307}, +{0x00001E87, 0x00000077}, {0x00001E87, 0x00000307}, {0x00001E88, 0x00000057}, {0x00001E88, 0x00000323}, +{0x00001E89, 0x00000077}, {0x00001E89, 0x00000323}, {0x00001E8A, 0x00000058}, {0x00001E8A, 0x00000307}, +{0x00001E8B, 0x00000078}, {0x00001E8B, 0x00000307}, {0x00001E8C, 0x00000058}, {0x00001E8C, 0x00000308}, +{0x00001E8D, 0x00000078}, {0x00001E8D, 0x00000308}, {0x00001E8E, 0x00000059}, {0x00001E8E, 0x00000307}, +{0x00001E8F, 0x00000079}, {0x00001E8F, 0x00000307}, {0x00001E90, 0x0000005A}, {0x00001E90, 0x00000302}, +{0x00001E91, 0x0000007A}, {0x00001E91, 0x00000302}, {0x00001E92, 0x0000005A}, {0x00001E92, 0x00000323}, +{0x00001E93, 0x0000007A}, {0x00001E93, 0x00000323}, {0x00001E94, 0x0000005A}, {0x00001E94, 0x00000331}, +{0x00001E95, 0x0000007A}, {0x00001E95, 0x00000331}, {0x00001E96, 0x00000068}, {0x00001E96, 0x00000331}, +{0x00001E97, 0x00000074}, {0x00001E97, 0x00000308}, {0x00001E98, 0x00000077}, {0x00001E98, 0x0000030A}, +{0x00001E99, 0x00000079}, {0x00001E99, 0x0000030A}, {0x00001E9B, 0x0000017F}, {0x00001E9B, 0x00000307}, +{0x00001EA0, 0x00000041}, {0x00001EA0, 0x00000323}, {0x00001EA1, 0x00000061}, {0x00001EA1, 0x00000323}, +{0x00001EA2, 0x00000041}, {0x00001EA2, 0x00000309}, {0x00001EA3, 0x00000061}, {0x00001EA3, 0x00000309}, +{0x00001EA4, 0x00000041}, {0x00001EA4, 0x00000302}, {0x00001EA4, 0x00000301}, {0x00001EA5, 0x00000061}, +{0x00001EA5, 0x00000302}, {0x00001EA5, 0x00000301}, {0x00001EA6, 0x00000041}, {0x00001EA6, 0x00000302}, +{0x00001EA6, 0x00000300}, {0x00001EA7, 0x00000061}, {0x00001EA7, 0x00000302}, {0x00001EA7, 0x00000300}, +{0x00001EA8, 0x00000041}, {0x00001EA8, 0x00000302}, {0x00001EA8, 0x00000309}, {0x00001EA9, 0x00000061}, +{0x00001EA9, 0x00000302}, {0x00001EA9, 0x00000309}, {0x00001EAA, 0x00000041}, {0x00001EAA, 0x00000302}, +{0x00001EAA, 0x00000303}, {0x00001EAB, 0x00000061}, {0x00001EAB, 0x00000302}, {0x00001EAB, 0x00000303}, +{0x00001EAC, 0x00000041}, {0x00001EAC, 0x00000323}, {0x00001EAC, 0x00000302}, {0x00001EAD, 0x00000061}, +{0x00001EAD, 0x00000323}, {0x00001EAD, 0x00000302}, {0x00001EAE, 0x00000041}, {0x00001EAE, 0x00000306}, +{0x00001EAE, 0x00000301}, {0x00001EAF, 0x00000061}, {0x00001EAF, 0x00000306}, {0x00001EAF, 0x00000301}, +{0x00001EB0, 0x00000041}, {0x00001EB0, 0x00000306}, {0x00001EB0, 0x00000300}, {0x00001EB1, 0x00000061}, +{0x00001EB1, 0x00000306}, {0x00001EB1, 0x00000300}, {0x00001EB2, 0x00000041}, {0x00001EB2, 0x00000306}, +{0x00001EB2, 0x00000309}, {0x00001EB3, 0x00000061}, {0x00001EB3, 0x00000306}, {0x00001EB3, 0x00000309}, +{0x00001EB4, 0x00000041}, {0x00001EB4, 0x00000306}, {0x00001EB4, 0x00000303}, {0x00001EB5, 0x00000061}, +{0x00001EB5, 0x00000306}, {0x00001EB5, 0x00000303}, {0x00001EB6, 0x00000041}, {0x00001EB6, 0x00000323}, +{0x00001EB6, 0x00000306}, {0x00001EB7, 0x00000061}, {0x00001EB7, 0x00000323}, {0x00001EB7, 0x00000306}, +{0x00001EB8, 0x00000045}, {0x00001EB8, 0x00000323}, {0x00001EB9, 0x00000065}, {0x00001EB9, 0x00000323}, +{0x00001EBA, 0x00000045}, {0x00001EBA, 0x00000309}, {0x00001EBB, 0x00000065}, {0x00001EBB, 0x00000309}, +{0x00001EBC, 0x00000045}, {0x00001EBC, 0x00000303}, {0x00001EBD, 0x00000065}, {0x00001EBD, 0x00000303}, +{0x00001EBE, 0x00000045}, {0x00001EBE, 0x00000302}, {0x00001EBE, 0x00000301}, {0x00001EBF, 0x00000065}, +{0x00001EBF, 0x00000302}, {0x00001EBF, 0x00000301}, {0x00001EC0, 0x00000045}, {0x00001EC0, 0x00000302}, +{0x00001EC0, 0x00000300}, {0x00001EC1, 0x00000065}, {0x00001EC1, 0x00000302}, {0x00001EC1, 0x00000300}, +{0x00001EC2, 0x00000045}, {0x00001EC2, 0x00000302}, {0x00001EC2, 0x00000309}, {0x00001EC3, 0x00000065}, +{0x00001EC3, 0x00000302}, {0x00001EC3, 0x00000309}, {0x00001EC4, 0x00000045}, {0x00001EC4, 0x00000302}, +{0x00001EC4, 0x00000303}, {0x00001EC5, 0x00000065}, {0x00001EC5, 0x00000302}, {0x00001EC5, 0x00000303}, +{0x00001EC6, 0x00000045}, {0x00001EC6, 0x00000323}, {0x00001EC6, 0x00000302}, {0x00001EC7, 0x00000065}, +{0x00001EC7, 0x00000323}, {0x00001EC7, 0x00000302}, {0x00001EC8, 0x00000049}, {0x00001EC8, 0x00000309}, +{0x00001EC9, 0x00000069}, {0x00001EC9, 0x00000309}, {0x00001ECA, 0x00000049}, {0x00001ECA, 0x00000323}, +{0x00001ECB, 0x00000069}, {0x00001ECB, 0x00000323}, {0x00001ECC, 0x0000004F}, {0x00001ECC, 0x00000323}, +{0x00001ECD, 0x0000006F}, {0x00001ECD, 0x00000323}, {0x00001ECE, 0x0000004F}, {0x00001ECE, 0x00000309}, +{0x00001ECF, 0x0000006F}, {0x00001ECF, 0x00000309}, {0x00001ED0, 0x0000004F}, {0x00001ED0, 0x00000302}, +{0x00001ED0, 0x00000301}, {0x00001ED1, 0x0000006F}, {0x00001ED1, 0x00000302}, {0x00001ED1, 0x00000301}, +{0x00001ED2, 0x0000004F}, {0x00001ED2, 0x00000302}, {0x00001ED2, 0x00000300}, {0x00001ED3, 0x0000006F}, +{0x00001ED3, 0x00000302}, {0x00001ED3, 0x00000300}, {0x00001ED4, 0x0000004F}, {0x00001ED4, 0x00000302}, +{0x00001ED4, 0x00000309}, {0x00001ED5, 0x0000006F}, {0x00001ED5, 0x00000302}, {0x00001ED5, 0x00000309}, +{0x00001ED6, 0x0000004F}, {0x00001ED6, 0x00000302}, {0x00001ED6, 0x00000303}, {0x00001ED7, 0x0000006F}, +{0x00001ED7, 0x00000302}, {0x00001ED7, 0x00000303}, {0x00001ED8, 0x0000004F}, {0x00001ED8, 0x00000323}, +{0x00001ED8, 0x00000302}, {0x00001ED9, 0x0000006F}, {0x00001ED9, 0x00000323}, {0x00001ED9, 0x00000302}, +{0x00001EDA, 0x0000004F}, {0x00001EDA, 0x0000031B}, {0x00001EDA, 0x00000301}, {0x00001EDB, 0x0000006F}, +{0x00001EDB, 0x0000031B}, {0x00001EDB, 0x00000301}, {0x00001EDC, 0x0000004F}, {0x00001EDC, 0x0000031B}, +{0x00001EDC, 0x00000300}, {0x00001EDD, 0x0000006F}, {0x00001EDD, 0x0000031B}, {0x00001EDD, 0x00000300}, +{0x00001EDE, 0x0000004F}, {0x00001EDE, 0x0000031B}, {0x00001EDE, 0x00000309}, {0x00001EDF, 0x0000006F}, +{0x00001EDF, 0x0000031B}, {0x00001EDF, 0x00000309}, {0x00001EE0, 0x0000004F}, {0x00001EE0, 0x0000031B}, +{0x00001EE0, 0x00000303}, {0x00001EE1, 0x0000006F}, {0x00001EE1, 0x0000031B}, {0x00001EE1, 0x00000303}, +{0x00001EE2, 0x0000004F}, {0x00001EE2, 0x0000031B}, {0x00001EE2, 0x00000323}, {0x00001EE3, 0x0000006F}, +{0x00001EE3, 0x0000031B}, {0x00001EE3, 0x00000323}, {0x00001EE4, 0x00000055}, {0x00001EE4, 0x00000323}, +{0x00001EE5, 0x00000075}, {0x00001EE5, 0x00000323}, {0x00001EE6, 0x00000055}, {0x00001EE6, 0x00000309}, +{0x00001EE7, 0x00000075}, {0x00001EE7, 0x00000309}, {0x00001EE8, 0x00000055}, {0x00001EE8, 0x0000031B}, +{0x00001EE8, 0x00000301}, {0x00001EE9, 0x00000075}, {0x00001EE9, 0x0000031B}, {0x00001EE9, 0x00000301}, +{0x00001EEA, 0x00000055}, {0x00001EEA, 0x0000031B}, {0x00001EEA, 0x00000300}, {0x00001EEB, 0x00000075}, +{0x00001EEB, 0x0000031B}, {0x00001EEB, 0x00000300}, {0x00001EEC, 0x00000055}, {0x00001EEC, 0x0000031B}, +{0x00001EEC, 0x00000309}, {0x00001EED, 0x00000075}, {0x00001EED, 0x0000031B}, {0x00001EED, 0x00000309}, +{0x00001EEE, 0x00000055}, {0x00001EEE, 0x0000031B}, {0x00001EEE, 0x00000303}, {0x00001EEF, 0x00000075}, +{0x00001EEF, 0x0000031B}, {0x00001EEF, 0x00000303}, {0x00001EF0, 0x00000055}, {0x00001EF0, 0x0000031B}, +{0x00001EF0, 0x00000323}, {0x00001EF1, 0x00000075}, {0x00001EF1, 0x0000031B}, {0x00001EF1, 0x00000323}, +{0x00001EF2, 0x00000059}, {0x00001EF2, 0x00000300}, {0x00001EF3, 0x00000079}, {0x00001EF3, 0x00000300}, +{0x00001EF4, 0x00000059}, {0x00001EF4, 0x00000323}, {0x00001EF5, 0x00000079}, {0x00001EF5, 0x00000323}, +{0x00001EF6, 0x00000059}, {0x00001EF6, 0x00000309}, {0x00001EF7, 0x00000079}, {0x00001EF7, 0x00000309}, +{0x00001EF8, 0x00000059}, {0x00001EF8, 0x00000303}, {0x00001EF9, 0x00000079}, {0x00001EF9, 0x00000303}, +{0x00001F00, 0x000003B1}, {0x00001F00, 0x00000313}, {0x00001F01, 0x000003B1}, {0x00001F01, 0x00000314}, +{0x00001F02, 0x000003B1}, {0x00001F02, 0x00000313}, {0x00001F02, 0x00000300}, {0x00001F03, 0x000003B1}, +{0x00001F03, 0x00000314}, {0x00001F03, 0x00000300}, {0x00001F04, 0x000003B1}, {0x00001F04, 0x00000313}, +{0x00001F04, 0x00000301}, {0x00001F05, 0x000003B1}, {0x00001F05, 0x00000314}, {0x00001F05, 0x00000301}, +{0x00001F06, 0x000003B1}, {0x00001F06, 0x00000313}, {0x00001F06, 0x00000342}, {0x00001F07, 0x000003B1}, +{0x00001F07, 0x00000314}, {0x00001F07, 0x00000342}, {0x00001F08, 0x00000391}, {0x00001F08, 0x00000313}, +{0x00001F09, 0x00000391}, {0x00001F09, 0x00000314}, {0x00001F0A, 0x00000391}, {0x00001F0A, 0x00000313}, +{0x00001F0A, 0x00000300}, {0x00001F0B, 0x00000391}, {0x00001F0B, 0x00000314}, {0x00001F0B, 0x00000300}, +{0x00001F0C, 0x00000391}, {0x00001F0C, 0x00000313}, {0x00001F0C, 0x00000301}, {0x00001F0D, 0x00000391}, +{0x00001F0D, 0x00000314}, {0x00001F0D, 0x00000301}, {0x00001F0E, 0x00000391}, {0x00001F0E, 0x00000313}, +{0x00001F0E, 0x00000342}, {0x00001F0F, 0x00000391}, {0x00001F0F, 0x00000314}, {0x00001F0F, 0x00000342}, +{0x00001F10, 0x000003B5}, {0x00001F10, 0x00000313}, {0x00001F11, 0x000003B5}, {0x00001F11, 0x00000314}, +{0x00001F12, 0x000003B5}, {0x00001F12, 0x00000313}, {0x00001F12, 0x00000300}, {0x00001F13, 0x000003B5}, +{0x00001F13, 0x00000314}, {0x00001F13, 0x00000300}, {0x00001F14, 0x000003B5}, {0x00001F14, 0x00000313}, +{0x00001F14, 0x00000301}, {0x00001F15, 0x000003B5}, {0x00001F15, 0x00000314}, {0x00001F15, 0x00000301}, +{0x00001F18, 0x00000395}, {0x00001F18, 0x00000313}, {0x00001F19, 0x00000395}, {0x00001F19, 0x00000314}, +{0x00001F1A, 0x00000395}, {0x00001F1A, 0x00000313}, {0x00001F1A, 0x00000300}, {0x00001F1B, 0x00000395}, +{0x00001F1B, 0x00000314}, {0x00001F1B, 0x00000300}, {0x00001F1C, 0x00000395}, {0x00001F1C, 0x00000313}, +{0x00001F1C, 0x00000301}, {0x00001F1D, 0x00000395}, {0x00001F1D, 0x00000314}, {0x00001F1D, 0x00000301}, +{0x00001F20, 0x000003B7}, {0x00001F20, 0x00000313}, {0x00001F21, 0x000003B7}, {0x00001F21, 0x00000314}, +{0x00001F22, 0x000003B7}, {0x00001F22, 0x00000313}, {0x00001F22, 0x00000300}, {0x00001F23, 0x000003B7}, +{0x00001F23, 0x00000314}, {0x00001F23, 0x00000300}, {0x00001F24, 0x000003B7}, {0x00001F24, 0x00000313}, +{0x00001F24, 0x00000301}, {0x00001F25, 0x000003B7}, {0x00001F25, 0x00000314}, {0x00001F25, 0x00000301}, +{0x00001F26, 0x000003B7}, {0x00001F26, 0x00000313}, {0x00001F26, 0x00000342}, {0x00001F27, 0x000003B7}, +{0x00001F27, 0x00000314}, {0x00001F27, 0x00000342}, {0x00001F28, 0x00000397}, {0x00001F28, 0x00000313}, +{0x00001F29, 0x00000397}, {0x00001F29, 0x00000314}, {0x00001F2A, 0x00000397}, {0x00001F2A, 0x00000313}, +{0x00001F2A, 0x00000300}, {0x00001F2B, 0x00000397}, {0x00001F2B, 0x00000314}, {0x00001F2B, 0x00000300}, +{0x00001F2C, 0x00000397}, {0x00001F2C, 0x00000313}, {0x00001F2C, 0x00000301}, {0x00001F2D, 0x00000397}, +{0x00001F2D, 0x00000314}, {0x00001F2D, 0x00000301}, {0x00001F2E, 0x00000397}, {0x00001F2E, 0x00000313}, +{0x00001F2E, 0x00000342}, {0x00001F2F, 0x00000397}, {0x00001F2F, 0x00000314}, {0x00001F2F, 0x00000342}, +{0x00001F30, 0x000003B9}, {0x00001F30, 0x00000313}, {0x00001F31, 0x000003B9}, {0x00001F31, 0x00000314}, +{0x00001F32, 0x000003B9}, {0x00001F32, 0x00000313}, {0x00001F32, 0x00000300}, {0x00001F33, 0x000003B9}, +{0x00001F33, 0x00000314}, {0x00001F33, 0x00000300}, {0x00001F34, 0x000003B9}, {0x00001F34, 0x00000313}, +{0x00001F34, 0x00000301}, {0x00001F35, 0x000003B9}, {0x00001F35, 0x00000314}, {0x00001F35, 0x00000301}, +{0x00001F36, 0x000003B9}, {0x00001F36, 0x00000313}, {0x00001F36, 0x00000342}, {0x00001F37, 0x000003B9}, +{0x00001F37, 0x00000314}, {0x00001F37, 0x00000342}, {0x00001F38, 0x00000399}, {0x00001F38, 0x00000313}, +{0x00001F39, 0x00000399}, {0x00001F39, 0x00000314}, {0x00001F3A, 0x00000399}, {0x00001F3A, 0x00000313}, +{0x00001F3A, 0x00000300}, {0x00001F3B, 0x00000399}, {0x00001F3B, 0x00000314}, {0x00001F3B, 0x00000300}, +{0x00001F3C, 0x00000399}, {0x00001F3C, 0x00000313}, {0x00001F3C, 0x00000301}, {0x00001F3D, 0x00000399}, +{0x00001F3D, 0x00000314}, {0x00001F3D, 0x00000301}, {0x00001F3E, 0x00000399}, {0x00001F3E, 0x00000313}, +{0x00001F3E, 0x00000342}, {0x00001F3F, 0x00000399}, {0x00001F3F, 0x00000314}, {0x00001F3F, 0x00000342}, +{0x00001F40, 0x000003BF}, {0x00001F40, 0x00000313}, {0x00001F41, 0x000003BF}, {0x00001F41, 0x00000314}, +{0x00001F42, 0x000003BF}, {0x00001F42, 0x00000313}, {0x00001F42, 0x00000300}, {0x00001F43, 0x000003BF}, +{0x00001F43, 0x00000314}, {0x00001F43, 0x00000300}, {0x00001F44, 0x000003BF}, {0x00001F44, 0x00000313}, +{0x00001F44, 0x00000301}, {0x00001F45, 0x000003BF}, {0x00001F45, 0x00000314}, {0x00001F45, 0x00000301}, +{0x00001F48, 0x0000039F}, {0x00001F48, 0x00000313}, {0x00001F49, 0x0000039F}, {0x00001F49, 0x00000314}, +{0x00001F4A, 0x0000039F}, {0x00001F4A, 0x00000313}, {0x00001F4A, 0x00000300}, {0x00001F4B, 0x0000039F}, +{0x00001F4B, 0x00000314}, {0x00001F4B, 0x00000300}, {0x00001F4C, 0x0000039F}, {0x00001F4C, 0x00000313}, +{0x00001F4C, 0x00000301}, {0x00001F4D, 0x0000039F}, {0x00001F4D, 0x00000314}, {0x00001F4D, 0x00000301}, +{0x00001F50, 0x000003C5}, {0x00001F50, 0x00000313}, {0x00001F51, 0x000003C5}, {0x00001F51, 0x00000314}, +{0x00001F52, 0x000003C5}, {0x00001F52, 0x00000313}, {0x00001F52, 0x00000300}, {0x00001F53, 0x000003C5}, +{0x00001F53, 0x00000314}, {0x00001F53, 0x00000300}, {0x00001F54, 0x000003C5}, {0x00001F54, 0x00000313}, +{0x00001F54, 0x00000301}, {0x00001F55, 0x000003C5}, {0x00001F55, 0x00000314}, {0x00001F55, 0x00000301}, +{0x00001F56, 0x000003C5}, {0x00001F56, 0x00000313}, {0x00001F56, 0x00000342}, {0x00001F57, 0x000003C5}, +{0x00001F57, 0x00000314}, {0x00001F57, 0x00000342}, {0x00001F59, 0x000003A5}, {0x00001F59, 0x00000314}, +{0x00001F5B, 0x000003A5}, {0x00001F5B, 0x00000314}, {0x00001F5B, 0x00000300}, {0x00001F5D, 0x000003A5}, +{0x00001F5D, 0x00000314}, {0x00001F5D, 0x00000301}, {0x00001F5F, 0x000003A5}, {0x00001F5F, 0x00000314}, +{0x00001F5F, 0x00000342}, {0x00001F60, 0x000003C9}, {0x00001F60, 0x00000313}, {0x00001F61, 0x000003C9}, +{0x00001F61, 0x00000314}, {0x00001F62, 0x000003C9}, {0x00001F62, 0x00000313}, {0x00001F62, 0x00000300}, +{0x00001F63, 0x000003C9}, {0x00001F63, 0x00000314}, {0x00001F63, 0x00000300}, {0x00001F64, 0x000003C9}, +{0x00001F64, 0x00000313}, {0x00001F64, 0x00000301}, {0x00001F65, 0x000003C9}, {0x00001F65, 0x00000314}, +{0x00001F65, 0x00000301}, {0x00001F66, 0x000003C9}, {0x00001F66, 0x00000313}, {0x00001F66, 0x00000342}, +{0x00001F67, 0x000003C9}, {0x00001F67, 0x00000314}, {0x00001F67, 0x00000342}, {0x00001F68, 0x000003A9}, +{0x00001F68, 0x00000313}, {0x00001F69, 0x000003A9}, {0x00001F69, 0x00000314}, {0x00001F6A, 0x000003A9}, +{0x00001F6A, 0x00000313}, {0x00001F6A, 0x00000300}, {0x00001F6B, 0x000003A9}, {0x00001F6B, 0x00000314}, +{0x00001F6B, 0x00000300}, {0x00001F6C, 0x000003A9}, {0x00001F6C, 0x00000313}, {0x00001F6C, 0x00000301}, +{0x00001F6D, 0x000003A9}, {0x00001F6D, 0x00000314}, {0x00001F6D, 0x00000301}, {0x00001F6E, 0x000003A9}, +{0x00001F6E, 0x00000313}, {0x00001F6E, 0x00000342}, {0x00001F6F, 0x000003A9}, {0x00001F6F, 0x00000314}, +{0x00001F6F, 0x00000342}, {0x00001F70, 0x000003B1}, {0x00001F70, 0x00000300}, {0x00001F71, 0x000003B1}, +{0x00001F71, 0x00000301}, {0x00001F72, 0x000003B5}, {0x00001F72, 0x00000300}, {0x00001F73, 0x000003B5}, +{0x00001F73, 0x00000301}, {0x00001F74, 0x000003B7}, {0x00001F74, 0x00000300}, {0x00001F75, 0x000003B7}, +{0x00001F75, 0x00000301}, {0x00001F76, 0x000003B9}, {0x00001F76, 0x00000300}, {0x00001F77, 0x000003B9}, +{0x00001F77, 0x00000301}, {0x00001F78, 0x000003BF}, {0x00001F78, 0x00000300}, {0x00001F79, 0x000003BF}, +{0x00001F79, 0x00000301}, {0x00001F7A, 0x000003C5}, {0x00001F7A, 0x00000300}, {0x00001F7B, 0x000003C5}, +{0x00001F7B, 0x00000301}, {0x00001F7C, 0x000003C9}, {0x00001F7C, 0x00000300}, {0x00001F7D, 0x000003C9}, +{0x00001F7D, 0x00000301}, {0x00001F80, 0x000003B1}, {0x00001F80, 0x00000313}, {0x00001F80, 0x00000345}, +{0x00001F81, 0x000003B1}, {0x00001F81, 0x00000314}, {0x00001F81, 0x00000345}, {0x00001F82, 0x000003B1}, +{0x00001F82, 0x00000313}, {0x00001F82, 0x00000300}, {0x00001F82, 0x00000345}, {0x00001F83, 0x000003B1}, +{0x00001F83, 0x00000314}, {0x00001F83, 0x00000300}, {0x00001F83, 0x00000345}, {0x00001F84, 0x000003B1}, +{0x00001F84, 0x00000313}, {0x00001F84, 0x00000301}, {0x00001F84, 0x00000345}, {0x00001F85, 0x000003B1}, +{0x00001F85, 0x00000314}, {0x00001F85, 0x00000301}, {0x00001F85, 0x00000345}, {0x00001F86, 0x000003B1}, +{0x00001F86, 0x00000313}, {0x00001F86, 0x00000342}, {0x00001F86, 0x00000345}, {0x00001F87, 0x000003B1}, +{0x00001F87, 0x00000314}, {0x00001F87, 0x00000342}, {0x00001F87, 0x00000345}, {0x00001F88, 0x00000391}, +{0x00001F88, 0x00000313}, {0x00001F88, 0x00000345}, {0x00001F89, 0x00000391}, {0x00001F89, 0x00000314}, +{0x00001F89, 0x00000345}, {0x00001F8A, 0x00000391}, {0x00001F8A, 0x00000313}, {0x00001F8A, 0x00000300}, +{0x00001F8A, 0x00000345}, {0x00001F8B, 0x00000391}, {0x00001F8B, 0x00000314}, {0x00001F8B, 0x00000300}, +{0x00001F8B, 0x00000345}, {0x00001F8C, 0x00000391}, {0x00001F8C, 0x00000313}, {0x00001F8C, 0x00000301}, +{0x00001F8C, 0x00000345}, {0x00001F8D, 0x00000391}, {0x00001F8D, 0x00000314}, {0x00001F8D, 0x00000301}, +{0x00001F8D, 0x00000345}, {0x00001F8E, 0x00000391}, {0x00001F8E, 0x00000313}, {0x00001F8E, 0x00000342}, +{0x00001F8E, 0x00000345}, {0x00001F8F, 0x00000391}, {0x00001F8F, 0x00000314}, {0x00001F8F, 0x00000342}, +{0x00001F8F, 0x00000345}, {0x00001F90, 0x000003B7}, {0x00001F90, 0x00000313}, {0x00001F90, 0x00000345}, +{0x00001F91, 0x000003B7}, {0x00001F91, 0x00000314}, {0x00001F91, 0x00000345}, {0x00001F92, 0x000003B7}, +{0x00001F92, 0x00000313}, {0x00001F92, 0x00000300}, {0x00001F92, 0x00000345}, {0x00001F93, 0x000003B7}, +{0x00001F93, 0x00000314}, {0x00001F93, 0x00000300}, {0x00001F93, 0x00000345}, {0x00001F94, 0x000003B7}, +{0x00001F94, 0x00000313}, {0x00001F94, 0x00000301}, {0x00001F94, 0x00000345}, {0x00001F95, 0x000003B7}, +{0x00001F95, 0x00000314}, {0x00001F95, 0x00000301}, {0x00001F95, 0x00000345}, {0x00001F96, 0x000003B7}, +{0x00001F96, 0x00000313}, {0x00001F96, 0x00000342}, {0x00001F96, 0x00000345}, {0x00001F97, 0x000003B7}, +{0x00001F97, 0x00000314}, {0x00001F97, 0x00000342}, {0x00001F97, 0x00000345}, {0x00001F98, 0x00000397}, +{0x00001F98, 0x00000313}, {0x00001F98, 0x00000345}, {0x00001F99, 0x00000397}, {0x00001F99, 0x00000314}, +{0x00001F99, 0x00000345}, {0x00001F9A, 0x00000397}, {0x00001F9A, 0x00000313}, {0x00001F9A, 0x00000300}, +{0x00001F9A, 0x00000345}, {0x00001F9B, 0x00000397}, {0x00001F9B, 0x00000314}, {0x00001F9B, 0x00000300}, +{0x00001F9B, 0x00000345}, {0x00001F9C, 0x00000397}, {0x00001F9C, 0x00000313}, {0x00001F9C, 0x00000301}, +{0x00001F9C, 0x00000345}, {0x00001F9D, 0x00000397}, {0x00001F9D, 0x00000314}, {0x00001F9D, 0x00000301}, +{0x00001F9D, 0x00000345}, {0x00001F9E, 0x00000397}, {0x00001F9E, 0x00000313}, {0x00001F9E, 0x00000342}, +{0x00001F9E, 0x00000345}, {0x00001F9F, 0x00000397}, {0x00001F9F, 0x00000314}, {0x00001F9F, 0x00000342}, +{0x00001F9F, 0x00000345}, {0x00001FA0, 0x000003C9}, {0x00001FA0, 0x00000313}, {0x00001FA0, 0x00000345}, +{0x00001FA1, 0x000003C9}, {0x00001FA1, 0x00000314}, {0x00001FA1, 0x00000345}, {0x00001FA2, 0x000003C9}, +{0x00001FA2, 0x00000313}, {0x00001FA2, 0x00000300}, {0x00001FA2, 0x00000345}, {0x00001FA3, 0x000003C9}, +{0x00001FA3, 0x00000314}, {0x00001FA3, 0x00000300}, {0x00001FA3, 0x00000345}, {0x00001FA4, 0x000003C9}, +{0x00001FA4, 0x00000313}, {0x00001FA4, 0x00000301}, {0x00001FA4, 0x00000345}, {0x00001FA5, 0x000003C9}, +{0x00001FA5, 0x00000314}, {0x00001FA5, 0x00000301}, {0x00001FA5, 0x00000345}, {0x00001FA6, 0x000003C9}, +{0x00001FA6, 0x00000313}, {0x00001FA6, 0x00000342}, {0x00001FA6, 0x00000345}, {0x00001FA7, 0x000003C9}, +{0x00001FA7, 0x00000314}, {0x00001FA7, 0x00000342}, {0x00001FA7, 0x00000345}, {0x00001FA8, 0x000003A9}, +{0x00001FA8, 0x00000313}, {0x00001FA8, 0x00000345}, {0x00001FA9, 0x000003A9}, {0x00001FA9, 0x00000314}, +{0x00001FA9, 0x00000345}, {0x00001FAA, 0x000003A9}, {0x00001FAA, 0x00000313}, {0x00001FAA, 0x00000300}, +{0x00001FAA, 0x00000345}, {0x00001FAB, 0x000003A9}, {0x00001FAB, 0x00000314}, {0x00001FAB, 0x00000300}, +{0x00001FAB, 0x00000345}, {0x00001FAC, 0x000003A9}, {0x00001FAC, 0x00000313}, {0x00001FAC, 0x00000301}, +{0x00001FAC, 0x00000345}, {0x00001FAD, 0x000003A9}, {0x00001FAD, 0x00000314}, {0x00001FAD, 0x00000301}, +{0x00001FAD, 0x00000345}, {0x00001FAE, 0x000003A9}, {0x00001FAE, 0x00000313}, {0x00001FAE, 0x00000342}, +{0x00001FAE, 0x00000345}, {0x00001FAF, 0x000003A9}, {0x00001FAF, 0x00000314}, {0x00001FAF, 0x00000342}, +{0x00001FAF, 0x00000345}, {0x00001FB0, 0x000003B1}, {0x00001FB0, 0x00000306}, {0x00001FB1, 0x000003B1}, +{0x00001FB1, 0x00000304}, {0x00001FB2, 0x000003B1}, {0x00001FB2, 0x00000300}, {0x00001FB2, 0x00000345}, +{0x00001FB3, 0x000003B1}, {0x00001FB3, 0x00000345}, {0x00001FB4, 0x000003B1}, {0x00001FB4, 0x00000301}, +{0x00001FB4, 0x00000345}, {0x00001FB6, 0x000003B1}, {0x00001FB6, 0x00000342}, {0x00001FB7, 0x000003B1}, +{0x00001FB7, 0x00000342}, {0x00001FB7, 0x00000345}, {0x00001FB8, 0x00000391}, {0x00001FB8, 0x00000306}, +{0x00001FB9, 0x00000391}, {0x00001FB9, 0x00000304}, {0x00001FBA, 0x00000391}, {0x00001FBA, 0x00000300}, +{0x00001FBB, 0x00000391}, {0x00001FBB, 0x00000301}, {0x00001FBC, 0x00000391}, {0x00001FBC, 0x00000345}, +{0x00001FBE, 0x000003B9}, {0x00001FC1, 0x000000A8}, {0x00001FC1, 0x00000342}, {0x00001FC2, 0x000003B7}, +{0x00001FC2, 0x00000300}, {0x00001FC2, 0x00000345}, {0x00001FC3, 0x000003B7}, {0x00001FC3, 0x00000345}, +{0x00001FC4, 0x000003B7}, {0x00001FC4, 0x00000301}, {0x00001FC4, 0x00000345}, {0x00001FC6, 0x000003B7}, +{0x00001FC6, 0x00000342}, {0x00001FC7, 0x000003B7}, {0x00001FC7, 0x00000342}, {0x00001FC7, 0x00000345}, +{0x00001FC8, 0x00000395}, {0x00001FC8, 0x00000300}, {0x00001FC9, 0x00000395}, {0x00001FC9, 0x00000301}, +{0x00001FCA, 0x00000397}, {0x00001FCA, 0x00000300}, {0x00001FCB, 0x00000397}, {0x00001FCB, 0x00000301}, +{0x00001FCC, 0x00000397}, {0x00001FCC, 0x00000345}, {0x00001FCD, 0x00001FBF}, {0x00001FCD, 0x00000300}, +{0x00001FCE, 0x00001FBF}, {0x00001FCE, 0x00000301}, {0x00001FCF, 0x00001FBF}, {0x00001FCF, 0x00000342}, +{0x00001FD0, 0x000003B9}, {0x00001FD0, 0x00000306}, {0x00001FD1, 0x000003B9}, {0x00001FD1, 0x00000304}, +{0x00001FD2, 0x000003B9}, {0x00001FD2, 0x00000308}, {0x00001FD2, 0x00000300}, {0x00001FD3, 0x000003B9}, +{0x00001FD3, 0x00000308}, {0x00001FD3, 0x00000301}, {0x00001FD6, 0x000003B9}, {0x00001FD6, 0x00000342}, +{0x00001FD7, 0x000003B9}, {0x00001FD7, 0x00000308}, {0x00001FD7, 0x00000342}, {0x00001FD8, 0x00000399}, +{0x00001FD8, 0x00000306}, {0x00001FD9, 0x00000399}, {0x00001FD9, 0x00000304}, {0x00001FDA, 0x00000399}, +{0x00001FDA, 0x00000300}, {0x00001FDB, 0x00000399}, {0x00001FDB, 0x00000301}, {0x00001FDD, 0x00001FFE}, +{0x00001FDD, 0x00000300}, {0x00001FDE, 0x00001FFE}, {0x00001FDE, 0x00000301}, {0x00001FDF, 0x00001FFE}, +{0x00001FDF, 0x00000342}, {0x00001FE0, 0x000003C5}, {0x00001FE0, 0x00000306}, {0x00001FE1, 0x000003C5}, +{0x00001FE1, 0x00000304}, {0x00001FE2, 0x000003C5}, {0x00001FE2, 0x00000308}, {0x00001FE2, 0x00000300}, +{0x00001FE3, 0x000003C5}, {0x00001FE3, 0x00000308}, {0x00001FE3, 0x00000301}, {0x00001FE4, 0x000003C1}, +{0x00001FE4, 0x00000313}, {0x00001FE5, 0x000003C1}, {0x00001FE5, 0x00000314}, {0x00001FE6, 0x000003C5}, +{0x00001FE6, 0x00000342}, {0x00001FE7, 0x000003C5}, {0x00001FE7, 0x00000308}, {0x00001FE7, 0x00000342}, +{0x00001FE8, 0x000003A5}, {0x00001FE8, 0x00000306}, {0x00001FE9, 0x000003A5}, {0x00001FE9, 0x00000304}, +{0x00001FEA, 0x000003A5}, {0x00001FEA, 0x00000300}, {0x00001FEB, 0x000003A5}, {0x00001FEB, 0x00000301}, +{0x00001FEC, 0x000003A1}, {0x00001FEC, 0x00000314}, {0x00001FED, 0x000000A8}, {0x00001FED, 0x00000300}, +{0x00001FEE, 0x000000A8}, {0x00001FEE, 0x00000301}, {0x00001FEF, 0x00000060}, {0x00001FF2, 0x000003C9}, +{0x00001FF2, 0x00000300}, {0x00001FF2, 0x00000345}, {0x00001FF3, 0x000003C9}, {0x00001FF3, 0x00000345}, +{0x00001FF4, 0x000003C9}, {0x00001FF4, 0x00000301}, {0x00001FF4, 0x00000345}, {0x00001FF6, 0x000003C9}, +{0x00001FF6, 0x00000342}, {0x00001FF7, 0x000003C9}, {0x00001FF7, 0x00000342}, {0x00001FF7, 0x00000345}, +{0x00001FF8, 0x0000039F}, {0x00001FF8, 0x00000300}, {0x00001FF9, 0x0000039F}, {0x00001FF9, 0x00000301}, +{0x00001FFA, 0x000003A9}, {0x00001FFA, 0x00000300}, {0x00001FFB, 0x000003A9}, {0x00001FFB, 0x00000301}, +{0x00001FFC, 0x000003A9}, {0x00001FFC, 0x00000345}, {0x00001FFD, 0x000000B4}, {0x00002000, 0x00002002}, +{0x00002001, 0x00002003}, {0x00002126, 0x000003A9}, {0x0000212A, 0x0000004B}, {0x0000212B, 0x00000041}, +{0x0000212B, 0x0000030A}, {0x0000219A, 0x00002190}, {0x0000219A, 0x00000338}, {0x0000219B, 0x00002192}, +{0x0000219B, 0x00000338}, {0x000021AE, 0x00002194}, {0x000021AE, 0x00000338}, {0x000021CD, 0x000021D0}, +{0x000021CD, 0x00000338}, {0x000021CE, 0x000021D4}, {0x000021CE, 0x00000338}, {0x000021CF, 0x000021D2}, +{0x000021CF, 0x00000338}, {0x00002204, 0x00002203}, {0x00002204, 0x00000338}, {0x00002209, 0x00002208}, +{0x00002209, 0x00000338}, {0x0000220C, 0x0000220B}, {0x0000220C, 0x00000338}, {0x00002224, 0x00002223}, +{0x00002224, 0x00000338}, {0x00002226, 0x00002225}, {0x00002226, 0x00000338}, {0x00002241, 0x0000223C}, +{0x00002241, 0x00000338}, {0x00002244, 0x00002243}, {0x00002244, 0x00000338}, {0x00002247, 0x00002245}, +{0x00002247, 0x00000338}, {0x00002249, 0x00002248}, {0x00002249, 0x00000338}, {0x00002260, 0x0000003D}, +{0x00002260, 0x00000338}, {0x00002262, 0x00002261}, {0x00002262, 0x00000338}, {0x0000226D, 0x0000224D}, +{0x0000226D, 0x00000338}, {0x0000226E, 0x0000003C}, {0x0000226E, 0x00000338}, {0x0000226F, 0x0000003E}, +{0x0000226F, 0x00000338}, {0x00002270, 0x00002264}, {0x00002270, 0x00000338}, {0x00002271, 0x00002265}, +{0x00002271, 0x00000338}, {0x00002274, 0x00002272}, {0x00002274, 0x00000338}, {0x00002275, 0x00002273}, +{0x00002275, 0x00000338}, {0x00002278, 0x00002276}, {0x00002278, 0x00000338}, {0x00002279, 0x00002277}, +{0x00002279, 0x00000338}, {0x00002280, 0x0000227A}, {0x00002280, 0x00000338}, {0x00002281, 0x0000227B}, +{0x00002281, 0x00000338}, {0x00002284, 0x00002282}, {0x00002284, 0x00000338}, {0x00002285, 0x00002283}, +{0x00002285, 0x00000338}, {0x00002288, 0x00002286}, {0x00002288, 0x00000338}, {0x00002289, 0x00002287}, +{0x00002289, 0x00000338}, {0x000022AC, 0x000022A2}, {0x000022AC, 0x00000338}, {0x000022AD, 0x000022A8}, +{0x000022AD, 0x00000338}, {0x000022AE, 0x000022A9}, {0x000022AE, 0x00000338}, {0x000022AF, 0x000022AB}, +{0x000022AF, 0x00000338}, {0x000022E0, 0x0000227C}, {0x000022E0, 0x00000338}, {0x000022E1, 0x0000227D}, +{0x000022E1, 0x00000338}, {0x000022E2, 0x00002291}, {0x000022E2, 0x00000338}, {0x000022E3, 0x00002292}, +{0x000022E3, 0x00000338}, {0x000022EA, 0x000022B2}, {0x000022EA, 0x00000338}, {0x000022EB, 0x000022B3}, +{0x000022EB, 0x00000338}, {0x000022EC, 0x000022B4}, {0x000022EC, 0x00000338}, {0x000022ED, 0x000022B5}, +{0x000022ED, 0x00000338}, {0x00002329, 0x00003008}, {0x0000232A, 0x00003009}, {0x00002ADC, 0x00002ADD}, +{0x00002ADC, 0x00000338}, {0x0000304C, 0x0000304B}, {0x0000304C, 0x00003099}, {0x0000304E, 0x0000304D}, +{0x0000304E, 0x00003099}, {0x00003050, 0x0000304F}, {0x00003050, 0x00003099}, {0x00003052, 0x00003051}, +{0x00003052, 0x00003099}, {0x00003054, 0x00003053}, {0x00003054, 0x00003099}, {0x00003056, 0x00003055}, +{0x00003056, 0x00003099}, {0x00003058, 0x00003057}, {0x00003058, 0x00003099}, {0x0000305A, 0x00003059}, +{0x0000305A, 0x00003099}, {0x0000305C, 0x0000305B}, {0x0000305C, 0x00003099}, {0x0000305E, 0x0000305D}, +{0x0000305E, 0x00003099}, {0x00003060, 0x0000305F}, {0x00003060, 0x00003099}, {0x00003062, 0x00003061}, +{0x00003062, 0x00003099}, {0x00003065, 0x00003064}, {0x00003065, 0x00003099}, {0x00003067, 0x00003066}, +{0x00003067, 0x00003099}, {0x00003069, 0x00003068}, {0x00003069, 0x00003099}, {0x00003070, 0x0000306F}, +{0x00003070, 0x00003099}, {0x00003071, 0x0000306F}, {0x00003071, 0x0000309A}, {0x00003073, 0x00003072}, +{0x00003073, 0x00003099}, {0x00003074, 0x00003072}, {0x00003074, 0x0000309A}, {0x00003076, 0x00003075}, +{0x00003076, 0x00003099}, {0x00003077, 0x00003075}, {0x00003077, 0x0000309A}, {0x00003079, 0x00003078}, +{0x00003079, 0x00003099}, {0x0000307A, 0x00003078}, {0x0000307A, 0x0000309A}, {0x0000307C, 0x0000307B}, +{0x0000307C, 0x00003099}, {0x0000307D, 0x0000307B}, {0x0000307D, 0x0000309A}, {0x00003094, 0x00003046}, +{0x00003094, 0x00003099}, {0x0000309E, 0x0000309D}, {0x0000309E, 0x00003099}, {0x000030AC, 0x000030AB}, +{0x000030AC, 0x00003099}, {0x000030AE, 0x000030AD}, {0x000030AE, 0x00003099}, {0x000030B0, 0x000030AF}, +{0x000030B0, 0x00003099}, {0x000030B2, 0x000030B1}, {0x000030B2, 0x00003099}, {0x000030B4, 0x000030B3}, +{0x000030B4, 0x00003099}, {0x000030B6, 0x000030B5}, {0x000030B6, 0x00003099}, {0x000030B8, 0x000030B7}, +{0x000030B8, 0x00003099}, {0x000030BA, 0x000030B9}, {0x000030BA, 0x00003099}, {0x000030BC, 0x000030BB}, +{0x000030BC, 0x00003099}, {0x000030BE, 0x000030BD}, {0x000030BE, 0x00003099}, {0x000030C0, 0x000030BF}, +{0x000030C0, 0x00003099}, {0x000030C2, 0x000030C1}, {0x000030C2, 0x00003099}, {0x000030C5, 0x000030C4}, +{0x000030C5, 0x00003099}, {0x000030C7, 0x000030C6}, {0x000030C7, 0x00003099}, {0x000030C9, 0x000030C8}, +{0x000030C9, 0x00003099}, {0x000030D0, 0x000030CF}, {0x000030D0, 0x00003099}, {0x000030D1, 0x000030CF}, +{0x000030D1, 0x0000309A}, {0x000030D3, 0x000030D2}, {0x000030D3, 0x00003099}, {0x000030D4, 0x000030D2}, +{0x000030D4, 0x0000309A}, {0x000030D6, 0x000030D5}, {0x000030D6, 0x00003099}, {0x000030D7, 0x000030D5}, +{0x000030D7, 0x0000309A}, {0x000030D9, 0x000030D8}, {0x000030D9, 0x00003099}, {0x000030DA, 0x000030D8}, +{0x000030DA, 0x0000309A}, {0x000030DC, 0x000030DB}, {0x000030DC, 0x00003099}, {0x000030DD, 0x000030DB}, +{0x000030DD, 0x0000309A}, {0x000030F4, 0x000030A6}, {0x000030F4, 0x00003099}, {0x000030F7, 0x000030EF}, +{0x000030F7, 0x00003099}, {0x000030F8, 0x000030F0}, {0x000030F8, 0x00003099}, {0x000030F9, 0x000030F1}, +{0x000030F9, 0x00003099}, {0x000030FA, 0x000030F2}, {0x000030FA, 0x00003099}, {0x000030FE, 0x000030FD}, +{0x000030FE, 0x00003099}, {0x0000F900, 0x00008C48}, {0x0000F901, 0x000066F4}, {0x0000F902, 0x00008ECA}, +{0x0000F903, 0x00008CC8}, {0x0000F904, 0x00006ED1}, {0x0000F905, 0x00004E32}, {0x0000F906, 0x000053E5}, +{0x0000F907, 0x00009F9C}, {0x0000F908, 0x00009F9C}, {0x0000F909, 0x00005951}, {0x0000F90A, 0x000091D1}, +{0x0000F90B, 0x00005587}, {0x0000F90C, 0x00005948}, {0x0000F90D, 0x000061F6}, {0x0000F90E, 0x00007669}, +{0x0000F90F, 0x00007F85}, {0x0000F910, 0x0000863F}, {0x0000F911, 0x000087BA}, {0x0000F912, 0x000088F8}, +{0x0000F913, 0x0000908F}, {0x0000F914, 0x00006A02}, {0x0000F915, 0x00006D1B}, {0x0000F916, 0x000070D9}, +{0x0000F917, 0x000073DE}, {0x0000F918, 0x0000843D}, {0x0000F919, 0x0000916A}, {0x0000F91A, 0x000099F1}, +{0x0000F91B, 0x00004E82}, {0x0000F91C, 0x00005375}, {0x0000F91D, 0x00006B04}, {0x0000F91E, 0x0000721B}, +{0x0000F91F, 0x0000862D}, {0x0000F920, 0x00009E1E}, {0x0000F921, 0x00005D50}, {0x0000F922, 0x00006FEB}, +{0x0000F923, 0x000085CD}, {0x0000F924, 0x00008964}, {0x0000F925, 0x000062C9}, {0x0000F926, 0x000081D8}, +{0x0000F927, 0x0000881F}, {0x0000F928, 0x00005ECA}, {0x0000F929, 0x00006717}, {0x0000F92A, 0x00006D6A}, +{0x0000F92B, 0x000072FC}, {0x0000F92C, 0x000090CE}, {0x0000F92D, 0x00004F86}, {0x0000F92E, 0x000051B7}, +{0x0000F92F, 0x000052DE}, {0x0000F930, 0x000064C4}, {0x0000F931, 0x00006AD3}, {0x0000F932, 0x00007210}, +{0x0000F933, 0x000076E7}, {0x0000F934, 0x00008001}, {0x0000F935, 0x00008606}, {0x0000F936, 0x0000865C}, +{0x0000F937, 0x00008DEF}, {0x0000F938, 0x00009732}, {0x0000F939, 0x00009B6F}, {0x0000F93A, 0x00009DFA}, +{0x0000F93B, 0x0000788C}, {0x0000F93C, 0x0000797F}, {0x0000F93D, 0x00007DA0}, {0x0000F93E, 0x000083C9}, +{0x0000F93F, 0x00009304}, {0x0000F940, 0x00009E7F}, {0x0000F941, 0x00008AD6}, {0x0000F942, 0x000058DF}, +{0x0000F943, 0x00005F04}, {0x0000F944, 0x00007C60}, {0x0000F945, 0x0000807E}, {0x0000F946, 0x00007262}, +{0x0000F947, 0x000078CA}, {0x0000F948, 0x00008CC2}, {0x0000F949, 0x000096F7}, {0x0000F94A, 0x000058D8}, +{0x0000F94B, 0x00005C62}, {0x0000F94C, 0x00006A13}, {0x0000F94D, 0x00006DDA}, {0x0000F94E, 0x00006F0F}, +{0x0000F94F, 0x00007D2F}, {0x0000F950, 0x00007E37}, {0x0000F951, 0x0000964B}, {0x0000F952, 0x000052D2}, +{0x0000F953, 0x0000808B}, {0x0000F954, 0x000051DC}, {0x0000F955, 0x000051CC}, {0x0000F956, 0x00007A1C}, +{0x0000F957, 0x00007DBE}, {0x0000F958, 0x000083F1}, {0x0000F959, 0x00009675}, {0x0000F95A, 0x00008B80}, +{0x0000F95B, 0x000062CF}, {0x0000F95C, 0x00006A02}, {0x0000F95D, 0x00008AFE}, {0x0000F95E, 0x00004E39}, +{0x0000F95F, 0x00005BE7}, {0x0000F960, 0x00006012}, {0x0000F961, 0x00007387}, {0x0000F962, 0x00007570}, +{0x0000F963, 0x00005317}, {0x0000F964, 0x000078FB}, {0x0000F965, 0x00004FBF}, {0x0000F966, 0x00005FA9}, +{0x0000F967, 0x00004E0D}, {0x0000F968, 0x00006CCC}, {0x0000F969, 0x00006578}, {0x0000F96A, 0x00007D22}, +{0x0000F96B, 0x000053C3}, {0x0000F96C, 0x0000585E}, {0x0000F96D, 0x00007701}, {0x0000F96E, 0x00008449}, +{0x0000F96F, 0x00008AAA}, {0x0000F970, 0x00006BBA}, {0x0000F971, 0x00008FB0}, {0x0000F972, 0x00006C88}, +{0x0000F973, 0x000062FE}, {0x0000F974, 0x000082E5}, {0x0000F975, 0x000063A0}, {0x0000F976, 0x00007565}, +{0x0000F977, 0x00004EAE}, {0x0000F978, 0x00005169}, {0x0000F979, 0x000051C9}, {0x0000F97A, 0x00006881}, +{0x0000F97B, 0x00007CE7}, {0x0000F97C, 0x0000826F}, {0x0000F97D, 0x00008AD2}, {0x0000F97E, 0x000091CF}, +{0x0000F97F, 0x000052F5}, {0x0000F980, 0x00005442}, {0x0000F981, 0x00005973}, {0x0000F982, 0x00005EEC}, +{0x0000F983, 0x000065C5}, {0x0000F984, 0x00006FFE}, {0x0000F985, 0x0000792A}, {0x0000F986, 0x000095AD}, +{0x0000F987, 0x00009A6A}, {0x0000F988, 0x00009E97}, {0x0000F989, 0x00009ECE}, {0x0000F98A, 0x0000529B}, +{0x0000F98B, 0x000066C6}, {0x0000F98C, 0x00006B77}, {0x0000F98D, 0x00008F62}, {0x0000F98E, 0x00005E74}, +{0x0000F98F, 0x00006190}, {0x0000F990, 0x00006200}, {0x0000F991, 0x0000649A}, {0x0000F992, 0x00006F23}, +{0x0000F993, 0x00007149}, {0x0000F994, 0x00007489}, {0x0000F995, 0x000079CA}, {0x0000F996, 0x00007DF4}, +{0x0000F997, 0x0000806F}, {0x0000F998, 0x00008F26}, {0x0000F999, 0x000084EE}, {0x0000F99A, 0x00009023}, +{0x0000F99B, 0x0000934A}, {0x0000F99C, 0x00005217}, {0x0000F99D, 0x000052A3}, {0x0000F99E, 0x000054BD}, +{0x0000F99F, 0x000070C8}, {0x0000F9A0, 0x000088C2}, {0x0000F9A1, 0x00008AAA}, {0x0000F9A2, 0x00005EC9}, +{0x0000F9A3, 0x00005FF5}, {0x0000F9A4, 0x0000637B}, {0x0000F9A5, 0x00006BAE}, {0x0000F9A6, 0x00007C3E}, +{0x0000F9A7, 0x00007375}, {0x0000F9A8, 0x00004EE4}, {0x0000F9A9, 0x000056F9}, {0x0000F9AA, 0x00005BE7}, +{0x0000F9AB, 0x00005DBA}, {0x0000F9AC, 0x0000601C}, {0x0000F9AD, 0x000073B2}, {0x0000F9AE, 0x00007469}, +{0x0000F9AF, 0x00007F9A}, {0x0000F9B0, 0x00008046}, {0x0000F9B1, 0x00009234}, {0x0000F9B2, 0x000096F6}, +{0x0000F9B3, 0x00009748}, {0x0000F9B4, 0x00009818}, {0x0000F9B5, 0x00004F8B}, {0x0000F9B6, 0x000079AE}, +{0x0000F9B7, 0x000091B4}, {0x0000F9B8, 0x000096B8}, {0x0000F9B9, 0x000060E1}, {0x0000F9BA, 0x00004E86}, +{0x0000F9BB, 0x000050DA}, {0x0000F9BC, 0x00005BEE}, {0x0000F9BD, 0x00005C3F}, {0x0000F9BE, 0x00006599}, +{0x0000F9BF, 0x00006A02}, {0x0000F9C0, 0x000071CE}, {0x0000F9C1, 0x00007642}, {0x0000F9C2, 0x000084FC}, +{0x0000F9C3, 0x0000907C}, {0x0000F9C4, 0x00009F8D}, {0x0000F9C5, 0x00006688}, {0x0000F9C6, 0x0000962E}, +{0x0000F9C7, 0x00005289}, {0x0000F9C8, 0x0000677B}, {0x0000F9C9, 0x000067F3}, {0x0000F9CA, 0x00006D41}, +{0x0000F9CB, 0x00006E9C}, {0x0000F9CC, 0x00007409}, {0x0000F9CD, 0x00007559}, {0x0000F9CE, 0x0000786B}, +{0x0000F9CF, 0x00007D10}, {0x0000F9D0, 0x0000985E}, {0x0000F9D1, 0x0000516D}, {0x0000F9D2, 0x0000622E}, +{0x0000F9D3, 0x00009678}, {0x0000F9D4, 0x0000502B}, {0x0000F9D5, 0x00005D19}, {0x0000F9D6, 0x00006DEA}, +{0x0000F9D7, 0x00008F2A}, {0x0000F9D8, 0x00005F8B}, {0x0000F9D9, 0x00006144}, {0x0000F9DA, 0x00006817}, +{0x0000F9DB, 0x00007387}, {0x0000F9DC, 0x00009686}, {0x0000F9DD, 0x00005229}, {0x0000F9DE, 0x0000540F}, +{0x0000F9DF, 0x00005C65}, {0x0000F9E0, 0x00006613}, {0x0000F9E1, 0x0000674E}, {0x0000F9E2, 0x000068A8}, +{0x0000F9E3, 0x00006CE5}, {0x0000F9E4, 0x00007406}, {0x0000F9E5, 0x000075E2}, {0x0000F9E6, 0x00007F79}, +{0x0000F9E7, 0x000088CF}, {0x0000F9E8, 0x000088E1}, {0x0000F9E9, 0x000091CC}, {0x0000F9EA, 0x000096E2}, +{0x0000F9EB, 0x0000533F}, {0x0000F9EC, 0x00006EBA}, {0x0000F9ED, 0x0000541D}, {0x0000F9EE, 0x000071D0}, +{0x0000F9EF, 0x00007498}, {0x0000F9F0, 0x000085FA}, {0x0000F9F1, 0x000096A3}, {0x0000F9F2, 0x00009C57}, +{0x0000F9F3, 0x00009E9F}, {0x0000F9F4, 0x00006797}, {0x0000F9F5, 0x00006DCB}, {0x0000F9F6, 0x000081E8}, +{0x0000F9F7, 0x00007ACB}, {0x0000F9F8, 0x00007B20}, {0x0000F9F9, 0x00007C92}, {0x0000F9FA, 0x000072C0}, +{0x0000F9FB, 0x00007099}, {0x0000F9FC, 0x00008B58}, {0x0000F9FD, 0x00004EC0}, {0x0000F9FE, 0x00008336}, +{0x0000F9FF, 0x0000523A}, {0x0000FA00, 0x00005207}, {0x0000FA01, 0x00005EA6}, {0x0000FA02, 0x000062D3}, +{0x0000FA03, 0x00007CD6}, {0x0000FA04, 0x00005B85}, {0x0000FA05, 0x00006D1E}, {0x0000FA06, 0x000066B4}, +{0x0000FA07, 0x00008F3B}, {0x0000FA08, 0x0000884C}, {0x0000FA09, 0x0000964D}, {0x0000FA0A, 0x0000898B}, +{0x0000FA0B, 0x00005ED3}, {0x0000FA0C, 0x00005140}, {0x0000FA0D, 0x000055C0}, {0x0000FA10, 0x0000585A}, +{0x0000FA12, 0x00006674}, {0x0000FA15, 0x000051DE}, {0x0000FA16, 0x0000732A}, {0x0000FA17, 0x000076CA}, +{0x0000FA18, 0x0000793C}, {0x0000FA19, 0x0000795E}, {0x0000FA1A, 0x00007965}, {0x0000FA1B, 0x0000798F}, +{0x0000FA1C, 0x00009756}, {0x0000FA1D, 0x00007CBE}, {0x0000FA1E, 0x00007FBD}, {0x0000FA20, 0x00008612}, +{0x0000FA22, 0x00008AF8}, {0x0000FA25, 0x00009038}, {0x0000FA26, 0x000090FD}, {0x0000FA2A, 0x000098EF}, +{0x0000FA2B, 0x000098FC}, {0x0000FA2C, 0x00009928}, {0x0000FA2D, 0x00009DB4}, {0x0000FA2E, 0x000090DE}, +{0x0000FA2F, 0x000096B7}, {0x0000FA30, 0x00004FAE}, {0x0000FA31, 0x000050E7}, {0x0000FA32, 0x0000514D}, +{0x0000FA33, 0x000052C9}, {0x0000FA34, 0x000052E4}, {0x0000FA35, 0x00005351}, {0x0000FA36, 0x0000559D}, +{0x0000FA37, 0x00005606}, {0x0000FA38, 0x00005668}, {0x0000FA39, 0x00005840}, {0x0000FA3A, 0x000058A8}, +{0x0000FA3B, 0x00005C64}, {0x0000FA3C, 0x00005C6E}, {0x0000FA3D, 0x00006094}, {0x0000FA3E, 0x00006168}, +{0x0000FA3F, 0x0000618E}, {0x0000FA40, 0x000061F2}, {0x0000FA41, 0x0000654F}, {0x0000FA42, 0x000065E2}, +{0x0000FA43, 0x00006691}, {0x0000FA44, 0x00006885}, {0x0000FA45, 0x00006D77}, {0x0000FA46, 0x00006E1A}, +{0x0000FA47, 0x00006F22}, {0x0000FA48, 0x0000716E}, {0x0000FA49, 0x0000722B}, {0x0000FA4A, 0x00007422}, +{0x0000FA4B, 0x00007891}, {0x0000FA4C, 0x0000793E}, {0x0000FA4D, 0x00007949}, {0x0000FA4E, 0x00007948}, +{0x0000FA4F, 0x00007950}, {0x0000FA50, 0x00007956}, {0x0000FA51, 0x0000795D}, {0x0000FA52, 0x0000798D}, +{0x0000FA53, 0x0000798E}, {0x0000FA54, 0x00007A40}, {0x0000FA55, 0x00007A81}, {0x0000FA56, 0x00007BC0}, +{0x0000FA57, 0x00007DF4}, {0x0000FA58, 0x00007E09}, {0x0000FA59, 0x00007E41}, {0x0000FA5A, 0x00007F72}, +{0x0000FA5B, 0x00008005}, {0x0000FA5C, 0x000081ED}, {0x0000FA5D, 0x00008279}, {0x0000FA5E, 0x00008279}, +{0x0000FA5F, 0x00008457}, {0x0000FA60, 0x00008910}, {0x0000FA61, 0x00008996}, {0x0000FA62, 0x00008B01}, +{0x0000FA63, 0x00008B39}, {0x0000FA64, 0x00008CD3}, {0x0000FA65, 0x00008D08}, {0x0000FA66, 0x00008FB6}, +{0x0000FA67, 0x00009038}, {0x0000FA68, 0x000096E3}, {0x0000FA69, 0x000097FF}, {0x0000FA6A, 0x0000983B}, +{0x0000FA6B, 0x00006075}, {0x0000FA6C, 0x000242EE}, {0x0000FA6D, 0x00008218}, {0x0000FA70, 0x00004E26}, +{0x0000FA71, 0x000051B5}, {0x0000FA72, 0x00005168}, {0x0000FA73, 0x00004F80}, {0x0000FA74, 0x00005145}, +{0x0000FA75, 0x00005180}, {0x0000FA76, 0x000052C7}, {0x0000FA77, 0x000052FA}, {0x0000FA78, 0x0000559D}, +{0x0000FA79, 0x00005555}, {0x0000FA7A, 0x00005599}, {0x0000FA7B, 0x000055E2}, {0x0000FA7C, 0x0000585A}, +{0x0000FA7D, 0x000058B3}, {0x0000FA7E, 0x00005944}, {0x0000FA7F, 0x00005954}, {0x0000FA80, 0x00005A62}, +{0x0000FA81, 0x00005B28}, {0x0000FA82, 0x00005ED2}, {0x0000FA83, 0x00005ED9}, {0x0000FA84, 0x00005F69}, +{0x0000FA85, 0x00005FAD}, {0x0000FA86, 0x000060D8}, {0x0000FA87, 0x0000614E}, {0x0000FA88, 0x00006108}, +{0x0000FA89, 0x0000618E}, {0x0000FA8A, 0x00006160}, {0x0000FA8B, 0x000061F2}, {0x0000FA8C, 0x00006234}, +{0x0000FA8D, 0x000063C4}, {0x0000FA8E, 0x0000641C}, {0x0000FA8F, 0x00006452}, {0x0000FA90, 0x00006556}, +{0x0000FA91, 0x00006674}, {0x0000FA92, 0x00006717}, {0x0000FA93, 0x0000671B}, {0x0000FA94, 0x00006756}, +{0x0000FA95, 0x00006B79}, {0x0000FA96, 0x00006BBA}, {0x0000FA97, 0x00006D41}, {0x0000FA98, 0x00006EDB}, +{0x0000FA99, 0x00006ECB}, {0x0000FA9A, 0x00006F22}, {0x0000FA9B, 0x0000701E}, {0x0000FA9C, 0x0000716E}, +{0x0000FA9D, 0x000077A7}, {0x0000FA9E, 0x00007235}, {0x0000FA9F, 0x000072AF}, {0x0000FAA0, 0x0000732A}, +{0x0000FAA1, 0x00007471}, {0x0000FAA2, 0x00007506}, {0x0000FAA3, 0x0000753B}, {0x0000FAA4, 0x0000761D}, +{0x0000FAA5, 0x0000761F}, {0x0000FAA6, 0x000076CA}, {0x0000FAA7, 0x000076DB}, {0x0000FAA8, 0x000076F4}, +{0x0000FAA9, 0x0000774A}, {0x0000FAAA, 0x00007740}, {0x0000FAAB, 0x000078CC}, {0x0000FAAC, 0x00007AB1}, +{0x0000FAAD, 0x00007BC0}, {0x0000FAAE, 0x00007C7B}, {0x0000FAAF, 0x00007D5B}, {0x0000FAB0, 0x00007DF4}, +{0x0000FAB1, 0x00007F3E}, {0x0000FAB2, 0x00008005}, {0x0000FAB3, 0x00008352}, {0x0000FAB4, 0x000083EF}, +{0x0000FAB5, 0x00008779}, {0x0000FAB6, 0x00008941}, {0x0000FAB7, 0x00008986}, {0x0000FAB8, 0x00008996}, +{0x0000FAB9, 0x00008ABF}, {0x0000FABA, 0x00008AF8}, {0x0000FABB, 0x00008ACB}, {0x0000FABC, 0x00008B01}, +{0x0000FABD, 0x00008AFE}, {0x0000FABE, 0x00008AED}, {0x0000FABF, 0x00008B39}, {0x0000FAC0, 0x00008B8A}, +{0x0000FAC1, 0x00008D08}, {0x0000FAC2, 0x00008F38}, {0x0000FAC3, 0x00009072}, {0x0000FAC4, 0x00009199}, +{0x0000FAC5, 0x00009276}, {0x0000FAC6, 0x0000967C}, {0x0000FAC7, 0x000096E3}, {0x0000FAC8, 0x00009756}, +{0x0000FAC9, 0x000097DB}, {0x0000FACA, 0x000097FF}, {0x0000FACB, 0x0000980B}, {0x0000FACC, 0x0000983B}, +{0x0000FACD, 0x00009B12}, {0x0000FACE, 0x00009F9C}, {0x0000FACF, 0x0002284A}, {0x0000FAD0, 0x00022844}, +{0x0000FAD1, 0x000233D5}, {0x0000FAD2, 0x00003B9D}, {0x0000FAD3, 0x00004018}, {0x0000FAD4, 0x00004039}, +{0x0000FAD5, 0x00025249}, {0x0000FAD6, 0x00025CD0}, {0x0000FAD7, 0x00027ED3}, {0x0000FAD8, 0x00009F43}, +{0x0000FAD9, 0x00009F8E}, {0x0000FB1D, 0x000005D9}, {0x0000FB1D, 0x000005B4}, {0x0000FB1F, 0x000005F2}, +{0x0000FB1F, 0x000005B7}, {0x0000FB2A, 0x000005E9}, {0x0000FB2A, 0x000005C1}, {0x0000FB2B, 0x000005E9}, +{0x0000FB2B, 0x000005C2}, {0x0000FB2C, 0x000005E9}, {0x0000FB2C, 0x000005BC}, {0x0000FB2C, 0x000005C1}, +{0x0000FB2D, 0x000005E9}, {0x0000FB2D, 0x000005BC}, {0x0000FB2D, 0x000005C2}, {0x0000FB2E, 0x000005D0}, +{0x0000FB2E, 0x000005B7}, {0x0000FB2F, 0x000005D0}, {0x0000FB2F, 0x000005B8}, {0x0000FB30, 0x000005D0}, +{0x0000FB30, 0x000005BC}, {0x0000FB31, 0x000005D1}, {0x0000FB31, 0x000005BC}, {0x0000FB32, 0x000005D2}, +{0x0000FB32, 0x000005BC}, {0x0000FB33, 0x000005D3}, {0x0000FB33, 0x000005BC}, {0x0000FB34, 0x000005D4}, +{0x0000FB34, 0x000005BC}, {0x0000FB35, 0x000005D5}, {0x0000FB35, 0x000005BC}, {0x0000FB36, 0x000005D6}, +{0x0000FB36, 0x000005BC}, {0x0000FB38, 0x000005D8}, {0x0000FB38, 0x000005BC}, {0x0000FB39, 0x000005D9}, +{0x0000FB39, 0x000005BC}, {0x0000FB3A, 0x000005DA}, {0x0000FB3A, 0x000005BC}, {0x0000FB3B, 0x000005DB}, +{0x0000FB3B, 0x000005BC}, {0x0000FB3C, 0x000005DC}, {0x0000FB3C, 0x000005BC}, {0x0000FB3E, 0x000005DE}, +{0x0000FB3E, 0x000005BC}, {0x0000FB40, 0x000005E0}, {0x0000FB40, 0x000005BC}, {0x0000FB41, 0x000005E1}, +{0x0000FB41, 0x000005BC}, {0x0000FB43, 0x000005E3}, {0x0000FB43, 0x000005BC}, {0x0000FB44, 0x000005E4}, +{0x0000FB44, 0x000005BC}, {0x0000FB46, 0x000005E6}, {0x0000FB46, 0x000005BC}, {0x0000FB47, 0x000005E7}, +{0x0000FB47, 0x000005BC}, {0x0000FB48, 0x000005E8}, {0x0000FB48, 0x000005BC}, {0x0000FB49, 0x000005E9}, +{0x0000FB49, 0x000005BC}, {0x0000FB4A, 0x000005EA}, {0x0000FB4A, 0x000005BC}, {0x0000FB4B, 0x000005D5}, +{0x0000FB4B, 0x000005B9}, {0x0000FB4C, 0x000005D1}, {0x0000FB4C, 0x000005BF}, {0x0000FB4D, 0x000005DB}, +{0x0000FB4D, 0x000005BF}, {0x0000FB4E, 0x000005E4}, {0x0000FB4E, 0x000005BF}, {0x0001109A, 0x00011099}, +{0x0001109A, 0x000110BA}, {0x0001109C, 0x0001109B}, {0x0001109C, 0x000110BA}, {0x000110AB, 0x000110A5}, +{0x000110AB, 0x000110BA}, {0x0001112E, 0x00011131}, {0x0001112E, 0x00011127}, {0x0001112F, 0x00011132}, +{0x0001112F, 0x00011127}, {0x0001134B, 0x00011347}, {0x0001134B, 0x0001133E}, {0x0001134C, 0x00011347}, +{0x0001134C, 0x00011357}, {0x000114BB, 0x000114B9}, {0x000114BB, 0x000114BA}, {0x000114BC, 0x000114B9}, +{0x000114BC, 0x000114B0}, {0x000114BE, 0x000114B9}, {0x000114BE, 0x000114BD}, {0x000115BA, 0x000115B8}, +{0x000115BA, 0x000115AF}, {0x000115BB, 0x000115B9}, {0x000115BB, 0x000115AF}, {0x0001D15E, 0x0001D157}, +{0x0001D15E, 0x0001D165}, {0x0001D15F, 0x0001D158}, {0x0001D15F, 0x0001D165}, {0x0001D160, 0x0001D158}, +{0x0001D160, 0x0001D165}, {0x0001D160, 0x0001D16E}, {0x0001D161, 0x0001D158}, {0x0001D161, 0x0001D165}, +{0x0001D161, 0x0001D16F}, {0x0001D162, 0x0001D158}, {0x0001D162, 0x0001D165}, {0x0001D162, 0x0001D170}, +{0x0001D163, 0x0001D158}, {0x0001D163, 0x0001D165}, {0x0001D163, 0x0001D171}, {0x0001D164, 0x0001D158}, +{0x0001D164, 0x0001D165}, {0x0001D164, 0x0001D172}, {0x0001D1BB, 0x0001D1B9}, {0x0001D1BB, 0x0001D165}, +{0x0001D1BC, 0x0001D1BA}, {0x0001D1BC, 0x0001D165}, {0x0001D1BD, 0x0001D1B9}, {0x0001D1BD, 0x0001D165}, +{0x0001D1BD, 0x0001D16E}, {0x0001D1BE, 0x0001D1BA}, {0x0001D1BE, 0x0001D165}, {0x0001D1BE, 0x0001D16E}, +{0x0001D1BF, 0x0001D1B9}, {0x0001D1BF, 0x0001D165}, {0x0001D1BF, 0x0001D16F}, {0x0001D1C0, 0x0001D1BA}, +{0x0001D1C0, 0x0001D165}, {0x0001D1C0, 0x0001D16F}, {0x0002F800, 0x00004E3D}, {0x0002F801, 0x00004E38}, +{0x0002F802, 0x00004E41}, {0x0002F803, 0x00020122}, {0x0002F804, 0x00004F60}, {0x0002F805, 0x00004FAE}, +{0x0002F806, 0x00004FBB}, {0x0002F807, 0x00005002}, {0x0002F808, 0x0000507A}, {0x0002F809, 0x00005099}, +{0x0002F80A, 0x000050E7}, {0x0002F80B, 0x000050CF}, {0x0002F80C, 0x0000349E}, {0x0002F80D, 0x0002063A}, +{0x0002F80E, 0x0000514D}, {0x0002F80F, 0x00005154}, {0x0002F810, 0x00005164}, {0x0002F811, 0x00005177}, +{0x0002F812, 0x0002051C}, {0x0002F813, 0x000034B9}, {0x0002F814, 0x00005167}, {0x0002F815, 0x0000518D}, +{0x0002F816, 0x0002054B}, {0x0002F817, 0x00005197}, {0x0002F818, 0x000051A4}, {0x0002F819, 0x00004ECC}, +{0x0002F81A, 0x000051AC}, {0x0002F81B, 0x000051B5}, {0x0002F81C, 0x000291DF}, {0x0002F81D, 0x000051F5}, +{0x0002F81E, 0x00005203}, {0x0002F81F, 0x000034DF}, {0x0002F820, 0x0000523B}, {0x0002F821, 0x00005246}, +{0x0002F822, 0x00005272}, {0x0002F823, 0x00005277}, {0x0002F824, 0x00003515}, {0x0002F825, 0x000052C7}, +{0x0002F826, 0x000052C9}, {0x0002F827, 0x000052E4}, {0x0002F828, 0x000052FA}, {0x0002F829, 0x00005305}, +{0x0002F82A, 0x00005306}, {0x0002F82B, 0x00005317}, {0x0002F82C, 0x00005349}, {0x0002F82D, 0x00005351}, +{0x0002F82E, 0x0000535A}, {0x0002F82F, 0x00005373}, {0x0002F830, 0x0000537D}, {0x0002F831, 0x0000537F}, +{0x0002F832, 0x0000537F}, {0x0002F833, 0x0000537F}, {0x0002F834, 0x00020A2C}, {0x0002F835, 0x00007070}, +{0x0002F836, 0x000053CA}, {0x0002F837, 0x000053DF}, {0x0002F838, 0x00020B63}, {0x0002F839, 0x000053EB}, +{0x0002F83A, 0x000053F1}, {0x0002F83B, 0x00005406}, {0x0002F83C, 0x0000549E}, {0x0002F83D, 0x00005438}, +{0x0002F83E, 0x00005448}, {0x0002F83F, 0x00005468}, {0x0002F840, 0x000054A2}, {0x0002F841, 0x000054F6}, +{0x0002F842, 0x00005510}, {0x0002F843, 0x00005553}, {0x0002F844, 0x00005563}, {0x0002F845, 0x00005584}, +{0x0002F846, 0x00005584}, {0x0002F847, 0x00005599}, {0x0002F848, 0x000055AB}, {0x0002F849, 0x000055B3}, +{0x0002F84A, 0x000055C2}, {0x0002F84B, 0x00005716}, {0x0002F84C, 0x00005606}, {0x0002F84D, 0x00005717}, +{0x0002F84E, 0x00005651}, {0x0002F84F, 0x00005674}, {0x0002F850, 0x00005207}, {0x0002F851, 0x000058EE}, +{0x0002F852, 0x000057CE}, {0x0002F853, 0x000057F4}, {0x0002F854, 0x0000580D}, {0x0002F855, 0x0000578B}, +{0x0002F856, 0x00005832}, {0x0002F857, 0x00005831}, {0x0002F858, 0x000058AC}, {0x0002F859, 0x000214E4}, +{0x0002F85A, 0x000058F2}, {0x0002F85B, 0x000058F7}, {0x0002F85C, 0x00005906}, {0x0002F85D, 0x0000591A}, +{0x0002F85E, 0x00005922}, {0x0002F85F, 0x00005962}, {0x0002F860, 0x000216A8}, {0x0002F861, 0x000216EA}, +{0x0002F862, 0x000059EC}, {0x0002F863, 0x00005A1B}, {0x0002F864, 0x00005A27}, {0x0002F865, 0x000059D8}, +{0x0002F866, 0x00005A66}, {0x0002F867, 0x000036EE}, {0x0002F868, 0x000036FC}, {0x0002F869, 0x00005B08}, +{0x0002F86A, 0x00005B3E}, {0x0002F86B, 0x00005B3E}, {0x0002F86C, 0x000219C8}, {0x0002F86D, 0x00005BC3}, +{0x0002F86E, 0x00005BD8}, {0x0002F86F, 0x00005BE7}, {0x0002F870, 0x00005BF3}, {0x0002F871, 0x00021B18}, +{0x0002F872, 0x00005BFF}, {0x0002F873, 0x00005C06}, {0x0002F874, 0x00005F53}, {0x0002F875, 0x00005C22}, +{0x0002F876, 0x00003781}, {0x0002F877, 0x00005C60}, {0x0002F878, 0x00005C6E}, {0x0002F879, 0x00005CC0}, +{0x0002F87A, 0x00005C8D}, {0x0002F87B, 0x00021DE4}, {0x0002F87C, 0x00005D43}, {0x0002F87D, 0x00021DE6}, +{0x0002F87E, 0x00005D6E}, {0x0002F87F, 0x00005D6B}, {0x0002F880, 0x00005D7C}, {0x0002F881, 0x00005DE1}, +{0x0002F882, 0x00005DE2}, {0x0002F883, 0x0000382F}, {0x0002F884, 0x00005DFD}, {0x0002F885, 0x00005E28}, +{0x0002F886, 0x00005E3D}, {0x0002F887, 0x00005E69}, {0x0002F888, 0x00003862}, {0x0002F889, 0x00022183}, +{0x0002F88A, 0x0000387C}, {0x0002F88B, 0x00005EB0}, {0x0002F88C, 0x00005EB3}, {0x0002F88D, 0x00005EB6}, +{0x0002F88E, 0x00005ECA}, {0x0002F88F, 0x0002A392}, {0x0002F890, 0x00005EFE}, {0x0002F891, 0x00022331}, +{0x0002F892, 0x00022331}, {0x0002F893, 0x00008201}, {0x0002F894, 0x00005F22}, {0x0002F895, 0x00005F22}, +{0x0002F896, 0x000038C7}, {0x0002F897, 0x000232B8}, {0x0002F898, 0x000261DA}, {0x0002F899, 0x00005F62}, +{0x0002F89A, 0x00005F6B}, {0x0002F89B, 0x000038E3}, {0x0002F89C, 0x00005F9A}, {0x0002F89D, 0x00005FCD}, +{0x0002F89E, 0x00005FD7}, {0x0002F89F, 0x00005FF9}, {0x0002F8A0, 0x00006081}, {0x0002F8A1, 0x0000393A}, +{0x0002F8A2, 0x0000391C}, {0x0002F8A3, 0x00006094}, {0x0002F8A4, 0x000226D4}, {0x0002F8A5, 0x000060C7}, +{0x0002F8A6, 0x00006148}, {0x0002F8A7, 0x0000614C}, {0x0002F8A8, 0x0000614E}, {0x0002F8A9, 0x0000614C}, +{0x0002F8AA, 0x0000617A}, {0x0002F8AB, 0x0000618E}, {0x0002F8AC, 0x000061B2}, {0x0002F8AD, 0x000061A4}, +{0x0002F8AE, 0x000061AF}, {0x0002F8AF, 0x000061DE}, {0x0002F8B0, 0x000061F2}, {0x0002F8B1, 0x000061F6}, +{0x0002F8B2, 0x00006210}, {0x0002F8B3, 0x0000621B}, {0x0002F8B4, 0x0000625D}, {0x0002F8B5, 0x000062B1}, +{0x0002F8B6, 0x000062D4}, {0x0002F8B7, 0x00006350}, {0x0002F8B8, 0x00022B0C}, {0x0002F8B9, 0x0000633D}, +{0x0002F8BA, 0x000062FC}, {0x0002F8BB, 0x00006368}, {0x0002F8BC, 0x00006383}, {0x0002F8BD, 0x000063E4}, +{0x0002F8BE, 0x00022BF1}, {0x0002F8BF, 0x00006422}, {0x0002F8C0, 0x000063C5}, {0x0002F8C1, 0x000063A9}, +{0x0002F8C2, 0x00003A2E}, {0x0002F8C3, 0x00006469}, {0x0002F8C4, 0x0000647E}, {0x0002F8C5, 0x0000649D}, +{0x0002F8C6, 0x00006477}, {0x0002F8C7, 0x00003A6C}, {0x0002F8C8, 0x0000654F}, {0x0002F8C9, 0x0000656C}, +{0x0002F8CA, 0x0002300A}, {0x0002F8CB, 0x000065E3}, {0x0002F8CC, 0x000066F8}, {0x0002F8CD, 0x00006649}, +{0x0002F8CE, 0x00003B19}, {0x0002F8CF, 0x00006691}, {0x0002F8D0, 0x00003B08}, {0x0002F8D1, 0x00003AE4}, +{0x0002F8D2, 0x00005192}, {0x0002F8D3, 0x00005195}, {0x0002F8D4, 0x00006700}, {0x0002F8D5, 0x0000669C}, +{0x0002F8D6, 0x000080AD}, {0x0002F8D7, 0x000043D9}, {0x0002F8D8, 0x00006717}, {0x0002F8D9, 0x0000671B}, +{0x0002F8DA, 0x00006721}, {0x0002F8DB, 0x0000675E}, {0x0002F8DC, 0x00006753}, {0x0002F8DD, 0x000233C3}, +{0x0002F8DE, 0x00003B49}, {0x0002F8DF, 0x000067FA}, {0x0002F8E0, 0x00006785}, {0x0002F8E1, 0x00006852}, +{0x0002F8E2, 0x00006885}, {0x0002F8E3, 0x0002346D}, {0x0002F8E4, 0x0000688E}, {0x0002F8E5, 0x0000681F}, +{0x0002F8E6, 0x00006914}, {0x0002F8E7, 0x00003B9D}, {0x0002F8E8, 0x00006942}, {0x0002F8E9, 0x000069A3}, +{0x0002F8EA, 0x000069EA}, {0x0002F8EB, 0x00006AA8}, {0x0002F8EC, 0x000236A3}, {0x0002F8ED, 0x00006ADB}, +{0x0002F8EE, 0x00003C18}, {0x0002F8EF, 0x00006B21}, {0x0002F8F0, 0x000238A7}, {0x0002F8F1, 0x00006B54}, +{0x0002F8F2, 0x00003C4E}, {0x0002F8F3, 0x00006B72}, {0x0002F8F4, 0x00006B9F}, {0x0002F8F5, 0x00006BBA}, +{0x0002F8F6, 0x00006BBB}, {0x0002F8F7, 0x00023A8D}, {0x0002F8F8, 0x00021D0B}, {0x0002F8F9, 0x00023AFA}, +{0x0002F8FA, 0x00006C4E}, {0x0002F8FB, 0x00023CBC}, {0x0002F8FC, 0x00006CBF}, {0x0002F8FD, 0x00006CCD}, +{0x0002F8FE, 0x00006C67}, {0x0002F8FF, 0x00006D16}, {0x0002F900, 0x00006D3E}, {0x0002F901, 0x00006D77}, +{0x0002F902, 0x00006D41}, {0x0002F903, 0x00006D69}, {0x0002F904, 0x00006D78}, {0x0002F905, 0x00006D85}, +{0x0002F906, 0x00023D1E}, {0x0002F907, 0x00006D34}, {0x0002F908, 0x00006E2F}, {0x0002F909, 0x00006E6E}, +{0x0002F90A, 0x00003D33}, {0x0002F90B, 0x00006ECB}, {0x0002F90C, 0x00006EC7}, {0x0002F90D, 0x00023ED1}, +{0x0002F90E, 0x00006DF9}, {0x0002F90F, 0x00006F6E}, {0x0002F910, 0x00023F5E}, {0x0002F911, 0x00023F8E}, +{0x0002F912, 0x00006FC6}, {0x0002F913, 0x00007039}, {0x0002F914, 0x0000701E}, {0x0002F915, 0x0000701B}, +{0x0002F916, 0x00003D96}, {0x0002F917, 0x0000704A}, {0x0002F918, 0x0000707D}, {0x0002F919, 0x00007077}, +{0x0002F91A, 0x000070AD}, {0x0002F91B, 0x00020525}, {0x0002F91C, 0x00007145}, {0x0002F91D, 0x00024263}, +{0x0002F91E, 0x0000719C}, {0x0002F91F, 0x000243AB}, {0x0002F920, 0x00007228}, {0x0002F921, 0x00007235}, +{0x0002F922, 0x00007250}, {0x0002F923, 0x00024608}, {0x0002F924, 0x00007280}, {0x0002F925, 0x00007295}, +{0x0002F926, 0x00024735}, {0x0002F927, 0x00024814}, {0x0002F928, 0x0000737A}, {0x0002F929, 0x0000738B}, +{0x0002F92A, 0x00003EAC}, {0x0002F92B, 0x000073A5}, {0x0002F92C, 0x00003EB8}, {0x0002F92D, 0x00003EB8}, +{0x0002F92E, 0x00007447}, {0x0002F92F, 0x0000745C}, {0x0002F930, 0x00007471}, {0x0002F931, 0x00007485}, +{0x0002F932, 0x000074CA}, {0x0002F933, 0x00003F1B}, {0x0002F934, 0x00007524}, {0x0002F935, 0x00024C36}, +{0x0002F936, 0x0000753E}, {0x0002F937, 0x00024C92}, {0x0002F938, 0x00007570}, {0x0002F939, 0x0002219F}, +{0x0002F93A, 0x00007610}, {0x0002F93B, 0x00024FA1}, {0x0002F93C, 0x00024FB8}, {0x0002F93D, 0x00025044}, +{0x0002F93E, 0x00003FFC}, {0x0002F93F, 0x00004008}, {0x0002F940, 0x000076F4}, {0x0002F941, 0x000250F3}, +{0x0002F942, 0x000250F2}, {0x0002F943, 0x00025119}, {0x0002F944, 0x00025133}, {0x0002F945, 0x0000771E}, +{0x0002F946, 0x0000771F}, {0x0002F947, 0x0000771F}, {0x0002F948, 0x0000774A}, {0x0002F949, 0x00004039}, +{0x0002F94A, 0x0000778B}, {0x0002F94B, 0x00004046}, {0x0002F94C, 0x00004096}, {0x0002F94D, 0x0002541D}, +{0x0002F94E, 0x0000784E}, {0x0002F94F, 0x0000788C}, {0x0002F950, 0x000078CC}, {0x0002F951, 0x000040E3}, +{0x0002F952, 0x00025626}, {0x0002F953, 0x00007956}, {0x0002F954, 0x0002569A}, {0x0002F955, 0x000256C5}, +{0x0002F956, 0x0000798F}, {0x0002F957, 0x000079EB}, {0x0002F958, 0x0000412F}, {0x0002F959, 0x00007A40}, +{0x0002F95A, 0x00007A4A}, {0x0002F95B, 0x00007A4F}, {0x0002F95C, 0x0002597C}, {0x0002F95D, 0x00025AA7}, +{0x0002F95E, 0x00025AA7}, {0x0002F95F, 0x00007AEE}, {0x0002F960, 0x00004202}, {0x0002F961, 0x00025BAB}, +{0x0002F962, 0x00007BC6}, {0x0002F963, 0x00007BC9}, {0x0002F964, 0x00004227}, {0x0002F965, 0x00025C80}, +{0x0002F966, 0x00007CD2}, {0x0002F967, 0x000042A0}, {0x0002F968, 0x00007CE8}, {0x0002F969, 0x00007CE3}, +{0x0002F96A, 0x00007D00}, {0x0002F96B, 0x00025F86}, {0x0002F96C, 0x00007D63}, {0x0002F96D, 0x00004301}, +{0x0002F96E, 0x00007DC7}, {0x0002F96F, 0x00007E02}, {0x0002F970, 0x00007E45}, {0x0002F971, 0x00004334}, +{0x0002F972, 0x00026228}, {0x0002F973, 0x00026247}, {0x0002F974, 0x00004359}, {0x0002F975, 0x000262D9}, +{0x0002F976, 0x00007F7A}, {0x0002F977, 0x0002633E}, {0x0002F978, 0x00007F95}, {0x0002F979, 0x00007FFA}, +{0x0002F97A, 0x00008005}, {0x0002F97B, 0x000264DA}, {0x0002F97C, 0x00026523}, {0x0002F97D, 0x00008060}, +{0x0002F97E, 0x000265A8}, {0x0002F97F, 0x00008070}, {0x0002F980, 0x0002335F}, {0x0002F981, 0x000043D5}, +{0x0002F982, 0x000080B2}, {0x0002F983, 0x00008103}, {0x0002F984, 0x0000440B}, {0x0002F985, 0x0000813E}, +{0x0002F986, 0x00005AB5}, {0x0002F987, 0x000267A7}, {0x0002F988, 0x000267B5}, {0x0002F989, 0x00023393}, +{0x0002F98A, 0x0002339C}, {0x0002F98B, 0x00008201}, {0x0002F98C, 0x00008204}, {0x0002F98D, 0x00008F9E}, +{0x0002F98E, 0x0000446B}, {0x0002F98F, 0x00008291}, {0x0002F990, 0x0000828B}, {0x0002F991, 0x0000829D}, +{0x0002F992, 0x000052B3}, {0x0002F993, 0x000082B1}, {0x0002F994, 0x000082B3}, {0x0002F995, 0x000082BD}, +{0x0002F996, 0x000082E6}, {0x0002F997, 0x00026B3C}, {0x0002F998, 0x000082E5}, {0x0002F999, 0x0000831D}, +{0x0002F99A, 0x00008363}, {0x0002F99B, 0x000083AD}, {0x0002F99C, 0x00008323}, {0x0002F99D, 0x000083BD}, +{0x0002F99E, 0x000083E7}, {0x0002F99F, 0x00008457}, {0x0002F9A0, 0x00008353}, {0x0002F9A1, 0x000083CA}, +{0x0002F9A2, 0x000083CC}, {0x0002F9A3, 0x000083DC}, {0x0002F9A4, 0x00026C36}, {0x0002F9A5, 0x00026D6B}, +{0x0002F9A6, 0x00026CD5}, {0x0002F9A7, 0x0000452B}, {0x0002F9A8, 0x000084F1}, {0x0002F9A9, 0x000084F3}, +{0x0002F9AA, 0x00008516}, {0x0002F9AB, 0x000273CA}, {0x0002F9AC, 0x00008564}, {0x0002F9AD, 0x00026F2C}, +{0x0002F9AE, 0x0000455D}, {0x0002F9AF, 0x00004561}, {0x0002F9B0, 0x00026FB1}, {0x0002F9B1, 0x000270D2}, +{0x0002F9B2, 0x0000456B}, {0x0002F9B3, 0x00008650}, {0x0002F9B4, 0x0000865C}, {0x0002F9B5, 0x00008667}, +{0x0002F9B6, 0x00008669}, {0x0002F9B7, 0x000086A9}, {0x0002F9B8, 0x00008688}, {0x0002F9B9, 0x0000870E}, +{0x0002F9BA, 0x000086E2}, {0x0002F9BB, 0x00008779}, {0x0002F9BC, 0x00008728}, {0x0002F9BD, 0x0000876B}, +{0x0002F9BE, 0x00008786}, {0x0002F9BF, 0x000045D7}, {0x0002F9C0, 0x000087E1}, {0x0002F9C1, 0x00008801}, +{0x0002F9C2, 0x000045F9}, {0x0002F9C3, 0x00008860}, {0x0002F9C4, 0x00008863}, {0x0002F9C5, 0x00027667}, +{0x0002F9C6, 0x000088D7}, {0x0002F9C7, 0x000088DE}, {0x0002F9C8, 0x00004635}, {0x0002F9C9, 0x000088FA}, +{0x0002F9CA, 0x000034BB}, {0x0002F9CB, 0x000278AE}, {0x0002F9CC, 0x00027966}, {0x0002F9CD, 0x000046BE}, +{0x0002F9CE, 0x000046C7}, {0x0002F9CF, 0x00008AA0}, {0x0002F9D0, 0x00008AED}, {0x0002F9D1, 0x00008B8A}, +{0x0002F9D2, 0x00008C55}, {0x0002F9D3, 0x00027CA8}, {0x0002F9D4, 0x00008CAB}, {0x0002F9D5, 0x00008CC1}, +{0x0002F9D6, 0x00008D1B}, {0x0002F9D7, 0x00008D77}, {0x0002F9D8, 0x00027F2F}, {0x0002F9D9, 0x00020804}, +{0x0002F9DA, 0x00008DCB}, {0x0002F9DB, 0x00008DBC}, {0x0002F9DC, 0x00008DF0}, {0x0002F9DD, 0x000208DE}, +{0x0002F9DE, 0x00008ED4}, {0x0002F9DF, 0x00008F38}, {0x0002F9E0, 0x000285D2}, {0x0002F9E1, 0x000285ED}, +{0x0002F9E2, 0x00009094}, {0x0002F9E3, 0x000090F1}, {0x0002F9E4, 0x00009111}, {0x0002F9E5, 0x0002872E}, +{0x0002F9E6, 0x0000911B}, {0x0002F9E7, 0x00009238}, {0x0002F9E8, 0x000092D7}, {0x0002F9E9, 0x000092D8}, +{0x0002F9EA, 0x0000927C}, {0x0002F9EB, 0x000093F9}, {0x0002F9EC, 0x00009415}, {0x0002F9ED, 0x00028BFA}, +{0x0002F9EE, 0x0000958B}, {0x0002F9EF, 0x00004995}, {0x0002F9F0, 0x000095B7}, {0x0002F9F1, 0x00028D77}, +{0x0002F9F2, 0x000049E6}, {0x0002F9F3, 0x000096C3}, {0x0002F9F4, 0x00005DB2}, {0x0002F9F5, 0x00009723}, +{0x0002F9F6, 0x00029145}, {0x0002F9F7, 0x0002921A}, {0x0002F9F8, 0x00004A6E}, {0x0002F9F9, 0x00004A76}, +{0x0002F9FA, 0x000097E0}, {0x0002F9FB, 0x0002940A}, {0x0002F9FC, 0x00004AB2}, {0x0002F9FD, 0x00029496}, +{0x0002F9FE, 0x0000980B}, {0x0002F9FF, 0x0000980B}, {0x0002FA00, 0x00009829}, {0x0002FA01, 0x000295B6}, +{0x0002FA02, 0x000098E2}, {0x0002FA03, 0x00004B33}, {0x0002FA04, 0x00009929}, {0x0002FA05, 0x000099A7}, +{0x0002FA06, 0x000099C2}, {0x0002FA07, 0x000099FE}, {0x0002FA08, 0x00004BCE}, {0x0002FA09, 0x00029B30}, +{0x0002FA0A, 0x00009B12}, {0x0002FA0B, 0x00009C40}, {0x0002FA0C, 0x00009CFD}, {0x0002FA0D, 0x00004CCE}, +{0x0002FA0E, 0x00004CED}, {0x0002FA0F, 0x00009D67}, {0x0002FA10, 0x0002A0CE}, {0x0002FA11, 0x00004CF8}, +{0x0002FA12, 0x0002A105}, {0x0002FA13, 0x0002A20E}, {0x0002FA14, 0x0002A291}, {0x0002FA15, 0x00009EBB}, +{0x0002FA16, 0x00004D56}, {0x0002FA17, 0x00009EF9}, {0x0002FA18, 0x00009EFE}, {0x0002FA19, 0x00009F05}, +{0x0002FA1A, 0x00009F0F}, {0x0002FA1B, 0x00009F16}, {0x0002FA1D, 0x0002A600}, +}; + +static std::string unicode_cpts_to_utf8(const std::vector & cps) { + std::string result; + for (size_t i = 0; i < cps.size(); ++i) { + result.append(unicode_cpt_to_utf8(cps[i])); + } + return result; +} + +static uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset) { + assert(offset < utf8.size()); + if (!(utf8[offset + 0] & 0x80)) { + auto result = utf8[offset + 0]; + offset += 1; + return result; + } + if (!(utf8[offset + 0] & 0x40)) { + throw std::invalid_argument("invalid character"); + } + if (!(utf8[offset + 0] & 0x20)) { + if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80)) { + throw std::invalid_argument("invalid character"); + } + auto result = ((utf8[offset + 0] & 0x1f) << 6) | (utf8[offset + 1] & 0x3f); + offset += 2; + return result; + } + if (!(utf8[offset + 0] & 0x10)) { + if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80)) { + throw std::invalid_argument("invalid character"); + } + auto result = ((utf8[offset + 0] & 0x0f) << 12) | ((utf8[offset + 1] & 0x3f) << 6) | (utf8[offset + 2] & 0x3f); + offset += 3; + return result; + } + if (!(utf8[offset + 0] & 0x08)) { + if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80)) { + throw std::invalid_argument("invalid character"); + } + auto result = ((utf8[offset + 0] & 0x07) << 18) | ((utf8[offset + 1] & 0x3f) << 12) | ((utf8[offset + 2] & 0x3f) << 6) | (utf8[offset + 3] & 0x3f); + offset += 4; + return result; + } + throw std::invalid_argument("invalid string"); +} + +static std::vector unicode_cpt_to_utf16(uint32_t cp) { + std::vector result; + if (/* 0x0000 <= cp && */ cp <= 0xffff) { + result.emplace_back(cp); + } + else if (0x10000 <= cp && cp <= 0x10ffff) { + result.emplace_back(0xd800 | ((cp - 0x10000) >> 10)); + result.emplace_back(0xdc00 | ((cp - 0x10000) & 0x03ff)); + } + else { + throw std::invalid_argument("invalid cpt"); + } + return result; +} + +//static std::vector unicode_cpts_to_utf16(const std::vector & cps) { +// std::vector result; +// for (size_t i = 0; i < cps.size(); ++i) { +// auto temp = unicode_cpt_to_utf16(cps[i]); +// result.insert(result.end(), temp.begin(), temp.end()); +// } +// return result; +//} + +static uint32_t cpt_from_utf16(const std::vector & utf16, size_t & offset) { + assert(offset < utf16.size()); + if (((utf16[0] >> 10) << 10) != 0xd800) { + auto result = utf16[offset + 0]; + offset += 1; + return result; + } + + if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00)) { + throw std::invalid_argument("invalid character"); + } + + auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff)); + offset += 2; + return result; +} + +//static std::vector unicode_cpts_from_utf16(const std::vector & utf16) { +// std::vector result; +// size_t offset = 0; +// while (offset < utf16.size()) { +// result.push_back(cpt_from_utf16(utf16, offset)); +// } +// return result; +//} + +static std::unordered_map unicode_cpt_type_map() { + std::unordered_map cpt_types; + for (auto p : unicode_ranges_digit) { + for (auto i = p.first; i <= p.second; ++ i) { + cpt_types[i] = CODEPOINT_TYPE_DIGIT; + } + } + for (auto p : unicode_ranges_letter) { + for (auto i = p.first; i <= p.second; ++ i) { + cpt_types[i] = CODEPOINT_TYPE_LETTER; + } + } + for (auto p : unicode_ranges_whitespace) { + for (auto i = p.first; i <= p.second; ++ i) { + cpt_types[i] = CODEPOINT_TYPE_WHITESPACE; + } + } + for (auto p : unicode_ranges_accent_mark) { + for (auto i = p.first; i <= p.second; ++ i) { + cpt_types[i] = CODEPOINT_TYPE_ACCENT_MARK; + } + } + for (auto p : unicode_ranges_punctuation) { + for (auto i = p.first; i <= p.second; ++ i) { + cpt_types[i] = CODEPOINT_TYPE_PUNCTUATION; + } + } + for (auto p : unicode_ranges_symbol) { + for (auto i = p.first; i <= p.second; ++i) { + cpt_types[i] = CODEPOINT_TYPE_SYMBOL; + } + } + for (auto p : unicode_ranges_control) { + for (auto i = p.first; i <= p.second; ++ i) { + cpt_types[i] = CODEPOINT_TYPE_CONTROL; + } + } + return cpt_types; +} + +static std::unordered_map unicode_byte_to_utf8_map() { + std::unordered_map map; + for (int ch = u'!'; ch <= u'~'; ++ch) { + assert(0 <= ch && ch < 256); + map[ch] = unicode_cpt_to_utf8(ch); + } + for (int ch = u'¡'; ch <= u'¬'; ++ch) { + assert(0 <= ch && ch < 256); + map[ch] = unicode_cpt_to_utf8(ch); + } + for (int ch = u'®'; ch <= u'ÿ'; ++ch) { + assert(0 <= ch && ch < 256); + map[ch] = unicode_cpt_to_utf8(ch); + } + auto n = 0; + for (int ch = 0; ch < 256; ++ch) { + if (map.find(ch) == map.end()) { + map[ch] = unicode_cpt_to_utf8(256 + n); + ++n; + } + } + return map; +} + +static std::unordered_map unicode_utf8_to_byte_map() { + std::unordered_map map; + for (int ch = u'!'; ch <= u'~'; ++ch) { + assert(0 <= ch && ch < 256); + map[unicode_cpt_to_utf8(ch)] = ch; + } + for (int ch = u'¡'; ch <= u'¬'; ++ch) { + assert(0 <= ch && ch < 256); + map[unicode_cpt_to_utf8(ch)] = ch; + } + for (int ch = u'®'; ch <= u'ÿ'; ++ch) { + assert(0 <= ch && ch < 256); + map[unicode_cpt_to_utf8(ch)] = ch; + } + auto n = 0; + for (int ch = 0; ch < 256; ++ch) { + if (map.find(unicode_cpt_to_utf8(ch)) == map.end()) { + map[unicode_cpt_to_utf8(256 + n)] = ch; + ++n; + } + } + return map; +} + +// +// interface +// + +std::string unicode_cpt_to_utf8(uint32_t cp) { + std::string result; + if (/* 0x00 <= cp && */ cp <= 0x7f) { + result.push_back(cp); + } + else if (0x80 <= cp && cp <= 0x7ff) { + result.push_back(0xc0 | ((cp >> 6) & 0x1f)); + result.push_back(0x80 | (cp & 0x3f)); + } + else if (0x800 <= cp && cp <= 0xffff) { + result.push_back(0xe0 | ((cp >> 12) & 0x0f)); + result.push_back(0x80 | ((cp >> 6) & 0x3f)); + result.push_back(0x80 | (cp & 0x3f)); + } + else if (0x10000 <= cp && cp <= 0x10ffff) { + result.push_back(0xf0 | ((cp >> 18) & 0x07)); + result.push_back(0x80 | ((cp >> 12) & 0x3f)); + result.push_back(0x80 | ((cp >> 6) & 0x3f)); + result.push_back(0x80 | (cp & 0x3f)); + } + else { + throw std::invalid_argument("invalid codepoint"); + } + return result; +} + +std::vector unicode_cpts_normalize_nfd(const std::vector & cpts) { + std::vector result; + result.reserve(cpts.size()); + for (size_t i = 0; i < cpts.size(); ++i) { + auto it = unicode_map_nfd.find(cpts[i]); + if (it == unicode_map_nfd.end()) { + result.push_back(cpts[i]); + } else { + result.push_back(it->second); + } + } + return result; +} + +std::vector unicode_cpts_from_utf8(const std::string & utf8) { + std::vector result; + size_t offset = 0; + while (offset < utf8.size()) { + result.push_back(unicode_cpt_from_utf8(utf8, offset)); + } + return result; +} + +int unicode_cpt_type(uint32_t cp) { + static std::unordered_map cpt_types = unicode_cpt_type_map(); + const auto it = cpt_types.find(cp); + return it == cpt_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : it->second; +} + +int unicode_cpt_type(const std::string & utf8) { + if (utf8.length() == 0) { + return CODEPOINT_TYPE_UNIDENTIFIED; + } + size_t offset = 0; + return unicode_cpt_type(unicode_cpt_from_utf8(utf8, offset)); +} + +std::string unicode_byte_to_utf8(uint8_t byte) { + static std::unordered_map map = unicode_byte_to_utf8_map(); + return map.at(byte); +} + +uint8_t unicode_utf8_to_byte(const std::string & utf8) { + static std::unordered_map map = unicode_utf8_to_byte_map(); + return map.at(utf8); +} + diff --git a/unicode.h b/unicode.h index f6be4549b..6d14a5a33 100644 --- a/unicode.h +++ b/unicode.h @@ -1,784 +1,26 @@ -#pragma once +#pragma once -#include -#include -#include +#include #include -#include #include -static const std::vector> digit_ranges = { -{0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F}, -{0xCE6, 0xCEF}, {0xD66, 0xD6F}, {0xDE6, 0xDEF}, {0xE50, 0xE59}, {0xED0, 0xED9}, {0xF20, 0xF29}, {0x1040, 0x1049}, {0x1090, 0x1099}, {0x1369, 0x1371}, {0x17E0, 0x17E9}, {0x1810, 0x1819}, {0x1946, 0x194F}, -{0x19D0, 0x19DA}, {0x1A80, 0x1A89}, {0x1A90, 0x1A99}, {0x1B50, 0x1B59}, {0x1BB0, 0x1BB9}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x2070, 0x2070}, {0x2074, 0x2079}, {0x2080, 0x2089}, {0x2460, 0x2468}, -{0x2474, 0x247C}, {0x2488, 0x2490}, {0x24EA, 0x24EA}, {0x24F5, 0x24FD}, {0x24FF, 0x24FF}, {0x2776, 0x277E}, {0x2780, 0x2788}, {0x278A, 0x2792}, {0xA620, 0xA629}, {0xA8D0, 0xA8D9}, {0xA900, 0xA909}, -{0xA9D0, 0xA9D9}, {0xA9F0, 0xA9F9}, {0xAA50, 0xAA59}, {0xABF0, 0xABF9}, {0xFF10, 0xFF19}, {0x104A0, 0x104A9}, {0x10A40, 0x10A43}, {0x10D30, 0x10D39}, {0x10E60, 0x10E68}, {0x11052, 0x1105A}, -{0x11066, 0x1106F}, {0x110F0, 0x110F9}, {0x11136, 0x1113F}, {0x111D0, 0x111D9}, {0x112F0, 0x112F9}, {0x11450, 0x11459}, {0x114D0, 0x114D9}, {0x11650, 0x11659}, {0x116C0, 0x116C9}, {0x11730, 0x11739}, -{0x118E0, 0x118E9}, {0x11950, 0x11959}, {0x11C50, 0x11C59}, {0x11D50, 0x11D59}, {0x11DA0, 0x11DA9}, {0x16A60, 0x16A69}, {0x16B50, 0x16B59}, {0x1D7CE, 0x1D7FF}, {0x1E140, 0x1E149}, {0x1E2F0, 0x1E2F9}, -{0x1E950, 0x1E959}, {0x1F100, 0x1F10A}, {0x1FBF0, 0x1FBF9}, -}; - -static const std::vector> letter_ranges = { -{0x41, 0x5A}, {0x61, 0x7A}, {0xAA, 0xAA}, {0xB5, 0xB5}, {0xBA, 0xBA}, {0xC0, 0xD6}, {0xD8, 0xF6}, {0xF8, 0x2C1}, {0x2C6, 0x2D1}, {0x2E0, 0x2E4}, {0x2EC, 0x2EC}, {0x2EE, 0x2EE}, {0x370, 0x374}, -{0x376, 0x377}, {0x37A, 0x37D}, {0x37F, 0x37F}, {0x386, 0x386}, {0x388, 0x38A}, {0x38C, 0x38C}, {0x38E, 0x3A1}, {0x3A3, 0x3F5}, {0x3F7, 0x481}, {0x48A, 0x52F}, {0x531, 0x556}, {0x559, 0x559}, -{0x560, 0x588}, {0x5D0, 0x5EA}, {0x5EF, 0x5F2}, {0x620, 0x64A}, {0x66E, 0x66F}, {0x671, 0x6D3}, {0x6D5, 0x6D5}, {0x6E5, 0x6E6}, {0x6EE, 0x6EF}, {0x6FA, 0x6FC}, {0x6FF, 0x6FF}, {0x710, 0x710}, -{0x712, 0x72F}, {0x74D, 0x7A5}, {0x7B1, 0x7B1}, {0x7CA, 0x7EA}, {0x7F4, 0x7F5}, {0x7FA, 0x7FA}, {0x800, 0x815}, {0x81A, 0x81A}, {0x824, 0x824}, {0x828, 0x828}, {0x840, 0x858}, {0x860, 0x86A}, -{0x8A0, 0x8B4}, {0x8B6, 0x8C7}, {0x904, 0x939}, {0x93D, 0x93D}, {0x950, 0x950}, {0x958, 0x961}, {0x971, 0x980}, {0x985, 0x98C}, {0x98F, 0x990}, {0x993, 0x9A8}, {0x9AA, 0x9B0}, {0x9B2, 0x9B2}, -{0x9B6, 0x9B9}, {0x9BD, 0x9BD}, {0x9CE, 0x9CE}, {0x9DC, 0x9DD}, {0x9DF, 0x9E1}, {0x9F0, 0x9F1}, {0x9FC, 0x9FC}, {0xA05, 0xA0A}, {0xA0F, 0xA10}, {0xA13, 0xA28}, {0xA2A, 0xA30}, {0xA32, 0xA33}, -{0xA35, 0xA36}, {0xA38, 0xA39}, {0xA59, 0xA5C}, {0xA5E, 0xA5E}, {0xA72, 0xA74}, {0xA85, 0xA8D}, {0xA8F, 0xA91}, {0xA93, 0xAA8}, {0xAAA, 0xAB0}, {0xAB2, 0xAB3}, {0xAB5, 0xAB9}, {0xABD, 0xABD}, -{0xAD0, 0xAD0}, {0xAE0, 0xAE1}, {0xAF9, 0xAF9}, {0xB05, 0xB0C}, {0xB0F, 0xB10}, {0xB13, 0xB28}, {0xB2A, 0xB30}, {0xB32, 0xB33}, {0xB35, 0xB39}, {0xB3D, 0xB3D}, {0xB5C, 0xB5D}, {0xB5F, 0xB61}, -{0xB71, 0xB71}, {0xB83, 0xB83}, {0xB85, 0xB8A}, {0xB8E, 0xB90}, {0xB92, 0xB95}, {0xB99, 0xB9A}, {0xB9C, 0xB9C}, {0xB9E, 0xB9F}, {0xBA3, 0xBA4}, {0xBA8, 0xBAA}, {0xBAE, 0xBB9}, {0xBD0, 0xBD0}, -{0xC05, 0xC0C}, {0xC0E, 0xC10}, {0xC12, 0xC28}, {0xC2A, 0xC39}, {0xC3D, 0xC3D}, {0xC58, 0xC5A}, {0xC60, 0xC61}, {0xC80, 0xC80}, {0xC85, 0xC8C}, {0xC8E, 0xC90}, {0xC92, 0xCA8}, {0xCAA, 0xCB3}, -{0xCB5, 0xCB9}, {0xCBD, 0xCBD}, {0xCDE, 0xCDE}, {0xCE0, 0xCE1}, {0xCF1, 0xCF2}, {0xD04, 0xD0C}, {0xD0E, 0xD10}, {0xD12, 0xD3A}, {0xD3D, 0xD3D}, {0xD4E, 0xD4E}, {0xD54, 0xD56}, {0xD5F, 0xD61}, -{0xD7A, 0xD7F}, {0xD85, 0xD96}, {0xD9A, 0xDB1}, {0xDB3, 0xDBB}, {0xDBD, 0xDBD}, {0xDC0, 0xDC6}, {0xE01, 0xE30}, {0xE32, 0xE33}, {0xE40, 0xE46}, {0xE81, 0xE82}, {0xE84, 0xE84}, {0xE86, 0xE8A}, -{0xE8C, 0xEA3}, {0xEA5, 0xEA5}, {0xEA7, 0xEB0}, {0xEB2, 0xEB3}, {0xEBD, 0xEBD}, {0xEC0, 0xEC4}, {0xEC6, 0xEC6}, {0xEDC, 0xEDF}, {0xF00, 0xF00}, {0xF40, 0xF47}, {0xF49, 0xF6C}, {0xF88, 0xF8C}, -{0x1000, 0x102A}, {0x103F, 0x103F}, {0x1050, 0x1055}, {0x105A, 0x105D}, {0x1061, 0x1061}, {0x1065, 0x1066}, {0x106E, 0x1070}, {0x1075, 0x1081}, {0x108E, 0x108E}, {0x10A0, 0x10C5}, {0x10C7, 0x10C7}, -{0x10CD, 0x10CD}, {0x10D0, 0x10FA}, {0x10FC, 0x1248}, {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5}, -{0x12B8, 0x12BE}, {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, {0x1380, 0x138F}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1401, 0x166C}, -{0x166F, 0x167F}, {0x1681, 0x169A}, {0x16A0, 0x16EA}, {0x16F1, 0x16F8}, {0x1700, 0x170C}, {0x170E, 0x1711}, {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1780, 0x17B3}, -{0x17D7, 0x17D7}, {0x17DC, 0x17DC}, {0x1820, 0x1878}, {0x1880, 0x1884}, {0x1887, 0x18A8}, {0x18AA, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1950, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB}, -{0x19B0, 0x19C9}, {0x1A00, 0x1A16}, {0x1A20, 0x1A54}, {0x1AA7, 0x1AA7}, {0x1B05, 0x1B33}, {0x1B45, 0x1B4B}, {0x1B83, 0x1BA0}, {0x1BAE, 0x1BAF}, {0x1BBA, 0x1BE5}, {0x1C00, 0x1C23}, {0x1C4D, 0x1C4F}, -{0x1C5A, 0x1C7D}, {0x1C80, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CBF}, {0x1CE9, 0x1CEC}, {0x1CEE, 0x1CF3}, {0x1CF5, 0x1CF6}, {0x1CFA, 0x1CFA}, {0x1D00, 0x1DBF}, {0x1E00, 0x1F15}, {0x1F18, 0x1F1D}, -{0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4}, -{0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2071, 0x2071}, {0x207F, 0x207F}, {0x2090, 0x209C}, {0x2102, 0x2102}, {0x2107, 0x2107}, -{0x210A, 0x2113}, {0x2115, 0x2115}, {0x2119, 0x211D}, {0x2124, 0x2124}, {0x2126, 0x2126}, {0x2128, 0x2128}, {0x212A, 0x212D}, {0x212F, 0x2139}, {0x213C, 0x213F}, {0x2145, 0x2149}, {0x214E, 0x214E}, -{0x2183, 0x2184}, {0x2C00, 0x2C2E}, {0x2C30, 0x2C5E}, {0x2C60, 0x2CE4}, {0x2CEB, 0x2CEE}, {0x2CF2, 0x2CF3}, {0x2D00, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D6F}, -{0x2D80, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2E2F, 0x2E2F}, {0x3005, 0x3006}, -{0x3031, 0x3035}, {0x303B, 0x303C}, {0x3041, 0x3096}, {0x309D, 0x309F}, {0x30A1, 0x30FA}, {0x30FC, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E}, {0x31A0, 0x31BF}, {0x31F0, 0x31FF}, {0x3400, 0x4DBF}, -{0x4E00, 0x9FFC}, {0xA000, 0xA48C}, {0xA4D0, 0xA4FD}, {0xA500, 0xA60C}, {0xA610, 0xA61F}, {0xA62A, 0xA62B}, {0xA640, 0xA66E}, {0xA67F, 0xA69D}, {0xA6A0, 0xA6E5}, {0xA717, 0xA71F}, {0xA722, 0xA788}, -{0xA78B, 0xA7BF}, {0xA7C2, 0xA7CA}, {0xA7F5, 0xA801}, {0xA803, 0xA805}, {0xA807, 0xA80A}, {0xA80C, 0xA822}, {0xA840, 0xA873}, {0xA882, 0xA8B3}, {0xA8F2, 0xA8F7}, {0xA8FB, 0xA8FB}, {0xA8FD, 0xA8FE}, -{0xA90A, 0xA925}, {0xA930, 0xA946}, {0xA960, 0xA97C}, {0xA984, 0xA9B2}, {0xA9CF, 0xA9CF}, {0xA9E0, 0xA9E4}, {0xA9E6, 0xA9EF}, {0xA9FA, 0xA9FE}, {0xAA00, 0xAA28}, {0xAA40, 0xAA42}, {0xAA44, 0xAA4B}, -{0xAA60, 0xAA76}, {0xAA7A, 0xAA7A}, {0xAA7E, 0xAAAF}, {0xAAB1, 0xAAB1}, {0xAAB5, 0xAAB6}, {0xAAB9, 0xAABD}, {0xAAC0, 0xAAC0}, {0xAAC2, 0xAAC2}, {0xAADB, 0xAADD}, {0xAAE0, 0xAAEA}, {0xAAF2, 0xAAF4}, -{0xAB01, 0xAB06}, {0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, {0xAB30, 0xAB5A}, {0xAB5C, 0xAB69}, {0xAB70, 0xABE2}, {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB}, -{0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44}, -{0xFB46, 0xFBB1}, {0xFBD3, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFB}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC}, {0xFF21, 0xFF3A}, {0xFF41, 0xFF5A}, {0xFF66, 0xFFBE}, {0xFFC2, 0xFFC7}, -{0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA}, -{0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10300, 0x1031F}, {0x1032D, 0x10340}, {0x10342, 0x10349}, {0x10350, 0x10375}, {0x10380, 0x1039D}, {0x103A0, 0x103C3}, {0x103C8, 0x103CF}, {0x10400, 0x1049D}, -{0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835}, -{0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089E}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x10900, 0x10915}, {0x10920, 0x10939}, {0x10980, 0x109B7}, -{0x109BE, 0x109BF}, {0x10A00, 0x10A00}, {0x10A10, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A60, 0x10A7C}, {0x10A80, 0x10A9C}, {0x10AC0, 0x10AC7}, {0x10AC9, 0x10AE4}, {0x10B00, 0x10B35}, -{0x10B40, 0x10B55}, {0x10B60, 0x10B72}, {0x10B80, 0x10B91}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10D00, 0x10D23}, {0x10E80, 0x10EA9}, {0x10EB0, 0x10EB1}, {0x10F00, 0x10F1C}, -{0x10F27, 0x10F27}, {0x10F30, 0x10F45}, {0x10FB0, 0x10FC4}, {0x10FE0, 0x10FF6}, {0x11003, 0x11037}, {0x11083, 0x110AF}, {0x110D0, 0x110E8}, {0x11103, 0x11126}, {0x11144, 0x11144}, {0x11147, 0x11147}, -{0x11150, 0x11172}, {0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4}, {0x111DA, 0x111DA}, {0x111DC, 0x111DC}, {0x11200, 0x11211}, {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288}, -{0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339}, -{0x1133D, 0x1133D}, {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A}, {0x1145F, 0x11461}, {0x11480, 0x114AF}, {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE}, -{0x115D8, 0x115DB}, {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A}, {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906}, {0x11909, 0x11909}, -{0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941}, {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, {0x119E1, 0x119E1}, {0x119E3, 0x119E3}, {0x11A00, 0x11A00}, -{0x11A0B, 0x11A32}, {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89}, {0x11A9D, 0x11A9D}, {0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F}, -{0x11D00, 0x11D06}, {0x11D08, 0x11D09}, {0x11D0B, 0x11D30}, {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2}, {0x11FB0, 0x11FB0}, -{0x12000, 0x12399}, {0x12480, 0x12543}, {0x13000, 0x1342E}, {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, {0x16B40, 0x16B43}, {0x16B63, 0x16B77}, -{0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, -{0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454}, {0x1D456, 0x1D49C}, -{0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, -{0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA}, -{0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D}, -{0x1E14E, 0x1E14E}, {0x1E2C0, 0x1E2EB}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27}, -{0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52}, -{0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, -{0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DD}, {0x2A700, 0x2B734}, -{0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A}, -}; - -static const std::vector> whitespace_ranges = { -{0x9, 0xD}, {0x1C, 0x20}, {0x85, 0x85}, {0xA0, 0xA0}, {0x1680, 0x1680}, {0x2000, 0x200A}, {0x2028, 0x2029}, {0x202F, 0x202F}, {0x205F, 0x205F}, {0x3000, 0x3000}, -}; - -static const std::vector> accent_mark_ranges = { -{0x300, 0x36F}, {0x483, 0x489}, {0x591, 0x5BD}, {0x5BF, 0x5BF}, {0x5C1, 0x5C2}, {0x5C4, 0x5C5}, {0x5C7, 0x5C7}, {0x610, 0x61A}, {0x64B, 0x65F}, {0x670, 0x670}, {0x6D6, 0x6DC}, {0x6DF, 0x6E4}, -{0x6E7, 0x6E8}, {0x6EA, 0x6ED}, {0x711, 0x711}, {0x730, 0x74A}, {0x7A6, 0x7B0}, {0x7EB, 0x7F3}, {0x7FD, 0x7FD}, {0x816, 0x819}, {0x81B, 0x823}, {0x825, 0x827}, {0x829, 0x82D}, {0x859, 0x85B}, -{0x8D3, 0x8E1}, {0x8E3, 0x903}, {0x93A, 0x93C}, {0x93E, 0x94F}, {0x951, 0x957}, {0x962, 0x963}, {0x981, 0x983}, {0x9BC, 0x9BC}, {0x9BE, 0x9C4}, {0x9C7, 0x9C8}, {0x9CB, 0x9CD}, {0x9D7, 0x9D7}, -{0x9E2, 0x9E3}, {0x9FE, 0x9FE}, {0xA01, 0xA03}, {0xA3C, 0xA3C}, {0xA3E, 0xA42}, {0xA47, 0xA48}, {0xA4B, 0xA4D}, {0xA51, 0xA51}, {0xA70, 0xA71}, {0xA75, 0xA75}, {0xA81, 0xA83}, {0xABC, 0xABC}, -{0xABE, 0xAC5}, {0xAC7, 0xAC9}, {0xACB, 0xACD}, {0xAE2, 0xAE3}, {0xAFA, 0xAFF}, {0xB01, 0xB03}, {0xB3C, 0xB3C}, {0xB3E, 0xB44}, {0xB47, 0xB48}, {0xB4B, 0xB4D}, {0xB55, 0xB57}, {0xB62, 0xB63}, -{0xB82, 0xB82}, {0xBBE, 0xBC2}, {0xBC6, 0xBC8}, {0xBCA, 0xBCD}, {0xBD7, 0xBD7}, {0xC00, 0xC04}, {0xC3E, 0xC44}, {0xC46, 0xC48}, {0xC4A, 0xC4D}, {0xC55, 0xC56}, {0xC62, 0xC63}, {0xC81, 0xC83}, -{0xCBC, 0xCBC}, {0xCBE, 0xCC4}, {0xCC6, 0xCC8}, {0xCCA, 0xCCD}, {0xCD5, 0xCD6}, {0xCE2, 0xCE3}, {0xD00, 0xD03}, {0xD3B, 0xD3C}, {0xD3E, 0xD44}, {0xD46, 0xD48}, {0xD4A, 0xD4D}, {0xD57, 0xD57}, -{0xD62, 0xD63}, {0xD81, 0xD83}, {0xDCA, 0xDCA}, {0xDCF, 0xDD4}, {0xDD6, 0xDD6}, {0xDD8, 0xDDF}, {0xDF2, 0xDF3}, {0xE31, 0xE31}, {0xE34, 0xE3A}, {0xE47, 0xE4E}, {0xEB1, 0xEB1}, {0xEB4, 0xEBC}, -{0xEC8, 0xECD}, {0xF18, 0xF19}, {0xF35, 0xF35}, {0xF37, 0xF37}, {0xF39, 0xF39}, {0xF3E, 0xF3F}, {0xF71, 0xF84}, {0xF86, 0xF87}, {0xF8D, 0xF97}, {0xF99, 0xFBC}, {0xFC6, 0xFC6}, {0x102B, 0x103E}, -{0x1056, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064}, {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x108D}, {0x108F, 0x108F}, {0x109A, 0x109D}, {0x135D, 0x135F}, {0x1712, 0x1714}, {0x1732, 0x1734}, -{0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17D3}, {0x17DD, 0x17DD}, {0x180B, 0x180D}, {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1A17, 0x1A1B}, {0x1A55, 0x1A5E}, -{0x1A60, 0x1A7C}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0}, {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, {0x1BA1, 0x1BAD}, {0x1BE6, 0x1BF3}, {0x1C24, 0x1C37}, {0x1CD0, 0x1CD2}, -{0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F}, -{0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881}, -{0xA8B4, 0xA8C5}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9E5, 0xA9E5}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4D}, -{0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED}, {0xFB1E, 0xFB1E}, -{0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F}, -{0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11000, 0x11002}, {0x11038, 0x11046}, {0x1107F, 0x11082}, {0x110B0, 0x110BA}, {0x11100, 0x11102}, {0x11127, 0x11134}, -{0x11145, 0x11146}, {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111B3, 0x111C0}, {0x111C9, 0x111CC}, {0x111CE, 0x111CF}, {0x1122C, 0x11237}, {0x1123E, 0x1123E}, {0x112DF, 0x112EA}, {0x11300, 0x11303}, -{0x1133B, 0x1133C}, {0x1133E, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357}, {0x11362, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11435, 0x11446}, {0x1145E, 0x1145E}, -{0x114B0, 0x114C3}, {0x115AF, 0x115B5}, {0x115B8, 0x115C0}, {0x115DC, 0x115DD}, {0x11630, 0x11640}, {0x116AB, 0x116B7}, {0x1171D, 0x1172B}, {0x1182C, 0x1183A}, {0x11930, 0x11935}, {0x11937, 0x11938}, -{0x1193B, 0x1193E}, {0x11940, 0x11940}, {0x11942, 0x11943}, {0x119D1, 0x119D7}, {0x119DA, 0x119E0}, {0x119E4, 0x119E4}, {0x11A01, 0x11A0A}, {0x11A33, 0x11A39}, {0x11A3B, 0x11A3E}, {0x11A47, 0x11A47}, -{0x11A51, 0x11A5B}, {0x11A8A, 0x11A99}, {0x11C2F, 0x11C36}, {0x11C38, 0x11C3F}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45}, -{0x11D47, 0x11D47}, {0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D97}, {0x11EF3, 0x11EF6}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36}, {0x16F4F, 0x16F4F}, {0x16F51, 0x16F87}, {0x16F8F, 0x16F92}, -{0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1DA00, 0x1DA36}, -{0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, -{0x1E130, 0x1E136}, {0x1E2EC, 0x1E2EF}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A}, {0xE0100, 0xE01EF}, -}; - -static const std::vector> punctuation_ranges = { -{0x21, 0x23}, {0x25, 0x2A}, {0x2C, 0x2F}, {0x3A, 0x3B}, {0x3F, 0x40}, {0x5B, 0x5D}, {0x5F, 0x5F}, {0x7B, 0x7B}, {0x7D, 0x7D}, {0xA1, 0xA1}, {0xA7, 0xA7}, {0xAB, 0xAB}, {0xB6, 0xB7}, {0xBB, 0xBB}, -{0xBF, 0xBF}, {0x37E, 0x37E}, {0x387, 0x387}, {0x55A, 0x55F}, {0x589, 0x58A}, {0x5BE, 0x5BE}, {0x5C0, 0x5C0}, {0x5C3, 0x5C3}, {0x5C6, 0x5C6}, {0x5F3, 0x5F4}, {0x609, 0x60A}, {0x60C, 0x60D}, -{0x61B, 0x61B}, {0x61E, 0x61F}, {0x66A, 0x66D}, {0x6D4, 0x6D4}, {0x700, 0x70D}, {0x7F7, 0x7F9}, {0x830, 0x83E}, {0x85E, 0x85E}, {0x964, 0x965}, {0x970, 0x970}, {0x9FD, 0x9FD}, {0xA76, 0xA76}, -{0xAF0, 0xAF0}, {0xC77, 0xC77}, {0xC84, 0xC84}, {0xDF4, 0xDF4}, {0xE4F, 0xE4F}, {0xE5A, 0xE5B}, {0xF04, 0xF12}, {0xF14, 0xF14}, {0xF3A, 0xF3D}, {0xF85, 0xF85}, {0xFD0, 0xFD4}, {0xFD9, 0xFDA}, -{0x104A, 0x104F}, {0x10FB, 0x10FB}, {0x1360, 0x1368}, {0x1400, 0x1400}, {0x166E, 0x166E}, {0x169B, 0x169C}, {0x16EB, 0x16ED}, {0x1735, 0x1736}, {0x17D4, 0x17D6}, {0x17D8, 0x17DA}, {0x1800, 0x180A}, -{0x1944, 0x1945}, {0x1A1E, 0x1A1F}, {0x1AA0, 0x1AA6}, {0x1AA8, 0x1AAD}, {0x1B5A, 0x1B60}, {0x1BFC, 0x1BFF}, {0x1C3B, 0x1C3F}, {0x1C7E, 0x1C7F}, {0x1CC0, 0x1CC7}, {0x1CD3, 0x1CD3}, {0x2010, 0x2027}, -{0x2030, 0x2043}, {0x2045, 0x2051}, {0x2053, 0x205E}, {0x207D, 0x207E}, {0x208D, 0x208E}, {0x2308, 0x230B}, {0x2329, 0x232A}, {0x2768, 0x2775}, {0x27C5, 0x27C6}, {0x27E6, 0x27EF}, {0x2983, 0x2998}, -{0x29D8, 0x29DB}, {0x29FC, 0x29FD}, {0x2CF9, 0x2CFC}, {0x2CFE, 0x2CFF}, {0x2D70, 0x2D70}, {0x2E00, 0x2E2E}, {0x2E30, 0x2E4F}, {0x2E52, 0x2E52}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301F}, -{0x3030, 0x3030}, {0x303D, 0x303D}, {0x30A0, 0x30A0}, {0x30FB, 0x30FB}, {0xA4FE, 0xA4FF}, {0xA60D, 0xA60F}, {0xA673, 0xA673}, {0xA67E, 0xA67E}, {0xA6F2, 0xA6F7}, {0xA874, 0xA877}, {0xA8CE, 0xA8CF}, -{0xA8F8, 0xA8FA}, {0xA8FC, 0xA8FC}, {0xA92E, 0xA92F}, {0xA95F, 0xA95F}, {0xA9C1, 0xA9CD}, {0xA9DE, 0xA9DF}, {0xAA5C, 0xAA5F}, {0xAADE, 0xAADF}, {0xAAF0, 0xAAF1}, {0xABEB, 0xABEB}, {0xFD3E, 0xFD3F}, -{0xFE10, 0xFE19}, {0xFE30, 0xFE52}, {0xFE54, 0xFE61}, {0xFE63, 0xFE63}, {0xFE68, 0xFE68}, {0xFE6A, 0xFE6B}, {0xFF01, 0xFF03}, {0xFF05, 0xFF0A}, {0xFF0C, 0xFF0F}, {0xFF1A, 0xFF1B}, {0xFF1F, 0xFF20}, -{0xFF3B, 0xFF3D}, {0xFF3F, 0xFF3F}, {0xFF5B, 0xFF5B}, {0xFF5D, 0xFF5D}, {0xFF5F, 0xFF65}, {0x10100, 0x10102}, {0x1039F, 0x1039F}, {0x103D0, 0x103D0}, {0x1056F, 0x1056F}, {0x10857, 0x10857}, -{0x1091F, 0x1091F}, {0x1093F, 0x1093F}, {0x10A50, 0x10A58}, {0x10A7F, 0x10A7F}, {0x10AF0, 0x10AF6}, {0x10B39, 0x10B3F}, {0x10B99, 0x10B9C}, {0x10EAD, 0x10EAD}, {0x10F55, 0x10F59}, {0x11047, 0x1104D}, -{0x110BB, 0x110BC}, {0x110BE, 0x110C1}, {0x11140, 0x11143}, {0x11174, 0x11175}, {0x111C5, 0x111C8}, {0x111CD, 0x111CD}, {0x111DB, 0x111DB}, {0x111DD, 0x111DF}, {0x11238, 0x1123D}, {0x112A9, 0x112A9}, -{0x1144B, 0x1144F}, {0x1145A, 0x1145B}, {0x1145D, 0x1145D}, {0x114C6, 0x114C6}, {0x115C1, 0x115D7}, {0x11641, 0x11643}, {0x11660, 0x1166C}, {0x1173C, 0x1173E}, {0x1183B, 0x1183B}, {0x11944, 0x11946}, -{0x119E2, 0x119E2}, {0x11A3F, 0x11A46}, {0x11A9A, 0x11A9C}, {0x11A9E, 0x11AA2}, {0x11C41, 0x11C45}, {0x11C70, 0x11C71}, {0x11EF7, 0x11EF8}, {0x11FFF, 0x11FFF}, {0x12470, 0x12474}, {0x16A6E, 0x16A6F}, -{0x16AF5, 0x16AF5}, {0x16B37, 0x16B3B}, {0x16B44, 0x16B44}, {0x16E97, 0x16E9A}, {0x16FE2, 0x16FE2}, {0x1BC9F, 0x1BC9F}, {0x1DA87, 0x1DA8B}, {0x1E95E, 0x1E95F}, -}; - -static const std::vector> symbol_ranges = { -{0x24, 0x24}, {0x2B, 0x2B}, {0x3C, 0x3E}, {0x5E, 0x5E}, {0x60, 0x60}, {0x7C, 0x7C}, {0x7E, 0x7E}, {0xA2, 0xA6}, {0xA8, 0xA9}, {0xAC, 0xAC}, {0xAE, 0xB1}, {0xB4, 0xB4}, {0xB8, 0xB8}, {0xD7, 0xD7}, -{0xF7, 0xF7}, {0x2C2, 0x2C5}, {0x2D2, 0x2DF}, {0x2E5, 0x2EB}, {0x2ED, 0x2ED}, {0x2EF, 0x2FF}, {0x375, 0x375}, {0x384, 0x385}, {0x3F6, 0x3F6}, {0x482, 0x482}, {0x58D, 0x58F}, {0x606, 0x608}, -{0x60B, 0x60B}, {0x60E, 0x60F}, {0x6DE, 0x6DE}, {0x6E9, 0x6E9}, {0x6FD, 0x6FE}, {0x7F6, 0x7F6}, {0x7FE, 0x7FF}, {0x9F2, 0x9F3}, {0x9FA, 0x9FB}, {0xAF1, 0xAF1}, {0xB70, 0xB70}, {0xBF3, 0xBFA}, -{0xC7F, 0xC7F}, {0xD4F, 0xD4F}, {0xD79, 0xD79}, {0xE3F, 0xE3F}, {0xF01, 0xF03}, {0xF13, 0xF13}, {0xF15, 0xF17}, {0xF1A, 0xF1F}, {0xF34, 0xF34}, {0xF36, 0xF36}, {0xF38, 0xF38}, {0xFBE, 0xFC5}, -{0xFC7, 0xFCC}, {0xFCE, 0xFCF}, {0xFD5, 0xFD8}, {0x109E, 0x109F}, {0x1390, 0x1399}, {0x166D, 0x166D}, {0x17DB, 0x17DB}, {0x1940, 0x1940}, {0x19DE, 0x19FF}, {0x1B61, 0x1B6A}, {0x1B74, 0x1B7C}, -{0x1FBD, 0x1FBD}, {0x1FBF, 0x1FC1}, {0x1FCD, 0x1FCF}, {0x1FDD, 0x1FDF}, {0x1FED, 0x1FEF}, {0x1FFD, 0x1FFE}, {0x2044, 0x2044}, {0x2052, 0x2052}, {0x207A, 0x207C}, {0x208A, 0x208C}, {0x20A0, 0x20BF}, -{0x2100, 0x2101}, {0x2103, 0x2106}, {0x2108, 0x2109}, {0x2114, 0x2114}, {0x2116, 0x2118}, {0x211E, 0x2123}, {0x2125, 0x2125}, {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212E, 0x212E}, {0x213A, 0x213B}, -{0x2140, 0x2144}, {0x214A, 0x214D}, {0x214F, 0x214F}, {0x218A, 0x218B}, {0x2190, 0x2307}, {0x230C, 0x2328}, {0x232B, 0x2426}, {0x2440, 0x244A}, {0x249C, 0x24E9}, {0x2500, 0x2767}, {0x2794, 0x27C4}, -{0x27C7, 0x27E5}, {0x27F0, 0x2982}, {0x2999, 0x29D7}, {0x29DC, 0x29FB}, {0x29FE, 0x2B73}, {0x2B76, 0x2B95}, {0x2B97, 0x2BFF}, {0x2CE5, 0x2CEA}, {0x2E50, 0x2E51}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, -{0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3004, 0x3004}, {0x3012, 0x3013}, {0x3020, 0x3020}, {0x3036, 0x3037}, {0x303E, 0x303F}, {0x309B, 0x309C}, {0x3190, 0x3191}, {0x3196, 0x319F}, {0x31C0, 0x31E3}, -{0x3200, 0x321E}, {0x322A, 0x3247}, {0x3250, 0x3250}, {0x3260, 0x327F}, {0x328A, 0x32B0}, {0x32C0, 0x33FF}, {0x4DC0, 0x4DFF}, {0xA490, 0xA4C6}, {0xA700, 0xA716}, {0xA720, 0xA721}, {0xA789, 0xA78A}, -{0xA828, 0xA82B}, {0xA836, 0xA839}, {0xAA77, 0xAA79}, {0xAB5B, 0xAB5B}, {0xAB6A, 0xAB6B}, {0xFB29, 0xFB29}, {0xFBB2, 0xFBC1}, {0xFDFC, 0xFDFD}, {0xFE62, 0xFE62}, {0xFE64, 0xFE66}, {0xFE69, 0xFE69}, -{0xFF04, 0xFF04}, {0xFF0B, 0xFF0B}, {0xFF1C, 0xFF1E}, {0xFF3E, 0xFF3E}, {0xFF40, 0xFF40}, {0xFF5C, 0xFF5C}, {0xFF5E, 0xFF5E}, {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD}, {0x10137, 0x1013F}, -{0x10179, 0x10189}, {0x1018C, 0x1018E}, {0x10190, 0x1019C}, {0x101A0, 0x101A0}, {0x101D0, 0x101FC}, {0x10877, 0x10878}, {0x10AC8, 0x10AC8}, {0x1173F, 0x1173F}, {0x11FD5, 0x11FF1}, {0x16B3C, 0x16B3F}, -{0x16B45, 0x16B45}, {0x1BC9C, 0x1BC9C}, {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D164}, {0x1D16A, 0x1D16C}, {0x1D183, 0x1D184}, {0x1D18C, 0x1D1A9}, {0x1D1AE, 0x1D1E8}, {0x1D200, 0x1D241}, -{0x1D245, 0x1D245}, {0x1D300, 0x1D356}, {0x1D6C1, 0x1D6C1}, {0x1D6DB, 0x1D6DB}, {0x1D6FB, 0x1D6FB}, {0x1D715, 0x1D715}, {0x1D735, 0x1D735}, {0x1D74F, 0x1D74F}, {0x1D76F, 0x1D76F}, {0x1D789, 0x1D789}, -{0x1D7A9, 0x1D7A9}, {0x1D7C3, 0x1D7C3}, {0x1D800, 0x1D9FF}, {0x1DA37, 0x1DA3A}, {0x1DA6D, 0x1DA74}, {0x1DA76, 0x1DA83}, {0x1DA85, 0x1DA86}, {0x1E14F, 0x1E14F}, {0x1E2FF, 0x1E2FF}, {0x1ECAC, 0x1ECAC}, -{0x1ECB0, 0x1ECB0}, {0x1ED2E, 0x1ED2E}, {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B}, {0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, {0x1F0C1, 0x1F0CF}, {0x1F0D1, 0x1F0F5}, {0x1F10D, 0x1F1AD}, -{0x1F1E6, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7}, {0x1F6E0, 0x1F6EC}, {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F773}, {0x1F780, 0x1F7D8}, -{0x1F7E0, 0x1F7EB}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859}, {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1}, {0x1F900, 0x1F978}, {0x1F97A, 0x1F9CB}, {0x1F9CD, 0x1FA53}, -{0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA74}, {0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAA8}, {0x1FAB0, 0x1FAB6}, {0x1FAC0, 0x1FAC2}, {0x1FAD0, 0x1FAD6}, {0x1FB00, 0x1FB92}, {0x1FB94, 0x1FBCA}, -}; - -static const std::vector> control_ranges = { -{0x0, 0x8}, {0xE, 0x1B}, {0x7F, 0x84}, {0x86, 0x9F}, {0xAD, 0xAD}, {0x378, 0x379}, {0x380, 0x383}, {0x38B, 0x38B}, {0x38D, 0x38D}, {0x3A2, 0x3A2}, {0x530, 0x530}, {0x557, 0x558}, {0x58B, 0x58C}, -{0x590, 0x590}, {0x5C8, 0x5CF}, {0x5EB, 0x5EE}, {0x5F5, 0x605}, {0x61C, 0x61D}, {0x6DD, 0x6DD}, {0x70E, 0x70F}, {0x74B, 0x74C}, {0x7B2, 0x7BF}, {0x7FB, 0x7FC}, {0x82E, 0x82F}, {0x83F, 0x83F}, -{0x85C, 0x85D}, {0x85F, 0x85F}, {0x86B, 0x89F}, {0x8B5, 0x8B5}, {0x8C8, 0x8D2}, {0x8E2, 0x8E2}, {0x984, 0x984}, {0x98D, 0x98E}, {0x991, 0x992}, {0x9A9, 0x9A9}, {0x9B1, 0x9B1}, {0x9B3, 0x9B5}, -{0x9BA, 0x9BB}, {0x9C5, 0x9C6}, {0x9C9, 0x9CA}, {0x9CF, 0x9D6}, {0x9D8, 0x9DB}, {0x9DE, 0x9DE}, {0x9E4, 0x9E5}, {0x9FF, 0xA00}, {0xA04, 0xA04}, {0xA0B, 0xA0E}, {0xA11, 0xA12}, {0xA29, 0xA29}, -{0xA31, 0xA31}, {0xA34, 0xA34}, {0xA37, 0xA37}, {0xA3A, 0xA3B}, {0xA3D, 0xA3D}, {0xA43, 0xA46}, {0xA49, 0xA4A}, {0xA4E, 0xA50}, {0xA52, 0xA58}, {0xA5D, 0xA5D}, {0xA5F, 0xA65}, {0xA77, 0xA80}, -{0xA84, 0xA84}, {0xA8E, 0xA8E}, {0xA92, 0xA92}, {0xAA9, 0xAA9}, {0xAB1, 0xAB1}, {0xAB4, 0xAB4}, {0xABA, 0xABB}, {0xAC6, 0xAC6}, {0xACA, 0xACA}, {0xACE, 0xACF}, {0xAD1, 0xADF}, {0xAE4, 0xAE5}, -{0xAF2, 0xAF8}, {0xB00, 0xB00}, {0xB04, 0xB04}, {0xB0D, 0xB0E}, {0xB11, 0xB12}, {0xB29, 0xB29}, {0xB31, 0xB31}, {0xB34, 0xB34}, {0xB3A, 0xB3B}, {0xB45, 0xB46}, {0xB49, 0xB4A}, {0xB4E, 0xB54}, -{0xB58, 0xB5B}, {0xB5E, 0xB5E}, {0xB64, 0xB65}, {0xB78, 0xB81}, {0xB84, 0xB84}, {0xB8B, 0xB8D}, {0xB91, 0xB91}, {0xB96, 0xB98}, {0xB9B, 0xB9B}, {0xB9D, 0xB9D}, {0xBA0, 0xBA2}, {0xBA5, 0xBA7}, -{0xBAB, 0xBAD}, {0xBBA, 0xBBD}, {0xBC3, 0xBC5}, {0xBC9, 0xBC9}, {0xBCE, 0xBCF}, {0xBD1, 0xBD6}, {0xBD8, 0xBE5}, {0xBFB, 0xBFF}, {0xC0D, 0xC0D}, {0xC11, 0xC11}, {0xC29, 0xC29}, {0xC3A, 0xC3C}, -{0xC45, 0xC45}, {0xC49, 0xC49}, {0xC4E, 0xC54}, {0xC57, 0xC57}, {0xC5B, 0xC5F}, {0xC64, 0xC65}, {0xC70, 0xC76}, {0xC8D, 0xC8D}, {0xC91, 0xC91}, {0xCA9, 0xCA9}, {0xCB4, 0xCB4}, {0xCBA, 0xCBB}, -{0xCC5, 0xCC5}, {0xCC9, 0xCC9}, {0xCCE, 0xCD4}, {0xCD7, 0xCDD}, {0xCDF, 0xCDF}, {0xCE4, 0xCE5}, {0xCF0, 0xCF0}, {0xCF3, 0xCFF}, {0xD0D, 0xD0D}, {0xD11, 0xD11}, {0xD45, 0xD45}, {0xD49, 0xD49}, -{0xD50, 0xD53}, {0xD64, 0xD65}, {0xD80, 0xD80}, {0xD84, 0xD84}, {0xD97, 0xD99}, {0xDB2, 0xDB2}, {0xDBC, 0xDBC}, {0xDBE, 0xDBF}, {0xDC7, 0xDC9}, {0xDCB, 0xDCE}, {0xDD5, 0xDD5}, {0xDD7, 0xDD7}, -{0xDE0, 0xDE5}, {0xDF0, 0xDF1}, {0xDF5, 0xE00}, {0xE3B, 0xE3E}, {0xE5C, 0xE80}, {0xE83, 0xE83}, {0xE85, 0xE85}, {0xE8B, 0xE8B}, {0xEA4, 0xEA4}, {0xEA6, 0xEA6}, {0xEBE, 0xEBF}, {0xEC5, 0xEC5}, -{0xEC7, 0xEC7}, {0xECE, 0xECF}, {0xEDA, 0xEDB}, {0xEE0, 0xEFF}, {0xF48, 0xF48}, {0xF6D, 0xF70}, {0xF98, 0xF98}, {0xFBD, 0xFBD}, {0xFCD, 0xFCD}, {0xFDB, 0xFFF}, {0x10C6, 0x10C6}, {0x10C8, 0x10CC}, -{0x10CE, 0x10CF}, {0x1249, 0x1249}, {0x124E, 0x124F}, {0x1257, 0x1257}, {0x1259, 0x1259}, {0x125E, 0x125F}, {0x1289, 0x1289}, {0x128E, 0x128F}, {0x12B1, 0x12B1}, {0x12B6, 0x12B7}, {0x12BF, 0x12BF}, -{0x12C1, 0x12C1}, {0x12C6, 0x12C7}, {0x12D7, 0x12D7}, {0x1311, 0x1311}, {0x1316, 0x1317}, {0x135B, 0x135C}, {0x137D, 0x137F}, {0x139A, 0x139F}, {0x13F6, 0x13F7}, {0x13FE, 0x13FF}, {0x169D, 0x169F}, -{0x16F9, 0x16FF}, {0x170D, 0x170D}, {0x1715, 0x171F}, {0x1737, 0x173F}, {0x1754, 0x175F}, {0x176D, 0x176D}, {0x1771, 0x1771}, {0x1774, 0x177F}, {0x17DE, 0x17DF}, {0x17EA, 0x17EF}, {0x17FA, 0x17FF}, -{0x180E, 0x180F}, {0x181A, 0x181F}, {0x1879, 0x187F}, {0x18AB, 0x18AF}, {0x18F6, 0x18FF}, {0x191F, 0x191F}, {0x192C, 0x192F}, {0x193C, 0x193F}, {0x1941, 0x1943}, {0x196E, 0x196F}, {0x1975, 0x197F}, -{0x19AC, 0x19AF}, {0x19CA, 0x19CF}, {0x19DB, 0x19DD}, {0x1A1C, 0x1A1D}, {0x1A5F, 0x1A5F}, {0x1A7D, 0x1A7E}, {0x1A8A, 0x1A8F}, {0x1A9A, 0x1A9F}, {0x1AAE, 0x1AAF}, {0x1AC1, 0x1AFF}, {0x1B4C, 0x1B4F}, -{0x1B7D, 0x1B7F}, {0x1BF4, 0x1BFB}, {0x1C38, 0x1C3A}, {0x1C4A, 0x1C4C}, {0x1C89, 0x1C8F}, {0x1CBB, 0x1CBC}, {0x1CC8, 0x1CCF}, {0x1CFB, 0x1CFF}, {0x1DFA, 0x1DFA}, {0x1F16, 0x1F17}, {0x1F1E, 0x1F1F}, -{0x1F46, 0x1F47}, {0x1F4E, 0x1F4F}, {0x1F58, 0x1F58}, {0x1F5A, 0x1F5A}, {0x1F5C, 0x1F5C}, {0x1F5E, 0x1F5E}, {0x1F7E, 0x1F7F}, {0x1FB5, 0x1FB5}, {0x1FC5, 0x1FC5}, {0x1FD4, 0x1FD5}, {0x1FDC, 0x1FDC}, -{0x1FF0, 0x1FF1}, {0x1FF5, 0x1FF5}, {0x1FFF, 0x1FFF}, {0x200B, 0x200F}, {0x202A, 0x202E}, {0x2060, 0x206F}, {0x2072, 0x2073}, {0x208F, 0x208F}, {0x209D, 0x209F}, {0x20C0, 0x20CF}, {0x20F1, 0x20FF}, -{0x218C, 0x218F}, {0x2427, 0x243F}, {0x244B, 0x245F}, {0x2B74, 0x2B75}, {0x2B96, 0x2B96}, {0x2C2F, 0x2C2F}, {0x2C5F, 0x2C5F}, {0x2CF4, 0x2CF8}, {0x2D26, 0x2D26}, {0x2D28, 0x2D2C}, {0x2D2E, 0x2D2F}, -{0x2D68, 0x2D6E}, {0x2D71, 0x2D7E}, {0x2D97, 0x2D9F}, {0x2DA7, 0x2DA7}, {0x2DAF, 0x2DAF}, {0x2DB7, 0x2DB7}, {0x2DBF, 0x2DBF}, {0x2DC7, 0x2DC7}, {0x2DCF, 0x2DCF}, {0x2DD7, 0x2DD7}, {0x2DDF, 0x2DDF}, -{0x2E53, 0x2E7F}, {0x2E9A, 0x2E9A}, {0x2EF4, 0x2EFF}, {0x2FD6, 0x2FEF}, {0x2FFC, 0x2FFF}, {0x3040, 0x3040}, {0x3097, 0x3098}, {0x3100, 0x3104}, {0x3130, 0x3130}, {0x318F, 0x318F}, {0x31E4, 0x31EF}, -{0x321F, 0x321F}, {0x9FFD, 0x9FFF}, {0xA48D, 0xA48F}, {0xA4C7, 0xA4CF}, {0xA62C, 0xA63F}, {0xA6F8, 0xA6FF}, {0xA7C0, 0xA7C1}, {0xA7CB, 0xA7F4}, {0xA82D, 0xA82F}, {0xA83A, 0xA83F}, {0xA878, 0xA87F}, -{0xA8C6, 0xA8CD}, {0xA8DA, 0xA8DF}, {0xA954, 0xA95E}, {0xA97D, 0xA97F}, {0xA9CE, 0xA9CE}, {0xA9DA, 0xA9DD}, {0xA9FF, 0xA9FF}, {0xAA37, 0xAA3F}, {0xAA4E, 0xAA4F}, {0xAA5A, 0xAA5B}, {0xAAC3, 0xAADA}, -{0xAAF7, 0xAB00}, {0xAB07, 0xAB08}, {0xAB0F, 0xAB10}, {0xAB17, 0xAB1F}, {0xAB27, 0xAB27}, {0xAB2F, 0xAB2F}, {0xAB6C, 0xAB6F}, {0xABEE, 0xABEF}, {0xABFA, 0xABFF}, {0xD7A4, 0xD7AF}, {0xD7C7, 0xD7CA}, -{0xD7FC, 0xF8FF}, {0xFA6E, 0xFA6F}, {0xFADA, 0xFAFF}, {0xFB07, 0xFB12}, {0xFB18, 0xFB1C}, {0xFB37, 0xFB37}, {0xFB3D, 0xFB3D}, {0xFB3F, 0xFB3F}, {0xFB42, 0xFB42}, {0xFB45, 0xFB45}, {0xFBC2, 0xFBD2}, -{0xFD40, 0xFD4F}, {0xFD90, 0xFD91}, {0xFDC8, 0xFDEF}, {0xFDFE, 0xFDFF}, {0xFE1A, 0xFE1F}, {0xFE53, 0xFE53}, {0xFE67, 0xFE67}, {0xFE6C, 0xFE6F}, {0xFE75, 0xFE75}, {0xFEFD, 0xFF00}, {0xFFBF, 0xFFC1}, -{0xFFC8, 0xFFC9}, {0xFFD0, 0xFFD1}, {0xFFD8, 0xFFD9}, {0xFFDD, 0xFFDF}, {0xFFE7, 0xFFE7}, {0xFFEF, 0xFFFB}, {0xFFFE, 0xFFFF}, {0x1000C, 0x1000C}, {0x10027, 0x10027}, {0x1003B, 0x1003B}, -{0x1003E, 0x1003E}, {0x1004E, 0x1004F}, {0x1005E, 0x1007F}, {0x100FB, 0x100FF}, {0x10103, 0x10106}, {0x10134, 0x10136}, {0x1018F, 0x1018F}, {0x1019D, 0x1019F}, {0x101A1, 0x101CF}, {0x101FE, 0x1027F}, -{0x1029D, 0x1029F}, {0x102D1, 0x102DF}, {0x102FC, 0x102FF}, {0x10324, 0x1032C}, {0x1034B, 0x1034F}, {0x1037B, 0x1037F}, {0x1039E, 0x1039E}, {0x103C4, 0x103C7}, {0x103D6, 0x103FF}, {0x1049E, 0x1049F}, -{0x104AA, 0x104AF}, {0x104D4, 0x104D7}, {0x104FC, 0x104FF}, {0x10528, 0x1052F}, {0x10564, 0x1056E}, {0x10570, 0x105FF}, {0x10737, 0x1073F}, {0x10756, 0x1075F}, {0x10768, 0x107FF}, {0x10806, 0x10807}, -{0x10809, 0x10809}, {0x10836, 0x10836}, {0x10839, 0x1083B}, {0x1083D, 0x1083E}, {0x10856, 0x10856}, {0x1089F, 0x108A6}, {0x108B0, 0x108DF}, {0x108F3, 0x108F3}, {0x108F6, 0x108FA}, {0x1091C, 0x1091E}, -{0x1093A, 0x1093E}, {0x10940, 0x1097F}, {0x109B8, 0x109BB}, {0x109D0, 0x109D1}, {0x10A04, 0x10A04}, {0x10A07, 0x10A0B}, {0x10A14, 0x10A14}, {0x10A18, 0x10A18}, {0x10A36, 0x10A37}, {0x10A3B, 0x10A3E}, -{0x10A49, 0x10A4F}, {0x10A59, 0x10A5F}, {0x10AA0, 0x10ABF}, {0x10AE7, 0x10AEA}, {0x10AF7, 0x10AFF}, {0x10B36, 0x10B38}, {0x10B56, 0x10B57}, {0x10B73, 0x10B77}, {0x10B92, 0x10B98}, {0x10B9D, 0x10BA8}, -{0x10BB0, 0x10BFF}, {0x10C49, 0x10C7F}, {0x10CB3, 0x10CBF}, {0x10CF3, 0x10CF9}, {0x10D28, 0x10D2F}, {0x10D3A, 0x10E5F}, {0x10E7F, 0x10E7F}, {0x10EAA, 0x10EAA}, {0x10EAE, 0x10EAF}, {0x10EB2, 0x10EFF}, -{0x10F28, 0x10F2F}, {0x10F5A, 0x10FAF}, {0x10FCC, 0x10FDF}, {0x10FF7, 0x10FFF}, {0x1104E, 0x11051}, {0x11070, 0x1107E}, {0x110BD, 0x110BD}, {0x110C2, 0x110CF}, {0x110E9, 0x110EF}, {0x110FA, 0x110FF}, -{0x11135, 0x11135}, {0x11148, 0x1114F}, {0x11177, 0x1117F}, {0x111E0, 0x111E0}, {0x111F5, 0x111FF}, {0x11212, 0x11212}, {0x1123F, 0x1127F}, {0x11287, 0x11287}, {0x11289, 0x11289}, {0x1128E, 0x1128E}, -{0x1129E, 0x1129E}, {0x112AA, 0x112AF}, {0x112EB, 0x112EF}, {0x112FA, 0x112FF}, {0x11304, 0x11304}, {0x1130D, 0x1130E}, {0x11311, 0x11312}, {0x11329, 0x11329}, {0x11331, 0x11331}, {0x11334, 0x11334}, -{0x1133A, 0x1133A}, {0x11345, 0x11346}, {0x11349, 0x1134A}, {0x1134E, 0x1134F}, {0x11351, 0x11356}, {0x11358, 0x1135C}, {0x11364, 0x11365}, {0x1136D, 0x1136F}, {0x11375, 0x113FF}, {0x1145C, 0x1145C}, -{0x11462, 0x1147F}, {0x114C8, 0x114CF}, {0x114DA, 0x1157F}, {0x115B6, 0x115B7}, {0x115DE, 0x115FF}, {0x11645, 0x1164F}, {0x1165A, 0x1165F}, {0x1166D, 0x1167F}, {0x116B9, 0x116BF}, {0x116CA, 0x116FF}, -{0x1171B, 0x1171C}, {0x1172C, 0x1172F}, {0x11740, 0x117FF}, {0x1183C, 0x1189F}, {0x118F3, 0x118FE}, {0x11907, 0x11908}, {0x1190A, 0x1190B}, {0x11914, 0x11914}, {0x11917, 0x11917}, {0x11936, 0x11936}, -{0x11939, 0x1193A}, {0x11947, 0x1194F}, {0x1195A, 0x1199F}, {0x119A8, 0x119A9}, {0x119D8, 0x119D9}, {0x119E5, 0x119FF}, {0x11A48, 0x11A4F}, {0x11AA3, 0x11ABF}, {0x11AF9, 0x11BFF}, {0x11C09, 0x11C09}, -{0x11C37, 0x11C37}, {0x11C46, 0x11C4F}, {0x11C6D, 0x11C6F}, {0x11C90, 0x11C91}, {0x11CA8, 0x11CA8}, {0x11CB7, 0x11CFF}, {0x11D07, 0x11D07}, {0x11D0A, 0x11D0A}, {0x11D37, 0x11D39}, {0x11D3B, 0x11D3B}, -{0x11D3E, 0x11D3E}, {0x11D48, 0x11D4F}, {0x11D5A, 0x11D5F}, {0x11D66, 0x11D66}, {0x11D69, 0x11D69}, {0x11D8F, 0x11D8F}, {0x11D92, 0x11D92}, {0x11D99, 0x11D9F}, {0x11DAA, 0x11EDF}, {0x11EF9, 0x11FAF}, -{0x11FB1, 0x11FBF}, {0x11FF2, 0x11FFE}, {0x1239A, 0x123FF}, {0x1246F, 0x1246F}, {0x12475, 0x1247F}, {0x12544, 0x12FFF}, {0x1342F, 0x143FF}, {0x14647, 0x167FF}, {0x16A39, 0x16A3F}, {0x16A5F, 0x16A5F}, -{0x16A6A, 0x16A6D}, {0x16A70, 0x16ACF}, {0x16AEE, 0x16AEF}, {0x16AF6, 0x16AFF}, {0x16B46, 0x16B4F}, {0x16B5A, 0x16B5A}, {0x16B62, 0x16B62}, {0x16B78, 0x16B7C}, {0x16B90, 0x16E3F}, {0x16E9B, 0x16EFF}, -{0x16F4B, 0x16F4E}, {0x16F88, 0x16F8E}, {0x16FA0, 0x16FDF}, {0x16FE5, 0x16FEF}, {0x16FF2, 0x16FFF}, {0x187F8, 0x187FF}, {0x18CD6, 0x18CFF}, {0x18D09, 0x1AFFF}, {0x1B11F, 0x1B14F}, {0x1B153, 0x1B163}, -{0x1B168, 0x1B16F}, {0x1B2FC, 0x1BBFF}, {0x1BC6B, 0x1BC6F}, {0x1BC7D, 0x1BC7F}, {0x1BC89, 0x1BC8F}, {0x1BC9A, 0x1BC9B}, {0x1BCA0, 0x1CFFF}, {0x1D0F6, 0x1D0FF}, {0x1D127, 0x1D128}, {0x1D173, 0x1D17A}, -{0x1D1E9, 0x1D1FF}, {0x1D246, 0x1D2DF}, {0x1D2F4, 0x1D2FF}, {0x1D357, 0x1D35F}, {0x1D379, 0x1D3FF}, {0x1D455, 0x1D455}, {0x1D49D, 0x1D49D}, {0x1D4A0, 0x1D4A1}, {0x1D4A3, 0x1D4A4}, {0x1D4A7, 0x1D4A8}, -{0x1D4AD, 0x1D4AD}, {0x1D4BA, 0x1D4BA}, {0x1D4BC, 0x1D4BC}, {0x1D4C4, 0x1D4C4}, {0x1D506, 0x1D506}, {0x1D50B, 0x1D50C}, {0x1D515, 0x1D515}, {0x1D51D, 0x1D51D}, {0x1D53A, 0x1D53A}, {0x1D53F, 0x1D53F}, -{0x1D545, 0x1D545}, {0x1D547, 0x1D549}, {0x1D551, 0x1D551}, {0x1D6A6, 0x1D6A7}, {0x1D7CC, 0x1D7CD}, {0x1DA8C, 0x1DA9A}, {0x1DAA0, 0x1DAA0}, {0x1DAB0, 0x1DFFF}, {0x1E007, 0x1E007}, {0x1E019, 0x1E01A}, -{0x1E022, 0x1E022}, {0x1E025, 0x1E025}, {0x1E02B, 0x1E0FF}, {0x1E12D, 0x1E12F}, {0x1E13E, 0x1E13F}, {0x1E14A, 0x1E14D}, {0x1E150, 0x1E2BF}, {0x1E2FA, 0x1E2FE}, {0x1E300, 0x1E7FF}, {0x1E8C5, 0x1E8C6}, -{0x1E8D7, 0x1E8FF}, {0x1E94C, 0x1E94F}, {0x1E95A, 0x1E95D}, {0x1E960, 0x1EC70}, {0x1ECB5, 0x1ED00}, {0x1ED3E, 0x1EDFF}, {0x1EE04, 0x1EE04}, {0x1EE20, 0x1EE20}, {0x1EE23, 0x1EE23}, {0x1EE25, 0x1EE26}, -{0x1EE28, 0x1EE28}, {0x1EE33, 0x1EE33}, {0x1EE38, 0x1EE38}, {0x1EE3A, 0x1EE3A}, {0x1EE3C, 0x1EE41}, {0x1EE43, 0x1EE46}, {0x1EE48, 0x1EE48}, {0x1EE4A, 0x1EE4A}, {0x1EE4C, 0x1EE4C}, {0x1EE50, 0x1EE50}, -{0x1EE53, 0x1EE53}, {0x1EE55, 0x1EE56}, {0x1EE58, 0x1EE58}, {0x1EE5A, 0x1EE5A}, {0x1EE5C, 0x1EE5C}, {0x1EE5E, 0x1EE5E}, {0x1EE60, 0x1EE60}, {0x1EE63, 0x1EE63}, {0x1EE65, 0x1EE66}, {0x1EE6B, 0x1EE6B}, -{0x1EE73, 0x1EE73}, {0x1EE78, 0x1EE78}, {0x1EE7D, 0x1EE7D}, {0x1EE7F, 0x1EE7F}, {0x1EE8A, 0x1EE8A}, {0x1EE9C, 0x1EEA0}, {0x1EEA4, 0x1EEA4}, {0x1EEAA, 0x1EEAA}, {0x1EEBC, 0x1EEEF}, {0x1EEF2, 0x1EFFF}, -{0x1F02C, 0x1F02F}, {0x1F094, 0x1F09F}, {0x1F0AF, 0x1F0B0}, {0x1F0C0, 0x1F0C0}, {0x1F0D0, 0x1F0D0}, {0x1F0F6, 0x1F0FF}, {0x1F1AE, 0x1F1E5}, {0x1F203, 0x1F20F}, {0x1F23C, 0x1F23F}, {0x1F249, 0x1F24F}, -{0x1F252, 0x1F25F}, {0x1F266, 0x1F2FF}, {0x1F6D8, 0x1F6DF}, {0x1F6ED, 0x1F6EF}, {0x1F6FD, 0x1F6FF}, {0x1F774, 0x1F77F}, {0x1F7D9, 0x1F7DF}, {0x1F7EC, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F}, -{0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8AF}, {0x1F8B2, 0x1F8FF}, {0x1F979, 0x1F979}, {0x1F9CC, 0x1F9CC}, {0x1FA54, 0x1FA5F}, {0x1FA6E, 0x1FA6F}, {0x1FA75, 0x1FA77}, {0x1FA7B, 0x1FA7F}, -{0x1FA87, 0x1FA8F}, {0x1FAA9, 0x1FAAF}, {0x1FAB7, 0x1FABF}, {0x1FAC3, 0x1FACF}, {0x1FAD7, 0x1FAFF}, {0x1FB93, 0x1FB93}, {0x1FBCB, 0x1FBEF}, {0x1FBFA, 0x1FFFF}, {0x2A6DE, 0x2A6FF}, {0x2B735, 0x2B73F}, -{0x2B81E, 0x2B81F}, {0x2CEA2, 0x2CEAF}, {0x2EBE1, 0x2F7FF}, {0x2FA1E, 0x2FFFF}, {0x3134B, 0xE00FF}, {0xE01F0, 0x10FFFF}, -}; - -static const std::multimap nfd_map = { -{0xC0, 0x41}, {0xC0, 0x300}, {0xC1, 0x41}, {0xC1, 0x301}, {0xC2, 0x41}, {0xC2, 0x302}, {0xC3, 0x41}, {0xC3, 0x303}, {0xC4, 0x41}, {0xC4, 0x308}, {0xC5, 0x41}, {0xC5, 0x30A}, {0xC7, 0x43}, -{0xC7, 0x327}, {0xC8, 0x45}, {0xC8, 0x300}, {0xC9, 0x45}, {0xC9, 0x301}, {0xCA, 0x45}, {0xCA, 0x302}, {0xCB, 0x45}, {0xCB, 0x308}, {0xCC, 0x49}, {0xCC, 0x300}, {0xCD, 0x49}, {0xCD, 0x301}, -{0xCE, 0x49}, {0xCE, 0x302}, {0xCF, 0x49}, {0xCF, 0x308}, {0xD1, 0x4E}, {0xD1, 0x303}, {0xD2, 0x4F}, {0xD2, 0x300}, {0xD3, 0x4F}, {0xD3, 0x301}, {0xD4, 0x4F}, {0xD4, 0x302}, {0xD5, 0x4F}, -{0xD5, 0x303}, {0xD6, 0x4F}, {0xD6, 0x308}, {0xD9, 0x55}, {0xD9, 0x300}, {0xDA, 0x55}, {0xDA, 0x301}, {0xDB, 0x55}, {0xDB, 0x302}, {0xDC, 0x55}, {0xDC, 0x308}, {0xDD, 0x59}, {0xDD, 0x301}, -{0xE0, 0x61}, {0xE0, 0x300}, {0xE1, 0x61}, {0xE1, 0x301}, {0xE2, 0x61}, {0xE2, 0x302}, {0xE3, 0x61}, {0xE3, 0x303}, {0xE4, 0x61}, {0xE4, 0x308}, {0xE5, 0x61}, {0xE5, 0x30A}, {0xE7, 0x63}, -{0xE7, 0x327}, {0xE8, 0x65}, {0xE8, 0x300}, {0xE9, 0x65}, {0xE9, 0x301}, {0xEA, 0x65}, {0xEA, 0x302}, {0xEB, 0x65}, {0xEB, 0x308}, {0xEC, 0x69}, {0xEC, 0x300}, {0xED, 0x69}, {0xED, 0x301}, -{0xEE, 0x69}, {0xEE, 0x302}, {0xEF, 0x69}, {0xEF, 0x308}, {0xF1, 0x6E}, {0xF1, 0x303}, {0xF2, 0x6F}, {0xF2, 0x300}, {0xF3, 0x6F}, {0xF3, 0x301}, {0xF4, 0x6F}, {0xF4, 0x302}, {0xF5, 0x6F}, -{0xF5, 0x303}, {0xF6, 0x6F}, {0xF6, 0x308}, {0xF9, 0x75}, {0xF9, 0x300}, {0xFA, 0x75}, {0xFA, 0x301}, {0xFB, 0x75}, {0xFB, 0x302}, {0xFC, 0x75}, {0xFC, 0x308}, {0xFD, 0x79}, {0xFD, 0x301}, -{0xFF, 0x79}, {0xFF, 0x308}, {0x100, 0x41}, {0x100, 0x304}, {0x101, 0x61}, {0x101, 0x304}, {0x102, 0x41}, {0x102, 0x306}, {0x103, 0x61}, {0x103, 0x306}, {0x104, 0x41}, {0x104, 0x328}, {0x105, 0x61}, -{0x105, 0x328}, {0x106, 0x43}, {0x106, 0x301}, {0x107, 0x63}, {0x107, 0x301}, {0x108, 0x43}, {0x108, 0x302}, {0x109, 0x63}, {0x109, 0x302}, {0x10A, 0x43}, {0x10A, 0x307}, {0x10B, 0x63}, -{0x10B, 0x307}, {0x10C, 0x43}, {0x10C, 0x30C}, {0x10D, 0x63}, {0x10D, 0x30C}, {0x10E, 0x44}, {0x10E, 0x30C}, {0x10F, 0x64}, {0x10F, 0x30C}, {0x112, 0x45}, {0x112, 0x304}, {0x113, 0x65}, -{0x113, 0x304}, {0x114, 0x45}, {0x114, 0x306}, {0x115, 0x65}, {0x115, 0x306}, {0x116, 0x45}, {0x116, 0x307}, {0x117, 0x65}, {0x117, 0x307}, {0x118, 0x45}, {0x118, 0x328}, {0x119, 0x65}, -{0x119, 0x328}, {0x11A, 0x45}, {0x11A, 0x30C}, {0x11B, 0x65}, {0x11B, 0x30C}, {0x11C, 0x47}, {0x11C, 0x302}, {0x11D, 0x67}, {0x11D, 0x302}, {0x11E, 0x47}, {0x11E, 0x306}, {0x11F, 0x67}, -{0x11F, 0x306}, {0x120, 0x47}, {0x120, 0x307}, {0x121, 0x67}, {0x121, 0x307}, {0x122, 0x47}, {0x122, 0x327}, {0x123, 0x67}, {0x123, 0x327}, {0x124, 0x48}, {0x124, 0x302}, {0x125, 0x68}, -{0x125, 0x302}, {0x128, 0x49}, {0x128, 0x303}, {0x129, 0x69}, {0x129, 0x303}, {0x12A, 0x49}, {0x12A, 0x304}, {0x12B, 0x69}, {0x12B, 0x304}, {0x12C, 0x49}, {0x12C, 0x306}, {0x12D, 0x69}, -{0x12D, 0x306}, {0x12E, 0x49}, {0x12E, 0x328}, {0x12F, 0x69}, {0x12F, 0x328}, {0x130, 0x49}, {0x130, 0x307}, {0x134, 0x4A}, {0x134, 0x302}, {0x135, 0x6A}, {0x135, 0x302}, {0x136, 0x4B}, -{0x136, 0x327}, {0x137, 0x6B}, {0x137, 0x327}, {0x139, 0x4C}, {0x139, 0x301}, {0x13A, 0x6C}, {0x13A, 0x301}, {0x13B, 0x4C}, {0x13B, 0x327}, {0x13C, 0x6C}, {0x13C, 0x327}, {0x13D, 0x4C}, -{0x13D, 0x30C}, {0x13E, 0x6C}, {0x13E, 0x30C}, {0x143, 0x4E}, {0x143, 0x301}, {0x144, 0x6E}, {0x144, 0x301}, {0x145, 0x4E}, {0x145, 0x327}, {0x146, 0x6E}, {0x146, 0x327}, {0x147, 0x4E}, -{0x147, 0x30C}, {0x148, 0x6E}, {0x148, 0x30C}, {0x14C, 0x4F}, {0x14C, 0x304}, {0x14D, 0x6F}, {0x14D, 0x304}, {0x14E, 0x4F}, {0x14E, 0x306}, {0x14F, 0x6F}, {0x14F, 0x306}, {0x150, 0x4F}, -{0x150, 0x30B}, {0x151, 0x6F}, {0x151, 0x30B}, {0x154, 0x52}, {0x154, 0x301}, {0x155, 0x72}, {0x155, 0x301}, {0x156, 0x52}, {0x156, 0x327}, {0x157, 0x72}, {0x157, 0x327}, {0x158, 0x52}, -{0x158, 0x30C}, {0x159, 0x72}, {0x159, 0x30C}, {0x15A, 0x53}, {0x15A, 0x301}, {0x15B, 0x73}, {0x15B, 0x301}, {0x15C, 0x53}, {0x15C, 0x302}, {0x15D, 0x73}, {0x15D, 0x302}, {0x15E, 0x53}, -{0x15E, 0x327}, {0x15F, 0x73}, {0x15F, 0x327}, {0x160, 0x53}, {0x160, 0x30C}, {0x161, 0x73}, {0x161, 0x30C}, {0x162, 0x54}, {0x162, 0x327}, {0x163, 0x74}, {0x163, 0x327}, {0x164, 0x54}, -{0x164, 0x30C}, {0x165, 0x74}, {0x165, 0x30C}, {0x168, 0x55}, {0x168, 0x303}, {0x169, 0x75}, {0x169, 0x303}, {0x16A, 0x55}, {0x16A, 0x304}, {0x16B, 0x75}, {0x16B, 0x304}, {0x16C, 0x55}, -{0x16C, 0x306}, {0x16D, 0x75}, {0x16D, 0x306}, {0x16E, 0x55}, {0x16E, 0x30A}, {0x16F, 0x75}, {0x16F, 0x30A}, {0x170, 0x55}, {0x170, 0x30B}, {0x171, 0x75}, {0x171, 0x30B}, {0x172, 0x55}, -{0x172, 0x328}, {0x173, 0x75}, {0x173, 0x328}, {0x174, 0x57}, {0x174, 0x302}, {0x175, 0x77}, {0x175, 0x302}, {0x176, 0x59}, {0x176, 0x302}, {0x177, 0x79}, {0x177, 0x302}, {0x178, 0x59}, -{0x178, 0x308}, {0x179, 0x5A}, {0x179, 0x301}, {0x17A, 0x7A}, {0x17A, 0x301}, {0x17B, 0x5A}, {0x17B, 0x307}, {0x17C, 0x7A}, {0x17C, 0x307}, {0x17D, 0x5A}, {0x17D, 0x30C}, {0x17E, 0x7A}, -{0x17E, 0x30C}, {0x1A0, 0x4F}, {0x1A0, 0x31B}, {0x1A1, 0x6F}, {0x1A1, 0x31B}, {0x1AF, 0x55}, {0x1AF, 0x31B}, {0x1B0, 0x75}, {0x1B0, 0x31B}, {0x1CD, 0x41}, {0x1CD, 0x30C}, {0x1CE, 0x61}, -{0x1CE, 0x30C}, {0x1CF, 0x49}, {0x1CF, 0x30C}, {0x1D0, 0x69}, {0x1D0, 0x30C}, {0x1D1, 0x4F}, {0x1D1, 0x30C}, {0x1D2, 0x6F}, {0x1D2, 0x30C}, {0x1D3, 0x55}, {0x1D3, 0x30C}, {0x1D4, 0x75}, -{0x1D4, 0x30C}, {0x1D5, 0x55}, {0x1D5, 0x308}, {0x1D5, 0x304}, {0x1D6, 0x75}, {0x1D6, 0x308}, {0x1D6, 0x304}, {0x1D7, 0x55}, {0x1D7, 0x308}, {0x1D7, 0x301}, {0x1D8, 0x75}, {0x1D8, 0x308}, -{0x1D8, 0x301}, {0x1D9, 0x55}, {0x1D9, 0x308}, {0x1D9, 0x30C}, {0x1DA, 0x75}, {0x1DA, 0x308}, {0x1DA, 0x30C}, {0x1DB, 0x55}, {0x1DB, 0x308}, {0x1DB, 0x300}, {0x1DC, 0x75}, {0x1DC, 0x308}, -{0x1DC, 0x300}, {0x1DE, 0x41}, {0x1DE, 0x308}, {0x1DE, 0x304}, {0x1DF, 0x61}, {0x1DF, 0x308}, {0x1DF, 0x304}, {0x1E0, 0x41}, {0x1E0, 0x307}, {0x1E0, 0x304}, {0x1E1, 0x61}, {0x1E1, 0x307}, -{0x1E1, 0x304}, {0x1E2, 0xC6}, {0x1E2, 0x304}, {0x1E3, 0xE6}, {0x1E3, 0x304}, {0x1E6, 0x47}, {0x1E6, 0x30C}, {0x1E7, 0x67}, {0x1E7, 0x30C}, {0x1E8, 0x4B}, {0x1E8, 0x30C}, {0x1E9, 0x6B}, -{0x1E9, 0x30C}, {0x1EA, 0x4F}, {0x1EA, 0x328}, {0x1EB, 0x6F}, {0x1EB, 0x328}, {0x1EC, 0x4F}, {0x1EC, 0x328}, {0x1EC, 0x304}, {0x1ED, 0x6F}, {0x1ED, 0x328}, {0x1ED, 0x304}, {0x1EE, 0x1B7}, -{0x1EE, 0x30C}, {0x1EF, 0x292}, {0x1EF, 0x30C}, {0x1F0, 0x6A}, {0x1F0, 0x30C}, {0x1F4, 0x47}, {0x1F4, 0x301}, {0x1F5, 0x67}, {0x1F5, 0x301}, {0x1F8, 0x4E}, {0x1F8, 0x300}, {0x1F9, 0x6E}, -{0x1F9, 0x300}, {0x1FA, 0x41}, {0x1FA, 0x30A}, {0x1FA, 0x301}, {0x1FB, 0x61}, {0x1FB, 0x30A}, {0x1FB, 0x301}, {0x1FC, 0xC6}, {0x1FC, 0x301}, {0x1FD, 0xE6}, {0x1FD, 0x301}, {0x1FE, 0xD8}, -{0x1FE, 0x301}, {0x1FF, 0xF8}, {0x1FF, 0x301}, {0x200, 0x41}, {0x200, 0x30F}, {0x201, 0x61}, {0x201, 0x30F}, {0x202, 0x41}, {0x202, 0x311}, {0x203, 0x61}, {0x203, 0x311}, {0x204, 0x45}, -{0x204, 0x30F}, {0x205, 0x65}, {0x205, 0x30F}, {0x206, 0x45}, {0x206, 0x311}, {0x207, 0x65}, {0x207, 0x311}, {0x208, 0x49}, {0x208, 0x30F}, {0x209, 0x69}, {0x209, 0x30F}, {0x20A, 0x49}, -{0x20A, 0x311}, {0x20B, 0x69}, {0x20B, 0x311}, {0x20C, 0x4F}, {0x20C, 0x30F}, {0x20D, 0x6F}, {0x20D, 0x30F}, {0x20E, 0x4F}, {0x20E, 0x311}, {0x20F, 0x6F}, {0x20F, 0x311}, {0x210, 0x52}, -{0x210, 0x30F}, {0x211, 0x72}, {0x211, 0x30F}, {0x212, 0x52}, {0x212, 0x311}, {0x213, 0x72}, {0x213, 0x311}, {0x214, 0x55}, {0x214, 0x30F}, {0x215, 0x75}, {0x215, 0x30F}, {0x216, 0x55}, -{0x216, 0x311}, {0x217, 0x75}, {0x217, 0x311}, {0x218, 0x53}, {0x218, 0x326}, {0x219, 0x73}, {0x219, 0x326}, {0x21A, 0x54}, {0x21A, 0x326}, {0x21B, 0x74}, {0x21B, 0x326}, {0x21E, 0x48}, -{0x21E, 0x30C}, {0x21F, 0x68}, {0x21F, 0x30C}, {0x226, 0x41}, {0x226, 0x307}, {0x227, 0x61}, {0x227, 0x307}, {0x228, 0x45}, {0x228, 0x327}, {0x229, 0x65}, {0x229, 0x327}, {0x22A, 0x4F}, -{0x22A, 0x308}, {0x22A, 0x304}, {0x22B, 0x6F}, {0x22B, 0x308}, {0x22B, 0x304}, {0x22C, 0x4F}, {0x22C, 0x303}, {0x22C, 0x304}, {0x22D, 0x6F}, {0x22D, 0x303}, {0x22D, 0x304}, {0x22E, 0x4F}, -{0x22E, 0x307}, {0x22F, 0x6F}, {0x22F, 0x307}, {0x230, 0x4F}, {0x230, 0x307}, {0x230, 0x304}, {0x231, 0x6F}, {0x231, 0x307}, {0x231, 0x304}, {0x232, 0x59}, {0x232, 0x304}, {0x233, 0x79}, -{0x233, 0x304}, {0x340, 0x300}, {0x341, 0x301}, {0x343, 0x313}, {0x344, 0x308}, {0x344, 0x301}, {0x374, 0x2B9}, {0x37E, 0x3B}, {0x385, 0xA8}, {0x385, 0x301}, {0x386, 0x391}, {0x386, 0x301}, -{0x387, 0xB7}, {0x388, 0x395}, {0x388, 0x301}, {0x389, 0x397}, {0x389, 0x301}, {0x38A, 0x399}, {0x38A, 0x301}, {0x38C, 0x39F}, {0x38C, 0x301}, {0x38E, 0x3A5}, {0x38E, 0x301}, {0x38F, 0x3A9}, -{0x38F, 0x301}, {0x390, 0x3B9}, {0x390, 0x308}, {0x390, 0x301}, {0x3AA, 0x399}, {0x3AA, 0x308}, {0x3AB, 0x3A5}, {0x3AB, 0x308}, {0x3AC, 0x3B1}, {0x3AC, 0x301}, {0x3AD, 0x3B5}, {0x3AD, 0x301}, -{0x3AE, 0x3B7}, {0x3AE, 0x301}, {0x3AF, 0x3B9}, {0x3AF, 0x301}, {0x3B0, 0x3C5}, {0x3B0, 0x308}, {0x3B0, 0x301}, {0x3CA, 0x3B9}, {0x3CA, 0x308}, {0x3CB, 0x3C5}, {0x3CB, 0x308}, {0x3CC, 0x3BF}, -{0x3CC, 0x301}, {0x3CD, 0x3C5}, {0x3CD, 0x301}, {0x3CE, 0x3C9}, {0x3CE, 0x301}, {0x3D3, 0x3D2}, {0x3D3, 0x301}, {0x3D4, 0x3D2}, {0x3D4, 0x308}, {0x400, 0x415}, {0x400, 0x300}, {0x401, 0x415}, -{0x401, 0x308}, {0x403, 0x413}, {0x403, 0x301}, {0x407, 0x406}, {0x407, 0x308}, {0x40C, 0x41A}, {0x40C, 0x301}, {0x40D, 0x418}, {0x40D, 0x300}, {0x40E, 0x423}, {0x40E, 0x306}, {0x419, 0x418}, -{0x419, 0x306}, {0x439, 0x438}, {0x439, 0x306}, {0x450, 0x435}, {0x450, 0x300}, {0x451, 0x435}, {0x451, 0x308}, {0x453, 0x433}, {0x453, 0x301}, {0x457, 0x456}, {0x457, 0x308}, {0x45C, 0x43A}, -{0x45C, 0x301}, {0x45D, 0x438}, {0x45D, 0x300}, {0x45E, 0x443}, {0x45E, 0x306}, {0x476, 0x474}, {0x476, 0x30F}, {0x477, 0x475}, {0x477, 0x30F}, {0x4C1, 0x416}, {0x4C1, 0x306}, {0x4C2, 0x436}, -{0x4C2, 0x306}, {0x4D0, 0x410}, {0x4D0, 0x306}, {0x4D1, 0x430}, {0x4D1, 0x306}, {0x4D2, 0x410}, {0x4D2, 0x308}, {0x4D3, 0x430}, {0x4D3, 0x308}, {0x4D6, 0x415}, {0x4D6, 0x306}, {0x4D7, 0x435}, -{0x4D7, 0x306}, {0x4DA, 0x4D8}, {0x4DA, 0x308}, {0x4DB, 0x4D9}, {0x4DB, 0x308}, {0x4DC, 0x416}, {0x4DC, 0x308}, {0x4DD, 0x436}, {0x4DD, 0x308}, {0x4DE, 0x417}, {0x4DE, 0x308}, {0x4DF, 0x437}, -{0x4DF, 0x308}, {0x4E2, 0x418}, {0x4E2, 0x304}, {0x4E3, 0x438}, {0x4E3, 0x304}, {0x4E4, 0x418}, {0x4E4, 0x308}, {0x4E5, 0x438}, {0x4E5, 0x308}, {0x4E6, 0x41E}, {0x4E6, 0x308}, {0x4E7, 0x43E}, -{0x4E7, 0x308}, {0x4EA, 0x4E8}, {0x4EA, 0x308}, {0x4EB, 0x4E9}, {0x4EB, 0x308}, {0x4EC, 0x42D}, {0x4EC, 0x308}, {0x4ED, 0x44D}, {0x4ED, 0x308}, {0x4EE, 0x423}, {0x4EE, 0x304}, {0x4EF, 0x443}, -{0x4EF, 0x304}, {0x4F0, 0x423}, {0x4F0, 0x308}, {0x4F1, 0x443}, {0x4F1, 0x308}, {0x4F2, 0x423}, {0x4F2, 0x30B}, {0x4F3, 0x443}, {0x4F3, 0x30B}, {0x4F4, 0x427}, {0x4F4, 0x308}, {0x4F5, 0x447}, -{0x4F5, 0x308}, {0x4F8, 0x42B}, {0x4F8, 0x308}, {0x4F9, 0x44B}, {0x4F9, 0x308}, {0x622, 0x627}, {0x622, 0x653}, {0x623, 0x627}, {0x623, 0x654}, {0x624, 0x648}, {0x624, 0x654}, {0x625, 0x627}, -{0x625, 0x655}, {0x626, 0x64A}, {0x626, 0x654}, {0x6C0, 0x6D5}, {0x6C0, 0x654}, {0x6C2, 0x6C1}, {0x6C2, 0x654}, {0x6D3, 0x6D2}, {0x6D3, 0x654}, {0x929, 0x928}, {0x929, 0x93C}, {0x931, 0x930}, -{0x931, 0x93C}, {0x934, 0x933}, {0x934, 0x93C}, {0x958, 0x915}, {0x958, 0x93C}, {0x959, 0x916}, {0x959, 0x93C}, {0x95A, 0x917}, {0x95A, 0x93C}, {0x95B, 0x91C}, {0x95B, 0x93C}, {0x95C, 0x921}, -{0x95C, 0x93C}, {0x95D, 0x922}, {0x95D, 0x93C}, {0x95E, 0x92B}, {0x95E, 0x93C}, {0x95F, 0x92F}, {0x95F, 0x93C}, {0x9CB, 0x9C7}, {0x9CB, 0x9BE}, {0x9CC, 0x9C7}, {0x9CC, 0x9D7}, {0x9DC, 0x9A1}, -{0x9DC, 0x9BC}, {0x9DD, 0x9A2}, {0x9DD, 0x9BC}, {0x9DF, 0x9AF}, {0x9DF, 0x9BC}, {0xA33, 0xA32}, {0xA33, 0xA3C}, {0xA36, 0xA38}, {0xA36, 0xA3C}, {0xA59, 0xA16}, {0xA59, 0xA3C}, {0xA5A, 0xA17}, -{0xA5A, 0xA3C}, {0xA5B, 0xA1C}, {0xA5B, 0xA3C}, {0xA5E, 0xA2B}, {0xA5E, 0xA3C}, {0xB48, 0xB47}, {0xB48, 0xB56}, {0xB4B, 0xB47}, {0xB4B, 0xB3E}, {0xB4C, 0xB47}, {0xB4C, 0xB57}, {0xB5C, 0xB21}, -{0xB5C, 0xB3C}, {0xB5D, 0xB22}, {0xB5D, 0xB3C}, {0xB94, 0xB92}, {0xB94, 0xBD7}, {0xBCA, 0xBC6}, {0xBCA, 0xBBE}, {0xBCB, 0xBC7}, {0xBCB, 0xBBE}, {0xBCC, 0xBC6}, {0xBCC, 0xBD7}, {0xC48, 0xC46}, -{0xC48, 0xC56}, {0xCC0, 0xCBF}, {0xCC0, 0xCD5}, {0xCC7, 0xCC6}, {0xCC7, 0xCD5}, {0xCC8, 0xCC6}, {0xCC8, 0xCD6}, {0xCCA, 0xCC6}, {0xCCA, 0xCC2}, {0xCCB, 0xCC6}, {0xCCB, 0xCC2}, {0xCCB, 0xCD5}, -{0xD4A, 0xD46}, {0xD4A, 0xD3E}, {0xD4B, 0xD47}, {0xD4B, 0xD3E}, {0xD4C, 0xD46}, {0xD4C, 0xD57}, {0xDDA, 0xDD9}, {0xDDA, 0xDCA}, {0xDDC, 0xDD9}, {0xDDC, 0xDCF}, {0xDDD, 0xDD9}, {0xDDD, 0xDCF}, -{0xDDD, 0xDCA}, {0xDDE, 0xDD9}, {0xDDE, 0xDDF}, {0xF43, 0xF42}, {0xF43, 0xFB7}, {0xF4D, 0xF4C}, {0xF4D, 0xFB7}, {0xF52, 0xF51}, {0xF52, 0xFB7}, {0xF57, 0xF56}, {0xF57, 0xFB7}, {0xF5C, 0xF5B}, -{0xF5C, 0xFB7}, {0xF69, 0xF40}, {0xF69, 0xFB5}, {0xF73, 0xF71}, {0xF73, 0xF72}, {0xF75, 0xF71}, {0xF75, 0xF74}, {0xF76, 0xFB2}, {0xF76, 0xF80}, {0xF78, 0xFB3}, {0xF78, 0xF80}, {0xF81, 0xF71}, -{0xF81, 0xF80}, {0xF93, 0xF92}, {0xF93, 0xFB7}, {0xF9D, 0xF9C}, {0xF9D, 0xFB7}, {0xFA2, 0xFA1}, {0xFA2, 0xFB7}, {0xFA7, 0xFA6}, {0xFA7, 0xFB7}, {0xFAC, 0xFAB}, {0xFAC, 0xFB7}, {0xFB9, 0xF90}, -{0xFB9, 0xFB5}, {0x1026, 0x1025}, {0x1026, 0x102E}, {0x1B06, 0x1B05}, {0x1B06, 0x1B35}, {0x1B08, 0x1B07}, {0x1B08, 0x1B35}, {0x1B0A, 0x1B09}, {0x1B0A, 0x1B35}, {0x1B0C, 0x1B0B}, {0x1B0C, 0x1B35}, -{0x1B0E, 0x1B0D}, {0x1B0E, 0x1B35}, {0x1B12, 0x1B11}, {0x1B12, 0x1B35}, {0x1B3B, 0x1B3A}, {0x1B3B, 0x1B35}, {0x1B3D, 0x1B3C}, {0x1B3D, 0x1B35}, {0x1B40, 0x1B3E}, {0x1B40, 0x1B35}, {0x1B41, 0x1B3F}, -{0x1B41, 0x1B35}, {0x1B43, 0x1B42}, {0x1B43, 0x1B35}, {0x1E00, 0x41}, {0x1E00, 0x325}, {0x1E01, 0x61}, {0x1E01, 0x325}, {0x1E02, 0x42}, {0x1E02, 0x307}, {0x1E03, 0x62}, {0x1E03, 0x307}, -{0x1E04, 0x42}, {0x1E04, 0x323}, {0x1E05, 0x62}, {0x1E05, 0x323}, {0x1E06, 0x42}, {0x1E06, 0x331}, {0x1E07, 0x62}, {0x1E07, 0x331}, {0x1E08, 0x43}, {0x1E08, 0x327}, {0x1E08, 0x301}, {0x1E09, 0x63}, -{0x1E09, 0x327}, {0x1E09, 0x301}, {0x1E0A, 0x44}, {0x1E0A, 0x307}, {0x1E0B, 0x64}, {0x1E0B, 0x307}, {0x1E0C, 0x44}, {0x1E0C, 0x323}, {0x1E0D, 0x64}, {0x1E0D, 0x323}, {0x1E0E, 0x44}, {0x1E0E, 0x331}, -{0x1E0F, 0x64}, {0x1E0F, 0x331}, {0x1E10, 0x44}, {0x1E10, 0x327}, {0x1E11, 0x64}, {0x1E11, 0x327}, {0x1E12, 0x44}, {0x1E12, 0x32D}, {0x1E13, 0x64}, {0x1E13, 0x32D}, {0x1E14, 0x45}, {0x1E14, 0x304}, -{0x1E14, 0x300}, {0x1E15, 0x65}, {0x1E15, 0x304}, {0x1E15, 0x300}, {0x1E16, 0x45}, {0x1E16, 0x304}, {0x1E16, 0x301}, {0x1E17, 0x65}, {0x1E17, 0x304}, {0x1E17, 0x301}, {0x1E18, 0x45}, {0x1E18, 0x32D}, -{0x1E19, 0x65}, {0x1E19, 0x32D}, {0x1E1A, 0x45}, {0x1E1A, 0x330}, {0x1E1B, 0x65}, {0x1E1B, 0x330}, {0x1E1C, 0x45}, {0x1E1C, 0x327}, {0x1E1C, 0x306}, {0x1E1D, 0x65}, {0x1E1D, 0x327}, {0x1E1D, 0x306}, -{0x1E1E, 0x46}, {0x1E1E, 0x307}, {0x1E1F, 0x66}, {0x1E1F, 0x307}, {0x1E20, 0x47}, {0x1E20, 0x304}, {0x1E21, 0x67}, {0x1E21, 0x304}, {0x1E22, 0x48}, {0x1E22, 0x307}, {0x1E23, 0x68}, {0x1E23, 0x307}, -{0x1E24, 0x48}, {0x1E24, 0x323}, {0x1E25, 0x68}, {0x1E25, 0x323}, {0x1E26, 0x48}, {0x1E26, 0x308}, {0x1E27, 0x68}, {0x1E27, 0x308}, {0x1E28, 0x48}, {0x1E28, 0x327}, {0x1E29, 0x68}, {0x1E29, 0x327}, -{0x1E2A, 0x48}, {0x1E2A, 0x32E}, {0x1E2B, 0x68}, {0x1E2B, 0x32E}, {0x1E2C, 0x49}, {0x1E2C, 0x330}, {0x1E2D, 0x69}, {0x1E2D, 0x330}, {0x1E2E, 0x49}, {0x1E2E, 0x308}, {0x1E2E, 0x301}, {0x1E2F, 0x69}, -{0x1E2F, 0x308}, {0x1E2F, 0x301}, {0x1E30, 0x4B}, {0x1E30, 0x301}, {0x1E31, 0x6B}, {0x1E31, 0x301}, {0x1E32, 0x4B}, {0x1E32, 0x323}, {0x1E33, 0x6B}, {0x1E33, 0x323}, {0x1E34, 0x4B}, {0x1E34, 0x331}, -{0x1E35, 0x6B}, {0x1E35, 0x331}, {0x1E36, 0x4C}, {0x1E36, 0x323}, {0x1E37, 0x6C}, {0x1E37, 0x323}, {0x1E38, 0x4C}, {0x1E38, 0x323}, {0x1E38, 0x304}, {0x1E39, 0x6C}, {0x1E39, 0x323}, {0x1E39, 0x304}, -{0x1E3A, 0x4C}, {0x1E3A, 0x331}, {0x1E3B, 0x6C}, {0x1E3B, 0x331}, {0x1E3C, 0x4C}, {0x1E3C, 0x32D}, {0x1E3D, 0x6C}, {0x1E3D, 0x32D}, {0x1E3E, 0x4D}, {0x1E3E, 0x301}, {0x1E3F, 0x6D}, {0x1E3F, 0x301}, -{0x1E40, 0x4D}, {0x1E40, 0x307}, {0x1E41, 0x6D}, {0x1E41, 0x307}, {0x1E42, 0x4D}, {0x1E42, 0x323}, {0x1E43, 0x6D}, {0x1E43, 0x323}, {0x1E44, 0x4E}, {0x1E44, 0x307}, {0x1E45, 0x6E}, {0x1E45, 0x307}, -{0x1E46, 0x4E}, {0x1E46, 0x323}, {0x1E47, 0x6E}, {0x1E47, 0x323}, {0x1E48, 0x4E}, {0x1E48, 0x331}, {0x1E49, 0x6E}, {0x1E49, 0x331}, {0x1E4A, 0x4E}, {0x1E4A, 0x32D}, {0x1E4B, 0x6E}, {0x1E4B, 0x32D}, -{0x1E4C, 0x4F}, {0x1E4C, 0x303}, {0x1E4C, 0x301}, {0x1E4D, 0x6F}, {0x1E4D, 0x303}, {0x1E4D, 0x301}, {0x1E4E, 0x4F}, {0x1E4E, 0x303}, {0x1E4E, 0x308}, {0x1E4F, 0x6F}, {0x1E4F, 0x303}, {0x1E4F, 0x308}, -{0x1E50, 0x4F}, {0x1E50, 0x304}, {0x1E50, 0x300}, {0x1E51, 0x6F}, {0x1E51, 0x304}, {0x1E51, 0x300}, {0x1E52, 0x4F}, {0x1E52, 0x304}, {0x1E52, 0x301}, {0x1E53, 0x6F}, {0x1E53, 0x304}, {0x1E53, 0x301}, -{0x1E54, 0x50}, {0x1E54, 0x301}, {0x1E55, 0x70}, {0x1E55, 0x301}, {0x1E56, 0x50}, {0x1E56, 0x307}, {0x1E57, 0x70}, {0x1E57, 0x307}, {0x1E58, 0x52}, {0x1E58, 0x307}, {0x1E59, 0x72}, {0x1E59, 0x307}, -{0x1E5A, 0x52}, {0x1E5A, 0x323}, {0x1E5B, 0x72}, {0x1E5B, 0x323}, {0x1E5C, 0x52}, {0x1E5C, 0x323}, {0x1E5C, 0x304}, {0x1E5D, 0x72}, {0x1E5D, 0x323}, {0x1E5D, 0x304}, {0x1E5E, 0x52}, {0x1E5E, 0x331}, -{0x1E5F, 0x72}, {0x1E5F, 0x331}, {0x1E60, 0x53}, {0x1E60, 0x307}, {0x1E61, 0x73}, {0x1E61, 0x307}, {0x1E62, 0x53}, {0x1E62, 0x323}, {0x1E63, 0x73}, {0x1E63, 0x323}, {0x1E64, 0x53}, {0x1E64, 0x301}, -{0x1E64, 0x307}, {0x1E65, 0x73}, {0x1E65, 0x301}, {0x1E65, 0x307}, {0x1E66, 0x53}, {0x1E66, 0x30C}, {0x1E66, 0x307}, {0x1E67, 0x73}, {0x1E67, 0x30C}, {0x1E67, 0x307}, {0x1E68, 0x53}, {0x1E68, 0x323}, -{0x1E68, 0x307}, {0x1E69, 0x73}, {0x1E69, 0x323}, {0x1E69, 0x307}, {0x1E6A, 0x54}, {0x1E6A, 0x307}, {0x1E6B, 0x74}, {0x1E6B, 0x307}, {0x1E6C, 0x54}, {0x1E6C, 0x323}, {0x1E6D, 0x74}, {0x1E6D, 0x323}, -{0x1E6E, 0x54}, {0x1E6E, 0x331}, {0x1E6F, 0x74}, {0x1E6F, 0x331}, {0x1E70, 0x54}, {0x1E70, 0x32D}, {0x1E71, 0x74}, {0x1E71, 0x32D}, {0x1E72, 0x55}, {0x1E72, 0x324}, {0x1E73, 0x75}, {0x1E73, 0x324}, -{0x1E74, 0x55}, {0x1E74, 0x330}, {0x1E75, 0x75}, {0x1E75, 0x330}, {0x1E76, 0x55}, {0x1E76, 0x32D}, {0x1E77, 0x75}, {0x1E77, 0x32D}, {0x1E78, 0x55}, {0x1E78, 0x303}, {0x1E78, 0x301}, {0x1E79, 0x75}, -{0x1E79, 0x303}, {0x1E79, 0x301}, {0x1E7A, 0x55}, {0x1E7A, 0x304}, {0x1E7A, 0x308}, {0x1E7B, 0x75}, {0x1E7B, 0x304}, {0x1E7B, 0x308}, {0x1E7C, 0x56}, {0x1E7C, 0x303}, {0x1E7D, 0x76}, {0x1E7D, 0x303}, -{0x1E7E, 0x56}, {0x1E7E, 0x323}, {0x1E7F, 0x76}, {0x1E7F, 0x323}, {0x1E80, 0x57}, {0x1E80, 0x300}, {0x1E81, 0x77}, {0x1E81, 0x300}, {0x1E82, 0x57}, {0x1E82, 0x301}, {0x1E83, 0x77}, {0x1E83, 0x301}, -{0x1E84, 0x57}, {0x1E84, 0x308}, {0x1E85, 0x77}, {0x1E85, 0x308}, {0x1E86, 0x57}, {0x1E86, 0x307}, {0x1E87, 0x77}, {0x1E87, 0x307}, {0x1E88, 0x57}, {0x1E88, 0x323}, {0x1E89, 0x77}, {0x1E89, 0x323}, -{0x1E8A, 0x58}, {0x1E8A, 0x307}, {0x1E8B, 0x78}, {0x1E8B, 0x307}, {0x1E8C, 0x58}, {0x1E8C, 0x308}, {0x1E8D, 0x78}, {0x1E8D, 0x308}, {0x1E8E, 0x59}, {0x1E8E, 0x307}, {0x1E8F, 0x79}, {0x1E8F, 0x307}, -{0x1E90, 0x5A}, {0x1E90, 0x302}, {0x1E91, 0x7A}, {0x1E91, 0x302}, {0x1E92, 0x5A}, {0x1E92, 0x323}, {0x1E93, 0x7A}, {0x1E93, 0x323}, {0x1E94, 0x5A}, {0x1E94, 0x331}, {0x1E95, 0x7A}, {0x1E95, 0x331}, -{0x1E96, 0x68}, {0x1E96, 0x331}, {0x1E97, 0x74}, {0x1E97, 0x308}, {0x1E98, 0x77}, {0x1E98, 0x30A}, {0x1E99, 0x79}, {0x1E99, 0x30A}, {0x1E9B, 0x17F}, {0x1E9B, 0x307}, {0x1EA0, 0x41}, {0x1EA0, 0x323}, -{0x1EA1, 0x61}, {0x1EA1, 0x323}, {0x1EA2, 0x41}, {0x1EA2, 0x309}, {0x1EA3, 0x61}, {0x1EA3, 0x309}, {0x1EA4, 0x41}, {0x1EA4, 0x302}, {0x1EA4, 0x301}, {0x1EA5, 0x61}, {0x1EA5, 0x302}, {0x1EA5, 0x301}, -{0x1EA6, 0x41}, {0x1EA6, 0x302}, {0x1EA6, 0x300}, {0x1EA7, 0x61}, {0x1EA7, 0x302}, {0x1EA7, 0x300}, {0x1EA8, 0x41}, {0x1EA8, 0x302}, {0x1EA8, 0x309}, {0x1EA9, 0x61}, {0x1EA9, 0x302}, {0x1EA9, 0x309}, -{0x1EAA, 0x41}, {0x1EAA, 0x302}, {0x1EAA, 0x303}, {0x1EAB, 0x61}, {0x1EAB, 0x302}, {0x1EAB, 0x303}, {0x1EAC, 0x41}, {0x1EAC, 0x323}, {0x1EAC, 0x302}, {0x1EAD, 0x61}, {0x1EAD, 0x323}, {0x1EAD, 0x302}, -{0x1EAE, 0x41}, {0x1EAE, 0x306}, {0x1EAE, 0x301}, {0x1EAF, 0x61}, {0x1EAF, 0x306}, {0x1EAF, 0x301}, {0x1EB0, 0x41}, {0x1EB0, 0x306}, {0x1EB0, 0x300}, {0x1EB1, 0x61}, {0x1EB1, 0x306}, {0x1EB1, 0x300}, -{0x1EB2, 0x41}, {0x1EB2, 0x306}, {0x1EB2, 0x309}, {0x1EB3, 0x61}, {0x1EB3, 0x306}, {0x1EB3, 0x309}, {0x1EB4, 0x41}, {0x1EB4, 0x306}, {0x1EB4, 0x303}, {0x1EB5, 0x61}, {0x1EB5, 0x306}, {0x1EB5, 0x303}, -{0x1EB6, 0x41}, {0x1EB6, 0x323}, {0x1EB6, 0x306}, {0x1EB7, 0x61}, {0x1EB7, 0x323}, {0x1EB7, 0x306}, {0x1EB8, 0x45}, {0x1EB8, 0x323}, {0x1EB9, 0x65}, {0x1EB9, 0x323}, {0x1EBA, 0x45}, {0x1EBA, 0x309}, -{0x1EBB, 0x65}, {0x1EBB, 0x309}, {0x1EBC, 0x45}, {0x1EBC, 0x303}, {0x1EBD, 0x65}, {0x1EBD, 0x303}, {0x1EBE, 0x45}, {0x1EBE, 0x302}, {0x1EBE, 0x301}, {0x1EBF, 0x65}, {0x1EBF, 0x302}, {0x1EBF, 0x301}, -{0x1EC0, 0x45}, {0x1EC0, 0x302}, {0x1EC0, 0x300}, {0x1EC1, 0x65}, {0x1EC1, 0x302}, {0x1EC1, 0x300}, {0x1EC2, 0x45}, {0x1EC2, 0x302}, {0x1EC2, 0x309}, {0x1EC3, 0x65}, {0x1EC3, 0x302}, {0x1EC3, 0x309}, -{0x1EC4, 0x45}, {0x1EC4, 0x302}, {0x1EC4, 0x303}, {0x1EC5, 0x65}, {0x1EC5, 0x302}, {0x1EC5, 0x303}, {0x1EC6, 0x45}, {0x1EC6, 0x323}, {0x1EC6, 0x302}, {0x1EC7, 0x65}, {0x1EC7, 0x323}, {0x1EC7, 0x302}, -{0x1EC8, 0x49}, {0x1EC8, 0x309}, {0x1EC9, 0x69}, {0x1EC9, 0x309}, {0x1ECA, 0x49}, {0x1ECA, 0x323}, {0x1ECB, 0x69}, {0x1ECB, 0x323}, {0x1ECC, 0x4F}, {0x1ECC, 0x323}, {0x1ECD, 0x6F}, {0x1ECD, 0x323}, -{0x1ECE, 0x4F}, {0x1ECE, 0x309}, {0x1ECF, 0x6F}, {0x1ECF, 0x309}, {0x1ED0, 0x4F}, {0x1ED0, 0x302}, {0x1ED0, 0x301}, {0x1ED1, 0x6F}, {0x1ED1, 0x302}, {0x1ED1, 0x301}, {0x1ED2, 0x4F}, {0x1ED2, 0x302}, -{0x1ED2, 0x300}, {0x1ED3, 0x6F}, {0x1ED3, 0x302}, {0x1ED3, 0x300}, {0x1ED4, 0x4F}, {0x1ED4, 0x302}, {0x1ED4, 0x309}, {0x1ED5, 0x6F}, {0x1ED5, 0x302}, {0x1ED5, 0x309}, {0x1ED6, 0x4F}, {0x1ED6, 0x302}, -{0x1ED6, 0x303}, {0x1ED7, 0x6F}, {0x1ED7, 0x302}, {0x1ED7, 0x303}, {0x1ED8, 0x4F}, {0x1ED8, 0x323}, {0x1ED8, 0x302}, {0x1ED9, 0x6F}, {0x1ED9, 0x323}, {0x1ED9, 0x302}, {0x1EDA, 0x4F}, {0x1EDA, 0x31B}, -{0x1EDA, 0x301}, {0x1EDB, 0x6F}, {0x1EDB, 0x31B}, {0x1EDB, 0x301}, {0x1EDC, 0x4F}, {0x1EDC, 0x31B}, {0x1EDC, 0x300}, {0x1EDD, 0x6F}, {0x1EDD, 0x31B}, {0x1EDD, 0x300}, {0x1EDE, 0x4F}, {0x1EDE, 0x31B}, -{0x1EDE, 0x309}, {0x1EDF, 0x6F}, {0x1EDF, 0x31B}, {0x1EDF, 0x309}, {0x1EE0, 0x4F}, {0x1EE0, 0x31B}, {0x1EE0, 0x303}, {0x1EE1, 0x6F}, {0x1EE1, 0x31B}, {0x1EE1, 0x303}, {0x1EE2, 0x4F}, {0x1EE2, 0x31B}, -{0x1EE2, 0x323}, {0x1EE3, 0x6F}, {0x1EE3, 0x31B}, {0x1EE3, 0x323}, {0x1EE4, 0x55}, {0x1EE4, 0x323}, {0x1EE5, 0x75}, {0x1EE5, 0x323}, {0x1EE6, 0x55}, {0x1EE6, 0x309}, {0x1EE7, 0x75}, {0x1EE7, 0x309}, -{0x1EE8, 0x55}, {0x1EE8, 0x31B}, {0x1EE8, 0x301}, {0x1EE9, 0x75}, {0x1EE9, 0x31B}, {0x1EE9, 0x301}, {0x1EEA, 0x55}, {0x1EEA, 0x31B}, {0x1EEA, 0x300}, {0x1EEB, 0x75}, {0x1EEB, 0x31B}, {0x1EEB, 0x300}, -{0x1EEC, 0x55}, {0x1EEC, 0x31B}, {0x1EEC, 0x309}, {0x1EED, 0x75}, {0x1EED, 0x31B}, {0x1EED, 0x309}, {0x1EEE, 0x55}, {0x1EEE, 0x31B}, {0x1EEE, 0x303}, {0x1EEF, 0x75}, {0x1EEF, 0x31B}, {0x1EEF, 0x303}, -{0x1EF0, 0x55}, {0x1EF0, 0x31B}, {0x1EF0, 0x323}, {0x1EF1, 0x75}, {0x1EF1, 0x31B}, {0x1EF1, 0x323}, {0x1EF2, 0x59}, {0x1EF2, 0x300}, {0x1EF3, 0x79}, {0x1EF3, 0x300}, {0x1EF4, 0x59}, {0x1EF4, 0x323}, -{0x1EF5, 0x79}, {0x1EF5, 0x323}, {0x1EF6, 0x59}, {0x1EF6, 0x309}, {0x1EF7, 0x79}, {0x1EF7, 0x309}, {0x1EF8, 0x59}, {0x1EF8, 0x303}, {0x1EF9, 0x79}, {0x1EF9, 0x303}, {0x1F00, 0x3B1}, {0x1F00, 0x313}, -{0x1F01, 0x3B1}, {0x1F01, 0x314}, {0x1F02, 0x3B1}, {0x1F02, 0x313}, {0x1F02, 0x300}, {0x1F03, 0x3B1}, {0x1F03, 0x314}, {0x1F03, 0x300}, {0x1F04, 0x3B1}, {0x1F04, 0x313}, {0x1F04, 0x301}, -{0x1F05, 0x3B1}, {0x1F05, 0x314}, {0x1F05, 0x301}, {0x1F06, 0x3B1}, {0x1F06, 0x313}, {0x1F06, 0x342}, {0x1F07, 0x3B1}, {0x1F07, 0x314}, {0x1F07, 0x342}, {0x1F08, 0x391}, {0x1F08, 0x313}, -{0x1F09, 0x391}, {0x1F09, 0x314}, {0x1F0A, 0x391}, {0x1F0A, 0x313}, {0x1F0A, 0x300}, {0x1F0B, 0x391}, {0x1F0B, 0x314}, {0x1F0B, 0x300}, {0x1F0C, 0x391}, {0x1F0C, 0x313}, {0x1F0C, 0x301}, -{0x1F0D, 0x391}, {0x1F0D, 0x314}, {0x1F0D, 0x301}, {0x1F0E, 0x391}, {0x1F0E, 0x313}, {0x1F0E, 0x342}, {0x1F0F, 0x391}, {0x1F0F, 0x314}, {0x1F0F, 0x342}, {0x1F10, 0x3B5}, {0x1F10, 0x313}, -{0x1F11, 0x3B5}, {0x1F11, 0x314}, {0x1F12, 0x3B5}, {0x1F12, 0x313}, {0x1F12, 0x300}, {0x1F13, 0x3B5}, {0x1F13, 0x314}, {0x1F13, 0x300}, {0x1F14, 0x3B5}, {0x1F14, 0x313}, {0x1F14, 0x301}, -{0x1F15, 0x3B5}, {0x1F15, 0x314}, {0x1F15, 0x301}, {0x1F18, 0x395}, {0x1F18, 0x313}, {0x1F19, 0x395}, {0x1F19, 0x314}, {0x1F1A, 0x395}, {0x1F1A, 0x313}, {0x1F1A, 0x300}, {0x1F1B, 0x395}, -{0x1F1B, 0x314}, {0x1F1B, 0x300}, {0x1F1C, 0x395}, {0x1F1C, 0x313}, {0x1F1C, 0x301}, {0x1F1D, 0x395}, {0x1F1D, 0x314}, {0x1F1D, 0x301}, {0x1F20, 0x3B7}, {0x1F20, 0x313}, {0x1F21, 0x3B7}, -{0x1F21, 0x314}, {0x1F22, 0x3B7}, {0x1F22, 0x313}, {0x1F22, 0x300}, {0x1F23, 0x3B7}, {0x1F23, 0x314}, {0x1F23, 0x300}, {0x1F24, 0x3B7}, {0x1F24, 0x313}, {0x1F24, 0x301}, {0x1F25, 0x3B7}, -{0x1F25, 0x314}, {0x1F25, 0x301}, {0x1F26, 0x3B7}, {0x1F26, 0x313}, {0x1F26, 0x342}, {0x1F27, 0x3B7}, {0x1F27, 0x314}, {0x1F27, 0x342}, {0x1F28, 0x397}, {0x1F28, 0x313}, {0x1F29, 0x397}, -{0x1F29, 0x314}, {0x1F2A, 0x397}, {0x1F2A, 0x313}, {0x1F2A, 0x300}, {0x1F2B, 0x397}, {0x1F2B, 0x314}, {0x1F2B, 0x300}, {0x1F2C, 0x397}, {0x1F2C, 0x313}, {0x1F2C, 0x301}, {0x1F2D, 0x397}, -{0x1F2D, 0x314}, {0x1F2D, 0x301}, {0x1F2E, 0x397}, {0x1F2E, 0x313}, {0x1F2E, 0x342}, {0x1F2F, 0x397}, {0x1F2F, 0x314}, {0x1F2F, 0x342}, {0x1F30, 0x3B9}, {0x1F30, 0x313}, {0x1F31, 0x3B9}, -{0x1F31, 0x314}, {0x1F32, 0x3B9}, {0x1F32, 0x313}, {0x1F32, 0x300}, {0x1F33, 0x3B9}, {0x1F33, 0x314}, {0x1F33, 0x300}, {0x1F34, 0x3B9}, {0x1F34, 0x313}, {0x1F34, 0x301}, {0x1F35, 0x3B9}, -{0x1F35, 0x314}, {0x1F35, 0x301}, {0x1F36, 0x3B9}, {0x1F36, 0x313}, {0x1F36, 0x342}, {0x1F37, 0x3B9}, {0x1F37, 0x314}, {0x1F37, 0x342}, {0x1F38, 0x399}, {0x1F38, 0x313}, {0x1F39, 0x399}, -{0x1F39, 0x314}, {0x1F3A, 0x399}, {0x1F3A, 0x313}, {0x1F3A, 0x300}, {0x1F3B, 0x399}, {0x1F3B, 0x314}, {0x1F3B, 0x300}, {0x1F3C, 0x399}, {0x1F3C, 0x313}, {0x1F3C, 0x301}, {0x1F3D, 0x399}, -{0x1F3D, 0x314}, {0x1F3D, 0x301}, {0x1F3E, 0x399}, {0x1F3E, 0x313}, {0x1F3E, 0x342}, {0x1F3F, 0x399}, {0x1F3F, 0x314}, {0x1F3F, 0x342}, {0x1F40, 0x3BF}, {0x1F40, 0x313}, {0x1F41, 0x3BF}, -{0x1F41, 0x314}, {0x1F42, 0x3BF}, {0x1F42, 0x313}, {0x1F42, 0x300}, {0x1F43, 0x3BF}, {0x1F43, 0x314}, {0x1F43, 0x300}, {0x1F44, 0x3BF}, {0x1F44, 0x313}, {0x1F44, 0x301}, {0x1F45, 0x3BF}, -{0x1F45, 0x314}, {0x1F45, 0x301}, {0x1F48, 0x39F}, {0x1F48, 0x313}, {0x1F49, 0x39F}, {0x1F49, 0x314}, {0x1F4A, 0x39F}, {0x1F4A, 0x313}, {0x1F4A, 0x300}, {0x1F4B, 0x39F}, {0x1F4B, 0x314}, -{0x1F4B, 0x300}, {0x1F4C, 0x39F}, {0x1F4C, 0x313}, {0x1F4C, 0x301}, {0x1F4D, 0x39F}, {0x1F4D, 0x314}, {0x1F4D, 0x301}, {0x1F50, 0x3C5}, {0x1F50, 0x313}, {0x1F51, 0x3C5}, {0x1F51, 0x314}, -{0x1F52, 0x3C5}, {0x1F52, 0x313}, {0x1F52, 0x300}, {0x1F53, 0x3C5}, {0x1F53, 0x314}, {0x1F53, 0x300}, {0x1F54, 0x3C5}, {0x1F54, 0x313}, {0x1F54, 0x301}, {0x1F55, 0x3C5}, {0x1F55, 0x314}, -{0x1F55, 0x301}, {0x1F56, 0x3C5}, {0x1F56, 0x313}, {0x1F56, 0x342}, {0x1F57, 0x3C5}, {0x1F57, 0x314}, {0x1F57, 0x342}, {0x1F59, 0x3A5}, {0x1F59, 0x314}, {0x1F5B, 0x3A5}, {0x1F5B, 0x314}, -{0x1F5B, 0x300}, {0x1F5D, 0x3A5}, {0x1F5D, 0x314}, {0x1F5D, 0x301}, {0x1F5F, 0x3A5}, {0x1F5F, 0x314}, {0x1F5F, 0x342}, {0x1F60, 0x3C9}, {0x1F60, 0x313}, {0x1F61, 0x3C9}, {0x1F61, 0x314}, -{0x1F62, 0x3C9}, {0x1F62, 0x313}, {0x1F62, 0x300}, {0x1F63, 0x3C9}, {0x1F63, 0x314}, {0x1F63, 0x300}, {0x1F64, 0x3C9}, {0x1F64, 0x313}, {0x1F64, 0x301}, {0x1F65, 0x3C9}, {0x1F65, 0x314}, -{0x1F65, 0x301}, {0x1F66, 0x3C9}, {0x1F66, 0x313}, {0x1F66, 0x342}, {0x1F67, 0x3C9}, {0x1F67, 0x314}, {0x1F67, 0x342}, {0x1F68, 0x3A9}, {0x1F68, 0x313}, {0x1F69, 0x3A9}, {0x1F69, 0x314}, -{0x1F6A, 0x3A9}, {0x1F6A, 0x313}, {0x1F6A, 0x300}, {0x1F6B, 0x3A9}, {0x1F6B, 0x314}, {0x1F6B, 0x300}, {0x1F6C, 0x3A9}, {0x1F6C, 0x313}, {0x1F6C, 0x301}, {0x1F6D, 0x3A9}, {0x1F6D, 0x314}, -{0x1F6D, 0x301}, {0x1F6E, 0x3A9}, {0x1F6E, 0x313}, {0x1F6E, 0x342}, {0x1F6F, 0x3A9}, {0x1F6F, 0x314}, {0x1F6F, 0x342}, {0x1F70, 0x3B1}, {0x1F70, 0x300}, {0x1F71, 0x3B1}, {0x1F71, 0x301}, -{0x1F72, 0x3B5}, {0x1F72, 0x300}, {0x1F73, 0x3B5}, {0x1F73, 0x301}, {0x1F74, 0x3B7}, {0x1F74, 0x300}, {0x1F75, 0x3B7}, {0x1F75, 0x301}, {0x1F76, 0x3B9}, {0x1F76, 0x300}, {0x1F77, 0x3B9}, -{0x1F77, 0x301}, {0x1F78, 0x3BF}, {0x1F78, 0x300}, {0x1F79, 0x3BF}, {0x1F79, 0x301}, {0x1F7A, 0x3C5}, {0x1F7A, 0x300}, {0x1F7B, 0x3C5}, {0x1F7B, 0x301}, {0x1F7C, 0x3C9}, {0x1F7C, 0x300}, -{0x1F7D, 0x3C9}, {0x1F7D, 0x301}, {0x1F80, 0x3B1}, {0x1F80, 0x313}, {0x1F80, 0x345}, {0x1F81, 0x3B1}, {0x1F81, 0x314}, {0x1F81, 0x345}, {0x1F82, 0x3B1}, {0x1F82, 0x313}, {0x1F82, 0x300}, -{0x1F82, 0x345}, {0x1F83, 0x3B1}, {0x1F83, 0x314}, {0x1F83, 0x300}, {0x1F83, 0x345}, {0x1F84, 0x3B1}, {0x1F84, 0x313}, {0x1F84, 0x301}, {0x1F84, 0x345}, {0x1F85, 0x3B1}, {0x1F85, 0x314}, -{0x1F85, 0x301}, {0x1F85, 0x345}, {0x1F86, 0x3B1}, {0x1F86, 0x313}, {0x1F86, 0x342}, {0x1F86, 0x345}, {0x1F87, 0x3B1}, {0x1F87, 0x314}, {0x1F87, 0x342}, {0x1F87, 0x345}, {0x1F88, 0x391}, -{0x1F88, 0x313}, {0x1F88, 0x345}, {0x1F89, 0x391}, {0x1F89, 0x314}, {0x1F89, 0x345}, {0x1F8A, 0x391}, {0x1F8A, 0x313}, {0x1F8A, 0x300}, {0x1F8A, 0x345}, {0x1F8B, 0x391}, {0x1F8B, 0x314}, -{0x1F8B, 0x300}, {0x1F8B, 0x345}, {0x1F8C, 0x391}, {0x1F8C, 0x313}, {0x1F8C, 0x301}, {0x1F8C, 0x345}, {0x1F8D, 0x391}, {0x1F8D, 0x314}, {0x1F8D, 0x301}, {0x1F8D, 0x345}, {0x1F8E, 0x391}, -{0x1F8E, 0x313}, {0x1F8E, 0x342}, {0x1F8E, 0x345}, {0x1F8F, 0x391}, {0x1F8F, 0x314}, {0x1F8F, 0x342}, {0x1F8F, 0x345}, {0x1F90, 0x3B7}, {0x1F90, 0x313}, {0x1F90, 0x345}, {0x1F91, 0x3B7}, -{0x1F91, 0x314}, {0x1F91, 0x345}, {0x1F92, 0x3B7}, {0x1F92, 0x313}, {0x1F92, 0x300}, {0x1F92, 0x345}, {0x1F93, 0x3B7}, {0x1F93, 0x314}, {0x1F93, 0x300}, {0x1F93, 0x345}, {0x1F94, 0x3B7}, -{0x1F94, 0x313}, {0x1F94, 0x301}, {0x1F94, 0x345}, {0x1F95, 0x3B7}, {0x1F95, 0x314}, {0x1F95, 0x301}, {0x1F95, 0x345}, {0x1F96, 0x3B7}, {0x1F96, 0x313}, {0x1F96, 0x342}, {0x1F96, 0x345}, -{0x1F97, 0x3B7}, {0x1F97, 0x314}, {0x1F97, 0x342}, {0x1F97, 0x345}, {0x1F98, 0x397}, {0x1F98, 0x313}, {0x1F98, 0x345}, {0x1F99, 0x397}, {0x1F99, 0x314}, {0x1F99, 0x345}, {0x1F9A, 0x397}, -{0x1F9A, 0x313}, {0x1F9A, 0x300}, {0x1F9A, 0x345}, {0x1F9B, 0x397}, {0x1F9B, 0x314}, {0x1F9B, 0x300}, {0x1F9B, 0x345}, {0x1F9C, 0x397}, {0x1F9C, 0x313}, {0x1F9C, 0x301}, {0x1F9C, 0x345}, -{0x1F9D, 0x397}, {0x1F9D, 0x314}, {0x1F9D, 0x301}, {0x1F9D, 0x345}, {0x1F9E, 0x397}, {0x1F9E, 0x313}, {0x1F9E, 0x342}, {0x1F9E, 0x345}, {0x1F9F, 0x397}, {0x1F9F, 0x314}, {0x1F9F, 0x342}, -{0x1F9F, 0x345}, {0x1FA0, 0x3C9}, {0x1FA0, 0x313}, {0x1FA0, 0x345}, {0x1FA1, 0x3C9}, {0x1FA1, 0x314}, {0x1FA1, 0x345}, {0x1FA2, 0x3C9}, {0x1FA2, 0x313}, {0x1FA2, 0x300}, {0x1FA2, 0x345}, -{0x1FA3, 0x3C9}, {0x1FA3, 0x314}, {0x1FA3, 0x300}, {0x1FA3, 0x345}, {0x1FA4, 0x3C9}, {0x1FA4, 0x313}, {0x1FA4, 0x301}, {0x1FA4, 0x345}, {0x1FA5, 0x3C9}, {0x1FA5, 0x314}, {0x1FA5, 0x301}, -{0x1FA5, 0x345}, {0x1FA6, 0x3C9}, {0x1FA6, 0x313}, {0x1FA6, 0x342}, {0x1FA6, 0x345}, {0x1FA7, 0x3C9}, {0x1FA7, 0x314}, {0x1FA7, 0x342}, {0x1FA7, 0x345}, {0x1FA8, 0x3A9}, {0x1FA8, 0x313}, -{0x1FA8, 0x345}, {0x1FA9, 0x3A9}, {0x1FA9, 0x314}, {0x1FA9, 0x345}, {0x1FAA, 0x3A9}, {0x1FAA, 0x313}, {0x1FAA, 0x300}, {0x1FAA, 0x345}, {0x1FAB, 0x3A9}, {0x1FAB, 0x314}, {0x1FAB, 0x300}, -{0x1FAB, 0x345}, {0x1FAC, 0x3A9}, {0x1FAC, 0x313}, {0x1FAC, 0x301}, {0x1FAC, 0x345}, {0x1FAD, 0x3A9}, {0x1FAD, 0x314}, {0x1FAD, 0x301}, {0x1FAD, 0x345}, {0x1FAE, 0x3A9}, {0x1FAE, 0x313}, -{0x1FAE, 0x342}, {0x1FAE, 0x345}, {0x1FAF, 0x3A9}, {0x1FAF, 0x314}, {0x1FAF, 0x342}, {0x1FAF, 0x345}, {0x1FB0, 0x3B1}, {0x1FB0, 0x306}, {0x1FB1, 0x3B1}, {0x1FB1, 0x304}, {0x1FB2, 0x3B1}, -{0x1FB2, 0x300}, {0x1FB2, 0x345}, {0x1FB3, 0x3B1}, {0x1FB3, 0x345}, {0x1FB4, 0x3B1}, {0x1FB4, 0x301}, {0x1FB4, 0x345}, {0x1FB6, 0x3B1}, {0x1FB6, 0x342}, {0x1FB7, 0x3B1}, {0x1FB7, 0x342}, -{0x1FB7, 0x345}, {0x1FB8, 0x391}, {0x1FB8, 0x306}, {0x1FB9, 0x391}, {0x1FB9, 0x304}, {0x1FBA, 0x391}, {0x1FBA, 0x300}, {0x1FBB, 0x391}, {0x1FBB, 0x301}, {0x1FBC, 0x391}, {0x1FBC, 0x345}, -{0x1FBE, 0x3B9}, {0x1FC1, 0xA8}, {0x1FC1, 0x342}, {0x1FC2, 0x3B7}, {0x1FC2, 0x300}, {0x1FC2, 0x345}, {0x1FC3, 0x3B7}, {0x1FC3, 0x345}, {0x1FC4, 0x3B7}, {0x1FC4, 0x301}, {0x1FC4, 0x345}, -{0x1FC6, 0x3B7}, {0x1FC6, 0x342}, {0x1FC7, 0x3B7}, {0x1FC7, 0x342}, {0x1FC7, 0x345}, {0x1FC8, 0x395}, {0x1FC8, 0x300}, {0x1FC9, 0x395}, {0x1FC9, 0x301}, {0x1FCA, 0x397}, {0x1FCA, 0x300}, -{0x1FCB, 0x397}, {0x1FCB, 0x301}, {0x1FCC, 0x397}, {0x1FCC, 0x345}, {0x1FCD, 0x1FBF}, {0x1FCD, 0x300}, {0x1FCE, 0x1FBF}, {0x1FCE, 0x301}, {0x1FCF, 0x1FBF}, {0x1FCF, 0x342}, {0x1FD0, 0x3B9}, -{0x1FD0, 0x306}, {0x1FD1, 0x3B9}, {0x1FD1, 0x304}, {0x1FD2, 0x3B9}, {0x1FD2, 0x308}, {0x1FD2, 0x300}, {0x1FD3, 0x3B9}, {0x1FD3, 0x308}, {0x1FD3, 0x301}, {0x1FD6, 0x3B9}, {0x1FD6, 0x342}, -{0x1FD7, 0x3B9}, {0x1FD7, 0x308}, {0x1FD7, 0x342}, {0x1FD8, 0x399}, {0x1FD8, 0x306}, {0x1FD9, 0x399}, {0x1FD9, 0x304}, {0x1FDA, 0x399}, {0x1FDA, 0x300}, {0x1FDB, 0x399}, {0x1FDB, 0x301}, -{0x1FDD, 0x1FFE}, {0x1FDD, 0x300}, {0x1FDE, 0x1FFE}, {0x1FDE, 0x301}, {0x1FDF, 0x1FFE}, {0x1FDF, 0x342}, {0x1FE0, 0x3C5}, {0x1FE0, 0x306}, {0x1FE1, 0x3C5}, {0x1FE1, 0x304}, {0x1FE2, 0x3C5}, -{0x1FE2, 0x308}, {0x1FE2, 0x300}, {0x1FE3, 0x3C5}, {0x1FE3, 0x308}, {0x1FE3, 0x301}, {0x1FE4, 0x3C1}, {0x1FE4, 0x313}, {0x1FE5, 0x3C1}, {0x1FE5, 0x314}, {0x1FE6, 0x3C5}, {0x1FE6, 0x342}, -{0x1FE7, 0x3C5}, {0x1FE7, 0x308}, {0x1FE7, 0x342}, {0x1FE8, 0x3A5}, {0x1FE8, 0x306}, {0x1FE9, 0x3A5}, {0x1FE9, 0x304}, {0x1FEA, 0x3A5}, {0x1FEA, 0x300}, {0x1FEB, 0x3A5}, {0x1FEB, 0x301}, -{0x1FEC, 0x3A1}, {0x1FEC, 0x314}, {0x1FED, 0xA8}, {0x1FED, 0x300}, {0x1FEE, 0xA8}, {0x1FEE, 0x301}, {0x1FEF, 0x60}, {0x1FF2, 0x3C9}, {0x1FF2, 0x300}, {0x1FF2, 0x345}, {0x1FF3, 0x3C9}, {0x1FF3, 0x345}, -{0x1FF4, 0x3C9}, {0x1FF4, 0x301}, {0x1FF4, 0x345}, {0x1FF6, 0x3C9}, {0x1FF6, 0x342}, {0x1FF7, 0x3C9}, {0x1FF7, 0x342}, {0x1FF7, 0x345}, {0x1FF8, 0x39F}, {0x1FF8, 0x300}, {0x1FF9, 0x39F}, -{0x1FF9, 0x301}, {0x1FFA, 0x3A9}, {0x1FFA, 0x300}, {0x1FFB, 0x3A9}, {0x1FFB, 0x301}, {0x1FFC, 0x3A9}, {0x1FFC, 0x345}, {0x1FFD, 0xB4}, {0x2000, 0x2002}, {0x2001, 0x2003}, {0x2126, 0x3A9}, -{0x212A, 0x4B}, {0x212B, 0x41}, {0x212B, 0x30A}, {0x219A, 0x2190}, {0x219A, 0x338}, {0x219B, 0x2192}, {0x219B, 0x338}, {0x21AE, 0x2194}, {0x21AE, 0x338}, {0x21CD, 0x21D0}, {0x21CD, 0x338}, -{0x21CE, 0x21D4}, {0x21CE, 0x338}, {0x21CF, 0x21D2}, {0x21CF, 0x338}, {0x2204, 0x2203}, {0x2204, 0x338}, {0x2209, 0x2208}, {0x2209, 0x338}, {0x220C, 0x220B}, {0x220C, 0x338}, {0x2224, 0x2223}, -{0x2224, 0x338}, {0x2226, 0x2225}, {0x2226, 0x338}, {0x2241, 0x223C}, {0x2241, 0x338}, {0x2244, 0x2243}, {0x2244, 0x338}, {0x2247, 0x2245}, {0x2247, 0x338}, {0x2249, 0x2248}, {0x2249, 0x338}, -{0x2260, 0x3D}, {0x2260, 0x338}, {0x2262, 0x2261}, {0x2262, 0x338}, {0x226D, 0x224D}, {0x226D, 0x338}, {0x226E, 0x3C}, {0x226E, 0x338}, {0x226F, 0x3E}, {0x226F, 0x338}, {0x2270, 0x2264}, -{0x2270, 0x338}, {0x2271, 0x2265}, {0x2271, 0x338}, {0x2274, 0x2272}, {0x2274, 0x338}, {0x2275, 0x2273}, {0x2275, 0x338}, {0x2278, 0x2276}, {0x2278, 0x338}, {0x2279, 0x2277}, {0x2279, 0x338}, -{0x2280, 0x227A}, {0x2280, 0x338}, {0x2281, 0x227B}, {0x2281, 0x338}, {0x2284, 0x2282}, {0x2284, 0x338}, {0x2285, 0x2283}, {0x2285, 0x338}, {0x2288, 0x2286}, {0x2288, 0x338}, {0x2289, 0x2287}, -{0x2289, 0x338}, {0x22AC, 0x22A2}, {0x22AC, 0x338}, {0x22AD, 0x22A8}, {0x22AD, 0x338}, {0x22AE, 0x22A9}, {0x22AE, 0x338}, {0x22AF, 0x22AB}, {0x22AF, 0x338}, {0x22E0, 0x227C}, {0x22E0, 0x338}, -{0x22E1, 0x227D}, {0x22E1, 0x338}, {0x22E2, 0x2291}, {0x22E2, 0x338}, {0x22E3, 0x2292}, {0x22E3, 0x338}, {0x22EA, 0x22B2}, {0x22EA, 0x338}, {0x22EB, 0x22B3}, {0x22EB, 0x338}, {0x22EC, 0x22B4}, -{0x22EC, 0x338}, {0x22ED, 0x22B5}, {0x22ED, 0x338}, {0x2329, 0x3008}, {0x232A, 0x3009}, {0x2ADC, 0x2ADD}, {0x2ADC, 0x338}, {0x304C, 0x304B}, {0x304C, 0x3099}, {0x304E, 0x304D}, {0x304E, 0x3099}, -{0x3050, 0x304F}, {0x3050, 0x3099}, {0x3052, 0x3051}, {0x3052, 0x3099}, {0x3054, 0x3053}, {0x3054, 0x3099}, {0x3056, 0x3055}, {0x3056, 0x3099}, {0x3058, 0x3057}, {0x3058, 0x3099}, {0x305A, 0x3059}, -{0x305A, 0x3099}, {0x305C, 0x305B}, {0x305C, 0x3099}, {0x305E, 0x305D}, {0x305E, 0x3099}, {0x3060, 0x305F}, {0x3060, 0x3099}, {0x3062, 0x3061}, {0x3062, 0x3099}, {0x3065, 0x3064}, {0x3065, 0x3099}, -{0x3067, 0x3066}, {0x3067, 0x3099}, {0x3069, 0x3068}, {0x3069, 0x3099}, {0x3070, 0x306F}, {0x3070, 0x3099}, {0x3071, 0x306F}, {0x3071, 0x309A}, {0x3073, 0x3072}, {0x3073, 0x3099}, {0x3074, 0x3072}, -{0x3074, 0x309A}, {0x3076, 0x3075}, {0x3076, 0x3099}, {0x3077, 0x3075}, {0x3077, 0x309A}, {0x3079, 0x3078}, {0x3079, 0x3099}, {0x307A, 0x3078}, {0x307A, 0x309A}, {0x307C, 0x307B}, {0x307C, 0x3099}, -{0x307D, 0x307B}, {0x307D, 0x309A}, {0x3094, 0x3046}, {0x3094, 0x3099}, {0x309E, 0x309D}, {0x309E, 0x3099}, {0x30AC, 0x30AB}, {0x30AC, 0x3099}, {0x30AE, 0x30AD}, {0x30AE, 0x3099}, {0x30B0, 0x30AF}, -{0x30B0, 0x3099}, {0x30B2, 0x30B1}, {0x30B2, 0x3099}, {0x30B4, 0x30B3}, {0x30B4, 0x3099}, {0x30B6, 0x30B5}, {0x30B6, 0x3099}, {0x30B8, 0x30B7}, {0x30B8, 0x3099}, {0x30BA, 0x30B9}, {0x30BA, 0x3099}, -{0x30BC, 0x30BB}, {0x30BC, 0x3099}, {0x30BE, 0x30BD}, {0x30BE, 0x3099}, {0x30C0, 0x30BF}, {0x30C0, 0x3099}, {0x30C2, 0x30C1}, {0x30C2, 0x3099}, {0x30C5, 0x30C4}, {0x30C5, 0x3099}, {0x30C7, 0x30C6}, -{0x30C7, 0x3099}, {0x30C9, 0x30C8}, {0x30C9, 0x3099}, {0x30D0, 0x30CF}, {0x30D0, 0x3099}, {0x30D1, 0x30CF}, {0x30D1, 0x309A}, {0x30D3, 0x30D2}, {0x30D3, 0x3099}, {0x30D4, 0x30D2}, {0x30D4, 0x309A}, -{0x30D6, 0x30D5}, {0x30D6, 0x3099}, {0x30D7, 0x30D5}, {0x30D7, 0x309A}, {0x30D9, 0x30D8}, {0x30D9, 0x3099}, {0x30DA, 0x30D8}, {0x30DA, 0x309A}, {0x30DC, 0x30DB}, {0x30DC, 0x3099}, {0x30DD, 0x30DB}, -{0x30DD, 0x309A}, {0x30F4, 0x30A6}, {0x30F4, 0x3099}, {0x30F7, 0x30EF}, {0x30F7, 0x3099}, {0x30F8, 0x30F0}, {0x30F8, 0x3099}, {0x30F9, 0x30F1}, {0x30F9, 0x3099}, {0x30FA, 0x30F2}, {0x30FA, 0x3099}, -{0x30FE, 0x30FD}, {0x30FE, 0x3099}, {0xF900, 0x8C48}, {0xF901, 0x66F4}, {0xF902, 0x8ECA}, {0xF903, 0x8CC8}, {0xF904, 0x6ED1}, {0xF905, 0x4E32}, {0xF906, 0x53E5}, {0xF907, 0x9F9C}, {0xF908, 0x9F9C}, -{0xF909, 0x5951}, {0xF90A, 0x91D1}, {0xF90B, 0x5587}, {0xF90C, 0x5948}, {0xF90D, 0x61F6}, {0xF90E, 0x7669}, {0xF90F, 0x7F85}, {0xF910, 0x863F}, {0xF911, 0x87BA}, {0xF912, 0x88F8}, {0xF913, 0x908F}, -{0xF914, 0x6A02}, {0xF915, 0x6D1B}, {0xF916, 0x70D9}, {0xF917, 0x73DE}, {0xF918, 0x843D}, {0xF919, 0x916A}, {0xF91A, 0x99F1}, {0xF91B, 0x4E82}, {0xF91C, 0x5375}, {0xF91D, 0x6B04}, {0xF91E, 0x721B}, -{0xF91F, 0x862D}, {0xF920, 0x9E1E}, {0xF921, 0x5D50}, {0xF922, 0x6FEB}, {0xF923, 0x85CD}, {0xF924, 0x8964}, {0xF925, 0x62C9}, {0xF926, 0x81D8}, {0xF927, 0x881F}, {0xF928, 0x5ECA}, {0xF929, 0x6717}, -{0xF92A, 0x6D6A}, {0xF92B, 0x72FC}, {0xF92C, 0x90CE}, {0xF92D, 0x4F86}, {0xF92E, 0x51B7}, {0xF92F, 0x52DE}, {0xF930, 0x64C4}, {0xF931, 0x6AD3}, {0xF932, 0x7210}, {0xF933, 0x76E7}, {0xF934, 0x8001}, -{0xF935, 0x8606}, {0xF936, 0x865C}, {0xF937, 0x8DEF}, {0xF938, 0x9732}, {0xF939, 0x9B6F}, {0xF93A, 0x9DFA}, {0xF93B, 0x788C}, {0xF93C, 0x797F}, {0xF93D, 0x7DA0}, {0xF93E, 0x83C9}, {0xF93F, 0x9304}, -{0xF940, 0x9E7F}, {0xF941, 0x8AD6}, {0xF942, 0x58DF}, {0xF943, 0x5F04}, {0xF944, 0x7C60}, {0xF945, 0x807E}, {0xF946, 0x7262}, {0xF947, 0x78CA}, {0xF948, 0x8CC2}, {0xF949, 0x96F7}, {0xF94A, 0x58D8}, -{0xF94B, 0x5C62}, {0xF94C, 0x6A13}, {0xF94D, 0x6DDA}, {0xF94E, 0x6F0F}, {0xF94F, 0x7D2F}, {0xF950, 0x7E37}, {0xF951, 0x964B}, {0xF952, 0x52D2}, {0xF953, 0x808B}, {0xF954, 0x51DC}, {0xF955, 0x51CC}, -{0xF956, 0x7A1C}, {0xF957, 0x7DBE}, {0xF958, 0x83F1}, {0xF959, 0x9675}, {0xF95A, 0x8B80}, {0xF95B, 0x62CF}, {0xF95C, 0x6A02}, {0xF95D, 0x8AFE}, {0xF95E, 0x4E39}, {0xF95F, 0x5BE7}, {0xF960, 0x6012}, -{0xF961, 0x7387}, {0xF962, 0x7570}, {0xF963, 0x5317}, {0xF964, 0x78FB}, {0xF965, 0x4FBF}, {0xF966, 0x5FA9}, {0xF967, 0x4E0D}, {0xF968, 0x6CCC}, {0xF969, 0x6578}, {0xF96A, 0x7D22}, {0xF96B, 0x53C3}, -{0xF96C, 0x585E}, {0xF96D, 0x7701}, {0xF96E, 0x8449}, {0xF96F, 0x8AAA}, {0xF970, 0x6BBA}, {0xF971, 0x8FB0}, {0xF972, 0x6C88}, {0xF973, 0x62FE}, {0xF974, 0x82E5}, {0xF975, 0x63A0}, {0xF976, 0x7565}, -{0xF977, 0x4EAE}, {0xF978, 0x5169}, {0xF979, 0x51C9}, {0xF97A, 0x6881}, {0xF97B, 0x7CE7}, {0xF97C, 0x826F}, {0xF97D, 0x8AD2}, {0xF97E, 0x91CF}, {0xF97F, 0x52F5}, {0xF980, 0x5442}, {0xF981, 0x5973}, -{0xF982, 0x5EEC}, {0xF983, 0x65C5}, {0xF984, 0x6FFE}, {0xF985, 0x792A}, {0xF986, 0x95AD}, {0xF987, 0x9A6A}, {0xF988, 0x9E97}, {0xF989, 0x9ECE}, {0xF98A, 0x529B}, {0xF98B, 0x66C6}, {0xF98C, 0x6B77}, -{0xF98D, 0x8F62}, {0xF98E, 0x5E74}, {0xF98F, 0x6190}, {0xF990, 0x6200}, {0xF991, 0x649A}, {0xF992, 0x6F23}, {0xF993, 0x7149}, {0xF994, 0x7489}, {0xF995, 0x79CA}, {0xF996, 0x7DF4}, {0xF997, 0x806F}, -{0xF998, 0x8F26}, {0xF999, 0x84EE}, {0xF99A, 0x9023}, {0xF99B, 0x934A}, {0xF99C, 0x5217}, {0xF99D, 0x52A3}, {0xF99E, 0x54BD}, {0xF99F, 0x70C8}, {0xF9A0, 0x88C2}, {0xF9A1, 0x8AAA}, {0xF9A2, 0x5EC9}, -{0xF9A3, 0x5FF5}, {0xF9A4, 0x637B}, {0xF9A5, 0x6BAE}, {0xF9A6, 0x7C3E}, {0xF9A7, 0x7375}, {0xF9A8, 0x4EE4}, {0xF9A9, 0x56F9}, {0xF9AA, 0x5BE7}, {0xF9AB, 0x5DBA}, {0xF9AC, 0x601C}, {0xF9AD, 0x73B2}, -{0xF9AE, 0x7469}, {0xF9AF, 0x7F9A}, {0xF9B0, 0x8046}, {0xF9B1, 0x9234}, {0xF9B2, 0x96F6}, {0xF9B3, 0x9748}, {0xF9B4, 0x9818}, {0xF9B5, 0x4F8B}, {0xF9B6, 0x79AE}, {0xF9B7, 0x91B4}, {0xF9B8, 0x96B8}, -{0xF9B9, 0x60E1}, {0xF9BA, 0x4E86}, {0xF9BB, 0x50DA}, {0xF9BC, 0x5BEE}, {0xF9BD, 0x5C3F}, {0xF9BE, 0x6599}, {0xF9BF, 0x6A02}, {0xF9C0, 0x71CE}, {0xF9C1, 0x7642}, {0xF9C2, 0x84FC}, {0xF9C3, 0x907C}, -{0xF9C4, 0x9F8D}, {0xF9C5, 0x6688}, {0xF9C6, 0x962E}, {0xF9C7, 0x5289}, {0xF9C8, 0x677B}, {0xF9C9, 0x67F3}, {0xF9CA, 0x6D41}, {0xF9CB, 0x6E9C}, {0xF9CC, 0x7409}, {0xF9CD, 0x7559}, {0xF9CE, 0x786B}, -{0xF9CF, 0x7D10}, {0xF9D0, 0x985E}, {0xF9D1, 0x516D}, {0xF9D2, 0x622E}, {0xF9D3, 0x9678}, {0xF9D4, 0x502B}, {0xF9D5, 0x5D19}, {0xF9D6, 0x6DEA}, {0xF9D7, 0x8F2A}, {0xF9D8, 0x5F8B}, {0xF9D9, 0x6144}, -{0xF9DA, 0x6817}, {0xF9DB, 0x7387}, {0xF9DC, 0x9686}, {0xF9DD, 0x5229}, {0xF9DE, 0x540F}, {0xF9DF, 0x5C65}, {0xF9E0, 0x6613}, {0xF9E1, 0x674E}, {0xF9E2, 0x68A8}, {0xF9E3, 0x6CE5}, {0xF9E4, 0x7406}, -{0xF9E5, 0x75E2}, {0xF9E6, 0x7F79}, {0xF9E7, 0x88CF}, {0xF9E8, 0x88E1}, {0xF9E9, 0x91CC}, {0xF9EA, 0x96E2}, {0xF9EB, 0x533F}, {0xF9EC, 0x6EBA}, {0xF9ED, 0x541D}, {0xF9EE, 0x71D0}, {0xF9EF, 0x7498}, -{0xF9F0, 0x85FA}, {0xF9F1, 0x96A3}, {0xF9F2, 0x9C57}, {0xF9F3, 0x9E9F}, {0xF9F4, 0x6797}, {0xF9F5, 0x6DCB}, {0xF9F6, 0x81E8}, {0xF9F7, 0x7ACB}, {0xF9F8, 0x7B20}, {0xF9F9, 0x7C92}, {0xF9FA, 0x72C0}, -{0xF9FB, 0x7099}, {0xF9FC, 0x8B58}, {0xF9FD, 0x4EC0}, {0xF9FE, 0x8336}, {0xF9FF, 0x523A}, {0xFA00, 0x5207}, {0xFA01, 0x5EA6}, {0xFA02, 0x62D3}, {0xFA03, 0x7CD6}, {0xFA04, 0x5B85}, {0xFA05, 0x6D1E}, -{0xFA06, 0x66B4}, {0xFA07, 0x8F3B}, {0xFA08, 0x884C}, {0xFA09, 0x964D}, {0xFA0A, 0x898B}, {0xFA0B, 0x5ED3}, {0xFA0C, 0x5140}, {0xFA0D, 0x55C0}, {0xFA10, 0x585A}, {0xFA12, 0x6674}, {0xFA15, 0x51DE}, -{0xFA16, 0x732A}, {0xFA17, 0x76CA}, {0xFA18, 0x793C}, {0xFA19, 0x795E}, {0xFA1A, 0x7965}, {0xFA1B, 0x798F}, {0xFA1C, 0x9756}, {0xFA1D, 0x7CBE}, {0xFA1E, 0x7FBD}, {0xFA20, 0x8612}, {0xFA22, 0x8AF8}, -{0xFA25, 0x9038}, {0xFA26, 0x90FD}, {0xFA2A, 0x98EF}, {0xFA2B, 0x98FC}, {0xFA2C, 0x9928}, {0xFA2D, 0x9DB4}, {0xFA2E, 0x90DE}, {0xFA2F, 0x96B7}, {0xFA30, 0x4FAE}, {0xFA31, 0x50E7}, {0xFA32, 0x514D}, -{0xFA33, 0x52C9}, {0xFA34, 0x52E4}, {0xFA35, 0x5351}, {0xFA36, 0x559D}, {0xFA37, 0x5606}, {0xFA38, 0x5668}, {0xFA39, 0x5840}, {0xFA3A, 0x58A8}, {0xFA3B, 0x5C64}, {0xFA3C, 0x5C6E}, {0xFA3D, 0x6094}, -{0xFA3E, 0x6168}, {0xFA3F, 0x618E}, {0xFA40, 0x61F2}, {0xFA41, 0x654F}, {0xFA42, 0x65E2}, {0xFA43, 0x6691}, {0xFA44, 0x6885}, {0xFA45, 0x6D77}, {0xFA46, 0x6E1A}, {0xFA47, 0x6F22}, {0xFA48, 0x716E}, -{0xFA49, 0x722B}, {0xFA4A, 0x7422}, {0xFA4B, 0x7891}, {0xFA4C, 0x793E}, {0xFA4D, 0x7949}, {0xFA4E, 0x7948}, {0xFA4F, 0x7950}, {0xFA50, 0x7956}, {0xFA51, 0x795D}, {0xFA52, 0x798D}, {0xFA53, 0x798E}, -{0xFA54, 0x7A40}, {0xFA55, 0x7A81}, {0xFA56, 0x7BC0}, {0xFA57, 0x7DF4}, {0xFA58, 0x7E09}, {0xFA59, 0x7E41}, {0xFA5A, 0x7F72}, {0xFA5B, 0x8005}, {0xFA5C, 0x81ED}, {0xFA5D, 0x8279}, {0xFA5E, 0x8279}, -{0xFA5F, 0x8457}, {0xFA60, 0x8910}, {0xFA61, 0x8996}, {0xFA62, 0x8B01}, {0xFA63, 0x8B39}, {0xFA64, 0x8CD3}, {0xFA65, 0x8D08}, {0xFA66, 0x8FB6}, {0xFA67, 0x9038}, {0xFA68, 0x96E3}, {0xFA69, 0x97FF}, -{0xFA6A, 0x983B}, {0xFA6B, 0x6075}, {0xFA6C, 0x242EE}, {0xFA6D, 0x8218}, {0xFA70, 0x4E26}, {0xFA71, 0x51B5}, {0xFA72, 0x5168}, {0xFA73, 0x4F80}, {0xFA74, 0x5145}, {0xFA75, 0x5180}, {0xFA76, 0x52C7}, -{0xFA77, 0x52FA}, {0xFA78, 0x559D}, {0xFA79, 0x5555}, {0xFA7A, 0x5599}, {0xFA7B, 0x55E2}, {0xFA7C, 0x585A}, {0xFA7D, 0x58B3}, {0xFA7E, 0x5944}, {0xFA7F, 0x5954}, {0xFA80, 0x5A62}, {0xFA81, 0x5B28}, -{0xFA82, 0x5ED2}, {0xFA83, 0x5ED9}, {0xFA84, 0x5F69}, {0xFA85, 0x5FAD}, {0xFA86, 0x60D8}, {0xFA87, 0x614E}, {0xFA88, 0x6108}, {0xFA89, 0x618E}, {0xFA8A, 0x6160}, {0xFA8B, 0x61F2}, {0xFA8C, 0x6234}, -{0xFA8D, 0x63C4}, {0xFA8E, 0x641C}, {0xFA8F, 0x6452}, {0xFA90, 0x6556}, {0xFA91, 0x6674}, {0xFA92, 0x6717}, {0xFA93, 0x671B}, {0xFA94, 0x6756}, {0xFA95, 0x6B79}, {0xFA96, 0x6BBA}, {0xFA97, 0x6D41}, -{0xFA98, 0x6EDB}, {0xFA99, 0x6ECB}, {0xFA9A, 0x6F22}, {0xFA9B, 0x701E}, {0xFA9C, 0x716E}, {0xFA9D, 0x77A7}, {0xFA9E, 0x7235}, {0xFA9F, 0x72AF}, {0xFAA0, 0x732A}, {0xFAA1, 0x7471}, {0xFAA2, 0x7506}, -{0xFAA3, 0x753B}, {0xFAA4, 0x761D}, {0xFAA5, 0x761F}, {0xFAA6, 0x76CA}, {0xFAA7, 0x76DB}, {0xFAA8, 0x76F4}, {0xFAA9, 0x774A}, {0xFAAA, 0x7740}, {0xFAAB, 0x78CC}, {0xFAAC, 0x7AB1}, {0xFAAD, 0x7BC0}, -{0xFAAE, 0x7C7B}, {0xFAAF, 0x7D5B}, {0xFAB0, 0x7DF4}, {0xFAB1, 0x7F3E}, {0xFAB2, 0x8005}, {0xFAB3, 0x8352}, {0xFAB4, 0x83EF}, {0xFAB5, 0x8779}, {0xFAB6, 0x8941}, {0xFAB7, 0x8986}, {0xFAB8, 0x8996}, -{0xFAB9, 0x8ABF}, {0xFABA, 0x8AF8}, {0xFABB, 0x8ACB}, {0xFABC, 0x8B01}, {0xFABD, 0x8AFE}, {0xFABE, 0x8AED}, {0xFABF, 0x8B39}, {0xFAC0, 0x8B8A}, {0xFAC1, 0x8D08}, {0xFAC2, 0x8F38}, {0xFAC3, 0x9072}, -{0xFAC4, 0x9199}, {0xFAC5, 0x9276}, {0xFAC6, 0x967C}, {0xFAC7, 0x96E3}, {0xFAC8, 0x9756}, {0xFAC9, 0x97DB}, {0xFACA, 0x97FF}, {0xFACB, 0x980B}, {0xFACC, 0x983B}, {0xFACD, 0x9B12}, {0xFACE, 0x9F9C}, -{0xFACF, 0x2284A}, {0xFAD0, 0x22844}, {0xFAD1, 0x233D5}, {0xFAD2, 0x3B9D}, {0xFAD3, 0x4018}, {0xFAD4, 0x4039}, {0xFAD5, 0x25249}, {0xFAD6, 0x25CD0}, {0xFAD7, 0x27ED3}, {0xFAD8, 0x9F43}, -{0xFAD9, 0x9F8E}, {0xFB1D, 0x5D9}, {0xFB1D, 0x5B4}, {0xFB1F, 0x5F2}, {0xFB1F, 0x5B7}, {0xFB2A, 0x5E9}, {0xFB2A, 0x5C1}, {0xFB2B, 0x5E9}, {0xFB2B, 0x5C2}, {0xFB2C, 0x5E9}, {0xFB2C, 0x5BC}, -{0xFB2C, 0x5C1}, {0xFB2D, 0x5E9}, {0xFB2D, 0x5BC}, {0xFB2D, 0x5C2}, {0xFB2E, 0x5D0}, {0xFB2E, 0x5B7}, {0xFB2F, 0x5D0}, {0xFB2F, 0x5B8}, {0xFB30, 0x5D0}, {0xFB30, 0x5BC}, {0xFB31, 0x5D1}, -{0xFB31, 0x5BC}, {0xFB32, 0x5D2}, {0xFB32, 0x5BC}, {0xFB33, 0x5D3}, {0xFB33, 0x5BC}, {0xFB34, 0x5D4}, {0xFB34, 0x5BC}, {0xFB35, 0x5D5}, {0xFB35, 0x5BC}, {0xFB36, 0x5D6}, {0xFB36, 0x5BC}, -{0xFB38, 0x5D8}, {0xFB38, 0x5BC}, {0xFB39, 0x5D9}, {0xFB39, 0x5BC}, {0xFB3A, 0x5DA}, {0xFB3A, 0x5BC}, {0xFB3B, 0x5DB}, {0xFB3B, 0x5BC}, {0xFB3C, 0x5DC}, {0xFB3C, 0x5BC}, {0xFB3E, 0x5DE}, -{0xFB3E, 0x5BC}, {0xFB40, 0x5E0}, {0xFB40, 0x5BC}, {0xFB41, 0x5E1}, {0xFB41, 0x5BC}, {0xFB43, 0x5E3}, {0xFB43, 0x5BC}, {0xFB44, 0x5E4}, {0xFB44, 0x5BC}, {0xFB46, 0x5E6}, {0xFB46, 0x5BC}, -{0xFB47, 0x5E7}, {0xFB47, 0x5BC}, {0xFB48, 0x5E8}, {0xFB48, 0x5BC}, {0xFB49, 0x5E9}, {0xFB49, 0x5BC}, {0xFB4A, 0x5EA}, {0xFB4A, 0x5BC}, {0xFB4B, 0x5D5}, {0xFB4B, 0x5B9}, {0xFB4C, 0x5D1}, -{0xFB4C, 0x5BF}, {0xFB4D, 0x5DB}, {0xFB4D, 0x5BF}, {0xFB4E, 0x5E4}, {0xFB4E, 0x5BF}, {0x1109A, 0x11099}, {0x1109A, 0x110BA}, {0x1109C, 0x1109B}, {0x1109C, 0x110BA}, {0x110AB, 0x110A5}, -{0x110AB, 0x110BA}, {0x1112E, 0x11131}, {0x1112E, 0x11127}, {0x1112F, 0x11132}, {0x1112F, 0x11127}, {0x1134B, 0x11347}, {0x1134B, 0x1133E}, {0x1134C, 0x11347}, {0x1134C, 0x11357}, {0x114BB, 0x114B9}, -{0x114BB, 0x114BA}, {0x114BC, 0x114B9}, {0x114BC, 0x114B0}, {0x114BE, 0x114B9}, {0x114BE, 0x114BD}, {0x115BA, 0x115B8}, {0x115BA, 0x115AF}, {0x115BB, 0x115B9}, {0x115BB, 0x115AF}, {0x1D15E, 0x1D157}, -{0x1D15E, 0x1D165}, {0x1D15F, 0x1D158}, {0x1D15F, 0x1D165}, {0x1D160, 0x1D158}, {0x1D160, 0x1D165}, {0x1D160, 0x1D16E}, {0x1D161, 0x1D158}, {0x1D161, 0x1D165}, {0x1D161, 0x1D16F}, {0x1D162, 0x1D158}, -{0x1D162, 0x1D165}, {0x1D162, 0x1D170}, {0x1D163, 0x1D158}, {0x1D163, 0x1D165}, {0x1D163, 0x1D171}, {0x1D164, 0x1D158}, {0x1D164, 0x1D165}, {0x1D164, 0x1D172}, {0x1D1BB, 0x1D1B9}, {0x1D1BB, 0x1D165}, -{0x1D1BC, 0x1D1BA}, {0x1D1BC, 0x1D165}, {0x1D1BD, 0x1D1B9}, {0x1D1BD, 0x1D165}, {0x1D1BD, 0x1D16E}, {0x1D1BE, 0x1D1BA}, {0x1D1BE, 0x1D165}, {0x1D1BE, 0x1D16E}, {0x1D1BF, 0x1D1B9}, {0x1D1BF, 0x1D165}, -{0x1D1BF, 0x1D16F}, {0x1D1C0, 0x1D1BA}, {0x1D1C0, 0x1D165}, {0x1D1C0, 0x1D16F}, {0x2F800, 0x4E3D}, {0x2F801, 0x4E38}, {0x2F802, 0x4E41}, {0x2F803, 0x20122}, {0x2F804, 0x4F60}, {0x2F805, 0x4FAE}, -{0x2F806, 0x4FBB}, {0x2F807, 0x5002}, {0x2F808, 0x507A}, {0x2F809, 0x5099}, {0x2F80A, 0x50E7}, {0x2F80B, 0x50CF}, {0x2F80C, 0x349E}, {0x2F80D, 0x2063A}, {0x2F80E, 0x514D}, {0x2F80F, 0x5154}, -{0x2F810, 0x5164}, {0x2F811, 0x5177}, {0x2F812, 0x2051C}, {0x2F813, 0x34B9}, {0x2F814, 0x5167}, {0x2F815, 0x518D}, {0x2F816, 0x2054B}, {0x2F817, 0x5197}, {0x2F818, 0x51A4}, {0x2F819, 0x4ECC}, -{0x2F81A, 0x51AC}, {0x2F81B, 0x51B5}, {0x2F81C, 0x291DF}, {0x2F81D, 0x51F5}, {0x2F81E, 0x5203}, {0x2F81F, 0x34DF}, {0x2F820, 0x523B}, {0x2F821, 0x5246}, {0x2F822, 0x5272}, {0x2F823, 0x5277}, -{0x2F824, 0x3515}, {0x2F825, 0x52C7}, {0x2F826, 0x52C9}, {0x2F827, 0x52E4}, {0x2F828, 0x52FA}, {0x2F829, 0x5305}, {0x2F82A, 0x5306}, {0x2F82B, 0x5317}, {0x2F82C, 0x5349}, {0x2F82D, 0x5351}, -{0x2F82E, 0x535A}, {0x2F82F, 0x5373}, {0x2F830, 0x537D}, {0x2F831, 0x537F}, {0x2F832, 0x537F}, {0x2F833, 0x537F}, {0x2F834, 0x20A2C}, {0x2F835, 0x7070}, {0x2F836, 0x53CA}, {0x2F837, 0x53DF}, -{0x2F838, 0x20B63}, {0x2F839, 0x53EB}, {0x2F83A, 0x53F1}, {0x2F83B, 0x5406}, {0x2F83C, 0x549E}, {0x2F83D, 0x5438}, {0x2F83E, 0x5448}, {0x2F83F, 0x5468}, {0x2F840, 0x54A2}, {0x2F841, 0x54F6}, -{0x2F842, 0x5510}, {0x2F843, 0x5553}, {0x2F844, 0x5563}, {0x2F845, 0x5584}, {0x2F846, 0x5584}, {0x2F847, 0x5599}, {0x2F848, 0x55AB}, {0x2F849, 0x55B3}, {0x2F84A, 0x55C2}, {0x2F84B, 0x5716}, -{0x2F84C, 0x5606}, {0x2F84D, 0x5717}, {0x2F84E, 0x5651}, {0x2F84F, 0x5674}, {0x2F850, 0x5207}, {0x2F851, 0x58EE}, {0x2F852, 0x57CE}, {0x2F853, 0x57F4}, {0x2F854, 0x580D}, {0x2F855, 0x578B}, -{0x2F856, 0x5832}, {0x2F857, 0x5831}, {0x2F858, 0x58AC}, {0x2F859, 0x214E4}, {0x2F85A, 0x58F2}, {0x2F85B, 0x58F7}, {0x2F85C, 0x5906}, {0x2F85D, 0x591A}, {0x2F85E, 0x5922}, {0x2F85F, 0x5962}, -{0x2F860, 0x216A8}, {0x2F861, 0x216EA}, {0x2F862, 0x59EC}, {0x2F863, 0x5A1B}, {0x2F864, 0x5A27}, {0x2F865, 0x59D8}, {0x2F866, 0x5A66}, {0x2F867, 0x36EE}, {0x2F868, 0x36FC}, {0x2F869, 0x5B08}, -{0x2F86A, 0x5B3E}, {0x2F86B, 0x5B3E}, {0x2F86C, 0x219C8}, {0x2F86D, 0x5BC3}, {0x2F86E, 0x5BD8}, {0x2F86F, 0x5BE7}, {0x2F870, 0x5BF3}, {0x2F871, 0x21B18}, {0x2F872, 0x5BFF}, {0x2F873, 0x5C06}, -{0x2F874, 0x5F53}, {0x2F875, 0x5C22}, {0x2F876, 0x3781}, {0x2F877, 0x5C60}, {0x2F878, 0x5C6E}, {0x2F879, 0x5CC0}, {0x2F87A, 0x5C8D}, {0x2F87B, 0x21DE4}, {0x2F87C, 0x5D43}, {0x2F87D, 0x21DE6}, -{0x2F87E, 0x5D6E}, {0x2F87F, 0x5D6B}, {0x2F880, 0x5D7C}, {0x2F881, 0x5DE1}, {0x2F882, 0x5DE2}, {0x2F883, 0x382F}, {0x2F884, 0x5DFD}, {0x2F885, 0x5E28}, {0x2F886, 0x5E3D}, {0x2F887, 0x5E69}, -{0x2F888, 0x3862}, {0x2F889, 0x22183}, {0x2F88A, 0x387C}, {0x2F88B, 0x5EB0}, {0x2F88C, 0x5EB3}, {0x2F88D, 0x5EB6}, {0x2F88E, 0x5ECA}, {0x2F88F, 0x2A392}, {0x2F890, 0x5EFE}, {0x2F891, 0x22331}, -{0x2F892, 0x22331}, {0x2F893, 0x8201}, {0x2F894, 0x5F22}, {0x2F895, 0x5F22}, {0x2F896, 0x38C7}, {0x2F897, 0x232B8}, {0x2F898, 0x261DA}, {0x2F899, 0x5F62}, {0x2F89A, 0x5F6B}, {0x2F89B, 0x38E3}, -{0x2F89C, 0x5F9A}, {0x2F89D, 0x5FCD}, {0x2F89E, 0x5FD7}, {0x2F89F, 0x5FF9}, {0x2F8A0, 0x6081}, {0x2F8A1, 0x393A}, {0x2F8A2, 0x391C}, {0x2F8A3, 0x6094}, {0x2F8A4, 0x226D4}, {0x2F8A5, 0x60C7}, -{0x2F8A6, 0x6148}, {0x2F8A7, 0x614C}, {0x2F8A8, 0x614E}, {0x2F8A9, 0x614C}, {0x2F8AA, 0x617A}, {0x2F8AB, 0x618E}, {0x2F8AC, 0x61B2}, {0x2F8AD, 0x61A4}, {0x2F8AE, 0x61AF}, {0x2F8AF, 0x61DE}, -{0x2F8B0, 0x61F2}, {0x2F8B1, 0x61F6}, {0x2F8B2, 0x6210}, {0x2F8B3, 0x621B}, {0x2F8B4, 0x625D}, {0x2F8B5, 0x62B1}, {0x2F8B6, 0x62D4}, {0x2F8B7, 0x6350}, {0x2F8B8, 0x22B0C}, {0x2F8B9, 0x633D}, -{0x2F8BA, 0x62FC}, {0x2F8BB, 0x6368}, {0x2F8BC, 0x6383}, {0x2F8BD, 0x63E4}, {0x2F8BE, 0x22BF1}, {0x2F8BF, 0x6422}, {0x2F8C0, 0x63C5}, {0x2F8C1, 0x63A9}, {0x2F8C2, 0x3A2E}, {0x2F8C3, 0x6469}, -{0x2F8C4, 0x647E}, {0x2F8C5, 0x649D}, {0x2F8C6, 0x6477}, {0x2F8C7, 0x3A6C}, {0x2F8C8, 0x654F}, {0x2F8C9, 0x656C}, {0x2F8CA, 0x2300A}, {0x2F8CB, 0x65E3}, {0x2F8CC, 0x66F8}, {0x2F8CD, 0x6649}, -{0x2F8CE, 0x3B19}, {0x2F8CF, 0x6691}, {0x2F8D0, 0x3B08}, {0x2F8D1, 0x3AE4}, {0x2F8D2, 0x5192}, {0x2F8D3, 0x5195}, {0x2F8D4, 0x6700}, {0x2F8D5, 0x669C}, {0x2F8D6, 0x80AD}, {0x2F8D7, 0x43D9}, -{0x2F8D8, 0x6717}, {0x2F8D9, 0x671B}, {0x2F8DA, 0x6721}, {0x2F8DB, 0x675E}, {0x2F8DC, 0x6753}, {0x2F8DD, 0x233C3}, {0x2F8DE, 0x3B49}, {0x2F8DF, 0x67FA}, {0x2F8E0, 0x6785}, {0x2F8E1, 0x6852}, -{0x2F8E2, 0x6885}, {0x2F8E3, 0x2346D}, {0x2F8E4, 0x688E}, {0x2F8E5, 0x681F}, {0x2F8E6, 0x6914}, {0x2F8E7, 0x3B9D}, {0x2F8E8, 0x6942}, {0x2F8E9, 0x69A3}, {0x2F8EA, 0x69EA}, {0x2F8EB, 0x6AA8}, -{0x2F8EC, 0x236A3}, {0x2F8ED, 0x6ADB}, {0x2F8EE, 0x3C18}, {0x2F8EF, 0x6B21}, {0x2F8F0, 0x238A7}, {0x2F8F1, 0x6B54}, {0x2F8F2, 0x3C4E}, {0x2F8F3, 0x6B72}, {0x2F8F4, 0x6B9F}, {0x2F8F5, 0x6BBA}, -{0x2F8F6, 0x6BBB}, {0x2F8F7, 0x23A8D}, {0x2F8F8, 0x21D0B}, {0x2F8F9, 0x23AFA}, {0x2F8FA, 0x6C4E}, {0x2F8FB, 0x23CBC}, {0x2F8FC, 0x6CBF}, {0x2F8FD, 0x6CCD}, {0x2F8FE, 0x6C67}, {0x2F8FF, 0x6D16}, -{0x2F900, 0x6D3E}, {0x2F901, 0x6D77}, {0x2F902, 0x6D41}, {0x2F903, 0x6D69}, {0x2F904, 0x6D78}, {0x2F905, 0x6D85}, {0x2F906, 0x23D1E}, {0x2F907, 0x6D34}, {0x2F908, 0x6E2F}, {0x2F909, 0x6E6E}, -{0x2F90A, 0x3D33}, {0x2F90B, 0x6ECB}, {0x2F90C, 0x6EC7}, {0x2F90D, 0x23ED1}, {0x2F90E, 0x6DF9}, {0x2F90F, 0x6F6E}, {0x2F910, 0x23F5E}, {0x2F911, 0x23F8E}, {0x2F912, 0x6FC6}, {0x2F913, 0x7039}, -{0x2F914, 0x701E}, {0x2F915, 0x701B}, {0x2F916, 0x3D96}, {0x2F917, 0x704A}, {0x2F918, 0x707D}, {0x2F919, 0x7077}, {0x2F91A, 0x70AD}, {0x2F91B, 0x20525}, {0x2F91C, 0x7145}, {0x2F91D, 0x24263}, -{0x2F91E, 0x719C}, {0x2F91F, 0x243AB}, {0x2F920, 0x7228}, {0x2F921, 0x7235}, {0x2F922, 0x7250}, {0x2F923, 0x24608}, {0x2F924, 0x7280}, {0x2F925, 0x7295}, {0x2F926, 0x24735}, {0x2F927, 0x24814}, -{0x2F928, 0x737A}, {0x2F929, 0x738B}, {0x2F92A, 0x3EAC}, {0x2F92B, 0x73A5}, {0x2F92C, 0x3EB8}, {0x2F92D, 0x3EB8}, {0x2F92E, 0x7447}, {0x2F92F, 0x745C}, {0x2F930, 0x7471}, {0x2F931, 0x7485}, -{0x2F932, 0x74CA}, {0x2F933, 0x3F1B}, {0x2F934, 0x7524}, {0x2F935, 0x24C36}, {0x2F936, 0x753E}, {0x2F937, 0x24C92}, {0x2F938, 0x7570}, {0x2F939, 0x2219F}, {0x2F93A, 0x7610}, {0x2F93B, 0x24FA1}, -{0x2F93C, 0x24FB8}, {0x2F93D, 0x25044}, {0x2F93E, 0x3FFC}, {0x2F93F, 0x4008}, {0x2F940, 0x76F4}, {0x2F941, 0x250F3}, {0x2F942, 0x250F2}, {0x2F943, 0x25119}, {0x2F944, 0x25133}, {0x2F945, 0x771E}, -{0x2F946, 0x771F}, {0x2F947, 0x771F}, {0x2F948, 0x774A}, {0x2F949, 0x4039}, {0x2F94A, 0x778B}, {0x2F94B, 0x4046}, {0x2F94C, 0x4096}, {0x2F94D, 0x2541D}, {0x2F94E, 0x784E}, {0x2F94F, 0x788C}, -{0x2F950, 0x78CC}, {0x2F951, 0x40E3}, {0x2F952, 0x25626}, {0x2F953, 0x7956}, {0x2F954, 0x2569A}, {0x2F955, 0x256C5}, {0x2F956, 0x798F}, {0x2F957, 0x79EB}, {0x2F958, 0x412F}, {0x2F959, 0x7A40}, -{0x2F95A, 0x7A4A}, {0x2F95B, 0x7A4F}, {0x2F95C, 0x2597C}, {0x2F95D, 0x25AA7}, {0x2F95E, 0x25AA7}, {0x2F95F, 0x7AEE}, {0x2F960, 0x4202}, {0x2F961, 0x25BAB}, {0x2F962, 0x7BC6}, {0x2F963, 0x7BC9}, -{0x2F964, 0x4227}, {0x2F965, 0x25C80}, {0x2F966, 0x7CD2}, {0x2F967, 0x42A0}, {0x2F968, 0x7CE8}, {0x2F969, 0x7CE3}, {0x2F96A, 0x7D00}, {0x2F96B, 0x25F86}, {0x2F96C, 0x7D63}, {0x2F96D, 0x4301}, -{0x2F96E, 0x7DC7}, {0x2F96F, 0x7E02}, {0x2F970, 0x7E45}, {0x2F971, 0x4334}, {0x2F972, 0x26228}, {0x2F973, 0x26247}, {0x2F974, 0x4359}, {0x2F975, 0x262D9}, {0x2F976, 0x7F7A}, {0x2F977, 0x2633E}, -{0x2F978, 0x7F95}, {0x2F979, 0x7FFA}, {0x2F97A, 0x8005}, {0x2F97B, 0x264DA}, {0x2F97C, 0x26523}, {0x2F97D, 0x8060}, {0x2F97E, 0x265A8}, {0x2F97F, 0x8070}, {0x2F980, 0x2335F}, {0x2F981, 0x43D5}, -{0x2F982, 0x80B2}, {0x2F983, 0x8103}, {0x2F984, 0x440B}, {0x2F985, 0x813E}, {0x2F986, 0x5AB5}, {0x2F987, 0x267A7}, {0x2F988, 0x267B5}, {0x2F989, 0x23393}, {0x2F98A, 0x2339C}, {0x2F98B, 0x8201}, -{0x2F98C, 0x8204}, {0x2F98D, 0x8F9E}, {0x2F98E, 0x446B}, {0x2F98F, 0x8291}, {0x2F990, 0x828B}, {0x2F991, 0x829D}, {0x2F992, 0x52B3}, {0x2F993, 0x82B1}, {0x2F994, 0x82B3}, {0x2F995, 0x82BD}, -{0x2F996, 0x82E6}, {0x2F997, 0x26B3C}, {0x2F998, 0x82E5}, {0x2F999, 0x831D}, {0x2F99A, 0x8363}, {0x2F99B, 0x83AD}, {0x2F99C, 0x8323}, {0x2F99D, 0x83BD}, {0x2F99E, 0x83E7}, {0x2F99F, 0x8457}, -{0x2F9A0, 0x8353}, {0x2F9A1, 0x83CA}, {0x2F9A2, 0x83CC}, {0x2F9A3, 0x83DC}, {0x2F9A4, 0x26C36}, {0x2F9A5, 0x26D6B}, {0x2F9A6, 0x26CD5}, {0x2F9A7, 0x452B}, {0x2F9A8, 0x84F1}, {0x2F9A9, 0x84F3}, -{0x2F9AA, 0x8516}, {0x2F9AB, 0x273CA}, {0x2F9AC, 0x8564}, {0x2F9AD, 0x26F2C}, {0x2F9AE, 0x455D}, {0x2F9AF, 0x4561}, {0x2F9B0, 0x26FB1}, {0x2F9B1, 0x270D2}, {0x2F9B2, 0x456B}, {0x2F9B3, 0x8650}, -{0x2F9B4, 0x865C}, {0x2F9B5, 0x8667}, {0x2F9B6, 0x8669}, {0x2F9B7, 0x86A9}, {0x2F9B8, 0x8688}, {0x2F9B9, 0x870E}, {0x2F9BA, 0x86E2}, {0x2F9BB, 0x8779}, {0x2F9BC, 0x8728}, {0x2F9BD, 0x876B}, -{0x2F9BE, 0x8786}, {0x2F9BF, 0x45D7}, {0x2F9C0, 0x87E1}, {0x2F9C1, 0x8801}, {0x2F9C2, 0x45F9}, {0x2F9C3, 0x8860}, {0x2F9C4, 0x8863}, {0x2F9C5, 0x27667}, {0x2F9C6, 0x88D7}, {0x2F9C7, 0x88DE}, -{0x2F9C8, 0x4635}, {0x2F9C9, 0x88FA}, {0x2F9CA, 0x34BB}, {0x2F9CB, 0x278AE}, {0x2F9CC, 0x27966}, {0x2F9CD, 0x46BE}, {0x2F9CE, 0x46C7}, {0x2F9CF, 0x8AA0}, {0x2F9D0, 0x8AED}, {0x2F9D1, 0x8B8A}, -{0x2F9D2, 0x8C55}, {0x2F9D3, 0x27CA8}, {0x2F9D4, 0x8CAB}, {0x2F9D5, 0x8CC1}, {0x2F9D6, 0x8D1B}, {0x2F9D7, 0x8D77}, {0x2F9D8, 0x27F2F}, {0x2F9D9, 0x20804}, {0x2F9DA, 0x8DCB}, {0x2F9DB, 0x8DBC}, -{0x2F9DC, 0x8DF0}, {0x2F9DD, 0x208DE}, {0x2F9DE, 0x8ED4}, {0x2F9DF, 0x8F38}, {0x2F9E0, 0x285D2}, {0x2F9E1, 0x285ED}, {0x2F9E2, 0x9094}, {0x2F9E3, 0x90F1}, {0x2F9E4, 0x9111}, {0x2F9E5, 0x2872E}, -{0x2F9E6, 0x911B}, {0x2F9E7, 0x9238}, {0x2F9E8, 0x92D7}, {0x2F9E9, 0x92D8}, {0x2F9EA, 0x927C}, {0x2F9EB, 0x93F9}, {0x2F9EC, 0x9415}, {0x2F9ED, 0x28BFA}, {0x2F9EE, 0x958B}, {0x2F9EF, 0x4995}, -{0x2F9F0, 0x95B7}, {0x2F9F1, 0x28D77}, {0x2F9F2, 0x49E6}, {0x2F9F3, 0x96C3}, {0x2F9F4, 0x5DB2}, {0x2F9F5, 0x9723}, {0x2F9F6, 0x29145}, {0x2F9F7, 0x2921A}, {0x2F9F8, 0x4A6E}, {0x2F9F9, 0x4A76}, -{0x2F9FA, 0x97E0}, {0x2F9FB, 0x2940A}, {0x2F9FC, 0x4AB2}, {0x2F9FD, 0x29496}, {0x2F9FE, 0x980B}, {0x2F9FF, 0x980B}, {0x2FA00, 0x9829}, {0x2FA01, 0x295B6}, {0x2FA02, 0x98E2}, {0x2FA03, 0x4B33}, -{0x2FA04, 0x9929}, {0x2FA05, 0x99A7}, {0x2FA06, 0x99C2}, {0x2FA07, 0x99FE}, {0x2FA08, 0x4BCE}, {0x2FA09, 0x29B30}, {0x2FA0A, 0x9B12}, {0x2FA0B, 0x9C40}, {0x2FA0C, 0x9CFD}, {0x2FA0D, 0x4CCE}, -{0x2FA0E, 0x4CED}, {0x2FA0F, 0x9D67}, {0x2FA10, 0x2A0CE}, {0x2FA11, 0x4CF8}, {0x2FA12, 0x2A105}, {0x2FA13, 0x2A20E}, {0x2FA14, 0x2A291}, {0x2FA15, 0x9EBB}, {0x2FA16, 0x4D56}, {0x2FA17, 0x9EF9}, -{0x2FA18, 0x9EFE}, {0x2FA19, 0x9F05}, {0x2FA1A, 0x9F0F}, {0x2FA1B, 0x9F16}, {0x2FA1D, 0x2A600}, -}; - -static std::string codepoint_to_utf8(uint32_t cp) { - std::string result; - if (/* 0x00 <= cp && */ cp <= 0x7f) { - result.push_back(cp); - } - else if (0x80 <= cp && cp <= 0x7ff) { - result.push_back(0xc0 | ((cp >> 6) & 0x1f)); - result.push_back(0x80 | (cp & 0x3f)); - } - else if (0x800 <= cp && cp <= 0xffff) { - result.push_back(0xe0 | ((cp >> 12) & 0x0f)); - result.push_back(0x80 | ((cp >> 6) & 0x3f)); - result.push_back(0x80 | (cp & 0x3f)); - } - else if (0x10000 <= cp && cp <= 0x10ffff) { - result.push_back(0xf0 | ((cp >> 18) & 0x07)); - result.push_back(0x80 | ((cp >> 12) & 0x3f)); - result.push_back(0x80 | ((cp >> 6) & 0x3f)); - result.push_back(0x80 | (cp & 0x3f)); - } - else { - throw std::invalid_argument("invalid codepoint"); - } - return result; -} - -static std::string codepoints_to_utf8(const std::vector & cps) { - std::string result; - for (size_t i = 0; i < cps.size(); ++i) { - result.append(codepoint_to_utf8(cps[i])); - } - return result; -} - -static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) { - assert(offset < utf8.size()); - if (!(utf8[offset + 0] & 0x80)) { - auto result = utf8[offset + 0]; - offset += 1; - return result; - } - if (!(utf8[offset + 0] & 0x40)) { - throw std::invalid_argument("invalid character"); - } - if (!(utf8[offset + 0] & 0x20)) { - if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80)) { - throw std::invalid_argument("invalid character"); - } - auto result = ((utf8[offset + 0] & 0x1f) << 6) | (utf8[offset + 1] & 0x3f); - offset += 2; - return result; - } - if (!(utf8[offset + 0] & 0x10)) { - if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80)) { - throw std::invalid_argument("invalid character"); - } - auto result = ((utf8[offset + 0] & 0x0f) << 12) | ((utf8[offset + 1] & 0x3f) << 6) | (utf8[offset + 2] & 0x3f); - offset += 3; - return result; - } - if (!(utf8[offset + 0] & 0x08)) { - if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80)) { - throw std::invalid_argument("invalid character"); - } - auto result = ((utf8[offset + 0] & 0x07) << 18) | ((utf8[offset + 1] & 0x3f) << 12) | ((utf8[offset + 2] & 0x3f) << 6) | (utf8[offset + 3] & 0x3f); - offset += 4; - return result; - } - throw std::invalid_argument("invalid string"); -} - -static std::vector codepoints_from_utf8(const std::string & utf8) { - std::vector result; - size_t offset = 0; - while (offset < utf8.size()) { - result.push_back(codepoint_from_utf8(utf8, offset)); - } - return result; -} - -static std::vector codepoint_to_utf16(uint32_t cp) { - std::vector result; - if (/* 0x0000 <= cp && */ cp <= 0xffff) { - result.emplace_back(cp); - } - else if (0x10000 <= cp && cp <= 0x10ffff) { - result.emplace_back(0xd800 | ((cp - 0x10000) >> 10)); - result.emplace_back(0xdc00 | ((cp - 0x10000) & 0x03ff)); - } - else { - throw std::invalid_argument("invalid codepoint"); - } - return result; -} - -static std::vector codepoints_to_utf16(const std::vector & cps) { - std::vector result; - for (size_t i = 0; i < cps.size(); ++i) { - auto temp = codepoint_to_utf16(cps[i]); - result.insert(result.end(), temp.begin(), temp.end()); - } - return result; -} - -static uint32_t codepoint_from_utf16(const std::vector & utf16, size_t & offset) { - assert(offset < utf16.size()); - if (((utf16[0] >> 10) << 10) != 0xd800) { - auto result = utf16[offset + 0]; - offset += 1; - return result; - } - - if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00)) { - throw std::invalid_argument("invalid character"); - } - - auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff)); - offset += 2; - return result; -} - -static std::vector codepoints_from_utf16(const std::vector & utf16) { - std::vector result; - size_t offset = 0; - while (offset < utf16.size()) { - result.push_back(codepoint_from_utf16(utf16, offset)); - } - return result; -} - #define CODEPOINT_TYPE_UNIDENTIFIED 0 -#define CODEPOINT_TYPE_DIGIT 1 -#define CODEPOINT_TYPE_LETTER 2 -#define CODEPOINT_TYPE_WHITESPACE 3 -#define CODEPOINT_TYPE_ACCENT_MARK 4 -#define CODEPOINT_TYPE_PUNCTUATION 5 -#define CODEPOINT_TYPE_SYMBOL 6 -#define CODEPOINT_TYPE_CONTROL 7 +#define CODEPOINT_TYPE_DIGIT 1 +#define CODEPOINT_TYPE_LETTER 2 +#define CODEPOINT_TYPE_WHITESPACE 3 +#define CODEPOINT_TYPE_ACCENT_MARK 4 +#define CODEPOINT_TYPE_PUNCTUATION 5 +#define CODEPOINT_TYPE_SYMBOL 6 +#define CODEPOINT_TYPE_CONTROL 7 -static std::unordered_map codepoint_type_map() { - std::unordered_map codepoint_types; - for (auto p : digit_ranges) { - for (auto i = p.first; i <= p.second; ++ i) { - codepoint_types[i] = CODEPOINT_TYPE_DIGIT; - } - } - for (auto p : letter_ranges) { - for (auto i = p.first; i <= p.second; ++ i) { - codepoint_types[i] = CODEPOINT_TYPE_LETTER; - } - } - for (auto p : whitespace_ranges) { - for (auto i = p.first; i <= p.second; ++ i) { - codepoint_types[i] = CODEPOINT_TYPE_WHITESPACE; - } - } - for (auto p : accent_mark_ranges) { - for (auto i = p.first; i <= p.second; ++ i) { - codepoint_types[i] = CODEPOINT_TYPE_ACCENT_MARK; - } - } - for (auto p : punctuation_ranges) { - for (auto i = p.first; i <= p.second; ++ i) { - codepoint_types[i] = CODEPOINT_TYPE_PUNCTUATION; - } - } - for (auto p : symbol_ranges) { - for (auto i = p.first; i <= p.second; ++i) { - codepoint_types[i] = CODEPOINT_TYPE_SYMBOL; - } - } - for (auto p : control_ranges) { - for (auto i = p.first; i <= p.second; ++ i) { - codepoint_types[i] = CODEPOINT_TYPE_CONTROL; - } - } - return codepoint_types; -} +std::string unicode_cpt_to_utf8(uint32_t cp); +std::vector unicode_cpts_from_utf8(const std::string & utf8); -static int codepoint_type(uint32_t cp) { - static std::unordered_map codepoint_types = codepoint_type_map(); - const auto it = codepoint_types.find(cp); - return it == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : it->second; -} +std::vector unicode_cpts_normalize_nfd(const std::vector & cpts); -static int codepoint_type(const std::string & utf8) { - if (utf8.length() == 0) { - return CODEPOINT_TYPE_UNIDENTIFIED; - } - size_t offset = 0; - return codepoint_type(codepoint_from_utf8(utf8, offset)); -} +int unicode_cpt_type(uint32_t cp); +int unicode_cpt_type(const std::string & utf8); -static std::unordered_map bytes_to_unicode_map_bpe() { - std::unordered_map map; - for (int ch = u'!'; ch <= u'~'; ++ch) { - assert(0 <= ch && ch < 256); - map[ch] = codepoint_to_utf8(ch); - } - for (int ch = u'¡'; ch <= u'¬'; ++ch) { - assert(0 <= ch && ch < 256); - map[ch] = codepoint_to_utf8(ch); - } - for (int ch = u'®'; ch <= u'ÿ'; ++ch) { - assert(0 <= ch && ch < 256); - map[ch] = codepoint_to_utf8(ch); - } - auto n = 0; - for (int ch = 0; ch < 256; ++ch) { - if (map.find(ch) == map.end()) { - map[ch] = codepoint_to_utf8(256 + n); - ++n; - } - } - return map; -} - -static std::string bytes_to_unicode_bpe(uint8_t byte) { - static std::unordered_map map = bytes_to_unicode_map_bpe(); - return map.at(byte); -} - -static std::unordered_map unicode_to_bytes_map_bpe() { - std::unordered_map map; - for (int ch = u'!'; ch <= u'~'; ++ch) { - assert(0 <= ch && ch < 256); - map[codepoint_to_utf8(ch)] = ch; - } - for (int ch = u'¡'; ch <= u'¬'; ++ch) { - assert(0 <= ch && ch < 256); - map[codepoint_to_utf8(ch)] = ch; - } - for (int ch = u'®'; ch <= u'ÿ'; ++ch) { - assert(0 <= ch && ch < 256); - map[codepoint_to_utf8(ch)] = ch; - } - auto n = 0; - for (int ch = 0; ch < 256; ++ch) { - if (map.find(codepoint_to_utf8(ch)) == map.end()) { - map[codepoint_to_utf8(256 + n)] = ch; - ++n; - } - } - return map; -} - -static uint8_t unicode_to_bytes_bpe(const std::string & utf8) { - static std::unordered_map map = unicode_to_bytes_map_bpe(); - return map.at(utf8); -} +std::string unicode_byte_to_utf8(uint8_t byte); +uint8_t unicode_utf8_to_byte(const std::string & utf8); From 05b06210c954491cf0f12034b0a62bd4d69ce78b Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 11 Mar 2024 17:49:47 +0200 Subject: [PATCH 20/48] llama : more consistent names of count variables (#5994) * llama : more consistent names of count variables ggml-ci * llama : n_parallel -> n_seq_max * common : fix param name * examples : fix param name --- README.md | 2 +- common/common.cpp | 18 +++++++++--------- examples/batched-bench/batched-bench.cpp | 2 +- examples/batched/batched.cpp | 2 +- examples/main/main.cpp | 1 + examples/perplexity/perplexity.cpp | 6 +++--- llama.cpp | 24 ++++++++++++------------ llama.h | 14 +++++++------- 8 files changed, 35 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 98fdc6808..54bf84bec 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) ### Recent API changes -- [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_max_seq()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328 +- [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_seq_max()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328 - [2024 Mar 4] Embeddings API updated https://github.com/ggerganov/llama.cpp/pull/5796 - [2024 Mar 3] `struct llama_context_params` https://github.com/ggerganov/llama.cpp/pull/5849 diff --git a/common/common.cpp b/common/common.cpp index 16ef4d7f7..2f38ac632 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1288,7 +1288,7 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param cparams.n_ctx = params.n_ctx; cparams.n_batch = params.n_batch; - cparams.n_parallel = params.n_parallel; + cparams.n_seq_max = params.n_parallel; cparams.n_threads = params.n_threads; cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch; cparams.seed = params.seed; @@ -1786,17 +1786,17 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size) { static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+"; printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d", - view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); + view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); llama_kv_cache_view_cell * c_curr = view.cells; llama_seq_id * cs_curr = view.cells_sequences; - for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) { + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { if (i % row_size == 0) { printf("\n%5d: ", i); } int seq_count = 0; - for (int j = 0; j < view.n_max_seq; j++) { + for (int j = 0; j < view.n_seq_max; j++) { if (cs_curr[j] >= 0) { seq_count++; } } putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]); @@ -1809,14 +1809,14 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) { static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n", - view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); + view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); std::unordered_map seqs; llama_kv_cache_view_cell * c_curr = view.cells; llama_seq_id * cs_curr = view.cells_sequences; - for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) { - for (int j = 0; j < view.n_max_seq; j++) { + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { + for (int j = 0; j < view.n_seq_max; j++) { if (cs_curr[j] < 0) { continue; } if (seqs.find(cs_curr[j]) == seqs.end()) { if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } @@ -1835,11 +1835,11 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) { c_curr = view.cells; cs_curr = view.cells_sequences; - for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) { + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { if (i % row_size == 0) { printf("\n%5d: ", i); } - for (int j = 0; j < view.n_max_seq; j++) { + for (int j = 0; j < view.n_seq_max; j++) { if (cs_curr[j] >= 0) { const auto & it = seqs.find(cs_curr[j]); putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+'); diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index dff6c68ec..22bc93bca 100644 --- a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -106,7 +106,7 @@ int main(int argc, char ** argv) { ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch; // ensure enough sequences are available - ctx_params.n_parallel = *std::max_element(n_pl.begin(), n_pl.end()); + ctx_params.n_seq_max = *std::max_element(n_pl.begin(), n_pl.end()); llama_context * ctx = llama_new_context_with_model(model, ctx_params); diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp index dde4d5a06..ee1f8f1bf 100644 --- a/examples/batched/batched.cpp +++ b/examples/batched/batched.cpp @@ -80,7 +80,7 @@ int main(int argc, char ** argv) { ctx_params.seed = 1234; ctx_params.n_ctx = n_kv_req; ctx_params.n_batch = std::max(n_len, n_parallel); - ctx_params.n_parallel = n_parallel; + ctx_params.n_seq_max = n_parallel; ctx_params.n_threads = params.n_threads; ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch; diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 47059e582..e2d07a631 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -878,6 +878,7 @@ int main(int argc, char ** argv) { const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true); const auto line_inp = ::llama_tokenize(ctx, buffer, false, false); const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true); + LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str()); embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end()); diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index 293eb52c3..fdfc8f5dc 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -841,7 +841,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) { const int n_batch = params.n_batch; const int max_tasks_per_batch = 32; - const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx)); + const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_seq_max(ctx)); llama_batch batch = llama_batch_init(n_ctx, 0, max_seq); @@ -1118,7 +1118,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) { const int n_batch = params.n_batch; const int max_tasks_per_batch = 128; - const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_max_seq(ctx)); + const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_seq_max(ctx)); llama_batch batch = llama_batch_init(n_ctx, 0, max_seq); @@ -1470,7 +1470,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params const int n_batch = params.n_batch; const int max_tasks_per_batch = 32; - const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx)); + const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_seq_max(ctx)); llama_batch batch = llama_batch_init(n_ctx, 0, max_seq); diff --git a/llama.cpp b/llama.cpp index 6aa88ce54..98ec147ae 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12538,7 +12538,7 @@ struct llama_context_params llama_context_default_params() { /*.seed =*/ LLAMA_DEFAULT_SEED, /*.n_ctx =*/ 512, /*.n_batch =*/ 512, - /*.n_parallel =*/ 1, + /*.n_seq_max =*/ 1, /*.n_threads =*/ GGML_DEFAULT_N_THREADS, // TODO: better default /*.n_threads_batch =*/ GGML_DEFAULT_N_THREADS, /*.rope_scaling_type =*/ LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED, @@ -12700,7 +12700,7 @@ struct llama_context * llama_new_context_with_model( auto & cparams = ctx->cparams; cparams.n_batch = params.n_batch; - // TODO: maybe add n_parallel here too + // TODO: maybe add n_seq_max here too cparams.n_threads = params.n_threads; cparams.n_threads_batch = params.n_threads_batch; cparams.yarn_ext_factor = params.yarn_ext_factor; @@ -12767,7 +12767,7 @@ struct llama_context * llama_new_context_with_model( // Mamba only needs a constant number of KV cache cells per sequence if (model->arch == LLM_ARCH_MAMBA) { // Mamba needs at least as many KV cells as there are sequences kept at any time - kv_size = std::max((uint32_t) 1, params.n_parallel); + kv_size = std::max((uint32_t) 1, params.n_seq_max); // it's probably best to keep as much precision as possible for the states type_k = GGML_TYPE_F32; // required by ggml_ssm_conv for Mamba's conv_states type_v = GGML_TYPE_F32; // required by ggml_ssm_scan for Mamba's ssm_states @@ -13024,7 +13024,7 @@ uint32_t llama_n_batch(const struct llama_context * ctx) { return ctx->cparams.n_batch; } -uint32_t llama_n_max_seq(const struct llama_context * ctx) { +uint32_t llama_n_seq_max(const struct llama_context * ctx) { return ctx->kv_self.size; } @@ -13188,10 +13188,10 @@ int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const } } -struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq) { +struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max) { struct llama_kv_cache_view result = { /*.n_cells = */ 0, - /*.n_max_seq = */ n_max_seq, + /*.n_seq_max = */ n_seq_max, /*.token_count = */ 0, /*.used_cells = */ llama_get_kv_cache_used_cells(ctx), /*.max_contiguous = */ 0, @@ -13219,7 +13219,7 @@ void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_k void * p = realloc(view->cells, sizeof(struct llama_kv_cache_view_cell) * view->n_cells); GGML_ASSERT(p != nullptr && "Failed to alloc kv_cache_view cells"); view->cells = (struct llama_kv_cache_view_cell *)p; - p = realloc(view->cells_sequences, sizeof(llama_seq_id) * view->n_max_seq * view->n_cells); + p = realloc(view->cells_sequences, sizeof(llama_seq_id) * view->n_seq_max * view->n_cells); GGML_ASSERT(p != nullptr && "Failed to alloc kv_cache_view cells sequences"); view->cells_sequences = (llama_seq_id *)p; } @@ -13233,7 +13233,7 @@ void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_k uint32_t max_contig = 0; int32_t max_contig_idx = -1; - for (int32_t i = 0; i < int32_t(ctx->kv_self.size); i++, c_curr++, cs_curr += view->n_max_seq) { + for (int32_t i = 0; i < int32_t(ctx->kv_self.size); i++, c_curr++, cs_curr += view->n_seq_max) { const size_t curr_size = kv_cells[i].seq_id.size(); token_count += curr_size; c_curr->pos = kv_cells[i].pos + kv_cells[i].delta; @@ -13250,7 +13250,7 @@ void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_k int seq_idx = 0; for (const llama_seq_id it : kv_cells[i].seq_id) { - if (seq_idx >= view->n_max_seq) { + if (seq_idx >= view->n_seq_max) { break; } cs_curr[seq_idx] = it; @@ -13259,7 +13259,7 @@ void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_k if (seq_idx != 0) { used_cells++; } - for (; seq_idx < view->n_max_seq; seq_idx++) { + for (; seq_idx < view->n_seq_max; seq_idx++) { cs_curr[seq_idx] = -1; } } @@ -13921,12 +13921,12 @@ int32_t llama_tokenize( const char * text, int32_t text_len, llama_token * tokens, - int32_t n_max_tokens, + int32_t n_tokens_max, bool add_bos, bool special) { auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_bos, special); - if (n_max_tokens < (int) res.size()) { + if (n_tokens_max < (int) res.size()) { // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__); return -((int) res.size()); } diff --git a/llama.h b/llama.h index ccf65ca4e..446899da6 100644 --- a/llama.h +++ b/llama.h @@ -235,7 +235,7 @@ extern "C" { uint32_t seed; // RNG seed, -1 for random uint32_t n_ctx; // text context, 0 = from model uint32_t n_batch; // prompt processing maximum batch size - uint32_t n_parallel; // number of parallel sequences (i.e. distinct states for recurrent models) + uint32_t n_seq_max; // max number of sequences (i.e. distinct states for recurrent models) uint32_t n_threads; // number of threads to use for generation uint32_t n_threads_batch; // number of threads to use for batch processing @@ -377,7 +377,7 @@ extern "C" { LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx); LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx); - LLAMA_API uint32_t llama_n_max_seq (const struct llama_context * ctx); + LLAMA_API uint32_t llama_n_seq_max (const struct llama_context * ctx); LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model); LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model); @@ -456,7 +456,7 @@ extern "C" { // Maximum number of sequences that can exist in a cell. It's not an error // if there are more sequences in a cell than this value, however they will // not be visible in the view cells_sequences. - int32_t n_max_seq; + int32_t n_seq_max; // Number of tokens in the cache. For example, if there are two populated // cells, the first with 1 sequence id in it and the second with 2 sequence @@ -476,12 +476,12 @@ extern "C" { // Information for an individual cell. struct llama_kv_cache_view_cell * cells; - // The sequences for each cell. There will be n_max_seq items per cell. + // The sequences for each cell. There will be n_seq_max items per cell. llama_seq_id * cells_sequences; }; // Create an empty KV cache view. (use only for debugging purposes) - LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq); + LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max); // Free a KV cache view. (use only for debugging purposes) LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view); @@ -708,7 +708,7 @@ extern "C" { /// @details Convert the provided text into tokens. /// @param tokens The tokens pointer must be large enough to hold the resulting tokens. - /// @return Returns the number of tokens on success, no more than n_max_tokens + /// @return Returns the number of tokens on success, no more than n_tokens_max /// @return Returns a negative number on failure - the number of tokens that would have been returned /// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. /// Does not insert a leading space. @@ -717,7 +717,7 @@ extern "C" { const char * text, int32_t text_len, llama_token * tokens, - int32_t n_max_tokens, + int32_t n_tokens_max, bool add_bos, bool special); From 44ca159faf4fbe1a7ace13a962845ba7cdfd95ec Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:53:15 +0100 Subject: [PATCH 21/48] 1.5 bit: we can do even better (#5999) * iq1_s: we can do even better Spent one of the 4 scale bits on a signs of a 0.125 shift. I.e., quants are now -1 + delta, delta, 1 + delta, where delta is +/- 0.125. CUDA works, same performance as before. PPL(LLaMA-v2-7B) is now 11.85! * iq1_s: make scalar and AVX2 work with the new version * iq1_s: make Neon work with new version. ~10% drop in performance, so will need some more work. * iq1_s: make Metal work with new version * iq1_s: very slightly faster dequantize on Metal * iq1_s: fix dequantize on the CPU --------- Co-authored-by: Iwan Kawrakow --- ggml-common.h | 1 + ggml-cuda.cu | 36 ++++++++++----------- ggml-metal.metal | 18 ++++++----- ggml-quants.c | 83 ++++++++++++++++++++++++++++++++---------------- 4 files changed, 83 insertions(+), 55 deletions(-) diff --git a/ggml-common.h b/ggml-common.h index 5dd918081..2402b773c 100644 --- a/ggml-common.h +++ b/ggml-common.h @@ -645,6 +645,7 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512) GGML_TABLE_END() #define NGRID_IQ1S 2048 +#define IQ1S_DELTA 0.125f #if defined(GGML_COMMON_IMPL_C) GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ1S) 0xffffffffffffffff, 0xffffffffffffff01, 0xffffffffffff0000, 0xffffffffffff01ff, diff --git a/ggml-cuda.cu b/ggml-cuda.cu index d2945d3c2..01b1f15ee 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -1722,22 +1722,15 @@ static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_ const int il = tid/8; // 0...3 const int ib = tid%8; // 0...7 dst_t * y = yy + i*QK_K + 32*ib + 8*il; - const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 0xf) + 1); -#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics - int grid32[2]; const int8_t * q = (const int8_t *)grid32; - grid32[0] = *((const int *)(iq1s_grid_gpu + (x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)))); - grid32[1] = __vsub4((grid32[0] >> 4) & 0x0f0f0f0f, 0x01010101); - grid32[0] = __vsub4(grid32[0] & 0x0f0f0f0f, 0x01010101); + const float delta = x[i].qh[ib] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA; + const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 7) + 1); + uint32_t grid32[2]; const int8_t * q = (const int8_t *)grid32; + grid32[0] = iq1s_grid_gpu[x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)]; + grid32[1] = (grid32[0] >> 4) & 0x0f0f0f0f; + grid32[0] &= 0x0f0f0f0f; for (int j = 0; j < 8; ++j) { - y[j] = d * q[j]; + y[j] = d * (q[j] + delta); } -#else - const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8))); - for (int j = 0; j < 4; ++j) { - y[j+0] = d * ((grid[j] & 0xf) - 1); - y[j+4] = d * ((grid[j] >> 4) - 1); - } -#endif #else assert(false); #endif @@ -4560,22 +4553,25 @@ static __device__ __forceinline__ float vec_dot_iq1_s_q8_1( const int * q8 = (const int *)bq8_1[ib32].qs; for (int l = 0; l < 4; ++l) { const int * grid = (const int *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8))); - int grid0 = __vsub4(grid[0] & 0x0f0f0f0f, 0x01010101); - int grid1 = __vsub4((grid[0] >> 4) & 0x0f0f0f0f, 0x01010101); + int grid0 = grid[0] & 0x0f0f0f0f; + int grid1 = (grid[0] >> 4) & 0x0f0f0f0f; sumi = __dp4a(q8[2*l+1], grid1, __dp4a(q8[2*l+0], grid0, sumi)); } #else - const int8_t * q8 = bq8_1[ib32].qs; + const int8_t * q8 = bq8_1[ib32].qs; for (int l = 0; l < 4; ++l) { const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8))); for (int j = 0; j < 4; ++j) { - sumi += q8[j] * ((grid[j] & 0xf) - 1) + q8[j+4] * ((grid[j] >> 4) - 1); + sumi += q8[j] * (grid[j] & 0xf) + q8[j+4] * (grid[j] >> 4); } q8 += 8; } #endif - const float d = (float)bq1->d * __low2float(bq8_1[ib32].ds); - return d * sumi * (2*(bq1->qh[ib32] >> 12) + 1); + const float delta = bq1->qh[ib32] & 0x8000 ? -1-IQ1S_DELTA : -1+IQ1S_DELTA; + const float d1q = (float)bq1->d * (2*((bq1->qh[ib32] >> 12) & 7) + 1); + const float d = d1q * __low2float (bq8_1[ib32].ds); + const float m = d1q * __high2float(bq8_1[ib32].ds); + return d * sumi + m * delta; #else assert(false); return 0.f; diff --git a/ggml-metal.metal b/ggml-metal.metal index 912822ee6..21298a8c2 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -4377,7 +4377,7 @@ void kernel_mul_mv_iq1_s_f32_impl( + yl[j+16] * (grid3[j] & 0xf) + yl[j+20] * (grid3[j] >> 4) + yl[j+24] * (grid4[j] & 0xf) + yl[j+28] * (grid4[j] >> 4); } - sumf[row] += (float)dh[0] * (sum - sumy) * (2*(qh[0] >> 12) + 1); + sumf[row] += (float)dh[0] * (sum + sumy * (qh[0] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA)) * (2*((qh[0] >> 12) & 7) + 1); dh += nb*sizeof(block_iq1_s)/2; qs += nb*sizeof(block_iq1_s); @@ -5076,14 +5076,16 @@ void dequantize_iq1_s(device const block_iq1_s * xb, short il, thread type4x4 & const float d = xb->d; device const uint8_t * qs = xb->qs + 4*ib32 + 2*il; device const uint16_t * qh = xb->qh; - const float dl = d * (2*(qh[ib32] >> 12) + 1); - constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | (((qh[ib32] >> (6*il+0)) & 7) << 8))); - constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | (((qh[ib32] >> (6*il+3)) & 7) << 8))); + const float dl = d * (2*((qh[ib32] >> 12) & 7) + 1); + const float ml = dl * (qh[ib32] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA); + const uint16_t h = qh[ib32] >> 6*il; + constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | ((h << 8) & 0x700))); + constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | ((h << 5) & 0x700))); for (int i = 0; i < 4; ++i) { - reg[0][i] = dl * (grid1[i] & 0xf) - dl; - reg[1][i] = dl * (grid1[i] >> 4) - dl; - reg[2][i] = dl * (grid2[i] & 0xf) - dl; - reg[3][i] = dl * (grid2[i] >> 4) - dl; + reg[0][i] = dl * (grid1[i] & 0xf) + ml; + reg[1][i] = dl * (grid1[i] >> 4) + ml; + reg[2][i] = dl * (grid2[i] & 0xf) + ml; + reg[3][i] = dl * (grid2[i] >> 4) + ml; } } diff --git a/ggml-quants.c b/ggml-quants.c index 86b0764cb..06665eb2d 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -3456,11 +3456,12 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in const uint16_t * qh = x[i].qh; for (int ib = 0; ib < QK_K/32; ++ib) { - const float dl = d * (2*(qh[ib] >> 12) + 1); + const float dl = d * (2*((qh[ib] >> 12) & 7) + 1); + const float delta = qh[ib] & 0x8000 ? -IQ1S_DELTA : IQ1S_DELTA; for (int l = 0; l < 4; ++l) { const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8))); for (int j = 0; j < 8; ++j) { - y[j] = dl * grid[j]; + y[j] = dl * (grid[j] + delta); } y += 8; } @@ -9582,7 +9583,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void const uint8_t * qs = x[i].qs; const uint16_t * qh = x[i].qh; - int sumi1 = 0, sumi2 = 0; + int sumi1 = 0, sumi2 = 0, sumi3 = 0; for (int ib = 0; ib < QK_K/32; ib += 2) { @@ -9601,12 +9602,16 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void const int32x4_t p1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[0], q8b.val[0]), q1b.val[1], q8b.val[1]); const int32x4_t p2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[2], q8b.val[2]), q1b.val[3], q8b.val[3]); - sumi1 += vaddvq_s32(p1) * (2*(qh[ib+0] >> 12) + 1); - sumi2 += vaddvq_s32(p2) * (2*(qh[ib+1] >> 12) + 1); + const int ls1 = 2*((qh[ib+0] >> 12) & 7) + 1; + const int ls2 = 2*((qh[ib+1] >> 12) & 7) + 1; + sumi1 += vaddvq_s32(p1) * ls1; + sumi2 += vaddvq_s32(p2) * ls2; + sumi3 += (y[i].bsums[2*ib+0] + y[i].bsums[2*ib+1]) * ls1 * (qh[ib+0] & 0x8000 ? -1 : 1) + + (y[i].bsums[2*ib+2] + y[i].bsums[2*ib+3]) * ls2 * (qh[ib+1] & 0x8000 ? -1 : 1); } - sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * (sumi1 + sumi2); + sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * (sumi1 + sumi2 + IQ1S_DELTA * sumi3); } *s = sumf; @@ -9614,6 +9619,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void #elif defined __AVX2__ __m256 accum = _mm256_setzero_ps(); + float accum1 = 0; for (int i = 0; i < nb; ++i) { const int8_t * q8 = y[i].qs; @@ -9621,6 +9627,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void const uint16_t * qh = x[i].qh; __m256i sumi = _mm256_setzero_si256(); + int sumi1 = 0; for (int ib = 0; ib < QK_K/32; ib += 2) { const __m256i q1b_1 = _mm256_set_epi64x(iq1s_grid[qs[3] | ((qh[ib+0] >> 1) & 0x700)], iq1s_grid[qs[2] | ((qh[ib+0] << 2) & 0x700)], iq1s_grid[qs[1] | ((qh[ib+0] << 5) & 0x700)], iq1s_grid[qs[0] | ((qh[ib+0] << 8) & 0x700)]); @@ -9632,17 +9639,23 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void const __m256i dot1 = mul_add_epi8(q1b_1, q8b_1); const __m256i dot2 = mul_add_epi8(q1b_2, q8b_2); - const __m256i p1 = _mm256_madd_epi16(dot1, _mm256_set1_epi16(2*(qh[ib+0] >> 12) + 1)); - const __m256i p2 = _mm256_madd_epi16(dot2, _mm256_set1_epi16(2*(qh[ib+1] >> 12) + 1)); + const int16_t ls1 = 2*((qh[ib+0] >> 12) & 7) + 1; + const int16_t ls2 = 2*((qh[ib+1] >> 12) & 7) + 1; + const __m256i p1 = _mm256_madd_epi16(dot1, _mm256_set1_epi16(ls1)); + const __m256i p2 = _mm256_madd_epi16(dot2, _mm256_set1_epi16(ls2)); sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p1, p2)); + sumi1 += (y[i].bsums[2*ib+0] + y[i].bsums[2*ib+1]) * (qh[ib+0] & 0x8000 ? -1 : 1) * ls1 + + (y[i].bsums[2*ib+2] + y[i].bsums[2*ib+3]) * (qh[ib+1] & 0x8000 ? -1 : 1) * ls2; } - accum = _mm256_fmadd_ps(_mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(x[i].d)), _mm256_cvtepi32_ps(sumi), accum); + const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + accum = _mm256_fmadd_ps(_mm256_set1_ps(d), _mm256_cvtepi32_ps(sumi), accum); + accum1 += d * sumi1; } - *s = hsum_float_8(accum); + *s = hsum_float_8(accum) + IQ1S_DELTA * accum1; #else @@ -9653,9 +9666,10 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void const uint8_t * qs = x[i].qs; const uint16_t * qh = x[i].qh; - int sumi = 0; + int sumi = 0, sumi1 = 0; for (int ib = 0; ib < QK_K/32; ++ib) { - const int ls = 2*(qh[ib] >> 12) + 1; + const int ls = 2*((qh[ib] >> 12) & 7) + 1; + const int delta = qh[ib] & 0x8000 ? -1 : 1; int lsum = 0; for (int l = 0; l < 4; ++l) { const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8))); @@ -9664,11 +9678,12 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void } q8 += 8; } - sumi += ls * lsum; + sumi += ls * lsum; + sumi1 += ls * delta * (y[i].bsums[2*ib+0] + y[i].bsums[2*ib+1]); qs += 4; } - sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * sumi; + sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); } *s = sumf; @@ -11438,7 +11453,7 @@ static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const u } static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const uint64_t * restrict grid, - const float * restrict xval, const float * restrict weight, float scale, int8_t * restrict L, int ngrid) { + const float * restrict xval, const float * restrict weight, float scale, const float * restrict xg, int8_t * restrict L, int ngrid) { int num_neighbors = neighbours[0]; GGML_ASSERT(num_neighbors > 0); float best_score = FLT_MAX; @@ -11447,7 +11462,7 @@ static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const const int8_t * pg = (const int8_t *)(grid + neighbours[j]); float d2 = 0; for (int i = 0; i < 8; ++i) { - float q = (pg[i] - 3)/2; + float q = xg[(pg[i] - 1)/2]; float w = weight[i]; float diff = scale*q - xval[i]; d2 += w*diff*diff; @@ -11463,7 +11478,7 @@ static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const float d2 = 0; for (int j = 0; j < 8; ++j) { float w = weight[j]; - float q = (grid_i[j] - 3)/2; + float q = xg[(grid_i[j] - 1)/2]; float diff = scale*q - xval[i]; d2 += w*diff*diff; } @@ -11480,7 +11495,7 @@ static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const const int8_t * pg = (const int8_t *)(grid + neighbours[j]); float sumqx = 0, sumq2 = 0; for (int i = 0; i < 8; ++i) { - float q = (pg[i] - 3)/2; + float q = xg[(pg[i] - 1)/2]; float w = weight[i]; sumqx += w*q*xval[i]; sumq2 += w*q*q; @@ -11519,6 +11534,9 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy block_iq1_s * y = vy; + const float x_p[3] = {-1 + IQ1S_DELTA, IQ1S_DELTA, 1 + IQ1S_DELTA}; + const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA}; + float scales[QK_K/IQ1S_BLOCK_SIZE]; float weight[IQ1S_BLOCK_SIZE]; int8_t L[IQ1S_BLOCK_SIZE]; @@ -11527,6 +11545,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy float pairs[2*IQ1S_BLOCK_SIZE]; int * idx = (int *)(pairs + 1); uint16_t index[IQ1S_BLOCK_SIZE/8]; + int8_t shifts[QK_K/IQ1S_BLOCK_SIZE]; for (int ibl = 0; ibl < nbl; ++ibl) { @@ -11572,25 +11591,33 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy } } float best_score = 0, scale = max; - int besti1 = 0, besti2 = 0; + int besti1 = -1, besti2 = -1, best_shift = 0; for (int i1 = 0; i1 <= IQ1S_BLOCK_SIZE; ++i1) { for (int i2 = i1; i2 <= IQ1S_BLOCK_SIZE; ++i2) { - float sumqx = -(sumx[i1] - sumx[0]) + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2]); - float sumq2 = (sumw[i1] - sumw[0]) + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2]); + float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2])*x_p[2]; + float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2])*x_p[2]*x_p[2]; if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) { scale = sumqx/sumq2; best_score = scale*sumqx; - besti1 = i1; besti2 = i2; + besti1 = i1; besti2 = i2; best_shift = 1; + } + sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2])*x_m[2]; + sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2])*x_m[2]*x_m[2]; + if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) { + scale = sumqx/sumq2; best_score = scale*sumqx; + besti1 = i1; besti2 = i2; best_shift = -1; } } } + GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0); for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0; for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1; for (int j = besti2; j < IQ1S_BLOCK_SIZE; ++j) L[idx[2*j]] = 2; if (scale < 0) { for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) L[j] = 2 - L[j]; - scale = -scale; + scale = -scale; best_shift = -best_shift; } bool all_on_grid = true; + const float * xx = best_shift == 1 ? x_p : x_m; for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) { uint16_t u = 0; for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j); @@ -11598,7 +11625,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy if (grid_index < 0) { all_on_grid = false; const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1; - grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, L + 8*k, NGRID_IQ1S); + grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S); GGML_ASSERT(grid_index >= 0); } index[k] = grid_index; @@ -11609,7 +11636,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]); for (int j = 0; j < 8; ++j) { float w = weight[8*k + j]; - float q = (pg[j] - 3)/2; + float q = xx[(pg[j] - 1)/2]; sumqx += w*q*xb[8*k+j]; sumq2 += w*q*q; } @@ -11624,6 +11651,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy y[ibl].qh[ib] = h; GGML_ASSERT(scale >= 0); scales[ib] = scale; + shifts[ib] = best_shift; max_scale = MAX(max_scale, scale); } @@ -11632,12 +11660,13 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy continue; } - float d = max_scale/31; + float d = max_scale/15; y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.085f is another fudge factor. Don't ask me why it is needed. float id = 1/d; for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ++ib) { int l = nearest_int(0.5f*(id*scales[ib]-1)); - l = MAX(0, MIN(15, l)); + l = MAX(0, MIN(7, l)); + if (shifts[ib] == -1) l |= 8; y[ibl].qh[ib] |= (l << 12); } } From 5cdb371731caa2c41fcca42d4d2d43f94f6883b4 Mon Sep 17 00:00:00 2001 From: gliptic Date: Mon, 11 Mar 2024 20:59:03 +0100 Subject: [PATCH 22/48] grammar : fix unnecessarily retained pointer to rules (#6003) --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 98ec147ae..ad7b7b7d4 100644 --- a/llama.cpp +++ b/llama.cpp @@ -10538,7 +10538,7 @@ struct llama_grammar * llama_grammar_init( // loop over alternates of start rule to build initial stacks std::vector> stacks; - pos = rules[start_rule_index]; + pos = vec_rules[start_rule_index].data(); do { std::vector stack; if (!llama_grammar_is_end_of_sequence(pos)) { From 48358b2e5b3983c41ba7e61a493e84d3901dc7b9 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 12 Mar 2024 11:15:05 +0200 Subject: [PATCH 23/48] sycl : update IQ1_S kernels (WIP - not working!) (#5995) * sycl : try to fix after IQ1_S changes * sycl : iq1s_grid -> iq1s_grid_gpu * sycl : fix grid type --- ggml-sycl.cpp | 64 ++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 6d5684582..24187e7d5 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -3514,8 +3514,8 @@ static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N #define QI1_S (QK_K / (4*QR1_S)) typedef struct { sycl::half d; - uint8_t qs[QK_K/8]; - uint8_t scales[QK_K/16]; + uint8_t qs[QK_K/8]; + uint16_t qh[QK_K/32]; } block_iq1_s; static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); @@ -4891,10 +4891,9 @@ static void dequantize_block_iq3_s(const void * __restrict__ vx, dst_t * __restr template static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restrict__ yy, const sycl::nd_item<3> &item_ct1, - const uint64_t *iq1s_grid, + const uint32_t *iq1s_grid, const uint8_t *ksigns_iq2xs, const uint8_t *kmask_iq2xs) { - const int i = item_ct1.get_group(2); const block_iq1_s * x = (const block_iq1_s *) vx; @@ -4903,11 +4902,15 @@ static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restr const int il = tid/8; // 0...3 const int ib = tid%8; // 0...7 dst_t * y = yy + i*QK_K + 32*ib + 8*il; - const int i8 = 4*ib+il; - uint8_t h = x[i].scales[i8/2] >> 4*(i8%2); - const int8_t * grid = (const int8_t *)(iq1s_grid + (x[i].qs[i8] | ((h & 8) << 5))); - const float d = (float)x[i].d * (2*(h & 7) + 1); - for (int j = 0; j < 8; ++j) y[j] = d * grid[j]; + const uint8_t * qs = x[i].qs + 8*ib; + const uint8_t * grid1 = (const uint8_t *)(iq1s_grid + qs[2*il+0]); + const uint8_t * grid2 = (const uint8_t *)(iq1s_grid + qs[2*il+1]); + const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 0xf) + 1); + const uint8_t signs = ksigns_iq2xs[(x[i].qh[ib] >> 3*il) & 7]; + for (int j = 0; j < 4; ++j) { + y[j+0] = d * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f); + y[j+4] = d * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f); + } #else assert(false); #endif @@ -7803,28 +7806,27 @@ vec_dot_iq3_s_q8_1(const void *__restrict__ vbq, static __dpct_inline__ float vec_dot_iq1_s_q8_1(const void *__restrict__ vbq, const block_q8_1 *__restrict__ bq8_1, const int &iqs, - const uint64_t *iq1s_grid, const uint64_t *ksigns64) { + const uint32_t *iq1s_grid, const uint64_t *ksigns64) { #if QK_K == 256 const block_iq1_s * bq1 = (const block_iq1_s *) vbq; const int ib32 = iqs; - int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0; - const uint8_t h1 = bq1->scales[2*ib32+0]; - const uint8_t h2 = bq1->scales[2*ib32+1]; - const int * q8 = (const int *)bq8_1[ib32].qs; - const int * grid1 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5))); - const int * grid2 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+1] | ((h1 & 0x80) << 1))); - const int * grid3 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+2] | ((h2 & 0x08) << 5))); - const int * grid4 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+3] | ((h2 & 0x80) << 1))); - for (int j = 0; j < 2; ++j) { - sumi1 = dpct::dp4a(q8[j+0], grid1[j], sumi1); - sumi2 = dpct::dp4a(q8[j+2], grid2[j], sumi2); - sumi3 = dpct::dp4a(q8[j+4], grid3[j], sumi3); - sumi4 = dpct::dp4a(q8[j+6], grid4[j], sumi4); + const uint8_t * qs = bq1->qs + 4*ib32; + const int8_t * q8 = bq8_1[ib32].qs; + int sumi = 0; + for (int l = 0; l < 4; ++l) { + const uint32_t * grid = (const uint32_t *)(iq1s_grid + qs[l]); + const uint32_t * signs = (const uint32_t *)(ksigns64 + (qs[l] >> 8)); + const int grid_l = dpct::vectorized_binary( + grid[0] ^ signs[0], signs[0], std::minus<>()); + const int grid_h = dpct::vectorized_binary( + grid[1] ^ signs[1], signs[1], std::minus<>()); + sumi = dpct::dp4a(grid_l, *((int *)q8 + 0), sumi); + sumi = dpct::dp4a(grid_h, *((int *)q8 + 1), sumi); + q8 += 8; } - const float d = (float)bq1->d * bq8_1[ib32].ds[0]; - return d * (sumi1 * (2*(h1 & 7) + 1) + sumi2 * (2*((h1 >> 4) & 7) + 1) + - sumi3 * (2*(h2 & 7) + 1) + sumi4 * (2*((h2 >> 4) & 7) + 1)); + const float d = (float)bq1->d * bq8_1[ib32].ds[0] * 0.25f; + return d * sumi; #else assert(false); return 0.f; @@ -8644,7 +8646,7 @@ static void mul_mat_vec_q_iq3_s_q8_1(const void * __restrict__ vx, const void * template static void mul_mat_vec_q_iq1_s_q8_1(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows, const sycl::nd_item<3> &item_ct1, - const uint64_t *iq1s_grid_ptr, const uint64_t *ksigns64_ptr ) { + const uint32_t *iq1s_grid_ptr, const uint64_t *ksigns64_ptr ) { const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) + item_ct1.get_local_id(1); @@ -10406,7 +10408,7 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k, dpct::queue_ptr stream) { const int nb = k / QK_K; { - iq1s_grid.init(*stream); + iq1s_grid_gpu.init(*stream); ksigns_iq2xs.init(*stream); kmask_iq2xs.init(*stream); @@ -10414,7 +10416,7 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k, {sycl::aspect::fp16}); stream->submit([&](sycl::handler &cgh) { - auto iq1s_grid_ptr_ct1 = iq1s_grid.get_ptr(); + auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr(); auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr(); auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr(); @@ -11154,11 +11156,11 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - iq1s_grid.init(*stream); + iq1s_grid_gpu.init(*stream); ksigns64.init(*stream); stream->submit([&](sycl::handler &cgh) { - auto iq1s_grid_ptr_ct1 = iq1s_grid.get_ptr(); + auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr(); auto ksigns64_ptr_ct1 = ksigns64.get_ptr(); cgh.parallel_for( From 184215e783dfad76aded2c68244c327a5c507df5 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 12 Mar 2024 13:49:55 +0200 Subject: [PATCH 24/48] ggml : fix UB in IQ2_S and IQ3_S (#6012) --- ggml-quants.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ggml-quants.c b/ggml-quants.c index 06665eb2d..3ad558a1b 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -9025,7 +9025,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void * vld1_s8((const int8_t *)(iq2s_grid + (qs[7] | ((qh[ib32+1] << 2) & 0x300))))); qs += 8; - vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16))); + vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | ((uint32_t) signs[1] << 16))); vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2); vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2); vs.val[0] = vceqq_u8(vs.val[0], mask2); @@ -9034,7 +9034,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void * q2s.val[0] = vmulq_s8(vreinterpretq_s8_u8(vorrq_u8(vs.val[0], m1)), q2s.val[0]); q2s.val[1] = vmulq_s8(vreinterpretq_s8_u8(vorrq_u8(vs.val[1], m1)), q2s.val[1]); - vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | (signs[3] << 16))); + vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | ((uint32_t) signs[3] << 16))); vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2); vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2); vs.val[0] = vceqq_u8(vs.val[0], mask2); @@ -9105,12 +9105,12 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void * iq2s_grid[qs[4] | ((qh[ib32+1] << 8) & 0x300)]); qs += 8; - __m256i aux256 = _mm256_set1_epi32(signs[0] | (signs[1] << 16)); + __m256i aux256 = _mm256_set1_epi32(signs[0] | ((uint32_t) signs[1] << 16)); aux256 = _mm256_and_si256(_mm256_shuffle_epi8(aux256,mask1), mask2); const __m256i s2_1 = _mm256_cmpeq_epi8(aux256, mask2); const __m256i q8s_1 = _mm256_sub_epi8(_mm256_xor_si256(s2_1, q8_1), s2_1); - aux256 = _mm256_set1_epi32(signs[2] | (signs[3] << 16)); + aux256 = _mm256_set1_epi32(signs[2] | ((uint32_t) signs[3] << 16)); aux256 = _mm256_and_si256(_mm256_shuffle_epi8(aux256,mask1), mask2); const __m256i s2_2 = _mm256_cmpeq_epi8(aux256, mask2); const __m256i q8s_2 = _mm256_sub_epi8(_mm256_xor_si256(s2_2, q8_2), s2_2); @@ -9386,7 +9386,7 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void * iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]); - vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16))); + vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | ((uint32_t) signs[1] << 16))); vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2); vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2); vs.val[0] = vorrq_u8(vceqq_u8(vs.val[0], mask2), m1); @@ -9395,7 +9395,7 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void * q3s.val[0] = vmulq_s8(vreinterpretq_s8_u8(vs.val[0]), vreinterpretq_s8_u32(aux32x4_0)); q3s.val[1] = vmulq_s8(vreinterpretq_s8_u8(vs.val[1]), vreinterpretq_s8_u32(aux32x4_1)); - vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | (signs[3] << 16))); + vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | ((uint32_t) signs[3] << 16))); vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2); vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2); vs.val[0] = vorrq_u8(vceqq_u8(vs.val[0], mask2), m1); From 8030da7afea2d89f997aeadbd14183d399a017b9 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 12 Mar 2024 14:27:20 +0200 Subject: [PATCH 25/48] ggml : reuse quantum structs across backends (#5943) * ggml : reuse quant blocks across backends ggml-ci * ggml : define helper constants only for CUDA and SYCL ggml-ci * ggml : define helper quantum constants for SYCL ggml-ci --- ggml-common.h | 409 ++++++++++++++++++++++++++++++++++++++++++++++- ggml-cuda.cu | 244 ++-------------------------- ggml-metal.m | 2 +- ggml-metal.metal | 180 +-------------------- ggml-quants.c | 78 ++++----- ggml-quants.h | 246 +--------------------------- ggml-sycl.cpp | 192 +--------------------- 7 files changed, 470 insertions(+), 881 deletions(-) diff --git a/ggml-common.h b/ggml-common.h index 2402b773c..0257c928c 100644 --- a/ggml-common.h +++ b/ggml-common.h @@ -1,4 +1,408 @@ -#pragma once +#ifndef GGML_COMMON_DECL + +#if defined(GGML_COMMON_DECL_C) +#include + +typedef uint16_t ggml_half; +typedef uint32_t ggml_half2; + +#define GGML_COMMON_AGGR + +#define GGML_COMMON_DECL +#elif defined(GGML_COMMON_DECL_METAL) +#include + +typedef half ggml_half; +typedef half2 ggml_half2; + +#define GGML_COMMON_AGGR + +#define GGML_COMMON_DECL +#elif defined(GGML_COMMON_DECL_CUDA) +#include +#include + +typedef half ggml_half; +typedef half2 ggml_half2; + +#define GGML_COMMON_AGGR data + +#define GGML_COMMON_DECL +#elif defined(GGML_COMMON_DECL_HIP) +#include +#include + +typedef half ggml_half; +typedef half2 ggml_half2; + +#define GGML_COMMON_AGGR data + +#define GGML_COMMON_DECL +#elif defined(GGML_COMMON_DECL_SYCL) +#include +#include + +typedef sycl::half ggml_half; +typedef sycl::half2 ggml_half2; + +#define GGML_COMMON_AGGR data + +#define GGML_COMMON_DECL +#endif + +#if defined(GGML_COMMON_DECL) + +#ifndef __cplusplus +#ifndef static_assert +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L) +#define static_assert(cond, msg) _Static_assert(cond, msg) +#else +#define static_assert(cond, msg) struct global_scope_noop_trick +#endif +#endif +#endif // __cplusplus + +// QK = number of values after dequantization +// QK_K = super-block size + +#ifdef GGML_QKK_64 +#define QK_K 64 +#define K_SCALE_SIZE 4 +#else +#define QK_K 256 +#define K_SCALE_SIZE 12 +#endif // GGML_QKK_64 + +#if defined(GGML_COMMON_DECL_CUDA) || defined(GGML_COMMON_DECL_HIP) || defined(GGML_COMMON_DECL_SYCL) +// QR = QK / number of values before dequantization +// QI = number of 32 bit integers before dequantization + +#define QI4_0 (QK4_0 / (4 * QR4_0)) +#define QR4_0 2 + +#define QI4_1 (QK4_1 / (4 * QR4_1)) +#define QR4_1 2 + +#define QI5_0 (QK5_0 / (4 * QR5_0)) +#define QR5_0 2 + +#define QI5_1 (QK5_1 / (4 * QR5_1)) +#define QR5_1 2 + +#define QI8_0 (QK8_0 / (4 * QR8_0)) +#define QR8_0 1 + +#define QI8_1 (QK8_1 / (4 * QR8_1)) +#define QR8_1 1 + +#define QI2_K (QK_K / (4*QR2_K)) +#define QR2_K 4 + +#define QI3_K (QK_K / (4*QR3_K)) +#define QR3_K 4 + +#define QI4_K (QK_K / (4*QR4_K)) +#define QR4_K 2 + +#define QI5_K (QK_K / (4*QR5_K)) +#define QR5_K 2 + +#define QI6_K (QK_K / (4*QR6_K)) +#define QR6_K 2 + +#define QI2_XXS (QK_K / (4*QR2_XXS)) +#define QR2_XXS 8 + +#define QI2_XS (QK_K / (4*QR2_XS)) +#define QR2_XS 8 + +#define QI2_S (QK_K / (4*QR2_S)) +#define QR2_S 8 + +#define QI3_XXS (QK_K / (4*QR3_XXS)) +#define QR3_XXS 8 + +#define QI3_XS (QK_K / (4*QR3_XS)) +#define QR3_XS 8 + +#define QI1_S (QK_K / (4*QR1_S)) +#define QR1_S 8 + +#define QI4_NL (QK4_NL / (4*QR4_NL)) +#define QR4_NL 2 + +#if QK_K == 64 +#define QI4_XS QI4_NL +#define QR4_XS QR4_NL +#else +#define QI4_XS (QK_K / (4*QR4_XS)) +#define QR4_XS 8 +#endif + +#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP + +#define QK4_0 32 +typedef struct { + ggml_half d; // delta + uint8_t qs[QK4_0 / 2]; // nibbles / quants +} block_q4_0; +static_assert(sizeof(block_q4_0) == sizeof(ggml_half) + QK4_0 / 2, "wrong q4_0 block size/padding"); + +#define QK4_1 32 +typedef struct { + union { + struct { + ggml_half d; // delta + ggml_half m; // min + } GGML_COMMON_AGGR; + ggml_half2 dm; + }; + uint8_t qs[QK4_1 / 2]; // nibbles / quants +} block_q4_1; +static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding"); + +#define QK5_0 32 +typedef struct { + ggml_half d; // delta + uint8_t qh[4]; // 5-th bit of quants + uint8_t qs[QK5_0 / 2]; // nibbles / quants +} block_q5_0; +static_assert(sizeof(block_q5_0) == sizeof(ggml_half) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding"); + +#define QK5_1 32 +typedef struct { + union { + struct { + ggml_half d; // delta + ggml_half m; // min + } GGML_COMMON_AGGR; + ggml_half2 dm; + }; + uint8_t qh[4]; // 5-th bit of quants + uint8_t qs[QK5_1 / 2]; // nibbles / quants +} block_q5_1; +static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_half) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding"); + +#define QK8_0 32 +typedef struct { + ggml_half d; // delta + int8_t qs[QK8_0]; // quants +} block_q8_0; +static_assert(sizeof(block_q8_0) == sizeof(ggml_half) + QK8_0, "wrong q8_0 block size/padding"); + +#define QK8_1 32 +typedef struct { + union { + struct { + ggml_half d; // delta + ggml_half s; // d * sum(qs[i]) + } GGML_COMMON_AGGR; + ggml_half2 ds; + }; + int8_t qs[QK8_1]; // quants +} block_q8_1; +static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding"); + +// +// Super-block quantization structures +// + +// 2-bit quantization +// weight is represented as x = a * q + b +// 16 blocks of 16 elements each +// Effectively 2.625 bits per weight +typedef struct { + uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits + uint8_t qs[QK_K/4]; // quants + union { + struct { + ggml_half d; // super-block scale for quantized scales + ggml_half dmin; // super-block scale for quantized mins + } GGML_COMMON_AGGR; + ggml_half2 dm; + }; +} block_q2_K; +static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding"); + +// 3-bit quantization +// weight is represented as x = a * q +// 16 blocks of 16 elements each +// Effectively 3.4375 bits per weight +#ifdef GGML_QKK_64 +typedef struct { + uint8_t hmask[QK_K/8]; // quants - high bit + uint8_t qs[QK_K/4]; // quants - low 2 bits + uint8_t scales[2]; + ggml_half d; // super-block scale +} block_q3_K; +static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding"); +#else +typedef struct { + uint8_t hmask[QK_K/8]; // quants - high bit + uint8_t qs[QK_K/4]; // quants - low 2 bits + uint8_t scales[12]; // scales, quantized with 6 bits + ggml_half d; // super-block scale +} block_q3_K; +static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding"); +#endif + +// 4-bit quantization +// 8 blocks of 32 elements each +// weight is represented as x = a * q + b +// Effectively 4.5 bits per weight +#ifdef GGML_QKK_64 +typedef struct { + ggml_half d[2]; // super-block scales/mins + uint8_t scales[2]; // 4-bit block scales/mins + uint8_t qs[QK_K/2]; // 4--bit quants +} block_q4_K; +static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + QK_K/2 + 2, "wrong q4_K block size/padding"); +#else +typedef struct { + union { + struct { + ggml_half d; // super-block scale for quantized scales + ggml_half dmin; // super-block scale for quantized mins + } GGML_COMMON_AGGR; + ggml_half2 dm; + }; + uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits + uint8_t qs[QK_K/2]; // 4--bit quants +} block_q4_K; +static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding"); +#endif + +// 5-bit quantization +// 8 blocks of 32 elements each +// weight is represented as x = a * q + b +// Effectively 5.5 bits per weight +#ifdef GGML_QKK_64 +typedef struct { + ggml_half d; // super-block scale + int8_t scales[QK_K/16]; // 8-bit block scales + uint8_t qh[QK_K/8]; // quants, high bit + uint8_t qs[QK_K/2]; // quants, low 4 bits +} block_q5_K; +static_assert(sizeof(block_q5_K) == sizeof(ggml_half) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding"); +#else +typedef struct { + union { + struct { + ggml_half d; // super-block scale for quantized scales + ggml_half dmin; // super-block scale for quantized mins + } GGML_COMMON_AGGR; + ggml_half2 dm; + }; + uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits + uint8_t qh[QK_K/8]; // quants, high bit + uint8_t qs[QK_K/2]; // quants, low 4 bits +} block_q5_K; +static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding"); +#endif + +// 6-bit quantization +// weight is represented as x = a * q +// 16 blocks of 16 elements each +// Effectively 6.5625 bits per weight +typedef struct { + uint8_t ql[QK_K/2]; // quants, lower 4 bits + uint8_t qh[QK_K/4]; // quants, upper 2 bits + int8_t scales[QK_K/16]; // scales, quantized with 8 bits + ggml_half d; // super-block scale +} block_q6_K; +static_assert(sizeof(block_q6_K) == sizeof(ggml_half) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding"); + +// This is only used for intermediate quantization and dot products +typedef struct { + float d; // delta + int8_t qs[QK_K]; // quants + int16_t bsums[QK_K/16]; // sum of quants in groups of 16 +} block_q8_K; +static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding"); + +// (Almost) "true" 2-bit quantization. +// Due to the need to use blocks as per ggml design, it ends up using +// 2.0625 bpw because of the 16-bit scale for each block of 256. +typedef struct { + ggml_half d; + uint16_t qs[QK_K/8]; +} block_iq2_xxs; +static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_half) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding"); + +// 2.3125 bpw quants +typedef struct { + ggml_half d; + uint16_t qs[QK_K/8]; + uint8_t scales[QK_K/32]; +} block_iq2_xs; +static_assert(sizeof(block_iq2_xs) == sizeof(ggml_half) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding"); + +// 2.5625 bpw quants +typedef struct { + ggml_half d; + uint8_t qs[QK_K/4]; + uint8_t qh[QK_K/32]; + uint8_t scales[QK_K/32]; +} block_iq2_s; +static_assert(sizeof(block_iq2_s) == sizeof(ggml_half) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding"); + +// (Almost) "true" 3-bit quantization. +// Due to the need to use blocks as per ggml design, it ends up using +// 3.0625 bpw because of the 16-bit scale for each block of 256. +typedef struct { + ggml_half d; + uint8_t qs[3*QK_K/8]; +} block_iq3_xxs; +static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_half) + 3*(QK_K/8), "wrong iq3_xxs block size/padding"); + +// 3.4375 bpw +#if QK_K == 64 +#define IQ3S_N_SCALE 2 +#else +#define IQ3S_N_SCALE QK_K/64 +#endif +typedef struct { + ggml_half d; + uint8_t qs[QK_K/4]; + uint8_t qh[QK_K/32]; + uint8_t signs[QK_K/8]; + uint8_t scales[IQ3S_N_SCALE]; +} block_iq3_s; +static_assert(sizeof(block_iq3_s) == sizeof(ggml_half) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding"); + +typedef struct { + ggml_half d; + uint8_t qs[QK_K/8]; + uint16_t qh[QK_K/32]; +} block_iq1_s; +static_assert(sizeof(block_iq1_s) == sizeof(ggml_half) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); + +// Non-linear quants +#define QK4_NL 32 +typedef struct { + ggml_half d; + uint8_t qs[QK4_NL/2]; +} block_iq4_nl; +static_assert(sizeof(block_iq4_nl) == sizeof(ggml_half) + QK4_NL/2, "wrong iq4_nl block size/padding"); + +#if QK_K == 64 +#define block_iq4_xs block_iq4_nl +#else +typedef struct { + ggml_half d; + uint16_t scales_h; + uint8_t scales_l[QK_K/64]; + uint8_t qs[QK_K/2]; +} block_iq4_xs; +static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding"); +#endif + +#endif // GGML_COMMON_DECL +#endif // GGML_COMMON_DECL + +//////////////////////////////////////////////////////////////////////////////// + +#ifndef GGML_COMMON_IMPL #if defined(GGML_COMMON_IMPL_C) #include @@ -14,7 +418,7 @@ #define GGML_TABLE_END() }; #define GGML_COMMON_IMPL -#elif defined(GGML_COMMON_IMPL_CUDA) +#elif defined(GGML_COMMON_IMPL_CUDA) || defined(GGML_COMMON_IMPL_HIP) #include #define GGML_TABLE_BEGIN(type, name, size) static const __device__ type name[size] = { @@ -1423,3 +1827,4 @@ GGML_TABLE_END() #endif #endif // GGML_COMMON_IMPL +#endif // GGML_COMMON_IMPL diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 01b1f15ee..b8834ed05 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -2,7 +2,13 @@ #include "ggml.h" #include "ggml-backend-impl.h" +#if defined(GGML_USE_HIPBLAS) +#define GGML_COMMON_DECL_HIP +#define GGML_COMMON_IMPL_HIP +#else +#define GGML_COMMON_DECL_CUDA #define GGML_COMMON_IMPL_CUDA +#endif #include "ggml-common.h" #include @@ -359,66 +365,6 @@ typedef void (*ggml_cuda_op_flatten_t)( const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream); -// QK = number of values after dequantization -// QR = QK / number of values before dequantization -// QI = number of 32 bit integers before dequantization - -#define QK4_0 32 -#define QR4_0 2 -#define QI4_0 (QK4_0 / (4 * QR4_0)) -typedef struct { - half d; // delta - uint8_t qs[QK4_0 / 2]; // nibbles / quants -} block_q4_0; -static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding"); - -#define QK4_1 32 -#define QR4_1 2 -#define QI4_1 (QK4_1 / (4 * QR4_1)) -typedef struct { - half2 dm; // dm.x = delta, dm.y = min - uint8_t qs[QK4_1 / 2]; // nibbles / quants -} block_q4_1; -static_assert(sizeof(block_q4_1) == sizeof(ggml_fp16_t) * 2 + QK4_1 / 2, "wrong q4_1 block size/padding"); - -#define QK5_0 32 -#define QR5_0 2 -#define QI5_0 (QK5_0 / (4 * QR5_0)) -typedef struct { - half d; // delta - uint8_t qh[4]; // 5-th bit of quants - uint8_t qs[QK5_0 / 2]; // nibbles / quants -} block_q5_0; -static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding"); - -#define QK5_1 32 -#define QR5_1 2 -#define QI5_1 (QK5_1 / (4 * QR5_1)) -typedef struct { - half2 dm; // dm.x = delta, dm.y = min - uint8_t qh[4]; // 5-th bit of quants - uint8_t qs[QK5_1 / 2]; // nibbles / quants -} block_q5_1; -static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding"); - -#define QK8_0 32 -#define QR8_0 1 -#define QI8_0 (QK8_0 / (4 * QR8_0)) -typedef struct { - half d; // delta - int8_t qs[QK8_0]; // quants -} block_q8_0; -static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding"); - -#define QK8_1 32 -#define QR8_1 1 -#define QI8_1 (QK8_1 / (4 * QR8_1)) -typedef struct { - half2 ds; // ds.x = delta, ds.y = sum - int8_t qs[QK8_0]; // quants -} block_q8_1; -static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_fp16_t) + QK8_0, "wrong q8_1 block size/padding"); - typedef float (*vec_dot_q_cuda_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs); typedef void (*allocate_tiles_cuda_t)(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc); typedef void (*load_tiles_cuda_t)( @@ -428,174 +374,6 @@ typedef float (*vec_dot_q_mul_mat_cuda_t)( const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, const int * __restrict__ y_qs, const half2 * __restrict__ y_ms, const int & i, const int & j, const int & k); -//================================= k-quants - -#ifdef GGML_QKK_64 -#define QK_K 64 -#define K_SCALE_SIZE 4 -#else -#define QK_K 256 -#define K_SCALE_SIZE 12 -#endif - -#define QR2_K 4 -#define QI2_K (QK_K / (4*QR2_K)) -typedef struct { - uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits - uint8_t qs[QK_K/4]; // quants - half2 dm; // super-block scale for quantized scales/mins -} block_q2_K; -static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding"); - -#define QR3_K 4 -#define QI3_K (QK_K / (4*QR3_K)) -typedef struct { - uint8_t hmask[QK_K/8]; // quants - high bit - uint8_t qs[QK_K/4]; // quants - low 2 bits -#ifdef GGML_QKK_64 - uint8_t scales[2]; // scales, quantized with 8 bits -#else - uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits -#endif - half d; // super-block scale -} block_q3_K; -//static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + K_SCALE_SIZE, "wrong q3_K block size/padding"); - -#define QR4_K 2 -#define QI4_K (QK_K / (4*QR4_K)) -#ifdef GGML_QKK_64 -typedef struct { - half dm[2]; // super-block scales/mins - uint8_t scales[2]; // 4-bit block scales/mins - uint8_t qs[QK_K/2]; // 4--bit quants -} block_q4_K; -static_assert(sizeof(block_q4_K) == sizeof(half2) + QK_K/2 + 2, "wrong q4_K block size/padding"); -#else -typedef struct { - half2 dm; // super-block scale for quantized scales/mins - uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits - uint8_t qs[QK_K/2]; // 4--bit quants -} block_q4_K; -static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding"); -#endif - -#define QR5_K 2 -#define QI5_K (QK_K / (4*QR5_K)) -#ifdef GGML_QKK_64 -typedef struct { - half d; // super-block scale - int8_t scales[QK_K/16]; // block scales - uint8_t qh[QK_K/8]; // quants, high bit - uint8_t qs[QK_K/2]; // quants, low 4 bits -} block_q5_K; -static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding"); -#else -typedef struct { - half2 dm; // super-block scale for quantized scales/mins - uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits - uint8_t qh[QK_K/8]; // quants, high bit - uint8_t qs[QK_K/2]; // quants, low 4 bits -} block_q5_K; -static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding"); -#endif - -#define QR6_K 2 -#define QI6_K (QK_K / (4*QR6_K)) -typedef struct { - uint8_t ql[QK_K/2]; // quants, lower 4 bits - uint8_t qh[QK_K/4]; // quants, upper 2 bits - int8_t scales[QK_K/16]; // scales - half d; // delta -} block_q6_K; -static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding"); - -#define QR2_XXS 8 -#define QI2_XXS (QK_K / (4*QR2_XXS)) -typedef struct { - half d; - uint16_t qs[QK_K/8]; -} block_iq2_xxs; -static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding"); - -#define QR2_XS 8 -#define QI2_XS (QK_K / (4*QR2_XS)) -typedef struct { - half d; - uint16_t qs[QK_K/8]; - uint8_t scales[QK_K/32]; -} block_iq2_xs; -static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding"); - -// 2.5625 bpw quants -#define QR2_S 8 -#define QI2_S (QK_K / (4*QR2_S)) -typedef struct { - half d; - uint8_t qs[QK_K/4]; - uint8_t qh[QK_K/32]; - uint8_t scales[QK_K/32]; -} block_iq2_s; -static_assert(sizeof(block_iq2_s) == sizeof(ggml_fp16_t) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding"); - -#define QR3_XXS 8 -#define QI3_XXS (QK_K / (4*QR3_XXS)) -typedef struct { - half d; - uint8_t qs[3*(QK_K/8)]; -} block_iq3_xxs; -static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding"); - -#define QR3_XS 8 -#define QI3_XS (QK_K / (4*QR3_XS)) -#if QK_K == 64 -#define IQ3S_N_SCALE 2 -#else -#define IQ3S_N_SCALE QK_K/64 -#endif -typedef struct { - half d; - uint8_t qs[QK_K/4]; - uint8_t qh[QK_K/32]; - uint8_t signs[QK_K/8]; - uint8_t scales[IQ3S_N_SCALE]; -} block_iq3_s; -static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding"); - -#define QR1_S 8 -#define QI1_S (QK_K / (4*QR1_S)) -typedef struct { - half d; - uint8_t qs[QK_K/8]; - uint16_t qh[QK_K/32]; -} block_iq1_s; -static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); - -#define QK4_NL 32 -#define QR4_NL 2 -#define QI4_NL (QK4_NL / (4*QR4_NL)) -typedef struct { - half d; - uint8_t qs[QK4_NL/2]; -} block_iq4_nl; -static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding"); - -#if QK_K == 64 -#define block_iq4_xs block_iq4_nl -#define QR4_XS QR4_NL -#define QI4_XS QI4_NL -#else -// QR4_XS = 8 is very slightly faster than QR4_XS = 4 -#define QR4_XS 8 -#define QI4_XS (QK_K / (4*QR4_XS)) -typedef struct { - half d; - uint16_t scales_h; - uint8_t scales_l[QK_K/64]; - uint8_t qs[QK_K/2]; -} block_iq4_xs; -static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding"); -#endif - #define WARP_SIZE 32 #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses @@ -3570,7 +3348,7 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1( #pragma unroll for (int i = 0; i < QR2_K; ++ i) { u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1); - d8[i] = __low2half(bq8_1[bq8_offset + i].ds); + d8[i] = __low2float(bq8_1[bq8_offset + i].ds); } return vec_dot_q2_K_q8_1_impl_mmvq(v, u, scales, bq2_K->dm, d8); @@ -3692,7 +3470,7 @@ static __device__ __forceinline__ float vec_dot_q3_K_q8_1( #pragma unroll for (int i = 0; i < QR3_K; ++i) { u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1); - d8[i] = __low2half(bq8_1[bq8_offset + i].ds); + d8[i] = __low2float(bq8_1[bq8_offset + i].ds); } return vec_dot_q3_K_q8_1_impl_mmvq(vl, vh, u, bq3_K->scales, scale_offset, d, d8); @@ -3861,7 +3639,7 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1( for (int i = 0; i < QR4_K; ++i) { const block_q8_1 * bq8i = bq8_1 + bq8_offset + i; - d8[i] = __low2half(bq8i->ds); + d8[i] = __low2float(bq8i->ds); const int * q8 = (const int *)bq8i->qs + ((iqs/2)%4); u[2*i+0] = q8[0]; @@ -4226,7 +4004,7 @@ static __device__ __forceinline__ float vec_dot_q6_K_q8_1( #pragma unroll for (int i = 0; i < QR6_K; ++i) { u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + 2*i].qs, iqs % QI8_1); - d8[i] = __low2half(bq8_1[bq8_offset + 2*i].ds); + d8[i] = __low2float(bq8_1[bq8_offset + 2*i].ds); } return vec_dot_q6_K_q8_1_impl_mmvq(vl, vh, u, scales, bq6_K->d, d8); @@ -4763,7 +4541,7 @@ static __device__ __forceinline__ void mul_mat_q( *dsi_dst = *dsi_src; } else { float * dfi_dst = (float *) dsi_dst; - *dfi_dst = __low2half(*dsi_src); + *dfi_dst = __low2float(*dsi_src); } } diff --git a/ggml-metal.m b/ggml-metal.m index 3cf80de7b..1825d3320 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -336,7 +336,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { NSMutableDictionary * prep = [NSMutableDictionary dictionary]; #ifdef GGML_QKK_64 - prep[@"QK_K"] = @(64); + prep[@"GGML_QKK_64"] = @(1); #endif MTLCompileOptions* options = [MTLCompileOptions new]; diff --git a/ggml-metal.metal b/ggml-metal.metal index 21298a8c2..ebf2f5b47 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -1,3 +1,7 @@ +#define GGML_COMMON_DECL_METAL +#define GGML_COMMON_IMPL_METAL +#include "ggml-common.h" + #include #define GGML_COMMON_IMPL_METAL @@ -9,41 +13,6 @@ using namespace metal; #define MIN(x, y) ((x) < (y) ? (x) : (y)) #define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; } -#define QK4_0 32 -#define QR4_0 2 -typedef struct { - half d; // delta - uint8_t qs[QK4_0 / 2]; // nibbles / quants -} block_q4_0; - -#define QK4_1 32 -typedef struct { - half d; // delta - half m; // min - uint8_t qs[QK4_1 / 2]; // nibbles / quants -} block_q4_1; - -#define QK5_0 32 -typedef struct { - half d; // delta - uint8_t qh[4]; // 5-th bit of quants - uint8_t qs[QK5_0 / 2]; // nibbles / quants -} block_q5_0; - -#define QK5_1 32 -typedef struct { - half d; // delta - half m; // min - uint8_t qh[4]; // 5-th bit of quants - uint8_t qs[QK5_1 / 2]; // nibbles / quants -} block_q5_1; - -#define QK8_0 32 -typedef struct { - half d; // delta - int8_t qs[QK8_0]; // quants -} block_q8_0; - #define N_SIMDWIDTH 32 // assuming SIMD group size is 32 enum ggml_sort_order { @@ -2478,147 +2447,6 @@ kernel void kernel_concat( } } -//============================================ k-quants ====================================================== - -#ifndef QK_K -#define QK_K 256 -#else -static_assert(QK_K == 256 || QK_K == 64, "QK_K must be 256 or 64"); -#endif - -#if QK_K == 256 -#define K_SCALE_SIZE 12 -#else -#define K_SCALE_SIZE 4 -#endif - -typedef struct { - uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits - uint8_t qs[QK_K/4]; // quants - half d; // super-block scale for quantized scales - half dmin; // super-block scale for quantized mins -} block_q2_K; -// 84 bytes / block - -typedef struct { - uint8_t hmask[QK_K/8]; // quants - high bit - uint8_t qs[QK_K/4]; // quants - low 2 bits -#if QK_K == 64 - uint8_t scales[2]; -#else - uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits -#endif - half d; // super-block scale -} block_q3_K; - -#if QK_K == 64 -typedef struct { - half d[2]; // super-block scales/mins - uint8_t scales[2]; - uint8_t qs[QK_K/2]; // 4-bit quants -} block_q4_K; -#else -typedef struct { - half d; // super-block scale for quantized scales - half dmin; // super-block scale for quantized mins - uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits - uint8_t qs[QK_K/2]; // 4--bit quants -} block_q4_K; -#endif - -#if QK_K == 64 -typedef struct { - half d; // super-block scales/mins - int8_t scales[QK_K/16]; // 8-bit block scales - uint8_t qh[QK_K/8]; // quants, high bit - uint8_t qs[QK_K/2]; // quants, low 4 bits -} block_q5_K; -#else -typedef struct { - half d; // super-block scale for quantized scales - half dmin; // super-block scale for quantized mins - uint8_t scales[3*QK_K/64]; // scales and mins, quantized with 6 bits - uint8_t qh[QK_K/8]; // quants, high bit - uint8_t qs[QK_K/2]; // quants, low 4 bits -} block_q5_K; -// 176 bytes / block -#endif - -typedef struct { - uint8_t ql[QK_K/2]; // quants, lower 4 bits - uint8_t qh[QK_K/4]; // quants, upper 2 bits - int8_t scales[QK_K/16]; // scales, quantized with 8 bits - half d; // super-block scale -} block_q6_K; -// 210 bytes / block - -typedef struct { - half d; - uint16_t qs[QK_K/8]; -} block_iq2_xxs; -// 66 bytes / block for QK_K = 256, so 2.0625 bpw - -typedef struct { - half d; - uint16_t qs[QK_K/8]; - uint8_t scales[QK_K/32]; -} block_iq2_xs; -// 74 bytes / block for QK_K = 256, so 2.3125 bpw - -// 2.5625 bpw quants -typedef struct { - half d; - uint8_t qs[QK_K/4]; - uint8_t qh[QK_K/32]; - uint8_t scales[QK_K/32]; -} block_iq2_s; - -typedef struct { - half d; - uint8_t qs[3*QK_K/8]; -} block_iq3_xxs; -// 98 bytes / block for QK_K = 256, so 3.0625 bpw - -// 3.4375 bpw -#if QK_K == 64 -#define IQ3S_N_SCALE 2 -#else -#define IQ3S_N_SCALE QK_K/64 -#endif -typedef struct { - half d; - uint8_t qs[QK_K/4]; - uint8_t qh[QK_K/32]; - uint8_t signs[QK_K/8]; - uint8_t scales[IQ3S_N_SCALE]; -} block_iq3_s; - -typedef struct { - half d; - uint8_t qs[QK_K/8]; - uint16_t qh[QK_K/32]; -} block_iq1_s; - -// Non-linear quants -#define QK4_NL 32 -typedef struct { - half d; - uint8_t qs[QK4_NL/2]; -} block_iq4_nl; - -#if QK_K == 64 -#define block_iq4_xs block_iq4_nl -#else -typedef struct { - half d; - uint16_t scales_h; - uint8_t scales_l[QK_K/64]; - uint8_t qs[QK_K/2]; -} block_iq4_xs; -#endif - -//====================================== dot products ========================= - void kernel_mul_mv_q2_K_f32_impl( device const void * src0, device const float * src1, diff --git a/ggml-quants.c b/ggml-quants.c index 3ad558a1b..109dd6660 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -1,3 +1,6 @@ +#define GGML_COMMON_IMPL_C +#include "ggml-common.h" + #include "ggml-quants.h" #include "ggml-impl.h" @@ -951,7 +954,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict const float d = amax / ((1 << 7) - 1); const float id = d ? 1.0f/d : 0.0f; - y[i].d = d; + y[i].d = GGML_FP32_TO_FP16(d); int sum = 0; @@ -966,7 +969,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict sum += y[i].qs[QK8_1/2 + j]; } - y[i].s = sum*d; + y[i].s = GGML_FP32_TO_FP16(sum*d); } } @@ -994,7 +997,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { const float d = amax / ((1 << 7) - 1); const float id = d ? 1.0f/d : 0.0f; - y[i].d = d; + y[i].d = GGML_FP32_TO_FP16(d); int32x4_t accv = vdupq_n_s32(0); @@ -1010,7 +1013,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { accv = vaddq_s32(accv, vi); } - y[i].s = d * vaddvq_s32(accv); + y[i].s = GGML_FP32_TO_FP16(d * vaddvq_s32(accv)); } #elif defined(__wasm_simd128__) for (int i = 0; i < nb; i++) { @@ -1033,7 +1036,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { const float d = amax / ((1 << 7) - 1); const float id = d ? 1.0f/d : 0.0f; - y[i].d = d; + y[i].d = GGML_FP32_TO_FP16(d); v128_t accv = wasm_i32x4_splat(0); @@ -1049,10 +1052,11 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { accv = wasm_i32x4_add(accv, vi); } - y[i].s = d * (wasm_i32x4_extract_lane(accv, 0) + - wasm_i32x4_extract_lane(accv, 1) + - wasm_i32x4_extract_lane(accv, 2) + - wasm_i32x4_extract_lane(accv, 3)); + y[i].s = GGML_FP32_TO_FP16( + d * (wasm_i32x4_extract_lane(accv, 0) + + wasm_i32x4_extract_lane(accv, 1) + + wasm_i32x4_extract_lane(accv, 2) + + wasm_i32x4_extract_lane(accv, 3))); } #elif defined(__AVX2__) || defined(__AVX__) for (int i = 0; i < nb; i++) { @@ -1077,7 +1081,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { // Quantize these floats const float d = maxScalar / 127.f; - y[i].d = d; + y[i].d = GGML_FP32_TO_FP16(d); const float id = ( maxScalar != 0.0f ) ? 127.f / maxScalar : 0.0f; const __m256 mul = _mm256_set1_ps( id ); @@ -1101,7 +1105,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { #if defined(__AVX2__) // Compute the sum of the quants and set y[i].s - y[i].s = d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3))); + y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3)))); // Convert int32 to int16 i0 = _mm256_packs_epi32( i0, i1 ); // 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15 @@ -1131,7 +1135,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { // Compute the sum of the quants and set y[i].s const __m128i s0 = _mm_add_epi32(_mm_add_epi32(ni0, ni1), _mm_add_epi32(ni2, ni3)); const __m128i s1 = _mm_add_epi32(_mm_add_epi32(ni4, ni5), _mm_add_epi32(ni6, ni7)); - y[i].s = d * hsum_i32_4(_mm_add_epi32(s0, s1)); + y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_4(_mm_add_epi32(s0, s1))); // Convert int32 to int16 ni0 = _mm_packs_epi32( ni0, ni1 ); @@ -1162,7 +1166,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { const float d = amax / ((1 << 7) - 1); const float id = d ? 1.0f/d : 0.0f; - y[i].d = d; + y[i].d = GGML_FP32_TO_FP16(d); vfloat32m4_t x0 = __riscv_vfmul_vf_f32m4(v_x, id, vl); @@ -1179,7 +1183,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) { // set y[i].s int sum = __riscv_vmv_x_s_i16m1_i16(vwrs); - y[i].s = sum*d; + y[i].s = GGML_FP32_TO_FP16(sum*d); } #else GGML_UNUSED(nb); @@ -4019,10 +4023,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r const block_q8_1 * restrict b_y0 = &vy0[i]; const block_q8_1 * restrict b_y1 = &vy1[i]; - float32x4_t summs_t = {GGML_FP16_TO_FP32(b_x0->m) * b_y0->s, - GGML_FP16_TO_FP32(b_x1->m) * b_y0->s, - GGML_FP16_TO_FP32(b_x0->m) * b_y1->s, - GGML_FP16_TO_FP32(b_x1->m) * b_y1->s}; + float32x4_t summs_t = {GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y0->s), + GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y0->s), + GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y1->s), + GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y1->s)}; summs0 += summs_t; const uint8x16_t m4b = vdupq_n_u8(0x0F); @@ -4087,7 +4091,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r const block_q8_1 * restrict y0 = &y[i + 0]; const block_q8_1 * restrict y1 = &y[i + 1]; - summs += GGML_FP16_TO_FP32(x0->m) * y0->s + GGML_FP16_TO_FP32(x1->m) * y1->s; + summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s) + GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s); const uint8x16_t m4b = vdupq_n_u8(0x0F); @@ -4110,8 +4114,8 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r const int32x4_t p_0 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0l), v0_0h, v1_0h); const int32x4_t p_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_1l, v1_1l), v0_1h, v1_1h); - sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*y0->d); - sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*y1->d); + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); } *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs; @@ -4124,9 +4128,9 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r // Main loop for (int i = 0; i < nb; ++i) { const float d0 = GGML_FP16_TO_FP32(x[i].d); - const float d1 = y[i].d; + const float d1 = GGML_FP16_TO_FP32(y[i].d); - summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s; + summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s); const __m256 d0v = _mm256_set1_ps( d0 ); const __m256 d1v = _mm256_set1_ps( d1 ); @@ -4178,7 +4182,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r int sumi = __riscv_vmv_x_s_i32m1_i32(vs2); - sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s; + sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s); } *s = sumf; @@ -4196,7 +4200,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]); } - sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s; + sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s); } *s = sumf; @@ -4532,8 +4536,8 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r const uint8x16_t m4b = vdupq_n_u8(0x0F); - summs0 += GGML_FP16_TO_FP32(x0->m) * y0->s; - summs1 += GGML_FP16_TO_FP32(x1->m) * y1->s; + summs0 += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s); + summs1 += GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s); // extract the 5th bit via lookup table ((b) << 4) memcpy(&qh0, x0->qh, sizeof(qh0)); @@ -4577,10 +4581,10 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32( ggml_vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l), - ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*y0->d); + ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32( ggml_vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l), - ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*y1->d); + ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); } *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs0 + summs1; @@ -4597,7 +4601,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r const block_q5_1 * restrict x0 = &x[i]; const block_q8_1 * restrict y0 = &y[i]; - summs += GGML_FP16_TO_FP32(x0->m) * y0->s; + summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s); const v128_t m4b = wasm_i8x16_splat(0x0F); @@ -4644,7 +4648,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r wasm_i32x4_dot_i16x8(v0lfh, v1lh)), wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl), wasm_i32x4_dot_i16x8(v0hfh, v1hh)))), - wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * y0->d))); + wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d)))); } *s = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) + @@ -4659,14 +4663,14 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r for (int i = 0; i < nb; i++) { const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d)); - summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s; + summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s); __m256i qx = bytes_from_nibbles_32(x[i].qs); __m256i bxhi = bytes_from_bits_32(x[i].qh); bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10)); qx = _mm256_or_si256(qx, bxhi); - const __m256 dy = _mm256_set1_ps(y[i].d); + const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d)); const __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs); const __m256 q = mul_sum_us8_pairs_float(qx, qy); @@ -4686,7 +4690,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r for (int i = 0; i < nb; i++) { const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d)); - summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s; + summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s); __m256i bx_0 = bytes_from_nibbles_32(x[i].qs); const __m256i bxhi = bytes_from_bits_32(x[i].qh); @@ -4700,7 +4704,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r bxh = _mm_or_si128(bxh, bxhih); bx_0 = MM256_SET_M128I(bxh, bxl); - const __m256 dy = _mm256_set1_ps(y[i].d); + const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d)); const __m256i by_0 = _mm256_loadu_si256((const __m256i *)y[i].qs); const __m256 q = mul_sum_us8_pairs_float(bx_0, by_0); @@ -4767,7 +4771,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r int sumi = __riscv_vmv_x_s_i32m1_i32(vs2); - sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s; + sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s); } *s = sumf; @@ -4791,7 +4795,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]); } - sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s; + sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s); } *s = sumf; diff --git a/ggml-quants.h b/ggml-quants.h index 74aabf415..aa7e54a16 100644 --- a/ggml-quants.h +++ b/ggml-quants.h @@ -1,248 +1,12 @@ #pragma once +#define GGML_COMMON_DECL_C +#include "ggml-common.h" + +#include "ggml.h" + // GGML internal header -#include "ggml-impl.h" - -#include -#include - -#define QK4_0 32 -typedef struct { - ggml_fp16_t d; // delta - uint8_t qs[QK4_0 / 2]; // nibbles / quants -} block_q4_0; -static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding"); - -#define QK4_1 32 -typedef struct { - ggml_fp16_t d; // delta - ggml_fp16_t m; // min - uint8_t qs[QK4_1 / 2]; // nibbles / quants -} block_q4_1; -static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_fp16_t) + QK4_1 / 2, "wrong q4_1 block size/padding"); - -#define QK5_0 32 -typedef struct { - ggml_fp16_t d; // delta - uint8_t qh[4]; // 5-th bit of quants - uint8_t qs[QK5_0 / 2]; // nibbles / quants -} block_q5_0; -static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding"); - -#define QK5_1 32 -typedef struct { - ggml_fp16_t d; // delta - ggml_fp16_t m; // min - uint8_t qh[4]; // 5-th bit of quants - uint8_t qs[QK5_1 / 2]; // nibbles / quants -} block_q5_1; -static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding"); - -#define QK8_0 32 -typedef struct { - ggml_fp16_t d; // delta - int8_t qs[QK8_0]; // quants -} block_q8_0; -static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding"); - -#define QK8_1 32 -typedef struct { - float d; // delta - float s; // d * sum(qs[i]) - int8_t qs[QK8_1]; // quants -} block_q8_1; -static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block size/padding"); - -// -// Super-block quantization structures -// - -// Super-block size -#ifdef GGML_QKK_64 -#define QK_K 64 -#define K_SCALE_SIZE 4 -#else -#define QK_K 256 -#define K_SCALE_SIZE 12 -#endif - -// 2-bit quantization -// weight is represented as x = a * q + b -// 16 blocks of 16 elements each -// Effectively 2.625 bits per weight -typedef struct { - uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits - uint8_t qs[QK_K/4]; // quants - ggml_fp16_t d; // super-block scale for quantized scales - ggml_fp16_t dmin; // super-block scale for quantized mins -} block_q2_K; -static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding"); - -// 3-bit quantization -// weight is represented as x = a * q -// 16 blocks of 16 elements each -// Effectively 3.4375 bits per weight -#ifdef GGML_QKK_64 -typedef struct { - uint8_t hmask[QK_K/8]; // quants - high bit - uint8_t qs[QK_K/4]; // quants - low 2 bits - uint8_t scales[2]; - ggml_fp16_t d; // super-block scale -} block_q3_K; -static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding"); -#else -typedef struct { - uint8_t hmask[QK_K/8]; // quants - high bit - uint8_t qs[QK_K/4]; // quants - low 2 bits - uint8_t scales[12]; // scales, quantized with 6 bits - ggml_fp16_t d; // super-block scale -} block_q3_K; -static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding"); -#endif - -// 4-bit quantization -// 8 blocks of 32 elements each -// weight is represented as x = a * q + b -// Effectively 4.5 bits per weight -#ifdef GGML_QKK_64 -typedef struct { - ggml_fp16_t d[2]; // super-block scales/mins - uint8_t scales[2]; // 4-bit block scales/mins - uint8_t qs[QK_K/2]; // 4--bit quants -} block_q4_K; -static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + QK_K/2 + 2, "wrong q4_K block size/padding"); -#else -typedef struct { - ggml_fp16_t d; // super-block scale for quantized scales - ggml_fp16_t dmin; // super-block scale for quantized mins - uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits - uint8_t qs[QK_K/2]; // 4--bit quants -} block_q4_K; -static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding"); -#endif - -// 5-bit quantization -// 8 blocks of 32 elements each -// weight is represented as x = a * q + b -// Effectively 5.5 bits per weight -#ifdef GGML_QKK_64 -typedef struct { - ggml_fp16_t d; // super-block scale - int8_t scales[QK_K/16]; // 8-bit block scales - uint8_t qh[QK_K/8]; // quants, high bit - uint8_t qs[QK_K/2]; // quants, low 4 bits -} block_q5_K; -static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding"); -#else -typedef struct { - ggml_fp16_t d; // super-block scale for quantized scales - ggml_fp16_t dmin; // super-block scale for quantized mins - uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits - uint8_t qh[QK_K/8]; // quants, high bit - uint8_t qs[QK_K/2]; // quants, low 4 bits -} block_q5_K; -static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding"); -#endif - -// 6-bit quantization -// weight is represented as x = a * q -// 16 blocks of 16 elements each -// Effectively 6.5625 bits per weight -typedef struct { - uint8_t ql[QK_K/2]; // quants, lower 4 bits - uint8_t qh[QK_K/4]; // quants, upper 2 bits - int8_t scales[QK_K/16]; // scales, quantized with 8 bits - ggml_fp16_t d; // super-block scale -} block_q6_K; -static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding"); - -// This is only used for intermediate quantization and dot products -typedef struct { - float d; // delta - int8_t qs[QK_K]; // quants - int16_t bsums[QK_K/16]; // sum of quants in groups of 16 -} block_q8_K; -static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding"); - -// (Almost) "true" 2-bit quantization. -// Due to the need to use blocks as per ggml design, it ends up using -// 2.0625 bpw because of the 16-bit scale for each block of 256. -typedef struct { - ggml_fp16_t d; - uint16_t qs[QK_K/8]; -} block_iq2_xxs; -static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding"); - -// 2.3125 bpw quants -typedef struct { - ggml_fp16_t d; - uint16_t qs[QK_K/8]; - uint8_t scales[QK_K/32]; -} block_iq2_xs; -static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding"); - -// 2.5625 bpw quants -typedef struct { - ggml_fp16_t d; - uint8_t qs[QK_K/4]; - uint8_t qh[QK_K/32]; - uint8_t scales[QK_K/32]; -} block_iq2_s; -static_assert(sizeof(block_iq2_s) == sizeof(ggml_fp16_t) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding"); - -// (Almost) "true" 3-bit quantization. -// Due to the need to use blocks as per ggml design, it ends up using -// 3.0625 bpw because of the 16-bit scale for each block of 256. -typedef struct { - ggml_fp16_t d; - uint8_t qs[3*QK_K/8]; -} block_iq3_xxs; -static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding"); - -// 3.4375 bpw -#if QK_K == 64 -#define IQ3S_N_SCALE 2 -#else -#define IQ3S_N_SCALE QK_K/64 -#endif -typedef struct { - ggml_fp16_t d; - uint8_t qs[QK_K/4]; - uint8_t qh[QK_K/32]; - uint8_t signs[QK_K/8]; - uint8_t scales[IQ3S_N_SCALE]; -} block_iq3_s; -static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding"); - -typedef struct { - ggml_fp16_t d; - uint8_t qs[QK_K/8]; - uint16_t qh[QK_K/32]; -} block_iq1_s; -static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); - -// Non-linear quants -#define QK4_NL 32 -typedef struct { - ggml_fp16_t d; - uint8_t qs[QK4_NL/2]; -} block_iq4_nl; -static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding"); - -#if QK_K == 64 -#define block_iq4_xs block_iq4_nl -//typedef struct block_iq4_nl block_iq4_xs; -#else -typedef struct { - ggml_fp16_t d; - uint16_t scales_h; - uint8_t scales_l[QK_K/64]; - uint8_t qs[QK_K/2]; -} block_iq4_xs; -static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding"); -#endif - #ifdef __cplusplus extern "C" { #endif diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 24187e7d5..cfb09934d 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -3144,6 +3144,7 @@ namespace dpct } // COPY from DPCT head files +#define GGML_COMMON_DECL_SYCL #define GGML_COMMON_IMPL_SYCL #include "ggml-common.h" @@ -3312,66 +3313,6 @@ typedef void (*ggml_sycl_op_flatten_t)(const ggml_tensor *src0, const float *src1_dd, float *dst_dd, const dpct::queue_ptr &main_stream); -// QK = number of values after dequantization -// QR = QK / number of values before dequantization -// QI = number of 32 bit integers before dequantization - -#define QK4_0 32 -#define QR4_0 2 -#define QI4_0 (QK4_0 / (4 * QR4_0)) -typedef struct dpct_type_block_q4_0 { - sycl::half d; // delta - uint8_t qs[QK4_0 / 2]; // nibbles / quants -} block_q4_0; -static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding"); - -#define QK4_1 32 -#define QR4_1 2 -#define QI4_1 (QK4_1 / (4 * QR4_1)) -typedef struct dpct_type_block_q4_1 { - sycl::half2 dm; // dm.x = delta, dm.y = min - uint8_t qs[QK4_1 / 2]; // nibbles / quants -} block_q4_1; -static_assert(sizeof(block_q4_1) == sizeof(ggml_fp16_t) * 2 + QK4_1 / 2, "wrong q4_1 block size/padding"); - -#define QK5_0 32 -#define QR5_0 2 -#define QI5_0 (QK5_0 / (4 * QR5_0)) -typedef struct dpct_type_block_q5_0 { - sycl::half d; // delta - uint8_t qh[4]; // 5-th bit of quants - uint8_t qs[QK5_0 / 2]; // nibbles / quants -} block_q5_0; -static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding"); - -#define QK5_1 32 -#define QR5_1 2 -#define QI5_1 (QK5_1 / (4 * QR5_1)) -typedef struct dpct_type_block_q5_1 { - sycl::half2 dm; // dm.x = delta, dm.y = min - uint8_t qh[4]; // 5-th bit of quants - uint8_t qs[QK5_1 / 2]; // nibbles / quants -} block_q5_1; -static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding"); - -#define QK8_0 32 -#define QR8_0 1 -#define QI8_0 (QK8_0 / (4 * QR8_0)) -typedef struct dpct_type_block_q8_0 { - sycl::half d; // delta - int8_t qs[QK8_0]; // quants -} block_q8_0; -static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding"); - -#define QK8_1 32 -#define QR8_1 1 -#define QI8_1 (QK8_1 / (4 * QR8_1)) -typedef struct dpct_type_block_q8_1 { - sycl::half2 ds; // ds.x = delta, ds.y = sum - int8_t qs[QK8_0]; // quants -} block_q8_1; -static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_fp16_t) + QK8_0, "wrong q8_1 block size/padding"); - typedef float (*vec_dot_q_sycl_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs); typedef void (*allocate_tiles_sycl_t)(int **x_ql, sycl::half2 **x_dm, int **x_qh, int **x_sc); @@ -3388,137 +3329,6 @@ typedef float (*vec_dot_q_mul_mat_sycl_t)( const int *__restrict__ y_qs, const sycl::half2 *__restrict__ y_ms, const int &i, const int &j, const int &k); -//================================= k-quants - -#ifdef GGML_QKK_64 -#define QK_K 64 -#define K_SCALE_SIZE 4 -#else -#define QK_K 256 -#define K_SCALE_SIZE 12 -#endif - -#define QR2_K 4 -#define QI2_K (QK_K / (4*QR2_K)) -typedef struct dpct_type_block_q2_K { - uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits - uint8_t qs[QK_K/4]; // quants - sycl::half2 dm; // super-block scale for quantized scales/mins -} block_q2_K; -static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding"); - -#define QR3_K 4 -#define QI3_K (QK_K / (4*QR3_K)) -typedef struct dpct_type_block_q3_K { - uint8_t hmask[QK_K/8]; // quants - high bit - uint8_t qs[QK_K/4]; // quants - low 2 bits -#ifdef GGML_QKK_64 - uint8_t scales[2]; // scales, quantized with 8 bits -#else - uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits -#endif - sycl::half d; // super-block scale -} block_q3_K; -//static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + K_SCALE_SIZE, "wrong q3_K block size/padding"); - -#define QR4_K 2 -#define QI4_K (QK_K / (4*QR4_K)) -#ifdef GGML_QKK_64 -typedef struct { - sycl::half dm[2]; // super-block scales/mins - uint8_t scales[2]; // 4-bit block scales/mins - uint8_t qs[QK_K/2]; // 4--bit quants -} block_q4_K; -static_assert(sizeof(block_q4_K) == sizeof(sycl::half2) + QK_K/2 + 2, "wrong q4_K block size/padding"); -#else -typedef struct dpct_type_block_q4_K { - sycl::half2 dm; // super-block scale for quantized scales/mins - uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits - uint8_t qs[QK_K/2]; // 4--bit quants -} block_q4_K; -static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding"); -#endif - -#define QR5_K 2 -#define QI5_K (QK_K / (4*QR5_K)) -#ifdef GGML_QKK_64 -typedef struct { - sycl::half d; // super-block scale - int8_t scales[QK_K/16]; // block scales - uint8_t qh[QK_K/8]; // quants, high bit - uint8_t qs[QK_K/2]; // quants, low 4 bits -} block_q5_K; -static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding"); -#else -typedef struct dpct_type_block_q5_K { - sycl::half2 dm; // super-block scale for quantized scales/mins - uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits - uint8_t qh[QK_K/8]; // quants, high bit - uint8_t qs[QK_K/2]; // quants, low 4 bits -} block_q5_K; -static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding"); -#endif - -#define QR6_K 2 -#define QI6_K (QK_K / (4*QR6_K)) -typedef struct dpct_type_block_q6_K { - uint8_t ql[QK_K/2]; // quants, lower 4 bits - uint8_t qh[QK_K/4]; // quants, upper 2 bits - int8_t scales[QK_K/16]; // scales - sycl::half d; // delta -} block_q6_K; -static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding"); - -#define QR2_XXS 8 -#define QI2_XXS (QK_K / (4*QR2_XXS)) -typedef struct dpct_type_block_iq2_xxs { - sycl::half d; - uint16_t qs[QK_K/8]; -} block_iq2_xxs; -static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding"); - -#define QR2_XS 8 -#define QI2_XS (QK_K / (4*QR2_XS)) -typedef struct dpct_type_block_iq2_xs { - sycl::half d; - uint16_t qs[QK_K/8]; - uint8_t scales[QK_K/32]; -} block_iq2_xs; -static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding"); - -#define QR3_XXS 8 -#define QI3_XXS (QK_K / (4*QR3_XXS)) -typedef struct dpct_type_block_iq3_xxs { - sycl::half d; - uint8_t qs[3*(QK_K/8)]; -} block_iq3_xxs; -static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding"); - -#define QR3_XS 8 -#define QI3_XS (QK_K / (4*QR3_XS)) -#if QK_K == 64 -#define IQ3S_N_SCALE 2 -#else -#define IQ3S_N_SCALE QK_K/64 -#endif -typedef struct { - sycl::half d; - uint8_t qs[QK_K/4]; - uint8_t qh[QK_K/32]; - uint8_t signs[QK_K/8]; - uint8_t scales[IQ3S_N_SCALE]; -} block_iq3_s; -static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding"); - -#define QR1_S 8 -#define QI1_S (QK_K / (4*QR1_S)) -typedef struct { - sycl::half d; - uint8_t qs[QK_K/8]; - uint16_t qh[QK_K/32]; -} block_iq1_s; -static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding"); - #define WARP_SIZE 32 #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses From 306d34be7ad19e768975409fc80791a274ea0230 Mon Sep 17 00:00:00 2001 From: slaren Date: Tue, 12 Mar 2024 16:55:19 +0100 Subject: [PATCH 26/48] ci : remove tidy-review (#6021) --- .github/workflows/tidy-post.yml | 20 -------------------- .github/workflows/tidy-review.yml | 23 ----------------------- 2 files changed, 43 deletions(-) delete mode 100644 .github/workflows/tidy-post.yml delete mode 100644 .github/workflows/tidy-review.yml diff --git a/.github/workflows/tidy-post.yml b/.github/workflows/tidy-post.yml deleted file mode 100644 index 03652760c..000000000 --- a/.github/workflows/tidy-post.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: clang-tidy review post comments - -on: - workflow_dispatch: - workflows: ["clang-tidy-review"] - types: - - completed - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: ZedThree/clang-tidy-review/post@v0.13.0 - # lgtm_comment_body, max_comments, and annotations need to be set on the posting workflow in a split setup - with: - # adjust options as necessary - lgtm_comment_body: '' - annotations: false - max_comments: 25 diff --git a/.github/workflows/tidy-review.yml b/.github/workflows/tidy-review.yml deleted file mode 100644 index a4bc8d976..000000000 --- a/.github/workflows/tidy-review.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: clang-tidy-review - -on: - pull_request: - branches: - - master - -jobs: - clang-tidy-review: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - - uses: ZedThree/clang-tidy-review@v0.13.0 - id: review - with: - lgtm_comment_body: '' - build_dir: build - cmake_command: cmake . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=on - split_workflow: true - - - uses: ZedThree/clang-tidy-review/upload@v0.13.0 From 99b71c068f624521ad977e08e41589e2971fa1c7 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Wed, 13 Mar 2024 11:39:11 +0100 Subject: [PATCH 27/48] Server: Use multi-task for embeddings endpoint (#6001) * use multitask for embd endpoint * specify types * remove redundant {"n_predict", 0} --- examples/server/server.cpp | 76 ++++++++++++++------------------------ examples/server/utils.hpp | 12 +++++- 2 files changed, 38 insertions(+), 50 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index b63a6f243..3172d96dd 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2763,6 +2763,7 @@ int main(int argc, char ** argv) { res.set_header("Access-Control-Allow-Credentials", "true"); res.set_header("Access-Control-Allow-Methods", "POST"); res.set_header("Access-Control-Allow-Headers", "*"); + return res.set_content("", "application/json; charset=utf-8"); }); svr->set_logger(log_server_request); @@ -3371,44 +3372,37 @@ int main(int argc, char ** argv) { const json body = json::parse(req.body); bool is_openai = false; - // an input prompt can string or a list of tokens (integer) - std::vector prompts; + // an input prompt can be a string or a list of tokens (integer) + json prompt; if (body.count("input") != 0) { is_openai = true; - if (body["input"].is_array()) { - // support multiple prompts - for (const json & elem : body["input"]) { - prompts.push_back(elem); - } - } else { - // single input prompt - prompts.push_back(body["input"]); - } + prompt = body["input"]; } else if (body.count("content") != 0) { - // only support single prompt here - std::string content = body["content"]; - prompts.push_back(content); + // with "content", we only support single prompt + prompt = std::vector{body["content"]}; } else { res_error(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST)); return; } - // process all prompts - json responses = json::array(); - for (auto & prompt : prompts) { - // TODO @ngxson : maybe support multitask for this endpoint? - // create and queue the task + // create and queue the task + json responses; + { const int id_task = ctx_server.queue_tasks.get_new_id(); - ctx_server.queue_results.add_waiting_task_id(id_task); - ctx_server.request_completion(id_task, -1, { {"prompt", prompt}, { "n_predict", 0}}, false, true); + ctx_server.request_completion(id_task, -1, {{"prompt", prompt}}, false, true); // get the result server_task_result result = ctx_server.queue_results.recv(id_task); ctx_server.queue_results.remove_waiting_task_id(id_task); if (!result.error) { - // append to the responses - responses.push_back(result.data); + if (result.data.count("results")) { + // result for multi-task + responses = result.data["results"]; + } else { + // result for single task + responses = std::vector{result.data}; + } } else { // error received, ignore everything else res_error(res, result.data); @@ -3417,24 +3411,19 @@ int main(int argc, char ** argv) { } // write JSON response - json root; - if (is_openai) { - json res_oai = json::array(); - int i = 0; - for (auto & elem : responses) { - res_oai.push_back(json{ - {"embedding", json_value(elem, "embedding", json::array())}, - {"index", i++}, - {"object", "embedding"} - }); - } - root = format_embeddings_response_oaicompat(body, res_oai); - } else { - root = responses[0]; - } + json root = is_openai + ? format_embeddings_response_oaicompat(body, responses) + : responses[0]; return res.set_content(root.dump(), "application/json; charset=utf-8"); }; + auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) { + return [content, len, mime_type](const httplib::Request &, httplib::Response & res) { + res.set_content(reinterpret_cast(content), len, mime_type); + return false; + }; + }; + // // Router // @@ -3446,17 +3435,6 @@ int main(int argc, char ** argv) { } // using embedded static files - auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) { - return [content, len, mime_type](const httplib::Request &, httplib::Response & res) { - res.set_content(reinterpret_cast(content), len, mime_type); - return false; - }; - }; - - svr->Options(R"(/.*)", [](const httplib::Request &, httplib::Response & res) { - // TODO @ngxson : I have no idea what it is... maybe this is redundant? - return res.set_content("", "application/json; charset=utf-8"); - }); svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8")); svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8")); svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8")); diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 48aeef4eb..2ddb2cd21 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -529,6 +529,16 @@ static std::vector format_partial_response_oaicompat(json result, const st } static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) { + json data = json::array(); + int i = 0; + for (auto & elem : embeddings) { + data.push_back(json{ + {"embedding", json_value(elem, "embedding", json::array())}, + {"index", i++}, + {"object", "embedding"} + }); + } + json res = json { {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))}, {"object", "list"}, @@ -536,7 +546,7 @@ static json format_embeddings_response_oaicompat(const json & request, const jso {"prompt_tokens", 0}, {"total_tokens", 0} }}, - {"data", embeddings} + {"data", data} }; return res; From b3d978600f07f22e94f2e797f18a8b5f6df23c89 Mon Sep 17 00:00:00 2001 From: AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> Date: Wed, 13 Mar 2024 13:17:54 +0000 Subject: [PATCH 28/48] Update get version (#6025) --- ggml-sycl.cpp | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index cfb09934d..c2ab13034 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -202,24 +202,29 @@ namespace dpct // Version string has the following format: // a. OpenCL // b. + // c. e.g gfx1030 std::string ver; ver = dev.get_info(); std::string::size_type i = 0; - while (i < ver.size()) - { - if (isdigit(ver[i])) - break; - i++; + while (i < ver.size()) { + if (isdigit(ver[i])) + break; + i++; } major = std::stoi(&(ver[i])); - while (i < ver.size()) - { - if (ver[i] == '.') - break; - i++; + while (i < ver.size()) { + if (ver[i] == '.') + break; + i++; + } + if (i < ver.size()) { + // a. and b. + i++; + minor = std::stoi(&(ver[i])); + } else { + // c. + minor = 0; } - i++; - minor = std::stoi(&(ver[i])); } template From d8fd0ccf6ac8b07791ffd1575eed436930854ae3 Mon Sep 17 00:00:00 2001 From: slaren Date: Wed, 13 Mar 2024 14:58:30 +0100 Subject: [PATCH 29/48] test-backend-ops : skip CPU backend by default (#6028) --- tests/test-backend-ops.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index fc5edcc4b..c2916c3e4 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -2222,8 +2222,8 @@ static void usage(char ** argv) { int main(int argc, char ** argv) { test_mode mode = MODE_TEST; - const char * op_name = NULL; - const char * backend = NULL; + const char * op_name_filter = NULL; + const char * backend_filter = NULL; for (int i = 1; i < argc; i++) { if (strcmp(argv[i], "test") == 0) { @@ -2232,14 +2232,14 @@ int main(int argc, char ** argv) { mode = MODE_PERF; } else if (strcmp(argv[i], "-o") == 0) { if (i + 1 < argc) { - op_name = argv[++i]; + op_name_filter = argv[++i]; } else { usage(argv); return 1; } } else if (strcmp(argv[i], "-b") == 0) { if (i + 1 < argc) { - backend = argv[++i]; + backend_filter = argv[++i]; } else { usage(argv); return 1; @@ -2258,7 +2258,7 @@ int main(int argc, char ** argv) { for (size_t i = 0; i < ggml_backend_reg_get_count(); i++) { printf("Backend %zu/%zu (%s)\n", i + 1, ggml_backend_reg_get_count(), ggml_backend_reg_get_name(i)); - if (backend != NULL && strcmp(backend, ggml_backend_reg_get_name(i)) != 0) { + if (backend_filter != NULL && strcmp(backend_filter, ggml_backend_reg_get_name(i)) != 0) { printf(" Skipping\n"); n_ok++; continue; @@ -2266,9 +2266,17 @@ int main(int argc, char ** argv) { ggml_backend_t backend = ggml_backend_reg_init_backend(i, NULL); GGML_ASSERT(backend != NULL); + + if (backend_filter == NULL && ggml_backend_is_cpu(backend)) { + printf(" Skipping CPU backend\n"); + ggml_backend_free(backend); + n_ok++; + continue; + } + printf(" Backend name: %s\n", ggml_backend_name(backend)); - bool ok = test_backend(backend, mode, op_name); + bool ok = test_backend(backend, mode, op_name_filter); printf(" Backend %s: ", ggml_backend_name(backend)); if (ok) { From f30ea47a87ed4446ad55adb265755dc9102956a2 Mon Sep 17 00:00:00 2001 From: slaren Date: Wed, 13 Mar 2024 18:54:21 +0100 Subject: [PATCH 30/48] llama : add pipeline parallelism support (#6017) * llama : add pipeline parallelism support for batch processing with multiple CUDA GPUs ggml-ci * server : add -ub, --ubatch-size parameter * fix server embedding test * llama : fix Mamba inference for pipeline parallelism Tested to work correctly with both `main` and `parallel` examples. * llama : limit max batch size to n_batch * add LLAMA_SCHED_MAX_COPIES to configure the number of input copies for pipeline parallelism default increase to 4 (from 2) changing this value may improve performance for some systems, but increases memory usage * fix hip build * fix sycl build (disable cpy_tensor_async) * fix hip build * llama : limit n_batch and n_ubatch to n_ctx during context creation * llama : fix norm backend * batched-bench : sync after decode * swiftui : sync after decode * ggml : allow ggml_get_rows to use multiple threads if they are available * check n_ubatch >= n_tokens with non-casual attention * llama : do not limit n_batch to n_ctx with non-casual attn * server : construct batch with size of llama_n_batch * ggml_backend_cpu_graph_compute : fix return value when alloc fails * llama : better n_batch and n_ubatch comment * fix merge * small fix * reduce default n_batch to 2048 --------- Co-authored-by: Francis Couture-Harpin Co-authored-by: Georgi Gerganov --- CMakeLists.txt | 3 + Makefile | 4 + common/common.cpp | 14 +- common/common.h | 3 +- examples/batched-bench/batched-bench.cpp | 2 + examples/embedding/embedding.cpp | 2 +- examples/llama-bench/llama-bench.cpp | 53 +- .../llama.cpp.swift/LibLlama.swift | 2 + examples/perplexity/perplexity.cpp | 3 +- examples/server/server.cpp | 32 +- .../server/tests/features/embeddings.feature | 1 + examples/server/tests/features/steps/steps.py | 8 + ggml-alloc.c | 109 +- ggml-alloc.h | 18 +- ggml-backend-impl.h | 17 +- ggml-backend.c | 517 +++++-- ggml-backend.h | 58 +- ggml-cuda.cu | 175 ++- ggml-kompute.cpp | 5 + ggml-metal.m | 5 + ggml-sycl.cpp | 7 +- ggml-vulkan.cpp | 5 + ggml.c | 113 +- llama.cpp | 1189 +++++++++-------- llama.h | 9 +- 25 files changed, 1467 insertions(+), 887 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ab13cbd5..a8abf4088 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,6 +118,7 @@ option(LLAMA_SYCL "llama: use SYCL" option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device") option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) +set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism") option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) @@ -147,6 +148,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) include(CheckCXXCompilerFlag) +add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES}) + # enable libstdc++ assertions for debug builds if (CMAKE_SYSTEM_NAME MATCHES "Linux") add_compile_definitions($<$:_GLIBCXX_ASSERTIONS>) diff --git a/Makefile b/Makefile index c8fd3f5c5..db9968efb 100644 --- a/Makefile +++ b/Makefile @@ -167,6 +167,10 @@ ifeq ($(UNAME_S),OpenBSD) MK_CPPFLAGS += -D_BSD_SOURCE endif +ifdef LLAMA_SCHED_MAX_COPIES + MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES) +endif + ifdef LLAMA_DEBUG MK_CFLAGS += -O0 -g MK_CXXFLAGS += -O0 -g diff --git a/common/common.cpp b/common/common.cpp index 2f38ac632..73b1b61ba 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -483,6 +483,12 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { break; } params.n_batch = std::stoi(argv[i]); + } else if (arg == "-ub" || arg == "--ubatch-size") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_ubatch = std::stoi(argv[i]); } else if (arg == "--keep") { if (++i >= argc) { invalid_param = true; @@ -977,7 +983,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" binary file containing multiple choice tasks.\n"); printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); - printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); + printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch); + printf(" -ub N, --ubatch-size N\n"); + printf(" physical maximum batch size (default: %d)\n", params.n_ubatch); printf(" --samplers samplers that will be used for generation in the order, separated by \';\'\n"); printf(" (default: %s)\n", sampler_type_names.c_str()); printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str()); @@ -1287,8 +1295,9 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param auto cparams = llama_context_default_params(); cparams.n_ctx = params.n_ctx; - cparams.n_batch = params.n_batch; cparams.n_seq_max = params.n_parallel; + cparams.n_batch = params.n_batch; + cparams.n_ubatch = params.n_ubatch; cparams.n_threads = params.n_threads; cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch; cparams.seed = params.seed; @@ -1379,6 +1388,7 @@ std::tuple llama_init_from_gpt_par std::vector tmp = { llama_token_bos(model), llama_token_eos(model), }; llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0)); llama_kv_cache_clear(lctx); + llama_synchronize(lctx); llama_reset_timings(lctx); } diff --git a/common/common.h b/common/common.h index f8d82b871..0f178b9eb 100644 --- a/common/common.h +++ b/common/common.h @@ -51,7 +51,8 @@ struct gpt_params { int32_t n_threads_batch_draft = -1; int32_t n_predict = -1; // new tokens to predict int32_t n_ctx = 512; // context size - int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS) int32_t n_keep = 0; // number of tokens to keep from initial prompt int32_t n_draft = 5; // number of tokens to draft during speculative decoding int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index 22bc93bca..19674dfd3 100644 --- a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -138,6 +138,8 @@ int main(int argc, char ** argv) { LOG_TEE("failed to decode the batch, n_batch = %d, ret = %d\n", n_batch, ret); return false; } + + llama_synchronize(ctx); } return true; diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index a553ae1c3..49302a199 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -107,7 +107,7 @@ int main(int argc, char ** argv) { // max batch size const uint64_t n_batch = params.n_batch; - GGML_ASSERT(params.n_batch == params.n_ctx); + GGML_ASSERT(params.n_batch >= params.n_ctx); // tokenize the prompts and trim std::vector> inputs; diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 2ff86ef6f..bf94e7e7a 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -164,6 +164,7 @@ struct cmd_params { std::vector n_prompt; std::vector n_gen; std::vector n_batch; + std::vector n_ubatch; std::vector type_k; std::vector type_v; std::vector n_threads; @@ -183,7 +184,8 @@ static const cmd_params cmd_params_defaults = { /* model */ {"models/7B/ggml-model-q4_0.gguf"}, /* n_prompt */ {512}, /* n_gen */ {128}, - /* n_batch */ {512}, + /* n_batch */ {2048}, + /* n_ubatch */ {512}, /* type_k */ {GGML_TYPE_F16}, /* type_v */ {GGML_TYPE_F16}, /* n_threads */ {get_num_physical_cores()}, @@ -208,6 +210,7 @@ static void print_usage(int /* argc */, char ** argv) { printf(" -p, --n-prompt (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str()); printf(" -n, --n-gen (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str()); printf(" -b, --batch-size (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str()); + printf(" -ub N, --ubatch-size (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str()); printf(" -ctk , --cache-type-k (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str()); printf(" -ctv , --cache-type-v (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str()); printf(" -t, --threads (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str()); @@ -217,7 +220,7 @@ static void print_usage(int /* argc */, char ** argv) { printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str()); printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str()); printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str()); - printf(" -ts, --tensor_split (default: 0)\n"); + printf(" -ts, --tensor-split (default: 0)\n"); printf(" -r, --repetitions (default: %d)\n", cmd_params_defaults.reps); printf(" -o, --output (default: %s)\n", output_format_str(cmd_params_defaults.output_format)); printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0"); @@ -297,6 +300,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { } auto p = split(argv[i], split_delim); params.n_batch.insert(params.n_batch.end(), p.begin(), p.end()); + } else if (arg == "-ub" || arg == "--ubatch-size") { + if (++i >= argc) { + invalid_param = true; + break; + } + auto p = split(argv[i], split_delim); + params.n_ubatch.insert(params.n_ubatch.end(), p.begin(), p.end()); } else if (arg == "-ctk" || arg == "--cache-type-k") { if (++i >= argc) { invalid_param = true; @@ -455,6 +465,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { if (params.n_prompt.empty()) { params.n_prompt = cmd_params_defaults.n_prompt; } if (params.n_gen.empty()) { params.n_gen = cmd_params_defaults.n_gen; } if (params.n_batch.empty()) { params.n_batch = cmd_params_defaults.n_batch; } + if (params.n_ubatch.empty()) { params.n_ubatch = cmd_params_defaults.n_ubatch; } if (params.type_k.empty()) { params.type_k = cmd_params_defaults.type_k; } if (params.type_v.empty()) { params.type_v = cmd_params_defaults.type_v; } if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; } @@ -474,6 +485,7 @@ struct cmd_params_instance { int n_prompt; int n_gen; int n_batch; + int n_ubatch; ggml_type type_k; ggml_type type_v; int n_threads; @@ -511,6 +523,7 @@ struct cmd_params_instance { cparams.n_ctx = n_prompt + n_gen; cparams.n_batch = n_batch; + cparams.n_ubatch = n_ubatch; cparams.type_k = type_k; cparams.type_v = type_v; cparams.offload_kqv = !no_kv_offload; @@ -532,6 +545,7 @@ static std::vector get_cmd_params_instances(const cmd_param for (const auto & mmp : params.use_mmap) for (const auto & embd : params.embeddings) for (const auto & nb : params.n_batch) + for (const auto & nub : params.n_ubatch) for (const auto & tk : params.type_k) for (const auto & tv : params.type_v) for (const auto & nkvo : params.no_kv_offload) @@ -545,6 +559,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .n_prompt = */ n_prompt, /* .n_gen = */ 0, /* .n_batch = */ nb, + /* .n_ubatch = */ nub, /* .type_k = */ tk, /* .type_v = */ tv, /* .n_threads = */ nt, @@ -568,6 +583,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .n_prompt = */ 0, /* .n_gen = */ n_gen, /* .n_batch = */ nb, + /* .n_ubatch = */ nub, /* .type_k = */ tk, /* .type_v = */ tv, /* .n_threads = */ nt, @@ -604,6 +620,7 @@ struct test { uint64_t model_size; uint64_t model_n_params; int n_batch; + int n_ubatch; int n_threads; ggml_type type_k; ggml_type type_v; @@ -627,6 +644,7 @@ struct test { model_size = llama_model_size(lmodel); model_n_params = llama_model_n_params(lmodel); n_batch = inst.n_batch; + n_ubatch = inst.n_ubatch; n_threads = inst.n_threads; type_k = inst.type_k; type_v = inst.type_v; @@ -705,7 +723,8 @@ struct test { "cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "cpu_info", "gpu_info", "model_filename", "model_type", "model_size", "model_n_params", - "n_batch", "n_threads", "type_k", "type_v", + "n_batch", "n_ubatch", + "n_threads", "type_k", "type_v", "n_gpu_layers", "split_mode", "main_gpu", "no_kv_offload", "tensor_split", "use_mmap", "embeddings", @@ -719,7 +738,8 @@ struct test { enum field_type {STRING, BOOL, INT, FLOAT}; static field_type get_field_type(const std::string & field) { - if (field == "build_number" || field == "n_batch" || field == "n_threads" || + if (field == "build_number" || field == "n_batch" || field == "n_ubatch" || + field == "n_threads" || field == "model_size" || field == "model_n_params" || field == "n_gpu_layers" || field == "main_gpu" || field == "n_prompt" || field == "n_gen" || @@ -759,7 +779,8 @@ struct test { std::to_string(metal), std::to_string(sycl), std::to_string(gpu_blas), std::to_string(blas), cpu_info, gpu_info, model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params), - std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v), + std::to_string(n_batch), std::to_string(n_ubatch), + std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v), std::to_string(n_gpu_layers), split_mode_str(split_mode), std::to_string(main_gpu), std::to_string(no_kv_offload), tensor_split_str, std::to_string(use_mmap), std::to_string(embeddings), @@ -957,6 +978,9 @@ struct markdown_printer : public printer { if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) { fields.emplace_back("n_batch"); } + if (params.n_ubatch.size() > 1 || params.n_ubatch != cmd_params_defaults.n_ubatch) { + fields.emplace_back("n_ubatch"); + } if (params.type_k.size() > 1 || params.type_k != cmd_params_defaults.type_k) { fields.emplace_back("type_k"); } @@ -1096,25 +1120,32 @@ struct sql_printer : public printer { }; static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) { + llama_set_n_threads(ctx, n_threads, n_threads); + + //std::vector tokens(n_prompt, llama_token_bos(llama_get_model(ctx))); + //llama_decode(ctx, llama_batch_get_one(tokens.data(), n_prompt, n_past, 0)); + //GGML_UNUSED(n_batch); + std::vector tokens(n_batch, llama_token_bos(llama_get_model(ctx))); int n_processed = 0; - llama_set_n_threads(ctx, n_threads, n_threads); - while (n_processed < n_prompt) { int n_tokens = std::min(n_prompt - n_processed, n_batch); llama_decode(ctx, llama_batch_get_one(tokens.data(), n_tokens, n_past + n_processed, 0)); n_processed += n_tokens; } + + llama_synchronize(ctx); } static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) { - llama_token token = llama_token_bos(llama_get_model(ctx)); - llama_set_n_threads(ctx, n_threads, n_threads); + llama_token token = llama_token_bos(llama_get_model(ctx)); + for (int i = 0; i < n_gen; i++) { llama_decode(ctx, llama_batch_get_one(&token, 1, n_past + i, 0)); + llama_synchronize(ctx); } } @@ -1203,7 +1234,8 @@ int main(int argc, char ** argv) { // warmup run if (t.n_prompt > 0) { - test_prompt(ctx, std::min(2, t.n_batch), 0, t.n_batch, t.n_threads); + //test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads); + test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads); } if (t.n_gen > 0) { test_gen(ctx, 1, 0, t.n_threads); @@ -1219,6 +1251,7 @@ int main(int argc, char ** argv) { if (t.n_gen > 0) { test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads); } + uint64_t t_ns = get_time_ns() - t_start; t.samples_ns.push_back(t_ns); } diff --git a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift index 58fcf40c6..c249291ae 100644 --- a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +++ b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift @@ -221,6 +221,7 @@ actor LlamaContext { if llama_decode(context, batch) != 0 { print("llama_decode() failed during prompt") } + llama_synchronize(context) let t_pp_end = ggml_time_us() @@ -240,6 +241,7 @@ actor LlamaContext { if llama_decode(context, batch) != 0 { print("llama_decode() failed during text generation") } + llama_synchronize(context) } let t_tg_end = ggml_time_us() diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp index fdfc8f5dc..d766aef6a 100644 --- a/examples/perplexity/perplexity.cpp +++ b/examples/perplexity/perplexity.cpp @@ -589,9 +589,10 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par } } - const auto t_end = std::chrono::high_resolution_clock::now(); if (i == 0) { + llama_synchronize(ctx); + const auto t_end = std::chrono::high_resolution_clock::now(); const float t_total = std::chrono::duration(t_end - t_start).count(); fprintf(stderr, "%s: %.2f seconds per pass - ETA ", __func__, t_total); int total_seconds = (int)(t_total*n_chunk/n_seq); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 3172d96dd..895d608fd 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -147,7 +147,7 @@ struct server_slot { int32_t n_decoded = 0; int32_t n_remaining = -1; int32_t i_batch = -1; - int32_t n_predict = -1; + int32_t n_predict = -1; // TODO: disambiguate from params.n_predict int32_t n_prompt_tokens = 0; int32_t n_prompt_tokens_processed = 0; @@ -739,7 +739,13 @@ struct server_context { default_generation_settings_for_props = get_formated_generation(slots.front()); default_generation_settings_for_props["seed"] = -1; - batch = llama_batch_init(n_ctx, 0, params.n_parallel); + // the update_slots() logic will always submit a maximum of n_batch tokens + // note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used) + { + const int32_t n_batch = llama_n_batch(ctx); + + batch = llama_batch_init(n_batch, 0, params.n_parallel); + } metrics.init(); } @@ -1036,8 +1042,10 @@ struct server_context { llama_batch_add(batch, system_tokens[i], i, { 0 }, false); } - for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch) { - const int32_t n_tokens = std::min(params.n_batch, (int32_t) (batch.n_tokens - i)); + const int32_t n_batch = llama_n_batch(ctx); + + for (int32_t i = 0; i < batch.n_tokens; i += n_batch) { + const int32_t n_tokens = std::min(params.n_batch, batch.n_tokens - i); llama_batch batch_view = { n_tokens, batch.token + i, @@ -1226,7 +1234,7 @@ struct server_context { {"mirostat_eta", slot.sparams.mirostat_eta}, {"penalize_nl", slot.sparams.penalize_nl}, {"stop", slot.params.antiprompt}, - {"n_predict", slot.params.n_predict}, + {"n_predict", slot.params.n_predict}, // TODO: fix duplicate key n_predict {"n_keep", params.n_keep}, {"ignore_eos", ignore_eos}, {"stream", slot.params.stream}, @@ -1738,7 +1746,8 @@ struct server_context { } // process in chunks of params.n_batch - int32_t n_batch = params.n_batch; + int32_t n_batch = llama_n_batch(ctx); + int32_t n_ubatch = llama_n_ubatch(ctx); // next, batch any pending prompts without exceeding n_batch if (params.cont_batching || batch.n_tokens == 0) { @@ -1811,7 +1820,7 @@ struct server_context { if (slot.embedding) { // this prompt is too large to process - discard it - if (slot.n_prompt_tokens > n_batch) { + if (slot.n_prompt_tokens > n_ubatch) { slot.state = SLOT_STATE_PROCESSING; slot.command = SLOT_COMMAND_NONE; slot.release(); @@ -2157,7 +2166,8 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co printf(" --pooling {none,mean,cls} pooling type for embeddings, use model default if unspecified\n"); printf(" -dt N, --defrag-thold N\n"); printf(" KV cache defragmentation threshold (default: %.1f, < 0 - disabled)\n", params.defrag_thold); - printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); + printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch); + printf(" -ub N, --ubatch-size N physical maximum batch size (default: %d)\n", params.n_ubatch); printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n"); printf(" not recommended: doubles context memory required and no measurable increase in quality\n"); if (llama_supports_mlock()) { @@ -2424,6 +2434,12 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams, break; } params.n_batch = std::stoi(argv[i]); + } else if (arg == "-ub" || arg == "--ubatch-size") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.n_ubatch = std::stoi(argv[i]); } else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") { if (++i >= argc) { invalid_param = true; diff --git a/examples/server/tests/features/embeddings.feature b/examples/server/tests/features/embeddings.feature index b47661e94..57359b267 100644 --- a/examples/server/tests/features/embeddings.feature +++ b/examples/server/tests/features/embeddings.feature @@ -9,6 +9,7 @@ Feature: llama.cpp server And 42 as server seed And 2 slots And 1024 as batch size + And 1024 as ubatch size And 2048 KV cache size And embeddings extraction Then the server is starting diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 98c2b6174..cfa9f96ec 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -33,6 +33,7 @@ def step_server_config(context, server_fqdn, server_port): context.model_alias = None context.n_batch = None + context.n_ubatch = None context.n_ctx = None context.n_ga = None context.n_ga_w = None @@ -278,6 +279,11 @@ def step_n_batch(context, n_batch): context.n_batch = n_batch +@step('{n_ubatch:d} as ubatch size') +def step_n_ubatch(context, n_ubatch): + context.n_ubatch = n_ubatch + + @step('{seed:d} as seed') def step_seed(context, seed): context.seed = seed @@ -1029,6 +1035,8 @@ def start_server_background(context): ] if context.n_batch: server_args.extend(['--batch-size', context.n_batch]) + if context.n_ubatch: + server_args.extend(['--ubatch-size', context.n_ubatch]) if context.n_gpu_layer: server_args.extend(['--n-gpu-layers', context.n_gpu_layer]) if context.server_continuous_batching: diff --git a/ggml-alloc.c b/ggml-alloc.c index e675306c8..8ac1d3e51 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -61,7 +61,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) { } } -// TODO: GGML_PAD ? static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) { assert(alignment && !(alignment & (alignment - 1))); // power of 2 size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment; @@ -69,25 +68,14 @@ static size_t aligned_offset(const void * buffer, size_t offset, size_t alignmen } // tallocr -struct ggml_tallocr { - ggml_backend_buffer_t buffer; - void * base; - size_t alignment; - size_t offset; -}; - -ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) { - ggml_tallocr_t talloc = malloc(sizeof(struct ggml_tallocr)); - if (talloc == NULL) { - return NULL; - } +struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) { void * base = ggml_backend_buffer_get_base(buffer); size_t align = ggml_backend_buffer_get_alignment(buffer); assert(align && !(align & (align - 1))); // power of 2 - *talloc = (struct ggml_tallocr) { + struct ggml_tallocr talloc = (struct ggml_tallocr) { /*.buffer = */ buffer, /*.base = */ base, /*.alignment = */ align, @@ -96,11 +84,7 @@ ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) { return talloc; } -void ggml_tallocr_free(ggml_tallocr_t talloc) { - free(talloc); -} - -void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor) { +void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) { size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor); size = GGML_PAD(size, talloc->alignment); @@ -354,12 +338,16 @@ struct hash_node { bool allocated; }; -// struct tensor_alloc { size_t offset; size_t size_max; // 0 = pre-allocated, unused, or view }; +struct leaf_alloc { + int buffer_id; + struct tensor_alloc leaf; +}; + struct node_alloc { int buffer_id; struct tensor_alloc dst; @@ -378,7 +366,7 @@ struct ggml_gallocr { struct node_alloc * node_allocs; // [n_nodes] int n_nodes; - struct tensor_alloc * leaf_allocs; // [n_leafs] + struct leaf_alloc * leaf_allocs; // [n_leafs] int n_leafs; }; @@ -543,13 +531,20 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) { return node_buffer_ids ? node_buffer_ids[i] : 0; } -static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) { +static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) { // clear hash tables memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *)); memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node)); + // allocate leafs + // these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes + for (int i = 0; i < graph->n_leafs; i++) { + struct ggml_tensor * leaf = graph->leafs[i]; + ggml_gallocr_allocate_node(galloc, leaf, get_node_buffer_id(leaf_buffer_ids, i)); + } + // count number of children and views - // allocate all graph inputs and leafs first to avoid overwriting them + // allocate other graph inputs and leafs first to avoid overwriting them for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; @@ -577,19 +572,6 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr } } - // allocate the remaining leafs that are unused on the graph - // these are effectively static tensors that the application is not using in the graph, but may still want to allocate for other purposes - for (int i = 0; i < graph->n_leafs; i++) { - struct ggml_tensor * leaf = graph->leafs[i]; - struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf); - - if (hn->n_children == 0) { - assert(!hn->allocated); - // since buffer ids are only given for nodes, these leafs are always allocated in the first buffer - ggml_gallocr_allocate_node(galloc, leaf, 0); - } - } - // allocate tensors for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; @@ -652,7 +634,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr } } -bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) { +bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) { size_t hash_size = graph->visited_hash_table.size; // initialize hash table @@ -676,7 +658,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c } // allocate in hash table - ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids); + ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids); // set the node_allocs from the hash table if (galloc->n_nodes < graph->n_nodes) { @@ -711,15 +693,16 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c } if (galloc->n_leafs < graph->n_leafs) { free(galloc->leaf_allocs); - galloc->leaf_allocs = calloc(sizeof(struct tensor_alloc), graph->n_leafs); + galloc->leaf_allocs = calloc(sizeof(galloc->leaf_allocs[0]), graph->n_leafs); GGML_ASSERT(galloc->leaf_allocs != NULL); } galloc->n_leafs = graph->n_leafs; for (int i = 0; i < graph->n_leafs; i++) { struct ggml_tensor * leaf = graph->leafs[i]; struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf); - galloc->leaf_allocs[i].offset = hn->offset; - galloc->leaf_allocs[i].size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf); + galloc->leaf_allocs[i].buffer_id = hn->buffer_id; + galloc->leaf_allocs[i].leaf.offset = hn->offset; + galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf); } // reallocate buffers if needed @@ -727,7 +710,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c size_t cur_size = galloc->buffers[i] ? ggml_backend_buffer_get_size(galloc->buffers[i]) : 0; size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]); - if (new_size > cur_size) { + // even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views + if (new_size > cur_size || galloc->buffers[i] == NULL) { #ifndef NDEBUG fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0); #endif @@ -744,30 +728,30 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c } bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) { - return ggml_gallocr_reserve_n(galloc, graph, NULL); + return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL); } -static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id, struct tensor_alloc * tensor_alloc) { - assert(node->data || node->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max); +static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, int buffer_id, struct tensor_alloc * tensor_alloc) { + assert(tensor->data || tensor->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max); - if (node->view_src != NULL) { - if (node->buffer == NULL) { + if (tensor->view_src != NULL) { + if (tensor->buffer == NULL) { assert(tensor_alloc->offset == SIZE_MAX); - if (node->view_src->buffer == NULL) { + if (tensor->view_src->buffer == NULL) { // this tensor was allocated without ggml-backend return; } - ggml_backend_view_init(galloc->buffers[buffer_id], node); + ggml_backend_view_init(galloc->buffers[buffer_id], tensor); } } else { - if (node->data == NULL) { + if (tensor->data == NULL) { assert(tensor_alloc->offset != SIZE_MAX); - assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max); + assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max); void * base = ggml_backend_buffer_get_base(galloc->buffers[buffer_id]); void * addr = (char *)base + tensor_alloc->offset; - ggml_backend_tensor_alloc(galloc->buffers[buffer_id], node, addr); + ggml_backend_tensor_alloc(galloc->buffers[buffer_id], tensor, addr); } else { - if (node->buffer == NULL) { + if (tensor->buffer == NULL) { // this tensor was allocated without ggml-backend return; } @@ -843,13 +827,18 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph) // reset buffers for (int i = 0; i < galloc->n_buffers; i++) { - // zero size buffers are not allocated if (galloc->buffers[i] != NULL) { ggml_backend_buffer_reset(galloc->buffers[i]); } } // allocate the graph tensors from the previous assignments + // leafs + for (int i = 0; i < graph->n_leafs; i++) { + struct ggml_tensor * leaf = graph->leafs[i]; + struct leaf_alloc * leaf_alloc = &galloc->leaf_allocs[i]; + ggml_gallocr_init_tensor(galloc, leaf, leaf_alloc->buffer_id, &leaf_alloc->leaf); + } // nodes for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; @@ -863,12 +852,6 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph) } ggml_gallocr_init_tensor(galloc, node, node_alloc->buffer_id, &node_alloc->dst); } - // leafs - for (int i = 0; i < graph->n_leafs; i++) { - struct ggml_tensor * leaf = graph->leafs[i]; - struct tensor_alloc * leaf_alloc = &galloc->leaf_allocs[i]; - ggml_gallocr_init_tensor(galloc, leaf, 0, leaf_alloc); - } return true; } @@ -900,12 +883,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx, return false; } - struct ggml_tallocr * tallocr = ggml_tallocr_new(buffer); + struct ggml_tallocr tallocr = ggml_tallocr_new(buffer); for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) { if (t->data == NULL) { if (t->view_src == NULL) { - ggml_tallocr_alloc(tallocr, t); + ggml_tallocr_alloc(&tallocr, t); } else if (t->buffer == NULL) { ggml_backend_view_init(buffer, t); } @@ -917,8 +900,6 @@ static bool alloc_tensor_range(struct ggml_context * ctx, } } - ggml_tallocr_free(tallocr); - *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1)); (*buffers)[(*n_buffers)++] = buffer; diff --git a/ggml-alloc.h b/ggml-alloc.h index 1d9085d15..434c13b34 100644 --- a/ggml-alloc.h +++ b/ggml-alloc.h @@ -11,11 +11,15 @@ typedef struct ggml_backend_buffer * ggml_backend_buffer_t; typedef struct ggml_backend * ggml_backend_t; // Tensor allocator -typedef struct ggml_tallocr * ggml_tallocr_t; +struct ggml_tallocr { + ggml_backend_buffer_t buffer; + void * base; + size_t alignment; + size_t offset; +}; -GGML_API ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer); -GGML_API void ggml_tallocr_free(ggml_tallocr_t talloc); -GGML_API void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor); +GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer); +GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor); // Graph allocator /* @@ -50,7 +54,11 @@ GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc); // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed // returns false if the buffer allocation failed GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); -GGML_API bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids); +GGML_API bool ggml_gallocr_reserve_n( + ggml_gallocr_t galloc, + struct ggml_cgraph * graph, + const int * node_buffer_ids, + const int * leaf_buffer_ids); // automatic reallocation if the topology changes when using a single buffer // returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) diff --git a/ggml-backend-impl.h b/ggml-backend-impl.h index 2e9ba58a9..e475e20e5 100644 --- a/ggml-backend-impl.h +++ b/ggml-backend-impl.h @@ -86,12 +86,12 @@ extern "C" { // (optional) asynchronous tensor data access void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); - bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * src, struct ggml_tensor * dst); + bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst); // (optional) complete all pending operations void (*GGML_CALL synchronize)(ggml_backend_t backend); - // create a plan for ggml_cgraph and free it + // compute graph with a plan (not used currently) ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph); void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan); @@ -102,16 +102,27 @@ extern "C" { // check if the backend supports an operation bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op); + + // (optional) event synchronization + ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend); + void (*GGML_CALL event_free) (ggml_backend_event_t event); + void (*GGML_CALL event_record) (ggml_backend_event_t event); + void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event); + void (*GGML_CALL event_synchronize) (ggml_backend_event_t event); }; struct ggml_backend { ggml_guid_t guid; struct ggml_backend_i iface; - ggml_backend_context_t context; }; + struct ggml_backend_event { + ggml_backend_t backend; + void * context; + }; + // // Backend registry // diff --git a/ggml-backend.c b/ggml-backend.c index d60d98414..31f8d5a6d 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -221,29 +221,29 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten GGML_CALL void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; - GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); GGML_ASSERT(buf != NULL && "tensor buffer not set"); + GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds"); if (!size) { return; } - tensor->buffer->iface.set_tensor(buf, tensor, data, offset, size); + buf->iface.set_tensor(buf, tensor, data, offset, size); } GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; + GGML_ASSERT(buf != NULL && "tensor buffer not set"); GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); - GGML_ASSERT(tensor->buffer != NULL && "tensor buffer not set"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds"); if (!size) { return; } - tensor->buffer->iface.get_tensor(buf, tensor, data, offset, size); + buf->iface.get_tensor(buf, tensor, data, offset, size); } void ggml_backend_synchronize(ggml_backend_t backend) { @@ -255,18 +255,30 @@ void ggml_backend_synchronize(ggml_backend_t backend) { } ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) { + GGML_ASSERT(backend->iface.graph_plan_create != NULL); + return backend->iface.graph_plan_create(backend, cgraph); } void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { + GGML_ASSERT(backend->iface.graph_plan_free != NULL); + backend->iface.graph_plan_free(backend, plan); } enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { + GGML_ASSERT(backend->iface.graph_plan_compute != NULL); + return backend->iface.graph_plan_compute(backend, plan); } enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { + enum ggml_status err = ggml_backend_graph_compute_async(backend, cgraph); + ggml_backend_synchronize(backend); + return err; +} + +bool ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph) { return backend->iface.graph_compute(backend, cgraph); } @@ -314,34 +326,68 @@ void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst } } -void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) { +void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst) { GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts"); if (src == dst) { return; } - if (ggml_backend_buft_supports_backend(src->buffer->buft, backend) && ggml_backend_buft_supports_backend(dst->buffer->buft, backend)) { - if (backend->iface.cpy_tensor_async != NULL) { - if (backend->iface.cpy_tensor_async(backend, src, dst)) { - return; - } + if (backend_dst->iface.cpy_tensor_async != NULL) { + if (backend_dst->iface.cpy_tensor_async(backend_src, backend_dst, src, dst)) { + return; } } - size_t nbytes = ggml_nbytes(src); + // an async copy would normally happen after all the queued operations on both backends are completed + // sync src, set_async dst if (ggml_backend_buffer_is_host(src->buffer)) { - ggml_backend_tensor_set_async(backend, dst, src->data, 0, nbytes); - } - else { + ggml_backend_synchronize(backend_src); + ggml_backend_tensor_set_async(backend_dst, dst, src->data, 0, ggml_nbytes(src)); + } else { + ggml_backend_synchronize(backend_src); ggml_backend_tensor_copy(src, dst); + ggml_backend_synchronize(backend_dst); } } +// events + +ggml_backend_event_t ggml_backend_event_new(ggml_backend_t backend) { + if (backend->iface.event_new == NULL) { + return NULL; + } + return backend->iface.event_new(backend); +} + +void ggml_backend_event_free(ggml_backend_event_t event) { + if (event == NULL) { + return; + } + event->backend->iface.event_free(event); +} + +void ggml_backend_event_record(ggml_backend_event_t event) { + GGML_ASSERT(event->backend->iface.event_record != NULL); + + event->backend->iface.event_record(event); +} + +void ggml_backend_event_synchronize(ggml_backend_event_t event) { + GGML_ASSERT(event->backend->iface.event_synchronize != NULL); + + event->backend->iface.event_synchronize(event); +} + +void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event) { + GGML_ASSERT(backend->iface.event_wait != NULL); + + backend->iface.event_wait(backend, event); +} // backend registry -#define GGML_MAX_BACKENDS_REG 16 +#define GGML_REG_MAX_BACKENDS 16 struct ggml_backend_reg { char name[128]; @@ -350,7 +396,7 @@ struct ggml_backend_reg { void * user_data; }; -static struct ggml_backend_reg ggml_backend_registry[GGML_MAX_BACKENDS_REG]; +static struct ggml_backend_reg ggml_backend_registry[GGML_REG_MAX_BACKENDS]; static size_t ggml_backend_registry_count = 0; GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data); @@ -395,7 +441,7 @@ GGML_CALL static void ggml_backend_registry_init(void) { } GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) { - GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG); + GGML_ASSERT(ggml_backend_registry_count < GGML_REG_MAX_BACKENDS); size_t id = ggml_backend_registry_count; @@ -746,8 +792,12 @@ GGML_CALL static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads); if (cpu_ctx->work_size < cplan.work_size) { - // TODO: may be faster to free and use malloc to avoid the copy - cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size); + free(cpu_ctx->work_data); + cpu_ctx->work_data = malloc(cplan.work_size); + if (cpu_ctx->work_data == NULL) { + cpu_ctx->work_size = 0; + return GGML_STATUS_ALLOC_FAILED; + } cpu_ctx->work_size = cplan.work_size; } cplan.work_data = cpu_ctx->work_data; @@ -784,6 +834,11 @@ static struct ggml_backend_i cpu_backend_i = { /* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute, /* .graph_compute = */ ggml_backend_cpu_graph_compute, /* .supports_op = */ ggml_backend_cpu_supports_op, + /* .event_new = */ NULL, + /* .event_free = */ NULL, + /* .event_record = */ NULL, + /* .event_wait = */ NULL, + /* .event_synchronize = */ NULL, }; static ggml_guid_t ggml_backend_cpu_guid(void) { @@ -939,15 +994,27 @@ static bool ggml_is_view_op(enum ggml_op op) { // scheduler -#define GGML_MAX_BACKENDS 16 -#define GGML_MAX_SPLITS 256 -#define GGML_MAX_SPLIT_INPUTS 16 +#ifndef GGML_SCHED_MAX_BACKENDS +#define GGML_SCHED_MAX_BACKENDS 16 +#endif + +#ifndef GGML_SCHED_MAX_SPLITS +#define GGML_SCHED_MAX_SPLITS 256 +#endif + +#ifndef GGML_SCHED_MAX_SPLIT_INPUTS +#define GGML_SCHED_MAX_SPLIT_INPUTS 16 +#endif + +#ifndef GGML_SCHED_MAX_COPIES +#define GGML_SCHED_MAX_COPIES 4 +#endif struct ggml_backend_sched_split { int backend_id; int i_start; int i_end; - struct ggml_tensor * inputs[GGML_MAX_SPLIT_INPUTS]; + struct ggml_tensor * inputs[GGML_SCHED_MAX_SPLIT_INPUTS]; int n_inputs; // graph view of this split struct ggml_cgraph graph; @@ -955,45 +1022,53 @@ struct ggml_backend_sched_split { struct ggml_backend_sched { bool is_reset; // true if the scheduler has been reset since the last graph split + bool is_alloc; int n_backends; - ggml_backend_t backends[GGML_MAX_BACKENDS]; - ggml_backend_buffer_type_t bufts[GGML_MAX_BACKENDS]; + ggml_backend_t backends[GGML_SCHED_MAX_BACKENDS]; + ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS]; ggml_gallocr_t galloc; // hash keys of the nodes in the graph struct ggml_hash_set hash_set; // hash values int * tensor_backend_id; - struct ggml_tensor * (* tensor_copies)[GGML_MAX_BACKENDS]; + struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES]; - int * node_backend_ids; // [n_nodes] - int n_nodes; + int * node_backend_ids; // [graph_size] + int * leaf_backend_ids; // [graph_size] // copy of the graph with modified inputs struct ggml_cgraph * graph; - struct ggml_backend_sched_split splits[GGML_MAX_SPLITS]; + // graph splits + struct ggml_backend_sched_split splits[GGML_SCHED_MAX_SPLITS]; int n_splits; + // pipeline parallelism support + int n_copies; + int cur_copy; + ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES]; + struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS]; + int n_graph_inputs; + struct ggml_context * ctx; ggml_backend_sched_eval_callback callback_eval; void * callback_eval_user_data; // align context_buffer to GGML_MEM_ALIGN - #ifdef _MSC_VER +#ifdef _MSC_VER __declspec(align(GGML_MEM_ALIGN)) - #else +#else __attribute__((aligned(GGML_MEM_ALIGN))) - #endif - char context_buffer[GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)]; +#endif + char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)]; }; -#define hash_id(node) ggml_hash_find_or_insert(sched->hash_set, node) -#define tensor_backend_id(node) sched->tensor_backend_id[hash_id(node)] -#define tensor_backend(node) (tensor_backend_id(node) == -1 ? NULL : sched->backends[tensor_backend_id(node)]) +#define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor) +#define tensor_backend_id(tensor) sched->tensor_backend_id[hash_id(tensor)] // returns the priority of the backend, lower id is higher priority static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) { @@ -1005,7 +1080,8 @@ static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backen return -1; } -static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, ggml_backend_buffer_t buffer) { +static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor) { + ggml_backend_buffer_t buffer = tensor->buffer; if (buffer == NULL) { return -1; } @@ -1016,12 +1092,16 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, gg return i; } } - GGML_ASSERT(false && "tensor buffer type not supported by any backend"); - return -1; // silence warning + + fprintf(stderr, "%s: error: no backend supports buffer type %s used in tensor %s\n", + __func__, ggml_backend_buffer_name(buffer), tensor->name); + GGML_ASSERT(false); + + return -1; } #if 0 -static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS][128]; // debug only +static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only #define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__) #define GET_CAUSE(node) causes[hash_id(node)] #else @@ -1035,19 +1115,28 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st // assign pre-allocated nodes to their backend // dst - int cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->buffer); + int cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor); if (cur_backend != -1) { - SET_CAUSE(node, "1.dst"); + SET_CAUSE(tensor, "1.dst"); return cur_backend; } + // view_src if (tensor->view_src != NULL) { - cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src->buffer); + cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src); if (cur_backend != -1) { - SET_CAUSE(node, "1.vsrc"); + SET_CAUSE(tensor, "1.vsrc"); return cur_backend; } } + + // input + if (tensor->flags & GGML_TENSOR_FLAG_INPUT) { + cur_backend = sched->n_backends - 1; // last backend (assumed CPU) + SET_CAUSE(tensor, "1.inp"); + return cur_backend; + } + // assign nodes that use weights to the backend of the weights for (int i = 0; i < GGML_MAX_SRC; i++) { const struct ggml_tensor * src = tensor->src[i]; @@ -1055,9 +1144,9 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st continue; } if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) { - int src_backend = ggml_backend_sched_backend_from_buffer(sched, src->buffer); + int src_backend = ggml_backend_sched_backend_from_buffer(sched, src); // operations with weights are always run on the same backend as the weights - SET_CAUSE(node, "1.wgt%d", i); + SET_CAUSE(tensor, "1.wgt%d", i); return src_backend; } } @@ -1093,7 +1182,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str if (ggml_is_view_op(node->op)) { continue; } - ggml_backend_t tensor_backend = tensor_backend(node); + ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node); fprintf(stderr, "node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name, fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node)); for (int j = 0; j < GGML_MAX_SRC; j++) { @@ -1101,7 +1190,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str if (src == NULL) { continue; } - ggml_backend_t src_backend = tensor_backend(src); + ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src); fprintf(stderr, " %20.20s (%5.5s) [%5.5s %8.8s]", src->name, fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src)); } @@ -1118,6 +1207,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { // reset splits sched->n_splits = 0; + sched->n_graph_inputs = 0; sched->is_reset = false; struct ggml_init_params params = { @@ -1163,7 +1253,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg } } #ifdef DEBUG_PASS1 - fprintf(stderr, "PASS 1 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); + fprintf(stderr, "PASS 1 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph); #endif // pass 2: expand current backend assignments @@ -1171,28 +1261,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg // expand gpu backends (i.e. non last prio) up and down, ignoring cpu (the lowest priority backend) // thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops - // pass 2.1 expand gpu up - { - int cur_backend_id = -1; - for (int i = graph->n_nodes - 1; i >= 0; i--) { - struct ggml_tensor * node = graph->nodes[i]; - if (ggml_is_view_op(node->op)) { - continue; - } - int tensor_backend_id = tensor_backend_id(node); - if (tensor_backend_id != -1) { - if (tensor_backend_id == sched->n_backends - 1) { - // skip cpu (lowest prio backend) - cur_backend_id = -1; - } else { - cur_backend_id = tensor_backend_id; - } - } else { - tensor_backend_id(node) = cur_backend_id; - SET_CAUSE(node, "2.1"); - } - } - } // pass 2.2 expand gpu down { @@ -1217,7 +1285,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg } } - // pass 2.3 expand rest up + // pass 2.1 expand gpu up { int cur_backend_id = -1; for (int i = graph->n_nodes - 1; i >= 0; i--) { @@ -1227,14 +1295,20 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg } int tensor_backend_id = tensor_backend_id(node); if (tensor_backend_id != -1) { - cur_backend_id = tensor_backend_id; + if (tensor_backend_id == sched->n_backends - 1) { + // skip cpu (lowest prio backend) + cur_backend_id = -1; + } else { + cur_backend_id = tensor_backend_id; + } } else { tensor_backend_id(node) = cur_backend_id; - SET_CAUSE(node, "2.3"); + SET_CAUSE(node, "2.1"); } } } + // pass 2.4 expand rest down { int cur_backend_id = -1; @@ -1252,8 +1326,26 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg } } } + // pass 2.3 expand rest up + { + int cur_backend_id = -1; + for (int i = graph->n_nodes - 1; i >= 0; i--) { + struct ggml_tensor * node = graph->nodes[i]; + if (ggml_is_view_op(node->op)) { + continue; + } + int tensor_backend_id = tensor_backend_id(node); + if (tensor_backend_id != -1) { + cur_backend_id = tensor_backend_id; + } else { + tensor_backend_id(node) = cur_backend_id; + SET_CAUSE(node, "2.3"); + } + } + } + #ifdef DEBUG_PASS2 - fprintf(stderr, "PASS 2 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); + fprintf(stderr, "PASS 2 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph); #endif // pass 3: assign backends to remaining src from dst and view_src @@ -1283,7 +1375,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg } } #ifdef DEBUG_PASS3 - fprintf(stderr, "PASS 3 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); + fprintf(stderr, "PASS 3 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph); #endif // pass 4: split graph, find tensors that need to be copied @@ -1315,7 +1407,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg if (tensor_backend_id != cur_backend_id) { sched->splits[cur_split].i_end = i; cur_split++; - GGML_ASSERT(cur_split < GGML_MAX_SPLITS); + GGML_ASSERT(cur_split < GGML_SCHED_MAX_SPLITS); sched->splits[cur_split].backend_id = tensor_backend_id; sched->splits[cur_split].i_start = i; sched->splits[cur_split].n_inputs = 0; @@ -1328,25 +1420,57 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg if (src == NULL) { continue; } + int src_backend_id = tensor_backend_id(src); assert(src_backend_id != -1); // all inputs should be assigned by now + + if (src->flags & GGML_TENSOR_FLAG_INPUT) { + size_t id = hash_id(src); + if (sched->tensor_copies[id][src_backend_id][0] == NULL) { + ggml_backend_t backend = sched->backends[src_backend_id]; + for (int c = 0; c < sched->n_copies; c++) { + struct ggml_tensor * tensor_copy; + if (c == sched->cur_copy) { + tensor_copy = src; // use the original tensor as the current copy + } else { + tensor_copy = ggml_dup_tensor_layout(sched->ctx, src); + ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c); + } + if (sched->n_copies > 1) { + ggml_set_input(tensor_copy); + ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor + } + sched->tensor_copies[id][src_backend_id][c] = tensor_copy; + tensor_backend_id(tensor_copy) = src_backend_id; + SET_CAUSE(tensor_copy, "4.cpy"); + } + int n_graph_inputs = sched->n_graph_inputs++; + GGML_ASSERT(n_graph_inputs < GGML_SCHED_MAX_SPLIT_INPUTS); + sched->graph_inputs[n_graph_inputs] = src; + } + } + if (src_backend_id != tensor_backend_id) { // create a copy of the input in the split's backend size_t id = hash_id(src); - if (sched->tensor_copies[id][cur_backend_id] == NULL) { + if (sched->tensor_copies[id][cur_backend_id][0] == NULL) { ggml_backend_t backend = sched->backends[cur_backend_id]; - struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src); - ggml_format_name(tensor_copy, "%s#%s", ggml_backend_name(backend), src->name); - - sched->tensor_copies[id][cur_backend_id] = tensor_copy; - tensor_backend_id(tensor_copy) = cur_backend_id; - SET_CAUSE(tensor_copy, "4.cpy"); - + for (int c = 0; c < sched->n_copies; c++) { + struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src); + ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c); + if (sched->n_copies > 1) { + ggml_set_input(tensor_copy); + ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor + } + sched->tensor_copies[id][cur_backend_id][c] = tensor_copy; + tensor_backend_id(tensor_copy) = cur_backend_id; + SET_CAUSE(tensor_copy, "4.cpy"); + } int n_inputs = sched->splits[cur_split].n_inputs++; - GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS); + GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS); sched->splits[cur_split].inputs[n_inputs] = src; } - node->src[j] = sched->tensor_copies[id][cur_backend_id]; + node->src[j] = sched->tensor_copies[id][cur_backend_id][sched->cur_copy]; } } } @@ -1354,37 +1478,39 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg sched->n_splits = cur_split + 1; } #ifdef DEBUG_PASS4 - fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); + fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph); #endif #ifndef NDEBUG // sanity check: all sources should have the same backend as the node for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; - ggml_backend_t tensor_backend = tensor_backend(node); + ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node); if (tensor_backend == NULL) { fprintf(stderr, "!!!!!!! %s has no backend\n", node->name); } - if (node->view_src != NULL && tensor_backend != tensor_backend(node->view_src)) { + if (node->view_src != NULL && tensor_backend != ggml_backend_sched_get_tensor_backend(sched, node->view_src)) { fprintf(stderr, "!!!!!!! %s has backend %s, view_src %s has backend %s\n", node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", - node->view_src->name, tensor_backend(node->view_src) ? ggml_backend_name(tensor_backend(node->view_src)) : "NULL"); + node->view_src->name, ggml_backend_sched_get_tensor_backend(sched, node->view_src) ? + ggml_backend_name(ggml_backend_sched_get_tensor_backend(sched, node->view_src)) : "NULL"); } for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { continue; } - ggml_backend_t src_backend = tensor_backend(src); + ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src); if (src_backend != tensor_backend /* && src_backend != NULL */) { fprintf(stderr, "!!!! %s has backend %s, src %d (%s) has backend %s\n", node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", j, src->name, src_backend ? ggml_backend_name(src_backend) : "NULL"); } - if (src->view_src != NULL && src_backend != tensor_backend(src->view_src)) { + if (src->view_src != NULL && src_backend != ggml_backend_sched_get_tensor_backend(sched, src->view_src)) { fprintf(stderr, "!!!!!!! [src] %s has backend %s, view_src %s has backend %s\n", src->name, src_backend ? ggml_backend_name(src_backend) : "NULL", - src->view_src->name, tensor_backend(src->view_src) ? ggml_backend_name(tensor_backend(src->view_src)) : "NULL"); + src->view_src->name, ggml_backend_sched_get_tensor_backend(sched, src->view_src) ? + ggml_backend_name(ggml_backend_sched_get_tensor_backend(sched, src->view_src)) : "NULL"); } } } @@ -1392,18 +1518,20 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg #endif // create copies of the graph for each split - // FIXME: avoid this copy, pass split inputs to ggml_gallocr_alloc_graph_n in some other way - struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_MAX_SPLIT_INPUTS, false); + // TODO: avoid this copy + struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS, false); for (int i = 0; i < sched->n_splits; i++) { struct ggml_backend_sched_split * split = &sched->splits[i]; split->graph = ggml_graph_view(graph, split->i_start, split->i_end); + // add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split for (int j = 0; j < split->n_inputs; j++) { struct ggml_tensor * input = split->inputs[j]; - struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split->backend_id]; + struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split->backend_id][sched->cur_copy]; // add a dependency to the input source so that it is not freed before the copy is done struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input); + input_dep->src[0] = input; sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(input); graph_copy->nodes[graph_copy->n_nodes++] = input_dep; @@ -1417,18 +1545,56 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j]; } } + + if (sched->n_copies > 1) { + // add input copies as leafs so that they are allocated first + for (int i = 0; i < sched->n_graph_inputs; i++) { + struct ggml_tensor * input = sched->graph_inputs[i]; + size_t id = hash_id(input); + int backend_id = tensor_backend_id(input); + for (int c = 0; c < sched->n_copies; c++) { + struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c]; + sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id; + graph_copy->leafs[graph_copy->n_leafs++] = input_cpy; + } + } + + for (int i = 0; i < sched->n_splits; i++) { + struct ggml_backend_sched_split * split = &sched->splits[i]; + int backend_id = split->backend_id; + for (int j = 0; j < split->n_inputs; j++) { + struct ggml_tensor * input = split->inputs[j]; + size_t id = hash_id(input); + for (int c = 0; c < sched->n_copies; c++) { + struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c]; + sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id; + graph_copy->leafs[graph_copy->n_leafs++] = input_cpy; + } + } + } + } + + // add leafs from the original graph + for (int i = 0; i < graph->n_leafs; i++) { + struct ggml_tensor * leaf = graph->leafs[i]; + sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf); + graph_copy->leafs[graph_copy->n_leafs++] = leaf; + } + sched->graph = graph_copy; } static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { - // ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids); + // allocate graph if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { + // the re-allocation may cause the split inputs to be moved to a different address + ggml_backend_sched_synchronize(sched); #ifndef NDEBUG - fprintf(stderr, "ggml_backend_sched: failed to allocate graph, reserving\n"); + fprintf(stderr, "%s: failed to allocate graph, reserving\n", __func__); #endif - ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids); + ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids); if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { - fprintf(stderr, "ggml_backend_sched: failed to allocate graph\n"); + fprintf(stderr, "%s: failed to allocate graph\n", __func__); return false; } } @@ -1437,9 +1603,6 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { } static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) { - uint64_t copy_us[GGML_MAX_BACKENDS] = {0}; - uint64_t compute_us[GGML_MAX_BACKENDS] = {0}; - struct ggml_backend_sched_split * splits = sched->splits; for (int i = 0; i < sched->n_splits; i++) { @@ -1448,34 +1611,36 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s ggml_backend_t split_backend = sched->backends[split_backend_id]; // copy the input tensors to the split backend - uint64_t copy_start_us = ggml_time_us(); for (int j = 0; j < split->n_inputs; j++) { + ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]); struct ggml_tensor * input = split->inputs[j]; - struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id]; + struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id][sched->cur_copy]; - GGML_ASSERT(input->buffer != NULL); - GGML_ASSERT(input_cpy->buffer != NULL); + if (input->flags & GGML_TENSOR_FLAG_INPUT) { + // inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done + if (sched->events[split_backend_id][sched->cur_copy] != NULL) { + ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]); + } else { + ggml_backend_synchronize(split_backend); + } + ggml_backend_tensor_copy(input, input_cpy); + } else { + if (sched->events[split_backend_id][sched->cur_copy] != NULL) { + ggml_backend_event_wait(split_backend, sched->events[split_backend_id][sched->cur_copy]); + } else { + ggml_backend_synchronize(split_backend); + ggml_backend_synchronize(input_backend); + } - ggml_backend_tensor_copy_async(split_backend, input, input_cpy); + ggml_backend_tensor_copy_async(input_backend, split_backend, input, input_cpy); + } } - //ggml_backend_synchronize(split_backend); // necessary to measure copy time - int64_t copy_end_us = ggml_time_us(); - copy_us[split_backend_id] += copy_end_us - copy_start_us; -#if 0 - char split_filename[GGML_MAX_NAME]; - snprintf(split_filename, GGML_MAX_NAME, "split_%i_%s.dot", i, ggml_backend_name(split_backend)); - ggml_graph_dump_dot(split->graph, NULL, split_filename); -#endif - - - uint64_t compute_start_us = ggml_time_us(); if (!sched->callback_eval) { - enum ggml_status ec = ggml_backend_graph_compute(split_backend, &split->graph); + enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &split->graph); if (ec != GGML_STATUS_SUCCESS) { return ec; } - //ggml_backend_synchronize(split_backend); // necessary to measure compute time } else { // similar to ggml_backend_compare_graph_backend for (int j0 = 0; j0 < split->graph.n_nodes; j0++) { @@ -1494,11 +1659,14 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1); - enum ggml_status ec = ggml_backend_graph_compute(split_backend, &gv); + enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &gv); if (ec != GGML_STATUS_SUCCESS) { return ec; } + // TODO: pass backend to the callback, then the user can decide if they want to synchronize + ggml_backend_synchronize(split_backend); + if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) { break; } @@ -1506,39 +1674,54 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s j0 = j1; } } - uint64_t compute_end_us = ggml_time_us(); - compute_us[split_backend_id] += compute_end_us - compute_start_us; - } -#if 0 - // per-backend timings - fprintf(stderr, "sched_compute_splits times (%d splits):\n", sched->n_splits); - for (int i = 0; i < sched->n_backends; i++) { - if (copy_us[i] > 0 || compute_us[i] > 0) { - fprintf(stderr, "\t%5.5s: %lu us copy, %lu us compute\n", ggml_backend_name(sched->backends[i]), copy_us[i], compute_us[i]); + // record the event of this copy + if (split->n_inputs > 0) { + if (sched->events[split_backend_id][sched->cur_copy] != NULL) { + ggml_backend_event_record(sched->events[split_backend_id][sched->cur_copy]); + } } } -#endif + + sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies; return GGML_STATUS_SUCCESS; } -ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size) { +ggml_backend_sched_t ggml_backend_sched_new( + ggml_backend_t * backends, + ggml_backend_buffer_type_t * bufts, + int n_backends, + size_t graph_size, + bool parallel) { GGML_ASSERT(n_backends > 0); - GGML_ASSERT(n_backends <= GGML_MAX_BACKENDS); + GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS); + GGML_ASSERT(ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1); // initialize hash table - sched->hash_set = ggml_hash_set_new(graph_size + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS); + sched->hash_set = ggml_hash_set_new(graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS); sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size); sched->tensor_copies = calloc(sizeof(sched->tensor_copies[0]), sched->hash_set.size); sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), graph_size); + sched->leaf_backend_ids = calloc(sizeof(sched->leaf_backend_ids[0]), graph_size); sched->n_backends = n_backends; - for (int i = 0; i < n_backends; i++) { - sched->backends[i] = backends[i]; - sched->bufts[i] = bufts ? bufts[i] : ggml_backend_get_default_buffer_type(backends[i]); + + sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1; + + GGML_ASSERT(sched->n_copies <= GGML_SCHED_MAX_COPIES); + + for (int b = 0; b < n_backends; b++) { + sched->backends[b] = backends[b]; + sched->bufts[b] = bufts ? bufts[b] : ggml_backend_get_default_buffer_type(backends[b]); + GGML_ASSERT(ggml_backend_buft_supports_backend(sched->bufts[b], backends[b])); + if (sched->n_copies > 1) { + for (int c = 0; c < sched->n_copies; c++) { + sched->events[b][c] = ggml_backend_event_new(backends[b]); + } + } } sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends); @@ -1552,12 +1735,18 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) { if (sched == NULL) { return; } + for (int b = 0; b < sched->n_backends; b++) { + for (int c = 0; c < sched->n_copies; c++) { + ggml_backend_event_free(sched->events[b][c]); + } + } ggml_gallocr_free(sched->galloc); ggml_free(sched->ctx); free(sched->hash_set.keys); free(sched->tensor_backend_id); free(sched->tensor_copies); free(sched->node_backend_ids); + free(sched->leaf_backend_ids); free(sched); } @@ -1569,34 +1758,63 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) { memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size); sched->is_reset = true; + sched->is_alloc = false; } bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) { ggml_backend_sched_split_graph(sched, measure_graph); - if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids)) { + // TODO: extract this to a separate function + if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) { return false; } ggml_backend_sched_reset(sched); + ggml_backend_sched_synchronize(sched); + + return true; +} + +bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { + GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS); + + ggml_backend_sched_split_graph(sched, graph); + + if (!ggml_backend_sched_alloc_splits(sched)) { + return false; + } + + sched->is_alloc = true; + return true; } enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { - GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS); + enum ggml_status err = ggml_backend_sched_graph_compute_async(sched, graph); + ggml_backend_sched_synchronize(sched); + return err; +} - if (!sched->is_reset) { +enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { + if (!sched->is_reset && !sched->is_alloc) { ggml_backend_sched_reset(sched); } - ggml_backend_sched_split_graph(sched, graph); - if (!ggml_backend_sched_alloc_splits(sched)) { - return GGML_STATUS_ALLOC_FAILED; + if (!sched->is_alloc) { + if (!ggml_backend_sched_alloc_graph(sched, graph)) { + return GGML_STATUS_ALLOC_FAILED; + } } return ggml_backend_sched_compute_splits(sched); } +void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) { + for (int i = 0; i < sched->n_backends; i++) { + ggml_backend_synchronize(sched->backends[i]); + } +} + void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) { sched->callback_eval = callback; sched->callback_eval_user_data = user_data; @@ -1606,19 +1824,24 @@ int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) { return sched->n_splits; } +int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) { + return sched->n_copies; +} + size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) { int backend_index = ggml_backend_sched_backend_id(sched, backend); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); + return ggml_gallocr_get_buffer_size(sched->galloc, backend_index); } -void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) { +void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) { int backend_index = ggml_backend_sched_backend_id(sched, backend); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); tensor_backend_id(node) = backend_index; } -ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) { +ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) { int backend_index = tensor_backend_id(node); if (backend_index == -1) { return NULL; diff --git a/ggml-backend.h b/ggml-backend.h index 8bed22578..099d9c258 100644 --- a/ggml-backend.h +++ b/ggml-backend.h @@ -9,6 +9,7 @@ extern "C" { typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; typedef struct ggml_backend_buffer * ggml_backend_buffer_t; + typedef struct ggml_backend_event * ggml_backend_event_t; typedef struct ggml_backend * ggml_backend_t; typedef void * ggml_backend_graph_plan_t; @@ -72,11 +73,24 @@ extern "C" { GGML_API enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan); GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph); + GGML_API bool ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph); GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op); // tensor copy between different backends GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst); - GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); // automatic fallback to sync copy + + // asynchronous copy + // the copy is performed after all the currently queued operations in backend_src + // backend_dst will wait for the copy to complete before performing other operations + // automatic fallback to sync copy if async is not supported + GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst); + + // events + GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend); + GGML_API void ggml_backend_event_free (ggml_backend_event_t event); + GGML_API void ggml_backend_event_record (ggml_backend_event_t event); + GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event); + GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event); // wait async on event // // CPU backend @@ -123,27 +137,31 @@ extern "C" { /* Example usage: - sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, num_backends); - // sched is initialized with measure allocators and cannot be used until allocated with a measure graph + // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be asigned + // preferrably to run on the same backend as the buffer + ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS); - // initialize buffers from a measure graph - measure_graph = build_graph(sched); // use the allocr to allocate inputs as needed + sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false); - // in build_graph: - build_graph(...) { - // manually assign nodes to a backend (optional, should not be needed in most cases) - struct ggml_tensor * node = ggml_mul_mat(ctx, ...); - ggml_backend_sched_set_node_backend(sched, node, backend_gpu); - } + // initialize buffers from a max size graph (optional) + reserve_graph = build_graph(sched, max_batch_size); - // allocate backend buffers from measure graph - ggml_backend_sched_init_measure(sched, measure_graph); + // manually assign nodes to a backend (optional, should not be needed in most cases) + struct ggml_tensor * node = ggml_mul_mat(ctx, ...); + ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu); - // the scheduler is now ready to compute graphs + ggml_backend_sched_reserve(sched, reserve_graph); // compute graph = build_graph(sched); ggml_backend_sched_graph_compute(sched, graph); + + // if there are graph inputs: + ggml_backend_sched_reset(sched); + ggml_backend_sched_alloc_graph(sched, graph); + ggml_backend_tensor_set(input_tensor, ...); + ggml_backend_sched_graph_compute(sched, graph); + } */ struct ggml_backend_sched; @@ -158,20 +176,26 @@ extern "C" { typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data); // Initialize a backend scheduler - GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size); + GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel); GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched); + // Initialize backend buffers from a measure graph GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); + // Get the number of splits of the last graph GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched); + GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched); GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend); - GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend); - GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node); + GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend); + GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node); // Allocate and compute graph on the backend scheduler + GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph); + GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph); + GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched); // Reset all assignments and allocators - must be called before changing the node backends GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched); diff --git a/ggml-cuda.cu b/ggml-cuda.cu index b8834ed05..d1b5e52ba 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -72,6 +72,7 @@ #define cudaEventCreateWithFlags hipEventCreateWithFlags #define cudaEventDisableTiming hipEventDisableTiming #define cudaEventRecord hipEventRecord +#define cudaEventSynchronize hipEventSynchronize #define cudaEvent_t hipEvent_t #define cudaEventDestroy hipEventDestroy #define cudaFree hipFree @@ -81,6 +82,7 @@ #define cudaGetDeviceProperties hipGetDeviceProperties #define cudaGetErrorString hipGetErrorString #define cudaGetLastError hipGetLastError +#define cudaLaunchHostFunc hipLaunchHostFunc #ifdef GGML_HIP_UMA #define cudaMalloc hipMallocManaged #define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size) @@ -104,6 +106,7 @@ #define cudaStreamCreateWithFlags hipStreamCreateWithFlags #define cudaStreamFireAndForget hipStreamFireAndForget #define cudaStreamNonBlocking hipStreamNonBlocking +#define cudaStreamPerThread hipStreamPerThread #define cudaStreamSynchronize hipStreamSynchronize #define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags) #define cudaStream_t hipStream_t @@ -10641,8 +10644,20 @@ GGML_CALL void ggml_cuda_get_device_description(int device, char * description, #define UNUSED GGML_UNUSED struct ggml_backend_cuda_context { + explicit ggml_backend_cuda_context(int device) : + device(device), + name(GGML_CUDA_NAME + std::to_string(device)) { + } + + ~ggml_backend_cuda_context() { + if (copy_event != nullptr) { + CUDA_CHECK(cudaEventDestroy(copy_event)); + } + } + int device; std::string name; + cudaEvent_t copy_event = nullptr; }; // cuda buffer @@ -10732,9 +10747,8 @@ GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context; ggml_cuda_set_device(ctx->device); - CUDA_CHECK(cudaDeviceSynchronize()); - CUDA_CHECK(cudaMemcpy((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaDeviceSynchronize()); + CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, cudaStreamPerThread)); + CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread)); } GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { @@ -10743,26 +10757,25 @@ GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context; ggml_cuda_set_device(ctx->device); - CUDA_CHECK(cudaDeviceSynchronize()); - CUDA_CHECK(cudaMemcpy(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost)); - CUDA_CHECK(cudaDeviceSynchronize()); + CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread)); } GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { if (ggml_backend_buffer_is_cuda(src->buffer)) { ggml_backend_cuda_buffer_context * src_ctx = (ggml_backend_cuda_buffer_context *)src->buffer->context; - ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)buffer->context; - - ggml_cuda_set_device(src_ctx->device); - CUDA_CHECK(cudaDeviceSynchronize()); - ggml_cuda_set_device(dst_ctx->device); - CUDA_CHECK(cudaDeviceSynchronize()); - CUDA_CHECK(cudaMemcpy((char *)dst->data, (const char *)src->data, ggml_nbytes(src), cudaMemcpyDeviceToDevice)); - CUDA_CHECK(cudaDeviceSynchronize()); - + ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)dst->buffer->context; + if (src_ctx->device == dst_ctx->device) { + CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(src), cudaMemcpyDeviceToDevice, cudaStreamPerThread)); + } else { + CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread)); + } + CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread)); return true; } return false; + + UNUSED(buffer); } GGML_CALL static void ggml_backend_cuda_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { @@ -11007,7 +11020,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buf } const char * buf_host = (const char *)data + offset_split; - CUDA_CHECK(cudaMemcpy(extra->data_device[id], buf_host, original_size, cudaMemcpyHostToDevice)); + CUDA_CHECK(cudaMemcpyAsync(extra->data_device[id], buf_host, original_size, cudaMemcpyHostToDevice, cudaStreamPerThread)); + } + + for (int id = 0; id < g_device_count; ++id) { + CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread)); } } @@ -11041,7 +11058,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_get_tensor(ggml_backend_buf } char * buf_host = (char *)data + offset_split; - CUDA_CHECK(cudaMemcpy(buf_host, extra->data_device[id], original_size, cudaMemcpyDeviceToHost)); + CUDA_CHECK(cudaMemcpyAsync(buf_host, extra->data_device[id], original_size, cudaMemcpyDeviceToHost, cudaStreamPerThread)); + } + + for (int id = 0; id < g_device_count; ++id) { + CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread)); } } @@ -11220,6 +11241,10 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() { return &ggml_backend_cuda_buffer_type_host; } +//static bool ggml_backend_buffer_is_cuda_host(ggml_backend_buffer_t buffer) { +// return buffer->buft->iface.get_name == ggml_backend_cuda_host_buffer_type_name; +//} + // backend GGML_CALL static const char * ggml_backend_cuda_name(ggml_backend_t backend) { @@ -11243,8 +11268,9 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_cuda_get_default_buffer GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; + ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; - GGML_ASSERT(tensor->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type"); + GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type"); GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU); CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, g_cudaStreams[cuda_ctx->device][0])); @@ -11252,22 +11278,61 @@ GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend, GGML_CALL static void ggml_backend_cuda_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; + ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; - GGML_ASSERT(tensor->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type"); + GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type"); GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU); CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx->device][0])); } -GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend, const ggml_tensor * src, ggml_tensor * dst) { - ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; +GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, ggml_tensor * dst) { + GGML_ASSERT(ggml_backend_is_cuda(backend_src) || ggml_backend_is_cuda(backend_dst)); - if (dst->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && ggml_backend_buffer_is_cuda(src->buffer)) { - CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx->device][0])); - return true; + ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer; + ggml_backend_buffer_t buf_dst = dst->view_src ? dst->view_src->buffer : dst->buffer; + + if (!ggml_backend_buffer_is_cuda(src->buffer)) { + return false; } - return false; + if (!ggml_backend_buffer_is_cuda(dst->buffer)) { + return false; + } + + // device -> device + ggml_backend_cuda_context * cuda_ctx_src = (ggml_backend_cuda_context *)backend_src->context; + ggml_backend_cuda_context * cuda_ctx_dst = (ggml_backend_cuda_context *)backend_dst->context; + + if (backend_src != backend_dst) { + ggml_backend_cuda_buffer_context * buf_ctx_src = (ggml_backend_cuda_buffer_context *)buf_src->context; + ggml_backend_cuda_buffer_context * buf_ctx_dst = (ggml_backend_cuda_buffer_context *)buf_dst->context; + + GGML_ASSERT(cuda_ctx_src->device == buf_ctx_src->device); + GGML_ASSERT(cuda_ctx_dst->device == buf_ctx_dst->device); + + if (!cuda_ctx_src->copy_event) { + ggml_cuda_set_device(cuda_ctx_src->device); + CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming)); + } + + // copy on src stream + if (cuda_ctx_src->device == cuda_ctx_dst->device) { + CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0])); + } else { + CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), g_cudaStreams[cuda_ctx_src->device][0])); + } + + // record event on src stream + CUDA_CHECK(cudaEventRecord(cuda_ctx_src->copy_event, g_cudaStreams[cuda_ctx_src->device][0])); + + // wait on dst stream for the copy to complete + CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[cuda_ctx_dst->device][0], cuda_ctx_src->copy_event, 0)); + } else { + // src and dst are on the same backend + CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0])); + } + return true; } GGML_CALL static void ggml_backend_cuda_synchronize(ggml_backend_t backend) { @@ -11444,6 +11509,52 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons UNUSED(backend); } +static ggml_backend_event_t ggml_backend_cuda_event_new(ggml_backend_t backend) { + ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; + + ggml_cuda_set_device(cuda_ctx->device); + + cudaEvent_t event; + CUDA_CHECK(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); + + return new ggml_backend_event { + /* .backend = */ backend, + /* .context = */ event, + }; +} + +static void ggml_backend_cuda_event_free(ggml_backend_event_t event) { + CUDA_CHECK(cudaEventDestroy((cudaEvent_t)event->context)); + + delete event; +} + +static void ggml_backend_cuda_event_record(ggml_backend_event_t event) { + ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)event->backend->context; + + CUDA_CHECK(cudaEventRecord((cudaEvent_t)event->context, g_cudaStreams[cuda_ctx->device][0])); +} + +static void ggml_backend_cuda_event_wait(ggml_backend_t backend, ggml_backend_event_t event) { + ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; + + if (ggml_backend_is_cuda(event->backend)) { + CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[cuda_ctx->device][0], (cudaEvent_t)event->context, 0)); + } else { + // untested + auto wait_fn = [](void * user_data) { + ggml_backend_event_t event = (ggml_backend_event_t)user_data; + ggml_backend_event_synchronize(event); + }; + + CUDA_CHECK(cudaLaunchHostFunc(g_cudaStreams[cuda_ctx->device][0], wait_fn, event)); + } +} + +static void ggml_backend_cuda_event_synchronize(ggml_backend_event_t event) { + CUDA_CHECK(cudaEventSynchronize((cudaEvent_t)event->context)); +} + static ggml_backend_i ggml_backend_cuda_interface = { /* .get_name = */ ggml_backend_cuda_name, /* .free = */ ggml_backend_cuda_free, @@ -11457,6 +11568,11 @@ static ggml_backend_i ggml_backend_cuda_interface = { /* .graph_plan_compute = */ NULL, /* .graph_compute = */ ggml_backend_cuda_graph_compute, /* .supports_op = */ ggml_backend_cuda_supports_op, + /* .event_new = */ ggml_backend_cuda_event_new, + /* .event_free = */ ggml_backend_cuda_event_free, + /* .event_record = */ ggml_backend_cuda_event_record, + /* .event_wait = */ ggml_backend_cuda_event_wait, + /* .event_synchronize = */ ggml_backend_cuda_event_synchronize, }; static ggml_guid_t ggml_backend_cuda_guid() { @@ -11475,10 +11591,11 @@ GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device) { // not strictly necessary, but it may reduce the overhead of the first graph_compute ggml_cuda_set_main_device(device); - ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context { - /* .device = */ device, - /* .name = */ GGML_CUDA_NAME + std::to_string(device), - }; + ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context(device); + if (ctx == nullptr) { + fprintf(stderr, "%s: error: failed to allocate context\n", __func__); + return nullptr; + } ggml_backend_t cuda_backend = new ggml_backend { /* .guid = */ ggml_backend_cuda_guid(), diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp index 83a7822fd..4caf2c9e7 100644 --- a/ggml-kompute.cpp +++ b/ggml-kompute.cpp @@ -1951,6 +1951,11 @@ static struct ggml_backend_i kompute_backend_i = { /* .graph_plan_compute = */ NULL, /* .graph_compute = */ ggml_backend_kompute_graph_compute, /* .supports_op = */ ggml_backend_kompute_supports_op, + /* .event_new = */ NULL, + /* .event_free = */ NULL, + /* .event_record = */ NULL, + /* .event_wait = */ NULL, + /* .event_synchronize = */ NULL, }; static ggml_guid_t ggml_backend_kompute_guid() { diff --git a/ggml-metal.m b/ggml-metal.m index 1825d3320..3a5476c52 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -2820,6 +2820,11 @@ static struct ggml_backend_i ggml_backend_metal_i = { /* .graph_plan_compute = */ NULL, /* .graph_compute = */ ggml_backend_metal_graph_compute, /* .supports_op = */ ggml_backend_metal_supports_op, + /* .event_new = */ NULL, + /* .event_free = */ NULL, + /* .event_record = */ NULL, + /* .event_wait = */ NULL, + /* .event_synchronize = */ NULL, }; void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) { diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index c2ab13034..9f6506383 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -17249,13 +17249,18 @@ static ggml_backend_i ggml_backend_sycl_interface = { /* .get_default_buffer_type = */ ggml_backend_sycl_get_default_buffer_type, /* .set_tensor_async = */ ggml_backend_sycl_set_tensor_async, /* .get_tensor_async = */ ggml_backend_sycl_get_tensor_async, - /* .cpy_tensor_async = */ ggml_backend_sycl_cpy_tensor_async, + /* .cpy_tensor_async = */ NULL, //ggml_backend_sycl_cpy_tensor_async, // TODO: update for the new interface /* .synchronize = */ ggml_backend_sycl_synchronize, /* .graph_plan_create = */ NULL, /* .graph_plan_free = */ NULL, /* .graph_plan_compute = */ NULL, /* .graph_compute = */ ggml_backend_sycl_graph_compute, /* .supports_op = */ ggml_backend_sycl_supports_op, + /* .event_new = */ NULL, + /* .event_free = */ NULL, + /* .event_record = */ NULL, + /* .event_wait = */ NULL, + /* .event_synchronize = */ NULL, }; static ggml_guid_t ggml_backend_sycl_guid() { diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index d41aa7d22..7cce616ba 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -5693,6 +5693,11 @@ static ggml_backend_i ggml_backend_vk_interface = { /* .graph_plan_compute = */ NULL, /* .graph_compute = */ ggml_backend_vk_graph_compute, /* .supports_op = */ ggml_backend_vk_supports_op, + /* .event_new = */ NULL, + /* .event_free = */ NULL, + /* .event_record = */ NULL, + /* .event_wait = */ NULL, + /* .event_synchronize = */ NULL, }; static ggml_guid_t ggml_backend_vk_guid() { diff --git a/ggml.c b/ggml.c index 9a7bd1d8c..fbc66f65b 100644 --- a/ggml.c +++ b/ggml.c @@ -11560,8 +11560,6 @@ static void ggml_compute_forward_get_rows_q( const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * src1 = dst->src[1]; - assert(params->ith == 0); - if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; } @@ -11569,7 +11567,7 @@ static void ggml_compute_forward_get_rows_q( GGML_TENSOR_BINARY_OP_LOCALS const int64_t nc = ne00; - const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr); + const int64_t nr = ggml_nelements(src1); const enum ggml_type type = src0->type; ggml_to_float_t const dequantize_row_q = type_traits[type].to_float; @@ -11579,17 +11577,25 @@ static void ggml_compute_forward_get_rows_q( assert(nb00 == ggml_type_size(type)); assert(ggml_nrows(dst) == nr); - // TODO: multi-thread - for (int64_t i12 = 0; i12 < ne12; ++i12) { - for (int64_t i11 = 0; i11 < ne11; ++i11) { - for (int64_t i10 = 0; i10 < ne10; ++i10) { - const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); + const int ith = params->ith; + const int nth = params->nth; - dequantize_row_q( - (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03), - (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc); - } - } + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int64_t i = ir0; i < ir1; ++i) { + const int64_t i12 = i/(ne11*ne10); + const int64_t i11 = (i - i12*ne11*ne10)/ne10; + const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10); + const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); + + dequantize_row_q( + (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03), + (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc); } } @@ -11600,8 +11606,6 @@ static void ggml_compute_forward_get_rows_f16( const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * src1 = dst->src[1]; - assert(params->ith == 0); - if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; } @@ -11609,24 +11613,32 @@ static void ggml_compute_forward_get_rows_f16( GGML_TENSOR_BINARY_OP_LOCALS const int64_t nc = ne00; - const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr); + const int64_t nr = ggml_nelements(src1); assert(ne0 == nc); assert(ne02 == ne11); assert(nb00 == sizeof(ggml_fp16_t)); assert(ggml_nrows(dst) == nr); - // TODO: multi-thread - for (int64_t i12 = 0; i12 < ne12; ++i12) { - for (int64_t i11 = 0; i11 < ne11; ++i11) { - for (int64_t i10 = 0; i10 < ne10; ++i10) { - const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); + const int ith = params->ith; + const int nth = params->nth; - ggml_fp16_to_fp32_row( - (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03), - (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc); - } - } + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int64_t i = ir0; i < ir1; ++i) { + const int64_t i12 = i/(ne11*ne10); + const int64_t i11 = (i - i12*ne11*ne10)/ne10; + const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10); + const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); + + ggml_fp16_to_fp32_row( + (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03), + (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc); } } @@ -11637,8 +11649,6 @@ static void ggml_compute_forward_get_rows_f32( const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * src1 = dst->src[1]; - assert(params->ith == 0); - if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; } @@ -11646,24 +11656,32 @@ static void ggml_compute_forward_get_rows_f32( GGML_TENSOR_BINARY_OP_LOCALS const int64_t nc = ne00; - const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr); + const int64_t nr = ggml_nelements(src1); assert(ne0 == nc); assert(ne02 == ne11); assert(nb00 == sizeof(float)); assert(ggml_nrows(dst) == nr); - // TODO: multi-thread - for (int64_t i12 = 0; i12 < ne12; ++i12) { - for (int64_t i11 = 0; i11 < ne11; ++i11) { - for (int64_t i10 = 0; i10 < ne10; ++i10) { - const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); + const int ith = params->ith; + const int nth = params->nth; - ggml_vec_cpy_f32(nc, - (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), - (float *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03)); - } - } + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int64_t i = ir0; i < ir1; ++i) { + const int64_t i12 = i/(ne11*ne10); + const int64_t i11 = (i - i12*ne11*ne10)/ne10; + const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10); + const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); + + ggml_vec_cpy_f32(nc, + (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), + (float *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03)); } } @@ -17796,7 +17814,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const node->perf_time_us += time_us_cur; } -static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { +static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_threads) { int n_tasks = 0; switch (node->op) { @@ -17877,6 +17895,12 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { { n_tasks = n_threads; } break; + case GGML_OP_GET_ROWS: + { + // FIXME: the cost of launching additional threads decreases performance with GPU offloading + //n_tasks = MIN(n_threads, ggml_nelements(node->src[1])); + n_tasks = MIN(n_cur_threads, ggml_nelements(node->src[1])); + } break; case GGML_OP_SCALE: case GGML_OP_SET: case GGML_OP_CONT: @@ -17884,7 +17908,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { case GGML_OP_VIEW: case GGML_OP_PERMUTE: case GGML_OP_TRANSPOSE: - case GGML_OP_GET_ROWS: case GGML_OP_GET_ROWS_BACK: case GGML_OP_DIAG: { @@ -18102,7 +18125,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { /* FINALIZE */ struct ggml_tensor * node = cgraph->nodes[node_n]; if (GGML_OP_HAS_FINALIZE[node->op]) { - params.nth = ggml_get_n_tasks(node, n_threads); + params.nth = ggml_get_n_tasks(node, n_threads, state->shared->n_threads); ggml_compute_forward(¶ms, node); } ggml_graph_compute_perf_stats_node(node, state->shared); @@ -18112,7 +18135,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { while (++node_n < cgraph->n_nodes) { GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes); struct ggml_tensor * node = cgraph->nodes[node_n]; - const int n_tasks = ggml_get_n_tasks(node, n_threads); + const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads); state->shared->perf_node_start_cycles = ggml_perf_cycles(); state->shared->perf_node_start_time_us = ggml_perf_time_us(); @@ -18160,7 +18183,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { /* INIT & COMPUTE */ struct ggml_tensor * node = cgraph->nodes[node_n]; - const int n_tasks = ggml_get_n_tasks(node, n_threads); + const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads); struct ggml_compute_params params = { /*.type =*/ GGML_TASK_TYPE_INIT, @@ -18225,7 +18248,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa for (int i = 0; i < cgraph->n_nodes; i++) { struct ggml_tensor * node = cgraph->nodes[i]; - const int n_tasks = ggml_get_n_tasks(node, n_threads); + const int n_tasks = ggml_get_n_tasks(node, n_threads, 1); max_tasks = MAX(max_tasks, n_tasks); diff --git a/llama.cpp b/llama.cpp index ad7b7b7d4..38e7036a7 100644 --- a/llama.cpp +++ b/llama.cpp @@ -978,21 +978,6 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) { } } -// -// ggml helpers -// - -static void ggml_graph_compute_helper(std::vector & buf, ggml_cgraph * graph, int n_threads) { - struct ggml_cplan plan = ggml_graph_plan(graph, n_threads); - - if (plan.work_size > 0) { - buf.resize(plan.work_size); - plan.work_data = buf.data(); - } - - ggml_graph_compute(graph, &plan); -} - // // llama helpers // @@ -1728,6 +1713,7 @@ struct llama_hparams { struct llama_cparams { uint32_t n_ctx; // context size used during inference uint32_t n_batch; + uint32_t n_ubatch; uint32_t n_threads; // number of threads to use for generation uint32_t n_threads_batch; // number of threads to use for batch processing @@ -2024,8 +2010,7 @@ struct llama_context { ggml_vk_free_cpu_assist(); #endif - ggml_backend_buffer_free(buf_input); - ggml_free(ctx_input); + ggml_backend_buffer_free(buf_output); } llama_cparams cparams; @@ -2051,12 +2036,20 @@ struct llama_context { int64_t t_p_eval_us = 0; int64_t t_eval_us = 0; + int64_t t_compute_start_us = 0; + int64_t n_queued_tokens = 0; + int32_t n_sample = 0; // number of tokens sampled int32_t n_p_eval = 0; // number of tokens in eval calls for the prompt (with batch size > 1) int32_t n_eval = 0; // number of eval calls - // logits output (2-dimensional array: [n_tokens][n_vocab]) - std::vector logits; + // host buffer for the model output (logits and embeddings) + ggml_backend_buffer_t buf_output = nullptr; + + // decode output (2-dimensional array: [n_tokens][n_vocab]) + size_t logits_size = 0; + float * logits = nullptr; + #ifndef NDEBUG // guard against access to unset logits std::vector logits_valid; @@ -2065,7 +2058,8 @@ struct llama_context { // embeddings output (2-dimensional array: [n_tokens][n_embd]) // populated only when pooling_type == LLAMA_POOLING_TYPE_NONE - std::vector embd; + size_t embd_size = 0; + float * embd = nullptr; // sequence embeddings output (map of [n_embd] vectors) // populated only when pooling_type != LLAMA_POOLING_TYPE_NONE @@ -2079,8 +2073,6 @@ struct llama_context { void * abort_callback_data = nullptr; // input tensors - ggml_backend_buffer_t buf_input = nullptr; - ggml_context * ctx_input = nullptr; struct ggml_tensor * inp_tokens; // I32 [n_batch] struct ggml_tensor * inp_embd; // F32 [n_embd, n_batch] struct ggml_tensor * inp_pos; // I32 [n_batch] @@ -2090,7 +2082,7 @@ struct llama_context { struct ggml_tensor * inp_mean; // F32 [n_batch, n_batch] struct ggml_tensor * inp_cls; // I32 [n_batch] struct ggml_tensor * inp_s_copy; // I32 [kv_size] - struct ggml_tensor * inp_s_mask; // F32 [kv_size] + struct ggml_tensor * inp_s_mask; // F32 [1, kv_size] struct ggml_tensor * inp_s_seq; // I32 [kv_size, n_batch] #ifdef GGML_USE_MPI @@ -4005,6 +3997,7 @@ static bool llm_load_tensors( // there is very little benefit to offloading the input layer, so always keep it on the CPU model.buft_input = llama_default_buffer_type_cpu(true); + //model.buft_input = llama_default_buffer_type_offload(main_gpu); model.buft_layer.resize(n_layer); @@ -5094,29 +5087,32 @@ enum llm_norm_type { static struct ggml_tensor * llm_build_inp_embd( struct ggml_context * ctx, + struct llama_context & lctx, const llama_hparams & hparams, const llama_batch & batch, struct ggml_tensor * tok_embd, - struct ggml_tensor * inp_tokens, - struct ggml_tensor * inp_embd, const llm_build_cb & cb) { const int64_t n_embd = hparams.n_embd; struct ggml_tensor * inpL; if (batch.token) { - struct ggml_tensor * inp_tokens_v = ggml_view_1d(ctx, inp_tokens, batch.n_tokens, 0); - cb(inp_tokens, "inp_tokens", -1); + lctx.inp_tokens = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, batch.n_tokens); + cb(lctx.inp_tokens, "inp_tokens", -1); + ggml_set_input(lctx.inp_tokens); - inpL = ggml_get_rows(ctx, tok_embd, inp_tokens_v); + inpL = ggml_get_rows(ctx, tok_embd, lctx.inp_tokens); } else { #ifdef GGML_USE_MPI GGML_ASSERT(false && "not implemented"); #endif - - inpL = ggml_view_2d(ctx, inp_embd, n_embd, batch.n_tokens, inp_embd->nb[1], 0); + lctx.inp_embd = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, batch.n_tokens); + inpL = lctx.inp_embd; + ggml_set_input(lctx.inp_embd); } + cb(inpL, "inp_embd", -1); + return inpL; } @@ -5420,7 +5416,7 @@ static struct ggml_tensor * llm_build_kv( struct llm_build_context { const llama_model & model; - const llama_context & lctx; + llama_context & lctx; const llama_hparams & hparams; const llama_cparams & cparams; const llama_batch & batch; @@ -5513,6 +5509,18 @@ struct llm_build_context { }; ctx0 = ggml_init(params); + + lctx.inp_tokens = nullptr; + lctx.inp_embd = nullptr; + lctx.inp_pos = nullptr; + lctx.inp_KQ_mask = nullptr; + lctx.inp_KQ_pos = nullptr; + lctx.inp_K_shift = nullptr; + lctx.inp_mean = nullptr; + lctx.inp_cls = nullptr; + lctx.inp_s_copy = nullptr; + lctx.inp_s_mask = nullptr; + lctx.inp_s_seq = nullptr; } void free() { @@ -5527,6 +5535,10 @@ struct llm_build_context { GGML_ASSERT(kv_self.size == n_ctx); + lctx.inp_K_shift = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_ctx); + cb(lctx.inp_K_shift, "K_shift", -1); + ggml_set_input(lctx.inp_K_shift); + for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * tmp = // we rotate only the first n_rot dimensions @@ -5550,12 +5562,14 @@ struct llm_build_context { GGML_ASSERT(kv_self.recurrent); + struct ggml_tensor * state_copy = build_inp_s_copy(); + for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * conv_states = ggml_reshape_2d(ctx0, kv_self.k_l[il], hparams.n_embd_k_s(), kv_self.size); struct ggml_tensor * ssm_states = ggml_reshape_2d(ctx0, kv_self.v_l[il], hparams.n_embd_v_s(), kv_self.size); - conv_states = ggml_get_rows(ctx0, conv_states, lctx.inp_s_copy); - ssm_states = ggml_get_rows(ctx0, ssm_states, lctx.inp_s_copy); + conv_states = ggml_get_rows(ctx0, conv_states, state_copy); + ssm_states = ggml_get_rows(ctx0, ssm_states, state_copy); // TODO: name the intermediate tensors with cb() @@ -5615,6 +5629,66 @@ struct llm_build_context { return gf; } + struct ggml_tensor * build_inp_pos() { + lctx.inp_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens); + cb(lctx.inp_pos, "inp_pos", -1); + ggml_set_input(lctx.inp_pos); + return lctx.inp_pos; + } + + struct ggml_tensor * build_inp_KQ_mask(bool causal = true) { + if (causal) { + lctx.inp_KQ_mask = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_kv, n_tokens); + } else { + lctx.inp_KQ_mask = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, n_tokens); + } + cb(lctx.inp_KQ_mask, "KQ_mask", -1); + ggml_set_input(lctx.inp_KQ_mask); + return lctx.inp_KQ_mask; + } + + struct ggml_tensor * build_inp_KQ_pos() { + lctx.inp_KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, n_kv); + cb(lctx.inp_KQ_pos, "KQ_pos", -1); + ggml_set_input(lctx.inp_KQ_pos); + return lctx.inp_KQ_pos; + } + + struct ggml_tensor * build_inp_mean() { + lctx.inp_mean = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, n_tokens); + cb(lctx.inp_mean, "inp_mean", -1); + ggml_set_input(lctx.inp_mean); + return lctx.inp_mean; + } + + struct ggml_tensor * build_inp_cls() { + lctx.inp_cls = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens); + cb(lctx.inp_cls, "inp_cls", -1); + ggml_set_input(lctx.inp_cls); + return lctx.inp_cls; + } + + struct ggml_tensor * build_inp_s_copy() { + lctx.inp_s_copy = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, kv_self.size); + cb(lctx.inp_s_copy, "inp_s_copy", -1); + ggml_set_input(lctx.inp_s_copy); + return lctx.inp_s_copy; + } + + struct ggml_tensor * build_inp_s_mask() { + lctx.inp_s_mask = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, 1, n_kv); + cb(lctx.inp_s_mask, "inp_s_mask", -1); + ggml_set_input(lctx.inp_s_mask); + return lctx.inp_s_mask; + } + + struct ggml_tensor * build_inp_s_seq() { + lctx.inp_s_seq = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_kv, n_tokens); + cb(lctx.inp_s_seq, "inp_s_seq", -1); + ggml_set_input(lctx.inp_s_seq); + return lctx.inp_s_seq; + } + struct ggml_cgraph * build_llama() { struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); @@ -5625,16 +5699,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -5686,7 +5757,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); @@ -5804,20 +5874,16 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); // positions of the tokens in the KV cache - struct ggml_tensor * KQ_pos = ggml_view_1d(ctx0, lctx.inp_KQ_pos, n_kv, 0); - cb(KQ_pos, "KQ_pos", -1); + struct ggml_tensor * KQ_pos = build_inp_KQ_pos(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -5865,7 +5931,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, NULL, Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); @@ -5921,16 +5986,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * attn_norm; @@ -5984,7 +6046,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, NULL, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = cur; @@ -6035,21 +6096,17 @@ struct llm_build_context { GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); struct ggml_tensor * cur; - struct ggml_tensor * pos; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); - pos = ggml_get_rows(ctx0, model.pos_embd, inp_pos); + struct ggml_tensor * pos = ggml_get_rows(ctx0, model.pos_embd, inp_pos); cb(pos, "pos_embd", -1); inpL = ggml_add(ctx0, inpL, pos); @@ -6083,7 +6140,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } // add the input @@ -6135,16 +6191,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * residual = inpL; @@ -6284,7 +6337,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Q, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = ggml_add(ctx0, residual, cur); @@ -6338,16 +6390,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); // positions of the tokens in the KV cache - struct ggml_tensor * KQ_pos = ggml_view_1d(ctx0, lctx.inp_KQ_pos, n_kv, 0); - cb(KQ_pos, "KQ_pos", -1); + struct ggml_tensor * KQ_pos = build_inp_KQ_pos(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -6377,7 +6426,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, NULL, Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); @@ -6433,15 +6481,12 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - // get input vectors with right size - const size_t stride1 = n_tokens * ggml_type_size(lctx.inp_tokens->type); - - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - struct ggml_tensor * inp_mean = ggml_view_2d(ctx0, lctx.inp_mean, n_tokens, n_tokens, stride1, 0); - struct ggml_tensor * inp_cls = ggml_view_1d(ctx0, lctx.inp_cls, n_tokens, 0); + struct ggml_tensor * inp_pos = build_inp_pos(); + struct ggml_tensor * inp_mean = build_inp_mean(); + struct ggml_tensor * inp_cls = build_inp_cls(); // construct input embeddings (token, type, position) - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // token types are hardcoded to zero ("Sentence A") struct ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0); @@ -6456,8 +6501,7 @@ struct llm_build_context { cb(inpL, "inp_norm", -1); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_cont(ctx0, ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_tokens, n_tokens, n_tokens*ggml_type_size(lctx.inp_KQ_mask->type), 0)); - cb(KQ_mask, "KQ_mask", -1); // [n_tokens, n_tokens] + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(false); // iterate layers for (int il = 0; il < n_layer; ++il) { @@ -6619,16 +6663,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); // positions of the tokens in the KV cache - struct ggml_tensor * KQ_pos = ggml_view_1d(ctx0, lctx.inp_KQ_pos, n_kv, 0); - cb(KQ_pos, "KQ_pos", -1); + struct ggml_tensor * KQ_pos = build_inp_KQ_pos(); inpL = llm_build_norm(ctx0, inpL, hparams, model.tok_norm, @@ -6664,7 +6705,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } // Add the input @@ -6716,16 +6756,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); // positions of the tokens in the KV cache - struct ggml_tensor * KQ_pos = ggml_view_1d(ctx0, lctx.inp_KQ_pos, n_kv, 0); - cb(KQ_pos, "KQ_pos", -1); + struct ggml_tensor * KQ_pos = build_inp_KQ_pos(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * attn_norm; @@ -6766,7 +6803,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, KQ_pos, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } // Add the input @@ -6821,16 +6857,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -6883,7 +6916,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, NULL, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); @@ -6939,16 +6971,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -6993,7 +7022,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, NULL, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); @@ -7048,16 +7076,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -7109,7 +7134,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); @@ -7164,16 +7188,13 @@ struct llm_build_context { struct ggml_tensor * ffn_output; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { attn_norm_output = llm_build_norm(ctx0, inpL, hparams, @@ -7231,7 +7252,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f, cb, il); - cb(cur, "kqv_out", il); } // FF @@ -7281,16 +7301,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { @@ -7329,7 +7346,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, NULL, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * sa_out = cur; @@ -7383,16 +7399,13 @@ struct llm_build_context { struct ggml_tensor * pos; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); pos = ggml_get_rows(ctx0, model.pos_embd, inp_pos); cb(pos, "pos_embd", -1); @@ -7428,7 +7441,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } // add the input @@ -7481,16 +7493,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { cur = llm_build_norm(ctx0, inpL, hparams, @@ -7532,7 +7541,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } // add the input @@ -7584,16 +7592,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -7645,7 +7650,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, NULL, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); @@ -7698,16 +7702,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -7759,7 +7760,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); @@ -7821,20 +7821,17 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // scale the input embeddings inpL = ggml_scale(ctx0, inpL, scale_embd); cb(inpL, "inp_scaled", -1); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -7886,7 +7883,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, model.layers[il].bo, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il); - cb(cur, "kqv_out", il); } // scale_res - scale the hidden states for residual connection @@ -7953,22 +7949,18 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd)); cb(inpL, "inp_scaled", -1); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { - // norm cur = llm_build_norm(ctx0, inpL, hparams, model.layers[il].attn_norm, NULL, @@ -8005,7 +7997,6 @@ struct llm_build_context { cur = llm_build_kv(ctx0, model, hparams, kv_self, gf, model.layers[il].wo, NULL, Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f, cb, il); - cb(cur, "kqv_out", il); } struct ggml_tensor * sa_out = ggml_add(ctx0, cur, inpL); @@ -8060,16 +8051,13 @@ struct llm_build_context { struct ggml_tensor * cur; struct ggml_tensor * inpL; - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); // inp_pos - contains the positions - struct ggml_tensor * inp_pos = ggml_view_1d(ctx0, lctx.inp_pos, n_tokens, 0); - cb(inp_pos, "inp_pos", -1); + struct ggml_tensor * inp_pos = build_inp_pos(); // KQ_mask (mask for 1 head, it will be broadcasted to all heads) - struct ggml_tensor * KQ_mask = ggml_view_2d(ctx0, lctx.inp_KQ_mask, n_kv, n_tokens, n_kv*ggml_type_size(lctx.inp_KQ_mask->type), 0); - cb(KQ_mask, "KQ_mask", -1); + struct ggml_tensor * KQ_mask = build_inp_KQ_mask(); for (int il = 0; il < n_layer; ++il) { struct ggml_tensor * inpSA = inpL; @@ -8178,11 +8166,10 @@ struct llm_build_context { struct ggml_tensor * inpL; // {n_embd, n_tokens} - inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, lctx.inp_tokens, lctx.inp_embd, cb); - cb(inpL, "inp_embd", -1); + inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb); - struct ggml_tensor * state_mask = ggml_view_2d(ctx0, lctx.inp_s_mask, 1, n_kv, lctx.inp_s_mask->nb[0], 0); - struct ggml_tensor * state_seq = ggml_view_2d(ctx0, lctx.inp_s_seq, n_kv, n_tokens, n_kv*ggml_element_size(lctx.inp_s_seq), 0); + struct ggml_tensor * state_mask = build_inp_s_mask(); + struct ggml_tensor * state_seq = build_inp_s_seq(); for (int il = 0; il < n_layer; ++il) { // (ab)using the KV cache to store the states @@ -8234,7 +8221,7 @@ struct llm_build_context { ggml_build_forward_expand(gf, ggml_cpy(ctx0, ggml_view_2d(ctx0, x_conv, d_conv - 1, d_inner*n_kv, d_conv*ggml_element_size(x_conv), (1+d_inner*n_tokens)*ggml_element_size(x_conv)), - ggml_view_1d(ctx0, kv_self.k_l[il], (d_conv - 1)*(d_inner)*(n_kv), kv_self.head*(d_conv - 1)*(d_inner)*ggml_element_size(x_conv)))); + ggml_view_1d(ctx0, kv_self.k_l[il], (d_conv - 1)*(d_inner)*(n_kv), kv_head*(d_conv - 1)*(d_inner)*ggml_element_size(x_conv)))); // extract x from x_conv x = ggml_view_2d(ctx0, x_conv, d_inner, n_tokens, d_inner*ggml_element_size(x_conv), 0); @@ -8268,7 +8255,7 @@ struct llm_build_context { ggml_build_forward_expand(gf, ggml_cpy(ctx0, ggml_view_1d(ctx0, y_ssm_states, d_state*d_inner*n_kv, d_inner*n_tokens*ggml_element_size(y_ssm_states)), - ggml_view_1d(ctx0, kv_self.v_l[il], d_state*d_inner*n_kv, kv_self.head*d_state*d_inner*ggml_element_size(ssm_states)))); + ggml_view_1d(ctx0, kv_self.v_l[il], d_state*d_inner*n_kv, kv_head*d_state*d_inner*ggml_element_size(ssm_states)))); struct ggml_tensor * y = ggml_view_2d(ctx0, y_ssm_states, d_inner, n_tokens, d_inner*ggml_element_size(y_ssm_states), 0); @@ -8372,7 +8359,18 @@ static struct ggml_cgraph * llama_build_graph( if (!lctx.cparams.offload_kqv) { if (strcmp(name, "kqv_merged_cont") == 0) { // all nodes between the KV store and the attention output are run on the CPU - ggml_backend_sched_set_node_backend(lctx.sched, cur, lctx.backend_cpu); + ggml_backend_sched_set_tensor_backend(lctx.sched, cur, lctx.backend_cpu); + } + } + + // norm may be automatically assigned to the backend of the previous layer, increasing data transfer between backends + // to fix this, we assign the norm layer manually to the backend of its layer + if (il != -1 && strcmp(name, "norm") == 0) { + for (auto * backend : lctx.backends) { + if (ggml_backend_buft_supports_backend(lctx.model.buft_layer[il].buft, backend)) { + ggml_backend_sched_set_tensor_backend(lctx.sched, cur, backend); + break; + } } } }; @@ -8528,7 +8526,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { ggml_backend_tensor_set(lctx.inp_embd, batch.embd, 0, n_tokens*n_embd*ggml_element_size(lctx.inp_embd)); } - if (batch.pos) { + if (batch.pos && lctx.inp_pos) { const int64_t n_tokens = batch.n_tokens; ggml_backend_tensor_set(lctx.inp_pos, batch.pos, 0, n_tokens*ggml_element_size(lctx.inp_pos)); @@ -8539,61 +8537,63 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { "non-causal attention with generative models is not supported" ); - // NOTE: hparams.causal_attn indicates the model is capable of generation and uses the kv cache. - if (cparams.causal_attn) { - const int64_t n_kv = kv_self.n; - const int64_t n_tokens = batch.n_tokens; + if (lctx.inp_KQ_mask) { + // NOTE: hparams.causal_attn indicates the model is capable of generation and uses the kv cache. + if (cparams.causal_attn) { + const int64_t n_kv = kv_self.n; + const int64_t n_tokens = batch.n_tokens; - assert(ggml_backend_buffer_is_host(lctx.inp_KQ_mask->buffer)); + GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_KQ_mask->buffer)); - float * data = (float *) lctx.inp_KQ_mask->data; + float * data = (float *) lctx.inp_KQ_mask->data; - // For causal attention, use only the previous KV cells - // of the correct sequence for each token of the batch. - // It's assumed that if a token in the batch has multiple sequences, they are equivalent. - for (int h = 0; h < 1; ++h) { - for (int j = 0; j < n_tokens; ++j) { - const llama_pos pos = batch.pos[j]; - const llama_seq_id seq_id = batch.seq_id[j][0]; + // For causal attention, use only the previous KV cells + // of the correct sequence for each token of the batch. + // It's assumed that if a token in the batch has multiple sequences, they are equivalent. + for (int h = 0; h < 1; ++h) { + for (int j = 0; j < n_tokens; ++j) { + const llama_pos pos = batch.pos[j]; + const llama_seq_id seq_id = batch.seq_id[j][0]; - for (int i = 0; i < n_kv; ++i) { - float f; - if (!lctx.kv_self.cells[i].has_seq_id(seq_id) || lctx.kv_self.cells[i].pos > pos) { - f = -INFINITY; - } else { - f = 0.0f; + for (int i = 0; i < n_kv; ++i) { + float f; + if (!lctx.kv_self.cells[i].has_seq_id(seq_id) || lctx.kv_self.cells[i].pos > pos) { + f = -INFINITY; + } else { + f = 0.0f; + } + data[h*(n_kv*n_tokens) + j*n_kv + i] = f; } - data[h*(n_kv*n_tokens) + j*n_kv + i] = f; } } - } - } else { - // when using kv cache, the mask needs to match the kv cache size - const int64_t n_tokens = batch.n_tokens; - const int64_t n_stride = hparams.causal_attn ? kv_self.n : n_tokens; + } else { + // when using kv cache, the mask needs to match the kv cache size + const int64_t n_tokens = batch.n_tokens; + const int64_t n_stride = hparams.causal_attn ? kv_self.n : n_tokens; - assert(ggml_backend_buffer_is_host(lctx.inp_KQ_mask->buffer)); + GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_KQ_mask->buffer)); - float * data = (float *) lctx.inp_KQ_mask->data; + float * data = (float *) lctx.inp_KQ_mask->data; - for (int h = 0; h < 1; ++h) { - for (int j = 0; j < n_tokens; ++j) { - const llama_seq_id seq_id = batch.seq_id[j][0]; + for (int h = 0; h < 1; ++h) { + for (int j = 0; j < n_tokens; ++j) { + const llama_seq_id seq_id = batch.seq_id[j][0]; - for (int i = 0; i < n_tokens; ++i) { - float f = -INFINITY; - for (int s = 0; s < batch.n_seq_id[i]; ++s) { - if (batch.seq_id[i][s] == seq_id) { - f = 0.0f; - break; + for (int i = 0; i < n_tokens; ++i) { + float f = -INFINITY; + for (int s = 0; s < batch.n_seq_id[i]; ++s) { + if (batch.seq_id[i][s] == seq_id) { + f = 0.0f; + break; + } } + + data[h*(n_tokens*n_tokens) + j*n_stride + i] = f; } - data[h*(n_tokens*n_tokens) + j*n_stride + i] = f; - } - - for (int i = n_tokens; i < n_stride; ++i) { - data[h*(n_tokens*n_tokens) + j*n_stride + i] = -INFINITY; + for (int i = n_tokens; i < n_stride; ++i) { + data[h*(n_tokens*n_tokens) + j*n_stride + i] = -INFINITY; + } } } } @@ -8602,7 +8602,8 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { if (hparams.need_kq_pos) { const int64_t n_kv = kv_self.n; - assert(ggml_backend_buffer_is_host(lctx.inp_KQ_pos->buffer)); + GGML_ASSERT(lctx.inp_KQ_pos); + GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_KQ_pos->buffer)); float * data = (float *) lctx.inp_KQ_pos->data; @@ -8614,6 +8615,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { if (cparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) { const int64_t n_tokens = batch.n_tokens; + GGML_ASSERT(lctx.inp_mean); GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_mean->buffer)); float * data = (float *) lctx.inp_mean->data; @@ -8645,6 +8647,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { if (cparams.pooling_type == LLAMA_POOLING_TYPE_CLS) { const int64_t n_tokens = batch.n_tokens; + GGML_ASSERT(lctx.inp_cls); GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_cls->buffer)); uint32_t * data = (uint32_t *) lctx.inp_cls->data; @@ -8665,7 +8668,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { if (kv_self.recurrent) { const int64_t n_kv = kv_self.n; - { + if (lctx.inp_s_mask) { GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_s_mask->buffer)); float * data = (float *) lctx.inp_s_mask->data; @@ -8687,7 +8690,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) { // update the correct state(s)/sequence(s) for each token of the batch. // Like with the KQ_mask, if a token in the batch has multiple sequences, // they are assumed to be equivalent (not here, but in ggml_ssm_scan and ggml_ssm_conv). - { + if (lctx.inp_s_seq) { const int64_t n_tokens = batch.n_tokens; GGML_ASSERT(ggml_backend_buffer_is_host(lctx.inp_s_seq->buffer)); @@ -8730,7 +8733,7 @@ static void llama_graph_compute( ggml_backend_cpu_set_abort_callback(lctx.backend_cpu, lctx.abort_callback, lctx.abort_callback_data); } - ggml_backend_sched_graph_compute(lctx.sched, gf); + ggml_backend_sched_graph_compute_async(lctx.sched, gf); // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched)); @@ -8750,10 +8753,11 @@ static void llama_graph_compute( // static int llama_decode_internal( llama_context & lctx, - llama_batch batch) { - const uint32_t n_tokens = batch.n_tokens; + llama_batch batch_all) { // TODO: rename back to batch - if (n_tokens == 0) { + const uint32_t n_tokens_all = batch_all.n_tokens; + + if (n_tokens_all == 0) { LLAMA_LOG_ERROR("%s: n_tokens == 0", __func__); return -1; } @@ -8762,14 +8766,16 @@ static int llama_decode_internal( const auto & hparams = model.hparams; const auto & cparams = lctx.cparams; - const auto n_batch = cparams.n_batch; + GGML_ASSERT((!batch_all.token && batch_all.embd) || (batch_all.token && !batch_all.embd)); // NOLINT - GGML_ASSERT(n_tokens <= n_batch); - GGML_ASSERT((!batch.token && batch.embd) || (batch.token && !batch.embd)); // NOLINT + GGML_ASSERT(n_tokens_all <= cparams.n_batch); - int n_threads = n_tokens == 1 ? cparams.n_threads : cparams.n_threads_batch; + GGML_ASSERT((cparams.causal_attn || cparams.n_ubatch >= n_tokens_all) && "non-causal attention requires n_ubatch >= n_tokens"); - const int64_t t_start_us = ggml_time_us(); + if (lctx.t_compute_start_us == 0) { + lctx.t_compute_start_us = ggml_time_us(); + } + lctx.n_queued_tokens += n_tokens_all; #ifdef GGML_USE_MPI // TODO: needs fix after #3228 @@ -8777,128 +8783,261 @@ static int llama_decode_internal( //ggml_mpi_eval_init(lctx.ctx_mpi, &n_tokens, &n_past, &n_threads); #endif - GGML_ASSERT(n_threads > 0); - auto & kv_self = lctx.kv_self; const int64_t n_embd = hparams.n_embd; const int64_t n_vocab = hparams.n_vocab; - // helpers for smoother batch API transition - // after deprecating the llama_eval calls, these will be removed - std::vector pos; + auto * logits_out = lctx.logits; + +#ifndef NDEBUG + auto & logits_valid = lctx.logits_valid; + logits_valid.clear(); + logits_valid.resize(n_tokens_all); + + memset(logits_out, 0, lctx.logits_size*sizeof(float)); +#endif + + const auto n_ubatch = cparams.n_ubatch; + + std::vector pos; std::vector n_seq_id; std::vector seq_id_arr; std::vector> seq_id; - if (batch.pos == nullptr) { - pos.resize(n_tokens); - for (uint32_t i = 0; i < n_tokens; i++) { - pos[i] = batch.all_pos_0 + i*batch.all_pos_1; - } + for (uint32_t cur_token = 0; cur_token < n_tokens_all; cur_token += n_ubatch) { + const uint32_t n_tokens = std::min(n_ubatch, n_tokens_all - cur_token); + llama_batch u_batch = { + /* .n_tokens = */ (int32_t) n_tokens, + /* .token = */ batch_all.token ? batch_all.token + cur_token : nullptr, + /* .embd = */ batch_all.embd ? batch_all.embd + cur_token*n_embd : nullptr, + /* .pos = */ batch_all.pos ? batch_all.pos + cur_token : nullptr, + /* .n_seq_id = */ batch_all.n_seq_id ? batch_all.n_seq_id + cur_token : nullptr, + /* .seq_id = */ batch_all.seq_id ? batch_all.seq_id + cur_token : nullptr, + /* .logits = */ batch_all.logits ? batch_all.logits + cur_token : nullptr, + /* .all_pos_0 = */ batch_all.all_pos_0 + (llama_pos) cur_token*batch_all.all_pos_1, + /* .all_pos_1 = */ batch_all.all_pos_1, + /* .all_seq_id = */ batch_all.all_seq_id, + }; - batch.pos = pos.data(); - } + int n_threads = n_tokens == 1 ? cparams.n_threads : cparams.n_threads_batch; + GGML_ASSERT(n_threads > 0); - if (batch.seq_id == nullptr) { - n_seq_id.resize(n_tokens); - seq_id.resize(n_tokens); - seq_id_arr.resize(n_tokens); - for (uint32_t i = 0; i < n_tokens; i++) { - n_seq_id[i] = 1; - seq_id[i].resize(1); - seq_id[i][0] = batch.all_seq_id; - seq_id_arr[i] = seq_id[i].data(); - } - - batch.n_seq_id = n_seq_id.data(); - batch.seq_id = seq_id_arr.data(); - } - - // non-causal masks do not use the KV cache - if (hparams.causal_attn) { - llama_kv_cache_update(&lctx); - - // if we have enough unused cells before the current head -> - // better to start searching from the beginning of the cache, hoping to fill it - if (kv_self.head > kv_self.used + 2*n_tokens) { - kv_self.head = 0; - } - - if (!llama_kv_cache_find_slot(kv_self, batch)) { - return 1; - } - - if (!kv_self.recurrent) { - // a heuristic, to avoid attending the full cache if it is not yet utilized - // after enough generations, the benefit from this heuristic disappears - // if we start defragmenting the cache, the benefit from this will be more important - kv_self.n = std::min(kv_self.size, std::max(32u, GGML_PAD(llama_kv_cache_cell_max(kv_self), 32))); - //kv_self.n = llama_kv_cache_cell_max(kv_self); - } - } - - //printf("kv_self.n = %5d, kv_self.used = %5d, kv_self.head = %5d\n", kv_self.n, kv_self.used, kv_self.head); - - ggml_backend_sched_reset(lctx.sched); - ggml_backend_sched_set_eval_callback(lctx.sched, lctx.cparams.cb_eval, lctx.cparams.cb_eval_user_data); - - ggml_cgraph * gf = llama_build_graph(lctx, batch, false); - - // the output is always the last tensor in the graph - struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1]; - struct ggml_tensor * embd = gf->nodes[gf->n_nodes - 2]; - - if (!hparams.causal_attn) { - res = nullptr; // do not extract logits for embedding models such as BERT - - // token or sequence embeddings - embd = gf->nodes[gf->n_nodes - 1]; - - GGML_ASSERT(strcmp(embd->name, "result_embd") == 0 || strcmp(embd->name, "result_embd_pooled") == 0); - } else { - if (strcmp(res->name, "result_output") == 0) { - // the token embeddings could be the second to last tensor, or the third to last tensor - if (strcmp(embd->name, "result_norm") != 0) { - embd = gf->nodes[gf->n_nodes - 3]; - GGML_ASSERT(strcmp(embd->name, "result_norm") == 0); + // helpers for smoother batch API transition + // after deprecating the llama_eval calls, these will be removed + if (u_batch.pos == nullptr) { + pos.resize(n_tokens); + for (uint32_t i = 0; i < n_tokens; i++) { + pos[i] = u_batch.all_pos_0 + i*u_batch.all_pos_1; } + + u_batch.pos = pos.data(); + } + + if (u_batch.seq_id == nullptr) { + n_seq_id.resize(n_tokens); + seq_id.resize(n_tokens); + seq_id_arr.resize(n_tokens); + for (uint32_t i = 0; i < n_tokens; i++) { + n_seq_id[i] = 1; + seq_id[i].resize(1); + seq_id[i][0] = u_batch.all_seq_id; + seq_id_arr[i] = seq_id[i].data(); + } + + u_batch.n_seq_id = n_seq_id.data(); + u_batch.seq_id = seq_id_arr.data(); + } + + // non-causal masks do not use the KV cache + if (hparams.causal_attn) { + llama_kv_cache_update(&lctx); + + // if we have enough unused cells before the current head -> + // better to start searching from the beginning of the cache, hoping to fill it + if (kv_self.head > kv_self.used + 2*n_tokens) { + kv_self.head = 0; + } + + if (!llama_kv_cache_find_slot(kv_self, u_batch)) { + return 1; + } + + if (!kv_self.recurrent) { + // a heuristic, to avoid attending the full cache if it is not yet utilized + // after enough generations, the benefit from this heuristic disappears + // if we start defragmenting the cache, the benefit from this will be more important + kv_self.n = std::min(kv_self.size, std::max(32u, GGML_PAD(llama_kv_cache_cell_max(kv_self), 32))); + //kv_self.n = llama_kv_cache_cell_max(kv_self); + } + } + + //printf("kv_self.n = %5d, kv_self.used = %5d, kv_self.head = %5d\n", kv_self.n, kv_self.used, kv_self.head); + + ggml_backend_sched_reset(lctx.sched); + ggml_backend_sched_set_eval_callback(lctx.sched, lctx.cparams.cb_eval, lctx.cparams.cb_eval_user_data); + + ggml_cgraph * gf = llama_build_graph(lctx, u_batch, false); + + // the output is always the last tensor in the graph + struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1]; + struct ggml_tensor * embd = gf->nodes[gf->n_nodes - 2]; + + if (!hparams.causal_attn) { + res = nullptr; // do not extract logits for embedding models such as BERT + + // token or sequence embeddings + embd = gf->nodes[gf->n_nodes - 1]; + + GGML_ASSERT(strcmp(embd->name, "result_embd") == 0 || strcmp(embd->name, "result_embd_pooled") == 0); } else { - GGML_ASSERT(false && "missing result_output tensor"); + if (strcmp(res->name, "result_output") == 0) { + // the token embeddings could be the second to last tensor, or the third to last tensor + if (strcmp(embd->name, "result_norm") != 0) { + embd = gf->nodes[gf->n_nodes - 3]; + GGML_ASSERT(strcmp(embd->name, "result_norm") == 0); + } + } else { + GGML_ASSERT(false && "missing result_output tensor"); + } + } + // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs); + + // for big prompts, if BLAS is enabled, it is better to use only one thread + // otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance + // TODO: this is mostly important for Apple Silicon where CBLAS is still performing very well + // we still need some threads to process all non-mul_mat ops, but not too much to avoid interfering + // with the BLAS calls. need a better solution + // MoE Special Case: This logic applies when hparams.n_expert == 0, i.e. the model is NOT an MoE model. When an MoE is + // being processed then Accelerate/BLAS will not be involved, so capping would limit performance. + if (n_tokens >= 32 && hparams.n_expert == 0 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas()) { + n_threads = std::min(4, n_threads); + } + + ggml_backend_sched_alloc_graph(lctx.sched, gf); + + llama_set_inputs(lctx, u_batch); + + llama_graph_compute(lctx, gf, n_threads); + + // update the kv ring buffer + { + kv_self.head += n_tokens; + + // Ensure kv cache head points to a valid index. + if (kv_self.head >= kv_self.size) { + kv_self.head = 0; + } + } + +#ifdef GGML_PERF + // print timing information per ggml operation (for debugging purposes) + // requires GGML_PERF to be defined + ggml_graph_print(gf); +#endif + + // plot the computation graph in dot format (for debugging purposes) + //if (n_past%100 == 0) { + // ggml_graph_dump_dot(gf, NULL, "llama.dot"); + //} + + // extract logits + // TODO: do not compute and extract logits if only embeddings are needed + // update the graphs to skip "result_output" if logits are not needed + if (res) { + ggml_backend_t backend_res = ggml_backend_sched_get_tensor_backend(lctx.sched, res); + GGML_ASSERT(backend_res != nullptr); + if (u_batch.logits) { + int32_t i_first = -1; + for (uint32_t i = 0; i < n_tokens; i++) { + if (u_batch.logits[i] && i_first == -1) { + i_first = (int32_t) i; + } + if (u_batch.logits[i] == 0 || i == n_tokens - 1) { + if (i_first != -1) { + int i_last = u_batch.logits[i] == 0 ? i : i + 1; + // extract logits for the range [i_first, i_last) + // group the requests to minimize the number of calls to the backend + ggml_backend_tensor_get_async(backend_res, res, + logits_out + n_vocab*(cur_token + i_first), + i_first*n_vocab*sizeof(float), + (i_last - i_first)*n_vocab*sizeof(float)); + i_first = -1; + } + } +#ifndef NDEBUG + logits_valid[cur_token + i] = u_batch.logits[i] != 0;; +#endif + } + } else if (lctx.logits_all) { + ggml_backend_tensor_get_async(backend_res, res, logits_out + n_vocab*cur_token, 0, n_vocab*n_tokens*sizeof(float)); +#ifndef NDEBUG + std::fill(logits_valid.begin() + cur_token, logits_valid.begin() + cur_token + n_tokens, true); +#endif + } else { + if (cur_token + n_tokens >= n_tokens_all) { + ggml_backend_tensor_get_async(backend_res, res, logits_out, n_vocab*(n_tokens - 1)*sizeof(float), n_vocab*sizeof(float)); +#ifndef NDEBUG + logits_valid[0] = true; +#endif + } + } + } + + // extract embeddings + if (cparams.embeddings && embd) { + ggml_backend_t backend_embd = ggml_backend_sched_get_tensor_backend(lctx.sched, embd); + GGML_ASSERT(backend_embd != nullptr); + + switch (cparams.pooling_type) { + case LLAMA_POOLING_TYPE_NONE: + { + // extract token embeddings + auto & embd_out = lctx.embd; + + if (u_batch.logits) { + //embd_out.resize(n_embd * n_tokens); + for (uint32_t i = 0; i < n_tokens; i++) { + if (u_batch.logits[i] == 0) { + continue; + } + ggml_backend_tensor_get_async(backend_embd, embd, embd_out + n_embd*(i + cur_token), (n_embd*i)*sizeof(float), n_embd*sizeof(float)); + } + } + } break; + case LLAMA_POOLING_TYPE_CLS: + case LLAMA_POOLING_TYPE_MEAN: + { + GGML_ASSERT(strcmp(embd->name, "result_embd_pooled") == 0); + + // extract sequence embeddings + auto & embd_seq_out = lctx.embd_seq; + embd_seq_out.clear(); + + for (uint32_t i = 0; i < n_tokens; i++) { + const llama_seq_id seq_id = u_batch.seq_id[i][0]; + if (embd_seq_out.find(seq_id) != embd_seq_out.end()) { + continue; + } + embd_seq_out[seq_id].resize(n_embd); + ggml_backend_tensor_get_async(backend_embd, embd, embd_seq_out[seq_id].data(), (n_embd*seq_id)*sizeof(float), n_embd*sizeof(float)); + } + } break; + case LLAMA_POOLING_TYPE_UNSPECIFIED: + { + GGML_ASSERT(false && "unknown pooling type"); + } break; + } } } - // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs); - - // for big prompts, if BLAS is enabled, it is better to use only one thread - // otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance - // TODO: this is mostly important for Apple Silicon where CBLAS is still performing very well - // we still need some threads to process all non-mul_mat ops, but not too much to avoid interfering - // with the BLAS calls. need a better solution - // MoE Special Case: This logic applies when hparams.n_expert == 0, i.e. the model is NOT an MoE model. When an MoE is - // being processed then Accelerate/BLAS will not be involved, so capping would limit performance. - if (n_tokens >= 32 && hparams.n_expert == 0 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas()) { - n_threads = std::min(4, n_threads); - } - - llama_set_inputs(lctx, batch); - - llama_graph_compute(lctx, gf, n_threads); - - // update the kv ring buffer - { - kv_self.head += n_tokens; - - // Ensure kv cache head points to a valid index. - if (kv_self.head >= kv_self.size) { - kv_self.head = 0; - } - } + // wait for the computation to finish (automatically done when obtaining the model output) + //llama_synchronize(&lctx); // decide if we need to defrag the kv cache if (cparams.defrag_thold >= 0.0f) { - const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used + n_tokens)/float(kv_self.n) : 0.0f; + const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used + n_tokens_all)/float(kv_self.n) : 0.0f; // queue defragmentation for next llama_kv_cache_update if (fragmentation > cparams.defrag_thold) { @@ -8908,141 +9047,10 @@ static int llama_decode_internal( } } -#ifdef GGML_PERF - // print timing information per ggml operation (for debugging purposes) - // requires GGML_PERF to be defined - ggml_graph_print(gf); -#endif - - // plot the computation graph in dot format (for debugging purposes) - //if (n_past%100 == 0) { - // ggml_graph_dump_dot(gf, NULL, "llama.dot"); - //} - - // extract logits - // TODO: do not compute and extract logits if only embeddings are needed - // need to update the graphs to skip "result_output" - if (res) { - auto & logits_out = lctx.logits; - -#ifndef NDEBUG - auto & logits_valid = lctx.logits_valid; - logits_valid.clear(); - logits_valid.resize(n_tokens); - - logits_out.clear(); -#endif - - ggml_backend_t backend_res = ggml_backend_sched_get_node_backend(lctx.sched, res); - GGML_ASSERT(backend_res != nullptr); - - if (batch.logits) { - logits_out.resize(n_vocab * n_tokens); - int32_t i_first = -1; - for (uint32_t i = 0; i < n_tokens; i++) { - if (batch.logits[i] && i_first == -1) { - i_first = (int32_t) i; - } - if (batch.logits[i] == 0 || i == n_tokens - 1) { - if (i_first != -1) { - int i_last = batch.logits[i] == 0 ? i : i + 1; - // extract logits for the range [i_first, i_last) - // group the requests to minimize the number of calls to the backend - ggml_backend_tensor_get_async(backend_res, res, - logits_out.data() + (n_vocab*i_first), - (n_vocab*i_first)*sizeof(float), - (i_last - i_first)*n_vocab*sizeof(float)); - i_first = -1; - } - } -#ifndef NDEBUG - logits_valid[i] = batch.logits[i] != 0; -#endif - } - } else if (lctx.logits_all) { - logits_out.resize(n_vocab*n_tokens); - ggml_backend_tensor_get_async(backend_res, res, logits_out.data(), 0, n_vocab*n_tokens*sizeof(float)); -#ifndef NDEBUG - std::fill(logits_valid.begin(), logits_valid.end(), true); -#endif - } else { - logits_out.resize(n_vocab); - ggml_backend_tensor_get_async(backend_res, res, logits_out.data(), (n_vocab*(n_tokens - 1))*sizeof(float), n_vocab*sizeof(float)); -#ifndef NDEBUG - logits_valid[0] = true; -#endif - } - ggml_backend_synchronize(backend_res); - } - - // extract embeddings - if (cparams.embeddings && embd) { - ggml_backend_t backend_embd = ggml_backend_sched_get_node_backend(lctx.sched, embd); - GGML_ASSERT(backend_embd != nullptr); - - switch (cparams.pooling_type) { - case LLAMA_POOLING_TYPE_NONE: - { - // extract token embeddings - auto & embd_out = lctx.embd; - - if (batch.logits) { - embd_out.resize(n_embd * n_tokens); - for (uint32_t i = 0; i < n_tokens; i++) { - if (batch.logits[i] == 0) { - continue; - } - - ggml_backend_tensor_get_async(backend_embd, embd, embd_out.data() + (n_embd*i), (n_embd*i)*sizeof(float), n_embd*sizeof(float)); - } - } - } break; - case LLAMA_POOLING_TYPE_CLS: - case LLAMA_POOLING_TYPE_MEAN: - { - GGML_ASSERT(strcmp(embd->name, "result_embd_pooled") == 0); - - // extract sequence embeddings - auto & embd_seq_out = lctx.embd_seq; - embd_seq_out.clear(); - - for (uint32_t i = 0; i < n_tokens; i++) { - const llama_seq_id seq_id = batch.seq_id[i][0]; - if (embd_seq_out.find(seq_id) != embd_seq_out.end()) { - continue; - } - embd_seq_out[seq_id].resize(n_embd); - ggml_backend_tensor_get_async(backend_embd, embd, embd_seq_out[seq_id].data(), (n_embd*seq_id)*sizeof(float), n_embd*sizeof(float)); - } - } break; - case LLAMA_POOLING_TYPE_UNSPECIFIED: - { - GGML_ASSERT(false && "unknown pooling type"); - } break; - } - ggml_backend_synchronize(backend_embd); - } - - // measure the performance only for the single-token evals - if (n_tokens == 1) { - lctx.t_eval_us += ggml_time_us() - t_start_us; - lctx.n_eval++; - } - else if (n_tokens > 1) { - lctx.t_p_eval_us += ggml_time_us() - t_start_us; - lctx.n_p_eval += n_tokens; - } - - // get a more accurate load time, upon first eval - // TODO: fix this - if (!lctx.has_evaluated_once) { - lctx.t_load_us = ggml_time_us() - lctx.t_start_us; - lctx.has_evaluated_once = true; - } - return 0; } + // find holes from the beginning of the KV cache and fill them by moving data from the end of the cache static void llama_kv_cache_defrag_internal(struct llama_context & lctx) { auto & kv_self = lctx.kv_self; @@ -9242,6 +9250,8 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) { #else // ggml_graph defrag + ggml_backend_sched_reset(lctx.sched); + ggml_cgraph * gf = llama_build_graph_defrag(lctx, ids); llama_graph_compute(lctx, gf, lctx.cparams.n_threads); @@ -9253,14 +9263,22 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) { } static void llama_kv_cache_update_internal(struct llama_context & lctx) { + bool need_reserve = false; + // apply K-shift if needed if (lctx.model.hparams.rope_type != LLAMA_ROPE_TYPE_NONE && lctx.kv_self.has_shift) { - llama_set_k_shift(lctx); - { + ggml_backend_sched_reset(lctx.sched); + ggml_cgraph * gf = llama_build_graph_k_shift(lctx); + ggml_backend_sched_alloc_graph(lctx.sched, gf); + + llama_set_k_shift(lctx); + llama_graph_compute(lctx, gf, lctx.cparams.n_threads); + + need_reserve = true; } { @@ -9275,12 +9293,18 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) { } if (lctx.kv_self.recurrent && lctx.kv_self.do_copy) { - llama_set_s_copy(lctx); - { + ggml_backend_sched_reset(lctx.sched); + ggml_cgraph * gf = llama_build_graph_s_copy(lctx); + ggml_backend_sched_alloc_graph(lctx.sched, gf); + + llama_set_s_copy(lctx); + llama_graph_compute(lctx, gf, lctx.cparams.n_threads); + + need_reserve = true; } { @@ -9298,8 +9322,26 @@ static void llama_kv_cache_update_internal(struct llama_context & lctx) { if (lctx.kv_self.do_defrag) { llama_kv_cache_defrag_internal(lctx); + need_reserve = true; + lctx.kv_self.do_defrag = false; } + + // reserve a worst case graph again + if (need_reserve) { + // TODO: extract to a function + // build worst-case graph + int n_tokens = (int)std::min(lctx.cparams.n_ctx, lctx.cparams.n_ubatch); + int n_past = lctx.cparams.n_ctx - n_tokens; + llama_token token = llama_token_bos(&lctx.model); // not actually used by llama_build_graph, but required to choose between token and embedding inputs graph + ggml_cgraph * gf = llama_build_graph(lctx, llama_batch_get_one(&token, n_tokens, n_past, 0), true); + + // initialize scheduler with the worst-case graph + ggml_backend_sched_reset(lctx.sched); + if (!ggml_backend_sched_reserve(lctx.sched, gf)) { + LLAMA_LOG_ERROR("%s: failed to allocate compute buffers\n", __func__); + } + } } // @@ -12537,7 +12579,8 @@ struct llama_context_params llama_context_default_params() { struct llama_context_params result = { /*.seed =*/ LLAMA_DEFAULT_SEED, /*.n_ctx =*/ 512, - /*.n_batch =*/ 512, + /*.n_batch =*/ 2048, + /*.n_ubatch =*/ 512, /*.n_seq_max =*/ 1, /*.n_threads =*/ GGML_DEFAULT_N_THREADS, // TODO: better default /*.n_threads_batch =*/ GGML_DEFAULT_N_THREADS, @@ -12691,6 +12734,17 @@ struct llama_context * llama_new_context_with_model( struct llama_context_params params) { if (!model) { + LLAMA_LOG_ERROR("%s: model cannot be NULL\n", __func__); + return nullptr; + } + + if (params.n_batch == 0 && params.n_ubatch == 0) { + LLAMA_LOG_ERROR("%s: n_batch and n_ubatch cannot both be zero\n", __func__); + return nullptr; + } + + if (params.n_ctx == 0 && model->hparams.n_ctx_train == 0) { + LLAMA_LOG_ERROR("%s: n_ctx and model->hparams.n_ctx_train cannot both be zero\n", __func__); return nullptr; } @@ -12699,7 +12753,6 @@ struct llama_context * llama_new_context_with_model( const auto & hparams = model->hparams; auto & cparams = ctx->cparams; - cparams.n_batch = params.n_batch; // TODO: maybe add n_seq_max here too cparams.n_threads = params.n_threads; cparams.n_threads_batch = params.n_threads_batch; @@ -12716,6 +12769,11 @@ struct llama_context * llama_new_context_with_model( cparams.rope_freq_base = params.rope_freq_base == 0.0f ? hparams.rope_freq_base_train : params.rope_freq_base; cparams.rope_freq_scale = params.rope_freq_scale == 0.0f ? hparams.rope_freq_scale_train : params.rope_freq_scale; + // with causal attention, the batch size is limited by the context size + cparams.n_batch = hparams.causal_attn ? std::min(cparams.n_ctx, params.n_batch) : params.n_batch; + cparams.n_ubatch = std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch); + + cparams.n_yarn_orig_ctx = params.yarn_orig_ctx != 0 ? params.yarn_orig_ctx : hparams.n_yarn_orig_ctx != 0 ? hparams.n_yarn_orig_ctx : hparams.n_ctx_train; @@ -12751,6 +12809,8 @@ struct llama_context * llama_new_context_with_model( } LLAMA_LOG_INFO("%s: n_ctx = %u\n", __func__, cparams.n_ctx); + LLAMA_LOG_INFO("%s: n_batch = %u\n", __func__, cparams.n_batch); + LLAMA_LOG_INFO("%s: n_ubatch = %u\n", __func__, cparams.n_ubatch); LLAMA_LOG_INFO("%s: freq_base = %.1f\n", __func__, cparams.rope_freq_base); LLAMA_LOG_INFO("%s: freq_scale = %g\n", __func__, cparams.rope_freq_scale); @@ -12895,54 +12955,31 @@ struct llama_context * llama_new_context_with_model( ggml_type_name(type_v), (float)memory_size_v / (1024.0f * 1024.0f)); } - // resized during inference, reserve maximum - ctx->logits.reserve(hparams.n_vocab*cparams.n_batch); - - if (params.embeddings) { - ctx->embd.reserve(hparams.n_embd*cparams.n_batch); - } - - // graph inputs + // graph outputs buffer { - ggml_init_params init_params = { - /* .mem_size */ ggml_tensor_overhead()*(8 + 3*(ctx->kv_self.recurrent)), - /* .mem_buffer */ nullptr, - /* .no_alloc */ true, - }; - ctx->ctx_input = ggml_init(init_params); + // resized during inference, reserve maximum + ctx->logits_size = hparams.n_vocab*cparams.n_batch; + ctx->embd_size = params.embeddings ? hparams.n_embd*cparams.n_batch : 0; - ctx->inp_tokens = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch); - ctx->inp_embd = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, hparams.n_embd, cparams.n_batch); - ctx->inp_pos = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch); - ctx->inp_KQ_mask = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, kv_size, cparams.n_batch); - ctx->inp_KQ_pos = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_F32, kv_size); - ctx->inp_K_shift = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, kv_size); - ctx->inp_mean = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, cparams.n_batch, cparams.n_batch); - ctx->inp_cls = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch); - if (ctx->kv_self.recurrent) { - ctx->inp_s_copy = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, kv_size); - ctx->inp_s_mask = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_F32, kv_size); - ctx->inp_s_seq = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_I32, kv_size, cparams.n_batch); + const size_t buf_output_size = (ctx->logits_size + ctx->embd_size)*sizeof(float); + + ctx->buf_output = ggml_backend_buft_alloc_buffer(llama_default_buffer_type_cpu(true), buf_output_size); + if (ctx->buf_output == nullptr) { + LLAMA_LOG_ERROR("%s: failed to allocate logits buffer\n", __func__); + llama_free(ctx); + return nullptr; + } + ggml_backend_buffer_clear(ctx->buf_output, 0); + + + ctx->logits = (float *) ggml_backend_buffer_get_base(ctx->buf_output); + if (params.embeddings) { + ctx->embd = ctx->logits + ctx->logits_size; } - ggml_set_name(ctx->inp_tokens, "inp_tokens"); - ggml_set_name(ctx->inp_embd, "inp_embd"); - ggml_set_name(ctx->inp_pos, "inp_pos"); - ggml_set_name(ctx->inp_KQ_mask, "inp_KQ_mask"); - ggml_set_name(ctx->inp_KQ_pos, "inp_KQ_pos"); - ggml_set_name(ctx->inp_K_shift, "inp_K_shift"); - ggml_set_name(ctx->inp_mean, "inp_mean"); - ggml_set_name(ctx->inp_cls, "inp_cls"); - if (ctx->kv_self.recurrent) { - ggml_set_name(ctx->inp_s_copy, "inp_s_copy"); - ggml_set_name(ctx->inp_s_mask, "inp_s_mask"); - ggml_set_name(ctx->inp_s_seq, "inp_s_seq"); - } - - ctx->buf_input = ggml_backend_alloc_ctx_tensors_from_buft(ctx->ctx_input, llama_default_buffer_type_cpu(true)); - LLAMA_LOG_INFO("%s: %10s input buffer size = %8.2f MiB\n", __func__, - ggml_backend_buffer_name(ctx->buf_input), - ggml_backend_buffer_get_size(ctx->buf_input) / 1024.0 / 1024.0); + LLAMA_LOG_INFO("%s: %10s output buffer size = %8.2f MiB\n", __func__, + ggml_backend_buffer_name(ctx->buf_output), + ggml_backend_buffer_get_size(ctx->buf_output) / 1024.0 / 1024.0); } // scheduler and compute buffers @@ -12961,10 +12998,21 @@ struct llama_context * llama_new_context_with_model( // buffer used to store the computation graph and the tensor meta data ctx->buf_compute_meta.resize(ggml_tensor_overhead()*LLAMA_MAX_NODES + ggml_graph_overhead_custom(LLAMA_MAX_NODES, false)); - ctx->sched = ggml_backend_sched_new(ctx->backends.data(), backend_buft.data(), ctx->backends.size(), LLAMA_MAX_NODES); + // enabling pipeline parallelism in the scheduler increases memory usage, so it is only done when necessary + bool pipeline_parallel = llama_get_device_count() > 1 && model->n_gpu_layers > (int)model->hparams.n_layer && model->split_mode == LLAMA_SPLIT_MODE_LAYER; +#ifndef GGML_USE_CUBLAS + // pipeline parallelism requires support for async compute and events + // currently this is only implemented in the CUDA backend + pipeline_parallel = false; +#endif + ctx->sched = ggml_backend_sched_new(ctx->backends.data(), backend_buft.data(), ctx->backends.size(), LLAMA_MAX_NODES, pipeline_parallel); + + if (pipeline_parallel) { + LLAMA_LOG_INFO("%s: pipeline parallelism enabled (n_copies=%d)\n", __func__, ggml_backend_sched_get_n_copies(ctx->sched)); + } // build worst-case graph - int n_tokens = (int)std::min(cparams.n_ctx, cparams.n_batch); + int n_tokens = (int)std::min(cparams.n_ctx, cparams.n_ubatch); int n_past = cparams.n_ctx - n_tokens; llama_token token = llama_token_bos(&ctx->model); // not actually used by llama_build_graph, but required to choose between token and embedding inputs graph ggml_cgraph * gf = llama_build_graph(*ctx, llama_batch_get_one(&token, n_tokens, n_past, 0), true); @@ -12987,7 +13035,7 @@ struct llama_context * llama_new_context_with_model( // note: the number of splits during measure is higher than during inference due to the kv shift int n_splits = ggml_backend_sched_get_n_splits(ctx->sched); - LLAMA_LOG_INFO("%s: graph splits (measure): %d\n", __func__, n_splits); + LLAMA_LOG_INFO("%s: graph splits: %d\n", __func__, n_splits); } } @@ -13024,6 +13072,10 @@ uint32_t llama_n_batch(const struct llama_context * ctx) { return ctx->cparams.n_batch; } +uint32_t llama_n_ubatch(const struct llama_context * ctx) { + return ctx->cparams.n_ubatch; +} + uint32_t llama_n_seq_max(const struct llama_context * ctx) { return ctx->kv_self.size; } @@ -13347,9 +13399,9 @@ size_t llama_get_state_size(const struct llama_context * ctx) { const size_t s_rng = LLAMA_MAX_RNG_STATE; const size_t s_logits_size = sizeof(size_t); // assume worst case for logits although only currently set ones are serialized - const size_t s_logits = ctx->logits.capacity() * sizeof(float); + const size_t s_logits = ctx->logits_size * sizeof(float); const size_t s_embedding_size = sizeof(size_t); - const size_t s_embedding = ctx->embd.capacity() * sizeof(float); + const size_t s_embedding = ctx->embd_size * sizeof(float); const size_t s_kv_buf_size = sizeof(size_t); const size_t s_kv_head = sizeof(uint32_t); const size_t s_kv_size = sizeof(uint32_t); @@ -13447,23 +13499,23 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat // copy logits { - const size_t logits_size = ctx->logits.size(); + const size_t logits_size = ctx->logits_size; data_ctx->write(&logits_size, sizeof(logits_size)); if (logits_size) { - data_ctx->write(ctx->logits.data(), logits_size * sizeof(float)); + data_ctx->write(ctx->logits, logits_size * sizeof(float)); } } // copy embeddings { - const size_t embeddings_size = ctx->embd.size(); + const size_t embeddings_size = ctx->embd_size; data_ctx->write(&embeddings_size, sizeof(embeddings_size)); if (embeddings_size) { - data_ctx->write(ctx->embd.data(), embeddings_size * sizeof(float)); + data_ctx->write(ctx->embd, embeddings_size * sizeof(float)); } } @@ -13566,12 +13618,10 @@ size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src) { memcpy(&logits_size, inp, sizeof(logits_size)); inp += sizeof(logits_size); - GGML_ASSERT(ctx->logits.capacity() >= logits_size); + GGML_ASSERT(ctx->logits_size >= logits_size); if (logits_size) { - ctx->logits.resize(logits_size); - - memcpy(ctx->logits.data(), inp, logits_size * sizeof(float)); + memcpy(ctx->logits, inp, logits_size * sizeof(float)); inp += logits_size * sizeof(float); } } @@ -13582,12 +13632,10 @@ size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src) { memcpy(&embeddings_size, inp, sizeof(embeddings_size)); inp += sizeof(embeddings_size); - GGML_ASSERT(ctx->embd.capacity() == embeddings_size); + GGML_ASSERT(ctx->embd_size == embeddings_size); if (embeddings_size) { - ctx->embd.resize(embeddings_size); - - memcpy(ctx->embd.data(), inp, embeddings_size * sizeof(float)); + memcpy(ctx->embd, inp, embeddings_size * sizeof(float)); inp += embeddings_size * sizeof(float); } } @@ -13842,24 +13890,61 @@ int32_t llama_decode( return ret; } +void llama_synchronize(struct llama_context * ctx) { + ggml_backend_sched_synchronize(ctx->sched); + + // FIXME: if multiple single tokens are evaluated without a synchronization, + // the stats will be added to the prompt evaluation stats + // this should only happen when using batch size 1 to evaluate a batch + + // add the evaluation to the stats + if (ctx->n_queued_tokens == 1) { + ctx->t_eval_us += ggml_time_us() - ctx->t_compute_start_us; + ctx->n_eval++; + } else if (ctx->n_queued_tokens > 1) { + ctx->t_p_eval_us += ggml_time_us() - ctx->t_compute_start_us; + ctx->n_p_eval += ctx->n_queued_tokens; + } + + // get a more accurate load time, upon first eval + if (ctx->n_queued_tokens > 0 && !ctx->has_evaluated_once) { + ctx->t_load_us = ggml_time_us() - ctx->t_start_us; + ctx->has_evaluated_once = true; + } + + ctx->n_queued_tokens = 0; + ctx->t_compute_start_us = 0; +} + float * llama_get_logits(struct llama_context * ctx) { - return ctx->logits.data(); + llama_synchronize(ctx); + + return ctx->logits; } float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) { assert(ctx->logits_valid.at(i)); - return ctx->logits.data() + i*ctx->model.hparams.n_vocab; + + llama_synchronize(ctx); + + return ctx->logits + i*ctx->model.hparams.n_vocab; } float * llama_get_embeddings(struct llama_context * ctx) { - return ctx->embd.data(); + llama_synchronize(ctx); + + return ctx->embd; } float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i) { - return ctx->embd.data() + i*ctx->model.hparams.n_embd; + llama_synchronize(ctx); + + return ctx->embd + i*ctx->model.hparams.n_embd; } float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id) { + llama_synchronize(ctx); + auto it = ctx->embd_seq.find(seq_id); if (it == ctx->embd_seq.end()) { return nullptr; diff --git a/llama.h b/llama.h index 446899da6..2d16cc9b9 100644 --- a/llama.h +++ b/llama.h @@ -234,7 +234,8 @@ extern "C" { struct llama_context_params { uint32_t seed; // RNG seed, -1 for random uint32_t n_ctx; // text context, 0 = from model - uint32_t n_batch; // prompt processing maximum batch size + uint32_t n_batch; // logical maximum batch size that can be submitted to llama_decode + uint32_t n_ubatch; // physical maximum batch size uint32_t n_seq_max; // max number of sequences (i.e. distinct states for recurrent models) uint32_t n_threads; // number of threads to use for generation uint32_t n_threads_batch; // number of threads to use for batch processing @@ -377,6 +378,7 @@ extern "C" { LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx); LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx); + LLAMA_API uint32_t llama_n_ubatch (const struct llama_context * ctx); LLAMA_API uint32_t llama_n_seq_max (const struct llama_context * ctx); LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model); @@ -650,6 +652,11 @@ extern "C" { // Set abort callback LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data); + // Wait until all computations are finished + // This is automatically done when using one of the functions below to obtain the computation results + // and is not necessary to call it explicitly in most cases + LLAMA_API void llama_synchronize(struct llama_context * ctx); + // Token logits obtained from the last call to llama_decode() // The logits for the last token are stored in the last row // Logits for which llama_batch.logits[i] == 0 are undefined From 463628372d5fe3a0c1e5864aa5fc57deb7387039 Mon Sep 17 00:00:00 2001 From: Clint Herron Date: Wed, 13 Mar 2024 14:10:40 -0400 Subject: [PATCH 31/48] grammar : handle missing "root" node (#6004) --- common/sampling.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/common/sampling.cpp b/common/sampling.cpp index 823031feb..5a5450982 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -17,6 +17,13 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_ return nullptr; } + // Ensure that there is a "root" node. + if (result->parsed_grammar.symbol_ids.find("root") == result->parsed_grammar.symbol_ids.end()) { + fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__); + delete result; + return nullptr; + } + std::vector grammar_rules(result->parsed_grammar.c_rules()); result->grammar = llama_grammar_init( From 76a936c8939c249a7c3e8e66dfefbab13eae194f Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 13 Mar 2024 20:33:56 +0200 Subject: [PATCH 32/48] readme : update API changes and hot topics --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 54bf84bec..80037782f 100644 --- a/README.md +++ b/README.md @@ -10,12 +10,14 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) ### Recent API changes +- [2024 Mar 13] Add `llama_synchronize()` + `llama_context_params.n_ubatch` https://github.com/ggerganov/llama.cpp/pull/6017 - [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_seq_max()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328 - [2024 Mar 4] Embeddings API updated https://github.com/ggerganov/llama.cpp/pull/5796 - [2024 Mar 3] `struct llama_context_params` https://github.com/ggerganov/llama.cpp/pull/5849 ### Hot topics +- Multi-GPU pipeline parallelizm support https://github.com/ggerganov/llama.cpp/pull/6017 - Looking for contributions to add Deepseek support: https://github.com/ggerganov/llama.cpp/issues/5981 - Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962 - Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328 From 19885d205e768579ab090d1e99281cae58c21b54 Mon Sep 17 00:00:00 2001 From: Linwei Wang Date: Thu, 14 Mar 2024 02:34:40 +0800 Subject: [PATCH 33/48] readme : update details about running llama in Termux on Android (#6039) --- README.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 80037782f..61bedc3f8 100644 --- a/README.md +++ b/README.md @@ -904,6 +904,9 @@ First, install the essential packages for termux: pkg install clang wget git cmake ``` Second, obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake: + +You can execute the following commands on your computer to avoid downloading the NDK to your mobile. Of course, you can also do this in Termux. + ``` $ mkdir build-android $ cd build-android @@ -912,7 +915,28 @@ $ cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROI $ make ``` Install [termux](https://termux.dev/) on your device and run `termux-setup-storage` to get access to your SD card. -Finally, copy the `llama` binary and the model files to your device storage. Here is a demo of an interactive session running on Pixel 5 phone: +Finally, copy these built `llama` binaries and the model file to your device storage. Because the file permissions in the Android sdcard cannot be changed, you can copy the executable files to the `/data/data/com.termux/files/home/bin` path, and then execute the following commands in Termux to add executable permission: + +(Assumed that you have pushed the built executable files to the /sdcard/llama.cpp/bin path using `adb push`) +``` +$cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/ +$cd /data/data/com.termux/files/home/bin +$chmod +x ./* +``` + +Download model [llama-2-7b-chat.Q4_K_M.gguf](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/blob/main/llama-2-7b-chat.Q4_K_M.gguf), and push it to `/sdcard/llama.cpp/`, then move it to `/data/data/com.termux/files/home/model/` + +``` +$mv /sdcard/llama.cpp/llama-2-7b-chat.Q4_K_M.gguf /data/data/com.termux/files/home/model/ +``` + +Now, you can start chatting: +``` +$cd /data/data/com.termux/files/home/bin +$./main -m ../model/llama-2-7b-chat.Q4_K_M.gguf -n 128 -cml +``` + +Here is a demo of an interactive session running on Pixel 5 phone: https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4 From 0fd6c1f015f6cccf3b527f7dbd8386a434728126 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 14 Mar 2024 10:12:29 +0200 Subject: [PATCH 34/48] embedding : print cosine similarity (#899) --- common/common.cpp | 13 +++++++++++++ common/common.h | 1 + examples/embedding/embedding.cpp | 21 ++++++++++++++++----- examples/gritlm/gritlm.cpp | 26 ++++++-------------------- 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 73b1b61ba..58fbd05aa 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1877,3 +1877,16 @@ void llama_embd_normalize(const float * inp, float * out, int n) { } } +float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n){ + double sum = 0.0; + double sum1 = 0.0; + double sum2 = 0.0; + + for (int i = 0; i < n; i++) { + sum += embd1[i] * embd2[i]; + sum1 += embd1[i] * embd1[i]; + sum2 += embd2[i] * embd2[i]; + } + + return sum / (sqrt(sum1) * sqrt(sum2)); +} diff --git a/common/common.h b/common/common.h index 0f178b9eb..d250eef8b 100644 --- a/common/common.h +++ b/common/common.h @@ -268,3 +268,4 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40 void llama_embd_normalize(const float * inp, float * out, int n); +float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n); diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 49302a199..f390c4061 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -168,14 +168,25 @@ int main(int argc, char ** argv) { batch_decode(ctx, batch, out, s, n_embd); // print first 3 embeddings + fprintf(stdout, "\n"); for (int j = 0; j < std::min(3, n_prompts); j++) { - fprintf(stderr, "embedding %d: ", j); - for (int i = 0; i < n_embd; i++) { - fprintf(stderr, "%f ", emb[j * n_embd + i]); + fprintf(stdout, "embedding %d: ", j); + for (int i = 0; i < std::min(16, n_embd); i++) { + fprintf(stdout, "%f ", emb[j * n_embd + i]); } - fprintf(stderr, "\n\n"); + fprintf(stdout, "\n"); + } + + // print cosine similarity matrix + fprintf(stdout, "\n"); + printf("cosine similarity matrix:\n\n"); + for (int i = 0; i < n_prompts; i++) { + for (int j = 0; j < n_prompts; j++) { + float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd); + fprintf(stdout, "%6.2f ", sim); + } + fprintf(stdout, "\n"); } - fprintf(stderr, "\n"); // clean up llama_print_timings(ctx); diff --git a/examples/gritlm/gritlm.cpp b/examples/gritlm/gritlm.cpp index 3d4b085d6..52fd719b3 100644 --- a/examples/gritlm/gritlm.cpp +++ b/examples/gritlm/gritlm.cpp @@ -6,22 +6,6 @@ // #define GRIT_DEBUG -static float dot_product(const std::vector & v1, const std::vector & v2) { - float dot = 0.0f; - for (uint64_t i = 0; i < v1.size(); ++i) { - dot += v1[i] * v2[i]; - } - return dot; -} - -static float norm(const std::vector & v) { - return std::sqrt(dot_product(v, v)); -} - -static float cosine_similarity(const std::vector & v1, const std::vector & v2) { - return dot_product(v1, v2) / (norm(v1) * norm(v2)); -} - static std::vector> encode(llama_context * ctx, const std::vector & sentences, const std::string & instruction) { std::vector> result; @@ -203,10 +187,12 @@ int main(int argc, char * argv[]) { const std::vector> d_rep = encode(ctx, documents, gritlm_instruction("")); const std::vector> q_rep = encode(ctx, queries, gritlm_instruction(instruction)); - const float cosine_sim_q0_d0 = cosine_similarity(q_rep[0], d_rep[0]); - const float cosine_sim_q0_d1 = cosine_similarity(q_rep[0], d_rep[1]); - const float cosine_sim_q1_d0 = cosine_similarity(q_rep[1], d_rep[0]); - const float cosine_sim_q1_d1 = cosine_similarity(q_rep[1], d_rep[1]); + const int n_embd = llama_n_embd(mdl); + + const float cosine_sim_q0_d0 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[0].data(), n_embd); + const float cosine_sim_q0_d1 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[1].data(), n_embd); + const float cosine_sim_q1_d0 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[0].data(), n_embd); + const float cosine_sim_q1_d1 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[1].data(), n_embd); std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[0].c_str(), cosine_sim_q0_d0); std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[1].c_str(), cosine_sim_q0_d1); From 381da2d9f0940d7009e3e918bed36338c8ff2fbb Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 14 Mar 2024 11:55:23 +0200 Subject: [PATCH 35/48] metal : build metallib + fix embed path (#6015) * metal : build metallib + fix embed path ggml-ci * metal : fix embed build + update library load logic ggml-ci * metal : fix embeded library build ggml-ci * ci : fix iOS builds to use embedded library --- .github/workflows/build.yml | 2 ++ .gitignore | 2 ++ CMakeLists.txt | 70 ++++++++++++++++++++----------------- Makefile | 15 ++++---- ggml-metal.m | 49 +++++++++++++++++--------- ggml-metal.metal | 3 -- 6 files changed, 83 insertions(+), 58 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d39cd6bc3..0da01d5ba 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -333,6 +333,7 @@ jobs: mkdir build cd build cmake -G Xcode .. \ + -DLLAMA_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ @@ -361,6 +362,7 @@ jobs: mkdir build cd build cmake -G Xcode .. \ + -DLLAMA_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ diff --git a/.gitignore b/.gitignore index d28f4d1b8..1ad8d929b 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,8 @@ .vscode/ .idea/ +ggml-metal-embed.metal + lcov-report/ gcovr-report/ diff --git a/CMakeLists.txt b/CMakeLists.txt index a8abf4088..3ac2804a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -200,9 +200,6 @@ if (LLAMA_METAL) add_compile_definitions(GGML_METAL_NDEBUG) endif() - # get full path to the file - #add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/") - # copy ggml-common.h and ggml-metal.metal to bin directory configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY) configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY) @@ -211,53 +208,62 @@ if (LLAMA_METAL) enable_language(ASM) add_compile_definitions(GGML_METAL_EMBED_LIBRARY) + set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h") set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") + file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated") - set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-metallib.s") + + # merge ggml-common.h and ggml-metal.metal into a single file + set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s") + set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal") add_custom_command( - OUTPUT ${EMBED_METALLIB_ASSEMBLY} - COMMAND echo ".section __DATA,__ggml_metallib" > ${EMBED_METALLIB_ASSEMBLY} - COMMAND echo ".globl _ggml_metallib_start" >> ${EMBED_METALLIB_ASSEMBLY} - COMMAND echo "_ggml_metallib_start:" >> ${EMBED_METALLIB_ASSEMBLY} - COMMAND echo ".incbin \\\"${METALLIB_SOURCE}\\\"" >> ${EMBED_METALLIB_ASSEMBLY} - COMMAND echo ".globl _ggml_metallib_end" >> ${EMBED_METALLIB_ASSEMBLY} - COMMAND echo "_ggml_metallib_end:" >> ${EMBED_METALLIB_ASSEMBLY} - DEPENDS ${METALLIB_SOURCE} + OUTPUT ${METALLIB_EMBED_ASM} + COMMAND echo "Embedding Metal library" + COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED} + COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM} + COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM} + COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM} + COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM} + COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM} + COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM} + DEPENDS ggml-metal.metal ggml-common.h COMMENT "Generate assembly for embedded Metal library" ) - set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${EMBED_METALLIB_ASSEMBLY}) - endif() - - if (LLAMA_METAL_SHADER_DEBUG) - # custom command to do the following: - # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air - # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib - # - # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works - # disabling fast math is needed in order to pass tests/test-backend-ops - # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1 - # note: unfortunately, we have to call it default.metallib instead of ggml.metallib - # ref: https://github.com/ggerganov/whisper.cpp/issues/1720 - set(XC_FLAGS -fno-fast-math -fno-inline -g) - if (LLAMA_QKK_64) - set(XC_FLAGS ${XC_FLAGS} -DQK_K=64) + set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM}) + else() + if (LLAMA_METAL_SHADER_DEBUG) + # custom command to do the following: + # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air + # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib + # + # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works + # disabling fast math is needed in order to pass tests/test-backend-ops + # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1 + # note: unfortunately, we have to call it default.metallib instead of ggml.metallib + # ref: https://github.com/ggerganov/whisper.cpp/issues/1720 + set(XC_FLAGS -fno-fast-math -fno-inline -g) + else() + set(XC_FLAGS -O3) endif() add_custom_command( OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib - DEPENDS ggml-metal.metal + COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air + COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h + COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal + DEPENDS ggml-metal.metal ggml-common.h COMMENT "Compiling Metal kernels" - ) + ) add_custom_target( ggml-metal ALL DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib - ) - endif() + ) + endif() # LLAMA_METAL_EMBED_LIBRARY set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${FOUNDATION_LIBRARY} diff --git a/Makefile b/Makefile index db9968efb..cb597b209 100644 --- a/Makefile +++ b/Makefile @@ -557,15 +557,16 @@ ggml-metal.o: ggml-metal.m ggml-metal.h $(CC) $(CFLAGS) -c $< -o $@ ifdef LLAMA_METAL_EMBED_LIBRARY -ggml-metal-embed.o: ggml-metal.metal +ggml-metal-embed.o: ggml-metal.metal ggml-common.h @echo "Embedding Metal library" + @sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal $(eval TEMP_ASSEMBLY=$(shell mktemp)) - @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY) - @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY) - @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY) - @echo ".incbin \"$<\"" >> $(TEMP_ASSEMBLY) - @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY) - @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY) + @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY) + @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY) + @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY) + @echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY) + @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY) + @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY) @$(AS) $(TEMP_ASSEMBLY) -o $@ @rm -f ${TEMP_ASSEMBLY} endif diff --git a/ggml-metal.m b/ggml-metal.m index 3a5476c52..c3451a79b 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -280,6 +280,11 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { id metal_library; // load library + // + // - first check if the library is embedded + // - then check if the library is in the bundle + // - if not found, load the source and compile it + // - if that fails, return NULL { NSBundle * bundle = nil; #ifdef SWIFT_PACKAGE @@ -287,12 +292,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { #else bundle = [NSBundle bundleForClass:[GGMLMetalClass class]]; #endif + NSError * error = nil; - NSString * libPath = [bundle pathForResource:@"default" ofType:@"metallib"]; - if (libPath != nil) { + +#if GGML_METAL_EMBED_LIBRARY + const bool try_metallib = false; +#else + const bool try_metallib = true; +#endif + + NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"]; + if (try_metallib && path_lib != nil) { // pre-compiled library found - NSURL * libURL = [NSURL fileURLWithPath:libPath]; - GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]); + NSURL * libURL = [NSURL fileURLWithPath:path_lib]; + GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]); + metal_library = [ctx->device newLibraryWithURL:libURL error:&error]; if (error) { GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); @@ -305,31 +319,34 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) { extern const char ggml_metallib_start[]; extern const char ggml_metallib_end[]; - NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding]; + NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding]; #else GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__); - NSString * sourcePath; - NSString * ggmlMetalPathResources = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"]; + NSString * path_source; + NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"]; - GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, ggmlMetalPathResources ? [ggmlMetalPathResources UTF8String] : "nil"); + GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil"); - if (ggmlMetalPathResources) { - sourcePath = [ggmlMetalPathResources stringByAppendingPathComponent:@"ggml-metal.metal"]; + if (path_resource) { + path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"]; } else { - sourcePath = [bundle pathForResource:@"ggml-metal" ofType:@"metal"]; + path_source = [bundle pathForResource:@"ggml-metal" ofType:@"metal"]; } - if (sourcePath == nil) { + + if (path_source == nil) { GGML_METAL_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__); - sourcePath = @"ggml-metal.metal"; + path_source = @"ggml-metal.metal"; } - GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [sourcePath UTF8String]); - NSString * src = [NSString stringWithContentsOfFile:sourcePath encoding:NSUTF8StringEncoding error:&error]; + + GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]); + + NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error]; if (error) { GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); return NULL; } -#endif +#endif // GGML_METAL_EMBED_LIBRARY @autoreleasepool { // dictionary of preprocessor macros diff --git a/ggml-metal.metal b/ggml-metal.metal index ebf2f5b47..63de56325 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -4,9 +4,6 @@ #include -#define GGML_COMMON_IMPL_METAL -#include "ggml-common.h" - using namespace metal; #define MAX(x, y) ((x) > (y) ? (x) : (y)) From 68265ebfc6a1bed022973ea0c3145be1450b7e70 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 14 Mar 2024 12:37:20 +0200 Subject: [PATCH 36/48] embedding : print all resulting embeddings (#899) --- examples/embedding/embedding.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index f390c4061..895469a31 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -167,9 +167,9 @@ int main(int argc, char ** argv) { float * out = emb + p * n_embd; batch_decode(ctx, batch, out, s, n_embd); - // print first 3 embeddings + // print the first part of the embeddings fprintf(stdout, "\n"); - for (int j = 0; j < std::min(3, n_prompts); j++) { + for (int j = 0; j < n_prompts; j++) { fprintf(stdout, "embedding %d: ", j); for (int i = 0; i < std::min(16, n_embd); i++) { fprintf(stdout, "%f ", emb[j * n_embd + i]); From 3fe8d7a17f84bd721cd4d8db35365da44b69f68b Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 14 Mar 2024 12:38:37 +0200 Subject: [PATCH 37/48] ggml : designate enum vals for integer types (#6050) --- ggml.h | 64 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/ggml.h b/ggml.h index 1171088a9..ab26c8f59 100644 --- a/ggml.h +++ b/ggml.h @@ -337,24 +337,24 @@ extern "C" { struct ggml_object; struct ggml_context; + // NOTE: always add types at the end of the enum to keep backward compatibility enum ggml_type { - GGML_TYPE_F32 = 0, - GGML_TYPE_F16 = 1, - GGML_TYPE_Q4_0 = 2, - GGML_TYPE_Q4_1 = 3, + GGML_TYPE_F32 = 0, + GGML_TYPE_F16 = 1, + GGML_TYPE_Q4_0 = 2, + GGML_TYPE_Q4_1 = 3, // GGML_TYPE_Q4_2 = 4, support has been removed - // GGML_TYPE_Q4_3 (5) support has been removed - GGML_TYPE_Q5_0 = 6, - GGML_TYPE_Q5_1 = 7, - GGML_TYPE_Q8_0 = 8, - GGML_TYPE_Q8_1 = 9, - // k-quantizations - GGML_TYPE_Q2_K = 10, - GGML_TYPE_Q3_K = 11, - GGML_TYPE_Q4_K = 12, - GGML_TYPE_Q5_K = 13, - GGML_TYPE_Q6_K = 14, - GGML_TYPE_Q8_K = 15, + // GGML_TYPE_Q4_3 = 5, support has been removed + GGML_TYPE_Q5_0 = 6, + GGML_TYPE_Q5_1 = 7, + GGML_TYPE_Q8_0 = 8, + GGML_TYPE_Q8_1 = 9, + GGML_TYPE_Q2_K = 10, + GGML_TYPE_Q3_K = 11, + GGML_TYPE_Q4_K = 12, + GGML_TYPE_Q5_K = 13, + GGML_TYPE_Q6_K = 14, + GGML_TYPE_Q8_K = 15, GGML_TYPE_IQ2_XXS = 16, GGML_TYPE_IQ2_XS = 17, GGML_TYPE_IQ3_XXS = 18, @@ -363,9 +363,9 @@ extern "C" { GGML_TYPE_IQ3_S = 21, GGML_TYPE_IQ2_S = 22, GGML_TYPE_IQ4_XS = 23, - GGML_TYPE_I8, - GGML_TYPE_I16, - GGML_TYPE_I32, + GGML_TYPE_I8 = 24, + GGML_TYPE_I16 = 25, + GGML_TYPE_I32 = 26, GGML_TYPE_COUNT, }; @@ -383,20 +383,20 @@ extern "C" { // model file types enum ggml_ftype { - GGML_FTYPE_UNKNOWN = -1, - GGML_FTYPE_ALL_F32 = 0, - GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors - GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors - GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors + GGML_FTYPE_UNKNOWN = -1, + GGML_FTYPE_ALL_F32 = 0, + GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors + GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors + GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16 - GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors - GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors - GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors - GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors - GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors - GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors - GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors - GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors + GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors + GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors + GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors + GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors + GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors + GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors + GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors + GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors GGML_FTYPE_MOSTLY_IQ2_XXS = 15, // except 1d tensors GGML_FTYPE_MOSTLY_IQ2_XS = 16, // except 1d tensors GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors From 3ca23481dd309bd51cc31c73a4cc34f922cc372f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Thu, 14 Mar 2024 04:40:14 -0600 Subject: [PATCH 38/48] gguf-py : add support for I8, I16 and I32 (#6045) * Refactor dtype handling to be extensible This code is equivalent as before, but now it is prepared to easily add more NumPy dtypes. * Add support for I8, I16 and I32 These types are allowed in the GGUF specification. * Add support for I8, I16 and I32 to gguf_writer * Add support for I8, I16, I32 to gguf_reader --- gguf-py/gguf/constants.py | 6 ++++++ gguf-py/gguf/gguf_reader.py | 9 +++++++++ gguf-py/gguf/gguf_writer.py | 16 ++++++++++++---- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index b23badb10..99f71f0a1 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -661,6 +661,9 @@ class GGMLQuantizationType(IntEnum): IQ3_S = 21 IQ2_S = 22 IQ4_XS = 23 + I8 = 24 + I16 = 25 + I32 = 26 class GGUFEndian(IntEnum): @@ -727,6 +730,9 @@ GGML_QUANT_SIZES = { GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4), GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16), GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64), + GGMLQuantizationType.I8: (1, 1), + GGMLQuantizationType.I16: (1, 2), + GGMLQuantizationType.I32: (1, 4), } diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 5b6d4ba6b..1c10f5753 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -248,6 +248,15 @@ class GGUFReader: elif ggml_type == GGMLQuantizationType.F16: item_count = n_elems item_type = np.float16 + elif ggml_type == GGMLQuantizationType.I8: + item_count = n_elems + item_type = np.int8 + elif ggml_type == GGMLQuantizationType.I16: + item_count = n_elems + item_type = np.int16 + elif ggml_type == GGMLQuantizationType.I32: + item_count = n_elems + item_type = np.int32 else: item_count = n_bytes item_type = np.uint8 diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index e49c5db68..9c1eeac31 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -196,9 +196,6 @@ class GGUFWriter: if self.state is not WriterState.EMPTY: raise ValueError(f'Expected output file to be empty, got {self.state}') - if raw_dtype is None and tensor_dtype not in (np.float32, np.float16): - raise ValueError("Only F32 and F16 tensors are supported for now") - encoded_name = name.encode("utf8") self.ti_data += self._pack("Q", len(encoded_name)) self.ti_data += encoded_name @@ -207,7 +204,18 @@ class GGUFWriter: for i in range(n_dims): self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i]) if raw_dtype is None: - dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16 + if tensor_shape == np.float32: + dtype = GGMLQuantizationType.F32 + elif tensor_dtype == np.float16: + dtype = GGMLQuantizationType.F16 + elif tensor_dtype == np.int8: + dtype = GGMLQuantizationType.I8 + elif tensor_dtype == np.int16: + dtype = GGMLQuantizationType.I16 + elif tensor_dtype == np.int32: + dtype = GGMLQuantizationType.I32 + else: + raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now") else: dtype = raw_dtype self.ti_data += self._pack("I", dtype) From 2c4fb69246834503db7b78bcbedcef506bbc60c4 Mon Sep 17 00:00:00 2001 From: Michael Podvitskiy Date: Thu, 14 Mar 2024 11:56:48 +0100 Subject: [PATCH 39/48] llama : optimize defrag moves + fix fragmentation calculation (#6037) * attempt to reduce the impact of a worst-case scenario * fragmentation calculation fix * Update llama.cpp --------- Co-authored-by: Georgi Gerganov --- llama.cpp | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/llama.cpp b/llama.cpp index 38e7036a7..ff467c575 100644 --- a/llama.cpp +++ b/llama.cpp @@ -9036,8 +9036,8 @@ static int llama_decode_internal( //llama_synchronize(&lctx); // decide if we need to defrag the kv cache - if (cparams.defrag_thold >= 0.0f) { - const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used + n_tokens_all)/float(kv_self.n) : 0.0f; + if (cparams.causal_attn && cparams.defrag_thold >= 0.0f) { + const float fragmentation = kv_self.n >= 128 ? 1.0f - float(kv_self.used)/float(kv_self.n) : 0.0f; // queue defragmentation for next llama_kv_cache_update if (fragmentation > cparams.defrag_thold) { @@ -9069,6 +9069,11 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) { // number of cells moved uint32_t n_moves = 0; + // each move requires 6*n_layer tensors (see build_defrag) + // - source view, destination view, copy operation + // - x2 for keys and values + const uint32_t max_moves = LLAMA_MAX_NODES/(6*n_layer); + // determine which KV cells to move where // // cell i moves to ids[i] @@ -9095,15 +9100,6 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) { nh++; } - // each move requires 6*n_layer tensors (see build_defrag) - // - source view, destination view, copy operation - // - x2 for keys and values - // - if (6*(n_moves + nh)*n_layer >= LLAMA_MAX_NODES) { - // the graph is too big, we cannot move more cells - break; - } - uint32_t nf = 0; uint32_t is = n_kv - 1; @@ -9133,11 +9129,19 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) { // are we moving a continuous block of memory? bool cont = false; + // should we stop searching for the next move? + bool stop = false; + // go back and move the nf cells to the hole for (; i1 < n_kv; ++i1) { auto & cell1 = kv_self.cells[i1]; if (cell1.is_empty() || ids[i1] != n_kv) { + if (n_moves == max_moves) { + stop = true; + break; + } + cont = false; continue; } @@ -9164,6 +9168,10 @@ static void llama_kv_cache_defrag_internal(struct llama_context & lctx) { } } + if (stop || n_moves == max_moves) { + break; + } + //LLAMA_LOG_INFO("(tmp log) KV defrag: move [%u, %u) to [%u, %u)\n", is, i1 + 1, i0, i0 + nh); i0 += nh - 1; From a44bc969e4cd62ca9f4332e17fe3c51f2093e7c6 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 14 Mar 2024 13:13:06 +0200 Subject: [PATCH 40/48] llama : fix typo --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index ff467c575..eb48b1e90 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3932,7 +3932,7 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) { LLAMA_LOG_INFO("%s: n_ff = %u\n", __func__, hparams.n_ff); LLAMA_LOG_INFO("%s: n_expert = %u\n", __func__, hparams.n_expert); LLAMA_LOG_INFO("%s: n_expert_used = %u\n", __func__, hparams.n_expert_used); - LLAMA_LOG_INFO("%s: causal attm = %d\n", __func__, hparams.causal_attn); + LLAMA_LOG_INFO("%s: causal attn = %d\n", __func__, hparams.causal_attn); LLAMA_LOG_INFO("%s: pooling type = %d\n", __func__, hparams.pooling_type); LLAMA_LOG_INFO("%s: rope type = %d\n", __func__, hparams.rope_type); LLAMA_LOG_INFO("%s: rope scaling = %s\n", __func__, rope_scaling_type); From 43241adf22e8231ffaf3827d2c9310cc0ffd5ac5 Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Thu, 14 Mar 2024 12:15:39 +0100 Subject: [PATCH 41/48] server: disable debug release type sanitizer, simplify trigger (#6047) - increase time out for server - do not fail fast --- .github/workflows/server.yml | 17 ++++++++--------- examples/server/tests/features/steps/steps.py | 9 ++++++++- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index e385e03f3..5e38b3547 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -25,17 +25,14 @@ jobs: strategy: matrix: sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug, Release] + build_type: [Debug] include: - build_type: Release sanitizer: "" - exclude: - - build_type: Release - sanitizer: ADDRESS - - build_type: Release + - build_type: Debug sanitizer: THREAD - - build_type: Release - sanitizer: UNDEFINED + disabled_on_pr: true + fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken container: image: ubuntu:latest @@ -81,13 +78,14 @@ jobs: - name: Tests id: server_integration_tests + if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} run: | cd examples/server/tests PORT=8888 ./tests.sh - name: Slow tests id: server_integration_tests_slow - if: ${{ github.event.schedule != '' && matrix.build_type == 'Release' || github.event.inputs.slow_tests == 'true' }} + if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} run: | cd examples/server/tests PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow @@ -124,13 +122,14 @@ jobs: - name: Tests id: server_integration_tests + if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} run: | cd examples/server/tests behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp - name: Slow tests id: server_integration_tests_slow - if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }} + if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} run: | cd examples/server/tests behave.exe --stop --no-skipped --no-capture --tags slow diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index cfa9f96ec..a59a52d21 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -119,6 +119,10 @@ def step_server_metrics(context): def step_start_server(context): start_server_background(context) attempts = 0 + max_attempts = 20 + if 'GITHUB_ACTIONS' in os.environ: + max_attempts *= 2 + while True: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: result = sock.connect_ex((context.server_fqdn, context.server_port)) @@ -126,7 +130,7 @@ def step_start_server(context): print("\x1b[33;46mserver started!\x1b[0m") return attempts += 1 - if attempts > 20: + if attempts > max_attempts: assert False, "server not started" print(f"waiting for server to start, connect error code = {result}...") time.sleep(0.1) @@ -943,6 +947,9 @@ async def wait_for_health_status(context, print(f"Starting checking for health for expected_health_status={expected_health_status}\n") interval = 0.5 counter = 0 + if 'GITHUB_ACTIONS' in os.environ: + timeout *= 2 + async with aiohttp.ClientSession() as session: while True: async with await session.get(f'{base_url}/health', params=params) as health_response: From 15a333260ab637a040ed0864c206a2ceaf806bb8 Mon Sep 17 00:00:00 2001 From: Jian Liao Date: Thu, 14 Mar 2024 04:18:23 -0700 Subject: [PATCH 42/48] readme : improve readme for Llava-1.6 example (#6044) Co-authored-by: Jian Liao --- examples/llava/README.md | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/examples/llava/README.md b/examples/llava/README.md index 35e6d9e5d..67cb0f22b 100644 --- a/examples/llava/README.md +++ b/examples/llava/README.md @@ -63,12 +63,20 @@ Now both the LLaMA part and the image encoder is in the `llava-v1.5-7b` director ```console git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b ``` -2) Use `llava-surgery-v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models: + +2) Install the required Python packages: + +```sh +pip install -r examples/llava/requirements.txt +``` + +3) Use `llava-surgery-v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models: ```console python examples/llava/llava-surgery-v2.py -C -m ../llava-v1.6-vicuna-7b/ ``` - you will find a llava.projector and a llava.clip file in your model directory -3) Copy the llava.clip file into a subdirectory (like vit), rename it to pytorch_model.bin and add a fitting vit configuration to the directory: + +4) Copy the llava.clip file into a subdirectory (like vit), rename it to pytorch_model.bin and add a fitting vit configuration to the directory: ```console mkdir vit cp ../llava-v1.6-vicuna-7b/llava.clip vit/pytorch_model.bin @@ -76,18 +84,18 @@ cp ../llava-v1.6-vicuna-7b/llava.projector vit/ curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.json -o vit/config.json ``` -4) Create the visual gguf model: +5) Create the visual gguf model: ```console python ./examples/llava/convert-image-encoder-to-gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision ``` - This is similar to llava-1.5, the difference is that we tell the encoder that we are working with the pure vision model part of CLIP -5) Then convert the model to gguf format: +6) Then convert the model to gguf format: ```console python ./convert.py ../llava-v1.6-vicuna-7b/ --skip-unknown ``` -6) And finally we can run the llava-cli using the 1.6 model version: +7) And finally we can run the llava-cli using the 1.6 model version: ```console ./llava-cli -m ../llava-v1.6-vicuna-7b/ggml-model-f16.gguf --mmproj vit/mmproj-model-f16.gguf --image some-image.jpg -c 4096 ``` From 77178eedc83d49f31bf757d8e12315d76460be78 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 14 Mar 2024 13:32:14 +0200 Subject: [PATCH 43/48] gguf-py : fix dtype check (#6045) --- gguf-py/gguf/gguf_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 9c1eeac31..4d389be95 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -204,7 +204,7 @@ class GGUFWriter: for i in range(n_dims): self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i]) if raw_dtype is None: - if tensor_shape == np.float32: + if tensor_dtype == np.float32: dtype = GGMLQuantizationType.F32 elif tensor_dtype == np.float16: dtype = GGMLQuantizationType.F16 From 044ec4b2a567f649459ccd20af2f387c784faa51 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 14 Mar 2024 15:14:14 +0200 Subject: [PATCH 44/48] embedding : add EOS token if not present (#899) --- examples/embedding/embedding.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 895469a31..cbf9aa2b5 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -112,13 +112,20 @@ int main(int argc, char ** argv) { // tokenize the prompts and trim std::vector> inputs; for (const auto & prompt : prompts) { - auto inp = ::llama_tokenize(ctx, prompt, true); + auto inp = ::llama_tokenize(ctx, prompt, true, false); if (inp.size() > n_batch) { inp.resize(n_batch); } inputs.push_back(inp); } + // add eos if not present + for (auto & inp : inputs) { + if (inp.empty() || inp.back() != llama_token_eos(model)) { + inp.push_back(llama_token_eos(model)); + } + } + // tokenization stats if (params.verbose_prompt) { for (int i = 0; i < (int) inputs.size(); i++) { @@ -172,7 +179,7 @@ int main(int argc, char ** argv) { for (int j = 0; j < n_prompts; j++) { fprintf(stdout, "embedding %d: ", j); for (int i = 0; i < std::min(16, n_embd); i++) { - fprintf(stdout, "%f ", emb[j * n_embd + i]); + fprintf(stdout, "%9.6f ", emb[j * n_embd + i]); } fprintf(stdout, "\n"); } From 69ff61397d2b7b550dcdda4a35b35128892408b0 Mon Sep 17 00:00:00 2001 From: Michael Podvitskiy Date: Thu, 14 Mar 2024 17:21:56 +0100 Subject: [PATCH 45/48] llama : support models without vocabulary (#5798) * additional methods to read model and ctx parameters * vocab size as a part of a model metadata * models without vocabulary, convert.py part * models without vocabulary, llama.cpp part * PR clean up * converter scrypt fixes * llama_vocab_type update (renamed the new key) * pr review fixes * revert function renaming * one more NoVocab assert --- convert.py | 126 +++++++++++++++++++++--------------- gguf-py/gguf/constants.py | 2 + gguf-py/gguf/gguf_writer.py | 3 + llama.cpp | 92 +++++++++++++++++--------- llama.h | 7 +- 5 files changed, 142 insertions(+), 88 deletions(-) diff --git a/convert.py b/convert.py index c15f8c47e..161430f3e 100755 --- a/convert.py +++ b/convert.py @@ -332,6 +332,9 @@ class Params: # class BpeVocab: + tokenizer_model = "gpt2" + name = "bpe" + def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None: self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read()) if isinstance(self.bpe_tokenizer.get('model'), dict): @@ -390,6 +393,9 @@ class BpeVocab: class SentencePieceVocab: + tokenizer_model = "llama" + name = "spm" + def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None: self.sentencepiece_tokenizer = SentencePieceProcessor(str(fname_tokenizer)) added_tokens: dict[str, int] @@ -453,6 +459,9 @@ class SentencePieceVocab: class HfVocab: + tokenizer_model = "llama" + name = "hfft" + def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None = None) -> None: try: from transformers import AutoTokenizer @@ -553,7 +562,15 @@ class HfVocab: return f"" -Vocab: TypeAlias = "BpeVocab | SentencePieceVocab | HfVocab" +class NoVocab: + tokenizer_model = "no_vocab" + name = "no_vocab" + + def __repr__(self) -> str: + return "" + + +Vocab: TypeAlias = "BpeVocab | SentencePieceVocab | HfVocab | NoVocab" # @@ -935,8 +952,10 @@ def check_vocab_size(params: Params, vocab: Vocab, pad_vocab: bool = False) -> N # Handle special case where the model's vocab size is not set if params.n_vocab == -1: raise ValueError( - f"The model's vocab size is set to -1 in params.json. Please update it manually. Maybe {vocab.vocab_size}?" + f"The model's vocab size is set to -1 in params.json. Please update it manually.{f' Maybe {vocab.vocab_size}?' if hasattr(vocab, 'vocab_size') else ''}" ) + if isinstance(vocab, NoVocab): + return # model has no vocab # Check for a vocab size mismatch if params.n_vocab == vocab.vocab_size: @@ -977,6 +996,7 @@ class OutputFile: name = str(params.path_model.parent).split('/')[-1] self.gguf.add_name (name) + self.gguf.add_vocab_size (params.n_vocab) self.gguf.add_context_length (params.n_ctx) self.gguf.add_embedding_length (params.n_embd) self.gguf.add_block_count (params.n_layer) @@ -1013,21 +1033,9 @@ class OutputFile: if params.ftype is not None: self.gguf.add_file_type(params.ftype) - def handle_tokenizer_model(self, vocab: Vocab) -> str: - # Map the vocab types to the supported tokenizer models - tokenizer_model = { - SentencePieceVocab: "llama", - HfVocab: "llama", - BpeVocab: "gpt2", - }.get(type(vocab)) - - # Block if vocab type is not predefined - if tokenizer_model is None: - raise ValueError("Unknown vocab type: Not supported") - - return tokenizer_model - def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[bytes], list[float], list[gguf.TokenType]]: + assert not isinstance(vocab, NoVocab) + tokens = [] scores = [] toktypes = [] @@ -1043,11 +1051,8 @@ class OutputFile: return tokens, scores, toktypes def add_meta_vocab(self, vocab: Vocab) -> None: - # Handle the tokenizer model - tokenizer_model = self.handle_tokenizer_model(vocab) - # Ensure that tokenizer_model is added to the GGUF model - self.gguf.add_tokenizer_model(tokenizer_model) + self.gguf.add_tokenizer_model(vocab.tokenizer_model) # Extract model vocabulary for model conversion tokens, scores, toktypes = self.extract_vocabulary_from_model(vocab) @@ -1074,6 +1079,26 @@ class OutputFile: def write_tensor_info(self) -> None: self.gguf.write_ti_data_to_file() + def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, concurrency: int) -> None: + ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency=concurrency) + if ftype == GGMLFileType.MostlyQ8_0: + ndarrays = bounded_parallel_map( + OutputFile.maybe_do_quantize, ndarrays_inner, concurrency=concurrency, max_workers=concurrency, + use_processpool_executor=True, + ) + else: + ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner) + + start = time.time() + for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)): + elapsed = time.time() - start + size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape) + padi = len(str(len(model))) + print( + f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}" + ) + self.gguf.write_tensor_data(ndarray) + def close(self) -> None: self.gguf.close() @@ -1082,7 +1107,7 @@ class OutputFile: fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, ) -> None: - check_vocab_size(params, vocab, pad_vocab = pad_vocab) + check_vocab_size(params, vocab, pad_vocab=pad_vocab) of = OutputFile(fname_out, endianess=endianess) @@ -1120,8 +1145,11 @@ class OutputFile: # meta data of.add_meta_arch(params) - of.add_meta_vocab(vocab) - of.add_meta_special_vocab(svocab) + if isinstance(vocab, NoVocab): + of.gguf.add_tokenizer_model(vocab.tokenizer_model) + else: + of.add_meta_vocab(vocab) + of.add_meta_special_vocab(svocab) # tensor info for name, lazy_tensor in model.items(): @@ -1131,24 +1159,7 @@ class OutputFile: of.write_tensor_info() # tensor data - ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency = concurrency) - if ftype == GGMLFileType.MostlyQ8_0: - ndarrays = bounded_parallel_map( - OutputFile.maybe_do_quantize, ndarrays_inner, concurrency=concurrency, max_workers=concurrency, - use_processpool_executor=True, - ) - else: - ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner) - - start = time.time() - for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)): - elapsed = time.time() - start - size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape) - padi = len(str(len(model))) - print( - f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}" - ) - of.gguf.write_tensor_data(ndarray) + of.write_tensor_data(ftype, model, concurrency) of.close() @@ -1309,8 +1320,8 @@ class VocabFactory: return vtype, path raise FileNotFoundError(f"Could not find any of {[self._FILES[vt] for vt in vocab_types]}") - def _create_special_vocab(self, vocab: Vocab, vocabtype: str, model_parent_path: Path) -> gguf.SpecialVocab: - load_merges = vocabtype == "bpe" + def _create_special_vocab(self, vocab: Vocab, model_parent_path: Path) -> gguf.SpecialVocab: + load_merges = vocab.name == "bpe" n_vocab = vocab.vocab_size if hasattr(vocab, "vocab_size") else None return gguf.SpecialVocab( model_parent_path, @@ -1319,30 +1330,34 @@ class VocabFactory: n_vocab=n_vocab, ) - def load_vocab(self, vocab_types: list[str], model_parent_path: Path) -> tuple[Vocab, gguf.SpecialVocab]: + def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab: vocab_type, path = self._select_file(vocab_types) print(f"Loading vocab file {path!r}, type {vocab_type!r}") added_tokens_path = path.parent / "added_tokens.json" - vocab: Vocab if vocab_type == "bpe": - vocab = BpeVocab( + return BpeVocab( path, added_tokens_path if added_tokens_path.exists() else None ) - elif vocab_type == "spm": - vocab = SentencePieceVocab( + if vocab_type == "spm": + return SentencePieceVocab( path, added_tokens_path if added_tokens_path.exists() else None ) - elif vocab_type == "hfft": - vocab = HfVocab( + if vocab_type == "hfft": + return HfVocab( path.parent, added_tokens_path if added_tokens_path.exists() else None ) + raise ValueError(vocab_type) + + def load_vocab(self, vocab_types: list[str], model_parent_path: Path) -> tuple[Vocab, gguf.SpecialVocab]: + vocab: Vocab + if len(vocab_types) == 1 and "no_vocab" in vocab_types: + vocab = NoVocab() else: - raise ValueError(vocab_type) + vocab = self._create_vocab_by_path(vocab_types) # FIXME: Respect --vocab-dir? special_vocab = self._create_special_vocab( vocab, - vocab_type, model_parent_path, ) return vocab, special_vocab @@ -1380,6 +1395,7 @@ def main(args_in: list[str] | None = None) -> None: parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model") parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file") parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab") + parser.add_argument("--no-vocab", action="store_true", help="store model without the vocab") parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)") parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file") parser.add_argument("--vocab-type", help="vocab types to try in order, choose from 'spm', 'bpe', 'hfft' (default: spm,hfft)", default="spm,hfft") @@ -1392,6 +1408,10 @@ def main(args_in: list[str] | None = None) -> None: parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing") args = parser.parse_args(args_in) + if args.no_vocab: + if args.vocab_only: + raise ValueError("no need to specify --vocab-only if using --no-vocab") + args.vocab_type = "no_vocab" if args.dump_single: model_plus = lazy_load_file(args.model) @@ -1442,7 +1462,7 @@ def main(args_in: list[str] | None = None) -> None: print(f"Wrote {outfile}") return - if model_plus.vocab is not None and args.vocab_dir is None: + if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab: vocab = model_plus.vocab print(f"Vocab info: {vocab}") diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 99f71f0a1..2d7cf16c1 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -32,6 +32,7 @@ class Keys: FILE_TYPE = "general.file_type" class LLM: + VOCAB_SIZE = "{arch}.vocab_size" CONTEXT_LENGTH = "{arch}.context_length" EMBEDDING_LENGTH = "{arch}.embedding_length" BLOCK_COUNT = "{arch}.block_count" @@ -752,6 +753,7 @@ KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE # LLM +KEY_VOCAB_SIZE = Keys.LLM.VOCAB_SIZE KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 4d389be95..81b2eb884 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -321,6 +321,9 @@ class GGUFWriter: self.data_alignment = alignment self.add_uint32(Keys.General.ALIGNMENT, alignment) + def add_vocab_size(self, size: int) -> None: + self.add_uint32(Keys.LLM.VOCAB_SIZE.format(arch=self.arch), size) + def add_context_length(self, length: int) -> None: self.add_uint32(Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch), length) diff --git a/llama.cpp b/llama.cpp index eb48b1e90..10fd53469 100644 --- a/llama.cpp +++ b/llama.cpp @@ -258,6 +258,7 @@ enum llm_kv { LLM_KV_GENERAL_SOURCE_URL, LLM_KV_GENERAL_SOURCE_HF_REPO, + LLM_KV_VOCAB_SIZE, LLM_KV_CONTEXT_LENGTH, LLM_KV_EMBEDDING_LENGTH, LLM_KV_BLOCK_COUNT, @@ -321,6 +322,7 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_GENERAL_SOURCE_URL, "general.source.url" }, { LLM_KV_GENERAL_SOURCE_HF_REPO, "general.source.huggingface.repository" }, + { LLM_KV_VOCAB_SIZE, "%s.vocab_size" }, { LLM_KV_CONTEXT_LENGTH, "%s.context_length" }, { LLM_KV_EMBEDDING_LENGTH, "%s.embedding_length" }, { LLM_KV_BLOCK_COUNT, "%s.block_count" }, @@ -3242,10 +3244,11 @@ static const char * llama_model_type_name(e_model type) { static const char * llama_model_vocab_type_name(enum llama_vocab_type type){ switch (type) { - case LLAMA_VOCAB_TYPE_SPM: return "SPM"; - case LLAMA_VOCAB_TYPE_BPE: return "BPE"; - case LLAMA_VOCAB_TYPE_WPM: return "WPM"; - default: return "unknown"; + case LLAMA_VOCAB_TYPE_NONE: return "no vocab"; + case LLAMA_VOCAB_TYPE_SPM: return "SPM"; + case LLAMA_VOCAB_TYPE_BPE: return "BPE"; + case LLAMA_VOCAB_TYPE_WPM: return "WPM"; + default: return "unknown"; } } @@ -3277,14 +3280,14 @@ static void llm_load_hparams( ml.get_key(LLM_KV_GENERAL_NAME, model.name, false); // get hparams kv - ml.get_arr_n(LLM_KV_TOKENIZER_LIST, hparams.n_vocab); - ml.get_key (LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train); - ml.get_key (LLM_KV_EMBEDDING_LENGTH, hparams.n_embd); - ml.get_key (LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff); - ml.get_key (LLM_KV_ATTENTION_HEAD_COUNT, hparams.n_head); - ml.get_key (LLM_KV_BLOCK_COUNT, hparams.n_layer); - ml.get_key (LLM_KV_EXPERT_COUNT, hparams.n_expert, false); - ml.get_key (LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false); + ml.get_key(LLM_KV_VOCAB_SIZE, hparams.n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, hparams.n_vocab); + ml.get_key(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train); + ml.get_key(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd); + ml.get_key(LLM_KV_FEED_FORWARD_LENGTH, hparams.n_ff); + ml.get_key(LLM_KV_ATTENTION_HEAD_COUNT, hparams.n_head); + ml.get_key(LLM_KV_BLOCK_COUNT, hparams.n_layer); + ml.get_key(LLM_KV_EXPERT_COUNT, hparams.n_expert, false); + ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false); GGML_ASSERT(hparams.n_expert <= LLAMA_MAX_EXPERTS); GGML_ASSERT(hparams.n_expert_used <= hparams.n_expert); @@ -3645,30 +3648,25 @@ static void llm_load_vocab( const auto kv = LLM_KV(model.arch); - const int token_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_LIST).c_str()); - if (token_idx == -1) { - throw std::runtime_error("cannot find tokenizer vocab in model file\n"); - } - - const float * scores = nullptr; - const int score_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SCORES).c_str()); - if (score_idx != -1) { - scores = (const float * ) gguf_get_arr_data(ctx, score_idx); - } - - const int * toktypes = nullptr; - const int toktype_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE).c_str()); - if (toktype_idx != -1) { - toktypes = (const int * ) gguf_get_arr_data(ctx, toktype_idx); - } - // determine vocab type { std::string tokenizer_name; ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_name); - if (tokenizer_name == "llama") { + if (tokenizer_name == "no_vocab") { + vocab.type = LLAMA_VOCAB_TYPE_NONE; + + // default special tokens + vocab.special_bos_id = -1; + vocab.special_eos_id = -1; + vocab.special_unk_id = -1; + vocab.special_sep_id = -1; + vocab.special_pad_id = -1; + vocab.linefeed_id = -1; + + return; + } else if (tokenizer_name == "llama") { vocab.type = LLAMA_VOCAB_TYPE_SPM; // default special tokens @@ -3734,6 +3732,23 @@ static void llm_load_vocab( } } + const int token_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_LIST).c_str()); + if (token_idx == -1) { + throw std::runtime_error("cannot find tokenizer vocab in model file\n"); + } + + const float * scores = nullptr; + const int score_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SCORES).c_str()); + if (score_idx != -1) { + scores = (const float * ) gguf_get_arr_data(ctx, score_idx); + } + + const int * toktypes = nullptr; + const int toktype_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE).c_str()); + if (toktype_idx != -1) { + toktypes = (const int * ) gguf_get_arr_data(ctx, toktype_idx); + } + const uint32_t n_vocab = gguf_get_arr_n(ctx, token_idx); vocab.id_to_token.resize(n_vocab); @@ -5023,7 +5038,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam llm_load_print_meta(ml, model); - if (model.hparams.n_vocab != model.vocab.id_to_token.size()) { + if (model.vocab.type != LLAMA_VOCAB_TYPE_NONE && + model.hparams.n_vocab != model.vocab.id_to_token.size()) { throw std::runtime_error("vocab size mismatch"); } @@ -9361,26 +9377,32 @@ static enum llama_vocab_type llama_vocab_get_type(const llama_vocab & vocab) { } static bool llama_is_normal_token(const llama_vocab & vocab, llama_token id) { + GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NONE); return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_NORMAL; } static bool llama_is_unknown_token(const llama_vocab & vocab, llama_token id) { + GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NONE); return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_UNKNOWN; } static bool llama_is_control_token(const llama_vocab & vocab, llama_token id) { + GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NONE); return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_CONTROL; } static bool llama_is_byte_token(const llama_vocab & vocab, llama_token id) { + GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NONE); return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_BYTE; } static bool llama_is_user_defined_token(const llama_vocab& vocab, llama_token id) { + GGML_ASSERT(vocab.type != LLAMA_VOCAB_TYPE_NONE); return vocab.id_to_token[id].type == LLAMA_TOKEN_TYPE_USER_DEFINED; } static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) { + GGML_ASSERT(llama_vocab_get_type(vocab) != LLAMA_VOCAB_TYPE_NONE); GGML_ASSERT(llama_is_byte_token(vocab, id)); const auto& token_data = vocab.id_to_token.at(id); switch (llama_vocab_get_type(vocab)) { @@ -9401,6 +9423,7 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) { } static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) { + GGML_ASSERT(llama_vocab_get_type(vocab) != LLAMA_VOCAB_TYPE_NONE); static const char * hex = "0123456789ABCDEF"; switch (llama_vocab_get_type(vocab)) { case LLAMA_VOCAB_TYPE_SPM: { @@ -10232,6 +10255,8 @@ static std::vector llama_tokenize_internal(const llama_vocab & } } } break; + case LLAMA_VOCAB_TYPE_NONE: + GGML_ASSERT(false); } return output; @@ -13138,7 +13163,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) { } int32_t llama_n_vocab(const struct llama_model * model) { - return model->vocab.id_to_token.size(); + return model->hparams.n_vocab; } int32_t llama_n_ctx_train(const struct llama_model * model) { @@ -13962,14 +13987,17 @@ float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id } const char * llama_token_get_text(const struct llama_model * model, llama_token token) { + GGML_ASSERT(model->vocab.type != LLAMA_VOCAB_TYPE_NONE); return model->vocab.id_to_token[token].text.c_str(); } float llama_token_get_score(const struct llama_model * model, llama_token token) { + GGML_ASSERT(model->vocab.type != LLAMA_VOCAB_TYPE_NONE); return model->vocab.id_to_token[token].score; } llama_token_type llama_token_get_type(const struct llama_model * model, llama_token token) { + GGML_ASSERT(model->vocab.type != LLAMA_VOCAB_TYPE_NONE); return model->vocab.id_to_token[token].type; } diff --git a/llama.h b/llama.h index 2d16cc9b9..90aa5372e 100644 --- a/llama.h +++ b/llama.h @@ -59,9 +59,10 @@ extern "C" { typedef int32_t llama_seq_id; enum llama_vocab_type { - LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece - LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding - LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece + LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab + LLAMA_VOCAB_TYPE_SPM = 1, // SentencePiece + LLAMA_VOCAB_TYPE_BPE = 2, // Byte Pair Encoding + LLAMA_VOCAB_TYPE_WPM = 3, // WordPiece }; // note: these values should be synchronized with ggml_rope From 727107707a73b3dc8a497cf9fc9405722c16dd2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C4=8Cert=C3=ADk?= Date: Thu, 14 Mar 2024 11:57:31 -0600 Subject: [PATCH 46/48] gguf-py : bump version to 0.8.0 (#6060) --- gguf-py/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml index 9789c2c87..96396e04e 100644 --- a/gguf-py/pyproject.toml +++ b/gguf-py/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "gguf" -version = "0.7.0" +version = "0.8.0" description = "Read and write ML models in GGUF for GGML" authors = ["GGML "] packages = [ From 6e0438da3cc95b89cdbf55f45fa4e324d9076792 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Thu, 14 Mar 2024 14:29:32 -0400 Subject: [PATCH 47/48] gguf : fix resource leaks (#6061) There several places where a gguf context is allocated. A call to gguf_free is missing in some error paths. Also on linux, llama-bench was missing a fclose. --- examples/gguf/gguf.cpp | 1 + examples/llama-bench/llama-bench.cpp | 1 + examples/llava/clip.cpp | 4 ++++ examples/train-text-from-scratch/train-text-from-scratch.cpp | 1 + 4 files changed, 7 insertions(+) diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp index e67be4fb2..5444503a5 100644 --- a/examples/gguf/gguf.cpp +++ b/examples/gguf/gguf.cpp @@ -211,6 +211,7 @@ static bool gguf_ex_read_1(const std::string & fname) { for (int j = 0; j < ggml_nelements(cur); ++j) { if (data[j] != 100 + i) { fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]); + gguf_free(ctx); return false; } } diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index bf94e7e7a..d6e5e0497 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -103,6 +103,7 @@ static std::string get_cpu_info() { } } } + fclose(f); } #endif // TODO: other platforms diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index 6653b815d..2035554ea 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -995,6 +995,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { if (!new_clip->ctx_data) { fprintf(stderr, "%s: ggml_init() failed\n", __func__); clip_free(new_clip); + gguf_free(ctx); return nullptr; } @@ -1002,6 +1003,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { if (!fin) { printf("cannot open model file for loading tensors\n"); clip_free(new_clip); + gguf_free(ctx); return nullptr; } @@ -1023,6 +1025,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) { if (!fin) { printf("%s: failed to seek for tensor %s\n", __func__, name); clip_free(new_clip); + gguf_free(ctx); return nullptr; } int num_bytes = ggml_nbytes(cur); @@ -1908,6 +1911,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i break; default: printf("Please use an input file in f32 or f16\n"); + gguf_free(ctx_out); return false; } diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp index 7eafe8515..7d06e401b 100644 --- a/examples/train-text-from-scratch/train-text-from-scratch.cpp +++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp @@ -711,6 +711,7 @@ static bool load_checkpoint_file(const char * filename, struct my_llama_model * load_checkpoint_gguf(fctx, f_ggml_ctx, model, train); + gguf_free(fctx); return true; } From 4755afd1cbd40d93c017e5b98c39796f52345314 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 14 Mar 2024 22:58:41 +0200 Subject: [PATCH 48/48] llama : fix integer overflow during quantization (#6063) --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 10fd53469..2c3841974 100644 --- a/llama.cpp +++ b/llama.cpp @@ -11977,7 +11977,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n return new_type; } -static int32_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector & workers, const int nthread) { +static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int chunk_size, int nrows, int n_per_row, const float * imatrix, std::vector & workers, const int nthread) { std::mutex mutex; int counter = 0; size_t new_size = 0;