Merge branch 'master' into hp/server/bench/init
This commit is contained in:
commit
5d25f74821
38 changed files with 2904 additions and 3610 deletions
|
@ -199,7 +199,8 @@ if (LLAMA_METAL)
|
||||||
# get full path to the file
|
# get full path to the file
|
||||||
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
|
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
|
||||||
|
|
||||||
# copy ggml-metal.metal to bin directory
|
# copy ggml-common.h and ggml-metal.metal to bin directory
|
||||||
|
configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
|
||||||
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
|
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
|
||||||
|
|
||||||
if (LLAMA_METAL_EMBED_LIBRARY)
|
if (LLAMA_METAL_EMBED_LIBRARY)
|
||||||
|
|
8
Makefile
8
Makefile
|
@ -201,6 +201,10 @@ ifdef LLAMA_SERVER_VERBOSE
|
||||||
MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
|
MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef LLAMA_SERVER_SSL
|
||||||
|
MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
|
||||||
|
MK_LDFLAGS += -lssl -lcrypto
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef LLAMA_CODE_COVERAGE
|
ifdef LLAMA_CODE_COVERAGE
|
||||||
MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
|
MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
|
||||||
|
@ -449,7 +453,7 @@ endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
||||||
ifdef LLAMA_CUDA_CCBIN
|
ifdef LLAMA_CUDA_CCBIN
|
||||||
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||||
endif
|
endif
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml-common.h
|
||||||
ifdef JETSON_EOL_MODULE_DETECT
|
ifdef JETSON_EOL_MODULE_DETECT
|
||||||
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
|
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
|
||||||
else
|
else
|
||||||
|
@ -626,7 +630,7 @@ ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
|
||||||
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
|
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o
|
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o
|
||||||
|
|
13
README.md
13
README.md
|
@ -8,18 +8,20 @@
|
||||||
|
|
||||||
Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
|
Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
|
||||||
|
|
||||||
|
> [!IMPORTANT]
|
||||||
|
> **Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962**
|
||||||
|
>
|
||||||
|
> Vote for which quantization type provides better responses, all other parameters being the same.
|
||||||
|
|
||||||
### Recent API changes
|
### Recent API changes
|
||||||
|
|
||||||
|
- [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_max_seq()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328
|
||||||
- [2024 Mar 4] Embeddings API updated https://github.com/ggerganov/llama.cpp/pull/5796
|
- [2024 Mar 4] Embeddings API updated https://github.com/ggerganov/llama.cpp/pull/5796
|
||||||
- [2024 Mar 3] `struct llama_context_params` https://github.com/ggerganov/llama.cpp/pull/5849
|
- [2024 Mar 3] `struct llama_context_params` https://github.com/ggerganov/llama.cpp/pull/5849
|
||||||
|
|
||||||
### Hot topics
|
### Hot topics
|
||||||
|
|
||||||
- The `api_like_OAI.py` script has been removed - use `server` instead ([#5766](https://github.com/ggerganov/llama.cpp/issues/5766#issuecomment-1969037761))
|
- Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328
|
||||||
- Support for chat templates: [Wiki (contributions welcome)](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template)
|
|
||||||
- Support for Gemma models: https://github.com/ggerganov/llama.cpp/pull/5631
|
|
||||||
- Non-linear quantization IQ4_NL: https://github.com/ggerganov/llama.cpp/pull/5590
|
|
||||||
- Looking for contributions to improve and maintain the `server` example: https://github.com/ggerganov/llama.cpp/issues/4216
|
|
||||||
|
|
||||||
----
|
----
|
||||||
|
|
||||||
|
@ -110,6 +112,7 @@ Typically finetunes of the base models below are supported as well.
|
||||||
- [x] [InternLM2](https://huggingface.co/models?search=internlm2)
|
- [x] [InternLM2](https://huggingface.co/models?search=internlm2)
|
||||||
- [x] [CodeShell](https://github.com/WisdomShell/codeshell)
|
- [x] [CodeShell](https://github.com/WisdomShell/codeshell)
|
||||||
- [x] [Gemma](https://ai.google.dev/gemma)
|
- [x] [Gemma](https://ai.google.dev/gemma)
|
||||||
|
- [x] [Mamba](https://github.com/state-spaces/mamba)
|
||||||
|
|
||||||
**Multimodal models:**
|
**Multimodal models:**
|
||||||
|
|
||||||
|
|
|
@ -1288,6 +1288,7 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
|
||||||
|
|
||||||
cparams.n_ctx = params.n_ctx;
|
cparams.n_ctx = params.n_ctx;
|
||||||
cparams.n_batch = params.n_batch;
|
cparams.n_batch = params.n_batch;
|
||||||
|
cparams.n_parallel = params.n_parallel;
|
||||||
cparams.n_threads = params.n_threads;
|
cparams.n_threads = params.n_threads;
|
||||||
cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||||
cparams.seed = params.seed;
|
cparams.seed = params.seed;
|
||||||
|
@ -1851,3 +1852,18 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
|
||||||
|
|
||||||
printf("\n=== Done dumping\n");
|
printf("\n=== Done dumping\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void llama_embd_normalize(const float * inp, float * out, int n) {
|
||||||
|
double sum = 0.0;
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
sum += inp[i] * inp[i];
|
||||||
|
}
|
||||||
|
sum = sqrt(sum);
|
||||||
|
|
||||||
|
const float norm = sum > 0.0 ? 1.0f / sum : 0.0f;
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
out[i] = inp[i] * norm;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -260,3 +260,10 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size = 80);
|
||||||
|
|
||||||
// Dump the KV cache view showing individual sequences in each cell (long output).
|
// Dump the KV cache view showing individual sequences in each cell (long output).
|
||||||
void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Embedding utils
|
||||||
|
//
|
||||||
|
|
||||||
|
void llama_embd_normalize(const float * inp, float * out, int n);
|
||||||
|
|
||||||
|
|
|
@ -1847,6 +1847,124 @@ class StarCoder2Model(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.STARCODER2
|
model_arch = gguf.MODEL_ARCH.STARCODER2
|
||||||
|
|
||||||
|
|
||||||
|
@Model.register("MambaForCausalLM", "MambaLMHeadModel")
|
||||||
|
class MambaModel(Model):
|
||||||
|
model_arch = gguf.MODEL_ARCH.MAMBA
|
||||||
|
|
||||||
|
def set_vocab(self):
|
||||||
|
vocab_size = self.hparams["vocab_size"]
|
||||||
|
# Round vocab size to next multiple of 8
|
||||||
|
pad_vocab = self.hparams.get("pad_vocab_size_multiple", 8)
|
||||||
|
# pad using ceiling division
|
||||||
|
# ref: https://stackoverflow.com/a/17511341/22827863
|
||||||
|
vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
|
||||||
|
self.hparams["vocab_size"] = vocab_size
|
||||||
|
|
||||||
|
if (self.dir_model / "tokenizer.json").is_file():
|
||||||
|
self._set_vocab_gpt2()
|
||||||
|
else:
|
||||||
|
# Use the GPT-NeoX tokenizer when no tokenizer files are present
|
||||||
|
tokenizer_path = Path(sys.path[0]) / "models" / "ggml-vocab-gpt-neox.gguf"
|
||||||
|
print(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'")
|
||||||
|
neox_reader = gguf.GGUFReader(tokenizer_path, "r")
|
||||||
|
|
||||||
|
field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
|
||||||
|
self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]))
|
||||||
|
field = neox_reader.get_field(gguf.Keys.Tokenizer.LIST)
|
||||||
|
self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
|
||||||
|
field = neox_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
|
||||||
|
self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
|
||||||
|
field = neox_reader.get_field(gguf.Keys.Tokenizer.MERGES)
|
||||||
|
self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
|
||||||
|
field = neox_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)
|
||||||
|
self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0])
|
||||||
|
field = neox_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)
|
||||||
|
self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0])
|
||||||
|
field = neox_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)
|
||||||
|
self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0])
|
||||||
|
|
||||||
|
def set_gguf_parameters(self):
|
||||||
|
d_model = self.find_hparam(["hidden_size", "d_model"])
|
||||||
|
d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
|
||||||
|
d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
|
||||||
|
d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 16
|
||||||
|
# ceiling division
|
||||||
|
# ref: https://stackoverflow.com/a/17511341/22827863
|
||||||
|
# ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
|
||||||
|
dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
|
||||||
|
rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
|
||||||
|
|
||||||
|
# Fail early for models which don't have a block expansion factor of 2
|
||||||
|
assert d_inner == 2 * d_model
|
||||||
|
|
||||||
|
self.gguf_writer.add_name(self.dir_model.name)
|
||||||
|
self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
|
||||||
|
self.gguf_writer.add_embedding_length(d_model)
|
||||||
|
self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
|
||||||
|
self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
|
||||||
|
self.gguf_writer.add_block_count(self.hparams["n_layer"])
|
||||||
|
self.gguf_writer.add_ssm_conv_kernel(d_conv)
|
||||||
|
self.gguf_writer.add_ssm_inner_size(d_inner)
|
||||||
|
self.gguf_writer.add_ssm_state_size(d_state)
|
||||||
|
self.gguf_writer.add_ssm_time_step_rank(dt_rank)
|
||||||
|
self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
|
||||||
|
self.gguf_writer.add_file_type(self.ftype)
|
||||||
|
|
||||||
|
def write_tensors(self):
|
||||||
|
block_count = self.hparams["n_layer"]
|
||||||
|
tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
|
||||||
|
|
||||||
|
tok_embd = None
|
||||||
|
tok_embd_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.TOKEN_EMBD] + ".weight"
|
||||||
|
output_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT] + ".weight"
|
||||||
|
|
||||||
|
for name, data_torch in self.get_tensors():
|
||||||
|
old_dtype = data_torch.dtype
|
||||||
|
|
||||||
|
# convert any unsupported data types to float32
|
||||||
|
if data_torch.dtype not in (torch.float16, torch.float32):
|
||||||
|
data_torch = data_torch.to(torch.float32)
|
||||||
|
|
||||||
|
# map tensor names
|
||||||
|
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
|
||||||
|
if new_name is None:
|
||||||
|
print(f"Can not map tensor {name!r}")
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
if name.endswith(".A_log"):
|
||||||
|
print("A_log --> A ==> " + new_name)
|
||||||
|
data_torch = -torch.exp(data_torch)
|
||||||
|
|
||||||
|
# assuming token_embd.weight is seen before output.weight
|
||||||
|
if tok_embd is not None and new_name == output_name:
|
||||||
|
if torch.equal(tok_embd, data_torch):
|
||||||
|
print(f"{output_name} is equivalent to {tok_embd_name}, omitting")
|
||||||
|
continue
|
||||||
|
if new_name == tok_embd_name:
|
||||||
|
tok_embd = data_torch
|
||||||
|
|
||||||
|
data = data_torch.squeeze().numpy()
|
||||||
|
|
||||||
|
n_dims = len(data.shape)
|
||||||
|
data_dtype = data.dtype
|
||||||
|
|
||||||
|
# if f32 desired, convert any float16 to float32
|
||||||
|
if self.ftype == 0 and data_dtype == np.float16:
|
||||||
|
data = data.astype(np.float32)
|
||||||
|
|
||||||
|
# TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
|
||||||
|
if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
|
||||||
|
data = data.astype(np.float32)
|
||||||
|
|
||||||
|
# if f16 desired, convert big float32 2-dim weight tensors to float16
|
||||||
|
if self.ftype == 1 and data_dtype == np.float32 and new_name.removesuffix(".weight").endswith((".ssm_in", ".ssm_out", "token_embd", "output")) and n_dims == 2:
|
||||||
|
data = data.astype(np.float16)
|
||||||
|
|
||||||
|
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
|
||||||
|
|
||||||
|
self.gguf_writer.add_tensor(new_name, data)
|
||||||
|
|
||||||
|
|
||||||
###### CONVERSION LOGIC ######
|
###### CONVERSION LOGIC ######
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -105,6 +105,9 @@ int main(int argc, char ** argv) {
|
||||||
ctx_params.n_threads = params.n_threads;
|
ctx_params.n_threads = params.n_threads;
|
||||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||||
|
|
||||||
|
// ensure enough sequences are available
|
||||||
|
ctx_params.n_parallel = *std::max_element(n_pl.begin(), n_pl.end());
|
||||||
|
|
||||||
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
|
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
|
||||||
|
|
||||||
if (ctx == NULL) {
|
if (ctx == NULL) {
|
||||||
|
@ -174,10 +177,10 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
llama_batch_clear(batch);
|
llama_batch_clear(batch);
|
||||||
|
|
||||||
const int n_tokens = is_pp_shared ? pp : pl*pp;
|
for (int i = 0; i < pp; ++i) {
|
||||||
|
for (int j = 0; j < (is_pp_shared ? 1 : pl); ++j) {
|
||||||
for (int i = 0; i < n_tokens; ++i) {
|
llama_batch_add(batch, 0, i, { j }, false);
|
||||||
llama_batch_add(batch, 0, i, { 0 }, false);
|
}
|
||||||
}
|
}
|
||||||
batch.logits[batch.n_tokens - 1] = true;
|
batch.logits[batch.n_tokens - 1] = true;
|
||||||
|
|
||||||
|
@ -192,7 +195,7 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
if (is_pp_shared) {
|
if (is_pp_shared) {
|
||||||
for (int32_t i = 1; i < pl; ++i) {
|
for (int32_t i = 1; i < pl; ++i) {
|
||||||
llama_kv_cache_seq_cp(ctx, 0, i, 0, pp);
|
llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -80,6 +80,7 @@ int main(int argc, char ** argv) {
|
||||||
ctx_params.seed = 1234;
|
ctx_params.seed = 1234;
|
||||||
ctx_params.n_ctx = n_kv_req;
|
ctx_params.n_ctx = n_kv_req;
|
||||||
ctx_params.n_batch = std::max(n_len, n_parallel);
|
ctx_params.n_batch = std::max(n_len, n_parallel);
|
||||||
|
ctx_params.n_parallel = n_parallel;
|
||||||
ctx_params.n_threads = params.n_threads;
|
ctx_params.n_threads = params.n_threads;
|
||||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||||
|
|
||||||
|
@ -132,7 +133,7 @@ int main(int argc, char ** argv) {
|
||||||
// assign the system KV cache to all parallel sequences
|
// assign the system KV cache to all parallel sequences
|
||||||
// this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
|
// this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
|
||||||
for (int32_t i = 1; i < n_parallel; ++i) {
|
for (int32_t i = 1; i < n_parallel; ++i) {
|
||||||
llama_kv_cache_seq_cp(ctx, 0, i, 0, batch.n_tokens);
|
llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (n_parallel > 1) {
|
if (n_parallel > 1) {
|
||||||
|
|
|
@ -189,12 +189,10 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
int32_t nelements = sizex*sizey;
|
int32_t nelements = sizex*sizey;
|
||||||
|
|
||||||
std::vector<int64_t> hist_cur(1 << 4, 0);
|
|
||||||
|
|
||||||
// Set up a the benchmark matrices
|
// Set up a the benchmark matrices
|
||||||
// printf("Creating new tensor q11 & Running quantize\n");
|
// printf("Creating new tensor q11 & Running quantize\n");
|
||||||
struct ggml_tensor * q11 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
|
struct ggml_tensor * q11 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
|
||||||
ggml_quantize_chunk(qtype, (const float *) m11->data, q11->data, 0, nelements/m11->ne[0], m11->ne[0], hist_cur.data(), nullptr);
|
ggml_quantize_chunk(qtype, (const float *) m11->data, q11->data, 0, nelements/m11->ne[0], m11->ne[0], nullptr);
|
||||||
|
|
||||||
// Set up a the compute graph
|
// Set up a the compute graph
|
||||||
// printf("Creating new tensor q31\n");
|
// printf("Creating new tensor q31\n");
|
||||||
|
@ -207,7 +205,7 @@ int main(int argc, char ** argv) {
|
||||||
// Set up a second graph computation to make sure we override the CPU cache lines
|
// Set up a second graph computation to make sure we override the CPU cache lines
|
||||||
// printf("Creating new tensor q12 & Running quantize\n");
|
// printf("Creating new tensor q12 & Running quantize\n");
|
||||||
struct ggml_tensor * q12 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
|
struct ggml_tensor * q12 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
|
||||||
ggml_quantize_chunk(qtype, (const float *) m12->data, q12->data, 0, nelements/m12->ne[0], m12->ne[0], hist_cur.data(), nullptr);
|
ggml_quantize_chunk(qtype, (const float *) m12->data, q12->data, 0, nelements/m12->ne[0], m12->ne[0], nullptr);
|
||||||
|
|
||||||
// printf("Creating new tensor q32\n");
|
// printf("Creating new tensor q32\n");
|
||||||
struct ggml_tensor * q32 = ggml_mul_mat(ctx, q12, m2);
|
struct ggml_tensor * q32 = ggml_mul_mat(ctx, q12, m2);
|
||||||
|
|
|
@ -23,17 +23,6 @@ static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & toke
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void normalize(const float * vec, float * out, int n) {
|
|
||||||
float norm = 0;
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
norm += vec[i] * vec[i];
|
|
||||||
}
|
|
||||||
norm = sqrt(norm);
|
|
||||||
for (int i = 0; i < n; i++) {
|
|
||||||
out[i] = vec[i] / norm;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd) {
|
static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd) {
|
||||||
// clear previous kv_cache values (irrelevant for embeddings)
|
// clear previous kv_cache values (irrelevant for embeddings)
|
||||||
llama_kv_cache_clear(ctx);
|
llama_kv_cache_clear(ctx);
|
||||||
|
@ -44,7 +33,6 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
|
||||||
fprintf(stderr, "%s : failed to decode\n", __func__);
|
fprintf(stderr, "%s : failed to decode\n", __func__);
|
||||||
}
|
}
|
||||||
|
|
||||||
// normalize on copy
|
|
||||||
for (int i = 0; i < batch.n_tokens; i++) {
|
for (int i = 0; i < batch.n_tokens; i++) {
|
||||||
if (!batch.logits[i]) {
|
if (!batch.logits[i]) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -61,7 +49,7 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
|
||||||
}
|
}
|
||||||
|
|
||||||
float * out = output + batch.seq_id[i][0] * n_embd;
|
float * out = output + batch.seq_id[i][0] * n_embd;
|
||||||
normalize(embd, out, n_embd);
|
llama_embd_normalize(embd, out, n_embd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1862,7 +1862,6 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
||||||
|
|
||||||
std::vector<uint8_t> work(512);
|
std::vector<uint8_t> work(512);
|
||||||
std::vector<float> conv_buf(512);
|
std::vector<float> conv_buf(512);
|
||||||
std::vector<int64_t> hist_all(1 << 4, 0);
|
|
||||||
size_t total_size_org = 0;
|
size_t total_size_org = 0;
|
||||||
size_t total_size_new = 0;
|
size_t total_size_new = 0;
|
||||||
|
|
||||||
|
@ -1917,48 +1916,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
||||||
}
|
}
|
||||||
new_data = work.data();
|
new_data = work.data();
|
||||||
|
|
||||||
std::vector<int64_t> hist_cur(1 << 4, 0);
|
new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr);
|
||||||
|
|
||||||
switch (new_type) {
|
|
||||||
case GGML_TYPE_Q4_0: {
|
|
||||||
new_size = ggml_quantize_q4_0(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q4_1: {
|
|
||||||
new_size = ggml_quantize_q4_1(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q5_0: {
|
|
||||||
new_size = ggml_quantize_q5_0(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q5_1: {
|
|
||||||
new_size = ggml_quantize_q5_1(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q8_0: {
|
|
||||||
new_size = ggml_quantize_q8_0(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q2_K: {
|
|
||||||
new_size = ggml_quantize_q2_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q3_K: {
|
|
||||||
new_size = ggml_quantize_q3_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q4_K: {
|
|
||||||
new_size = ggml_quantize_q4_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q5_K: {
|
|
||||||
new_size = ggml_quantize_q5_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q6_K: {
|
|
||||||
new_size = ggml_quantize_q6_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
|
||||||
} break;
|
|
||||||
default: {
|
|
||||||
fprintf(stderr, "%s: unsupported quantization type %d\n", __func__, new_type);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t j = 0; j < hist_cur.size(); ++j) {
|
|
||||||
hist_all[j] += hist_cur[j];
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
new_type = cur->type;
|
new_type = cur->type;
|
||||||
new_data = cur->data;
|
new_data = cur->data;
|
||||||
|
@ -1993,17 +1951,6 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
||||||
{
|
{
|
||||||
printf("%s: original size = %8.2f MB\n", __func__, total_size_org / 1024.0 / 1024.0);
|
printf("%s: original size = %8.2f MB\n", __func__, total_size_org / 1024.0 / 1024.0);
|
||||||
printf("%s: quantized size = %8.2f MB\n", __func__, total_size_new / 1024.0 / 1024.0);
|
printf("%s: quantized size = %8.2f MB\n", __func__, total_size_new / 1024.0 / 1024.0);
|
||||||
|
|
||||||
int64_t sum_all = 0;
|
|
||||||
for (size_t i = 0; i < hist_all.size(); ++i) {
|
|
||||||
sum_all += hist_all[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("%s: hist: ", __func__);
|
|
||||||
for (size_t i = 0; i < hist_all.size(); ++i) {
|
|
||||||
printf("%5.3f ", hist_all[i] / (float)sum_all);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -107,6 +107,9 @@ int main(int argc, char ** argv) {
|
||||||
// number of simultaneous "clients" to simulate
|
// number of simultaneous "clients" to simulate
|
||||||
const int32_t n_clients = params.n_parallel;
|
const int32_t n_clients = params.n_parallel;
|
||||||
|
|
||||||
|
// dedicate one sequence to the system prompt
|
||||||
|
params.n_parallel += 1;
|
||||||
|
|
||||||
// requests to simulate
|
// requests to simulate
|
||||||
const int32_t n_seq = params.n_sequences;
|
const int32_t n_seq = params.n_sequences;
|
||||||
|
|
||||||
|
@ -196,8 +199,8 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// assign the system KV cache to all parallel sequences
|
// assign the system KV cache to all parallel sequences
|
||||||
for (int32_t i = 1; i < n_clients; ++i) {
|
for (int32_t i = 1; i <= n_clients; ++i) {
|
||||||
llama_kv_cache_seq_cp(ctx, 0, i, 0, n_tokens_system);
|
llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TEE("\n");
|
LOG_TEE("\n");
|
||||||
|
@ -221,15 +224,17 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
client.i_batch = batch.n_tokens;
|
client.i_batch = batch.n_tokens;
|
||||||
|
|
||||||
llama_batch_add(batch, client.sampled, n_tokens_system + client.n_prompt + client.n_decoded, { client.id }, true);
|
llama_batch_add(batch, client.sampled, n_tokens_system + client.n_prompt + client.n_decoded, { client.id + 1 }, true);
|
||||||
|
|
||||||
client.n_decoded += 1;
|
client.n_decoded += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (batch.n_tokens == 0) {
|
if (batch.n_tokens == 0) {
|
||||||
// all sequences have ended - clear the entire KV cache
|
// all sequences have ended - clear the entire KV cache
|
||||||
for (int i = 0; i < n_clients; ++i) {
|
for (int i = 1; i <= n_clients; ++i) {
|
||||||
llama_kv_cache_seq_rm(ctx, i, n_tokens_system, -1);
|
llama_kv_cache_seq_rm(ctx, i, -1, -1);
|
||||||
|
// but keep the system prompt
|
||||||
|
llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_TEE("%s: clearing the KV cache\n", __func__);
|
LOG_TEE("%s: clearing the KV cache\n", __func__);
|
||||||
|
@ -255,7 +260,7 @@ int main(int argc, char ** argv) {
|
||||||
tokens_prompt = ::llama_tokenize(ctx, client.prompt, false);
|
tokens_prompt = ::llama_tokenize(ctx, client.prompt, false);
|
||||||
|
|
||||||
for (size_t i = 0; i < tokens_prompt.size(); ++i) {
|
for (size_t i = 0; i < tokens_prompt.size(); ++i) {
|
||||||
llama_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id }, false);
|
llama_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id + 1 }, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// extract the logits only for the last token
|
// extract the logits only for the last token
|
||||||
|
@ -366,7 +371,8 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// delete only the generated part of the sequence, i.e. keep the system prompt in the cache
|
// delete only the generated part of the sequence, i.e. keep the system prompt in the cache
|
||||||
llama_kv_cache_seq_rm(ctx, client.id, n_tokens_system, -1);
|
llama_kv_cache_seq_rm(ctx, client.id + 1, -1, -1);
|
||||||
|
llama_kv_cache_seq_cp(ctx, 0, client.id + 1, -1, -1);
|
||||||
|
|
||||||
const auto t_main_end = ggml_time_us();
|
const auto t_main_end = ggml_time_us();
|
||||||
|
|
||||||
|
|
|
@ -809,7 +809,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
||||||
const int n_batch = params.n_batch;
|
const int n_batch = params.n_batch;
|
||||||
|
|
||||||
const int max_tasks_per_batch = 32;
|
const int max_tasks_per_batch = 32;
|
||||||
const int max_seq = 4*max_tasks_per_batch;
|
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
|
||||||
|
|
||||||
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
||||||
|
|
||||||
|
@ -1086,7 +1086,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
||||||
const int n_batch = params.n_batch;
|
const int n_batch = params.n_batch;
|
||||||
|
|
||||||
const int max_tasks_per_batch = 128;
|
const int max_tasks_per_batch = 128;
|
||||||
const int max_seq = 2*max_tasks_per_batch;
|
const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
|
||||||
|
|
||||||
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
||||||
|
|
||||||
|
@ -1438,7 +1438,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
|
||||||
const int n_batch = params.n_batch;
|
const int n_batch = params.n_batch;
|
||||||
|
|
||||||
const int max_tasks_per_batch = 32;
|
const int max_tasks_per_batch = 32;
|
||||||
const int max_seq = 4*max_tasks_per_batch;
|
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
|
||||||
|
|
||||||
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
||||||
|
|
||||||
|
@ -1815,6 +1815,9 @@ int main(int argc, char ** argv) {
|
||||||
llama_model * model;
|
llama_model * model;
|
||||||
llama_context * ctx;
|
llama_context * ctx;
|
||||||
|
|
||||||
|
// ensure there's at least enough seq_ids for HellaSwag
|
||||||
|
params.n_parallel = std::max(4, params.n_parallel);
|
||||||
|
|
||||||
// load the model and apply lora adapter, if any
|
// load the model and apply lora adapter, if any
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||||
if (model == NULL) {
|
if (model == NULL) {
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
set(TARGET server)
|
set(TARGET server)
|
||||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||||
|
option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
||||||
install(TARGETS ${TARGET} RUNTIME)
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
|
@ -7,6 +8,11 @@ target_compile_definitions(${TARGET} PRIVATE
|
||||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||||
)
|
)
|
||||||
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
if (LLAMA_SERVER_SSL)
|
||||||
|
find_package(OpenSSL REQUIRED)
|
||||||
|
target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
|
||||||
|
target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
|
||||||
|
endif()
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -42,7 +42,7 @@ see https://github.com/ggerganov/llama.cpp/issues/1437
|
||||||
- `-to N`, `--timeout N`: Server read/write timeout in seconds. Default `600`.
|
- `-to N`, `--timeout N`: Server read/write timeout in seconds. Default `600`.
|
||||||
- `--host`: Set the hostname or ip address to listen. Default `127.0.0.1`.
|
- `--host`: Set the hostname or ip address to listen. Default `127.0.0.1`.
|
||||||
- `--port`: Set the port to listen. Default: `8080`.
|
- `--port`: Set the port to listen. Default: `8080`.
|
||||||
- `--path`: path from which to serve static files (default examples/server/public)
|
- `--path`: path from which to serve static files (default: disabled)
|
||||||
- `--api-key`: Set an api key for request authorization. By default the server responds to every request. With an api key set, the requests must have the Authorization header set with the api key as Bearer token. May be used multiple times to enable multiple valid keys.
|
- `--api-key`: Set an api key for request authorization. By default the server responds to every request. With an api key set, the requests must have the Authorization header set with the api key as Bearer token. May be used multiple times to enable multiple valid keys.
|
||||||
- `--api-key-file`: path to file containing api keys delimited by new lines. If set, requests must include one of the keys for access. May be used in conjunction with `--api-key`'s.
|
- `--api-key-file`: path to file containing api keys delimited by new lines. If set, requests must include one of the keys for access. May be used in conjunction with `--api-key`'s.
|
||||||
- `--embedding`: Enable embedding extraction, Default: disabled.
|
- `--embedding`: Enable embedding extraction, Default: disabled.
|
||||||
|
@ -59,6 +59,10 @@ see https://github.com/ggerganov/llama.cpp/issues/1437
|
||||||
- `--log-disable`: Output logs to stdout only, default: enabled.
|
- `--log-disable`: Output logs to stdout only, default: enabled.
|
||||||
- `--log-format FORMAT`: Define the log output to FORMAT: json or text (default: json)
|
- `--log-format FORMAT`: Define the log output to FORMAT: json or text (default: json)
|
||||||
|
|
||||||
|
**If compiled with `LLAMA_SERVER_SSL=ON`**
|
||||||
|
- `--ssl-key-file FNAME`: path to a file containing a PEM-encoded SSL private key
|
||||||
|
- `--ssl-cert-file FNAME`: path to a file containing a PEM-encoded SSL certificate
|
||||||
|
|
||||||
## Build
|
## Build
|
||||||
|
|
||||||
server is built alongside everything else from the root of the project
|
server is built alongside everything else from the root of the project
|
||||||
|
@ -75,6 +79,28 @@ server is build alongside everything else from the root of the project
|
||||||
cmake --build . --config Release
|
cmake --build . --config Release
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Build with SSL
|
||||||
|
|
||||||
|
server can also be built with SSL support using OpenSSL 3
|
||||||
|
|
||||||
|
- Using `make`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# NOTE: For non-system openssl, use the following:
|
||||||
|
# CXXFLAGS="-I /path/to/openssl/include"
|
||||||
|
# LDFLAGS="-L /path/to/openssl/lib"
|
||||||
|
make LLAMA_SERVER_SSL=true server
|
||||||
|
```
|
||||||
|
|
||||||
|
- Using `CMake`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -DLLAMA_SERVER_SSL=ON
|
||||||
|
make server
|
||||||
|
```
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
To get started right away, run the following command, making sure to use the correct path for the model you have:
|
To get started right away, run the following command, making sure to use the correct path for the model you have:
|
||||||
|
@ -169,7 +195,11 @@ node index.js
|
||||||
|
|
||||||
*Options:*
|
*Options:*
|
||||||
|
|
||||||
`prompt`: Provide the prompt for this completion as a string or as an array of strings or numbers representing tokens. Internally, the prompt is compared to the previous completion and only the "unseen" suffix is evaluated. If the prompt is a string or an array with the first element given as a string, a `bos` token is inserted in the front like `main` does.
|
`prompt`: Provide the prompt for this completion as a string or as an array of strings or numbers representing tokens. Internally, if `cache_prompt` is `true`, the prompt is compared to the previous completion and only the "unseen" suffix is evaluated. A `BOS` token is inserted at the start, if all of the following conditions are true:
|
||||||
|
|
||||||
|
- The prompt is a string or an array with the first element given as a string
|
||||||
|
- The model's `tokenizer.ggml.add_bos_token` metadata is `true`
|
||||||
|
- The system prompt is empty
|
||||||
|
|
||||||
`temperature`: Adjust the randomness of the generated text (default: 0.8).
|
`temperature`: Adjust the randomness of the generated text (default: 0.8).
|
||||||
|
|
||||||
|
@ -282,7 +312,7 @@ Notice that each `probs` is an array of length `n_probs`.
|
||||||
|
|
||||||
`content`: Set the text to tokenize.
|
`content`: Set the text to tokenize.
|
||||||
|
|
||||||
Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
|
Note that a special `BOS` token is never inserted.
|
||||||
|
|
||||||
- **POST** `/detokenize`: Convert tokens to text.
|
- **POST** `/detokenize`: Convert tokens to text.
|
||||||
|
|
||||||
|
@ -532,7 +562,7 @@ The HTTP server supports OAI-like API
|
||||||
|
|
||||||
### Extending or building alternative Web Front End
|
### Extending or building alternative Web Front End
|
||||||
|
|
||||||
The default location for the static files is `examples/server/public`. You can extend the front end by running the server binary with `--path` set to `./your-directory` and importing `/completion.js` to get access to the llamaComplete() method.
|
You can extend the front end by running the server binary with `--path` set to `./your-directory` and importing `/completion.js` to get access to the llamaComplete() method.
|
||||||
|
|
||||||
Read the documentation in `/completion.js` to see convenient ways to access llama.
|
Read the documentation in `/completion.js` to see convenient ways to access llama.
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -6,8 +6,8 @@ Feature: Parallel
|
||||||
Given a server listening on localhost:8080
|
Given a server listening on localhost:8080
|
||||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||||
And 42 as server seed
|
And 42 as server seed
|
||||||
And 512 as batch size
|
And 128 as batch size
|
||||||
And 64 KV cache size
|
And 256 KV cache size
|
||||||
And 2 slots
|
And 2 slots
|
||||||
And continuous batching
|
And continuous batching
|
||||||
Then the server is starting
|
Then the server is starting
|
||||||
|
@ -76,6 +76,7 @@ Feature: Parallel
|
||||||
| disabled | 128 |
|
| disabled | 128 |
|
||||||
| enabled | 64 |
|
| enabled | 64 |
|
||||||
|
|
||||||
|
|
||||||
Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969
|
Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969
|
||||||
Given a prompt:
|
Given a prompt:
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -39,8 +39,9 @@ Feature: Security
|
||||||
|
|
||||||
|
|
||||||
Scenario Outline: CORS Options
|
Scenario Outline: CORS Options
|
||||||
When an OPTIONS request is sent from <origin>
|
Given a user api key llama.cpp
|
||||||
Then CORS header <cors_header> is set to <cors_header_value>
|
When an OPTIONS request is sent from <origin>
|
||||||
|
Then CORS header <cors_header> is set to <cors_header_value>
|
||||||
|
|
||||||
Examples: Headers
|
Examples: Headers
|
||||||
| origin | cors_header | cors_header_value |
|
| origin | cors_header | cors_header_value |
|
||||||
|
|
|
@ -10,11 +10,10 @@ Feature: llama.cpp server
|
||||||
# KV Cache corresponds to the total amount of tokens
|
# KV Cache corresponds to the total amount of tokens
|
||||||
# that can be stored across all independent sequences: #4130
|
# that can be stored across all independent sequences: #4130
|
||||||
# see --ctx-size and #5568
|
# see --ctx-size and #5568
|
||||||
And 32 KV cache size
|
And 256 KV cache size
|
||||||
And 512 as batch size
|
And 32 as batch size
|
||||||
And 1 slots
|
And 2 slots
|
||||||
And embeddings extraction
|
And 64 server max tokens to predict
|
||||||
And 32 server max tokens to predict
|
|
||||||
And prometheus compatible metrics exposed
|
And prometheus compatible metrics exposed
|
||||||
Then the server is starting
|
Then the server is starting
|
||||||
Then the server is healthy
|
Then the server is healthy
|
||||||
|
@ -23,18 +22,35 @@ Feature: llama.cpp server
|
||||||
Then the server is ready
|
Then the server is ready
|
||||||
And all slots are idle
|
And all slots are idle
|
||||||
|
|
||||||
|
|
||||||
Scenario Outline: Completion
|
Scenario Outline: Completion
|
||||||
Given a prompt <prompt>
|
Given a prompt <prompt>
|
||||||
And <n_predict> max tokens to predict
|
And <n_predict> max tokens to predict
|
||||||
And a completion request with no api error
|
And a completion request with no api error
|
||||||
Then <n_predicted> tokens are predicted matching <re_content>
|
Then <n_predicted> tokens are predicted matching <re_content>
|
||||||
|
And the completion is <truncated> truncated
|
||||||
|
And <n_prompt> prompt tokens are processed
|
||||||
And prometheus metrics are exposed
|
And prometheus metrics are exposed
|
||||||
And metric llamacpp:tokens_predicted is <n_predicted>
|
And metric llamacpp:tokens_predicted is <n_predicted>
|
||||||
|
|
||||||
Examples: Prompts
|
Examples: Prompts
|
||||||
| prompt | n_predict | re_content | n_predicted |
|
| prompt | n_predict | re_content | n_prompt | n_predicted | truncated |
|
||||||
| I believe the meaning of life is | 8 | (read\|going)+ | 8 |
|
| I believe the meaning of life is | 8 | (read\|going)+ | 18 | 8 | not |
|
||||||
| Write a joke about AI | 64 | (park\|friends\|scared\|always)+ | 32 |
|
| Write a joke about AI from a very long prompt which will not be truncated | 256 | (princesses\|everyone\|kids)+ | 46 | 64 | not |
|
||||||
|
|
||||||
|
Scenario: Completion prompt truncated
|
||||||
|
Given a prompt:
|
||||||
|
"""
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
|
||||||
|
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
|
||||||
|
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
|
||||||
|
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
"""
|
||||||
|
And a completion request with no api error
|
||||||
|
Then 64 tokens are predicted matching fun|Annaks|popcorns
|
||||||
|
And the completion is truncated
|
||||||
|
And 109 prompt tokens are processed
|
||||||
|
|
||||||
|
|
||||||
Scenario Outline: OAI Compatibility
|
Scenario Outline: OAI Compatibility
|
||||||
Given a model <model>
|
Given a model <model>
|
||||||
|
@ -44,11 +60,14 @@ Feature: llama.cpp server
|
||||||
And streaming is <enable_streaming>
|
And streaming is <enable_streaming>
|
||||||
Given an OAI compatible chat completions request with no api error
|
Given an OAI compatible chat completions request with no api error
|
||||||
Then <n_predicted> tokens are predicted matching <re_content>
|
Then <n_predicted> tokens are predicted matching <re_content>
|
||||||
|
And <n_prompt> prompt tokens are processed
|
||||||
|
And the completion is <truncated> truncated
|
||||||
|
|
||||||
Examples: Prompts
|
Examples: Prompts
|
||||||
| model | system_prompt | user_prompt | max_tokens | re_content | n_predicted | enable_streaming |
|
| model | system_prompt | user_prompt | max_tokens | re_content | n_prompt | n_predicted | enable_streaming | truncated |
|
||||||
| llama-2 | Book | What is the best book | 8 | (Mom\|what)+ | 8 | disabled |
|
| llama-2 | Book | What is the best book | 8 | (Here\|what)+ | 77 | 8 | disabled | not |
|
||||||
| codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 64 | (thanks\|happy\|bird)+ | 32 | enabled |
|
| codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 128 | (thanks\|happy\|bird)+ | -1 | 64 | enabled | |
|
||||||
|
|
||||||
|
|
||||||
Scenario: Tokenize / Detokenize
|
Scenario: Tokenize / Detokenize
|
||||||
When tokenizing:
|
When tokenizing:
|
||||||
|
|
|
@ -196,12 +196,30 @@ async def step_request_completion(context, api_error):
|
||||||
|
|
||||||
@step(u'{predicted_n:d} tokens are predicted matching {re_content}')
|
@step(u'{predicted_n:d} tokens are predicted matching {re_content}')
|
||||||
def step_n_tokens_predicted_with_content(context, predicted_n, re_content):
|
def step_n_tokens_predicted_with_content(context, predicted_n, re_content):
|
||||||
assert_n_tokens_predicted(context.tasks_result.pop(), predicted_n, re_content)
|
context.completion = context.tasks_result.pop()
|
||||||
|
assert_n_tokens_predicted(context.completion, predicted_n, re_content)
|
||||||
|
|
||||||
|
|
||||||
@step(u'{predicted_n:d} tokens are predicted')
|
@step(u'{predicted_n:d} tokens are predicted')
|
||||||
def step_n_tokens_predicted(context, predicted_n):
|
def step_n_tokens_predicted(context, predicted_n):
|
||||||
assert_n_tokens_predicted(context.tasks_result.pop(), predicted_n)
|
context.completion = context.tasks_result.pop()
|
||||||
|
assert_n_tokens_predicted(context.completion, predicted_n)
|
||||||
|
|
||||||
|
|
||||||
|
@step(u'the completion is truncated')
|
||||||
|
def step_assert_completion_truncated(context):
|
||||||
|
step_assert_completion_truncated(context, '')
|
||||||
|
|
||||||
|
|
||||||
|
@step(u'the completion is {truncated} truncated')
|
||||||
|
def step_assert_completion_truncated(context, truncated):
|
||||||
|
truncated = truncated != "not"
|
||||||
|
assert context.completion['truncated'] == truncated, f'{context.completion}'
|
||||||
|
|
||||||
|
|
||||||
|
@step(u'{n_prompt:d} prompt tokens are processed')
|
||||||
|
def step_impl(context, n_prompt):
|
||||||
|
assert n_prompt < 0 or n_prompt == context.completion['timings']['prompt_n'], f"n_prompt={context.completion['timings']['prompt_n']}"
|
||||||
|
|
||||||
|
|
||||||
@step(u'a user prompt {user_prompt}')
|
@step(u'a user prompt {user_prompt}')
|
||||||
|
@ -564,8 +582,9 @@ async def step_detokenize(context):
|
||||||
@async_run_until_complete
|
@async_run_until_complete
|
||||||
async def step_options_request(context, origin):
|
async def step_options_request(context, origin):
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
|
headers = {'Authorization': f'Bearer {context.user_api_key}', 'Origin': origin}
|
||||||
async with session.options(f'{context.base_url}/v1/chat/completions',
|
async with session.options(f'{context.base_url}/v1/chat/completions',
|
||||||
headers={"Origin": origin}) as response:
|
headers=headers) as response:
|
||||||
assert response.status == 200
|
assert response.status == 200
|
||||||
context.options_response = response
|
context.options_response = response
|
||||||
|
|
||||||
|
@ -722,7 +741,8 @@ async def oai_chat_completions(user_prompt,
|
||||||
completion_response = {
|
completion_response = {
|
||||||
'content': '',
|
'content': '',
|
||||||
'timings': {
|
'timings': {
|
||||||
'predicted_n': 0
|
'predicted_n': 0,
|
||||||
|
'prompt_n': 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if async_client:
|
if async_client:
|
||||||
|
@ -763,7 +783,8 @@ async def oai_chat_completions(user_prompt,
|
||||||
completion_response = {
|
completion_response = {
|
||||||
'content': chat_completion_raw['choices'][0]['message'],
|
'content': chat_completion_raw['choices'][0]['message'],
|
||||||
'timings': {
|
'timings': {
|
||||||
'predicted_n': chat_completion_raw['usage']['completion_tokens']
|
'predicted_n': chat_completion_raw['usage']['completion_tokens'],
|
||||||
|
'prompt_n': chat_completion_raw['usage']['prompt_tokens']
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
|
@ -792,13 +813,16 @@ async def oai_chat_completions(user_prompt,
|
||||||
if 'content' in delta:
|
if 'content' in delta:
|
||||||
completion_response['content'] += delta['content']
|
completion_response['content'] += delta['content']
|
||||||
completion_response['timings']['predicted_n'] += 1
|
completion_response['timings']['predicted_n'] += 1
|
||||||
|
completion_response['truncated'] = chunk.choices[0].finish_reason != 'stop'
|
||||||
else:
|
else:
|
||||||
assert len(chat_completion.choices) == 1
|
assert len(chat_completion.choices) == 1
|
||||||
completion_response = {
|
completion_response = {
|
||||||
'content': chat_completion.choices[0].message.content,
|
'content': chat_completion.choices[0].message.content,
|
||||||
'timings': {
|
'timings': {
|
||||||
'predicted_n': chat_completion.usage.completion_tokens
|
'predicted_n': chat_completion.usage.completion_tokens,
|
||||||
}
|
'prompt_n': chat_completion.usage.prompt_tokens
|
||||||
|
},
|
||||||
|
'truncated': chat_completion.choices[0].finish_reason != 'stop'
|
||||||
}
|
}
|
||||||
if debug:
|
if debug:
|
||||||
print("OAI response formatted to llama.cpp:", completion_response)
|
print("OAI response formatted to llama.cpp:", completion_response)
|
||||||
|
|
779
ggml-common.h
Normal file
779
ggml-common.h
Normal file
|
@ -0,0 +1,779 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if defined(GGML_COMMON_IMPL_C)
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
||||||
|
#define GGML_TABLE_END() };
|
||||||
|
|
||||||
|
#define GGML_COMMON_IMPL
|
||||||
|
#elif defined(GGML_COMMON_IMPL_METAL)
|
||||||
|
#include <metal_stdlib>
|
||||||
|
|
||||||
|
#define GGML_TABLE_BEGIN(type, name, size) static const constant type name[size] = {
|
||||||
|
#define GGML_TABLE_END() };
|
||||||
|
|
||||||
|
#define GGML_COMMON_IMPL
|
||||||
|
#elif defined(GGML_COMMON_IMPL_CUDA)
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
#define GGML_TABLE_BEGIN(type, name, size) static const __device__ __constant__ type name[size] = {
|
||||||
|
#define GGML_TABLE_END() };
|
||||||
|
|
||||||
|
#define GGML_COMMON_IMPL
|
||||||
|
#elif defined(GGML_COMMON_IMPL_SYCL)
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
#define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory<const type, 1> name(sycl::range<1>(size), {
|
||||||
|
#define GGML_TABLE_END() });
|
||||||
|
|
||||||
|
#define GGML_COMMON_IMPL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(GGML_COMMON_IMPL)
|
||||||
|
|
||||||
|
GGML_TABLE_BEGIN(uint8_t, kmask_iq2xs, 8)
|
||||||
|
1, 2, 4, 8, 16, 32, 64, 128
|
||||||
|
GGML_TABLE_END()
|
||||||
|
|
||||||
|
GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
|
||||||
|
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
||||||
|
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
||||||
|
160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
|
||||||
|
48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
|
||||||
|
192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
|
||||||
|
80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
|
||||||
|
96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
|
||||||
|
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
||||||
|
GGML_TABLE_END()
|
||||||
|
|
||||||
|
//#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
|
||||||
|
GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
|
||||||
|
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
|
||||||
|
0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
|
||||||
|
0xff000000ff000000, 0x00000000ff0000ff, 0x00000000ff00ff00, 0xff000000ff00ffff,
|
||||||
|
0x00000000ffff0000, 0xff000000ffff00ff, 0xff000000ffffff00, 0x00000000ffffffff,
|
||||||
|
0xff0000ff00000000, 0x000000ff000000ff, 0x000000ff0000ff00, 0xff0000ff0000ffff,
|
||||||
|
0x000000ff00ff0000, 0xff0000ff00ff00ff, 0xff0000ff00ffff00, 0x000000ff00ffffff,
|
||||||
|
0x000000ffff000000, 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0x000000ffff00ffff,
|
||||||
|
0xff0000ffffff0000, 0x000000ffffff00ff, 0x000000ffffffff00, 0xff0000ffffffffff,
|
||||||
|
0xff00ff0000000000, 0x0000ff00000000ff, 0x0000ff000000ff00, 0xff00ff000000ffff,
|
||||||
|
0x0000ff0000ff0000, 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0x0000ff0000ffffff,
|
||||||
|
0x0000ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, 0x0000ff00ff00ffff,
|
||||||
|
0xff00ff00ffff0000, 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0xff00ff00ffffffff,
|
||||||
|
0x0000ffff00000000, 0xff00ffff000000ff, 0xff00ffff0000ff00, 0x0000ffff0000ffff,
|
||||||
|
0xff00ffff00ff0000, 0x0000ffff00ff00ff, 0x0000ffff00ffff00, 0xff00ffff00ffffff,
|
||||||
|
0xff00ffffff000000, 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0xff00ffffff00ffff,
|
||||||
|
0x0000ffffffff0000, 0xff00ffffffff00ff, 0xff00ffffffffff00, 0x0000ffffffffffff,
|
||||||
|
0xffff000000000000, 0x00ff0000000000ff, 0x00ff00000000ff00, 0xffff00000000ffff,
|
||||||
|
0x00ff000000ff0000, 0xffff000000ff00ff, 0xffff000000ffff00, 0x00ff000000ffffff,
|
||||||
|
0x00ff0000ff000000, 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0x00ff0000ff00ffff,
|
||||||
|
0xffff0000ffff0000, 0x00ff0000ffff00ff, 0x00ff0000ffffff00, 0xffff0000ffffffff,
|
||||||
|
0x00ff00ff00000000, 0xffff00ff000000ff, 0xffff00ff0000ff00, 0x00ff00ff0000ffff,
|
||||||
|
0xffff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, 0xffff00ff00ffffff,
|
||||||
|
0xffff00ffff000000, 0x00ff00ffff0000ff, 0x00ff00ffff00ff00, 0xffff00ffff00ffff,
|
||||||
|
0x00ff00ffffff0000, 0xffff00ffffff00ff, 0xffff00ffffffff00, 0x00ff00ffffffffff,
|
||||||
|
0x00ffff0000000000, 0xffffff00000000ff, 0xffffff000000ff00, 0x00ffff000000ffff,
|
||||||
|
0xffffff0000ff0000, 0x00ffff0000ff00ff, 0x00ffff0000ffff00, 0xffffff0000ffffff,
|
||||||
|
0xffffff00ff000000, 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0xffffff00ff00ffff,
|
||||||
|
0x00ffff00ffff0000, 0xffffff00ffff00ff, 0xffffff00ffffff00, 0x00ffff00ffffffff,
|
||||||
|
0xffffffff00000000, 0x00ffffff000000ff, 0x00ffffff0000ff00, 0xffffffff0000ffff,
|
||||||
|
0x00ffffff00ff0000, 0xffffffff00ff00ff, 0xffffffff00ffff00, 0x00ffffff00ffffff,
|
||||||
|
0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
|
||||||
|
0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
|
||||||
|
GGML_TABLE_END()
|
||||||
|
//#endif
|
||||||
|
|
||||||
|
|
||||||
|
GGML_TABLE_BEGIN(uint64_t, iq2xxs_grid, 256)
|
||||||
|
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
||||||
|
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808,
|
||||||
|
0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
|
||||||
|
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
|
||||||
|
0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
|
||||||
|
0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
||||||
|
0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
|
||||||
|
0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
|
||||||
|
0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
||||||
|
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
|
||||||
|
0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
||||||
|
0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
||||||
|
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
|
||||||
|
0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
|
||||||
|
0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
||||||
|
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
|
||||||
|
0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
|
||||||
|
0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
||||||
|
0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
|
||||||
|
0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
|
||||||
|
0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
||||||
|
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
|
||||||
|
0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
|
||||||
|
0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
||||||
|
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
|
||||||
|
0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
|
||||||
|
0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
||||||
|
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
|
||||||
|
0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
|
||||||
|
0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
||||||
|
0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
|
||||||
|
0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
|
||||||
|
0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
||||||
|
0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
|
||||||
|
0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
|
||||||
|
0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
||||||
|
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
|
||||||
|
0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
|
||||||
|
0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
||||||
|
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
|
||||||
|
0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
|
||||||
|
0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
||||||
|
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
|
||||||
|
0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
|
||||||
|
0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
||||||
|
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
|
||||||
|
0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
|
||||||
|
0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
||||||
|
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
|
||||||
|
0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
|
||||||
|
0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
||||||
|
0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
|
||||||
|
0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
|
||||||
|
0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
||||||
|
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
|
||||||
|
0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
|
||||||
|
0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
||||||
|
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
|
||||||
|
0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
|
||||||
|
0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
||||||
|
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
|
||||||
|
0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
|
||||||
|
0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
||||||
|
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
|
||||||
|
GGML_TABLE_END()
|
||||||
|
|
||||||
|
GGML_TABLE_BEGIN(uint64_t, iq2xs_grid, 512)
|
||||||
|
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
||||||
|
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
||||||
|
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
||||||
|
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
||||||
|
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
||||||
|
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
||||||
|
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
|
||||||
|
0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
|
||||||
|
0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
||||||
|
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
|
||||||
|
0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
|
||||||
|
0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
||||||
|
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
|
||||||
|
0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
|
||||||
|
0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
||||||
|
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
|
||||||
|
0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
|
||||||
|
0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
||||||
|
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
|
||||||
|
0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
|
||||||
|
0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
||||||
|
0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
|
||||||
|
0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
|
||||||
|
0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
||||||
|
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
|
||||||
|
0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
|
||||||
|
0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
||||||
|
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
|
||||||
|
0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
|
||||||
|
0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
||||||
|
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
|
||||||
|
0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
|
||||||
|
0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
||||||
|
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
|
||||||
|
0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
|
||||||
|
0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
||||||
|
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
|
||||||
|
0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
|
||||||
|
0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
||||||
|
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
|
||||||
|
0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
|
||||||
|
0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
||||||
|
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
|
||||||
|
0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
|
||||||
|
0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
||||||
|
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
|
||||||
|
0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
|
||||||
|
0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
||||||
|
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
||||||
|
0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
|
||||||
|
0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
||||||
|
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
|
||||||
|
0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
|
||||||
|
0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
||||||
|
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
|
||||||
|
0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
|
||||||
|
0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
||||||
|
0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
|
||||||
|
0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
|
||||||
|
0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
||||||
|
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
|
||||||
|
0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
|
||||||
|
0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
||||||
|
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
|
||||||
|
0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
|
||||||
|
0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
||||||
|
0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
|
||||||
|
0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
|
||||||
|
0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
||||||
|
0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
|
||||||
|
0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
|
||||||
|
0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
||||||
|
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
|
||||||
|
0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
|
||||||
|
0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
||||||
|
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
|
||||||
|
0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
|
||||||
|
0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
||||||
|
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
|
||||||
|
0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
|
||||||
|
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
||||||
|
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
|
||||||
|
0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
|
||||||
|
0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
||||||
|
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
|
||||||
|
0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
|
||||||
|
0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
||||||
|
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
|
||||||
|
0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
|
||||||
|
0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
||||||
|
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
|
||||||
|
0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
|
||||||
|
0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
||||||
|
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
|
||||||
|
0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
|
||||||
|
0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
||||||
|
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
|
||||||
|
0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
|
||||||
|
0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
||||||
|
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
|
||||||
|
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
|
||||||
|
0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
||||||
|
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
|
||||||
|
0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
|
||||||
|
0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
||||||
|
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
|
||||||
|
0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
|
||||||
|
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
||||||
|
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
|
||||||
|
0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
|
||||||
|
0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
||||||
|
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
|
||||||
|
0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
|
||||||
|
0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
||||||
|
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
|
||||||
|
0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
|
||||||
|
0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
||||||
|
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
|
||||||
|
0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
|
||||||
|
0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
||||||
|
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
|
||||||
|
0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
|
||||||
|
0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
||||||
|
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
|
||||||
|
0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
|
||||||
|
0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
||||||
|
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
|
||||||
|
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
||||||
|
GGML_TABLE_END()
|
||||||
|
|
||||||
|
GGML_TABLE_BEGIN(uint64_t, iq2s_grid, 1024)
|
||||||
|
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
||||||
|
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
||||||
|
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
||||||
|
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
||||||
|
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
||||||
|
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
|
||||||
|
0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
|
||||||
|
0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
|
||||||
|
0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
|
||||||
|
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
|
||||||
|
0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
|
||||||
|
0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
|
||||||
|
0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
|
||||||
|
0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
|
||||||
|
0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
|
||||||
|
0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
|
||||||
|
0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
|
||||||
|
0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
|
||||||
|
0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
|
||||||
|
0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
|
||||||
|
0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
||||||
|
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
|
||||||
|
0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
|
||||||
|
0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
|
||||||
|
0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
|
||||||
|
0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
|
||||||
|
0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
|
||||||
|
0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
|
||||||
|
0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
|
||||||
|
0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
|
||||||
|
0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
|
||||||
|
0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
|
||||||
|
0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
|
||||||
|
0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
|
||||||
|
0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
|
||||||
|
0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
|
||||||
|
0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
|
||||||
|
0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
|
||||||
|
0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
|
||||||
|
0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
|
||||||
|
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
|
||||||
|
0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
|
||||||
|
0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
|
||||||
|
0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
|
||||||
|
0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
|
||||||
|
0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
||||||
|
0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
|
||||||
|
0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
|
||||||
|
0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
|
||||||
|
0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
|
||||||
|
0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
|
||||||
|
0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
|
||||||
|
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
|
||||||
|
0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
|
||||||
|
0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
|
||||||
|
0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
|
||||||
|
0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
|
||||||
|
0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
|
||||||
|
0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
|
||||||
|
0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
|
||||||
|
0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
|
||||||
|
0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
|
||||||
|
0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
|
||||||
|
0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
|
||||||
|
0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
|
||||||
|
0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
|
||||||
|
0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
|
||||||
|
0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
|
||||||
|
0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
|
||||||
|
0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
|
||||||
|
0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
||||||
|
0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
|
||||||
|
0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
|
||||||
|
0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
|
||||||
|
0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
|
||||||
|
0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
|
||||||
|
0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
|
||||||
|
0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
|
||||||
|
0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
|
||||||
|
0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
|
||||||
|
0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
|
||||||
|
0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
|
||||||
|
0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
|
||||||
|
0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
|
||||||
|
0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
|
||||||
|
0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
|
||||||
|
0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
|
||||||
|
0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
|
||||||
|
0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
|
||||||
|
0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
|
||||||
|
0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
|
||||||
|
0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
|
||||||
|
0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
|
||||||
|
0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
|
||||||
|
0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
|
||||||
|
0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
|
||||||
|
0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
|
||||||
|
0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
|
||||||
|
0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
|
||||||
|
0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
|
||||||
|
0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
|
||||||
|
0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
|
||||||
|
0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
|
||||||
|
0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
|
||||||
|
0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
|
||||||
|
0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
|
||||||
|
0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
|
||||||
|
0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
|
||||||
|
0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
|
||||||
|
0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
|
||||||
|
0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
|
||||||
|
0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
|
||||||
|
0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
|
||||||
|
0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
|
||||||
|
0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
|
||||||
|
0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
|
||||||
|
0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
|
||||||
|
0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
||||||
|
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
|
||||||
|
0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
|
||||||
|
0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
|
||||||
|
0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
|
||||||
|
0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
|
||||||
|
0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
|
||||||
|
0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
|
||||||
|
0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
|
||||||
|
0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
||||||
|
0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
|
||||||
|
0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
|
||||||
|
0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
|
||||||
|
0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
|
||||||
|
0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
|
||||||
|
0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
|
||||||
|
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
|
||||||
|
0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
|
||||||
|
0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
|
||||||
|
0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
|
||||||
|
0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
|
||||||
|
0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
|
||||||
|
0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
|
||||||
|
0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
|
||||||
|
0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
|
||||||
|
0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
|
||||||
|
0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
|
||||||
|
0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
|
||||||
|
0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
|
||||||
|
0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
|
||||||
|
0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
|
||||||
|
0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
|
||||||
|
0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
|
||||||
|
0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
|
||||||
|
0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
|
||||||
|
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
|
||||||
|
0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
|
||||||
|
0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
|
||||||
|
0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
|
||||||
|
0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
|
||||||
|
0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
|
||||||
|
0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
|
||||||
|
0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
|
||||||
|
0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
|
||||||
|
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
|
||||||
|
0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
|
||||||
|
0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
|
||||||
|
0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
|
||||||
|
0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
|
||||||
|
0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
|
||||||
|
0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
|
||||||
|
0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
|
||||||
|
0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
|
||||||
|
0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
|
||||||
|
0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
|
||||||
|
0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
|
||||||
|
0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
|
||||||
|
0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
|
||||||
|
0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
|
||||||
|
0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
|
||||||
|
0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
|
||||||
|
0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
|
||||||
|
0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
|
||||||
|
0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
|
||||||
|
0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
|
||||||
|
0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
|
||||||
|
0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
|
||||||
|
0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
|
||||||
|
0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
|
||||||
|
0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
|
||||||
|
0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
|
||||||
|
0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
|
||||||
|
0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
|
||||||
|
0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
|
||||||
|
0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
|
||||||
|
0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
|
||||||
|
0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
|
||||||
|
0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
|
||||||
|
0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
|
||||||
|
0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
|
||||||
|
0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
|
||||||
|
0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
|
||||||
|
0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
|
||||||
|
0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
|
||||||
|
0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
|
||||||
|
0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
|
||||||
|
0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
|
||||||
|
0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
|
||||||
|
0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
||||||
|
0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
|
||||||
|
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
|
||||||
|
0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
|
||||||
|
0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
|
||||||
|
0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
|
||||||
|
0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
|
||||||
|
0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
|
||||||
|
0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
|
||||||
|
0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
|
||||||
|
0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
|
||||||
|
0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
|
||||||
|
0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
|
||||||
|
0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
|
||||||
|
0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
|
||||||
|
0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
|
||||||
|
0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
|
||||||
|
0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
|
||||||
|
0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
|
||||||
|
0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
|
||||||
|
0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
|
||||||
|
0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
|
||||||
|
0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
|
||||||
|
0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
|
||||||
|
0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
|
||||||
|
0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
|
||||||
|
0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
|
||||||
|
0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
|
||||||
|
0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
|
||||||
|
0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
|
||||||
|
0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
|
||||||
|
0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
|
||||||
|
0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
|
||||||
|
0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
|
||||||
|
0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
|
||||||
|
0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
|
||||||
|
0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
|
||||||
|
0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
|
||||||
|
0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
|
||||||
|
0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
|
||||||
|
0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
|
||||||
|
0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
|
||||||
|
0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
|
||||||
|
0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
|
||||||
|
0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
|
||||||
|
0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
|
||||||
|
0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
|
||||||
|
0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
|
||||||
|
0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
|
||||||
|
0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
|
||||||
|
0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
|
||||||
|
GGML_TABLE_END()
|
||||||
|
|
||||||
|
GGML_TABLE_BEGIN(uint32_t, iq3xxs_grid, 256)
|
||||||
|
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
||||||
|
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
||||||
|
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
||||||
|
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
||||||
|
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
||||||
|
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
||||||
|
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
||||||
|
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
||||||
|
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
||||||
|
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
||||||
|
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
||||||
|
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
||||||
|
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
||||||
|
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
||||||
|
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
||||||
|
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
||||||
|
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
||||||
|
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
||||||
|
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
||||||
|
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
||||||
|
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
||||||
|
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
||||||
|
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
||||||
|
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
||||||
|
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
||||||
|
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
||||||
|
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
||||||
|
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
||||||
|
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
||||||
|
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
||||||
|
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
||||||
|
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
||||||
|
GGML_TABLE_END()
|
||||||
|
|
||||||
|
GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
|
||||||
|
0x01010101, 0x01010103, 0x01010105, 0x0101010b, 0x0101010f, 0x01010301, 0x01010303, 0x01010305,
|
||||||
|
0x01010309, 0x0101030d, 0x01010501, 0x01010503, 0x0101050b, 0x01010707, 0x01010901, 0x01010905,
|
||||||
|
0x0101090b, 0x0101090f, 0x01010b03, 0x01010b07, 0x01010d01, 0x01010d05, 0x01010f03, 0x01010f09,
|
||||||
|
0x01010f0f, 0x01030101, 0x01030103, 0x01030105, 0x01030109, 0x01030301, 0x01030303, 0x0103030b,
|
||||||
|
0x01030501, 0x01030507, 0x0103050f, 0x01030703, 0x0103070b, 0x01030909, 0x01030d03, 0x01030d0b,
|
||||||
|
0x01030f05, 0x01050101, 0x01050103, 0x0105010b, 0x0105010f, 0x01050301, 0x01050307, 0x0105030d,
|
||||||
|
0x01050503, 0x0105050b, 0x01050701, 0x01050709, 0x01050905, 0x0105090b, 0x0105090f, 0x01050b03,
|
||||||
|
0x01050b07, 0x01050f01, 0x01050f07, 0x01070107, 0x01070303, 0x0107030b, 0x01070501, 0x01070505,
|
||||||
|
0x01070703, 0x01070707, 0x0107070d, 0x01070909, 0x01070b01, 0x01070b05, 0x01070d0f, 0x01070f03,
|
||||||
|
0x01070f0b, 0x01090101, 0x01090307, 0x0109030f, 0x01090503, 0x01090509, 0x01090705, 0x01090901,
|
||||||
|
0x01090907, 0x01090b03, 0x01090f01, 0x010b0105, 0x010b0109, 0x010b0501, 0x010b0505, 0x010b050d,
|
||||||
|
0x010b0707, 0x010b0903, 0x010b090b, 0x010b090f, 0x010b0d0d, 0x010b0f07, 0x010d010d, 0x010d0303,
|
||||||
|
0x010d0307, 0x010d0703, 0x010d0b05, 0x010d0f03, 0x010f0101, 0x010f0105, 0x010f0109, 0x010f0501,
|
||||||
|
0x010f0505, 0x010f050d, 0x010f0707, 0x010f0b01, 0x010f0b09, 0x03010101, 0x03010103, 0x03010105,
|
||||||
|
0x03010109, 0x03010301, 0x03010303, 0x03010307, 0x0301030b, 0x0301030f, 0x03010501, 0x03010505,
|
||||||
|
0x03010703, 0x03010709, 0x0301070d, 0x03010b09, 0x03010b0d, 0x03010d03, 0x03010f05, 0x03030101,
|
||||||
|
0x03030103, 0x03030107, 0x0303010d, 0x03030301, 0x03030309, 0x03030503, 0x03030701, 0x03030707,
|
||||||
|
0x03030903, 0x03030b01, 0x03030b05, 0x03030f01, 0x03030f0d, 0x03050101, 0x03050305, 0x0305030b,
|
||||||
|
0x0305030f, 0x03050501, 0x03050509, 0x03050705, 0x03050901, 0x03050907, 0x03050b0b, 0x03050d01,
|
||||||
|
0x03050f05, 0x03070103, 0x03070109, 0x0307010f, 0x03070301, 0x03070307, 0x03070503, 0x0307050f,
|
||||||
|
0x03070701, 0x03070709, 0x03070903, 0x03070d05, 0x03070f01, 0x03090107, 0x0309010b, 0x03090305,
|
||||||
|
0x03090309, 0x03090703, 0x03090707, 0x03090905, 0x0309090d, 0x03090b01, 0x03090b09, 0x030b0103,
|
||||||
|
0x030b0301, 0x030b0307, 0x030b0503, 0x030b0701, 0x030b0705, 0x030b0b03, 0x030d0501, 0x030d0509,
|
||||||
|
0x030d050f, 0x030d0909, 0x030d090d, 0x030f0103, 0x030f0107, 0x030f0301, 0x030f0305, 0x030f0503,
|
||||||
|
0x030f070b, 0x030f0903, 0x030f0d05, 0x030f0f01, 0x05010101, 0x05010103, 0x05010107, 0x0501010b,
|
||||||
|
0x0501010f, 0x05010301, 0x05010305, 0x05010309, 0x0501030d, 0x05010503, 0x05010507, 0x0501050f,
|
||||||
|
0x05010701, 0x05010705, 0x05010903, 0x05010907, 0x0501090b, 0x05010b01, 0x05010b05, 0x05010d0f,
|
||||||
|
0x05010f01, 0x05010f07, 0x05010f0b, 0x05030101, 0x05030105, 0x05030301, 0x05030307, 0x0503030f,
|
||||||
|
0x05030505, 0x0503050b, 0x05030703, 0x05030709, 0x05030905, 0x05030b03, 0x05050103, 0x05050109,
|
||||||
|
0x0505010f, 0x05050503, 0x05050507, 0x05050701, 0x0505070f, 0x05050903, 0x05050b07, 0x05050b0f,
|
||||||
|
0x05050f03, 0x05050f09, 0x05070101, 0x05070105, 0x0507010b, 0x05070303, 0x05070505, 0x05070509,
|
||||||
|
0x05070703, 0x05070707, 0x05070905, 0x05070b01, 0x05070d0d, 0x05090103, 0x0509010f, 0x05090501,
|
||||||
|
0x05090507, 0x05090705, 0x0509070b, 0x05090903, 0x05090f05, 0x05090f0b, 0x050b0109, 0x050b0303,
|
||||||
|
0x050b0505, 0x050b070f, 0x050b0901, 0x050b0b07, 0x050b0f01, 0x050d0101, 0x050d0105, 0x050d010f,
|
||||||
|
0x050d0503, 0x050d0b0b, 0x050d0d03, 0x050f010b, 0x050f0303, 0x050f050d, 0x050f0701, 0x050f0907,
|
||||||
|
0x050f0b01, 0x07010105, 0x07010303, 0x07010307, 0x0701030b, 0x0701030f, 0x07010505, 0x07010703,
|
||||||
|
0x07010707, 0x0701070b, 0x07010905, 0x07010909, 0x0701090f, 0x07010b03, 0x07010d07, 0x07010f03,
|
||||||
|
0x07030103, 0x07030107, 0x0703010b, 0x07030309, 0x07030503, 0x07030507, 0x07030901, 0x07030d01,
|
||||||
|
0x07030f05, 0x07030f0d, 0x07050101, 0x07050305, 0x07050501, 0x07050705, 0x07050709, 0x07050b01,
|
||||||
|
0x07070103, 0x07070301, 0x07070309, 0x07070503, 0x07070507, 0x0707050f, 0x07070701, 0x07070903,
|
||||||
|
0x07070907, 0x0707090f, 0x07070b0b, 0x07070f07, 0x07090107, 0x07090303, 0x0709030d, 0x07090505,
|
||||||
|
0x07090703, 0x07090b05, 0x07090d01, 0x07090d09, 0x070b0103, 0x070b0301, 0x070b0305, 0x070b050b,
|
||||||
|
0x070b0705, 0x070b0909, 0x070b0b0d, 0x070b0f07, 0x070d030d, 0x070d0903, 0x070f0103, 0x070f0107,
|
||||||
|
0x070f0501, 0x070f0505, 0x070f070b, 0x09010101, 0x09010109, 0x09010305, 0x09010501, 0x09010509,
|
||||||
|
0x0901050f, 0x09010705, 0x09010903, 0x09010b01, 0x09010f01, 0x09030105, 0x0903010f, 0x09030303,
|
||||||
|
0x09030307, 0x09030505, 0x09030701, 0x0903070b, 0x09030907, 0x09030b03, 0x09030b0b, 0x09050103,
|
||||||
|
0x09050107, 0x09050301, 0x0905030b, 0x09050503, 0x09050707, 0x09050901, 0x09050b0f, 0x09050d05,
|
||||||
|
0x09050f01, 0x09070109, 0x09070303, 0x09070307, 0x09070501, 0x09070505, 0x09070703, 0x0907070b,
|
||||||
|
0x09090101, 0x09090105, 0x09090509, 0x0909070f, 0x09090901, 0x09090f03, 0x090b010b, 0x090b010f,
|
||||||
|
0x090b0503, 0x090b0d05, 0x090d0307, 0x090d0709, 0x090d0d01, 0x090f0301, 0x090f030b, 0x090f0701,
|
||||||
|
0x090f0907, 0x090f0b03, 0x0b010105, 0x0b010301, 0x0b010309, 0x0b010505, 0x0b010901, 0x0b010909,
|
||||||
|
0x0b01090f, 0x0b010b05, 0x0b010d0d, 0x0b010f09, 0x0b030103, 0x0b030107, 0x0b03010b, 0x0b030305,
|
||||||
|
0x0b030503, 0x0b030705, 0x0b030f05, 0x0b050101, 0x0b050303, 0x0b050507, 0x0b050701, 0x0b05070d,
|
||||||
|
0x0b050b07, 0x0b070105, 0x0b07010f, 0x0b070301, 0x0b07050f, 0x0b070909, 0x0b070b03, 0x0b070d0b,
|
||||||
|
0x0b070f07, 0x0b090103, 0x0b090109, 0x0b090501, 0x0b090705, 0x0b09090d, 0x0b0b0305, 0x0b0b050d,
|
||||||
|
0x0b0b0b03, 0x0b0b0b07, 0x0b0d0905, 0x0b0f0105, 0x0b0f0109, 0x0b0f0505, 0x0d010303, 0x0d010307,
|
||||||
|
0x0d01030b, 0x0d010703, 0x0d010707, 0x0d010d01, 0x0d030101, 0x0d030501, 0x0d03050f, 0x0d030d09,
|
||||||
|
0x0d050305, 0x0d050709, 0x0d050905, 0x0d050b0b, 0x0d050d05, 0x0d050f01, 0x0d070101, 0x0d070309,
|
||||||
|
0x0d070503, 0x0d070901, 0x0d09050b, 0x0d090907, 0x0d090d05, 0x0d0b0101, 0x0d0b0107, 0x0d0b0709,
|
||||||
|
0x0d0b0d01, 0x0d0d010b, 0x0d0d0901, 0x0d0f0303, 0x0d0f0307, 0x0f010101, 0x0f010109, 0x0f01010f,
|
||||||
|
0x0f010501, 0x0f010505, 0x0f01070d, 0x0f010901, 0x0f010b09, 0x0f010d05, 0x0f030105, 0x0f030303,
|
||||||
|
0x0f030509, 0x0f030907, 0x0f03090b, 0x0f050103, 0x0f050109, 0x0f050301, 0x0f05030d, 0x0f050503,
|
||||||
|
0x0f050701, 0x0f050b03, 0x0f070105, 0x0f070705, 0x0f07070b, 0x0f070b07, 0x0f090103, 0x0f09010b,
|
||||||
|
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
|
||||||
|
GGML_TABLE_END()
|
||||||
|
|
||||||
|
#define NGRID_IQ2XXS 512
|
||||||
|
GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ2XXS)
|
||||||
|
0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000,
|
||||||
|
0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01,
|
||||||
|
0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100,
|
||||||
|
0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00,
|
||||||
|
0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101,
|
||||||
|
0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100,
|
||||||
|
0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00,
|
||||||
|
0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff,
|
||||||
|
0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000,
|
||||||
|
0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000,
|
||||||
|
0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001,
|
||||||
|
0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff,
|
||||||
|
0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01,
|
||||||
|
0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001,
|
||||||
|
0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00,
|
||||||
|
0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001,
|
||||||
|
0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100,
|
||||||
|
0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000,
|
||||||
|
0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000,
|
||||||
|
0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000,
|
||||||
|
0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff,
|
||||||
|
0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff,
|
||||||
|
0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01,
|
||||||
|
0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100,
|
||||||
|
0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff,
|
||||||
|
0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000,
|
||||||
|
0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101,
|
||||||
|
0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff,
|
||||||
|
0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff,
|
||||||
|
0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001,
|
||||||
|
0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01,
|
||||||
|
0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101,
|
||||||
|
0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100,
|
||||||
|
0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00,
|
||||||
|
0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001,
|
||||||
|
0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff,
|
||||||
|
0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000,
|
||||||
|
0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000,
|
||||||
|
0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100,
|
||||||
|
0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100,
|
||||||
|
0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01,
|
||||||
|
0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff,
|
||||||
|
0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101,
|
||||||
|
0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000,
|
||||||
|
0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff,
|
||||||
|
0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000,
|
||||||
|
0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff,
|
||||||
|
0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00,
|
||||||
|
0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101,
|
||||||
|
0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000,
|
||||||
|
0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000,
|
||||||
|
0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000,
|
||||||
|
0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100,
|
||||||
|
0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000,
|
||||||
|
0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
|
||||||
|
0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff,
|
||||||
|
0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000,
|
||||||
|
0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000,
|
||||||
|
0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
|
||||||
|
0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000,
|
||||||
|
0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff,
|
||||||
|
0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000,
|
||||||
|
0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001,
|
||||||
|
0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
|
||||||
|
0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100,
|
||||||
|
0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000,
|
||||||
|
0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00,
|
||||||
|
0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100,
|
||||||
|
0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000,
|
||||||
|
0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001,
|
||||||
|
0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00,
|
||||||
|
0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff,
|
||||||
|
0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100,
|
||||||
|
0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff,
|
||||||
|
0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000,
|
||||||
|
0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff,
|
||||||
|
0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff,
|
||||||
|
0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00,
|
||||||
|
0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001,
|
||||||
|
0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001,
|
||||||
|
0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01,
|
||||||
|
0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000,
|
||||||
|
0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101,
|
||||||
|
0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00,
|
||||||
|
0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100,
|
||||||
|
0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101,
|
||||||
|
0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101,
|
||||||
|
0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000,
|
||||||
|
0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff,
|
||||||
|
0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff,
|
||||||
|
0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101,
|
||||||
|
0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff,
|
||||||
|
0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101,
|
||||||
|
0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001,
|
||||||
|
0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff,
|
||||||
|
0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff,
|
||||||
|
0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01,
|
||||||
|
0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff,
|
||||||
|
0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100,
|
||||||
|
0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001,
|
||||||
|
0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00,
|
||||||
|
0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff,
|
||||||
|
0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff,
|
||||||
|
0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000,
|
||||||
|
0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000,
|
||||||
|
0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101,
|
||||||
|
0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001,
|
||||||
|
0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000,
|
||||||
|
0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101,
|
||||||
|
0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000,
|
||||||
|
0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001,
|
||||||
|
0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000,
|
||||||
|
0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100,
|
||||||
|
0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000,
|
||||||
|
0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000,
|
||||||
|
0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100,
|
||||||
|
0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff,
|
||||||
|
0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff,
|
||||||
|
0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00,
|
||||||
|
0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101,
|
||||||
|
0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000,
|
||||||
|
0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00,
|
||||||
|
0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000,
|
||||||
|
0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff,
|
||||||
|
0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101,
|
||||||
|
0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff,
|
||||||
|
0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00,
|
||||||
|
0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff,
|
||||||
|
GGML_TABLE_END()
|
||||||
|
|
||||||
|
#endif // GGML_COMMON_IMPL
|
743
ggml-cuda.cu
743
ggml-cuda.cu
|
@ -2,6 +2,9 @@
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
#include "ggml-backend-impl.h"
|
#include "ggml-backend-impl.h"
|
||||||
|
|
||||||
|
#define GGML_COMMON_IMPL_CUDA
|
||||||
|
#include "ggml-common.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
@ -1569,746 +1572,6 @@ static __global__ void dequantize_block_q6_K(const void * __restrict__ vx, dst_t
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static const __device__ uint64_t iq2xxs_grid[256] = {
|
|
||||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
|
||||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808,
|
|
||||||
0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
|
|
||||||
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
|
|
||||||
0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
|
|
||||||
0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
|
||||||
0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
|
|
||||||
0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
|
|
||||||
0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
|
||||||
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
|
|
||||||
0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
|
||||||
0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
|
||||||
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
|
|
||||||
0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
|
|
||||||
0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
|
||||||
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
|
|
||||||
0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
|
|
||||||
0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
|
||||||
0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
|
|
||||||
0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
|
|
||||||
0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
|
||||||
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
|
|
||||||
0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
|
|
||||||
0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
|
||||||
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
|
|
||||||
0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
|
|
||||||
0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
|
||||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
|
|
||||||
0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
|
|
||||||
0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
|
||||||
0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
|
|
||||||
0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
|
|
||||||
0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
|
||||||
0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
|
|
||||||
0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
|
|
||||||
0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
|
||||||
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
|
|
||||||
0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
|
|
||||||
0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
|
||||||
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
|
|
||||||
0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
|
|
||||||
0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
|
||||||
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
|
|
||||||
0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
|
|
||||||
0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
|
||||||
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
|
|
||||||
0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
|
|
||||||
0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
|
||||||
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
|
|
||||||
0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
|
|
||||||
0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
|
||||||
0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
|
|
||||||
0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
|
|
||||||
0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
|
||||||
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
|
|
||||||
0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
|
|
||||||
0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
|
||||||
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
|
|
||||||
0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
|
|
||||||
0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
|
||||||
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
|
|
||||||
0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
|
|
||||||
0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
|
||||||
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const __device__ uint64_t iq2xs_grid[512] = {
|
|
||||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
|
||||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
|
||||||
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
|
||||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
|
||||||
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
|
||||||
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
|
||||||
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
|
|
||||||
0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
|
|
||||||
0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
|
||||||
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
|
|
||||||
0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
|
|
||||||
0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
|
||||||
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
|
|
||||||
0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
|
|
||||||
0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
|
||||||
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
|
|
||||||
0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
|
|
||||||
0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
|
||||||
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
|
|
||||||
0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
|
|
||||||
0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
|
||||||
0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
|
|
||||||
0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
|
|
||||||
0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
|
||||||
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
|
|
||||||
0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
|
|
||||||
0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
|
||||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
|
|
||||||
0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
|
|
||||||
0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
|
||||||
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
|
|
||||||
0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
|
|
||||||
0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
|
||||||
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
|
|
||||||
0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
|
|
||||||
0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
|
||||||
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
|
|
||||||
0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
|
|
||||||
0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
|
||||||
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
|
|
||||||
0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
|
|
||||||
0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
|
||||||
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
|
|
||||||
0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
|
|
||||||
0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
|
||||||
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
|
|
||||||
0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
|
|
||||||
0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
|
||||||
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
|
||||||
0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
|
|
||||||
0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
|
||||||
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
|
|
||||||
0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
|
|
||||||
0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
|
||||||
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
|
|
||||||
0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
|
|
||||||
0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
|
||||||
0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
|
|
||||||
0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
|
|
||||||
0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
|
||||||
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
|
|
||||||
0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
|
|
||||||
0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
|
||||||
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
|
|
||||||
0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
|
|
||||||
0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
|
||||||
0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
|
|
||||||
0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
|
|
||||||
0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
|
||||||
0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
|
|
||||||
0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
|
|
||||||
0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
|
||||||
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
|
|
||||||
0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
|
|
||||||
0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
|
||||||
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
|
|
||||||
0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
|
|
||||||
0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
|
||||||
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
|
|
||||||
0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
|
|
||||||
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
|
||||||
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
|
|
||||||
0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
|
|
||||||
0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
|
||||||
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
|
|
||||||
0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
|
|
||||||
0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
|
||||||
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
|
|
||||||
0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
|
|
||||||
0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
|
||||||
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
|
|
||||||
0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
|
|
||||||
0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
|
||||||
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
|
|
||||||
0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
|
|
||||||
0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
|
||||||
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
|
|
||||||
0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
|
|
||||||
0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
|
||||||
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
|
|
||||||
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
|
|
||||||
0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
|
||||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
|
|
||||||
0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
|
|
||||||
0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
|
||||||
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
|
|
||||||
0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
|
|
||||||
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
|
||||||
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
|
|
||||||
0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
|
|
||||||
0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
|
||||||
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
|
|
||||||
0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
|
|
||||||
0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
|
||||||
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
|
|
||||||
0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
|
|
||||||
0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
|
||||||
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
|
|
||||||
0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
|
|
||||||
0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
|
||||||
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
|
|
||||||
0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
|
|
||||||
0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
|
||||||
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
|
|
||||||
0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
|
|
||||||
0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
|
||||||
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
|
|
||||||
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const __device__ uint64_t iq2s_grid[1024] = {
|
|
||||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
|
||||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
|
||||||
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
|
||||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
|
||||||
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
|
||||||
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
|
|
||||||
0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
|
|
||||||
0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
|
|
||||||
0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
|
|
||||||
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
|
|
||||||
0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
|
|
||||||
0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
|
|
||||||
0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
|
|
||||||
0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
|
|
||||||
0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
|
|
||||||
0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
|
|
||||||
0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
|
|
||||||
0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
|
|
||||||
0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
|
|
||||||
0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
|
|
||||||
0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
|
||||||
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
|
|
||||||
0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
|
|
||||||
0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
|
|
||||||
0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
|
|
||||||
0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
|
|
||||||
0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
|
|
||||||
0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
|
|
||||||
0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
|
|
||||||
0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
|
|
||||||
0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
|
|
||||||
0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
|
|
||||||
0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
|
|
||||||
0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
|
|
||||||
0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
|
|
||||||
0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
|
|
||||||
0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
|
|
||||||
0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
|
|
||||||
0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
|
|
||||||
0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
|
|
||||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
|
|
||||||
0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
|
|
||||||
0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
|
|
||||||
0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
|
|
||||||
0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
|
|
||||||
0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
|
||||||
0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
|
|
||||||
0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
|
|
||||||
0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
|
|
||||||
0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
|
|
||||||
0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
|
|
||||||
0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
|
|
||||||
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
|
|
||||||
0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
|
|
||||||
0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
|
|
||||||
0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
|
|
||||||
0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
|
|
||||||
0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
|
|
||||||
0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
|
|
||||||
0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
|
|
||||||
0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
|
|
||||||
0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
|
|
||||||
0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
|
|
||||||
0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
|
|
||||||
0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
|
|
||||||
0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
|
|
||||||
0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
|
|
||||||
0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
|
|
||||||
0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
|
|
||||||
0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
|
|
||||||
0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
|
||||||
0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
|
|
||||||
0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
|
|
||||||
0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
|
|
||||||
0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
|
|
||||||
0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
|
|
||||||
0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
|
|
||||||
0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
|
|
||||||
0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
|
|
||||||
0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
|
|
||||||
0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
|
|
||||||
0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
|
|
||||||
0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
|
|
||||||
0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
|
|
||||||
0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
|
|
||||||
0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
|
|
||||||
0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
|
|
||||||
0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
|
|
||||||
0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
|
|
||||||
0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
|
|
||||||
0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
|
|
||||||
0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
|
|
||||||
0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
|
|
||||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
|
|
||||||
0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
|
|
||||||
0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
|
|
||||||
0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
|
|
||||||
0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
|
|
||||||
0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
|
|
||||||
0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
|
|
||||||
0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
|
|
||||||
0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
|
|
||||||
0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
|
|
||||||
0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
|
|
||||||
0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
|
|
||||||
0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
|
|
||||||
0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
|
|
||||||
0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
|
|
||||||
0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
|
|
||||||
0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
|
|
||||||
0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
|
|
||||||
0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
|
|
||||||
0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
|
|
||||||
0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
|
|
||||||
0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
|
|
||||||
0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
|
|
||||||
0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
|
|
||||||
0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
|
||||||
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
|
|
||||||
0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
|
|
||||||
0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
|
|
||||||
0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
|
|
||||||
0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
|
|
||||||
0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
|
|
||||||
0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
|
|
||||||
0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
|
|
||||||
0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
|
||||||
0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
|
|
||||||
0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
|
|
||||||
0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
|
|
||||||
0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
|
|
||||||
0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
|
|
||||||
0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
|
|
||||||
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
|
|
||||||
0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
|
|
||||||
0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
|
|
||||||
0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
|
|
||||||
0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
|
|
||||||
0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
|
|
||||||
0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
|
|
||||||
0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
|
|
||||||
0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
|
|
||||||
0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
|
|
||||||
0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
|
|
||||||
0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
|
|
||||||
0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
|
|
||||||
0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
|
|
||||||
0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
|
|
||||||
0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
|
|
||||||
0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
|
|
||||||
0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
|
|
||||||
0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
|
|
||||||
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
|
|
||||||
0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
|
|
||||||
0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
|
|
||||||
0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
|
|
||||||
0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
|
|
||||||
0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
|
|
||||||
0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
|
|
||||||
0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
|
|
||||||
0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
|
|
||||||
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
|
|
||||||
0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
|
|
||||||
0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
|
|
||||||
0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
|
|
||||||
0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
|
|
||||||
0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
|
|
||||||
0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
|
|
||||||
0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
|
|
||||||
0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
|
|
||||||
0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
|
|
||||||
0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
|
|
||||||
0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
|
|
||||||
0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
|
|
||||||
0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
|
|
||||||
0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
|
|
||||||
0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
|
|
||||||
0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
|
|
||||||
0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
|
|
||||||
0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
|
|
||||||
0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
|
|
||||||
0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
|
|
||||||
0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
|
|
||||||
0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
|
|
||||||
0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
|
|
||||||
0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
|
|
||||||
0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
|
|
||||||
0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
|
|
||||||
0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
|
|
||||||
0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
|
|
||||||
0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
|
|
||||||
0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
|
|
||||||
0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
|
|
||||||
0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
|
|
||||||
0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
|
|
||||||
0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
|
|
||||||
0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
|
|
||||||
0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
|
|
||||||
0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
|
|
||||||
0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
|
|
||||||
0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
|
|
||||||
0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
|
|
||||||
0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
|
|
||||||
0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
|
|
||||||
0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
|
|
||||||
0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
|
||||||
0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
|
|
||||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
|
|
||||||
0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
|
|
||||||
0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
|
|
||||||
0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
|
|
||||||
0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
|
|
||||||
0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
|
|
||||||
0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
|
|
||||||
0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
|
|
||||||
0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
|
|
||||||
0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
|
|
||||||
0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
|
|
||||||
0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
|
|
||||||
0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
|
|
||||||
0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
|
|
||||||
0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
|
|
||||||
0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
|
|
||||||
0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
|
|
||||||
0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
|
|
||||||
0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
|
|
||||||
0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
|
|
||||||
0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
|
|
||||||
0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
|
|
||||||
0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
|
|
||||||
0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
|
|
||||||
0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
|
|
||||||
0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
|
|
||||||
0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
|
|
||||||
0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
|
|
||||||
0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
|
|
||||||
0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
|
|
||||||
0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
|
|
||||||
0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
|
|
||||||
0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
|
|
||||||
0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
|
|
||||||
0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
|
|
||||||
0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
|
|
||||||
0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
|
|
||||||
0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
|
|
||||||
0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
|
|
||||||
0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
|
|
||||||
0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
|
|
||||||
0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
|
|
||||||
0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
|
|
||||||
0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
|
|
||||||
0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
|
|
||||||
0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
|
|
||||||
0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
|
|
||||||
0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
|
|
||||||
0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const __device__ uint32_t iq3xxs_grid[256] = {
|
|
||||||
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
|
||||||
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
|
||||||
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
|
||||||
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
|
||||||
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
|
||||||
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
|
||||||
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
|
||||||
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
|
||||||
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
|
||||||
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
|
||||||
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
|
||||||
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
|
||||||
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
|
||||||
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
|
||||||
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
|
||||||
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
|
||||||
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
|
||||||
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
|
||||||
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
|
||||||
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
|
||||||
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
|
||||||
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
|
||||||
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
|
||||||
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
|
||||||
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
|
||||||
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
|
||||||
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
|
||||||
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
|
||||||
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
|
||||||
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
|
||||||
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
|
||||||
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const __device__ uint32_t iq3s_grid[512] = {
|
|
||||||
0x01010101, 0x01010103, 0x01010105, 0x0101010b, 0x0101010f, 0x01010301, 0x01010303, 0x01010305,
|
|
||||||
0x01010309, 0x0101030d, 0x01010501, 0x01010503, 0x0101050b, 0x01010707, 0x01010901, 0x01010905,
|
|
||||||
0x0101090b, 0x0101090f, 0x01010b03, 0x01010b07, 0x01010d01, 0x01010d05, 0x01010f03, 0x01010f09,
|
|
||||||
0x01010f0f, 0x01030101, 0x01030103, 0x01030105, 0x01030109, 0x01030301, 0x01030303, 0x0103030b,
|
|
||||||
0x01030501, 0x01030507, 0x0103050f, 0x01030703, 0x0103070b, 0x01030909, 0x01030d03, 0x01030d0b,
|
|
||||||
0x01030f05, 0x01050101, 0x01050103, 0x0105010b, 0x0105010f, 0x01050301, 0x01050307, 0x0105030d,
|
|
||||||
0x01050503, 0x0105050b, 0x01050701, 0x01050709, 0x01050905, 0x0105090b, 0x0105090f, 0x01050b03,
|
|
||||||
0x01050b07, 0x01050f01, 0x01050f07, 0x01070107, 0x01070303, 0x0107030b, 0x01070501, 0x01070505,
|
|
||||||
0x01070703, 0x01070707, 0x0107070d, 0x01070909, 0x01070b01, 0x01070b05, 0x01070d0f, 0x01070f03,
|
|
||||||
0x01070f0b, 0x01090101, 0x01090307, 0x0109030f, 0x01090503, 0x01090509, 0x01090705, 0x01090901,
|
|
||||||
0x01090907, 0x01090b03, 0x01090f01, 0x010b0105, 0x010b0109, 0x010b0501, 0x010b0505, 0x010b050d,
|
|
||||||
0x010b0707, 0x010b0903, 0x010b090b, 0x010b090f, 0x010b0d0d, 0x010b0f07, 0x010d010d, 0x010d0303,
|
|
||||||
0x010d0307, 0x010d0703, 0x010d0b05, 0x010d0f03, 0x010f0101, 0x010f0105, 0x010f0109, 0x010f0501,
|
|
||||||
0x010f0505, 0x010f050d, 0x010f0707, 0x010f0b01, 0x010f0b09, 0x03010101, 0x03010103, 0x03010105,
|
|
||||||
0x03010109, 0x03010301, 0x03010303, 0x03010307, 0x0301030b, 0x0301030f, 0x03010501, 0x03010505,
|
|
||||||
0x03010703, 0x03010709, 0x0301070d, 0x03010b09, 0x03010b0d, 0x03010d03, 0x03010f05, 0x03030101,
|
|
||||||
0x03030103, 0x03030107, 0x0303010d, 0x03030301, 0x03030309, 0x03030503, 0x03030701, 0x03030707,
|
|
||||||
0x03030903, 0x03030b01, 0x03030b05, 0x03030f01, 0x03030f0d, 0x03050101, 0x03050305, 0x0305030b,
|
|
||||||
0x0305030f, 0x03050501, 0x03050509, 0x03050705, 0x03050901, 0x03050907, 0x03050b0b, 0x03050d01,
|
|
||||||
0x03050f05, 0x03070103, 0x03070109, 0x0307010f, 0x03070301, 0x03070307, 0x03070503, 0x0307050f,
|
|
||||||
0x03070701, 0x03070709, 0x03070903, 0x03070d05, 0x03070f01, 0x03090107, 0x0309010b, 0x03090305,
|
|
||||||
0x03090309, 0x03090703, 0x03090707, 0x03090905, 0x0309090d, 0x03090b01, 0x03090b09, 0x030b0103,
|
|
||||||
0x030b0301, 0x030b0307, 0x030b0503, 0x030b0701, 0x030b0705, 0x030b0b03, 0x030d0501, 0x030d0509,
|
|
||||||
0x030d050f, 0x030d0909, 0x030d090d, 0x030f0103, 0x030f0107, 0x030f0301, 0x030f0305, 0x030f0503,
|
|
||||||
0x030f070b, 0x030f0903, 0x030f0d05, 0x030f0f01, 0x05010101, 0x05010103, 0x05010107, 0x0501010b,
|
|
||||||
0x0501010f, 0x05010301, 0x05010305, 0x05010309, 0x0501030d, 0x05010503, 0x05010507, 0x0501050f,
|
|
||||||
0x05010701, 0x05010705, 0x05010903, 0x05010907, 0x0501090b, 0x05010b01, 0x05010b05, 0x05010d0f,
|
|
||||||
0x05010f01, 0x05010f07, 0x05010f0b, 0x05030101, 0x05030105, 0x05030301, 0x05030307, 0x0503030f,
|
|
||||||
0x05030505, 0x0503050b, 0x05030703, 0x05030709, 0x05030905, 0x05030b03, 0x05050103, 0x05050109,
|
|
||||||
0x0505010f, 0x05050503, 0x05050507, 0x05050701, 0x0505070f, 0x05050903, 0x05050b07, 0x05050b0f,
|
|
||||||
0x05050f03, 0x05050f09, 0x05070101, 0x05070105, 0x0507010b, 0x05070303, 0x05070505, 0x05070509,
|
|
||||||
0x05070703, 0x05070707, 0x05070905, 0x05070b01, 0x05070d0d, 0x05090103, 0x0509010f, 0x05090501,
|
|
||||||
0x05090507, 0x05090705, 0x0509070b, 0x05090903, 0x05090f05, 0x05090f0b, 0x050b0109, 0x050b0303,
|
|
||||||
0x050b0505, 0x050b070f, 0x050b0901, 0x050b0b07, 0x050b0f01, 0x050d0101, 0x050d0105, 0x050d010f,
|
|
||||||
0x050d0503, 0x050d0b0b, 0x050d0d03, 0x050f010b, 0x050f0303, 0x050f050d, 0x050f0701, 0x050f0907,
|
|
||||||
0x050f0b01, 0x07010105, 0x07010303, 0x07010307, 0x0701030b, 0x0701030f, 0x07010505, 0x07010703,
|
|
||||||
0x07010707, 0x0701070b, 0x07010905, 0x07010909, 0x0701090f, 0x07010b03, 0x07010d07, 0x07010f03,
|
|
||||||
0x07030103, 0x07030107, 0x0703010b, 0x07030309, 0x07030503, 0x07030507, 0x07030901, 0x07030d01,
|
|
||||||
0x07030f05, 0x07030f0d, 0x07050101, 0x07050305, 0x07050501, 0x07050705, 0x07050709, 0x07050b01,
|
|
||||||
0x07070103, 0x07070301, 0x07070309, 0x07070503, 0x07070507, 0x0707050f, 0x07070701, 0x07070903,
|
|
||||||
0x07070907, 0x0707090f, 0x07070b0b, 0x07070f07, 0x07090107, 0x07090303, 0x0709030d, 0x07090505,
|
|
||||||
0x07090703, 0x07090b05, 0x07090d01, 0x07090d09, 0x070b0103, 0x070b0301, 0x070b0305, 0x070b050b,
|
|
||||||
0x070b0705, 0x070b0909, 0x070b0b0d, 0x070b0f07, 0x070d030d, 0x070d0903, 0x070f0103, 0x070f0107,
|
|
||||||
0x070f0501, 0x070f0505, 0x070f070b, 0x09010101, 0x09010109, 0x09010305, 0x09010501, 0x09010509,
|
|
||||||
0x0901050f, 0x09010705, 0x09010903, 0x09010b01, 0x09010f01, 0x09030105, 0x0903010f, 0x09030303,
|
|
||||||
0x09030307, 0x09030505, 0x09030701, 0x0903070b, 0x09030907, 0x09030b03, 0x09030b0b, 0x09050103,
|
|
||||||
0x09050107, 0x09050301, 0x0905030b, 0x09050503, 0x09050707, 0x09050901, 0x09050b0f, 0x09050d05,
|
|
||||||
0x09050f01, 0x09070109, 0x09070303, 0x09070307, 0x09070501, 0x09070505, 0x09070703, 0x0907070b,
|
|
||||||
0x09090101, 0x09090105, 0x09090509, 0x0909070f, 0x09090901, 0x09090f03, 0x090b010b, 0x090b010f,
|
|
||||||
0x090b0503, 0x090b0d05, 0x090d0307, 0x090d0709, 0x090d0d01, 0x090f0301, 0x090f030b, 0x090f0701,
|
|
||||||
0x090f0907, 0x090f0b03, 0x0b010105, 0x0b010301, 0x0b010309, 0x0b010505, 0x0b010901, 0x0b010909,
|
|
||||||
0x0b01090f, 0x0b010b05, 0x0b010d0d, 0x0b010f09, 0x0b030103, 0x0b030107, 0x0b03010b, 0x0b030305,
|
|
||||||
0x0b030503, 0x0b030705, 0x0b030f05, 0x0b050101, 0x0b050303, 0x0b050507, 0x0b050701, 0x0b05070d,
|
|
||||||
0x0b050b07, 0x0b070105, 0x0b07010f, 0x0b070301, 0x0b07050f, 0x0b070909, 0x0b070b03, 0x0b070d0b,
|
|
||||||
0x0b070f07, 0x0b090103, 0x0b090109, 0x0b090501, 0x0b090705, 0x0b09090d, 0x0b0b0305, 0x0b0b050d,
|
|
||||||
0x0b0b0b03, 0x0b0b0b07, 0x0b0d0905, 0x0b0f0105, 0x0b0f0109, 0x0b0f0505, 0x0d010303, 0x0d010307,
|
|
||||||
0x0d01030b, 0x0d010703, 0x0d010707, 0x0d010d01, 0x0d030101, 0x0d030501, 0x0d03050f, 0x0d030d09,
|
|
||||||
0x0d050305, 0x0d050709, 0x0d050905, 0x0d050b0b, 0x0d050d05, 0x0d050f01, 0x0d070101, 0x0d070309,
|
|
||||||
0x0d070503, 0x0d070901, 0x0d09050b, 0x0d090907, 0x0d090d05, 0x0d0b0101, 0x0d0b0107, 0x0d0b0709,
|
|
||||||
0x0d0b0d01, 0x0d0d010b, 0x0d0d0901, 0x0d0f0303, 0x0d0f0307, 0x0f010101, 0x0f010109, 0x0f01010f,
|
|
||||||
0x0f010501, 0x0f010505, 0x0f01070d, 0x0f010901, 0x0f010b09, 0x0f010d05, 0x0f030105, 0x0f030303,
|
|
||||||
0x0f030509, 0x0f030907, 0x0f03090b, 0x0f050103, 0x0f050109, 0x0f050301, 0x0f05030d, 0x0f050503,
|
|
||||||
0x0f050701, 0x0f050b03, 0x0f070105, 0x0f070705, 0x0f07070b, 0x0f070b07, 0x0f090103, 0x0f09010b,
|
|
||||||
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const __device__ uint64_t iq1s_grid[512] = {
|
|
||||||
0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000,
|
|
||||||
0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01,
|
|
||||||
0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100,
|
|
||||||
0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00,
|
|
||||||
0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101,
|
|
||||||
0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100,
|
|
||||||
0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00,
|
|
||||||
0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff,
|
|
||||||
0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000,
|
|
||||||
0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000,
|
|
||||||
0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001,
|
|
||||||
0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff,
|
|
||||||
0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01,
|
|
||||||
0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001,
|
|
||||||
0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00,
|
|
||||||
0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001,
|
|
||||||
0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100,
|
|
||||||
0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000,
|
|
||||||
0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000,
|
|
||||||
0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000,
|
|
||||||
0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff,
|
|
||||||
0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff,
|
|
||||||
0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01,
|
|
||||||
0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100,
|
|
||||||
0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff,
|
|
||||||
0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000,
|
|
||||||
0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101,
|
|
||||||
0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff,
|
|
||||||
0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff,
|
|
||||||
0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001,
|
|
||||||
0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01,
|
|
||||||
0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101,
|
|
||||||
0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100,
|
|
||||||
0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00,
|
|
||||||
0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001,
|
|
||||||
0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff,
|
|
||||||
0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000,
|
|
||||||
0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000,
|
|
||||||
0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100,
|
|
||||||
0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100,
|
|
||||||
0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01,
|
|
||||||
0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff,
|
|
||||||
0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101,
|
|
||||||
0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000,
|
|
||||||
0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff,
|
|
||||||
0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000,
|
|
||||||
0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff,
|
|
||||||
0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00,
|
|
||||||
0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101,
|
|
||||||
0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000,
|
|
||||||
0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000,
|
|
||||||
0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000,
|
|
||||||
0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100,
|
|
||||||
0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000,
|
|
||||||
0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
|
|
||||||
0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff,
|
|
||||||
0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000,
|
|
||||||
0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000,
|
|
||||||
0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
|
|
||||||
0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000,
|
|
||||||
0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff,
|
|
||||||
0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000,
|
|
||||||
0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001,
|
|
||||||
0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
|
|
||||||
0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100,
|
|
||||||
0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000,
|
|
||||||
0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00,
|
|
||||||
0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100,
|
|
||||||
0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000,
|
|
||||||
0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001,
|
|
||||||
0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00,
|
|
||||||
0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff,
|
|
||||||
0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100,
|
|
||||||
0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff,
|
|
||||||
0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000,
|
|
||||||
0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff,
|
|
||||||
0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff,
|
|
||||||
0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00,
|
|
||||||
0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001,
|
|
||||||
0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001,
|
|
||||||
0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01,
|
|
||||||
0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000,
|
|
||||||
0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101,
|
|
||||||
0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00,
|
|
||||||
0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100,
|
|
||||||
0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101,
|
|
||||||
0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101,
|
|
||||||
0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000,
|
|
||||||
0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff,
|
|
||||||
0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff,
|
|
||||||
0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101,
|
|
||||||
0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff,
|
|
||||||
0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101,
|
|
||||||
0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001,
|
|
||||||
0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff,
|
|
||||||
0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff,
|
|
||||||
0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01,
|
|
||||||
0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff,
|
|
||||||
0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100,
|
|
||||||
0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001,
|
|
||||||
0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00,
|
|
||||||
0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff,
|
|
||||||
0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff,
|
|
||||||
0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000,
|
|
||||||
0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000,
|
|
||||||
0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101,
|
|
||||||
0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001,
|
|
||||||
0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000,
|
|
||||||
0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101,
|
|
||||||
0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000,
|
|
||||||
0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001,
|
|
||||||
0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000,
|
|
||||||
0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100,
|
|
||||||
0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000,
|
|
||||||
0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000,
|
|
||||||
0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100,
|
|
||||||
0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff,
|
|
||||||
0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff,
|
|
||||||
0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00,
|
|
||||||
0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101,
|
|
||||||
0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000,
|
|
||||||
0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00,
|
|
||||||
0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000,
|
|
||||||
0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff,
|
|
||||||
0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101,
|
|
||||||
0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff,
|
|
||||||
0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00,
|
|
||||||
0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const __device__ uint8_t ksigns_iq2xs[128] = {
|
|
||||||
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
|
||||||
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
|
||||||
160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
|
|
||||||
48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
|
|
||||||
192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
|
|
||||||
80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
|
|
||||||
96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
|
|
||||||
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
|
||||||
};
|
|
||||||
|
|
||||||
//#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
|
|
||||||
static const __device__ uint64_t ksigns64[128] = {
|
|
||||||
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
|
|
||||||
0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
|
|
||||||
0xff000000ff000000, 0x00000000ff0000ff, 0x00000000ff00ff00, 0xff000000ff00ffff,
|
|
||||||
0x00000000ffff0000, 0xff000000ffff00ff, 0xff000000ffffff00, 0x00000000ffffffff,
|
|
||||||
0xff0000ff00000000, 0x000000ff000000ff, 0x000000ff0000ff00, 0xff0000ff0000ffff,
|
|
||||||
0x000000ff00ff0000, 0xff0000ff00ff00ff, 0xff0000ff00ffff00, 0x000000ff00ffffff,
|
|
||||||
0x000000ffff000000, 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0x000000ffff00ffff,
|
|
||||||
0xff0000ffffff0000, 0x000000ffffff00ff, 0x000000ffffffff00, 0xff0000ffffffffff,
|
|
||||||
0xff00ff0000000000, 0x0000ff00000000ff, 0x0000ff000000ff00, 0xff00ff000000ffff,
|
|
||||||
0x0000ff0000ff0000, 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0x0000ff0000ffffff,
|
|
||||||
0x0000ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, 0x0000ff00ff00ffff,
|
|
||||||
0xff00ff00ffff0000, 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0xff00ff00ffffffff,
|
|
||||||
0x0000ffff00000000, 0xff00ffff000000ff, 0xff00ffff0000ff00, 0x0000ffff0000ffff,
|
|
||||||
0xff00ffff00ff0000, 0x0000ffff00ff00ff, 0x0000ffff00ffff00, 0xff00ffff00ffffff,
|
|
||||||
0xff00ffffff000000, 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0xff00ffffff00ffff,
|
|
||||||
0x0000ffffffff0000, 0xff00ffffffff00ff, 0xff00ffffffffff00, 0x0000ffffffffffff,
|
|
||||||
0xffff000000000000, 0x00ff0000000000ff, 0x00ff00000000ff00, 0xffff00000000ffff,
|
|
||||||
0x00ff000000ff0000, 0xffff000000ff00ff, 0xffff000000ffff00, 0x00ff000000ffffff,
|
|
||||||
0x00ff0000ff000000, 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0x00ff0000ff00ffff,
|
|
||||||
0xffff0000ffff0000, 0x00ff0000ffff00ff, 0x00ff0000ffffff00, 0xffff0000ffffffff,
|
|
||||||
0x00ff00ff00000000, 0xffff00ff000000ff, 0xffff00ff0000ff00, 0x00ff00ff0000ffff,
|
|
||||||
0xffff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, 0xffff00ff00ffffff,
|
|
||||||
0xffff00ffff000000, 0x00ff00ffff0000ff, 0x00ff00ffff00ff00, 0xffff00ffff00ffff,
|
|
||||||
0x00ff00ffffff0000, 0xffff00ffffff00ff, 0xffff00ffffffff00, 0x00ff00ffffffffff,
|
|
||||||
0x00ffff0000000000, 0xffffff00000000ff, 0xffffff000000ff00, 0x00ffff000000ffff,
|
|
||||||
0xffffff0000ff0000, 0x00ffff0000ff00ff, 0x00ffff0000ffff00, 0xffffff0000ffffff,
|
|
||||||
0xffffff00ff000000, 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0xffffff00ff00ffff,
|
|
||||||
0x00ffff00ffff0000, 0xffffff00ffff00ff, 0xffffff00ffffff00, 0x00ffff00ffffffff,
|
|
||||||
0xffffffff00000000, 0x00ffffff000000ff, 0x00ffffff0000ff00, 0xffffffff0000ffff,
|
|
||||||
0x00ffffff00ff0000, 0xffffffff00ff00ff, 0xffffffff00ffff00, 0x00ffffff00ffffff,
|
|
||||||
0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
|
|
||||||
0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
|
|
||||||
};
|
|
||||||
//#endif
|
|
||||||
|
|
||||||
static const __device__ uint8_t kmask_iq2xs[8] = {1, 2, 4, 8, 16, 32, 64, 128};
|
|
||||||
|
|
||||||
inline bool ggml_cuda_supports_mmq(enum ggml_type type) {
|
inline bool ggml_cuda_supports_mmq(enum ggml_type type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case GGML_TYPE_Q4_0:
|
case GGML_TYPE_Q4_0:
|
||||||
|
|
707
ggml-metal.metal
707
ggml-metal.metal
|
@ -1,5 +1,8 @@
|
||||||
#include <metal_stdlib>
|
#include <metal_stdlib>
|
||||||
|
|
||||||
|
#define GGML_COMMON_IMPL_METAL
|
||||||
|
#include "ggml-common.h"
|
||||||
|
|
||||||
using namespace metal;
|
using namespace metal;
|
||||||
|
|
||||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||||
|
@ -3638,710 +3641,6 @@ kernel void kernel_mul_mv_q6_K_f32(
|
||||||
|
|
||||||
// ======================= "True" 2-bit
|
// ======================= "True" 2-bit
|
||||||
|
|
||||||
constexpr constant static uint64_t iq2xxs_grid[256] = {
|
|
||||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
|
||||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808,
|
|
||||||
0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
|
|
||||||
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
|
|
||||||
0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
|
|
||||||
0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
|
||||||
0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
|
|
||||||
0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
|
|
||||||
0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
|
||||||
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
|
|
||||||
0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
|
||||||
0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
|
||||||
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
|
|
||||||
0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
|
|
||||||
0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
|
||||||
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
|
|
||||||
0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
|
|
||||||
0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
|
||||||
0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
|
|
||||||
0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
|
|
||||||
0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
|
||||||
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
|
|
||||||
0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
|
|
||||||
0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
|
||||||
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
|
|
||||||
0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
|
|
||||||
0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
|
||||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
|
|
||||||
0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
|
|
||||||
0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
|
||||||
0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
|
|
||||||
0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
|
|
||||||
0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
|
||||||
0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
|
|
||||||
0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
|
|
||||||
0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
|
||||||
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
|
|
||||||
0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
|
|
||||||
0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
|
||||||
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
|
|
||||||
0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
|
|
||||||
0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
|
||||||
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
|
|
||||||
0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
|
|
||||||
0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
|
||||||
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
|
|
||||||
0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
|
|
||||||
0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
|
||||||
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
|
|
||||||
0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
|
|
||||||
0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
|
||||||
0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
|
|
||||||
0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
|
|
||||||
0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
|
||||||
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
|
|
||||||
0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
|
|
||||||
0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
|
||||||
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
|
|
||||||
0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
|
|
||||||
0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
|
||||||
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
|
|
||||||
0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
|
|
||||||
0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
|
||||||
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr constant static uint64_t iq2xs_grid[512] = {
|
|
||||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
|
||||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
|
||||||
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
|
||||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
|
||||||
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
|
||||||
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
|
||||||
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
|
|
||||||
0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
|
|
||||||
0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
|
||||||
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
|
|
||||||
0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
|
|
||||||
0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
|
||||||
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
|
|
||||||
0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
|
|
||||||
0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
|
||||||
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
|
|
||||||
0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
|
|
||||||
0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
|
||||||
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
|
|
||||||
0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
|
|
||||||
0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
|
||||||
0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
|
|
||||||
0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
|
|
||||||
0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
|
||||||
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
|
|
||||||
0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
|
|
||||||
0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
|
||||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
|
|
||||||
0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
|
|
||||||
0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
|
||||||
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
|
|
||||||
0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
|
|
||||||
0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
|
||||||
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
|
|
||||||
0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
|
|
||||||
0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
|
||||||
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
|
|
||||||
0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
|
|
||||||
0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
|
||||||
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
|
|
||||||
0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
|
|
||||||
0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
|
||||||
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
|
|
||||||
0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
|
|
||||||
0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
|
||||||
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
|
|
||||||
0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
|
|
||||||
0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
|
||||||
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
|
||||||
0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
|
|
||||||
0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
|
||||||
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
|
|
||||||
0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
|
|
||||||
0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
|
||||||
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
|
|
||||||
0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
|
|
||||||
0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
|
||||||
0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
|
|
||||||
0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
|
|
||||||
0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
|
||||||
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
|
|
||||||
0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
|
|
||||||
0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
|
||||||
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
|
|
||||||
0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
|
|
||||||
0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
|
||||||
0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
|
|
||||||
0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
|
|
||||||
0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
|
||||||
0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
|
|
||||||
0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
|
|
||||||
0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
|
||||||
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
|
|
||||||
0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
|
|
||||||
0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
|
||||||
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
|
|
||||||
0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
|
|
||||||
0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
|
||||||
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
|
|
||||||
0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
|
|
||||||
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
|
||||||
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
|
|
||||||
0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
|
|
||||||
0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
|
||||||
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
|
|
||||||
0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
|
|
||||||
0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
|
||||||
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
|
|
||||||
0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
|
|
||||||
0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
|
||||||
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
|
|
||||||
0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
|
|
||||||
0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
|
||||||
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
|
|
||||||
0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
|
|
||||||
0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
|
||||||
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
|
|
||||||
0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
|
|
||||||
0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
|
||||||
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
|
|
||||||
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
|
|
||||||
0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
|
||||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
|
|
||||||
0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
|
|
||||||
0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
|
||||||
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
|
|
||||||
0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
|
|
||||||
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
|
||||||
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
|
|
||||||
0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
|
|
||||||
0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
|
||||||
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
|
|
||||||
0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
|
|
||||||
0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
|
||||||
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
|
|
||||||
0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
|
|
||||||
0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
|
||||||
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
|
|
||||||
0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
|
|
||||||
0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
|
||||||
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
|
|
||||||
0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
|
|
||||||
0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
|
||||||
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
|
|
||||||
0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
|
|
||||||
0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
|
||||||
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
|
|
||||||
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr constant static uint64_t iq2s_grid[1024] = {
|
|
||||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
|
||||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
|
||||||
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
|
||||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
|
||||||
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
|
||||||
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
|
|
||||||
0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
|
|
||||||
0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
|
|
||||||
0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
|
|
||||||
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
|
|
||||||
0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
|
|
||||||
0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
|
|
||||||
0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
|
|
||||||
0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
|
|
||||||
0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
|
|
||||||
0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
|
|
||||||
0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
|
|
||||||
0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
|
|
||||||
0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
|
|
||||||
0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
|
|
||||||
0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
|
||||||
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
|
|
||||||
0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
|
|
||||||
0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
|
|
||||||
0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
|
|
||||||
0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
|
|
||||||
0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
|
|
||||||
0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
|
|
||||||
0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
|
|
||||||
0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
|
|
||||||
0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
|
|
||||||
0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
|
|
||||||
0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
|
|
||||||
0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
|
|
||||||
0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
|
|
||||||
0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
|
|
||||||
0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
|
|
||||||
0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
|
|
||||||
0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
|
|
||||||
0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
|
|
||||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
|
|
||||||
0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
|
|
||||||
0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
|
|
||||||
0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
|
|
||||||
0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
|
|
||||||
0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
|
||||||
0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
|
|
||||||
0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
|
|
||||||
0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
|
|
||||||
0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
|
|
||||||
0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
|
|
||||||
0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
|
|
||||||
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
|
|
||||||
0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
|
|
||||||
0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
|
|
||||||
0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
|
|
||||||
0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
|
|
||||||
0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
|
|
||||||
0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
|
|
||||||
0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
|
|
||||||
0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
|
|
||||||
0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
|
|
||||||
0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
|
|
||||||
0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
|
|
||||||
0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
|
|
||||||
0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
|
|
||||||
0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
|
|
||||||
0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
|
|
||||||
0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
|
|
||||||
0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
|
|
||||||
0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
|
||||||
0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
|
|
||||||
0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
|
|
||||||
0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
|
|
||||||
0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
|
|
||||||
0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
|
|
||||||
0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
|
|
||||||
0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
|
|
||||||
0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
|
|
||||||
0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
|
|
||||||
0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
|
|
||||||
0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
|
|
||||||
0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
|
|
||||||
0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
|
|
||||||
0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
|
|
||||||
0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
|
|
||||||
0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
|
|
||||||
0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
|
|
||||||
0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
|
|
||||||
0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
|
|
||||||
0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
|
|
||||||
0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
|
|
||||||
0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
|
|
||||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
|
|
||||||
0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
|
|
||||||
0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
|
|
||||||
0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
|
|
||||||
0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
|
|
||||||
0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
|
|
||||||
0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
|
|
||||||
0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
|
|
||||||
0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
|
|
||||||
0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
|
|
||||||
0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
|
|
||||||
0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
|
|
||||||
0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
|
|
||||||
0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
|
|
||||||
0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
|
|
||||||
0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
|
|
||||||
0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
|
|
||||||
0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
|
|
||||||
0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
|
|
||||||
0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
|
|
||||||
0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
|
|
||||||
0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
|
|
||||||
0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
|
|
||||||
0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
|
|
||||||
0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
|
||||||
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
|
|
||||||
0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
|
|
||||||
0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
|
|
||||||
0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
|
|
||||||
0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
|
|
||||||
0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
|
|
||||||
0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
|
|
||||||
0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
|
|
||||||
0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
|
||||||
0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
|
|
||||||
0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
|
|
||||||
0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
|
|
||||||
0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
|
|
||||||
0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
|
|
||||||
0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
|
|
||||||
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
|
|
||||||
0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
|
|
||||||
0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
|
|
||||||
0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
|
|
||||||
0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
|
|
||||||
0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
|
|
||||||
0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
|
|
||||||
0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
|
|
||||||
0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
|
|
||||||
0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
|
|
||||||
0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
|
|
||||||
0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
|
|
||||||
0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
|
|
||||||
0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
|
|
||||||
0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
|
|
||||||
0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
|
|
||||||
0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
|
|
||||||
0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
|
|
||||||
0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
|
|
||||||
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
|
|
||||||
0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
|
|
||||||
0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
|
|
||||||
0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
|
|
||||||
0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
|
|
||||||
0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
|
|
||||||
0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
|
|
||||||
0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
|
|
||||||
0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
|
|
||||||
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
|
|
||||||
0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
|
|
||||||
0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
|
|
||||||
0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
|
|
||||||
0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
|
|
||||||
0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
|
|
||||||
0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
|
|
||||||
0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
|
|
||||||
0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
|
|
||||||
0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
|
|
||||||
0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
|
|
||||||
0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
|
|
||||||
0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
|
|
||||||
0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
|
|
||||||
0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
|
|
||||||
0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
|
|
||||||
0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
|
|
||||||
0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
|
|
||||||
0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
|
|
||||||
0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
|
|
||||||
0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
|
|
||||||
0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
|
|
||||||
0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
|
|
||||||
0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
|
|
||||||
0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
|
|
||||||
0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
|
|
||||||
0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
|
|
||||||
0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
|
|
||||||
0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
|
|
||||||
0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
|
|
||||||
0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
|
|
||||||
0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
|
|
||||||
0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
|
|
||||||
0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
|
|
||||||
0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
|
|
||||||
0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
|
|
||||||
0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
|
|
||||||
0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
|
|
||||||
0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
|
|
||||||
0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
|
|
||||||
0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
|
|
||||||
0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
|
|
||||||
0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
|
|
||||||
0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
|
|
||||||
0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
|
||||||
0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
|
|
||||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
|
|
||||||
0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
|
|
||||||
0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
|
|
||||||
0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
|
|
||||||
0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
|
|
||||||
0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
|
|
||||||
0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
|
|
||||||
0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
|
|
||||||
0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
|
|
||||||
0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
|
|
||||||
0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
|
|
||||||
0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
|
|
||||||
0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
|
|
||||||
0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
|
|
||||||
0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
|
|
||||||
0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
|
|
||||||
0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
|
|
||||||
0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
|
|
||||||
0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
|
|
||||||
0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
|
|
||||||
0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
|
|
||||||
0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
|
|
||||||
0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
|
|
||||||
0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
|
|
||||||
0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
|
|
||||||
0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
|
|
||||||
0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
|
|
||||||
0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
|
|
||||||
0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
|
|
||||||
0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
|
|
||||||
0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
|
|
||||||
0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
|
|
||||||
0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
|
|
||||||
0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
|
|
||||||
0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
|
|
||||||
0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
|
|
||||||
0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
|
|
||||||
0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
|
|
||||||
0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
|
|
||||||
0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
|
|
||||||
0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
|
|
||||||
0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
|
|
||||||
0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
|
|
||||||
0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
|
|
||||||
0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
|
|
||||||
0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
|
|
||||||
0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
|
|
||||||
0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
|
|
||||||
0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr constant static uint32_t iq3xxs_grid[256] = {
|
|
||||||
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
|
||||||
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
|
||||||
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
|
||||||
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
|
||||||
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
|
||||||
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
|
||||||
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
|
||||||
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
|
||||||
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
|
||||||
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
|
||||||
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
|
||||||
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
|
||||||
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
|
||||||
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
|
||||||
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
|
||||||
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
|
||||||
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
|
||||||
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
|
||||||
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
|
||||||
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
|
||||||
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
|
||||||
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
|
||||||
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
|
||||||
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
|
||||||
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
|
||||||
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
|
||||||
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
|
||||||
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
|
||||||
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
|
||||||
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
|
||||||
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
|
||||||
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr constant static uint32_t iq3s_grid[512] = {
|
|
||||||
0x01010101, 0x01010103, 0x01010105, 0x0101010b, 0x0101010f, 0x01010301, 0x01010303, 0x01010305,
|
|
||||||
0x01010309, 0x0101030d, 0x01010501, 0x01010503, 0x0101050b, 0x01010707, 0x01010901, 0x01010905,
|
|
||||||
0x0101090b, 0x0101090f, 0x01010b03, 0x01010b07, 0x01010d01, 0x01010d05, 0x01010f03, 0x01010f09,
|
|
||||||
0x01010f0f, 0x01030101, 0x01030103, 0x01030105, 0x01030109, 0x01030301, 0x01030303, 0x0103030b,
|
|
||||||
0x01030501, 0x01030507, 0x0103050f, 0x01030703, 0x0103070b, 0x01030909, 0x01030d03, 0x01030d0b,
|
|
||||||
0x01030f05, 0x01050101, 0x01050103, 0x0105010b, 0x0105010f, 0x01050301, 0x01050307, 0x0105030d,
|
|
||||||
0x01050503, 0x0105050b, 0x01050701, 0x01050709, 0x01050905, 0x0105090b, 0x0105090f, 0x01050b03,
|
|
||||||
0x01050b07, 0x01050f01, 0x01050f07, 0x01070107, 0x01070303, 0x0107030b, 0x01070501, 0x01070505,
|
|
||||||
0x01070703, 0x01070707, 0x0107070d, 0x01070909, 0x01070b01, 0x01070b05, 0x01070d0f, 0x01070f03,
|
|
||||||
0x01070f0b, 0x01090101, 0x01090307, 0x0109030f, 0x01090503, 0x01090509, 0x01090705, 0x01090901,
|
|
||||||
0x01090907, 0x01090b03, 0x01090f01, 0x010b0105, 0x010b0109, 0x010b0501, 0x010b0505, 0x010b050d,
|
|
||||||
0x010b0707, 0x010b0903, 0x010b090b, 0x010b090f, 0x010b0d0d, 0x010b0f07, 0x010d010d, 0x010d0303,
|
|
||||||
0x010d0307, 0x010d0703, 0x010d0b05, 0x010d0f03, 0x010f0101, 0x010f0105, 0x010f0109, 0x010f0501,
|
|
||||||
0x010f0505, 0x010f050d, 0x010f0707, 0x010f0b01, 0x010f0b09, 0x03010101, 0x03010103, 0x03010105,
|
|
||||||
0x03010109, 0x03010301, 0x03010303, 0x03010307, 0x0301030b, 0x0301030f, 0x03010501, 0x03010505,
|
|
||||||
0x03010703, 0x03010709, 0x0301070d, 0x03010b09, 0x03010b0d, 0x03010d03, 0x03010f05, 0x03030101,
|
|
||||||
0x03030103, 0x03030107, 0x0303010d, 0x03030301, 0x03030309, 0x03030503, 0x03030701, 0x03030707,
|
|
||||||
0x03030903, 0x03030b01, 0x03030b05, 0x03030f01, 0x03030f0d, 0x03050101, 0x03050305, 0x0305030b,
|
|
||||||
0x0305030f, 0x03050501, 0x03050509, 0x03050705, 0x03050901, 0x03050907, 0x03050b0b, 0x03050d01,
|
|
||||||
0x03050f05, 0x03070103, 0x03070109, 0x0307010f, 0x03070301, 0x03070307, 0x03070503, 0x0307050f,
|
|
||||||
0x03070701, 0x03070709, 0x03070903, 0x03070d05, 0x03070f01, 0x03090107, 0x0309010b, 0x03090305,
|
|
||||||
0x03090309, 0x03090703, 0x03090707, 0x03090905, 0x0309090d, 0x03090b01, 0x03090b09, 0x030b0103,
|
|
||||||
0x030b0301, 0x030b0307, 0x030b0503, 0x030b0701, 0x030b0705, 0x030b0b03, 0x030d0501, 0x030d0509,
|
|
||||||
0x030d050f, 0x030d0909, 0x030d090d, 0x030f0103, 0x030f0107, 0x030f0301, 0x030f0305, 0x030f0503,
|
|
||||||
0x030f070b, 0x030f0903, 0x030f0d05, 0x030f0f01, 0x05010101, 0x05010103, 0x05010107, 0x0501010b,
|
|
||||||
0x0501010f, 0x05010301, 0x05010305, 0x05010309, 0x0501030d, 0x05010503, 0x05010507, 0x0501050f,
|
|
||||||
0x05010701, 0x05010705, 0x05010903, 0x05010907, 0x0501090b, 0x05010b01, 0x05010b05, 0x05010d0f,
|
|
||||||
0x05010f01, 0x05010f07, 0x05010f0b, 0x05030101, 0x05030105, 0x05030301, 0x05030307, 0x0503030f,
|
|
||||||
0x05030505, 0x0503050b, 0x05030703, 0x05030709, 0x05030905, 0x05030b03, 0x05050103, 0x05050109,
|
|
||||||
0x0505010f, 0x05050503, 0x05050507, 0x05050701, 0x0505070f, 0x05050903, 0x05050b07, 0x05050b0f,
|
|
||||||
0x05050f03, 0x05050f09, 0x05070101, 0x05070105, 0x0507010b, 0x05070303, 0x05070505, 0x05070509,
|
|
||||||
0x05070703, 0x05070707, 0x05070905, 0x05070b01, 0x05070d0d, 0x05090103, 0x0509010f, 0x05090501,
|
|
||||||
0x05090507, 0x05090705, 0x0509070b, 0x05090903, 0x05090f05, 0x05090f0b, 0x050b0109, 0x050b0303,
|
|
||||||
0x050b0505, 0x050b070f, 0x050b0901, 0x050b0b07, 0x050b0f01, 0x050d0101, 0x050d0105, 0x050d010f,
|
|
||||||
0x050d0503, 0x050d0b0b, 0x050d0d03, 0x050f010b, 0x050f0303, 0x050f050d, 0x050f0701, 0x050f0907,
|
|
||||||
0x050f0b01, 0x07010105, 0x07010303, 0x07010307, 0x0701030b, 0x0701030f, 0x07010505, 0x07010703,
|
|
||||||
0x07010707, 0x0701070b, 0x07010905, 0x07010909, 0x0701090f, 0x07010b03, 0x07010d07, 0x07010f03,
|
|
||||||
0x07030103, 0x07030107, 0x0703010b, 0x07030309, 0x07030503, 0x07030507, 0x07030901, 0x07030d01,
|
|
||||||
0x07030f05, 0x07030f0d, 0x07050101, 0x07050305, 0x07050501, 0x07050705, 0x07050709, 0x07050b01,
|
|
||||||
0x07070103, 0x07070301, 0x07070309, 0x07070503, 0x07070507, 0x0707050f, 0x07070701, 0x07070903,
|
|
||||||
0x07070907, 0x0707090f, 0x07070b0b, 0x07070f07, 0x07090107, 0x07090303, 0x0709030d, 0x07090505,
|
|
||||||
0x07090703, 0x07090b05, 0x07090d01, 0x07090d09, 0x070b0103, 0x070b0301, 0x070b0305, 0x070b050b,
|
|
||||||
0x070b0705, 0x070b0909, 0x070b0b0d, 0x070b0f07, 0x070d030d, 0x070d0903, 0x070f0103, 0x070f0107,
|
|
||||||
0x070f0501, 0x070f0505, 0x070f070b, 0x09010101, 0x09010109, 0x09010305, 0x09010501, 0x09010509,
|
|
||||||
0x0901050f, 0x09010705, 0x09010903, 0x09010b01, 0x09010f01, 0x09030105, 0x0903010f, 0x09030303,
|
|
||||||
0x09030307, 0x09030505, 0x09030701, 0x0903070b, 0x09030907, 0x09030b03, 0x09030b0b, 0x09050103,
|
|
||||||
0x09050107, 0x09050301, 0x0905030b, 0x09050503, 0x09050707, 0x09050901, 0x09050b0f, 0x09050d05,
|
|
||||||
0x09050f01, 0x09070109, 0x09070303, 0x09070307, 0x09070501, 0x09070505, 0x09070703, 0x0907070b,
|
|
||||||
0x09090101, 0x09090105, 0x09090509, 0x0909070f, 0x09090901, 0x09090f03, 0x090b010b, 0x090b010f,
|
|
||||||
0x090b0503, 0x090b0d05, 0x090d0307, 0x090d0709, 0x090d0d01, 0x090f0301, 0x090f030b, 0x090f0701,
|
|
||||||
0x090f0907, 0x090f0b03, 0x0b010105, 0x0b010301, 0x0b010309, 0x0b010505, 0x0b010901, 0x0b010909,
|
|
||||||
0x0b01090f, 0x0b010b05, 0x0b010d0d, 0x0b010f09, 0x0b030103, 0x0b030107, 0x0b03010b, 0x0b030305,
|
|
||||||
0x0b030503, 0x0b030705, 0x0b030f05, 0x0b050101, 0x0b050303, 0x0b050507, 0x0b050701, 0x0b05070d,
|
|
||||||
0x0b050b07, 0x0b070105, 0x0b07010f, 0x0b070301, 0x0b07050f, 0x0b070909, 0x0b070b03, 0x0b070d0b,
|
|
||||||
0x0b070f07, 0x0b090103, 0x0b090109, 0x0b090501, 0x0b090705, 0x0b09090d, 0x0b0b0305, 0x0b0b050d,
|
|
||||||
0x0b0b0b03, 0x0b0b0b07, 0x0b0d0905, 0x0b0f0105, 0x0b0f0109, 0x0b0f0505, 0x0d010303, 0x0d010307,
|
|
||||||
0x0d01030b, 0x0d010703, 0x0d010707, 0x0d010d01, 0x0d030101, 0x0d030501, 0x0d03050f, 0x0d030d09,
|
|
||||||
0x0d050305, 0x0d050709, 0x0d050905, 0x0d050b0b, 0x0d050d05, 0x0d050f01, 0x0d070101, 0x0d070309,
|
|
||||||
0x0d070503, 0x0d070901, 0x0d09050b, 0x0d090907, 0x0d090d05, 0x0d0b0101, 0x0d0b0107, 0x0d0b0709,
|
|
||||||
0x0d0b0d01, 0x0d0d010b, 0x0d0d0901, 0x0d0f0303, 0x0d0f0307, 0x0f010101, 0x0f010109, 0x0f01010f,
|
|
||||||
0x0f010501, 0x0f010505, 0x0f01070d, 0x0f010901, 0x0f010b09, 0x0f010d05, 0x0f030105, 0x0f030303,
|
|
||||||
0x0f030509, 0x0f030907, 0x0f03090b, 0x0f050103, 0x0f050109, 0x0f050301, 0x0f05030d, 0x0f050503,
|
|
||||||
0x0f050701, 0x0f050b03, 0x0f070105, 0x0f070705, 0x0f07070b, 0x0f070b07, 0x0f090103, 0x0f09010b,
|
|
||||||
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
|
|
||||||
};
|
|
||||||
|
|
||||||
#define NGRID_IQ1S 512
|
|
||||||
constexpr constant static uint64_t iq1s_grid[NGRID_IQ1S] = {
|
|
||||||
0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000,
|
|
||||||
0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01,
|
|
||||||
0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100,
|
|
||||||
0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00,
|
|
||||||
0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101,
|
|
||||||
0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100,
|
|
||||||
0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00,
|
|
||||||
0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff,
|
|
||||||
0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000,
|
|
||||||
0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000,
|
|
||||||
0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001,
|
|
||||||
0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff,
|
|
||||||
0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01,
|
|
||||||
0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001,
|
|
||||||
0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00,
|
|
||||||
0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001,
|
|
||||||
0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100,
|
|
||||||
0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000,
|
|
||||||
0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000,
|
|
||||||
0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000,
|
|
||||||
0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff,
|
|
||||||
0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff,
|
|
||||||
0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01,
|
|
||||||
0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100,
|
|
||||||
0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff,
|
|
||||||
0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000,
|
|
||||||
0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101,
|
|
||||||
0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff,
|
|
||||||
0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff,
|
|
||||||
0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001,
|
|
||||||
0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01,
|
|
||||||
0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101,
|
|
||||||
0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100,
|
|
||||||
0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00,
|
|
||||||
0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001,
|
|
||||||
0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff,
|
|
||||||
0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000,
|
|
||||||
0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000,
|
|
||||||
0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100,
|
|
||||||
0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100,
|
|
||||||
0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01,
|
|
||||||
0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff,
|
|
||||||
0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101,
|
|
||||||
0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000,
|
|
||||||
0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff,
|
|
||||||
0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000,
|
|
||||||
0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff,
|
|
||||||
0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00,
|
|
||||||
0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101,
|
|
||||||
0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000,
|
|
||||||
0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000,
|
|
||||||
0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000,
|
|
||||||
0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100,
|
|
||||||
0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000,
|
|
||||||
0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
|
|
||||||
0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff,
|
|
||||||
0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000,
|
|
||||||
0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000,
|
|
||||||
0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
|
|
||||||
0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000,
|
|
||||||
0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff,
|
|
||||||
0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000,
|
|
||||||
0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001,
|
|
||||||
0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
|
|
||||||
0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100,
|
|
||||||
0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000,
|
|
||||||
0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00,
|
|
||||||
0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100,
|
|
||||||
0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000,
|
|
||||||
0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001,
|
|
||||||
0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00,
|
|
||||||
0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff,
|
|
||||||
0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100,
|
|
||||||
0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff,
|
|
||||||
0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000,
|
|
||||||
0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff,
|
|
||||||
0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff,
|
|
||||||
0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00,
|
|
||||||
0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001,
|
|
||||||
0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001,
|
|
||||||
0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01,
|
|
||||||
0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000,
|
|
||||||
0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101,
|
|
||||||
0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00,
|
|
||||||
0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100,
|
|
||||||
0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101,
|
|
||||||
0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101,
|
|
||||||
0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000,
|
|
||||||
0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff,
|
|
||||||
0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff,
|
|
||||||
0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101,
|
|
||||||
0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff,
|
|
||||||
0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101,
|
|
||||||
0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001,
|
|
||||||
0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff,
|
|
||||||
0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff,
|
|
||||||
0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01,
|
|
||||||
0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff,
|
|
||||||
0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100,
|
|
||||||
0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001,
|
|
||||||
0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00,
|
|
||||||
0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff,
|
|
||||||
0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff,
|
|
||||||
0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000,
|
|
||||||
0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000,
|
|
||||||
0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101,
|
|
||||||
0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001,
|
|
||||||
0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000,
|
|
||||||
0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101,
|
|
||||||
0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000,
|
|
||||||
0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001,
|
|
||||||
0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000,
|
|
||||||
0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100,
|
|
||||||
0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000,
|
|
||||||
0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000,
|
|
||||||
0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100,
|
|
||||||
0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff,
|
|
||||||
0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff,
|
|
||||||
0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00,
|
|
||||||
0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101,
|
|
||||||
0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000,
|
|
||||||
0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00,
|
|
||||||
0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000,
|
|
||||||
0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff,
|
|
||||||
0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101,
|
|
||||||
0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff,
|
|
||||||
0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00,
|
|
||||||
0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff,
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr constant static uint8_t ksigns_iq2xs[128] = {
|
|
||||||
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
|
||||||
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
|
||||||
160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
|
|
||||||
48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
|
|
||||||
192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
|
|
||||||
80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
|
|
||||||
96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
|
|
||||||
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr constant static uint8_t kmask_iq2xs[8] = {1, 2, 4, 8, 16, 32, 64, 128};
|
|
||||||
|
|
||||||
void kernel_mul_mv_iq2_xxs_f32_impl(
|
void kernel_mul_mv_iq2_xxs_f32_impl(
|
||||||
device const void * src0,
|
device const void * src0,
|
||||||
device const float * src1,
|
device const float * src1,
|
||||||
|
|
859
ggml-quants.c
859
ggml-quants.c
File diff suppressed because it is too large
Load diff
|
@ -1,9 +1,9 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "ggml-impl.h"
|
|
||||||
|
|
||||||
// GGML internal header
|
// GGML internal header
|
||||||
|
|
||||||
|
#include "ggml-impl.h"
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
|
@ -261,6 +261,7 @@ void quantize_row_q4_K_reference(const float * GGML_RESTRICT x, block_q4_K * GGM
|
||||||
void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int k);
|
void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int k);
|
||||||
void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int k);
|
void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int k);
|
||||||
void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int k);
|
void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int k);
|
||||||
|
|
||||||
void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int k);
|
void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int k);
|
||||||
void quantize_row_iq4_nl_reference (const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int k);
|
void quantize_row_iq4_nl_reference (const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int k);
|
||||||
void quantize_row_iq4_xs_reference (const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int k);
|
void quantize_row_iq4_xs_reference (const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int k);
|
||||||
|
@ -280,6 +281,7 @@ void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
|
||||||
void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||||
void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||||
void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||||
|
|
||||||
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||||
void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||||
void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||||
|
@ -300,6 +302,7 @@ void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRI
|
||||||
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||||
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||||
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||||
|
|
||||||
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||||
void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||||
void dequantize_row_iq2_s (const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
void dequantize_row_iq2_s (const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||||
|
@ -321,6 +324,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
|
||||||
void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
|
||||||
void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
void ggml_vec_dot_iq2_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
void ggml_vec_dot_iq2_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
@ -330,26 +334,26 @@ void ggml_vec_dot_iq4_nl_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const
|
||||||
void ggml_vec_dot_iq4_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
void ggml_vec_dot_iq4_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||||
|
|
||||||
//
|
|
||||||
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
|
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
|
||||||
//
|
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_iq2_xxs(const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_iq2_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_iq2_xs (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_iq2_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_iq2_s (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_iq3_xxs(const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_iq1_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_iq1_s (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_iq4_nl (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_iq4_nl (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_iq4_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_iq4_xs (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_iq3_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_iq3_s (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
|
||||||
size_t quantize_q2_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_q3_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_q4_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_q5_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_q6_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_q4_0 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_q4_1 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_q5_0 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
size_t quantize_q5_1 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
|
size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||||
|
|
||||||
void iq2xs_init_impl(enum ggml_type type);
|
void iq2xs_init_impl(enum ggml_type type);
|
||||||
void iq2xs_free_impl(enum ggml_type type);
|
void iq2xs_free_impl(enum ggml_type type);
|
||||||
|
|
384
ggml-sycl.cpp
384
ggml-sycl.cpp
|
@ -3144,6 +3144,8 @@ namespace dpct
|
||||||
|
|
||||||
} // COPY from DPCT head files
|
} // COPY from DPCT head files
|
||||||
|
|
||||||
|
#define GGML_COMMON_IMPL_SYCL
|
||||||
|
#include "ggml-common.h"
|
||||||
|
|
||||||
static int g_ggml_sycl_debug=0;
|
static int g_ggml_sycl_debug=0;
|
||||||
#define GGML_SYCL_DEBUG(...) do{if(g_ggml_sycl_debug) printf(__VA_ARGS__);}while(0)
|
#define GGML_SYCL_DEBUG(...) do{if(g_ggml_sycl_debug) printf(__VA_ARGS__);}while(0)
|
||||||
|
@ -4745,388 +4747,6 @@ static void dequantize_block_q6_K(const void * __restrict__ vx, dst_t * __restri
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static dpct::global_memory<const uint64_t, 1>
|
|
||||||
iq2xxs_grid(sycl::range<1>(256),
|
|
||||||
{
|
|
||||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919,
|
|
||||||
0x0808080808082b08, 0x0808080808082b2b, 0x0808080808190819,
|
|
||||||
0x0808080808191908, 0x08080808082b0808, 0x08080808082b082b,
|
|
||||||
0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
|
|
||||||
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08,
|
|
||||||
0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
|
||||||
0x080808082b08082b, 0x080808082b082b2b, 0x080808082b2b082b,
|
|
||||||
0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
|
||||||
0x0808081908191919, 0x0808081919080808, 0x080808192b081908,
|
|
||||||
0x080808192b192b08, 0x0808082b08080808, 0x0808082b0808082b,
|
|
||||||
0x0808082b082b082b, 0x0808082b2b08082b, 0x0808190808080819,
|
|
||||||
0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
|
||||||
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b,
|
|
||||||
0x0808190819082b08, 0x08081908192b0808, 0x080819082b080819,
|
|
||||||
0x080819082b081908, 0x080819082b190808, 0x080819082b2b1908,
|
|
||||||
0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
|
||||||
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19,
|
|
||||||
0x080819192b080808, 0x080819192b190819, 0x0808192b08082b19,
|
|
||||||
0x0808192b08190808, 0x0808192b19080808, 0x0808192b2b081908,
|
|
||||||
0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
|
||||||
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08,
|
|
||||||
0x08082b0819080819, 0x08082b0819081908, 0x08082b0819190808,
|
|
||||||
0x08082b081919082b, 0x08082b082b082b08, 0x08082b1908081908,
|
|
||||||
0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
|
||||||
0x0819080808080819, 0x0819080808081908, 0x0819080808190808,
|
|
||||||
0x08190808082b0819, 0x0819080819080808, 0x08190808192b0808,
|
|
||||||
0x081908082b081908, 0x081908082b190808, 0x081908082b191919,
|
|
||||||
0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
|
||||||
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808,
|
|
||||||
0x0819082b082b1908, 0x0819082b19081919, 0x0819190808080808,
|
|
||||||
0x0819190808082b08, 0x08191908082b0808, 0x08191908082b1919,
|
|
||||||
0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
|
||||||
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b,
|
|
||||||
0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
|
||||||
0x08192b0819080808, 0x08192b082b080819, 0x08192b1908080808,
|
|
||||||
0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
|
||||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b,
|
|
||||||
0x082b080819081908, 0x082b0808192b0819, 0x082b08082b080808,
|
|
||||||
0x082b08082b08082b, 0x082b0819082b2b19, 0x082b081919082b08,
|
|
||||||
0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
|
||||||
0x082b190808081908, 0x082b190808190808, 0x082b190819080808,
|
|
||||||
0x082b19081919192b, 0x082b191908080808, 0x082b191919080819,
|
|
||||||
0x082b1919192b1908, 0x082b192b2b190808, 0x082b2b0808082b08,
|
|
||||||
0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
|
||||||
0x1908080808080819, 0x1908080808081908, 0x1908080808190808,
|
|
||||||
0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
|
||||||
0x1908080819080808, 0x1908080819082b08, 0x190808081919192b,
|
|
||||||
0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
|
||||||
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808,
|
|
||||||
0x19080819192b0819, 0x190808192b080808, 0x190808192b081919,
|
|
||||||
0x1908082b08080819, 0x1908082b08190808, 0x1908082b19082b08,
|
|
||||||
0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
|
||||||
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808,
|
|
||||||
0x190819082b192b19, 0x190819190819082b, 0x19081919082b1908,
|
|
||||||
0x1908192b08080808, 0x19082b0808080819, 0x19082b0808081908,
|
|
||||||
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
|
||||||
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819,
|
|
||||||
0x19082b192b08082b, 0x19082b2b19081919, 0x19082b2b2b190808,
|
|
||||||
0x1919080808080808, 0x1919080808082b08, 0x1919080808190819,
|
|
||||||
0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
|
||||||
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b,
|
|
||||||
0x191908192b2b1908, 0x1919082b2b190819, 0x191919082b190808,
|
|
||||||
0x191919082b19082b, 0x1919191908082b2b, 0x1919192b08080819,
|
|
||||||
0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
|
||||||
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808,
|
|
||||||
0x19192b2b08082b08, 0x192b080808081908, 0x192b080808190808,
|
|
||||||
0x192b080819080808, 0x192b0808192b2b08, 0x192b081908080808,
|
|
||||||
0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
|
||||||
0x192b190808080808, 0x192b190808081919, 0x192b191908190808,
|
|
||||||
0x192b19190819082b, 0x192b19192b081908, 0x192b2b081908082b,
|
|
||||||
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808082b2b,
|
|
||||||
0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
|
||||||
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819,
|
|
||||||
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808,
|
|
||||||
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808,
|
|
||||||
0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
|
||||||
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808,
|
|
||||||
0x2b082b080808082b, 0x2b082b1908081908, 0x2b082b2b08190819,
|
|
||||||
0x2b19080808081908, 0x2b19080808190808, 0x2b190808082b1908,
|
|
||||||
0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
|
||||||
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808,
|
|
||||||
0x2b191908082b082b, 0x2b19190819081908, 0x2b19191919190819,
|
|
||||||
0x2b192b082b080819, 0x2b192b19082b0808, 0x2b2b08080808082b,
|
|
||||||
0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
|
||||||
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808,
|
|
||||||
0x2b2b2b1908081908,
|
|
||||||
});
|
|
||||||
|
|
||||||
static dpct::global_memory<const uint64_t, 1>
|
|
||||||
iq2xs_grid(sycl::range<1>(512),
|
|
||||||
{
|
|
||||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919,
|
|
||||||
0x0808080808082b08, 0x0808080808082b2b, 0x0808080808190819,
|
|
||||||
0x0808080808191908, 0x080808080819192b, 0x0808080808192b19,
|
|
||||||
0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
|
||||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908,
|
|
||||||
0x080808081908192b, 0x0808080819082b19, 0x0808080819190808,
|
|
||||||
0x080808081919082b, 0x0808080819191919, 0x0808080819192b08,
|
|
||||||
0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
|
||||||
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08,
|
|
||||||
0x080808082b190819, 0x080808082b191908, 0x080808082b192b19,
|
|
||||||
0x080808082b2b0808, 0x0808081908080819, 0x0808081908081908,
|
|
||||||
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
|
||||||
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08,
|
|
||||||
0x0808081908192b2b, 0x08080819082b0819, 0x08080819082b1908,
|
|
||||||
0x0808081919080808, 0x080808191908082b, 0x0808081919081919,
|
|
||||||
0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
|
||||||
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819,
|
|
||||||
0x080808192b081908, 0x080808192b190808, 0x0808082b08080808,
|
|
||||||
0x0808082b0808082b, 0x0808082b08081919, 0x0808082b08082b08,
|
|
||||||
0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
|
||||||
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808,
|
|
||||||
0x0808082b19191919, 0x0808082b2b080808, 0x0808082b2b082b2b,
|
|
||||||
0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
|
||||||
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
|
||||||
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819,
|
|
||||||
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b,
|
|
||||||
0x0808190819081919, 0x0808190819082b08, 0x0808190819190819,
|
|
||||||
0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
|
||||||
0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
|
||||||
0x0808191908080808, 0x080819190808082b, 0x0808191908081919,
|
|
||||||
0x0808191908082b08, 0x0808191908190819, 0x0808191908191908,
|
|
||||||
0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
|
||||||
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808,
|
|
||||||
0x0808192b08080819, 0x0808192b08081908, 0x0808192b08190808,
|
|
||||||
0x0808192b082b192b, 0x0808192b19080808, 0x0808192b1908082b,
|
|
||||||
0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
|
||||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b,
|
|
||||||
0x08082b0808190819, 0x08082b0808191908, 0x08082b08082b0808,
|
|
||||||
0x08082b08082b1919, 0x08082b0819080819, 0x08082b0819081908,
|
|
||||||
0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
|
||||||
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
|
||||||
0x08082b1908081908, 0x08082b1908190808, 0x08082b1919080808,
|
|
||||||
0x08082b192b080819, 0x08082b192b082b19, 0x08082b2b08080808,
|
|
||||||
0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
|
||||||
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908,
|
|
||||||
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808,
|
|
||||||
0x081908080819082b, 0x0819080808191919, 0x0819080808192b08,
|
|
||||||
0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
|
||||||
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08,
|
|
||||||
0x0819080819190819, 0x0819080819191908, 0x08190808192b0808,
|
|
||||||
0x08190808192b2b2b, 0x081908082b080819, 0x081908082b081908,
|
|
||||||
0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
|
||||||
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819,
|
|
||||||
0x0819081908191908, 0x08190819082b0808, 0x0819081919080819,
|
|
||||||
0x0819081919081908, 0x0819081919190808, 0x081908192b080808,
|
|
||||||
0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
|
||||||
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808,
|
|
||||||
0x0819082b19080808, 0x0819082b192b0808, 0x0819190808080808,
|
|
||||||
0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
|
||||||
0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
|
||||||
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19,
|
|
||||||
0x0819190819190808, 0x08191908192b1908, 0x081919082b080808,
|
|
||||||
0x0819191908080819, 0x0819191908081908, 0x0819191908190808,
|
|
||||||
0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
|
||||||
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908,
|
|
||||||
0x08192b0808190808, 0x08192b080819082b, 0x08192b0819080808,
|
|
||||||
0x08192b0819191908, 0x08192b082b08192b, 0x08192b1908080808,
|
|
||||||
0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
|
||||||
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b,
|
|
||||||
0x082b080808081919, 0x082b080808082b08, 0x082b080808082b2b,
|
|
||||||
0x082b080808190819, 0x082b080808191908, 0x082b0808082b0808,
|
|
||||||
0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
|
||||||
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819,
|
|
||||||
0x082b081908081908, 0x082b081908190808, 0x082b081919080808,
|
|
||||||
0x082b081919082b08, 0x082b0819192b1919, 0x082b082b08080808,
|
|
||||||
0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
|
||||||
0x082b190808080819, 0x082b190808081908, 0x082b190808190808,
|
|
||||||
0x082b1908082b2b19, 0x082b190819080808, 0x082b191908080808,
|
|
||||||
0x082b191919080819, 0x082b19191919082b, 0x082b19192b192b19,
|
|
||||||
0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
|
||||||
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b,
|
|
||||||
0x082b2b08082b0808, 0x082b2b0819191919, 0x082b2b082b082b08,
|
|
||||||
0x082b2b082b2b082b, 0x082b2b19192b2b08, 0x082b2b192b190808,
|
|
||||||
0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
|
||||||
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
|
||||||
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19,
|
|
||||||
0x1908080808190808, 0x190808080819082b, 0x1908080808191919,
|
|
||||||
0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
|
||||||
0x1908080819080808, 0x190808081908082b, 0x1908080819081919,
|
|
||||||
0x1908080819082b08, 0x1908080819082b2b, 0x1908080819190819,
|
|
||||||
0x1908080819191908, 0x19080808192b0808, 0x19080808192b1919,
|
|
||||||
0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
|
||||||
0x1908081908080808, 0x190808190808082b, 0x1908081908081919,
|
|
||||||
0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
|
||||||
0x19080819082b0808, 0x1908081919080819, 0x1908081919081908,
|
|
||||||
0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
|
||||||
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908,
|
|
||||||
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b082b2b19,
|
|
||||||
0x1908082b19080808, 0x1908190808080808, 0x190819080808082b,
|
|
||||||
0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
|
||||||
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808,
|
|
||||||
0x1908190819080819, 0x1908190819081908, 0x1908190819190808,
|
|
||||||
0x190819082b080808, 0x190819082b191908, 0x1908191908080819,
|
|
||||||
0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
|
||||||
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808,
|
|
||||||
0x1908192b08082b2b, 0x1908192b19081908, 0x1908192b19190808,
|
|
||||||
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808190808,
|
|
||||||
0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
|
||||||
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819,
|
|
||||||
0x19082b1919081908, 0x19082b1919190808, 0x19082b19192b2b19,
|
|
||||||
0x19082b2b08081908, 0x1919080808080808, 0x191908080808082b,
|
|
||||||
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
|
||||||
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08,
|
|
||||||
0x1919080819080819, 0x1919080819081908, 0x1919080819190808,
|
|
||||||
0x191908082b080808, 0x1919081908080819, 0x1919081908081908,
|
|
||||||
0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
|
||||||
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908,
|
|
||||||
0x1919082b2b2b2b2b, 0x1919190808080819, 0x1919190808081908,
|
|
||||||
0x1919190808190808, 0x19191908082b0819, 0x1919190819080808,
|
|
||||||
0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
|
||||||
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808,
|
|
||||||
0x191919192b082b08, 0x1919192b082b0819, 0x1919192b192b2b08,
|
|
||||||
0x1919192b2b2b0819, 0x19192b0808080808, 0x19192b0808191908,
|
|
||||||
0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
|
||||||
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b,
|
|
||||||
0x19192b2b2b081919, 0x192b080808080819, 0x192b080808081908,
|
|
||||||
0x192b080808190808, 0x192b080819080808, 0x192b080819191908,
|
|
||||||
0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
|
||||||
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b,
|
|
||||||
0x192b082b2b19082b, 0x192b190808080808, 0x192b19080819192b,
|
|
||||||
0x192b191908190808, 0x192b191919080808, 0x192b191919081919,
|
|
||||||
0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
|
||||||
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908,
|
|
||||||
0x192b2b2b192b082b, 0x2b08080808080808, 0x2b0808080808082b,
|
|
||||||
0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
|
||||||
0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
|
||||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808,
|
|
||||||
0x2b0808082b080808, 0x2b0808082b08082b, 0x2b0808082b2b2b08,
|
|
||||||
0x2b0808082b2b2b2b, 0x2b08081908080819, 0x2b08081908081908,
|
|
||||||
0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
|
||||||
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808,
|
|
||||||
0x2b08082b082b0808, 0x2b08082b2b080808, 0x2b08082b2b08082b,
|
|
||||||
0x2b08082b2b2b0808, 0x2b08082b2b2b2b08, 0x2b08190808080819,
|
|
||||||
0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
|
||||||
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808,
|
|
||||||
0x2b0819082b082b19, 0x2b08191908080808, 0x2b08191919081908,
|
|
||||||
0x2b0819192b2b1919, 0x2b08192b08192b08, 0x2b08192b192b2b2b,
|
|
||||||
0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
|
||||||
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b,
|
|
||||||
0x2b082b082b2b2b08, 0x2b082b190808192b, 0x2b082b2b082b082b,
|
|
||||||
0x2b082b2b2b080808, 0x2b082b2b2b082b08, 0x2b082b2b2b19192b,
|
|
||||||
0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
|
||||||
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b,
|
|
||||||
0x2b1908082b081908, 0x2b19081908080808, 0x2b190819082b082b,
|
|
||||||
0x2b190819192b1908, 0x2b19082b1919192b, 0x2b19082b2b082b19,
|
|
||||||
0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
|
||||||
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19,
|
|
||||||
0x2b1919192b190808, 0x2b1919192b19082b, 0x2b19192b19080819,
|
|
||||||
0x2b192b0819190819, 0x2b192b082b2b192b, 0x2b192b1919082b19,
|
|
||||||
0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
|
||||||
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b,
|
|
||||||
0x2b2b0808082b0808, 0x2b2b0808082b2b2b, 0x2b2b08082b2b0808,
|
|
||||||
0x2b2b081919190819, 0x2b2b081919192b19, 0x2b2b08192b2b192b,
|
|
||||||
0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
|
||||||
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808,
|
|
||||||
0x2b2b190819080808, 0x2b2b19082b191919, 0x2b2b192b192b1919,
|
|
||||||
0x2b2b192b2b192b08, 0x2b2b2b0808082b2b, 0x2b2b2b08082b0808,
|
|
||||||
0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
|
||||||
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908,
|
|
||||||
0x2b2b2b192b08192b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b,
|
|
||||||
0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
|
||||||
});
|
|
||||||
|
|
||||||
static dpct::global_memory<const uint32_t, 1> iq3xxs_grid(
|
|
||||||
sycl::range<1>(256),
|
|
||||||
{
|
|
||||||
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e,
|
|
||||||
0x04041404, 0x04041414, 0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c,
|
|
||||||
0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14, 0x040c140c, 0x040c142c,
|
|
||||||
0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
|
||||||
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c,
|
|
||||||
0x04141c1c, 0x04141c3e, 0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c,
|
|
||||||
0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c, 0x041c3e04, 0x04240c1c,
|
|
||||||
0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
|
||||||
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04,
|
|
||||||
0x043e0c24, 0x043e0c34, 0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c,
|
|
||||||
0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c, 0x0c041c04, 0x0c041c14,
|
|
||||||
0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
|
||||||
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14,
|
|
||||||
0x0c14140c, 0x0c141c04, 0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404,
|
|
||||||
0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c, 0x0c24042c, 0x0c242c04,
|
|
||||||
0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
|
||||||
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404,
|
|
||||||
0x14041414, 0x14041434, 0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c,
|
|
||||||
0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c, 0x140c1c04, 0x140c341c,
|
|
||||||
0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
|
||||||
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c,
|
|
||||||
0x141c0c04, 0x141c0c24, 0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c,
|
|
||||||
0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24, 0x143e040c, 0x143e041c,
|
|
||||||
0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
|
||||||
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414,
|
|
||||||
0x1c0c1404, 0x1c0c1c0c, 0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c,
|
|
||||||
0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14, 0x1c1c0c0c, 0x1c1c1c1c,
|
|
||||||
0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
|
||||||
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404,
|
|
||||||
0x24040424, 0x24040c3e, 0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e,
|
|
||||||
0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404, 0x24143404, 0x24143434,
|
|
||||||
0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
|
||||||
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04,
|
|
||||||
0x2c040c14, 0x2c04240c, 0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434,
|
|
||||||
0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14, 0x2c1c0414, 0x2c1c2c1c,
|
|
||||||
0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
|
||||||
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434,
|
|
||||||
0x34043424, 0x340c140c, 0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04,
|
|
||||||
0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14, 0x34341c1c, 0x343e041c,
|
|
||||||
0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
|
||||||
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14,
|
|
||||||
0x3e1c0404, 0x3e1c0c2c, 0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c,
|
|
||||||
0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
|
||||||
});
|
|
||||||
|
|
||||||
static dpct::global_memory<const uint8_t, 1> ksigns_iq2xs(
|
|
||||||
sycl::range<1>(128),
|
|
||||||
{
|
|
||||||
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12,
|
|
||||||
141, 142, 15, 144, 17, 18, 147, 20, 149, 150, 23, 24, 153,
|
|
||||||
154, 27, 156, 29, 30, 159, 160, 33, 34, 163, 36, 165, 166,
|
|
||||||
39, 40, 169, 170, 43, 172, 45, 46, 175, 48, 177, 178, 51,
|
|
||||||
180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63, 192,
|
|
||||||
65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77,
|
|
||||||
78, 207, 80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90,
|
|
||||||
219, 92, 221, 222, 95, 96, 225, 226, 99, 228, 101, 102, 231,
|
|
||||||
232, 105, 106, 235, 108, 237, 238, 111, 240, 113, 114, 243, 116,
|
|
||||||
245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
|
||||||
});
|
|
||||||
|
|
||||||
static dpct::global_memory<const uint64_t, 1>
|
|
||||||
ksigns64(sycl::range<1>(128),
|
|
||||||
{
|
|
||||||
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00,
|
|
||||||
0x000000000000ffff, 0xff00000000ff0000, 0x0000000000ff00ff,
|
|
||||||
0x0000000000ffff00, 0xff00000000ffffff, 0xff000000ff000000,
|
|
||||||
0x00000000ff0000ff, 0x00000000ff00ff00, 0xff000000ff00ffff,
|
|
||||||
0x00000000ffff0000, 0xff000000ffff00ff, 0xff000000ffffff00,
|
|
||||||
0x00000000ffffffff, 0xff0000ff00000000, 0x000000ff000000ff,
|
|
||||||
0x000000ff0000ff00, 0xff0000ff0000ffff, 0x000000ff00ff0000,
|
|
||||||
0xff0000ff00ff00ff, 0xff0000ff00ffff00, 0x000000ff00ffffff,
|
|
||||||
0x000000ffff000000, 0xff0000ffff0000ff, 0xff0000ffff00ff00,
|
|
||||||
0x000000ffff00ffff, 0xff0000ffffff0000, 0x000000ffffff00ff,
|
|
||||||
0x000000ffffffff00, 0xff0000ffffffffff, 0xff00ff0000000000,
|
|
||||||
0x0000ff00000000ff, 0x0000ff000000ff00, 0xff00ff000000ffff,
|
|
||||||
0x0000ff0000ff0000, 0xff00ff0000ff00ff, 0xff00ff0000ffff00,
|
|
||||||
0x0000ff0000ffffff, 0x0000ff00ff000000, 0xff00ff00ff0000ff,
|
|
||||||
0xff00ff00ff00ff00, 0x0000ff00ff00ffff, 0xff00ff00ffff0000,
|
|
||||||
0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0xff00ff00ffffffff,
|
|
||||||
0x0000ffff00000000, 0xff00ffff000000ff, 0xff00ffff0000ff00,
|
|
||||||
0x0000ffff0000ffff, 0xff00ffff00ff0000, 0x0000ffff00ff00ff,
|
|
||||||
0x0000ffff00ffff00, 0xff00ffff00ffffff, 0xff00ffffff000000,
|
|
||||||
0x0000ffffff0000ff, 0x0000ffffff00ff00, 0xff00ffffff00ffff,
|
|
||||||
0x0000ffffffff0000, 0xff00ffffffff00ff, 0xff00ffffffffff00,
|
|
||||||
0x0000ffffffffffff, 0xffff000000000000, 0x00ff0000000000ff,
|
|
||||||
0x00ff00000000ff00, 0xffff00000000ffff, 0x00ff000000ff0000,
|
|
||||||
0xffff000000ff00ff, 0xffff000000ffff00, 0x00ff000000ffffff,
|
|
||||||
0x00ff0000ff000000, 0xffff0000ff0000ff, 0xffff0000ff00ff00,
|
|
||||||
0x00ff0000ff00ffff, 0xffff0000ffff0000, 0x00ff0000ffff00ff,
|
|
||||||
0x00ff0000ffffff00, 0xffff0000ffffffff, 0x00ff00ff00000000,
|
|
||||||
0xffff00ff000000ff, 0xffff00ff0000ff00, 0x00ff00ff0000ffff,
|
|
||||||
0xffff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00,
|
|
||||||
0xffff00ff00ffffff, 0xffff00ffff000000, 0x00ff00ffff0000ff,
|
|
||||||
0x00ff00ffff00ff00, 0xffff00ffff00ffff, 0x00ff00ffffff0000,
|
|
||||||
0xffff00ffffff00ff, 0xffff00ffffffff00, 0x00ff00ffffffffff,
|
|
||||||
0x00ffff0000000000, 0xffffff00000000ff, 0xffffff000000ff00,
|
|
||||||
0x00ffff000000ffff, 0xffffff0000ff0000, 0x00ffff0000ff00ff,
|
|
||||||
0x00ffff0000ffff00, 0xffffff0000ffffff, 0xffffff00ff000000,
|
|
||||||
0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0xffffff00ff00ffff,
|
|
||||||
0x00ffff00ffff0000, 0xffffff00ffff00ff, 0xffffff00ffffff00,
|
|
||||||
0x00ffff00ffffffff, 0xffffffff00000000, 0x00ffffff000000ff,
|
|
||||||
0x00ffffff0000ff00, 0xffffffff0000ffff, 0x00ffffff00ff0000,
|
|
||||||
0xffffffff00ff00ff, 0xffffffff00ffff00, 0x00ffffff00ffffff,
|
|
||||||
0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00,
|
|
||||||
0x00ffffffff00ffff, 0xffffffffffff0000, 0x00ffffffffff00ff,
|
|
||||||
0x00ffffffffffff00, 0xffffffffffffffff,
|
|
||||||
});
|
|
||||||
//#endif
|
|
||||||
|
|
||||||
static dpct::global_memory<const uint8_t, 1>
|
|
||||||
kmask_iq2xs(sycl::range<1>(8), {1, 2, 4, 8, 16, 32, 64, 128});
|
|
||||||
|
|
||||||
template<typename dst_t>
|
template<typename dst_t>
|
||||||
static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
static void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
||||||
const sycl::nd_item<3> &item_ct1,
|
const sycl::nd_item<3> &item_ct1,
|
||||||
|
|
|
@ -4102,45 +4102,7 @@ static void ggml_vk_test_transfer(ggml_backend_vk_context * ctx, size_t ne, bool
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_quantize_data(const float * from, void * to, size_t ne, ggml_type quant) {
|
static void ggml_vk_quantize_data(const float * from, void * to, size_t ne, ggml_type quant) {
|
||||||
std::vector<int64_t> hist_cur(1 << 4, 0);
|
ggml_quantize_chunk(quant, from, to, 0, 1, ne, nullptr);
|
||||||
|
|
||||||
switch(quant) {
|
|
||||||
case GGML_TYPE_F32:
|
|
||||||
memcpy(to, from, sizeof(float) * ne);
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q4_0:
|
|
||||||
ggml_quantize_q4_0(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q4_1:
|
|
||||||
ggml_quantize_q4_1(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q5_0:
|
|
||||||
ggml_quantize_q5_0(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q5_1:
|
|
||||||
ggml_quantize_q5_1(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q8_0:
|
|
||||||
ggml_quantize_q8_0(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q2_K:
|
|
||||||
ggml_quantize_q2_K(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q3_K:
|
|
||||||
ggml_quantize_q3_K(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q4_K:
|
|
||||||
ggml_quantize_q4_K(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q5_K:
|
|
||||||
ggml_quantize_q5_K(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
case GGML_TYPE_Q6_K:
|
|
||||||
ggml_quantize_q6_K(from, to, ne, ne, hist_cur.data());
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
GGML_ASSERT(false);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_type quant) {
|
static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_type quant) {
|
||||||
|
|
718
ggml.c
718
ggml.c
|
@ -1841,6 +1841,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
||||||
"FLASH_ATTN",
|
"FLASH_ATTN",
|
||||||
"FLASH_FF",
|
"FLASH_FF",
|
||||||
"FLASH_ATTN_BACK",
|
"FLASH_ATTN_BACK",
|
||||||
|
"SSM_CONV",
|
||||||
|
"SSM_SCAN",
|
||||||
"WIN_PART",
|
"WIN_PART",
|
||||||
"WIN_UNPART",
|
"WIN_UNPART",
|
||||||
"GET_REL_POS",
|
"GET_REL_POS",
|
||||||
|
@ -1863,7 +1865,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
||||||
"CROSS_ENTROPY_LOSS_BACK",
|
"CROSS_ENTROPY_LOSS_BACK",
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
|
static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
|
||||||
|
|
||||||
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||||
"none",
|
"none",
|
||||||
|
@ -1929,6 +1931,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||||
"flash_attn(x)",
|
"flash_attn(x)",
|
||||||
"flash_ff(x)",
|
"flash_ff(x)",
|
||||||
"flash_attn_back(x)",
|
"flash_attn_back(x)",
|
||||||
|
"ssm_conv(x)",
|
||||||
|
"ssm_scan(x)",
|
||||||
"win_part(x)",
|
"win_part(x)",
|
||||||
"win_unpart(x)",
|
"win_unpart(x)",
|
||||||
"get_rel_pos(x)",
|
"get_rel_pos(x)",
|
||||||
|
@ -1951,7 +1955,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||||
"cross_entropy_loss_back(x,y)",
|
"cross_entropy_loss_back(x,y)",
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
|
static_assert(GGML_OP_COUNT == 76, "GGML_OP_COUNT != 76");
|
||||||
|
|
||||||
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
||||||
|
|
||||||
|
@ -6154,6 +6158,108 @@ struct ggml_tensor * ggml_flash_attn_back(
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ggml_ssm_conv
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_ssm_conv(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * s,
|
||||||
|
struct ggml_tensor * x,
|
||||||
|
struct ggml_tensor * c,
|
||||||
|
struct ggml_tensor * sq) {
|
||||||
|
GGML_ASSERT(ggml_is_3d(s));
|
||||||
|
GGML_ASSERT(ggml_is_matrix(x));
|
||||||
|
GGML_ASSERT(ggml_is_matrix(c));
|
||||||
|
GGML_ASSERT(ggml_is_matrix(sq));
|
||||||
|
GGML_ASSERT(sq->type == GGML_TYPE_I32);
|
||||||
|
|
||||||
|
const int64_t d_conv = c->ne[0];
|
||||||
|
const int64_t d_inner = c->ne[1];
|
||||||
|
const int64_t n_tokens = x->ne[1];
|
||||||
|
const int64_t n_kv = s->ne[2];
|
||||||
|
|
||||||
|
GGML_ASSERT( s->ne[0] == d_conv - 1);
|
||||||
|
GGML_ASSERT( s->ne[1] == d_inner);
|
||||||
|
GGML_ASSERT( x->ne[0] == d_inner);
|
||||||
|
GGML_ASSERT(sq->ne[0] == n_kv);
|
||||||
|
GGML_ASSERT(sq->ne[1] == n_tokens);
|
||||||
|
|
||||||
|
bool is_node = false;
|
||||||
|
|
||||||
|
if (s->grad || x->grad || c->grad || sq->grad) {
|
||||||
|
GGML_ASSERT(false); // TODO: implement
|
||||||
|
is_node = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-in-1 concatenated x and conv_states, {d_inner, n_tokens} with {d_conv, d_inner, n_kv}
|
||||||
|
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, (d_inner*n_tokens) + (d_conv*d_inner*n_kv));
|
||||||
|
|
||||||
|
result->op = GGML_OP_SSM_CONV;
|
||||||
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
|
result->src[0] = s;
|
||||||
|
result->src[1] = x;
|
||||||
|
result->src[2] = c;
|
||||||
|
result->src[3] = sq;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ggml_ssm_scan
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_ssm_scan(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * s,
|
||||||
|
struct ggml_tensor * x,
|
||||||
|
struct ggml_tensor * dt,
|
||||||
|
struct ggml_tensor * A,
|
||||||
|
struct ggml_tensor * B,
|
||||||
|
struct ggml_tensor * C,
|
||||||
|
struct ggml_tensor * sq) {
|
||||||
|
GGML_ASSERT(ggml_is_contiguous(s));
|
||||||
|
GGML_ASSERT(ggml_is_contiguous(x));
|
||||||
|
GGML_ASSERT(ggml_is_contiguous(dt));
|
||||||
|
GGML_ASSERT(ggml_is_contiguous(A));
|
||||||
|
GGML_ASSERT(sq->type == GGML_TYPE_I32);
|
||||||
|
GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
|
||||||
|
GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
|
||||||
|
GGML_ASSERT(ggml_are_same_shape(x, dt));
|
||||||
|
|
||||||
|
{
|
||||||
|
const int64_t d_state = s->ne[0];
|
||||||
|
const int64_t d_inner = s->ne[1];
|
||||||
|
const int64_t n_tokens = x->ne[1];
|
||||||
|
|
||||||
|
GGML_ASSERT(x->ne[0] == d_inner);
|
||||||
|
GGML_ASSERT(A->ne[0] == d_state);
|
||||||
|
GGML_ASSERT(A->ne[1] == d_inner);
|
||||||
|
GGML_ASSERT(B->ne[0] == d_state);
|
||||||
|
GGML_ASSERT(B->ne[1] == n_tokens);
|
||||||
|
GGML_ASSERT(C->ne[0] == d_state);
|
||||||
|
GGML_ASSERT(C->ne[1] == n_tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_node = false;
|
||||||
|
|
||||||
|
if (s->grad || x->grad || dt->grad || A->grad || B->grad || C->grad || sq->grad) {
|
||||||
|
GGML_ASSERT(false); // TODO: implement
|
||||||
|
is_node = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-in-1 concatenated y and ssm_states, {d_inner, n_tokens} with {d_state, d_inner, n_kv}
|
||||||
|
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + ggml_nelements(s));
|
||||||
|
|
||||||
|
result->op = GGML_OP_SSM_SCAN;
|
||||||
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
|
result->src[0] = s;
|
||||||
|
result->src[1] = x;
|
||||||
|
result->src[2] = dt;
|
||||||
|
result->src[3] = A;
|
||||||
|
result->src[4] = B;
|
||||||
|
result->src[5] = C;
|
||||||
|
result->src[6] = sq;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
// ggml_win_part
|
// ggml_win_part
|
||||||
|
|
||||||
struct ggml_tensor * ggml_win_part(
|
struct ggml_tensor * ggml_win_part(
|
||||||
|
@ -14771,6 +14877,257 @@ static void ggml_compute_forward_flash_attn_back(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ggml_compute_forward_ssm_conv
|
||||||
|
|
||||||
|
static void ggml_compute_forward_ssm_conv_f32(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct ggml_tensor * src0 = dst->src[0]; // conv_state
|
||||||
|
const struct ggml_tensor * src1 = dst->src[1]; // x
|
||||||
|
const struct ggml_tensor * src2 = dst->src[2]; // conv1d.weight
|
||||||
|
const struct ggml_tensor * src3 = dst->src[3]; // state_seq
|
||||||
|
|
||||||
|
const int ith = params->ith;
|
||||||
|
const int nth = params->nth;
|
||||||
|
|
||||||
|
const int nc = src2->ne[0]; // d_conv
|
||||||
|
const int nr = src0->ne[1]; // d_inner
|
||||||
|
const int n_t = src1->ne[1]; // n_tokens
|
||||||
|
const int n_kv = src0->ne[2]; // max number of sequences in the batch
|
||||||
|
|
||||||
|
GGML_ASSERT((nr*n_t) + (nc*nr*n_kv) == ggml_nelements(dst));
|
||||||
|
GGML_ASSERT(src0->nb[0] == sizeof(float));
|
||||||
|
GGML_ASSERT(src1->nb[0] == sizeof(float));
|
||||||
|
GGML_ASSERT(src2->nb[0] == sizeof(float));
|
||||||
|
GGML_ASSERT(src3->nb[0] == sizeof(int32_t));
|
||||||
|
GGML_ASSERT(src0->nb[1] == src0->ne[0]*sizeof(float));
|
||||||
|
// for use with the destination state offset between sequences
|
||||||
|
GGML_ASSERT(src2->nb[2] == src2->ne[1]*src2->ne[0]*sizeof(float));
|
||||||
|
|
||||||
|
// rows per thread
|
||||||
|
const int dr = (nr + nth - 1)/nth;
|
||||||
|
|
||||||
|
// row range for this thread
|
||||||
|
const int ir0 = dr*ith;
|
||||||
|
const int ir1 = MIN(ir0 + dr, nr);
|
||||||
|
const int ir = ir1 - ir0;
|
||||||
|
|
||||||
|
if (n_kv > 1) {
|
||||||
|
// multiple sequences means it's hard to know when it's the first time a state is read,
|
||||||
|
// so copy them all over to the destination, just to be sure.
|
||||||
|
for (int i3 = 0; i3 < n_kv; ++i3) {
|
||||||
|
float * s0 = (float *) ((char *) src0->data + ir0*(src0->nb[1]) + i3*(src0->nb[2]));
|
||||||
|
float * s = (float *) ((char *) dst->data + ir0*(src2->nb[1]) + i3*(src2->nb[2]) + nr*n_t*sizeof(float));
|
||||||
|
// can't use memcpy because of d_conv vs d_conv - 1
|
||||||
|
for (int i1 = 0; i1 < ir; ++i1) {
|
||||||
|
for (int i0 = 0; i0 < nc - 1; ++i0) {
|
||||||
|
// copy s0 to last (d_conv - 1) columns of s
|
||||||
|
s[1 + i0 + i1*nc] = s0[i0 + i1*(nc - 1)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i2 = 0; i2 < n_t; ++i2) {
|
||||||
|
int32_t * sq = (int32_t *) ((char *) src3->data + i2*(src3->nb[1])); // {n_kv, n_tokens}
|
||||||
|
float * x = (float *) ((char *) dst->data + ir0*sizeof(float) + i2*(nr*sizeof(float))); // {d_inner, n_tokens}
|
||||||
|
float * s = (float *) ((char *) dst->data + ir0*(src2->nb[1]) + sq[0]*(src2->nb[2]) + nr*n_t*sizeof(float)); // {d_conv, d_inner, n_kv}
|
||||||
|
float * s0; // {d_conv - 1, d_inner, n_kv}
|
||||||
|
float * x0 = (float *) ((char *) src1->data + ir0*(src1->nb[0]) + i2*(src1->nb[1])); // {d_inner, n_tokens}
|
||||||
|
float * c = (float *) ((char *) src2->data + ir0*(src2->nb[1])); // {d_conv, d_inner}
|
||||||
|
int ne0s0;
|
||||||
|
|
||||||
|
GGML_ASSERT(0 <= sq[0] && sq[0] < n_kv);
|
||||||
|
|
||||||
|
// avoid needing to copy the state for the first token
|
||||||
|
if (i2 == 0) {
|
||||||
|
s0 = (float *) ((char *) src0->data + ir0*(src0->nb[1]) + sq[0]*(src0->nb[2])); // {d_conv - 1, d_inner, n_kv}
|
||||||
|
ne0s0 = src0->ne[0];
|
||||||
|
} else {
|
||||||
|
// the source is the last (d_conv - 1) columns of the destination
|
||||||
|
s0 = s + 1;
|
||||||
|
ne0s0 = nc;
|
||||||
|
}
|
||||||
|
|
||||||
|
// d_inner
|
||||||
|
for (int i1 = 0; i1 < ir; ++i1) {
|
||||||
|
// shift state left
|
||||||
|
for (int i0 = 0; i0 < nc - 1; ++i0) {
|
||||||
|
s[i0 + i1*nc] = s0[i0 + i1*ne0s0];
|
||||||
|
}
|
||||||
|
// insert x on the last column
|
||||||
|
s[(nc - 1) + i1*nc] = x0[i1];
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle copies when there are multiple output states
|
||||||
|
for (int i3 = 1; i3 < n_kv; ++i3) {
|
||||||
|
int32_t seq = sq[i3];
|
||||||
|
if (0 <= seq && seq < n_kv) {
|
||||||
|
float * s1 = s + (seq - sq[0])*nc*nr;
|
||||||
|
memcpy(s1, s, nc*ir*sizeof(float));
|
||||||
|
} else {
|
||||||
|
// stop at negative or too big seq_ids
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// it seems a little faster when this is separate from the state shift
|
||||||
|
for (int i1 = 0; i1 < ir; ++i1) {
|
||||||
|
// rowwise dot product
|
||||||
|
float sumf = 0.0f;
|
||||||
|
for (int i0 = 0; i0 < nc; ++i0) {
|
||||||
|
int i = i0 + i1*nc;
|
||||||
|
sumf += s[i] * c[i];
|
||||||
|
}
|
||||||
|
x[i1] = sumf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_compute_forward_ssm_conv(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
switch (dst->src[0]->type) {
|
||||||
|
case GGML_TYPE_F32:
|
||||||
|
{
|
||||||
|
ggml_compute_forward_ssm_conv_f32(params, dst);
|
||||||
|
} break;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
GGML_ASSERT(false);
|
||||||
|
} break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ggml_compute_forward_ssm_scan
|
||||||
|
|
||||||
|
static void ggml_compute_forward_ssm_scan_f32(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct ggml_tensor * src0 = dst->src[0]; // s
|
||||||
|
const struct ggml_tensor * src1 = dst->src[1]; // x
|
||||||
|
const struct ggml_tensor * src2 = dst->src[2]; // dt
|
||||||
|
const struct ggml_tensor * src3 = dst->src[3]; // A
|
||||||
|
const struct ggml_tensor * src4 = dst->src[4]; // B
|
||||||
|
const struct ggml_tensor * src5 = dst->src[5]; // C
|
||||||
|
const struct ggml_tensor * src6 = dst->src[6]; // sq
|
||||||
|
|
||||||
|
const int ith = params->ith;
|
||||||
|
const int nth = params->nth;
|
||||||
|
|
||||||
|
const int64_t nc = src0->ne[0]; // d_state
|
||||||
|
const int64_t nr = src0->ne[1]; // d_inner
|
||||||
|
const int64_t n_t = src1->ne[1]; // number of tokens in the batch
|
||||||
|
const int64_t n_kv = src0->ne[2]; // max number of sequences in the batch
|
||||||
|
|
||||||
|
GGML_ASSERT(ggml_nelements(src1) + ggml_nelements(src0) == ggml_nelements(dst));
|
||||||
|
GGML_ASSERT(src0->nb[0] == sizeof(float));
|
||||||
|
GGML_ASSERT(src1->nb[0] == sizeof(float));
|
||||||
|
GGML_ASSERT(src2->nb[0] == sizeof(float));
|
||||||
|
GGML_ASSERT(src3->nb[0] == sizeof(float));
|
||||||
|
GGML_ASSERT(src4->nb[0] == sizeof(float));
|
||||||
|
GGML_ASSERT(src5->nb[0] == sizeof(float));
|
||||||
|
// required for the dot product between s and C, and when copying the states
|
||||||
|
GGML_ASSERT(src0->nb[1] == src0->ne[0]*sizeof(float));
|
||||||
|
// required for per-sequence offsets for states
|
||||||
|
GGML_ASSERT(src0->nb[2] == src0->ne[0]*src0->ne[1]*sizeof(float));
|
||||||
|
// required to get correct offset for state destination (i.e. src1->nb[2])
|
||||||
|
GGML_ASSERT(src1->nb[2] == src1->ne[0]*src1->ne[1]*sizeof(float));
|
||||||
|
|
||||||
|
// rows per thread
|
||||||
|
const int dr = (nr + nth - 1)/nth;
|
||||||
|
|
||||||
|
// row range for this thread
|
||||||
|
const int ir0 = dr*ith;
|
||||||
|
const int ir1 = MIN(ir0 + dr, nr);
|
||||||
|
const int ir = ir1 - ir0;
|
||||||
|
|
||||||
|
if (n_kv > 1) {
|
||||||
|
// it's hard to know if the source states have already been copied
|
||||||
|
// when there are multiple, so copy them already.
|
||||||
|
for (int i3 = 0; i3 < n_kv; ++i3) {
|
||||||
|
float * s0 = (float *) ((char *) src0->data + ir0*(src0->nb[1]) + i3*(src0->nb[2]));
|
||||||
|
float * s = (float *) ((char *) dst->data + ir0*(src0->nb[1]) + i3*(src0->nb[2]) + src1->nb[2]);
|
||||||
|
memcpy(s, s0, nc*ir*sizeof(float));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i2 = 0; i2 < n_t; ++i2) {
|
||||||
|
int32_t * sq = (int32_t *) ((char *) src6->data + i2*(src6->nb[1])); // {n_kv, n_tokens}
|
||||||
|
float * y = (float *) ((char *) dst->data + ir0*(src1->nb[0]) + i2*(src1->nb[1])); // {d_inner, n_tokens}
|
||||||
|
float * s = (float *) ((char *) dst->data + ir0*(src0->nb[1]) + sq[0]*(src0->nb[2]) + src1->nb[2]); // {d_state, d_inner, n_kv}
|
||||||
|
float * s0;
|
||||||
|
float * x = (float *) ((char *) src1->data + ir0*(src1->nb[0]) + i2*(src1->nb[1])); // {d_inner, n_tokens}
|
||||||
|
float * dt = (float *) ((char *) src2->data + ir0*(src2->nb[0]) + i2*(src2->nb[1])); // {d_inner, n_tokens}
|
||||||
|
float * A = (float *) ((char *) src3->data + ir0*(src3->nb[1])); // {d_state, d_inner}
|
||||||
|
float * B = (float *) ((char *) src4->data + i2*(src4->nb[1])); // {d_state, n_tokens}
|
||||||
|
float * C = (float *) ((char *) src5->data + i2*(src5->nb[1])); // {d_state, n_tokens}
|
||||||
|
|
||||||
|
GGML_ASSERT(0 <= sq[0] && sq[0] < n_kv);
|
||||||
|
|
||||||
|
// avoid needing to copy the state for the first token
|
||||||
|
if (i2 == 0) {
|
||||||
|
s0 = (float *) ((char *) src0->data + ir0*(src0->nb[1]) + sq[0]*(src0->nb[2])); // {d_state, d_inner, n_kv}
|
||||||
|
} else {
|
||||||
|
// otherwise the source is the same as the destination
|
||||||
|
s0 = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
// d_inner
|
||||||
|
for (int i1 = 0; i1 < ir; ++i1) {
|
||||||
|
// ref: https://github.com/state-spaces/mamba/blob/34076d664838588a3c97727b263478ab9f621a07/mamba_ssm/ops/triton/selective_state_update.py#L78
|
||||||
|
float dt_soft_plus = dt[i1] <= 20.0f ? log1pf(expf(dt[i1])) : dt[i1];
|
||||||
|
float x_dt = x[i1] * dt_soft_plus;
|
||||||
|
float sumf = 0.0f;
|
||||||
|
// d_state
|
||||||
|
for (int i0 = 0; i0 < nc; ++i0) {
|
||||||
|
int i = i0 + i1*nc;
|
||||||
|
// state = prev_state * dA + dB * x
|
||||||
|
float state = (s0[i] * expf(dt_soft_plus * A[i])) + (B[i0] * x_dt);
|
||||||
|
// y = rowwise_dotprod(state, C)
|
||||||
|
sumf += state * C[i0];
|
||||||
|
s[i] = state;
|
||||||
|
}
|
||||||
|
y[i1] = sumf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle copies when there are multiple output states
|
||||||
|
for (int i3 = 1; i3 < n_kv; ++i3) {
|
||||||
|
int32_t seq = sq[i3];
|
||||||
|
if (0 <= seq && seq < n_kv) {
|
||||||
|
float * s1 = s + (seq - sq[0])*nc*nr;
|
||||||
|
memcpy(s1, s, nc*ir*sizeof(float));
|
||||||
|
} else {
|
||||||
|
// stop at negative or too big seq_ids
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_compute_forward_ssm_scan(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
switch (dst->src[0]->type) {
|
||||||
|
case GGML_TYPE_F32:
|
||||||
|
{
|
||||||
|
ggml_compute_forward_ssm_scan_f32(params, dst);
|
||||||
|
} break;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
GGML_ASSERT(false);
|
||||||
|
} break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ggml_compute_forward_win_part
|
// ggml_compute_forward_win_part
|
||||||
|
|
||||||
static void ggml_compute_forward_win_part_f32(
|
static void ggml_compute_forward_win_part_f32(
|
||||||
|
@ -15830,6 +16187,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
||||||
bool masked = t != 0;
|
bool masked = t != 0;
|
||||||
ggml_compute_forward_flash_attn_back(params, masked, tensor);
|
ggml_compute_forward_flash_attn_back(params, masked, tensor);
|
||||||
} break;
|
} break;
|
||||||
|
case GGML_OP_SSM_CONV:
|
||||||
|
{
|
||||||
|
ggml_compute_forward_ssm_conv(params, tensor);
|
||||||
|
} break;
|
||||||
|
case GGML_OP_SSM_SCAN:
|
||||||
|
{
|
||||||
|
ggml_compute_forward_ssm_scan(params, tensor);
|
||||||
|
} break;
|
||||||
case GGML_OP_WIN_PART:
|
case GGML_OP_WIN_PART:
|
||||||
{
|
{
|
||||||
ggml_compute_forward_win_part(params, tensor);
|
ggml_compute_forward_win_part(params, tensor);
|
||||||
|
@ -16884,6 +17249,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
||||||
{
|
{
|
||||||
GGML_ASSERT(false); // not supported
|
GGML_ASSERT(false); // not supported
|
||||||
} break;
|
} break;
|
||||||
|
case GGML_OP_SSM_CONV:
|
||||||
|
case GGML_OP_SSM_SCAN:
|
||||||
|
{
|
||||||
|
GGML_ASSERT(false); // TODO: not implemented
|
||||||
|
} break;
|
||||||
case GGML_OP_WIN_PART:
|
case GGML_OP_WIN_PART:
|
||||||
case GGML_OP_WIN_UNPART:
|
case GGML_OP_WIN_UNPART:
|
||||||
case GGML_OP_UNARY:
|
case GGML_OP_UNARY:
|
||||||
|
@ -17590,6 +17960,11 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
||||||
{
|
{
|
||||||
n_tasks = n_threads;
|
n_tasks = n_threads;
|
||||||
} break;
|
} break;
|
||||||
|
case GGML_OP_SSM_CONV:
|
||||||
|
case GGML_OP_SSM_SCAN:
|
||||||
|
{
|
||||||
|
n_tasks = n_threads;
|
||||||
|
} break;
|
||||||
case GGML_OP_WIN_PART:
|
case GGML_OP_WIN_PART:
|
||||||
case GGML_OP_WIN_UNPART:
|
case GGML_OP_WIN_UNPART:
|
||||||
case GGML_OP_GET_REL_POS:
|
case GGML_OP_GET_REL_POS:
|
||||||
|
@ -19784,133 +20159,6 @@ void ggml_quantize_free(void) {
|
||||||
ggml_critical_section_end();
|
ggml_critical_section_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist) {
|
|
||||||
assert(k % QK4_0 == 0);
|
|
||||||
const int nb = k / QK4_0;
|
|
||||||
|
|
||||||
for (int b = 0; b < n; b += k) {
|
|
||||||
block_q4_0 * restrict y = (block_q4_0 *) dst + b/QK4_0;
|
|
||||||
|
|
||||||
quantize_row_q4_0_reference(src + b, y, k);
|
|
||||||
|
|
||||||
for (int i = 0; i < nb; i++) {
|
|
||||||
for (int j = 0; j < QK4_0; j += 2) {
|
|
||||||
const uint8_t vi0 = y[i].qs[j/2] & 0x0F;
|
|
||||||
const uint8_t vi1 = y[i].qs[j/2] >> 4;
|
|
||||||
|
|
||||||
hist[vi0]++;
|
|
||||||
hist[vi1]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (n/QK4_0*sizeof(block_q4_0));
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist) {
|
|
||||||
assert(k % QK4_1 == 0);
|
|
||||||
const int nb = k / QK4_1;
|
|
||||||
|
|
||||||
for (int b = 0; b < n; b += k) {
|
|
||||||
block_q4_1 * restrict y = (block_q4_1 *) dst + b/QK4_1;
|
|
||||||
|
|
||||||
quantize_row_q4_1_reference(src + b, y, k);
|
|
||||||
|
|
||||||
for (int i = 0; i < nb; i++) {
|
|
||||||
for (int j = 0; j < QK4_1; j += 2) {
|
|
||||||
const uint8_t vi0 = y[i].qs[j/2] & 0x0F;
|
|
||||||
const uint8_t vi1 = y[i].qs[j/2] >> 4;
|
|
||||||
|
|
||||||
hist[vi0]++;
|
|
||||||
hist[vi1]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (n/QK4_1*sizeof(block_q4_1));
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist) {
|
|
||||||
assert(k % QK5_0 == 0);
|
|
||||||
const int nb = k / QK5_0;
|
|
||||||
|
|
||||||
for (int b = 0; b < n; b += k) {
|
|
||||||
block_q5_0 * restrict y = (block_q5_0 *)dst + b/QK5_0;
|
|
||||||
|
|
||||||
quantize_row_q5_0_reference(src + b, y, k);
|
|
||||||
|
|
||||||
for (int i = 0; i < nb; i++) {
|
|
||||||
uint32_t qh;
|
|
||||||
memcpy(&qh, &y[i].qh, sizeof(qh));
|
|
||||||
|
|
||||||
for (int j = 0; j < QK5_0; j += 2) {
|
|
||||||
const uint8_t vh0 = ((qh & (1u << (j/2 + 0 ))) >> (j/2 + 0 )) << 4;
|
|
||||||
const uint8_t vh1 = ((qh & (1u << (j/2 + 16))) >> (j/2 + 12));
|
|
||||||
|
|
||||||
// cast to 16 bins
|
|
||||||
const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;
|
|
||||||
const uint8_t vi1 = ((y[i].qs[j/2] >> 4) | vh1) / 2;
|
|
||||||
|
|
||||||
hist[vi0]++;
|
|
||||||
hist[vi1]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (n/QK5_0*sizeof(block_q5_0));
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist) {
|
|
||||||
assert(k % QK5_1 == 0);
|
|
||||||
const int nb = k / QK5_1;
|
|
||||||
|
|
||||||
for (int b = 0; b < n; b += k) {
|
|
||||||
block_q5_1 * restrict y = (block_q5_1 *)dst + b/QK5_1;
|
|
||||||
|
|
||||||
quantize_row_q5_1_reference(src + b, y, k);
|
|
||||||
|
|
||||||
for (int i = 0; i < nb; i++) {
|
|
||||||
uint32_t qh;
|
|
||||||
memcpy(&qh, &y[i].qh, sizeof(qh));
|
|
||||||
|
|
||||||
for (int j = 0; j < QK5_1; j += 2) {
|
|
||||||
const uint8_t vh0 = ((qh & (1u << (j/2 + 0 ))) >> (j/2 + 0 )) << 4;
|
|
||||||
const uint8_t vh1 = ((qh & (1u << (j/2 + 16))) >> (j/2 + 12));
|
|
||||||
|
|
||||||
// cast to 16 bins
|
|
||||||
const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2;
|
|
||||||
const uint8_t vi1 = ((y[i].qs[j/2] >> 4) | vh1) / 2;
|
|
||||||
|
|
||||||
hist[vi0]++;
|
|
||||||
hist[vi1]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (n/QK5_1*sizeof(block_q5_1));
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist) {
|
|
||||||
assert(k % QK8_0 == 0);
|
|
||||||
const int nb = k / QK8_0;
|
|
||||||
|
|
||||||
for (int b = 0; b < n; b += k) {
|
|
||||||
block_q8_0 * restrict y = (block_q8_0 *)dst + b/QK8_0;
|
|
||||||
|
|
||||||
quantize_row_q8_0_reference(src + b, y, k);
|
|
||||||
|
|
||||||
for (int i = 0; i < nb; i++) {
|
|
||||||
for (int j = 0; j < QK8_0; ++j) {
|
|
||||||
const int8_t vi = y[i].qs[j];
|
|
||||||
|
|
||||||
hist[vi/16 + 8]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (n/QK8_0*sizeof(block_q8_0));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
|
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
|
||||||
return
|
return
|
||||||
type == GGML_TYPE_IQ2_XXS ||
|
type == GGML_TYPE_IQ2_XXS ||
|
||||||
|
@ -19918,177 +20166,52 @@ bool ggml_quantize_requires_imatrix(enum ggml_type type) {
|
||||||
type == GGML_TYPE_IQ1_S;
|
type == GGML_TYPE_IQ1_S;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start,
|
size_t ggml_quantize_chunk(
|
||||||
int nrows, int n_per_row, int64_t * hist, const float * imatrix) {
|
enum ggml_type type,
|
||||||
|
const float * src,
|
||||||
|
void * dst,
|
||||||
|
int start,
|
||||||
|
int nrows,
|
||||||
|
int n_per_row,
|
||||||
|
const float * imatrix) {
|
||||||
|
const int n = nrows * n_per_row;
|
||||||
|
|
||||||
|
if (ggml_quantize_requires_imatrix(type)) {
|
||||||
|
GGML_ASSERT(imatrix != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
GGML_ASSERT(start % type_traits[type].blck_size == 0);
|
||||||
|
GGML_ASSERT(start % n_per_row == 0);
|
||||||
|
|
||||||
ggml_quantize_init(type); // this is noop if already initialized
|
ggml_quantize_init(type); // this is noop if already initialized
|
||||||
|
|
||||||
|
const size_t start_row = start / n_per_row;
|
||||||
|
const size_t row_size = ggml_row_size(type, n_per_row);
|
||||||
|
|
||||||
size_t result = 0;
|
size_t result = 0;
|
||||||
int n = nrows * n_per_row;
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case GGML_TYPE_Q4_0:
|
case GGML_TYPE_Q4_0: result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
{
|
case GGML_TYPE_Q4_1: result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
GGML_ASSERT(start % QK4_0 == 0);
|
case GGML_TYPE_Q5_0: result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
case GGML_TYPE_Q5_1: result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
size_t start_row = start / n_per_row;
|
case GGML_TYPE_Q8_0: result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
case GGML_TYPE_Q2_K: result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
result = quantize_q4_0(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
case GGML_TYPE_Q3_K: result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
case GGML_TYPE_Q4_K: result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
} break;
|
case GGML_TYPE_Q5_K: result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
case GGML_TYPE_Q4_1:
|
case GGML_TYPE_Q6_K: result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
{
|
case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
GGML_ASSERT(start % QK4_1 == 0);
|
case GGML_TYPE_IQ2_XS: result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
size_t start_row = start / n_per_row;
|
case GGML_TYPE_IQ3_S: result = quantize_iq3_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
case GGML_TYPE_IQ2_S: result = quantize_iq2_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
result = quantize_q4_1(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
case GGML_TYPE_IQ1_S: result = quantize_iq1_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
case GGML_TYPE_IQ4_NL: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q5_0:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK5_0 == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_q5_0(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q5_1:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK5_1 == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_q5_1(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q8_0:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK8_0 == 0);
|
|
||||||
block_q8_0 * block = (block_q8_0*)dst + start / QK8_0;
|
|
||||||
result = ggml_quantize_q8_0(src + start, block, n, n, hist);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q2_K:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_q2_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q3_K:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_q3_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q4_K:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_q4_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q5_K:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_q5_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_Q6_K:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_q6_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_IQ2_XXS:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
GGML_ASSERT(imatrix);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_iq2_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_IQ2_XS:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
GGML_ASSERT(imatrix);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_iq2_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_IQ3_XXS:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_IQ3_S:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_iq3_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_IQ2_S:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_iq2_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_IQ1_S:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_iq1_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
case GGML_TYPE_IQ4_NL:
|
|
||||||
#if QK_K == 64
|
#if QK_K == 64
|
||||||
case GGML_TYPE_IQ4_XS:
|
case GGML_TYPE_IQ4_XS: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
#endif
|
#else
|
||||||
{
|
case GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
||||||
GGML_ASSERT(start % QK4_NL == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
#if QK_K != 64
|
|
||||||
case GGML_TYPE_IQ4_XS:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(start % QK_K == 0);
|
|
||||||
GGML_ASSERT(start % n_per_row == 0);
|
|
||||||
size_t start_row = start / n_per_row;
|
|
||||||
size_t row_size = ggml_row_size(type, n_per_row);
|
|
||||||
result = quantize_iq4_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
|
||||||
GGML_ASSERT(result == row_size * nrows);
|
|
||||||
} break;
|
|
||||||
#endif
|
#endif
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
{
|
{
|
||||||
|
@ -20105,6 +20228,9 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GGML_ASSERT(result == nrows * row_size);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
42
ggml.h
42
ggml.h
|
@ -472,6 +472,8 @@ extern "C" {
|
||||||
GGML_OP_FLASH_ATTN,
|
GGML_OP_FLASH_ATTN,
|
||||||
GGML_OP_FLASH_FF,
|
GGML_OP_FLASH_FF,
|
||||||
GGML_OP_FLASH_ATTN_BACK,
|
GGML_OP_FLASH_ATTN_BACK,
|
||||||
|
GGML_OP_SSM_CONV,
|
||||||
|
GGML_OP_SSM_SCAN,
|
||||||
GGML_OP_WIN_PART,
|
GGML_OP_WIN_PART,
|
||||||
GGML_OP_WIN_UNPART,
|
GGML_OP_WIN_UNPART,
|
||||||
GGML_OP_GET_REL_POS,
|
GGML_OP_GET_REL_POS,
|
||||||
|
@ -1728,6 +1730,23 @@ extern "C" {
|
||||||
struct ggml_tensor * c0,
|
struct ggml_tensor * c0,
|
||||||
struct ggml_tensor * c1);
|
struct ggml_tensor * c1);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_ssm_conv(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * s,
|
||||||
|
struct ggml_tensor * x,
|
||||||
|
struct ggml_tensor * c,
|
||||||
|
struct ggml_tensor * sq);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_ssm_scan(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * s,
|
||||||
|
struct ggml_tensor * x,
|
||||||
|
struct ggml_tensor * dt,
|
||||||
|
struct ggml_tensor * A,
|
||||||
|
struct ggml_tensor * B,
|
||||||
|
struct ggml_tensor * C,
|
||||||
|
struct ggml_tensor * sq);
|
||||||
|
|
||||||
// partition into non-overlapping windows with padding if needed
|
// partition into non-overlapping windows with padding if needed
|
||||||
// example:
|
// example:
|
||||||
// a: 768 64 64 1
|
// a: 768 64 64 1
|
||||||
|
@ -2175,25 +2194,18 @@ extern "C" {
|
||||||
GGML_API void ggml_quantize_init(enum ggml_type type);
|
GGML_API void ggml_quantize_init(enum ggml_type type);
|
||||||
GGML_API void ggml_quantize_free(void);
|
GGML_API void ggml_quantize_free(void);
|
||||||
|
|
||||||
// TODO: these would probably get removed in favor of the more general ggml_quantize_chunk
|
|
||||||
GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
|
|
||||||
GGML_API size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
GGML_API size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
GGML_API size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
|
||||||
|
|
||||||
// some quantization type cannot be used without an importance matrix
|
// some quantization type cannot be used without an importance matrix
|
||||||
GGML_API bool ggml_quantize_requires_imatrix(enum ggml_type type);
|
GGML_API bool ggml_quantize_requires_imatrix(enum ggml_type type);
|
||||||
|
|
||||||
// calls ggml_quantize_init internally (i.e. can allocate memory)
|
// calls ggml_quantize_init internally (i.e. can allocate memory)
|
||||||
GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst,
|
GGML_API size_t ggml_quantize_chunk(
|
||||||
int start, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
enum ggml_type type,
|
||||||
|
const float * src,
|
||||||
|
void * dst,
|
||||||
|
int start,
|
||||||
|
int nrows,
|
||||||
|
int n_per_row,
|
||||||
|
const float * imatrix);
|
||||||
|
|
||||||
//
|
//
|
||||||
// gguf
|
// gguf
|
||||||
|
|
|
@ -61,6 +61,12 @@ class Keys:
|
||||||
SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
|
SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
|
||||||
SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
|
SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
|
||||||
|
|
||||||
|
class SSM:
|
||||||
|
CONV_KERNEL = "{arch}.ssm.conv_kernel"
|
||||||
|
INNER_SIZE = "{arch}.ssm.inner_size"
|
||||||
|
STATE_SIZE = "{arch}.ssm.state_size"
|
||||||
|
TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
|
||||||
|
|
||||||
class Tokenizer:
|
class Tokenizer:
|
||||||
MODEL = "tokenizer.ggml.model"
|
MODEL = "tokenizer.ggml.model"
|
||||||
LIST = "tokenizer.ggml.tokens"
|
LIST = "tokenizer.ggml.tokens"
|
||||||
|
@ -113,6 +119,7 @@ class MODEL_ARCH(IntEnum):
|
||||||
MINICPM = auto()
|
MINICPM = auto()
|
||||||
GEMMA = auto()
|
GEMMA = auto()
|
||||||
STARCODER2 = auto()
|
STARCODER2 = auto()
|
||||||
|
MAMBA = auto()
|
||||||
|
|
||||||
|
|
||||||
class MODEL_TENSOR(IntEnum):
|
class MODEL_TENSOR(IntEnum):
|
||||||
|
@ -144,6 +151,13 @@ class MODEL_TENSOR(IntEnum):
|
||||||
ATTN_Q_NORM = auto()
|
ATTN_Q_NORM = auto()
|
||||||
ATTN_K_NORM = auto()
|
ATTN_K_NORM = auto()
|
||||||
LAYER_OUT_NORM = auto()
|
LAYER_OUT_NORM = auto()
|
||||||
|
SSM_IN = auto()
|
||||||
|
SSM_CONV1D = auto()
|
||||||
|
SSM_X = auto()
|
||||||
|
SSM_DT = auto()
|
||||||
|
SSM_A = auto()
|
||||||
|
SSM_D = auto()
|
||||||
|
SSM_OUT = auto()
|
||||||
|
|
||||||
|
|
||||||
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||||
|
@ -171,6 +185,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||||
MODEL_ARCH.MINICPM: "minicpm",
|
MODEL_ARCH.MINICPM: "minicpm",
|
||||||
MODEL_ARCH.GEMMA: "gemma",
|
MODEL_ARCH.GEMMA: "gemma",
|
||||||
MODEL_ARCH.STARCODER2: "starcoder2",
|
MODEL_ARCH.STARCODER2: "starcoder2",
|
||||||
|
MODEL_ARCH.MAMBA: "mamba",
|
||||||
}
|
}
|
||||||
|
|
||||||
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
|
@ -202,6 +217,13 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}",
|
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}",
|
||||||
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up.{xid}",
|
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up.{xid}",
|
||||||
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
||||||
|
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
|
||||||
|
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
|
||||||
|
MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
|
||||||
|
MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
|
||||||
|
MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
|
||||||
|
MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
|
||||||
|
MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
|
||||||
}
|
}
|
||||||
|
|
||||||
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
|
@ -543,6 +565,19 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
MODEL_TENSOR.FFN_DOWN,
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
],
|
],
|
||||||
|
MODEL_ARCH.MAMBA: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.SSM_IN,
|
||||||
|
MODEL_TENSOR.SSM_CONV1D,
|
||||||
|
MODEL_TENSOR.SSM_X,
|
||||||
|
MODEL_TENSOR.SSM_DT,
|
||||||
|
MODEL_TENSOR.SSM_A,
|
||||||
|
MODEL_TENSOR.SSM_D,
|
||||||
|
MODEL_TENSOR.SSM_OUT,
|
||||||
|
],
|
||||||
# TODO
|
# TODO
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -734,6 +769,12 @@ KEY_ROPE_SCALING_FACTOR = Keys.Rope.SCALING_FACTOR
|
||||||
KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN
|
KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN
|
||||||
KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED
|
KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED
|
||||||
|
|
||||||
|
# SSM
|
||||||
|
KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
|
||||||
|
KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
|
||||||
|
KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
|
||||||
|
KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
|
||||||
|
|
||||||
# tokenization
|
# tokenization
|
||||||
KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
|
KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
|
||||||
KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
|
KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
|
||||||
|
|
|
@ -382,6 +382,18 @@ class GGUFWriter:
|
||||||
def add_rope_scaling_finetuned(self, value: bool) -> None:
|
def add_rope_scaling_finetuned(self, value: bool) -> None:
|
||||||
self.add_bool(Keys.Rope.SCALING_FINETUNED.format(arch=self.arch), value)
|
self.add_bool(Keys.Rope.SCALING_FINETUNED.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_ssm_conv_kernel(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.SSM.CONV_KERNEL.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_ssm_inner_size(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.SSM.INNER_SIZE.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_ssm_state_size(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.SSM.STATE_SIZE.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_ssm_time_step_rank(self, value: int) -> None:
|
||||||
|
self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_tokenizer_model(self, model: str) -> None:
|
def add_tokenizer_model(self, model: str) -> None:
|
||||||
self.add_string(Keys.Tokenizer.MODEL, model)
|
self.add_string(Keys.Tokenizer.MODEL, model)
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,9 @@ class TensorNameMap:
|
||||||
"wte", # gpt2
|
"wte", # gpt2
|
||||||
"transformer.embd.wte", # phi2
|
"transformer.embd.wte", # phi2
|
||||||
"model.tok_embeddings", # internlm2
|
"model.tok_embeddings", # internlm2
|
||||||
|
"model.embedding", # mamba-qbert
|
||||||
|
"backbone.embedding", # mamba
|
||||||
|
"backbone.embeddings", # mamba-hf
|
||||||
),
|
),
|
||||||
|
|
||||||
# Token type embeddings
|
# Token type embeddings
|
||||||
|
@ -44,7 +47,7 @@ class TensorNameMap:
|
||||||
# Output
|
# Output
|
||||||
MODEL_TENSOR.OUTPUT: (
|
MODEL_TENSOR.OUTPUT: (
|
||||||
"embed_out", # gptneox
|
"embed_out", # gptneox
|
||||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen
|
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba
|
||||||
"output", # llama-pth bloom internlm2
|
"output", # llama-pth bloom internlm2
|
||||||
"word_embeddings_for_head", # persimmon
|
"word_embeddings_for_head", # persimmon
|
||||||
"lm_head.linear", # phi2
|
"lm_head.linear", # phi2
|
||||||
|
@ -61,6 +64,8 @@ class TensorNameMap:
|
||||||
"language_model.encoder.final_layernorm", # persimmon
|
"language_model.encoder.final_layernorm", # persimmon
|
||||||
"model.final_layernorm", # persimmon
|
"model.final_layernorm", # persimmon
|
||||||
"lm_head.ln", # phi2
|
"lm_head.ln", # phi2
|
||||||
|
"model.norm_f", # mamba-qbert
|
||||||
|
"backbone.norm_f", # mamba
|
||||||
),
|
),
|
||||||
|
|
||||||
# Rope frequencies
|
# Rope frequencies
|
||||||
|
@ -86,6 +91,8 @@ class TensorNameMap:
|
||||||
"transformer.h.{bid}.ln", # phi2
|
"transformer.h.{bid}.ln", # phi2
|
||||||
"model.layers.layers.{bid}.norm", # plamo
|
"model.layers.layers.{bid}.norm", # plamo
|
||||||
"model.layers.{bid}.attention_norm", # internlm2
|
"model.layers.{bid}.attention_norm", # internlm2
|
||||||
|
"model.layers.{bid}.norm", # mamba-qbert
|
||||||
|
"backbone.layers.{bid}.norm", # mamba
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention norm 2
|
# Attention norm 2
|
||||||
|
@ -282,7 +289,42 @@ class TensorNameMap:
|
||||||
MODEL_TENSOR.LAYER_OUT_NORM: (
|
MODEL_TENSOR.LAYER_OUT_NORM: (
|
||||||
"encoder.layer.{bid}.output.LayerNorm", # bert
|
"encoder.layer.{bid}.output.LayerNorm", # bert
|
||||||
"encoder.layers.{bid}.norm2", # nomic-bert
|
"encoder.layers.{bid}.norm2", # nomic-bert
|
||||||
)
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.SSM_IN: (
|
||||||
|
"model.layers.{bid}.in_proj",
|
||||||
|
"backbone.layers.{bid}.mixer.in_proj",
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.SSM_CONV1D: (
|
||||||
|
"model.layers.{bid}.conv1d",
|
||||||
|
"backbone.layers.{bid}.mixer.conv1d",
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.SSM_X: (
|
||||||
|
"model.layers.{bid}.x_proj",
|
||||||
|
"backbone.layers.{bid}.mixer.x_proj",
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.SSM_DT: (
|
||||||
|
"model.layers.{bid}.dt_proj",
|
||||||
|
"backbone.layers.{bid}.mixer.dt_proj",
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.SSM_A: (
|
||||||
|
"model.layers.{bid}.A_log",
|
||||||
|
"backbone.layers.{bid}.mixer.A_log",
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.SSM_D: (
|
||||||
|
"model.layers.{bid}.D",
|
||||||
|
"backbone.layers.{bid}.mixer.D",
|
||||||
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.SSM_OUT: (
|
||||||
|
"model.layers.{bid}.out_proj",
|
||||||
|
"backbone.layers.{bid}.mixer.out_proj",
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
mapping: dict[str, tuple[MODEL_TENSOR, str]]
|
mapping: dict[str, tuple[MODEL_TENSOR, str]]
|
||||||
|
|
4
llama.h
4
llama.h
|
@ -235,6 +235,7 @@ extern "C" {
|
||||||
uint32_t seed; // RNG seed, -1 for random
|
uint32_t seed; // RNG seed, -1 for random
|
||||||
uint32_t n_ctx; // text context, 0 = from model
|
uint32_t n_ctx; // text context, 0 = from model
|
||||||
uint32_t n_batch; // prompt processing maximum batch size
|
uint32_t n_batch; // prompt processing maximum batch size
|
||||||
|
uint32_t n_parallel; // number of parallel sequences (i.e. distinct states for recurrent models)
|
||||||
uint32_t n_threads; // number of threads to use for generation
|
uint32_t n_threads; // number of threads to use for generation
|
||||||
uint32_t n_threads_batch; // number of threads to use for batch processing
|
uint32_t n_threads_batch; // number of threads to use for batch processing
|
||||||
|
|
||||||
|
@ -376,6 +377,7 @@ extern "C" {
|
||||||
|
|
||||||
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
|
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
|
||||||
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
|
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
|
||||||
|
LLAMA_API uint32_t llama_n_max_seq (const struct llama_context * ctx);
|
||||||
|
|
||||||
LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
|
LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
|
||||||
LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
|
LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
|
||||||
|
@ -502,7 +504,7 @@ extern "C" {
|
||||||
// seq_id < 0 : match any sequence
|
// seq_id < 0 : match any sequence
|
||||||
// p0 < 0 : [0, p1]
|
// p0 < 0 : [0, p1]
|
||||||
// p1 < 0 : [p0, inf)
|
// p1 < 0 : [p0, inf)
|
||||||
LLAMA_API void llama_kv_cache_seq_rm(
|
LLAMA_API bool llama_kv_cache_seq_rm(
|
||||||
struct llama_context * ctx,
|
struct llama_context * ctx,
|
||||||
llama_seq_id seq_id,
|
llama_seq_id seq_id,
|
||||||
llama_pos p0,
|
llama_pos p0,
|
||||||
|
|
|
@ -94,6 +94,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||||
# src/ggml-alloc.c -> ggml-alloc.c
|
# src/ggml-alloc.c -> ggml-alloc.c
|
||||||
# src/ggml-backend-impl.h -> ggml-backend-impl.h
|
# src/ggml-backend-impl.h -> ggml-backend-impl.h
|
||||||
# src/ggml-backend.c -> ggml-backend.c
|
# src/ggml-backend.c -> ggml-backend.c
|
||||||
|
# src/ggml-common.h -> ggml-common.h
|
||||||
# src/ggml-cuda.cu -> ggml-cuda.cu
|
# src/ggml-cuda.cu -> ggml-cuda.cu
|
||||||
# src/ggml-cuda.h -> ggml-cuda.h
|
# src/ggml-cuda.h -> ggml-cuda.h
|
||||||
# src/ggml-impl.h -> ggml-impl.h
|
# src/ggml-impl.h -> ggml-impl.h
|
||||||
|
@ -126,6 +127,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||||
-e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
|
-e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
|
||||||
-e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
|
-e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
|
||||||
-e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
|
-e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
|
||||||
|
-e 's/src\/ggml-common\.h/ggml-common.h/g' \
|
||||||
-e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
|
-e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
|
||||||
-e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
|
-e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
|
||||||
-e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
|
-e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
|
||||||
|
|
|
@ -4,6 +4,7 @@ cp -rpv ../ggml/src/ggml.c ./ggml.c
|
||||||
cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
|
cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
|
||||||
cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
|
cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
|
||||||
cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c
|
cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c
|
||||||
|
cp -rpv ../ggml/src/ggml-common.h ./ggml-common.h
|
||||||
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
|
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
|
||||||
cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
|
cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
|
||||||
cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h
|
cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h
|
||||||
|
|
1
tests/.gitignore
vendored
1
tests/.gitignore
vendored
|
@ -1,3 +1,4 @@
|
||||||
*
|
*
|
||||||
!*.*
|
!*.*
|
||||||
*.o
|
*.o
|
||||||
|
ggml-common.h
|
||||||
|
|
|
@ -53,7 +53,6 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
||||||
} else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
|
} else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
|
||||||
GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
|
GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
|
||||||
std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
|
std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
|
||||||
int64_t hist[16];
|
|
||||||
std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
|
std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
|
||||||
const float * im = imatrix.data();
|
const float * im = imatrix.data();
|
||||||
if (!ggml_quantize_requires_imatrix(tensor->type)) {
|
if (!ggml_quantize_requires_imatrix(tensor->type)) {
|
||||||
|
@ -63,7 +62,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
||||||
im = nullptr;
|
im = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], hist, im);
|
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
|
||||||
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
|
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
|
||||||
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
|
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
|
||||||
// This is going to create some weird integers though.
|
// This is going to create some weird integers though.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue