Merge branch 'master' into gg/flash-attn
This commit is contained in:
commit
1f77f49787
15 changed files with 1422 additions and 65 deletions
|
@ -1117,7 +1117,9 @@ docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m
|
|||
- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
|
||||
- See [good first issues](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions
|
||||
- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
|
||||
- Matrix multiplication is unconventional: [`z = ggml_mul_mat(ctx, x, y)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means `zT = x @ yT`
|
||||
- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
|
||||
|
||||

|
||||
|
||||
### Docs
|
||||
|
||||
|
|
|
@ -161,6 +161,7 @@ function gg_run_test_scripts_debug {
|
|||
set -e
|
||||
|
||||
(cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
|
||||
(cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
|
||||
|
||||
set +e
|
||||
}
|
||||
|
@ -184,6 +185,7 @@ function gg_run_test_scripts_release {
|
|||
set -e
|
||||
|
||||
(cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
|
||||
(cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
|
||||
|
||||
set +e
|
||||
}
|
||||
|
|
|
@ -86,8 +86,8 @@ struct gpt_params {
|
|||
|
||||
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
|
||||
|
||||
llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
|
||||
llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
|
||||
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
|
||||
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
|
||||
|
||||
// // sampling parameters
|
||||
struct llama_sampling_params sparams;
|
||||
|
|
16
examples/gguf-split/tests.sh
Normal file → Executable file
16
examples/gguf-split/tests.sh
Normal file → Executable file
|
@ -4,16 +4,16 @@ set -eu
|
|||
|
||||
if [ $# -lt 1 ]
|
||||
then
|
||||
echo "usage: $0 path_to_build_binary [path_to_temp_folder]"
|
||||
echo "example: $0 ../../build/bin ../../tmp"
|
||||
exit 1
|
||||
echo "usage: $0 path_to_build_binary [path_to_temp_folder]"
|
||||
echo "example: $0 ../../build/bin ../../tmp"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $# -gt 1 ]
|
||||
then
|
||||
TMP_DIR=$2
|
||||
TMP_DIR=$2
|
||||
else
|
||||
TMP_DIR=/tmp
|
||||
TMP_DIR=/tmp
|
||||
fi
|
||||
|
||||
set -x
|
||||
|
@ -21,7 +21,7 @@ set -x
|
|||
SPLIT=$1/gguf-split
|
||||
MAIN=$1/main
|
||||
WORK_PATH=$TMP_DIR/gguf-split
|
||||
CUR_DIR=$(pwd)
|
||||
ROOT_DIR=$(realpath $(dirname $0)/../../)
|
||||
|
||||
mkdir -p "$WORK_PATH"
|
||||
|
||||
|
@ -30,8 +30,8 @@ rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf
|
|||
|
||||
# 1. Get a model
|
||||
(
|
||||
cd $WORK_PATH
|
||||
"$CUR_DIR"/../../scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
|
||||
cd $WORK_PATH
|
||||
"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
|
||||
)
|
||||
echo PASS
|
||||
|
||||
|
|
|
@ -1325,7 +1325,7 @@ bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length
|
|||
}
|
||||
|
||||
// Linear interpolation between two points
|
||||
inline float lerp(float s, float e, float t) {
|
||||
inline float clip_lerp(float s, float e, float t) {
|
||||
return s + (e - s) * t;
|
||||
}
|
||||
// Bilinear resize function
|
||||
|
@ -1347,17 +1347,17 @@ static void bilinear_resize(const clip_image_u8& src, clip_image_u8& dst, int ta
|
|||
float y_lerp = py - y_floor;
|
||||
|
||||
for (int c = 0; c < 3; c++) {
|
||||
float top = lerp(
|
||||
float top = clip_lerp(
|
||||
static_cast<float>(src.buf[3 * (y_floor * src.nx + x_floor) + c]),
|
||||
static_cast<float>(src.buf[3 * (y_floor * src.nx + (x_floor + 1)) + c]),
|
||||
x_lerp
|
||||
);
|
||||
float bottom = lerp(
|
||||
float bottom = clip_lerp(
|
||||
static_cast<float>(src.buf[3 * ((y_floor + 1) * src.nx + x_floor) + c]),
|
||||
static_cast<float>(src.buf[3 * ((y_floor + 1) * src.nx + (x_floor + 1)) + c]),
|
||||
x_lerp
|
||||
);
|
||||
dst.buf[3 * (y * target_width + x) + c] = static_cast<uint8_t>(lerp(top, bottom, y_lerp));
|
||||
dst.buf[3 * (y * target_width + x) + c] = static_cast<uint8_t>(clip_lerp(top, bottom, y_lerp));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,6 +97,7 @@ static void usage(const char * executable) {
|
|||
printf(" --exclude-weights tensor_name: use importance matrix for this/these tensor(s)\n");
|
||||
printf(" --output-tensor-type ggml_type: use this ggml_type for the output.weight tensor\n");
|
||||
printf(" --token-embedding-type ggml_type: use this ggml_type for the token embeddings tensor\n");
|
||||
printf(" --keep-split: will generate quatized model in the same shards as input");
|
||||
printf(" --override-kv KEY=TYPE:VALUE\n");
|
||||
printf(" Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n");
|
||||
printf("Note: --include-weights and --exclude-weights cannot be used together\n");
|
||||
|
@ -300,6 +301,8 @@ int main(int argc, char ** argv) {
|
|||
} else {
|
||||
usage(argv[0]);
|
||||
}
|
||||
} else if (strcmp(argv[arg_idx], "--keep-split")) {
|
||||
params.keep_split = true;
|
||||
} else {
|
||||
usage(argv[0]);
|
||||
}
|
||||
|
@ -332,20 +335,28 @@ int main(int argc, char ** argv) {
|
|||
std::string fname_out;
|
||||
|
||||
std::string ftype_str;
|
||||
std::string suffix = ".gguf";
|
||||
if (try_parse_ftype(argv[arg_idx], params.ftype, ftype_str)) {
|
||||
std::string fpath;
|
||||
const size_t pos = fname_inp.find_last_of("/\\");
|
||||
if (pos != std::string::npos) {
|
||||
fpath = fname_inp.substr(0, pos + 1);
|
||||
}
|
||||
// export as [inp path]/ggml-model-[ftype].gguf
|
||||
fname_out = fpath + "ggml-model-" + ftype_str + ".gguf";
|
||||
|
||||
// export as [inp path]/ggml-model-[ftype]. Only add extension if there is no splitting
|
||||
fname_out = fpath + "ggml-model-" + ftype_str;
|
||||
if (!params.keep_split) {
|
||||
fname_out += suffix;
|
||||
}
|
||||
arg_idx++;
|
||||
if (ftype_str == "COPY") {
|
||||
params.only_copy = true;
|
||||
}
|
||||
} else {
|
||||
fname_out = argv[arg_idx];
|
||||
if (params.keep_split && fname_out.find(suffix) != std::string::npos) {
|
||||
fname_out = fname_out.substr(0, fname_out.length() - suffix.length());
|
||||
}
|
||||
arg_idx++;
|
||||
|
||||
if (argc <= arg_idx) {
|
||||
|
|
65
examples/quantize/tests.sh
Normal file
65
examples/quantize/tests.sh
Normal file
|
@ -0,0 +1,65 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -eu
|
||||
|
||||
if [ $# -lt 1 ]
|
||||
then
|
||||
echo "usage: $0 path_to_build_binary [path_to_temp_folder]"
|
||||
echo "example: $0 ../../build/bin ../../tmp"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $# -gt 1 ]
|
||||
then
|
||||
TMP_DIR=$2
|
||||
else
|
||||
TMP_DIR=/tmp
|
||||
fi
|
||||
|
||||
set -x
|
||||
|
||||
SPLIT=$1/gguf-split
|
||||
QUANTIZE=$1/quantize
|
||||
MAIN=$1/main
|
||||
WORK_PATH=$TMP_DIR/quantize
|
||||
ROOT_DIR=$(realpath $(dirname $0)/../../)
|
||||
|
||||
mkdir -p "$WORK_PATH"
|
||||
|
||||
# Clean up in case of previously failed test
|
||||
rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf
|
||||
|
||||
# 1. Get a model
|
||||
(
|
||||
cd $WORK_PATH
|
||||
"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
|
||||
)
|
||||
echo PASS
|
||||
|
||||
# 2. Split model
|
||||
$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split
|
||||
echo PASS
|
||||
echo
|
||||
|
||||
# 3. Requant model with '--keep_split'
|
||||
$QUANTIZE --allow-requantize --keep_split $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K
|
||||
echo PASS
|
||||
echo
|
||||
|
||||
# 3a. Test the requanted model is loading properly
|
||||
$MAIN --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --random-prompt --n-predict 32
|
||||
echo PASS
|
||||
echo
|
||||
|
||||
# 4. Requant mode without '--keep_split'
|
||||
$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K
|
||||
echo PASS
|
||||
echo
|
||||
|
||||
# 4b. Test the requanted model is loading properly
|
||||
$MAIN --model $WORK_PATH/ggml-model-requant-merge.gguf --random-prompt --n-predict 32
|
||||
echo PASS
|
||||
echo
|
||||
|
||||
# Clean up
|
||||
rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf
|
|
@ -4,9 +4,8 @@ set -eu
|
|||
|
||||
if [ $# -lt 1 ]
|
||||
then
|
||||
# Start @llama.cpp scenario
|
||||
behave --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
|
||||
# Start @llama.cpp scenario
|
||||
behave --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
|
||||
else
|
||||
behave "$@"
|
||||
behave "$@"
|
||||
fi
|
||||
|
||||
|
|
|
@ -11,6 +11,12 @@
|
|||
#include <string.h> // memcpy
|
||||
#include <math.h> // fabsf
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
|
|
@ -14,12 +14,6 @@
|
|||
#include <stdlib.h> // for qsort
|
||||
#include <stdio.h> // for GGML_ASSERT
|
||||
|
||||
#undef MIN
|
||||
#undef MAX
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
#define UNUSED GGML_UNUSED
|
||||
|
||||
// some compilers don't provide _mm256_set_m128i, e.g. gcc 7
|
||||
|
|
12
ggml.c
12
ggml.c
|
@ -858,18 +858,6 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
|||
// simd mappings
|
||||
//
|
||||
|
||||
#if defined(__ARM_NEON)
|
||||
#if !defined(__aarch64__)
|
||||
|
||||
// 64-bit compatibility
|
||||
|
||||
inline static float vaddvq_f32(float32x4_t v) {
|
||||
return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// we define a common set of C macros which map to specific intrinsics based on the current architecture
|
||||
// we then implement the fundamental computation operations below using only these macros
|
||||
// adding support for new architectures requires to define the corresponding SIMD macros
|
||||
|
|
97
llama.cpp
97
llama.cpp
|
@ -3297,6 +3297,10 @@ struct llama_model_loader {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
const llama_tensor_weight * get_weight(int i) const {
|
||||
return get_weight(get_tensor_name(i));
|
||||
}
|
||||
|
||||
const llama_tensor_weight & require_weight(const char * name) const {
|
||||
const llama_tensor_weight * weight = get_weight(name);
|
||||
if (!weight) {
|
||||
|
@ -14596,26 +14600,74 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||
std::vector<no_init<uint8_t>> work;
|
||||
std::vector<no_init<float>> f32_conv_buf;
|
||||
|
||||
uint16_t n_split = 1;
|
||||
// Assume split index is continuous
|
||||
if (params->keep_split) {
|
||||
for (int i = 0; i < ml.n_tensors; ++i) {
|
||||
n_split = std::max(uint16_t(ml.get_weight(i)->idx+1), n_split);
|
||||
}
|
||||
}
|
||||
std::vector<gguf_context*> ctx_outs(n_split, NULL);
|
||||
ctx_outs[0] = ctx_out;
|
||||
|
||||
// populate the original tensors so we get an initial meta data
|
||||
for (int i = 0; i < ml.n_tensors; ++i) {
|
||||
const struct ggml_tensor * meta = ml.get_tensor_meta(i);
|
||||
gguf_add_tensor(ctx_out, meta);
|
||||
auto weight = ml.get_weight(i);
|
||||
uint16_t i_split = params->keep_split ? weight->idx : 0;
|
||||
struct ggml_tensor * tensor = weight->tensor;
|
||||
if (ctx_outs[i_split] == NULL) {
|
||||
ctx_outs[i_split] = gguf_init_empty();
|
||||
}
|
||||
gguf_add_tensor(ctx_outs[i_split], tensor);
|
||||
}
|
||||
|
||||
std::ofstream fout(fname_out, std::ios::binary);
|
||||
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
||||
// Set split info if needed
|
||||
if (n_split > 1) {
|
||||
for (size_t i = 0; i < ctx_outs.size(); ++i) {
|
||||
gguf_set_val_u16(ctx_outs[i], ml.llm_kv(LLM_KV_SPLIT_NO).c_str(), i);
|
||||
gguf_set_val_u16(ctx_outs[i], ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str(), n_split);
|
||||
gguf_set_val_i32(ctx_outs[i], ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str(), ml.n_tensors);
|
||||
}
|
||||
}
|
||||
|
||||
const size_t meta_size = gguf_get_meta_size(ctx_out);
|
||||
int cur_split = -1;
|
||||
std::ofstream fout;
|
||||
auto close_ofstream = [&]() {
|
||||
// Write metadata and close file handler
|
||||
if (fout.is_open()) {
|
||||
fout.seekp(0);
|
||||
std::vector<uint8_t> data(gguf_get_meta_size(ctx_outs[cur_split]));
|
||||
gguf_get_meta_data(ctx_outs[cur_split], data.data());
|
||||
fout.write((const char *) data.data(), data.size());
|
||||
fout.close();
|
||||
}
|
||||
};
|
||||
auto new_ofstream = [&](int index) {
|
||||
cur_split = index;
|
||||
GGML_ASSERT(ctx_outs[cur_split] && "Find uninitialized gguf_context");
|
||||
std::string fname = fname_out;
|
||||
if (params->keep_split) {
|
||||
char split_path[PATH_MAX] = {0};
|
||||
llama_split_path(split_path, sizeof(split_path), fname_out.c_str(), cur_split, n_split);
|
||||
fname = std::string(split_path);
|
||||
}
|
||||
|
||||
LLAMA_LOG_INFO("%s: meta size = %zu bytes\n", __func__, meta_size);
|
||||
|
||||
// placeholder for the meta data
|
||||
::zeros(fout, meta_size);
|
||||
fout = std::ofstream(fname, std::ios::binary);
|
||||
fout.exceptions(std::ofstream::failbit); // fail fast on write errors
|
||||
const size_t meta_size = gguf_get_meta_size(ctx_outs[cur_split]);
|
||||
// placeholder for the meta data
|
||||
::zeros(fout, meta_size);
|
||||
};
|
||||
|
||||
const auto tn = LLM_TN(model.arch);
|
||||
|
||||
new_ofstream(0);
|
||||
for (int i = 0; i < ml.n_tensors; ++i) {
|
||||
struct ggml_tensor * tensor = ml.get_tensor_meta(i);
|
||||
auto weight = ml.get_weight(i);
|
||||
struct ggml_tensor * tensor = weight->tensor;
|
||||
if (weight->idx != cur_split && params->keep_split) {
|
||||
close_ofstream();
|
||||
new_ofstream(weight->idx);
|
||||
}
|
||||
|
||||
const std::string name = ggml_get_name(tensor);
|
||||
|
||||
|
@ -14770,26 +14822,18 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||
total_size_new += new_size;
|
||||
|
||||
// update the gguf meta data as we go
|
||||
gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
|
||||
gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
|
||||
gguf_set_tensor_type(ctx_outs[cur_split], name.c_str(), new_type);
|
||||
gguf_set_tensor_data(ctx_outs[cur_split], name.c_str(), new_data, new_size);
|
||||
|
||||
// write tensor data + padding
|
||||
fout.write((const char *) new_data, new_size);
|
||||
zeros(fout, GGML_PAD(new_size, align) - new_size);
|
||||
}
|
||||
|
||||
// go back to beginning of file and write the updated meta data
|
||||
{
|
||||
fout.seekp(0);
|
||||
std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
|
||||
gguf_get_meta_data(ctx_out, data.data());
|
||||
fout.write((const char *) data.data(), data.size());
|
||||
close_ofstream();
|
||||
for (auto & c:ctx_outs) {
|
||||
gguf_free(c);
|
||||
}
|
||||
|
||||
fout.close();
|
||||
|
||||
gguf_free(ctx_out);
|
||||
|
||||
LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
|
||||
LLAMA_LOG_INFO("%s: quant size = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0);
|
||||
|
||||
|
@ -15146,6 +15190,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
|
|||
/*.quantize_output_tensor =*/ true,
|
||||
/*.only_copy =*/ false,
|
||||
/*.pure =*/ false,
|
||||
/*.keep_split =*/ false,
|
||||
/*.imatrix =*/ nullptr,
|
||||
/*.kv_overrides =*/ nullptr,
|
||||
};
|
||||
|
@ -15695,6 +15740,10 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
|||
return LLAMA_ROPE_TYPE_NONE;
|
||||
}
|
||||
|
||||
enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) {
|
||||
return ctx->cparams.pooling_type;
|
||||
}
|
||||
|
||||
int32_t llama_n_vocab(const struct llama_model * model) {
|
||||
return model->hparams.n_vocab;
|
||||
}
|
||||
|
|
7
llama.h
7
llama.h
|
@ -289,6 +289,7 @@ extern "C" {
|
|||
bool quantize_output_tensor; // quantize output.weight
|
||||
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
|
||||
bool pure; // quantize all tensors to the default type
|
||||
bool keep_split; // quantize to the same number of shards
|
||||
void * imatrix; // pointer to importance matrix data
|
||||
void * kv_overrides; // pointer to vector containing overrides
|
||||
} llama_model_quantize_params;
|
||||
|
@ -391,8 +392,10 @@ extern "C" {
|
|||
LLAMA_API uint32_t llama_n_ubatch (const struct llama_context * ctx);
|
||||
LLAMA_API uint32_t llama_n_seq_max (const struct llama_context * ctx);
|
||||
|
||||
LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
|
||||
LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
|
||||
LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
|
||||
|
||||
LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model);
|
||||
LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
|
||||
|
||||
LLAMA_API int32_t llama_n_vocab (const struct llama_model * model);
|
||||
LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
|
||||
|
|
BIN
media/matmul.png
Normal file
BIN
media/matmul.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 260 KiB |
1238
media/matmul.svg
Normal file
1238
media/matmul.svg
Normal file
File diff suppressed because it is too large
Load diff
After Width: | Height: | Size: 51 KiB |
Loading…
Add table
Add a link
Reference in a new issue