Merge branch 'master' into compilade/mamba2
This commit is contained in:
commit
038d958333
132 changed files with 6559 additions and 5146 deletions
|
@ -116,6 +116,11 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
|||
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
|
||||
// This is going to create some weird integers though.
|
||||
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
|
||||
} else if (tensor->type == GGML_TYPE_I64) {
|
||||
// Integers with a size of 8 bytes can be set by mirroring the float data, the specific values are again not really meaningful.
|
||||
const size_t nbytes_half = ggml_nbytes(tensor)/2;
|
||||
ggml_backend_tensor_set(tensor, data.data(), 0*nbytes_half, nbytes_half);
|
||||
ggml_backend_tensor_set(tensor, data.data(), 1*nbytes_half, nbytes_half);
|
||||
} else {
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
@ -128,7 +133,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|||
std::vector<uint8_t> buf(ggml_nbytes(t));
|
||||
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
|
||||
|
||||
ggml_type_traits_t tt = ggml_internal_get_type_traits(t->type);
|
||||
const auto * tt = ggml_get_type_traits(t->type);
|
||||
size_t bs = ggml_blck_size(t->type);
|
||||
std::vector<float> vq(ggml_blck_size(t->type));
|
||||
bool quantized = ggml_is_quantized(t->type);
|
||||
|
@ -145,6 +150,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|||
tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t*)&buf[i]));
|
||||
} else if (t->type == GGML_TYPE_F32) {
|
||||
tv.push_back(*(float *) &buf[i]);
|
||||
} else if (t->type == GGML_TYPE_I64) {
|
||||
tv.push_back((float)*(int64_t *) &buf[i]);
|
||||
} else if (t->type == GGML_TYPE_I32) {
|
||||
tv.push_back((float)*(int32_t *) &buf[i]);
|
||||
} else if (t->type == GGML_TYPE_I16) {
|
||||
|
@ -152,7 +159,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|||
} else if (t->type == GGML_TYPE_I8) {
|
||||
tv.push_back((float)*(int8_t *) &buf[i]);
|
||||
} else if (quantized) {
|
||||
tt.to_float(&buf[i], vq.data(), bs);
|
||||
tt->to_float(&buf[i], vq.data(), bs);
|
||||
tv.insert(tv.end(), vq.begin(), vq.end());
|
||||
} else {
|
||||
GGML_ABORT("fatal error");
|
||||
|
@ -672,14 +679,11 @@ struct test_case {
|
|||
}
|
||||
|
||||
// run
|
||||
ggml_backend_synchronize(backend);
|
||||
|
||||
int64_t total_time_us = 0;
|
||||
int total_runs = 0;
|
||||
do {
|
||||
int64_t start_time = ggml_time_us();
|
||||
ggml_backend_graph_compute(backend, gf);
|
||||
ggml_backend_synchronize(backend);
|
||||
int64_t end_time = ggml_time_us();
|
||||
|
||||
total_time_us += end_time - start_time;
|
||||
|
@ -1119,6 +1123,71 @@ struct test_get_rows : public test_case {
|
|||
}
|
||||
};
|
||||
|
||||
// GGML_OP_ARGMAX
|
||||
struct test_argmax : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR2(type, ne);
|
||||
}
|
||||
|
||||
test_argmax(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {10, 100, 1, 1})
|
||||
: type(type), ne(ne) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * out = ggml_argmax(ctx, a);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
double max_nmse_err() override {
|
||||
return 0.0;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_COUNT_EQUAL
|
||||
struct test_count_equal : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR2(type, ne);
|
||||
}
|
||||
|
||||
test_count_equal(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {4, 500, 1, 1})
|
||||
: type(type), ne(ne) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * a_argmax = ggml_argmax(ctx, a);
|
||||
ggml_set_name(a_argmax, "a_argmax");
|
||||
|
||||
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_name(b, "b");
|
||||
|
||||
ggml_tensor * b_argmax = ggml_argmax(ctx, a);
|
||||
ggml_set_name(b_argmax, "b_argmax");
|
||||
|
||||
ggml_tensor * out = ggml_count_equal(ctx, a_argmax, b_argmax);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
double max_nmse_err() override {
|
||||
return 0.0;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_REPEAT
|
||||
struct test_repeat : public test_case {
|
||||
const ggml_type type;
|
||||
|
@ -3294,6 +3363,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
|
||||
test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));
|
||||
|
||||
test_cases.emplace_back(new test_argmax());
|
||||
test_cases.emplace_back(new test_count_equal());
|
||||
|
||||
for (int ne3 : {1, 3}) { // CUDA backward pass only supports ne3 == 1
|
||||
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 1, 1}));
|
||||
test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {2, 1, 1, 1}));
|
||||
|
@ -3312,8 +3384,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {0, 2, 1, 3})); // dup by rows
|
||||
test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {1, 0, 2, 3}));
|
||||
test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {1, 0, 2, 3})); // dup dst not-contiguous
|
||||
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
|
||||
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));
|
||||
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
|
||||
test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));
|
||||
|
||||
for (int dim = 1; dim < GGML_MAX_DIMS; ++dim) {
|
||||
test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim));
|
||||
|
@ -3755,20 +3827,22 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
|
||||
// enumerate backends
|
||||
printf("Testing %zu backends\n\n", ggml_backend_reg_get_count());
|
||||
printf("Testing %zu devices\n\n", ggml_backend_dev_count());
|
||||
|
||||
size_t n_ok = 0;
|
||||
|
||||
for (size_t i = 0; i < ggml_backend_reg_get_count(); i++) {
|
||||
printf("Backend %zu/%zu (%s)\n", i + 1, ggml_backend_reg_get_count(), ggml_backend_reg_get_name(i));
|
||||
for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
|
||||
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
|
||||
|
||||
if (backend_filter != NULL && strcmp(backend_filter, ggml_backend_reg_get_name(i)) != 0) {
|
||||
printf("Backend %zu/%zu: %s\n", i + 1, ggml_backend_dev_count(), ggml_backend_dev_name(dev));
|
||||
|
||||
if (backend_filter != NULL && strcmp(backend_filter, ggml_backend_dev_name(dev)) != 0) {
|
||||
printf(" Skipping\n");
|
||||
n_ok++;
|
||||
continue;
|
||||
}
|
||||
|
||||
ggml_backend_t backend = ggml_backend_reg_init_backend(i, NULL);
|
||||
ggml_backend_t backend = ggml_backend_dev_init(dev, NULL);
|
||||
GGML_ASSERT(backend != NULL);
|
||||
|
||||
if (backend_filter == NULL && ggml_backend_is_cpu(backend) && mode != MODE_GRAD) {
|
||||
|
@ -3778,12 +3852,18 @@ int main(int argc, char ** argv) {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (ggml_backend_is_cpu(backend)) {
|
||||
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
|
||||
auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
|
||||
if (ggml_backend_set_n_threads_fn) {
|
||||
// TODO: better value for n_threads
|
||||
ggml_backend_cpu_set_n_threads(backend, std::thread::hardware_concurrency() / 2);
|
||||
ggml_backend_set_n_threads_fn(backend, std::thread::hardware_concurrency());
|
||||
}
|
||||
|
||||
printf(" Backend name: %s\n", ggml_backend_name(backend));
|
||||
printf(" Device description: %s\n", ggml_backend_dev_description(dev));
|
||||
size_t free, total; // NOLINT
|
||||
ggml_backend_dev_memory(dev, &free, &total);
|
||||
printf(" Device memory: %zu MB (%zu MB free)\n", total / 1024 / 1024, free / 1024 / 1024);
|
||||
printf("\n");
|
||||
|
||||
bool ok = test_backend(backend, mode, op_name_filter);
|
||||
|
||||
|
@ -3800,9 +3880,9 @@ int main(int argc, char ** argv) {
|
|||
ggml_backend_free(backend);
|
||||
}
|
||||
|
||||
printf("%zu/%zu backends passed\n", n_ok, ggml_backend_reg_get_count());
|
||||
printf("%zu/%zu backends passed\n", n_ok, ggml_backend_dev_count());
|
||||
|
||||
if (n_ok != ggml_backend_reg_get_count()) {
|
||||
if (n_ok != ggml_backend_dev_count()) {
|
||||
printf("\033[1;31mFAIL\033[0m\n");
|
||||
return 1;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue