sync : ggml (backend v2) (#3912)

* sync : ggml (backend v2) (wip)

* sync : migrate examples and llama.cpp to dynamic graphs (wip)

* sync : update tests + fix max op params to 64

ggml-ci

* sync : ggml-cuda

ggml-ci

* llama : fix save/load state context size

ggml-ci

* sync : try to fix build on tvOS

* sync : pass custom graph sizes in training examples

* sync : update graph copies to new ggml API

* sync : update sync-ggml.sh with new files

* scripts : fix header in sync script

* train : fix context size calculations

* llama : increase inference graph size up to 4096 nodes

* train : allocate grads for backward graphs

* train : allocate grads for gb_tmp
Georgi Gerganov 2023-11-13 14:16:23 +02:00 committed by GitHub
parent bb50a792ec
commit 4760e7cc0b
22 changed files with 1994 additions and 864 deletions
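
Several of the items above (custom graph sizes in the training examples, grads for the backward graphs and for gb_tmp) come down to the same new pattern: graphs are now allocated explicitly, with a node capacity and an opt-in flag for gradient storage. A rough sketch of that pattern, assuming a ggml context ctx and a scalar loss tensor that are already set up; the names loss and n_nodes are placeholders, not the exact identifiers used in the training examples:

    // rough sketch (not from the commit): forward/backward graphs with grads
    const size_t n_nodes = GGML_DEFAULT_GRAPH_SIZE;   // or a larger custom size

    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, n_nodes, /*grads=*/true);
    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, n_nodes, /*grads=*/true);

    ggml_build_forward_expand(gf, loss);
    // derive the backward graph from the forward one; keep=true preserves
    // the original gradient tensors so they can be reused between iterations
    ggml_build_backward_expand(ctx, gf, gb, /*keep=*/true);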

@@ -109,10 +109,11 @@ int main(void) {
     struct ggml_tensor * d = ggml_sub(ctx, c, ab);
     struct ggml_tensor * e = ggml_sum(ctx, ggml_sqr(ctx, d));
 
-    struct ggml_cgraph ge = ggml_build_forward(e);
-    ggml_graph_reset(&ge);
+    struct ggml_cgraph * ge = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
+    ggml_build_forward_expand(ge, e);
+    ggml_graph_reset(ge);
 
-    ggml_graph_compute_with_ctx(ctx, &ge, /*n_threads*/ 1);
+    ggml_graph_compute_with_ctx(ctx, ge, /*n_threads*/ 1);
 
     const float fe = ggml_get_f32_1d(e, 0);
     printf("%s: e = %.4f\n", __func__, fe);
@@ -121,9 +122,9 @@ int main(void) {
     ggml_opt(ctx, opt_params, e);
 
-    ggml_graph_reset(&ge);
-    ggml_graph_compute_with_ctx(ctx, &ge, /*n_threads*/ 1);
+    ggml_graph_reset(ge);
+    ggml_graph_compute_with_ctx(ctx, ge, /*n_threads*/ 1);
 
     const float fe_opt = ggml_get_f32_1d(e, 0);
     printf("%s: original e = %.4f\n", __func__, fe);