ggml : change ggml_graph_compute() API to not require context (#1999)

* ggml_graph_compute: deprecate using ggml_context, try to resolve issue #287

* rewrite: no longer consider backward compatibility; plan and make_plan

* minor: rename ctx as plan; const

* remove ggml_graph_compute from tests/test-grad0.c, but current change breaks backward

* add static ggml_graph_compute_sugar()

* minor: update comments

* reusable buffers

* ggml : more consistent naming + metal fixes

* ggml : fix docs

* tests : disable grad / opt + minor naming changes

* ggml : add ggml_graph_compute_with_ctx()

- backwards-compatible API
- deduplicates a lot of copy-paste (see the usage sketch after this list)

* ci : enable test-grad0

* examples : factor out plan allocation into a helper function

* llama : factor out plan stuff into a helper function

* ci : fix env

* llama : fix duplicate symbols + refactor example benchmark

* ggml : remove obsolete assert + refactor n_tasks section

* ggml : fix indentation in switch

* llama : avoid unnecessary bool

* ggml : remove comments from source file and match order in header
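
For context, a minimal sketch of the resulting call sequence, assuming the plan-based
API this PR describes (ggml_graph_plan() returns a ggml_cplan; when plan.work_size > 0
the caller must provide plan.work_data); gf, ctx, and the thread count are placeholders:

    // plan-based flow: the caller owns the work buffer
    struct ggml_cplan plan = ggml_graph_plan(&gf, /*n_threads*/ 4);

    uint8_t * work = NULL;
    if (plan.work_size > 0) {
        work = malloc(plan.work_size); // caller-allocated work buffer
        plan.work_data = work;
    }

    ggml_graph_compute(&gf, &plan);
    free(work);

    // backwards-compatible wrapper: work data is allocated inside the ggml context
    ggml_graph_compute_with_ctx(ctx, &gf, /*n_threads*/ 4);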

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Qingyou Meng, 2023-07-08 00:24:01 +08:00, committed by GitHub
parent 7242140283
commit 1d656d6360
13 changed files with 571 additions and 449 deletions


@@ -7,6 +7,7 @@
 #define MAX_NARGS 2
+#pragma GCC diagnostic ignored "-Wdouble-promotion"
 //
 // logging
@@ -33,7 +34,7 @@
 #define GGML_PRINT(...) printf(__VA_ARGS__)
-float frand() {
+float frand(void) {
     return (float)rand()/(float)RAND_MAX;
 }
@@ -114,7 +115,7 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
     ((float *)t->data)[idx] = value;
 }
-int main(int argc, const char ** argv) {
+int main(void) {
     struct ggml_init_params params = {
         .mem_size = 1024*1024*1024,
         .mem_buffer = NULL,
@@ -137,10 +138,11 @@ int main(int argc, const char ** argv) {
     struct ggml_tensor * d = ggml_sub(ctx, c, ab);
     struct ggml_tensor * e = ggml_sum(ctx, ggml_sqr(ctx, d));
     struct ggml_cgraph ge = ggml_build_forward(e);
-    ggml_graph_reset (&ge);
-    ggml_graph_compute(ctx, &ge);
+    ggml_graph_reset(&ge);
+    ggml_graph_compute_with_ctx(ctx, &ge, /*n_threads*/ 1);
     const float fe = ggml_get_f32_1d(e, 0);
     printf("%s: e = %.4f\n", __func__, fe);
@@ -148,8 +150,10 @@
     ggml_opt(ctx, opt_params, e);
-    ggml_graph_reset (&ge);
-    ggml_graph_compute(ctx, &ge);
+    ggml_graph_reset(&ge);
+    ggml_graph_compute_with_ctx(ctx, &ge, /*n_threads*/ 1);
     const float fe_opt = ggml_get_f32_1d(e, 0);
     printf("%s: original e = %.4f\n", __func__, fe);
     printf("%s: optimized e = %.4f\n", __func__, fe_opt);