llama : refactor sampling v2 (#9294)

- Add `struct llama_sampler` and `struct llama_sampler_i`
- Add `llama_sampler_` API
- Add `llama_sampler_chain_` API for chaining multiple samplers
- Remove `LLAMA_API_INTERNAL`
- Add `llama_perf_` API and remove old `llama_print_timings` and `llama_reset_timings`
2024-09-07 15:16:19 +03:00 · 2024-09-07 15:16:19 +03:00 · df270ef745
commit df270ef745
parent 947538acb8
48 changed files with 3497 additions and 2914 deletions
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -1,7 +1,7 @@
-#define LLAMA_API_INTERNAL
 #include "common.h"
 #include "ggml.h"
 #include "llama.h"
+#include "llama-impl.h"

 #include <algorithm>
 #include <cassert>
@@ -319,8 +319,7 @@ int main(int argc, char ** argv) {
        }

        auto cparams = llama_context_default_params();
-        cparams.n_ctx      = 256;
-        cparams.seed       = 1;
+        cparams.n_ctx = 256;

        ctx = llama_new_context_with_model(model, cparams);