From ad436e9284d15dc648bc2e2bd52634b86804043d Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 4 Sep 2024 14:07:26 +0300 Subject: [PATCH] examples : fix build ggml-ci --- common/sampling.cpp | 10 +++++----- examples/batched.swift/Sources/main.swift | 20 +++++++++---------- examples/batched/batched.cpp | 4 ++-- .../llama/src/main/cpp/llama-android.cpp | 6 +++--- .../llama.cpp.swift/LibLlama.swift | 8 ++++---- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 4e8843224..b4063fe31 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -52,12 +52,12 @@ struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const st lparams.mirostat_eta = params.mirostat_eta; auto * result = new gpt_sampler { - .params = params, - .bias = llama_constraint_init_logit_bias( + /* .params = */ params, + /* .bias = */ llama_constraint_init_logit_bias( model, params.logit_bias.size(), params.logit_bias.data()), - .pnlt = llama_constraint_init_penalties( + /* .pnlt = */ llama_constraint_init_penalties( model, params.penalty_last_n, params.penalty_repeat, @@ -65,8 +65,8 @@ struct gpt_sampler * gpt_sampler_init(const struct llama_model * model, const st params.penalty_present, params.penalize_nl, params.ignore_eos), - .grmr = llama_constraint_init_grammar(model, params.grammar.c_str(), "root"), - .smpl = llama_sampler_init(model, lparams) + /* .grmr = */ llama_constraint_init_grammar(model, params.grammar.c_str(), "root"), + /* .smpl = */ llama_sampler_init(model, lparams) }; for (const auto & cnstr : params.constraints) { diff --git a/examples/batched.swift/Sources/main.swift b/examples/batched.swift/Sources/main.swift index 81763217a..4d73ccd24 100644 --- a/examples/batched.swift/Sources/main.swift +++ b/examples/batched.swift/Sources/main.swift @@ -50,20 +50,24 @@ defer { llama_free(context) } -var sparams = llama_sampling_params() +var sparams = llama_sampler_params() sparams.top_k = 40 sparams.top_p = 0.9 sparams.temp = 0.4 -let smpl = llama_sampling_init(model, sparams) +let smpl = llama_sampler_init(model, sparams) guard smpl != nil else { print("Failed to initialize sampling") exit(1) } defer { - llama_sampling_free(smpl) + llama_sampler_free(smpl) } +llama_sampler_add_constraint(smpl, llama_constraint_init_top_k(40, 1)); +llama_sampler_add_constraint(smpl, llama_constraint_init_top_p(0.9, 1)); +llama_sampler_add_constraint(smpl, llama_constraint_init_temp (0.4)); + let n_ctx = llama_n_ctx(context) print("\nn_len = \(n_len), n_ctx = \(n_ctx), n_batch = \(context_params.n_batch), n_parallel = \(n_parallel), n_kv_req = \(n_kv_req)\n") @@ -138,15 +142,11 @@ while n_cur <= n_len { var logits = llama_get_logits_ith(context, i_batch[i]) - llama_sampling_set_logits(smpl, logits) + llama_sampler_set_logits(smpl, logits) - llama_sampling_top_k(smpl, nil) - llama_sampling_top_p(smpl, nil) - llama_sampling_temp (smpl, nil) + let new_token_id = llama_sampler_sample_dist(smpl, nil) - let new_token_id = llama_sampling_sample_dist(smpl, nil) - - // const llama_token new_token_id = llama_sampling_sample_greedy(smpl, nil); + // const llama_token new_token_id = llama_sampler_sample_greedy(smpl, nil, false); // is it an end of stream? 
-> mark the stream as finished
        if llama_token_is_eog(model, new_token_id) || n_cur == n_len {
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index 3052b96ae..0f35f6cd5 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -71,7 +71,7 @@ int main(int argc, char ** argv) {
    llama_sampler * smpl = llama_sampler_init(model, sparams);

    llama_sampler_add_constraint(smpl, llama_constraint_init_top_k(params.sparams.top_k, params.sparams.min_keep));
-    llama_sampler_add_constraint(smpl, llama_constraint_init_top_p(params.sparams.top_p, params.sparams.min_p));
+    llama_sampler_add_constraint(smpl, llama_constraint_init_top_p(params.sparams.top_p, params.sparams.min_keep));
    llama_sampler_add_constraint(smpl, llama_constraint_init_temp (params.sparams.temp));

    if (ctx == NULL) {
@@ -179,7 +179,7 @@ int main(int argc, char ** argv) {

            const llama_token new_token_id = llama_sampler_sample_dist(smpl, nullptr);

-            //const llama_token new_token_id = llama_sampler_sample_greedy(smpl, nullptr);
+            //const llama_token new_token_id = llama_sampler_sample_greedy(smpl, nullptr, false);

            // is it an end of generation? -> mark the stream as finished
            if (llama_token_is_eog(model, new_token_id) || n_cur == n_predict) {
diff --git a/examples/llama.android/llama/src/main/cpp/llama-android.cpp b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
index c33f55f72..666e89764 100644
--- a/examples/llama.android/llama/src/main/cpp/llama-android.cpp
+++ b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
@@ -386,7 +386,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(
        jobject intvar_ncur
) {
    const auto context = reinterpret_cast<llama_context *>(context_pointer);
-    const auto sampling = reinterpret_cast<llama_sampling *>(sampling_pointer);
+    const auto sampling = reinterpret_cast<llama_sampler *>(sampling_pointer);
    const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
    const auto model = llama_get_model(context);
@@ -396,10 +396,10 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(

    const auto * logits = llama_get_logits_ith(context, batch->n_tokens - 1);

-    llama_sampling_set_logits(sampling, logits);
+    llama_sampler_set_logits(sampling, logits);

    // sample the most likely token
-    const auto new_token_id = llama_sampling_sample_greedy(sampling, nullptr);
+    const auto new_token_id = llama_sampler_sample_greedy(sampling, nullptr, false);

    const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value);
    if (llama_token_is_eog(model, new_token_id) || n_cur == n_len) {
diff --git a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
index 515170f67..930336b27 100644
--- a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
+++ b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
@@ -43,11 +43,11 @@ actor LlamaContext {
        self.tokens_list = []
        self.batch = llama_batch_init(512, 0, 1)
        self.temporary_invalid_cchars = []
-        self.sampling = llama_sampling_init(context, llama_sampling_default_params())
+        self.sampling = llama_sampler_init(context, llama_sampler_default_params())
    }

    deinit {
-        llama_sampling_free(sampling)
+        llama_sampler_free(sampling)
        llama_batch_free(batch)
        llama_free(context)
        llama_free_model(model)
@@ -149,9 +149,9 @@ actor LlamaContext {
        let n_vocab = llama_n_vocab(model)
        let logits = llama_get_logits_ith(context, batch.n_tokens - 1)

-        llama_sampling_set_logits(sampling, logits);
+        llama_sampler_set_logits(sampling, logits);

-        new_token_id = llama_sampling_sample_greedy(sampling, nil)
+        new_token_id = llama_sampler_sample_greedy(sampling, nil, false)

        if llama_token_is_eog(model, new_token_id) || n_cur == n_len {
            print("\n")
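
Note: every hunk above migrates an example to the same constraint-based sampler flow. Below is a minimal C++ sketch of that flow, stitched together only from calls that already appear in this patch; `model`, `ctx` and `i_batch` are assumed to exist as in the examples, `llama_sampler_default_params()` is taken from the LibLlama.swift hunk, and the top-k/top-p/temp values are illustrative, not prescriptive.

    // Build a sampler from default parameters, then attach constraints
    // (patterned on the examples/batched/batched.cpp hunk above).
    llama_sampler * smpl = llama_sampler_init(model, llama_sampler_default_params());

    llama_sampler_add_constraint(smpl, llama_constraint_init_top_k(40, 1));
    llama_sampler_add_constraint(smpl, llama_constraint_init_top_p(0.9f, 1));
    llama_sampler_add_constraint(smpl, llama_constraint_init_temp (0.4f));

    // Per decode step: point the sampler at the logits of the position to
    // sample from, then draw a token.
    const float * logits = llama_get_logits_ith(ctx, i_batch);
    llama_sampler_set_logits(smpl, logits);

    const llama_token new_token_id = llama_sampler_sample_dist(smpl, nullptr);
    // Greedy alternative, as used in the Android and SwiftUI examples:
    // const llama_token new_token_id = llama_sampler_sample_greedy(smpl, nullptr, false);

    llama_sampler_free(smpl);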