llama : move random seed generation to the samplers (#9398)

* llama_sampler_penalties : clamp penalty_last_n to zero
This commit is contained in:
slaren 2024-09-10 18:04:25 +02:00 committed by GitHub
parent 00ba2ff781
commit 49006c67b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 92 additions and 34 deletions

View file

@@ -90,8 +90,6 @@ int main(int argc, char ** argv) {
print_build_info();
LOG_TEE("%s: seed = %u\n", __func__, params.sparams.seed);
llama_backend_init();
llama_numa_init(params.numa);

View file

@@ -159,8 +159,6 @@ int main(int argc, char ** argv) {
print_build_info();
LOG_TEE("%s: seed = %u\n", __func__, params.sparams.seed);
LOG("%s: llama backend init\n", __func__);
llama_backend_init();
llama_numa_init(params.numa);
@@ -301,6 +299,9 @@ int main(int argc, char ** argv) {
LOG_TEE("Input suffix: '%s'\n", params.input_suffix.c_str());
}
}
smpl = gpt_sampler_init(model, sparams);
LOG_TEE("sampling seed: %u\n", gpt_sampler_get_seed(smpl));
LOG_TEE("sampling: \n%s\n", sparams.print().c_str());
LOG_TEE("generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
LOG_TEE("\n\n");
@@ -340,8 +341,6 @@ int main(int argc, char ** argv) {
std::vector<llama_token> embd;
smpl = gpt_sampler_init(model, sparams);
while (n_remain != 0 || params.interactive) {
// predict
if (!embd.empty()) {

View file

@@ -191,8 +191,6 @@ int main(int argc, char ** argv) {
print_build_info();
LOG_TEE("%s: seed = %u\n", __func__, params.sparams.seed);
LOG("%s: llama backend init\n", __func__);
llama_backend_init();
llama_numa_init(params.numa);
@@ -470,8 +468,10 @@ int main(int argc, char ** argv) {
exit(1);
}
LOG_TEE("sampling seed: %u\n", gpt_sampler_get_seed(smpl));
LOG_TEE("sampling params: \n%s\n", sparams.print().c_str());
LOG_TEE(" sampler constr: \n%s\n", gpt_sampler_print(smpl).c_str());
LOG_TEE("sampler constr: \n%s\n", gpt_sampler_print(smpl).c_str());
LOG_TEE("generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
// group-attention state

View file

@@ -2007,8 +2007,6 @@ int main(int argc, char ** argv) {
print_build_info();
LOG_TEE("%s: seed = %u\n", __func__, params.sparams.seed);
llama_backend_init();
llama_numa_init(params.numa);

View file

@@ -1266,6 +1266,7 @@ struct server_context {
{"n_predict", slot.n_predict}, // Server configured n_predict
{"model", params.model_alias},
{"seed", slot.sparams.seed},
{"seed_cur", slot.smpl ? gpt_sampler_get_seed(slot.smpl) : 0},
{"temperature", slot.sparams.temp},
{"dynatemp_range", slot.sparams.dynatemp_range},
{"dynatemp_exponent", slot.sparams.dynatemp_exponent},