llama : move random seed generation to the samplers (#9398)
* llama_sampler_penalties : clamp penalty_last_n to zero
This commit is contained in:
parent
00ba2ff781
commit
49006c67b4
10 changed files with 92 additions and 34 deletions
|
@ -90,8 +90,6 @@ int main(int argc, char ** argv) {
|
|||
|
||||
print_build_info();
|
||||
|
||||
LOG_TEE("%s: seed = %u\n", __func__, params.sparams.seed);
|
||||
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
||||
|
|
|
@ -159,8 +159,6 @@ int main(int argc, char ** argv) {
|
|||
|
||||
print_build_info();
|
||||
|
||||
LOG_TEE("%s: seed = %u\n", __func__, params.sparams.seed);
|
||||
|
||||
LOG("%s: llama backend init\n", __func__);
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
@ -301,6 +299,9 @@ int main(int argc, char ** argv) {
|
|||
LOG_TEE("Input suffix: '%s'\n", params.input_suffix.c_str());
|
||||
}
|
||||
}
|
||||
smpl = gpt_sampler_init(model, sparams);
|
||||
|
||||
LOG_TEE("sampling seed: %u\n", gpt_sampler_get_seed(smpl));
|
||||
LOG_TEE("sampling: \n%s\n", sparams.print().c_str());
|
||||
LOG_TEE("generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
|
||||
LOG_TEE("\n\n");
|
||||
|
@ -340,8 +341,6 @@ int main(int argc, char ** argv) {
|
|||
|
||||
std::vector<llama_token> embd;
|
||||
|
||||
smpl = gpt_sampler_init(model, sparams);
|
||||
|
||||
while (n_remain != 0 || params.interactive) {
|
||||
// predict
|
||||
if (!embd.empty()) {
|
||||
|
|
|
@ -191,8 +191,6 @@ int main(int argc, char ** argv) {
|
|||
|
||||
print_build_info();
|
||||
|
||||
LOG_TEE("%s: seed = %u\n", __func__, params.sparams.seed);
|
||||
|
||||
LOG("%s: llama backend init\n", __func__);
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
@ -470,8 +468,10 @@ int main(int argc, char ** argv) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
LOG_TEE("sampling seed: %u\n", gpt_sampler_get_seed(smpl));
|
||||
LOG_TEE("sampling params: \n%s\n", sparams.print().c_str());
|
||||
LOG_TEE(" sampler constr: \n%s\n", gpt_sampler_print(smpl).c_str());
|
||||
LOG_TEE("sampler constr: \n%s\n", gpt_sampler_print(smpl).c_str());
|
||||
|
||||
LOG_TEE("generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
|
||||
|
||||
// group-attention state
|
||||
|
|
|
@ -2007,8 +2007,6 @@ int main(int argc, char ** argv) {
|
|||
|
||||
print_build_info();
|
||||
|
||||
LOG_TEE("%s: seed = %u\n", __func__, params.sparams.seed);
|
||||
|
||||
llama_backend_init();
|
||||
llama_numa_init(params.numa);
|
||||
|
||||
|
|
|
@ -1266,6 +1266,7 @@ struct server_context {
|
|||
{"n_predict", slot.n_predict}, // Server configured n_predict
|
||||
{"model", params.model_alias},
|
||||
{"seed", slot.sparams.seed},
|
||||
{"seed_cur", slot.smpl ? gpt_sampler_get_seed(slot.smpl) : 0},
|
||||
{"temperature", slot.sparams.temp},
|
||||
{"dynatemp_range", slot.sparams.dynatemp_range},
|
||||
{"dynatemp_exponent", slot.sparams.dynatemp_exponent},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue