fix condition

Xuan Son Nguyen 2024-12-28 16:16:57 +01:00
parent b9b2b6371a
commit 076346db8a
3 changed files with 24 additions and 10 deletions

@@ -2558,12 +2558,22 @@ struct server_context {
         // start populating the batch for this iteration
         common_batch_clear(batch);
 
+        // track if given slot can be batched with slots already in the batch
+        server_slot * slot_batched = nullptr;
+
         // first, add sampled tokens from any ongoing sequences
         for (auto & slot : slots) {
             if (slot.state != SLOT_STATE_GENERATING) {
                 continue;
             }
 
+            // check if we can batch this slot with the previous one
+            if (!slot_batched) {
+                slot_batched = &slot;
+            } else if (slot_batched && !slot_batched->can_batch_with(slot)) {
+                continue;
+            }
+
             slot.i_batch = batch.n_tokens;
 
             common_batch_add(batch, slot.sampled, slot.n_past, { slot.id }, true);
@@ -2582,17 +2592,16 @@ struct server_context {
         int32_t n_batch = llama_n_batch(ctx);
         int32_t n_ubatch = llama_n_ubatch(ctx);
 
-        // track if given slot can be batched with slots already in the batch
-        server_slot * slot_batched = nullptr;
-
         // next, batch any pending prompts without exceeding n_batch
         if (params_base.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
                 // check if we can batch this slot with the previous one
-                if (!slot_batched) {
-                    slot_batched = &slot;
-                } else if (slot_batched && !slot_batched->can_batch_with(slot)) {
-                    continue;
+                if (slot.is_processing()) {
+                    if (!slot_batched) {
+                        slot_batched = &slot;
+                    } else if (slot_batched && !slot_batched->can_batch_with(slot)) {
+                        continue;
+                    }
                 }
 
                 // this slot still has a prompt to be processed
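Taken together, the two hunks move the `slot_batched` anchor in front of the token-generation loop and apply the same compatibility check there, so a generating slot can no longer be mixed into a batch with an incompatible one. A minimal standalone sketch of that selection pattern follows; the `Slot` type, `active` flag, and `lora_key` field are illustrative stand-ins, not the server's real `server_slot`:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for server_slot: only what the batching
// decision needs. In the real server, compatibility depends on
// per-request state rather than a single string key.
struct Slot {
    int         id;
    bool        active;     // has work this iteration
    std::string lora_key;   // proxy for "same per-request config"

    bool can_batch_with(const Slot & other) const {
        return lora_key == other.lora_key;
    }
};

// Collect the ids of slots that may share one batch this iteration:
// the first active slot is latched as the anchor, and every later
// active slot is admitted only if it is compatible with that anchor.
std::vector<int> select_batchable(std::vector<Slot> & slots) {
    std::vector<int> batched;
    Slot * slot_batched = nullptr;
    for (auto & slot : slots) {
        if (!slot.active) {
            continue;
        }
        if (!slot_batched) {
            slot_batched = &slot;   // anchor: first active slot
        } else if (!slot_batched->can_batch_with(slot)) {
            continue;               // incompatible: defer to a later batch
        }
        batched.push_back(slot.id);
    }
    return batched;
}

int main() {
    std::vector<Slot> slots = {
        {0, true,  "lora:0.0"},
        {1, true,  "lora:1.0"},   // incompatible with slot 0 -> skipped
        {2, true,  "lora:0.0"},   // compatible with slot 0 -> batched
        {3, false, "lora:1.0"},   // inactive -> ignored
    };
    for (int id : select_batchable(slots)) {
        std::cout << "batch slot " << id << "\n";   // prints slots 0 and 2
    }
}
```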

@@ -44,6 +44,12 @@ To run with stdout/stderr display in real time (verbose output, but useful for debugging):
 DEBUG=1 ./tests.sh -s -v -x
 ```
 
+To run a single test unit:
+
+```shell
+./tests.sh unit/test_{name of test case here}.py -v -x
+```
+
 Hint: You can compile and run tests in a single command, useful for local development:
 
 ```shell
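For instance, the lora test touched by this commit can be run on its own with `./tests.sh unit/test_lora.py -v -x`, assuming the file shown in the last hunk lives at `unit/test_lora.py`, as its `test_lora_per_request` context suggests.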

@@ -52,12 +52,11 @@ def test_lora_per_request():
     lora_config = [
         ( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
         ( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
-        ( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
-        ( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
+        ( [{"id": 0, "scale": 0.3}], "(special|thing|gifted)+" ),
+        ( [{"id": 0, "scale": 0.7}], "(far|from|home|away)+" ),
         ( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
         ( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
     ]
-    # FIXME: tesing with scale between 0.0 and 1.0 (i.e. 0.2, 0.5, 0.7) produces unreliable results
 
     tasks = [(
         server.make_request,
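The removed FIXME and the new 0.3/0.7 cases line up with the server fix above: if `can_batch_with` rejects slots whose per-request lora configuration differs, then once generating slots also honor that check, requests with intermediate scales no longer share a batch with mismatched ones. A hedged sketch of such a compatibility rule (the real `can_batch_with` is not shown in this diff; `LoraEntry` and `SlotConfig` are illustrative):

```cpp
#include <vector>

// Illustrative per-request lora selection, mirroring the test's
// [{"id": 0, "scale": x}] payloads. Assumption: batching requires
// identical adapter ids and scales across slots.
struct LoraEntry {
    int   id;
    float scale;
    bool operator==(const LoraEntry & other) const {
        return id == other.id && scale == other.scale;
    }
};

struct SlotConfig {
    std::vector<LoraEntry> lora;

    bool can_batch_with(const SlotConfig & other) const {
        return lora == other.lora;   // e.g. scale 0.3 vs 0.7 -> separate batches
    }
};

int main() {
    SlotConfig a{{{0, 0.3f}}};
    SlotConfig b{{{0, 0.7f}}};
    SlotConfig c{{{0, 0.3f}}};
    // exit code 0 when equal scales batch together and differing scales do not
    return (a.can_batch_with(c) && !a.can_batch_with(b)) ? 0 : 1;
}
```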