fix condition
This commit is contained in:
parent
b9b2b6371a
commit
076346db8a
3 changed files with 24 additions and 10 deletions
|
@ -2558,12 +2558,22 @@ struct server_context {
|
||||||
// start populating the batch for this iteration
|
// start populating the batch for this iteration
|
||||||
common_batch_clear(batch);
|
common_batch_clear(batch);
|
||||||
|
|
||||||
|
// track if given slot can be batched with slots already in the batch
|
||||||
|
server_slot * slot_batched = nullptr;
|
||||||
|
|
||||||
// first, add sampled tokens from any ongoing sequences
|
// first, add sampled tokens from any ongoing sequences
|
||||||
for (auto & slot : slots) {
|
for (auto & slot : slots) {
|
||||||
if (slot.state != SLOT_STATE_GENERATING) {
|
if (slot.state != SLOT_STATE_GENERATING) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// check if we can batch this slot with the previous one
|
||||||
|
if (!slot_batched) {
|
||||||
|
slot_batched = &slot;
|
||||||
|
} else if (slot_batched && !slot_batched->can_batch_with(slot)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
slot.i_batch = batch.n_tokens;
|
slot.i_batch = batch.n_tokens;
|
||||||
|
|
||||||
common_batch_add(batch, slot.sampled, slot.n_past, { slot.id }, true);
|
common_batch_add(batch, slot.sampled, slot.n_past, { slot.id }, true);
|
||||||
|
@ -2582,17 +2592,16 @@ struct server_context {
|
||||||
int32_t n_batch = llama_n_batch(ctx);
|
int32_t n_batch = llama_n_batch(ctx);
|
||||||
int32_t n_ubatch = llama_n_ubatch(ctx);
|
int32_t n_ubatch = llama_n_ubatch(ctx);
|
||||||
|
|
||||||
// track if given slot can be batched with slots already in the batch
|
|
||||||
server_slot * slot_batched = nullptr;
|
|
||||||
|
|
||||||
// next, batch any pending prompts without exceeding n_batch
|
// next, batch any pending prompts without exceeding n_batch
|
||||||
if (params_base.cont_batching || batch.n_tokens == 0) {
|
if (params_base.cont_batching || batch.n_tokens == 0) {
|
||||||
for (auto & slot : slots) {
|
for (auto & slot : slots) {
|
||||||
// check if we can batch this slot with the previous one
|
// check if we can batch this slot with the previous one
|
||||||
if (!slot_batched) {
|
if (slot.is_processing()) {
|
||||||
slot_batched = &slot;
|
if (!slot_batched) {
|
||||||
} else if (slot_batched && !slot_batched->can_batch_with(slot)) {
|
slot_batched = &slot;
|
||||||
continue;
|
} else if (slot_batched && !slot_batched->can_batch_with(slot)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// this slot still has a prompt to be processed
|
// this slot still has a prompt to be processed
|
||||||
|
|
|
@ -44,6 +44,12 @@ To run with stdout/stderr display in real time (verbose output, but useful for d
|
||||||
DEBUG=1 ./tests.sh -s -v -x
|
DEBUG=1 ./tests.sh -s -v -x
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To run a single test unit:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
./tests.sh unit/test_{name of test case here}.py -v -x
|
||||||
|
```
|
||||||
|
|
||||||
Hint: You can compile and run the tests in a single command, which is useful for local development:
|
Hint: You can compile and run the tests in a single command, which is useful for local development:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
|
|
@ -52,12 +52,11 @@ def test_lora_per_request():
|
||||||
lora_config = [
|
lora_config = [
|
||||||
( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
|
( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
|
||||||
( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
|
( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
|
||||||
( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
|
( [{"id": 0, "scale": 0.3}], "(special|thing|gifted)+" ),
|
||||||
( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
|
( [{"id": 0, "scale": 0.7}], "(far|from|home|away)+" ),
|
||||||
( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
|
( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
|
||||||
( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
|
( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
|
||||||
]
|
]
|
||||||
# FIXME: testing with scale between 0.0 and 1.0 (e.g. 0.2, 0.5, 0.7) produces unreliable results
|
|
||||||
|
|
||||||
tasks = [(
|
tasks = [(
|
||||||
server.make_request,
|
server.make_request,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue