fix condition

Xuan Son Nguyen 2024-12-28 16:16:57 +01:00
parent b9b2b6371a
commit 076346db8a
3 changed files with 24 additions and 10 deletions

@@ -2558,12 +2558,22 @@ struct server_context {
         // start populating the batch for this iteration
         common_batch_clear(batch);
 
+        // track if given slot can be batched with slots already in the batch
+        server_slot * slot_batched = nullptr;
+
         // first, add sampled tokens from any ongoing sequences
         for (auto & slot : slots) {
             if (slot.state != SLOT_STATE_GENERATING) {
                 continue;
             }
 
+            // check if we can batch this slot with the previous one
+            if (!slot_batched) {
+                slot_batched = &slot;
+            } else if (slot_batched && !slot_batched->can_batch_with(slot)) {
+                continue;
+            }
+
             slot.i_batch = batch.n_tokens;
 
             common_batch_add(batch, slot.sampled, slot.n_past, { slot.id }, true);
@@ -2582,17 +2592,16 @@ struct server_context {
         int32_t n_batch = llama_n_batch(ctx);
         int32_t n_ubatch = llama_n_ubatch(ctx);
 
-        // track if given slot can be batched with slots already in the batch
-        server_slot * slot_batched = nullptr;
-
         // next, batch any pending prompts without exceeding n_batch
         if (params_base.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
                 // check if we can batch this slot with the previous one
-                if (!slot_batched) {
-                    slot_batched = &slot;
-                } else if (slot_batched && !slot_batched->can_batch_with(slot)) {
-                    continue;
+                if (slot.is_processing()) {
+                    if (!slot_batched) {
+                        slot_batched = &slot;
+                    } else if (slot_batched && !slot_batched->can_batch_with(slot)) {
+                        continue;
+                    }
                 }
 
                 // this slot still has a prompt to be processed
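Taken together, the two hunks move the `slot_batched` anchor in front of the token-generation loop and apply the same compatibility check there, so a generating slot can no longer be mixed into a batch with an incompatible one. A minimal standalone sketch of that selection pattern follows; the `Slot` type, `active` flag, and `lora_key` field are illustrative stand-ins, not the server's real `server_slot`:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for server_slot: only what the batching
// decision needs. In the real server, compatibility depends on
// per-request state rather than a single string key.
struct Slot {
    int         id;
    bool        active;     // has work this iteration
    std::string lora_key;   // proxy for "same per-request config"

    bool can_batch_with(const Slot & other) const {
        return lora_key == other.lora_key;
    }
};

// Collect the ids of slots that may share one batch this iteration:
// the first active slot is latched as the anchor, and every later
// active slot is admitted only if it is compatible with that anchor.
std::vector<int> select_batchable(std::vector<Slot> & slots) {
    std::vector<int> batched;
    Slot * slot_batched = nullptr;
    for (auto & slot : slots) {
        if (!slot.active) {
            continue;
        }
        if (!slot_batched) {
            slot_batched = &slot;   // anchor: first active slot
        } else if (!slot_batched->can_batch_with(slot)) {
            continue;               // incompatible: defer to a later batch
        }
        batched.push_back(slot.id);
    }
    return batched;
}

int main() {
    std::vector<Slot> slots = {
        {0, true,  "lora:0.0"},
        {1, true,  "lora:1.0"},   // incompatible with slot 0 -> skipped
        {2, true,  "lora:0.0"},   // compatible with slot 0 -> batched
        {3, false, "lora:1.0"},   // inactive -> ignored
    };
    for (int id : select_batchable(slots)) {
        std::cout << "batch slot " << id << "\n";   // prints slots 0 and 2
    }
}
```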

@@ -44,6 +44,12 @@ To run with stdout/stderr display in real time (verbose output, but useful for debugging):
 DEBUG=1 ./tests.sh -s -v -x
 ```
 
+To run a single test unit:
+
+```shell
+./tests.sh unit/test_{name of test case here}.py -v -x
+```
+
 Hint: You can compile and run tests in a single command, useful for local development:
 
 ```shell
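For instance, the lora test touched by this commit can be run on its own with `./tests.sh unit/test_lora.py -v -x`, assuming the file shown in the last hunk lives at `unit/test_lora.py`, as its `test_lora_per_request` context suggests.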

@@ -52,12 +52,11 @@ def test_lora_per_request():
     lora_config = [
         ( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
         ( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
-        ( [{"id": 0, "scale": 0.0}], "(bright|day|many|happy)+" ),
-        ( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
+        ( [{"id": 0, "scale": 0.3}], "(special|thing|gifted)+" ),
+        ( [{"id": 0, "scale": 0.7}], "(far|from|home|away)+" ),
         ( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
         ( [{"id": 0, "scale": 1.0}], "(eye|love|glass|sun)+" ),
     ]
-    # FIXME: tesing with scale between 0.0 and 1.0 (i.e. 0.2, 0.5, 0.7) produces unreliable results
 
     tasks = [(
         server.make_request,
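The removed FIXME and the new 0.3/0.7 cases line up with the server fix above: if `can_batch_with` rejects slots whose per-request lora configuration differs, then once generating slots also honor that check, requests with intermediate scales no longer share a batch with mismatched ones. A hedged sketch of such a compatibility rule (the real `can_batch_with` is not shown in this diff; `LoraEntry` and `SlotConfig` are illustrative):

```cpp
#include <vector>

// Illustrative per-request lora selection, mirroring the test's
// [{"id": 0, "scale": x}] payloads. Assumption: batching requires
// identical adapter ids and scales across slots.
struct LoraEntry {
    int   id;
    float scale;
    bool operator==(const LoraEntry & other) const {
        return id == other.id && scale == other.scale;
    }
};

struct SlotConfig {
    std::vector<LoraEntry> lora;

    bool can_batch_with(const SlotConfig & other) const {
        return lora == other.lora;   // e.g. scale 0.3 vs 0.7 -> separate batches
    }
};

int main() {
    SlotConfig a{{{0, 0.3f}}};
    SlotConfig b{{{0, 0.7f}}};
    SlotConfig c{{{0, 0.3f}}};
    // exit code 0 when equal scales batch together and differing scales do not
    return (a.can_batch_with(c) && !a.can_batch_with(b)) ? 0 : 1;
}
```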