llama : extend batch API to select which logits to output

2023-09-19 00:24:13 +03:00 · 2023-09-19 00:24:13 +03:00 · fa0e677820
commit fa0e677820
parent 897caccdf4
4 changed files with 46 additions and 6 deletions
--- a/llama.h
+++ b/llama.h
@@ -70,11 +70,11 @@ extern "C" {
    typedef struct llama_batch {
        uint32_t n_tokens;

-        // TODO: not sure about these consts - might just get in the way all the time with no benefit
        const llama_token  * token;
        const float        * embd;
        const llama_pos    * pos;
        const llama_seq_id * seq_id;
+        const int8_t       * logits; // if 0, do not extract logits for that token

        // NOTE: helpers for smooth API transition - can be deprecated in the future
        //       for future-proof code, use the above fields instead and ignore everything below