Add per token attrib enum

2024-06-01 19:42:21 +02:00 · 2024-06-01 19:42:21 +02:00 · cec6a3bde9
commit cec6a3bde9
parent 750f60c03e
2 changed files with 40 additions and 6 deletions
--- a/llama.cpp
+++ b/llama.cpp
@ -2147,14 +2147,16 @@ struct llama_control_vector {
 };
 struct llama_vocab {
-    using id    = int32_t;
+    using id      = int32_t;
-    using token = std::string;
+    using token   = std::string;
-    using ttype = llama_token_type;
+    using ttype   = llama_token_type;
    using tattrib = llama_token_attrib;
    struct token_data {
-        token text;
+        token   text;
-        float score;
+        float   score;
-        ttype type;
+        ttype   type;
        tattrib attribs; // llama_token_attrib bit flags for this token (tattrib aliases llama_token_attrib)
    };
    enum llama_vocab_type     type     = LLAMA_VOCAB_TYPE_SPM;
@ -4865,6 +4867,24 @@ static void llm_load_vocab(
        LLAMA_LOG_INFO("%s: token to piece cache size = %.4f MB\n", __func__, size_cache / 1024.0 / 1024.0);
    }
    // Handle per token attributes
    //NOTE: Each model customizes per token attributes.
    //NOTE: Per token attributes are missing from the GGUF file.
    //TODO: Merge llama_token_type and llama_token_attrib.
    {
        // convert token type as an attribute; iterate by reference so the result is stored in the vocab, not in a copy
        for (auto & data : vocab.id_to_token) {
            uint32_t attrib = LLAMA_TOKEN_ATTRIB_UNDEFINED;
            attrib |= LLAMA_TOKEN_ATTRIB_UNKNOWN      * (data.type == LLAMA_TOKEN_TYPE_UNKNOWN);
            attrib |= LLAMA_TOKEN_ATTRIB_UNUSED       * (data.type == LLAMA_TOKEN_TYPE_UNUSED);
            attrib |= LLAMA_TOKEN_ATTRIB_NORMAL       * (data.type == LLAMA_TOKEN_TYPE_NORMAL);
            attrib |= LLAMA_TOKEN_ATTRIB_CONTROL      * (data.type == LLAMA_TOKEN_TYPE_CONTROL);
            attrib |= LLAMA_TOKEN_ATTRIB_USER_DEFINED * (data.type == LLAMA_TOKEN_TYPE_USER_DEFINED);
            attrib |= LLAMA_TOKEN_ATTRIB_BYTE         * (data.type == LLAMA_TOKEN_TYPE_BYTE);
            data.attribs = (llama_token_attrib) attrib;
        }
    }
 }
 static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
--- a/llama.h
+++ b/llama.h
@ -107,6 +107,20 @@ extern "C" {
        LLAMA_TOKEN_TYPE_BYTE         = 6,
    };
    enum llama_token_attrib {                         // bit flags: every non-zero value is one distinct bit
        LLAMA_TOKEN_ATTRIB_UNDEFINED    = 0x0000,     // no bits set
        LLAMA_TOKEN_ATTRIB_UNKNOWN      = 0x0002,     // bit  1 (bit 0 is not assigned)
        LLAMA_TOKEN_ATTRIB_UNUSED       = 0x0004,     // bit  2
        LLAMA_TOKEN_ATTRIB_NORMAL       = 0x0008,     // bit  3
        LLAMA_TOKEN_ATTRIB_CONTROL      = 0x0010,     // bit  4; SPECIAL?
        LLAMA_TOKEN_ATTRIB_USER_DEFINED = 0x0020,     // bit  5
        LLAMA_TOKEN_ATTRIB_BYTE         = 0x0040,     // bit  6
        LLAMA_TOKEN_ATTRIB_NORMALIZED   = 0x0080,     // bit  7
        LLAMA_TOKEN_ATTRIB_LSTRIP       = 0x0100,     // bit  8
        LLAMA_TOKEN_ATTRIB_RSTRIP       = 0x0200,     // bit  9
        LLAMA_TOKEN_ATTRIB_SINGLE_WORD  = 0x0400,     // bit 10
    };
    // model file types
    enum llama_ftype {
        LLAMA_FTYPE_ALL_F32              = 0,