add printing codes

2024-01-08 16:10:15 +08:00 · 2024-01-08 16:10:15 +08:00 · 5ba6593252
commit 5ba6593252
parent f6793491b5
4 changed files with 200 additions and 2 deletions
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@ -325,6 +325,11 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par

    std::vector<std::thread> workers(std::thread::hardware_concurrency() - 1);

+    // debug: report the size of the tokenized input and the batching parameters
+    // (%zu is the specifier for size_t; the previous %ld did not match the argument type)
+    fprintf(stderr, "%s: n_tokens: %zu\n", __func__, size_t(tokens.size()));
+    fprintf(stderr, "%s: n_ctx: %d\n", __func__, n_ctx);
+    fprintf(stderr, "%s: n_batch: %d\n", __func__, n_batch);
+    // (n_ctx + n_batch - 1) / n_batch rounds the batches-per-chunk count up
+    fprintf(stderr, "%s: num batches per chunk processing: %d\n", __func__, int((n_ctx + n_batch - 1) / n_batch));
+
    for (int i = 0; i < n_chunk; ++i) {
        const int start =     i * n_ctx;
        const int end   = start + n_ctx;
--- a/ggml.c
+++ b/ggml.c
@ -259,6 +259,178 @@ typedef double ggml_float;
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))

+//
+// enum strings
+//
+
+// Returns the enumerator name of `backend_type` as a string literal, or
+// "WRONG_BACKEND_TYPE" for any value that is not one of the cases below.
+// The returned string has static storage; callers must not free or modify it.
+// NOTE(review): the enumerators are assumed to carry the values 0, 10 and 20 that
+// were previously hard-coded here - confirm against enum ggml_backend_type in ggml.h.
+const char * ggml_backend_type_string(enum ggml_backend_type backend_type) {
+    switch (backend_type) {
+        case GGML_BACKEND_CPU:
+            return "GGML_BACKEND_CPU";
+        case GGML_BACKEND_GPU:
+            return "GGML_BACKEND_GPU";
+        case GGML_BACKEND_GPU_SPLIT:
+            return "GGML_BACKEND_GPU_SPLIT";
+        default:
+            // every path returns, so no fallback return is needed after the switch
+            return "WRONG_BACKEND_TYPE";
+    }
+}
+
+// Returns the enumerator name of `op` as a string literal, or "WRONG_OP" for
+// any value that is not one of the cases below.
+// The returned string has static storage; callers must not free or modify it.
+//
+// Each case is generated from the enumerator itself, so the label and the
+// returned text cannot drift apart when the enum is reordered or extended.
+// NOTE(review): the enumerator names are taken from the strings that were
+// previously returned for the hard-coded values 0..72 and are assumed to be
+// declared in that order in enum ggml_op - confirm against ggml.h.
+const char * ggml_op_string(enum ggml_op op) {
+#define GGML_OP_STRING_CASE(name) case name: return #name
+    switch (op) {
+        GGML_OP_STRING_CASE(GGML_OP_NONE);
+        GGML_OP_STRING_CASE(GGML_OP_DUP);
+        GGML_OP_STRING_CASE(GGML_OP_ADD);
+        GGML_OP_STRING_CASE(GGML_OP_ADD1);
+        GGML_OP_STRING_CASE(GGML_OP_ACC);
+        GGML_OP_STRING_CASE(GGML_OP_SUB);
+        GGML_OP_STRING_CASE(GGML_OP_MUL);
+        GGML_OP_STRING_CASE(GGML_OP_DIV);
+        GGML_OP_STRING_CASE(GGML_OP_SQR);
+        GGML_OP_STRING_CASE(GGML_OP_SQRT);
+        GGML_OP_STRING_CASE(GGML_OP_LOG);
+        GGML_OP_STRING_CASE(GGML_OP_SUM);
+        GGML_OP_STRING_CASE(GGML_OP_SUM_ROWS);
+        GGML_OP_STRING_CASE(GGML_OP_MEAN);
+        GGML_OP_STRING_CASE(GGML_OP_ARGMAX);
+        GGML_OP_STRING_CASE(GGML_OP_REPEAT);
+        GGML_OP_STRING_CASE(GGML_OP_REPEAT_BACK);
+        GGML_OP_STRING_CASE(GGML_OP_CONCAT);
+        GGML_OP_STRING_CASE(GGML_OP_SILU_BACK);
+        GGML_OP_STRING_CASE(GGML_OP_NORM); // normalize
+        GGML_OP_STRING_CASE(GGML_OP_RMS_NORM);
+        GGML_OP_STRING_CASE(GGML_OP_RMS_NORM_BACK);
+        GGML_OP_STRING_CASE(GGML_OP_GROUP_NORM);
+        GGML_OP_STRING_CASE(GGML_OP_MUL_MAT);
+        GGML_OP_STRING_CASE(GGML_OP_MUL_MAT_ID);
+        GGML_OP_STRING_CASE(GGML_OP_OUT_PROD);
+        GGML_OP_STRING_CASE(GGML_OP_SCALE);
+        GGML_OP_STRING_CASE(GGML_OP_SET);
+        GGML_OP_STRING_CASE(GGML_OP_CPY);
+        GGML_OP_STRING_CASE(GGML_OP_CONT);
+        GGML_OP_STRING_CASE(GGML_OP_RESHAPE);
+        GGML_OP_STRING_CASE(GGML_OP_VIEW);
+        GGML_OP_STRING_CASE(GGML_OP_PERMUTE);
+        GGML_OP_STRING_CASE(GGML_OP_TRANSPOSE);
+        GGML_OP_STRING_CASE(GGML_OP_GET_ROWS);
+        GGML_OP_STRING_CASE(GGML_OP_GET_ROWS_BACK);
+        GGML_OP_STRING_CASE(GGML_OP_DIAG);
+        GGML_OP_STRING_CASE(GGML_OP_DIAG_MASK_INF);
+        GGML_OP_STRING_CASE(GGML_OP_DIAG_MASK_ZERO);
+        GGML_OP_STRING_CASE(GGML_OP_SOFT_MAX);
+        GGML_OP_STRING_CASE(GGML_OP_SOFT_MAX_BACK);
+        GGML_OP_STRING_CASE(GGML_OP_ROPE);
+        GGML_OP_STRING_CASE(GGML_OP_ROPE_BACK);
+        GGML_OP_STRING_CASE(GGML_OP_ALIBI);
+        GGML_OP_STRING_CASE(GGML_OP_CLAMP);
+        GGML_OP_STRING_CASE(GGML_OP_CONV_TRANSPOSE_1D);
+        GGML_OP_STRING_CASE(GGML_OP_IM2COL);
+        GGML_OP_STRING_CASE(GGML_OP_CONV_TRANSPOSE_2D);
+        GGML_OP_STRING_CASE(GGML_OP_POOL_1D);
+        GGML_OP_STRING_CASE(GGML_OP_POOL_2D);
+        GGML_OP_STRING_CASE(GGML_OP_UPSCALE); // nearest interpolate
+        GGML_OP_STRING_CASE(GGML_OP_PAD);
+        GGML_OP_STRING_CASE(GGML_OP_ARGSORT);
+        GGML_OP_STRING_CASE(GGML_OP_LEAKY_RELU);
+        GGML_OP_STRING_CASE(GGML_OP_FLASH_ATTN);
+        GGML_OP_STRING_CASE(GGML_OP_FLASH_FF);
+        GGML_OP_STRING_CASE(GGML_OP_FLASH_ATTN_BACK);
+        GGML_OP_STRING_CASE(GGML_OP_WIN_PART);
+        GGML_OP_STRING_CASE(GGML_OP_WIN_UNPART);
+        GGML_OP_STRING_CASE(GGML_OP_GET_REL_POS);
+        GGML_OP_STRING_CASE(GGML_OP_ADD_REL_POS);
+        GGML_OP_STRING_CASE(GGML_OP_UNARY);
+        GGML_OP_STRING_CASE(GGML_OP_MAP_UNARY);
+        GGML_OP_STRING_CASE(GGML_OP_MAP_BINARY);
+        GGML_OP_STRING_CASE(GGML_OP_MAP_CUSTOM1_F32);
+        GGML_OP_STRING_CASE(GGML_OP_MAP_CUSTOM2_F32);
+        GGML_OP_STRING_CASE(GGML_OP_MAP_CUSTOM3_F32);
+        GGML_OP_STRING_CASE(GGML_OP_MAP_CUSTOM1);
+        GGML_OP_STRING_CASE(GGML_OP_MAP_CUSTOM2);
+        GGML_OP_STRING_CASE(GGML_OP_MAP_CUSTOM3);
+        GGML_OP_STRING_CASE(GGML_OP_CROSS_ENTROPY_LOSS);
+        GGML_OP_STRING_CASE(GGML_OP_CROSS_ENTROPY_LOSS_BACK);
+        // kept for backward compatibility: the previous implementation also
+        // mapped this value to its name instead of reporting "WRONG_OP"
+        GGML_OP_STRING_CASE(GGML_OP_COUNT);
+        default:
+            // every path returns, so no fallback return is needed after the switch
+            return "WRONG_OP";
+    }
+#undef GGML_OP_STRING_CASE
+}
+
 //
 // global data
 //
--- a/ggml.h
+++ b/ggml.h
@ -490,6 +490,10 @@ extern "C" {
        GGML_LOG_LEVEL_DEBUG = 5
    };

+    // Returns a string literal naming `backend_type`, or "WRONG_BACKEND_TYPE"
+    // if the value is not a recognized backend type.
+    const char * ggml_backend_type_string(enum ggml_backend_type backend_type);
+
+    // Returns a string literal naming `op`, or "WRONG_OP" if the value is not
+    // a recognized op.
+    const char * ggml_op_string(enum ggml_op op);
+
    // ggml object
    struct ggml_object {
        size_t offs;
@ -511,7 +515,7 @@ extern "C" {

        struct ggml_backend_buffer * buffer;

-        int64_t ne[GGML_MAX_DIMS]; // number of elements
+        int64_t ne[GGML_MAX_DIMS]; // number of elements in each dimension
+                                   // NOTE(review): for model activations this is presumably, listed from
+                                   // ne[3] down to ne[0]: [batch size, (unnamed), seq len, hidden dim] - confirm
        size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
                                   // nb[0] = ggml_type_size(type)
                                   // nb[1] = nb[0]   * (ne[0] / ggml_blck_size(type)) + padding
--- a/llama.cpp
+++ b/llama.cpp
@ -4500,6 +4500,23 @@ struct llm_build_context {

        ggml_build_forward_expand(gf, cur);

+        // Debug aid: log the name, op, backend and shape of every node in the graph built above.
+        for (int i = 0; i < gf->n_nodes; ++ i) {
+            ggml_tensor * t = gf->nodes[i];
+            LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name);
+            LLAMA_LOG_INFO("%s: \tOP [%s]\n", __func__, ggml_op_string(t->op));
+            LLAMA_LOG_INFO("%s: \tBackend [%s]\n", __func__, ggml_backend_type_string(t->backend));
+            LLAMA_LOG_INFO("%s: \tShape (", __func__);
+            // the shape is printed from the highest dimension index down to ne[0]
+            for (int j = 0; j < GGML_MAX_DIMS; ++ j) {
+                // ne[] holds int64_t values; %ld only matches where long is 64 bits wide,
+                // so convert to long long and print with %lld
+                LLAMA_LOG_INFO("%lld", (long long) t->ne[GGML_MAX_DIMS - 1 - j]);
+                if (j != GGML_MAX_DIMS - 1) {
+                    LLAMA_LOG_INFO(", ");
+                } else {
+                    LLAMA_LOG_INFO(")\n");
+                }
+            }
+        }
+        // NOTE(review): this terminates the process right after the dump, so the
+        // `return gf;` that follows is never reached. Kept because it looks like a
+        // deliberate debugging stop - remove or gate it before running real inference.
+        exit(-1);
+
        return gf;
    }

@ -6437,7 +6454,7 @@ static int llama_decode_internal(
    res->backend = GGML_BACKEND_CPU;
 #endif

-    // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
+    // log the time elapsed since t_start_us (printed as milliseconds) together with the node and leaf counts of gf
+    LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);

    // for big prompts, if BLAS is enabled, it is better to use only one thread
    // otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance