From 6215c33a2bf0d95376bf011391585070fe8bdd80 Mon Sep 17 00:00:00 2001
From: luffy06 <534427411@qq.com>
Date: Tue, 16 Jan 2024 10:29:42 +0800
Subject: [PATCH] add print codes

---
 analyze_cgf.py                     | 94 ++++++++++++++++++++++++++++++
 common/common.cpp                  |  2 ++
 examples/perplexity/perplexity.cpp |  7 +++
 llama.cpp                          |  2 ++
 4 files changed, 105 insertions(+)
 create mode 100644 analyze_cgf.py

diff --git a/analyze_cgf.py b/analyze_cgf.py
new file mode 100644
index 000000000..0024ac28c
--- /dev/null
+++ b/analyze_cgf.py
@@ -0,0 +1,94 @@
+import argparse
+from collections import deque
+
+class Node:
+    """A tensor node in the computation graph parsed from a llama.cpp log."""
+    def __init__(self, name, op, backend, shape):
+        self.name = name
+        self.op = op
+        self.backend = backend
+        self.shape = shape
+        self.prev = []
+        self.next = []
+        self.in_deg = 0
+        self.out_deg = 0
+
+def read_graph(file_path, skip_patterns=None):
+    """Parse a llama.cpp log file and build the tensor dependency graph."""
+    # Avoid a mutable default argument; treat None as "skip nothing".
+    if skip_patterns is None:
+        skip_patterns = []
+    with open(file_path, "r") as fin:
+        lines = fin.readlines()
+
+    nodes = {}
+    edges = []
+
+    def do_skip(name):
+        return any(pattern in name for pattern in skip_patterns)
+
+    start = False
+    for i, line in enumerate(lines):
+        line = line.strip()
+        if "Start to print tensors in the computation graph" in line:
+            start = True
+            continue
+        elif "Finish printing tensors in the computation graph" in line:
+            start = False
+            break
+        if start and "Tensor name" in line:
+            name = line.split("[")[1].split("]")[0]
+            op = lines[i + 1].split("[")[1].split("]")[0]
+            backend = lines[i + 2].split("[")[1].split("]")[0]
+            shape = lines[i + 3].split("(")[1].split(")")[0]
+            shape = list(map(int, shape.split(", ")))
+            if do_skip(name):
+                continue
+            nodes[name] = Node(name, op, backend, shape)
+
+            source = lines[i + 4].split("[")[1].split("]")[0]
+            for pre_node in source.split(", "):
+                if do_skip(pre_node):
+                    continue
+                if pre_node not in nodes:
+                    nodes[pre_node] = Node(pre_node, "", "", [])
+                edges.append((pre_node, name))
+
+    for src, dst in edges:
+        nodes[dst].in_deg += 1
+        nodes[dst].prev.append(src)
+        nodes[src].out_deg += 1
+        nodes[src].next.append(dst)
+
+    return nodes
+
+def compute_concur(start, nodes):
+    """Return the maximum BFS level width reachable from ``start``."""
+    concur = 1
+    order = 0
+    queue = deque([(order, start)])
+    while queue:
+        if order != queue[0][0]:
+            # Entering a new level: the queue now holds exactly that level,
+            # so track the widest level seen instead of the last one.
+            concur = max(concur, len(queue))
+            order = queue[0][0]
+        cur_order, cur_node = queue.popleft()
+        for next_node in nodes[cur_node].next:
+            queue.append((cur_order + 1, next_node))
+    return concur
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--log_file", required=True, type=str)
+    args = parser.parse_args()
+
+    gf = read_graph(args.log_file, skip_patterns=[".weight"])
+
+    max_concur = 1
+    for name, node in gf.items():
+        if node.in_deg == 0:
+            concur = compute_concur(name, gf)
+            max_concur = max(max_concur, concur)
+            print(f"Start node: {name}, Max concurrency: {concur}")
+    print(f"Overall max concurrency: {max_concur}")
diff --git a/common/common.cpp b/common/common.cpp
index b3425ab09..b9aa97419 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1102,11 +1102,13 @@ void llama_batch_add(
 
 std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
     auto mparams = llama_model_params_from_gpt_params(params);
 
+    fprintf(stderr, "%s: start to load model from file\n", __func__);
     llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
     if (model == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
         return std::make_tuple(nullptr, nullptr);
     }
+    fprintf(stderr, "%s: finish loading model from file\n", __func__);
 
     auto cparams = llama_context_params_from_gpt_params(params);
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 20e0133ac..5fa4cb9d4 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -355,10 +355,14 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
             tokens[batch_start] = llama_token_bos(llama_get_model(ctx));
         }
 
+        fprintf(stderr, "%s: number of input tokens %d\n", __func__, batch_size);
+
         if (llama_decode(ctx, llama_batch_get_one(tokens.data() + batch_start, batch_size, j * n_batch, 0))) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
             return {tokens, -1, logit_history, prob_history};
         }
+        if (j > 10)
+            exit(-1);
 
         // restore the original token in case it was set to BOS
         tokens[batch_start] = token_org;
@@ -715,12 +719,15 @@ int main(int argc, char ** argv) {
     llama_model * model;
     llama_context * ctx;
 
+    fprintf(stderr, "%s: start to init llama\n", __func__);
+
     // load the model and apply lora adapter, if any
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
     if (model == NULL) {
         fprintf(stderr, "%s: error: unable to load model\n", __func__);
         return 1;
     }
+    fprintf(stderr, "%s: finish init llama\n", __func__);
 
     const int n_ctx_train = llama_n_ctx_train(model);
     if (params.n_ctx > n_ctx_train) {
diff --git a/llama.cpp b/llama.cpp
index 1f4eafeff..54b7c6d4d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4500,6 +4500,7 @@ struct llm_build_context {
 
         ggml_build_forward_expand(gf, cur);
 
+        LLAMA_LOG_INFO("%s: Start to print tensors in the computation graph\n", __func__);
         for (int i = 0; i < gf->n_nodes; ++ i) {
             ggml_tensor * t = gf->nodes[i];
             LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name);
@@ -4540,6 +4541,7 @@ struct llm_build_context {
                 }
                 LLAMA_LOG_INFO("]\n");
             }
+        LLAMA_LOG_INFO("%s: Finish printing tensors in the computation graph\n", __func__);
         exit(-1);
 
         return gf;