Add debug print statements and a computation-graph analysis script
This commit is contained in:
parent
76e1fd0f45
commit
6215c33a2b
4 changed files with 107 additions and 0 deletions
96
analyze_cgf.py
Normal file
96
analyze_cgf.py
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
import argparse
import os
from collections import deque
|
||||||
|
|
||||||
|
class Node:
    """A single tensor in the computation graph.

    Holds the metadata parsed from the log (name, op, backend, shape)
    plus adjacency bookkeeping that the graph reader fills in afterwards.
    """

    def __init__(self, name, op, backend, shape):
        # Metadata parsed from the tensor dump.
        self.name, self.op = name, op
        self.backend, self.shape = backend, shape
        # Adjacency lists and degrees, populated by read_graph().
        self.prev, self.next = [], []
        self.in_deg = self.out_deg = 0
|
||||||
|
|
||||||
|
def read_graph(file_path, skip_pattens=None):
    """Parse the tensor dump in a log file into a graph of Node objects.

    Scans the lines between the "Start to print tensors in the
    computation graph" and "Finish printing tensors in the computation
    graph" markers.  Each tensor is described by five consecutive lines:
    name ``[...]``, op ``[...]``, backend ``[...]``, shape ``(...)`` and
    a comma-separated source list ``[...]``.

    Args:
        file_path: Path to the log file to parse.
        skip_pattens: Optional list of substrings; any tensor whose name
            contains one of them (e.g. ".weight") is dropped, both as a
            node and as an edge endpoint.  (Name kept misspelled for
            backward compatibility with existing callers.)

    Returns:
        Dict mapping tensor name -> Node, with prev/next/in_deg/out_deg
        populated from the parsed edges.
    """
    # Avoid the mutable-default-argument pitfall of the original signature.
    skip_pattens = skip_pattens or []

    with open(file_path, "r") as fin:
        lines = fin.readlines()

    nodes = {}
    edges = []

    def do_skip(name):
        # True when the tensor name matches any skip pattern.
        return any(patten in name for patten in skip_pattens)

    start = False
    for i, line in enumerate(lines):
        line = line.strip()
        if "Start to print tensors in the computation graph" in line:
            start = True
            continue
        elif "Finish printing tensors in the computation graph" in line:
            start = False
            break
        if start and "Tensor name" in line:
            # A tensor block spans five lines: name/op/backend/shape/source.
            name = line.split("[")[1].split("]")[0]
            op = lines[i + 1].split("[")[1].split("]")[0]
            backend = lines[i + 2].split("[")[1].split("]")[0]
            shape = lines[i + 3].split("(")[1].split(")")[0]
            shape = [int(dim) for dim in shape.split(", ")]
            if do_skip(name):
                continue
            nodes[name] = Node(name, op, backend, shape)

            source = lines[i + 4].split("[")[1].split("]")[0]
            for pre_name in source.split(", "):
                if do_skip(pre_name):
                    continue
                if pre_name not in nodes:
                    # Placeholder for a producer that has no entry of its
                    # own (e.g. a graph input seen only as a source).
                    nodes[pre_name] = Node(pre_name, "", "", [])
                edges.append((pre_name, name))

    # Populate adjacency lists and degrees from the collected edges.
    # ("src"/"dst" instead of the original "next", which shadowed the builtin.)
    for src, dst in edges:
        nodes[dst].in_deg += 1
        nodes[dst].prev.append(src)
        nodes[src].out_deg += 1
        nodes[src].next.append(dst)

    return nodes
|
||||||
|
|
||||||
|
def compute_concur(start, nodes):
    """Return the maximum BFS frontier width reachable from ``start``.

    Walks the graph level by level and measures how many entries sit in
    the queue each time the level number advances; the largest such
    width estimates how many tensors could be computed concurrently.

    Bug fix: the original overwrote ``concur`` on every level transition
    and therefore returned the width of the *last* transition, not the
    maximum, despite being reported as "Max concurrency" by the caller.

    NOTE(review): a node reachable via k distinct paths is enqueued k
    times, so the width is per-path rather than per-unique-node — confirm
    this is the intended notion of concurrency.  Assumes the graph is
    acyclic; a cycle would loop forever.

    Args:
        start: Name of the root node (typically one with in_deg == 0).
        nodes: Dict mapping name -> Node; only ``.next`` is read.

    Returns:
        The maximum frontier width observed (>= 1).
    """
    concur = 1
    order = 0
    # deque gives O(1) popleft; list.pop(0) was O(n) per dequeue.
    queue = deque([(order, start)])
    while queue:
        if order != queue[0][0]:
            # Entered a new BFS level: all of its entries are queued now,
            # so len(queue) is exactly this level's width.
            concur = max(concur, len(queue))
            order = queue[0][0]
        cur_order, cur_node = queue.popleft()
        for next_node in nodes[cur_node].next:
            queue.append((cur_order + 1, next_node))
    return concur
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--log_file", required=True, type=str)
    args = parser.parse_args()

    # Skip weight tensors: they are static inputs, not computed nodes.
    gf = read_graph(args.log_file, skip_pattens=[".weight"])

    # Measure concurrency from every source node (in_deg == 0) and keep
    # the overall maximum.  (Bug fix: the original initialized
    # max_concur but never updated or reported it.)
    max_concur = 1
    for name, node in gf.items():
        if node.in_deg == 0:
            concur = compute_concur(name, gf)
            max_concur = max(max_concur, concur)
            print(f"Start node: {name}, Max concurrency: {concur}")
    print(f"Overall max concurrency: {max_concur}")
|
|
@ -1102,11 +1102,13 @@ void llama_batch_add(
|
||||||
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
|
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
|
||||||
auto mparams = llama_model_params_from_gpt_params(params);
|
auto mparams = llama_model_params_from_gpt_params(params);
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: start to load model from file\n", __func__);
|
||||||
llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
|
llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
|
||||||
if (model == NULL) {
|
if (model == NULL) {
|
||||||
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
|
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
|
||||||
return std::make_tuple(nullptr, nullptr);
|
return std::make_tuple(nullptr, nullptr);
|
||||||
}
|
}
|
||||||
|
fprintf(stderr, "%s: finish loading model from file\n", __func__);
|
||||||
|
|
||||||
auto cparams = llama_context_params_from_gpt_params(params);
|
auto cparams = llama_context_params_from_gpt_params(params);
|
||||||
|
|
||||||
|
|
|
@ -355,10 +355,14 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||||
tokens[batch_start] = llama_token_bos(llama_get_model(ctx));
|
tokens[batch_start] = llama_token_bos(llama_get_model(ctx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: number of input tokens %d\n", __func__, batch_size);
|
||||||
|
|
||||||
if (llama_decode(ctx, llama_batch_get_one(tokens.data() + batch_start, batch_size, j * n_batch, 0))) {
|
if (llama_decode(ctx, llama_batch_get_one(tokens.data() + batch_start, batch_size, j * n_batch, 0))) {
|
||||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||||
return {tokens, -1, logit_history, prob_history};
|
return {tokens, -1, logit_history, prob_history};
|
||||||
}
|
}
|
||||||
|
if (j > 10)
|
||||||
|
exit(-1);
|
||||||
|
|
||||||
// restore the original token in case it was set to BOS
|
// restore the original token in case it was set to BOS
|
||||||
tokens[batch_start] = token_org;
|
tokens[batch_start] = token_org;
|
||||||
|
@ -715,12 +719,15 @@ int main(int argc, char ** argv) {
|
||||||
llama_model * model;
|
llama_model * model;
|
||||||
llama_context * ctx;
|
llama_context * ctx;
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: start to init llama\n", __func__);
|
||||||
|
|
||||||
// load the model and apply lora adapter, if any
|
// load the model and apply lora adapter, if any
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||||
if (model == NULL) {
|
if (model == NULL) {
|
||||||
fprintf(stderr, "%s: error: unable to load model\n", __func__);
|
fprintf(stderr, "%s: error: unable to load model\n", __func__);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
fprintf(stderr, "%s: finish init llama\n", __func__);
|
||||||
|
|
||||||
const int n_ctx_train = llama_n_ctx_train(model);
|
const int n_ctx_train = llama_n_ctx_train(model);
|
||||||
if (params.n_ctx > n_ctx_train) {
|
if (params.n_ctx > n_ctx_train) {
|
||||||
|
|
|
@ -4500,6 +4500,7 @@ struct llm_build_context {
|
||||||
|
|
||||||
ggml_build_forward_expand(gf, cur);
|
ggml_build_forward_expand(gf, cur);
|
||||||
|
|
||||||
|
LLAMA_LOG_INFO("%s: Start to print tensors in the computation graph\n", __func__);
|
||||||
for (int i = 0; i < gf->n_nodes; ++ i) {
|
for (int i = 0; i < gf->n_nodes; ++ i) {
|
||||||
ggml_tensor * t = gf->nodes[i];
|
ggml_tensor * t = gf->nodes[i];
|
||||||
LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name);
|
LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name);
|
||||||
|
@ -4540,6 +4541,7 @@ struct llm_build_context {
|
||||||
}
|
}
|
||||||
LLAMA_LOG_INFO("]\n");
|
LLAMA_LOG_INFO("]\n");
|
||||||
}
|
}
|
||||||
|
LLAMA_LOG_INFO("%s: Finish printing tensors in the computation graph\n", __func__);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
|
|
||||||
return gf;
|
return gf;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue