From 6215c33a2bf0d95376bf011391585070fe8bdd80 Mon Sep 17 00:00:00 2001
From: luffy06 <534427411@qq.com>
Date: Tue, 16 Jan 2024 10:29:42 +0800
Subject: [PATCH] add print codes

---
 analyze_cgf.py                     | 94 ++++++++++++++++++++++++++++++
 common/common.cpp                  |  2 ++
 examples/perplexity/perplexity.cpp |  7 +++
 llama.cpp                          |  2 ++
 4 files changed, 105 insertions(+)
 create mode 100644 analyze_cgf.py

diff --git a/analyze_cgf.py b/analyze_cgf.py
new file mode 100644
index 000000000..0024ac28c
--- /dev/null
+++ b/analyze_cgf.py
@@ -0,0 +1,94 @@
+import argparse
+from collections import deque
+
+class Node:
+    """A tensor node in the computation graph parsed from a llama.cpp log."""
+    def __init__(self, name, op, backend, shape):
+        self.name = name
+        self.op = op
+        self.backend = backend
+        self.shape = shape
+        self.prev = []
+        self.next = []
+        self.in_deg = 0
+        self.out_deg = 0
+
+def read_graph(file_path, skip_patterns=None):
+    """Parse a llama.cpp log file and build the tensor dependency graph."""
+    # Avoid a mutable default argument; treat None as "skip nothing".
+    if skip_patterns is None:
+        skip_patterns = []
+    with open(file_path, "r") as fin:
+        lines = fin.readlines()
+
+    nodes = {}
+    edges = []
+
+    def do_skip(name):
+        return any(pattern in name for pattern in skip_patterns)
+
+    start = False
+    for i, line in enumerate(lines):
+        line = line.strip()
+        if "Start to print tensors in the computation graph" in line:
+            start = True
+            continue
+        elif "Finish printing tensors in the computation graph" in line:
+            start = False
+            break
+        if start and "Tensor name" in line:
+            name = line.split("[")[1].split("]")[0]
+            op = lines[i + 1].split("[")[1].split("]")[0]
+            backend = lines[i + 2].split("[")[1].split("]")[0]
+            shape = lines[i + 3].split("(")[1].split(")")[0]
+            shape = list(map(int, shape.split(", ")))
+            if do_skip(name):
+                continue
+            nodes[name] = Node(name, op, backend, shape)
+
+            source = lines[i + 4].split("[")[1].split("]")[0]
+            for pre_node in source.split(", "):
+                if do_skip(pre_node):
+                    continue
+                if pre_node not in nodes:
+                    nodes[pre_node] = Node(pre_node, "", "", [])
+                edges.append((pre_node, name))
+
+    for src, dst in edges:
+        nodes[dst].in_deg += 1
+        nodes[dst].prev.append(src)
+        nodes[src].out_deg += 1
+        nodes[src].next.append(dst)
+
+    return nodes
+
+def compute_concur(start, nodes):
+    """Return the maximum BFS level width reachable from ``start``."""
+    concur = 1
+    order = 0
+    queue = deque([(order, start)])
+    while queue:
+        if order != queue[0][0]:
+            # Entering a new level: the queue now holds exactly that level,
+            # so track the widest level seen instead of the last one.
+            concur = max(concur, len(queue))
+            order = queue[0][0]
+        cur_order, cur_node = queue.popleft()
+        for next_node in nodes[cur_node].next:
+            queue.append((cur_order + 1, next_node))
+    return concur
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--log_file", required=True, type=str)
+    args = parser.parse_args()
+
+    gf = read_graph(args.log_file, skip_patterns=[".weight"])
+
+    max_concur = 1
+    for name, node in gf.items():
+        if node.in_deg == 0:
+            concur = compute_concur(name, gf)
+            max_concur = max(max_concur, concur)
+            print(f"Start node: {name}, Max concurrency: {concur}")
+    print(f"Overall max concurrency: {max_concur}")
diff --git a/common/common.cpp b/common/common.cpp
index b3425ab09..b9aa97419 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1102,11 +1102,13 @@ void llama_batch_add(
 
 std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
     auto mparams = llama_model_params_from_gpt_params(params);
 
+    fprintf(stderr, "%s: start to load model from file\n", __func__);
     llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
     if (model == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
         return std::make_tuple(nullptr, nullptr);
     }
+    fprintf(stderr, "%s: finish loading model from file\n", __func__);
 
     auto cparams = llama_context_params_from_gpt_params(params);
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 20e0133ac..5fa4cb9d4 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -355,10 +355,14 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
             tokens[batch_start] = llama_token_bos(llama_get_model(ctx));
         }
 
+        fprintf(stderr, "%s: number of input tokens %d\n", __func__, batch_size);
+
         if (llama_decode(ctx, llama_batch_get_one(tokens.data() + batch_start, batch_size, j * n_batch, 0))) {
             fprintf(stderr, "%s : failed to eval\n", __func__);
             return {tokens, -1, logit_history, prob_history};
         }
+        if (j > 10)
+            exit(-1);
 
         // restore the original token in case it was set to BOS
         tokens[batch_start] = token_org;
@@ -715,12 +719,15 @@ int main(int argc, char ** argv) {
     llama_model * model;
     llama_context * ctx;
 
+    fprintf(stderr, "%s: start to init llama\n", __func__);
+
     // load the model and apply lora adapter, if any
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
     if (model == NULL) {
         fprintf(stderr, "%s: error: unable to load model\n", __func__);
         return 1;
     }
+    fprintf(stderr, "%s: finish init llama\n", __func__);
 
     const int n_ctx_train = llama_n_ctx_train(model);
     if (params.n_ctx > n_ctx_train) {
diff --git a/llama.cpp b/llama.cpp
index 1f4eafeff..54b7c6d4d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4500,6 +4500,7 @@ struct llm_build_context {
 
         ggml_build_forward_expand(gf, cur);
 
+        LLAMA_LOG_INFO("%s: Start to print tensors in the computation graph\n", __func__);
         for (int i = 0; i < gf->n_nodes; ++ i) {
             ggml_tensor * t = gf->nodes[i];
             LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name);
@@ -4540,6 +4541,7 @@ struct llm_build_context {
                 }
                 LLAMA_LOG_INFO("]\n");
             }
+        LLAMA_LOG_INFO("%s: Finish printing tensors in the computation graph\n", __func__);
         exit(-1);
 
         return gf;