add print codes

This commit is contained in:
luffy06 2024-01-16 10:29:42 +08:00
parent 76e1fd0f45
commit 6215c33a2b
4 changed files with 107 additions and 0 deletions

96
analyze_cgf.py Normal file
View file

@ -0,0 +1,96 @@
import os
import argparse
class Node:
    """One tensor in the parsed computation graph.

    Carries the tensor metadata read from the log plus the graph
    connectivity that read_graph() fills in afterwards.
    """

    def __init__(self, name, op, backend, shape):
        # --- metadata parsed from the log ---
        self.name = name        # tensor name, unique key in the graph dict
        self.op = op            # producing operator, e.g. "MUL_MAT"
        self.backend = backend  # backend the tensor lives on
        self.shape = shape      # list of integer dimensions
        # --- connectivity, populated by read_graph() ---
        self.prev = []          # names of predecessor tensors
        self.next = []          # names of successor tensors
        self.in_deg = 0         # number of incoming edges
        self.out_deg = 0        # number of outgoing edges
def read_graph(file_path, skip_pattens=()):
    """Parse a llama.cpp log and reconstruct the computation graph.

    Scans the log section delimited by the markers
    "Start to print tensors in the computation graph" and
    "Finish printing tensors in the computation graph". Each tensor is
    expected as a fixed 5-line record:

        ... Tensor name [<name>]
        ... [<op>]
        ... [<backend>]
        ... (<d0, d1, ...>)
        ... [<src0, src1, ...>]

    Args:
        file_path: path of the log file to parse.
        skip_pattens: substrings; any tensor whose name contains one
            (e.g. ".weight") is dropped along with its edges.
            (Default changed from a mutable list literal to a tuple to
            avoid the shared-mutable-default pitfall; callers passing a
            list are unaffected.)

    Returns:
        dict mapping tensor name -> Node, with prev/next/in_deg/out_deg
        populated from the parsed edges.
    """
    # Records span multiple lines, so read everything up front and use
    # the line index for random access. `with` guarantees the handle is
    # closed even if parsing raises.
    with open(file_path, "r") as fin:
        lines = fin.readlines()

    def do_skip(name):
        # True when the name matches any skip pattern.
        return any(patten in name for patten in skip_pattens)

    nodes = {}
    edges = []
    start = False
    for i, line in enumerate(lines):
        line = line.strip()
        if "Start to print tensors in the computation graph" in line:
            start = True
            continue
        elif "Finish printing tensors in the computation graph" in line:
            start = False
            break
        if start and "Tensor name" in line:
            name = line.split("[")[1].split("]")[0]
            # Skip before parsing the rest of the record: avoids wasted
            # work and a possible IndexError on a truncated record.
            if do_skip(name):
                continue
            op = lines[i + 1].split("[")[1].split("]")[0]
            backend = lines[i + 2].split("[")[1].split("]")[0]
            shape = lines[i + 3].split("(")[1].split(")")[0]
            shape = [int(dim) for dim in shape.split(", ")]
            nodes[name] = Node(name, op, backend, shape)
            source = lines[i + 4].split("[")[1].split("]")[0]
            for pre_node in source.split(", "):
                # Guard against an empty source list, which would
                # otherwise create a phantom "" node and edge.
                if not pre_node or do_skip(pre_node):
                    continue
                if pre_node not in nodes:
                    # Placeholder for tensors that never get a record of
                    # their own (e.g. graph inputs).
                    nodes[pre_node] = Node(pre_node, "", "", [])
                edges.append((pre_node, name))
    # Second pass: accumulate adjacency and degree counts. Avoid `next`
    # as a variable name -- it shadows the builtin.
    for src, dst in edges:
        nodes[dst].in_deg += 1
        nodes[dst].prev.append(src)
        nodes[src].out_deg += 1
        nodes[src].next.append(dst)
    return nodes
def compute_concur(start, nodes):
    """Return the maximum concurrency reachable from ``start``.

    Performs a level-order (BFS) walk from ``start`` and returns the
    width of the widest level -- the most tensors that could in
    principle execute at the same time.

    Bug fix: the previous version overwrote ``concur`` at every level
    boundary, so it returned the width of the LAST level rather than
    the maximum, even though the caller reports it as "Max
    concurrency". It also used ``list.pop(0)``, which is O(n) per pop.

    NOTE(review): a node reachable through several paths is enqueued
    once per path, so diamond-shaped graphs can overcount level widths
    -- confirm whether de-duplication is wanted. Assumes the graph is
    acyclic; a cycle would loop forever.

    Args:
        start: name of the root node to walk from.
        nodes: mapping name -> node object exposing ``.next`` (a list
            of successor names).

    Returns:
        int: the maximum observed level width (>= 1).
    """
    concur = 1
    order = 0
    # Append-only list with a moving read head: O(1) dequeue without
    # the O(n) cost of list.pop(0).
    queue = [(order, start)]
    head = 0
    while head < len(queue):
        cur_order, cur_node = queue[head]
        if cur_order != order:
            # Head now points at the first element of a new level;
            # everything still pending belongs to that level, so the
            # pending count is exactly the level width.
            concur = max(concur, len(queue) - head)
            order = cur_order
        head += 1
        for next_node in nodes[cur_node].next:
            queue.append((cur_order + 1, next_node))
    return concur
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--log_file", required=True, type=str,
                        help="log file containing the printed computation graph")
    args = parser.parse_args()

    # Weight tensors are constants, not computation, so exclude them.
    gf = read_graph(args.log_file, skip_pattens=[".weight"])

    # Walk from every root (in-degree 0) node, report the concurrency
    # reachable from each, and accumulate the overall maximum.
    # (Previously ``max_concur`` was initialized but never updated or
    # reported -- dead code; it is now maintained and printed.)
    max_concur = 1
    for name, node in gf.items():
        if node.in_deg == 0:
            concur = compute_concur(name, gf)
            max_concur = max(max_concur, concur)
            print(f"Start node: {name}, Max concurrency: {concur}")
    print(f"Overall max concurrency: {max_concur}")

View file

@ -1102,11 +1102,13 @@ void llama_batch_add(
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) { std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
auto mparams = llama_model_params_from_gpt_params(params); auto mparams = llama_model_params_from_gpt_params(params);
fprintf(stderr, "%s: start to load model from file\n", __func__);
llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams); llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
if (model == NULL) { if (model == NULL) {
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str()); fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
return std::make_tuple(nullptr, nullptr); return std::make_tuple(nullptr, nullptr);
} }
fprintf(stderr, "%s: finish loading model from file\n", __func__);
auto cparams = llama_context_params_from_gpt_params(params); auto cparams = llama_context_params_from_gpt_params(params);

View file

@ -355,10 +355,14 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
tokens[batch_start] = llama_token_bos(llama_get_model(ctx)); tokens[batch_start] = llama_token_bos(llama_get_model(ctx));
} }
fprintf(stderr, "%s: number of input tokens %d\n", __func__, batch_size);
if (llama_decode(ctx, llama_batch_get_one(tokens.data() + batch_start, batch_size, j * n_batch, 0))) { if (llama_decode(ctx, llama_batch_get_one(tokens.data() + batch_start, batch_size, j * n_batch, 0))) {
fprintf(stderr, "%s : failed to eval\n", __func__); fprintf(stderr, "%s : failed to eval\n", __func__);
return {tokens, -1, logit_history, prob_history}; return {tokens, -1, logit_history, prob_history};
} }
if (j > 10)
exit(-1);
// restore the original token in case it was set to BOS // restore the original token in case it was set to BOS
tokens[batch_start] = token_org; tokens[batch_start] = token_org;
@ -715,12 +719,15 @@ int main(int argc, char ** argv) {
llama_model * model; llama_model * model;
llama_context * ctx; llama_context * ctx;
fprintf(stderr, "%s: start to init llama\n", __func__);
// load the model and apply lora adapter, if any // load the model and apply lora adapter, if any
std::tie(model, ctx) = llama_init_from_gpt_params(params); std::tie(model, ctx) = llama_init_from_gpt_params(params);
if (model == NULL) { if (model == NULL) {
fprintf(stderr, "%s: error: unable to load model\n", __func__); fprintf(stderr, "%s: error: unable to load model\n", __func__);
return 1; return 1;
} }
fprintf(stderr, "%s: finish init llama\n", __func__);
const int n_ctx_train = llama_n_ctx_train(model); const int n_ctx_train = llama_n_ctx_train(model);
if (params.n_ctx > n_ctx_train) { if (params.n_ctx > n_ctx_train) {

View file

@ -4500,6 +4500,7 @@ struct llm_build_context {
ggml_build_forward_expand(gf, cur); ggml_build_forward_expand(gf, cur);
LLAMA_LOG_INFO("%s: Start to print tensors in the computation graph\n", __func__);
for (int i = 0; i < gf->n_nodes; ++ i) { for (int i = 0; i < gf->n_nodes; ++ i) {
ggml_tensor * t = gf->nodes[i]; ggml_tensor * t = gf->nodes[i];
LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name); LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name);
@ -4540,6 +4541,7 @@ struct llm_build_context {
} }
LLAMA_LOG_INFO("]\n"); LLAMA_LOG_INFO("]\n");
} }
LLAMA_LOG_INFO("%s: Finish printing tensors in the computation graph\n", __func__);
exit(-1); exit(-1);
return gf; return gf;