Add debug print statements and a computation-graph analysis script
This commit is contained in:
parent
76e1fd0f45
commit
6215c33a2b
4 changed files with 107 additions and 0 deletions
96
analyze_cgf.py
Normal file
96
analyze_cgf.py
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
import argparse
import os
from collections import deque
|
||||||
|
|
||||||
|
class Node:
    """A single tensor in the computation graph.

    Holds the metadata parsed from the log (name, op, backend, shape)
    plus adjacency bookkeeping that the graph reader fills in afterwards.
    """

    def __init__(self, name, op, backend, shape):
        # Metadata parsed from the tensor dump.
        self.name, self.op = name, op
        self.backend, self.shape = backend, shape
        # Adjacency lists and degrees, populated by read_graph().
        self.prev, self.next = [], []
        self.in_deg = self.out_deg = 0
|
||||||
|
|
||||||
|
def read_graph(file_path, skip_pattens=None):
    """Parse the tensor dump in a log file into a graph of Node objects.

    Scans the lines between the "Start to print tensors in the
    computation graph" and "Finish printing tensors in the computation
    graph" markers.  Each tensor is described by five consecutive lines:
    name ``[...]``, op ``[...]``, backend ``[...]``, shape ``(...)`` and
    a comma-separated source list ``[...]``.

    Args:
        file_path: Path to the log file to parse.
        skip_pattens: Optional list of substrings; any tensor whose name
            contains one of them (e.g. ".weight") is dropped, both as a
            node and as an edge endpoint.  (Name kept misspelled for
            backward compatibility with existing callers.)

    Returns:
        Dict mapping tensor name -> Node, with prev/next/in_deg/out_deg
        populated from the parsed edges.
    """
    # Avoid the mutable-default-argument pitfall of the original signature.
    skip_pattens = skip_pattens or []

    with open(file_path, "r") as fin:
        lines = fin.readlines()

    nodes = {}
    edges = []

    def do_skip(name):
        # True when the tensor name matches any skip pattern.
        return any(patten in name for patten in skip_pattens)

    start = False
    for i, line in enumerate(lines):
        line = line.strip()
        if "Start to print tensors in the computation graph" in line:
            start = True
            continue
        elif "Finish printing tensors in the computation graph" in line:
            start = False
            break
        if start and "Tensor name" in line:
            # A tensor block spans five lines: name/op/backend/shape/source.
            name = line.split("[")[1].split("]")[0]
            op = lines[i + 1].split("[")[1].split("]")[0]
            backend = lines[i + 2].split("[")[1].split("]")[0]
            shape = lines[i + 3].split("(")[1].split(")")[0]
            shape = [int(dim) for dim in shape.split(", ")]
            if do_skip(name):
                continue
            nodes[name] = Node(name, op, backend, shape)

            source = lines[i + 4].split("[")[1].split("]")[0]
            for pre_name in source.split(", "):
                if do_skip(pre_name):
                    continue
                if pre_name not in nodes:
                    # Placeholder for a producer that has no entry of its
                    # own (e.g. a graph input seen only as a source).
                    nodes[pre_name] = Node(pre_name, "", "", [])
                edges.append((pre_name, name))

    # Populate adjacency lists and degrees from the collected edges.
    # ("src"/"dst" instead of the original "next", which shadowed the builtin.)
    for src, dst in edges:
        nodes[dst].in_deg += 1
        nodes[dst].prev.append(src)
        nodes[src].out_deg += 1
        nodes[src].next.append(dst)

    return nodes
|
||||||
|
|
||||||
|
def compute_concur(start, nodes):
    """Return the maximum BFS frontier width reachable from ``start``.

    Walks the graph level by level and measures how many entries sit in
    the queue each time the level number advances; the largest such
    width estimates how many tensors could be computed concurrently.

    Bug fix: the original overwrote ``concur`` on every level transition
    and therefore returned the width of the *last* transition, not the
    maximum, despite being reported as "Max concurrency" by the caller.

    NOTE(review): a node reachable via k distinct paths is enqueued k
    times, so the width is per-path rather than per-unique-node — confirm
    this is the intended notion of concurrency.  Assumes the graph is
    acyclic; a cycle would loop forever.

    Args:
        start: Name of the root node (typically one with in_deg == 0).
        nodes: Dict mapping name -> Node; only ``.next`` is read.

    Returns:
        The maximum frontier width observed (>= 1).
    """
    concur = 1
    order = 0
    # deque gives O(1) popleft; list.pop(0) was O(n) per dequeue.
    queue = deque([(order, start)])
    while queue:
        if order != queue[0][0]:
            # Entered a new BFS level: all of its entries are queued now,
            # so len(queue) is exactly this level's width.
            concur = max(concur, len(queue))
            order = queue[0][0]
        cur_order, cur_node = queue.popleft()
        for next_node in nodes[cur_node].next:
            queue.append((cur_order + 1, next_node))
    return concur
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--log_file", required=True, type=str)
    args = parser.parse_args()

    # Skip weight tensors: they are static inputs, not computed nodes.
    gf = read_graph(args.log_file, skip_pattens=[".weight"])

    # Measure concurrency from every source node (in_deg == 0) and keep
    # the overall maximum.  (Bug fix: the original initialized
    # max_concur but never updated or reported it.)
    max_concur = 1
    for name, node in gf.items():
        if node.in_deg == 0:
            concur = compute_concur(name, gf)
            max_concur = max(max_concur, concur)
            print(f"Start node: {name}, Max concurrency: {concur}")
    print(f"Overall max concurrency: {max_concur}")
|
|
@ -1102,11 +1102,13 @@ void llama_batch_add(
|
||||||
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
|
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
|
||||||
auto mparams = llama_model_params_from_gpt_params(params);
|
auto mparams = llama_model_params_from_gpt_params(params);
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: start to load model from file\n", __func__);
|
||||||
llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
|
llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
|
||||||
if (model == NULL) {
|
if (model == NULL) {
|
||||||
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
|
fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
|
||||||
return std::make_tuple(nullptr, nullptr);
|
return std::make_tuple(nullptr, nullptr);
|
||||||
}
|
}
|
||||||
|
fprintf(stderr, "%s: finish loading model from file\n", __func__);
|
||||||
|
|
||||||
auto cparams = llama_context_params_from_gpt_params(params);
|
auto cparams = llama_context_params_from_gpt_params(params);
|
||||||
|
|
||||||
|
|
|
@ -355,10 +355,14 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||||
tokens[batch_start] = llama_token_bos(llama_get_model(ctx));
|
tokens[batch_start] = llama_token_bos(llama_get_model(ctx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: number of input tokens %d\n", __func__, batch_size);
|
||||||
|
|
||||||
if (llama_decode(ctx, llama_batch_get_one(tokens.data() + batch_start, batch_size, j * n_batch, 0))) {
|
if (llama_decode(ctx, llama_batch_get_one(tokens.data() + batch_start, batch_size, j * n_batch, 0))) {
|
||||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||||
return {tokens, -1, logit_history, prob_history};
|
return {tokens, -1, logit_history, prob_history};
|
||||||
}
|
}
|
||||||
|
if (j > 10)
|
||||||
|
exit(-1);
|
||||||
|
|
||||||
// restore the original token in case it was set to BOS
|
// restore the original token in case it was set to BOS
|
||||||
tokens[batch_start] = token_org;
|
tokens[batch_start] = token_org;
|
||||||
|
@ -715,12 +719,15 @@ int main(int argc, char ** argv) {
|
||||||
llama_model * model;
|
llama_model * model;
|
||||||
llama_context * ctx;
|
llama_context * ctx;
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: start to init llama\n", __func__);
|
||||||
|
|
||||||
// load the model and apply lora adapter, if any
|
// load the model and apply lora adapter, if any
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||||
if (model == NULL) {
|
if (model == NULL) {
|
||||||
fprintf(stderr, "%s: error: unable to load model\n", __func__);
|
fprintf(stderr, "%s: error: unable to load model\n", __func__);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
fprintf(stderr, "%s: finish init llama\n", __func__);
|
||||||
|
|
||||||
const int n_ctx_train = llama_n_ctx_train(model);
|
const int n_ctx_train = llama_n_ctx_train(model);
|
||||||
if (params.n_ctx > n_ctx_train) {
|
if (params.n_ctx > n_ctx_train) {
|
||||||
|
|
|
@ -4500,6 +4500,7 @@ struct llm_build_context {
|
||||||
|
|
||||||
ggml_build_forward_expand(gf, cur);
|
ggml_build_forward_expand(gf, cur);
|
||||||
|
|
||||||
|
LLAMA_LOG_INFO("%s: Start to print tensors in the computation graph\n", __func__);
|
||||||
for (int i = 0; i < gf->n_nodes; ++ i) {
|
for (int i = 0; i < gf->n_nodes; ++ i) {
|
||||||
ggml_tensor * t = gf->nodes[i];
|
ggml_tensor * t = gf->nodes[i];
|
||||||
LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name);
|
LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name);
|
||||||
|
@ -4540,6 +4541,7 @@ struct llm_build_context {
|
||||||
}
|
}
|
||||||
LLAMA_LOG_INFO("]\n");
|
LLAMA_LOG_INFO("]\n");
|
||||||
}
|
}
|
||||||
|
LLAMA_LOG_INFO("%s: Finish printing tensors in the computation graph\n", __func__);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
|
|
||||||
return gf;
|
return gf;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue