add printing codes

This commit is contained in:
luffy06 2024-01-08 16:10:15 +08:00
parent f6793491b5
commit 5ba6593252
4 changed files with 200 additions and 2 deletions

View file

@ -325,6 +325,11 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
std::vector<std::thread> workers(std::thread::hardware_concurrency() - 1);
fprintf(stderr, "%s: n_tokens: %ld\n", __func__, tokens.size());
fprintf(stderr, "%s: n_ctx: %d\n", __func__, n_ctx);
fprintf(stderr, "%s: n_batch: %d\n", __func__, n_batch);
fprintf(stderr, "%s: num batches per chunk processing: %d\n", __func__, int((n_ctx + n_batch - 1) / n_batch));
for (int i = 0; i < n_chunk; ++i) {
const int start = i * n_ctx;
const int end = start + n_ctx;

172
ggml.c
View file

@ -259,6 +259,178 @@ typedef double ggml_float;
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
//
// enum strings
//
// Return a human-readable name for a ggml backend type, for logging/debugging.
//
// NOTE(review): the case labels are the raw numeric values of the
// GGML_BACKEND_CPU / GGML_BACKEND_GPU / GGML_BACKEND_GPU_SPLIT enumerators
// (0 / 10 / 20 as declared in ggml.h) — if the enum values ever change,
// these labels must be updated in lockstep. Prefer using the enumerator
// names here once this function lives next to the enum definition.
//
// Never returns NULL; unknown values map to "WRONG_BACKEND_TYPE".
const char * ggml_backend_type_string(enum ggml_backend_type backend_type) {
    switch (backend_type) {
        case 0:  // GGML_BACKEND_CPU
            return "GGML_BACKEND_CPU";
        case 10: // GGML_BACKEND_GPU
            return "GGML_BACKEND_GPU";
        case 20: // GGML_BACKEND_GPU_SPLIT
            return "GGML_BACKEND_GPU_SPLIT";
        default:
            return "WRONG_BACKEND_TYPE";
    }
    // no trailing return: every path above returns (dead code removed)
}
// Return a human-readable name for a ggml graph operation, for logging/debugging.
//
// Implementation: a static lookup table indexed by the op's numeric value.
// This replaces a 150-line switch over hard-coded case numbers; both forms
// assume the enum ggml_op values are contiguous starting at 0 (which the
// original magic-number cases already assumed). The table index IS the enum
// value, so entries must stay in enum declaration order.
//
// Never returns NULL; out-of-range values (negative or past GGML_OP_COUNT)
// map to "WRONG_OP".
const char * ggml_op_string(enum ggml_op op) {
    static const char * op_names[] = {
        "GGML_OP_NONE",                     //  0
        "GGML_OP_DUP",                      //  1
        "GGML_OP_ADD",                      //  2
        "GGML_OP_ADD1",                     //  3
        "GGML_OP_ACC",                      //  4
        "GGML_OP_SUB",                      //  5
        "GGML_OP_MUL",                      //  6
        "GGML_OP_DIV",                      //  7
        "GGML_OP_SQR",                      //  8
        "GGML_OP_SQRT",                     //  9
        "GGML_OP_LOG",                      // 10
        "GGML_OP_SUM",                      // 11
        "GGML_OP_SUM_ROWS",                 // 12
        "GGML_OP_MEAN",                     // 13
        "GGML_OP_ARGMAX",                   // 14
        "GGML_OP_REPEAT",                   // 15
        "GGML_OP_REPEAT_BACK",              // 16
        "GGML_OP_CONCAT",                   // 17
        "GGML_OP_SILU_BACK",                // 18
        "GGML_OP_NORM",                     // 19 (normalize)
        "GGML_OP_RMS_NORM",                 // 20
        "GGML_OP_RMS_NORM_BACK",            // 21
        "GGML_OP_GROUP_NORM",               // 22
        "GGML_OP_MUL_MAT",                  // 23
        "GGML_OP_MUL_MAT_ID",               // 24
        "GGML_OP_OUT_PROD",                 // 25
        "GGML_OP_SCALE",                    // 26
        "GGML_OP_SET",                      // 27
        "GGML_OP_CPY",                      // 28
        "GGML_OP_CONT",                     // 29
        "GGML_OP_RESHAPE",                  // 30
        "GGML_OP_VIEW",                     // 31
        "GGML_OP_PERMUTE",                  // 32
        "GGML_OP_TRANSPOSE",                // 33
        "GGML_OP_GET_ROWS",                 // 34
        "GGML_OP_GET_ROWS_BACK",            // 35
        "GGML_OP_DIAG",                     // 36
        "GGML_OP_DIAG_MASK_INF",            // 37
        "GGML_OP_DIAG_MASK_ZERO",           // 38
        "GGML_OP_SOFT_MAX",                 // 39
        "GGML_OP_SOFT_MAX_BACK",            // 40
        "GGML_OP_ROPE",                     // 41
        "GGML_OP_ROPE_BACK",                // 42
        "GGML_OP_ALIBI",                    // 43
        "GGML_OP_CLAMP",                    // 44
        "GGML_OP_CONV_TRANSPOSE_1D",        // 45
        "GGML_OP_IM2COL",                   // 46
        "GGML_OP_CONV_TRANSPOSE_2D",        // 47
        "GGML_OP_POOL_1D",                  // 48
        "GGML_OP_POOL_2D",                  // 49
        "GGML_OP_UPSCALE",                  // 50 (nearest interpolate)
        "GGML_OP_PAD",                      // 51
        "GGML_OP_ARGSORT",                  // 52
        "GGML_OP_LEAKY_RELU",               // 53
        "GGML_OP_FLASH_ATTN",               // 54
        "GGML_OP_FLASH_FF",                 // 55
        "GGML_OP_FLASH_ATTN_BACK",          // 56
        "GGML_OP_WIN_PART",                 // 57
        "GGML_OP_WIN_UNPART",               // 58
        "GGML_OP_GET_REL_POS",              // 59
        "GGML_OP_ADD_REL_POS",              // 60
        "GGML_OP_UNARY",                    // 61
        "GGML_OP_MAP_UNARY",                // 62
        "GGML_OP_MAP_BINARY",               // 63
        "GGML_OP_MAP_CUSTOM1_F32",          // 64
        "GGML_OP_MAP_CUSTOM2_F32",          // 65
        "GGML_OP_MAP_CUSTOM3_F32",          // 66
        "GGML_OP_MAP_CUSTOM1",              // 67
        "GGML_OP_MAP_CUSTOM2",              // 68
        "GGML_OP_MAP_CUSTOM3",              // 69
        "GGML_OP_CROSS_ENTROPY_LOSS",       // 70
        "GGML_OP_CROSS_ENTROPY_LOSS_BACK",  // 71
        "GGML_OP_COUNT",                    // 72
    };
    const int n_ops = (int) (sizeof(op_names) / sizeof(op_names[0]));
    // bounds check replaces the original switch's default branch
    if ((int) op < 0 || (int) op >= n_ops) {
        return "WRONG_OP";
    }
    return op_names[(int) op];
}
//
// global data
//

6
ggml.h
View file

@ -490,6 +490,10 @@ extern "C" {
GGML_LOG_LEVEL_DEBUG = 5
};
const char * ggml_backend_type_string(enum ggml_backend_type backend_type);
const char * ggml_op_string(enum ggml_op op);
// ggml object
struct ggml_object {
size_t offs;
@ -511,7 +515,7 @@ extern "C" {
struct ggml_backend_buffer * buffer;
int64_t ne[GGML_MAX_DIMS]; // number of elements
int64_t ne[GGML_MAX_DIMS]; // number of elements, [batch size, , seq len, hidden dim]
size_t nb[GGML_MAX_DIMS]; // stride in bytes:
// nb[0] = ggml_type_size(type)
// nb[1] = nb[0] * (ne[0] / ggml_blck_size(type)) + padding

View file

@ -4500,6 +4500,23 @@ struct llm_build_context {
ggml_build_forward_expand(gf, cur);
for (int i = 0; i < gf->n_nodes; ++ i) {
ggml_tensor * t = gf->nodes[i];
LLAMA_LOG_INFO("%s: Tensor name [%s]\n", __func__, t->name);
LLAMA_LOG_INFO("%s: \tOP [%s]\n", __func__, ggml_op_string(t->op));
LLAMA_LOG_INFO("%s: \tBackend [%s]\n", __func__, ggml_backend_type_string(t->backend));
LLAMA_LOG_INFO("%s: \tShape (", __func__);
for (int j = 0; j < GGML_MAX_DIMS; ++ j) {
LLAMA_LOG_INFO("%ld", t->ne[GGML_MAX_DIMS - 1 - j]);
if (j != GGML_MAX_DIMS - 1) {
LLAMA_LOG_INFO(", ");
} else {
LLAMA_LOG_INFO(")\n");
}
}
}
exit(-1);
return gf;
}
@ -6437,7 +6454,7 @@ static int llama_decode_internal(
res->backend = GGML_BACKEND_CPU;
#endif
// LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
// for big prompts, if BLAS is enabled, it is better to use only one thread
// otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance