From d85a629a6c2efdb2fa257305585c7ac66faed020 Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Mon, 22 Jan 2024 23:28:52 +0100
Subject: [PATCH 01/10] Update ggml.c

---
 ggml.c | 227 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 227 insertions(+)

diff --git a/ggml.c b/ggml.c
index f85045c9c..267e99c58 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20158,6 +20158,233 @@ void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
 
 ////////////////////////////////////////////////////////////////////////////////
 
+void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tensor) {
+
+    char *tensor_data;
+    if (tensor->backend != GGML_BACKEND_CPU) {
+        // for any mmap solution we can actually set the CPU data of a tensor during load even if it's GPU offloaded
+        // this shouldn't have a negative effect, worked well in ggllm, saves the need of tensor_get operations for weights
+        if (tensor->buffer == NULL) {
+            printf("ggml_printTensorSample: tensor buffer is NULL\n");
+            return;
+        }
+        tensor_data = (char *) malloc(ggml_nbytes(tensor));
+        ggml_backend_tensor_get(tensor, tensor_data, 0, ggml_nbytes(tensor));
+    } else
+    {
+        tensor_data = tensor->data;
+        if (tensor_data == NULL) {
+            printf("ggml_printTensorSample: tensor data is NULL\n");
+            return;
+        }
+    }
+
+    const char *sep = "+-------------------------------------------------------------------------------------------+\n";
+    printf("%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
+
+    const int MAX_ELEMENTS_ROW = 10;
+    const int MAX_ELEMENTS_COL = 6;
+    const int MAX_ELEMENTS_LAYER = 3; // layered
+    const int MAX_ELEMENTS_BATCH = 2; // repeated display
+    const char *dimensionLabels[] = {"Row", "Col", "Layer", "Batch"};
+
+    printf("\n%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
+    printf("| Total Elements : [ ");
+    for (int i = 0; i < ggml_n_dims(tensor); i++)
+        printf("%s:%-3" PRId64 " ", dimensionLabels[i], tensor->ne[i]);
+    printf("]\n%s", sep);
+
+    int n_dims = ggml_n_dims(tensor);
+
+    if (n_dims == 1) {
+        printf("| 1: ");
+        for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
+            printf("%-7.3f, ", *(double *)((char *) tensor_data + i*tensor->nb[0]));
+        }
+        if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(", ..");
+        printf("\n%s", sep);
+    }
+    else if (n_dims == 2) {
+        for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
+            printf("| %d: ", i+1);
+            for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
+                printf("%-7.3f ", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1]));
+                if(j == MAX_ELEMENTS_COL - 1 && tensor->ne[1] > MAX_ELEMENTS_COL) printf(", ..");
+            }
+            printf("\n");
+        }
+        if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(" .. additional rows\n");
+        printf("%s", sep);
+    } else if(n_dims == 3) {
+        for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
+            printf("| Row %d: ", i+1);
+            for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
+                printf("[");
+                for(int k = 0; k < tensor->ne[2] && k < MAX_ELEMENTS_LAYER; k++){
+                    printf("%-7.3f", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1] + k*tensor->nb[2]));
+                    if(k < tensor->ne[2] - 1 && k < MAX_ELEMENTS_LAYER - 1)
+                        printf(", ");
+                }
+                if(MAX_ELEMENTS_LAYER < tensor->ne[2]) printf(", ..");
+                printf("] ");
+            }
+            printf("\n");
+        }
+        if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(" ... additional layers\n");
+        printf("%s", sep);
+    } else if(n_dims == 4) {
+        for(int batch = 0; batch < tensor->ne[0] && batch < MAX_ELEMENTS_BATCH; batch++){
+            printf("Batch %d\n", batch+1);
+            for(int i = 0; i < tensor->ne[1] && i < MAX_ELEMENTS_ROW; i++){
+                printf("| Row %d: ", i+1);
+                for(int j = 0; j < tensor->ne[2] && j < MAX_ELEMENTS_COL; j++){
+                    printf("[");
+                    for(int k = 0; k < tensor->ne[3] && k < MAX_ELEMENTS_LAYER; k++){
+                        printf("%-7.3f", *(double *)((char *) tensor_data + batch*tensor->nb[0] + i*tensor->nb[1] + j*tensor->nb[2] + k*tensor->nb[3]));
+                        if(k < tensor->ne[3] - 1 && k < MAX_ELEMENTS_LAYER - 1)
+                            printf(", ");
+                    }
+                    if(MAX_ELEMENTS_LAYER < tensor->ne[3]) printf(", ..");
+                    printf("] ");
+                }
+                printf("\n");
+            }
+            if(MAX_ELEMENTS_BATCH < tensor->ne[0]) printf(" ... additional batches\n");
+            printf("%s", sep);
+        }
+    }
+    if (tensor->backend != GGML_BACKEND_CPU)
+        free(tensor_data);
+}
+
+void ggml_tensor_printf(const struct ggml_tensor *tensor, char *prefix, int line, bool extended, bool print_sample) {
+    char tmp_str[256] = {0};
+    int pos=0;
+    const char *sep = "+----------------------+----------------------+----------------------+----------------------+";
+    const char *sep_border = "+======================+======================+======================+======================+";
+    printf("%s\n", sep_border);
+    printf("| %s:%d\n", prefix,line);
+    printf("| %-32s [%s type]\n", tensor->name, ggml_type_name(tensor->type));
+    printf("%s\n", sep);
+    char strides[256] = {0};
+    /**
+    // nb[0] = sizeof(type)
+    // nb[1] = nb[0] * ne[0] + padding
+    // nb[i] = nb[i-1] * ne[i-1]
+    */
+    {
+        strides[0] = '\0';
+        for (int i = 0; i < ggml_n_dims(tensor); i++) {
+            char dim_str[20];
+            snprintf(dim_str, sizeof(dim_str), "%" PRId64, tensor->nb[i]);
+            strncat(strides, dim_str, sizeof(strides) - strlen(strides) - 1);
+            if (i != ggml_n_dims(tensor) - 1) {
+                strncat(strides, "x", sizeof(strides) - strlen(strides) - 1);
+            }
+        }
+    }
+
+    printf("| %-20s | %-20s | %-20s | %-20s |\n", "Dimensions", "Strides", "Layer id", "Backend");
+    int layer_id=-1; // tensor->meta structure not available
+    printf("| %-20d | %-20s | %-20d | %-20s |\n", ggml_n_dims(tensor), strides, layer_id, tensor->backend == GGML_BACKEND_CPU ? "CPU" : ((tensor->backend == GGML_BACKEND_GPU) ? "GPU" : "GPU_SPLIT"));
+    printf("%s\n", sep);
+    pos = 0;
+    for (int i = 0; i < ggml_n_dims(tensor); i++) {
+        pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, "%" PRId64, tensor->ne[i]);
+        if (i != ggml_n_dims(tensor) - 1) {
+            pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, " x ");
+        }
+    }
+    printf("| %-20s | %-20s | %-20s | %-20s |\n", "Elements", "Src0", "Src1","Operation");
+    printf("| %-20s |", tmp_str);
+
+    if (tensor->src[0]) {
+        pos = 0;
+        for (int i = 0; i < ggml_n_dims(tensor->src[0]); i++) {
+            pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, "%" PRId64, tensor->src[0]->ne[i]);
+            if (i != ggml_n_dims(tensor->src[0]) - 1) {
+                pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, " x ");
+            }
+        }
+        printf(" %-20s |", tmp_str);
+    } else {
+        printf(" %-20s |", "N/A");
+    }
+    if (tensor->src[1]) {
+        pos = 0;
+        for (int i = 0; i < ggml_n_dims(tensor->src[1]); i++) {
+            pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, "%" PRId64, tensor->src[1]->ne[i]);
+            if (i != ggml_n_dims(tensor->src[1]) - 1) {
+                pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, " x ");
+            }
+        }
+        printf(" %-20s |", tmp_str);
+    } else {
+        printf(" %-20s |", "N/A");
+    }
+    printf(" %-20s |", tensor->op > 0 ? GGML_OP_NAME[tensor->op] : "N/A");
+    printf("\n");
+    printf("%s\n", sep);
+
+    if (extended) {
+        bool is_transposed = ggml_is_transposed(tensor);
+        bool is_permuted = ggml_is_permuted(tensor);
+        bool is_cont = ggml_is_contiguous(tensor);
+        printf("| %-17s%s | %-17s%s | %-17s%s | %-6s%11.2f MB |\n", "Transposed:", is_transposed ? "Yes" : "No ", "Permuted:", is_permuted ? "Yes" : "No ", "Contiguous:", is_cont ? "Yes" : "No ","Size:", ggml_nbytes(tensor)/(1024.0*1024.0));
+    }
+
+    if (extended) {
+        if (tensor->src[0] && strlen(tensor->src[0]->name)) {
+            printf("| %-20s | ", "Src0 name:");
+            printf("%-66s |\n", tensor->src[0]->name);
+        }
+        if (tensor->src[1] && strlen(tensor->src[1]->name)) {
+            printf("| %-20s | ", "Src1 name:");
+            printf("%-66s |\n", tensor->src[1]->name);
+        }
+        printf("%s\n\n", sep);
+    }
+
+    if (print_sample) {
+        if (extended) {
+            if (tensor->src[0] && tensor->src[0]->ne[0]) {
+                ggml_printTensorSample("src0", tensor->src[0]);
+            }
+            if (tensor->src[1] && tensor->src[1]->ne[0]) {
+                ggml_printTensorSample("src1", tensor->src[1]);
+            }
+        }
+        ggml_printTensorSample("dst", tensor);
+    }
+    printf("%s\n", sep_border);
+}
+
+float ggml_get_tensor_index(const struct ggml_tensor* tensor, int ind1, int ind2, int ind3, int ind4) {
+    int n_dims = ggml_n_dims(tensor);
+    if (n_dims < 1 || n_dims > 4) {
+        printf("Error: Incorrect dimension number %d\n", n_dims);
+        return -1; // handle error
+    }
+
+    int indices[4] = {ind1, ind2, ind3, ind4};
+    int total_offset = 0;
+
+    for (int i = 0; i < n_dims; i++) {
+        if (indices[i] >= tensor->ne[i] || indices[i] < 0) {
+            printf("Error: Incorrect index for dimension %d\n", i);
+            printf("Index: %d, Dimension size: %" PRId64 "\n", indices[i], tensor->ne[i]);
+            return -1; // handle error
+        }
+
+        total_offset += indices[i] * tensor->nb[i];
+    }
+
+    // Return the value at the calculated offset
+    return *(float *)((char *) tensor->data + total_offset);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
 int ggml_cpu_has_avx(void) {
 #if defined(__AVX__)
     return 1;
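A sketch of how these helpers are meant to be driven from a caller. This is illustrative only and not part of the patch: ctx, model.wq and inpL are hypothetical names standing in for whatever context, weight and activation are in scope at the call site.

    // Hypothetical call site while building a graph; "cur" is the node of interest.
    struct ggml_tensor * cur = ggml_mul_mat(ctx, model.wq, inpL);
    ggml_set_name(cur, "Qcur");

    // metadata table plus content samples of cur and of its src0/src1
    ggml_tensor_printf(cur, (char *) __func__, __LINE__, true, true);

    // spot-check one element; the helper assumes f32 data resident on the CPU
    float v = ggml_get_tensor_index(cur, 0, 1, 0, 0);
    printf("Qcur[0][1] = %f\n", v);
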
From 607fbe99c7c3d93bed911ffe612d6c211f4bda80 Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Mon, 22 Jan 2024 23:31:24 +0100
Subject: [PATCH 02/10] Update ggml.h

---
 ggml.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ggml.h b/ggml.h
index dca7bd9ce..6c501e170 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1899,6 +1899,10 @@ extern "C" {
     // dump the graph into a file using the dot format
     GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
 
+    // visualize the tensor - extended adds more information - when printing sample content extended will also print src0 and src1 content
+    // example: ggml_tensor_printf(some_ggml_tensor,"function_name",0,true,true);
+    void ggml_tensor_printf(const struct ggml_tensor *tensor, char *prefix, int line, bool extended, bool print_sample);
+
     // build gradient checkpointing backward graph gb for gf using provided checkpoints
     // gb_tmp will contain original backward graph with rewritten backward process nodes,
     // but without the second forward pass nodes.
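The header comment's example passes a literal prefix and a zero line number. In practice the compiler can supply both; a convenience wrapper along these lines (hypothetical, not part of the patch) keeps call sites terse:

    // Hypothetical wrapper macro: __func__ and __LINE__ expand at the use site;
    // the cast matches the non-const char * parameter of the declaration above.
    #define GGML_TENSOR_DUMP(t) ggml_tensor_printf((t), (char *) __func__, __LINE__, true, true)
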
From 31bfd4a52bb369e3f47963d554742fa459f291be Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Tue, 23 Jan 2024 00:26:48 +0100
Subject: [PATCH 03/10] Update ggml.c

Changed the sample precision to 7.4. Ideally this would be a parameter to
pass to the function: 7.3 often shows just -0.000 or 0.000 on weights.
A higher precision reveals more in these kinds of small-value tensors,
but it would also display less.
---
 ggml.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ggml.c b/ggml.c
index 267e99c58..376d8fe98 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20199,7 +20199,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
     if (n_dims == 1) {
         printf("| 1: ");
         for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
-            printf("%-7.3f, ", *(double *)((char *) tensor_data + i*tensor->nb[0]));
+            printf("%-7.4f, ", *(double *)((char *) tensor_data + i*tensor->nb[0]));
         }
         if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(", ..");
         printf("\n%s", sep);
@@ -20208,7 +20208,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
         for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
             printf("| %d: ", i+1);
             for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
-                printf("%-7.3f ", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1]));
+                printf("%-7.4f ", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1]));
                 if(j == MAX_ELEMENTS_COL - 1 && tensor->ne[1] > MAX_ELEMENTS_COL) printf(", ..");
             }
             printf("\n");
@@ -20221,7 +20221,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
             for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
                 printf("[");
                 for(int k = 0; k < tensor->ne[2] && k < MAX_ELEMENTS_LAYER; k++){
-                    printf("%-7.3f", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1] + k*tensor->nb[2]));
+                    printf("%-7.4f", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1] + k*tensor->nb[2]));
                     if(k < tensor->ne[2] - 1 && k < MAX_ELEMENTS_LAYER - 1)
                         printf(", ");
                 }
@@ -20240,7 +20240,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
                 for(int j = 0; j < tensor->ne[2] && j < MAX_ELEMENTS_COL; j++){
                     printf("[");
                     for(int k = 0; k < tensor->ne[3] && k < MAX_ELEMENTS_LAYER; k++){
-                        printf("%-7.3f", *(double *)((char *) tensor_data + batch*tensor->nb[0] + i*tensor->nb[1] + j*tensor->nb[2] + k*tensor->nb[3]));
+                        printf("%-7.4f", *(double *)((char *) tensor_data + batch*tensor->nb[0] + i*tensor->nb[1] + j*tensor->nb[2] + k*tensor->nb[3]));
                         if(k < tensor->ne[3] - 1 && k < MAX_ELEMENTS_LAYER - 1)
                             printf(", ");
                     }
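The trade-off described in the commit message is easy to reproduce in isolation. A standalone snippet (illustrative only, with made-up weight values) shows why three decimals flatten typical weight magnitudes to +/-0.000:

    #include <stdio.h>

    int main(void) {
        // typical magnitudes for trained weights
        double w[] = { 0.00042, -0.00007, 0.0312 };
        for (int i = 0; i < 3; i++) {
            // three decimals flatten the first two values to +/-0.000;
            // four decimals recover a digit; %g adapts to the magnitude
            // (the approach a later patch in this series switches to)
            printf("%-7.3f | %-7.4f | %11.4g\n", w[i], w[i], w[i]);
        }
        return 0;
    }
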
From 0fa71d17606ea1d10d83f58c95c5a86a59fcee5a Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Tue, 23 Jan 2024 01:07:19 +0100
Subject: [PATCH 04/10] moved from ggml to ggml-backend, as backend retrieval
 is needed but the header is not available in ggml.c

---
 ggml-backend.c | 202 +++++++++++++++++++++++++++++++++++++++++++++
 ggml-backend.h |   4 +
 ggml.c         | 227 -------------------------------------------------
 ggml.h         |   4 -
 4 files changed, 206 insertions(+), 231 deletions(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index 423512def..8f828564d 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1722,3 +1722,205 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
 
     return true;
 }
+
+void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tensor) {
+
+    char *tensor_data;
+    if (tensor->backend != GGML_BACKEND_CPU) {
+        // for any mmap solution we can actually set the CPU data of a tensor during load even if it's GPU offloaded
+        // this shouldn't have a negative effect, worked well in ggllm, saves the need of tensor_get operations for weights
+        if (tensor->buffer == NULL) {
+            printf("ggml_printTensorSample: tensor buffer is NULL\n");
+            return;
+        }
+        tensor_data = (char *) malloc(ggml_nbytes(tensor));
+        ggml_backend_tensor_get(tensor, tensor_data, 0, ggml_nbytes(tensor));
+    } else
+    {
+        tensor_data = tensor->data;
+        if (tensor_data == NULL) {
+            printf("ggml_printTensorSample: tensor data is NULL\n");
+            return;
+        }
+    }
+
+    const char *sep = "+-------------------------------------------------------------------------------------------+\n";
+    printf("%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
+
+    const int MAX_ELEMENTS_ROW = 10;
+    const int MAX_ELEMENTS_COL = 6;
+    const int MAX_ELEMENTS_LAYER = 3; // layered
+    const int MAX_ELEMENTS_BATCH = 2; // repeated display
+    const char *dimensionLabels[] = {"Row", "Col", "Layer", "Batch"};
+
+    printf("\n%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
+    printf("| Total Elements : [ ");
+    for (int i = 0; i < ggml_n_dims(tensor); i++)
+        printf("%s:%-3" PRId64 " ", dimensionLabels[i], tensor->ne[i]);
+    printf("]\n%s", sep);
+
+    int n_dims = ggml_n_dims(tensor);
+
+    if (n_dims == 1) {
+        printf("| 1: ");
+        for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
+            printf("%-7.4f, ", *(double *)((char *) tensor_data + i*tensor->nb[0]));
+        }
+        if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(", ..");
+        printf("\n%s", sep);
+    }
+    else if (n_dims == 2) {
+        for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
+            printf("| %d: ", i+1);
+            for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
+                printf("%-7.4f ", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1]));
+                if(j == MAX_ELEMENTS_COL - 1 && tensor->ne[1] > MAX_ELEMENTS_COL) printf(", ..");
+            }
+            printf("\n");
+        }
+        if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(" .. additional rows\n");
+        printf("%s", sep);
+    } else if(n_dims == 3) {
+        for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
+            printf("| Row %d: ", i+1);
+            for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
+                printf("[");
+                for(int k = 0; k < tensor->ne[2] && k < MAX_ELEMENTS_LAYER; k++){
+                    printf("%-7.4f", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1] + k*tensor->nb[2]));
+                    if(k < tensor->ne[2] - 1 && k < MAX_ELEMENTS_LAYER - 1)
+                        printf(", ");
+                }
+                if(MAX_ELEMENTS_LAYER < tensor->ne[2]) printf(", ..");
+                printf("] ");
+            }
+            printf("\n");
+        }
+        if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(" ... additional layers\n");
+        printf("%s", sep);
+    } else if(n_dims == 4) {
+        for(int batch = 0; batch < tensor->ne[0] && batch < MAX_ELEMENTS_BATCH; batch++){
+            printf("Batch %d\n", batch+1);
+            for(int i = 0; i < tensor->ne[1] && i < MAX_ELEMENTS_ROW; i++){
+                printf("| Row %d: ", i+1);
+                for(int j = 0; j < tensor->ne[2] && j < MAX_ELEMENTS_COL; j++){
+                    printf("[");
+                    for(int k = 0; k < tensor->ne[3] && k < MAX_ELEMENTS_LAYER; k++){
+                        printf("%-7.4f", *(double *)((char *) tensor_data + batch*tensor->nb[0] + i*tensor->nb[1] + j*tensor->nb[2] + k*tensor->nb[3]));
+                        if(k < tensor->ne[3] - 1 && k < MAX_ELEMENTS_LAYER - 1)
+                            printf(", ");
+                    }
+                    if(MAX_ELEMENTS_LAYER < tensor->ne[3]) printf(", ..");
+                    printf("] ");
+                }
+                printf("\n");
+            }
+            if(MAX_ELEMENTS_BATCH < tensor->ne[0]) printf(" ... additional batches\n");
+            printf("%s", sep);
+        }
+    }
+    if (tensor->backend != GGML_BACKEND_CPU)
+        free(tensor_data);
+}
+
+void ggml_tensor_printf(const struct ggml_tensor *tensor, char *prefix, int line, bool extended, bool print_sample) {
+    char tmp_str[256] = {0};
+    int pos=0;
+    const char *sep = "+----------------------+----------------------+----------------------+----------------------+";
+    const char *sep_border = "+======================+======================+======================+======================+";
+    printf("%s\n", sep_border);
+    printf("| %s:%d\n", prefix,line);
+    printf("| %-32s [%s type]\n", tensor->name, ggml_type_name(tensor->type));
+    printf("%s\n", sep);
+    char strides[256] = {0};
+    /**
+    // nb[0] = sizeof(type)
+    // nb[1] = nb[0] * ne[0] + padding
+    // nb[i] = nb[i-1] * ne[i-1]
+    */
+    {
+        strides[0] = '\0';
+        for (int i = 0; i < ggml_n_dims(tensor); i++) {
+            char dim_str[20];
+            snprintf(dim_str, sizeof(dim_str), "%" PRId64, tensor->nb[i]);
+            strncat(strides, dim_str, sizeof(strides) - strlen(strides) - 1);
+            if (i != ggml_n_dims(tensor) - 1) {
+                strncat(strides, "x", sizeof(strides) - strlen(strides) - 1);
+            }
+        }
+    }
+
+    printf("| %-20s | %-20s | %-20s | %-20s |\n", "Dimensions", "Strides", "Layer id", "Backend");
+    int layer_id=-1; // tensor->meta structure not available
+    printf("| %-20d | %-20s | %-20d | %-20s |\n", ggml_n_dims(tensor), strides, layer_id, tensor->backend == GGML_BACKEND_CPU ? "CPU" : ((tensor->backend == GGML_BACKEND_GPU) ? "GPU" : "GPU_SPLIT"));
+    printf("%s\n", sep);
+    pos = 0;
+    for (int i = 0; i < ggml_n_dims(tensor); i++) {
+        pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, "%" PRId64, tensor->ne[i]);
+        if (i != ggml_n_dims(tensor) - 1) {
+            pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, " x ");
+        }
+    }
+    printf("| %-20s | %-20s | %-20s | %-20s |\n", "Elements", "Src0", "Src1","Operation");
+    printf("| %-20s |", tmp_str);
+
+    if (tensor->src[0]) {
+        pos = 0;
+        for (int i = 0; i < ggml_n_dims(tensor->src[0]); i++) {
+            pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, "%" PRId64, tensor->src[0]->ne[i]);
+            if (i != ggml_n_dims(tensor->src[0]) - 1) {
+                pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, " x ");
+            }
+        }
+        printf(" %-20s |", tmp_str);
+    } else {
+        printf(" %-20s |", "N/A");
+    }
+    if (tensor->src[1]) {
+        pos = 0;
+        for (int i = 0; i < ggml_n_dims(tensor->src[1]); i++) {
+            pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, "%" PRId64, tensor->src[1]->ne[i]);
+            if (i != ggml_n_dims(tensor->src[1]) - 1) {
+                pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, " x ");
+            }
+        }
+        printf(" %-20s |", tmp_str);
+    } else {
+        printf(" %-20s |", "N/A");
+    }
+    printf(" %-20s |", tensor->op > 0 ? ggml_op_name(tensor->op) : "N/A");
+    printf("\n");
+    printf("%s\n", sep);
+
+    if (extended) {
+        bool is_transposed = ggml_is_transposed(tensor);
+        bool is_permuted = ggml_is_permuted(tensor);
+        bool is_cont = ggml_is_contiguous(tensor);
+        printf("| %-17s%s | %-17s%s | %-17s%s | %-6s%11.2f MB |\n", "Transposed:", is_transposed ? "Yes" : "No ", "Permuted:", is_permuted ? "Yes" : "No ", "Contiguous:", is_cont ? "Yes" : "No ","Size:", ggml_nbytes(tensor)/(1024.0*1024.0));
+    }
+
+    if (extended) {
+        if (tensor->src[0] && strlen(tensor->src[0]->name)) {
+            printf("| %-20s | ", "Src0 name:");
+            printf("%-66s |\n", tensor->src[0]->name);
+        }
+        if (tensor->src[1] && strlen(tensor->src[1]->name)) {
+            printf("| %-20s | ", "Src1 name:");
+            printf("%-66s |\n", tensor->src[1]->name);
+        }
+        printf("%s\n\n", sep);
+    }
+
+    if (print_sample) {
+        if (extended) {
+            if (tensor->src[0] && tensor->src[0]->ne[0]) {
+                ggml_printTensorSample("src0", tensor->src[0]);
+            }
+            if (tensor->src[1] && tensor->src[1]->ne[0]) {
+                ggml_printTensorSample("src1", tensor->src[1]);
+            }
+        }
+        ggml_printTensorSample("dst", tensor);
+    }
+    printf("%s\n", sep_border);
+}
+
diff --git a/ggml-backend.h b/ggml-backend.h
index ab4ad773f..d20461d82 100644
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -203,6 +203,10 @@ extern "C" {
     GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
     GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
 
+    // Tensor Debug
+    // visualize the tensor - extended adds more information - when printing sample content extended will also print src0 and src1 content
+    GGML_API void ggml_tensor_printf(const struct ggml_tensor *tensor, char *prefix, int line, bool extended, bool print_sample);
+
 
 #ifdef __cplusplus
 }
diff --git a/ggml.c b/ggml.c
index 376d8fe98..f85045c9c 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20158,233 +20158,6 @@ void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tensor) {
-
-    char *tensor_data;
-    if (tensor->backend != GGML_BACKEND_CPU) {
-        // for any mmap solution we can actually set the CPU data of a tensor during load even if it's GPU offloaded
-        // this shouldn't have a negative effect, worked well in ggllm, saves the need of tensor_get operations for weights
-        if (tensor->buffer == NULL) {
-            printf("ggml_printTensorSample: tensor buffer is NULL\n");
-            return;
-        }
-        tensor_data = (char *) malloc(ggml_nbytes(tensor));
-        ggml_backend_tensor_get(tensor, tensor_data, 0, ggml_nbytes(tensor));
-    } else
-    {
-        tensor_data = tensor->data;
-        if (tensor_data == NULL) {
-            printf("ggml_printTensorSample: tensor data is NULL\n");
-            return;
-        }
-    }
-
-    const char *sep = "+-------------------------------------------------------------------------------------------+\n";
-    printf("%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
-
-    const int MAX_ELEMENTS_ROW = 10;
-    const int MAX_ELEMENTS_COL = 6;
-    const int MAX_ELEMENTS_LAYER = 3; // layered
-    const int MAX_ELEMENTS_BATCH = 2; // repeated display
-    const char *dimensionLabels[] = {"Row", "Col", "Layer", "Batch"};
-
-    printf("\n%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
-    printf("| Total Elements : [ ");
-    for (int i = 0; i < ggml_n_dims(tensor); i++)
-        printf("%s:%-3" PRId64 " ", dimensionLabels[i], tensor->ne[i]);
-    printf("]\n%s", sep);
-
-    int n_dims = ggml_n_dims(tensor);
-
-    if (n_dims == 1) {
-        printf("| 1: ");
-        for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
-            printf("%-7.4f, ", *(double *)((char *) tensor_data + i*tensor->nb[0]));
-        }
-        if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(", ..");
-        printf("\n%s", sep);
-    }
-    else if (n_dims == 2) {
-        for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
-            printf("| %d: ", i+1);
-            for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
-                printf("%-7.4f ", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1]));
-                if(j == MAX_ELEMENTS_COL - 1 && tensor->ne[1] > MAX_ELEMENTS_COL) printf(", ..");
-            }
-            printf("\n");
-        }
-        if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(" .. additional rows\n");
-        printf("%s", sep);
-    } else if(n_dims == 3) {
-        for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
-            printf("| Row %d: ", i+1);
-            for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
-                printf("[");
-                for(int k = 0; k < tensor->ne[2] && k < MAX_ELEMENTS_LAYER; k++){
-                    printf("%-7.4f", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1] + k*tensor->nb[2]));
-                    if(k < tensor->ne[2] - 1 && k < MAX_ELEMENTS_LAYER - 1)
-                        printf(", ");
-                }
-                if(MAX_ELEMENTS_LAYER < tensor->ne[2]) printf(", ..");
-                printf("] ");
-            }
-            printf("\n");
-        }
-        if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(" ... additional layers\n");
-        printf("%s", sep);
-    } else if(n_dims == 4) {
-        for(int batch = 0; batch < tensor->ne[0] && batch < MAX_ELEMENTS_BATCH; batch++){
-            printf("Batch %d\n", batch+1);
-            for(int i = 0; i < tensor->ne[1] && i < MAX_ELEMENTS_ROW; i++){
-                printf("| Row %d: ", i+1);
-                for(int j = 0; j < tensor->ne[2] && j < MAX_ELEMENTS_COL; j++){
-                    printf("[");
-                    for(int k = 0; k < tensor->ne[3] && k < MAX_ELEMENTS_LAYER; k++){
-                        printf("%-7.4f", *(double *)((char *) tensor_data + batch*tensor->nb[0] + i*tensor->nb[1] + j*tensor->nb[2] + k*tensor->nb[3]));
-                        if(k < tensor->ne[3] - 1 && k < MAX_ELEMENTS_LAYER - 1)
-                            printf(", ");
-                    }
-                    if(MAX_ELEMENTS_LAYER < tensor->ne[3]) printf(", ..");
-                    printf("] ");
-                }
-                printf("\n");
-            }
-            if(MAX_ELEMENTS_BATCH < tensor->ne[0]) printf(" ... additional batches\n");
-            printf("%s", sep);
-        }
-    }
-    if (tensor->backend != GGML_BACKEND_CPU)
-        free(tensor_data);
-}
-
-void ggml_tensor_printf(const struct ggml_tensor *tensor, char *prefix, int line, bool extended, bool print_sample) {
-    char tmp_str[256] = {0};
-    int pos=0;
-    const char *sep = "+----------------------+----------------------+----------------------+----------------------+";
-    const char *sep_border = "+======================+======================+======================+======================+";
-    printf("%s\n", sep_border);
-    printf("| %s:%d\n", prefix,line);
-    printf("| %-32s [%s type]\n", tensor->name, ggml_type_name(tensor->type));
-    printf("%s\n", sep);
-    char strides[256] = {0};
-    /**
-    // nb[0] = sizeof(type)
-    // nb[1] = nb[0] * ne[0] + padding
-    // nb[i] = nb[i-1] * ne[i-1]
-    */
-    {
-        strides[0] = '\0';
-        for (int i = 0; i < ggml_n_dims(tensor); i++) {
-            char dim_str[20];
-            snprintf(dim_str, sizeof(dim_str), "%" PRId64, tensor->nb[i]);
-            strncat(strides, dim_str, sizeof(strides) - strlen(strides) - 1);
-            if (i != ggml_n_dims(tensor) - 1) {
-                strncat(strides, "x", sizeof(strides) - strlen(strides) - 1);
-            }
-        }
-    }
-
-    printf("| %-20s | %-20s | %-20s | %-20s |\n", "Dimensions", "Strides", "Layer id", "Backend");
-    int layer_id=-1; // tensor->meta structure not available
-    printf("| %-20d | %-20s | %-20d | %-20s |\n", ggml_n_dims(tensor), strides, layer_id, tensor->backend == GGML_BACKEND_CPU ? "CPU" : ((tensor->backend == GGML_BACKEND_GPU) ? "GPU" : "GPU_SPLIT"));
-    printf("%s\n", sep);
-    pos = 0;
-    for (int i = 0; i < ggml_n_dims(tensor); i++) {
-        pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, "%" PRId64, tensor->ne[i]);
-        if (i != ggml_n_dims(tensor) - 1) {
-            pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, " x ");
-        }
-    }
-    printf("| %-20s | %-20s | %-20s | %-20s |\n", "Elements", "Src0", "Src1","Operation");
-    printf("| %-20s |", tmp_str);
-
-    if (tensor->src[0]) {
-        pos = 0;
-        for (int i = 0; i < ggml_n_dims(tensor->src[0]); i++) {
-            pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, "%" PRId64, tensor->src[0]->ne[i]);
-            if (i != ggml_n_dims(tensor->src[0]) - 1) {
-                pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, " x ");
-            }
-        }
-        printf(" %-20s |", tmp_str);
-    } else {
-        printf(" %-20s |", "N/A");
-    }
-    if (tensor->src[1]) {
-        pos = 0;
-        for (int i = 0; i < ggml_n_dims(tensor->src[1]); i++) {
-            pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, "%" PRId64, tensor->src[1]->ne[i]);
-            if (i != ggml_n_dims(tensor->src[1]) - 1) {
-                pos += snprintf(tmp_str + pos, sizeof(tmp_str) - pos, " x ");
-            }
-        }
-        printf(" %-20s |", tmp_str);
-    } else {
-        printf(" %-20s |", "N/A");
-    }
-    printf(" %-20s |", tensor->op > 0 ? GGML_OP_NAME[tensor->op] : "N/A");
-    printf("\n");
-    printf("%s\n", sep);
-
-    if (extended) {
-        bool is_transposed = ggml_is_transposed(tensor);
-        bool is_permuted = ggml_is_permuted(tensor);
-        bool is_cont = ggml_is_contiguous(tensor);
-        printf("| %-17s%s | %-17s%s | %-17s%s | %-6s%11.2f MB |\n", "Transposed:", is_transposed ? "Yes" : "No ", "Permuted:", is_permuted ? "Yes" : "No ", "Contiguous:", is_cont ? "Yes" : "No ","Size:", ggml_nbytes(tensor)/(1024.0*1024.0));
-    }
-
-    if (extended) {
-        if (tensor->src[0] && strlen(tensor->src[0]->name)) {
-            printf("| %-20s | ", "Src0 name:");
-            printf("%-66s |\n", tensor->src[0]->name);
-        }
-        if (tensor->src[1] && strlen(tensor->src[1]->name)) {
-            printf("| %-20s | ", "Src1 name:");
-            printf("%-66s |\n", tensor->src[1]->name);
-        }
-        printf("%s\n\n", sep);
-    }
-
-    if (print_sample) {
-        if (extended) {
-            if (tensor->src[0] && tensor->src[0]->ne[0]) {
-                ggml_printTensorSample("src0", tensor->src[0]);
-            }
-            if (tensor->src[1] && tensor->src[1]->ne[0]) {
-                ggml_printTensorSample("src1", tensor->src[1]);
-            }
-        }
-        ggml_printTensorSample("dst", tensor);
-    }
-    printf("%s\n", sep_border);
-}
-
-float ggml_get_tensor_index(const struct ggml_tensor* tensor, int ind1, int ind2, int ind3, int ind4) {
-    int n_dims = ggml_n_dims(tensor);
-    if (n_dims < 1 || n_dims > 4) {
-        printf("Error: Incorrect dimension number %d\n", n_dims);
-        return -1; // handle error
-    }
-
-    int indices[4] = {ind1, ind2, ind3, ind4};
-    int total_offset = 0;
-
-    for (int i = 0; i < n_dims; i++) {
-        if (indices[i] >= tensor->ne[i] || indices[i] < 0) {
-            printf("Error: Incorrect index for dimension %d\n", i);
-            printf("Index: %d, Dimension size: %" PRId64 "\n", indices[i], tensor->ne[i]);
-            return -1; // handle error
-        }
-
-        total_offset += indices[i] * tensor->nb[i];
-    }
-
-    // Return the value at the calculated offset
-    return *(float *)((char *) tensor->data + total_offset);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
 int ggml_cpu_has_avx(void) {
 #if defined(__AVX__)
     return 1;
diff --git a/ggml.h b/ggml.h
index 6c501e170..dca7bd9ce 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1899,10 +1899,6 @@ extern "C" {
     // dump the graph into a file using the dot format
     GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
 
-    // visualize the tensor - extended adds more information - when printing sample content extended will also print src0 and src1 content
-    // example: ggml_tensor_printf(some_ggml_tensor,"function_name",0,true,true);
-    void ggml_tensor_printf(const struct ggml_tensor *tensor, char *prefix, int line, bool extended, bool print_sample);
-
     // build gradient checkpointing backward graph gb for gf using provided checkpoints
     // gb_tmp will contain original backward graph with rewritten backward process nodes,
     // but without the second forward pass nodes.
From 8ccb0d69cd0f627a94b31ab1412b3f8fb04fe5a4 Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Tue, 23 Jan 2024 01:09:19 +0100
Subject: [PATCH 05/10] trailing ws

---
 ggml-backend.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index 8f828564d..86218e740 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1743,10 +1743,10 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
             return;
         }
     }
-    
+
     const char *sep = "+-------------------------------------------------------------------------------------------+\n";
     printf("%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
-    
+
     const int MAX_ELEMENTS_ROW = 10;
     const int MAX_ELEMENTS_COL = 6;
     const int MAX_ELEMENTS_LAYER = 3; // layered
@@ -1787,7 +1787,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
                 printf("[");
                 for(int k = 0; k < tensor->ne[2] && k < MAX_ELEMENTS_LAYER; k++){
                     printf("%-7.4f", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1] + k*tensor->nb[2]));
-                    if(k < tensor->ne[2] - 1 && k < MAX_ELEMENTS_LAYER - 1) 
+                    if(k < tensor->ne[2] - 1 && k < MAX_ELEMENTS_LAYER - 1)
                         printf(", ");
                 }
                 if(MAX_ELEMENTS_LAYER < tensor->ne[2]) printf(", ..");
@@ -1806,7 +1806,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
                     printf("[");
                     for(int k = 0; k < tensor->ne[3] && k < MAX_ELEMENTS_LAYER; k++){
                         printf("%-7.4f", *(double *)((char *) tensor_data + batch*tensor->nb[0] + i*tensor->nb[1] + j*tensor->nb[2] + k*tensor->nb[3]));
-                        if(k < tensor->ne[3] - 1 && k < MAX_ELEMENTS_LAYER - 1) 
+                        if(k < tensor->ne[3] - 1 && k < MAX_ELEMENTS_LAYER - 1)
                             printf(", ");
                     }
                     if(MAX_ELEMENTS_LAYER < tensor->ne[3]) printf(", ..");
@@ -1862,7 +1862,7 @@ void ggml_tensor_printf(const struct ggml_tensor *tensor, char *prefix, int line
     }
     printf("| %-20s | %-20s | %-20s | %-20s |\n", "Elements", "Src0", "Src1","Operation");
     printf("| %-20s |", tmp_str);
-    
+
     if (tensor->src[0]) {
         pos = 0;
         for (int i = 0; i < ggml_n_dims(tensor->src[0]); i++) {

From 8690363183329f97f509adc92d84916d02a663d7 Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Tue, 23 Jan 2024 01:13:39 +0100
Subject: [PATCH 06/10] added inttypes.h

---
 ggml-backend.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ggml-backend.c b/ggml-backend.c
index 86218e740..8180a28c7 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -8,6 +8,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <inttypes.h>
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
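The include matters because the printers rely on the PRId64 format macros, which live in <inttypes.h>; without it the build breaks on toolchains that do not pull the header in transitively. A minimal illustration:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void) {
        int64_t ne = 4096;                 // ggml stores ne[] as int64_t
        printf("dim: %" PRId64 "\n", ne);  // portable on 32- and 64-bit targets
        return 0;
    }
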
From 5122c828695708b701f50d5b1e1781b0d14ec727 Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Tue, 23 Jan 2024 01:28:02 +0100
Subject: [PATCH 07/10] bugfix

---
 ggml-backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index 8180a28c7..3ab7970f4 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1746,7 +1746,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
     }
 
     const char *sep = "+-------------------------------------------------------------------------------------------+\n";
-    printf("%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
+    //printf("%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
 
     const int MAX_ELEMENTS_ROW = 10;
     const int MAX_ELEMENTS_COL = 6;

From 582bddc37a526b261bcc6128b4b8221391fa1c73 Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Tue, 23 Jan 2024 01:45:27 +0100
Subject: [PATCH 08/10] changed formatting to adaptive exponential format

---
 ggml-backend.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index 3ab7970f4..81d16d96b 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1746,8 +1746,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
     }
 
     const char *sep = "+-------------------------------------------------------------------------------------------+\n";
-    //printf("%s| Content of %s \"%s\" (%d dim)\n", sep, prefix, tensor->name, ggml_n_dims(tensor));
-    
+
     const int MAX_ELEMENTS_ROW = 10;
     const int MAX_ELEMENTS_COL = 6;
     const int MAX_ELEMENTS_LAYER = 3; // layered
@@ -1765,7 +1764,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
     if (n_dims == 1) {
         printf("| 1: ");
         for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
-            printf("%-7.4f, ", *(double *)((char *) tensor_data + i*tensor->nb[0]));
+            printf("%11.3g, ", *(double *)((char *) tensor_data + i*tensor->nb[0]));
         }
         if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(", ..");
         printf("\n%s", sep);
@@ -1774,7 +1773,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
         for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
             printf("| %d: ", i+1);
             for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
-                printf("%-7.4f ", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1]));
+                printf("%11.4g ", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1]));
                 if(j == MAX_ELEMENTS_COL - 1 && tensor->ne[1] > MAX_ELEMENTS_COL) printf(", ..");
             }
             printf("\n");
@@ -1787,7 +1786,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
             for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
                 printf("[");
                 for(int k = 0; k < tensor->ne[2] && k < MAX_ELEMENTS_LAYER; k++){
-                    printf("%-7.4f", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1] + k*tensor->nb[2]));
+                    printf("%11.4g", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1] + k*tensor->nb[2]));
                     if(k < tensor->ne[2] - 1 && k < MAX_ELEMENTS_LAYER - 1)
                         printf(", ");
                 }
@@ -1806,7 +1805,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
                 for(int j = 0; j < tensor->ne[2] && j < MAX_ELEMENTS_COL; j++){
                     printf("[");
                     for(int k = 0; k < tensor->ne[3] && k < MAX_ELEMENTS_LAYER; k++){
-                        printf("%-7.4f", *(double *)((char *) tensor_data + batch*tensor->nb[0] + i*tensor->nb[1] + j*tensor->nb[2] + k*tensor->nb[3]));
+                        printf("%11.4f", *(double *)((char *) tensor_data + batch*tensor->nb[0] + i*tensor->nb[1] + j*tensor->nb[2] + k*tensor->nb[3]));
                         if(k < tensor->ne[3] - 1 && k < MAX_ELEMENTS_LAYER - 1)
                             printf(", ");
                     }
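The %g conversion switches between fixed and scientific notation depending on magnitude, so tiny weights and large activations both print meaningfully in the same column width. A quick standalone check (illustrative; note the 4-dim branch above keeps %11.4f, which the final patch in this series replaces wholesale):

    #include <stdio.h>

    int main(void) {
        printf("%11.4g\n", 0.00001234); // prints "  1.234e-05"
        printf("%11.4g\n", 123.456);    // prints "      123.5"
        printf("%11.4g\n", 0.25);       // prints "       0.25"
        return 0;
    }
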
From 0177431cb2229c97ba7a4af07aa4039969171670 Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Tue, 23 Jan 2024 01:46:49 +0100
Subject: [PATCH 09/10] ws

---
 ggml-backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index 81d16d96b..b91a8e15b 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1746,7 +1746,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
     }
 
     const char *sep = "+-------------------------------------------------------------------------------------------+\n";
-    
+
     const int MAX_ELEMENTS_ROW = 10;
     const int MAX_ELEMENTS_COL = 6;
     const int MAX_ELEMENTS_LAYER = 3; // layered

From b639e2a73fbf1b67adfe7d87ca3c0250d0f3266b Mon Sep 17 00:00:00 2001
From: John <78893154+cmp-nct@users.noreply.github.com>
Date: Thu, 8 Feb 2024 03:00:00 +0100
Subject: [PATCH 10/10] Bugfix printf tensor

Added a function to access the tensor data by index (like an array) and
print the value as float/int.
Updated the tensor print loops to use the new function.
This fixes two bugs: 1) the previous printf did not work after a quick fix,
and 2) it did not show rows/columns in the right place.
---
 ggml-backend.c | 85 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 72 insertions(+), 13 deletions(-)

diff --git a/ggml-backend.c b/ggml-backend.c
index f820c3eac..10cc6de0b 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -1836,22 +1836,80 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t
 
     return true;
 }
 
-void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tensor) {
-
-    char *tensor_data;
-    if (tensor->backend != GGML_BACKEND_CPU) {
+// printf one number from a tensor (cont only) like an array index[0][1][2][3]
+void ggml_print_f32_index(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3) {
+    void * data;
+    if (!ggml_is_contiguous(tensor)) {
+        int64_t id[4] = {i0, i1, i2, i3};
+        int index_cont = i0*tensor->ne[3]*tensor->ne[2]*tensor->ne[1]*tensor->nb[0] + i1*tensor->ne[3]*tensor->ne[2]*tensor->nb[1] + i2*tensor->ne[3]*tensor->nb[2] + i3*tensor->nb[3];
+        ggml_unravel_index(tensor, index_cont, &id[0], &id[1], &id[2], &id[3]); // untested
+        printf("NONCONT");
+        return;
+        i0 = id[0];
+        i1 = id[1];
+        i2 = id[2];
+        i3 = id[3];
+        data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3]; // untested
+    } else
+    {
+        data = (char *) tensor->data + i0*tensor->ne[3]*tensor->ne[2]*tensor->ne[1]*tensor->nb[0] + i1*tensor->ne[3]*tensor->ne[2]*tensor->nb[1] + i2*tensor->ne[3]*tensor->nb[2] + i3*tensor->nb[3];
+        // void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
+    }
+
+    switch (tensor->type) {
+        case GGML_TYPE_I8:
+            printf("%8d", *((int8_t *) data));
+            break;
+        case GGML_TYPE_I16:
+            printf("%8d", *((int16_t *) data));
+            break;
+        case GGML_TYPE_I32:
+            printf("%8d", *((int32_t *) data));
+            break;
+        case GGML_TYPE_F16:
+            printf("%8.4f", GGML_FP16_TO_FP32(*((ggml_fp16_t *) data)));
+            break;
+        case GGML_TYPE_F32:
+            printf("%8.4f", *((float *) data));
+            break;
+        default:
+            printf("UNKTYPE");
+            break;
+    }
+}
+void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tensor_in) {
+    struct ggml_tensor tensor_dummy; // avoid any ctx
+    struct ggml_tensor * tensor = &tensor_dummy;
+
+    if (tensor_in->backend != GGML_BACKEND_CPU) {
         // for any mmap solution we can actually set the CPU data of a tensor during load even if it's GPU offloaded
         // this shouldn't have a negative effect, worked well in ggllm, saves the need of tensor_get operations for weights
-        if (tensor->buffer == NULL) {
+        if (tensor_in->buffer == NULL) {
             printf("ggml_printTensorSample: tensor buffer is NULL\n");
             return;
         }
-        tensor_data = (char *) malloc(ggml_nbytes(tensor));
-        ggml_backend_tensor_get(tensor, tensor_data, 0, ggml_nbytes(tensor));
+        tensor->data = (char *) malloc(ggml_nbytes(tensor_in));
+        ggml_backend_tensor_get(tensor_in, tensor->data, 0, ggml_nbytes(tensor_in));
+        memcpy(tensor->name, tensor_in->name, sizeof(tensor->name));
+        tensor->type = tensor_in->type;
+        tensor->ne[0] = tensor_in->ne[0];
+        tensor->ne[1] = tensor_in->ne[1];
+        tensor->ne[2] = tensor_in->ne[2];
+        tensor->ne[3] = tensor_in->ne[3];
+        tensor->nb[0] = tensor_in->nb[0];
+        tensor->nb[1] = tensor_in->nb[1];
+        tensor->nb[2] = tensor_in->nb[2];
+        tensor->nb[3] = tensor_in->nb[3];
+        tensor->backend = GGML_BACKEND_CPU;
+        tensor->op = tensor_in->op;
+        tensor->view_offs = tensor_in->view_offs;
+        tensor->view_src = tensor_in->view_src;
     } else
     {
-        tensor_data = tensor->data;
-        if (tensor_data == NULL) {
+        tensor = tensor_in;
+        if (tensor->data == NULL) {
             printf("ggml_printTensorSample: tensor data is NULL\n");
             return;
         }
@@ -1876,7 +1934,8 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
     if (n_dims == 1) {
         printf("| 1: ");
         for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
-            printf("%11.3g, ", *(double *)((char *) tensor_data + i*tensor->nb[0]));
+            ggml_print_f32_index(tensor, i, 0, 0, 0);
+            if(i == MAX_ELEMENTS_ROW - 1 && tensor->ne[0] > MAX_ELEMENTS_ROW) printf(", ..");
         }
         if(MAX_ELEMENTS_ROW < tensor->ne[0]) printf(", ..");
         printf("\n%s", sep);
@@ -1885,7 +1944,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
         for(int i = 0; i < tensor->ne[0] && i < MAX_ELEMENTS_ROW; i++){
             printf("| %d: ", i+1);
             for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
-                printf("%11.4g ", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1]));
+                ggml_print_f32_index(tensor, i, j, 0, 0);
                 if(j == MAX_ELEMENTS_COL - 1 && tensor->ne[1] > MAX_ELEMENTS_COL) printf(", ..");
             }
             printf("\n");
@@ -1898,7 +1957,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
             for(int j = 0; j < tensor->ne[1] && j < MAX_ELEMENTS_COL; j++){
                 printf("[");
                 for(int k = 0; k < tensor->ne[2] && k < MAX_ELEMENTS_LAYER; k++){
-                    printf("%11.4g", *(double *)((char *) tensor_data + i*tensor->nb[0] + j*tensor->nb[1] + k*tensor->nb[2]));
+                    ggml_print_f32_index(tensor, i, j, k, 0);
                     if(k < tensor->ne[2] - 1 && k < MAX_ELEMENTS_LAYER - 1)
                         printf(", ");
                 }
@@ -1917,7 +1976,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
                 for(int j = 0; j < tensor->ne[2] && j < MAX_ELEMENTS_COL; j++){
                     printf("[");
                     for(int k = 0; k < tensor->ne[3] && k < MAX_ELEMENTS_LAYER; k++){
-                        printf("%11.4f", *(double *)((char *) tensor_data + batch*tensor->nb[0] + i*tensor->nb[1] + j*tensor->nb[2] + k*tensor->nb[3]));
+                        ggml_print_f32_index(tensor, batch, i, j, k);
                         if(k < tensor->ne[3] - 1 && k < MAX_ELEMENTS_LAYER - 1)
                             printf(", ");
                     }
@@ -1931,7 +1990,7 @@ void ggml_printTensorSample(const char *prefix, const struct ggml_tensor * tenso
         }
     }
     if (tensor->backend != GGML_BACKEND_CPU)
-        free(tensor_data);
+        free(tensor->data);
 }
 
 void ggml_tensor_printf(const struct ggml_tensor *tensor, char *prefix, int line, bool extended, bool print_sample) {
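Given the stride convention quoted earlier in this series (nb[0] = sizeof(type), nb[i] = nb[i-1] * ne[i-1]), the conventional element offset is the plain stride sum, which is what the commented-out line in ggml_print_f32_index computes. A minimal sketch of that addressing, assuming an f32 tensor and that ggml.h is on the include path (the helper name is illustrative, not part of the patch):

    #include "ggml.h"

    // Conventional ggml element addressing:
    // offset = i0*nb[0] + i1*nb[1] + i2*nb[2] + i3*nb[3]
    static float tensor_get_f32(const struct ggml_tensor * t, int i0, int i1, int i2, int i3) {
        const char * base = (const char *) t->data;
        const size_t off = (size_t) i0*t->nb[0] + (size_t) i1*t->nb[1]
                         + (size_t) i2*t->nb[2] + (size_t) i3*t->nb[3];
        return *(const float *)(base + off);
    }
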