add debug prints for training memory improvements

parent be7e564b11
commit 620275361d

2 changed files with 76 additions and 1 deletion:
  ggml-alloc.c (25)
  ggml.c       (52)
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -162,12 +162,22 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
         printf("\n");
     }
 #endif
-
+    if ((char*)addr - (char*)alloc->data + size > alloc->max_size) {
+        printf("%s: op=%s name=%s max_size=%zu\n", __func__, ggml_op_name(tensor->op), ggml_get_name(tensor), (char*)addr - (char*)alloc->data + size);
+    }
     alloc->max_size = MAX(alloc->max_size, (char*)addr - (char*)alloc->data + size);
 }
 
 // this is a very naive implementation, but for our case the number of free blocks should be very small
 static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
+    // static int counter = 0;
+    // counter++;
+    // if (counter > 2) {
+    //     printf("%s: counter=%d OMIT\n", __func__, counter);
+    //     return;
+    // } else {
+    //     printf("%s: counter=%d\n", __func__, counter);
+    // }
     void * ptr = tensor->data;
 
     if (ptr < alloc->data || (char*)ptr >= (char*)alloc->data + alloc->max_size) {
@@ -179,6 +189,7 @@ static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
 
     size_t size = ggml_allocator_get_alloc_size(alloc, tensor);
     size = aligned_offset(NULL, size, alloc->alignment);
+    // printf("%s: free data=[%p..%p] op=%s name=%s n_free_blocks=%d\n", __func__, tensor->data, (char*) tensor->data + size, ggml_op_name(tensor->op), ggml_get_name(tensor), alloc->n_free_blocks);
     AT_PRINTF("%s: freeing %s (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, size, alloc->n_free_blocks);
 
 #ifdef GGML_ALLOCATOR_DEBUG
@@ -478,11 +489,23 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                 if (parent == NULL) {
                     break;
                 }
+                bool was_null = parent->data == NULL;
                 allocate_node(alloc, parent);
+                // if (was_null) {
+                //     printf("%s: alloc n[%02d] %d data=[%p..%p] %s %s\n", __func__, i, j, parent->data, (char*) parent->data + ggml_nbytes(parent), ggml_op_name(parent->op), ggml_get_name(parent));
+                // } else {
+                //     printf("%s: exist n[%02d] %d data=[%p..%p] %s %s\n", __func__, i, j, parent->data, (char*) parent->data + ggml_nbytes(parent), ggml_op_name(parent->op), ggml_get_name(parent));
+                // }
             }
 
             // allocate node
+            bool was_null = node->data == NULL;
             allocate_node(alloc, node);
+            // if (was_null) {
+            //     printf("%s: alloc node[%02d] data=[%p..%p] %s %s\n", __func__, i, node->data, (char*) node->data + ggml_nbytes(node), ggml_op_name(node->op), ggml_get_name(node));
+            // } else {
+            //     printf("%s: exist node[%02d] data=[%p..%p] %s %s\n", __func__, i, node->data, (char*) node->data + ggml_nbytes(node), ggml_op_name(node->op), ggml_get_name(node));
+            // }
 
             AT_PRINTF("exec: %s (%s) <= ", ggml_op_name(node->op), node->name);
             for (int j = 0; j < GGML_MAX_SRC; j++) {
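The print added in ggml_allocr_alloc fires whenever an allocation raises the allocator's high-water mark, i.e. whenever (char*)addr - (char*)alloc->data + size exceeds the previous alloc->max_size. A minimal standalone sketch of that bookkeeping, assuming a simple bump allocator (not ggml code; toy_allocr and toy_alloc are hypothetical names):

// Standalone sketch: track the peak offset reached inside a fixed buffer and
// print whenever a new allocation pushes the peak higher, mirroring the debug
// print added to ggml_allocr_alloc above.
#include <stdio.h>
#include <stddef.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

struct toy_allocr {
    char * data;      // start of the managed buffer
    size_t offset;    // next free byte (bump allocation for simplicity)
    size_t max_size;  // high-water mark: peak value of (end of allocation - data)
};

static void * toy_alloc(struct toy_allocr * alloc, size_t size, const char * name) {
    void * addr = alloc->data + alloc->offset;
    alloc->offset += size;
    size_t end = (size_t)((char *)addr - alloc->data) + size;
    if (end > alloc->max_size) {
        printf("%s: name=%s max_size=%zu\n", __func__, name, end);
    }
    alloc->max_size = MAX(alloc->max_size, end);
    return addr;
}

int main(void) {
    static char buffer[1024];
    struct toy_allocr alloc = { buffer, 0, 0 };
    toy_alloc(&alloc, 256, "embeddings"); // raises the peak to 256 -> prints
    toy_alloc(&alloc, 128, "logits");     // raises the peak to 384 -> prints
    return 0;
}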
--- a/ggml.c
+++ b/ggml.c
@@ -16557,6 +16557,7 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
         struct ggml_tensor * grad = cgraph->grads[i];
 
         if (grad) {
+            // printf("%s: set_zero data=[%p] op=%s name=%s\n", __func__, grad->data, ggml_op_name(grad->op), ggml_get_name(grad));
             ggml_set_zero(grad);
         }
     }
@@ -17312,6 +17313,48 @@ static void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g
 // ref: https://arxiv.org/pdf/1412.6980.pdf
 //
 
+uint32_t compute_data_checksum(struct ggml_tensor * tensor) {
+    const int n3 = (tensor->n_dims >= 3) ? tensor->ne[3] : 1;
+    const int n2 = (tensor->n_dims >= 2) ? tensor->ne[2] : 1;
+    const int n1 = (tensor->n_dims >= 1) ? tensor->ne[1] : 1;
+    const int n0 = (tensor->n_dims >= 0) ? tensor->ne[0] : 1;
+    const size_t nb0 = tensor->nb[0];
+    const size_t nb1 = tensor->nb[1];
+    const size_t nb2 = tensor->nb[2];
+    const size_t nb3 = tensor->nb[3];
+    const size_t nb  = ggml_element_size(tensor);
+    uint32_t result = 0;
+    for (int i3 = 0; i3 < n3; ++i3) {
+        for (int i2 = 0; i2 < n2; ++i2) {
+            for (int i1 = 0; i1 < n1; ++i1) {
+                for (int i0 = 0; i0 < n0; ++i0) {
+                    char * ptr = ((char *) tensor->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+                    uint32_t val;
+                    memcpy(&val, ptr, nb);
+                    result = result ^ val;
+                    result = (((result << 1u) | ((result >> 31u) & 0x1u)) + 1u) & 0xffffffffu;
+                }
+            }
+        }
+    }
+    return result;
+}
+
+void print_data_checksums(struct ggml_cgraph * g) {
+    for (int i = 0; i < g->n_nodes; ++i) {
+        struct ggml_tensor * node = g->nodes[i];
+        for (int j = 0; j<GGML_MAX_SRC; ++j) {
+            if (node->src[j]) {
+                struct ggml_tensor * src = node->src[j];
+                uint32_t chk = compute_data_checksum(src);
+                printf("%s: node[%3d]->src[%d] chk=[%08x] data=[%p] op=%s name=%s\n", __func__, i, j, chk, src->data, ggml_op_name(src->op), ggml_get_name(src));
+            }
+        }
+        uint32_t chk = compute_data_checksum(node);
+        printf("%s: node[%3d] chk=[%08x] data=[%p] op=%s name=%s\n", __func__, i, chk, node->data, ggml_op_name(node->op), ggml_get_name(node));
+    }
+}
+
 static enum ggml_opt_result ggml_opt_adam(
         struct ggml_context * ctx,
         struct ggml_opt_context * opt,
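For reference, compute_data_checksum walks every element through the tensor's byte strides (nb0..nb3), XORs the element's raw bytes into a 32-bit accumulator, then rotates the accumulator left by one bit and adds one, which makes the result depend on element order as well as content. A standalone sketch of the same mixing step over a contiguous float array (not ggml code; note that for element types narrower than 4 bytes the committed code leaves the upper bytes of val uninitialized, so this sketch zero-initializes it):

// Standalone sketch: the XOR + rotate-left-by-1 + add-1 mixing step from
// compute_data_checksum, applied to a contiguous array of 4-byte floats.
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint32_t toy_checksum(const float * data, size_t n) {
    uint32_t result = 0;
    for (size_t i = 0; i < n; ++i) {
        uint32_t val = 0; // zero-init matters for elements narrower than 4 bytes
        memcpy(&val, &data[i], sizeof(float));
        result = result ^ val;
        // rotate left by 1, then add 1; the & 0xffffffffu is a no-op for uint32_t
        result = (((result << 1u) | ((result >> 31u) & 0x1u)) + 1u) & 0xffffffffu;
    }
    return result;
}

int main(void) {
    float a[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
    float b[4] = { 1.0f, 2.0f, 4.0f, 3.0f }; // same values, different order
    printf("chk(a)=[%08x]\n", toy_checksum(a, 4));
    printf("chk(b)=[%08x]\n", toy_checksum(b, 4)); // differs: the rotate makes the sum order-sensitive
    return 0;
}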
@@ -17373,6 +17416,8 @@ static enum ggml_opt_result ggml_opt_adam(
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
     ggml_graph_compute(gb, &cplan);
 
+    print_data_checksums(gb);
+
     opt->adam.fx_prev = ggml_get_f32_1d(f, 0);
     opt->adam.fx_best = opt->adam.fx_prev;
     if (pf) {
@@ -17434,6 +17479,8 @@ static enum ggml_opt_result ggml_opt_adam(
         const float beta2h = 1.0f/(1.0f - powf(beta2, opt->iter));
         int64_t i = 0;
         for (int p = 0; p < np; ++p) {
+            printf("%s: para[%3d] chk=[%08x] op=%s name=%s\n", __func__, p, compute_data_checksum(ps[p]), ggml_op_name(ps[p]->op), ggml_get_name(ps[p]));
+            printf("%s: para[%3d]->grad chk=[%08x] op=%s name=%s\n", __func__, p, compute_data_checksum(ps[p]->grad), ggml_op_name(ps[p]->grad->op), ggml_get_name(ps[p]->grad));
             const int64_t ne = ggml_nelements(ps[p]);
             const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0) * sched;
             for (int64_t j = 0; j < ne; ++j) {
@@ -17512,6 +17559,11 @@ static enum ggml_opt_result ggml_opt_adam(
         }
     }
 
+    print_data_checksums(gb);
+    for (int p = 0; p < np; ++p) {
+        printf("%s: para[%3d] chk=[%08x] op=%s name=%s\n", __func__, p, compute_data_checksum(ps[p]), ggml_op_name(ps[p]->op), ggml_get_name(ps[p]));
+        printf("%s: para[%3d]->grad chk=[%08x] op=%s name=%s\n", __func__, p, compute_data_checksum(ps[p]->grad), ggml_op_name(ps[p]->grad->op), ggml_get_name(ps[p]->grad));
+    }
     return GGML_OPT_DID_NOT_CONVERGE;
 }
 
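Taken together, the prints bracket each optimizer step: print_data_checksums(gb) runs once right after ggml_graph_compute and again after the Adam parameter update, and the per-parameter printfs snapshot ps[p] and ps[p]->grad at the top of the update loop. Diffing the two logs shows which tensor buffers actually changed between the backward pass and the update, which is what these debug prints are for while investigating training memory reuse. A standalone sketch of that before/after comparison pattern (not ggml code; toy_checksum_bytes applies the same mixing byte-wise):

// Standalone sketch: snapshot a buffer's checksum before and after a step and
// report whether the data was modified in place.
#include <stdio.h>
#include <stdint.h>

static uint32_t toy_checksum_bytes(const void * data, size_t nbytes) {
    uint32_t result = 0;
    const unsigned char * p = (const unsigned char *) data;
    for (size_t i = 0; i < nbytes; ++i) {
        result = result ^ p[i];
        result = ((result << 1u) | (result >> 31u)) + 1u; // same mixing, byte-wise
    }
    return result;
}

int main(void) {
    float weights[8] = {0};
    uint32_t before = toy_checksum_bytes(weights, sizeof(weights));
    weights[3] = 0.5f; // stand-in for "some compute step wrote here"
    uint32_t after  = toy_checksum_bytes(weights, sizeof(weights));
    printf("before=[%08x] after=[%08x] %s\n",
           before, after, before == after ? "unchanged" : "MODIFIED");
    return 0;
}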