Fix bug that didn't unroll the 1D Karpathy arrays
This commit is contained in:
parent
5a87675db4
commit
aebccdbf00
1 changed file with 70 additions and 50 deletions
|
@ -45,8 +45,8 @@ typedef struct {
|
||||||
// final rmsnorm
|
// final rmsnorm
|
||||||
float* rms_final_weight; // (dim,)
|
float* rms_final_weight; // (dim,)
|
||||||
// freq_cis for RoPE relatively positional embeddings
|
// freq_cis for RoPE relatively positional embeddings
|
||||||
float* freq_cis_real; // (seq_len, dim/2)
|
// float* freq_cis_real; // (seq_len, dim/2)
|
||||||
float* freq_cis_imag; // (seq_len, dim/2)
|
// float* freq_cis_imag; // (seq_len, dim/2)
|
||||||
// (optional) classifier weights for the logits, on the last layer
|
// (optional) classifier weights for the logits, on the last layer
|
||||||
float* wcls;
|
float* wcls;
|
||||||
} TransformerWeights;
|
} TransformerWeights;
|
||||||
|
@ -63,9 +63,9 @@ int checkpoint_init_weights(TransformerWeights *w, Config* p, FILE* f) {
|
||||||
if (fread(w->w2, sizeof(float), p->n_layers * p->hidden_dim * p->dim, f) != static_cast<size_t>(p->n_layers * p->hidden_dim * p->dim)) return 1;
|
if (fread(w->w2, sizeof(float), p->n_layers * p->hidden_dim * p->dim, f) != static_cast<size_t>(p->n_layers * p->hidden_dim * p->dim)) return 1;
|
||||||
if (fread(w->w3, sizeof(float), p->n_layers * p->dim * p->hidden_dim, f) != static_cast<size_t>(p->n_layers * p->dim * p->hidden_dim)) return 1;
|
if (fread(w->w3, sizeof(float), p->n_layers * p->dim * p->hidden_dim, f) != static_cast<size_t>(p->n_layers * p->dim * p->hidden_dim)) return 1;
|
||||||
if (fread(w->rms_final_weight, sizeof(float), p->dim, f) != static_cast<size_t>(p->dim)) return 1;
|
if (fread(w->rms_final_weight, sizeof(float), p->dim, f) != static_cast<size_t>(p->dim)) return 1;
|
||||||
int head_size = p->dim / p->n_heads;
|
//int head_size = p->dim / p->n_heads;
|
||||||
if (fread(w->freq_cis_real, sizeof(float), p->seq_len * head_size / 2, f) != static_cast<size_t>(p->seq_len * head_size / 2)) return 1;
|
// if (fread(w->freq_cis_real, sizeof(float), p->seq_len * head_size / 2, f) != static_cast<size_t>(p->seq_len * head_size / 2)) return 1;
|
||||||
if (fread(w->freq_cis_imag, sizeof(float), p->seq_len * head_size / 2, f) != static_cast<size_t>(p->seq_len * head_size / 2)) return 1;
|
// if (fread(w->freq_cis_imag, sizeof(float), p->seq_len * head_size / 2, f) != static_cast<size_t>(p->seq_len * head_size / 2)) return 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -96,7 +96,7 @@ void malloc_weights(TransformerWeights* w, Config* p) {
|
||||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->w1\n",__func__,p->n_layers, p->hidden_dim, p->dim, p->n_layers * p->hidden_dim * p->dim);
|
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->w1\n",__func__,p->n_layers, p->hidden_dim, p->dim, p->n_layers * p->hidden_dim * p->dim);
|
||||||
|
|
||||||
w->w2 = new float[p->n_layers * p->hidden_dim * p->dim](); //calloc(p->n_layers * p->dim * p->hidden_dim, sizeof(float));
|
w->w2 = new float[p->n_layers * p->hidden_dim * p->dim](); //calloc(p->n_layers * p->dim * p->hidden_dim, sizeof(float));
|
||||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->w2\n",__func__,p->n_layers, p->hidden_dim, p->dim, p->n_layers * p->hidden_dim * p->dim);
|
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->w2\n",__func__,p->n_layers, p->dim, p->hidden_dim, p->n_layers * p->hidden_dim * p->dim);
|
||||||
|
|
||||||
w->w3 = new float[p->n_layers * p->hidden_dim * p->dim](); //calloc(p->n_layers * p->hidden_dim * p->dim, sizeof(float));
|
w->w3 = new float[p->n_layers * p->hidden_dim * p->dim](); //calloc(p->n_layers * p->hidden_dim * p->dim, sizeof(float));
|
||||||
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->w3\n",__func__,p->n_layers, p->hidden_dim, p->dim, p->n_layers * p->hidden_dim * p->dim);
|
printf("[%s:AK] Allocating [%d] x [%d] x [%d] = [%d] float space for w->w3\n",__func__,p->n_layers, p->hidden_dim, p->dim, p->n_layers * p->hidden_dim * p->dim);
|
||||||
|
@ -104,11 +104,11 @@ void malloc_weights(TransformerWeights* w, Config* p) {
|
||||||
w->rms_final_weight = new float[p->dim](); //calloc(p->dim, sizeof(float));
|
w->rms_final_weight = new float[p->dim](); //calloc(p->dim, sizeof(float));
|
||||||
printf("[%s:AK] Allocating [%d] float space for w->rms_final_weight\n",__func__,p->dim);
|
printf("[%s:AK] Allocating [%d] float space for w->rms_final_weight\n",__func__,p->dim);
|
||||||
|
|
||||||
w->freq_cis_real = new float[p->seq_len * p->dim / 2](); //calloc(p->seq_len * p->dim / 2, sizeof(float));
|
// w->freq_cis_real = new float[p->seq_len * p->dim / 2](); //calloc(p->seq_len * p->dim / 2, sizeof(float));
|
||||||
printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->freq_cis_real\n",__func__,p->seq_len, p->dim / 2, p->seq_len * p->dim / 2);
|
// printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->freq_cis_real\n",__func__,p->seq_len, p->dim / 2, p->seq_len * p->dim / 2);
|
||||||
|
|
||||||
w->freq_cis_imag = new float[p->seq_len * p->dim / 2](); //calloc(p->seq_len * p->dim / 2, sizeof(float));
|
// w->freq_cis_imag = new float[p->seq_len * p->dim / 2](); //calloc(p->seq_len * p->dim / 2, sizeof(float));
|
||||||
printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->freq_cis_imag\n\n",__func__,p->seq_len, p->dim / 2, p->seq_len * p->dim / 2);
|
// printf("[%s:AK] Allocating [%d] x [%d] = [%d] float space for w->freq_cis_imag\n\n",__func__,p->seq_len, p->dim / 2, p->seq_len * p->dim / 2);
|
||||||
|
|
||||||
// ensure all mallocs went fine
|
// ensure all mallocs went fine
|
||||||
// if (!w->token_embedding_table || !w->rms_att_weight || !w->rms_ffn_weight
|
// if (!w->token_embedding_table || !w->rms_att_weight || !w->rms_ffn_weight
|
||||||
|
@ -131,8 +131,8 @@ void free_weights(TransformerWeights* w) {
|
||||||
free(w->w2);
|
free(w->w2);
|
||||||
free(w->w3);
|
free(w->w3);
|
||||||
free(w->rms_final_weight);
|
free(w->rms_final_weight);
|
||||||
free(w->freq_cis_real);
|
// free(w->freq_cis_real);
|
||||||
free(w->freq_cis_imag);
|
// free(w->freq_cis_imag);
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_sample_weights(TransformerWeights *w){
|
void print_sample_weights(TransformerWeights *w){
|
||||||
|
@ -149,8 +149,8 @@ void print_sample_weights(TransformerWeights *w){
|
||||||
printf("%f\n", w->w2[0]);
|
printf("%f\n", w->w2[0]);
|
||||||
printf("%f\n", w->w3[0]);
|
printf("%f\n", w->w3[0]);
|
||||||
printf("%f\n", w->rms_att_weight[0]);
|
printf("%f\n", w->rms_att_weight[0]);
|
||||||
printf("%f\n", w->freq_cis_real[0]);
|
// printf("%f\n", w->freq_cis_real[0]);
|
||||||
printf("%f\n", w->freq_cis_imag[0]);
|
// printf("%f\n", w->freq_cis_imag[0]);
|
||||||
printf("------------------------------------------------------------------\n");
|
printf("------------------------------------------------------------------\n");
|
||||||
|
|
||||||
|
|
||||||
|
@ -213,10 +213,10 @@ struct my_llama_model {
|
||||||
struct ggml_tensor * tok_embeddings;
|
struct ggml_tensor * tok_embeddings;
|
||||||
|
|
||||||
struct ggml_tensor * norm;
|
struct ggml_tensor * norm;
|
||||||
// struct ggml_tensor * output;
|
struct ggml_tensor * output;
|
||||||
|
|
||||||
struct ggml_tensor * freq_cis_real;
|
// struct ggml_tensor * freq_cis_real;
|
||||||
struct ggml_tensor * freq_cis_imag;
|
// struct ggml_tensor * freq_cis_imag;
|
||||||
|
|
||||||
std::vector<my_llama_layer> layers;
|
std::vector<my_llama_layer> layers;
|
||||||
|
|
||||||
|
@ -262,33 +262,33 @@ void init_model(struct my_llama_model * model) {
|
||||||
model->norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
|
model->norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
|
||||||
printf("[%s:GG] Allocating [%d] float space for model->norm\n",__func__,n_embd);
|
printf("[%s:GG] Allocating [%d] float space for model->norm\n",__func__,n_embd);
|
||||||
|
|
||||||
// model->output = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
|
model->output = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
|
||||||
// printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for model->output\n",__func__,n_embd, n_vocab, n_embd * n_vocab);
|
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for model->output\n",__func__,n_embd, n_vocab, n_embd * n_vocab);
|
||||||
|
|
||||||
model->freq_cis_real = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd/2);
|
// model->freq_cis_real = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd/2);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for model->freq_cis_real\n",__func__,n_embd, n_embd / 2, n_embd * n_embd / 2);
|
// printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for model->freq_cis_real\n",__func__,n_embd, n_embd / 2, n_embd * n_embd / 2);
|
||||||
|
|
||||||
model->freq_cis_imag = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd/2);
|
// model->freq_cis_imag = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd/2);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for model->freq_cis_imag\n",__func__,n_embd, n_embd / 2, n_embd * n_embd / 2);
|
// printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for model->freq_cis_imag\n",__func__,n_embd, n_embd / 2, n_embd * n_embd / 2);
|
||||||
|
|
||||||
// printing the per-layer allocations here so we dont print in the for loop.
|
// printing the per-layer allocations here so we dont print in the for loop.
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wq for [%d] layers\n",__func__,n_embd, n_embd, n_embd * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wq for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wk for [%d] layers\n",__func__,n_embd, n_embd, n_embd * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wk for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wv for [%d] layers\n",__func__,n_embd, n_embd, n_embd * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wv for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wo for [%d] layers\n",__func__,n_embd, n_embd, n_embd * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wo for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||||
|
|
||||||
printf("[%s:GG] Allocating [%d] float space for layer.ffn_norm for [%d] layers\n",__func__,n_embd, n_layer);
|
printf("[%s:GG] Allocating [%d] float space for layer.ffn_norm for [%d] layers\n",__func__,n_embd, n_layer);
|
||||||
|
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w1 for [%d] layers\n",__func__,n_embd, n_ff, n_embd * n_ff, n_layer);
|
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w1 for [%d] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w2 for [%d] layers\n",__func__,n_ff, n_embd, n_ff * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w2 for [%d] layers\n",__func__, n_embd, n_ff, n_ff * n_embd, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w3 for [%d] layers\n",__func__,n_embd, n_ff, n_embd * n_ff, n_layer);
|
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w3 for [%d] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
|
||||||
|
|
||||||
|
|
||||||
ggml_set_name(model->tok_embeddings, "tok_embeddings.weight");
|
ggml_set_name(model->tok_embeddings, "tok_embeddings.weight");
|
||||||
ggml_set_name(model->norm, "norm.weight");
|
ggml_set_name(model->norm, "norm.weight");
|
||||||
// ggml_set_name(model->output, "output.weight");
|
ggml_set_name(model->output, "output.weight");
|
||||||
ggml_set_name(model->freq_cis_real, "output.freq_cis_real");
|
// ggml_set_name(model->freq_cis_real, "output.freq_cis_real");
|
||||||
ggml_set_name(model->freq_cis_imag, "output.freq_cis_imag");
|
// ggml_set_name(model->freq_cis_imag, "output.freq_cis_imag");
|
||||||
|
|
||||||
model->layers.resize(n_layer);
|
model->layers.resize(n_layer);
|
||||||
for (uint32_t i = 0; i < n_layer; ++i) {
|
for (uint32_t i = 0; i < n_layer; ++i) {
|
||||||
|
@ -305,10 +305,14 @@ void init_model(struct my_llama_model * model) {
|
||||||
|
|
||||||
layer.ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
|
layer.ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
|
||||||
|
|
||||||
layer.w1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
|
layer.w1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
|
||||||
layer.w2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd);
|
layer.w2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd);
|
||||||
layer.w3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
|
layer.w3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
|
||||||
|
|
||||||
|
// layer.w1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd);
|
||||||
|
// layer.w2 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_ff);
|
||||||
|
// layer.w3 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_ff, n_embd);
|
||||||
|
|
||||||
ggml_set_name(layer.attention_norm, (layers_i + ".attention_norm.weight").c_str());
|
ggml_set_name(layer.attention_norm, (layers_i + ".attention_norm.weight").c_str());
|
||||||
|
|
||||||
ggml_set_name(layer.wq, (layers_i + ".attention.wq.weight").c_str());
|
ggml_set_name(layer.wq, (layers_i + ".attention.wq.weight").c_str());
|
||||||
|
@ -352,7 +356,7 @@ int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
|
||||||
void print_row(struct ggml_tensor * probs, int i) {
|
void print_row(struct ggml_tensor * probs, int i) {
|
||||||
for (int k = 0; k < probs->ne[0]; ++k) {
|
for (int k = 0; k < probs->ne[0]; ++k) {
|
||||||
float p = get_f32_2d(probs, k, i);
|
float p = get_f32_2d(probs, k, i);
|
||||||
printf(" %.2f", p);
|
printf(" %f", p);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
@ -656,34 +660,50 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
|
||||||
// w->token_embedding_table -> model->tok_embeddings
|
// w->token_embedding_table -> model->tok_embeddings
|
||||||
// float* -> struct ggml_tensor
|
// float* -> struct ggml_tensor
|
||||||
stuff_karpathy_weights_into_gg(model->tok_embeddings, w->token_embedding_table);
|
stuff_karpathy_weights_into_gg(model->tok_embeddings, w->token_embedding_table);
|
||||||
|
stuff_karpathy_weights_into_gg(model->output, w->token_embedding_table);
|
||||||
// print_row(model->tok_embeddings, 0);
|
// print_row(model->tok_embeddings, 0);
|
||||||
|
|
||||||
stuff_karpathy_weights_into_gg(model->norm, w->rms_final_weight);
|
stuff_karpathy_weights_into_gg(model->norm, w->rms_final_weight);
|
||||||
stuff_karpathy_weights_into_gg(model->freq_cis_real, w->freq_cis_real);
|
print_row(model->norm, 0);
|
||||||
stuff_karpathy_weights_into_gg(model->freq_cis_imag, w->freq_cis_imag);
|
//stuff_karpathy_weights_into_gg(model->freq_cis_real, w->freq_cis_real);
|
||||||
|
//stuff_karpathy_weights_into_gg(model->freq_cis_imag, w->freq_cis_imag);
|
||||||
|
|
||||||
// for rms-att-weight
|
// for rms-att-weight
|
||||||
int row_length = model->hparams.n_embd;
|
int row_length = model->hparams.n_embd;
|
||||||
|
const auto & hparams = model->hparams;
|
||||||
|
int n_ff = get_n_ff(&hparams);
|
||||||
|
//int n_ff = model->hparams.n_embd;
|
||||||
|
//const auto & hparams = model->hparams;
|
||||||
|
//int row_length = get_n_ff(&hparams);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
|
for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
|
||||||
auto & layer = model->layers[i];
|
auto & layer = model->layers[i];
|
||||||
// 2d
|
// 1d
|
||||||
stuff_karpathy_weights_into_gg(layer.attention_norm, &w->rms_att_weight[i*row_length]);
|
stuff_karpathy_weights_into_gg(layer.attention_norm, &w->rms_att_weight[i*row_length]);
|
||||||
stuff_karpathy_weights_into_gg(layer.ffn_norm , &w->rms_ffn_weight[i*row_length]);
|
stuff_karpathy_weights_into_gg(layer.ffn_norm , &w->rms_ffn_weight[i*row_length]);
|
||||||
stuff_karpathy_weights_into_gg(layer.wq , &w->wq[i*row_length]);
|
|
||||||
stuff_karpathy_weights_into_gg(layer.wk , &w->wk[i*row_length]);
|
// from 3d matrix layer x dim x dim to 2d matrix dim x dim
|
||||||
stuff_karpathy_weights_into_gg(layer.wv , &w->wv[i*row_length]);
|
stuff_karpathy_weights_into_gg(layer.wq , &w->wq[i*row_length*row_length]);
|
||||||
stuff_karpathy_weights_into_gg(layer.wo , &w->wo[i*row_length]);
|
stuff_karpathy_weights_into_gg(layer.wk , &w->wk[i*row_length*row_length]);
|
||||||
stuff_karpathy_weights_into_gg(layer.w1 , &w->w1[i*row_length]);
|
stuff_karpathy_weights_into_gg(layer.wv , &w->wv[i*row_length*row_length]);
|
||||||
stuff_karpathy_weights_into_gg(layer.w2 , &w->w2[i*row_length]);
|
stuff_karpathy_weights_into_gg(layer.wo , &w->wo[i*row_length*row_length]);
|
||||||
stuff_karpathy_weights_into_gg(layer.w3 , &w->w3[i*row_length]);
|
|
||||||
|
//stuff_karpathy_weights_into_gg(layer.w1 , &w->w1[i*row_length]);
|
||||||
|
stuff_karpathy_weights_into_gg(layer.w1 , &w->w1[i*row_length*n_ff]);
|
||||||
|
|
||||||
|
stuff_karpathy_weights_into_gg(layer.w2 , &w->w2[i*n_ff*row_length]);
|
||||||
|
//stuff_karpathy_weights_into_gg(layer.w2 , &w->w2[i*n_ff]);
|
||||||
|
|
||||||
|
//stuff_karpathy_weights_into_gg(layer.w3 , &w->w3[i*row_length]);
|
||||||
|
stuff_karpathy_weights_into_gg(layer.w3 , &w->w3[i*n_ff*row_length]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// write tensors
|
// write tensors
|
||||||
write_tensor(&file, model->tok_embeddings);
|
write_tensor(&file, model->tok_embeddings);
|
||||||
write_tensor(&file, model->norm);
|
write_tensor(&file, model->norm);
|
||||||
// write_tensor(&file, model->output); // ?
|
write_tensor(&file, model->output); // ?
|
||||||
write_tensor(&file, model->freq_cis_real);
|
// write_tensor(&file, model->freq_cis_real);
|
||||||
write_tensor(&file, model->freq_cis_imag);
|
// write_tensor(&file, model->freq_cis_imag);
|
||||||
for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
|
for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
|
||||||
auto & layer = model->layers[i];
|
auto & layer = model->layers[i];
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue