gguf : deduplicate (#2629)
* gguf : better type names
* dedup : CPU + Metal is working
* ggml : fix warnings about unused results
* llama.cpp : fix line feed and compiler warning
* llama : fix strncpy warning + note token_to_str does not write null
* llama : restore the original load/save session implementation

  Will migrate this to GGUF in the future
* convert-llama-h5-to-gguf.py : support alt ctx param name
* ggml : assert when using ggml_mul with non-F32 src1
* examples : dedup simple

---------

Co-authored-by: klosax <131523366+klosax@users.noreply.github.com>
This commit is contained in:
parent
758ff1bbb5
commit
88b5769487
21 changed files with 1630 additions and 7398 deletions
|
@@ -1,5 +1,6 @@
|
|||
#include "ggml.h"
|
||||
#include "llama.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
@@ -502,7 +503,7 @@ bool is_ggml_file(const char *filename) {
|
|||
return false;
|
||||
}
|
||||
uint32_t magic = file.read_u32();
|
||||
return magic == LLAMA_FILE_MAGIC;
|
||||
return magic == GGUF_MAGIC;
|
||||
}
|
||||
|
||||
void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
|
||||
|
@@ -590,75 +591,80 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
|
|||
if (file.fp == NULL) {
|
||||
return;
|
||||
}
|
||||
// write_magic
|
||||
file.write_u32(LLAMA_FILE_MAGIC); // magic
|
||||
file.write_u32(LLAMA_FILE_VERSION); // version
|
||||
// write_hparams
|
||||
file.write_u32(model->hparams.n_vocab);
|
||||
file.write_u32(model->hparams.n_embd);
|
||||
file.write_u32(model->hparams.n_mult);
|
||||
file.write_u32(model->hparams.n_head);
|
||||
file.write_u32(model->hparams.n_layer);
|
||||
file.write_u32(model->hparams.n_rot);
|
||||
file.write_u32(LLAMA_FTYPE_ALL_F32);
|
||||
|
||||
// write_vocab - for now we are just writing the existing BPE voc. assuming karpathy's vocabulary is the same. idk.
|
||||
uint32_t n_vocab = model->hparams.n_vocab;
|
||||
for (uint32_t i = 0; i < n_vocab; i++) {
|
||||
const auto & token_score = vocab->id_to_token.at(i);
|
||||
file.write_u32((uint32_t) token_score.tok.size());
|
||||
file.write_raw(token_score.tok.data(), token_score.tok.size());
|
||||
file.write_raw(&token_score.score, sizeof(token_score.score));
|
||||
}
|
||||
|
||||
// stuff AK weights into GG weights one by one.
|
||||
// w->token_embedding_table -> model->tok_embeddings
|
||||
// float* -> struct ggml_tensor
|
||||
stuff_karpathy_weights_into_gg(model->tok_embeddings, w->token_embedding_table);
|
||||
stuff_karpathy_weights_into_gg(model->output, w->token_embedding_table);
|
||||
|
||||
stuff_karpathy_weights_into_gg(model->norm, w->rms_final_weight);
|
||||
//print_row(model->norm, 0);
|
||||
|
||||
// for rms-att-weight
|
||||
int row_length = model->hparams.n_embd;
|
||||
const auto & hparams = model->hparams;
|
||||
//int n_ff = model->hparams.n_embd;
|
||||
int n_ff = get_n_ff(&hparams);
|
||||
|
||||
for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
|
||||
auto & layer = model->layers[i];
|
||||
// 1d
|
||||
stuff_karpathy_weights_into_gg(layer.attention_norm, &w->rms_att_weight[i*row_length]);
|
||||
stuff_karpathy_weights_into_gg(layer.ffn_norm , &w->rms_ffn_weight[i*row_length]);
|
||||
|
||||
// from 3d matrix layer x dim x dim to 2d matrix dim x dim
|
||||
stuff_karpathy_weights_into_gg(layer.wq , &w->wq[i*row_length*row_length]);
|
||||
stuff_karpathy_weights_into_gg(layer.wk , &w->wk[i*row_length*row_length]);
|
||||
stuff_karpathy_weights_into_gg(layer.wv , &w->wv[i*row_length*row_length]);
|
||||
stuff_karpathy_weights_into_gg(layer.wo , &w->wo[i*row_length*row_length]);
|
||||
|
||||
stuff_karpathy_weights_into_gg(layer.w1 , &w->w1[i*row_length*n_ff]);
|
||||
stuff_karpathy_weights_into_gg(layer.w2 , &w->w2[i*n_ff*row_length]);
|
||||
stuff_karpathy_weights_into_gg(layer.w3 , &w->w3[i*row_length*n_ff]);
|
||||
}
|
||||
// write tensors
|
||||
write_tensor(&file, model->tok_embeddings);
|
||||
write_tensor(&file, model->norm);
|
||||
write_tensor(&file, model->output); // ?
|
||||
for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
|
||||
auto & layer = model->layers[i];
|
||||
|
||||
write_tensor(&file, layer.attention_norm);
|
||||
write_tensor(&file, layer.wq);
|
||||
write_tensor(&file, layer.wk);
|
||||
write_tensor(&file, layer.wv);
|
||||
write_tensor(&file, layer.wo);
|
||||
write_tensor(&file, layer.ffn_norm);
|
||||
write_tensor(&file, layer.w1);
|
||||
write_tensor(&file, layer.w2);
|
||||
write_tensor(&file, layer.w3);
|
||||
}
|
||||
#pragma message("TODO: implement file saving using gguf")
|
||||
(void) vocab;
|
||||
(void) model;
|
||||
(void) w;
|
||||
// // write_magic
|
||||
// file.write_u32(LLAMA_FILE_MAGIC); // magic
|
||||
// file.write_u32(LLAMA_FILE_VERSION); // version
|
||||
// // write_hparams
|
||||
// file.write_u32(model->hparams.n_vocab);
|
||||
// file.write_u32(model->hparams.n_embd);
|
||||
// file.write_u32(model->hparams.n_mult);
|
||||
// file.write_u32(model->hparams.n_head);
|
||||
// file.write_u32(model->hparams.n_layer);
|
||||
// file.write_u32(model->hparams.n_rot);
|
||||
// file.write_u32(LLAMA_FTYPE_ALL_F32);
|
||||
//
|
||||
// // write_vocab - for now we are just writing the existing BPE voc. assuming karpathy's vocabulary is the same. idk.
|
||||
// uint32_t n_vocab = model->hparams.n_vocab;
|
||||
// for (uint32_t i = 0; i < n_vocab; i++) {
|
||||
// const auto & token_score = vocab->id_to_token.at(i);
|
||||
// file.write_u32((uint32_t) token_score.tok.size());
|
||||
// file.write_raw(token_score.tok.data(), token_score.tok.size());
|
||||
// file.write_raw(&token_score.score, sizeof(token_score.score));
|
||||
// }
|
||||
//
|
||||
// // stuff AK weights into GG weights one by one.
|
||||
// // w->token_embedding_table -> model->tok_embeddings
|
||||
// // float* -> struct ggml_tensor
|
||||
// stuff_karpathy_weights_into_gg(model->tok_embeddings, w->token_embedding_table);
|
||||
// stuff_karpathy_weights_into_gg(model->output, w->token_embedding_table);
|
||||
//
|
||||
// stuff_karpathy_weights_into_gg(model->norm, w->rms_final_weight);
|
||||
// //print_row(model->norm, 0);
|
||||
//
|
||||
// // for rms-att-weight
|
||||
// int row_length = model->hparams.n_embd;
|
||||
// const auto & hparams = model->hparams;
|
||||
// //int n_ff = model->hparams.n_embd;
|
||||
// int n_ff = get_n_ff(&hparams);
|
||||
//
|
||||
// for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
|
||||
// auto & layer = model->layers[i];
|
||||
// // 1d
|
||||
// stuff_karpathy_weights_into_gg(layer.attention_norm, &w->rms_att_weight[i*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.ffn_norm , &w->rms_ffn_weight[i*row_length]);
|
||||
//
|
||||
// // from 3d matrix layer x dim x dim to 2d matrix dim x dim
|
||||
// stuff_karpathy_weights_into_gg(layer.wq , &w->wq[i*row_length*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.wk , &w->wk[i*row_length*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.wv , &w->wv[i*row_length*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.wo , &w->wo[i*row_length*row_length]);
|
||||
//
|
||||
// stuff_karpathy_weights_into_gg(layer.w1 , &w->w1[i*row_length*n_ff]);
|
||||
// stuff_karpathy_weights_into_gg(layer.w2 , &w->w2[i*n_ff*row_length]);
|
||||
// stuff_karpathy_weights_into_gg(layer.w3 , &w->w3[i*row_length*n_ff]);
|
||||
// }
|
||||
// // write tensors
|
||||
// write_tensor(&file, model->tok_embeddings);
|
||||
// write_tensor(&file, model->norm);
|
||||
// write_tensor(&file, model->output); // ?
|
||||
// for (uint32_t i = 0; i < model->hparams.n_layer; ++i) {
|
||||
// auto & layer = model->layers[i];
|
||||
//
|
||||
// write_tensor(&file, layer.attention_norm);
|
||||
// write_tensor(&file, layer.wq);
|
||||
// write_tensor(&file, layer.wk);
|
||||
// write_tensor(&file, layer.wv);
|
||||
// write_tensor(&file, layer.wo);
|
||||
// write_tensor(&file, layer.ffn_norm);
|
||||
// write_tensor(&file, layer.w1);
|
||||
// write_tensor(&file, layer.w2);
|
||||
// write_tensor(&file, layer.w3);
|
||||
// }
|
||||
}
|
||||
|
||||
struct train_params get_default_train_params() {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue