refactor vocab loading into its own method
This commit is contained in:
parent
aab15de466
commit
d2b95e7e70
1 changed files with 28 additions and 25 deletions
|
@ -491,6 +491,32 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
|
|||
file->write_raw(tensor->data, ggml_nbytes(tensor));
|
||||
}
|
||||
|
||||
void load_vocab(const char *filename, struct llama_vocab *vocab) {
|
||||
struct llama_context_params llama_params = llama_context_default_params();
|
||||
llama_params.vocab_only = true;
|
||||
|
||||
struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
|
||||
struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
|
||||
|
||||
std::vector<const char *> strings;
|
||||
std::vector<float> scores;
|
||||
int n_vocab = llama_n_vocab(lctx);
|
||||
strings.resize(n_vocab, NULL);
|
||||
scores.resize(n_vocab, 0);
|
||||
n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
|
||||
GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
|
||||
vocab->id_to_token.resize(n_vocab);
|
||||
for (int i=0; i<n_vocab; ++i) {
|
||||
std::string tok = std::string(strings[i]);
|
||||
float score = scores[i];
|
||||
vocab->id_to_token[i].tok = tok;
|
||||
vocab->id_to_token[i].score = score;
|
||||
vocab->token_to_id.emplace(tok, i);
|
||||
}
|
||||
llama_free(lctx);
|
||||
llama_free_model(lmodel);
|
||||
}
|
||||
|
||||
void stuff_karpathy_weights_into_gg(struct ggml_tensor * gg_weights, float * karpathy_weights){
|
||||
int ct;
|
||||
switch (gg_weights->n_dims){
|
||||
|
@ -737,30 +763,9 @@ int main(int argc, char ** argv) {
|
|||
fclose(file);
|
||||
}
|
||||
|
||||
struct llama_context_params llama_params = llama_context_default_params();
|
||||
llama_params.vocab_only = true;
|
||||
|
||||
struct llama_model * lmodel = llama_load_model_from_file(params.fn_vocab_model, llama_params);
|
||||
struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
|
||||
|
||||
struct llama_vocab vocab;
|
||||
{
|
||||
std::vector<const char *> strings;
|
||||
std::vector<float> scores;
|
||||
int n_vocab = llama_n_vocab(lctx);
|
||||
strings.resize(n_vocab, NULL);
|
||||
scores.resize(n_vocab, 0);
|
||||
n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
|
||||
GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
|
||||
vocab.id_to_token.resize(n_vocab);
|
||||
for (int i=0; i<n_vocab; ++i) {
|
||||
std::string tok = std::string(strings[i]);
|
||||
float score = scores[i];
|
||||
vocab.id_to_token[i].tok = tok;
|
||||
vocab.id_to_token[i].score = score;
|
||||
vocab.token_to_id.emplace(tok, i);
|
||||
}
|
||||
}
|
||||
load_vocab(params.fn_vocab_model, &vocab);
|
||||
|
||||
struct my_llama_model model;
|
||||
model.hparams.n_vocab = config.vocab_size; //llama_n_vocab(lctx);
|
||||
model.hparams.n_ctx = params.n_ctx;
|
||||
|
@ -782,8 +787,6 @@ int main(int argc, char ** argv) {
|
|||
|
||||
printf("Saving llama.c model file %s in ggml format at %s\n", params.fn_llama2c_model, params.fn_llama2c_output_model);
|
||||
|
||||
llama_free(lctx);
|
||||
llama_free_model(lmodel);
|
||||
ggml_free(model.ctx);
|
||||
free_weights(&weights);
|
||||
return 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue