diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 28759ae39..1a238c4dd 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -438,6 +438,11 @@ struct llama_file {
         read_raw(&ret, sizeof(ret));
         return ret;
     }
+    std::float_t read_f32() {
+        std::float_t ret;
+        read_raw(&ret, sizeof(ret));
+        return ret;
+    }
     std::string read_string(std::uint32_t len) {
         std::vector<char> chars(len);
         read_raw(chars.data(), len);
@@ -491,30 +496,57 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
     file->write_raw(tensor->data, ggml_nbytes(tensor));
 }
 
-void load_vocab(const char *filename, struct llama_vocab *vocab) {
-    struct llama_context_params llama_params = llama_context_default_params();
-    llama_params.vocab_only = true;
-
-    struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
-    struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
-
-    std::vector<const char *> strings;
-    std::vector<float> scores;
-    int n_vocab = llama_n_vocab(lctx);
-    strings.resize(n_vocab, NULL);
-    scores.resize(n_vocab, 0);
-    n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
-    GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
-    vocab->id_to_token.resize(n_vocab);
-    for (int i=0; i<n_vocab; ++i) {
-        std::string tok   = std::string(strings[i]);
-        float       score = scores[i];
-        vocab->id_to_token[i].tok   = tok;
-        vocab->id_to_token[i].score = score;
-        vocab->token_to_id.emplace(tok, i);
+bool is_ggml_file(const char *filename) {
+    llama_file file(filename, "rb");
+    if (file.size < 4) {
+        return false;
+    }
+    uint32_t magic = file.read_u32();
+    return magic == LLAMA_FILE_MAGIC;
+}
+
+void load_vocab(const char *filename, Config *config, struct llama_vocab *vocab) {
+    // heuristic to infer whether vocab is from ggml or from llama2.c vocabulary
+    if (is_ggml_file(filename)) {
+
+        struct llama_context_params llama_params = llama_context_default_params();
+        llama_params.vocab_only = true;
+
+        struct llama_model * lmodel = llama_load_model_from_file(filename, llama_params);
+        struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
+
+        std::vector<const char *> strings;
+        std::vector<float> scores;
+        int n_vocab = llama_n_vocab(lctx);
+        strings.resize(n_vocab, NULL);
+        scores.resize(n_vocab, 0);
+        n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
+        GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
+        vocab->id_to_token.resize(n_vocab);
+        for (int i=0; i<n_vocab; ++i) {
+            std::string tok   = std::string(strings[i]);
+            float       score = scores[i];
+            vocab->id_to_token[i].tok   = tok;
+            vocab->id_to_token[i].score = score;
+            vocab->token_to_id.emplace(tok, i);
+        }
+        llama_free(lctx);
+        llama_free_model(lmodel);
+    } else { // assume llama2.c vocabulary
+        printf("Assuming llama2.c vocabulary since %s is not a ggml file\n", filename);
+        llama_file file(filename, "rb");
+        uint32_t n_vocab = config->vocab_size;
+        /* uint32_t max_token_length = */ file.read_u32(); // unused
+        vocab->id_to_token.resize(n_vocab);
+        for (uint32_t i=0; i<n_vocab; ++i) {
+            float_t score = file.read_f32();
+            uint32_t len = file.read_u32();
+            std::string tok = file.read_string(len);
+            vocab->id_to_token[i].tok = tok;
+            vocab->id_to_token[i].score = score;
+            vocab->token_to_id.emplace(tok, i);
+        }
     }
-    llama_free(lctx);
-    llama_free_model(lmodel);
 }
 
 void stuff_karpathy_weights_into_gg(struct ggml_tensor * gg_weights, float * karpathy_weights){
@@ -684,7 +716,7 @@ void print_usage(int /*argc*/, char ** argv, const struct train_params * params) {
     fprintf(stderr, "\n");
     fprintf(stderr, "options:\n");
    fprintf(stderr, "  -h, --help                       show this help message and exit\n");
-    fprintf(stderr, "  --copy-vocab-from-model FNAME    model path from which to copy vocab (default '%s')\n", params->fn_vocab_model);
+    fprintf(stderr, "  --copy-vocab-from-model FNAME    llama2.c vocabulary or ggml model path from which to copy vocab (default '%s')\n", params->fn_vocab_model);
     fprintf(stderr, "  --llama2c-model FNAME            [REQUIRED] model path from which to load Karpathy's llama2.c model\n");
     fprintf(stderr, "  --llama2c-output-model FNAME     model path to save the converted llama2.c model (default %s')\n", params->fn_llama2c_output_model);
     fprintf(stderr, "\n");
@@ -764,7 +796,7 @@ int main(int argc, char ** argv) {
     }
 
     struct llama_vocab vocab;
-    load_vocab(params.fn_vocab_model, &vocab);
+    load_vocab(params.fn_vocab_model, &config, &vocab);
 
     struct my_llama_model model;
     model.hparams.n_vocab = config.vocab_size; //llama_n_vocab(lctx);
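
Note on the file format the new else-branch parses: Karpathy's llama2.c tokenizer.bin begins with a single u32 (max_token_length, read and discarded above), followed by one record per token of { f32 score, u32 length, raw token bytes }; the token count comes from the model's Config (config->vocab_size), not from the vocab file itself. Below is a minimal standalone sketch of that layout, using plain stdio in place of llama_file; read_llama2c_vocab is a hypothetical helper written for illustration, not part of this patch.

    // Sketch only: mirrors the llama2.c tokenizer.bin layout assumed by the new
    // else-branch of load_vocab. read_llama2c_vocab is a hypothetical helper.
    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct TokenEntry {
        std::string tok;
        float       score;
    };

    static std::vector<TokenEntry> read_llama2c_vocab(const char * path, uint32_t n_vocab) {
        std::vector<TokenEntry> entries(n_vocab);
        FILE * f = std::fopen(path, "rb");
        if (!f) {
            return {};
        }
        uint32_t max_token_length = 0;
        std::fread(&max_token_length, sizeof(max_token_length), 1, f); // leading u32, unused here
        for (uint32_t i = 0; i < n_vocab; ++i) {
            std::fread(&entries[i].score, sizeof(float), 1, f);        // f32 score
            uint32_t len = 0;
            std::fread(&len, sizeof(len), 1, f);                       // u32 byte length
            entries[i].tok.resize(len);
            if (len > 0) {
                std::fread(&entries[i].tok[0], 1, len, f);             // raw bytes, no NUL terminator
            }
        }
        std::fclose(f);
        return entries;
    }

Called as read_llama2c_vocab(params.fn_vocab_model, config.vocab_size), this consumes the same byte stream the patched load_vocab reads via file.read_f32(), file.read_u32(), and file.read_string(len).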