examples : Fix llama-export-lora example (#8607)

* fix export-lora example

* add more logging

* reject merging subset

* better check

* typo
This commit is contained in:
Xuan Son Nguyen 2024-07-23 23:48:37 +02:00 committed by GitHub
parent b841d07408
commit de280085e7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 378 additions and 436 deletions

View file

@ -694,11 +694,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i]));
return true;
}
if (arg == "--lora-base") {
CHECK_ARG
params.lora_base = argv[i];
return true;
}
if (arg == "--control-vector") {
CHECK_ARG
params.control_vectors.push_back({ 1.0f, argv[i], });
@ -1274,6 +1269,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
CHECK_ARG
params.out_file = argv[i];
params.cvector_outfile = argv[i];
params.lora_outfile = argv[i];
return true;
}
if (arg == "-ofreq" || arg == "--output-frequency") {
@ -1583,9 +1579,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
options.push_back({ "*", " --override-kv KEY=TYPE:VALUE",
"advanced option to override model metadata by key. may be specified multiple times.\n"
"types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false" });
options.push_back({ "*", " --lora FNAME", "apply LoRA adapter (implies --no-mmap)" });
options.push_back({ "*", " --lora-scaled FNAME S", "apply LoRA adapter with user defined scaling S (implies --no-mmap)" });
options.push_back({ "*", " --lora-base FNAME", "optional model to use as a base for the layers modified by the LoRA adapter" });
options.push_back({ "*", " --lora FNAME", "apply LoRA adapter (can be repeated to use multiple adapters)" });
options.push_back({ "*", " --lora-scaled FNAME S", "apply LoRA adapter with user defined scaling S (can be repeated to use multiple adapters)" });
options.push_back({ "*", " --control-vector FNAME", "add a control vector\n"
"note: this argument can be repeated to add multiple control vectors" });
options.push_back({ "*", " --control-vector-scaled FNAME SCALE",
@ -1676,6 +1671,13 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
options.push_back({ "cvector", " --pca-iter N", "number of iterations used for PCA (default: %d)", params.n_pca_iterations });
options.push_back({ "cvector", " --method {pca,mean}", "dimensionality reduction method to be used (default: pca)" });
options.push_back({ "export-lora" });
options.push_back({ "export-lora", "-m, --model", "model path from which to load base model (default '%s')", params.model.c_str() });
options.push_back({ "export-lora", " --lora FNAME", "path to LoRA adapter (can be repeated to use multiple adapters)" });
options.push_back({ "export-lora", " --lora-scaled FNAME S", "path to LoRA adapter with user defined scaling S (can be repeated to use multiple adapters)" });
options.push_back({ "*", "-t, --threads N", "number of threads to use during computation (default: %d)", params.n_threads });
options.push_back({ "export-lora", "-o, --output FNAME", "output file (default: '%s')", params.lora_outfile.c_str() });
printf("usage: %s [options]\n", argv[0]);
for (const auto & o : options) {
@ -3166,7 +3168,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
}
fprintf(stream, " - %s: %f\n", std::get<0>(la).c_str(), std::get<1>(la));
}
fprintf(stream, "lora_base: %s\n", params.lora_base.c_str());
fprintf(stream, "main_gpu: %d # default: 0\n", params.main_gpu);
fprintf(stream, "min_keep: %d # default: 0 (disabled)\n", sparams.min_keep);
fprintf(stream, "mirostat: %d # default: 0 (disabled)\n", sparams.mirostat);