examples : Fix llama-export-lora example (#8607)
				
					
				
			* fix export-lora example
			* add more logging
			* reject merging subset
			* better check
			* typo
This commit is contained in:
		
							parent
							
								
									b841d07408
								
							
						
					
					
						commit
						de280085e7
					
				
					 5 changed files with 378 additions and 436 deletions
				
			
		|  | @ -694,11 +694,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa | |||
|         params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i])); | ||||
|         return true; | ||||
|     } | ||||
|     if (arg == "--lora-base") { | ||||
|         CHECK_ARG | ||||
|         params.lora_base = argv[i]; | ||||
|         return true; | ||||
|     } | ||||
|     if (arg == "--control-vector") { | ||||
|         CHECK_ARG | ||||
|         params.control_vectors.push_back({ 1.0f, argv[i], }); | ||||
|  | @ -1274,6 +1269,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa | |||
|         CHECK_ARG | ||||
|         params.out_file = argv[i]; | ||||
|         params.cvector_outfile = argv[i]; | ||||
|         params.lora_outfile = argv[i]; | ||||
|         return true; | ||||
|     } | ||||
|     if (arg == "-ofreq" || arg == "--output-frequency") { | ||||
|  | @ -1583,9 +1579,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param | |||
|     options.push_back({ "*",           "       --override-kv KEY=TYPE:VALUE", | ||||
|                                                                         "advanced option to override model metadata by key. may be specified multiple times.\n" | ||||
|                                                                         "types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false" }); | ||||
|     options.push_back({ "*",           "       --lora FNAME",           "apply LoRA adapter (implies --no-mmap)" }); | ||||
|     options.push_back({ "*",           "       --lora-scaled FNAME S",  "apply LoRA adapter with user defined scaling S (implies --no-mmap)" }); | ||||
|     options.push_back({ "*",           "       --lora-base FNAME",      "optional model to use as a base for the layers modified by the LoRA adapter" }); | ||||
|     options.push_back({ "*",           "       --lora FNAME",           "apply LoRA adapter (can be repeated to use multiple adapters)" }); | ||||
|     options.push_back({ "*",           "       --lora-scaled FNAME S",  "apply LoRA adapter with user defined scaling S (can be repeated to use multiple adapters)" }); | ||||
|     options.push_back({ "*",           "       --control-vector FNAME", "add a control vector\n" | ||||
|                                                                         "note: this argument can be repeated to add multiple control vectors" }); | ||||
|     options.push_back({ "*",           "       --control-vector-scaled FNAME SCALE", | ||||
|  | @ -1676,6 +1671,13 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param | |||
|     options.push_back({ "cvector",     "       --pca-iter N",           "number of iterations used for PCA (default: %d)", params.n_pca_iterations }); | ||||
|     options.push_back({ "cvector",     "       --method {pca,mean}",    "dimensionality reduction method to be used (default: pca)" }); | ||||
| 
 | ||||
|     options.push_back({ "export-lora" }); | ||||
|     options.push_back({ "export-lora", "-m,    --model",                "model path from which to load base model (default '%s')", params.model.c_str() }); | ||||
|     options.push_back({ "export-lora", "       --lora FNAME",           "path to LoRA adapter  (can be repeated to use multiple adapters)" }); | ||||
|     options.push_back({ "export-lora", "       --lora-scaled FNAME S",  "path to LoRA adapter with user defined scaling S  (can be repeated to use multiple adapters)" }); | ||||
|     options.push_back({ "*",           "-t,    --threads N",            "number of threads to use during computation (default: %d)", params.n_threads }); | ||||
|     options.push_back({ "export-lora", "-o,    --output FNAME",         "output file (default: '%s')", params.lora_outfile.c_str() }); | ||||
| 
 | ||||
|     printf("usage: %s [options]\n", argv[0]); | ||||
| 
 | ||||
|     for (const auto & o : options) { | ||||
|  | @ -3166,7 +3168,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l | |||
|         } | ||||
|         fprintf(stream, "  - %s: %f\n", std::get<0>(la).c_str(), std::get<1>(la)); | ||||
|     } | ||||
|     fprintf(stream, "lora_base: %s\n", params.lora_base.c_str()); | ||||
|     fprintf(stream, "main_gpu: %d # default: 0\n", params.main_gpu); | ||||
|     fprintf(stream, "min_keep: %d # default: 0 (disabled)\n", sparams.min_keep); | ||||
|     fprintf(stream, "mirostat: %d # default: 0 (disabled)\n", sparams.mirostat); | ||||
|  |  | |||
|  | @ -128,7 +128,6 @@ struct gpt_params { | |||
| 
 | ||||
|     // TODO: avoid tuple, use struct
 | ||||
|     std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
 | ||||
|     std::string lora_base  = "";                              // base model path for the lora adapter
 | ||||
| 
 | ||||
|     std::vector<llama_control_vector_load_info> control_vectors; // control vector with user defined scale
 | ||||
| 
 | ||||
|  | @ -255,6 +254,8 @@ struct gpt_params { | |||
|     std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; | ||||
| 
 | ||||
|     bool spm_infill = false; // suffix/prefix/middle pattern for infill
 | ||||
| 
 | ||||
|     std::string lora_outfile = "ggml-lora-merged-f16.gguf"; | ||||
| }; | ||||
| 
 | ||||
| void gpt_params_handle_hf_token(gpt_params & params); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue