Export lora A matrix pre-transposed

Slaren 2023-04-08 03:37:12 +02:00
parent f52101e889
commit ac3fbe492a
2 changed files with 12 additions and 2 deletions


@@ -94,6 +94,8 @@ with open(output_path, "wb") as fout:
         # since ggml doesn't always support other types for the second operand,
         # the tensors are always converted and exported as f32
         t = v.float().numpy()
+        if "lora_A" in k:
+            t = t.T
         print(f"{k} => {translate_tensor_name(k)} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
         write_tensor_header(fout, translate_tensor_name(k), t.shape, t.dtype)
         t.tofile(fout)
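The hunk above is the heart of the commit: lora_A matrices are now written to the adapter file already transposed, so the loader no longer has to wrap them in ggml_transpose when applying the adapter (see the llama.cpp hunks below). The numpy snippet that follows is a minimal sketch, not part of the commit, showing that transposing A at export time leaves the LoRA delta B @ A unchanged; only the stored layout of A moves from (r, n_in) to (n_in, r). All names and sizes here are made up for illustration.

import numpy as np

rng = np.random.default_rng(0)
n_in, n_out, r = 8, 6, 2                                # toy sizes, purely illustrative

A = rng.standard_normal((r, n_in), dtype=np.float32)    # lora_A as trained
B = rng.standard_normal((n_out, r), dtype=np.float32)   # lora_B as trained

A_exported = A.T                                        # what the converter now writes: shape (n_in, r)

delta_from_original = B @ A                             # LoRA delta with the old layout
delta_from_exported = B @ A_exported.T                  # same delta recovered from the new layout

assert np.allclose(delta_from_original, delta_from_exported)
print(A.shape, "->", A_exported.shape)                  # (2, 8) -> (8, 2)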


@@ -1760,8 +1760,12 @@ int llama_model_quantize(
 int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, int n_threads) {
     // TODO: refactor all of this after PR #801
+    fprintf(stderr, "%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
     auto & model = ctx->model;
+    const int64_t t_start_lora_us = ggml_time_us();
     auto fin = std::ifstream(path_lora, std::ios::binary);
     if (!fin) {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, path_lora);
@@ -1874,7 +1878,7 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
             lora_tensors.find(base_name + ".loraB") != lora_tensors.end()) {
             ggml_tensor * tensor = model.tensors[base_name];
-            ggml_tensor * loraA = ggml_transpose(lora_ctx, lora_tensors[base_name + ".loraA"]);
+            ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
             if (tensor->ne[0] != loraA->ne[1]) {
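Note that the shape check on the last line of this hunk is untouched even though loraA is no longer a ggml_transpose view. The sketch below (not from the commit) walks through the bookkeeping under the assumption that, as is conventional for ggml, ne[0] is the innermost dimension and the loader reverses the numpy shape order when it reads the exported header; the helper numpy_shape_to_ne and the sizes are invented for illustration.

def numpy_shape_to_ne(shape):
    # Assumption for this sketch: a row-major numpy array of shape (rows, cols)
    # is seen by ggml as ne = [cols, rows] (ne[0] = innermost dimension).
    return list(reversed(shape))

n_in, n_out, r = 4096, 4096, 8                      # illustrative sizes only

W_ne     = numpy_shape_to_ne((n_out, n_in))         # base weight      -> ne = [n_in, n_out]
A_old_ne = numpy_shape_to_ne((r, n_in))             # old export of A  -> ne = [n_in, r]
A_new_ne = numpy_shape_to_ne((n_in, r))             # new export (A.T) -> ne = [r, n_in]

# Before this commit the loader built a ggml_transpose view of A,
# which swaps ne[0] and ne[1]:
A_old_transposed_ne = [A_old_ne[1], A_old_ne[0]]

# The compatibility condition tensor->ne[0] == loraA->ne[1] sees the same
# value either way; the difference is that the data is now laid out that
# way on disk instead of being transposed at apply time.
assert W_ne[0] == A_old_transposed_ne[1] == A_new_ne[1]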
@@ -1901,7 +1905,11 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
             fprintf(stderr, ".");
         }
     }
-    fprintf(stderr, " done\n");
+    ggml_free(lora_ctx);
+    const int64_t t_lora_us = ggml_time_us() - t_start_lora_us;
+    fprintf(stderr, " done (%.2f ms)\n", t_lora_us / 1000.0);
     return 0;
 }