Show warning when using a quantized base model
This commit is contained in:
parent fc89916002
commit 14858ba2bf

1 changed file with 10 additions and 3 deletions

llama.cpp | 13 ++++++++++---

@@ -1843,9 +1843,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
         model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, false));
     }
 
-    fprintf(stderr, "%s: ", __func__);
-
     // read tensors and apply
+    bool warned = false;
     int n_tensors = 0;
     while (true) {
         int32_t n_dims;
@@ -1938,6 +1937,14 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
                 base_t = dest_t;
             }
 
+            if (base_t->type == GGML_TYPE_Q4_0 || base_t->type == GGML_TYPE_Q4_1) {
+                if (!warned) {
+                    fprintf(stderr, "%s: warning: using a lora adapter with a quantized model may result in poor quality, "
+                                    "use a f16 or f32 base model with --lora-base\n", __func__);
+                    warned = true;
+                }
+            }
+
             ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
@@ -1974,7 +1981,7 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             lora_tensors.clear();
 
             n_tensors++;
-            if (n_tensors % 8 == 0)
+            if (n_tensors % 4 == 0)
                 fprintf(stderr, ".");
         }
     }
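
For reference, below is a minimal, self-contained sketch of the warn-once pattern this commit introduces. The quantization check and the warning text mirror the diff above, but the enum, the loop, and apply_lora_tensors are simplified stand-ins invented for illustration, not the real llama.cpp structures.

// sketch_warn_once.cpp -- illustrative only; the enum and loop are simplified
// stand-ins for the real llama.cpp tensor loop shown in the diff above.
#include <cstdio>
#include <vector>

enum tensor_type { TYPE_F32, TYPE_F16, TYPE_Q4_0, TYPE_Q4_1 };

static void apply_lora_tensors(const std::vector<tensor_type> & base_types) {
    bool warned    = false; // flipped after the first warning so it prints at most once
    int  n_tensors = 0;

    for (tensor_type type : base_types) {
        if (type == TYPE_Q4_0 || type == TYPE_Q4_1) {
            if (!warned) {
                fprintf(stderr, "%s: warning: using a lora adapter with a quantized model may result in poor quality, "
                                "use a f16 or f32 base model with --lora-base\n", __func__);
                warned = true;
            }
        }

        // ... the real code would patch the model tensor here ...

        n_tensors++;
        if (n_tensors % 4 == 0) {
            fprintf(stderr, "."); // progress dot every 4 tensors, matching the diff
        }
    }
    fprintf(stderr, " done (%d tensors)\n", n_tensors);
}

int main() {
    // two quantized tensors in the list, but only one warning is emitted
    apply_lora_tensors({ TYPE_Q4_0, TYPE_F16, TYPE_Q4_1, TYPE_F32 });
    return 0;
}

Keeping the warning behind a one-shot flag means it is printed at most once per adapter load, rather than once for every quantized tensor, while the progress dots continue on the same stderr stream.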