Rebase to master

This commit is contained in:
Slaren 2023-04-13 18:06:33 +02:00
parent c45868ba9f
commit 57627f0e5f
2 changed files with 33 additions and 29 deletions

58
ggml.c
View file

@ -1420,6 +1420,34 @@ static void dequantize_row_q4_1(const void * restrict vx, float * restrict y, in
#endif
}
// Prototypes for the two dot-product kernels referenced by the table below.
// NOTE(review): their definitions are not visible in this hunk; presumably they
// live further down in this file - confirm.
static void ggml_vec_dot_q4_1(
        const int n,
        float * restrict s,
        const void * restrict vx,
        const void * restrict vy);

static void ggml_vec_dot_q4_0_q8_0(
        const int n,
        float * restrict s,
        const void * restrict vx,
        const void * restrict vy);

// Table of quantization routines, indexed by GGML_TYPE_* value.
//
// Only the Q4_0 and Q4_1 slots are filled in explicitly; because the array has
// static storage duration, every slot and member not named here is
// zero-initialized.
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
    // Q4_0: the dot-product operand is quantized with quantize_row_q8_0 and the
    // dot product itself is ggml_vec_dot_q4_0_q8_0.
    [GGML_TYPE_Q4_0] = {
        .quantize_row_q           = quantize_row_q4_0,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
        .dequantize_row_q         = dequantize_row_q4_0,
        .quantize_row_q_dot       = quantize_row_q8_0,
        .vec_dot_q                = ggml_vec_dot_q4_0_q8_0,
    },
    // Q4_1: the dot-product operand uses the same quantizer as the row itself.
    [GGML_TYPE_Q4_1] = {
        .quantize_row_q           = quantize_row_q4_1,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
        .dequantize_row_q         = dequantize_row_q4_1,
        .quantize_row_q_dot       = quantize_row_q4_1,
        .vec_dot_q                = ggml_vec_dot_q4_1,
    },
    // TODO: GGML_TYPE_Q8_0
};
// Accessor intended for internal test use.
//
// Asserts (via GGML_ASSERT) that i is below GGML_TYPE_COUNT, then returns a
// by-value copy of the quantize_fns entry at index i.
quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
    GGML_ASSERT(i < GGML_TYPE_COUNT);

    const quantize_fns_t entry = quantize_fns[i];
    return entry;
}
//
// simd mappings
//
@ -5910,12 +5938,12 @@ static void ggml_compute_forward_add_q_f32(
const int64_t ne03 = src0->ne[3];
//const int64_t ne10 = src1->ne[0];
const int64_t ne11 = src1->ne[1];
//const int64_t ne11 = src1->ne[1];
const int64_t ne12 = src1->ne[2];
const int64_t ne13 = src1->ne[3];
const int64_t ne0 = dst->ne[0];
const int64_t ne1 = dst->ne[1];
//const int64_t ne0 = dst->ne[0];
//const int64_t ne1 = dst->ne[1];
const int64_t ne2 = dst->ne[2];
const int64_t ne3 = dst->ne[3];
@ -7307,30 +7335,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
//}
}
// Quantization routine table, one slot per GGML_TYPE_* value.
//
// Slots and members that are not listed here are zero-initialized, since the
// array has static storage duration.
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
    // Q4_0 pairs with quantize_row_q8_0 / ggml_vec_dot_q4_0_q8_0 for dot products.
    [GGML_TYPE_Q4_0] = {
        .quantize_row_q           = quantize_row_q4_0,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
        .dequantize_row_q         = dequantize_row_q4_0,
        .quantize_row_q_dot       = quantize_row_q8_0,
        .vec_dot_q                = ggml_vec_dot_q4_0_q8_0,
    },
    // Q4_1 reuses its own row quantizer for the dot-product operand.
    [GGML_TYPE_Q4_1] = {
        .quantize_row_q           = quantize_row_q4_1,
        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
        .dequantize_row_q         = dequantize_row_q4_1,
        .quantize_row_q_dot       = quantize_row_q4_1,
        .vec_dot_q                = ggml_vec_dot_q4_1,
    },
    // TODO: GGML_TYPE_Q8_0
};
// For internal test use: look up the quantization routines for type index i.
//
// The index is checked against GGML_TYPE_COUNT with GGML_ASSERT before the
// table is read; the matching entry is returned by value.
quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
    GGML_ASSERT(i < GGML_TYPE_COUNT);

    const quantize_fns_t fns = quantize_fns[i];
    return fns;
}
static void ggml_compute_forward_mul_mat_q_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,

View file

@ -1896,8 +1896,8 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
if (tensor->ne[0] != loraA->ne[1]) {
fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
if (tensor->ne[0] != loraA->ne[1] || tensor->ne[1] != loraB->ne[1]) {
fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
" are you sure that this adapter is for this model?\n", __func__, tensor->ne[0], loraA->ne[1]);
return 1;
}