diff --git a/ggml.c b/ggml.c
index 8606e9344..4f1ad459c 100644
--- a/ggml.c
+++ b/ggml.c
@@ -1420,6 +1420,34 @@ static void dequantize_row_q4_1(const void * restrict vx, float * restrict y, in
 #endif
 }

+static void ggml_vec_dot_q4_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
+static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
+
+static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
+    [GGML_TYPE_Q4_0] = {
+        .dequantize_row_q         = dequantize_row_q4_0,
+        .quantize_row_q           = quantize_row_q4_0,
+        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
+        .quantize_row_q_dot       = quantize_row_q8_0,
+        .vec_dot_q                = ggml_vec_dot_q4_0_q8_0,
+    },
+    [GGML_TYPE_Q4_1] = {
+        .dequantize_row_q         = dequantize_row_q4_1,
+        .quantize_row_q           = quantize_row_q4_1,
+        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
+        .quantize_row_q_dot       = quantize_row_q4_1,
+        .vec_dot_q                = ggml_vec_dot_q4_1,
+    },
+    // TODO: GGML_TYPE_Q8_0
+};
+
+// For internal test use
+quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
+    GGML_ASSERT(i < GGML_TYPE_COUNT);
+    return quantize_fns[i];
+}
+
+
 //
 // simd mappings
 //
@@ -5910,12 +5938,12 @@ static void ggml_compute_forward_add_q_f32(
     const int64_t ne03 = src0->ne[3];

     //const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
+    //const int64_t ne11 = src1->ne[1];
     const int64_t ne12 = src1->ne[2];
     const int64_t ne13 = src1->ne[3];

-    const int64_t ne0  = dst->ne[0];
-    const int64_t ne1  = dst->ne[1];
+    //const int64_t ne0  = dst->ne[0];
+    //const int64_t ne1  = dst->ne[1];
     const int64_t ne2  = dst->ne[2];
     const int64_t ne3  = dst->ne[3];

@@ -7307,30 +7335,6 @@ static void ggml_compute_forward_mul_mat_f16_f32(
     //}
 }

-static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
-    [GGML_TYPE_Q4_0] = {
-        .dequantize_row_q         = dequantize_row_q4_0,
-        .quantize_row_q           = quantize_row_q4_0,
-        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
-        .quantize_row_q_dot       = quantize_row_q8_0,
-        .vec_dot_q                = ggml_vec_dot_q4_0_q8_0,
-    },
-    [GGML_TYPE_Q4_1] = {
-        .dequantize_row_q         = dequantize_row_q4_1,
-        .quantize_row_q           = quantize_row_q4_1,
-        .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
-        .quantize_row_q_dot       = quantize_row_q4_1,
-        .vec_dot_q                = ggml_vec_dot_q4_1,
-    },
-    // TODO: GGML_TYPE_Q8_0
-};
-
-// For internal test use
-quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
-    GGML_ASSERT(i < GGML_TYPE_COUNT);
-    return quantize_fns[i];
-}
-
 static void ggml_compute_forward_mul_mat_q_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
diff --git a/llama.cpp b/llama.cpp
index 0627c9b9c..33209615e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1896,8 +1896,8 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
         ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
         ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];

-        if (tensor->ne[0] != loraA->ne[1]) {
-            fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
+        if (tensor->ne[0] != loraA->ne[1] || tensor->ne[1] != loraB->ne[1]) {
+            fprintf(stderr, "%s: incompatible tensor dimensions (%" PRId64 " and %" PRId64 ");"
                     " are you sure that this adapter is for this model?\n", __func__, tensor->ne[0], loraA->ne[1]);
             return 1;
         }