ggml : build backends as libraries (#10256)

* ggml : build backends as libraries

---------

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com>
This commit is contained in:
Diego Devesa 2024-11-14 18:04:35 +01:00 committed by GitHub
parent 4a8ccb37ad
commit ae8de6d50a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
191 changed files with 17541 additions and 16879 deletions

View file

@ -45,22 +45,23 @@ static float array_rmse(const float * a1, const float * a2, size_t n) {
}
// Total quantization error on test data
static float total_quantization_error(const ggml_type_traits * qfns, size_t test_size, const float * test_data) {
static float total_quantization_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data) {
std::vector<uint8_t> tmp_q(2*test_size);
std::vector<float> tmp_out(test_size);
qfns->from_float(test_data, tmp_q.data(), test_size);
qfns_cpu->from_float(test_data, tmp_q.data(), test_size);
qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
return array_rmse(test_data, tmp_out.data(), test_size);
}
// Total quantization error on test data
static float reference_quantization_error(const ggml_type_traits * qfns, size_t test_size, const float * test_data) {
static float reference_quantization_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data) {
std::vector<uint8_t> tmp_q(2*test_size);
std::vector<float> tmp_out(test_size);
std::vector<float> tmp_out_ref(test_size);
qfns->from_float(test_data, tmp_q.data(), test_size);
// FIXME: why is done twice?
qfns_cpu->from_float(test_data, tmp_q.data(), test_size);
qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
qfns->from_float_ref(test_data, tmp_q.data(), test_size);
@ -84,9 +85,9 @@ static float dot_product_error(
std::vector<uint8_t> tmp_q1(2*test_size);
std::vector<uint8_t> tmp_q2(2*test_size);
const auto * vdot = ggml_get_type_traits(qfns_cpu->vec_dot_type);
const auto * vdot = ggml_get_type_traits_cpu(qfns_cpu->vec_dot_type);
qfns->from_float(test_data1, tmp_q1.data(), test_size);
qfns_cpu->from_float(test_data1, tmp_q1.data(), test_size);
vdot->from_float(test_data2, tmp_q2.data(), test_size);
float result = INFINITY;
@ -145,8 +146,8 @@ int main(int argc, char * argv[]) {
printf("Testing %s\n", ggml_type_name((ggml_type) i));
ggml_quantize_init(ei);
if (qfns->from_float && qfns->to_float) {
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
if (qfns_cpu->from_float && qfns->to_float) {
const float total_error = total_quantization_error(qfns, qfns_cpu, test_size, test_data.data());
const float max_quantization_error =
type == GGML_TYPE_TQ1_0 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
type == GGML_TYPE_TQ2_0 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
@ -161,7 +162,7 @@ int main(int argc, char * argv[]) {
printf("%5s absolute quantization error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], total_error);
}
const float reference_error = reference_quantization_error(qfns, test_size, test_data.data());
const float reference_error = reference_quantization_error(qfns, qfns_cpu, test_size, test_data.data());
failed = !(reference_error < MAX_QUANTIZATION_REFERENCE_ERROR);
num_failed += failed;
if (failed || verbose) {