ggml : build backends as libraries (#10256)
* ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com>
This commit is contained in:
parent
4a8ccb37ad
commit
ae8de6d50a
191 changed files with 17541 additions and 16879 deletions
|
@ -45,22 +45,23 @@ static float array_rmse(const float * a1, const float * a2, size_t n) {
|
|||
}
|
||||
|
||||
// Total quantization error on test data
|
||||
static float total_quantization_error(const ggml_type_traits * qfns, size_t test_size, const float * test_data) {
|
||||
static float total_quantization_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data) {
|
||||
std::vector<uint8_t> tmp_q(2*test_size);
|
||||
std::vector<float> tmp_out(test_size);
|
||||
|
||||
qfns->from_float(test_data, tmp_q.data(), test_size);
|
||||
qfns_cpu->from_float(test_data, tmp_q.data(), test_size);
|
||||
qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
|
||||
return array_rmse(test_data, tmp_out.data(), test_size);
|
||||
}
|
||||
|
||||
// Total quantization error on test data
|
||||
static float reference_quantization_error(const ggml_type_traits * qfns, size_t test_size, const float * test_data) {
|
||||
static float reference_quantization_error(const ggml_type_traits * qfns, const ggml_type_traits_cpu * qfns_cpu, size_t test_size, const float * test_data) {
|
||||
std::vector<uint8_t> tmp_q(2*test_size);
|
||||
std::vector<float> tmp_out(test_size);
|
||||
std::vector<float> tmp_out_ref(test_size);
|
||||
|
||||
qfns->from_float(test_data, tmp_q.data(), test_size);
|
||||
// FIXME: why is done twice?
|
||||
qfns_cpu->from_float(test_data, tmp_q.data(), test_size);
|
||||
qfns->to_float(tmp_q.data(), tmp_out.data(), test_size);
|
||||
|
||||
qfns->from_float_ref(test_data, tmp_q.data(), test_size);
|
||||
|
@ -84,9 +85,9 @@ static float dot_product_error(
|
|||
std::vector<uint8_t> tmp_q1(2*test_size);
|
||||
std::vector<uint8_t> tmp_q2(2*test_size);
|
||||
|
||||
const auto * vdot = ggml_get_type_traits(qfns_cpu->vec_dot_type);
|
||||
const auto * vdot = ggml_get_type_traits_cpu(qfns_cpu->vec_dot_type);
|
||||
|
||||
qfns->from_float(test_data1, tmp_q1.data(), test_size);
|
||||
qfns_cpu->from_float(test_data1, tmp_q1.data(), test_size);
|
||||
vdot->from_float(test_data2, tmp_q2.data(), test_size);
|
||||
|
||||
float result = INFINITY;
|
||||
|
@ -145,8 +146,8 @@ int main(int argc, char * argv[]) {
|
|||
printf("Testing %s\n", ggml_type_name((ggml_type) i));
|
||||
ggml_quantize_init(ei);
|
||||
|
||||
if (qfns->from_float && qfns->to_float) {
|
||||
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
|
||||
if (qfns_cpu->from_float && qfns->to_float) {
|
||||
const float total_error = total_quantization_error(qfns, qfns_cpu, test_size, test_data.data());
|
||||
const float max_quantization_error =
|
||||
type == GGML_TYPE_TQ1_0 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
|
||||
type == GGML_TYPE_TQ2_0 ? MAX_QUANTIZATION_TOTAL_ERROR_TERNARY :
|
||||
|
@ -161,7 +162,7 @@ int main(int argc, char * argv[]) {
|
|||
printf("%5s absolute quantization error: %s (%f)\n", ggml_type_name(type), RESULT_STR[failed], total_error);
|
||||
}
|
||||
|
||||
const float reference_error = reference_quantization_error(qfns, test_size, test_data.data());
|
||||
const float reference_error = reference_quantization_error(qfns, qfns_cpu, test_size, test_data.data());
|
||||
failed = !(reference_error < MAX_QUANTIZATION_REFERENCE_ERROR);
|
||||
num_failed += failed;
|
||||
if (failed || verbose) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue