Apply ggerganov's fixes for test-backend-ops
parent bc278c8a0e
commit 2741a99791

4 changed files with 10 additions and 5 deletions
@@ -803,7 +803,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_GET_ROWS:
             {
-                return op->ne[3] == 1;
+                return op->src[0]->type != GGML_TYPE_BF16 && op->ne[3] == 1;
             }
         default:
             return false;
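This makes ggml_metal_supports_op decline GGML_OP_GET_ROWS whenever the source tensor is BF16, in addition to the existing ne[3] == 1 restriction, so such nodes are not claimed by the Metal backend. A minimal, self-contained sketch of the predicate and its effect, using hypothetical stand-in types rather than the real ggml structs:

    #include <cstdio>

    // Stand-ins for the sketch; the real definitions live in ggml.h.
    enum sketch_type { T_F32, T_F16, T_BF16 };
    struct sketch_op { sketch_type src0_type; long ne3; };

    // Same condition as the patched GGML_OP_GET_ROWS case: reject BF16 sources
    // and any op whose outermost dimension is not 1.
    static bool metal_supports_get_rows(const sketch_op & op) {
        return op.src0_type != T_BF16 && op.ne3 == 1;
    }

    int main() {
        const sketch_op f16_rows  = { T_F16,  1 };
        const sketch_op bf16_rows = { T_BF16, 1 };
        // A scheduler consulting this check would keep the F16 node on Metal
        // and leave the BF16 node to another backend (typically the CPU).
        std::printf("f16  get_rows supported: %d\n", metal_supports_get_rows(f16_rows));   // 1
        std::printf("bf16 get_rows supported: %d\n", metal_supports_get_rows(bf16_rows));  // 0
        return 0;
    }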
ggml.c
@@ -19765,7 +19765,10 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
            case GGML_OP_CPY:
            case GGML_OP_DUP:
                {
-                    if (ggml_is_quantized(node->type)) {
+                    if (ggml_is_quantized(node->type) ||
+                        // F16 -> BF16 and BF16 -> F16 copies go through intermediate F32
+                        (node->src[0]->type == GGML_TYPE_F16 && node->src[1] && node->src[1]->type == GGML_TYPE_BF16) ||
+                        (node->src[0]->type == GGML_TYPE_BF16 && node->src[1] && node->src[1]->type == GGML_TYPE_F16)) {
                        cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
                    }
                } break;
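The extra work-buffer accounting covers CPY/DUP nodes that copy between F16 and BF16: there is no direct F16 <-> BF16 conversion, so each row is first widened to F32 in a scratch buffer and then narrowed to the destination type, and every worker thread needs one F32 row of scratch. A small sketch of that sizing rule, with hypothetical values standing in for node->ne[0] and n_tasks:

    #include <cstddef>
    #include <cstdio>

    int main() {
        // One F32 scratch row per worker thread for an F16 <-> BF16 copy:
        // the row is widened to F32 there before being narrowed again.
        const size_t ne0     = 4096;  // elements per row (hypothetical)
        const size_t n_tasks = 8;     // threads planned for this node (hypothetical)
        const size_t cur     = sizeof(float) * ne0 * n_tasks;
        std::printf("work buffer reserved for this node: %zu bytes\n", cur);  // 131072
        return 0;
    }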
@@ -863,7 +863,6 @@ QK_K = 256
 GGML_QUANT_SIZES = {
     GGMLQuantizationType.F32: (1, 4),
     GGMLQuantizationType.F16: (1, 2),
-    GGMLQuantizationType.BF16: (1, 2),
     GGMLQuantizationType.Q4_0: (32, 2 + 16),
     GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
     GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
@@ -890,6 +889,7 @@ GGML_QUANT_SIZES = {
     GGMLQuantizationType.I64: (1, 8),
     GGMLQuantizationType.F64: (1, 8),
     GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
+    GGMLQuantizationType.BF16: (1, 2),
 }
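On the gguf-py side the BF16 entry simply moves to the end of GGML_QUANT_SIZES; the value stays (block_size, type_size) = (1, 2), i.e. one element per block and two bytes per block. A standalone sketch of how such a pair turns into bytes per row (hypothetical helper, not part of gguf-py):

    #include <cstddef>
    #include <cstdio>

    // Bytes needed for n_elems elements of a type described by a
    // (block_size, type_size) pair, the same data GGML_QUANT_SIZES records.
    static size_t row_bytes(size_t n_elems, size_t block_size, size_t type_size) {
        // n_elems is assumed to be a multiple of block_size
        return n_elems / block_size * type_size;
    }

    int main() {
        std::printf("BF16 row of 4096 elements: %zu bytes\n", row_bytes(4096, 1, 2));    // 8192
        std::printf("Q4_0 row of 4096 elements: %zu bytes\n", row_bytes(4096, 32, 18));  // 2304
        return 0;
    }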
@@ -50,7 +50,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
 
     if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
         ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
-    } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
+    } else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_BF16) {
         GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
         std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
         std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
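With BF16 added to this branch, the random F32 data generated for BF16 tensors goes through the same conversion path as F16 and the quantized types instead of being unhandled. For BF16 that conversion is just a narrowing of each float to 16 bits; a standalone sketch of one common way to do it with round-to-nearest-even (an assumption for illustration, not a copy of ggml's converter):

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    // Narrow an F32 value to BF16 with round-to-nearest-even. NaNs are not
    // treated specially here; a production converter would preserve them.
    static uint16_t f32_to_bf16_rne(float f) {
        uint32_t u;
        std::memcpy(&u, &f, sizeof(u));
        u += 0x7FFFu + ((u >> 16) & 1);   // round to nearest, ties to even
        return (uint16_t) (u >> 16);
    }

    int main() {
        // 1.0f is exactly representable in BF16 and encodes as 0x3F80.
        std::printf("bf16(1.0f) = 0x%04X\n", (unsigned) f32_to_bf16_rne(1.0f));
        return 0;
    }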
@@ -92,6 +92,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
                     size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0];
                     if (t->type == GGML_TYPE_F16) {
                         tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i]));
+                    } else if (t->type == GGML_TYPE_BF16) {
+                        tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t*)&buf[i]));
                     } else if (t->type == GGML_TYPE_F32) {
                         tv.push_back(*(float *) &buf[i]);
                     } else if (t->type == GGML_TYPE_I32) {
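tensor_to_float can now decode BF16 buffers, widening each stored value with ggml_bf16_to_fp32 before the results are compared against the reference backend. The widening itself is a 16-bit shift, since BF16 keeps the sign, exponent and top 7 mantissa bits of an IEEE-754 binary32; a standalone equivalent that reads straight out of a raw byte buffer:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    // Read the BF16 element at byte offset i of a raw buffer and widen it to
    // F32, mirroring what the new tensor_to_float branch does via
    // ggml_bf16_to_fp32: place the 16 stored bits in the top half of a binary32.
    static float read_bf16(const uint8_t * buf, size_t i) {
        uint16_t h;
        std::memcpy(&h, buf + i, sizeof(h));
        const uint32_t u = (uint32_t) h << 16;
        float f;
        std::memcpy(&f, &u, sizeof(f));
        return f;
    }

    int main() {
        // Two BF16 values packed back to back (little-endian): 1.0f and -2.0f.
        const uint8_t buf[] = { 0x80, 0x3F, 0x00, 0xC0 };
        std::printf("%f %f\n", read_bf16(buf, 0), read_bf16(buf, 2));  // 1.000000 -2.000000
        return 0;
    }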
@@ -1898,7 +1900,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     std::default_random_engine rng(0);
 
     const ggml_type all_types[] = {
-        GGML_TYPE_F32, GGML_TYPE_F16,
+        GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16,
         GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
         GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
         GGML_TYPE_Q8_0,
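Appending GGML_TYPE_BF16 to all_types is what pulls the new type into the existing test matrix: every type-parameterised case built by iterating this array now gets a BF16 variant as well. A schematic of that pattern with stand-in names (the real registration code in test-backend-ops.cpp differs):

    #include <cstdio>

    // Stand-in for ggml_type, just for the sketch.
    enum sketch_type { T_F32, T_F16, T_BF16, T_Q4_0, T_Q8_0 };

    static const char * type_name(sketch_type t) {
        switch (t) {
            case T_F32:  return "f32";
            case T_F16:  return "f16";
            case T_BF16: return "bf16";
            case T_Q4_0: return "q4_0";
            case T_Q8_0: return "q8_0";
        }
        return "?";
    }

    int main() {
        // Adding one entry to the list adds one variant of every case built here.
        const sketch_type all_types[] = { T_F32, T_F16, T_BF16, T_Q4_0, T_Q8_0 };
        for (sketch_type t : all_types) {
            std::printf("register type-dependent test cases for %s\n", type_name(t));
        }
        return 0;
    }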