ggml : fix iq4_nl metal
ggml-ci
This commit is contained in:
parent
f6f2ff9557
commit
3f68842e1c
2 changed files with 2 additions and 6 deletions
|
@@ -1786,10 +1786,6 @@ static enum ggml_status ggml_metal_graph_compute(
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (ggml_is_quantized(src0t)) {
|
|
||||||
GGML_ASSERT(ne00 >= nth0*nth1);
|
|
||||||
}
|
|
||||||
|
|
||||||
[encoder setComputePipelineState:pipeline];
|
[encoder setComputePipelineState:pipeline];
|
||||||
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
||||||
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
|
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
|
||||||
|
|
|
@@ -4757,7 +4757,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
|
||||||
device const float4 * y4 = (device const float4 *)yb;
|
device const float4 * y4 = (device const float4 *)yb;
|
||||||
yl[0] = y4[0]; yl[1] = y4[4]; yl[2] = y4[1]; yl[3] = y4[5];
|
yl[0] = y4[0]; yl[1] = y4[4]; yl[2] = y4[1]; yl[3] = y4[5];
|
||||||
|
|
||||||
for (int row = 0; row < 2; ++row) {
|
for (int row = 0; row < 2 && first_row + row < ne01; ++row) {
|
||||||
|
|
||||||
device const block_iq4_nl & xb = x[row*nb + ib];
|
device const block_iq4_nl & xb = x[row*nb + ib];
|
||||||
device const uint16_t * q4 = (device const uint16_t *)(xb.qs + 8*it);
|
device const uint16_t * q4 = (device const uint16_t *)(xb.qs + 8*it);
|
||||||
|
@@ -4789,7 +4789,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
|
||||||
yb += 16 * QK4_NL;
|
yb += 16 * QK4_NL;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int row = 0; row < 2; ++row) {
|
for (int row = 0; row < 2 && first_row + row < ne01; ++row) {
|
||||||
all_sum = simd_sum(sumf[row]);
|
all_sum = simd_sum(sumf[row]);
|
||||||
if (tiisg == 0) {
|
if (tiisg == 0) {
|
||||||
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum;
|
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue