mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-10-05 00:20:32 +00:00
[ARM] cache align memset and memzero
This is a natural extension of the previous patch. Non-Feroceon-based targets are unchanged.
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
This commit is contained in:
parent
2239aff6ab
commit
f91a8dcc25
2 changed files with 90 additions and 0 deletions
|
@ -39,6 +39,9 @@ ENTRY(memset)
|
||||||
mov r3, r1
|
mov r3, r1
|
||||||
cmp r2, #16
|
cmp r2, #16
|
||||||
blt 4f
|
blt 4f
|
||||||
|
|
||||||
|
#if ! CALGN(1)+0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need an extra register for this loop - save the return address and
|
* We need an extra register for this loop - save the return address and
|
||||||
* use the LR
|
* use the LR
|
||||||
|
@ -64,6 +67,49 @@ ENTRY(memset)
|
||||||
stmneia r0!, {r1, r3, ip, lr}
|
stmneia r0!, {r1, r3, ip, lr}
|
||||||
ldr lr, [sp], #4
|
ldr lr, [sp], #4
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This version aligns the destination pointer in order to write
|
||||||
|
* whole cache lines at once.
|
||||||
|
*/
|
||||||
|
|
||||||
|
stmfd sp!, {r4-r7, lr}
|
||||||
|
mov r4, r1
|
||||||
|
mov r5, r1
|
||||||
|
mov r6, r1
|
||||||
|
mov r7, r1
|
||||||
|
mov ip, r1
|
||||||
|
mov lr, r1
|
||||||
|
|
||||||
|
cmp r2, #96
|
||||||
|
tstgt r0, #31
|
||||||
|
ble 3f
|
||||||
|
|
||||||
|
and ip, r0, #31
|
||||||
|
rsb ip, ip, #32
|
||||||
|
sub r2, r2, ip
|
||||||
|
movs ip, ip, lsl #(32 - 4)
|
||||||
|
stmcsia r0!, {r4, r5, r6, r7}
|
||||||
|
stmmiia r0!, {r4, r5}
|
||||||
|
tst ip, #(1 << 30)
|
||||||
|
mov ip, r1
|
||||||
|
strne r1, [r0], #4
|
||||||
|
|
||||||
|
3: subs r2, r2, #64
|
||||||
|
stmgeia r0!, {r1, r3-r7, ip, lr}
|
||||||
|
stmgeia r0!, {r1, r3-r7, ip, lr}
|
||||||
|
bgt 3b
|
||||||
|
ldmeqfd sp!, {r4-r7, pc}
|
||||||
|
|
||||||
|
tst r2, #32
|
||||||
|
stmneia r0!, {r1, r3-r7, ip, lr}
|
||||||
|
tst r2, #16
|
||||||
|
stmneia r0!, {r4-r7}
|
||||||
|
ldmfd sp!, {r4-r7, lr}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
4: tst r2, #8
|
4: tst r2, #8
|
||||||
stmneia r0!, {r1, r3}
|
stmneia r0!, {r1, r3}
|
||||||
tst r2, #4
|
tst r2, #4
|
||||||
|
|
|
@ -39,6 +39,9 @@ ENTRY(__memzero)
|
||||||
*/
|
*/
|
||||||
cmp r1, #16 @ 1 we can skip this chunk if we
|
cmp r1, #16 @ 1 we can skip this chunk if we
|
||||||
blt 4f @ 1 have < 16 bytes
|
blt 4f @ 1 have < 16 bytes
|
||||||
|
|
||||||
|
#if ! CALGN(1)+0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need an extra register for this loop - save the return address and
|
* We need an extra register for this loop - save the return address and
|
||||||
* use the LR
|
* use the LR
|
||||||
|
@ -64,6 +67,47 @@ ENTRY(__memzero)
|
||||||
stmneia r0!, {r2, r3, ip, lr} @ 4
|
stmneia r0!, {r2, r3, ip, lr} @ 4
|
||||||
ldr lr, [sp], #4 @ 1
|
ldr lr, [sp], #4 @ 1
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This version aligns the destination pointer in order to write
|
||||||
|
* whole cache lines at once.
|
||||||
|
*/
|
||||||
|
|
||||||
|
stmfd sp!, {r4-r7, lr}
|
||||||
|
mov r4, r2
|
||||||
|
mov r5, r2
|
||||||
|
mov r6, r2
|
||||||
|
mov r7, r2
|
||||||
|
mov ip, r2
|
||||||
|
mov lr, r2
|
||||||
|
|
||||||
|
cmp r1, #96
|
||||||
|
andgts ip, r0, #31
|
||||||
|
ble 3f
|
||||||
|
|
||||||
|
rsb ip, ip, #32
|
||||||
|
sub r1, r1, ip
|
||||||
|
movs ip, ip, lsl #(32 - 4)
|
||||||
|
stmcsia r0!, {r4, r5, r6, r7}
|
||||||
|
stmmiia r0!, {r4, r5}
|
||||||
|
movs ip, ip, lsl #2
|
||||||
|
strcs r2, [r0], #4
|
||||||
|
|
||||||
|
3: subs r1, r1, #64
|
||||||
|
stmgeia r0!, {r2-r7, ip, lr}
|
||||||
|
stmgeia r0!, {r2-r7, ip, lr}
|
||||||
|
bgt 3b
|
||||||
|
ldmeqfd sp!, {r4-r7, pc}
|
||||||
|
|
||||||
|
tst r1, #32
|
||||||
|
stmneia r0!, {r2-r7, ip, lr}
|
||||||
|
tst r1, #16
|
||||||
|
stmneia r0!, {r4-r7}
|
||||||
|
ldmfd sp!, {r4-r7, lr}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
4: tst r1, #8 @ 1 8 bytes or more?
|
4: tst r1, #8 @ 1 8 bytes or more?
|
||||||
stmneia r0!, {r2, r3} @ 2
|
stmneia r0!, {r2, r3} @ 2
|
||||||
tst r1, #4 @ 1 4 bytes or more?
|
tst r1, #4 @ 1 4 bytes or more?
|
||||||
|
|
Loading…
Reference in a new issue