commit 034ff37d34
I didn't really want to do this, but as part of all the other changes to
the user copy loops, I've been looking at this horror.
I tried to clean it up multiple times, but every time I just found more
problems, and the way it's written, it's just too hard to fix them.
For example, the code is written to do quad-word alignment, and will use
regular byte accesses to get to that point. That's fairly simple, but
it means that any initial 8-byte alignment will be done with cached
copies.
However, the code then is very careful to do any 4-byte _tail_ accesses
using an uncached 4-byte write, and that was claimed to be relevant in
commit a82eee7424
("x86/uaccess/64: Handle the caching of 4-byte
nocache copies properly in __copy_user_nocache()").
So if you do a 4-byte copy using that function, it carefully uses a
4-byte 'movnti' for the destination. But if you were to do a 12-byte
copy that is 4-byte aligned, it would _not_ do a 4-byte 'movnti'
followed by an 8-byte 'movnti' to keep it all uncached.
Instead, it would align the destination to 8 bytes using a
byte-at-a-time loop, and then do an 8-byte 'movnti' for the final 8
bytes.
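For comparison, a consistent nocache handling of that 4-byte-aligned,
12-byte case would keep every store uncached, roughly like this (an
illustrative sketch written for this note, not code from the old
function; %rdi is assumed to be the destination and %rsi the source):

	movl	(%rsi),%eax
	movnti	%eax,(%rdi)		/* 4-byte uncached store */
	movq	4(%rsi),%rax
	movnti	%rax,4(%rdi)		/* 8-byte uncached store */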
The main caller that cares is __copy_user_flushcache(), which knows
about this insanity, and has odd cases for it all. But I just can't
deal with looking at this kind of "it does one case right, and another
related case entirely wrong".
And the code really wasn't fixable without hard drugs, which I try to
avoid.
So instead, rewrite it in a form that hopefully not only gets this
right, but is a bit more maintainable. Knock wood.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/export.h>

/*
 * rep_movs_alternative - memory copy with exception handling.
 * This version is for CPUs that don't have FSRM (Fast Short Rep Movs)
 *
 * Input:
 * rdi destination
 * rsi source
 * rcx count
 *
 * Output:
 * rcx uncopied bytes or 0 if successful.
 *
 * NOTE! The calling convention is very intentionally the same as
 * for 'rep movs', so that we can rewrite the function call with
 * just a plain 'rep movs' on machines that have FSRM. But to make
 * it simpler for us, we can clobber rsi/rdi and rax/r8-r11 freely.
 */
SYM_FUNC_START(rep_movs_alternative)
	cmpq $64,%rcx
	jae .Lunrolled

	cmp $8,%ecx
	jae .Lword

	testl %ecx,%ecx
	je .Lexit

.Lcopy_user_tail:
0:	movb (%rsi),%al
1:	movb %al,(%rdi)
	inc %rdi
	inc %rsi
	dec %rcx
	jne .Lcopy_user_tail
.Lexit:
	RET

	_ASM_EXTABLE_UA( 0b, .Lexit)
	_ASM_EXTABLE_UA( 1b, .Lexit)

	.p2align 4
.Lword:
2:	movq (%rsi),%rax
3:	movq %rax,(%rdi)
	addq $8,%rsi
	addq $8,%rdi
	sub $8,%ecx
	je .Lexit
	cmp $8,%ecx
	jae .Lword
	jmp .Lcopy_user_tail

	_ASM_EXTABLE_UA( 2b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA( 3b, .Lcopy_user_tail)

	.p2align 4
.Lunrolled:
10:	movq (%rsi),%r8
11:	movq 8(%rsi),%r9
12:	movq 16(%rsi),%r10
13:	movq 24(%rsi),%r11
14:	movq %r8,(%rdi)
15:	movq %r9,8(%rdi)
16:	movq %r10,16(%rdi)
17:	movq %r11,24(%rdi)
20:	movq 32(%rsi),%r8
21:	movq 40(%rsi),%r9
22:	movq 48(%rsi),%r10
23:	movq 56(%rsi),%r11
24:	movq %r8,32(%rdi)
25:	movq %r9,40(%rdi)
26:	movq %r10,48(%rdi)
27:	movq %r11,56(%rdi)
	addq $64,%rsi
	addq $64,%rdi
	subq $64,%rcx
	cmpq $64,%rcx
	jae .Lunrolled
	cmpl $8,%ecx
	jae .Lword
	testl %ecx,%ecx
	jne .Lcopy_user_tail
	RET

	_ASM_EXTABLE_UA(10b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(11b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(12b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(13b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(14b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(15b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(16b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(17b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(20b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(21b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(22b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(23b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(24b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(25b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(26b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(27b, .Lcopy_user_tail)
SYM_FUNC_END(rep_movs_alternative)
EXPORT_SYMBOL(rep_movs_alternative)
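The NOTE above about the calling convention is what makes the
"alternative" in the name work: because the register contract is
exactly that of 'rep movs', the call site can be patched down to a
bare 'rep movsb' on FSRM hardware. As a rough sketch of such a call
site, assuming <asm/alternative.h> and <asm/cpufeatures.h> are
available (the kernel's real call site is written as inline asm in the
C uaccess code and differs in form, so treat this purely as an
illustration of the contract):

	/* %rdi = destination, %rsi = source, %rcx = byte count */
	ALTERNATIVE "call rep_movs_alternative", "rep movsb", X86_FEATURE_FSRM
	/* on return: %rcx = bytes left uncopied, 0 on success */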