linux-stable/arch/x86/lib/copy_user_64.S
Linus Torvalds 034ff37d34 x86: rewrite '__copy_user_nocache' function
I didn't really want to do this, but as part of all the other changes to
the user copy loops, I've been looking at this horror.

I tried to clean it up multiple times, but every time I just found more
problems, and the way it's written, it's just too hard to fix them.

For example, the code is written to do quad-word alignment, and will use
regular byte accesses to get to that point.  That's fairly simple, but
it means that any initial 8-byte alignment will be done with cached
copies.

However, the code is then very careful to do any 4-byte _tail_ accesses
using an uncached 4-byte write, and that was claimed to be relevant in
commit a82eee7424 ("x86/uaccess/64: Handle the caching of 4-byte
nocache copies properly in __copy_user_nocache()").

So if you do a 4-byte copy using that function, it carefully uses a
4-byte 'movnti' for the destination.  But if you were to do a 12-byte
copy that is 4-byte aligned, it would _not_ do a 4-byte 'movnti'
followed by an 8-byte 'movnti' to keep it all uncached.

Instead, it would align the destination to 8 bytes using a
byte-at-a-time loop, and then do an 8-byte 'movnti' for the final 8
bytes.
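
To make that 12-byte case concrete, here is a minimal userspace sketch (not the
kernel code; the helper name and the assumption that the destination sits 4 bytes
past an 8-byte boundary are only illustrative) of the consistent behaviour
described above, i.e. a 4-byte 'movnti' followed by an 8-byte 'movnti', written
with the SSE2 intrinsics that compile down to those instructions:

  #include <immintrin.h>
  #include <stdint.h>
  #include <string.h>

  /* Copy 12 bytes to a destination with dst % 8 == 4, bypassing the
   * cache on the store side. */
  static void copy_12_nocache(void *dst, const void *src)
  {
          uint32_t lo;
          uint64_t hi;

          /* The loads may still be cached; the point is the stores. */
          memcpy(&lo, src, 4);
          memcpy(&hi, (const char *)src + 4, 8);

          /* 4-byte movnti, then 8-byte movnti */
          _mm_stream_si32((int *)dst, (int)lo);
          _mm_stream_si64((long long *)((char *)dst + 4), (long long)hi);

          /* order the non-temporal stores */
          _mm_sfence();
  }

With that split, every stored byte bypasses the cache, so a caller would not
have to special-case the head of the buffer.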

The main caller that cares is __copy_user_flushcache(), which knows
about this insanity, and has odd cases for it all.  But I just can't
deal with looking at this kind of "it does one case right, and another
related case entirely wrong".

And the code really wasn't fixable without hard drugs, which I try to
avoid.

So instead, rewrite it in a form that hopefully not only gets this
right, but is a bit more maintainable.  Knock wood.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2023-04-20 18:53:49 -07:00


/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/export.h>
/*
 * rep_movs_alternative - memory copy with exception handling.
 * This version is for CPUs that don't have FSRM (Fast Short Rep Movs)
 *
 * Input:
 * rdi destination
 * rsi source
 * rcx count
 *
 * Output:
 * rcx uncopied bytes or 0 if successful.
 *
 * NOTE! The calling convention is very intentionally the same as
 * for 'rep movs', so that we can rewrite the function call with
 * just a plain 'rep movs' on machines that have FSRM. But to make
 * it simpler for us, we can clobber rsi/rdi and rax/r8-r11 freely.
 */
SYM_FUNC_START(rep_movs_alternative)
	cmpq $64,%rcx
	jae .Lunrolled

	cmp $8,%ecx
	jae .Lword

	testl %ecx,%ecx
	je .Lexit

.Lcopy_user_tail:
0:	movb (%rsi),%al
1:	movb %al,(%rdi)
	inc %rdi
	inc %rsi
	dec %rcx
	jne .Lcopy_user_tail
.Lexit:
	RET

	_ASM_EXTABLE_UA( 0b, .Lexit)
	_ASM_EXTABLE_UA( 1b, .Lexit)

	.p2align 4
.Lword:
2:	movq (%rsi),%rax
3:	movq %rax,(%rdi)
	addq $8,%rsi
	addq $8,%rdi
	sub $8,%ecx
	je .Lexit
	cmp $8,%ecx
	jae .Lword
	jmp .Lcopy_user_tail

	_ASM_EXTABLE_UA( 2b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA( 3b, .Lcopy_user_tail)

	.p2align 4
.Lunrolled:
10:	movq (%rsi),%r8
11:	movq 8(%rsi),%r9
12:	movq 16(%rsi),%r10
13:	movq 24(%rsi),%r11
14:	movq %r8,(%rdi)
15:	movq %r9,8(%rdi)
16:	movq %r10,16(%rdi)
17:	movq %r11,24(%rdi)
20:	movq 32(%rsi),%r8
21:	movq 40(%rsi),%r9
22:	movq 48(%rsi),%r10
23:	movq 56(%rsi),%r11
24:	movq %r8,32(%rdi)
25:	movq %r9,40(%rdi)
26:	movq %r10,48(%rdi)
27:	movq %r11,56(%rdi)
	addq $64,%rsi
	addq $64,%rdi
	subq $64,%rcx
	cmpq $64,%rcx
	jae .Lunrolled
	cmpl $8,%ecx
	jae .Lword
	testl %ecx,%ecx
	jne .Lcopy_user_tail
	RET

	_ASM_EXTABLE_UA(10b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(11b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(12b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(13b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(14b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(15b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(16b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(17b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(20b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(21b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(22b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(23b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(24b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(25b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(26b, .Lcopy_user_tail)
	_ASM_EXTABLE_UA(27b, .Lcopy_user_tail)
SYM_FUNC_END(rep_movs_alternative)
EXPORT_SYMBOL(rep_movs_alternative)
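
For context on the NOTE in the header comment: because the register convention
matches 'rep movs' exactly, the call site can be patched with the kernel's
alternatives mechanism rather than branching at runtime. Below is a rough sketch
of such a caller, modeled on the copy_user_generic() helper in
arch/x86/include/asm/uaccess_64.h; treat the exact constraint and clobber lists
as an approximation rather than a verbatim copy of the tree:

  static __always_inline __must_check unsigned long
  copy_user_generic(void *to, const void *from, unsigned long len)
  {
          stac();
          /*
           * CPUs with FSRM run a plain 'rep movsb'; everything else
           * calls rep_movs_alternative.  Either way the registers are
           * rdi = dest, rsi = src, rcx = count, and rcx holds the
           * number of uncopied bytes on the way out.
           */
          asm volatile(
                  "1:\n\t"
                  ALTERNATIVE("rep movsb",
                              "call rep_movs_alternative",
                              ALT_NOT(X86_FEATURE_FSRM))
                  "2:\n"
                  _ASM_EXTABLE_UA(1b, 2b)
                  : "+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
                  : : "memory", "rax", "r8", "r9", "r10", "r11");
          clac();
          return len;     /* 0 on success, otherwise bytes not copied */
  }

This is also why the function above is free to clobber rsi/rdi and rax/r8-r11,
but must leave the count of uncopied bytes in rcx when it faults partway
through, just as 'rep movs' would.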