mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-13 22:25:03 +00:00
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/asm changes from Ingo Molnar. * 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86: Include probe_roms.h in probe_roms.c; x86/32: Print control and debug registers for kernel context; x86: Tighten dependencies of CPU_SUP_*_32; x86/numa: Improve internode cache alignment; x86: Fix the NMI nesting comments; x86-64: Improve insn scheduling in SAVE_ARGS_IRQ; x86-64: Fix CFI annotations for NMI nesting code; bitops: Add missing parentheses to new get_order macro; bitops: Optimise get_order(); bitops: Adjust the comment on get_order() to describe the size==0 case; x86/spinlocks: Eliminate TICKET_MASK; x86-64: Handle byte-wise tail copying in memcpy() without a loop; x86-64: Fix memcpy() to support sizes of 4Gb and above; x86-64: Fix memset() to support sizes of 4Gb and above; x86-64: Slightly shorten copy_page()
This commit is contained in:
commit
e17fdf5c67
10 changed files with 128 additions and 98 deletions
|
@ -303,7 +303,6 @@ config X86_GENERIC
|
||||||
config X86_INTERNODE_CACHE_SHIFT
|
config X86_INTERNODE_CACHE_SHIFT
|
||||||
int
|
int
|
||||||
default "12" if X86_VSMP
|
default "12" if X86_VSMP
|
||||||
default "7" if NUMA
|
|
||||||
default X86_L1_CACHE_SHIFT
|
default X86_L1_CACHE_SHIFT
|
||||||
|
|
||||||
config X86_CMPXCHG
|
config X86_CMPXCHG
|
||||||
|
@ -441,7 +440,7 @@ config CPU_SUP_INTEL
|
||||||
config CPU_SUP_CYRIX_32
|
config CPU_SUP_CYRIX_32
|
||||||
default y
|
default y
|
||||||
bool "Support Cyrix processors" if PROCESSOR_SELECT
|
bool "Support Cyrix processors" if PROCESSOR_SELECT
|
||||||
depends on !64BIT
|
depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
|
||||||
---help---
|
---help---
|
||||||
This enables detection, tunings and quirks for Cyrix processors
|
This enables detection, tunings and quirks for Cyrix processors
|
||||||
|
|
||||||
|
@ -495,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
|
||||||
config CPU_SUP_UMC_32
|
config CPU_SUP_UMC_32
|
||||||
default y
|
default y
|
||||||
bool "Support UMC processors" if PROCESSOR_SELECT
|
bool "Support UMC processors" if PROCESSOR_SELECT
|
||||||
depends on !64BIT
|
depends on M386 || M486 || (EXPERT && !64BIT)
|
||||||
---help---
|
---help---
|
||||||
This enables detection, tunings and quirks for UMC processors
|
This enables detection, tunings and quirks for UMC processors
|
||||||
|
|
||||||
|
|
|
@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
|
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
|
||||||
|
|
||||||
return !!(tmp.tail ^ tmp.head);
|
return tmp.tail != tmp.head;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
|
static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
|
||||||
{
|
{
|
||||||
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
|
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
|
||||||
|
|
||||||
return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
|
return (__ticket_t)(tmp.tail - tmp.head) > 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef CONFIG_PARAVIRT_SPINLOCKS
|
#ifndef CONFIG_PARAVIRT_SPINLOCKS
|
||||||
|
|
|
@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
|
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
|
||||||
#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))
|
|
||||||
|
|
||||||
typedef struct arch_spinlock {
|
typedef struct arch_spinlock {
|
||||||
union {
|
union {
|
||||||
|
|
|
@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
print_modules();
|
print_modules();
|
||||||
__show_regs(regs, 0);
|
__show_regs(regs, !user_mode_vm(regs));
|
||||||
|
|
||||||
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
|
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
|
||||||
TASK_COMM_LEN, current->comm, task_pid_nr(current),
|
TASK_COMM_LEN, current->comm, task_pid_nr(current),
|
||||||
|
|
|
@ -320,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
|
||||||
movq %rsp, %rsi
|
movq %rsp, %rsi
|
||||||
|
|
||||||
leaq -RBP(%rsp),%rdi /* arg1 for handler */
|
leaq -RBP(%rsp),%rdi /* arg1 for handler */
|
||||||
testl $3, CS(%rdi)
|
testl $3, CS-RBP(%rsi)
|
||||||
je 1f
|
je 1f
|
||||||
SWAPGS
|
SWAPGS
|
||||||
/*
|
/*
|
||||||
|
@ -330,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
|
||||||
* moving irq_enter into assembly, which would be too much work)
|
* moving irq_enter into assembly, which would be too much work)
|
||||||
*/
|
*/
|
||||||
1: incl PER_CPU_VAR(irq_count)
|
1: incl PER_CPU_VAR(irq_count)
|
||||||
jne 2f
|
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
|
||||||
mov PER_CPU_VAR(irq_stack_ptr),%rsp
|
|
||||||
CFI_DEF_CFA_REGISTER rsi
|
CFI_DEF_CFA_REGISTER rsi
|
||||||
|
|
||||||
2: /* Store previous stack value */
|
/* Store previous stack value */
|
||||||
pushq %rsi
|
pushq %rsi
|
||||||
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
|
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
|
||||||
0x77 /* DW_OP_breg7 */, 0, \
|
0x77 /* DW_OP_breg7 */, 0, \
|
||||||
|
@ -1530,6 +1529,7 @@ ENTRY(nmi)
|
||||||
|
|
||||||
/* Use %rdx as our temp variable throughout */
|
/* Use %rdx as our temp variable throughout */
|
||||||
pushq_cfi %rdx
|
pushq_cfi %rdx
|
||||||
|
CFI_REL_OFFSET rdx, 0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If %cs was not the kernel segment, then the NMI triggered in user
|
* If %cs was not the kernel segment, then the NMI triggered in user
|
||||||
|
@ -1554,6 +1554,7 @@ ENTRY(nmi)
|
||||||
*/
|
*/
|
||||||
lea 6*8(%rsp), %rdx
|
lea 6*8(%rsp), %rdx
|
||||||
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
|
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
|
||||||
|
CFI_REMEMBER_STATE
|
||||||
|
|
||||||
nested_nmi:
|
nested_nmi:
|
||||||
/*
|
/*
|
||||||
|
@ -1585,10 +1586,12 @@ nested_nmi:
|
||||||
|
|
||||||
nested_nmi_out:
|
nested_nmi_out:
|
||||||
popq_cfi %rdx
|
popq_cfi %rdx
|
||||||
|
CFI_RESTORE rdx
|
||||||
|
|
||||||
/* No need to check faults here */
|
/* No need to check faults here */
|
||||||
INTERRUPT_RETURN
|
INTERRUPT_RETURN
|
||||||
|
|
||||||
|
CFI_RESTORE_STATE
|
||||||
first_nmi:
|
first_nmi:
|
||||||
/*
|
/*
|
||||||
* Because nested NMIs will use the pushed location that we
|
* Because nested NMIs will use the pushed location that we
|
||||||
|
@ -1620,10 +1623,15 @@ first_nmi:
|
||||||
* | pt_regs |
|
* | pt_regs |
|
||||||
* +-------------------------+
|
* +-------------------------+
|
||||||
*
|
*
|
||||||
* The saved RIP is used to fix up the copied RIP that a nested
|
* The saved stack frame is used to fix up the copied stack frame
|
||||||
* NMI may zero out. The original stack frame and the temp storage
|
* that a nested NMI may change to make the interrupted NMI iret jump
|
||||||
|
* to the repeat_nmi. The original stack frame and the temp storage
|
||||||
* are also used by nested NMIs and cannot be trusted on exit.
|
* are also used by nested NMIs and cannot be trusted on exit.
|
||||||
*/
|
*/
|
||||||
|
/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
|
||||||
|
movq (%rsp), %rdx
|
||||||
|
CFI_RESTORE rdx
|
||||||
|
|
||||||
/* Set the NMI executing variable on the stack. */
|
/* Set the NMI executing variable on the stack. */
|
||||||
pushq_cfi $1
|
pushq_cfi $1
|
||||||
|
|
||||||
|
@ -1631,22 +1639,39 @@ first_nmi:
|
||||||
.rept 5
|
.rept 5
|
||||||
pushq_cfi 6*8(%rsp)
|
pushq_cfi 6*8(%rsp)
|
||||||
.endr
|
.endr
|
||||||
|
CFI_DEF_CFA_OFFSET SS+8-RIP
|
||||||
|
|
||||||
|
/* Everything up to here is safe from nested NMIs */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If there was a nested NMI, the first NMI's iret will return
|
||||||
|
* here. But NMIs are still enabled and we can take another
|
||||||
|
* nested NMI. The nested NMI checks the interrupted RIP to see
|
||||||
|
* if it is between repeat_nmi and end_repeat_nmi, and if so
|
||||||
|
* it will just return, as we are about to repeat an NMI anyway.
|
||||||
|
* This makes it safe to copy to the stack frame that a nested
|
||||||
|
* NMI will update.
|
||||||
|
*/
|
||||||
|
repeat_nmi:
|
||||||
|
/*
|
||||||
|
* Update the stack variable to say we are still in NMI (the update
|
||||||
|
* is benign for the non-repeat case, where 1 was pushed just above
|
||||||
|
* to this very stack slot).
|
||||||
|
*/
|
||||||
|
movq $1, 5*8(%rsp)
|
||||||
|
|
||||||
/* Make another copy, this one may be modified by nested NMIs */
|
/* Make another copy, this one may be modified by nested NMIs */
|
||||||
.rept 5
|
.rept 5
|
||||||
pushq_cfi 4*8(%rsp)
|
pushq_cfi 4*8(%rsp)
|
||||||
.endr
|
.endr
|
||||||
|
CFI_DEF_CFA_OFFSET SS+8-RIP
|
||||||
/* Do not pop rdx, nested NMIs will corrupt it */
|
end_repeat_nmi:
|
||||||
movq 11*8(%rsp), %rdx
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Everything below this point can be preempted by a nested
|
* Everything below this point can be preempted by a nested
|
||||||
* NMI if the first NMI took an exception. Repeated NMIs
|
* NMI if the first NMI took an exception and reset our iret stack
|
||||||
* caused by an exception and nested NMI will start here, and
|
* so that we repeat another NMI.
|
||||||
* can still be preempted by another NMI.
|
|
||||||
*/
|
*/
|
||||||
restart_nmi:
|
|
||||||
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
|
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
|
||||||
subq $ORIG_RAX-R15, %rsp
|
subq $ORIG_RAX-R15, %rsp
|
||||||
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
|
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
|
||||||
|
@ -1675,26 +1700,6 @@ nmi_restore:
|
||||||
CFI_ENDPROC
|
CFI_ENDPROC
|
||||||
END(nmi)
|
END(nmi)
|
||||||
|
|
||||||
/*
|
|
||||||
* If an NMI hit an iret because of an exception or breakpoint,
|
|
||||||
* it can lose its NMI context, and a nested NMI may come in.
|
|
||||||
* In that case, the nested NMI will change the preempted NMI's
|
|
||||||
* stack to jump to here when it does the final iret.
|
|
||||||
*/
|
|
||||||
repeat_nmi:
|
|
||||||
INTR_FRAME
|
|
||||||
/* Update the stack variable to say we are still in NMI */
|
|
||||||
movq $1, 5*8(%rsp)
|
|
||||||
|
|
||||||
/* copy the saved stack back to copy stack */
|
|
||||||
.rept 5
|
|
||||||
pushq_cfi 4*8(%rsp)
|
|
||||||
.endr
|
|
||||||
|
|
||||||
jmp restart_nmi
|
|
||||||
CFI_ENDPROC
|
|
||||||
end_repeat_nmi:
|
|
||||||
|
|
||||||
ENTRY(ignore_sysret)
|
ENTRY(ignore_sysret)
|
||||||
CFI_STARTPROC
|
CFI_STARTPROC
|
||||||
mov $-ENOSYS,%eax
|
mov $-ENOSYS,%eax
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <linux/pci.h>
|
#include <linux/pci.h>
|
||||||
#include <linux/export.h>
|
#include <linux/export.h>
|
||||||
|
|
||||||
|
#include <asm/probe_roms.h>
|
||||||
#include <asm/pci-direct.h>
|
#include <asm/pci-direct.h>
|
||||||
#include <asm/e820.h>
|
#include <asm/e820.h>
|
||||||
#include <asm/mmzone.h>
|
#include <asm/mmzone.h>
|
||||||
|
|
|
@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
|
||||||
|
|
||||||
ENTRY(copy_page)
|
ENTRY(copy_page)
|
||||||
CFI_STARTPROC
|
CFI_STARTPROC
|
||||||
subq $3*8,%rsp
|
subq $2*8,%rsp
|
||||||
CFI_ADJUST_CFA_OFFSET 3*8
|
CFI_ADJUST_CFA_OFFSET 2*8
|
||||||
movq %rbx,(%rsp)
|
movq %rbx,(%rsp)
|
||||||
CFI_REL_OFFSET rbx, 0
|
CFI_REL_OFFSET rbx, 0
|
||||||
movq %r12,1*8(%rsp)
|
movq %r12,1*8(%rsp)
|
||||||
CFI_REL_OFFSET r12, 1*8
|
CFI_REL_OFFSET r12, 1*8
|
||||||
movq %r13,2*8(%rsp)
|
|
||||||
CFI_REL_OFFSET r13, 2*8
|
|
||||||
|
|
||||||
movl $(4096/64)-5,%ecx
|
movl $(4096/64)-5,%ecx
|
||||||
.p2align 4
|
.p2align 4
|
||||||
|
@ -91,10 +89,8 @@ ENTRY(copy_page)
|
||||||
CFI_RESTORE rbx
|
CFI_RESTORE rbx
|
||||||
movq 1*8(%rsp),%r12
|
movq 1*8(%rsp),%r12
|
||||||
CFI_RESTORE r12
|
CFI_RESTORE r12
|
||||||
movq 2*8(%rsp),%r13
|
addq $2*8,%rsp
|
||||||
CFI_RESTORE r13
|
CFI_ADJUST_CFA_OFFSET -2*8
|
||||||
addq $3*8,%rsp
|
|
||||||
CFI_ADJUST_CFA_OFFSET -3*8
|
|
||||||
ret
|
ret
|
||||||
.Lcopy_page_end:
|
.Lcopy_page_end:
|
||||||
CFI_ENDPROC
|
CFI_ENDPROC
|
||||||
|
|
|
@ -27,9 +27,8 @@
|
||||||
.section .altinstr_replacement, "ax", @progbits
|
.section .altinstr_replacement, "ax", @progbits
|
||||||
.Lmemcpy_c:
|
.Lmemcpy_c:
|
||||||
movq %rdi, %rax
|
movq %rdi, %rax
|
||||||
|
movq %rdx, %rcx
|
||||||
movl %edx, %ecx
|
shrq $3, %rcx
|
||||||
shrl $3, %ecx
|
|
||||||
andl $7, %edx
|
andl $7, %edx
|
||||||
rep movsq
|
rep movsq
|
||||||
movl %edx, %ecx
|
movl %edx, %ecx
|
||||||
|
@ -48,8 +47,7 @@
|
||||||
.section .altinstr_replacement, "ax", @progbits
|
.section .altinstr_replacement, "ax", @progbits
|
||||||
.Lmemcpy_c_e:
|
.Lmemcpy_c_e:
|
||||||
movq %rdi, %rax
|
movq %rdi, %rax
|
||||||
|
movq %rdx, %rcx
|
||||||
movl %edx, %ecx
|
|
||||||
rep movsb
|
rep movsb
|
||||||
ret
|
ret
|
||||||
.Lmemcpy_e_e:
|
.Lmemcpy_e_e:
|
||||||
|
@ -60,10 +58,7 @@ ENTRY(memcpy)
|
||||||
CFI_STARTPROC
|
CFI_STARTPROC
|
||||||
movq %rdi, %rax
|
movq %rdi, %rax
|
||||||
|
|
||||||
/*
|
cmpq $0x20, %rdx
|
||||||
* Use 32bit CMP here to avoid long NOP padding.
|
|
||||||
*/
|
|
||||||
cmp $0x20, %edx
|
|
||||||
jb .Lhandle_tail
|
jb .Lhandle_tail
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -72,7 +67,7 @@ ENTRY(memcpy)
|
||||||
*/
|
*/
|
||||||
cmp %dil, %sil
|
cmp %dil, %sil
|
||||||
jl .Lcopy_backward
|
jl .Lcopy_backward
|
||||||
subl $0x20, %edx
|
subq $0x20, %rdx
|
||||||
.Lcopy_forward_loop:
|
.Lcopy_forward_loop:
|
||||||
subq $0x20, %rdx
|
subq $0x20, %rdx
|
||||||
|
|
||||||
|
@ -91,7 +86,7 @@ ENTRY(memcpy)
|
||||||
movq %r11, 3*8(%rdi)
|
movq %r11, 3*8(%rdi)
|
||||||
leaq 4*8(%rdi), %rdi
|
leaq 4*8(%rdi), %rdi
|
||||||
jae .Lcopy_forward_loop
|
jae .Lcopy_forward_loop
|
||||||
addq $0x20, %rdx
|
addl $0x20, %edx
|
||||||
jmp .Lhandle_tail
|
jmp .Lhandle_tail
|
||||||
|
|
||||||
.Lcopy_backward:
|
.Lcopy_backward:
|
||||||
|
@ -123,11 +118,11 @@ ENTRY(memcpy)
|
||||||
/*
|
/*
|
||||||
* Calculate copy position to head.
|
* Calculate copy position to head.
|
||||||
*/
|
*/
|
||||||
addq $0x20, %rdx
|
addl $0x20, %edx
|
||||||
subq %rdx, %rsi
|
subq %rdx, %rsi
|
||||||
subq %rdx, %rdi
|
subq %rdx, %rdi
|
||||||
.Lhandle_tail:
|
.Lhandle_tail:
|
||||||
cmpq $16, %rdx
|
cmpl $16, %edx
|
||||||
jb .Lless_16bytes
|
jb .Lless_16bytes
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -144,7 +139,7 @@ ENTRY(memcpy)
|
||||||
retq
|
retq
|
||||||
.p2align 4
|
.p2align 4
|
||||||
.Lless_16bytes:
|
.Lless_16bytes:
|
||||||
cmpq $8, %rdx
|
cmpl $8, %edx
|
||||||
jb .Lless_8bytes
|
jb .Lless_8bytes
|
||||||
/*
|
/*
|
||||||
* Move data from 8 bytes to 15 bytes.
|
* Move data from 8 bytes to 15 bytes.
|
||||||
|
@ -156,7 +151,7 @@ ENTRY(memcpy)
|
||||||
retq
|
retq
|
||||||
.p2align 4
|
.p2align 4
|
||||||
.Lless_8bytes:
|
.Lless_8bytes:
|
||||||
cmpq $4, %rdx
|
cmpl $4, %edx
|
||||||
jb .Lless_3bytes
|
jb .Lless_3bytes
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -169,18 +164,19 @@ ENTRY(memcpy)
|
||||||
retq
|
retq
|
||||||
.p2align 4
|
.p2align 4
|
||||||
.Lless_3bytes:
|
.Lless_3bytes:
|
||||||
cmpl $0, %edx
|
subl $1, %edx
|
||||||
je .Lend
|
jb .Lend
|
||||||
/*
|
/*
|
||||||
* Move data from 1 byte to 3 bytes.
|
* Move data from 1 byte to 3 bytes.
|
||||||
*/
|
*/
|
||||||
.Lloop_1:
|
movzbl (%rsi), %ecx
|
||||||
movb (%rsi), %r8b
|
jz .Lstore_1byte
|
||||||
movb %r8b, (%rdi)
|
movzbq 1(%rsi), %r8
|
||||||
incq %rdi
|
movzbq (%rsi, %rdx), %r9
|
||||||
incq %rsi
|
movb %r8b, 1(%rdi)
|
||||||
decl %edx
|
movb %r9b, (%rdi, %rdx)
|
||||||
jnz .Lloop_1
|
.Lstore_1byte:
|
||||||
|
movb %cl, (%rdi)
|
||||||
|
|
||||||
.Lend:
|
.Lend:
|
||||||
retq
|
retq
|
||||||
|
|
|
@ -19,16 +19,15 @@
|
||||||
.section .altinstr_replacement, "ax", @progbits
|
.section .altinstr_replacement, "ax", @progbits
|
||||||
.Lmemset_c:
|
.Lmemset_c:
|
||||||
movq %rdi,%r9
|
movq %rdi,%r9
|
||||||
movl %edx,%r8d
|
movq %rdx,%rcx
|
||||||
andl $7,%r8d
|
andl $7,%edx
|
||||||
movl %edx,%ecx
|
shrq $3,%rcx
|
||||||
shrl $3,%ecx
|
|
||||||
/* expand byte value */
|
/* expand byte value */
|
||||||
movzbl %sil,%esi
|
movzbl %sil,%esi
|
||||||
movabs $0x0101010101010101,%rax
|
movabs $0x0101010101010101,%rax
|
||||||
mulq %rsi /* with rax, clobbers rdx */
|
imulq %rsi,%rax
|
||||||
rep stosq
|
rep stosq
|
||||||
movl %r8d,%ecx
|
movl %edx,%ecx
|
||||||
rep stosb
|
rep stosb
|
||||||
movq %r9,%rax
|
movq %r9,%rax
|
||||||
ret
|
ret
|
||||||
|
@ -50,7 +49,7 @@
|
||||||
.Lmemset_c_e:
|
.Lmemset_c_e:
|
||||||
movq %rdi,%r9
|
movq %rdi,%r9
|
||||||
movb %sil,%al
|
movb %sil,%al
|
||||||
movl %edx,%ecx
|
movq %rdx,%rcx
|
||||||
rep stosb
|
rep stosb
|
||||||
movq %r9,%rax
|
movq %r9,%rax
|
||||||
ret
|
ret
|
||||||
|
@ -61,12 +60,11 @@ ENTRY(memset)
|
||||||
ENTRY(__memset)
|
ENTRY(__memset)
|
||||||
CFI_STARTPROC
|
CFI_STARTPROC
|
||||||
movq %rdi,%r10
|
movq %rdi,%r10
|
||||||
movq %rdx,%r11
|
|
||||||
|
|
||||||
/* expand byte value */
|
/* expand byte value */
|
||||||
movzbl %sil,%ecx
|
movzbl %sil,%ecx
|
||||||
movabs $0x0101010101010101,%rax
|
movabs $0x0101010101010101,%rax
|
||||||
mul %rcx /* with rax, clobbers rdx */
|
imulq %rcx,%rax
|
||||||
|
|
||||||
/* align dst */
|
/* align dst */
|
||||||
movl %edi,%r9d
|
movl %edi,%r9d
|
||||||
|
@ -75,13 +73,13 @@ ENTRY(__memset)
|
||||||
CFI_REMEMBER_STATE
|
CFI_REMEMBER_STATE
|
||||||
.Lafter_bad_alignment:
|
.Lafter_bad_alignment:
|
||||||
|
|
||||||
movl %r11d,%ecx
|
movq %rdx,%rcx
|
||||||
shrl $6,%ecx
|
shrq $6,%rcx
|
||||||
jz .Lhandle_tail
|
jz .Lhandle_tail
|
||||||
|
|
||||||
.p2align 4
|
.p2align 4
|
||||||
.Lloop_64:
|
.Lloop_64:
|
||||||
decl %ecx
|
decq %rcx
|
||||||
movq %rax,(%rdi)
|
movq %rax,(%rdi)
|
||||||
movq %rax,8(%rdi)
|
movq %rax,8(%rdi)
|
||||||
movq %rax,16(%rdi)
|
movq %rax,16(%rdi)
|
||||||
|
@ -97,7 +95,7 @@ ENTRY(__memset)
|
||||||
to predict jump tables. */
|
to predict jump tables. */
|
||||||
.p2align 4
|
.p2align 4
|
||||||
.Lhandle_tail:
|
.Lhandle_tail:
|
||||||
movl %r11d,%ecx
|
movl %edx,%ecx
|
||||||
andl $63&(~7),%ecx
|
andl $63&(~7),%ecx
|
||||||
jz .Lhandle_7
|
jz .Lhandle_7
|
||||||
shrl $3,%ecx
|
shrl $3,%ecx
|
||||||
|
@ -109,12 +107,11 @@ ENTRY(__memset)
|
||||||
jnz .Lloop_8
|
jnz .Lloop_8
|
||||||
|
|
||||||
.Lhandle_7:
|
.Lhandle_7:
|
||||||
movl %r11d,%ecx
|
andl $7,%edx
|
||||||
andl $7,%ecx
|
|
||||||
jz .Lende
|
jz .Lende
|
||||||
.p2align 4
|
.p2align 4
|
||||||
.Lloop_1:
|
.Lloop_1:
|
||||||
decl %ecx
|
decl %edx
|
||||||
movb %al,(%rdi)
|
movb %al,(%rdi)
|
||||||
leaq 1(%rdi),%rdi
|
leaq 1(%rdi),%rdi
|
||||||
jnz .Lloop_1
|
jnz .Lloop_1
|
||||||
|
@ -125,13 +122,13 @@ ENTRY(__memset)
|
||||||
|
|
||||||
CFI_RESTORE_STATE
|
CFI_RESTORE_STATE
|
||||||
.Lbad_alignment:
|
.Lbad_alignment:
|
||||||
cmpq $7,%r11
|
cmpq $7,%rdx
|
||||||
jbe .Lhandle_7
|
jbe .Lhandle_7
|
||||||
movq %rax,(%rdi) /* unaligned store */
|
movq %rax,(%rdi) /* unaligned store */
|
||||||
movq $8,%r8
|
movq $8,%r8
|
||||||
subq %r9,%r8
|
subq %r9,%r8
|
||||||
addq %r8,%rdi
|
addq %r8,%rdi
|
||||||
subq %r8,%r11
|
subq %r8,%rdx
|
||||||
jmp .Lafter_bad_alignment
|
jmp .Lafter_bad_alignment
|
||||||
.Lfinal:
|
.Lfinal:
|
||||||
CFI_ENDPROC
|
CFI_ENDPROC
|
||||||
|
|
|
@ -4,21 +4,58 @@
|
||||||
#ifndef __ASSEMBLY__
|
#ifndef __ASSEMBLY__
|
||||||
|
|
||||||
#include <linux/compiler.h>
|
#include <linux/compiler.h>
|
||||||
|
#include <linux/log2.h>
|
||||||
|
|
||||||
/* Pure 2^n version of get_order */
|
/*
|
||||||
static inline __attribute_const__ int get_order(unsigned long size)
|
* Runtime evaluation of get_order()
|
||||||
|
*/
|
||||||
|
static inline __attribute_const__
|
||||||
|
int __get_order(unsigned long size)
|
||||||
{
|
{
|
||||||
int order;
|
int order;
|
||||||
|
|
||||||
size = (size - 1) >> (PAGE_SHIFT - 1);
|
size--;
|
||||||
order = -1;
|
size >>= PAGE_SHIFT;
|
||||||
do {
|
#if BITS_PER_LONG == 32
|
||||||
size >>= 1;
|
order = fls(size);
|
||||||
order++;
|
#else
|
||||||
} while (size);
|
order = fls64(size);
|
||||||
|
#endif
|
||||||
return order;
|
return order;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get_order - Determine the allocation order of a memory size
|
||||||
|
* @size: The size for which to get the order
|
||||||
|
*
|
||||||
|
* Determine the allocation order of a particular sized block of memory. This
|
||||||
|
* is on a logarithmic scale, where:
|
||||||
|
*
|
||||||
|
* 0 -> 2^0 * PAGE_SIZE and below
|
||||||
|
* 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
|
||||||
|
* 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
|
||||||
|
* 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
|
||||||
|
* 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
|
||||||
|
* ...
|
||||||
|
*
|
||||||
|
* The order returned is used to find the smallest allocation granule required
|
||||||
|
* to hold an object of the specified size.
|
||||||
|
*
|
||||||
|
* The result is undefined if the size is 0.
|
||||||
|
*
|
||||||
|
* This function may be used to initialise variables with compile time
|
||||||
|
* evaluations of constants.
|
||||||
|
*/
|
||||||
|
#define get_order(n) \
|
||||||
|
( \
|
||||||
|
__builtin_constant_p(n) ? ( \
|
||||||
|
((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
|
||||||
|
(((n) < (1UL << PAGE_SHIFT)) ? 0 : \
|
||||||
|
ilog2((n) - 1) - PAGE_SHIFT + 1) \
|
||||||
|
) : \
|
||||||
|
__get_order(n) \
|
||||||
|
)
|
||||||
|
|
||||||
#endif /* __ASSEMBLY__ */
|
#endif /* __ASSEMBLY__ */
|
||||||
|
|
||||||
#endif /* __ASM_GENERIC_GETORDER_H */
|
#endif /* __ASM_GENERIC_GETORDER_H */
|
||||||
|
|
Loading…
Reference in a new issue