diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 6fbd19564e4e..9e9a70e50c72 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -552,6 +552,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); + /* we're going to use this soon, after a few expensive things */ + if (next_p->fpu_counter>5) + prefetch(&next->i387.fxsave); + /* * Reload esp0, LDT and the page table pointer: */ @@ -629,6 +633,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) __switch_to_xtra(prev_p, next_p, tss); + /* If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + if (next_p->fpu_counter>5) + math_state_restore(); return prev_p; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 28e53342f294..ffc40cff1e07 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -1136,6 +1136,7 @@ asmlinkage void math_state_restore(void) init_fpu(me); restore_fpu_checking(&me->thread.i387.fxsave); task_thread_info(me)->status |= TS_USEDFPU; + me->fpu_counter++; } void __init trap_init(void) diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h index cba8a3b0cded..60c0f4853fdb 100644 --- a/include/asm-x86_64/i387.h +++ b/include/asm-x86_64/i387.h @@ -24,6 +24,7 @@ extern unsigned int mxcsr_feature_mask; extern void mxcsr_feature_mask_init(void); extern void init_fpu(struct task_struct *child); extern int save_i387(struct _fpstate __user *buf); +extern asmlinkage void math_state_restore(void); /* * FPU lazy state save handling... @@ -31,7 +32,9 @@ extern int save_i387(struct _fpstate __user *buf); #define unlazy_fpu(tsk) do { \ if (task_thread_info(tsk)->status & TS_USEDFPU) \ - save_init_fpu(tsk); \ + save_init_fpu(tsk); \ + else \ + tsk->fpu_counter = 0; \ } while (0) /* Ignore delayed exceptions from user space */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 34ed0d99b1bd..807556c5bcd2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -865,6 +865,15 @@ struct task_struct { struct key *thread_keyring; /* keyring private to this thread */ unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif + /* + * fpu_counter contains the number of consecutive context switches + * that the FPU is used. If this is over a threshold, the lazy fpu + * saving becomes unlazy to save the trap. This is an unsigned char + * so that after 256 times the counter wraps and the behavior turns + * lazy again; this to deal with bursty apps that only use FPU for + * a short time + */ + unsigned char fpu_counter; int oomkilladj; /* OOM kill score adjustment (bit shift). */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock