linux-stable/arch/arc/kernel/fpu.c
Vineet Gupta 3a715e8040 ARC: fp: set FPU_STATUS.FWE to enable FPU_STATUS update on context switch
FPU_STATUS register contains FP exception flags bits which are updated
by core as side-effect of FP instructions but can also be manually
wiggled such as by glibc C99 functions fe{raise,clear,test}except() etc.
To effect the update, the programming model requires OR'ing FWE
bit (31). This bit is write-only and RAZ, meaning it is effectively
auto-cleared after write and thus needs to be set every time, which
is how glibc implements this.

However there's another usecase of FPU_STATUS update, at the time of
Linux task switch when incoming task value needs to be programmed into
the register. This was added as part of f45ba2bd6d ("ARCv2:
fpu: preserve userspace fpu state") which missed OR'ing FWE bit,
meaning the new value is effectively not being written at all.
This patch remedies that.

Interestingly, this snafu was not caught in interim glibc testing as the
race window which relies on a specific exception bit to be set/clear is
really small, especially when it involves a context switch.
Fortunately this was caught by glibc's math/test-fenv-tls test which
repeatedly set/clear exception flags in a big loop, concurrently in main
program and also in a thread.

Fixes: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/54
Fixes: f45ba2bd6d ("ARCv2: fpu: preserve userspace fpu state")
Cc: stable@vger.kernel.org	#5.6+
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
2021-08-03 18:58:33 -07:00

82 lines
2.4 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* fpu.c - save/restore of Floating Point Unit Registers on task switch
*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
#include <linux/sched.h>
#include <asm/fpu.h>
#ifdef CONFIG_ISA_ARCOMPACT
/*
* To save/restore FPU regs, simplest scheme would use LR/SR insns.
* However since SR serializes the pipeline, an alternate "hack" can be used
* which uses the FPU Exchange insn (DEXCL) to r/w FPU regs.
*
* Store to 64bit dpfp1 reg from a pair of core regs:
* dexcl1 0, r1, r0 ; where r1:r0 is the 64 bit val
*
* Read from dpfp1 into pair of core regs (w/o clobbering dpfp1)
* mov_s r3, 0
* daddh11 r1, r3, r3 ; get "hi" into r1 (dpfp1 unchanged)
* dexcl1 r0, r1, r3 ; get "low" into r0 (dpfp1 low clobbered)
* dexcl1 0, r1, r0 ; restore dpfp1 to orig value
*
* However we can tweak the read, so that read-out of outgoing task's FPU regs
* and write of incoming task's regs happen in one shot. So all the work is
* done before context switch
*/
/*
 * Exchange FPU register contents between two tasks on a context switch.
 *
 * @prev: outgoing task; the values read out of the FPU are stored into its
 *        thread.fpu.aux_dpfp[] save area.
 * @next: incoming task; the values in its thread.fpu.aux_dpfp[] save area
 *        are loaded into the FPU.
 *
 * Uses the combined read-out/write-in DADDH + DEXCL sequence described in
 * the comment above, so no SR instructions are needed.
 */
void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
{
/*
 * Both save areas are addressed as flat arrays of 32-bit words starting at
 * aux_dpfp[0].l.
 * NOTE(review): the +1/+2/+3 offsets below assume the words are laid out as
 * [0].l, [0].h, [1].l, [1].h with no padding - confirm against the
 * definition of struct arc_fpu.
 */
unsigned int *saveto = &prev->thread.fpu.aux_dpfp[0].l;
unsigned int *readfrom = &next->thread.fpu.aux_dpfp[0].l;
/* Zero source operand for DADDH, used to fetch the "hi" word unchanged. */
const unsigned int zero = 0;
/*
 * First register pair:
 *   daddh11: %0 (saveto[1]) receives the current "hi" word.
 *   dexcl1:  %1 (saveto[0]) receives the current "low" word while the
 *            incoming pair %3:%4 (readfrom[1]:readfrom[0]) is written in.
 */
__asm__ __volatile__(
"daddh11 %0, %2, %2\n"
"dexcl1 %1, %3, %4\n"
/*
 * Outputs are early-clobber ("=&r"): %0 is written by the first insn
 * while inputs %3/%4 are still needed by the second, so the outputs must
 * not share a register with any input.
 */
: "=&r" (*(saveto + 1)), /* early clobber is required here */
"=&r" (*(saveto))
: "r" (zero), "r" (*(readfrom + 1)), "r" (*(readfrom))
);
/*
 * Second register pair: the same sequence using daddh22/dexcl2, saving into
 * saveto[3]:saveto[2] and loading from readfrom[3]:readfrom[2].
 */
__asm__ __volatile__(
"daddh22 %0, %2, %2\n"
"dexcl2 %1, %3, %4\n"
: "=&r"(*(saveto + 3)), /* early clobber is required here */
"=&r"(*(saveto + 2))
: "r" (zero), "r" (*(readfrom + 3)), "r" (*(readfrom + 2))
);
}
#else
/*
 * fpu_init_task - program the FPU aux registers with their initial values
 * @regs: not referenced by this function
 *
 * FPU_CTRL is set to the default rounding mode and the FPU_STATUS flags are
 * initialized to zero.
 */
void fpu_init_task(struct pt_regs *regs)
{
	/* default rounding mode */
	const unsigned int ctrl_default = 0x100;
	/* A write to FPU_STATUS requires FWE to be set; the flag bits stay zero */
	const unsigned int status_init = 0x80000000;

	write_aux_reg(ARC_REG_FPU_CTRL, ctrl_default);
	write_aux_reg(ARC_REG_FPU_STATUS, status_init);
}
/*
 * fpu_save_restore - swap FPU control/status state between two tasks
 * @prev: task whose current FPU_CTRL and FPU_STATUS values are captured
 *        into its thread.fpu
 * @next: task whose saved thread.fpu values are programmed into the FPU
 */
void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
{
	const unsigned int fwe = 0x80000000;
	struct arc_fpu *outgoing = &prev->thread.fpu;
	struct arc_fpu *incoming = &next->thread.fpu;

	/* Snapshot the live registers for the outgoing task */
	outgoing->ctrl = read_aux_reg(ARC_REG_FPU_CTRL);
	outgoing->status = read_aux_reg(ARC_REG_FPU_STATUS);

	/* Load the incoming task's values */
	write_aux_reg(ARC_REG_FPU_CTRL, incoming->ctrl);
	/* FWE is OR'ed in with the saved status for the FPU_STATUS write */
	write_aux_reg(ARC_REG_FPU_STATUS, (incoming->status | fwe));
}
#endif