x86/iopl: Restrict iopl() permission scope

Access to the full I/O port range can also be provided by the TSS I/O
bitmap, but that would require copying 8k of data when scheduling in the
task. As shown with the sched out optimization, TSS.io_bitmap_base can be
used to switch the incoming task to a preallocated I/O bitmap which has all
bits zero, i.e. allows access to all I/O ports.

Implementing this makes it possible to provide an iopl() emulation mode which
restricts the IOPL level 3 permissions to I/O port access but removes the
STI/CLI permission which comes with the hardware IOPL mechanism.

Provide a config option to switch IOPL to emulation mode, make it the
default and while at it also provide an option to disable IOPL completely.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
This commit is contained in:
Thomas Gleixner 2019-11-11 23:03:28 +01:00
parent be9afb4b52
commit c8137ace56
6 changed files with 137 additions and 45 deletions

View file

@ -1254,6 +1254,38 @@ config X86_VSYSCALL_EMULATION
Disabling this option saves about 7K of kernel size and Disabling this option saves about 7K of kernel size and
possibly 4K of additional runtime pagetable memory. possibly 4K of additional runtime pagetable memory.
choice
prompt "IOPL"
default X86_IOPL_EMULATION
config X86_IOPL_EMULATION
bool "IOPL Emulation"
---help---
Legacy IOPL support is an overbroad mechanism which allows user
space aside of accessing all 65536 I/O ports also to disable
interrupts. To gain this access the caller needs CAP_SYS_RAWIO
capabilities and permission from potentially active security
modules.
The emulation restricts the functionality of the syscall to
only allowing the full range I/O port access, but prevents the
ability to disable interrupts from user space.
config X86_IOPL_LEGACY
bool "IOPL Legacy"
---help---
Allow the full IOPL permissions, i.e. user space access to all
65536 I/O ports and also the ability to disable interrupts, which
is overbroad and can result in system lockups.
config X86_IOPL_NONE
bool "IOPL None"
---help---
Disable the IOPL permission syscall. That's the safest option as
no sane application should depend on this functionality.
endchoice
config TOSHIBA config TOSHIBA
tristate "Toshiba Laptop support" tristate "Toshiba Laptop support"
depends on X86_32 depends on X86_32

View file

@ -44,7 +44,7 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
* Define this here and validate with BUILD_BUG_ON() in pgtable_32.c * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
* to avoid include recursion hell * to avoid include recursion hell
*/ */
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41)
#define CPU_ENTRY_AREA_BASE \ #define CPU_ENTRY_AREA_BASE \
((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \

View file

@ -332,19 +332,21 @@ struct x86_hw_tss {
#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) #define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE)
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long))
#define IO_BITMAP_OFFSET_VALID \ #define IO_BITMAP_OFFSET_VALID_MAP \
(offsetof(struct tss_struct, io_bitmap.bitmap) - \ (offsetof(struct tss_struct, io_bitmap.bitmap) - \
offsetof(struct tss_struct, x86_tss)) offsetof(struct tss_struct, x86_tss))
#define IO_BITMAP_OFFSET_VALID_ALL \
(offsetof(struct tss_struct, io_bitmap.mapall) - \
offsetof(struct tss_struct, x86_tss))
/* /*
* sizeof(unsigned long) coming from an extra "long" at the end * sizeof(unsigned long) coming from an extra "long" at the end of the
* of the iobitmap. * iobitmap. The limit is inclusive, i.e. the last valid byte.
*
* -1? seg base+limit should be pointing to the address of the
* last valid byte
*/ */
#define __KERNEL_TSS_LIMIT \ #define __KERNEL_TSS_LIMIT \
(IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
sizeof(unsigned long) - 1)
/* Base offset outside of TSS_LIMIT so unprivileged IO causes #GP */ /* Base offset outside of TSS_LIMIT so unprivileged IO causes #GP */
#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
@ -380,6 +382,12 @@ struct x86_io_bitmap {
* be within the limit. * be within the limit.
*/ */
unsigned long bitmap[IO_BITMAP_LONGS + 1]; unsigned long bitmap[IO_BITMAP_LONGS + 1];
/*
* Special I/O bitmap to emulate IOPL(3). All bytes zero,
* except the additional byte at the end.
*/
unsigned long mapall[IO_BITMAP_LONGS + 1];
}; };
struct tss_struct { struct tss_struct {
@ -506,7 +514,13 @@ struct thread_struct {
#endif #endif
/* IO permissions: */ /* IO permissions: */
struct io_bitmap *io_bitmap; struct io_bitmap *io_bitmap;
/*
* IOPL. Privilege level dependent I/O permission which includes
* user space CLI/STI when granted.
*/
unsigned long iopl; unsigned long iopl;
unsigned long iopl_emul;
mm_segment_t addr_limit; mm_segment_t addr_limit;

View file

@ -1864,6 +1864,11 @@ void cpu_init(void)
tss->io_bitmap.prev_max = 0; tss->io_bitmap.prev_max = 0;
tss->io_bitmap.prev_sequence = 0; tss->io_bitmap.prev_sequence = 0;
memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
/*
* Invalidate the extra array entry past the end of the all
* permission bitmap as required by the hardware.
*/
tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc(); load_TR_desc();

View file

@ -17,25 +17,41 @@
static atomic64_t io_bitmap_sequence; static atomic64_t io_bitmap_sequence;
void io_bitmap_share(struct task_struct *tsk) void io_bitmap_share(struct task_struct *tsk)
{ {
/* /* Can be NULL when current->thread.iopl_emul == 3 */
* Take a refcount on current's bitmap. It can be used by if (current->thread.io_bitmap) {
* both tasks as long as none of them changes the bitmap. /*
*/ * Take a refcount on current's bitmap. It can be used by
refcount_inc(&current->thread.io_bitmap->refcnt); * both tasks as long as none of them changes the bitmap.
tsk->thread.io_bitmap = current->thread.io_bitmap; */
refcount_inc(&current->thread.io_bitmap->refcnt);
tsk->thread.io_bitmap = current->thread.io_bitmap;
}
set_tsk_thread_flag(tsk, TIF_IO_BITMAP); set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
} }
static void task_update_io_bitmap(void)
{
struct thread_struct *t = &current->thread;
if (t->iopl_emul == 3 || t->io_bitmap) {
/* TSS update is handled on exit to user space */
set_thread_flag(TIF_IO_BITMAP);
} else {
clear_thread_flag(TIF_IO_BITMAP);
/* Invalidate TSS */
preempt_disable();
tss_update_io_bitmap();
preempt_enable();
}
}
void io_bitmap_exit(void) void io_bitmap_exit(void)
{ {
struct io_bitmap *iobm = current->thread.io_bitmap; struct io_bitmap *iobm = current->thread.io_bitmap;
current->thread.io_bitmap = NULL; current->thread.io_bitmap = NULL;
clear_thread_flag(TIF_IO_BITMAP); task_update_io_bitmap();
preempt_disable();
tss_update_io_bitmap();
preempt_enable();
if (iobm && refcount_dec_and_test(&iobm->refcnt)) if (iobm && refcount_dec_and_test(&iobm->refcnt))
kfree(iobm); kfree(iobm);
} }
@ -157,36 +173,55 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
*/ */
SYSCALL_DEFINE1(iopl, unsigned int, level) SYSCALL_DEFINE1(iopl, unsigned int, level)
{ {
struct pt_regs *regs = current_pt_regs();
struct thread_struct *t = &current->thread; struct thread_struct *t = &current->thread;
struct pt_regs *regs = current_pt_regs();
unsigned int old;
/* /*
* Careful: the IOPL bits in regs->flags are undefined under Xen PV * Careful: the IOPL bits in regs->flags are undefined under Xen PV
* and changing them has no effect. * and changing them has no effect.
*/ */
unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT; if (IS_ENABLED(CONFIG_X86_IOPL_NONE))
return -ENOSYS;
if (level > 3) if (level > 3)
return -EINVAL; return -EINVAL;
if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION))
old = t->iopl_emul;
else
old = t->iopl >> X86_EFLAGS_IOPL_BIT;
/* No point in going further if nothing changes */
if (level == old)
return 0;
/* Trying to gain more privileges? */ /* Trying to gain more privileges? */
if (level > old) { if (level > old) {
if (!capable(CAP_SYS_RAWIO) || if (!capable(CAP_SYS_RAWIO) ||
security_locked_down(LOCKDOWN_IOPORT)) security_locked_down(LOCKDOWN_IOPORT))
return -EPERM; return -EPERM;
} }
/*
* Change the flags value on the return stack, which has been set if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) {
* up on system-call entry. See also the fork and signal handling t->iopl_emul = level;
* code how this is handled. task_update_io_bitmap();
*/ } else {
regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | /*
(level << X86_EFLAGS_IOPL_BIT); * Change the flags value on the return stack, which has
/* Store the new level in the thread struct */ * been set up on system-call entry. See also the fork and
t->iopl = level << X86_EFLAGS_IOPL_BIT; * signal handling code how this is handled.
/* */
* X86_32 switches immediately and XEN handles it via emulation. regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
*/ (level << X86_EFLAGS_IOPL_BIT);
set_iopl_mask(t->iopl); /* Store the new level in the thread struct */
t->iopl = level << X86_EFLAGS_IOPL_BIT;
/*
* X86_32 switches immediately and XEN handles it via
* emulation.
*/
set_iopl_mask(t->iopl);
}
return 0; return 0;
} }

View file

@ -376,21 +376,27 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
void tss_update_io_bitmap(void) void tss_update_io_bitmap(void)
{ {
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
u16 *base = &tss->x86_tss.io_bitmap_base;
if (test_thread_flag(TIF_IO_BITMAP)) { if (test_thread_flag(TIF_IO_BITMAP)) {
struct io_bitmap *iobm = current->thread.io_bitmap; struct thread_struct *t = &current->thread;
/* if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION) &&
* Only copy bitmap data when the sequence number t->iopl_emul == 3) {
* differs. The update time is accounted to the incoming *base = IO_BITMAP_OFFSET_VALID_ALL;
* task. } else {
*/ struct io_bitmap *iobm = t->io_bitmap;
if (tss->io_bitmap.prev_sequence != iobm->sequence) /*
tss_copy_io_bitmap(tss, iobm); * Only copy bitmap data when the sequence number
* differs. The update time is accounted to the
/* Enable the bitmap */ * incoming task.
tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; */
if (tss->io_bitmap.prev_sequence != iobm->sequence)
tss_copy_io_bitmap(tss, iobm);
/* Enable the bitmap */
*base = IO_BITMAP_OFFSET_VALID_MAP;
}
/* /*
* Make sure that the TSS limit is covering the io bitmap. * Make sure that the TSS limit is covering the io bitmap.
* It might have been cut down by a VMEXIT to 0x67 which * It might have been cut down by a VMEXIT to 0x67 which