Merge tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Richard Weinberger:

 - Support for preemption

 - i386 Rust support

 - Huge cleanup by Benjamin Berg

 - UBSAN support

 - Removal of dead code

* tag 'uml-for-linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux: (41 commits)
  um: vector: always reset vp->opened
  um: vector: remove vp->lock
  um: register power-off handler
  um: line: always fill *error_out in setup_one_line()
  um: remove pcap driver from documentation
  um: Enable preemption in UML
  um: refactor TLB update handling
  um: simplify and consolidate TLB updates
  um: remove force_flush_all from fork_handler
  um: Do not flush MM in flush_thread
  um: Delay flushing syscalls until the thread is restarted
  um: remove copy_context_skas0
  um: remove LDT support
  um: compress memory related stub syscalls while adding them
  um: Rework syscall handling
  um: Add generic stub_syscall6 function
  um: Create signal stack memory assignment in stub_data
  um: Remove stub-data.h include from common-offsets.h
  um: time-travel: fix signal blocking race/hang
  um: time-travel: remove time_exit()
  ...
Linus Torvalds 2024-07-25 12:33:08 -07:00
commit f9bcc61ad1
69 changed files with 1313 additions and 1972 deletions

View file

@ -18,7 +18,7 @@ Architecture Level of support Constraints
``arm64`` Maintained Little Endian only. ``arm64`` Maintained Little Endian only.
``loongarch`` Maintained \- ``loongarch`` Maintained \-
``riscv`` Maintained ``riscv64`` only. ``riscv`` Maintained ``riscv64`` only.
``um`` Maintained ``x86_64`` only. ``um`` Maintained \-
``x86`` Maintained ``x86_64`` only. ``x86`` Maintained ``x86_64`` only.
============= ================ ============================================== ============= ================ ==============================================

View file

@ -223,8 +223,6 @@ remote UML and other VM instances.
+-----------+--------+------------------------------------+------------+ +-----------+--------+------------------------------------+------------+
| socket | legacy | none | ~ 450Mbit | | socket | legacy | none | ~ 450Mbit |
+-----------+--------+------------------------------------+------------+ +-----------+--------+------------------------------------+------------+
| pcap | legacy | rx only | ~ 450Mbit |
+-----------+--------+------------------------------------+------------+
| ethertap | legacy | obsolete | ~ 500Mbit | | ethertap | legacy | obsolete | ~ 500Mbit |
+-----------+--------+------------------------------------+------------+ +-----------+--------+------------------------------------+------------+
| vde | legacy | obsolete | ~ 500Mbit | | vde | legacy | obsolete | ~ 500Mbit |

View file

@ -11,7 +11,7 @@ config UML
select ARCH_HAS_KCOV select ARCH_HAS_KCOV
select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER select ARCH_HAS_STRNLEN_USER
select ARCH_NO_PREEMPT select ARCH_NO_PREEMPT_DYNAMIC
select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
@ -31,7 +31,8 @@ config UML
select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_SUPPORT
select TTY # Needed for line.c select TTY # Needed for line.c
select HAVE_ARCH_VMAP_STACK select HAVE_ARCH_VMAP_STACK
select HAVE_RUST if X86_64 select HAVE_RUST
select ARCH_HAS_UBSAN
config MMU config MMU
bool bool
@ -48,12 +49,13 @@ config NO_IOMEM
config UML_IOMEM_EMULATION config UML_IOMEM_EMULATION
bool bool
select INDIRECT_IOMEM select INDIRECT_IOMEM
select HAS_IOPORT
select GENERIC_PCI_IOMAP select GENERIC_PCI_IOMAP
select GENERIC_IOMAP select GENERIC_IOMAP
select NO_GENERIC_PCI_IOPORT_MAP select NO_GENERIC_PCI_IOPORT_MAP
config NO_IOPORT_MAP config NO_IOPORT_MAP
def_bool y def_bool !UML_IOMEM_EMULATION
config ISA config ISA
bool bool

View file

@ -297,26 +297,6 @@ config UML_NET_MCAST
If unsure, say N. If unsure, say N.
config UML_NET_PCAP
bool "pcap transport (obsolete)"
depends on UML_NET
depends on !MODVERSIONS
select MAY_HAVE_RUNTIME_DEPS
help
The pcap transport makes a pcap packet stream on the host look
like an ethernet device inside UML. This is useful for making
UML act as a network monitor for the host. You must have libcap
installed in order to build the pcap transport into UML.
For more information, see
<http://user-mode-linux.sourceforge.net/old/networking.html> That site
has examples of the UML command line to use to enable this option.
NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
migrate to UML_NET_VECTOR.
If unsure, say N.
config UML_NET_SLIRP config UML_NET_SLIRP
bool "SLiRP transport (obsolete)" bool "SLiRP transport (obsolete)"
depends on UML_NET depends on UML_NET

View file

@ -20,14 +20,9 @@ harddog-objs := harddog_kern.o
harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
rtc-objs := rtc_kern.o rtc_user.o rtc-objs := rtc_kern.o rtc_user.o
LDFLAGS_pcap.o = $(shell $(CC) $(KBUILD_CFLAGS) -print-file-name=libpcap.a)
LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a) LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o targets := vde_kern.o vde_user.o
$(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o
$(LD) -r -dp -o $@ $^ $(ld_flags)
$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o $(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o
$(LD) -r -dp -o $@ $^ $(ld_flags) $(LD) -r -dp -o $@ $^ $(ld_flags)
@ -49,7 +44,6 @@ obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
obj-$(CONFIG_UML_NET_VECTOR) += vector.o obj-$(CONFIG_UML_NET_VECTOR) += vector.o
obj-$(CONFIG_UML_NET_VDE) += vde.o obj-$(CONFIG_UML_NET_VDE) += vde.o
obj-$(CONFIG_UML_NET_MCAST) += umcast.o obj-$(CONFIG_UML_NET_MCAST) += umcast.o
obj-$(CONFIG_UML_NET_PCAP) += pcap.o
obj-$(CONFIG_UML_NET) += net.o obj-$(CONFIG_UML_NET) += net.o
obj-$(CONFIG_MCONSOLE) += mconsole.o obj-$(CONFIG_MCONSOLE) += mconsole.o
obj-$(CONFIG_MMAPPER) += mmapper_kern.o obj-$(CONFIG_MMAPPER) += mmapper_kern.o
@ -69,7 +63,7 @@ obj-$(CONFIG_UML_RTC) += rtc.o
obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virt-pci.o
# pcap_user.o must be added explicitly. # pcap_user.o must be added explicitly.
USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o
CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH) CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"' CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"'

View file

@ -22,7 +22,8 @@ struct chan {
unsigned int output:1; unsigned int output:1;
unsigned int opened:1; unsigned int opened:1;
unsigned int enabled:1; unsigned int enabled:1;
int fd; int fd_in;
int fd_out; /* only different to fd_in if blocking output is needed */
const struct chan_ops *ops; const struct chan_ops *ops;
void *data; void *data;
}; };

View file

@ -81,6 +81,12 @@ static const struct chan_ops not_configged_ops = {
}; };
#endif /* CONFIG_NOCONFIG_CHAN */ #endif /* CONFIG_NOCONFIG_CHAN */
static inline bool need_output_blocking(void)
{
return time_travel_mode == TT_MODE_INFCPU ||
time_travel_mode == TT_MODE_EXTERNAL;
}
static int open_one_chan(struct chan *chan) static int open_one_chan(struct chan *chan)
{ {
int fd, err; int fd, err;
@ -96,15 +102,43 @@ static int open_one_chan(struct chan *chan)
return fd; return fd;
err = os_set_fd_block(fd, 0); err = os_set_fd_block(fd, 0);
if (err) { if (err)
(*chan->ops->close)(fd, chan->data); goto out_close;
return err;
}
chan->fd = fd; chan->fd_in = fd;
chan->fd_out = fd;
/*
* In time-travel modes infinite-CPU and external we need to guarantee
* that any writes to the output succeed immediately from the point of
* the VM. The best way to do this is to put the FD in blocking mode
* and simply wait/retry until everything is written.
* As every write is guaranteed to complete, we also do not need to
* request an IRQ for the output.
*
* Note that input cannot happen in a time synchronized way. We permit
* it, but time passes very quickly if anything waits for a read.
*/
if (chan->output && need_output_blocking()) {
err = os_dup_file(chan->fd_out);
if (err < 0)
goto out_close;
chan->fd_out = err;
err = os_set_fd_block(chan->fd_out, 1);
if (err) {
os_close_file(chan->fd_out);
goto out_close;
}
}
chan->opened = 1; chan->opened = 1;
return 0; return 0;
out_close:
(*chan->ops->close)(fd, chan->data);
return err;
} }
static int open_chan(struct list_head *chans) static int open_chan(struct list_head *chans)
@ -125,7 +159,7 @@ static int open_chan(struct list_head *chans)
void chan_enable_winch(struct chan *chan, struct tty_port *port) void chan_enable_winch(struct chan *chan, struct tty_port *port)
{ {
if (chan && chan->primary && chan->ops->winch) if (chan && chan->primary && chan->ops->winch)
register_winch(chan->fd, port); register_winch(chan->fd_in, port);
} }
static void line_timer_cb(struct work_struct *work) static void line_timer_cb(struct work_struct *work)
@ -156,8 +190,9 @@ int enable_chan(struct line *line)
if (chan->enabled) if (chan->enabled)
continue; continue;
err = line_setup_irq(chan->fd, chan->input, chan->output, line, err = line_setup_irq(chan->fd_in, chan->input,
chan); chan->output && !need_output_blocking(),
line, chan);
if (err) if (err)
goto out_close; goto out_close;
@ -196,7 +231,8 @@ void free_irqs(void)
if (chan->input && chan->enabled) if (chan->input && chan->enabled)
um_free_irq(chan->line->read_irq, chan); um_free_irq(chan->line->read_irq, chan);
if (chan->output && chan->enabled) if (chan->output && chan->enabled &&
!need_output_blocking())
um_free_irq(chan->line->write_irq, chan); um_free_irq(chan->line->write_irq, chan);
chan->enabled = 0; chan->enabled = 0;
} }
@ -216,15 +252,19 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
} else { } else {
if (chan->input && chan->enabled) if (chan->input && chan->enabled)
um_free_irq(chan->line->read_irq, chan); um_free_irq(chan->line->read_irq, chan);
if (chan->output && chan->enabled) if (chan->output && chan->enabled &&
!need_output_blocking())
um_free_irq(chan->line->write_irq, chan); um_free_irq(chan->line->write_irq, chan);
chan->enabled = 0; chan->enabled = 0;
} }
if (chan->fd_out != chan->fd_in)
os_close_file(chan->fd_out);
if (chan->ops->close != NULL) if (chan->ops->close != NULL)
(*chan->ops->close)(chan->fd, chan->data); (*chan->ops->close)(chan->fd_in, chan->data);
chan->opened = 0; chan->opened = 0;
chan->fd = -1; chan->fd_in = -1;
chan->fd_out = -1;
} }
void close_chan(struct line *line) void close_chan(struct line *line)
@ -244,7 +284,7 @@ void close_chan(struct line *line)
void deactivate_chan(struct chan *chan, int irq) void deactivate_chan(struct chan *chan, int irq)
{ {
if (chan && chan->enabled) if (chan && chan->enabled)
deactivate_fd(chan->fd, irq); deactivate_fd(chan->fd_in, irq);
} }
int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq) int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
@ -254,7 +294,7 @@ int write_chan(struct chan *chan, const u8 *buf, size_t len, int write_irq)
if (len == 0 || !chan || !chan->ops->write) if (len == 0 || !chan || !chan->ops->write)
return 0; return 0;
n = chan->ops->write(chan->fd, buf, len, chan->data); n = chan->ops->write(chan->fd_out, buf, len, chan->data);
if (chan->primary) { if (chan->primary) {
ret = n; ret = n;
} }
@ -268,7 +308,7 @@ int console_write_chan(struct chan *chan, const char *buf, int len)
if (!chan || !chan->ops->console_write) if (!chan || !chan->ops->console_write)
return 0; return 0;
n = chan->ops->console_write(chan->fd, buf, len); n = chan->ops->console_write(chan->fd_out, buf, len);
if (chan->primary) if (chan->primary)
ret = n; ret = n;
return ret; return ret;
@ -296,14 +336,14 @@ int chan_window_size(struct line *line, unsigned short *rows_out,
if (chan && chan->primary) { if (chan && chan->primary) {
if (chan->ops->window_size == NULL) if (chan->ops->window_size == NULL)
return 0; return 0;
return chan->ops->window_size(chan->fd, chan->data, return chan->ops->window_size(chan->fd_in, chan->data,
rows_out, cols_out); rows_out, cols_out);
} }
chan = line->chan_out; chan = line->chan_out;
if (chan && chan->primary) { if (chan && chan->primary) {
if (chan->ops->window_size == NULL) if (chan->ops->window_size == NULL)
return 0; return 0;
return chan->ops->window_size(chan->fd, chan->data, return chan->ops->window_size(chan->fd_in, chan->data,
rows_out, cols_out); rows_out, cols_out);
} }
return 0; return 0;
@ -319,7 +359,7 @@ static void free_one_chan(struct chan *chan)
(*chan->ops->free)(chan->data); (*chan->ops->free)(chan->data);
if (chan->primary && chan->output) if (chan->primary && chan->output)
ignore_sigio_fd(chan->fd); ignore_sigio_fd(chan->fd_in);
kfree(chan); kfree(chan);
} }
@ -478,7 +518,8 @@ static struct chan *parse_chan(struct line *line, char *str, int device,
.output = 0, .output = 0,
.opened = 0, .opened = 0,
.enabled = 0, .enabled = 0,
.fd = -1, .fd_in = -1,
.fd_out = -1,
.ops = ops, .ops = ops,
.data = data }); .data = data });
return chan; return chan;
@ -549,7 +590,7 @@ void chan_interrupt(struct line *line, int irq)
schedule_delayed_work(&line->task, 1); schedule_delayed_work(&line->task, 1);
goto out; goto out;
} }
err = chan->ops->read(chan->fd, &c, chan->data); err = chan->ops->read(chan->fd_in, &c, chan->data);
if (err > 0) if (err > 0)
tty_insert_flip_char(port, c, TTY_NORMAL); tty_insert_flip_char(port, c, TTY_NORMAL);
} while (err > 0); } while (err > 0);
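
The fd_in/fd_out split above reduces to a small POSIX pattern. A standalone sketch, assuming plain dup()/fcntl() in place of the os_dup_file()/os_set_fd_block() wrappers (the helper name is hypothetical, not part of the patch):

/*
 * Sketch only: mirrors what open_one_chan() does - the output side
 * gets its own duplicate in blocking mode so writes always complete,
 * while the original FD stays non-blocking for IRQ-driven input.
 */
#include <fcntl.h>
#include <unistd.h>

static int split_chan_fds(int fd, int *fd_in, int *fd_out)
{
        int out = dup(fd);

        if (out < 0)
                return -1;

        /* input side: non-blocking, reads are driven by SIGIO/IRQ */
        if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK) < 0 ||
            /* output side: blocking, so every write finishes "instantly"
             * from the guest's simulated-time point of view */
            fcntl(out, F_SETFL, fcntl(out, F_GETFL) & ~O_NONBLOCK) < 0) {
                close(out);
                return -1;
        }

        *fd_in = fd;
        *fd_out = out;
        return 0;
}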

View file

@ -23,7 +23,7 @@ int generic_read(int fd, __u8 *c_out, void *unused)
{ {
int n; int n;
n = read(fd, c_out, sizeof(*c_out)); CATCH_EINTR(n = read(fd, c_out, sizeof(*c_out)));
if (n > 0) if (n > 0)
return n; return n;
else if (n == 0) else if (n == 0)
@ -37,11 +37,23 @@ int generic_read(int fd, __u8 *c_out, void *unused)
int generic_write(int fd, const __u8 *buf, size_t n, void *unused) int generic_write(int fd, const __u8 *buf, size_t n, void *unused)
{ {
int written = 0;
int err; int err;
err = write(fd, buf, n); /* The FD may be in blocking mode; as such, we need to retry short
if (err > 0) * writes, as they may have been interrupted by a signal.
return err; */
do {
errno = 0;
err = write(fd, buf + written, n - written);
if (err > 0) {
written += err;
continue;
}
} while (err < 0 && errno == EINTR);
if (written > 0)
return written;
else if (errno == EAGAIN) else if (errno == EAGAIN)
return 0; return 0;
else if (err == 0) else if (err == 0)
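
In isolation, the retry logic above is the classic write-everything loop for an FD that may now be blocking. A minimal sketch; full_write() is an illustrative name, not part of the patch:

/*
 * Sketch of the pattern generic_write() now follows: restart on
 * EINTR, accumulate short writes, and map EAGAIN on a non-blocking
 * FD to "wrote this much, try again later".
 */
#include <errno.h>
#include <unistd.h>

static ssize_t full_write(int fd, const char *buf, size_t len)
{
        size_t written = 0;

        while (written < len) {
                ssize_t n = write(fd, buf + written, len - written);

                if (n > 0) {
                        written += n;
                } else if (n < 0 && errno == EINTR) {
                        continue;       /* interrupted, retry */
                } else if (n < 0 && errno == EAGAIN) {
                        break;          /* non-blocking FD, partial result */
                } else {
                        return n < 0 ? -errno : (ssize_t)written;
                }
        }
        return written;
}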

View file

@ -49,6 +49,7 @@
#include "mconsole.h" #include "mconsole.h"
#include "harddog.h" #include "harddog.h"
MODULE_DESCRIPTION("UML hardware watchdog");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
static DEFINE_MUTEX(harddog_mutex); static DEFINE_MUTEX(harddog_mutex);

View file

@ -383,6 +383,7 @@ int setup_one_line(struct line *lines, int n, char *init,
parse_chan_pair(NULL, line, n, opts, error_out); parse_chan_pair(NULL, line, n, opts, error_out);
err = 0; err = 0;
} }
*error_out = "configured as 'none'";
} else { } else {
char *new = kstrdup(init, GFP_KERNEL); char *new = kstrdup(init, GFP_KERNEL);
if (!new) { if (!new) {
@ -406,6 +407,7 @@ int setup_one_line(struct line *lines, int n, char *init,
} }
} }
if (err) { if (err) {
*error_out = "failed to parse channel pair";
line->init_str = NULL; line->init_str = NULL;
line->valid = 0; line->valid = 0;
kfree(new); kfree(new);

View file

@ -1,113 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
#include <linux/init.h>
#include <linux/netdevice.h>
#include <net_kern.h>
#include "pcap_user.h"
struct pcap_init {
char *host_if;
int promisc;
int optimize;
char *filter;
};
static void pcap_init_kern(struct net_device *dev, void *data)
{
struct uml_net_private *pri;
struct pcap_data *ppri;
struct pcap_init *init = data;
pri = netdev_priv(dev);
ppri = (struct pcap_data *) pri->user;
ppri->host_if = init->host_if;
ppri->promisc = init->promisc;
ppri->optimize = init->optimize;
ppri->filter = init->filter;
printk("pcap backend, host interface %s\n", ppri->host_if);
}
static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
{
return pcap_user_read(fd, skb_mac_header(skb),
skb->dev->mtu + ETH_HEADER_OTHER,
(struct pcap_data *) &lp->user);
}
static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
{
return -EPERM;
}
static const struct net_kern_info pcap_kern_info = {
.init = pcap_init_kern,
.protocol = eth_protocol,
.read = pcap_read,
.write = pcap_write,
};
static int pcap_setup(char *str, char **mac_out, void *data)
{
struct pcap_init *init = data;
char *remain, *host_if = NULL, *options[2] = { NULL, NULL };
int i;
*init = ((struct pcap_init)
{ .host_if = "eth0",
.promisc = 1,
.optimize = 0,
.filter = NULL });
remain = split_if_spec(str, &host_if, &init->filter,
&options[0], &options[1], mac_out, NULL);
if (remain != NULL) {
printk(KERN_ERR "pcap_setup - Extra garbage on "
"specification : '%s'\n", remain);
return 0;
}
if (host_if != NULL)
init->host_if = host_if;
for (i = 0; i < ARRAY_SIZE(options); i++) {
if (options[i] == NULL)
continue;
if (!strcmp(options[i], "promisc"))
init->promisc = 1;
else if (!strcmp(options[i], "nopromisc"))
init->promisc = 0;
else if (!strcmp(options[i], "optimize"))
init->optimize = 1;
else if (!strcmp(options[i], "nooptimize"))
init->optimize = 0;
else {
printk(KERN_ERR "pcap_setup : bad option - '%s'\n",
options[i]);
return 0;
}
}
return 1;
}
static struct transport pcap_transport = {
.list = LIST_HEAD_INIT(pcap_transport.list),
.name = "pcap",
.setup = pcap_setup,
.user = &pcap_user_info,
.kern = &pcap_kern_info,
.private_size = sizeof(struct pcap_data),
.setup_size = sizeof(struct pcap_init),
};
static int register_pcap(void)
{
register_transport(&pcap_transport);
return 0;
}
late_initcall(register_pcap);

View file

@ -1,137 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
#include <errno.h>
#include <pcap.h>
#include <string.h>
#include <asm/types.h>
#include <net_user.h>
#include "pcap_user.h"
#include <um_malloc.h>
#define PCAP_FD(p) (*(int *)(p))
static int pcap_user_init(void *data, void *dev)
{
struct pcap_data *pri = data;
pcap_t *p;
char errors[PCAP_ERRBUF_SIZE];
p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER,
pri->promisc, 0, errors);
if (p == NULL) {
printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - "
"'%s'\n", errors);
return -EINVAL;
}
pri->dev = dev;
pri->pcap = p;
return 0;
}
static int pcap_user_open(void *data)
{
struct pcap_data *pri = data;
__u32 netmask;
int err;
if (pri->pcap == NULL)
return -ENODEV;
if (pri->filter != NULL) {
err = dev_netmask(pri->dev, &netmask);
if (err < 0) {
printk(UM_KERN_ERR "pcap_user_open : dev_netmask failed\n");
return -EIO;
}
pri->compiled = uml_kmalloc(sizeof(struct bpf_program),
UM_GFP_KERNEL);
if (pri->compiled == NULL) {
printk(UM_KERN_ERR "pcap_user_open : kmalloc failed\n");
return -ENOMEM;
}
err = pcap_compile(pri->pcap,
(struct bpf_program *) pri->compiled,
pri->filter, pri->optimize, netmask);
if (err < 0) {
printk(UM_KERN_ERR "pcap_user_open : pcap_compile failed - "
"'%s'\n", pcap_geterr(pri->pcap));
goto out;
}
err = pcap_setfilter(pri->pcap, pri->compiled);
if (err < 0) {
printk(UM_KERN_ERR "pcap_user_open : pcap_setfilter "
"failed - '%s'\n", pcap_geterr(pri->pcap));
goto out;
}
}
return PCAP_FD(pri->pcap);
out:
kfree(pri->compiled);
return -EIO;
}
static void pcap_remove(void *data)
{
struct pcap_data *pri = data;
if (pri->compiled != NULL)
pcap_freecode(pri->compiled);
if (pri->pcap != NULL)
pcap_close(pri->pcap);
}
struct pcap_handler_data {
char *buffer;
int len;
};
static void handler(u_char *data, const struct pcap_pkthdr *header,
const u_char *packet)
{
int len;
struct pcap_handler_data *hdata = (struct pcap_handler_data *) data;
len = hdata->len < header->caplen ? hdata->len : header->caplen;
memcpy(hdata->buffer, packet, len);
hdata->len = len;
}
int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri)
{
struct pcap_handler_data hdata = ((struct pcap_handler_data)
{ .buffer = buffer,
.len = len });
int n;
n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata);
if (n < 0) {
printk(UM_KERN_ERR "pcap_dispatch failed - %s\n",
pcap_geterr(pri->pcap));
return -EIO;
}
else if (n == 0)
return 0;
return hdata.len;
}
const struct net_user_info pcap_user_info = {
.init = pcap_user_init,
.open = pcap_user_open,
.close = NULL,
.remove = pcap_remove,
.add_address = NULL,
.delete_address = NULL,
.mtu = ETH_MAX_PACKET,
.max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
};

View file

@ -1,21 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
*/
#include <net_user.h>
struct pcap_data {
char *host_if;
int promisc;
int optimize;
char *filter;
void *compiled;
void *pcap;
void *dev;
};
extern const struct net_user_info pcap_user_info;
extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri);

View file

@ -45,15 +45,17 @@ struct connection {
static irqreturn_t pipe_interrupt(int irq, void *data) static irqreturn_t pipe_interrupt(int irq, void *data)
{ {
struct connection *conn = data; struct connection *conn = data;
int fd; int n_fds = 1, fd = -1;
ssize_t ret;
fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); ret = os_rcv_fd_msg(conn->socket[0], &fd, n_fds, &conn->helper_pid,
if (fd < 0) { sizeof(conn->helper_pid));
if (fd == -EAGAIN) if (ret != sizeof(conn->helper_pid)) {
if (ret == -EAGAIN)
return IRQ_NONE; return IRQ_NONE;
printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", printk(KERN_ERR "pipe_interrupt : os_rcv_fd_msg returned %zd\n",
-fd); ret);
os_close_file(conn->fd); os_close_file(conn->fd);
} }

View file

@ -36,7 +36,6 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <asm/tlbflush.h>
#include <kern_util.h> #include <kern_util.h>
#include "mconsole_kern.h" #include "mconsole_kern.h"
#include <init.h> #include <init.h>
@ -106,7 +105,6 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
#define DRIVER_NAME "uml-blkdev" #define DRIVER_NAME "uml-blkdev"
static DEFINE_MUTEX(ubd_lock); static DEFINE_MUTEX(ubd_lock);
static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg); unsigned int cmd, unsigned long arg);
@ -759,7 +757,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
goto error; goto error;
} }
flush_tlb_kernel_vm();
err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_offset,

View file

@ -1115,11 +1115,12 @@ static int irq_rr;
static int vector_net_close(struct net_device *dev) static int vector_net_close(struct net_device *dev)
{ {
struct vector_private *vp = netdev_priv(dev); struct vector_private *vp = netdev_priv(dev);
unsigned long flags;
netif_stop_queue(dev); netif_stop_queue(dev);
del_timer(&vp->tl); del_timer(&vp->tl);
vp->opened = false;
if (vp->fds == NULL) if (vp->fds == NULL)
return 0; return 0;
@ -1158,10 +1159,7 @@ static int vector_net_close(struct net_device *dev)
destroy_queue(vp->tx_queue); destroy_queue(vp->tx_queue);
kfree(vp->fds); kfree(vp->fds);
vp->fds = NULL; vp->fds = NULL;
spin_lock_irqsave(&vp->lock, flags);
vp->opened = false;
vp->in_error = false; vp->in_error = false;
spin_unlock_irqrestore(&vp->lock, flags);
return 0; return 0;
} }
@ -1203,17 +1201,12 @@ static void vector_reset_tx(struct work_struct *work)
static int vector_net_open(struct net_device *dev) static int vector_net_open(struct net_device *dev)
{ {
struct vector_private *vp = netdev_priv(dev); struct vector_private *vp = netdev_priv(dev);
unsigned long flags;
int err = -EINVAL; int err = -EINVAL;
struct vector_device *vdevice; struct vector_device *vdevice;
spin_lock_irqsave(&vp->lock, flags); if (vp->opened)
if (vp->opened) {
spin_unlock_irqrestore(&vp->lock, flags);
return -ENXIO; return -ENXIO;
}
vp->opened = true; vp->opened = true;
spin_unlock_irqrestore(&vp->lock, flags);
vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed)); vp->bpf = uml_vector_user_bpf(get_bpf_file(vp->parsed));
@ -1387,8 +1380,6 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
return -1; return -1;
} }
spin_lock(&vp->lock);
if (vp->bpf != NULL) { if (vp->bpf != NULL) {
if (vp->opened) if (vp->opened)
uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf); uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf);
@ -1417,15 +1408,12 @@ static int vector_net_load_bpf_flash(struct net_device *dev,
if (vp->opened) if (vp->opened)
result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf); result = uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf);
spin_unlock(&vp->lock);
return result; return result;
free_buffer: free_buffer:
release_firmware(fw); release_firmware(fw);
flash_fail: flash_fail:
spin_unlock(&vp->lock);
if (vp->bpf != NULL) if (vp->bpf != NULL)
kfree(vp->bpf->filter); kfree(vp->bpf->filter);
kfree(vp->bpf); kfree(vp->bpf);
@ -1631,7 +1619,6 @@ static void vector_eth_configure(
INIT_WORK(&vp->reset_tx, vector_reset_tx); INIT_WORK(&vp->reset_tx, vector_reset_tx);
timer_setup(&vp->tl, vector_timer_expire, 0); timer_setup(&vp->tl, vector_timer_expire, 0);
spin_lock_init(&vp->lock);
/* FIXME */ /* FIXME */
dev->netdev_ops = &vector_netdev_ops; dev->netdev_ops = &vector_netdev_ops;

View file

@ -71,7 +71,6 @@ struct vector_estats {
struct vector_private { struct vector_private {
struct list_head list; struct list_head list;
spinlock_t lock;
struct net_device *dev; struct net_device *dev;
struct napi_struct napi ____cacheline_aligned; struct napi_struct napi ____cacheline_aligned;

View file

@ -156,7 +156,7 @@ static int xterm_open(int input, int output, int primary, void *d,
new = xterm_fd(fd, &data->helper_pid); new = xterm_fd(fd, &data->helper_pid);
if (new < 0) { if (new < 0) {
err = new; err = new;
printk(UM_KERN_ERR "xterm_open : os_rcv_fd failed, err = %d\n", printk(UM_KERN_ERR "xterm_open : xterm_fd failed, err = %d\n",
-err); -err);
goto out_kill; goto out_kill;
} }

View file

@ -21,12 +21,19 @@ struct xterm_wait {
static irqreturn_t xterm_interrupt(int irq, void *data) static irqreturn_t xterm_interrupt(int irq, void *data)
{ {
struct xterm_wait *xterm = data; struct xterm_wait *xterm = data;
int fd; int fd = -1, n_fds = 1;
ssize_t ret;
fd = os_rcv_fd(xterm->fd, &xterm->pid); ret = os_rcv_fd_msg(xterm->fd, &fd, n_fds,
if (fd == -EAGAIN) &xterm->pid, sizeof(xterm->pid));
if (ret == -EAGAIN)
return IRQ_NONE; return IRQ_NONE;
if (ret < 0)
fd = ret;
else if (ret != sizeof(xterm->pid))
fd = -EMSGSIZE;
xterm->new_fd = fd; xterm->new_fd = fd;
complete(&xterm->ready); complete(&xterm->ready);

View file

@ -7,15 +7,13 @@
#define __ARCH_UM_MMU_H #define __ARCH_UM_MMU_H
#include <mm_id.h> #include <mm_id.h>
#include <asm/mm_context.h>
typedef struct mm_context { typedef struct mm_context {
struct mm_id id; struct mm_id id;
struct uml_arch_mm_context arch;
/* Address range in need of a TLB sync */
unsigned long sync_tlb_range_from;
unsigned long sync_tlb_range_to;
} mm_context_t; } mm_context_t;
/* Avoid tangled inclusion with asm/ldt.h */
extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
extern void free_ldt(struct mm_context *mm);
#endif #endif

View file

@ -13,8 +13,6 @@
#include <asm/mm_hooks.h> #include <asm/mm_hooks.h>
#include <asm/mmu.h> #include <asm/mmu.h>
extern void force_flush_all(void);
#define activate_mm activate_mm #define activate_mm activate_mm
static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) static inline void activate_mm(struct mm_struct *old, struct mm_struct *new)
{ {

View file

@ -244,6 +244,38 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
#define PFN_PTE_SHIFT PAGE_SHIFT #define PFN_PTE_SHIFT PAGE_SHIFT
static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
unsigned long end)
{
if (!mm->context.sync_tlb_range_to) {
mm->context.sync_tlb_range_from = start;
mm->context.sync_tlb_range_to = end;
} else {
if (start < mm->context.sync_tlb_range_from)
mm->context.sync_tlb_range_from = start;
if (end > mm->context.sync_tlb_range_to)
mm->context.sync_tlb_range_to = end;
}
}
#define set_ptes set_ptes
static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int nr)
{
/* Basically the default implementation */
size_t length = nr * PAGE_SIZE;
for (;;) {
set_pte(ptep, pte);
if (--nr == 0)
break;
ptep++;
pte = __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
}
um_tlb_mark_sync(mm, addr, addr + length);
}
#define __HAVE_ARCH_PTE_SAME #define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b) static inline int pte_same(pte_t pte_a, pte_t pte_b)
{ {

View file

@ -9,23 +9,51 @@
#include <linux/mm.h> #include <linux/mm.h>
/* /*
* TLB flushing: * In UML, we need to sync the TLB over by using mmap/munmap/mprotect syscalls
* from the process handling the MM (which can be the kernel itself).
*
* To track updates, we can hook into set_ptes and flush_tlb_*. With set_ptes
* we catch all PTE transitions where memory that was unusable becomes usable.
* With flush_tlb_* we can track any memory that becomes unusable, even
* when a higher layer of the page table was modified.
*
* So, we simply track updates using both methods and mark the memory area to
* be synced later on. The only special case is that flush_tlb_kern_* needs to
* be executed immediately as there is no good synchronization point in that
* case. In contrast, in the set_ptes case we can wait for the next kernel
* segfault before we do the synchronization.
* *
* - flush_tlb() flushes the current mm struct TLBs
* - flush_tlb_all() flushes all processes TLBs * - flush_tlb_all() flushes all processes TLBs
* - flush_tlb_mm(mm) flushes the specified mm context TLB's * - flush_tlb_mm(mm) flushes the specified mm context TLB's
* - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_page(vma, vmaddr) flushes one page
* - flush_tlb_kernel_vm() flushes the kernel vm area
* - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_range(vma, start, end) flushes a range of pages
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
*/ */
extern int um_tlb_sync(struct mm_struct *mm);
extern void flush_tlb_all(void); extern void flush_tlb_all(void);
extern void flush_tlb_mm(struct mm_struct *mm); extern void flush_tlb_mm(struct mm_struct *mm);
extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end); static inline void flush_tlb_page(struct vm_area_struct *vma,
extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address); unsigned long address)
extern void flush_tlb_kernel_vm(void); {
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); um_tlb_mark_sync(vma->vm_mm, address, address + PAGE_SIZE);
extern void __flush_tlb_one(unsigned long addr); }
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
um_tlb_mark_sync(vma->vm_mm, start, end);
}
static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
{
um_tlb_mark_sync(&init_mm, start, end);
/* Kernel needs to be synced immediately */
um_tlb_sync(&init_mm);
}
#endif #endif
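
Together with um_tlb_mark_sync() from pgtable.h above, the scheme separates cheap bookkeeping from the expensive replay. A sketch of the two paths, assuming the sync points described in the comment (illustrative only, not from the patch):

/*
 * Illustrative only - shows when work actually happens under the
 * deferred-sync model described above.
 */
static void sketch_tlb_flush_paths(struct vm_area_struct *vma,
                                   unsigned long start, unsigned long end)
{
        /* User mapping: O(1), merely widens the mm's pending
         * sync_tlb_range_{from,to} window. The mmap/munmap/mprotect
         * replay happens later, e.g. when current_mm_sync() calls
         * um_tlb_sync() before the task re-enters userspace. */
        flush_tlb_range(vma, start, end);

        /* Kernel mapping: there is no later sync point the kernel
         * will reliably hit, so this marks init_mm and immediately
         * calls um_tlb_sync(&init_mm). */
        flush_tlb_kernel_range(start, end);
}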

View file

@ -23,7 +23,7 @@
#define STUB_START stub_start #define STUB_START stub_start
#define STUB_CODE STUB_START #define STUB_CODE STUB_START
#define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE) #define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE)
#define STUB_DATA_PAGES 1 /* must be a power of two */ #define STUB_DATA_PAGES 2 /* must be a power of two */
#define STUB_END (STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE) #define STUB_END (STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE)
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__

View file

@ -1,6 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
/* for use by sys-$SUBARCH/kernel-offsets.c */ /* for use by sys-$SUBARCH/kernel-offsets.c */
#include <stub-data.h>
DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE); DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
@ -30,7 +29,3 @@ DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT); DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
#endif #endif
/* for stub */
DEFINE(UML_STUB_FIELD_OFFSET, offsetof(struct stub_data, offset));
DEFINE(UML_STUB_FIELD_CHILD_ERR, offsetof(struct stub_data, child_err));
DEFINE(UML_STUB_FIELD_FD, offsetof(struct stub_data, fd));

View file

@ -13,7 +13,6 @@ struct siginfo;
extern int uml_exitcode; extern int uml_exitcode;
extern int ncpus;
extern int kmalloc_ok; extern int kmalloc_ok;
#define UML_ROUND_UP(addr) \ #define UML_ROUND_UP(addr) \

View file

@ -163,8 +163,10 @@ extern int os_set_fd_block(int fd, int blocking);
extern int os_accept_connection(int fd); extern int os_accept_connection(int fd);
extern int os_create_unix_socket(const char *file, int len, int close_on_exec); extern int os_create_unix_socket(const char *file, int len, int close_on_exec);
extern int os_shutdown_socket(int fd, int r, int w); extern int os_shutdown_socket(int fd, int r, int w);
extern int os_dup_file(int fd);
extern void os_close_file(int fd); extern void os_close_file(int fd);
extern int os_rcv_fd(int fd, int *helper_pid_out); ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
void *data, size_t data_len);
extern int os_connect_socket(const char *name); extern int os_connect_socket(const char *name);
extern int os_file_type(char *file); extern int os_file_type(char *file);
extern int os_file_mode(const char *file, struct openflags *mode_out); extern int os_file_mode(const char *file, struct openflags *mode_out);
@ -179,6 +181,8 @@ extern int os_eventfd(unsigned int initval, int flags);
extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len, extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
const int *fds, unsigned int fds_num); const int *fds, unsigned int fds_num);
int os_poll(unsigned int n, const int *fds); int os_poll(unsigned int n, const int *fds);
void *os_mmap_rw_shared(int fd, size_t size);
void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size);
/* start_up.c */ /* start_up.c */
extern void os_early_checks(void); extern void os_early_checks(void);
@ -191,6 +195,9 @@ extern void get_host_cpu_features(
/* mem.c */ /* mem.c */
extern int create_mem_file(unsigned long long len); extern int create_mem_file(unsigned long long len);
/* tlb.c */
extern void report_enomem(void);
/* process.c */ /* process.c */
extern unsigned long os_process_pc(int pid); extern unsigned long os_process_pc(int pid);
extern int os_process_parent(int pid); extern int os_process_parent(int pid);
@ -268,24 +275,20 @@ extern long long os_persistent_clock_emulation(void);
extern long long os_nsecs(void); extern long long os_nsecs(void);
/* skas/mem.c */ /* skas/mem.c */
extern long run_syscall_stub(struct mm_id * mm_idp, int syscall_stub_flush(struct mm_id *mm_idp);
int syscall, unsigned long *args, long expected, struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp);
void **addr, int done); void syscall_stub_dump_error(struct mm_id *mm_idp);
extern long syscall_stub_data(struct mm_id * mm_idp,
unsigned long *data, int data_count, int map(struct mm_id *mm_idp, unsigned long virt,
void **addr, void **stub_addr); unsigned long len, int prot, int phys_fd,
extern int map(struct mm_id * mm_idp, unsigned long virt, unsigned long long offset);
unsigned long len, int prot, int phys_fd, int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
unsigned long long offset, int done, void **data); int protect(struct mm_id *mm_idp, unsigned long addr,
extern int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, unsigned long len, unsigned int prot);
int done, void **data);
extern int protect(struct mm_id * mm_idp, unsigned long addr,
unsigned long len, unsigned int prot, int done, void **data);
/* skas/process.c */ /* skas/process.c */
extern int is_skas_winch(int pid, int fd, void *data); extern int is_skas_winch(int pid, int fd, void *data);
extern int start_userspace(unsigned long stub_stack); extern int start_userspace(unsigned long stub_stack);
extern int copy_context_skas0(unsigned long stack, int pid);
extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs); extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs);
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)); extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
extern void switch_threads(jmp_buf *me, jmp_buf *you); extern void switch_threads(jmp_buf *me, jmp_buf *you);

View file

@ -12,7 +12,7 @@ struct mm_id {
int pid; int pid;
} u; } u;
unsigned long stack; unsigned long stack;
int kill; int syscall_data_len;
}; };
void __switch_mm(struct mm_id *mm_idp); void __switch_mm(struct mm_id *mm_idp);

View file

@ -15,5 +15,7 @@ extern void new_thread_handler(void);
extern void handle_syscall(struct uml_pt_regs *regs); extern void handle_syscall(struct uml_pt_regs *regs);
extern long execute_syscall_skas(void *r); extern long execute_syscall_skas(void *r);
extern unsigned long current_stub_stack(void); extern unsigned long current_stub_stack(void);
extern struct mm_id *current_mm_id(void);
extern void current_mm_sync(void);
#endif #endif

View file

@ -8,10 +8,42 @@
#ifndef __STUB_DATA_H #ifndef __STUB_DATA_H
#define __STUB_DATA_H #define __STUB_DATA_H
#include <linux/compiler_types.h>
#include <as-layout.h>
#include <sysdep/tls.h>
#define STUB_NEXT_SYSCALL(s) \
((struct stub_syscall *) (((unsigned long) s) + (s)->cmd_len))
enum stub_syscall_type {
STUB_SYSCALL_UNSET = 0,
STUB_SYSCALL_MMAP,
STUB_SYSCALL_MUNMAP,
STUB_SYSCALL_MPROTECT,
};
struct stub_syscall {
struct {
unsigned long addr;
unsigned long length;
unsigned long offset;
int fd;
int prot;
} mem;
enum stub_syscall_type syscall;
};
struct stub_data { struct stub_data {
unsigned long offset; unsigned long offset;
int fd; long err, child_err;
long parent_err, child_err;
int syscall_data_len;
/* 128 leaves enough room for additional fields in the struct */
struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
/* Stack for our signal handlers and for calling into . */
unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
}; };
#endif #endif
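
The producer side of this structure lives in the skas memory code, which this excerpt does not show. A hedged sketch of queueing one batched mmap request through the syscall_stub_alloc()/syscall_stub_flush() interface declared in os.h above; the helper name and error handling are assumptions:

/* Sketch only - the real queueing lives in arch/um/os-Linux/skas/mem.c. */
static int sketch_queue_mmap(struct mm_id *mm_idp, unsigned long virt,
                             unsigned long len, int prot, int phys_fd,
                             unsigned long long offset)
{
        struct stub_syscall *sc = syscall_stub_alloc(mm_idp);

        if (!sc)
                return -ENOMEM; /* assumption: allocation may fail */

        sc->syscall    = STUB_SYSCALL_MMAP;
        sc->mem.addr   = virt;
        sc->mem.length = len;
        sc->mem.prot   = prot;
        sc->mem.fd     = phys_fd;
        sc->mem.offset = offset;

        /*
         * The real code lets several entries accumulate in
         * stub_data.syscall_data[] before a single syscall_stub_flush()
         * has the child execute them all via stub_syscall_handler(),
         * reporting any failure back through stub_data.err; flushing
         * right away just keeps this sketch simple.
         */
        return syscall_stub_flush(mm_idp);
}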

View file

@ -15,8 +15,17 @@ enum time_travel_mode {
#if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \ #if defined(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT) || \
defined(CONFIG_UML_TIME_TRAVEL_SUPPORT) defined(CONFIG_UML_TIME_TRAVEL_SUPPORT)
extern enum time_travel_mode time_travel_mode; extern enum time_travel_mode time_travel_mode;
extern int time_travel_should_print_bc_msg;
#else #else
#define time_travel_mode TT_MODE_OFF #define time_travel_mode TT_MODE_OFF
#define time_travel_should_print_bc_msg 0
#endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */ #endif /* (UML_)CONFIG_UML_TIME_TRAVEL_SUPPORT */
void _time_travel_print_bc_msg(void);
static inline void time_travel_print_bc_msg(void)
{
if (time_travel_should_print_bc_msg)
_time_travel_print_bc_msg();
}
#endif /* _UM_TIME_TRAVEL_H_ */ #endif /* _UM_TIME_TRAVEL_H_ */

View file

@ -42,11 +42,19 @@ extern void panic(const char *fmt, ...)
#define printk(...) _printk(__VA_ARGS__) #define printk(...) _printk(__VA_ARGS__)
extern int _printk(const char *fmt, ...) extern int _printk(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2))); __attribute__ ((format (printf, 1, 2)));
extern void print_hex_dump(const char *level, const char *prefix_str,
int prefix_type, int rowsize, int groupsize,
const void *buf, size_t len, _Bool ascii);
#else #else
static inline int printk(const char *fmt, ...) static inline int printk(const char *fmt, ...)
{ {
return 0; return 0;
} }
static inline void print_hex_dump(const char *level, const char *prefix_str,
int prefix_type, int rowsize, int groupsize,
const void *buf, size_t len, _Bool ascii)
{
}
#endif #endif
extern int in_aton(char *str); extern int in_aton(char *str);

View file

@ -22,17 +22,8 @@
void flush_thread(void) void flush_thread(void)
{ {
void *data = NULL;
int ret;
arch_flush_thread(&current->thread.arch); arch_flush_thread(&current->thread.arch);
ret = unmap(&current->mm->context.id, 0, TASK_SIZE, 1, &data);
if (ret) {
printk(KERN_ERR "%s - clearing address space failed, err = %d\n",
__func__, ret);
force_sig(SIGKILL);
}
get_safe_registers(current_pt_regs()->regs.gp, get_safe_registers(current_pt_regs()->regs.gp,
current_pt_regs()->regs.fp); current_pt_regs()->regs.fp);

View file

@ -37,7 +37,7 @@ struct irq_reg {
bool pending; bool pending;
bool wakeup; bool wakeup;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
bool pending_on_resume; bool pending_event;
void (*timetravel_handler)(int, int, void *, void (*timetravel_handler)(int, int, void *,
struct time_travel_event *); struct time_travel_event *);
struct time_travel_event event; struct time_travel_event event;
@ -56,6 +56,9 @@ static DEFINE_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds); static LIST_HEAD(active_fds);
static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ); static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended; static bool irqs_suspended;
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
static bool irqs_pending;
#endif
static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
{ {
@ -84,9 +87,12 @@ static void irq_event_handler(struct time_travel_event *ev)
{ {
struct irq_reg *reg = container_of(ev, struct irq_reg, event); struct irq_reg *reg = container_of(ev, struct irq_reg, event);
/* do nothing if suspended - just to cause a wakeup */ /* do nothing if suspended; just cause a wakeup and mark as pending */
if (irqs_suspended) if (irqs_suspended) {
irqs_pending = true;
reg->pending_event = true;
return; return;
}
generic_handle_irq(reg->irq); generic_handle_irq(reg->irq);
} }
@ -110,16 +116,47 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
if (!reg->event.pending) if (!reg->event.pending)
return false; return false;
if (irqs_suspended)
reg->pending_on_resume = true;
return true; return true;
} }
static void irq_do_pending_events(bool timetravel_handlers_only)
{
struct irq_entry *entry;
if (!irqs_pending || timetravel_handlers_only)
return;
irqs_pending = false;
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type t;
for (t = 0; t < NUM_IRQ_TYPES; t++) {
struct irq_reg *reg = &entry->reg[t];
/*
* Any timetravel_handler was invoked already, just
* directly run the IRQ.
*/
if (reg->pending_event) {
irq_enter();
generic_handle_irq(reg->irq);
irq_exit();
reg->pending_event = false;
}
}
}
}
#else #else
static bool irq_do_timetravel_handler(struct irq_entry *entry, static bool irq_do_timetravel_handler(struct irq_entry *entry,
enum um_irq_type t) enum um_irq_type t)
{ {
return false; return false;
} }
static void irq_do_pending_events(bool timetravel_handlers_only)
{
}
#endif #endif
static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t, static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
@ -145,6 +182,8 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type
*/ */
if (timetravel_handlers_only) { if (timetravel_handlers_only) {
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
reg->pending_event = true;
irqs_pending = true;
mark_sigio_pending(); mark_sigio_pending();
#endif #endif
return; return;
@ -162,6 +201,10 @@ static void _sigio_handler(struct uml_pt_regs *regs,
if (timetravel_handlers_only && !um_irq_timetravel_handler_used()) if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
return; return;
/* Flush out pending events that were ignored due to time-travel. */
if (!irqs_suspended)
irq_do_pending_events(timetravel_handlers_only);
while (1) { while (1) {
/* This is now lockless - epoll keeps back-references to the irqs * which have triggered it so there is no need to walk the irq
* which have trigger it so there is no need to walk the irq * which have trigger it so there is no need to walk the irq
@ -195,7 +238,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{ {
preempt_disable();
_sigio_handler(regs, irqs_suspended); _sigio_handler(regs, irqs_suspended);
preempt_enable();
} }
static struct irq_entry *get_irq_entry_by_fd(int fd) static struct irq_entry *get_irq_entry_by_fd(int fd)
@ -543,30 +588,7 @@ void um_irqs_resume(void)
unsigned long flags; unsigned long flags;
local_irq_save(flags); spin_lock_irqsave(&irq_lock, flags);
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
/*
* We don't need to lock anything here since we're in resume
* and nothing else is running, but have disabled IRQs so we
* don't try anything else with the interrupt list from there.
*/
list_for_each_entry(entry, &active_fds, list) {
enum um_irq_type t;
for (t = 0; t < NUM_IRQ_TYPES; t++) {
struct irq_reg *reg = &entry->reg[t];
if (reg->pending_on_resume) {
irq_enter();
generic_handle_irq(reg->irq);
irq_exit();
reg->pending_on_resume = false;
}
}
}
#endif
spin_lock(&irq_lock);
list_for_each_entry(entry, &active_fds, list) { list_for_each_entry(entry, &active_fds, list) {
if (entry->suspended) { if (entry->suspended) {
int err = os_set_fd_async(entry->fd); int err = os_set_fd_async(entry->fd);

View file

@ -33,7 +33,7 @@ EXPORT_SYMBOL(os_shutdown_socket);
EXPORT_SYMBOL(os_create_unix_socket); EXPORT_SYMBOL(os_create_unix_socket);
EXPORT_SYMBOL(os_connect_socket); EXPORT_SYMBOL(os_connect_socket);
EXPORT_SYMBOL(os_accept_connection); EXPORT_SYMBOL(os_accept_connection);
EXPORT_SYMBOL(os_rcv_fd); EXPORT_SYMBOL(os_rcv_fd_msg);
EXPORT_SYMBOL(run_helper); EXPORT_SYMBOL(run_helper);
EXPORT_SYMBOL(os_major); EXPORT_SYMBOL(os_major);
EXPORT_SYMBOL(os_minor); EXPORT_SYMBOL(os_minor);

View file

@ -73,7 +73,6 @@ void __init mem_init(void)
/* this will put all low memory onto the freelists */ /* this will put all low memory onto the freelists */
memblock_free_all(); memblock_free_all();
max_low_pfn = totalram_pages();
max_pfn = max_low_pfn; max_pfn = max_low_pfn;
kmalloc_ok = 1; kmalloc_ok = 1;
} }

View file

@ -122,8 +122,6 @@ void new_thread_handler(void)
/* Called magically, see new_thread_handler above */ /* Called magically, see new_thread_handler above */
static void fork_handler(void) static void fork_handler(void)
{ {
force_flush_all();
schedule_tail(current->thread.prev_sched); schedule_tail(current->thread.prev_sched);
/* /*
@ -237,73 +235,6 @@ int copy_from_user_proc(void *to, void __user *from, int size)
return copy_from_user(to, from, size); return copy_from_user(to, from, size);
} }
static atomic_t using_sysemu = ATOMIC_INIT(0);
int sysemu_supported;
static void set_using_sysemu(int value)
{
if (value > sysemu_supported)
return;
atomic_set(&using_sysemu, value);
}
static int get_using_sysemu(void)
{
return atomic_read(&using_sysemu);
}
static int sysemu_proc_show(struct seq_file *m, void *v)
{
seq_printf(m, "%d\n", get_using_sysemu());
return 0;
}
static int sysemu_proc_open(struct inode *inode, struct file *file)
{
return single_open(file, sysemu_proc_show, NULL);
}
static ssize_t sysemu_proc_write(struct file *file, const char __user *buf,
size_t count, loff_t *pos)
{
char tmp[2];
if (copy_from_user(tmp, buf, 1))
return -EFAULT;
if (tmp[0] >= '0' && tmp[0] <= '2')
set_using_sysemu(tmp[0] - '0');
/* We use the first char, but pretend to write everything */
return count;
}
static const struct proc_ops sysemu_proc_ops = {
.proc_open = sysemu_proc_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = single_release,
.proc_write = sysemu_proc_write,
};
static int __init make_proc_sysemu(void)
{
struct proc_dir_entry *ent;
if (!sysemu_supported)
return 0;
ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops);
if (ent == NULL)
{
printk(KERN_WARNING "Failed to register /proc/sysemu\n");
return 0;
}
return 0;
}
late_initcall(make_proc_sysemu);
int singlestepping(void) int singlestepping(void)
{ {
return test_thread_flag(TIF_SINGLESTEP); return test_thread_flag(TIF_SINGLESTEP);

View file

@ -59,3 +59,18 @@ void machine_halt(void)
{ {
machine_power_off(); machine_power_off();
} }
static int sys_power_off_handler(struct sys_off_data *data)
{
machine_power_off();
return 0;
}
static int register_power_off(void)
{
register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
SYS_OFF_PRIO_DEFAULT,
sys_power_off_handler, NULL);
return 0;
}
__initcall(register_power_off);

View file

@ -3,15 +3,14 @@
# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) # Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
# #
obj-y := clone.o mmu.o process.o syscall.o uaccess.o obj-y := stub.o mmu.o process.o syscall.o uaccess.o
# clone.o is in the stub, so it can't be built with profiling # stub.o is in the stub, so it can't be built with profiling
# GCC hardened also auto-enables -fpic, but we need %ebx so it can't work -> # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
# disable it # disable it
CFLAGS_clone.o := $(CFLAGS_NO_HARDENING) CFLAGS_stub.o := $(CFLAGS_NO_HARDENING)
UNPROFILE_OBJS := clone.o UNPROFILE_OBJS := stub.o
KCOV_INSTRUMENT := n KCOV_INSTRUMENT := n
include $(srctree)/arch/um/scripts/Makefile.rules include $(srctree)/arch/um/scripts/Makefile.rules

View file

@ -1,48 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
* Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
#include <signal.h>
#include <sched.h>
#include <asm/unistd.h>
#include <sys/time.h>
#include <as-layout.h>
#include <ptrace_user.h>
#include <stub-data.h>
#include <sysdep/stub.h>
/*
* This is in a separate file because it needs to be compiled with any
* extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled
*
* Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize
* on some systems.
*/
void __attribute__ ((__section__ (".__syscall_stub")))
stub_clone_handler(void)
{
struct stub_data *data = get_stub_data();
long err;
err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
(unsigned long)data +
STUB_DATA_PAGES * UM_KERN_PAGE_SIZE / 2);
if (err) {
data->parent_err = err;
goto done;
}
err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
if (err) {
data->child_err = err;
goto done;
}
remap_stack_and_trap();
done:
trap_myself();
}

View file

@ -14,11 +14,14 @@
#include <as-layout.h> #include <as-layout.h>
#include <os.h> #include <os.h>
#include <skas.h> #include <skas.h>
#include <stub-data.h>
/* Ensure the stub_data struct covers the allocated area */
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
int init_new_context(struct task_struct *task, struct mm_struct *mm) int init_new_context(struct task_struct *task, struct mm_struct *mm)
{ {
struct mm_context *from_mm = NULL; struct mm_id *new_id = &mm->context.id;
struct mm_context *to_mm = &mm->context;
unsigned long stack = 0; unsigned long stack = 0;
int ret = -ENOMEM; int ret = -ENOMEM;
@ -26,34 +29,46 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
if (stack == 0) if (stack == 0)
goto out; goto out;
to_mm->id.stack = stack; new_id->stack = stack;
if (current->mm != NULL && current->mm != &init_mm)
from_mm = &current->mm->context;
block_signals_trace(); block_signals_trace();
if (from_mm) new_id->u.pid = start_userspace(stack);
to_mm->id.u.pid = copy_context_skas0(stack,
from_mm->id.u.pid);
else to_mm->id.u.pid = start_userspace(stack);
unblock_signals_trace(); unblock_signals_trace();
if (to_mm->id.u.pid < 0) { if (new_id->u.pid < 0) {
ret = to_mm->id.u.pid; ret = new_id->u.pid;
goto out_free; goto out_free;
} }
ret = init_new_ldt(to_mm, from_mm); /*
if (ret < 0) { * Ensure the new MM is clean and nothing unwanted is mapped.
printk(KERN_ERR "init_new_context_skas - init_ldt" *
" failed, errno = %d\n", ret); * TODO: We should clear the memory up to STUB_START to ensure there is
goto out_free; * nothing mapped there, i.e. we (currently) have:
} *
* |- user memory -|- unused -|- stub -|- unused -|
* ^ TASK_SIZE ^ STUB_START
*
* Meaning we have two unused areas where we may still have valid
* mappings from our internal clone(). That isn't really a problem as
* userspace is not going to access them, but it is definitely not
* correct.
*
* However, we are "lucky" and if rseq is configured, then on 32 bit
* it will fall into the first empty range while on 64 bit it is going
* to use an anonymous mapping in the second range. As such, things
* continue to work for now as long as we don't start unmapping these
* areas.
*
* Change this to STUB_START once we have a clean userspace.
*/
unmap(new_id, 0, TASK_SIZE);
return 0; return 0;
out_free: out_free:
if (to_mm->id.stack != 0) if (new_id->stack != 0)
free_pages(to_mm->id.stack, ilog2(STUB_DATA_PAGES)); free_pages(new_id->stack, ilog2(STUB_DATA_PAGES));
out: out:
return ret; return ret;
} }
@ -76,5 +91,4 @@ void destroy_context(struct mm_struct *mm)
os_kill_ptraced_process(mmu->id.u.pid, 1); os_kill_ptraced_process(mmu->id.u.pid, 1);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES)); free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
free_ldt(mmu);
} }

View file

@ -8,6 +8,8 @@
#include <linux/sched/task_stack.h> #include <linux/sched/task_stack.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <asm/tlbflush.h>
#include <as-layout.h> #include <as-layout.h>
#include <kern.h> #include <kern.h>
#include <os.h> #include <os.h>
@ -50,3 +52,19 @@ unsigned long current_stub_stack(void)
return current->mm->context.id.stack; return current->mm->context.id.stack;
} }
struct mm_id *current_mm_id(void)
{
if (current->mm == NULL)
return NULL;
return &current->mm->context.id;
}
void current_mm_sync(void)
{
if (current->mm == NULL)
return;
um_tlb_sync(current->mm);
}

View file

@ -0,0 +1,69 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
*/
#include <sysdep/stub.h>
static __always_inline int syscall_handler(struct stub_data *d)
{
int i;
unsigned long res;
for (i = 0; i < d->syscall_data_len; i++) {
struct stub_syscall *sc = &d->syscall_data[i];
switch (sc->syscall) {
case STUB_SYSCALL_MMAP:
res = stub_syscall6(STUB_MMAP_NR,
sc->mem.addr, sc->mem.length,
sc->mem.prot,
MAP_SHARED | MAP_FIXED,
sc->mem.fd, sc->mem.offset);
if (res != sc->mem.addr) {
d->err = res;
d->syscall_data_len = i;
return -1;
}
break;
case STUB_SYSCALL_MUNMAP:
res = stub_syscall2(__NR_munmap,
sc->mem.addr, sc->mem.length);
if (res) {
d->err = res;
d->syscall_data_len = i;
return -1;
}
break;
case STUB_SYSCALL_MPROTECT:
res = stub_syscall3(__NR_mprotect,
sc->mem.addr, sc->mem.length,
sc->mem.prot);
if (res) {
d->err = res;
d->syscall_data_len = i;
return -1;
}
break;
default:
d->err = -95; /* EOPNOTSUPP */
d->syscall_data_len = i;
return -1;
}
}
d->err = 0;
d->syscall_data_len = 0;
return 0;
}
void __section(".__syscall_stub")
stub_syscall_handler(void)
{
struct stub_data *d = get_stub_data();
syscall_handler(d);
trap_myself();
}
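
The handler runs inside the traced child, at the stub address, and walks an array of pre-encoded syscalls in the shared stub_data page; on the first failure it records the error and the failing index so the kernel side can dump it. The layout it assumes is roughly the following; field order, sizes, and the exact enum spelling are paraphrased from the rest of this merge rather than quoted verbatim:

/* Approximate shape of the shared area consumed by syscall_handler() */
enum stub_syscall_type {
	STUB_SYSCALL_UNSET = 0,
	STUB_SYSCALL_MMAP,
	STUB_SYSCALL_MUNMAP,
	STUB_SYSCALL_MPROTECT,
};

struct stub_syscall {
	struct {
		unsigned long addr;
		unsigned long length;
		unsigned long offset;
		int fd;
		int prot;
	} mem;
	enum stub_syscall_type syscall;
};

struct stub_data {
	long err;			/* failing syscall's return value */
	int syscall_data_len;		/* in: #entries, out: failing index */
	struct stub_syscall syscall_data[16];	/* real bound: data page size */
	unsigned char sigstack[4096];	/* also used as the stub's stack */
};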

View file

@ -31,6 +31,7 @@ EXPORT_SYMBOL_GPL(time_travel_mode);
static bool time_travel_start_set; static bool time_travel_start_set;
static unsigned long long time_travel_start; static unsigned long long time_travel_start;
static unsigned long long time_travel_time; static unsigned long long time_travel_time;
static unsigned long long time_travel_shm_offset;
static LIST_HEAD(time_travel_events); static LIST_HEAD(time_travel_events);
static LIST_HEAD(time_travel_irqs); static LIST_HEAD(time_travel_irqs);
static unsigned long long time_travel_timer_interval; static unsigned long long time_travel_timer_interval;
@ -40,8 +41,11 @@ static int time_travel_ext_fd = -1;
static unsigned int time_travel_ext_waiting; static unsigned int time_travel_ext_waiting;
static bool time_travel_ext_prev_request_valid; static bool time_travel_ext_prev_request_valid;
static unsigned long long time_travel_ext_prev_request; static unsigned long long time_travel_ext_prev_request;
-static bool time_travel_ext_free_until_valid;
-static unsigned long long time_travel_ext_free_until;
+static unsigned long long *time_travel_ext_free_until;
+static unsigned long long _time_travel_ext_free_until;
static u16 time_travel_shm_id;
static struct um_timetravel_schedshm *time_travel_shm;
static union um_timetravel_schedshm_client *time_travel_shm_client;
static void time_travel_set_time(unsigned long long ns) static void time_travel_set_time(unsigned long long ns)
{ {
@ -58,8 +62,52 @@ enum time_travel_message_handling {
TTMH_IDLE, TTMH_IDLE,
TTMH_POLL, TTMH_POLL,
TTMH_READ, TTMH_READ,
TTMH_READ_START_ACK,
}; };
static u64 bc_message;
int time_travel_should_print_bc_msg;
void _time_travel_print_bc_msg(void)
{
time_travel_should_print_bc_msg = 0;
printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", bc_message);
}
static void time_travel_setup_shm(int fd, u16 id)
{
u32 len;
time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));
if (!time_travel_shm)
goto out;
len = time_travel_shm->len;
if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
len < struct_size(time_travel_shm, clients, id + 1)) {
os_unmap_memory(time_travel_shm, sizeof(*time_travel_shm));
time_travel_shm = NULL;
goto out;
}
time_travel_shm = os_mremap_rw_shared(time_travel_shm,
sizeof(*time_travel_shm),
len);
if (!time_travel_shm)
goto out;
time_travel_shm_offset = time_travel_shm->current_time;
time_travel_shm_client = &time_travel_shm->clients[id];
time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
time_travel_shm_id = id;
/* always look at that free_until from now on */
time_travel_ext_free_until = &time_travel_shm->free_until;
out:
os_close_file(fd);
}
static void time_travel_handle_message(struct um_timetravel_msg *msg, static void time_travel_handle_message(struct um_timetravel_msg *msg,
enum time_travel_message_handling mode) enum time_travel_message_handling mode)
{ {
@ -80,7 +128,20 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
} }
} }
-	ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+	if (unlikely(mode == TTMH_READ_START_ACK)) {
+		int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];
+
+		ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
+				    ARRAY_SIZE(fd), msg, sizeof(*msg));
+		if (ret == sizeof(*msg)) {
+			time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
+					      msg->time & UM_TIMETRAVEL_START_ACK_ID);
+			/* we don't use the logging for now */
+			os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
+		}
+	} else {
+		ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+	}
if (ret == 0) if (ret == 0)
panic("time-travel external link is broken\n"); panic("time-travel external link is broken\n");
@ -96,10 +157,24 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
return; return;
case UM_TIMETRAVEL_RUN: case UM_TIMETRAVEL_RUN:
time_travel_set_time(msg->time); time_travel_set_time(msg->time);
if (time_travel_shm) {
/* no request right now since we're running */
time_travel_shm_client->flags &=
~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
/* no ack for shared memory RUN */
return;
}
break; break;
 	case UM_TIMETRAVEL_FREE_UNTIL:
-		time_travel_ext_free_until_valid = true;
-		time_travel_ext_free_until = msg->time;
+		/* not supposed to get this with shm, but ignore it */
+		if (time_travel_shm)
+			break;
+
+		time_travel_ext_free_until = &_time_travel_ext_free_until;
+		_time_travel_ext_free_until = msg->time;
 		break;
case UM_TIMETRAVEL_BROADCAST:
bc_message = msg->time;
time_travel_should_print_bc_msg = 1;
break; break;
} }
@ -136,8 +211,15 @@ static u64 time_travel_ext_req(u32 op, u64 time)
block_signals_hard(); block_signals_hard();
os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
/* no ACK expected for WAIT in shared memory mode */
if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
goto done;
 	while (msg.op != UM_TIMETRAVEL_ACK)
-		time_travel_handle_message(&msg, TTMH_READ);
+		time_travel_handle_message(&msg,
+					   op == UM_TIMETRAVEL_START ?
+						TTMH_READ_START_ACK :
+						TTMH_READ);
if (msg.seq != mseq) if (msg.seq != mseq)
panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n", panic("time-travel: ACK message has different seqno! op=%d, seq=%d != %d time=%lld\n",
@ -145,6 +227,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
if (op == UM_TIMETRAVEL_GET) if (op == UM_TIMETRAVEL_GET)
time_travel_set_time(msg.time); time_travel_set_time(msg.time);
done:
unblock_signals_hard(); unblock_signals_hard();
return msg.time; return msg.time;
@ -180,13 +263,33 @@ static void time_travel_ext_update_request(unsigned long long time)
/* /*
* if we're running and are allowed to run past the request * if we're running and are allowed to run past the request
* then we don't need to update it either * then we don't need to update it either
*
* Note for shm we ignore FREE_UNTIL messages and leave the pointer
* to shared memory, and for non-shm the offset is 0.
*/ */
-	if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
-	    time < time_travel_ext_free_until)
+	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
return; return;
time_travel_ext_prev_request = time; time_travel_ext_prev_request = time;
time_travel_ext_prev_request_valid = true; time_travel_ext_prev_request_valid = true;
if (time_travel_shm) {
union um_timetravel_schedshm_client *running;
running = &time_travel_shm->clients[time_travel_shm->running_id];
if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
time_travel_shm_client->flags |=
UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
time += time_travel_shm_offset;
time_travel_shm_client->req_time = time;
if (time < time_travel_shm->free_until)
time_travel_shm->free_until = time;
return;
}
}
time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time); time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
} }
@ -194,6 +297,14 @@ void __time_travel_propagate_time(void)
{ {
static unsigned long long last_propagated; static unsigned long long last_propagated;
if (time_travel_shm) {
if (time_travel_shm->running_id != time_travel_shm_id)
panic("time-travel: setting time while not running\n");
time_travel_shm->current_time = time_travel_time +
time_travel_shm_offset;
return;
}
if (last_propagated == time_travel_time) if (last_propagated == time_travel_time)
return; return;
@ -209,9 +320,12 @@ static bool time_travel_ext_request(unsigned long long time)
* If we received an external sync point ("free until") then we * If we received an external sync point ("free until") then we
* don't have to request/wait for anything until then, unless * don't have to request/wait for anything until then, unless
* we're already waiting. * we're already waiting.
*
* Note for shm we ignore FREE_UNTIL messages and leave the pointer
* to shared memory, and for non-shm the offset is 0.
*/ */
-	if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
-	    time < time_travel_ext_free_until)
+	if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+	    time < (*time_travel_ext_free_until - time_travel_shm_offset))
return false; return false;
time_travel_ext_update_request(time); time_travel_ext_update_request(time);
@ -225,7 +339,8 @@ static void time_travel_ext_wait(bool idle)
}; };
time_travel_ext_prev_request_valid = false; time_travel_ext_prev_request_valid = false;
-	time_travel_ext_free_until_valid = false;
+	if (!time_travel_shm)
+		time_travel_ext_free_until = NULL;
time_travel_ext_waiting++; time_travel_ext_waiting++;
time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1); time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
@ -248,7 +363,11 @@ static void time_travel_ext_wait(bool idle)
static void time_travel_ext_get_time(void) static void time_travel_ext_get_time(void)
{ {
-	time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
+	if (time_travel_shm)
+		time_travel_set_time(time_travel_shm->current_time -
+				     time_travel_shm_offset);
+	else
+		time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
} }
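
Throughout these hunks the old pair of a valid flag plus a value becomes a single pointer: in pipe mode it points at a local copy filled from FREE_UNTIL messages, while in shm mode it points straight into the shared page and is never cleared, with time_travel_shm_offset translating between the controller's absolute clock and local time. A condensed model of the repeated check; can_run_until() is an illustrative name, not a function in this merge:

/* Illustrative condensation of the two "free until" checks above */
static bool can_run_until(unsigned long long t)
{
	if (time_travel_ext_waiting || !time_travel_ext_free_until)
		return false;	/* no grant (yet): must ask the controller */

	/* shm stores absolute time; in pipe mode the offset is 0 */
	return t < (*time_travel_ext_free_until - time_travel_shm_offset);
}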
static void __time_travel_update_time(unsigned long long ns, bool idle) static void __time_travel_update_time(unsigned long long ns, bool idle)
@ -875,9 +994,49 @@ static int setup_time_travel_start(char *str)
return 1; return 1;
} }
__setup("time-travel-start", setup_time_travel_start); __setup("time-travel-start=", setup_time_travel_start);
__uml_help(setup_time_travel_start, __uml_help(setup_time_travel_start,
"time-travel-start=<seconds>\n" "time-travel-start=<nanoseconds>\n"
"Configure the UML instance's wall clock to start at this value rather than\n" "Configure the UML instance's wall clock to start at this value rather than\n"
"the host's wall clock at the time of UML boot.\n"); "the host's wall clock at the time of UML boot.\n");
static struct kobject *bc_time_kobject;
static ssize_t bc_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "0x%llx", bc_message);
}
static ssize_t bc_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count)
{
int ret;
u64 user_bc_message;
ret = kstrtou64(buf, 0, &user_bc_message);
if (ret)
return ret;
bc_message = user_bc_message;
time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, bc_message);
pr_info("um: time: sent broadcast message: 0x%llx\n", bc_message);
return count;
}
static struct kobj_attribute bc_attribute = __ATTR(bc-message, 0660, bc_show, bc_store);
static int __init um_bc_start(void)
{
if (time_travel_mode != TT_MODE_EXTERNAL)
return 0;
bc_time_kobject = kobject_create_and_add("um-ext-time", kernel_kobj);
if (!bc_time_kobject)
return 0;
if (sysfs_create_file(bc_time_kobject, &bc_attribute.attr))
pr_debug("failed to create the bc file in /sys/kernel/um_time");
return 0;
}
late_initcall(um_bc_start);
#endif #endif
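
The broadcast plumbing is exposed through sysfs only in external time-travel mode. Note the kobject is registered as um-ext-time while the debug message mentions um_time; going by the registration, the paths sketched below are inferred rather than documented:

/* Inferred usage of the bc-message attribute, per the kobject name:
 *
 *   write 0x42 to /sys/kernel/um-ext-time/bc-message
 *     -> time_travel_ext_req(UM_TIMETRAVEL_BROADCAST, 0x42)
 *   read  /sys/kernel/um-ext-time/bc-message
 *     -> last broadcast value, formatted as 0x%llx
 *
 * Incoming broadcasts are latched in bc_message and printed later from
 * time_travel_print_bc_msg() in the skas process loop (see the
 * arch/um/os-Linux/skas diff further down), since printk() is not safe
 * directly inside the message handler.
 */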

View file

@ -15,209 +15,54 @@
#include <skas.h> #include <skas.h>
#include <kern_util.h> #include <kern_util.h>
-struct host_vm_change {
-	struct host_vm_op {
-		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
-		union {
-			struct {
-				unsigned long addr;
-				unsigned long len;
-				unsigned int prot;
-				int fd;
-				__u64 offset;
-			} mmap;
-			struct {
-				unsigned long addr;
-				unsigned long len;
-			} munmap;
-			struct {
-				unsigned long addr;
-				unsigned long len;
-				unsigned int prot;
-			} mprotect;
-		} u;
-	} ops[1];
-	int userspace;
-	int index;
-	struct mm_struct *mm;
-	void *data;
-	int force;
-};
+struct vm_ops {
+	struct mm_id *mm_idp;
+
+	int (*mmap)(struct mm_id *mm_idp,
+		    unsigned long virt, unsigned long len, int prot,
+		    int phys_fd, unsigned long long offset);
+	int (*unmap)(struct mm_id *mm_idp,
+		     unsigned long virt, unsigned long len);
+	int (*mprotect)(struct mm_id *mm_idp,
+			unsigned long virt, unsigned long len,
+			unsigned int prot);
+};
 
-#define INIT_HVC(mm, force, userspace) \
-	((struct host_vm_change) \
-	 { .ops = { { .type = NONE } }, \
-	   .mm = mm, \
-	   .data = NULL, \
-	   .userspace = userspace, \
-	   .index = 0, \
-	   .force = force })
+static int kern_map(struct mm_id *mm_idp,
+		    unsigned long virt, unsigned long len, int prot,
+		    int phys_fd, unsigned long long offset)
+{
+	/* TODO: Why is executable needed to be always set in the kernel? */
+	return os_map_memory((void *)virt, phys_fd, offset, len,
+			     prot & UM_PROT_READ, prot & UM_PROT_WRITE,
+			     1);
+}
 
-static void report_enomem(void)
+static int kern_unmap(struct mm_id *mm_idp,
+		      unsigned long virt, unsigned long len)
+{
+	return os_unmap_memory((void *)virt, len);
+}
+
+static int kern_mprotect(struct mm_id *mm_idp,
+			 unsigned long virt, unsigned long len,
+			 unsigned int prot)
+{
+	return os_protect_memory((void *)virt, len,
+				 prot & UM_PROT_READ, prot & UM_PROT_WRITE,
+				 1);
+}
+
+void report_enomem(void)
 {
 	printk(KERN_ERR "UML ran out of memory on the host side! "
 			"This can happen due to a memory limitation or "
 			"vm.max_map_count has been reached.\n");
 }
static int do_ops(struct host_vm_change *hvc, int end,
int finished)
{
struct host_vm_op *op;
int i, ret = 0;
for (i = 0; i < end && !ret; i++) {
op = &hvc->ops[i];
switch (op->type) {
case MMAP:
if (hvc->userspace)
ret = map(&hvc->mm->context.id, op->u.mmap.addr,
op->u.mmap.len, op->u.mmap.prot,
op->u.mmap.fd,
op->u.mmap.offset, finished,
&hvc->data);
else
map_memory(op->u.mmap.addr, op->u.mmap.offset,
op->u.mmap.len, 1, 1, 1);
break;
case MUNMAP:
if (hvc->userspace)
ret = unmap(&hvc->mm->context.id,
op->u.munmap.addr,
op->u.munmap.len, finished,
&hvc->data);
else
ret = os_unmap_memory(
(void *) op->u.munmap.addr,
op->u.munmap.len);
break;
case MPROTECT:
if (hvc->userspace)
ret = protect(&hvc->mm->context.id,
op->u.mprotect.addr,
op->u.mprotect.len,
op->u.mprotect.prot,
finished, &hvc->data);
else
ret = os_protect_memory(
(void *) op->u.mprotect.addr,
op->u.mprotect.len,
1, 1, 1);
break;
default:
printk(KERN_ERR "Unknown op type %d in do_ops\n",
op->type);
BUG();
break;
}
}
if (ret == -ENOMEM)
report_enomem();
return ret;
}
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
unsigned int prot, struct host_vm_change *hvc)
{
__u64 offset;
struct host_vm_op *last;
int fd = -1, ret = 0;
if (hvc->userspace)
fd = phys_mapping(phys, &offset);
else
offset = phys;
if (hvc->index != 0) {
last = &hvc->ops[hvc->index - 1];
if ((last->type == MMAP) &&
(last->u.mmap.addr + last->u.mmap.len == virt) &&
(last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
(last->u.mmap.offset + last->u.mmap.len == offset)) {
last->u.mmap.len += len;
return 0;
}
}
if (hvc->index == ARRAY_SIZE(hvc->ops)) {
ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
hvc->index = 0;
}
hvc->ops[hvc->index++] = ((struct host_vm_op)
{ .type = MMAP,
.u = { .mmap = { .addr = virt,
.len = len,
.prot = prot,
.fd = fd,
.offset = offset }
} });
return ret;
}
static int add_munmap(unsigned long addr, unsigned long len,
struct host_vm_change *hvc)
{
struct host_vm_op *last;
int ret = 0;
if (hvc->index != 0) {
last = &hvc->ops[hvc->index - 1];
if ((last->type == MUNMAP) &&
(last->u.munmap.addr + last->u.mmap.len == addr)) {
last->u.munmap.len += len;
return 0;
}
}
if (hvc->index == ARRAY_SIZE(hvc->ops)) {
ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
hvc->index = 0;
}
hvc->ops[hvc->index++] = ((struct host_vm_op)
{ .type = MUNMAP,
.u = { .munmap = { .addr = addr,
.len = len } } });
return ret;
}
static int add_mprotect(unsigned long addr, unsigned long len,
unsigned int prot, struct host_vm_change *hvc)
{
struct host_vm_op *last;
int ret = 0;
if (hvc->index != 0) {
last = &hvc->ops[hvc->index - 1];
if ((last->type == MPROTECT) &&
(last->u.mprotect.addr + last->u.mprotect.len == addr) &&
(last->u.mprotect.prot == prot)) {
last->u.mprotect.len += len;
return 0;
}
}
if (hvc->index == ARRAY_SIZE(hvc->ops)) {
ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
hvc->index = 0;
}
hvc->ops[hvc->index++] = ((struct host_vm_op)
{ .type = MPROTECT,
.u = { .mprotect = { .addr = addr,
.len = len,
.prot = prot } } });
return ret;
}
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
static inline int update_pte_range(pmd_t *pmd, unsigned long addr, static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long end,
struct host_vm_change *hvc) struct vm_ops *ops)
{ {
pte_t *pte; pte_t *pte;
int r, w, x, prot, ret = 0; int r, w, x, prot, ret = 0;
@ -235,15 +80,22 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
(x ? UM_PROT_EXEC : 0)); (x ? UM_PROT_EXEC : 0));
if (hvc->force || pte_newpage(*pte)) { if (pte_newpage(*pte)) {
if (pte_present(*pte)) { if (pte_present(*pte)) {
if (pte_newpage(*pte)) if (pte_newpage(*pte)) {
ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, __u64 offset;
PAGE_SIZE, prot, hvc); unsigned long phys =
pte_val(*pte) & PAGE_MASK;
int fd = phys_mapping(phys, &offset);
ret = ops->mmap(ops->mm_idp, addr,
PAGE_SIZE, prot, fd,
offset);
}
} else } else
ret = add_munmap(addr, PAGE_SIZE, hvc); ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
} else if (pte_newprot(*pte)) } else if (pte_newprot(*pte))
ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); ret = ops->mprotect(ops->mm_idp, addr, PAGE_SIZE, prot);
*pte = pte_mkuptodate(*pte); *pte = pte_mkuptodate(*pte);
} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
return ret; return ret;
@ -251,7 +103,7 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
static inline int update_pmd_range(pud_t *pud, unsigned long addr, static inline int update_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end, unsigned long end,
struct host_vm_change *hvc) struct vm_ops *ops)
{ {
pmd_t *pmd; pmd_t *pmd;
unsigned long next; unsigned long next;
@ -261,19 +113,20 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
do { do {
next = pmd_addr_end(addr, end); next = pmd_addr_end(addr, end);
if (!pmd_present(*pmd)) { if (!pmd_present(*pmd)) {
if (hvc->force || pmd_newpage(*pmd)) { if (pmd_newpage(*pmd)) {
ret = add_munmap(addr, next - addr, hvc); ret = ops->unmap(ops->mm_idp, addr,
next - addr);
pmd_mkuptodate(*pmd); pmd_mkuptodate(*pmd);
} }
} }
else ret = update_pte_range(pmd, addr, next, hvc); else ret = update_pte_range(pmd, addr, next, ops);
} while (pmd++, addr = next, ((addr < end) && !ret)); } while (pmd++, addr = next, ((addr < end) && !ret));
return ret; return ret;
} }
static inline int update_pud_range(p4d_t *p4d, unsigned long addr, static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end, unsigned long end,
struct host_vm_change *hvc) struct vm_ops *ops)
{ {
pud_t *pud; pud_t *pud;
unsigned long next; unsigned long next;
@ -283,19 +136,20 @@ static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
do { do {
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (!pud_present(*pud)) { if (!pud_present(*pud)) {
if (hvc->force || pud_newpage(*pud)) { if (pud_newpage(*pud)) {
ret = add_munmap(addr, next - addr, hvc); ret = ops->unmap(ops->mm_idp, addr,
next - addr);
pud_mkuptodate(*pud); pud_mkuptodate(*pud);
} }
} }
else ret = update_pmd_range(pud, addr, next, hvc); else ret = update_pmd_range(pud, addr, next, ops);
} while (pud++, addr = next, ((addr < end) && !ret)); } while (pud++, addr = next, ((addr < end) && !ret));
return ret; return ret;
} }
static inline int update_p4d_range(pgd_t *pgd, unsigned long addr, static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
unsigned long end, unsigned long end,
struct host_vm_change *hvc) struct vm_ops *ops)
{ {
p4d_t *p4d; p4d_t *p4d;
unsigned long next; unsigned long next;
@ -305,227 +159,59 @@ static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
do { do {
next = p4d_addr_end(addr, end); next = p4d_addr_end(addr, end);
if (!p4d_present(*p4d)) { if (!p4d_present(*p4d)) {
if (hvc->force || p4d_newpage(*p4d)) { if (p4d_newpage(*p4d)) {
ret = add_munmap(addr, next - addr, hvc); ret = ops->unmap(ops->mm_idp, addr,
next - addr);
p4d_mkuptodate(*p4d); p4d_mkuptodate(*p4d);
} }
} else } else
ret = update_pud_range(p4d, addr, next, hvc); ret = update_pud_range(p4d, addr, next, ops);
} while (p4d++, addr = next, ((addr < end) && !ret)); } while (p4d++, addr = next, ((addr < end) && !ret));
return ret; return ret;
} }
-static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
-			     unsigned long end_addr, int force)
+int um_tlb_sync(struct mm_struct *mm)
 {
 	pgd_t *pgd;
-	struct host_vm_change hvc;
-	unsigned long addr = start_addr, next;
-	int ret = 0, userspace = 1;
+	struct vm_ops ops;
+	unsigned long addr = mm->context.sync_tlb_range_from, next;
+	int ret = 0;
+
+	if (mm->context.sync_tlb_range_to == 0)
+		return 0;
+
+	ops.mm_idp = &mm->context.id;
+	if (mm == &init_mm) {
+		ops.mmap = kern_map;
+		ops.unmap = kern_unmap;
+		ops.mprotect = kern_mprotect;
+	} else {
+		ops.mmap = map;
+		ops.unmap = unmap;
+		ops.mprotect = protect;
+	}
 
-	hvc = INIT_HVC(mm, force, userspace);
 	pgd = pgd_offset(mm, addr);
 	do {
-		next = pgd_addr_end(addr, end_addr);
+		next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
 		if (!pgd_present(*pgd)) {
-			if (force || pgd_newpage(*pgd)) {
-				ret = add_munmap(addr, next - addr, &hvc);
+			if (pgd_newpage(*pgd)) {
+				ret = ops.unmap(ops.mm_idp, addr,
+						next - addr);
 				pgd_mkuptodate(*pgd);
 			}
 		} else
-			ret = update_p4d_range(pgd, addr, next, &hvc);
-	} while (pgd++, addr = next, ((addr < end_addr) && !ret));
+			ret = update_p4d_range(pgd, addr, next, &ops);
+	} while (pgd++, addr = next,
+		 ((addr < mm->context.sync_tlb_range_to) && !ret));
 
-	if (!ret)
-		ret = do_ops(&hvc, hvc.index, 1);
+	if (ret == -ENOMEM)
+		report_enomem();
 
-	/* This is not an else because ret is modified above */
-	if (ret) {
-		struct mm_id *mm_idp = &current->mm->context.id;
-
-		printk(KERN_ERR "fix_range_common: failed, killing current "
-		       "process: %d\n", task_tgid_vnr(current));
-		mm_idp->kill = 1;
-	}
+	mm->context.sync_tlb_range_from = 0;
+	mm->context.sync_tlb_range_to = 0;
+
+	return ret;
 }
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
struct mm_struct *mm;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long addr, last;
int updated = 0, err = 0, force = 0, userspace = 0;
struct host_vm_change hvc;
mm = &init_mm;
hvc = INIT_HVC(mm, force, userspace);
for (addr = start; addr < end;) {
pgd = pgd_offset(mm, addr);
if (!pgd_present(*pgd)) {
last = ADD_ROUND(addr, PGDIR_SIZE);
if (last > end)
last = end;
if (pgd_newpage(*pgd)) {
updated = 1;
err = add_munmap(addr, last - addr, &hvc);
if (err < 0)
panic("munmap failed, errno = %d\n",
-err);
}
addr = last;
continue;
}
p4d = p4d_offset(pgd, addr);
if (!p4d_present(*p4d)) {
last = ADD_ROUND(addr, P4D_SIZE);
if (last > end)
last = end;
if (p4d_newpage(*p4d)) {
updated = 1;
err = add_munmap(addr, last - addr, &hvc);
if (err < 0)
panic("munmap failed, errno = %d\n",
-err);
}
addr = last;
continue;
}
pud = pud_offset(p4d, addr);
if (!pud_present(*pud)) {
last = ADD_ROUND(addr, PUD_SIZE);
if (last > end)
last = end;
if (pud_newpage(*pud)) {
updated = 1;
err = add_munmap(addr, last - addr, &hvc);
if (err < 0)
panic("munmap failed, errno = %d\n",
-err);
}
addr = last;
continue;
}
pmd = pmd_offset(pud, addr);
if (!pmd_present(*pmd)) {
last = ADD_ROUND(addr, PMD_SIZE);
if (last > end)
last = end;
if (pmd_newpage(*pmd)) {
updated = 1;
err = add_munmap(addr, last - addr, &hvc);
if (err < 0)
panic("munmap failed, errno = %d\n",
-err);
}
addr = last;
continue;
}
pte = pte_offset_kernel(pmd, addr);
if (!pte_present(*pte) || pte_newpage(*pte)) {
updated = 1;
err = add_munmap(addr, PAGE_SIZE, &hvc);
if (err < 0)
panic("munmap failed, errno = %d\n",
-err);
if (pte_present(*pte))
err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
PAGE_SIZE, 0, &hvc);
}
else if (pte_newprot(*pte)) {
updated = 1;
err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
}
addr += PAGE_SIZE;
}
if (!err)
err = do_ops(&hvc, hvc.index, 1);
if (err < 0)
panic("flush_tlb_kernel failed, errno = %d\n", err);
return updated;
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
struct mm_struct *mm = vma->vm_mm;
void *flush = NULL;
int r, w, x, prot, err = 0;
struct mm_id *mm_id;
address &= PAGE_MASK;
pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd))
goto kill;
p4d = p4d_offset(pgd, address);
if (!p4d_present(*p4d))
goto kill;
pud = pud_offset(p4d, address);
if (!pud_present(*pud))
goto kill;
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
goto kill;
pte = pte_offset_kernel(pmd, address);
r = pte_read(*pte);
w = pte_write(*pte);
x = pte_exec(*pte);
if (!pte_young(*pte)) {
r = 0;
w = 0;
} else if (!pte_dirty(*pte)) {
w = 0;
}
mm_id = &mm->context.id;
prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
(x ? UM_PROT_EXEC : 0));
if (pte_newpage(*pte)) {
if (pte_present(*pte)) {
unsigned long long offset;
int fd;
fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
1, &flush);
}
else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
}
else if (pte_newprot(*pte))
err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
if (err) {
if (err == -ENOMEM)
report_enomem();
goto kill;
}
*pte = pte_mkuptodate(*pte);
return;
kill:
printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
force_sig(SIGKILL);
} }
void flush_tlb_all(void) void flush_tlb_all(void)
@ -540,60 +226,11 @@ void flush_tlb_all(void)
flush_tlb_mm(current->mm); flush_tlb_mm(current->mm);
} }
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
flush_tlb_kernel_range_common(start, end);
}
void flush_tlb_kernel_vm(void)
{
flush_tlb_kernel_range_common(start_vm, end_vm);
}
void __flush_tlb_one(unsigned long addr)
{
flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}
static void fix_range(struct mm_struct *mm, unsigned long start_addr,
unsigned long end_addr, int force)
{
/*
* Don't bother flushing if this address space is about to be
* destroyed.
*/
if (atomic_read(&mm->mm_users) == 0)
return;
fix_range_common(mm, start_addr, end_addr, force);
}
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
if (vma->vm_mm == NULL)
flush_tlb_kernel_range_common(start, end);
else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);
void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_mm(struct mm_struct *mm)
{ {
struct vm_area_struct *vma; struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0); VMA_ITERATOR(vmi, mm, 0);
for_each_vma(vmi, vma) for_each_vma(vmi, vma)
fix_range(mm, vma->vm_start, vma->vm_end, 0); um_tlb_mark_sync(mm, vma->vm_start, vma->vm_end);
}
void force_flush_all(void)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
mmap_read_lock(mm);
for_each_vma(vmi, vma)
fix_range(mm, vma->vm_start, vma->vm_end, 1);
mmap_read_unlock(mm);
} }

View file

@ -113,7 +113,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
#if 0 #if 0
WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte))); WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif #endif
flush_tlb_page(vma, address);
out: out:
mmap_read_unlock(mm); mmap_read_unlock(mm);
out_nosemaphore: out_nosemaphore:
@ -210,8 +210,17 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (!is_user && regs) if (!is_user && regs)
current->thread.segv_regs = container_of(regs, struct pt_regs, regs); current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
if (!is_user && (address >= start_vm) && (address < end_vm)) { if (!is_user && init_mm.context.sync_tlb_range_to) {
flush_tlb_kernel_vm(); /*
* Kernel has pending updates from set_ptes that were not
* flushed yet. Syncing them should fix the pagefault (if not
* we'll get here again and panic).
*/
err = um_tlb_sync(&init_mm);
if (err == -ENOMEM)
report_enomem();
if (err)
panic("Failed to sync kernel TLBs: %d", err);
goto out; goto out;
} }
else if (current->mm == NULL) { else if (current->mm == NULL) {
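
Under the lazy scheme even kernel mappings are only materialized on first touch: set_ptes() on init_mm merely records the range, and the resulting in-kernel fault lands in segv(), which repairs it with um_tlb_sync(&init_mm) as shown above. Schematically, with an illustrative vmalloc example rather than literal code from the merge:

/* Illustrative sequence: */
void *p = vmalloc(PAGE_SIZE);	/* only marks init_mm's sync range */

*(volatile char *)p = 1;	/* faults once in kernel mode; segv()
				 * runs um_tlb_sync(&init_mm) and the
				 * access then succeeds on retry */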

View file

@ -126,9 +126,6 @@ unsigned long uml_reserved; /* Also modified in mem_init */
unsigned long start_vm; unsigned long start_vm;
unsigned long end_vm; unsigned long end_vm;
/* Set in uml_ncpus_setup */
int ncpus = 1;
/* Set in early boot */ /* Set in early boot */
static int have_root __initdata; static int have_root __initdata;
static int have_console __initdata; static int have_console __initdata;

View file

@ -17,6 +17,7 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/sysmacros.h> #include <sys/sysmacros.h>
#include <sys/un.h> #include <sys/un.h>
#include <sys/mman.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/eventfd.h> #include <sys/eventfd.h>
#include <poll.h> #include <poll.h>
@ -240,6 +241,16 @@ int os_connect_socket(const char *name)
return err; return err;
} }
int os_dup_file(int fd)
{
int new_fd = dup(fd);
if (new_fd < 0)
return -errno;
return new_fd;
}
void os_close_file(int fd) void os_close_file(int fd)
{ {
close(fd); close(fd);
@ -502,44 +513,47 @@ int os_shutdown_socket(int fd, int r, int w)
return 0; return 0;
} }
-int os_rcv_fd(int fd, int *helper_pid_out)
+/**
+ * os_rcv_fd_msg - receive message with (optional) FDs
+ * @fd: the FD to receive from
+ * @fds: the array for FDs to write to
+ * @n_fds: number of FDs to receive (@fds array size)
+ * @data: the message buffer
+ * @data_len: the size of the message to receive
+ *
+ * Receive a message with FDs.
+ *
+ * Returns: the size of the received message, or an error code
+ */
+ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
+		      void *data, size_t data_len)
 {
-	int new, n;
-	char buf[CMSG_SPACE(sizeof(new))];
-	struct msghdr msg;
+	char buf[CMSG_SPACE(sizeof(*fds) * n_fds)];
 	struct cmsghdr *cmsg;
-	struct iovec iov;
-
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	iov = ((struct iovec) { .iov_base = helper_pid_out,
-				.iov_len = sizeof(*helper_pid_out) });
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-	msg.msg_control = buf;
-	msg.msg_controllen = sizeof(buf);
-	msg.msg_flags = 0;
+	struct iovec iov = {
+		.iov_base = data,
+		.iov_len = data_len,
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = buf,
+		.msg_controllen = sizeof(buf),
+	};
+	int n;
 
 	n = recvmsg(fd, &msg, 0);
 	if (n < 0)
 		return -errno;
-	else if (n != iov.iov_len)
-		*helper_pid_out = -1;
 
 	cmsg = CMSG_FIRSTHDR(&msg);
-	if (cmsg == NULL) {
-		printk(UM_KERN_ERR "rcv_fd didn't receive anything, "
-		       "error = %d\n", errno);
-		return -1;
-	}
-	if ((cmsg->cmsg_level != SOL_SOCKET) ||
-	    (cmsg->cmsg_type != SCM_RIGHTS)) {
-		printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n");
-		return -1;
-	}
+	if (!cmsg ||
+	    cmsg->cmsg_level != SOL_SOCKET ||
+	    cmsg->cmsg_type != SCM_RIGHTS)
+		return n;
 
-	new = ((int *) CMSG_DATA(cmsg))[0];
-	return new;
+	memcpy(fds, CMSG_DATA(cmsg), cmsg->cmsg_len);
+
+	return n;
 }
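
os_rcv_fd_msg() generalizes the old single-fd helper: the payload buffer is caller-provided and any SCM_RIGHTS descriptors ride along in the ancillary buffer. The time-travel start handshake uses it roughly like this; the fragment is condensed from the time.c hunk earlier in this merge, with msg as a local struct:

/* Condensed caller, after the time.c hunk above */
int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];
struct um_timetravel_msg msg;
ssize_t ret;

ret = os_rcv_fd_msg(time_travel_ext_fd, fd, ARRAY_SIZE(fd),
		    &msg, sizeof(msg));
if (ret == sizeof(msg)) {
	/* msg was read fully; fd[] now holds the passed descriptors */
	time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
			      msg.time & UM_TIMETRAVEL_START_ACK_ID);
	os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
}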
int os_create_unix_socket(const char *file, int len, int close_on_exec) int os_create_unix_socket(const char *file, int len, int close_on_exec)
@ -705,3 +719,25 @@ int os_poll(unsigned int n, const int *fds)
return -EIO; return -EIO;
} }
void *os_mmap_rw_shared(int fd, size_t size)
{
void *res = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (res == MAP_FAILED)
return NULL;
return res;
}
void *os_mremap_rw_shared(void *old_addr, size_t old_size, size_t new_size)
{
void *res;
res = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE, NULL);
if (res == MAP_FAILED)
return NULL;
return res;
}

View file

@ -8,6 +8,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdarg.h> #include <stdarg.h>
#include <stdbool.h>
#include <errno.h> #include <errno.h>
#include <signal.h> #include <signal.h>
#include <string.h> #include <string.h>
@ -65,9 +66,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
int signals_enabled; int signals_enabled;
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT #ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
static int signals_blocked; static int signals_blocked, signals_blocked_pending;
#else
#define signals_blocked 0
#endif #endif
static unsigned int signals_pending; static unsigned int signals_pending;
static unsigned int signals_active = 0; static unsigned int signals_active = 0;
@ -76,14 +75,27 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
{ {
int enabled = signals_enabled; int enabled = signals_enabled;
-	if ((signals_blocked || !enabled) && (sig == SIGIO)) {
+#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+	if ((signals_blocked ||
+	     __atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) &&
+	    (sig == SIGIO)) {
+		/* increment so unblock will do another round */
+		__atomic_add_fetch(&signals_blocked_pending, 1,
+				   __ATOMIC_SEQ_CST);
+		return;
+	}
+#endif
+
+	if (!enabled && (sig == SIGIO)) {
 		/*
 		 * In TT_MODE_EXTERNAL, need to still call time-travel
-		 * handlers unless signals are also blocked for the
-		 * external time message processing. This will mark
-		 * signals_pending by itself (only if necessary.)
+		 * handlers. This will mark signals_pending by itself
+		 * (only if necessary.)
+		 * Note we won't get here if signals are hard-blocked
+		 * (which is handled above), in that case the hard-
+		 * unblock will handle things.
 		 */
-		if (!signals_blocked && time_travel_mode == TT_MODE_EXTERNAL)
+		if (time_travel_mode == TT_MODE_EXTERNAL)
 			sigio_run_timetravel_handlers();
else else
signals_pending |= SIGIO_MASK; signals_pending |= SIGIO_MASK;
@ -380,33 +392,99 @@ int um_set_signals_trace(int enable)
#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT #ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
void mark_sigio_pending(void) void mark_sigio_pending(void)
{ {
/*
* It would seem that this should be atomic so
* it isn't a read-modify-write with a signal
* that could happen in the middle, losing the
* value set by the signal.
*
* However, this function is only called when in
* time-travel=ext simulation mode, in which case
* the only signal ever pending is SIGIO, which
* is blocked while this can be called, and the
* timer signal (SIGALRM) cannot happen.
*/
signals_pending |= SIGIO_MASK; signals_pending |= SIGIO_MASK;
} }
 void block_signals_hard(void)
 {
-	if (signals_blocked)
-		return;
-	signals_blocked = 1;
+	signals_blocked++;
 	barrier();
 }
 void unblock_signals_hard(void)
 {
+	static bool unblocking;
+
 	if (!signals_blocked)
-		return;
+		panic("unblocking signals while not blocked");
 
-	/* Must be set to 0 before we check the pending bits etc. */
-	signals_blocked = 0;
+	if (--signals_blocked)
+		return;
+
+	/*
+	 * Must be set to 0 before we check pending so the
+	 * SIGIO handler will run as normal unless we're still
+	 * going to process signals_blocked_pending.
+	 */
 	barrier();
 
-	if (signals_pending && signals_enabled) {
-		/* this is a bit inefficient, but that's not really important */
-		block_signals();
-		unblock_signals();
-	} else if (signals_pending & SIGIO_MASK) {
-		/* we need to run time-travel handlers even if not enabled */
-		sigio_run_timetravel_handlers();
+	/*
+	 * Note that block_signals_hard()/unblock_signals_hard() can be called
+	 * within the unblock_signals()/sigio_run_timetravel_handlers() below.
+	 * This would still be prone to race conditions since it's actually a
+	 * call _within_ e.g. vu_req_read_message(), where we observed this
+	 * issue, which loops. Thus, if the inner call handles the recorded
+	 * pending signals, we can get out of the inner call with the real
+	 * signal handler no longer blocked, and still have a race. Thus don't
+	 * handle unblocking in the inner call, if it happens, but only in
+	 * the outermost call - 'unblocking' serves as an ownership for the
+	 * signals_blocked_pending decrement.
+	 */
+	if (unblocking)
+		return;
+	unblocking = true;
+
+	while (__atomic_load_n(&signals_blocked_pending, __ATOMIC_SEQ_CST)) {
+		if (signals_enabled) {
+			/* signals are enabled so we can touch this */
+			signals_pending |= SIGIO_MASK;
+			/*
+			 * this is a bit inefficient, but that's
+			 * not really important
+			 */
+			block_signals();
+			unblock_signals();
+		} else {
+			/*
+			 * we need to run time-travel handlers even
+			 * if not enabled
+			 */
+			sigio_run_timetravel_handlers();
+		}
+
+		/*
+		 * The decrement of signals_blocked_pending must be atomic so
+		 * that the signal handler will either happen before or after
+		 * the decrement, not during a read-modify-write:
+		 * - If it happens before, it can increment it and we'll
+		 *   decrement it and do another round in the loop.
+		 * - If it happens after it'll see 0 for both signals_blocked
+		 *   and signals_blocked_pending and thus run the handler as
+		 *   usual (subject to signals_enabled, but that's unrelated.)
+		 *
+		 * Note that a call to unblock_signals_hard() within the calls
+		 * to unblock_signals() or sigio_run_timetravel_handlers() above
+		 * will do nothing due to the 'unblocking' state, so this cannot
+		 * underflow as the only one decrementing will be the outermost
+		 * one.
+		 */
+		if (__atomic_sub_fetch(&signals_blocked_pending, 1,
+				       __ATOMIC_SEQ_CST) < 0)
+			panic("signals_blocked_pending underflow");
 	}
+
+	unblocking = false;
 }
#endif #endif
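
block_signals_hard() is now a counting lock rather than a boolean, so nested blocking from helper code is safe, and any SIGIO arriving while blocked is tallied in signals_blocked_pending and drained only by the outermost unblock. A hypothetical call sequence, for illustration:

/* Nesting illustration (hypothetical callers): */
block_signals_hard();		/* depth 1 */
block_signals_hard();		/* depth 2, e.g. from a nested helper */
/* a SIGIO here only bumps signals_blocked_pending and returns */
unblock_signals_hard();		/* depth 1: returns early */
unblock_signals_hard();		/* depth 0: drains the pending count,
				 * running the handlers exactly once
				 * per recorded signal */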

View file

@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/ */
@ -19,7 +20,30 @@
#include <sysdep/stub.h> #include <sysdep/stub.h>
#include "../internal.h" #include "../internal.h"
extern char batch_syscall_stub[], __syscall_stub_start[]; extern char __syscall_stub_start[];
void syscall_stub_dump_error(struct mm_id *mm_idp)
{
struct stub_data *proc_data = (void *)mm_idp->stack;
struct stub_syscall *sc;
if (proc_data->syscall_data_len < 0 ||
proc_data->syscall_data_len >= ARRAY_SIZE(proc_data->syscall_data))
panic("Syscall data was corrupted by stub (len is: %d, expected maximum: %d)!",
proc_data->syscall_data_len,
mm_idp->syscall_data_len);
sc = &proc_data->syscall_data[proc_data->syscall_data_len];
printk(UM_KERN_ERR "%s : length = %d, last offset = %d",
__func__, mm_idp->syscall_data_len,
proc_data->syscall_data_len);
printk(UM_KERN_ERR "%s : stub syscall type %d failed, return value = 0x%lx\n",
__func__, sc->syscall, proc_data->err);
print_hex_dump(UM_KERN_ERR, " syscall data: ", 0,
16, 4, sc, sizeof(*sc), 0);
}
static inline unsigned long *check_init_stack(struct mm_id * mm_idp, static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
unsigned long *stack) unsigned long *stack)
@ -36,22 +60,24 @@ static unsigned long syscall_regs[MAX_REG_NR];
static int __init init_syscall_regs(void) static int __init init_syscall_regs(void)
{ {
get_safe_registers(syscall_regs, NULL); get_safe_registers(syscall_regs, NULL);
syscall_regs[REGS_IP_INDEX] = STUB_CODE + syscall_regs[REGS_IP_INDEX] = STUB_CODE +
((unsigned long) batch_syscall_stub - ((unsigned long) stub_syscall_handler -
(unsigned long) __syscall_stub_start); (unsigned long) __syscall_stub_start);
syscall_regs[REGS_SP_INDEX] = STUB_DATA; syscall_regs[REGS_SP_INDEX] = STUB_DATA +
offsetof(struct stub_data, sigstack) +
sizeof(((struct stub_data *) 0)->sigstack) -
sizeof(void *);
return 0; return 0;
} }
__initcall(init_syscall_regs); __initcall(init_syscall_regs);
static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) static inline long do_syscall_stub(struct mm_id *mm_idp)
{ {
struct stub_data *proc_data = (void *)mm_idp->stack;
int n, i; int n, i;
long ret, offset;
unsigned long * data;
unsigned long * syscall;
int err, pid = mm_idp->u.pid; int err, pid = mm_idp->u.pid;
n = ptrace_setregs(pid, syscall_regs); n = ptrace_setregs(pid, syscall_regs);
@ -63,6 +89,9 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
__func__, -n); __func__, -n);
} }
/* Inform process how much we have filled in. */
proc_data->syscall_data_len = mm_idp->syscall_data_len;
err = ptrace(PTRACE_CONT, pid, 0, 0); err = ptrace(PTRACE_CONT, pid, 0, 0);
if (err) if (err)
panic("Failed to continue stub, pid = %d, errno = %d\n", pid, panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
@ -71,135 +100,141 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
wait_stub_done(pid); wait_stub_done(pid);
/* /*
* When the stub stops, we find the following values on the * proc_data->err will be non-zero if there was an (unexpected) error.
* beginning of the stack: * In that case, syscall_data_len points to the last executed syscall,
* (long )return_value * otherwise it will be zero (but we do not need to rely on that).
* (long )offset to failed sycall-data (0, if no error)
*/ */
ret = *((unsigned long *) mm_idp->stack); if (proc_data->err < 0) {
offset = *((unsigned long *) mm_idp->stack + 1); syscall_stub_dump_error(mm_idp);
if (offset) {
data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA); /* Store error code in case someone tries to add more syscalls */
printk(UM_KERN_ERR "%s : ret = %ld, offset = %ld, data = %p\n", mm_idp->syscall_data_len = proc_data->err;
__func__, ret, offset, data); } else {
syscall = (unsigned long *)((unsigned long)data + data[0]); mm_idp->syscall_data_len = 0;
printk(UM_KERN_ERR "%s: syscall %ld failed, return value = 0x%lx, expected return value = 0x%lx\n",
__func__, syscall[0], ret, syscall[7]);
printk(UM_KERN_ERR " syscall parameters: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
syscall[1], syscall[2], syscall[3],
syscall[4], syscall[5], syscall[6]);
for (n = 1; n < data[0]/sizeof(long); n++) {
if (n == 1)
printk(UM_KERN_ERR " additional syscall data:");
if (n % 4 == 1)
printk("\n" UM_KERN_ERR " ");
printk(" 0x%lx", data[n]);
}
if (n > 1)
printk("\n");
} }
else ret = 0;
*addr = check_init_stack(mm_idp, NULL); return mm_idp->syscall_data_len;
return ret;
} }
long run_syscall_stub(struct mm_id * mm_idp, int syscall, int syscall_stub_flush(struct mm_id *mm_idp)
unsigned long *args, long expected, void **addr,
int done)
{ {
unsigned long *stack = check_init_stack(mm_idp, *addr); int res;
*stack += sizeof(long); if (mm_idp->syscall_data_len == 0)
stack += *stack / sizeof(long); return 0;
*stack++ = syscall; /* If an error happened already, report it and reset the state. */
*stack++ = args[0]; if (mm_idp->syscall_data_len < 0) {
*stack++ = args[1]; res = mm_idp->syscall_data_len;
*stack++ = args[2]; mm_idp->syscall_data_len = 0;
*stack++ = args[3]; return res;
*stack++ = args[4]; }
*stack++ = args[5];
*stack++ = expected;
*stack = 0;
if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) < res = do_syscall_stub(mm_idp);
UM_KERN_PAGE_SIZE - 10 * sizeof(long))) { mm_idp->syscall_data_len = 0;
*addr = stack;
return res;
}
struct stub_syscall *syscall_stub_alloc(struct mm_id *mm_idp)
{
struct stub_syscall *sc;
struct stub_data *proc_data = (struct stub_data *) mm_idp->stack;
if (mm_idp->syscall_data_len > 0 &&
mm_idp->syscall_data_len == ARRAY_SIZE(proc_data->syscall_data))
do_syscall_stub(mm_idp);
if (mm_idp->syscall_data_len < 0) {
/* Return dummy to retain error state. */
sc = &proc_data->syscall_data[0];
} else {
sc = &proc_data->syscall_data[mm_idp->syscall_data_len];
mm_idp->syscall_data_len += 1;
}
memset(sc, 0, sizeof(*sc));
return sc;
}
static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp,
int syscall_type,
unsigned long virt)
{
if (mm_idp->syscall_data_len > 0) {
struct stub_data *proc_data = (void *) mm_idp->stack;
struct stub_syscall *sc;
sc = &proc_data->syscall_data[mm_idp->syscall_data_len - 1];
if (sc->syscall == syscall_type &&
sc->mem.addr + sc->mem.length == virt)
return sc;
}
return NULL;
}
int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
int phys_fd, unsigned long long offset)
{
struct stub_syscall *sc;
/* Compress with previous syscall if that is possible */
sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt);
if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd &&
sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) {
sc->mem.length += len;
return 0; return 0;
} }
return do_syscall_stub(mm_idp, addr); sc = syscall_stub_alloc(mm_idp);
} sc->syscall = STUB_SYSCALL_MMAP;
sc->mem.addr = virt;
long syscall_stub_data(struct mm_id * mm_idp, sc->mem.length = len;
unsigned long *data, int data_count, sc->mem.prot = prot;
void **addr, void **stub_addr) sc->mem.fd = phys_fd;
{ sc->mem.offset = MMAP_OFFSET(offset);
unsigned long *stack;
int ret = 0;
/*
* If *addr still is uninitialized, it *must* contain NULL.
* Thus in this case do_syscall_stub correctly won't be called.
*/
if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >=
UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) {
ret = do_syscall_stub(mm_idp, addr);
/* in case of error, don't overwrite data on stack */
if (ret)
return ret;
}
stack = check_init_stack(mm_idp, *addr);
*addr = stack;
*stack = data_count * sizeof(long);
memcpy(stack + 1, data, data_count * sizeof(long));
*stub_addr = (void *)(((unsigned long)(stack + 1) &
~UM_KERN_PAGE_MASK) + STUB_DATA);
return 0; return 0;
} }
int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot, int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len)
int phys_fd, unsigned long long offset, int done, void **data)
{ {
int ret; struct stub_syscall *sc;
unsigned long args[] = { virt, len, prot,
MAP_SHARED | MAP_FIXED, phys_fd,
MMAP_OFFSET(offset) };
ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt, /* Compress with previous syscall if that is possible */
data, done); sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MUNMAP, addr);
if (sc) {
sc->mem.length += len;
return 0;
}
return ret; sc = syscall_stub_alloc(mm_idp);
sc->syscall = STUB_SYSCALL_MUNMAP;
sc->mem.addr = addr;
sc->mem.length = len;
return 0;
} }
int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len,
int done, void **data) unsigned int prot)
{ {
int ret; struct stub_syscall *sc;
unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
0 };
ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0, /* Compress with previous syscall if that is possible */
data, done); sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MPROTECT, addr);
if (sc && sc->mem.prot == prot) {
sc->mem.length += len;
return 0;
}
return ret; sc = syscall_stub_alloc(mm_idp);
} sc->syscall = STUB_SYSCALL_MPROTECT;
sc->mem.addr = addr;
int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, sc->mem.length = len;
unsigned int prot, int done, void **data) sc->mem.prot = prot;
{
int ret; return 0;
unsigned long args[] = { addr, len, prot, 0, 0, 0 };
ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0,
data, done);
return ret;
} }
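
The stub syscall queue also coalesces adjacent operations through syscall_stub_get_previous(), so a run of page-by-page updates coming out of the TLB walk becomes a single queue entry. For instance, with made-up addresses:

/* Three contiguous single-page unmaps... */
unmap(mm_idp, 0x100000, 0x1000);
unmap(mm_idp, 0x101000, 0x1000);
unmap(mm_idp, 0x102000, 0x1000);
/* ...collapse into one STUB_SYSCALL_MUNMAP entry:
 *   { .mem.addr = 0x100000, .mem.length = 0x3000 }
 * executed in the child on the next syscall_stub_flush(). */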

View file

@ -23,6 +23,7 @@
#include <skas.h> #include <skas.h>
#include <sysdep/stub.h> #include <sysdep/stub.h>
#include <linux/threads.h> #include <linux/threads.h>
#include <timetravel.h>
#include "../internal.h" #include "../internal.h"
int is_skas_winch(int pid, int fd, void *data) int is_skas_winch(int pid, int fd, void *data)
@ -253,7 +254,6 @@ static int userspace_tramp(void *stack)
} }
int userspace_pid[NR_CPUS]; int userspace_pid[NR_CPUS];
int kill_userspace_mm[NR_CPUS];
/** /**
* start_userspace() - prepare a new userspace process * start_userspace() - prepare a new userspace process
@ -345,8 +345,20 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
interrupt_end(); interrupt_end();
while (1) { while (1) {
if (kill_userspace_mm[0]) time_travel_print_bc_msg();
current_mm_sync();
/* Flush out any pending syscalls */
err = syscall_stub_flush(current_mm_id());
if (err) {
if (err == -ENOMEM)
report_enomem();
printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
__func__, -err);
fatal_sigsegv(); fatal_sigsegv();
}
/* /*
* This can legitimately fail if the process loads a * This can legitimately fail if the process loads a
@ -461,113 +473,6 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
} }
} }
static unsigned long thread_regs[MAX_REG_NR];
static unsigned long thread_fp_regs[FP_SIZE];
static int __init init_thread_regs(void)
{
get_safe_registers(thread_regs, thread_fp_regs);
/* Set parent's instruction pointer to start of clone-stub */
thread_regs[REGS_IP_INDEX] = STUB_CODE +
(unsigned long) stub_clone_handler -
(unsigned long) __syscall_stub_start;
thread_regs[REGS_SP_INDEX] = STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE -
sizeof(void *);
#ifdef __SIGNAL_FRAMESIZE
thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE;
#endif
return 0;
}
__initcall(init_thread_regs);
int copy_context_skas0(unsigned long new_stack, int pid)
{
int err;
unsigned long current_stack = current_stub_stack();
struct stub_data *data = (struct stub_data *) current_stack;
struct stub_data *child_data = (struct stub_data *) new_stack;
unsigned long long new_offset;
int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset);
/*
* prepare offset and fd of child's stack as argument for parent's
* and child's mmap2 calls
*/
*data = ((struct stub_data) {
.offset = MMAP_OFFSET(new_offset),
.fd = new_fd,
.parent_err = -ESRCH,
.child_err = 0,
});
*child_data = ((struct stub_data) {
.child_err = -ESRCH,
});
err = ptrace_setregs(pid, thread_regs);
if (err < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
__func__, pid, -err);
return err;
}
err = put_fp_registers(pid, thread_fp_regs);
if (err < 0) {
printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
__func__, pid, err);
return err;
}
/*
* Wait, until parent has finished its work: read child's pid from
* parent's stack, and check, if bad result.
*/
err = ptrace(PTRACE_CONT, pid, 0, 0);
if (err) {
err = -errno;
printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
pid, errno);
return err;
}
wait_stub_done(pid);
pid = data->parent_err;
if (pid < 0) {
printk(UM_KERN_ERR "%s - stub-parent reports error %d\n",
__func__, -pid);
return pid;
}
/*
* Wait, until child has finished too: read child's result from
* child's stack and check it.
*/
wait_stub_done(pid);
if (child_data->child_err != STUB_DATA) {
printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
__func__, pid, data->child_err);
err = data->child_err;
goto out_kill;
}
if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
(void *)PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
return pid;
out_kill:
os_kill_ptraced_process(pid, 1);
return err;
}
void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
{ {
(*buf)[0].JB_IP = (unsigned long) handler; (*buf)[0].JB_IP = (unsigned long) handler;
@ -684,5 +589,4 @@ void reboot_skas(void)
void __switch_mm(struct mm_id *mm_idp) void __switch_mm(struct mm_id *mm_idp)
{ {
userspace_pid[0] = mm_idp->u.pid; userspace_pid[0] = mm_idp->u.pid;
kill_userspace_mm[0] = mm_idp->kill;
} }
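
Taken together, the userspace() loop gains a fixed per-iteration contract that replaces the old kill_userspace_mm flag: publish any deferred broadcast message, turn page-table deltas into queued stub syscalls, then run the queue before the process continues. Roughly, with the ptrace plumbing elided:

/* Per-iteration contract of the loop above, schematically: */
int err;

while (1) {
	time_travel_print_bc_msg();	/* flush deferred broadcast printk */

	current_mm_sync();		/* page-table deltas -> queued ops */

	err = syscall_stub_flush(current_mm_id());
	if (err) {			/* e.g. -ENOMEM from the host */
		if (err == -ENOMEM)
			report_enomem();
		fatal_sigsegv();
	}

	/* ... PTRACE_SETREGS / PTRACE_CONT / wait and trap handling ... */
}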

View file

@ -17,6 +17,7 @@
#include <sys/wait.h> #include <sys/wait.h>
#include <sys/time.h> #include <sys/time.h>
#include <sys/resource.h> #include <sys/resource.h>
#include <asm/ldt.h>
#include <asm/unistd.h> #include <asm/unistd.h>
#include <init.h> #include <init.h>
#include <os.h> #include <os.h>

View file

@ -9,6 +9,7 @@ core-y += arch/x86/crypto/
# #
ifeq ($(CONFIG_CC_IS_CLANG),y) ifeq ($(CONFIG_CC_IS_CLANG),y)
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
endif endif

View file

@ -9,9 +9,9 @@ else
BITS := 64 BITS := 64
endif endif
obj-y = bugs_$(BITS).o delay.o fault.o ldt.o \ obj-y = bugs_$(BITS).o delay.o fault.o \
ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
stub_$(BITS).o stub_segv.o \ stub_segv.o \
sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
mem_$(BITS).o subarch.o os-Linux/ mem_$(BITS).o subarch.o os-Linux/
@ -31,7 +31,6 @@ obj-y += syscalls_64.o vdso/
subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \ subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \
../lib/memmove_64.o ../lib/memset_64.o ../lib/memmove_64.o ../lib/memset_64.o
subarch-$(CONFIG_PREEMPTION) += ../entry/thunk_64.o
endif endif

View file

@ -1,70 +0,0 @@
/*
* Copyright (C) 2004 Fujitsu Siemens Computers GmbH
* Licensed under the GPL
*
* Author: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
*/
#ifndef __ASM_LDT_H
#define __ASM_LDT_H
#include <linux/mutex.h>
#include <asm/ldt.h>
#define LDT_PAGES_MAX \
((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
#define LDT_ENTRIES_PER_PAGE \
(PAGE_SIZE/LDT_ENTRY_SIZE)
#define LDT_DIRECT_ENTRIES \
((LDT_PAGES_MAX*sizeof(void *))/LDT_ENTRY_SIZE)
struct ldt_entry {
__u32 a;
__u32 b;
};
typedef struct uml_ldt {
int entry_count;
struct mutex lock;
union {
struct ldt_entry * pages[LDT_PAGES_MAX];
struct ldt_entry entries[LDT_DIRECT_ENTRIES];
} u;
} uml_ldt_t;
#define LDT_entry_a(info) \
((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
#define LDT_entry_b(info) \
(((info)->base_addr & 0xff000000) | \
(((info)->base_addr & 0x00ff0000) >> 16) | \
((info)->limit & 0xf0000) | \
(((info)->read_exec_only ^ 1) << 9) | \
((info)->contents << 10) | \
(((info)->seg_not_present ^ 1) << 15) | \
((info)->seg_32bit << 22) | \
((info)->limit_in_pages << 23) | \
((info)->useable << 20) | \
0x7000)
#define _LDT_empty(info) (\
(info)->base_addr == 0 && \
(info)->limit == 0 && \
(info)->contents == 0 && \
(info)->read_exec_only == 1 && \
(info)->seg_32bit == 0 && \
(info)->limit_in_pages == 0 && \
(info)->seg_not_present == 1 && \
(info)->useable == 0 )
#ifdef CONFIG_X86_64
#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
#else
#define LDT_empty(info) (_LDT_empty(info))
#endif
struct uml_arch_mm_context {
uml_ldt_t ldt;
};
#endif

View file

@ -1,380 +0,0 @@
/*
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
* Licensed under the GPL
*/
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <os.h>
#include <skas.h>
#include <sysdep/tls.h>
static inline int modify_ldt (int func, void *ptr, unsigned long bytecount)
{
return syscall(__NR_modify_ldt, func, ptr, bytecount);
}
static long write_ldt_entry(struct mm_id *mm_idp, int func,
struct user_desc *desc, void **addr, int done)
{
long res;
void *stub_addr;
BUILD_BUG_ON(sizeof(*desc) % sizeof(long));
res = syscall_stub_data(mm_idp, (unsigned long *)desc,
sizeof(*desc) / sizeof(long),
addr, &stub_addr);
if (!res) {
unsigned long args[] = { func,
(unsigned long)stub_addr,
sizeof(*desc),
0, 0, 0 };
res = run_syscall_stub(mm_idp, __NR_modify_ldt, args,
0, addr, done);
}
return res;
}
/*
* In skas mode, we hold our own ldt data in UML.
* Thus, the code implementing sys_modify_ldt_skas
* is very similar to (and mostly stolen from) sys_modify_ldt
* for arch/i386/kernel/ldt.c
* The routines copied and modified in part are:
* - read_ldt
* - read_default_ldt
* - write_ldt
* - sys_modify_ldt_skas
*/
static int read_ldt(void __user * ptr, unsigned long bytecount)
{
int i, err = 0;
unsigned long size;
uml_ldt_t *ldt = &current->mm->context.arch.ldt;
if (!ldt->entry_count)
goto out;
if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
err = bytecount;
mutex_lock(&ldt->lock);
if (ldt->entry_count <= LDT_DIRECT_ENTRIES) {
size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
if (size > bytecount)
size = bytecount;
if (copy_to_user(ptr, ldt->u.entries, size))
err = -EFAULT;
bytecount -= size;
ptr += size;
}
else {
for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount;
i++) {
size = PAGE_SIZE;
if (size > bytecount)
size = bytecount;
if (copy_to_user(ptr, ldt->u.pages[i], size)) {
err = -EFAULT;
break;
}
bytecount -= size;
ptr += size;
}
}
mutex_unlock(&ldt->lock);
if (bytecount == 0 || err == -EFAULT)
goto out;
if (clear_user(ptr, bytecount))
err = -EFAULT;
out:
return err;
}
static int read_default_ldt(void __user * ptr, unsigned long bytecount)
{
int err;
if (bytecount > 5*LDT_ENTRY_SIZE)
bytecount = 5*LDT_ENTRY_SIZE;
err = bytecount;
/*
* UML doesn't support lcall7 and lcall27.
* So, we don't really have a default ldt, but emulate
* an empty ldt of common host default ldt size.
*/
if (clear_user(ptr, bytecount))
err = -EFAULT;
return err;
}
static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
{
uml_ldt_t *ldt = &current->mm->context.arch.ldt;
struct mm_id * mm_idp = &current->mm->context.id;
int i, err;
struct user_desc ldt_info;
struct ldt_entry entry0, *ldt_p;
void *addr = NULL;
err = -EINVAL;
if (bytecount != sizeof(ldt_info))
goto out;
err = -EFAULT;
if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
goto out;
err = -EINVAL;
if (ldt_info.entry_number >= LDT_ENTRIES)
goto out;
if (ldt_info.contents == 3) {
if (func == 1)
goto out;
if (ldt_info.seg_not_present == 0)
goto out;
}
mutex_lock(&ldt->lock);
err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
if (err)
goto out_unlock;
if (ldt_info.entry_number >= ldt->entry_count &&
ldt_info.entry_number >= LDT_DIRECT_ENTRIES) {
for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE;
i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number;
i++) {
if (i == 0)
memcpy(&entry0, ldt->u.entries,
sizeof(entry0));
ldt->u.pages[i] = (struct ldt_entry *)
__get_free_page(GFP_KERNEL|__GFP_ZERO);
if (!ldt->u.pages[i]) {
err = -ENOMEM;
/* Undo the change in host */
memset(&ldt_info, 0, sizeof(ldt_info));
write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1);
goto out_unlock;
}
if (i == 0) {
memcpy(ldt->u.pages[0], &entry0,
sizeof(entry0));
memcpy(ldt->u.pages[0]+1, ldt->u.entries+1,
sizeof(entry0)*(LDT_DIRECT_ENTRIES-1));
}
ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE;
}
}
if (ldt->entry_count <= ldt_info.entry_number)
ldt->entry_count = ldt_info.entry_number + 1;
if (ldt->entry_count <= LDT_DIRECT_ENTRIES)
ldt_p = ldt->u.entries + ldt_info.entry_number;
else
ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] +
ldt_info.entry_number%LDT_ENTRIES_PER_PAGE;
if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
(func == 1 || LDT_empty(&ldt_info))) {
ldt_p->a = 0;
ldt_p->b = 0;
}
else{
if (func == 1)
ldt_info.useable = 0;
ldt_p->a = LDT_entry_a(&ldt_info);
ldt_p->b = LDT_entry_b(&ldt_info);
}
err = 0;
out_unlock:
mutex_unlock(&ldt->lock);
out:
return err;
}
static long do_modify_ldt_skas(int func, void __user *ptr,
unsigned long bytecount)
{
int ret = -ENOSYS;
switch (func) {
case 0:
ret = read_ldt(ptr, bytecount);
break;
case 1:
case 0x11:
ret = write_ldt(ptr, bytecount, func);
break;
case 2:
ret = read_default_ldt(ptr, bytecount);
break;
}
return ret;
}
static DEFINE_SPINLOCK(host_ldt_lock);
static short dummy_list[9] = {0, -1};
static short * host_ldt_entries = NULL;
static void ldt_get_host_info(void)
{
long ret;
struct ldt_entry * ldt;
short *tmp;
int i, size, k, order;
spin_lock(&host_ldt_lock);
if (host_ldt_entries != NULL) {
spin_unlock(&host_ldt_lock);
return;
}
host_ldt_entries = dummy_list+1;
spin_unlock(&host_ldt_lock);
for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++)
;
ldt = (struct ldt_entry *)
__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
if (ldt == NULL) {
printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer "
"for host ldt\n");
return;
}
ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE);
if (ret < 0) {
printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n");
goto out_free;
}
if (ret == 0) {
/* default_ldt is active, simply write an empty entry 0 */
host_ldt_entries = dummy_list;
goto out_free;
}
for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) {
if (ldt[i].a != 0 || ldt[i].b != 0)
size++;
}
if (size < ARRAY_SIZE(dummy_list))
host_ldt_entries = dummy_list;
else {
size = (size + 1) * sizeof(dummy_list[0]);
tmp = kmalloc(size, GFP_KERNEL);
if (tmp == NULL) {
printk(KERN_ERR "ldt_get_host_info: couldn't allocate "
"host ldt list\n");
goto out_free;
}
host_ldt_entries = tmp;
}
for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) {
if (ldt[i].a != 0 || ldt[i].b != 0)
host_ldt_entries[k++] = i;
}
host_ldt_entries[k] = -1;
out_free:
free_pages((unsigned long)ldt, order);
}
long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
{
struct user_desc desc;
short * num_p;
int i;
long page, err=0;
void *addr = NULL;
mutex_init(&new_mm->arch.ldt.lock);
if (!from_mm) {
memset(&desc, 0, sizeof(desc));
/*
* Now we try to retrieve info about the ldt, we
* inherited from the host. All ldt-entries found
* will be reset in the following loop
*/
ldt_get_host_info();
for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
desc.entry_number = *num_p;
err = write_ldt_entry(&new_mm->id, 1, &desc,
&addr, *(num_p + 1) == -1);
if (err)
break;
}
new_mm->arch.ldt.entry_count = 0;
goto out;
}
/*
* Our local LDT is used to supply the data for
* modify_ldt(READLDT), if PTRACE_LDT isn't available,
* i.e., we have to use the stub for modify_ldt, which
* can't handle the big read buffer of up to 64kB.
*/
mutex_lock(&from_mm->arch.ldt.lock);
if (from_mm->arch.ldt.entry_count <= LDT_DIRECT_ENTRIES)
memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
sizeof(new_mm->arch.ldt.u.entries));
else {
i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
while (i-->0) {
page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
if (!page) {
err = -ENOMEM;
break;
}
new_mm->arch.ldt.u.pages[i] =
(struct ldt_entry *) page;
memcpy(new_mm->arch.ldt.u.pages[i],
from_mm->arch.ldt.u.pages[i], PAGE_SIZE);
}
}
new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count;
mutex_unlock(&from_mm->arch.ldt.lock);
out:
return err;
}
void free_ldt(struct mm_context *mm)
{
int i;
if (mm->arch.ldt.entry_count > LDT_DIRECT_ENTRIES) {
i = mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
while (i-- > 0)
free_page((long) mm->arch.ldt.u.pages[i]);
}
mm->arch.ldt.entry_count = 0;
}
SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
unsigned long , bytecount)
{
/* See non-um modify_ldt() for why we do this cast */
return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
}
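
The file above implemented UML's sys_modify_ldt(). As a hedged usage sketch, this is how a process inside the guest would have reached the read_ldt() path; modify_ldt has no libc wrapper, so syscall(2) is used directly:

	#include <sys/syscall.h>
	#include <unistd.h>

	/* func 0 selects read_ldt(); returns the number of bytes copied,
	 * or -1 with errno set. Each LDT entry is LDT_ENTRY_SIZE (8) bytes
	 * of raw descriptor. */
	static long read_guest_ldt(void *buf, unsigned long bytecount)
	{
		return syscall(SYS_modify_ldt, 0, buf, bytecount);
	}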

View file

@@ -12,4 +12,4 @@
 #endif
 
 extern void stub_segv_handler(int, siginfo_t *, void *);
-extern void stub_clone_handler(void);
+extern void stub_syscall_handler(void);

View file

@@ -6,6 +6,7 @@
 #ifndef __SYSDEP_STUB_H
 #define __SYSDEP_STUB_H
 
+#include <stddef.h>
 #include <asm/ptrace.h>
 #include <generated/asm-offsets.h>
 
@@ -79,35 +80,33 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
 	return ret;
 }
 
+static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
+					  long arg3, long arg4, long arg5,
+					  long arg6)
+{
+	struct syscall_args {
+		int ebx, ebp;
+	} args = { arg1, arg6 };
+	long ret;
+
+	__asm__ volatile ("pushl %%ebp;"
+			  "movl 0x4(%%ebx),%%ebp;"
+			  "movl (%%ebx),%%ebx;"
+			  "int $0x80;"
+			  "popl %%ebp"
+			  : "=a" (ret)
+			  : "0" (syscall), "b" (&args),
+			    "c" (arg2), "d" (arg3), "S" (arg4), "D" (arg5)
+			  : "memory");
+
+	return ret;
+}
+
 static __always_inline void trap_myself(void)
 {
 	__asm("int3");
 }
 
-static __always_inline void remap_stack_and_trap(void)
-{
-	__asm__ volatile (
-		"movl %%esp,%%ebx ;"
-		"andl %0,%%ebx ;"
-		"movl %1,%%eax ;"
-		"movl %%ebx,%%edi ; addl %2,%%edi ; movl (%%edi),%%edi ;"
-		"movl %%ebx,%%ebp ; addl %3,%%ebp ; movl (%%ebp),%%ebp ;"
-		"int $0x80 ;"
-		"addl %4,%%ebx ; movl %%eax, (%%ebx) ;"
-		"int $3"
-		: :
-		"g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)),
-		"g" (STUB_MMAP_NR),
-		"g" (UML_STUB_FIELD_FD),
-		"g" (UML_STUB_FIELD_OFFSET),
-		"g" (UML_STUB_FIELD_CHILD_ERR),
-		"c" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE),
-		"d" (PROT_READ | PROT_WRITE),
-		"S" (MAP_FIXED | MAP_SHARED)
-		:
-		"memory");
-}
-
 static __always_inline void *get_stub_data(void)
 {
 	unsigned long ret;
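
The remap_stack_and_trap() helpers removed above (this i386 version and the x86_64 one below) did the same job. A hedged C rendering follows; fd_from()/offset_from() are illustrative stand-ins for the loads the assembly performs at the UML_STUB_FIELD_* offsets, and the real code had to be assembly because it runs before the child has a valid, writable stack:

	/* Locate the stub data page from the current stack pointer,
	 * then map the shared stub data over it in the child. */
	unsigned long data = sp & ~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1);
	long err = mmap((void *)data, STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
			PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
			fd_from(data),          /* UML_STUB_FIELD_FD */
			offset_from(data));     /* UML_STUB_FIELD_OFFSET */
	*(long *)(data + UML_STUB_FIELD_CHILD_ERR) = err;
	trap_myself();  /* int3 / int $3: report back to the tracing parent */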

View file

@@ -6,6 +6,7 @@
 #ifndef __SYSDEP_STUB_H
 #define __SYSDEP_STUB_H
 
+#include <stddef.h>
 #include <sysdep/ptrace_user.h>
 #include <generated/asm-offsets.h>
 #include <linux/stddef.h>
 
@@ -79,37 +80,27 @@ static __always_inline long stub_syscall5(long syscall, long arg1, long arg2,
 	return ret;
 }
 
+static __always_inline long stub_syscall6(long syscall, long arg1, long arg2,
+					  long arg3, long arg4, long arg5,
+					  long arg6)
+{
+	long ret;
+
+	__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; "
+		__syscall
+		: "=a" (ret)
+		: "0" (syscall), "D" (arg1), "S" (arg2), "d" (arg3),
+		  "g" (arg4), "g" (arg5), "g" (arg6)
+		: __syscall_clobber, "r10", "r8", "r9");
+
+	return ret;
+}
+
 static __always_inline void trap_myself(void)
 {
 	__asm("int3");
 }
 
-static __always_inline void remap_stack_and_trap(void)
-{
-	__asm__ volatile (
-		"movq %0,%%rax ;"
-		"movq %%rsp,%%rdi ;"
-		"andq %1,%%rdi ;"
-		"movq %2,%%r10 ;"
-		"movq %%rdi,%%r8 ; addq %3,%%r8 ; movq (%%r8),%%r8 ;"
-		"movq %%rdi,%%r9 ; addq %4,%%r9 ; movq (%%r9),%%r9 ;"
-		__syscall ";"
-		"movq %%rsp,%%rdi ; andq %1,%%rdi ;"
-		"addq %5,%%rdi ; movq %%rax, (%%rdi) ;"
-		"int3"
-		: :
-		"g" (STUB_MMAP_NR),
-		"g" (~(STUB_DATA_PAGES * UM_KERN_PAGE_SIZE - 1)),
-		"g" (MAP_FIXED | MAP_SHARED),
-		"g" (UML_STUB_FIELD_FD),
-		"g" (UML_STUB_FIELD_OFFSET),
-		"g" (UML_STUB_FIELD_CHILD_ERR),
-		"S" (STUB_DATA_PAGES * UM_KERN_PAGE_SIZE),
-		"d" (PROT_READ | PROT_WRITE)
-		:
-		__syscall_clobber, "r10", "r8", "r9");
-}
-
 static __always_inline void *get_stub_data(void)
 {
 	unsigned long ret;

View file

@@ -1,56 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <as-layout.h>
.section .__syscall_stub, "ax"
.globl batch_syscall_stub
batch_syscall_stub:
/* %esp comes in as "top of page" */
mov %esp, %ecx
/* %esp has pointer to first operation */
add $8, %esp
again:
/* load length of additional data */
mov 0x0(%esp), %eax
/* if(length == 0) : end of list */
/* write possible 0 to header */
mov %eax, 0x4(%ecx)
cmpl $0, %eax
jz done
/* save current pointer */
mov %esp, 0x4(%ecx)
/* skip additional data */
add %eax, %esp
/* load syscall-# */
pop %eax
/* load syscall params */
pop %ebx
pop %ecx
pop %edx
pop %esi
pop %edi
pop %ebp
/* execute syscall */
int $0x80
/* restore top of page pointer in %ecx */
mov %esp, %ecx
andl $(~UM_KERN_PAGE_SIZE) + 1, %ecx
/* check return value */
pop %ebx
cmp %ebx, %eax
je again
done:
/* save return value */
mov %eax, (%ecx)
/* stop */
int3

View file

@@ -1,50 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <as-layout.h>
.section .__syscall_stub, "ax"
.globl batch_syscall_stub
batch_syscall_stub:
/* %rsp has the pointer to first operation */
mov %rsp, %rbx
add $0x10, %rsp
again:
/* load length of additional data */
mov 0x0(%rsp), %rax
/* if(length == 0) : end of list */
/* write possible 0 to header */
mov %rax, 8(%rbx)
cmp $0, %rax
jz done
/* save current pointer */
mov %rsp, 8(%rbx)
/* skip additional data */
add %rax, %rsp
/* load syscall-# */
pop %rax
/* load syscall params */
pop %rdi
pop %rsi
pop %rdx
pop %r10
pop %r8
pop %r9
/* execute syscall */
syscall
/* check return value */
pop %rcx
cmp %rcx, %rax
je again
done:
/* save return value */
mov %rax, (%rbx)
/* stop */
int3
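
Both removed batch stubs walked the same record layout on the stub stack page. Reconstructed from the assembly above as a hedged C-style sketch (the layout description is illustrative; the real records were variable-length):

	/* One operation, as popped by the stub:
	 *   long data_len;        -- 0 terminates the batch
	 *   char data[data_len];  -- payload, skipped over by the stub
	 *   long nr;              -- syscall number
	 *   long args[6];         -- ebx..ebp on i386, rdi..r9 on x86_64
	 *   long expected;        -- stop the batch unless return == expected
	 * The first unexpected return value was written to the top of the
	 * page before the stub stopped with int3.
	 */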

View file

@@ -11,6 +11,7 @@
 #include <os.h>
 #include <skas.h>
 #include <sysdep/tls.h>
+#include <asm/desc.h>
 
 /*
  * If needed we can detect when it's uninitialized.

View file

@@ -63,9 +63,10 @@ struct hostfs_stat {
 	struct hostfs_timespec atime, mtime, ctime;
 	unsigned int blksize;
 	unsigned long long blocks;
-	unsigned int maj;
-	unsigned int min;
-	dev_t dev;
+	struct {
+		unsigned int maj;
+		unsigned int min;
+	} rdev, dev;
 };
 
 extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
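
The reason for replacing the single dev_t fields, as far as the patch shows: hostfs_stat is filled on the host side, and the host's dev_t encoding need not match the UML kernel's, so major/minor are carried separately and re-encoded. Both halves of the conversion, restated from the hunks below:

	/* host side (hostfs_user.c): split the host's dev_t */
	p->dev.maj = os_major(buf->st_dev);
	p->dev.min = os_minor(buf->st_dev);

	/* guest side (hostfs_kern.c): re-encode with the kernel's layout */
	dev_t dev = MKDEV(st->dev.maj, st->dev.min);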

View file

@@ -532,10 +532,11 @@ static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st)
 static int hostfs_inode_set(struct inode *ino, void *data)
 {
 	struct hostfs_stat *st = data;
-	dev_t rdev;
+	dev_t dev, rdev;
 
 	/* Reencode maj and min with the kernel encoding.*/
-	rdev = MKDEV(st->maj, st->min);
+	rdev = MKDEV(st->rdev.maj, st->rdev.min);
+	dev = MKDEV(st->dev.maj, st->dev.min);
 
 	switch (st->mode & S_IFMT) {
 	case S_IFLNK:
@@ -561,7 +562,7 @@ static int hostfs_inode_set(struct inode *ino, void *data)
 		return -EIO;
 	}
 
-	HOSTFS_I(ino)->dev = st->dev;
+	HOSTFS_I(ino)->dev = dev;
 	ino->i_ino = st->ino;
 	ino->i_mode = st->mode;
 	return hostfs_inode_update(ino, st);
@@ -570,8 +571,9 @@ static int hostfs_inode_set(struct inode *ino, void *data)
 static int hostfs_inode_test(struct inode *inode, void *data)
 {
 	const struct hostfs_stat *st = data;
+	dev_t dev = MKDEV(st->dev.maj, st->dev.min);
 
-	return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == st->dev;
+	return inode->i_ino == st->ino && HOSTFS_I(inode)->dev == dev;
 }
 
 static struct inode *hostfs_iget(struct super_block *sb, char *name)
@@ -1040,4 +1042,5 @@ static void __exit exit_hostfs(void)
 module_init(init_hostfs)
 module_exit(exit_hostfs)
+MODULE_DESCRIPTION("User-Mode Linux Host filesystem");
 MODULE_LICENSE("GPL");

View file

@@ -34,9 +34,10 @@ static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
 	p->mtime.tv_nsec = 0;
 	p->blksize = buf->st_blksize;
 	p->blocks = buf->st_blocks;
-	p->maj = os_major(buf->st_rdev);
-	p->min = os_minor(buf->st_rdev);
-	p->dev = buf->st_dev;
+	p->rdev.maj = os_major(buf->st_rdev);
+	p->rdev.min = os_minor(buf->st_rdev);
+	p->dev.maj = os_major(buf->st_dev);
+	p->dev.min = os_minor(buf->st_dev);
 }
 
 int stat_file(const char *path, struct hostfs_stat *p, int fd)

View file

@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
 /*
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019 - 2023 Intel Corporation
  */
 #ifndef _UAPI_LINUX_UM_TIMETRAVEL_H
 #define _UAPI_LINUX_UM_TIMETRAVEL_H
@@ -50,6 +39,36 @@ struct um_timetravel_msg {
 	__u64 time;
 };
 
+/* max number of file descriptors that can be sent/received in a message */
+#define UM_TIMETRAVEL_MAX_FDS 2
+
+/**
+ * enum um_timetravel_shared_mem_fds - fds sent in ACK message for START message
+ */
+enum um_timetravel_shared_mem_fds {
+	/**
+	 * @UM_TIMETRAVEL_SHARED_MEMFD: Index of the shared memory file
+	 * descriptor in the control message
+	 */
+	UM_TIMETRAVEL_SHARED_MEMFD,
+	/**
+	 * @UM_TIMETRAVEL_SHARED_LOGFD: Index of the logging file descriptor
+	 * in the control message
+	 */
+	UM_TIMETRAVEL_SHARED_LOGFD,
+	UM_TIMETRAVEL_SHARED_MAX_FDS,
+};
+
+/**
+ * enum um_timetravel_start_ack - ack-time mask for start message
+ */
+enum um_timetravel_start_ack {
+	/**
+	 * @UM_TIMETRAVEL_START_ACK_ID: client ID that controller allocated.
+	 */
+	UM_TIMETRAVEL_START_ACK_ID = 0xffff,
+};
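
A hedged sketch of how a client would use the mask; per the comment above, the controller-allocated ID arrives in the time field of the ACK to the START message (msg is an illustrative name for the received ACK):

	__u16 my_id = msg.time & UM_TIMETRAVEL_START_ACK_ID;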
 /**
  * enum um_timetravel_ops - Operation codes
  */
@@ -57,7 +76,9 @@ enum um_timetravel_ops {
 	/**
 	 * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message,
 	 * this usually doesn't carry any data in the 'time' field
-	 * unless otherwise specified below
+	 * unless otherwise specified below; note that when shared memory
+	 * is used there is no ACK for WAIT and RUN messages, for more
+	 * info see &struct um_timetravel_schedshm.
 	 */
 	UM_TIMETRAVEL_ACK = 0,
 
@@ -123,6 +144,147 @@ enum um_timetravel_ops {
 	 * the simulation.
 	 */
 	UM_TIMETRAVEL_GET_TOD = 8,
+
+	/**
+	 * @UM_TIMETRAVEL_BROADCAST: Send/Receive a broadcast message.
+	 * This message can be used to sync all components in the system
+	 * with a single message: if the calendar gets the message, it
+	 * broadcasts it to all components, and any component that receives
+	 * it should act on it, e.g. print a message to its log system.
+	 * (calendar <-> host)
+	 */
+	UM_TIMETRAVEL_BROADCAST = 9,
 };
+/* version of struct um_timetravel_schedshm */
+#define UM_TIMETRAVEL_SCHEDSHM_VERSION 2
+
+/**
+ * enum um_timetravel_schedshm_cap - time travel capabilities of every client
+ *
+ * These flags must be set immediately after processing the ACK to
+ * the START message, before sending any message to the controller.
+ */
+enum um_timetravel_schedshm_cap {
+	/**
+	 * @UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE: client can read the current
+	 * time from shared memory, write its internal time request to it and
+	 * read free_until from it; it sends no ACK on RUN and doesn't expect
+	 * an ACK on WAIT.
+	 */
+	UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE = 0x1,
+};
+
+/**
+ * enum um_timetravel_schedshm_flags - time travel flags of every client
+ */
+enum um_timetravel_schedshm_flags {
+	/**
+	 * @UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN: client has a request to run.
+	 * It's set by the client when it has a request to run, if (and only
+	 * if) @running_id points to a client that is able to use shared
+	 * memory, i.e. has %UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE (this
+	 * includes the client itself). Otherwise, a message must be used.
+	 */
+	UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN = 0x1,
+};
+
+/**
+ * DOC: Time travel shared memory overview
+ *
+ * The main purpose of the shared memory is to avoid all time travel messages
+ * that don't need any action; for example, the current time can be held in
+ * shared memory without any client having to send a UM_TIMETRAVEL_GET
+ * message just to learn the time.
+ *
+ * Since this memory is shared with all clients and the controller, and the
+ * controller creates the shared memory space, all time values are absolute
+ * to controller time. So the first time a client connects in shared memory
+ * mode, it should take the current_time value in shared memory and keep it
+ * internally as a diff to shared memory times; once shared memory is
+ * initialized, any interaction with the controller must happen in the
+ * controller time domain, including any messages (for clients that are not
+ * using shared memory, the controller will handle an offset and make the
+ * clients think they start at time zero).
+ *
+ * Along with the shared memory file descriptor, a logging file descriptor is
+ * sent to the client, so that all logs related to shared memory are logged
+ * into one place. Note: to have all logs synced into the log file at write
+ * time, the file should be flushed (fflush) after writing to it.
+ *
+ * To avoid memory corruption, we define below, for each field, who can write
+ * to it at what time; see the structure fields.
+ *
+ * To avoid having to pack this struct, all fields in it must be naturally
+ * aligned (i.e. aligned to their size).
+ */
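
A minimal sketch of the first-connection step described in the overview, assuming shm already points at the mapped area (see the mapping sketch after the struct below):

	/* Keep the controller-absolute base time; from now on every time
	 * value exchanged with the controller is local_time + offset. */
	__u64 offset = shm->current_time;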
+/**
+ * union um_timetravel_schedshm_client - UM time travel client struct
+ *
+ * Every entity using the shared memory, including the controller, has a place
+ * in the um_timetravel_schedshm clients array that holds info related to the
+ * client using the shared memory, and can be set only by the client after it
+ * gets the shared memory fd.
+ *
+ * @capa: bit field of client capabilities, see
+ *	&enum um_timetravel_schedshm_cap; set by the client once after getting
+ *	the shared memory file descriptor.
+ * @flags: bit field of flags, see &enum um_timetravel_schedshm_flags.
+ * @req_time: request time to run, set by the client on every request it makes.
+ * @name: unique id sent to the controller by the client with the START
+ *	message.
+ */
+union um_timetravel_schedshm_client {
+	struct {
+		__u32 capa;
+		__u32 flags;
+		__u64 req_time;
+		__u64 name;
+	};
+	char reserve[128]; /* reserved for future usage */
+};
+
+/**
+ * struct um_timetravel_schedshm - UM time travel shared memory struct
+ *
+ * @hdr: header fields:
+ * @version: Current version of the struct, %UM_TIMETRAVEL_SCHEDSHM_VERSION,
+ *	set by the controller once at init; clients must check this after
+ *	mapping and work without shared memory if they cannot handle the
+ *	indicated version.
+ * @len: Length of all the memory including the header (@hdr); once per
+ *	connection, clients should first mmap the header and use the length
+ *	(@len) to remap the entire size. This supports a dynamic struct size,
+ *	letting the number of clients be dynamic based on controller support.
+ * @free_until: Stores the next request to run by any client, so that the
+ *	current client knows how long it can still run. A client needs to (at
+ *	least) reload this value immediately after communicating with any
+ *	other client, since the controller will update this field when a new
+ *	request is made by any client. Clients also must update this value
+ *	when they insert/update an own request into the shared memory while
+ *	not running themselves, and the new request is earlier than the
+ *	current value.
+ * @current_time: Current time, can only be set by the client in running
+ *	state (indicated by @running_id); that client may only run until
+ *	@free_until, so it must remain smaller than @free_until.
+ * @running_id: The current client in state running, set before a client is
+ *	notified that it's now running.
+ * @max_clients: size of @clients array, set once at init by the controller.
+ * @clients: clients array, see &union um_timetravel_schedshm_client for doc,
+ *	set only by the client.
+ */
+struct um_timetravel_schedshm {
+	union {
+		struct {
+			__u32 version;
+			__u32 len;
+			__u64 free_until;
+			__u64 current_time;
+			__u16 running_id;
+			__u16 max_clients;
+		};
+		char hdr[4096]; /* align to 4K page size */
+	};
+	union um_timetravel_schedshm_client clients[];
+};
 
 #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */
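
A hedged sketch of the two-step mapping the @len documentation asks for; memfd stands for the shared memory descriptor received at UM_TIMETRAVEL_SHARED_MEMFD, and error handling is elided:

	struct um_timetravel_schedshm *shm;

	/* Map just the 4 KiB header first... */
	shm = mmap(NULL, sizeof(shm->hdr), PROT_READ | PROT_WRITE,
		   MAP_SHARED, memfd, 0);
	if (shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION)
		return -1;	/* fall back to message-based operation */

	/* ...then remap the full, controller-determined size. */
	size_t len = shm->len;
	munmap(shm, sizeof(shm->hdr));
	shm = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);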

View file

@@ -426,7 +426,7 @@ $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--re
 $(obj)/core.o: private rustc_target_flags = $(core-cfgs)
 $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs FORCE
 	+$(call if_changed_dep,rustc_library)
-ifdef CONFIG_X86_64
+ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),)
 $(obj)/core.o: scripts/target.json
 endif

View file

@@ -12,7 +12,7 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert
 hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_builder
 hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_gen
 
-ifdef CONFIG_X86_64
+ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),)
 always-$(CONFIG_RUST) += target.json
 
 filechk_rust_target = $< < include/config/auto.conf

View file

@@ -169,6 +169,23 @@ fn main() {
         ts.push("features", features);
         ts.push("llvm-target", "x86_64-linux-gnu");
         ts.push("target-pointer-width", "64");
+    } else if cfg.has("X86_32") {
+        // This only works on UML, as i386 otherwise needs regparm support in rustc
+        if !cfg.has("UML") {
+            panic!("32-bit x86 only works under UML");
+        }
+        ts.push("arch", "x86");
+        ts.push(
+            "data-layout",
+            "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128",
+        );
+        let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string();
+        if cfg.has("MITIGATION_RETPOLINE") {
+            features += ",+retpoline-external-thunk";
+        }
+        ts.push("features", features);
+        ts.push("llvm-target", "i386-unknown-linux-gnu");
+        ts.push("target-pointer-width", "32");
     } else if cfg.has("LOONGARCH") {
         panic!("loongarch uses the builtin rustc loongarch64-unknown-none-softfloat target");
     } else {