Merge branch 'xsa' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Merge xen fixes from Juergen Gross:
 "Fixes for two issues related to Xen and malicious guests:

   - Guest can force the netback driver to hog large amounts of memory

   - Denial of Service in other guests due to event storms"

* 'xsa' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen/netback: don't queue unlimited number of packages
  xen/netback: fix rx queue stall detection
  xen/console: harden hvc_xen against event channel storms
  xen/netfront: harden netfront against event channel storms
  xen/blkfront: harden blkfront against event channel storms
This commit is contained in:
Linus Torvalds 2021-12-20 07:42:21 -08:00
commit 59b3f94488
7 changed files with 191 additions and 66 deletions

View file

@ -1512,9 +1512,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
unsigned long flags;
struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
struct blkfront_info *info = rinfo->dev_info;
unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
return IRQ_HANDLED;
}
spin_lock_irqsave(&rinfo->ring_lock, flags);
again:
@ -1530,6 +1533,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
unsigned long id;
unsigned int op;
eoiflag = 0;
RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
id = bret.id;
@ -1646,6 +1651,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
xen_irq_lateeoi(irq, eoiflag);
return IRQ_HANDLED;
err:
@ -1653,6 +1660,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
/* No EOI in order to avoid further interrupts. */
pr_alert("%s disabled for further use\n", info->gd->disk_name);
return IRQ_HANDLED;
}
@ -1692,8 +1701,8 @@ static int setup_blkring(struct xenbus_device *dev,
if (err)
goto fail;
err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
"blkif", rinfo);
err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt,
0, "blkif", rinfo);
if (err <= 0) {
xenbus_dev_fatal(dev, err,
"bind_evtchn_to_irqhandler failed");

View file

@ -203,6 +203,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
unsigned int rx_queue_max;
unsigned int rx_queue_len;
unsigned long last_rx_time;
unsigned int rx_slots_needed;
bool stalled;
struct xenvif_copy_state rx_copy;

View file

@ -33,28 +33,36 @@
#include <xen/xen.h>
#include <xen/events.h>
/*
 * Record in queue->rx_slots_needed how many ring slots the SKB at the head
 * of the RX queue requires: one per XEN_PAGE_SIZE of skb->len (rounded up),
 * plus one if the SKB is GSO and one more if skb->sw_hash is set.  A NULL
 * @skb stores 0.
 *
 * Callers running outside the RX thread must follow up with
 * xenvif_kick_thread(); otherwise the update can race with the RX thread
 * being put to sleep.
 */
static void xenvif_update_needed_slots(struct xenvif_queue *queue,
				       const struct sk_buff *skb)
{
	unsigned int slots;

	if (!skb) {
		/* No head SKB: publish "nothing needed". */
		WRITE_ONCE(queue->rx_slots_needed, 0);
		return;
	}

	slots = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
	slots += skb_is_gso(skb) ? 1 : 0;
	slots += skb->sw_hash ? 1 : 0;

	/* Paired with the READ_ONCE() readers of rx_slots_needed. */
	WRITE_ONCE(queue->rx_slots_needed, slots);
}
static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
{
RING_IDX prod, cons;
struct sk_buff *skb;
int needed;
unsigned long flags;
unsigned int needed;
spin_lock_irqsave(&queue->rx_queue.lock, flags);
skb = skb_peek(&queue->rx_queue);
if (!skb) {
spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
needed = READ_ONCE(queue->rx_slots_needed);
if (!needed)
return false;
}
needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
if (skb_is_gso(skb))
needed++;
if (skb->sw_hash)
needed++;
spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
do {
prod = queue->rx.sring->req_prod;
@ -80,13 +88,19 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
spin_lock_irqsave(&queue->rx_queue.lock, flags);
__skb_queue_tail(&queue->rx_queue, skb);
queue->rx_queue_len += skb->len;
if (queue->rx_queue_len > queue->rx_queue_max) {
if (queue->rx_queue_len >= queue->rx_queue_max) {
struct net_device *dev = queue->vif->dev;
netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
kfree_skb(skb);
queue->vif->dev->stats.rx_dropped++;
} else {
if (skb_queue_empty(&queue->rx_queue))
xenvif_update_needed_slots(queue, skb);
__skb_queue_tail(&queue->rx_queue, skb);
queue->rx_queue_len += skb->len;
}
spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
@ -100,6 +114,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
skb = __skb_dequeue(&queue->rx_queue);
if (skb) {
xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue));
queue->rx_queue_len -= skb->len;
if (queue->rx_queue_len < queue->rx_queue_max) {
struct netdev_queue *txq;
@ -134,6 +150,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
break;
xenvif_rx_dequeue(queue);
kfree_skb(skb);
queue->vif->dev->stats.rx_dropped++;
}
}
@ -487,27 +504,31 @@ void xenvif_rx_action(struct xenvif_queue *queue)
xenvif_rx_copy_flush(queue);
}
static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
/*
 * Return the distance between the shared ring's request producer index
 * (rx.sring->req_prod) and the local request consumer index (rx.req_cons).
 */
static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue)
{
	const RING_IDX req_prod = queue->rx.sring->req_prod;
	const RING_IDX req_cons = queue->rx.req_cons;

	return req_prod - req_cons;
}
static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue)
{
unsigned int needed = READ_ONCE(queue->rx_slots_needed);
return !queue->stalled &&
prod - cons < 1 &&
xenvif_rx_queue_slots(queue) < needed &&
time_after(jiffies,
queue->last_rx_time + queue->vif->stall_timeout);
}
static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
{
RING_IDX prod, cons;
unsigned int needed = READ_ONCE(queue->rx_slots_needed);
prod = queue->rx.sring->req_prod;
cons = queue->rx.req_cons;
return queue->stalled && prod - cons >= 1;
return queue->stalled && xenvif_rx_queue_slots(queue) >= needed;
}
bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)

View file

@ -148,6 +148,9 @@ struct netfront_queue {
grant_ref_t gref_rx_head;
grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
unsigned int rx_rsp_unconsumed;
spinlock_t rx_cons_lock;
struct page_pool *page_pool;
struct xdp_rxq_info xdp_rxq;
};
@ -376,12 +379,13 @@ static int xennet_open(struct net_device *dev)
return 0;
}
static void xennet_tx_buf_gc(struct netfront_queue *queue)
static bool xennet_tx_buf_gc(struct netfront_queue *queue)
{
RING_IDX cons, prod;
unsigned short id;
struct sk_buff *skb;
bool more_to_do;
bool work_done = false;
const struct device *dev = &queue->info->netdev->dev;
BUG_ON(!netif_carrier_ok(queue->info->netdev));
@ -398,6 +402,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
struct xen_netif_tx_response txrsp;
work_done = true;
RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
if (txrsp.status == XEN_NETIF_RSP_NULL)
continue;
@ -441,11 +447,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
xennet_maybe_wake_tx(queue);
return;
return work_done;
err:
queue->info->broken = true;
dev_alert(dev, "Disabled for further use\n");
return work_done;
}
struct xennet_gnttab_make_txreq {
@ -834,6 +842,16 @@ static int xennet_close(struct net_device *dev)
return 0;
}
/*
 * Set the RX ring response consumer index to @val and, within the same
 * rx_cons_lock critical section, refresh queue->rx_rsp_unconsumed from
 * RING_HAS_UNCONSUMED_RESPONSES(), so both values change together with
 * interrupts disabled.
 */
static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
{
unsigned long flags;
spin_lock_irqsave(&queue->rx_cons_lock, flags);
queue->rx.rsp_cons = val;
/* Evaluated after rsp_cons was stored; presumably reflects the new index. */
queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
}
static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
grant_ref_t ref)
{
@ -885,7 +903,7 @@ static int xennet_get_extras(struct netfront_queue *queue,
xennet_move_rx_slot(queue, skb, ref);
} while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
queue->rx.rsp_cons = cons;
xennet_set_rx_rsp_cons(queue, cons);
return err;
}
@ -1039,7 +1057,7 @@ static int xennet_get_responses(struct netfront_queue *queue,
}
if (unlikely(err))
queue->rx.rsp_cons = cons + slots;
xennet_set_rx_rsp_cons(queue, cons + slots);
return err;
}
@ -1093,7 +1111,8 @@ static int xennet_fill_frags(struct netfront_queue *queue,
__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
}
if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
queue->rx.rsp_cons = ++cons + skb_queue_len(list);
xennet_set_rx_rsp_cons(queue,
++cons + skb_queue_len(list));
kfree_skb(nskb);
return -ENOENT;
}
@ -1106,7 +1125,7 @@ static int xennet_fill_frags(struct netfront_queue *queue,
kfree_skb(nskb);
}
queue->rx.rsp_cons = cons;
xennet_set_rx_rsp_cons(queue, cons);
return 0;
}
@ -1229,7 +1248,9 @@ static int xennet_poll(struct napi_struct *napi, int budget)
if (unlikely(xennet_set_skb_gso(skb, gso))) {
__skb_queue_head(&tmpq, skb);
queue->rx.rsp_cons += skb_queue_len(&tmpq);
xennet_set_rx_rsp_cons(queue,
queue->rx.rsp_cons +
skb_queue_len(&tmpq));
goto err;
}
}
@ -1253,7 +1274,8 @@ static int xennet_poll(struct napi_struct *napi, int budget)
__skb_queue_tail(&rxq, skb);
i = ++queue->rx.rsp_cons;
i = queue->rx.rsp_cons + 1;
xennet_set_rx_rsp_cons(queue, i);
work_done++;
}
if (need_xdp_flush)
@ -1417,40 +1439,79 @@ static int xennet_set_features(struct net_device *dev,
return 0;
}
static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi)
{
struct netfront_queue *queue = dev_id;
unsigned long flags;
if (queue->info->broken)
return IRQ_HANDLED;
if (unlikely(queue->info->broken))
return false;
spin_lock_irqsave(&queue->tx_lock, flags);
xennet_tx_buf_gc(queue);
if (xennet_tx_buf_gc(queue))
*eoi = 0;
spin_unlock_irqrestore(&queue->tx_lock, flags);
return true;
}
/*
 * TX interrupt handler.  Starts from XEN_EOI_FLAG_SPURIOUS and lets
 * xennet_handle_tx() adjust the flag; the late EOI is issued only when that
 * helper returns true.  Always reports IRQ_HANDLED.
 */
static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
{
	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
	struct netfront_queue *queue = dev_id;

	/* Helper declined (returned false): leave the event without EOI. */
	if (unlikely(!xennet_handle_tx(queue, &eoi_flags)))
		return IRQ_HANDLED;

	xen_irq_lateeoi(irq, eoi_flags);

	return IRQ_HANDLED;
}
/*
 * RX half of the interrupt handling.
 *
 * Returns false when the device is (or becomes) marked broken, true
 * otherwise.  *eoi is set to 0 only when more unconsumed responses are seen
 * than recorded in queue->rx_rsp_unconsumed; in every other case it is left
 * as the caller passed it in.
 */
static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
{
unsigned int work_queued;
unsigned long flags;
if (unlikely(queue->info->broken))
return false;
/* rx_rsp_unconsumed is compared and updated under rx_cons_lock. */
spin_lock_irqsave(&queue->rx_cons_lock, flags);
work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
if (work_queued > queue->rx_rsp_unconsumed) {
/* New responses since the last record: remember them, clear the flag. */
queue->rx_rsp_unconsumed = work_queued;
*eoi = 0;
} else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) {
/* Fewer responses than recorded: treated as fatal for this device. */
const struct device *dev = &queue->info->netdev->dev;
/* Release the lock before logging and marking the device broken. */
spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
dev_alert(dev, "RX producer index going backwards\n");
dev_alert(dev, "Disabled for further use\n");
queue->info->broken = true;
return false;
}
spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
/* Only schedule NAPI when the carrier is up and there is pending work. */
if (likely(netif_carrier_ok(queue->info->netdev) && work_queued))
napi_schedule(&queue->napi);
return true;
}
static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
{
struct netfront_queue *queue = dev_id;
struct net_device *dev = queue->info->netdev;
unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
if (queue->info->broken)
return IRQ_HANDLED;
if (likely(netif_carrier_ok(dev) &&
RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
napi_schedule(&queue->napi);
if (likely(xennet_handle_rx(dev_id, &eoiflag)))
xen_irq_lateeoi(irq, eoiflag);
return IRQ_HANDLED;
}
static irqreturn_t xennet_interrupt(int irq, void *dev_id)
{
xennet_tx_interrupt(irq, dev_id);
xennet_rx_interrupt(irq, dev_id);
unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
if (xennet_handle_tx(dev_id, &eoiflag) &&
xennet_handle_rx(dev_id, &eoiflag))
xen_irq_lateeoi(irq, eoiflag);
return IRQ_HANDLED;
}
@ -1768,9 +1829,10 @@ static int setup_netfront_single(struct netfront_queue *queue)
if (err < 0)
goto fail;
err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
xennet_interrupt,
0, queue->info->netdev->name, queue);
err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
xennet_interrupt, 0,
queue->info->netdev->name,
queue);
if (err < 0)
goto bind_fail;
queue->rx_evtchn = queue->tx_evtchn;
@ -1798,18 +1860,18 @@ static int setup_netfront_split(struct netfront_queue *queue)
snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
"%s-tx", queue->name);
err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
xennet_tx_interrupt,
0, queue->tx_irq_name, queue);
err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
xennet_tx_interrupt, 0,
queue->tx_irq_name, queue);
if (err < 0)
goto bind_tx_fail;
queue->tx_irq = err;
snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
"%s-rx", queue->name);
err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
xennet_rx_interrupt,
0, queue->rx_irq_name, queue);
err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn,
xennet_rx_interrupt, 0,
queue->rx_irq_name, queue);
if (err < 0)
goto bind_rx_fail;
queue->rx_irq = err;
@ -1911,6 +1973,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
spin_lock_init(&queue->tx_lock);
spin_lock_init(&queue->rx_lock);
spin_lock_init(&queue->rx_cons_lock);
timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);

View file

@ -37,6 +37,8 @@ struct xencons_info {
struct xenbus_device *xbdev;
struct xencons_interface *intf;
unsigned int evtchn;
XENCONS_RING_IDX out_cons;
unsigned int out_cons_same;
struct hvc_struct *hvc;
int irq;
int vtermno;
@ -138,6 +140,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
XENCONS_RING_IDX cons, prod;
int recv = 0;
struct xencons_info *xencons = vtermno_to_xencons(vtermno);
unsigned int eoiflag = 0;
if (xencons == NULL)
return -EINVAL;
intf = xencons->intf;
@ -157,7 +161,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
mb(); /* read ring before consuming */
intf->in_cons = cons;
notify_daemon(xencons);
/*
* When to mark interrupt having been spurious:
* - there was no new data to be read, and
* - the backend did not consume some output bytes, and
* - the previous round with no read data didn't see consumed bytes
* (we might have a race with an interrupt being in flight while
* updating xencons->out_cons, so account for that by allowing one
* round without any visible reason)
*/
if (intf->out_cons != xencons->out_cons) {
xencons->out_cons = intf->out_cons;
xencons->out_cons_same = 0;
}
if (recv) {
notify_daemon(xencons);
} else if (xencons->out_cons_same++ > 1) {
eoiflag = XEN_EOI_FLAG_SPURIOUS;
}
xen_irq_lateeoi(xencons->irq, eoiflag);
return recv;
}
@ -386,7 +410,7 @@ static int xencons_connect_backend(struct xenbus_device *dev,
if (ret)
return ret;
info->evtchn = evtchn;
irq = bind_evtchn_to_irq(evtchn);
irq = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn);
if (irq < 0)
return irq;
info->irq = irq;
@ -551,7 +575,7 @@ static int __init xen_hvc_init(void)
return r;
info = vtermno_to_xencons(HVC_COOKIE);
info->irq = bind_evtchn_to_irq(info->evtchn);
info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn);
}
if (info->irq < 0)
info->irq = 0; /* NO_IRQ */

View file

@ -1251,6 +1251,12 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn)
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
/*
 * Bind @evtchn to an IRQ through bind_evtchn_to_irq_chip() using
 * xen_lateeoi_chip, and return that helper's result unchanged.
 * NOTE(review): presumably the IRQ number on success and a negative value
 * on failure — confirm against bind_evtchn_to_irq_chip().
 */
int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
{
return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;

View file

@ -17,6 +17,7 @@ struct xenbus_device;
unsigned xen_evtchn_nr_channels(void);
int bind_evtchn_to_irq(evtchn_port_t evtchn);
int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
irq_handler_t handler,
unsigned long irqflags, const char *devname,