xdp: fix bug in cpumap teardown code path

When removing a cpumap entry, a number of synchronization steps happen.
Eventually the teardown code __cpu_map_entry_free is invoked from/via
call_rcu.
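
For context, deleting or replacing an entry detaches it from the map and
defers the actual free until an RCU grace period has passed.  A simplified
sketch of that sequence (illustrative, not the exact upstream code):

static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap, u32 key_cpu,
				    struct bpf_cpu_map_entry *rcpu)
{
	struct bpf_cpu_map_entry *old_rcpu;

	/* Publish the new entry (NULL on delete); readers still inside an
	 * RCU read-side section may keep using old_rcpu until they exit.
	 */
	old_rcpu = xchg(&cmap->cpu_map[key_cpu], rcpu);
	if (old_rcpu)
		/* The free runs later, outside any RX NAPI context */
		call_rcu(&old_rcpu->rcu, __cpu_map_entry_free);
}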

The teardown code __cpu_map_entry_free() flushes remaining xdp_frames,
by invoking bq_flush_to_queue, which calls xdp_return_frame_rx_napi().
The issue is that the teardown code is not running in the RX NAPI
code path.  Thus, it is not allowed to invoke the NAPI variant of
xdp_return_frame.
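
The rule the fix enforces, distilled into a small helper for illustration
(cpu_map_return_frame() is hypothetical and not part of the patch; the
actual change threads an in_napi_ctx flag through bq_flush_to_queue(), as
the diff below shows):

/* Hypothetical helper, for illustration only */
static void cpu_map_return_frame(struct xdp_frame *xdpf, bool in_napi_ctx)
{
	if (likely(in_napi_ctx))
		/* Bulk-return variant; only legal from the RX NAPI softirq */
		xdp_return_frame_rx_napi(xdpf);
	else
		/* Plain return; safe from e.g. the call_rcu teardown path */
		xdp_return_frame(xdpf);
}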

This bug was found and triggered by using the --stress-mode option to
the samples/bpf program xdp_redirect_cpu.  It is hard to trigger,
because the ptr_ring has to be full, the cpumap bulk queue holds at
most 8 packets, and a remote CPU is racing to empty the ptr_ring
queue.

Fixes: 389ab7f01a ("xdp: introduce xdp_return_frame_rx_napi")
Tested-by: Jean-Tsung Hsiao <jhsiao@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
commit ad0ab027fc
parent bf9bae0ea6
Author:    Jesper Dangaard Brouer <brouer@redhat.com>
Date:      2018-08-08 23:00:34 +02:00
Committer: Daniel Borkmann <daniel@iogearbox.net>

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c

@@ -69,7 +69,7 @@ struct bpf_cpu_map {
 };

 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq);
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx);

 static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
 {
@@ -375,7 +375,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
 		struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);

 		/* No concurrent bq_enqueue can run at this point */
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, false);
 	}
 	free_percpu(rcpu->bulkq);
 	/* Cannot kthread_stop() here, last put free rcpu resources */
@@ -558,7 +558,7 @@ const struct bpf_map_ops cpu_map_ops = {
 };

 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
-			     struct xdp_bulk_queue *bq)
+			     struct xdp_bulk_queue *bq, bool in_napi_ctx)
 {
 	unsigned int processed = 0, drops = 0;
 	const int to_cpu = rcpu->cpu;
@@ -578,7 +578,10 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
 		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			xdp_return_frame_rx_napi(xdpf);
+			if (likely(in_napi_ctx))
+				xdp_return_frame_rx_napi(xdpf);
+			else
+				xdp_return_frame(xdpf);
 		}
 		processed++;
 	}
@@ -598,7 +601,7 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);

 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, true);

 	/* Notice, xdp_buff/page MUST be queued here, long enough for
 	 * driver to code invoking us to finished, due to driver
@@ -661,7 +664,7 @@ void __cpu_map_flush(struct bpf_map *map)

 		/* Flush all frames in bulkq to real queue */
 		bq = this_cpu_ptr(rcpu->bulkq);
-		bq_flush_to_queue(rcpu, bq);
+		bq_flush_to_queue(rcpu, bq, true);

 		/* If already running, costs spin_lock_irqsave + smb_mb */
 		wake_up_process(rcpu->kthread);