From 4fa07edbb7eacfb56b3aa64f590e9f38e7f1042c Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 5 Sep 2022 10:04:36 +0800
Subject: [PATCH 1/5] io_uring/notif: Remove the unused function
 io_notif_complete()

The function io_notif_complete() is defined in the notif.c file, but
not called elsewhere, so delete this unused function.

io_uring/notif.c:24:20: warning: unused function
'io_notif_complete' [-Wunused-function].

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2047
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/20220905020436.51894-1-jiapeng.chong@linux.alibaba.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/notif.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/io_uring/notif.c b/io_uring/notif.c
index 38d77165edc3..e37c6569d82e 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -21,14 +21,6 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
 	io_req_task_complete(notif, locked);
 }
 
-static inline void io_notif_complete(struct io_kiocb *notif)
-	__must_hold(&notif->ctx->uring_lock)
-{
-	bool locked = true;
-
-	__io_notif_complete_tw(notif, &locked);
-}
-
 static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
 					  struct ubuf_info *uarg,
 					  bool success)

From df6d3422d3eed27afa23df092b3ce147c558d1a8 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 6 Sep 2022 17:11:16 +0100
Subject: [PATCH 2/5] io_uring/kbuf: fix not advancing READV kbuf ring

When we don't recycle a selected ring buffer we should advance the head
of the ring, so don't just skip io_kbuf_recycle() for IORING_OP_READV
but adjust the ring.

Fixes: 934447a603b22 ("io_uring: do not recycle buffer in READV")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Reviewed-by: Dylan Yudaken <dylany@fb.com>
Link: https://lore.kernel.org/r/a6d85e2611471bcb5d5dcd63a8342077ddc2d73d.1662480490.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/kbuf.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index d6af208d109f..746fbf31a703 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -91,9 +91,13 @@ static inline void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
 	 * buffer data. However if that buffer is recycled the original request
 	 * data stored in addr is lost. Therefore forbid recycling for now.
 	 */
-	if (req->opcode == IORING_OP_READV)
+	if (req->opcode == IORING_OP_READV) {
+		if ((req->flags & REQ_F_BUFFER_RING) && req->buf_list) {
+			req->buf_list->head++;
+			req->buf_list = NULL;
+		}
 		return;
-
+	}
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		io_kbuf_recycle_legacy(req, issue_flags);
 	if (req->flags & REQ_F_BUFFER_RING)
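As background for the patch above: with ring-provided buffers, userspace
publishes entries at the ring tail and the kernel consumes them by advancing
the head. The standalone C model below is an editorial sketch, not kernel
code; struct buf_ring_model, pick_buffer() and the skip_recycle_*() helpers
are invented stand-ins for the kernel's io_buffer_list handling, reduced to
a head/tail pair. It shows why refusing to recycle without advancing the
head hands the same slot to the next request.

/*
 * Standalone model (not kernel code) of provided-buffer ring accounting;
 * all names here are invented stand-ins for the kernel's io_buffer_list
 * and io_kbuf_recycle().
 */
#include <assert.h>
#include <stdio.h>

struct buf_ring_model {
	unsigned short head;	/* entries consumed by the kernel */
	unsigned short tail;	/* entries provided by userspace */
};

/* Selecting a buffer peeks at head; the slot is only committed later. */
static unsigned short pick_buffer(struct buf_ring_model *ring)
{
	assert(ring->head != ring->tail);	/* ring must not be empty */
	return ring->head;
}

/* Old READV behaviour: bail out without touching the ring at all. */
static void skip_recycle_buggy(struct buf_ring_model *ring)
{
	(void)ring;	/* head is never advanced */
}

/* Fixed behaviour: keep the buffer, but commit it by advancing head. */
static void skip_recycle_fixed(struct buf_ring_model *ring)
{
	ring->head++;
}

int main(void)
{
	struct buf_ring_model ring = { .head = 0, .tail = 2 };

	unsigned short a = pick_buffer(&ring);
	skip_recycle_buggy(&ring);
	unsigned short b = pick_buffer(&ring);
	printf("buggy: slots %u and %u alias\n", a, b);	/* 0 and 0 */

	ring.head = 0;
	a = pick_buffer(&ring);
	skip_recycle_fixed(&ring);
	b = pick_buffer(&ring);
	printf("fixed: slot %u then %u\n", a, b);	/* 0 then 1 */
	return 0;
}

In the real kernel the head and tail are ring indices shared with userspace
through the mmap'ed struct io_uring_buf_ring; the model keeps only the
aliasing behaviour the patch fixes.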
From 336d28a8f38013a069f2d46e73aaa1880ef17a47 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 6 Sep 2022 17:11:17 +0100
Subject: [PATCH 3/5] io_uring: recycle kbuf on tw requeue

When we queue a request via tw for execution it's not going to be
executed immediately, so when io_queue_async() hits IO_APOLL_READY and
queues a tw but doesn't try to recycle/consume the buffer, some other
request may try to use the buffer.

Fixes: c7fb19428d67 ("io_uring: add support for ring mapped supplied buffers")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a19bc9e211e3184215a58e129b62f440180e9212.1662480490.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index f9be9b7eb654..b9640ad5069f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1728,6 +1728,7 @@ static void io_queue_async(struct io_kiocb *req, int ret)
 
 	switch (io_arm_poll_handler(req, 0)) {
 	case IO_APOLL_READY:
+		io_kbuf_recycle(req, 0);
 		io_req_task_queue(req);
 		break;
 	case IO_APOLL_ABORTED:

From 3c8400532dd8305024ff6eea38707de20b1b9822 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 8 Sep 2022 14:01:10 +0100
Subject: [PATCH 4/5] io_uring/net: copy addr for zc on POLL_FIRST

Every time we return from an issue handler and expect the request to be
retried we should also set it up for async execution ourselves. Do that
when we return on IORING_RECVSEND_POLL_FIRST in io_sendzc(), otherwise
it'll re-read the address, which might come as a surprise to userspace.

Fixes: 092aeedb750a9 ("io_uring: allow to pass addr into sendzc")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/ab1d0657890d6721339c56d2e161a4bba06f85d0.1662642013.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index 7047c1342541..e9efed40cf3d 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1003,9 +1003,6 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 	unsigned msg_flags, cflags;
 	int ret, min_ret = 0;
 
-	if (!(req->flags & REQ_F_POLLED) &&
-	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
-		return -EAGAIN;
 	sock = sock_from_file(req->file);
 	if (unlikely(!sock))
 		return -ENOTSOCK;
@@ -1030,6 +1027,10 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 		msg.msg_namelen = zc->addr_len;
 	}
 
+	if (!(req->flags & REQ_F_POLLED) &&
+	    (zc->flags & IORING_RECVSEND_POLL_FIRST))
+		return io_setup_async_addr(req, addr, issue_flags);
+
 	if (zc->flags & IORING_RECVSEND_FIXED_BUF) {
 		ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
 					(u64)(uintptr_t)zc->buf, zc->len);
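The patch above works because the retry path reads the destination address
from request-owned storage rather than from the user-supplied pointer. The
sketch below is a standalone userspace model of that pattern, not the kernel
code; struct fake_req, prepare_retry() and retry_send() are invented names
standing in for io_kiocb and io_setup_async_addr().

/*
 * Standalone model (not kernel code): copy the caller's sockaddr into
 * request-owned storage before asking for a retry, so the retry never
 * re-reads caller memory. All names are invented stand-ins.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

struct fake_req {
	struct sockaddr_storage stash;	/* request-owned address copy */
};

/* First issue attempt: stash the address while it is still valid. */
static void prepare_retry(struct fake_req *req, const struct sockaddr_in *addr)
{
	memcpy(&req->stash, addr, sizeof(*addr));
}

/* Retry attempt: use only the stashed copy. */
static void retry_send(const struct fake_req *req)
{
	const struct sockaddr_in *dst = (const void *)&req->stash;

	printf("retry sends to port %d\n", ntohs(dst->sin_port));
}

int main(void)
{
	struct fake_req req;
	struct sockaddr_in caller_addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8080),
	};

	prepare_retry(&req, &caller_addr);

	/* Userspace may legally reuse its buffer once submission returns;
	 * without the stashed copy the retry would read zeroes here.
	 */
	memset(&caller_addr, 0, sizeof(caller_addr));
	retry_send(&req);	/* still prints port 8080 */
	return 0;
}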
From 4d9cb92ca41dd8e905a4569ceba4716c2f39c75a Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 9 Sep 2022 12:11:49 +0100
Subject: [PATCH 5/5] io_uring/rw: fix short rw error handling

We have a couple of problems. The first is reports of unexpected link
breakage for reads when cqe->res indicates that the IO was done in
full. The reason here is partial IO with retries.

TL;DR: we compare the result in __io_complete_rw_common() against
req->cqe.res, but req->cqe.res doesn't store the full length, only the
length left to be done. So, when we pass the full corrected result via
kiocb_done() -> __io_complete_rw_common(), the check fails.

The second problem is that we don't try to correct res in
io_complete_rw(), which, for instance, might be a problem for O_DIRECT
when a prefix of the data was cached in the page cache. We also
definitely don't want to pass a corrected result into io_rw_done().

The fix here is to leave __io_complete_rw_common() alone, always pass
the uncorrected result into it, and fix it up as the last step just
before actually finishing the I/O.

Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://github.com/axboe/liburing/issues/643
Reported-by: Beld Zhang <beldzhang@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rw.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1babd77da79c..1e18a44adcf5 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -206,6 +206,20 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 	return false;
 }
 
+static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
+{
+	struct io_async_rw *io = req->async_data;
+
+	/* add previously done IO, if any */
+	if (req_has_async_data(req) && io->bytes_done > 0) {
+		if (res < 0)
+			res = io->bytes_done;
+		else
+			res += io->bytes_done;
+	}
+	return res;
+}
+
 static void io_complete_rw(struct kiocb *kiocb, long res)
 {
 	struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
@@ -213,7 +227,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
 
 	if (__io_complete_rw_common(req, res))
 		return;
-	io_req_set_res(req, res, 0);
+	io_req_set_res(req, io_fixup_rw_res(req, res), 0);
 	req->io_task_work.func = io_req_task_complete;
 	io_req_task_work_add(req);
 }
@@ -240,22 +254,14 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
 static int kiocb_done(struct io_kiocb *req, ssize_t ret,
 		      unsigned int issue_flags)
 {
-	struct io_async_rw *io = req->async_data;
 	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
-
-	/* add previously done IO, if any */
-	if (req_has_async_data(req) && io->bytes_done > 0) {
-		if (ret < 0)
-			ret = io->bytes_done;
-		else
-			ret += io->bytes_done;
-	}
+	int final_ret = io_fixup_rw_res(req, ret);
 
 	if (req->flags & REQ_F_CUR_POS)
 		req->file->f_pos = rw->kiocb.ki_pos;
 	if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) {
 		if (!__io_complete_rw_common(req, ret)) {
-			io_req_set_res(req, req->cqe.res,
+			io_req_set_res(req, final_ret,
 				       io_put_kbuf(req, issue_flags));
 			return IOU_OK;
 		}
@@ -268,7 +274,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
 		if (io_resubmit_prep(req))
 			io_req_task_queue_reissue(req);
 		else
-			io_req_task_queue_fail(req, ret);
+			io_req_task_queue_fail(req, final_ret);
 	}
 	return IOU_ISSUE_SKIP_COMPLETE;
 }
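To see what the fixup in the patch above does numerically, here is a small
standalone model of the io_fixup_rw_res() logic; struct fake_async_rw and
fixup_rw_res() are invented stand-ins with the kernel types replaced by
plain C, but the arithmetic mirrors the helper.

/*
 * Standalone model (not kernel code) of the result fixup: combine bytes
 * completed by earlier partial attempts with the final attempt's result.
 */
#include <stdio.h>

struct fake_async_rw {
	long bytes_done;	/* bytes finished by earlier partial attempts */
};

static long fixup_rw_res(const struct fake_async_rw *io, long res)
{
	/* add previously done IO, if any */
	if (io && io->bytes_done > 0) {
		if (res < 0)
			res = io->bytes_done;	/* error after partial IO:
						 * report what completed */
		else
			res += io->bytes_done;	/* success: report the total */
	}
	return res;
}

int main(void)
{
	struct fake_async_rw io = { .bytes_done = 4096 };

	/* A retry finished the remaining 4096 bytes: 8192 in total. */
	printf("success: %ld\n", fixup_rw_res(&io, 4096));

	/* The retry failed with -EIO (-5): the 4096 bytes already done are
	 * still reported. This branch requires res to be signed; with an
	 * unsigned res the `res < 0` check could never trigger.
	 */
	printf("error:   %ld\n", fixup_rw_res(&io, -5));
	return 0;
}

The signedness point in the last comment is also why the helper takes a long
and returns an int in the diff above: comparing an unsigned value against
zero would silently disable the error path.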