From 69800e516e968fa1cf9202b872dc86515eeeebe6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 30 Sep 2022 08:59:31 -0700 Subject: [PATCH 1/4] mptcp: propagate fastclose error When an mptcp socket is closed due to an incoming FASTCLOSE option, so specific sk_err is set and later syscall will fail usually with EPIPE. Align the current fastclose error handling with TCP reset, properly setting the socket error according to the current msk state and propagating such error. Additionally sendmsg() is currently not handling properly the sk_err, always returning EPIPE. Reviewed-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 47 +++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 760404b15cd0..cad0346c9281 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1707,7 +1707,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } else if (ret) { release_sock(ssk); - goto out; + goto do_error; } release_sock(ssk); } @@ -1717,9 +1717,13 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { ret = sk_stream_wait_connect(sk, &timeo); if (ret) - goto out; + goto do_error; } + ret = -EPIPE; + if (unlikely(sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))) + goto do_error; + pfrag = sk_page_frag(sk); while (msg_data_left(msg)) { @@ -1728,11 +1732,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) bool dfrag_collapsed; size_t psize, offset; - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) { - ret = -EPIPE; - goto out; - } - /* reuse tail pfrag, if possible, or carve a new one from the * page allocator */ @@ -1764,7 +1763,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (copy_page_from_iter(dfrag->page, offset, psize, &msg->msg_iter) != psize) { ret = -EFAULT; - goto out; + goto do_error; } /* data successfully copied into the write queue */ @@ -1796,7 +1795,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) - goto out; + goto do_error; } if (copied) @@ -1804,7 +1803,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) out: release_sock(sk); - return copied ? : ret; + return copied; + +do_error: + if (copied) + goto out; + + copied = sk_stream_error(sk, msg->msg_flags, ret); + goto out; } static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, @@ -2441,12 +2447,31 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) unlock_sock_fast(tcp_sk, slow); } + /* Mirror the tcp_reset() error propagation */ + switch (sk->sk_state) { + case TCP_SYN_SENT: + sk->sk_err = ECONNREFUSED; + break; + case TCP_CLOSE_WAIT: + sk->sk_err = EPIPE; + break; + case TCP_CLOSE: + return; + default: + sk->sk_err = ECONNRESET; + } + inet_sk_state_store(sk, TCP_CLOSE); sk->sk_shutdown = SHUTDOWN_MASK; smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); - mptcp_close_wake_up(sk); + /* the calling mptcp_worker will properly destroy the socket */ + if (sock_flag(sk, SOCK_DEAD)) + return; + + sk->sk_state_change(sk); + sk_error_report(sk); } static void __mptcp_retrans(struct sock *sk) From d21f8348551831efd917528afc545832a372426e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 30 Sep 2022 08:59:32 -0700 Subject: [PATCH 2/4] mptcp: use fastclose on more edge scenarios Daire reported a user-space application hang-up when the peer is forcibly closed before the data transfer completion. The relevant application expects the peer to either do an application-level clean shutdown or a transport-level connection reset. We can accommodate a such user by extending the fastclose usage: at fd close time, if the msk socket has some unread data, and at FIN_WAIT timeout. Note that at MPTCP close time we must ensure that the TCP subflows will reset: set the linger socket option to a suitable value. Reviewed-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 63 +++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cad0346c9281..acf44075ba40 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2313,8 +2313,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); - if (flags & MPTCP_CF_FASTCLOSE) + if (flags & MPTCP_CF_FASTCLOSE) { + /* be sure to force the tcp_disconnect() path, + * to generate the egress reset + */ + ssk->sk_lingertime = 0; + sock_set_flag(ssk, SOCK_LINGER); subflow->send_fastclose = 1; + } need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { @@ -2577,6 +2583,16 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) mptcp_reset_timeout(msk, 0); } +static void mptcp_do_fastclose(struct sock *sk) +{ + struct mptcp_subflow_context *subflow, *tmp; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow_safe(msk, subflow, tmp) + __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), + subflow, MPTCP_CF_FASTCLOSE); +} + static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); @@ -2605,11 +2621,15 @@ static void mptcp_worker(struct work_struct *work) * closed, but we need the msk around to reply to incoming DATA_FIN, * even if it is orphaned and in FIN_WAIT2 state */ - if (sock_flag(sk, SOCK_DEAD) && - (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) { - inet_sk_state_store(sk, TCP_CLOSE); - __mptcp_destroy_sock(sk); - goto unlock; + if (sock_flag(sk, SOCK_DEAD)) { + if (mptcp_check_close_timeout(sk)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + } + if (sk->sk_state == TCP_CLOSE) { + __mptcp_destroy_sock(sk); + goto unlock; + } } if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) @@ -2850,6 +2870,18 @@ static void __mptcp_destroy_sock(struct sock *sk) sock_put(sk); } +static __poll_t mptcp_check_readable(struct mptcp_sock *msk) +{ + /* Concurrent splices from sk_receive_queue into receive_queue will + * always show at least one non-empty queue when checked in this order. + */ + if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) && + skb_queue_empty_lockless(&msk->receive_queue)) + return 0; + + return EPOLLIN | EPOLLRDNORM; +} + bool __mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow; @@ -2863,8 +2895,13 @@ bool __mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_close_state(sk)) + if (mptcp_check_readable(msk)) { + /* the msk has read data, do the MPTCP equivalent of TCP reset */ + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + } else if (mptcp_close_state(sk)) { __mptcp_wr_shutdown(sk); + } sk_stream_wait_close(sk, timeout); @@ -3681,18 +3718,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, return err; } -static __poll_t mptcp_check_readable(struct mptcp_sock *msk) -{ - /* Concurrent splices from sk_receive_queue into receive_queue will - * always show at least one non-empty queue when checked in this order. - */ - if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) && - skb_queue_empty_lockless(&msk->receive_queue)) - return 0; - - return EPOLLIN | EPOLLRDNORM; -} - static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; From 6bf41020b72b1c5ab96acc309a135153abbe20c8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 30 Sep 2022 08:59:33 -0700 Subject: [PATCH 3/4] selftests: mptcp: update and extend fastclose test-cases After the previous patches, the MPTCP protocol can generate fast-closes on both ends of the connection. Rework the relevant test-case to carefully trigger the fast-close code-path on a single end at the time, while ensuring than a predictable amount of data is spooled on both ends. Additionally add another test-cases for the passive socket fast-close. Reviewed-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- .../selftests/net/mptcp/mptcp_connect.c | 65 ++++++++++++-- .../testing/selftests/net/mptcp/mptcp_join.sh | 90 +++++++++++++++---- 2 files changed, 130 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 24d4e9cb617e..e54653ea2ed4 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -72,6 +72,8 @@ static int cfg_wait; static uint32_t cfg_mark; static char *cfg_input; static int cfg_repeat = 1; +static int cfg_truncate; +static int cfg_rcv_trunc; struct cfg_cmsg_types { unsigned int cmsg_enabled:1; @@ -95,11 +97,15 @@ static struct cfg_sockopt_types cfg_sockopt_types; static void die_usage(void) { - fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-i file] [-I num] [-j] [-l] " + fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] " "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-j] [-l] [-r num] " "[-s MPTCP|TCP] [-S num] [-r num] [-t num] [-T num] [-u] [-w sec] connect_address\n"); fprintf(stderr, "\t-6 use ipv6\n"); fprintf(stderr, "\t-c cmsg -- test cmsg type \n"); + fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount " + "of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP " + "fastclose at close/shutdown. If offset is negative, expect the peer to close before " + "all the local data as been sent, thus toleration errors on write and EPIPE signals\n"); fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin"); fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num " "incoming connections, in client mode, disconnect and reconnect to the server\n"); @@ -382,7 +388,7 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) bw = write(fd, buf, do_w); if (bw < 0) - perror("write"); + return bw; /* let the join handshake complete, before going on */ if (cfg_join && first) { @@ -571,7 +577,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after .fd = peerfd, .events = POLLIN | POLLOUT, }; - unsigned int woff = 0, wlen = 0; + unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0; char wbuf[8192]; set_nonblock(peerfd, true); @@ -597,7 +603,16 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after } if (fds.revents & POLLIN) { - len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); + ssize_t rb = sizeof(rbuf); + + /* limit the total amount of read data to the trunc value*/ + if (cfg_truncate > 0) { + if (rb + total_rlen > cfg_truncate) + rb = cfg_truncate - total_rlen; + len = read(peerfd, rbuf, rb); + } else { + len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); + } if (len == 0) { /* no more data to receive: * peer has closed its write side @@ -612,10 +627,13 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after /* Else, still have data to transmit */ } else if (len < 0) { + if (cfg_rcv_trunc) + return 0; perror("read"); return 3; } + total_rlen += len; do_write(outfd, rbuf, len); } @@ -628,12 +646,21 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after if (wlen > 0) { ssize_t bw; + /* limit the total amount of written data to the trunc value */ + if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate) + wlen = cfg_truncate - total_wlen; + bw = do_rnd_write(peerfd, wbuf + woff, wlen); - if (bw < 0) + if (bw < 0) { + if (cfg_rcv_trunc) + return 0; + perror("write"); return 111; + } woff += bw; wlen -= bw; + total_wlen += bw; } else if (wlen == 0) { /* We have no more data to send. */ fds.events &= ~POLLOUT; @@ -652,10 +679,16 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after } if (fds.revents & (POLLERR | POLLNVAL)) { + if (cfg_rcv_trunc) + return 0; fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; } + + if (cfg_truncate > 0 && total_wlen >= cfg_truncate && + total_rlen >= cfg_truncate) + break; } /* leave some time for late join/announce */ @@ -1160,11 +1193,13 @@ int main_loop(void) } /* close the client socket open only if we are not going to reconnect */ - ret = copyfd_io(fd_in, fd, 1, cfg_repeat == 1); + ret = copyfd_io(fd_in, fd, 1, 0); if (ret) return ret; - if (--cfg_repeat > 0) { + if (cfg_truncate > 0) { + xdisconnect(fd, peer->ai_addrlen); + } else if (--cfg_repeat > 0) { xdisconnect(fd, peer->ai_addrlen); /* the socket could be unblocking at this point, we need the @@ -1176,7 +1211,10 @@ int main_loop(void) if (cfg_input) close(fd_in); goto again; + } else { + close(fd); } + return 0; } @@ -1262,8 +1300,19 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6c:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) { + while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) { switch (c) { + case 'f': + cfg_truncate = atoi(optarg); + + /* when receiving a fastclose, ignore PIPE signals and + * all the I/O errors later in the code + */ + if (cfg_truncate < 0) { + cfg_rcv_trunc = true; + signal(SIGPIPE, handle_signal); + } + break; case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2957fe414639..f3dd5f2a0272 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -346,10 +346,21 @@ check_transfer() local in=$1 local out=$2 local what=$3 + local bytes=$4 local i a b local line - cmp -l "$in" "$out" | while read -r i a b; do + if [ -n "$bytes" ]; then + # when truncating we must check the size explicitly + local out_size=$(wc -c $out | awk '{print $1}') + if [ $out_size -ne $bytes ]; then + echo "[ FAIL ] $what output file has wrong size ($out_size, $bytes)" + fail_test + return 1 + fi + bytes="--bytes=${bytes}" + fi + cmp -l "$in" "$out" ${bytes} | while read -r i a b; do local sum=$((0${a} + 0${b})) if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then echo "[ FAIL ] $what does not match (in, out):" @@ -707,9 +718,31 @@ do_transfer() fi local flags="subflow" + local extra_cl_args="" + local extra_srv_args="" + local trunc_size="" if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then + if [ ${test_link_fail} -le 1 ]; then + echo "fastclose tests need test_link_fail argument" + fail_test + return 1 + fi + # disconnect - extra_args="$extra_args -I ${addr_nr_ns2:10}" + trunc_size=${test_link_fail} + local side=${addr_nr_ns2:10} + + if [ ${side} = "client" ]; then + extra_cl_args="-f ${test_link_fail}" + extra_srv_args="-f -1" + elif [ ${side} = "server" ]; then + extra_srv_args="-f ${test_link_fail}" + extra_cl_args="-f -1" + else + echo "wrong/unknown fastclose spec ${side}" + fail_test + return 1 + fi addr_nr_ns2=0 elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then userspace_pm=1 @@ -737,39 +770,41 @@ do_transfer() local_addr="0.0.0.0" fi + extra_srv_args="$extra_args $extra_srv_args" if [ "$test_link_fail" -gt 1 ];then timeout ${timeout_test} \ ip netns exec ${listener_ns} \ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_args ${local_addr} < "$sinfail" > "$sout" & + $extra_srv_args ${local_addr} < "$sinfail" > "$sout" & else timeout ${timeout_test} \ ip netns exec ${listener_ns} \ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_args ${local_addr} < "$sin" > "$sout" & + $extra_srv_args ${local_addr} < "$sin" > "$sout" & fi local spid=$! wait_local_port_listen "${listener_ns}" "${port}" + extra_cl_args="$extra_args $extra_cl_args" if [ "$test_link_fail" -eq 0 ];then timeout ${timeout_test} \ ip netns exec ${connector_ns} \ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_args $connect_addr < "$cin" > "$cout" & + $extra_cl_args $connect_addr < "$cin" > "$cout" & elif [ "$test_link_fail" -eq 1 ] || [ "$test_link_fail" -eq 2 ];then ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ tee "$cinsent" | \ timeout ${timeout_test} \ ip netns exec ${connector_ns} \ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_args $connect_addr > "$cout" & + $extra_cl_args $connect_addr > "$cout" & else tee "$cinsent" < "$cinfail" | \ timeout ${timeout_test} \ ip netns exec ${connector_ns} \ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_args $connect_addr > "$cout" & + $extra_cl_args $connect_addr > "$cout" & fi local cpid=$! @@ -971,15 +1006,15 @@ do_transfer() fi if [ "$test_link_fail" -gt 1 ];then - check_transfer $sinfail $cout "file received by client" + check_transfer $sinfail $cout "file received by client" $trunc_size else - check_transfer $sin $cout "file received by client" + check_transfer $sin $cout "file received by client" $trunc_size fi retc=$? if [ "$test_link_fail" -eq 0 ];then - check_transfer $cin $sout "file received by server" + check_transfer $cin $sout "file received by server" $trunc_size else - check_transfer $cinsent $sout "file received by server" + check_transfer $cinsent $sout "file received by server" $trunc_size fi rets=$? @@ -1188,12 +1223,23 @@ chk_fclose_nr() { local fclose_tx=$1 local fclose_rx=$2 + local ns_invert=$3 local count local dump_stats + local ns_tx=$ns2 + local ns_rx=$ns1 + local extra_msg=" " + + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns1 + ns_rx=$ns2 + extra_msg=${extra_msg}"invert" + fi printf "%-${nr_blank}s %s" " " "ctx" - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFastcloseTx | awk '{print $2}') + count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFastcloseTx | awk '{print $2}') [ -z "$count" ] && count=0 + [ "$count" != "$fclose_tx" ] && extra_msg="$extra_msg,tx=$count" if [ "$count" != "$fclose_tx" ]; then echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx" fail_test @@ -1203,17 +1249,20 @@ chk_fclose_nr() fi echo -n " - fclzrx" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFastcloseRx | awk '{print $2}') + count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFastcloseRx | awk '{print $2}') [ -z "$count" ] && count=0 + [ "$count" != "$fclose_rx" ] && extra_msg="$extra_msg,rx=$count" if [ "$count" != "$fclose_rx" ]; then echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx" fail_test dump_stats=1 else - echo "[ ok ]" + echo -n "[ ok ]" fi [ "${dump_stats}" = 1 ] && dump_stats + + echo "$extra_msg" } chk_rst_nr() @@ -1236,7 +1285,7 @@ chk_rst_nr() printf "%-${nr_blank}s %s" " " "rtx" count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPRstTx | awk '{print $2}') [ -z "$count" ] && count=0 - if [ "$count" != "$rst_tx" ]; then + if [ $count -lt $rst_tx ]; then echo "[fail] got $count MP_RST[s] TX expected $rst_tx" fail_test dump_stats=1 @@ -1247,7 +1296,7 @@ chk_rst_nr() echo -n " - rstrx " count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPRstRx | awk '{print $2}') [ -z "$count" ] && count=0 - if [ "$count" != "$rst_rx" ]; then + if [ "$count" -lt "$rst_rx" ]; then echo "[fail] got $count MP_RST[s] RX expected $rst_rx" fail_test dump_stats=1 @@ -2801,11 +2850,18 @@ fullmesh_tests() fastclose_tests() { if reset "fastclose test"; then - run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_2 + run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_client chk_join_nr 0 0 0 chk_fclose_nr 1 1 chk_rst_nr 1 1 invert fi + + if reset "fastclose server test"; then + run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_server + chk_join_nr 0 0 0 + chk_fclose_nr 1 1 invert + chk_rst_nr 1 1 + fi } pedit_action_pkts() From d89e3ed76b6efd1a4fd213feebedd01337a16b5a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 30 Sep 2022 08:59:34 -0700 Subject: [PATCH 4/4] mptcp: update misleading comments. The MPTCP data path is quite complex and hard to understend even without some foggy comments referring to modified code and/or completely misleading from the beginning. Update a few of them to more accurately describing the current status. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index acf44075ba40..f599ad44ed24 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -662,9 +662,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, skb = skb_peek(&ssk->sk_receive_queue); if (!skb) { - /* if no data is found, a racing workqueue/recvmsg - * already processed the new data, stop here or we - * can enter an infinite loop + /* With racing move_skbs_to_msk() and __mptcp_move_skbs(), + * a different CPU can have already processed the pending + * data, stop here or we can enter an infinite loop */ if (!moved) done = true; @@ -672,9 +672,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } if (__mptcp_check_fallback(msk)) { - /* if we are running under the workqueue, TCP could have - * collapsed skbs between dummy map creation and now - * be sure to adjust the size + /* Under fallback skbs have no MPTCP extension and TCP could + * collapse them between the dummy map creation and the + * current dequeue. Be sure to adjust the map size. */ map_remaining = skb->len; subflow->map_data_len = skb->len; @@ -3768,7 +3768,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; - /* This barrier is coupled with smp_wmb() in tcp_reset() */ + /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); if (sk->sk_err) mask |= EPOLLERR;