From cee1af825d65b8122627fc2efbc36c1bd51ee103 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:41 +0000 Subject: [PATCH 1/6] tcp: annotate lockless accesses to sk->sk_err_soft This field can be read/written without lock synchronization. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_timer.c | 6 +++--- net/ipv6/tcp_ipv6.c | 11 ++++++----- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc072d2cfcd8..8b5b6ca6617d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3874,7 +3874,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* We passed data and got it acked, remove any soft error * log. Something worked... */ - sk->sk_err_soft = 0; + WRITE_ONCE(sk->sk_err_soft, 0); icsk->icsk_probes_out = 0; tp->rcv_tstamp = tcp_jiffies32; if (!prior_packets) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ea370afa70ed..4f6894469b62 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -361,7 +361,7 @@ void tcp_v4_mtu_reduced(struct sock *sk) * for the case, if this connection will not able to recover. */ if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - sk->sk_err_soft = EMSGSIZE; + WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); mtu = dst_mtu(dst); @@ -602,7 +602,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) tcp_done(sk); } else { - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } goto out; } @@ -628,7 +628,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) sk->sk_err = err; sk_error_report(sk); } else { /* Only an error on timeout */ - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } out: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cb79127f45c3..8823e2182713 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -67,7 +67,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) static void tcp_write_err(struct sock *sk) { - sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT; sk_error_report(sk); tcp_write_queue_purge(sk); @@ -110,7 +110,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) shift++; /* If some dubious ICMP arrived, penalize even more. */ - if (sk->sk_err_soft) + if (READ_ONCE(sk->sk_err_soft)) shift++; if (tcp_check_oom(sk, shift)) { @@ -146,7 +146,7 @@ static int tcp_orphan_retries(struct sock *sk, bool alive) int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */ /* We know from an ICMP that something is wrong. */ - if (sk->sk_err_soft && !alive) + if (READ_ONCE(sk->sk_err_soft) && !alive) retries = 0; /* However, if socket sent something recently, select some safe diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1bf93b61aa06..dc963eebc668 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -497,8 +497,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; case TCP_LISTEN: break; @@ -514,9 +515,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk_error_report(sk); - } else - sk->sk_err_soft = err; - + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); From 9a25f0cb0d7ee689f54f38890e66bc78520b0c62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:42 +0000 Subject: [PATCH 2/6] dccp: annotate lockless accesses to sk->sk_err_soft This field can be read/written without lock synchronization. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 12 +++++++----- net/dccp/ipv6.c | 11 ++++++----- net/dccp/timer.c | 2 +- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b780827f5e0a..3ab68415d121 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -177,7 +177,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, * for the case, if this connection will not able to recover. */ if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) - sk->sk_err_soft = EMSGSIZE; + WRITE_ONCE(sk->sk_err_soft, EMSGSIZE); mtu = dst_mtu(dst); @@ -339,8 +339,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) sk_error_report(sk); dccp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; } @@ -364,8 +365,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info) if (!sock_owned_by_user(sk) && inet->recverr) { sk->sk_err = err; sk_error_report(sk); - } else /* Only an error on timeout */ - sk->sk_err_soft = err; + } else { /* Only an error on timeout */ + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index b9d7c3dd1cb3..47fb10834223 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -174,17 +174,18 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, */ sk_error_report(sk); dccp_done(sk); - } else - sk->sk_err_soft = err; + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } goto out; } if (!sock_owned_by_user(sk) && np->recverr) { sk->sk_err = err; sk_error_report(sk); - } else - sk->sk_err_soft = err; - + } else { + WRITE_ONCE(sk->sk_err_soft, err); + } out: bh_unlock_sock(sk); sock_put(sk); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 27a3b37acd2e..b3255e87cc7e 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -19,7 +19,7 @@ int sysctl_dccp_retries2 __read_mostly = TCP_RETR2; static void dccp_write_err(struct sock *sk) { - sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT; sk_error_report(sk); dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); From 2f2d9972affae1f3282eff97c794cf843aedf61e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:43 +0000 Subject: [PATCH 3/6] net: annotate lockless accesses to sk->sk_err_soft This field can be read/written without lock synchronization. tcp and dccp have been handled in different patches. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- fs/dlm/lowcomms.c | 7 ++++--- net/atm/signaling.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/inet6_connection_sock.c | 2 +- net/sctp/input.c | 2 +- net/sctp/ipv6.c | 2 +- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index a9b14f81d655..bd786b3be5ec 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -601,7 +601,7 @@ static void lowcomms_error_report(struct sock *sk) "sk_err=%d/%d\n", dlm_our_nodeid(), con->nodeid, &inet->inet_daddr, ntohs(inet->inet_dport), sk->sk_err, - sk->sk_err_soft); + READ_ONCE(sk->sk_err_soft)); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: @@ -610,14 +610,15 @@ static void lowcomms_error_report(struct sock *sk) "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(), con->nodeid, &sk->sk_v6_daddr, ntohs(inet->inet_dport), sk->sk_err, - sk->sk_err_soft); + READ_ONCE(sk->sk_err_soft)); break; #endif default: printk_ratelimited(KERN_ERR "dlm: node %d: socket error " "invalid socket family %d set, " "sk_err=%d/%d\n", dlm_our_nodeid(), - sk->sk_family, sk->sk_err, sk->sk_err_soft); + sk->sk_family, sk->sk_err, + READ_ONCE(sk->sk_err_soft)); break; } diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 5de06ab8ed75..e70ae2c113f9 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -125,7 +125,7 @@ as_indicate_complete: break; case as_addparty: case as_dropparty: - sk->sk_err_soft = -msg->reply; + WRITE_ONCE(sk->sk_err_soft, -msg->reply); /* < 0 failure, otherwise ep_ref */ clear_bit(ATM_VF_WAITING, &vcc->flags); break; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8db6747f892f..940062e08f57 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1322,7 +1322,7 @@ int inet_sk_rebuild_header(struct sock *sk) sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) - sk->sk_err_soft = -err; + WRITE_ONCE(sk->sk_err_soft, -err); } return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 38689bedfce7..e1b679a590c9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -845,7 +845,7 @@ int inet6_sk_rebuild_header(struct sock *sk) dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; - sk->sk_err_soft = -PTR_ERR(dst); + WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); return PTR_ERR(dst); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5a9f4d722f35..0c50dcd35fe8 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -120,7 +120,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); + WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); sk->sk_route_caps = 0; kfree_skb(skb); return PTR_ERR(dst); diff --git a/net/sctp/input.c b/net/sctp/input.c index bf70371301ff..127bf28a6033 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -585,7 +585,7 @@ static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb, sk->sk_err = err; sk_error_report(sk); } else { /* Only an error on timeout */ - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 62b436a2c8fe..43f2731bf590 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -155,7 +155,7 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, sk->sk_err = err; sk_error_report(sk); } else { - sk->sk_err_soft = err; + WRITE_ONCE(sk->sk_err_soft, err); } } From e13ec3da05d130f0d10da8e1fbe1be26dcdb0e27 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:44 +0000 Subject: [PATCH 4/6] tcp: annotate lockless access to sk->sk_err tcp_poll() reads sk->sk_err without socket lock held/owned. We should used READ_ONCE() here, and update writers to use WRITE_ONCE(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 11 ++++++----- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_timer.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 288693981b00..01569de651b6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -589,7 +589,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR; return mask; @@ -3094,7 +3095,7 @@ int tcp_disconnect(struct sock *sk, int flags) if (old_state == TCP_LISTEN) { inet_csk_listen_stop(sk); } else if (unlikely(tp->repair)) { - sk->sk_err = ECONNABORTED; + WRITE_ONCE(sk->sk_err, ECONNABORTED); } else if (tcp_need_reset(old_state) || (tp->snd_nxt != tp->write_seq && (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { @@ -3102,9 +3103,9 @@ int tcp_disconnect(struct sock *sk, int flags) * states */ tcp_send_active_reset(sk, gfp_any()); - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); @@ -4692,7 +4693,7 @@ int tcp_abort(struct sock *sk, int err) bh_lock_sock(sk); if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); sk_error_report(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8b5b6ca6617d..754ddbe0577f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4322,15 +4322,15 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } /* This barrier is coupled with smp_rmb() in tcp_poll() */ smp_wmb(); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4f6894469b62..89daa6b953ff 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -596,7 +596,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); @@ -625,7 +625,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) inet = inet_sk(sk); if (!sock_owned_by_user(sk) && inet->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { /* Only an error on timeout */ WRITE_ONCE(sk->sk_err_soft, err); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 71d01cf3c13e..f7e00d90a730 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3699,7 +3699,7 @@ static void tcp_connect_init(struct sock *sk) tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; - sk->sk_err = 0; + WRITE_ONCE(sk->sk_err, 0); sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; tcp_init_wl(tp, 0); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8823e2182713..b839c2f91292 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -67,7 +67,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) static void tcp_write_err(struct sock *sk) { - sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT; + WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT); sk_error_report(sk); tcp_write_queue_purge(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index dc963eebc668..35cf523c9efd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -493,7 +493,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); if (!sock_owned_by_user(sk)) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ tcp_done(sk); @@ -513,7 +513,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (!sock_owned_by_user(sk) && np->recverr) { - sk->sk_err = err; + WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { WRITE_ONCE(sk->sk_err_soft, err); From 9ae8e5ad99b8ebcd3d3dd46075f3825e6f08f063 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:45 +0000 Subject: [PATCH 5/6] mptcp: annotate lockless accesses to sk->sk_err mptcp_poll() reads sk->sk_err without socket lock held/owned. Add READ_ONCE() and WRITE_ONCE() to avoid load/store tearing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 2 +- net/mptcp/protocol.c | 8 ++++---- net/mptcp/subflow.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 56628b52d100..cbaa1b49f7fe 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2019,7 +2019,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) return -EMSGSIZE; - sk_err = ssk->sk_err; + sk_err = READ_ONCE(ssk->sk_err); if (sk_err && sk->sk_state == TCP_ESTABLISHED && nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) return -EMSGSIZE; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ad9c46202fc..3005a5adf715 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2463,15 +2463,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) /* Mirror the tcp_reset() error propagation */ switch (sk->sk_state) { case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; + WRITE_ONCE(sk->sk_err, ECONNREFUSED); break; case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; + WRITE_ONCE(sk->sk_err, EPIPE); break; case TCP_CLOSE: return; default: - sk->sk_err = ECONNRESET; + WRITE_ONCE(sk->sk_err, ECONNRESET); } inet_sk_state_store(sk, TCP_CLOSE); @@ -3791,7 +3791,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; return mask; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4ae1a7304cf0..01874059a168 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1335,7 +1335,7 @@ fallback: subflow->reset_reason = MPTCP_RST_EMPTCP; reset: - ssk->sk_err = EBADMSG; + WRITE_ONCE(ssk->sk_err, EBADMSG); tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); @@ -1419,7 +1419,7 @@ void __mptcp_error_report(struct sock *sk) ssk_state = inet_sk_state_load(ssk); if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) inet_sk_state_store(sk, ssk_state); - sk->sk_err = -err; + WRITE_ONCE(sk->sk_err, -err); /* This barrier is coupled with smp_rmb() in mptcp_poll() */ smp_wmb(); From cc04410af7de348234ac36a5f50c4ce416efdb4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2023 20:57:46 +0000 Subject: [PATCH 6/6] af_unix: annotate lockless accesses to sk->sk_err unix_poll() and unix_dgram_poll() read sk->sk_err without any lock held. Add relevant READ_ONCE()/WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0b0f18ecce44..fb31e8a4409e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -557,7 +557,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) * when peer was not connected to us. */ if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { - other->sk_err = ECONNRESET; + WRITE_ONCE(other->sk_err, ECONNRESET); sk_error_report(other); } } @@ -630,7 +630,7 @@ static void unix_release_sock(struct sock *sk, int embrion) /* No more writes */ skpair->sk_shutdown = SHUTDOWN_MASK; if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) - skpair->sk_err = ECONNRESET; + WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); @@ -3165,7 +3165,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa mask = 0; /* exceptional events? */ - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) mask |= EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; @@ -3208,7 +3208,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) + if (READ_ONCE(sk->sk_err) || + !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);