tcp: replace TCP_SKB_CB(skb)->tcp_tw_isn with a per-cpu field

TCP can transform a TIMEWAIT socket into a SYN_RECV one from
a SYN packet, and the ISN of the SYNACK packet is normally
generated using TIMEWAIT tw_snd_nxt :

tcp_timewait_state_process()
...
    u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
    if (isn == 0)
        isn++;
    TCP_SKB_CB(skb)->tcp_tw_isn = isn;
    return TCP_TW_SYN;

This SYN packet also bypasses normal checks against listen queue
being full or not.

tcp_conn_request()
...
       __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
...
        /* TW buckets are converted to open requests without
         * limitations, they conserve resources and peer is
         * evidently real one.
         */
        if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
                want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
                if (!want_cookie)
                        goto drop;
        }

This was using TCP_SKB_CB(skb)->tcp_tw_isn field in skb.

Unfortunately this field has been accidentally cleared
after the call to tcp_timewait_state_process() returning
TCP_TW_SYN.

Using a field in TCP_SKB_CB(skb) for a temporary state
is overkill.

Switch instead to a per-cpu variable.

As a bonus, we do not have to clear tcp_tw_isn in TCP receive
fast path.
It is temporarily set then cleared only in the TCP_TW_SYN dance.

Fixes: 4ad19de877 ("net: tcp6: fix double call of tcp_v6_fill_cb()")
Fixes: eeea10b83a ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Eric Dumazet 2024-04-07 09:33:22 +00:00 committed by Paolo Abeni
parent b9e8104058
commit 41eecbd712
6 changed files with 32 additions and 21 deletions

View File

@ -52,6 +52,8 @@ extern struct inet_hashinfo tcp_hashinfo;
DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
int tcp_orphan_count_sum(void);
DECLARE_PER_CPU(u32, tcp_tw_isn);
void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
@ -392,7 +394,8 @@ enum tcp_tw_status {
enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb,
const struct tcphdr *th);
const struct tcphdr *th,
u32 *tw_isn);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
bool *lost_race);
@ -935,13 +938,10 @@ struct tcp_skb_cb {
__u32 seq; /* Starting sequence number */
__u32 end_seq; /* SEQ + FIN + SYN + datalen */
union {
/* Note : tcp_tw_isn is used in input path only
* (isn chosen by tcp_timewait_state_process())
*
/* Note :
* tcp_gso_segs/size are used in write queue only,
* cf tcp_skb_pcount()/tcp_skb_mss()
*/
__u32 tcp_tw_isn;
struct {
u16 tcp_gso_segs;
u16 tcp_gso_size;

View File

@ -290,6 +290,9 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
DEFINE_PER_CPU(u32, tcp_tw_isn);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);

View File

@ -7097,7 +7097,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct sock *sk, struct sk_buff *skb)
{
struct tcp_fastopen_cookie foc = { .len = -1 };
__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
@ -7107,21 +7106,28 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct dst_entry *dst;
struct flowi fl;
u8 syncookies;
u32 isn;
#ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh;
#endif
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
isn = __this_cpu_read(tcp_tw_isn);
if (isn) {
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
__this_cpu_write(tcp_tw_isn, 0);
} else {
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
want_cookie = tcp_syn_flood_action(sk,
rsk_ops->slab_name);
if (!want_cookie)
goto drop;
}
}
if (sk_acceptq_is_full(sk)) {

View File

@ -2146,7 +2146,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@ -2168,6 +2167,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
bool refcounted;
struct sock *sk;
int ret;
u32 isn;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
@ -2383,7 +2383,7 @@ do_time_wait:
inet_twsk_put(inet_twsk(sk));
goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(net,
net->ipv4.tcp_death_row.hashinfo,
@ -2397,6 +2397,7 @@ do_time_wait:
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}

View File

@ -95,7 +95,7 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
*/
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
const struct tcphdr *th)
const struct tcphdr *th, u32 *tw_isn)
{
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
@ -228,7 +228,7 @@ kill:
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
if (isn == 0)
isn++;
TCP_SKB_CB(skb)->tcp_tw_isn = isn;
*tw_isn = isn;
return TCP_TW_SYN;
}

View File

@ -1741,7 +1741,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@ -1758,6 +1757,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
bool refcounted;
struct sock *sk;
int ret;
u32 isn;
struct net *net = dev_net(skb->dev);
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
@ -1967,7 +1967,7 @@ do_time_wait:
goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN:
{
struct sock *sk2;
@ -1985,6 +1985,7 @@ do_time_wait:
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}