diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f7567a3698eb..1fa15beb8380 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -457,18 +457,6 @@ void tcp_init_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_init_sock);
 
-void tcp_init_transfer(struct sock *sk, int bpf_op)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	tcp_mtup_init(sk);
-	icsk->icsk_af_ops->rebuild_header(sk);
-	tcp_init_metrics(sk);
-	tcp_call_bpf(sk, bpf_op, 0, NULL);
-	tcp_init_congestion_control(sk);
-	tcp_init_buffer_space(sk);
-}
-
 static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
 {
 	struct sk_buff *skb = tcp_write_queue_tail(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 97671bff597a..077d9abdfcf5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2252,7 +2252,7 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
  */
 static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
 {
-	return !tp->retrans_stamp ||
+	return tp->retrans_stamp &&
 	       tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
 }
 
@@ -3521,7 +3521,7 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (rexmit == REXMIT_NONE)
+	if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
 		return;
 
 	if (unlikely(rexmit == 2)) {
@@ -5647,6 +5647,32 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_rcv_established);
 
+void tcp_init_transfer(struct sock *sk, int bpf_op)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tcp_mtup_init(sk);
+	icsk->icsk_af_ops->rebuild_header(sk);
+	tcp_init_metrics(sk);
+
+	/* Initialize the congestion window to start the transfer.
+	 * Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
+	 * retransmitted. In light of RFC6298 more aggressive 1sec
+	 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
+	 * retransmission has occurred.
+	 */
+	if (tp->total_retrans > 1 && tp->undo_marker)
+		tp->snd_cwnd = 1;
+	else
+		tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
+	tp->snd_cwnd_stamp = tcp_jiffies32;
+
+	tcp_call_bpf(sk, bpf_op, 0, NULL);
+	tcp_init_congestion_control(sk);
+	tcp_init_buffer_space(sk);
+}
+
 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
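For context on the hunk above: tcp_init_transfer() now gates the RFC 5681 cwnd reduction on tp->undo_marker, so a SYN/SYN-ACK retransmission that is later proven spurious keeps the full initial window. Below is a minimal user-space sketch of that decision, not part of the patch; transfer_init_cwnd() and its parameters are invented names, and tcp_init_cwnd() is assumed to return the default TCP_INIT_CWND of 10.

/* Illustrative sketch only -- models the cwnd decision made by
 * tcp_init_transfer() above; the kernel's tcp_init_cwnd() is replaced
 * by the TCP_INIT_CWND constant.
 */
#include <stdio.h>
#include <stdbool.h>

#define TCP_INIT_CWND	10

static unsigned int transfer_init_cwnd(unsigned int total_retrans,
					bool undo_marker_set)
{
	/* More than one SYN/SYN-ACK rtx and no proof it was spurious:
	 * be conservative per RFC 5681 and start from cwnd = 1.
	 */
	if (total_retrans > 1 && undo_marker_set)
		return 1;
	return TCP_INIT_CWND;	/* stand-in for tcp_init_cwnd() */
}

int main(void)
{
	printf("%u\n", transfer_init_cwnd(2, true));	/* 1: genuine loss */
	printf("%u\n", transfer_init_cwnd(2, false));	/* 10: proven spurious */
	printf("%u\n", transfer_init_cwnd(1, true));	/* 10: single rtx tolerated */
	return 0;
}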
@@ -5748,6 +5774,21 @@ static void smc_check_reset_syn(struct tcp_sock *tp)
 #endif
 }
 
+static void tcp_try_undo_spurious_syn(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 syn_stamp;
+
+	/* undo_marker is set when SYN or SYNACK times out. The timeout is
+	 * spurious if the ACK's timestamp option echo value matches the
+	 * original SYN timestamp.
+	 */
+	syn_stamp = tp->retrans_stamp;
+	if (tp->undo_marker && syn_stamp && tp->rx_opt.saw_tstamp &&
+	    syn_stamp == tp->rx_opt.rcv_tsecr)
+		tp->undo_marker = 0;
+}
+
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 const struct tcphdr *th)
 {
@@ -5815,6 +5856,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		tcp_ecn_rcv_synack(tp, th);
 
 		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+		tcp_try_undo_spurious_syn(sk);
 		tcp_ack(sk, skb, FLAG_SLOWPATH);
 
 		/* Ok.. it's good. Set up sequence numbers and
@@ -5973,6 +6015,27 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	return 1;
 }
 
+static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
+{
+	tcp_try_undo_loss(sk, false);
+	inet_csk(sk)->icsk_retransmits = 0;
+
+	/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
+	 * we no longer need req so release it.
+	 */
+	reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false);
+
+	/* Re-arm the timer because data may have been sent out.
+	 * This is similar to the regular data transmission case
+	 * when new data has just been ack'ed.
+	 *
+	 * (TFO) - we could try to be more aggressive and
+	 * retransmit any data sooner based on when it
+	 * was sent out.
+	 */
+	tcp_rearm_rto(sk);
+}
+
 /*
  * This function implements the receiving procedure of RFC 793 for
  * all states except ESTABLISHED and TIME_WAIT.
@@ -6069,22 +6132,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (!tp->srtt_us)
 			tcp_synack_rtt_meas(sk, req);
 
-		/* Once we leave TCP_SYN_RECV, we no longer need req
-		 * so release it.
-		 */
 		if (req) {
-			inet_csk(sk)->icsk_retransmits = 0;
-			reqsk_fastopen_remove(sk, req, false);
-			/* Re-arm the timer because data may have been sent out.
-			 * This is similar to the regular data transmission case
-			 * when new data has just been ack'ed.
-			 *
-			 * (TFO) - we could try to be more aggressive and
-			 * retransmitting any data sooner based on when they
-			 * are sent out.
-			 */
-			tcp_rearm_rto(sk);
+			tcp_rcv_synrecv_state_fastopen(sk);
 		} else {
+			tcp_try_undo_spurious_syn(sk);
+			tp->retrans_stamp = 0;
 			tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
 			tp->copied_seq = tp->rcv_nxt;
 		}
@@ -6119,16 +6171,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	case TCP_FIN_WAIT1: {
 		int tmo;
 
-		/* If we enter the TCP_FIN_WAIT1 state and we are a
-		 * Fast Open socket and this is the first acceptable
-		 * ACK we have received, this would have acknowledged
-		 * our SYNACK so stop the SYNACK timer.
-		 */
-		if (req) {
-			/* We no longer need the request sock. */
-			reqsk_fastopen_remove(sk, req, false);
-			tcp_rearm_rto(sk);
-		}
+		if (req)
+			tcp_rcv_synrecv_state_fastopen(sk);
+
 		if (tp->snd_una != tp->write_seq)
 			break;
@@ -6303,7 +6348,7 @@ static void tcp_openreq_init(struct request_sock *req,
 	req->cookie_ts = 0;
 	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
 	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
-	tcp_rsk(req)->snt_synack = tcp_clock_us();
+	tcp_rsk(req)->snt_synack = 0;
 	tcp_rsk(req)->last_oow_ack_time = 0;
 	req->mss = rx_opt->mss_clamp;
 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
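The undo logic above hinges on a single timestamp comparison. Suppose a SYN goes out with TSval 1000, times out, and is retransmitted with TSval 1200; if the ACK that completes the handshake echoes TSecr 1000, the original SYN must have arrived, so the timeout was spurious and undo_marker is cleared. Below is a standalone sketch of the predicate in tcp_try_undo_spurious_syn(), not part of the patch; struct handshake and try_undo_spurious_syn() are invented names for the example.

/* Illustrative sketch only -- mirrors the spurious-timeout predicate
 * used by tcp_try_undo_spurious_syn() above.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct handshake {
	uint32_t retrans_stamp;	/* TSval of the original SYN/SYNACK */
	uint32_t rcv_tsecr;	/* TS echo carried by the final ACK  */
	bool saw_tstamp;	/* peer sent the timestamp option    */
	bool undo_marker;	/* set when the SYN/SYNACK timed out */
};

static void try_undo_spurious_syn(struct handshake *hs)
{
	/* The peer echoed the original timestamp, so the original
	 * segment did arrive: the retransmission was spurious.
	 */
	if (hs->undo_marker && hs->retrans_stamp && hs->saw_tstamp &&
	    hs->retrans_stamp == hs->rcv_tsecr)
		hs->undo_marker = false;
}

int main(void)
{
	/* SYN stamped 1000, rtx stamped 1200; the ACK echoes 1000. */
	struct handshake hs = { 1000, 1000, true, true };

	try_undo_spurious_syn(&hs);
	printf("spurious timeout: %s\n", hs.undo_marker ? "no" : "yes");
	return 0;
}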
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f262f2cace29..c4848e7a0aad 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -512,16 +512,6 @@ void tcp_init_metrics(struct sock *sk)
 
 		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
 	}
-	/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
-	 * retransmitted. In light of RFC6298 more aggressive 1sec
-	 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
-	 * retransmission has occurred.
-	 */
-	if (tp->total_retrans > 1)
-		tp->snd_cwnd = 1;
-	else
-		tp->snd_cwnd = tcp_init_cwnd(tp, dst);
-	tp->snd_cwnd_stamp = tcp_jiffies32;
 }
 
 bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 79900f783e0d..9c2a0d36fb20 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -522,6 +522,11 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->rx_opt.ts_recent_stamp = 0;
 		newtp->tcp_header_len = sizeof(struct tcphdr);
 	}
+	if (req->num_timeout) {
+		newtp->undo_marker = treq->snt_isn;
+		newtp->retrans_stamp = div_u64(treq->snt_synack,
+					       USEC_PER_SEC / TCP_TS_HZ);
+	}
 	newtp->tsoffset = treq->ts_off;
 #ifdef CONFIG_TCP_MD5SIG
 	newtp->md5sig_info = NULL;	/*XXX*/
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 32061928b054..0c4ed66dc1bf 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3247,7 +3247,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 		skb->skb_mstamp_ns = cookie_init_timestamp(req);
 	else
 #endif
+	{
 		skb->skb_mstamp_ns = tcp_clock_ns();
+		if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
+			tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
+	}
 
 #ifdef CONFIG_TCP_MD5SIG
 	rcu_read_lock();
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index f0c86398e6a7..2ac23da42dd2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -393,6 +393,9 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
 		tcp_write_err(sk);
 		return;
 	}
+	/* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */
+	if (icsk->icsk_retransmits == 1)
+		tcp_enter_loss(sk);
 	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
 	 * returned from rtx_syn_ack() to make it more persistent like
 	 * regular retransmit because if the child socket has been accepted
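One unit detail worth noting in the tcp_minisocks.c hunk: treq->snt_synack is recorded in microseconds (via tcp_skb_timestamp_us() in tcp_make_synack()), while retrans_stamp is kept on the TCP timestamp clock, so the divisor USEC_PER_SEC / TCP_TS_HZ converts between the two. With TCP_TS_HZ at 1000, that is 1000000 / 1000 = 1000, i.e. microseconds to milliseconds. A worked example of the arithmetic follows; it is standalone illustration, not kernel code, and the sample timestamp is made up.

/* Illustrative arithmetic only: the unit conversion used when seeding
 * newtp->retrans_stamp from treq->snt_synack above. TCP_TS_HZ is 1000
 * in this kernel, so the divisor is 1000000 / 1000 = 1000.
 */
#include <inttypes.h>
#include <stdio.h>

#define USEC_PER_SEC	1000000ULL
#define TCP_TS_HZ	1000ULL

int main(void)
{
	uint64_t snt_synack_us = 1557000123456ULL;	/* first SYNACK, usec */
	uint32_t retrans_stamp = (uint32_t)(snt_synack_us /
					    (USEC_PER_SEC / TCP_TS_HZ));

	/* 1557000123456 / 1000 = 1557000123 ticks on the TS clock */
	printf("retrans_stamp = %" PRIu32 "\n", retrans_stamp);
	return 0;
}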