From 67cb9366ff5f99868100198efba5ca88aaa6ad25 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Jun 2014 18:19:28 +0200 Subject: [PATCH 1/5] ktime: add ktime_after and ktime_before helper Add two minimal helper functions analogous to time_before() and time_after() that will later on both be needed by SCTP code. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/ktime.h | 24 ++++++++++++++++++++++++ net/sctp/sm_make_chunk.c | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 31c0cd1c941a..de9e46e6bcc9 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -304,6 +304,30 @@ static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2) return 0; } +/** + * ktime_after - Compare if a ktime_t value is bigger than another one. + * @cmp1: comparable1 + * @cmp2: comparable2 + * + * Return: true if cmp1 happened after cmp2. + */ +static inline bool ktime_after(const ktime_t cmp1, const ktime_t cmp2) +{ + return ktime_compare(cmp1, cmp2) > 0; +} + +/** + * ktime_before - Compare if a ktime_t value is smaller than another one. + * @cmp1: comparable1 + * @cmp2: comparable2 + * + * Return: true if cmp1 happened before cmp2. + */ +static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2) +{ + return ktime_compare(cmp1, cmp2) < 0; +} + static inline s64 ktime_to_us(const ktime_t kt) { struct timeval tv = ktime_to_timeval(kt); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fee5552ddf92..ae0e616a7ca5 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1782,7 +1782,7 @@ no_hmac: else kt = ktime_get(); - if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) { + if (!asoc && ktime_before(bear_cookie->expiration, kt)) { /* * Section 3.3.10.3 Stale Cookie Error (3) * From b82e8f31acc7d799638692e65ff017f3e1b6a43d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Jun 2014 18:19:29 +0200 Subject: [PATCH 2/5] net: sctp: refactor active path selection This patch just refactors and moves the code for the active path selection into its own helper function outside of sctp_assoc_control_transport() which is already big enough. No functional changes here. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sctp/associola.c | 120 ++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 59 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 39579c3e0d14..9f1cc6f1535d 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -55,6 +55,7 @@ #include /* Forward declarations for internal functions. */ +static void sctp_select_active_and_retran_path(struct sctp_association *asoc); static void sctp_assoc_bh_rcv(struct work_struct *work); static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc); static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc); @@ -774,9 +775,6 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, sctp_transport_cmd_t command, sctp_sn_error_t error) { - struct sctp_transport *t = NULL; - struct sctp_transport *first; - struct sctp_transport *second; struct sctp_ulpevent *event; struct sockaddr_storage addr; int spc_state = 0; @@ -829,13 +827,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, return; } - /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the - * user. + /* Generate and send a SCTP_PEER_ADDR_CHANGE notification + * to the user. */ if (ulp_notify) { memset(&addr, 0, sizeof(struct sockaddr_storage)); memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); + event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, 0, spc_state, error, GFP_ATOMIC); if (event) @@ -843,60 +842,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, } /* Select new active and retran paths. */ - - /* Look for the two most recently used active transports. - * - * This code produces the wrong ordering whenever jiffies - * rolls over, but we still get usable transports, so we don't - * worry about it. - */ - first = NULL; second = NULL; - - list_for_each_entry(t, &asoc->peer.transport_addr_list, - transports) { - - if ((t->state == SCTP_INACTIVE) || - (t->state == SCTP_UNCONFIRMED) || - (t->state == SCTP_PF)) - continue; - if (!first || t->last_time_heard > first->last_time_heard) { - second = first; - first = t; - } else if (!second || - t->last_time_heard > second->last_time_heard) - second = t; - } - - /* RFC 2960 6.4 Multi-Homed SCTP Endpoints - * - * By default, an endpoint should always transmit to the - * primary path, unless the SCTP user explicitly specifies the - * destination transport address (and possibly source - * transport address) to use. - * - * [If the primary is active but not most recent, bump the most - * recently used transport.] - */ - if (((asoc->peer.primary_path->state == SCTP_ACTIVE) || - (asoc->peer.primary_path->state == SCTP_UNKNOWN)) && - first != asoc->peer.primary_path) { - second = first; - first = asoc->peer.primary_path; - } - - if (!second) - second = first; - /* If we failed to find a usable transport, just camp on the - * primary, even if it is inactive. - */ - if (!first) { - first = asoc->peer.primary_path; - second = asoc->peer.primary_path; - } - - /* Set the active and retran transports. */ - asoc->peer.active_path = first; - asoc->peer.retran_path = second; + sctp_select_active_and_retran_path(asoc); } /* Hold a reference to an association. */ @@ -1325,6 +1271,62 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) __func__, asoc, &asoc->peer.retran_path->ipaddr.sa); } +static void sctp_select_active_and_retran_path(struct sctp_association *asoc) +{ + struct sctp_transport *trans, *trans_pri = NULL, *trans_sec = NULL; + + /* Look for the two most recently used active transports. */ + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) { + if (trans->state == SCTP_INACTIVE || + trans->state == SCTP_UNCONFIRMED || + trans->state == SCTP_PF) + continue; + if (trans_pri == NULL || + trans->last_time_heard > trans_pri->last_time_heard) { + trans_sec = trans_pri; + trans_pri = trans; + } else if (trans_sec == NULL || + trans->last_time_heard > trans_sec->last_time_heard) { + trans_sec = trans; + } + } + + /* RFC 2960 6.4 Multi-Homed SCTP Endpoints + * + * By default, an endpoint should always transmit to the primary + * path, unless the SCTP user explicitly specifies the + * destination transport address (and possibly source transport + * address) to use. [If the primary is active but not most recent, + * bump the most recently used transport.] + */ + if ((asoc->peer.primary_path->state == SCTP_ACTIVE || + asoc->peer.primary_path->state == SCTP_UNKNOWN) && + asoc->peer.primary_path != trans_pri) { + trans_sec = trans_pri; + trans_pri = asoc->peer.primary_path; + } + + /* We did not find anything useful for a possible retransmission + * path; either primary path that we found is the the same as + * the current one, or we didn't generally find an active one. + */ + if (trans_sec == NULL) + trans_sec = trans_pri; + + /* If we failed to find a usable transport, just camp on the + * primary, even if they are inactive. + */ + if (trans_pri == NULL) { + trans_pri = asoc->peer.primary_path; + trans_sec = asoc->peer.primary_path; + } + + /* Set the active and retran transports. */ + asoc->peer.active_path = trans_pri; + asoc->peer.retran_path = trans_sec; +} + struct sctp_transport * sctp_assoc_choose_alter_transport(struct sctp_association *asoc, struct sctp_transport *last_sent_to) From e575235fc6026bb75e166ff68f84118c62d73f94 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Jun 2014 18:19:30 +0200 Subject: [PATCH 3/5] net: sctp: migrate most recently used transport to ktime Be more precise in transport path selection and use ktime helpers instead of jiffies to compare and pick the better primary and secondary recently used transports. This also avoids any side-effects during a possible roll-over, and could lead to better path decision-making. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 +++--- net/sctp/associola.c | 8 +++++--- net/sctp/endpointola.c | 2 +- net/sctp/transport.c | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0dfcc92600e8..f38588bf3462 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -838,10 +838,10 @@ struct sctp_transport { unsigned long sackdelay; __u32 sackfreq; - /* When was the last time (in jiffies) that we heard from this - * transport? We use this to pick new active and retran paths. + /* When was the last time that we heard from this transport? We use + * this to pick new active and retran paths. */ - unsigned long last_time_heard; + ktime_t last_time_heard; /* Last time(in jiffies) when cwnd is reduced due to the congestion * indication based on ECNE chunk. diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 9f1cc6f1535d..620c99e19e77 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1036,7 +1036,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) } if (chunk->transport) - chunk->transport->last_time_heard = jiffies; + chunk->transport->last_time_heard = ktime_get(); /* Run through the state machine. */ error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, @@ -1283,11 +1283,13 @@ static void sctp_select_active_and_retran_path(struct sctp_association *asoc) trans->state == SCTP_PF) continue; if (trans_pri == NULL || - trans->last_time_heard > trans_pri->last_time_heard) { + ktime_after(trans->last_time_heard, + trans_pri->last_time_heard)) { trans_sec = trans_pri; trans_pri = trans; } else if (trans_sec == NULL || - trans->last_time_heard > trans_sec->last_time_heard) { + ktime_after(trans->last_time_heard, + trans_sec->last_time_heard)) { trans_sec = trans; } } diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 3d9f429858dc..9da76ba4d10f 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -481,7 +481,7 @@ normal: } if (chunk->transport) - chunk->transport->last_time_heard = jiffies; + chunk->transport->last_time_heard = ktime_get(); error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state, ep, asoc, chunk, GFP_ATOMIC); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1d348d15b33d..7dd672fa651f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net, */ peer->rto = msecs_to_jiffies(net->sctp.rto_initial); - peer->last_time_heard = jiffies; + peer->last_time_heard = ktime_get(); peer->last_time_ecne_reduced = jiffies; peer->param_flags = SPP_HB_DISABLE | From a7288c4dd5094b3878b7ad817b0cd130a6f8e916 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Jun 2014 18:19:31 +0200 Subject: [PATCH 4/5] net: sctp: improve sctp_select_active_and_retran_path selection In function sctp_select_active_and_retran_path(), we walk the transport list in order to look for the two most recently used ACTIVE transports (trans_pri, trans_sec). In case we didn't find anything ACTIVE, we currently just camp on a possibly PF or INACTIVE transport that is primary path; this behavior actually dates back to linux-history tree of the very early days of lksctp, and can yield a behavior that chooses suboptimal transport paths. Instead, be a bit more clever by reusing and extending the recently introduced sctp_trans_elect_best() handler. In case both transports are evaluated to have the same score resulting from their states, break the tie by looking at: 1) transport patch error count 2) last_time_heard value from each transport. This is analogous to Nishida's Quick Failover draft [1], section 5.1, 3: The sender SHOULD avoid data transmission to PF destinations. When all destinations are in either PF or Inactive state, the sender MAY either move the destination from PF to active state (and transmit data to the active destination) or the sender MAY transmit data to a PF destination. In the former scenario, (i) the sender MUST NOT notify the ULP about the state transition, and (ii) MUST NOT clear the destination's error counter. It is recommended that the sender picks the PF destination with least error count (fewest consecutive timeouts) for data transmission. In case of a tie (multiple PF destinations with same error count), the sender MAY choose the last active destination. Thus for sctp_select_active_and_retran_path(), we keep track of the best, if any, transport that is in PF state and in case no ACTIVE transport has been found (hence trans_{pri,sec} is NULL), we select the best out of the three: current primary_path and retran_path as well as a possible PF transport. The secondary may still camp on the original primary_path as before. The change in sctp_trans_elect_best() with a more fine grained tie selection also improves at the same time path selection for sctp_assoc_update_retran_path() in case of non-ACTIVE states. [1] http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sctp/associola.c | 50 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 620c99e19e77..58bbb731fd26 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1224,13 +1224,41 @@ static u8 sctp_trans_score(const struct sctp_transport *trans) return sctp_trans_state_to_prio_map[trans->state]; } +static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1, + struct sctp_transport *trans2) +{ + if (trans1->error_count > trans2->error_count) { + return trans2; + } else if (trans1->error_count == trans2->error_count && + ktime_after(trans2->last_time_heard, + trans1->last_time_heard)) { + return trans2; + } else { + return trans1; + } +} + static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr, struct sctp_transport *best) { + u8 score_curr, score_best; + if (best == NULL) return curr; - return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best; + score_curr = sctp_trans_score(curr); + score_best = sctp_trans_score(best); + + /* First, try a score-based selection if both transport states + * differ. If we're in a tie, lets try to make a more clever + * decision here based on error counts and last time heard. + */ + if (score_curr > score_best) + return curr; + else if (score_curr == score_best) + return sctp_trans_elect_tie(curr, best); + else + return best; } void sctp_assoc_update_retran_path(struct sctp_association *asoc) @@ -1274,14 +1302,23 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) static void sctp_select_active_and_retran_path(struct sctp_association *asoc) { struct sctp_transport *trans, *trans_pri = NULL, *trans_sec = NULL; + struct sctp_transport *trans_pf = NULL; /* Look for the two most recently used active transports. */ list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { + /* Skip uninteresting transports. */ if (trans->state == SCTP_INACTIVE || - trans->state == SCTP_UNCONFIRMED || - trans->state == SCTP_PF) + trans->state == SCTP_UNCONFIRMED) continue; + /* Keep track of the best PF transport from our + * list in case we don't find an active one. + */ + if (trans->state == SCTP_PF) { + trans_pf = sctp_trans_elect_best(trans, trans_pf); + continue; + } + /* For active transports, pick the most recent ones. */ if (trans_pri == NULL || ktime_after(trans->last_time_heard, trans_pri->last_time_heard)) { @@ -1317,10 +1354,13 @@ static void sctp_select_active_and_retran_path(struct sctp_association *asoc) trans_sec = trans_pri; /* If we failed to find a usable transport, just camp on the - * primary, even if they are inactive. + * primary or retran, even if they are inactive, if possible + * pick a PF iff it's the better choice. */ if (trans_pri == NULL) { - trans_pri = asoc->peer.primary_path; + trans_pri = sctp_trans_elect_best(asoc->peer.primary_path, + asoc->peer.retran_path); + trans_pri = sctp_trans_elect_best(trans_pri, trans_pf); trans_sec = asoc->peer.primary_path; } From 9b87d4651069a3ff72edc11f170f4729585ba828 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Jun 2014 18:19:32 +0200 Subject: [PATCH 5/5] net: sctp: fix incorrect type in gfp initializer This fixes the following sparse warning: net/sctp/associola.c:1556:29: warning: incorrect type in initializer (different base types) net/sctp/associola.c:1556:29: expected bool [unsigned] [usertype] preload net/sctp/associola.c:1556:29: got restricted gfp_t Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 58bbb731fd26..9e0509ce2f84 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1591,7 +1591,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, /* Set an association id for a given association */ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) { - bool preload = gfp & __GFP_WAIT; + bool preload = !!(gfp & __GFP_WAIT); int ret; /* If the id is already assigned, keep it. */