diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 4cf1ef935ed9..9e4d0f0aeb6d 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -66,12 +66,6 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } -static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) -{ - return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == - RTF_GATEWAY; -} - void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index d1dc6017f5a6..f9132a6de917 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -934,19 +934,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, { struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&rt->fib6_table->tb6_lock)); - struct fib6_info *iter = NULL; + struct fib6_info *iter = NULL, *match = NULL; struct fib6_info __rcu **ins; - struct fib6_info __rcu **fallback_ins = NULL; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); + int append = (info->nlh && + (info->nlh->nlmsg_flags & NLM_F_APPEND)); int add = (!info->nlh || (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; - bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); u16 nlflags = NLM_F_EXCL; int err; - if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND)) + if (append) nlflags |= NLM_F_APPEND; ins = &fn->leaf; @@ -968,13 +968,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, nlflags &= ~NLM_F_EXCL; if (replace) { - if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { - found++; - break; - } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; - goto next_iter; + found++; + break; } if (rt6_duplicate_nexthop(iter, rt)) { @@ -989,86 +984,67 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu); return -EEXIST; } - /* If we have the same destination and the same metric, - * but not the same gateway, then the route we try to - * add is sibling to this route, increment our counter - * of siblings, and later we will add our route to the - * list. - * Only static routes (which don't have flag - * RTF_EXPIRES) are used for ECMPv6. - * - * To avoid long list, we only had siblings if the - * route have a gateway. - */ - if (rt_can_ecmp && - rt6_qualify_for_ecmp(iter)) - rt->fib6_nsiblings++; + + /* first route that matches */ + if (!match) + match = iter; } if (iter->fib6_metric > rt->fib6_metric) break; -next_iter: ins = &iter->fib6_next; } - if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ - ins = fallback_ins; - iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->fib6_table->tb6_lock)); - found++; - } - /* Reset round-robin state, if necessary */ if (ins == &fn->leaf) fn->rr_ptr = NULL; /* Link this route to others same route. */ - if (rt->fib6_nsiblings) { - unsigned int fib6_nsiblings; + if (append && match) { struct fib6_info *sibling, *temp_sibling; - /* Find the first route that have the same metric */ - sibling = leaf; - while (sibling) { - if (sibling->fib6_metric == rt->fib6_metric && - rt6_qualify_for_ecmp(sibling)) { - list_add_tail(&rt->fib6_siblings, - &sibling->fib6_siblings); - break; - } - sibling = rcu_dereference_protected(sibling->fib6_next, - lockdep_is_held(&rt->fib6_table->tb6_lock)); + if (rt->fib6_flags & RTF_REJECT) { + NL_SET_ERR_MSG(extack, + "Can not append a REJECT route"); + return -EINVAL; + } else if (match->fib6_flags & RTF_REJECT) { + NL_SET_ERR_MSG(extack, + "Can not append to a REJECT route"); + return -EINVAL; } + rt->fib6_nsiblings = match->fib6_nsiblings; + list_add_tail(&rt->fib6_siblings, &match->fib6_siblings); + match->fib6_nsiblings++; + /* For each sibling in the list, increment the counter of * siblings. BUG() if counters does not match, list of siblings * is broken! */ - fib6_nsiblings = 0; list_for_each_entry_safe(sibling, temp_sibling, - &rt->fib6_siblings, fib6_siblings) { + &match->fib6_siblings, fib6_siblings) { sibling->fib6_nsiblings++; - BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings); - fib6_nsiblings++; + BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings); } - BUG_ON(fib6_nsiblings != rt->fib6_nsiblings); - rt6_multipath_rebalance(temp_sibling); + + rt6_multipath_rebalance(match); } /* * insert node */ if (!replace) { + enum fib_event_type event; + if (!add) pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: nlflags |= NLM_F_CREATE; - err = call_fib6_entry_notifiers(info->nl_net, - FIB_EVENT_ENTRY_ADD, - rt, extack); + event = append ? FIB_EVENT_ENTRY_APPEND : FIB_EVENT_ENTRY_ADD; + err = call_fib6_entry_notifiers(info->nl_net, event, rt, + extack); if (err) return err; @@ -1086,7 +1062,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, } } else { - int nsiblings; + struct fib6_info *tmp; if (!found) { if (add) @@ -1101,48 +1077,57 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, if (err) return err; + /* if route being replaced has siblings, set tmp to + * last one, otherwise tmp is current route. this is + * used to set fib6_next for new route + */ + if (iter->fib6_nsiblings) + tmp = list_last_entry(&iter->fib6_siblings, + struct fib6_info, + fib6_siblings); + else + tmp = iter; + + /* insert new route */ atomic_inc(&rt->fib6_ref); rcu_assign_pointer(rt->fib6_node, fn); - rt->fib6_next = iter->fib6_next; + rt->fib6_next = tmp->fib6_next; rcu_assign_pointer(*ins, rt); + if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } - nsiblings = iter->fib6_nsiblings; - iter->fib6_node = NULL; - fib6_purge_rt(iter, fn, info->nl_net); - if (rcu_access_pointer(fn->rr_ptr) == iter) - fn->rr_ptr = NULL; - fib6_info_release(iter); - if (nsiblings) { + /* delete old route */ + rt = iter; + + if (rt->fib6_nsiblings) { + struct fib6_info *tmp; + /* Replacing an ECMP route, remove all siblings */ - ins = &rt->fib6_next; - iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->fib6_table->tb6_lock)); - while (iter) { - if (iter->fib6_metric > rt->fib6_metric) - break; - if (rt6_qualify_for_ecmp(iter)) { - *ins = iter->fib6_next; - iter->fib6_node = NULL; - fib6_purge_rt(iter, fn, info->nl_net); - if (rcu_access_pointer(fn->rr_ptr) == iter) - fn->rr_ptr = NULL; - fib6_info_release(iter); - nsiblings--; - info->nl_net->ipv6.rt6_stats->fib_rt_entries--; - } else { - ins = &iter->fib6_next; - } - iter = rcu_dereference_protected(*ins, - lockdep_is_held(&rt->fib6_table->tb6_lock)); + list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings, + fib6_siblings) { + iter->fib6_node = NULL; + fib6_purge_rt(iter, fn, info->nl_net); + if (rcu_access_pointer(fn->rr_ptr) == iter) + fn->rr_ptr = NULL; + fib6_info_release(iter); + + rt->fib6_nsiblings--; + info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } - WARN_ON(nsiblings != 0); } + + WARN_ON(rt->fib6_nsiblings != 0); + + rt->fib6_node = NULL; + fib6_purge_rt(rt, fn, info->nl_net); + if (rcu_access_pointer(fn->rr_ptr) == rt) + fn->rr_ptr = NULL; + fib6_info_release(rt); } return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cc24ed3bc334..bcb8785c0451 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3791,7 +3791,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) lockdep_is_held(&rt->fib6_table->tb6_lock)); while (iter) { if (iter->fib6_metric == rt->fib6_metric && - rt6_qualify_for_ecmp(iter)) + iter->fib6_nsiblings) return iter; iter = rcu_dereference_protected(iter->fib6_next, lockdep_is_held(&rt->fib6_table->tb6_lock)); @@ -4381,6 +4381,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND; nhn++; }