From cff9f12b18915d957a2130885a00f8ab15cff7e4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:31 +0300 Subject: [PATCH 01/11] net/core: Introduce netdev_get_xmit_slave Add new ndo to get the xmit slave of master device. The reference counters are not incremented so the caller must be careful with locks. User can ask to get the xmit slave assume all the slaves can transmit by set all_slaves arg to true. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: David Ahern Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- include/linux/netdevice.h | 12 ++++++++++++ net/core/dev.c | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 130a668049ab..26bc0f11b7ad 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1146,6 +1146,12 @@ struct netdev_net_notifier { * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release previously enslaved netdev. * + * struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev, + * struct sk_buff *skb, + * bool all_slaves); + * Get the xmit slave of master device. If all_slaves is true, function + * assume all the slaves can transmit. + * * Feature/offload setting functions. * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); @@ -1389,6 +1395,9 @@ struct net_device_ops { struct netlink_ext_ack *extack); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); + struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, @@ -2731,6 +2740,9 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); +struct net_device *netdev_get_xmit_slave(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves); struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); diff --git a/net/core/dev.c b/net/core/dev.c index 9c9e763bfe0e..e6c10980abfd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7785,6 +7785,28 @@ void netdev_bonding_info_change(struct net_device *dev, } EXPORT_SYMBOL(netdev_bonding_info_change); +/** + * netdev_get_xmit_slave - Get the xmit slave of master device + * @skb: The packet + * @all_slaves: assume all the slaves are active + * + * The reference counters are not incremented so the caller must be + * careful with locks. The caller must hold RCU lock. + * %NULL is returned if no slave is found. + */ + +struct net_device *netdev_get_xmit_slave(struct net_device *dev, + struct sk_buff *skb, + bool all_slaves) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_get_xmit_slave) + return NULL; + return ops->ndo_get_xmit_slave(dev, skb, all_slaves); +} +EXPORT_SYMBOL(netdev_get_xmit_slave); + static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; From 119d48fd4298594beccf4f2ecd00627826ce2646 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:32 +0300 Subject: [PATCH 02/11] bonding: Export skip slave logic to function As a preparation for following change that add array of all slaves, extract code that skip slave to function. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/bonding/bond_main.c | 45 ++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2e70e43c5df5..f7aded014f08 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4087,6 +4087,29 @@ static void bond_slave_arr_handler(struct work_struct *work) bond_slave_arr_work_rearm(bond, 1); } +static void bond_skip_slave(struct bond_up_slave *slaves, + struct slave *skipslave) +{ + int idx; + + /* Rare situation where caller has asked to skip a specific + * slave but allocation failed (most likely!). BTW this is + * only possible when the call is initiated from + * __bond_release_one(). In this situation; overwrite the + * skipslave entry in the array with the last entry from the + * array to avoid a situation where the xmit path may choose + * this to-be-skipped slave to send a packet out. + */ + for (idx = 0; slaves && idx < slaves->count; idx++) { + if (skipslave == slaves->arr[idx]) { + slaves->arr[idx] = + slaves->arr[slaves->count - 1]; + slaves->count--; + break; + } + } +} + /* Build the usable slaves array in control path for modes that use xmit-hash * to determine the slave interface - * (a) BOND_MODE_8023AD @@ -4156,27 +4179,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) if (old_arr) kfree_rcu(old_arr, rcu); out: - if (ret != 0 && skipslave) { - int idx; + if (ret != 0 && skipslave) + bond_skip_slave(rtnl_dereference(bond->slave_arr), skipslave); - /* Rare situation where caller has asked to skip a specific - * slave but allocation failed (most likely!). BTW this is - * only possible when the call is initiated from - * __bond_release_one(). In this situation; overwrite the - * skipslave entry in the array with the last entry from the - * array to avoid a situation where the xmit path may choose - * this to-be-skipped slave to send a packet out. - */ - old_arr = rtnl_dereference(bond->slave_arr); - for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) { - if (skipslave == old_arr->arr[idx]) { - old_arr->arr[idx] = - old_arr->arr[old_arr->count-1]; - old_arr->count--; - break; - } - } - } return ret; } From ed7d4f023b1a9b0578f20d66557c66452ab845ec Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:33 +0300 Subject: [PATCH 03/11] bonding: Rename slave_arr to usable_slaves Rename slave_arr to usable_slaves, since we will have two arrays, one for the usable slaves and the other to all slaves. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/bonding/bond_alb.c | 4 ++-- drivers/net/bonding/bond_main.c | 40 ++++++++++++++++----------------- include/net/bonding.h | 2 +- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c81698550e5a..7bb49b049dcc 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1360,7 +1360,7 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) struct bond_up_slave *slaves; unsigned int count; - slaves = rcu_dereference(bond->slave_arr); + slaves = rcu_dereference(bond->usable_slaves); count = slaves ? READ_ONCE(slaves->count) : 0; if (likely(count)) tx_slave = slaves->arr[hash_index % @@ -1494,7 +1494,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) struct bond_up_slave *slaves; unsigned int count; - slaves = rcu_dereference(bond->slave_arr); + slaves = rcu_dereference(bond->usable_slaves); count = slaves ? READ_ONCE(slaves->count) : 0; if (likely(count)) tx_slave = slaves->arr[bond_xmit_hash(bond, skb) % diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f7aded014f08..2cb41d480ae2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4120,9 +4120,9 @@ static void bond_skip_slave(struct bond_up_slave *slaves, */ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) { + struct bond_up_slave *usable_slaves, *old_usable_slaves; struct slave *slave; struct list_head *iter; - struct bond_up_slave *new_arr, *old_arr; int agg_id = 0; int ret = 0; @@ -4130,11 +4130,10 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) WARN_ON(lockdep_is_held(&bond->mode_lock)); #endif - new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]), - GFP_KERNEL); - if (!new_arr) { + usable_slaves = kzalloc(struct_size(usable_slaves, arr, + bond->slave_cnt), GFP_KERNEL); + if (!usable_slaves) { ret = -ENOMEM; - pr_err("Failed to build slave-array.\n"); goto out; } if (BOND_MODE(bond) == BOND_MODE_8023AD) { @@ -4142,14 +4141,14 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) if (bond_3ad_get_active_agg_info(bond, &ad_info)) { pr_debug("bond_3ad_get_active_agg_info failed\n"); - kfree_rcu(new_arr, rcu); + kfree_rcu(usable_slaves, rcu); /* No active aggragator means it's not safe to use * the previous array. */ - old_arr = rtnl_dereference(bond->slave_arr); - if (old_arr) { - RCU_INIT_POINTER(bond->slave_arr, NULL); - kfree_rcu(old_arr, rcu); + old_usable_slaves = rtnl_dereference(bond->usable_slaves); + if (old_usable_slaves) { + RCU_INIT_POINTER(bond->usable_slaves, NULL); + kfree_rcu(old_usable_slaves, rcu); } goto out; } @@ -4169,18 +4168,19 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) continue; slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n", - new_arr->count); + usable_slaves->count); - new_arr->arr[new_arr->count++] = slave; + usable_slaves->arr[usable_slaves->count++] = slave; } - old_arr = rtnl_dereference(bond->slave_arr); - rcu_assign_pointer(bond->slave_arr, new_arr); - if (old_arr) - kfree_rcu(old_arr, rcu); + old_usable_slaves = rtnl_dereference(bond->usable_slaves); + rcu_assign_pointer(bond->usable_slaves, usable_slaves); + if (old_usable_slaves) + kfree_rcu(old_usable_slaves, rcu); out: if (ret != 0 && skipslave) - bond_skip_slave(rtnl_dereference(bond->slave_arr), skipslave); + bond_skip_slave(rtnl_dereference(bond->usable_slaves), + skipslave); return ret; } @@ -4197,7 +4197,7 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, struct bond_up_slave *slaves; unsigned int count; - slaves = rcu_dereference(bond->slave_arr); + slaves = rcu_dereference(bond->usable_slaves); count = slaves ? READ_ONCE(slaves->count) : 0; if (likely(count)) { slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; @@ -4488,9 +4488,9 @@ static void bond_uninit(struct net_device *bond_dev) __bond_release_one(bond_dev, slave->dev, true, true); netdev_info(bond_dev, "Released all slaves\n"); - arr = rtnl_dereference(bond->slave_arr); + arr = rtnl_dereference(bond->usable_slaves); if (arr) { - RCU_INIT_POINTER(bond->slave_arr, NULL); + RCU_INIT_POINTER(bond->usable_slaves, NULL); kfree_rcu(arr, rcu); } diff --git a/include/net/bonding.h b/include/net/bonding.h index dc2ce31a1f52..33bdb6d5182d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -200,7 +200,7 @@ struct bonding { struct slave __rcu *curr_active_slave; struct slave __rcu *current_arp_slave; struct slave __rcu *primary_slave; - struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */ + struct bond_up_slave __rcu *usable_slaves; /* Array of usable slaves */ bool force_primary; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, From 34b37e204dfc8b20a09bb7b7f4c5e970c87420dd Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:34 +0300 Subject: [PATCH 04/11] bonding/alb: Add helper functions to get the xmit slave Add two helper functions to get the xmit slave of bond in alb or tlb mode. Extract the logic of find the xmit slave from the xmit flow to function. Xmit flow will xmit through this slave and in the following patches the new .ndo will call to the helper function to return the xmit slave. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/bonding/bond_alb.c | 37 +++++++++++++++++++++++++--------- include/net/bond_alb.h | 4 ++++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 7bb49b049dcc..e863c694c309 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1334,11 +1334,11 @@ static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, return NETDEV_TX_OK; } -netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, + struct sk_buff *skb) { - struct bonding *bond = netdev_priv(bond_dev); - struct ethhdr *eth_data; struct slave *tx_slave = NULL; + struct ethhdr *eth_data; u32 hash_index; skb_reset_mac_header(skb); @@ -1369,20 +1369,29 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } } + return tx_slave; +} + +netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *tx_slave; + + tx_slave = bond_xmit_tlb_slave_get(bond, skb); return bond_do_alb_xmit(skb, bond, tx_slave); } -netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +struct slave *bond_xmit_alb_slave_get(struct bonding *bond, + struct sk_buff *skb) { - struct bonding *bond = netdev_priv(bond_dev); - struct ethhdr *eth_data; struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct slave *tx_slave = NULL; static const __be32 ip_bcast = htonl(0xffffffff); - int hash_size = 0; - bool do_tx_balance = true; - u32 hash_index = 0; + struct slave *tx_slave = NULL; const u8 *hash_start = NULL; + bool do_tx_balance = true; + struct ethhdr *eth_data; + u32 hash_index = 0; + int hash_size = 0; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); @@ -1501,7 +1510,15 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) count]; } } + return tx_slave; +} +netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *tx_slave = NULL; + + tx_slave = bond_xmit_alb_slave_get(bond, skb); return bond_do_alb_xmit(skb, bond, tx_slave); } diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index b3504fcd773d..f6af76c87a6c 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -158,6 +158,10 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave); int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +struct slave *bond_xmit_alb_slave_get(struct bonding *bond, + struct sk_buff *skb); +struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, + struct sk_buff *skb); void bond_alb_monitor(struct work_struct *); int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr); void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id); From c071d91d2a89b0dac1354673810b36453aed62c4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:35 +0300 Subject: [PATCH 05/11] bonding: Add helper function to get the xmit slave based on hash Both xor and 802.3ad modes use bond_xmit_hash to get the xmit slave. Export the logic to helper function so it could be used in the following patches by the .ndo to get the xmit slave. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/bonding/bond_main.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2cb41d480ae2..8e6305955c75 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4185,6 +4185,23 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) return ret; } +static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, + struct sk_buff *skb, + struct bond_up_slave *slaves) +{ + struct slave *slave; + unsigned int count; + u32 hash; + + hash = bond_xmit_hash(bond, skb); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + slave = slaves->arr[hash % count]; + return slave; +} + /* Use this Xmit function for 3AD as well as XOR modes. The current * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out. @@ -4193,18 +4210,15 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = netdev_priv(dev); - struct slave *slave; struct bond_up_slave *slaves; - unsigned int count; + struct slave *slave; slaves = rcu_dereference(bond->usable_slaves); - count = slaves ? READ_ONCE(slaves->count) : 0; - if (likely(count)) { - slave = slaves->arr[bond_xmit_hash(bond, skb) % count]; + slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); + if (likely(slave)) bond_dev_queue_xmit(bond, skb, slave->dev); - } else { + else bond_tx_drop(dev, skb); - } return NETDEV_TX_OK; } From 29d5bbccb3a171eb146c94efeb3d752fad3ddf7d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:36 +0300 Subject: [PATCH 06/11] bonding: Add helper function to get the xmit slave in rr mode Add helper function to get the xmit slave when bond is in round robin mode. Change bond_xmit_slave_id to bond_get_slave_by_id, then the logic for find the next slave for transmit could be used both by the xmit flow and the .ndo to get the xmit slave. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/bonding/bond_main.c | 56 ++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8e6305955c75..09c8485e965d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3923,16 +3923,15 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) } /** - * bond_xmit_slave_id - transmit skb through slave with slave_id + * bond_get_slave_by_id - get xmit slave with slave_id * @bond: bonding device that is transmitting - * @skb: buffer to transmit * @slave_id: slave id up to slave_cnt-1 through which to transmit * - * This function tries to transmit through slave with slave_id but in case + * This function tries to get slave with slave_id but in case * it fails, it tries to find the first available slave for transmission. - * The skb is consumed in all cases, thus the function is void. */ -static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id) +static struct slave *bond_get_slave_by_id(struct bonding *bond, + int slave_id) { struct list_head *iter; struct slave *slave; @@ -3941,10 +3940,8 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl /* Here we start from the slave with slave_id */ bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) { - if (bond_slave_can_tx(slave)) { - bond_dev_queue_xmit(bond, skb, slave->dev); - return; - } + if (bond_slave_can_tx(slave)) + return slave; } } @@ -3953,13 +3950,11 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) break; - if (bond_slave_can_tx(slave)) { - bond_dev_queue_xmit(bond, skb, slave->dev); - return; - } + if (bond_slave_can_tx(slave)) + return slave; } - /* no slave that can tx has been found */ - bond_tx_drop(bond->dev, skb); + + return NULL; } /** @@ -3995,10 +3990,9 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond) return slave_id; } -static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, - struct net_device *bond_dev) +static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond, + struct sk_buff *skb) { - struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; int slave_cnt; u32 slave_id; @@ -4020,21 +4014,31 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, if (iph->protocol == IPPROTO_IGMP) { slave = rcu_dereference(bond->curr_active_slave); if (slave) - bond_dev_queue_xmit(bond, skb, slave->dev); - else - bond_xmit_slave_id(bond, skb, 0); - return NETDEV_TX_OK; + return slave; + return bond_get_slave_by_id(bond, 0); } } non_igmp: slave_cnt = READ_ONCE(bond->slave_cnt); if (likely(slave_cnt)) { - slave_id = bond_rr_gen_slave_id(bond); - bond_xmit_slave_id(bond, skb, slave_id % slave_cnt); - } else { - bond_tx_drop(bond_dev, skb); + slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; + return bond_get_slave_by_id(bond, slave_id); } + return NULL; +} + +static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, + struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + + slave = bond_xmit_roundrobin_slave_get(bond, skb); + if (slave) + bond_dev_queue_xmit(bond, skb, slave->dev); + else + bond_tx_drop(bond_dev, skb); return NETDEV_TX_OK; } From 5a19f1c1a2a0f7d5fb80b130ab4a15fa99e792d7 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:37 +0300 Subject: [PATCH 07/11] bonding: Add function to get the xmit slave in active-backup mode Add helper function to get the xmit slave in active-backup mode. It's only one line function that return the curr_active_slave, but it will used both in the xmit flow and by the new .ndo to get the xmit slave. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/bonding/bond_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 09c8485e965d..1b0ae750d732 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4042,6 +4042,12 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, return NETDEV_TX_OK; } +static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond, + struct sk_buff *skb) +{ + return rcu_dereference(bond->curr_active_slave); +} + /* In active-backup mode, we know that bond->curr_active_slave is always valid if * the bond has a usable interface. */ @@ -4051,7 +4057,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; - slave = rcu_dereference(bond->curr_active_slave); + slave = bond_xmit_activebackup_slave_get(bond, skb); if (slave) bond_dev_queue_xmit(bond, skb, slave->dev); else From 6b447e76ed44cc354cd0a346b86efe393e603e0d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:38 +0300 Subject: [PATCH 08/11] bonding: Add array of all slaves Keep all slaves in array so it could be used to get the xmit slave assume all the slaves are active. The logic to add slave to the array is like the usable slaves, except that we also add slaves that currently can't transmit - not up or active. Signed-off-by: Maor Gottlieb Reviewed-by: Jiri Pirko Reviewed-by: Jay Vosburgh Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/bonding/bond_main.c | 78 +++++++++++++++++++++++++-------- include/net/bonding.h | 3 +- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1b0ae750d732..2de693f0262e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4120,6 +4120,38 @@ static void bond_skip_slave(struct bond_up_slave *slaves, } } +static void bond_set_slave_arr(struct bonding *bond, + struct bond_up_slave *usable_slaves, + struct bond_up_slave *all_slaves) +{ + struct bond_up_slave *usable, *all; + + usable = rtnl_dereference(bond->usable_slaves); + rcu_assign_pointer(bond->usable_slaves, usable_slaves); + kfree_rcu(usable, rcu); + + all = rtnl_dereference(bond->all_slaves); + rcu_assign_pointer(bond->all_slaves, all_slaves); + kfree_rcu(all, rcu); +} + +static void bond_reset_slave_arr(struct bonding *bond) +{ + struct bond_up_slave *usable, *all; + + usable = rtnl_dereference(bond->usable_slaves); + if (usable) { + RCU_INIT_POINTER(bond->usable_slaves, NULL); + kfree_rcu(usable, rcu); + } + + all = rtnl_dereference(bond->all_slaves); + if (all) { + RCU_INIT_POINTER(bond->all_slaves, NULL); + kfree_rcu(all, rcu); + } +} + /* Build the usable slaves array in control path for modes that use xmit-hash * to determine the slave interface - * (a) BOND_MODE_8023AD @@ -4130,7 +4162,7 @@ static void bond_skip_slave(struct bond_up_slave *slaves, */ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) { - struct bond_up_slave *usable_slaves, *old_usable_slaves; + struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL; struct slave *slave; struct list_head *iter; int agg_id = 0; @@ -4142,7 +4174,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) usable_slaves = kzalloc(struct_size(usable_slaves, arr, bond->slave_cnt), GFP_KERNEL); - if (!usable_slaves) { + all_slaves = kzalloc(struct_size(all_slaves, arr, + bond->slave_cnt), GFP_KERNEL); + if (!usable_slaves || !all_slaves) { ret = -ENOMEM; goto out; } @@ -4151,20 +4185,19 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) if (bond_3ad_get_active_agg_info(bond, &ad_info)) { pr_debug("bond_3ad_get_active_agg_info failed\n"); - kfree_rcu(usable_slaves, rcu); /* No active aggragator means it's not safe to use * the previous array. */ - old_usable_slaves = rtnl_dereference(bond->usable_slaves); - if (old_usable_slaves) { - RCU_INIT_POINTER(bond->usable_slaves, NULL); - kfree_rcu(old_usable_slaves, rcu); - } + bond_reset_slave_arr(bond); goto out; } agg_id = ad_info.aggregator_id; } bond_for_each_slave(bond, slave, iter) { + if (skipslave == slave) + continue; + + all_slaves->arr[all_slaves->count++] = slave; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct aggregator *agg; @@ -4174,8 +4207,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) } if (!bond_slave_can_tx(slave)) continue; - if (skipslave == slave) - continue; slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n", usable_slaves->count); @@ -4183,14 +4214,17 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave) usable_slaves->arr[usable_slaves->count++] = slave; } - old_usable_slaves = rtnl_dereference(bond->usable_slaves); - rcu_assign_pointer(bond->usable_slaves, usable_slaves); - if (old_usable_slaves) - kfree_rcu(old_usable_slaves, rcu); + bond_set_slave_arr(bond, usable_slaves, all_slaves); + return ret; out: - if (ret != 0 && skipslave) + if (ret != 0 && skipslave) { + bond_skip_slave(rtnl_dereference(bond->all_slaves), + skipslave); bond_skip_slave(rtnl_dereference(bond->usable_slaves), skipslave); + } + kfree_rcu(all_slaves, rcu); + kfree_rcu(usable_slaves, rcu); return ret; } @@ -4501,9 +4535,9 @@ void bond_setup(struct net_device *bond_dev) static void bond_uninit(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); + struct bond_up_slave *usable, *all; struct list_head *iter; struct slave *slave; - struct bond_up_slave *arr; bond_netpoll_cleanup(bond_dev); @@ -4512,10 +4546,16 @@ static void bond_uninit(struct net_device *bond_dev) __bond_release_one(bond_dev, slave->dev, true, true); netdev_info(bond_dev, "Released all slaves\n"); - arr = rtnl_dereference(bond->usable_slaves); - if (arr) { + usable = rtnl_dereference(bond->usable_slaves); + if (usable) { RCU_INIT_POINTER(bond->usable_slaves, NULL); - kfree_rcu(arr, rcu); + kfree_rcu(usable, rcu); + } + + all = rtnl_dereference(bond->all_slaves); + if (all) { + RCU_INIT_POINTER(bond->all_slaves, NULL); + kfree_rcu(all, rcu); } list_del(&bond->bond_list); diff --git a/include/net/bonding.h b/include/net/bonding.h index 33bdb6d5182d..b5e49bedbc9f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -200,7 +200,8 @@ struct bonding { struct slave __rcu *curr_active_slave; struct slave __rcu *current_arp_slave; struct slave __rcu *primary_slave; - struct bond_up_slave __rcu *usable_slaves; /* Array of usable slaves */ + struct bond_up_slave __rcu *usable_slaves; + struct bond_up_slave __rcu *all_slaves; bool force_primary; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, From 33720aaf8c2af5c0ff341a16b5048b9c7ecae569 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:39 +0300 Subject: [PATCH 09/11] bonding: Implement ndo_get_xmit_slave Add implementation of ndo_get_xmit_slave. Find the slave by using the helper function according to the bond mode. If the caller set all_slaves to true, then it assumes that all slaves are available to transmit. Signed-off-by: Maor Gottlieb Reviewed-by: Jay Vosburgh Reviewed-by: Jiri Pirko Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/bonding/bond_main.c | 43 +++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2de693f0262e..39b1ad7edbb4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4347,6 +4347,48 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb, return txq; } +static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, + struct sk_buff *skb, + bool all_slaves) +{ + struct bonding *bond = netdev_priv(master_dev); + struct bond_up_slave *slaves; + struct slave *slave = NULL; + + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + slave = bond_xmit_roundrobin_slave_get(bond, skb); + break; + case BOND_MODE_ACTIVEBACKUP: + slave = bond_xmit_activebackup_slave_get(bond, skb); + break; + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + if (all_slaves) + slaves = rcu_dereference(bond->all_slaves); + else + slaves = rcu_dereference(bond->usable_slaves); + slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves); + break; + case BOND_MODE_BROADCAST: + break; + case BOND_MODE_ALB: + slave = bond_xmit_alb_slave_get(bond, skb); + break; + case BOND_MODE_TLB: + slave = bond_xmit_tlb_slave_get(bond, skb); + break; + default: + /* Should never happen, mode already checked */ + WARN_ONCE(true, "Unknown bonding mode"); + break; + } + + if (slave) + return slave->dev; + return NULL; +} + static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = netdev_priv(dev); @@ -4468,6 +4510,7 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, .ndo_features_check = passthru_features_check, + .ndo_get_xmit_slave = bond_xmit_get_slave, }; static const struct device_type bond_type = { From 64363e61c7bbcfa4c7d6697d96ef2e18fc311cf3 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:40 +0300 Subject: [PATCH 10/11] net/mlx5: Change lag mutex lock to spin lock The lag lock could be a spin lock, the critical section is short and there is no need that the thread will sleep. Change the lock that protects the LAG structure from mutex to spin lock. It is required for next patch that need to access this structure from context that we can't sleep. In addition there is no need to hold this lock when query the congestion counters. Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index c6ad5ca46877..b17b80bcd045 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -42,7 +42,7 @@ * Beware of lock dependencies (preferably, no locks should be acquired * under it). */ -static DEFINE_MUTEX(lag_mutex); +static DEFINE_SPINLOCK(lag_lock); static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, u8 remap_port2) @@ -274,9 +274,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) if (!dev0 || !dev1) return; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); tracker = ldev->tracker; - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); @@ -458,9 +458,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, break; } - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev->tracker = tracker; - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); if (changed) mlx5_queue_bond_work(ldev, 0); @@ -502,7 +502,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, if (fn >= MLX5_MAX_PORTS) return; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev->pf[fn].dev = dev; ldev->pf[fn].netdev = netdev; ldev->tracker.netdev_state[fn].link_up = 0; @@ -510,7 +510,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, dev->priv.lag = ldev; - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); } static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, @@ -525,11 +525,11 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, if (i == MLX5_MAX_PORTS) return; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); dev->priv.lag = NULL; - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); } /* Must be called with intf_mutex held */ @@ -607,10 +607,10 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) struct mlx5_lag *ldev; bool res; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); res = ldev && __mlx5_lag_is_roce(ldev); - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); return res; } @@ -621,10 +621,10 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) struct mlx5_lag *ldev; bool res; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); res = ldev && __mlx5_lag_is_active(ldev); - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); return res; } @@ -635,10 +635,10 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) struct mlx5_lag *ldev; bool res; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); res = ldev && __mlx5_lag_is_sriov(ldev); - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); return res; } @@ -664,7 +664,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) struct net_device *ndev = NULL; struct mlx5_lag *ldev; - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); if (!(ldev && __mlx5_lag_is_roce(ldev))) @@ -681,7 +681,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) dev_hold(ndev); unlock: - mutex_unlock(&lag_mutex); + spin_unlock(&lag_lock); return ndev; } @@ -723,7 +723,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, memset(values, 0, sizeof(*values) * num_counters); - mutex_lock(&lag_mutex); + spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); if (ldev && __mlx5_lag_is_roce(ldev)) { num_ports = MLX5_MAX_PORTS; @@ -733,6 +733,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, num_ports = 1; mdev[MLX5_LAG_P1] = dev; } + spin_unlock(&lag_lock); for (i = 0; i < num_ports; ++i) { u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; @@ -742,14 +743,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, out); if (ret) - goto unlock; + goto free; for (j = 0; j < num_counters; ++j) values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); } -unlock: - mutex_unlock(&lag_mutex); +free: kvfree(out); return ret; } From c6bc6041b10f70b617f2d13894311fe62027d292 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 30 Apr 2020 22:21:41 +0300 Subject: [PATCH 11/11] net/mlx5: Add support to get lag physical port Add function to get the device physical port of the lag slave. Signed-off-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Acked-by: David S. Miller Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 24 +++++++++++++++++++ include/linux/mlx5/driver.h | 2 ++ 2 files changed, 26 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index b17b80bcd045..874c70e8cc54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -687,6 +687,30 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); +u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + struct net_device *slave) +{ + struct mlx5_lag *ldev; + u8 port = 0; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev_get(dev); + if (!(ldev && __mlx5_lag_is_roce(ldev))) + goto unlock; + + if (ldev->pf[MLX5_LAG_P1].netdev == slave) + port = MLX5_LAG_P1; + else + port = MLX5_LAG_P2; + + port = ldev->v2p_map[port]; + +unlock: + spin_unlock(&lag_lock); + return port; +} +EXPORT_SYMBOL(mlx5_lag_get_slave_port); + bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d82dbbab8179..267dfcc5493e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1074,6 +1074,8 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); +u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + struct net_device *slave); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, int num_counters,