Daniel Borkmann says:

====================
bpf-next 2021-08-10

We've added 31 non-merge commits during the last 8 day(s) which contain
a total of 28 files changed, 3644 insertions(+), 519 deletions(-).

1) Native XDP support for bonding driver & related BPF selftests, from Jussi Maki.

2) Large batch of new BPF JIT tests for test_bpf.ko that came out as a result from
   32-bit MIPS JIT development, from Johan Almbladh.

3) Rewrite of netcnt BPF selftest and merge into test_progs, from Stanislav Fomichev.

4) Fix XDP bpf_prog_test_run infra after net to net-next merge, from Andrii Nakryiko.

5) Follow-up fix in unix_bpf_update_proto() to enforce socket type, from Cong Wang.

6) Fix bpf-iter-tcp4 selftest to print the correct dest IP, from Jose Blanquicet.

7) Various misc BPF XDP sample improvements, from Niklas Söderlund, Matthew Cover,
   and Muhammad Falak R Wani.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (31 commits)
  bpf, tests: Add tail call test suite
  bpf, tests: Add tests for BPF_CMPXCHG
  bpf, tests: Add tests for atomic operations
  bpf, tests: Add test for 32-bit context pointer argument passing
  bpf, tests: Add branch conversion JIT test
  bpf, tests: Add word-order tests for load/store of double words
  bpf, tests: Add tests for ALU operations implemented with function calls
  bpf, tests: Add more ALU64 BPF_MUL tests
  bpf, tests: Add more BPF_LSH/RSH/ARSH tests for ALU64
  bpf, tests: Add more ALU32 tests for BPF_LSH/RSH/ARSH
  bpf, tests: Add more tests of ALU32 and ALU64 bitwise operations
  bpf, tests: Fix typos in test case descriptions
  bpf, tests: Add BPF_MOV tests for zero and sign extension
  bpf, tests: Add BPF_JMP32 test cases
  samples, bpf: Add an explicit comment to handle nested vlan tagging.
  selftests/bpf: Add tests for XDP bonding
  selftests/bpf: Fix xdp_tx.c prog section name
  net, core: Allow netdev_lower_get_next_private_rcu in bh context
  bpf, devmap: Exclude XDP broadcast to master device
  net, bonding: Add XDP support to the bonding driver
  ...
====================

Link: https://lore.kernel.org/r/20210810130038.16927-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Merge commit d1a4e0a957 by Jakub Kicinski, 2021-08-10 07:27:09 -07:00
28 changed files with 3442 additions and 317 deletions
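
As a rough illustration of point 1 above (assuming the standard libbpf API; object path, program name and "bond0" are placeholders), attaching an XDP program to a bond master now propagates it to the capable slaves via ndo_bpf, as implemented by bond_xdp_set() in the bonding diff below:

/* Hypothetical userspace sketch: attach an XDP program to a bond master
 * with libbpf. Error handling is trimmed for brevity. */
#include <net/if.h>
#include <bpf/libbpf.h>

static int attach_xdp_to_bond(const char *obj_path, const char *prog_name)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	int ifindex;

	obj = bpf_object__open_file(obj_path, NULL);
	if (libbpf_get_error(obj) || bpf_object__load(obj))
		return -1;

	prog = bpf_object__find_program_by_name(obj, prog_name);
	ifindex = if_nametoindex("bond0");
	if (!prog || !ifindex)
		return -1;

	/* The bonding driver pushes the program to each capable slave
	 * via ndo_bpf, see bond_xdp_set() in the diff below. */
	link = bpf_program__attach_xdp(prog, ifindex);
	return libbpf_get_error(link) ? -1 : 0;
}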


@ -317,6 +317,19 @@ bool bond_sk_check(struct bonding *bond)
}
}
static bool bond_xdp_check(struct bonding *bond)
{
switch (BOND_MODE(bond)) {
case BOND_MODE_ROUNDROBIN:
case BOND_MODE_ACTIVEBACKUP:
case BOND_MODE_8023AD:
case BOND_MODE_XOR:
return true;
default:
return false;
}
}
/*---------------------------------- VLAN -----------------------------------*/
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@ -2133,6 +2146,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
bond_update_slave_arr(bond, NULL);
if (!slave_dev->netdev_ops->ndo_bpf ||
!slave_dev->netdev_ops->ndo_xdp_xmit) {
if (bond->xdp_prog) {
NL_SET_ERR_MSG(extack, "Slave does not support XDP");
slave_err(bond_dev, slave_dev, "Slave does not support XDP\n");
res = -EOPNOTSUPP;
goto err_sysfs_del;
}
} else {
struct netdev_bpf xdp = {
.command = XDP_SETUP_PROG,
.flags = 0,
.prog = bond->xdp_prog,
.extack = extack,
};
if (dev_xdp_prog_count(slave_dev) > 0) {
NL_SET_ERR_MSG(extack,
"Slave has XDP program loaded, please unload before enslaving");
slave_err(bond_dev, slave_dev,
"Slave has XDP program loaded, please unload before enslaving\n");
res = -EOPNOTSUPP;
goto err_sysfs_del;
}
res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
if (res < 0) {
/* ndo_bpf() sets extack error message */
slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
goto err_sysfs_del;
}
if (bond->xdp_prog)
bpf_prog_inc(bond->xdp_prog);
}
slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
bond_is_active_slave(new_slave) ? "an active" : "a backup",
new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
@ -2252,6 +2300,17 @@ static int __bond_release_one(struct net_device *bond_dev,
/* recompute stats just before removing the slave */
bond_get_stats(bond->dev, &bond->bond_stats);
if (bond->xdp_prog) {
struct netdev_bpf xdp = {
.command = XDP_SETUP_PROG,
.flags = 0,
.prog = NULL,
.extack = NULL,
};
if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
}
/* unregister rx_handler early so bond_handle_frame wouldn't be called
* for this slave anymore.
*/
@ -3614,55 +3673,80 @@ static struct notifier_block bond_netdev_notifier = {
/*---------------------------- Hashing Policies -----------------------------*/
/* L2 hash helper */
static inline u32 bond_eth_hash(struct sk_buff *skb)
/* Helper to access data in a packet, with or without a backing skb.
* If skb is given the data is linearized if necessary via pskb_may_pull.
*/
static inline const void *bond_pull_data(struct sk_buff *skb,
const void *data, int hlen, int n)
{
struct ethhdr *ep, hdr_tmp;
if (likely(n <= hlen))
return data;
else if (skb && likely(pskb_may_pull(skb, n)))
return skb->head;
ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
if (ep)
return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
return 0;
return NULL;
}
static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
int *noff, int *proto, bool l34)
/* L2 hash helper */
static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
struct ethhdr *ep;
data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
if (!data)
return 0;
ep = (struct ethhdr *)(data + mhoff);
return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
}
static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
{
const struct ipv6hdr *iph6;
const struct iphdr *iph;
if (skb->protocol == htons(ETH_P_IP)) {
if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
if (l2_proto == htons(ETH_P_IP)) {
data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
if (!data)
return false;
iph = (const struct iphdr *)(skb->data + *noff);
iph = (const struct iphdr *)(data + *nhoff);
iph_to_flow_copy_v4addrs(fk, iph);
*noff += iph->ihl << 2;
*nhoff += iph->ihl << 2;
if (!ip_is_fragment(iph))
*proto = iph->protocol;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
*ip_proto = iph->protocol;
} else if (l2_proto == htons(ETH_P_IPV6)) {
data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
if (!data)
return false;
iph6 = (const struct ipv6hdr *)(skb->data + *noff);
iph6 = (const struct ipv6hdr *)(data + *nhoff);
iph_to_flow_copy_v6addrs(fk, iph6);
*noff += sizeof(*iph6);
*proto = iph6->nexthdr;
*nhoff += sizeof(*iph6);
*ip_proto = iph6->nexthdr;
} else {
return false;
}
if (l34 && *proto >= 0)
fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
if (l34 && *ip_proto >= 0)
fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
return true;
}
static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
struct ethhdr *mac_hdr;
u32 srcmac_vendor = 0, srcmac_dev = 0;
u16 vlan;
int i;
data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
if (!data)
return 0;
mac_hdr = (struct ethhdr *)(data + mhoff);
for (i = 0; i < 3; i++)
srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
@ -3678,26 +3762,25 @@ static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
}
/* Extract the appropriate headers based on bond's xmit policy */
static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
struct flow_keys *fk)
static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
__be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
{
bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
int noff, proto = -1;
int ip_proto = -1;
switch (bond->params.xmit_policy) {
case BOND_XMIT_POLICY_ENCAP23:
case BOND_XMIT_POLICY_ENCAP34:
memset(fk, 0, sizeof(*fk));
return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
fk, NULL, 0, 0, 0, 0);
fk, data, l2_proto, nhoff, hlen, 0);
default:
break;
}
fk->ports.ports = 0;
memset(&fk->icmp, 0, sizeof(fk->icmp));
noff = skb_network_offset(skb);
if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
return false;
/* ICMP error packets contains at least 8 bytes of the header
@ -3705,22 +3788,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
* to correlate ICMP error packets within the same flow which
* generated the error.
*/
if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
skb_transport_offset(skb),
skb_headlen(skb));
if (proto == IPPROTO_ICMP) {
if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
if (ip_proto == IPPROTO_ICMP) {
if (!icmp_is_err(fk->icmp.type))
return true;
noff += sizeof(struct icmphdr);
} else if (proto == IPPROTO_ICMPV6) {
nhoff += sizeof(struct icmphdr);
} else if (ip_proto == IPPROTO_ICMPV6) {
if (!icmpv6_is_err(fk->icmp.type))
return true;
noff += sizeof(struct icmp6hdr);
nhoff += sizeof(struct icmp6hdr);
}
return bond_flow_ip(skb, fk, &noff, &proto, l34);
return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
}
return true;
@ -3736,6 +3817,36 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
return hash >> 1;
}
/* Generate hash based on xmit policy. If @skb is given it is used to linearize
* the data as required, but this function can be used without it if the data is
* known to be linear (e.g. with xdp_buff).
*/
static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data,
__be16 l2_proto, int mhoff, int nhoff, int hlen)
{
struct flow_keys flow;
u32 hash;
if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
!bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
return bond_eth_hash(skb, data, mhoff, hlen);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
hash = bond_eth_hash(skb, data, mhoff, hlen);
} else {
if (flow.icmp.id)
memcpy(&hash, &flow.icmp, sizeof(hash));
else
memcpy(&hash, &flow.ports.ports, sizeof(hash));
}
return bond_ip_hash(hash, &flow);
}
/**
* bond_xmit_hash - generate a hash value based on the xmit policy
* @bond: bonding device
@ -3746,31 +3857,33 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
*/
u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
{
struct flow_keys flow;
u32 hash;
if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
skb->l4_hash)
return skb->hash;
if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
return bond_vlan_srcmac_hash(skb);
return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
skb->mac_header, skb->network_header,
skb_headlen(skb));
}
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
!bond_flow_dissect(bond, skb, &flow))
return bond_eth_hash(skb);
/**
* bond_xmit_hash_xdp - generate a hash value based on the xmit policy
* @bond: bonding device
* @xdp: buffer to use for headers
*
* The XDP variant of bond_xmit_hash.
*/
static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
{
struct ethhdr *eth;
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
hash = bond_eth_hash(skb);
} else {
if (flow.icmp.id)
memcpy(&hash, &flow.icmp, sizeof(hash));
else
memcpy(&hash, &flow.ports.ports, sizeof(hash));
}
if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
return 0;
return bond_ip_hash(hash, &flow);
eth = (struct ethhdr *)xdp->data;
return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
sizeof(struct ethhdr), xdp->data_end - xdp->data);
}
/*-------------------------- Device entry points ----------------------------*/
@ -4421,6 +4534,47 @@ non_igmp:
return NULL;
}
static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
struct xdp_buff *xdp)
{
struct slave *slave;
int slave_cnt;
u32 slave_id;
const struct ethhdr *eth;
void *data = xdp->data;
if (data + sizeof(struct ethhdr) > xdp->data_end)
goto non_igmp;
eth = (struct ethhdr *)data;
data += sizeof(struct ethhdr);
/* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
if (eth->h_proto == htons(ETH_P_IP)) {
const struct iphdr *iph;
if (data + sizeof(struct iphdr) > xdp->data_end)
goto non_igmp;
iph = (struct iphdr *)data;
if (iph->protocol == IPPROTO_IGMP) {
slave = rcu_dereference(bond->curr_active_slave);
if (slave)
return slave;
return bond_get_slave_by_id(bond, 0);
}
}
non_igmp:
slave_cnt = READ_ONCE(bond->slave_cnt);
if (likely(slave_cnt)) {
slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
return bond_get_slave_by_id(bond, slave_id);
}
return NULL;
}
static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
struct net_device *bond_dev)
{
@ -4434,8 +4588,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
return bond_tx_drop(bond_dev, skb);
}
static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
struct sk_buff *skb)
static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
{
return rcu_dereference(bond->curr_active_slave);
}
@ -4449,7 +4602,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
slave = bond_xmit_activebackup_slave_get(bond, skb);
slave = bond_xmit_activebackup_slave_get(bond);
if (slave)
return bond_dev_queue_xmit(bond, skb, slave->dev);
@ -4637,6 +4790,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
return slave;
}
static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
struct xdp_buff *xdp)
{
struct bond_up_slave *slaves;
unsigned int count;
u32 hash;
hash = bond_xmit_hash_xdp(bond, xdp);
slaves = rcu_dereference(bond->usable_slaves);
count = slaves ? READ_ONCE(slaves->count) : 0;
if (unlikely(!count))
return NULL;
return slaves->arr[hash % count];
}
/* Use this Xmit function for 3AD as well as XOR modes. The current
* usable slave array is formed in the control path. The xmit function
* just calculates hash and sends the packet out.
@ -4747,7 +4916,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
slave = bond_xmit_roundrobin_slave_get(bond, skb);
break;
case BOND_MODE_ACTIVEBACKUP:
slave = bond_xmit_activebackup_slave_get(bond, skb);
slave = bond_xmit_activebackup_slave_get(bond);
break;
case BOND_MODE_8023AD:
case BOND_MODE_XOR:
@ -4921,6 +5090,174 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}
static struct net_device *
bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
/* Caller needs to hold rcu_read_lock() */
switch (BOND_MODE(bond)) {
case BOND_MODE_ROUNDROBIN:
slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
break;
case BOND_MODE_ACTIVEBACKUP:
slave = bond_xmit_activebackup_slave_get(bond);
break;
case BOND_MODE_8023AD:
case BOND_MODE_XOR:
slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
break;
default:
/* Should never happen. Mode guarded by bond_xdp_check() */
netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
WARN_ON_ONCE(1);
return NULL;
}
if (slave)
return slave->dev;
return NULL;
}
static int bond_xdp_xmit(struct net_device *bond_dev,
int n, struct xdp_frame **frames, u32 flags)
{
int nxmit, err = -ENXIO;
rcu_read_lock();
for (nxmit = 0; nxmit < n; nxmit++) {
struct xdp_frame *frame = frames[nxmit];
struct xdp_frame *frames1[] = {frame};
struct net_device *slave_dev;
struct xdp_buff xdp;
xdp_convert_frame_to_buff(frame, &xdp);
slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
if (!slave_dev) {
err = -ENXIO;
break;
}
err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags);
if (err < 1)
break;
}
rcu_read_unlock();
/* If error happened on the first frame then we can pass the error up, otherwise
* report the number of frames that were xmitted.
*/
if (err < 0)
return (nxmit == 0 ? err : nxmit);
return nxmit;
}
static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
struct bonding *bond = netdev_priv(dev);
struct list_head *iter;
struct slave *slave, *rollback_slave;
struct bpf_prog *old_prog;
struct netdev_bpf xdp = {
.command = XDP_SETUP_PROG,
.flags = 0,
.prog = prog,
.extack = extack,
};
int err;
ASSERT_RTNL();
if (!bond_xdp_check(bond))
return -EOPNOTSUPP;
old_prog = bond->xdp_prog;
bond->xdp_prog = prog;
bond_for_each_slave(bond, slave, iter) {
struct net_device *slave_dev = slave->dev;
if (!slave_dev->netdev_ops->ndo_bpf ||
!slave_dev->netdev_ops->ndo_xdp_xmit) {
NL_SET_ERR_MSG(extack, "Slave device does not support XDP");
slave_err(dev, slave_dev, "Slave does not support XDP\n");
err = -EOPNOTSUPP;
goto err;
}
if (dev_xdp_prog_count(slave_dev) > 0) {
NL_SET_ERR_MSG(extack,
"Slave has XDP program loaded, please unload before enslaving");
slave_err(dev, slave_dev,
"Slave has XDP program loaded, please unload before enslaving\n");
err = -EOPNOTSUPP;
goto err;
}
err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
if (err < 0) {
/* ndo_bpf() sets extack error message */
slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
goto err;
}
if (prog)
bpf_prog_inc(prog);
}
if (old_prog)
bpf_prog_put(old_prog);
if (prog)
static_branch_inc(&bpf_master_redirect_enabled_key);
else
static_branch_dec(&bpf_master_redirect_enabled_key);
return 0;
err:
/* unwind the program changes */
bond->xdp_prog = old_prog;
xdp.prog = old_prog;
xdp.extack = NULL; /* do not overwrite original error */
bond_for_each_slave(bond, rollback_slave, iter) {
struct net_device *slave_dev = rollback_slave->dev;
int err_unwind;
if (slave == rollback_slave)
break;
err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
if (err_unwind < 0)
slave_err(dev, slave_dev,
"Error %d when unwinding XDP program change\n", err_unwind);
else if (xdp.prog)
bpf_prog_inc(xdp.prog);
}
return err;
}
static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return bond_xdp_set(dev, xdp->prog, xdp->extack);
default:
return -EINVAL;
}
}
static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
{
if (speed == 0 || speed == SPEED_UNKNOWN)
@ -5009,6 +5346,9 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_features_check = passthru_features_check,
.ndo_get_xmit_slave = bond_xmit_get_slave,
.ndo_sk_get_lower_dev = bond_sk_get_lower_dev,
.ndo_bpf = bond_xdp,
.ndo_xdp_xmit = bond_xdp_xmit,
.ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
};
static const struct device_type bond_type = {


@ -776,6 +776,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
DECLARE_BPF_DISPATCHER(xdp)
DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
u32 xdp_master_redirect(struct xdp_buff *xdp);
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
{
@ -783,7 +787,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
* under local_bh_disable(), which provides the needed RCU protection
* for accessing map entries.
*/
return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
act = xdp_master_redirect(xdp);
}
return act;
}
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
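
The hunk above is the core of the RX fast path: when the bonding static key is enabled and the receiving device is a bond slave, an XDP_TX verdict is turned into a redirect towards the slave chosen by the bond's xmit policy (when that slave differs from the receiving one). The XDP program itself needs no changes; a minimal sketch, mirroring the xdp_tx.c selftest further down (program name is illustrative):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_tx_prog(struct xdp_md *xdp)
{
	/* On a bond slave, bpf_prog_run_xdp() above may rewrite this
	 * XDP_TX into an XDP_REDIRECT towards the slave selected by
	 * the bond's ndo_xdp_get_xmit_slave(). */
	return XDP_TX;
}

char _license[] SEC("license") = "GPL";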


@ -1318,6 +1318,9 @@ struct netdev_net_notifier {
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
* struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
* struct xdp_buff *xdp);
* Get the xmit slave of master device based on the xdp_buff.
* int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
* This function is used to wake up the softirq, ksoftirqd or kthread
* responsible for sending and/or receiving packets on a specific
@ -1545,6 +1548,8 @@ struct net_device_ops {
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
struct xdp_frame **xdp,
u32 flags);
struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev,
struct xdp_buff *xdp);
int (*ndo_xsk_wakeup)(struct net_device *dev,
u32 queue_id, u32 flags);
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
@ -4076,6 +4081,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, int expected_fd, u32 flags);
int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
u8 dev_xdp_prog_count(struct net_device *dev);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);


@ -259,6 +259,7 @@ struct bonding {
/* protecting ipsec_list */
spinlock_t ipsec_lock;
#endif /* CONFIG_XFRM_OFFLOAD */
struct bpf_prog *xdp_prog;
};
#define bond_slave_get_rcu(dev) \


@ -1562,7 +1562,7 @@ select_insn:
if (unlikely(index >= array->map.max_entries))
goto out;
if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
goto out;
tail_call_cnt++;
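
This interpreter hunk changes the tail call limit check from > to >= MAX_TAIL_CALL_CNT; the new tail call test suite (first entry in the shortlog above) exercises exactly this limit. For reference, a hedged BPF C sketch of a tail call through a prog array (map and program names are illustrative; the array is normally populated from userspace):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

SEC("xdp")
int entry(struct xdp_md *ctx)
{
	/* Jump to the program stored at index 0, if any; each nested
	 * jump bumps tail_call_cnt, which interpreter and JITs cap at
	 * MAX_TAIL_CALL_CNT. */
	bpf_tail_call(ctx, &jmp_table, 0);

	/* Only reached if the tail call fails (empty slot or limit hit). */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";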


@ -534,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
}
static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
int exclude_ifindex)
static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
{
if (!obj || obj->dev->ifindex == exclude_ifindex ||
if (!obj ||
!obj->dev->netdev_ops->ndo_xdp_xmit)
return false;
@ -562,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
return 0;
}
static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
{
while (num_excluded--) {
if (ifindex == excluded[num_excluded])
return true;
}
return false;
}
/* Get ifindex of each upper device. 'indexes' must be able to hold at
* least MAX_NEST_DEV elements.
* Returns the number of ifindexes added.
*/
static int get_upper_ifindexes(struct net_device *dev, int *indexes)
{
struct net_device *upper;
struct list_head *iter;
int n = 0;
netdev_for_each_upper_dev_rcu(dev, upper, iter) {
indexes[n++] = upper->ifindex;
}
return n;
}
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct xdp_frame *xdpf;
int num_excluded = 0;
unsigned int i;
int err;
if (exclude_ingress) {
num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
excluded_devices[num_excluded++] = dev_rx->ifindex;
}
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
@ -581,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
if (!is_valid_dst(dst, xdp, exclude_ifindex))
if (!is_valid_dst(dst, xdp))
continue;
if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@ -601,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_rcu(dst, head, index_hlist,
lockdep_is_held(&dtab->index_lock)) {
if (!is_valid_dst(dst, xdp, exclude_ifindex))
if (!is_valid_dst(dst, xdp))
continue;
if (is_ifindex_excluded(excluded_devices, num_excluded,
dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@ -675,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct hlist_node *next;
int num_excluded = 0;
unsigned int i;
int err;
if (exclude_ingress) {
num_excluded = get_upper_ifindexes(dev, excluded_devices);
excluded_devices[num_excluded++] = dev->ifindex;
}
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
if (!dst || dst->dev->ifindex == exclude_ifindex)
if (!dst)
continue;
if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@ -700,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
return err;
last_dst = dst;
}
} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
for (i = 0; i < dtab->n_buckets; i++) {
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_safe(dst, next, head, index_hlist) {
if (!dst || dst->dev->ifindex == exclude_ifindex)
if (!dst)
continue;
if (is_ifindex_excluded(excluded_devices, num_excluded,
dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */

[File diff suppressed because it is too large]


@ -763,8 +763,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
prog->expected_attach_type == BPF_XDP_CPUMAP)
return -EINVAL;
if (kattr->test.ctx_in || kattr->test.ctx_out)
return -EINVAL;
ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
if (IS_ERR(ctx))
return PTR_ERR(ctx);


@ -7532,7 +7532,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
{
struct netdev_adjacent *lower;
WARN_ON_ONCE(!rcu_read_lock_held());
WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
@ -9297,7 +9297,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
return dev->xdp_state[mode].prog;
}
static u8 dev_xdp_prog_count(struct net_device *dev)
u8 dev_xdp_prog_count(struct net_device *dev)
{
u8 count = 0;
int i;
@ -9307,6 +9307,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev)
count++;
return count;
}
EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
@ -9400,6 +9401,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
{
unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
struct bpf_prog *cur_prog;
struct net_device *upper;
struct list_head *iter;
enum bpf_xdp_mode mode;
bpf_op_t bpf_op;
int err;
@ -9438,6 +9441,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
return -EBUSY;
}
/* don't allow if an upper device already has a program */
netdev_for_each_upper_dev_rcu(dev, upper, iter) {
if (dev_xdp_prog_count(upper) > 0) {
NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
return -EEXIST;
}
}
cur_prog = dev_xdp_prog(dev, mode);
/* can't replace attached prog with link */
if (link && cur_prog) {


@ -3933,6 +3933,31 @@ void bpf_clear_redirect_map(struct bpf_map *map)
}
}
DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
u32 xdp_master_redirect(struct xdp_buff *xdp)
{
struct net_device *master, *slave;
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
if (slave && slave != xdp->rxq->dev) {
/* The target device is different from the receiving device, so
* redirect it to the new device.
* Using XDP_REDIRECT gets the correct behaviour from XDP enabled
* drivers to unmap the packet from their rx ring.
*/
ri->tgt_index = slave->ifindex;
ri->map_id = INT_MAX;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
return XDP_REDIRECT;
}
return XDP_TX;
}
EXPORT_SYMBOL_GPL(xdp_master_redirect);
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{


@ -105,6 +105,9 @@ static void unix_bpf_check_needs_rebuild(struct proto *ops)
int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
if (sk->sk_type != SOCK_DGRAM)
return -EOPNOTSUPP;
if (restore) {
sk->sk_write_space = psock->saved_write_space;
WRITE_ONCE(sk->sk_prot, psock->sk_proto);


@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx)
h_proto = eth->h_proto;
/* Handle VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx)
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
/* Handle double VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;


@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx)
h_proto = eth->h_proto;
/* Handle VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx)
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
/* Handle double VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;


@ -841,7 +841,7 @@ int main(int argc, char **argv)
memset(cpu, 0, n_cpus * sizeof(int));
/* Parse commands line args */
while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':


@ -1,12 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2018 Intel Corporation. */
#include <asm/barrier.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <linux/bpf.h>
#include <linux/compiler.h>
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
@ -653,17 +651,15 @@ out:
return result;
}
__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries.
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
return (__force __sum16)~do_csum(iph, ihl * 4);
return (__sum16)~do_csum(iph, ihl * 4);
}
/*
@ -673,11 +669,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
*/
static inline __sum16 csum_fold(__wsum csum)
{
u32 sum = (__force u32)csum;
u32 sum = (u32)csum;
sum = (sum & 0xffff) + (sum >> 16);
sum = (sum & 0xffff) + (sum >> 16);
return (__force __sum16)~sum;
return (__sum16)~sum;
}
/*
@ -703,16 +699,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
__u32 len, __u8 proto, __wsum sum)
{
unsigned long long s = (__force u32)sum;
unsigned long long s = (u32)sum;
s += (__force u32)saddr;
s += (__force u32)daddr;
s += (u32)saddr;
s += (u32)daddr;
#ifdef __BIG_ENDIAN__
s += proto + len;
#else
s += (proto + len) << 8;
#endif
return (__force __wsum)from64to32(s);
return (__wsum)from64to32(s);
}
/*


@ -23,7 +23,6 @@ test_skb_cgroup_id_user
test_cgroup_storage
test_flow_dissector
flow_dissector_load
test_netcnt
test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user


@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_verifier_log test_dev_cgroup \
test_sock test_sockmap get_cgroup_id_user \
test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sysctl \
test_tcpnotify_user test_sysctl \
test_progs-no_alu32
# Also test bpf-gcc, if present
@ -197,7 +197,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c


@ -310,3 +310,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
}
return -1;
}
char *ping_command(int family)
{
if (family == AF_INET6) {
/* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
if (!system("which ping6 >/dev/null 2>&1"))
return "ping6";
else
return "ping -6";
}
return "ping";
}


@ -46,5 +46,6 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
char *ping_command(int family);
#endif


@ -0,0 +1,82 @@
// SPDX-License-Identifier: GPL-2.0
#include <sys/sysinfo.h>
#include <test_progs.h>
#include "network_helpers.h"
#include "netcnt_prog.skel.h"
#include "netcnt_common.h"
#define CG_NAME "/netcnt"
void test_netcnt(void)
{
union percpu_net_cnt *percpu_netcnt = NULL;
struct bpf_cgroup_storage_key key;
int map_fd, percpu_map_fd;
struct netcnt_prog *skel;
unsigned long packets;
union net_cnt netcnt;
unsigned long bytes;
int cpu, nproc;
int cg_fd = -1;
char cmd[128];
skel = netcnt_prog__open_and_load();
if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
return;
nproc = get_nprocs_conf();
percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
goto err;
cg_fd = test__join_cgroup(CG_NAME);
if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
goto err;
skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
"attach_cgroup(bpf_nextcnt)"))
goto err;
snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
ASSERT_OK(system(cmd), cmd);
map_fd = bpf_map__fd(skel->maps.netcnt);
if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
goto err;
if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
goto err;
percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
"bpf_map_lookup_elem(percpu_netcnt)"))
goto err;
/* Some packets can be still in per-cpu cache, but not more than
* MAX_PERCPU_PACKETS.
*/
packets = netcnt.packets;
bytes = netcnt.bytes;
for (cpu = 0; cpu < nproc; cpu++) {
ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
packets += percpu_netcnt[cpu].packets;
bytes += percpu_netcnt[cpu].bytes;
}
/* No packets should be lost */
ASSERT_EQ(packets, 10000, "packets");
/* Let's check that bytes counter matches the number of packets
* multiplied by the size of ipv6 ICMP packet.
*/
ASSERT_EQ(bytes, packets * 104, "bytes");
err:
if (cg_fd != -1)
close(cg_fd);
free(percpu_netcnt);
netcnt_prog__destroy(skel);
}


@ -34,8 +34,8 @@ void test_reference_tracking(void)
if (!test__start_subtest(title))
continue;
/* Expect verifier failure if test name has 'fail' */
if (strstr(title, "fail") != NULL) {
/* Expect verifier failure if test name has 'err' */
if (strstr(title, "err_") != NULL) {
libbpf_print_fn_t old_print_fn;
old_print_fn = libbpf_set_print(NULL);


@ -390,18 +390,6 @@ done:
close(client_fd);
}
static char *ping_command(int family)
{
if (family == AF_INET6) {
/* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
if (!system("which ping6 >/dev/null 2>&1"))
return "ping6";
else
return "ping -6";
}
return "ping";
}
static int test_ping(int family, const char *addr)
{
SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);


@ -0,0 +1,520 @@
// SPDX-License-Identifier: GPL-2.0
/**
* Test XDP bonding support
*
* Sets up two bonded veth pairs between two fresh namespaces
* and verifies that an XDP_TX program loaded on a bond device
* is correctly loaded onto the slave devices and that XDP_TX'd
* packets are balanced using bonding.
*/
#define _GNU_SOURCE
#include <sched.h>
#include <net/if.h>
#include <linux/if_link.h>
#include "test_progs.h"
#include "network_helpers.h"
#include <linux/if_bonding.h>
#include <linux/limits.h>
#include <linux/udp.h>
#include "xdp_dummy.skel.h"
#include "xdp_redirect_multi_kern.skel.h"
#include "xdp_tx.skel.h"
#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
#define BOND1_MAC_STR "00:11:22:33:44:55"
#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
#define BOND2_MAC_STR "00:22:33:44:55:66"
#define NPACKETS 100
static int root_netns_fd = -1;
static void restore_root_netns(void)
{
ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
}
static int setns_by_name(char *name)
{
int nsfd, err;
char nspath[PATH_MAX];
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
if (nsfd < 0)
return -1;
err = setns(nsfd, CLONE_NEWNET);
close(nsfd);
return err;
}
static int get_rx_packets(const char *iface)
{
FILE *f;
char line[512];
int iface_len = strlen(iface);
f = fopen("/proc/net/dev", "r");
if (!f)
return -1;
while (fgets(line, sizeof(line), f)) {
char *p = line;
while (*p == ' ')
p++; /* skip whitespace */
if (!strncmp(p, iface, iface_len)) {
p += iface_len;
if (*p++ != ':')
continue;
while (*p == ' ')
p++; /* skip whitespace */
while (*p && *p != ' ')
p++; /* skip rx bytes */
while (*p == ' ')
p++; /* skip whitespace */
fclose(f);
return atoi(p);
}
}
fclose(f);
return -1;
}
#define MAX_BPF_LINKS 8
struct skeletons {
struct xdp_dummy *xdp_dummy;
struct xdp_tx *xdp_tx;
struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
int nlinks;
struct bpf_link *links[MAX_BPF_LINKS];
};
static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
{
struct bpf_link *link;
int ifindex;
ifindex = if_nametoindex(iface);
if (!ASSERT_GT(ifindex, 0, "get ifindex"))
return -1;
if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached"))
return -1;
link = bpf_program__attach_xdp(prog, ifindex);
if (!ASSERT_OK_PTR(link, "attach xdp program"))
return -1;
skeletons->links[skeletons->nlinks++] = link;
return 0;
}
enum {
BOND_ONE_NO_ATTACH = 0,
BOND_BOTH_AND_ATTACH,
};
static const char * const mode_names[] = {
[BOND_MODE_ROUNDROBIN] = "balance-rr",
[BOND_MODE_ACTIVEBACKUP] = "active-backup",
[BOND_MODE_XOR] = "balance-xor",
[BOND_MODE_BROADCAST] = "broadcast",
[BOND_MODE_8023AD] = "802.3ad",
[BOND_MODE_TLB] = "balance-tlb",
[BOND_MODE_ALB] = "balance-alb",
};
static const char * const xmit_policy_names[] = {
[BOND_XMIT_POLICY_LAYER2] = "layer2",
[BOND_XMIT_POLICY_LAYER34] = "layer3+4",
[BOND_XMIT_POLICY_LAYER23] = "layer2+3",
[BOND_XMIT_POLICY_ENCAP23] = "encap2+3",
[BOND_XMIT_POLICY_ENCAP34] = "encap3+4",
};
static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
int bond_both_attach)
{
#define SYS(fmt, ...) \
({ \
char cmd[1024]; \
snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
if (!ASSERT_OK(system(cmd), cmd)) \
return -1; \
})
SYS("ip netns add ns_dst");
SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
mode_names[mode], xmit_policy_names[xmit_policy]);
SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
mode_names[mode], xmit_policy_names[xmit_policy]);
SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
SYS("ip link set veth1_1 master bond1");
if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
SYS("ip link set veth1_2 master bond1");
} else {
SYS("ip link set veth1_2 up addrgenmode none");
if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
return -1;
}
SYS("ip -netns ns_dst link set veth2_1 master bond2");
if (bond_both_attach == BOND_BOTH_AND_ATTACH)
SYS("ip -netns ns_dst link set veth2_2 master bond2");
else
SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
/* Load a dummy program on the sending side as the veth peer needs to have
* an XDP program loaded as well.
*/
if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
return -1;
if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
return -1;
if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
return -1;
restore_root_netns();
}
return 0;
#undef SYS
}
static void bonding_cleanup(struct skeletons *skeletons)
{
restore_root_netns();
while (skeletons->nlinks) {
skeletons->nlinks--;
bpf_link__destroy(skeletons->links[skeletons->nlinks]);
}
ASSERT_OK(system("ip link delete bond1"), "delete bond1");
ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
}
static int send_udp_packets(int vary_dst_ip)
{
struct ethhdr eh = {
.h_source = BOND1_MAC,
.h_dest = BOND2_MAC,
.h_proto = htons(ETH_P_IP),
};
uint8_t buf[128] = {};
struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
int i, s = -1;
int ifindex;
s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
if (!ASSERT_GE(s, 0, "socket"))
goto err;
ifindex = if_nametoindex("bond1");
if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
goto err;
memcpy(buf, &eh, sizeof(eh));
iph->ihl = 5;
iph->version = 4;
iph->tos = 16;
iph->id = 1;
iph->ttl = 64;
iph->protocol = IPPROTO_UDP;
iph->saddr = 1;
iph->daddr = 2;
iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
iph->check = 0;
for (i = 1; i <= NPACKETS; i++) {
int n;
struct sockaddr_ll saddr_ll = {
.sll_ifindex = ifindex,
.sll_halen = ETH_ALEN,
.sll_addr = BOND2_MAC,
};
/* vary the UDP destination port for even distribution with roundrobin/xor modes */
uh->dest++;
if (vary_dst_ip)
iph->daddr++;
n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
goto err;
}
return 0;
err:
if (s >= 0)
close(s);
return -1;
}
static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
{
int bond1_rx;
if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
goto out;
if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
goto out;
bond1_rx = get_rx_packets("bond1");
ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
switch (mode) {
case BOND_MODE_ROUNDROBIN:
case BOND_MODE_XOR: {
int veth1_rx = get_rx_packets("veth1_1");
int veth2_rx = get_rx_packets("veth1_2");
int diff = abs(veth1_rx - veth2_rx);
ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
switch (xmit_policy) {
case BOND_XMIT_POLICY_LAYER2:
ASSERT_GE(diff, NPACKETS,
"expected packets on only one of the interfaces");
break;
case BOND_XMIT_POLICY_LAYER23:
case BOND_XMIT_POLICY_LAYER34:
ASSERT_LT(diff, NPACKETS/2,
"expected even distribution of packets");
break;
default:
PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
break;
}
break;
}
case BOND_MODE_ACTIVEBACKUP: {
int veth1_rx = get_rx_packets("veth1_1");
int veth2_rx = get_rx_packets("veth1_2");
int diff = abs(veth1_rx - veth2_rx);
ASSERT_GE(diff, NPACKETS,
"expected packets on only one of the interfaces");
break;
}
default:
PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
break;
}
out:
bonding_cleanup(skeletons);
}
/* Test the broadcast redirection using xdp_redirect_map_multi_prog by adding
* all the interfaces to it and checking that broadcasting won't send the packet
* to either the ingress bond device (bond2) or its slave (veth2_1).
*/
static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
{
static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
int veth1_1_rx, veth1_2_rx;
int err;
if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
BOND_ONE_NO_ATTACH))
goto out;
if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
goto out;
/* populate the devmap with the relevant interfaces */
for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
int ifindex = if_nametoindex(ifaces[i]);
int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
goto out;
err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
if (!ASSERT_OK(err, "add interface to map_all"))
goto out;
}
if (xdp_attach(skeletons,
skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
"bond2"))
goto out;
restore_root_netns();
if (send_udp_packets(BOND_MODE_ROUNDROBIN))
goto out;
veth1_1_rx = get_rx_packets("veth1_1");
veth1_2_rx = get_rx_packets("veth1_2");
ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
out:
restore_root_netns();
bonding_cleanup(skeletons);
}
/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
static void test_xdp_bonding_attach(struct skeletons *skeletons)
{
struct bpf_link *link = NULL;
struct bpf_link *link2 = NULL;
int veth, bond;
int err;
if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
goto out;
if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
goto out;
veth = if_nametoindex("veth");
if (!ASSERT_GE(veth, 0, "if_nametoindex veth"))
goto out;
bond = if_nametoindex("bond");
if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
goto out;
/* enslaving with a XDP program loaded fails */
link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
if (!ASSERT_OK_PTR(link, "attach program to veth"))
goto out;
err = system("ip link set veth master bond");
if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail"))
goto out;
bpf_link__destroy(link);
link = NULL;
err = system("ip link set veth master bond");
if (!ASSERT_OK(err, "set veth master"))
goto out;
/* attaching to slave when master has no program is allowed */
link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
goto out;
/* attaching to master not allowed when slave has program loaded */
link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
goto out;
bpf_link__destroy(link);
link = NULL;
/* attaching XDP program to master allowed when slave has no program */
link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
if (!ASSERT_OK_PTR(link, "attach program to master"))
goto out;
/* attaching to slave not allowed when master has program loaded */
link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
ASSERT_ERR_PTR(link2, "attach program to slave when master has program");
out:
bpf_link__destroy(link);
bpf_link__destroy(link2);
system("ip link del veth");
system("ip link del bond");
}
static int libbpf_debug_print(enum libbpf_print_level level,
const char *format, va_list args)
{
if (level != LIBBPF_WARN)
vprintf(format, args);
return 0;
}
struct bond_test_case {
char *name;
int mode;
int xmit_policy;
};
static struct bond_test_case bond_test_cases[] = {
{ "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
{ "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
{ "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
{ "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
{ "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
};
void test_xdp_bonding(void)
{
libbpf_print_fn_t old_print_fn;
struct skeletons skeletons = {};
int i;
old_print_fn = libbpf_set_print(libbpf_debug_print);
root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
goto out;
skeletons.xdp_dummy = xdp_dummy__open_and_load();
if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
goto out;
skeletons.xdp_tx = xdp_tx__open_and_load();
if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
goto out;
skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
"xdp_redirect_multi_kern__open_and_load"))
goto out;
if (!test__start_subtest("xdp_bonding_attach"))
test_xdp_bonding_attach(&skeletons);
for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
struct bond_test_case *test_case = &bond_test_cases[i];
if (!test__start_subtest(test_case->name))
test_xdp_bonding_with_mode(
&skeletons,
test_case->mode,
test_case->xmit_policy);
}
if (!test__start_subtest("xdp_bonding_redirect_multi"))
test_xdp_bonding_redirect_multi(&skeletons);
out:
xdp_dummy__destroy(skeletons.xdp_dummy);
xdp_tx__destroy(skeletons.xdp_tx);
xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
libbpf_set_print(old_print_fn);
if (root_netns_fd >= 0)
close(root_netns_fd);
}


@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
}
BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
seq_num, src, srcp, destp, destp);
seq_num, src, srcp, dest, destp);
BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
state,
tp->write_seq - tp->snd_una, rx_queue,


@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
return 0;
}
SEC("classifier/fail_use_after_free")
SEC("classifier/err_use_after_free")
int bpf_sk_lookup_uaf(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
return family;
}
SEC("classifier/fail_modify_sk_pointer")
SEC("classifier/err_modify_sk_pointer")
int bpf_sk_lookup_modptr(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
return 0;
}
SEC("classifier/fail_modify_sk_or_null_pointer")
SEC("classifier/err_modify_sk_or_null_pointer")
int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
return 0;
}
SEC("classifier/fail_no_release")
SEC("classifier/err_no_release")
int bpf_sk_lookup_test2(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
return 0;
}
SEC("classifier/fail_release_twice")
SEC("classifier/err_release_twice")
int bpf_sk_lookup_test3(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
return 0;
}
SEC("classifier/fail_release_unchecked")
SEC("classifier/err_release_unchecked")
int bpf_sk_lookup_test4(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb)
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}
SEC("classifier/fail_no_release_subcall")
SEC("classifier/err_no_release_subcall")
int bpf_sk_lookup_test5(struct __sk_buff *skb)
{
lookup_no_release(skb);


@ -3,7 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
SEC("tx")
SEC("xdp")
int xdp_tx(struct xdp_md *xdp)
{
return XDP_TX;


@ -1,148 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/sysinfo.h>
#include <sys/time.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
#include "bpf_rlimit.h"
#include "netcnt_common.h"
#define BPF_PROG "./netcnt_prog.o"
#define TEST_CGROUP "/test-network-counters/"
static int bpf_find_map(const char *test, struct bpf_object *obj,
const char *name)
{
struct bpf_map *map;
map = bpf_object__find_map_by_name(obj, name);
if (!map) {
printf("%s:FAIL:map '%s' not found\n", test, name);
return -1;
}
return bpf_map__fd(map);
}
int main(int argc, char **argv)
{
union percpu_net_cnt *percpu_netcnt;
struct bpf_cgroup_storage_key key;
int map_fd, percpu_map_fd;
int error = EXIT_FAILURE;
struct bpf_object *obj;
int prog_fd, cgroup_fd;
unsigned long packets;
union net_cnt netcnt;
unsigned long bytes;
int cpu, nproc;
__u32 prog_cnt;
nproc = get_nprocs_conf();
percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
if (!percpu_netcnt) {
printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
goto err;
}
if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
&obj, &prog_fd)) {
printf("Failed to load bpf program\n");
goto out;
}
cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
if (cgroup_fd < 0)
goto err;
/* Attach bpf program */
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
printf("Failed to attach bpf program");
goto err;
}
if (system("which ping6 &>/dev/null") == 0)
assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
else
assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
&prog_cnt)) {
printf("Failed to query attached programs");
goto err;
}
map_fd = bpf_find_map(__func__, obj, "netcnt");
if (map_fd < 0) {
printf("Failed to find bpf map with net counters");
goto err;
}
percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
if (percpu_map_fd < 0) {
printf("Failed to find bpf map with percpu net counters");
goto err;
}
if (bpf_map_get_next_key(map_fd, NULL, &key)) {
printf("Failed to get key in cgroup storage\n");
goto err;
}
if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
printf("Failed to lookup cgroup storage\n");
goto err;
}
if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
printf("Failed to lookup percpu cgroup storage\n");
goto err;
}
/* Some packets can be still in per-cpu cache, but not more than
* MAX_PERCPU_PACKETS.
*/
packets = netcnt.packets;
bytes = netcnt.bytes;
for (cpu = 0; cpu < nproc; cpu++) {
if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
printf("Unexpected percpu value: %llu\n",
percpu_netcnt[cpu].packets);
goto err;
}
packets += percpu_netcnt[cpu].packets;
bytes += percpu_netcnt[cpu].bytes;
}
/* No packets should be lost */
if (packets != 10000) {
printf("Unexpected packet count: %lu\n", packets);
goto err;
}
/* Let's check that bytes counter matches the number of packets
* multiplied by the size of ipv6 ICMP packet.
*/
if (bytes != packets * 104) {
printf("Unexpected bytes count: %lu\n", bytes);
goto err;
}
error = 0;
printf("test_netcnt:PASS\n");
err:
cleanup_cgroup_environment();
free(percpu_netcnt);
out:
return error;
}


@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
trap cleanup EXIT