Merge branch 'netdevsim-link'

David Wei says:

====================
netdevsim: link and forward skbs between ports

This patchset adds the ability to link two netdevsim ports together and
forward skbs between them, similar to veth. The goal is to use netdevsim
for testing features e.g. zero copy Rx using io_uring.

This feature was tested locally on QEMU, and a selftest is included.

I ran netdev selftests CI style and all tests but the following passed:
- gro.sh
- l2tp.sh
- ip_local_port_range.sh

gro.sh fails because virtme-ng mounts as read-only and it tries to write
to log.txt. This issue was reported to virtme-ng upstream.

l2tp.sh and ip_local_port_range.sh both fail for me on net-next/main as
well.

---
v13->v14:
- implement ndo_get_iflink()
- fix returning 0 if peer is already linked during linking or not linked
  during unlinking
- bump dropped counter if nsim_ipsec_tx() fails and generally reorder
  nsim_start_xmit()
- fix overflowing lines and indentations

v12->v13:
- wait for socat listening port to be ready before sending data in
  selftest

v11->v12:
- fix leaked netns refs
- fix rtnetlink.sh kci_test_ipsec_offload() selftest

v10->v11:
- add udevadm settle after creating netdevsims in selftest

v9->v10:
- fix not freeing skb when there is no peer
- prevent possible id clashes in selftest
- cleanup selftest on error paths

v8->v9:
- switch to getting netns using fd rather than id
- prevent linking a netdevsim to itself
- update tests

v7->v8:
- fix not dereferencing RCU ptr using rcu_dereference()
- remove unused variables in selftest

v6->v7:
- change link syntax to netnsid:ifidx
- replace dev_get_by_index() with __dev_get_by_index()
- check for NULL peer when linking
- add a sysfs attribute for unlinking
- only update Tx stats if not dropped
- update selftest

v5->v6:
- reworked to link two netdevsims using sysfs attribute on the bus
  device instead of debugfs due to deadlock possibility if a netdevsim
  is removed during linking
- removed unnecessary patch maintaining a list of probed nsim_devs
- updated selftest

v4->v5:
- reduce nsim_dev_list_lock critical section
- fixed missing mutex unlock during unwind ladder
- rework nsim_dev_peer_write synchronization to take devlink lock as
  well as rtnl_lock
- return err msgs to user during linking if port doesn't exist or
  linking to self
- update tx stats outside of RCU lock

v3->v4:
- maintain a mutex protected list of probed nsim_devs instead of using
  nsim_bus_dev
- fixed synchronization issues by taking rtnl_lock
- track tx_dropped skbs

v2->v3:
- take lock when traversing nsim_bus_dev_list
- take device ref when getting a nsim_bus_dev
- return 0 if nsim_dev_peer_read cannot find the port
- address code formatting
- do not hard code values in selftests
- add Makefile for selftests

v1->v2:
- renamed debugfs file from "link" to "peer"
- replaced strsep() with sscanf() for consistency
- increased char[] buf sz to 22 for copying id + port from user
- added err msg w/ expected fmt when linking as a hint to user
- prevent linking port to itself
- protect peer ptr using RCU

====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2024-03-01 10:43:11 +00:00
commit 76f06cbd7b
6 changed files with 342 additions and 5 deletions

View file

@ -232,9 +232,154 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count)
}
static BUS_ATTR_WO(del_device);
/*
 * Store handler for the write-only "link_device" bus attribute.
 *
 * Expects "netnsfd_a:ifidx_a netnsfd_b:ifidx_b". Each netns fd is resolved
 * with get_net_ns_by_fd() and each ifindex is looked up in that namespace
 * while holding RTNL. Both devices must be netdevsims, must be distinct and
 * must not already have a peer; on success each one's ->peer is pointed at
 * the other.
 *
 * Returns @count on success, -EINVAL on malformed input or failed lookups,
 * and -EBUSY if either device is already linked.
 */
static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count)
{
	struct net_device *first_dev, *second_dev;
	unsigned int first_idx, second_idx;
	struct netdevsim *first, *second;
	struct net *first_ns, *second_ns;
	int first_fd, second_fd;
	ssize_t ret = -EINVAL;
	int nparsed;

	nparsed = sscanf(buf, "%d:%u %d:%u", &first_fd, &first_idx,
			 &second_fd, &second_idx);
	if (nparsed != 4) {
		pr_err("Format for linking two devices is \"netnsfd_a:ifidx_a netnsfd_b:ifidx_b\" (int uint int uint).\n");
		return -EINVAL;
	}

	first_ns = get_net_ns_by_fd(first_fd);
	if (IS_ERR(first_ns)) {
		pr_err("Could not find netns with fd: %d\n", first_fd);
		return -EINVAL;
	}

	second_ns = get_net_ns_by_fd(second_fd);
	if (IS_ERR(second_ns)) {
		pr_err("Could not find netns with fd: %d\n", second_fd);
		put_net(first_ns);
		return -EINVAL;
	}

	/* Lookups and the peer update below all happen under RTNL. */
	rtnl_lock();

	first_dev = __dev_get_by_index(first_ns, first_idx);
	if (!first_dev) {
		pr_err("Could not find device with ifindex %u in netnsfd %d\n",
		       first_idx, first_fd);
		goto unlock;
	}

	if (!netdev_is_nsim(first_dev)) {
		pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
		       first_idx, first_fd);
		goto unlock;
	}

	second_dev = __dev_get_by_index(second_ns, second_idx);
	if (!second_dev) {
		pr_err("Could not find device with ifindex %u in netnsfd %d\n",
		       second_idx, second_fd);
		goto unlock;
	}

	if (!netdev_is_nsim(second_dev)) {
		pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
		       second_idx, second_fd);
		goto unlock;
	}

	if (first_dev == second_dev) {
		pr_err("Cannot link a netdevsim to itself\n");
		goto unlock;
	}

	/* From here on the only possible failure is "already linked". */
	ret = -EBUSY;

	first = netdev_priv(first_dev);
	if (rtnl_dereference(first->peer)) {
		pr_err("Netdevsim %d:%u is already linked\n", first_fd,
		       first_idx);
		goto unlock;
	}

	second = netdev_priv(second_dev);
	if (rtnl_dereference(second->peer)) {
		pr_err("Netdevsim %d:%u is already linked\n", second_fd,
		       second_idx);
		goto unlock;
	}

	rcu_assign_pointer(first->peer, second);
	rcu_assign_pointer(second->peer, first);
	ret = count;

unlock:
	put_net(second_ns);
	put_net(first_ns);
	rtnl_unlock();

	return ret;
}
static BUS_ATTR_WO(link_device);
/*
 * Store handler for the write-only "unlink_device" bus attribute.
 *
 * Expects "netnsfd:ifidx". The netns fd is resolved with get_net_ns_by_fd()
 * and the ifindex is looked up in that namespace while holding RTNL. If the
 * device is a netdevsim with a peer, both ends' ->peer pointers are cleared.
 *
 * Returns @count on success and -EINVAL on malformed input, a failed lookup,
 * a non-netdevsim device, or a device that is not currently linked.
 */
static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count)
{
	struct netdevsim *nsim, *peer;
	struct net_device *dev;
	unsigned int ifidx;
	int netnsfd, err;
	struct net *ns;

	/*
	 * netnsfd is a signed int, so it is parsed with %d. This matches the
	 * "(int uint)" format advertised below and the parsing done by
	 * link_device_store().
	 */
	err = sscanf(buf, "%d:%u", &netnsfd, &ifidx);
	if (err != 2) {
		pr_err("Format for unlinking a device is \"netnsfd:ifidx\" (int uint).\n");
		return -EINVAL;
	}

	ns = get_net_ns_by_fd(netnsfd);
	if (IS_ERR(ns)) {
		pr_err("Could not find netns with fd: %d\n", netnsfd);
		return -EINVAL;
	}

	err = -EINVAL;
	rtnl_lock();
	dev = __dev_get_by_index(ns, ifidx);
	if (!dev) {
		pr_err("Could not find device with ifindex %u in netnsfd %d\n",
		       ifidx, netnsfd);
		goto out_put_netns;
	}

	if (!netdev_is_nsim(dev)) {
		pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
		       ifidx, netnsfd);
		goto out_put_netns;
	}

	nsim = netdev_priv(dev);
	peer = rtnl_dereference(nsim->peer);
	/* Unlinking a device that has no peer is reported as -EINVAL. */
	if (!peer)
		goto out_put_netns;

	err = 0;
	RCU_INIT_POINTER(nsim->peer, NULL);
	RCU_INIT_POINTER(peer->peer, NULL);

out_put_netns:
	put_net(ns);
	rtnl_unlock();

	return !err ? count : err;
}
static BUS_ATTR_WO(unlink_device);
/*
 * NULL-terminated list of the attributes attached to the netdevsim bus:
 * device creation/removal plus the link/unlink controls used to pair two
 * netdevsim ports. Handed to ATTRIBUTE_GROUPS() under the "nsim_bus" name.
 */
static struct attribute *nsim_bus_attrs[] = {
&bus_attr_new_device.attr,
&bus_attr_del_device.attr,
&bus_attr_link_device.attr,
&bus_attr_unlink_device.attr,
NULL
};
ATTRIBUTE_GROUPS(nsim_bus);

View file

@ -29,18 +29,35 @@
/*
 * Transmit handler: forward @skb to the linked peer netdevsim, if any.
 *
 * The packet is dropped (and counted in tx_dropped) when nsim_ipsec_tx()
 * rejects it, when no peer is linked, or when dev_forward_skb() reports
 * NET_RX_DROP. Otherwise tx_packets/tx_bytes are updated. NETDEV_TX_OK is
 * returned in every case.
 */
static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	/*
	 * Sample the length up front: once the skb has been passed to
	 * dev_forward_skb() it is no longer ours to read.
	 */
	unsigned int len = skb->len;
	struct netdevsim *peer_ns;

	rcu_read_lock();
	if (!nsim_ipsec_tx(ns, skb))
		goto out_drop_free;

	peer_ns = rcu_dereference(ns->peer);
	if (!peer_ns)
		goto out_drop_free;

	skb_tx_timestamp(skb);
	/*
	 * On NET_RX_DROP the skb is not freed here; presumably
	 * dev_forward_skb() has already consumed it, so only the drop
	 * counter is bumped.
	 */
	if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
		goto out_drop_cnt;

	rcu_read_unlock();
	/* Tx stats are updated outside of the RCU read-side section. */
	u64_stats_update_begin(&ns->syncp);
	ns->tx_packets++;
	ns->tx_bytes += len;
	u64_stats_update_end(&ns->syncp);
	return NETDEV_TX_OK;

out_drop_free:
	dev_kfree_skb(skb);
out_drop_cnt:
	rcu_read_unlock();
	u64_stats_update_begin(&ns->syncp);
	ns->tx_dropped++;
	u64_stats_update_end(&ns->syncp);
	return NETDEV_TX_OK;
}
@ -70,6 +87,7 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
start = u64_stats_fetch_begin(&ns->syncp);
stats->tx_bytes = ns->tx_bytes;
stats->tx_packets = ns->tx_packets;
stats->tx_dropped = ns->tx_dropped;
} while (u64_stats_fetch_retry(&ns->syncp, start));
}
@ -265,6 +283,21 @@ nsim_set_features(struct net_device *dev, netdev_features_t features)
return 0;
}
/*
 * ndo_get_iflink callback: report the ifindex of the linked peer's netdev,
 * or 0 when this netdevsim has no peer. The peer pointer is read under
 * rcu_read_lock().
 */
static int nsim_get_iflink(const struct net_device *dev)
{
	struct netdevsim *self = netdev_priv(dev);
	struct netdevsim *linked;
	int ifindex = 0;

	rcu_read_lock();
	linked = rcu_dereference(self->peer);
	if (linked)
		ifindex = READ_ONCE(linked->netdev->ifindex);
	rcu_read_unlock();

	return ifindex;
}
static const struct net_device_ops nsim_netdev_ops = {
.ndo_start_xmit = nsim_start_xmit,
.ndo_set_rx_mode = nsim_set_rx_mode,
@ -282,6 +315,7 @@ static const struct net_device_ops nsim_netdev_ops = {
.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
.ndo_setup_tc = nsim_setup_tc,
.ndo_set_features = nsim_set_features,
.ndo_get_iflink = nsim_get_iflink,
.ndo_bpf = nsim_bpf,
};
@ -302,7 +336,6 @@ static void nsim_setup(struct net_device *dev)
eth_hw_addr_random(dev);
dev->tx_queue_len = 0;
dev->flags |= IFF_NOARP;
dev->flags &= ~IFF_MULTICAST;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
IFF_NO_QUEUE;
@ -413,8 +446,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
void nsim_destroy(struct netdevsim *ns)
{
struct net_device *dev = ns->netdev;
struct netdevsim *peer;
rtnl_lock();
peer = rtnl_dereference(ns->peer);
if (peer)
RCU_INIT_POINTER(peer->peer, NULL);
RCU_INIT_POINTER(ns->peer, NULL);
unregister_netdevice(dev);
if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
nsim_macsec_teardown(ns);
@ -427,6 +465,11 @@ void nsim_destroy(struct netdevsim *ns)
free_netdev(dev);
}
/*
 * Tell whether @dev is a netdevsim device, i.e. whether its netdev_ops
 * pointer is this driver's nsim_netdev_ops table.
 */
bool netdev_is_nsim(struct net_device *dev)
{
	if (dev->netdev_ops != &nsim_netdev_ops)
		return false;

	return true;
}
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{

View file

@ -98,6 +98,7 @@ struct netdevsim {
u64 tx_packets;
u64 tx_bytes;
u64 tx_dropped;
struct u64_stats_sync syncp;
struct nsim_bus_dev *nsim_bus_dev;
@ -125,11 +126,13 @@ struct netdevsim {
} udp_ports;
struct nsim_ethtool ethtool;
struct netdevsim __rcu *peer;
};
struct netdevsim *
nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port);
void nsim_destroy(struct netdevsim *ns);
bool netdev_is_nsim(struct net_device *dev);
void nsim_ethtool_init(struct netdevsim *ns);

View file

@ -10,6 +10,7 @@ TEST_PROGS = devlink.sh \
fib.sh \
hw_stats_l3.sh \
nexthop.sh \
peer.sh \
psample.sh \
tc-mq-visibility.sh \
udp_tunnel_nic.sh \

View file

@ -0,0 +1,143 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
source ../../../net/net_helper.sh
NSIM_DEV_1_ID=$((256 + RANDOM % 256))
NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
NSIM_DEV_2_ID=$((512 + RANDOM % 256))
NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
# Return 0 if an executable socat is available, otherwise print a skip
# message and return 1.
socat_check()
{
	local socat_path

	socat_path=$(command -v socat)
	if [ -x "$socat_path" ]; then
		return 0
	fi

	echo "socat command not found. Skipping test"
	return 1
}
# Create the server (nssv) and client (nscl) namespaces, move one netdevsim
# port into each, assign addresses from 192.168.1.0/24 and bring the links up.
# Sets the globals NSIM_DEV_1_NAME and NSIM_DEV_2_NAME, which the rest of the
# script uses.
setup_ns()
{
# Abort the whole script on the first failing command in this function.
set -e
ip netns add nssv
ip netns add nscl
# The netdev name is the single subdirectory under <bus device>/net.
NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
-path $NSIM_DEV_1_SYS/net -exec basename {} \;)
NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
-path $NSIM_DEV_2_SYS/net -exec basename {} \;)
ip link set $NSIM_DEV_1_NAME netns nssv
ip link set $NSIM_DEV_2_NAME netns nscl
ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
# Back to explicit error checking for the test body.
set +e
}
# Delete the client and server namespaces created by setup_ns, client first.
cleanup_ns()
{
	local ns

	for ns in nscl nssv; do
		ip netns del $ns
	done
}
###
### Code start
###
# Print a failure message, remove everything the test created so far
# (both netdevsim bus devices and both namespaces) and exit with status 1.
peer_test_fail()
{
	echo "$1"
	echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
	echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL
	cleanup_ns
	exit 1
}

# Exit code 4 reports the test as skipped.
socat_check || exit 4

modprobe netdevsim

# linking

echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
udevadm settle

setup_ns

# "exec {VAR}<file" lets bash pick a free descriptor and store its number in
# VAR, so the variables need no initial value.
exec {NSIM_DEV_1_FD}</var/run/netns/nssv
NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex)

exec {NSIM_DEV_2_FD}</var/run/netns/nscl
NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex)

if echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null; then
	peer_test_fail "linking with non-existent netdevsim should fail"
fi

if echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null; then
	peer_test_fail "linking with non-existent netns fd should fail"
fi

if echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null; then
	peer_test_fail "linking with self should fail"
fi

if ! echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK; then
	peer_test_fail "linking netdevsim1 with netdevsim2 should succeed"
fi

# argument error checking

if echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null; then
	peer_test_fail "invalid arg should fail"
fi

# send/recv packets

tmp_file=$(mktemp)
ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file &
pid=$!
res=0

# Do not send before the listener is ready.
wait_local_port_listen nssv 1234 tcp

echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234

# "HI" plus the trailing newline written by echo.
count=$(cat $tmp_file | wc -c)
if [[ $count -ne 3 ]]; then
	echo "expected 3 bytes, got $count"
	res=1
fi

echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK

echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL

kill $pid
# Do not leave the capture file behind.
rm -f "$tmp_file"

echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL

cleanup_ns

modprobe -r netdevsim

exit $res

View file

@ -801,6 +801,8 @@ kci_test_ipsec_offload()
end_test "FAIL: ipsec_offload SA offload missing from list output"
fi
# we didn't create a peer, make sure we can Tx
ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55
# use ping to exercise the Tx path
ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null