selftests: bpf: tc-bpf flow shaping with EDT

Add a small test that shows how to shape a TCP flow in tc-bpf with EDT and ECN. Signed-off-by: Peter Oskolkov <posk@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2019-03-22 16:40:19 -07:00 · 2019-03-22 16:40:19 -07:00 · 7df5e3db8f
parent 315a202987
commit 7df5e3db8f
3 changed files with 210 additions and 1 deletions
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@ -53,7 +53,8 @@ TEST_PROGS := test_kmod.sh \
 	test_xdp_vlan.sh \
 	test_lwt_ip_encap.sh \
 	test_tcp_check_syncookie.sh \
-	test_tc_tunnel.sh
+	test_tc_tunnel.sh \
+	test_tc_edt.sh

 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
--- a/tools/testing/selftests/bpf/progs/test_tc_edt.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* the maximum delay we are willing to add (drop packets beyond that) */
+#define TIME_HORIZON_NS (2000 * 1000 * 1000)
+#define NS_PER_SEC 1000000000
+#define ECN_HORIZON_NS 5000000
+#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
+
+/* flow_key => last_tstamp timestamp used */
+struct bpf_map_def SEC("maps") flow_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(uint32_t),
+	.value_size = sizeof(uint64_t),
+	.max_entries = 1,
+};
+
+static inline int throttle_flow(struct __sk_buff *skb)
+{
+	int key = 0;
+	uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key);
+	uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC /
+			THROTTLE_RATE_BPS;
+	uint64_t now = bpf_ktime_get_ns();
+	uint64_t tstamp, next_tstamp = 0;
+
+	if (last_tstamp)
+		next_tstamp = *last_tstamp + delay_ns;
+
+	tstamp = skb->tstamp;
+	if (tstamp < now)
+		tstamp = now;
+
+	/* should we throttle? */
+	if (next_tstamp <= tstamp) {
+		if (bpf_map_update_elem(&flow_map, &key, &tstamp, BPF_ANY))
+			return TC_ACT_SHOT;
+		return TC_ACT_OK;
+	}
+
+	/* do not queue past the time horizon */
+	if (next_tstamp - now >= TIME_HORIZON_NS)
+		return TC_ACT_SHOT;
+
+	/* set ecn bit, if needed */
+	if (next_tstamp - now >= ECN_HORIZON_NS)
+		bpf_skb_ecn_set_ce(skb);
+
+	if (bpf_map_update_elem(&flow_map, &key, &next_tstamp, BPF_EXIST))
+		return TC_ACT_SHOT;
+	skb->tstamp = next_tstamp;
+
+	return TC_ACT_OK;
+}
+
+static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
+{
+	void *data_end = (void *)(long)skb->data_end;
+
+	/* drop malformed packets */
+	if ((void *)(tcp + 1) > data_end)
+		return TC_ACT_SHOT;
+
+	if (tcp->dest == bpf_htons(9000))
+		return throttle_flow(skb);
+
+	return TC_ACT_OK;
+}
+
+static inline int handle_ipv4(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph;
+	uint32_t ihl;
+
+	/* drop malformed packets */
+	if (data + sizeof(struct ethhdr) > data_end)
+		return TC_ACT_SHOT;
+	iph = (struct iphdr *)(data + sizeof(struct ethhdr));
+	if ((void *)(iph + 1) > data_end)
+		return TC_ACT_SHOT;
+	ihl = iph->ihl * 4;
+	if (((void *)iph) + ihl > data_end)
+		return TC_ACT_SHOT;
+
+	if (iph->protocol == IPPROTO_TCP)
+		return handle_tcp(skb, (struct tcphdr *)(((void *)iph) + ihl));
+
+	return TC_ACT_OK;
+}
+
+SEC("cls_test") int tc_prog(struct __sk_buff *skb)
+{
+	if (skb->protocol == bpf_htons(ETH_P_IP))
+		return handle_ipv4(skb);
+
+	return TC_ACT_OK;
+}
+
+char __license[] SEC("license") = "GPL";
--- a/tools/testing/selftests/bpf/test_tc_edt.sh
+++ b/tools/testing/selftests/bpf/test_tc_edt.sh
@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test installs a TC bpf program that throttles a TCP flow
+# with dst port = 9000 down to 5MBps. Then it measures actual
+# throughput of the flow.
+
+if [[ $EUID -ne 0 ]]; then
+	echo "This script must be run as root"
+	echo "FAIL"
+	exit 1
+fi
+
+# check that nc, dd, and timeout are present
+command -v nc >/dev/null 2>&1 || \
+	{ echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+	{ echo >&2 "nc is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+	{ echo >&2 "timeout is not available"; exit 1; }
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP_SRC="172.16.1.100"
+readonly IP_DST="172.16.2.100"
+
+cleanup()
+{
+	ip netns del ${NS_SRC}
+	ip netns del ${NS_DST}
+}
+
+trap cleanup EXIT
+
+set -e  # exit on error
+
+ip netns add "${NS_SRC}"
+ip netns add "${NS_DST}"
+ip link add veth_src type veth peer name veth_dst
+ip link set veth_src netns ${NS_SRC}
+ip link set veth_dst netns ${NS_DST}
+
+ip -netns ${NS_SRC} addr add ${IP_SRC}/24  dev veth_src
+ip -netns ${NS_DST} addr add ${IP_DST}/24  dev veth_dst
+
+ip -netns ${NS_SRC} link set dev veth_src up
+ip -netns ${NS_DST} link set dev veth_dst up
+
+ip -netns ${NS_SRC} route add ${IP_DST}/32  dev veth_src
+ip -netns ${NS_DST} route add ${IP_SRC}/32  dev veth_dst
+
+# set up TC on TX
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
+ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
+ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
+	bpf da obj test_tc_edt.o sec cls_test
+
+
+# start the listener
+ip netns exec ${NS_DST} bash -c \
+	"nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
+declare -i NC_PID=$!
+sleep 1
+
+declare -ir TIMEOUT=20
+declare -ir EXPECTED_BPS=5000000
+
+# run the load, capture RX bytes on DST
+declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
+	cat /sys/class/net/veth_dst/statistics/rx_bytes )
+
+set +e
+ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
+	bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
+set -e
+
+declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
+	cat /sys/class/net/veth_dst/statistics/rx_bytes )
+
+declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
+
+echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
+	awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
+		$1, ($2-$3)*100.0/$3}'
+
+# Pass the test if the actual bps is within 1% of the expected bps.
+# The difference is usually about 0.1% on a 20-sec test, and ==> zero
+# the longer the test runs.
+declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \
+	 awk 'function abs(x){return ((x < 0.0) ? -x : x)}
+	      {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
+		else { print "0"} }' )
+if [ "${RES}" == "0" ] ; then
+	echo "PASS"
+else
+	echo "FAIL"
+	exit 1
+fi