From 134611446dc657e1bbc73ca0e4e6b599df687db0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 9 Mar 2016 03:00:02 +0100 Subject: [PATCH 1/4] ip_tunnel: add support for setting flow label via collect metadata This patch extends udp_tunnel6_xmit_skb() to pass in the IPv6 flow label from call sites. Currently, there's no such option and it's always set to zero when writing ip6_flow_hdr(). Add a label member to ip_tunnel_key, so that flow-based tunnels via collect metadata frontends can make use of it. vxlan and geneve will be converted to add flow label support separately. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- drivers/net/vxlan.c | 2 +- include/net/dst_metadata.h | 5 ++++- include/net/ip_tunnels.h | 4 +++- include/net/udp_tunnel.h | 4 ++-- net/ipv6/ip6_udp_tunnel.c | 6 +++--- net/tipc/udp_media.c | 2 +- 7 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 6a0cbbe03e5d..89ccff79d76c 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1054,7 +1054,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = ttl ? : ip6_dst_hoplimit(dst); } udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, - &fl6.saddr, &fl6.daddr, prio, ttl, + &fl6.saddr, &fl6.daddr, prio, ttl, 0, sport, geneve->dst_port, !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); return NETDEV_TX_OK; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2399099e68cf..8bdcd5ea8424 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2066,7 +2066,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, - &saddr, &dst->sin6.sin6_addr, tos, ttl, + &saddr, &dst->sin6.sin6_addr, tos, ttl, 0, src_port, dst_port, !udp_sum); #endif } diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 84b833af6882..5db9f5910428 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -126,7 +126,7 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, ip_tunnel_key_init(&tun_dst->u.tun_info.key, iph->saddr, iph->daddr, iph->tos, iph->ttl, - 0, 0, tunnel_id, flags); + 0, 0, 0, tunnel_id, flags); return tun_dst; } @@ -152,8 +152,11 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, info->key.u.ipv6.src = ip6h->saddr; info->key.u.ipv6.dst = ip6h->daddr; + info->key.tos = ipv6_get_dsfield(ip6h); info->key.ttl = ip6h->hop_limit; + info->key.label = ip6_flowlabel(ip6h); + return tun_dst; } diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 0acd80fadb32..5dc2e454f866 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -48,6 +48,7 @@ struct ip_tunnel_key { __be16 tun_flags; u8 tos; /* TOS for IPv4, TC for IPv6 */ u8 ttl; /* TTL for IPv4, HL for IPv6 */ + __be32 label; /* Flow Label for IPv6 */ __be16 tp_src; __be16 tp_dst; }; @@ -181,7 +182,7 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, - u8 tos, u8 ttl, + u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, __be64 tun_id, __be16 tun_flags) { @@ -192,6 +193,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, 0, IP_TUNNEL_KEY_IPV4_PAD_LEN); key->tos = tos; key->ttl = ttl; + key->label = label; key->tun_flags = tun_flags; /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 97f5adb121a6..b83114077cee 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -88,8 +88,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, - __be16 dst_port, bool nocheck); + __u8 prio, __u8 ttl, __be32 label, + __be16 src_port, __be16 dst_port, bool nocheck); #endif void udp_tunnel_sock_release(struct socket *sock); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 14dacf1df529..a7520528ecd2 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -73,8 +73,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, - __be16 dst_port, bool nocheck) + __u8 prio, __u8 ttl, __be32 label, + __be16 src_port, __be16 dst_port, bool nocheck) { struct udphdr *uh; struct ipv6hdr *ip6h; @@ -98,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); ip6h = ipv6_hdr(skb); - ip6_flow_hdr(ip6h, prio, htonl(0)); + ip6_flow_hdr(ip6h, prio, label); ip6h->payload_len = htons(skb->len); ip6h->nexthdr = IPPROTO_UDP; ip6h->hop_limit = ttl; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 49b3c2ede7ab..c94f9a15e2cd 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -196,7 +196,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, ttl = ip6_dst_hoplimit(ndst); err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, ndst->dev, &src->ipv6, - &dst->ipv6, 0, ttl, src->udp_port, + &dst->ipv6, 0, ttl, 0, src->udp_port, dst->udp_port, false); #endif } From e7f70af111f086a20800ad2e17f544b2e3e0f375 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 9 Mar 2016 03:00:03 +0100 Subject: [PATCH 2/4] vxlan: support setting IPv6 flow label This work adds support for setting the IPv6 flow label for vxlan per device and through collect metadata (ip_tunnel_key) frontends. The vxlan dst cache does not need any special considerations here, for the cases where caches can be used, the label is static per cache. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 26 +++++++++++++++++++++----- include/net/vxlan.h | 1 + include/uapi/linux/if_link.h | 1 + 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 8bdcd5ea8424..8eda76f9e474 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1790,6 +1790,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, #if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, struct sk_buff *skb, int oif, u8 tos, + __be32 label, const struct in6_addr *daddr, struct in6_addr *saddr, struct dst_cache *dst_cache, @@ -1813,6 +1814,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, fl6.flowi6_tos = RT_TOS(tos); fl6.daddr = *daddr; fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr; + fl6.flowlabel = label; fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; @@ -1888,7 +1890,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; - __be32 vni; + __be32 vni, label; __be16 df = 0; __u8 tos, ttl; int err; @@ -1939,12 +1941,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (tos == 1) tos = ip_tunnel_get_dsfield(old_iph, skb); + label = vxlan->cfg.label; src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, vxlan->cfg.port_max, true); if (info) { ttl = info->key.ttl; tos = info->key.tos; + label = info->key.label; udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); if (info->options_len) @@ -2020,7 +2024,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, - &dst->sin6.sin6_addr, &saddr, + label, &dst->sin6.sin6_addr, &saddr, dst_cache, info); if (IS_ERR(ndst)) { netdev_dbg(dev, "no route to %pI6\n", @@ -2066,8 +2070,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, - &saddr, &dst->sin6.sin6_addr, tos, ttl, 0, - src_port, dst_port, !udp_sum); + &saddr, &dst->sin6.sin6_addr, tos, ttl, + label, src_port, dst_port, !udp_sum); #endif } @@ -2390,7 +2394,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) if (!vxlan->vn6_sock) return -EINVAL; ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos, - &info->key.u.ipv6.dst, + info->key.label, &info->key.u.ipv6.dst, &info->key.u.ipv6.src, NULL, info); if (IS_ERR(ndst)) return PTR_ERR(ndst); @@ -2505,6 +2509,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) }, [IFLA_VXLAN_TOS] = { .type = NLA_U8 }, [IFLA_VXLAN_TTL] = { .type = NLA_U8 }, + [IFLA_VXLAN_LABEL] = { .type = NLA_U32 }, [IFLA_VXLAN_LEARNING] = { .type = NLA_U8 }, [IFLA_VXLAN_AGEING] = { .type = NLA_U32 }, [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 }, @@ -2739,6 +2744,11 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, vxlan->flags |= VXLAN_F_IPV6; } + if (conf->label && !use_ipv6) { + pr_info("label only supported in use with IPv6\n"); + return -EINVAL; + } + if (conf->remote_ifindex) { lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex); dst->remote_ifindex = conf->remote_ifindex; @@ -2887,6 +2897,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_TTL]) conf.ttl = nla_get_u8(data[IFLA_VXLAN_TTL]); + if (data[IFLA_VXLAN_LABEL]) + conf.label = nla_get_be32(data[IFLA_VXLAN_LABEL]) & + IPV6_FLOWLABEL_MASK; + if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING])) conf.flags |= VXLAN_F_LEARN; @@ -2990,6 +3004,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ + nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ @@ -3053,6 +3068,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) || nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || + nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || nla_put_u8(skb, IFLA_VXLAN_LEARNING, !!(vxlan->flags & VXLAN_F_LEARN)) || nla_put_u8(skb, IFLA_VXLAN_PROXY, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 6eda4ed4d78b..a763c96ecde4 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -162,6 +162,7 @@ struct vxlan_config { u16 port_max; u8 tos; u8 ttl; + __be32 label; u32 flags; unsigned long age_interval; unsigned int addrmax; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d452cea59020..6bebc975031d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -456,6 +456,7 @@ enum { IFLA_VXLAN_GBP, IFLA_VXLAN_REMCSUM_NOPARTIAL, IFLA_VXLAN_COLLECT_METADATA, + IFLA_VXLAN_LABEL, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) From 8eb3b99554b82da968d1fbc00df9f3156c5e2d63 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 9 Mar 2016 03:00:04 +0100 Subject: [PATCH 3/4] geneve: support setting IPv6 flow label This work adds support for setting the IPv6 flow label for geneve per device and through collect metadata (ip_tunnel_key) frontends. Also here, the geneve dst cache does not need any special considerations, for the cases where caches can be used, the label is static per cache. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- drivers/net/geneve.c | 35 +++++++++++++++++++++++++++-------- include/uapi/linux/if_link.h | 1 + 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 89ccff79d76c..33185b9a435e 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -68,6 +68,7 @@ struct geneve_dev { u8 tos; /* TOS override */ union geneve_addr remote; /* IP address for link partner */ struct list_head next; /* geneve's per namespace list */ + __be32 label; /* IPv6 flowlabel override */ __be16 dst_port; bool collect_md; struct gro_cells gro_cells; @@ -846,6 +847,7 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, fl6->daddr = info->key.u.ipv6.dst; fl6->saddr = info->key.u.ipv6.src; fl6->flowi6_tos = RT_TOS(info->key.tos); + fl6->flowlabel = info->key.label; dst_cache = &info->dst_cache; } else { prio = geneve->tos; @@ -857,6 +859,7 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, } fl6->flowi6_tos = RT_TOS(prio); + fl6->flowlabel = geneve->label; fl6->daddr = geneve->remote.sin6.sin6_addr; dst_cache = &geneve->dst_cache; } @@ -998,6 +1001,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct flowi6 fl6; __u8 prio, ttl; __be16 sport; + __be32 label; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); u32 flags = geneve->flags; @@ -1041,6 +1045,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, prio = ip_tunnel_ecn_encap(key->tos, iip, skb); ttl = key->ttl; + label = info->key.label; } else { err = geneve6_build_skb(dst, skb, 0, geneve->vni, 0, NULL, flags, xnet); @@ -1052,9 +1057,11 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, if (!ttl && ipv6_addr_is_multicast(&fl6.daddr)) ttl = 1; ttl = ttl ? : ip6_dst_hoplimit(dst); + label = geneve->label; } + udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, - &fl6.saddr, &fl6.daddr, prio, ttl, 0, + &fl6.saddr, &fl6.daddr, prio, ttl, label, sport, geneve->dst_port, !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); return NETDEV_TX_OK; @@ -1238,6 +1245,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, + [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, @@ -1295,8 +1303,8 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, static int geneve_configure(struct net *net, struct net_device *dev, union geneve_addr *remote, - __u32 vni, __u8 ttl, __u8 tos, __be16 dst_port, - bool metadata, u32 flags) + __u32 vni, __u8 ttl, __u8 tos, __be32 label, + __be16 dst_port, bool metadata, u32 flags) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *t, *geneve = netdev_priv(dev); @@ -1306,7 +1314,7 @@ static int geneve_configure(struct net *net, struct net_device *dev, if (!remote) return -EINVAL; if (metadata && - (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl)) + (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl || label)) return -EINVAL; geneve->net = net; @@ -1321,10 +1329,14 @@ static int geneve_configure(struct net *net, struct net_device *dev, (remote->sa.sa_family == AF_INET6 && ipv6_addr_is_multicast(&remote->sin6.sin6_addr))) return -EINVAL; + if (label && remote->sa.sa_family != AF_INET6) + return -EINVAL; + geneve->remote = *remote; geneve->ttl = ttl; geneve->tos = tos; + geneve->label = label; geneve->dst_port = dst_port; geneve->collect_md = metadata; geneve->flags = flags; @@ -1367,6 +1379,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev, __u8 ttl = 0, tos = 0; bool metadata = false; union geneve_addr remote = geneve_remote_unspec; + __be32 label = 0; __u32 vni = 0; u32 flags = 0; @@ -1403,6 +1416,10 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (data[IFLA_GENEVE_TOS]) tos = nla_get_u8(data[IFLA_GENEVE_TOS]); + if (data[IFLA_GENEVE_LABEL]) + label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & + IPV6_FLOWLABEL_MASK; + if (data[IFLA_GENEVE_PORT]) dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]); @@ -1421,8 +1438,8 @@ static int geneve_newlink(struct net *net, struct net_device *dev, nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) flags |= GENEVE_F_UDP_ZERO_CSUM6_RX; - return geneve_configure(net, dev, &remote, vni, ttl, tos, dst_port, - metadata, flags); + return geneve_configure(net, dev, &remote, vni, ttl, tos, label, + dst_port, metadata, flags); } static void geneve_dellink(struct net_device *dev, struct list_head *head) @@ -1439,6 +1456,7 @@ static size_t geneve_get_size(const struct net_device *dev) nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ + nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ @@ -1469,7 +1487,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) } if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) || - nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos)) + nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos) || + nla_put_be32(skb, IFLA_GENEVE_LABEL, geneve->label)) goto nla_put_failure; if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port)) @@ -1521,7 +1540,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, return dev; err = geneve_configure(net, dev, &geneve_remote_unspec, - 0, 0, 0, htons(dst_port), true, + 0, 0, 0, 0, htons(dst_port), true, GENEVE_F_UDP_ZERO_CSUM6_RX); if (err) goto err; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6bebc975031d..249eef9a21bd 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -479,6 +479,7 @@ enum { IFLA_GENEVE_UDP_CSUM, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, + IFLA_GENEVE_LABEL, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) From 4018ab1875e0d00b84ac61bc15427136ad55849e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 9 Mar 2016 03:00:05 +0100 Subject: [PATCH 4/4] bpf: support flow label for bpf_skb_{set, get}_tunnel_key This patch extends bpf_tunnel_key with a tunnel_label member, that maps to ip_tunnel_key's label so underlying backends like vxlan and geneve can propagate the label to udp_tunnel6_xmit_skb(), where it's being set in the IPv6 header. It allows for having 20 more bits to encode/decode flow related meta information programmatically. Tested with vxlan and geneve. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + net/core/filter.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0e30b19012a5..924f537183fd 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -375,6 +375,7 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; + __u32 tunnel_label; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/net/core/filter.c b/net/core/filter.c index a66dc03c261f..6fc3893a6170 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1770,12 +1770,15 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) return -EPROTO; if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { + case offsetof(struct bpf_tunnel_key, tunnel_label): + goto set_compat; case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): /* Fixup deprecated structure layouts here, so we have * a common path later on. */ if (ip_tunnel_info_af(info) != AF_INET) return -EINVAL; +set_compat: to = (struct bpf_tunnel_key *)compat; break; default: @@ -1787,11 +1790,13 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) to->tunnel_tos = info->key.tos; to->tunnel_ttl = info->key.ttl; - if (flags & BPF_F_TUNINFO_IPV6) + if (flags & BPF_F_TUNINFO_IPV6) { memcpy(to->remote_ipv6, &info->key.u.ipv6.src, sizeof(to->remote_ipv6)); - else + to->tunnel_label = be32_to_cpu(info->key.label); + } else { to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); + } if (unlikely(size != sizeof(struct bpf_tunnel_key))) memcpy((void *)(long) r2, to, size); @@ -1850,6 +1855,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) return -EINVAL; if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { + case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): /* Fixup deprecated structure layouts here, so we have * a common path later on. @@ -1862,6 +1868,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) return -EINVAL; } } + if (unlikely(!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label)) + return -EINVAL; skb_dst_drop(skb); dst_hold((struct dst_entry *) md); @@ -1882,6 +1890,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) info->mode |= IP_TUNNEL_INFO_IPV6; memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, sizeof(from->remote_ipv6)); + info->key.label = cpu_to_be32(from->tunnel_label) & + IPV6_FLOWLABEL_MASK; } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); if (flags & BPF_F_ZERO_CSUM_TX)