Merge branch 'net-move-more-duplicate-code-of-ovs-and-tc-conntrack-into-nf_conntrack_ovs'

Xin Long says:

====================
net: move more duplicate code of ovs and tc conntrack into nf_conntrack_ovs

We've moved some duplicate code into nf_nat_ovs in:

  "net: eliminate the duplicate code in the ct nat functions of ovs and tc"

This patchset addresses more code duplication in the conntrack code of
ovs and tc, then creates nf_conntrack_ovs for them. Four functions are
extracted and moved into it:

  nf_ct_handle_fragments()
  nf_ct_skb_network_trim()
  nf_ct_helper()
  nf_ct_add_helper()
====================

Link: https://lore.kernel.org/r/cover.1675810210.git.lucien.xin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2023-02-10 16:23:05 -08:00
commit 33c6ce4a4c
9 changed files with 207 additions and 235 deletions

View File

@ -362,6 +362,10 @@ static inline struct nf_conntrack_net *nf_ct_pernet(const struct net *net)
return net_generic(net, nf_conntrack_net_id);
}
int nf_ct_skb_network_trim(struct sk_buff *skb, int family);
int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
u16 zone, u8 family, u8 *proto, u16 *mru);
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))

View File

@ -189,6 +189,9 @@ config NF_CONNTRACK_LABELS
to connection tracking entries. It can be used with xtables connlabel
match and the nftables ct expression.
config NF_CONNTRACK_OVS
bool
config NF_CT_PROTO_DCCP
bool 'DCCP protocol connection tracking support'
depends on NETFILTER_ADVANCED

View File

@ -11,6 +11,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_OVS) += nf_conntrack_ovs.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o

View File

@ -242,104 +242,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
}
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
/* Run the conntrack helper attached to 'ct', if there is one, on 'skb'.
 * 'skb' should already be pulled to nh_ofs.
 *
 * 'proto' is the L3 family on entry (NFPROTO_IPV4 or NFPROTO_IPV6).
 *
 * Returns NF_ACCEPT when no helper applies to this packet, the helper's
 * own verdict when it is anything other than NF_ACCEPT, and NF_DROP for
 * a non-IP family or when the post-helper sequence adjustment fails.
 */
int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
		 enum ip_conntrack_info ctinfo, u16 proto)
{
	const struct nf_conntrack_helper *hlp;
	const struct nf_conn_help *ext;
	unsigned int l4_off;
	u16 l4_proto;
	int verdict;

	if (ctinfo == IP_CT_RELATED_REPLY)
		return NF_ACCEPT;

	ext = nfct_help(ct);
	if (!ext)
		return NF_ACCEPT;

	hlp = rcu_dereference(ext->helper);
	if (!hlp)
		return NF_ACCEPT;

	/* A helper bound to a specific L3 family only handles that family. */
	if (hlp->tuple.src.l3num != NFPROTO_UNSPEC &&
	    hlp->tuple.src.l3num != proto)
		return NF_ACCEPT;

	/* Locate the transport header and learn the L4 protocol. */
	if (proto == NFPROTO_IPV4) {
		l4_off = ip_hdrlen(skb);
		l4_proto = ip_hdr(skb)->protocol;
	} else if (proto == NFPROTO_IPV6) {
		u8 nexthdr = ipv6_hdr(skb)->nexthdr;
		__be16 frag_off;
		int ofs;

		ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
				       &frag_off);
		/* Give up if the extension headers could not be skipped or
		 * the bits of frag_off above the low three are non-zero.
		 */
		if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
			pr_debug("proto header not found\n");
			return NF_ACCEPT;
		}
		l4_off = ofs;
		l4_proto = nexthdr;
	} else {
		WARN_ONCE(1, "helper invoked on non-IP family!");
		return NF_DROP;
	}

	if (hlp->tuple.dst.protonum != l4_proto)
		return NF_ACCEPT;

	verdict = hlp->help(skb, l4_off, ct, ctinfo);
	if (verdict != NF_ACCEPT)
		return verdict;

	/* Adjust seqs after helper. This is needed due to some helpers (e.g.,
	 * FTP with NAT) adjusting the TCP payload size when mangling IP
	 * addresses and/or port numbers in the text-based control connection.
	 */
	if (!test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) ||
	    nf_ct_seq_adjust(skb, ct, ctinfo, l4_off))
		return NF_ACCEPT;

	return NF_DROP;
}
EXPORT_SYMBOL_GPL(nf_ct_helper);
/* Obtain the conntrack helper 'name' for (family, proto) through
 * nf_conntrack_helper_try_module_get(), add a helper extension to 'ct'
 * and publish the helper in it.
 *
 * When 'nat' is set and CONFIG_NF_NAT is enabled,
 * nf_nat_helper_try_module_get() is called as well; if it fails, the
 * conntrack helper reference is put back and its error is returned.
 *
 * Returns 0 on success with *hp set to the helper. Returns -EINVAL when
 * the helper cannot be obtained, -ENOMEM when the extension cannot be
 * added, or the NAT helper error; *hp is not written in those cases.
 */
int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family,
		     u8 proto, bool nat, struct nf_conntrack_helper **hp)
{
	struct nf_conntrack_helper *hlp;
	struct nf_conn_help *ext;

	hlp = nf_conntrack_helper_try_module_get(name, family, proto);
	if (!hlp)
		return -EINVAL;

	ext = nf_ct_helper_ext_add(ct, GFP_KERNEL);
	if (!ext) {
		nf_conntrack_helper_put(hlp);
		return -ENOMEM;
	}

#if IS_ENABLED(CONFIG_NF_NAT)
	if (nat) {
		int nat_err = nf_nat_helper_try_module_get(name, family, proto);

		if (nat_err) {
			nf_conntrack_helper_put(hlp);
			return nat_err;
		}
	}
#endif
	rcu_assign_pointer(ext->helper, hlp);
	*hp = hlp;

	return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_add_helper);
/* appropriate ct lock protecting must be taken by caller */
static int unhelp(struct nf_conn *ct, void *me)
{

View File

@ -0,0 +1,178 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Conntrack support functions shared by the openvswitch (OVS) and TC conntrack code. */
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/ipv6_frag.h>
#include <net/ip.h>
/* Run the conntrack helper attached to 'ct', if there is one, on 'skb'.
 * 'skb' should already be pulled to nh_ofs.
 *
 * 'proto' is the L3 family on entry (NFPROTO_IPV4 or NFPROTO_IPV6).
 *
 * Returns NF_ACCEPT when no helper applies to this packet, the helper's
 * own verdict when it is anything other than NF_ACCEPT, and NF_DROP for
 * a non-IP family or when the post-helper sequence adjustment fails.
 */
int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
		 enum ip_conntrack_info ctinfo, u16 proto)
{
	const struct nf_conntrack_helper *hlp;
	const struct nf_conn_help *ext;
	unsigned int l4_off;
	u16 l4_proto;
	int verdict;

	if (ctinfo == IP_CT_RELATED_REPLY)
		return NF_ACCEPT;

	ext = nfct_help(ct);
	if (!ext)
		return NF_ACCEPT;

	hlp = rcu_dereference(ext->helper);
	if (!hlp)
		return NF_ACCEPT;

	/* A helper bound to a specific L3 family only handles that family. */
	if (hlp->tuple.src.l3num != NFPROTO_UNSPEC &&
	    hlp->tuple.src.l3num != proto)
		return NF_ACCEPT;

	/* Locate the transport header and learn the L4 protocol. */
	if (proto == NFPROTO_IPV4) {
		l4_off = ip_hdrlen(skb);
		l4_proto = ip_hdr(skb)->protocol;
	} else if (proto == NFPROTO_IPV6) {
		u8 nexthdr = ipv6_hdr(skb)->nexthdr;
		__be16 frag_off;
		int ofs;

		ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
				       &frag_off);
		/* Give up if the extension headers could not be skipped or
		 * the bits of frag_off above the low three are non-zero.
		 */
		if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
			pr_debug("proto header not found\n");
			return NF_ACCEPT;
		}
		l4_off = ofs;
		l4_proto = nexthdr;
	} else {
		WARN_ONCE(1, "helper invoked on non-IP family!");
		return NF_DROP;
	}

	if (hlp->tuple.dst.protonum != l4_proto)
		return NF_ACCEPT;

	verdict = hlp->help(skb, l4_off, ct, ctinfo);
	if (verdict != NF_ACCEPT)
		return verdict;

	/* Adjust seqs after helper. This is needed due to some helpers (e.g.,
	 * FTP with NAT) adjusting the TCP payload size when mangling IP
	 * addresses and/or port numbers in the text-based control connection.
	 */
	if (!test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) ||
	    nf_ct_seq_adjust(skb, ct, ctinfo, l4_off))
		return NF_ACCEPT;

	return NF_DROP;
}
EXPORT_SYMBOL_GPL(nf_ct_helper);
/* Obtain the conntrack helper 'name' for (family, proto) through
 * nf_conntrack_helper_try_module_get(), add a helper extension to 'ct'
 * and publish the helper in it.
 *
 * When 'nat' is set and CONFIG_NF_NAT is enabled,
 * nf_nat_helper_try_module_get() is called as well; if it fails, the
 * conntrack helper reference is put back and its error is returned.
 *
 * Returns 0 on success with *hp set to the helper. Returns -EINVAL when
 * the helper cannot be obtained, -ENOMEM when the extension cannot be
 * added, or the NAT helper error; *hp is not written in those cases.
 */
int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family,
		     u8 proto, bool nat, struct nf_conntrack_helper **hp)
{
	struct nf_conntrack_helper *hlp;
	struct nf_conn_help *ext;

	hlp = nf_conntrack_helper_try_module_get(name, family, proto);
	if (!hlp)
		return -EINVAL;

	ext = nf_ct_helper_ext_add(ct, GFP_KERNEL);
	if (!ext) {
		nf_conntrack_helper_put(hlp);
		return -ENOMEM;
	}

#if IS_ENABLED(CONFIG_NF_NAT)
	if (nat) {
		int nat_err = nf_nat_helper_try_module_get(name, family, proto);

		if (nat_err) {
			nf_conntrack_helper_put(hlp);
			return nat_err;
		}
	}
#endif
	rcu_assign_pointer(ext->helper, hlp);
	*hp = hlp;

	return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_add_helper);
/* Cut 'skb' down to the length announced by its IP/IPv6 header, dropping
 * any padding a lower layer appended behind the packet. Higher-layer
 * processing (such as nf_ip_checksum) assumes skb->len excludes that
 * padding. For any other 'family' the skb is trimmed to its current
 * length. The caller needs to pull the skb to the network header, and
 * ensure ip_hdr/ipv6_hdr points to valid data.
 *
 * Returns the result of pskb_trim_rcsum().
 */
int nf_ct_skb_network_trim(struct sk_buff *skb, int family)
{
	unsigned int l3_len = skb->len;

	if (family == NFPROTO_IPV4)
		l3_len = skb_ip_totlen(skb);
	else if (family == NFPROTO_IPV6)
		l3_len = sizeof(struct ipv6hdr) +
			 ntohs(ipv6_hdr(skb)->payload_len);

	return pskb_trim_rcsum(skb, l3_len);
}
EXPORT_SYMBOL_GPL(nf_ct_skb_network_trim);
/* Hand an IP fragment to the defragmentation code on behalf of conntrack.
 *
 * @net:    network namespace passed on to the defrag routine
 * @skb:    the fragment; its IP/IPv6 control block is zeroed first
 * @zone:   added to IP_DEFRAG_CONNTRACK_IN / IP6_DEFRAG_CONNTRACK_IN to
 *          pick the defrag user
 * @family: NFPROTO_IPV4, or NFPROTO_IPV6 when CONFIG_NF_DEFRAG_IPV6 is
 *          enabled; anything else is rejected
 * @proto:  written only on the IPv6 path, with the nexthdr of the IPv6
 *          header after gathering; not touched for IPv4
 * @mru:    set to the frag_max_size found in the control block
 *
 * Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
 * value if 'skb' is freed.
 */
int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
u16 zone, u8 family, u8 *proto, u16 *mru)
{
int err;
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
/* ip_defrag() is called with bottom halves disabled. */
local_bh_disable();
err = ip_defrag(net, skb, user);
local_bh_enable();
/* No kfree_skb() here, unlike the IPv6 path below - presumably
 * ip_defrag() has already consumed 'skb' whenever it returns
 * non-zero; confirm against ip_defrag().
 */
if (err)
return err;
*mru = IPCB(skb)->frag_max_size;
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
} else if (family == NFPROTO_IPV6) {
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
err = nf_ct_frag6_gather(net, skb, user);
if (err) {
/* Every failure except -EINPROGRESS frees 'skb' here. */
if (err != -EINPROGRESS)
kfree_skb(skb);
return err;
}
*proto = ipv6_hdr(skb)->nexthdr;
*mru = IP6CB(skb)->frag_max_size;
#endif
} else {
/* Unsupported family; IPv6 also lands here when
 * CONFIG_NF_DEFRAG_IPV6 is disabled.
 */
kfree_skb(skb);
return -EPFNOSUPPORT;
}
/* On success the skb hash is cleared and ignore_df is set. */
skb_clear_hash(skb);
skb->ignore_df = 1;
return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_handle_fragments);

View File

@ -15,6 +15,7 @@ config OPENVSWITCH
select NET_MPLS_GSO
select DST_CACHE
select NET_NSH
select NF_CONNTRACK_OVS if NF_CONNTRACK
select NF_NAT_OVS if NF_NAT
help
Open vSwitch is a multilayer Ethernet switch targeted at virtualized

View File

@ -435,52 +435,21 @@ static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
return 0;
}
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
* value if 'skb' is freed.
*/
static int handle_fragments(struct net *net, struct sw_flow_key *key,
u16 zone, struct sk_buff *skb)
static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
u16 zone, int family, struct sk_buff *skb)
{
struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
int err;
if (key->eth.type == htons(ETH_P_IP)) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
err = ip_defrag(net, skb, user);
if (err)
return err;
ovs_cb.mru = IPCB(skb)->frag_max_size;
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
} else if (key->eth.type == htons(ETH_P_IPV6)) {
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
err = nf_ct_frag6_gather(net, skb, user);
if (err) {
if (err != -EINPROGRESS)
kfree_skb(skb);
return err;
}
key->ip.proto = ipv6_hdr(skb)->nexthdr;
ovs_cb.mru = IP6CB(skb)->frag_max_size;
#endif
} else {
kfree_skb(skb);
return -EPFNOSUPPORT;
}
err = nf_ct_handle_fragments(net, skb, zone, family, &key->ip.proto, &ovs_cb.mru);
if (err)
return err;
/* The key extracted from the fragment that completed this datagram
* likely didn't have an L4 header, so regenerate it.
*/
ovs_flow_key_update_l3l4(skb, key);
key->ip.frag = OVS_FRAG_TYPE_NONE;
skb_clear_hash(skb);
skb->ignore_df = 1;
*OVS_CB(skb) = ovs_cb;
return 0;
@ -1091,36 +1060,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
return 0;
}
/* Cut 'skb' down to the length announced by its IP/IPv6 header, dropping
 * any padding a lower layer appended behind the packet. Higher-layer
 * processing (such as nf_ip_checksum) assumes skb->len excludes that
 * padding. For any other skb->protocol the skb is trimmed to its current
 * length. The caller needs to pull the skb to the network header, and
 * ensure ip_hdr/ipv6_hdr points to valid data.
 *
 * Returns the result of pskb_trim_rcsum(); 'skb' is freed when that
 * result is non-zero.
 */
static int ovs_skb_network_trim(struct sk_buff *skb)
{
	unsigned int l3_len = skb->len;
	int rc;

	if (skb->protocol == htons(ETH_P_IP))
		l3_len = skb_ip_totlen(skb);
	else if (skb->protocol == htons(ETH_P_IPV6))
		l3_len = sizeof(struct ipv6hdr) +
			 ntohs(ipv6_hdr(skb)->payload_len);

	rc = pskb_trim_rcsum(skb, l3_len);
	if (rc)
		kfree_skb(skb);

	return rc;
}
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
* value if 'skb' is freed.
*/
@ -1135,12 +1074,15 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
err = ovs_skb_network_trim(skb);
if (err)
err = nf_ct_skb_network_trim(skb, info->family);
if (err) {
kfree_skb(skb);
return err;
}
if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
err = handle_fragments(net, key, info->zone.id, skb);
err = ovs_ct_handle_fragments(net, key, info->zone.id,
info->family, skb);
if (err)
return err;
}

View File

@ -984,6 +984,7 @@ config NET_ACT_TUNNEL_KEY
config NET_ACT_CT
tristate "connection tracking tc action"
depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE
select NF_CONNTRACK_OVS
select NF_NAT_OVS if NF_NAT
help
Say Y here to allow sending the packets to conntrack module.

View File

@ -726,31 +726,6 @@ drop_ct:
return false;
}
/* Cut 'skb' down to the length announced by its IP/IPv6 header, dropping
 * any padding a lower layer appended behind the packet. Higher-layer
 * processing (such as nf_ip_checksum) assumes skb->len excludes that
 * padding. For any other 'family' the skb is trimmed to its current
 * length. The caller needs to pull the skb to the network header, and
 * ensure ip_hdr/ipv6_hdr points to valid data.
 *
 * Returns the result of pskb_trim_rcsum().
 */
static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
{
	unsigned int l3_len = skb->len;

	if (family == NFPROTO_IPV4)
		l3_len = skb_ip_totlen(skb);
	else if (family == NFPROTO_IPV6)
		l3_len = sizeof(struct ipv6hdr) +
			 ntohs(ipv6_hdr(skb)->payload_len);

	return pskb_trim_rcsum(skb, l3_len);
}
static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
{
u8 family = NFPROTO_UNSPEC;
@ -810,6 +785,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
struct nf_conn *ct;
int err = 0;
bool frag;
u8 proto;
u16 mru;
/* Previously seen (loopback)? Ignore. */
@ -825,50 +801,14 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
return err;
skb_get(skb);
mru = tc_skb_cb(skb)->mru;
err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
if (err)
return err;
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
*defrag = true;
tc_skb_cb(skb)->mru = mru;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
local_bh_disable();
err = ip_defrag(net, skb, user);
local_bh_enable();
if (err && err != -EINPROGRESS)
return err;
if (!err) {
*defrag = true;
mru = IPCB(skb)->frag_max_size;
}
} else { /* NFPROTO_IPV6 */
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
err = nf_ct_frag6_gather(net, skb, user);
if (err && err != -EINPROGRESS)
goto out_free;
if (!err) {
*defrag = true;
mru = IP6CB(skb)->frag_max_size;
}
#else
err = -EOPNOTSUPP;
goto out_free;
#endif
}
if (err != -EINPROGRESS)
tc_skb_cb(skb)->mru = mru;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
out_free:
kfree_skb(skb);
return err;
return 0;
}
static void tcf_ct_params_free(struct tcf_ct_params *params)
@ -1011,7 +951,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
if (err)
goto drop;
err = tcf_ct_skb_network_trim(skb, family);
err = nf_ct_skb_network_trim(skb, family);
if (err)
goto drop;