From 1de6b15a434c0068253fea5d719f71143e7e3a79 Mon Sep 17 00:00:00 2001 From: xu xin Date: Tue, 4 Jan 2022 10:59:34 +0000 Subject: [PATCH 1/2] Namespaceify min_pmtu sysctl This patch enables the sysctl min_pmtu to be configured per net namespace. Signed-off-by: xu xin Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ net/ipv4/route.c | 53 ++++++++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6c5b2efc4f17..1ecbf82b07f1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -85,6 +85,8 @@ struct netns_ipv4 { int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; + u32 ip_rt_min_pmtu; + struct local_ports ip_local_ports; u8 sysctl_tcp_ecn; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 843a7a3699fe..f29637e85c05 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -110,6 +110,8 @@ #define RT_GC_TIMEOUT (300*HZ) +#define DEFAULT_MIN_PMTU (512 + 20 + 20) + static int ip_rt_max_size; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; @@ -117,7 +119,6 @@ static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; -static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; @@ -1018,9 +1019,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (old_mtu < mtu) return; - if (mtu < ip_rt_min_pmtu) { + if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; - mtu = min(old_mtu, ip_rt_min_pmtu); + mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); } if (rt->rt_pmtu == mtu && !lock && @@ -3541,14 +3542,6 @@ static struct ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "min_pmtu", - .data = &ip_rt_min_pmtu, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &ip_min_valid_pmtu, - }, { .procname = "min_adv_mss", .data = &ip_rt_min_advmss, @@ -3561,13 +3554,21 @@ static struct ctl_table ipv4_route_table[] = { static const char ipv4_route_flush_procname[] = "flush"; -static struct ctl_table ipv4_route_flush_table[] = { +static struct ctl_table ipv4_route_netns_table[] = { { .procname = ipv4_route_flush_procname, .maxlen = sizeof(int), .mode = 0200, .proc_handler = ipv4_sysctl_rtcache_flush, }, + { + .procname = "min_pmtu", + .data = &init_net.ipv4.ip_rt_min_pmtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &ip_min_valid_pmtu, + }, { }, }; @@ -3575,9 +3576,11 @@ static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; - tbl = ipv4_route_flush_table; + tbl = ipv4_route_netns_table; if (!net_eq(net, &init_net)) { - tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); + int i; + + tbl = kmemdup(tbl, sizeof(ipv4_route_netns_table), GFP_KERNEL); if (!tbl) goto err_dup; @@ -3586,6 +3589,12 @@ static __net_init int sysctl_route_net_init(struct net *net) if (tbl[0].procname != ipv4_route_flush_procname) tbl[0].procname = NULL; } + + /* Update the variables to point into the current struct net + * except for the first element flush + */ + for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++) + tbl[i].data += (void *)net - (void *)&init_net; } tbl[0].extra1 = net; @@ -3595,7 +3604,7 @@ static __net_init int sysctl_route_net_init(struct net *net) return 0; err_reg: - if (tbl != ipv4_route_flush_table) + if (tbl != ipv4_route_netns_table) kfree(tbl); err_dup: return -ENOMEM; @@ -3607,7 +3616,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net) tbl = net->ipv4.route_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.route_hdr); - BUG_ON(tbl == ipv4_route_flush_table); + BUG_ON(tbl == ipv4_route_netns_table); kfree(tbl); } @@ -3617,6 +3626,17 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { }; #endif +static __net_init int netns_ip_rt_init(struct net *net) +{ + /* Set default value for namespaceified sysctls */ + net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU; + return 0; +} + +static struct pernet_operations __net_initdata ip_rt_ops = { + .init = netns_ip_rt_init, +}; + static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->ipv4.rt_genid, 0); @@ -3722,6 +3742,7 @@ int __init ip_rt_init(void) #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); #endif + register_pernet_subsys(&ip_rt_ops); register_pernet_subsys(&rt_genid_ops); register_pernet_subsys(&ipv4_inetpeer_ops); return 0; From 1135fad204805518462c1f0caaca6bcd52ba78cf Mon Sep 17 00:00:00 2001 From: xu xin Date: Tue, 4 Jan 2022 10:59:47 +0000 Subject: [PATCH 2/2] Namespaceify mtu_expires sysctl This patch enables the sysctl mtu_expires to be configured per net namespace. Signed-off-by: xu xin Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + net/ipv4/route.c | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1ecbf82b07f1..78557643526e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -86,6 +86,7 @@ struct netns_ipv4 { int sysctl_icmp_ratemask; u32 ip_rt_min_pmtu; + int ip_rt_mtu_expires; struct local_ports ip_local_ports; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f29637e85c05..ff6f91cdb6c4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -111,6 +111,7 @@ #define RT_GC_TIMEOUT (300*HZ) #define DEFAULT_MIN_PMTU (512 + 20 + 20) +#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ) static int ip_rt_max_size; static int ip_rt_redirect_number __read_mostly = 9; @@ -118,7 +119,6 @@ static int ip_rt_redirect_load __read_mostly = HZ / 50; static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; -static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; @@ -1025,7 +1025,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) } if (rt->rt_pmtu == mtu && !lock && - time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) + time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) return; rcu_read_lock(); @@ -1035,7 +1035,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) fib_select_path(net, &res, fl4, NULL); nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, - jiffies + ip_rt_mtu_expires); + jiffies + net->ipv4.ip_rt_mtu_expires); } rcu_read_unlock(); } @@ -3535,13 +3535,6 @@ static struct ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "mtu_expires", - .data = &ip_rt_mtu_expires, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { .procname = "min_adv_mss", .data = &ip_rt_min_advmss, @@ -3569,6 +3562,13 @@ static struct ctl_table ipv4_route_netns_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &ip_min_valid_pmtu, }, + { + .procname = "mtu_expires", + .data = &init_net.ipv4.ip_rt_mtu_expires, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { }, }; @@ -3630,6 +3630,7 @@ static __net_init int netns_ip_rt_init(struct net *net) { /* Set default value for namespaceified sysctls */ net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU; + net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES; return 0; }