linux-stable/net/netfilter/ipvs/ip_vs_sched.c
Davide Caratti 62931f59ce ipvs: don't ignore errors in case refcounting ip_vs module fails
if the IPVS module is removed while the sync daemon is starting, there is
a small gap where try_module_get() might fail getting the refcount inside
ip_vs_use_count_inc(). Then, the refcounts of IPVS module are unbalanced,
and the subsequent call to stop_sync_thread() causes the following splat:

 WARNING: CPU: 0 PID: 4013 at kernel/module.c:1146 module_put.part.44+0x15b/0x290
  Modules linked in: ip_vs(-) nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 veth ip6table_filter ip6_tables iptable_filter binfmt_misc intel_rapl_msr intel_rapl_common crct10dif_pclmul crc32_pclmul ext4 mbcache jbd2 ghash_clmulni_intel snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_nhlt snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper joydev pcspkr snd_timer virtio_balloon snd soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi virtio_net net_failover virtio_blk failover virtio_console qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ata_piix ttm crc32c_intel serio_raw drm virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: nf_defrag_ipv6]
  CPU: 0 PID: 4013 Comm: modprobe Tainted: G        W         5.4.0-rc1.upstream+ #741
  Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
  RIP: 0010:module_put.part.44+0x15b/0x290
  Code: 04 25 28 00 00 00 0f 85 18 01 00 00 48 83 c4 68 5b 5d 41 5c 41 5d 41 5e 41 5f c3 89 44 24 28 83 e8 01 89 c5 0f 89 57 ff ff ff <0f> 0b e9 78 ff ff ff 65 8b 1d 67 83 26 4a 89 db be 08 00 00 00 48
  RSP: 0018:ffff888050607c78 EFLAGS: 00010297
  RAX: 0000000000000003 RBX: ffffffffc1420590 RCX: ffffffffb5db0ef9
  RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffffc1420590
  RBP: 00000000ffffffff R08: fffffbfff82840b3 R09: fffffbfff82840b3
  R10: 0000000000000001 R11: fffffbfff82840b2 R12: 1ffff1100a0c0f90
  R13: ffffffffc1420200 R14: ffff88804f533300 R15: ffff88804f533ca0
  FS:  00007f8ea9720740(0000) GS:ffff888053800000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f3245abe000 CR3: 000000004c28a006 CR4: 00000000001606f0
  Call Trace:
   stop_sync_thread+0x3a3/0x7c0 [ip_vs]
   ip_vs_sync_net_cleanup+0x13/0x50 [ip_vs]
   ops_exit_list.isra.5+0x94/0x140
   unregister_pernet_operations+0x29d/0x460
   unregister_pernet_device+0x26/0x60
   ip_vs_cleanup+0x11/0x38 [ip_vs]
   __x64_sys_delete_module+0x2d5/0x400
   do_syscall_64+0xa5/0x4e0
   entry_SYSCALL_64_after_hwframe+0x49/0xbe
  RIP: 0033:0x7f8ea8bf0db7
  Code: 73 01 c3 48 8b 0d b9 80 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 89 80 2c 00 f7 d8 64 89 01 48
  RSP: 002b:00007ffcd38d2fe8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
  RAX: ffffffffffffffda RBX: 0000000002436240 RCX: 00007f8ea8bf0db7
  RDX: 0000000000000000 RSI: 0000000000000800 RDI: 00000000024362a8
  RBP: 0000000000000000 R08: 00007f8ea8eba060 R09: 00007f8ea8c658a0
  R10: 00007ffcd38d2a60 R11: 0000000000000206 R12: 0000000000000000
  R13: 0000000000000001 R14: 00000000024362a8 R15: 0000000000000000
  irq event stamp: 4538
  hardirqs last  enabled at (4537): [<ffffffffb6193dde>] quarantine_put+0x9e/0x170
  hardirqs last disabled at (4538): [<ffffffffb5a0556a>] trace_hardirqs_off_thunk+0x1a/0x20
  softirqs last  enabled at (4522): [<ffffffffb6f8ebe9>] sk_common_release+0x169/0x2d0
  softirqs last disabled at (4520): [<ffffffffb6f8eb3e>] sk_common_release+0xbe/0x2d0

Check the return value of ip_vs_use_count_inc() and let its caller return
proper error. Inside do_ip_vs_set_ctl() the module is already refcounted,
we don't need refcount/derefcount there. Finally, in register_ip_vs_app()
and start_sync_thread(), take the module refcount earlier and ensure it's
released in the error path.

Change since v1:
 - better return values in case of failure of ip_vs_use_count_inc(),
   thanks to Julian Anastasov
 - no need to increase/decrease the module refcount in ip_vs_set_ctl(),
   thanks to Julian Anastasov

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
2019-10-24 11:53:19 +02:00

250 lines
5.6 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IPVS An implementation of the IP virtual server support for the
* LINUX operating system. IPVS is now implemented as a module
* over the Netfilter framework. IPVS can be used to build a
* high-performance and highly available server based on a
* cluster of servers.
*
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
* Changes:
*/
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <asm/string.h>
#include <linux/kmod.h>
#include <linux/sysctl.h>
#include <net/ip_vs.h>
/* Export the error helper; the function itself is defined later in this file. */
EXPORT_SYMBOL(ip_vs_scheduler_err);
/*
 * IPVS scheduler list: registered schedulers are linked here through
 * their n_list member.
 */
static LIST_HEAD(ip_vs_schedulers);
/* mutex taken around every walk or modification of ip_vs_schedulers */
static DEFINE_MUTEX(ip_vs_sched_mutex);
/*
 * Bind a service with a scheduler.
 *
 * Runs the scheduler's optional init_service() hook and, only if that
 * succeeds (or is absent), publishes the scheduler in svc->scheduler.
 *
 * Returns 0 on success, otherwise the non-zero value returned by
 * init_service(); in that case svc->scheduler is not modified.
 */
int ip_vs_bind_scheduler(struct ip_vs_service *svc,
			 struct ip_vs_scheduler *scheduler)
{
	if (scheduler->init_service) {
		int err = scheduler->init_service(svc);

		if (err) {
			pr_err("%s(): init error\n", __func__);
			return err;
		}
	}

	/* Publish only after the hook has set up its per-service state. */
	rcu_assign_pointer(svc->scheduler, scheduler);
	return 0;
}
/*
 * Unbind a service from its scheduler.
 *
 * Invokes sched->done_service() (when provided) if the service currently
 * has a scheduler installed.  svc->scheduler itself is left untouched:
 * clearing it is the caller's job.
 */
void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
			    struct ip_vs_scheduler *sched)
{
	/* A non-NULL pointer proves that the old 'sched' was installed. */
	if (!rcu_dereference_protected(svc->scheduler, 1))
		return;

	if (sched->done_service)
		sched->done_service(svc);
}
/*
 * Look up a scheduler in the scheduler list by name.
 *
 * The list is walked under ip_vs_sched_mutex.  For each entry a module
 * reference is taken before the name is compared; the reference is kept
 * for the matching entry and dropped again for every other one.
 *
 * Returns the matching scheduler (with its module reference held when
 * it has a module), or NULL if no usable entry carries that name.
 */
static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
{
	struct ip_vs_scheduler *pos;
	struct ip_vs_scheduler *found = NULL;

	IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);

	mutex_lock(&ip_vs_sched_mutex);

	list_for_each_entry(pos, &ip_vs_schedulers, n_list) {
		/* Test and get the module atomically; skip it if that fails. */
		if (pos->module && !try_module_get(pos->module))
			continue;

		if (strcmp(sched_name, pos->name) == 0) {
			/* HIT: keep the reference for the caller */
			found = pos;
			break;
		}

		/* Not the one we want: give the reference back. */
		module_put(pos->module);
	}

	mutex_unlock(&ip_vs_sched_mutex);
	return found;
}
/*
 * Look up a scheduler by name, trying to load its module if the first
 * lookup fails.
 *
 * The module requested is "ip_vs_<sched_name>".  The result of
 * request_module() is not checked; the second lookup decides the outcome.
 *
 * Returns the scheduler, or NULL if it is still not found.
 */
struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
{
	struct ip_vs_scheduler *sched = ip_vs_sched_getbyname(sched_name);

	if (sched)
		return sched;

	/* Not registered yet: ask for the module and search once more. */
	request_module("ip_vs_%s", sched_name);
	return ip_vs_sched_getbyname(sched_name);
}
/*
 * Drop the module reference of a scheduler.  A NULL scheduler is
 * accepted and ignored.
 */
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
{
	if (!scheduler)
		return;

	module_put(scheduler->module);
}
/*
 * Common error output helper for schedulers.
 *
 * Logs 'msg' through IP_VS_ERR_RL(), prefixed with the name of the
 * service's current scheduler ("none" when there is no scheduler) and an
 * identification of the service: its firewall mark when one is set,
 * otherwise protocol, address and port (IPv6 form only when
 * CONFIG_IP_VS_IPV6 is enabled and the service is AF_INET6).
 */
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
{
	struct ip_vs_scheduler *cur = rcu_dereference(svc->scheduler);
	char *label = "none";

	if (cur)
		label = cur->name;

	if (svc->fwmark) {
		IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
			     label, svc->fwmark, svc->fwmark, msg);
		return;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6) {
		IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
			     label, ip_vs_proto_name(svc->protocol),
			     &svc->addr.in6, ntohs(svc->port), msg);
		return;
	}
#endif

	IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
		     label, ip_vs_proto_name(svc->protocol),
		     &svc->addr.ip, ntohs(svc->port), msg);
}
/*
* Register a scheduler in the scheduler list
*/
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
{
struct ip_vs_scheduler *sched;
if (!scheduler) {
pr_err("%s(): NULL arg\n", __func__);
return -EINVAL;
}
if (!scheduler->name) {
pr_err("%s(): NULL scheduler_name\n", __func__);
return -EINVAL;
}
/* increase the module use count */
if (!ip_vs_use_count_inc())
return -ENOENT;
mutex_lock(&ip_vs_sched_mutex);
if (!list_empty(&scheduler->n_list)) {
mutex_unlock(&ip_vs_sched_mutex);
ip_vs_use_count_dec();
pr_err("%s(): [%s] scheduler already linked\n",
__func__, scheduler->name);
return -EINVAL;
}
/*
* Make sure that the scheduler with this name doesn't exist
* in the scheduler list.
*/
list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
if (strcmp(scheduler->name, sched->name) == 0) {
mutex_unlock(&ip_vs_sched_mutex);
ip_vs_use_count_dec();
pr_err("%s(): [%s] scheduler already existed "
"in the system\n", __func__, scheduler->name);
return -EINVAL;
}
}
/*
* Add it into the d-linked scheduler list
*/
list_add(&scheduler->n_list, &ip_vs_schedulers);
mutex_unlock(&ip_vs_sched_mutex);
pr_info("[%s] scheduler registered.\n", scheduler->name);
return 0;
}
/*
 * Unregister a scheduler from the scheduler list.
 *
 * Unlinks the scheduler under ip_vs_sched_mutex and drops the IPVS
 * use-count reference via ip_vs_use_count_dec().
 *
 * Returns 0 on success, -EINVAL for a NULL scheduler or for a scheduler
 * that is not currently linked into the list.
 */
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
{
	if (!scheduler) {
		pr_err("%s(): NULL arg\n", __func__);
		return -EINVAL;
	}

	mutex_lock(&ip_vs_sched_mutex);
	if (list_empty(&scheduler->n_list)) {
		mutex_unlock(&ip_vs_sched_mutex);
		pr_err("%s(): [%s] scheduler is not in the list. failed\n",
		       __func__, scheduler->name);
		return -EINVAL;
	}

	/*
	 * Remove it from the d-linked scheduler list.
	 *
	 * list_del_init() rather than list_del(): the node must read as
	 * "empty" afterwards so that the list_empty() guard above rejects a
	 * repeated unregister (instead of unlinking a poisoned node and
	 * dropping the use count twice), and so that the list_empty() test
	 * in register_ip_vs_scheduler() accepts the scheduler again.
	 */
	list_del_init(&scheduler->n_list);
	mutex_unlock(&ip_vs_sched_mutex);

	/* decrease the module use count */
	ip_vs_use_count_dec();

	pr_info("[%s] scheduler unregistered.\n", scheduler->name);

	return 0;
}