linux-stable/include/linux/percpu_counter.h
Jiebin Sun 5d0ce3595a percpu: add percpu_counter_add_local and percpu_counter_sub_local
Patch series "/msg: mitigate the lock contention in ipc/msg", v6.

Here are two patches to mitigate the lock contention in ipc/msg.

The 1st patch is to add the new interface percpu_counter_add_local and
percpu_counter_sub_local.  The batch size in percpu_counter_add_batch
should be very large in heavy writing and rare reading case.  Add the
"_local" version, and mostly it will do local adding, reduce the global
updating and mitigate lock contention in writing.

The 2nd patch is to use percpu_counter instead of atomic update in
ipc/msg.  The msg_bytes and msg_hdrs atomic counters are frequently
updated when IPC msg queue is in heavy use, causing heavy cache bounce and
overhead.  Change them to percpu_counter greatly improve the performance. 
Since there is one percpu struct per namespace, additional memory cost is
minimal.  Reading of the count done in msgctl call, which is infrequent. 
So the need to sum up the counts in each CPU is infrequent.


This patch (of 2):

The batch size in percpu_counter_add_batch should be very large in
heavy writing and rare reading case. Add the "_local" version, and
mostly it will do local adding, reduce the global updating and
mitigate lock contention in writing.

Link: https://lkml.kernel.org/r/20220913192538.3023708-1-jiebin.sun@intel.com
Link: https://lkml.kernel.org/r/20220913192538.3023708-2-jiebin.sun@intel.com
Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
Cc: Alexey Gladkov <legion@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vasily Averin <vasily.averin@linux.dev>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2022-10-03 14:21:43 -07:00

228 lines
5.5 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PERCPU_COUNTER_H
#define _LINUX_PERCPU_COUNTER_H
/*
* A simple "approximate counter" for use in ext2 and ext3 superblocks.
*
* WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4.
*/
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/list.h>
#include <linux/threads.h>
#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/gfp.h>
/* percpu_counter batch for local add or sub */
#define PERCPU_COUNTER_LOCAL_BATCH INT_MAX
#ifdef CONFIG_SMP
struct percpu_counter {
raw_spinlock_t lock;
s64 count;
#ifdef CONFIG_HOTPLUG_CPU
struct list_head list; /* All percpu_counters are on a list */
#endif
s32 __percpu *counters;
};
extern int percpu_counter_batch;
int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
struct lock_class_key *key);
#define percpu_counter_init(fbc, value, gfp) \
({ \
static struct lock_class_key __key; \
\
__percpu_counter_init(fbc, value, gfp, &__key); \
})
void percpu_counter_destroy(struct percpu_counter *fbc);
void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
s32 batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
void percpu_counter_sync(struct percpu_counter *fbc);
static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
{
return __percpu_counter_compare(fbc, rhs, percpu_counter_batch);
}
static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{
percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
}
/*
* With percpu_counter_add_local() and percpu_counter_sub_local(), counts
* are accumulated in local per cpu counter and not in fbc->count until
* local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter
* write efficient.
* But percpu_counter_sum(), instead of percpu_counter_read(), needs to be
* used to add up the counts from each CPU to account for all the local
* counts. So percpu_counter_add_local() and percpu_counter_sub_local()
* should be used when a counter is updated frequently and read rarely.
*/
static inline void
percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
{
percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH);
}
static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
{
s64 ret = __percpu_counter_sum(fbc);
return ret < 0 ? 0 : ret;
}
static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
{
return __percpu_counter_sum(fbc);
}
static inline s64 percpu_counter_read(struct percpu_counter *fbc)
{
return fbc->count;
}
/*
* It is possible for the percpu_counter_read() to return a small negative
* number for some counter which should never be negative.
*
*/
static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
{
/* Prevent reloads of fbc->count */
s64 ret = READ_ONCE(fbc->count);
if (ret >= 0)
return ret;
return 0;
}
static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
{
return (fbc->counters != NULL);
}
#else /* !CONFIG_SMP */
struct percpu_counter {
s64 count;
};
static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount,
gfp_t gfp)
{
fbc->count = amount;
return 0;
}
static inline void percpu_counter_destroy(struct percpu_counter *fbc)
{
}
static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
{
fbc->count = amount;
}
static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
{
if (fbc->count > rhs)
return 1;
else if (fbc->count < rhs)
return -1;
else
return 0;
}
static inline int
__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
{
return percpu_counter_compare(fbc, rhs);
}
static inline void
percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{
preempt_disable();
fbc->count += amount;
preempt_enable();
}
/* non-SMP percpu_counter_add_local is the same with percpu_counter_add */
static inline void
percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
{
percpu_counter_add(fbc, amount);
}
static inline void
percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
percpu_counter_add(fbc, amount);
}
static inline s64 percpu_counter_read(struct percpu_counter *fbc)
{
return fbc->count;
}
/*
* percpu_counter is intended to track positive numbers. In the UP case the
* number should never be negative.
*/
static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
{
return fbc->count;
}
static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
{
return percpu_counter_read_positive(fbc);
}
static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
{
return percpu_counter_read(fbc);
}
static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
{
return true;
}
static inline void percpu_counter_sync(struct percpu_counter *fbc)
{
}
#endif /* CONFIG_SMP */
static inline void percpu_counter_inc(struct percpu_counter *fbc)
{
percpu_counter_add(fbc, 1);
}
static inline void percpu_counter_dec(struct percpu_counter *fbc)
{
percpu_counter_add(fbc, -1);
}
static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
{
percpu_counter_add(fbc, -amount);
}
static inline void
percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
{
percpu_counter_add_local(fbc, -amount);
}
#endif /* _LINUX_PERCPU_COUNTER_H */