once: add DO_ONCE_SLOW() for sleepable contexts

[ Upstream commit 62c07983be ]

Christophe Leroy reported a ~80ms latency spike
happening at first TCP connect() time.

This is because __inet_hash_connect() uses get_random_once()
to populate a perturbation table which became quite big
after commit 4c2c8f03a5 ("tcp: increase source port perturb table to 2^16")

get_random_once() uses DO_ONCE(), which block hard irqs for the duration
of the operation.

This patch adds DO_ONCE_SLOW() which uses a mutex instead of a spinlock
for operations where we prefer to stay in process context.

Then __inet_hash_connect() can use get_random_slow_once()
to populate its perturbation table.

Fixes: 4c2c8f03a5 ("tcp: increase source port perturb table to 2^16")
Fixes: 190cc82489 ("tcp: change source port randomizarion at connect() time")
Reported-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Link: https://lore.kernel.org/netdev/CANn89iLAEYBaoYajy0Y9UmGFff5GPxDUoG-ErVB2jDdRNQ5Tug@mail.gmail.com/T/#t
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willy Tarreau <w@1wt.eu>
Tested-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Eric Dumazet 2022-10-01 13:51:02 -07:00 committed by Greg Kroah-Hartman
parent 77bfd26cbb
commit 7aef5082c5
3 changed files with 60 additions and 2 deletions

View File

@ -5,10 +5,18 @@
#include <linux/types.h>
#include <linux/jump_label.h>
/* Helpers used from arbitrary contexts.
* Hard irqs are blocked, be cautious.
*/
bool __do_once_start(bool *done, unsigned long *flags);
void __do_once_done(bool *done, struct static_key_true *once_key,
unsigned long *flags, struct module *mod);
/* Variant for process contexts only. */
bool __do_once_slow_start(bool *done);
void __do_once_slow_done(bool *done, struct static_key_true *once_key,
struct module *mod);
/* Call a function exactly once. The idea of DO_ONCE() is to perform
* a function call such as initialization of random seeds, etc, only
* once, where DO_ONCE() can live in the fast-path. After @func has
@ -52,9 +60,29 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
___ret; \
})
/* Variant of DO_ONCE() for process/sleepable contexts. */
#define DO_ONCE_SLOW(func, ...) \
({ \
bool ___ret = false; \
static bool __section(".data.once") ___done = false; \
static DEFINE_STATIC_KEY_TRUE(___once_key); \
if (static_branch_unlikely(&___once_key)) { \
___ret = __do_once_slow_start(&___done); \
if (unlikely(___ret)) { \
func(__VA_ARGS__); \
__do_once_slow_done(&___done, &___once_key, \
THIS_MODULE); \
} \
} \
___ret; \
})
#define get_random_once(buf, nbytes) \
DO_ONCE(get_random_bytes, (buf), (nbytes))
#define get_random_once_wait(buf, nbytes) \
DO_ONCE(get_random_bytes_wait, (buf), (nbytes)) \
#define get_random_slow_once(buf, nbytes) \
DO_ONCE_SLOW(get_random_bytes, (buf), (nbytes))
#endif /* _LINUX_ONCE_H */

View File

@ -66,3 +66,33 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
once_disable_jump(once_key, mod);
}
EXPORT_SYMBOL(__do_once_done);
static DEFINE_MUTEX(once_mutex);
bool __do_once_slow_start(bool *done)
__acquires(once_mutex)
{
mutex_lock(&once_mutex);
if (*done) {
mutex_unlock(&once_mutex);
/* Keep sparse happy by restoring an even lock count on
* this mutex. In case we return here, we don't call into
* __do_once_done but return early in the DO_ONCE_SLOW() macro.
*/
__acquire(once_mutex);
return false;
}
return true;
}
EXPORT_SYMBOL(__do_once_slow_start);
void __do_once_slow_done(bool *done, struct static_key_true *once_key,
struct module *mod)
__releases(once_mutex)
{
*done = true;
mutex_unlock(&once_mutex);
once_disable_jump(once_key, mod);
}
EXPORT_SYMBOL(__do_once_slow_done);

View File

@ -771,8 +771,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
net_get_random_once(table_perturb,
INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
get_random_slow_once(table_perturb,
INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);