linux-stable/include/linux/posix-timers.h
Frederic Weisbecker ee375328f5 posix-cpu-timers: Recalc next expiration when timer_settime() ends up not queueing
There are several scenarios that can result in posix_cpu_timer_set()
not queueing the timer but still leaving the threadgroup cputime counter
running or keeping the tick dependency around for a random amount of time.

1) If timer_settime() is called with a 0 expiration on a timer that is
   already disabled, the process wide cputime counter will be started
   and won't ever get a chance to be stopped by stop_process_timer()
   since no timer is actually armed to be processed.

   The following snippet is enough to trigger the issue.

	void trigger_process_counter(void)
	{
		timer_t id;
		struct itimerspec val = { };

		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
		timer_settime(id, TIMER_ABSTIME, &val, NULL);
		timer_delete(id);
	}

2) If timer_settime() is called with a 0 expiration on a timer that is
   already armed, the timer is dequeued but not really disarmed. So the
   process wide cputime counter and the tick dependency may still remain
   a while around.

   The following code snippet keeps this overhead around for one week after
   the timer deletion:

	void trigger_process_counter(void)
	{
		timer_t id;
		struct itimerspec val = { };

		val.it_value.tv_sec = 604800;
		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
		timer_settime(id, 0, &val, NULL);
		timer_delete(id);
	}

3) If the timer was initially deactivated, this call to timer_settime()
   with an early expiration may have started the process wide cputime
   counter even though the timer hasn't been queued and armed because it
   has fired early and inline within posix_cpu_timer_set() itself. As a
   result the process wide cputime counter may never stop until a new
   timer is ever armed in the future.

   The following code snippet can reproduce this:

	void trigger_process_counter(void)
	{
		timer_t id;
		struct itimerspec val = { };

		signal(SIGALRM, SIG_IGN);
		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
		val.it_value.tv_nsec = 1;
		timer_settime(id, TIMER_ABSTIME, &val, NULL);
	}

4) If the timer was initially armed with a former expiration value
   before this call to timer_settime() and the current call sets an
   early deadline that has already expired, the timer fires inline
   within posix_cpu_timer_set(). In this case it must have been dequeued
   before firing inline with its new expiration value, yet it hasn't
   been disarmed in this case. So the process wide cputime counter and
   the tick dependency may still be around for a while even after the
   timer fired.

   The following code snippet can reproduce this:

	void trigger_process_counter(void)
	{
		timer_t id;
		struct itimerspec val = { };

		signal(SIGALRM, SIG_IGN);
		timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
		val.it_value.tv_sec = 100;
		timer_settime(id, TIMER_ABSTIME, &val, NULL);
		val.it_value.tv_sec = 0;
		val.it_value.tv_nsec = 1;
		timer_settime(id, TIMER_ABSTIME, &val, NULL);
	}

Fix all these issues with triggering the related base next expiration
recalculation on the next tick. This also implies to re-evaluate the need
to keep around the process wide cputime counter and the tick dependency, in
a similar fashion to disarm_timer().

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210726125513.271824-7-frederic@kernel.org
2021-08-10 17:09:59 +02:00

257 lines
6.9 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _linux_POSIX_TIMERS_H
#define _linux_POSIX_TIMERS_H
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/alarmtimer.h>
#include <linux/timerqueue.h>
#include <linux/task_work.h>
struct kernel_siginfo;
struct task_struct;
/*
* Bit fields within a clockid:
*
* The most significant 29 bits hold either a pid or a file descriptor.
*
* Bit 2 indicates whether a cpu clock refers to a thread or a process.
*
* Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
*
* A clockid is invalid if bits 2, 1, and 0 are all set.
*/
#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
#define CPUCLOCK_PERTHREAD(clock) \
(((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
#define CPUCLOCK_PERTHREAD_MASK 4
#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
#define CPUCLOCK_CLOCK_MASK 3
#define CPUCLOCK_PROF 0
#define CPUCLOCK_VIRT 1
#define CPUCLOCK_SCHED 2
#define CPUCLOCK_MAX 3
#define CLOCKFD CPUCLOCK_MAX
#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
static inline clockid_t make_process_cpuclock(const unsigned int pid,
const clockid_t clock)
{
return ((~pid) << 3) | clock;
}
static inline clockid_t make_thread_cpuclock(const unsigned int tid,
const clockid_t clock)
{
return make_process_cpuclock(tid, clock | CPUCLOCK_PERTHREAD_MASK);
}
static inline clockid_t fd_to_clockid(const int fd)
{
return make_process_cpuclock((unsigned int) fd, CLOCKFD);
}
static inline int clockid_to_fd(const clockid_t clk)
{
return ~(clk >> 3);
}
#ifdef CONFIG_POSIX_TIMERS
/**
* cpu_timer - Posix CPU timer representation for k_itimer
* @node: timerqueue node to queue in the task/sig
* @head: timerqueue head on which this timer is queued
* @task: Pointer to target task
* @elist: List head for the expiry list
* @firing: Timer is currently firing
*/
struct cpu_timer {
struct timerqueue_node node;
struct timerqueue_head *head;
struct pid *pid;
struct list_head elist;
int firing;
};
static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
struct cpu_timer *ctmr)
{
ctmr->head = head;
return timerqueue_add(head, &ctmr->node);
}
static inline bool cpu_timer_queued(struct cpu_timer *ctmr)
{
return !!ctmr->head;
}
static inline bool cpu_timer_dequeue(struct cpu_timer *ctmr)
{
if (cpu_timer_queued(ctmr)) {
timerqueue_del(ctmr->head, &ctmr->node);
ctmr->head = NULL;
return true;
}
return false;
}
static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr)
{
return ctmr->node.expires;
}
static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp)
{
ctmr->node.expires = exp;
}
/**
* posix_cputimer_base - Container per posix CPU clock
* @nextevt: Earliest-expiration cache
* @tqhead: timerqueue head for cpu_timers
*/
struct posix_cputimer_base {
u64 nextevt;
struct timerqueue_head tqhead;
};
/**
* posix_cputimers - Container for posix CPU timer related data
* @bases: Base container for posix CPU clocks
* @timers_active: Timers are queued.
* @expiry_active: Timer expiry is active. Used for
* process wide timers to avoid multiple
* task trying to handle expiry concurrently
*
* Used in task_struct and signal_struct
*/
struct posix_cputimers {
struct posix_cputimer_base bases[CPUCLOCK_MAX];
unsigned int timers_active;
unsigned int expiry_active;
};
/**
* posix_cputimers_work - Container for task work based posix CPU timer expiry
* @work: The task work to be scheduled
* @scheduled: @work has been scheduled already, no further processing
*/
struct posix_cputimers_work {
struct callback_head work;
unsigned int scheduled;
};
static inline void posix_cputimers_init(struct posix_cputimers *pct)
{
memset(pct, 0, sizeof(*pct));
pct->bases[0].nextevt = U64_MAX;
pct->bases[1].nextevt = U64_MAX;
pct->bases[2].nextevt = U64_MAX;
}
void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);
static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
u64 runtime)
{
pct->bases[CPUCLOCK_SCHED].nextevt = runtime;
}
/* Init task static initializer */
#define INIT_CPU_TIMERBASE(b) { \
.nextevt = U64_MAX, \
}
#define INIT_CPU_TIMERBASES(b) { \
INIT_CPU_TIMERBASE(b[0]), \
INIT_CPU_TIMERBASE(b[1]), \
INIT_CPU_TIMERBASE(b[2]), \
}
#define INIT_CPU_TIMERS(s) \
.posix_cputimers = { \
.bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases), \
},
#else
struct posix_cputimers { };
struct cpu_timer { };
#define INIT_CPU_TIMERS(s)
static inline void posix_cputimers_init(struct posix_cputimers *pct) { }
static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
u64 cpu_limit) { }
#endif
#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
void posix_cputimers_init_work(void);
#else
static inline void posix_cputimers_init_work(void) { }
#endif
#define REQUEUE_PENDING 1
/**
* struct k_itimer - POSIX.1b interval timer structure.
* @list: List head for binding the timer to signals->posix_timers
* @t_hash: Entry in the posix timer hash table
* @it_lock: Lock protecting the timer
* @kclock: Pointer to the k_clock struct handling this timer
* @it_clock: The posix timer clock id
* @it_id: The posix timer id for identifying the timer
* @it_active: Marker that timer is active
* @it_overrun: The overrun counter for pending signals
* @it_overrun_last: The overrun at the time of the last delivered signal
* @it_requeue_pending: Indicator that timer waits for being requeued on
* signal delivery
* @it_sigev_notify: The notify word of sigevent struct for signal delivery
* @it_interval: The interval for periodic timers
* @it_signal: Pointer to the creators signal struct
* @it_pid: The pid of the process/task targeted by the signal
* @it_process: The task to wakeup on clock_nanosleep (CPU timers)
* @sigq: Pointer to preallocated sigqueue
* @it: Union representing the various posix timer type
* internals.
* @rcu: RCU head for freeing the timer.
*/
struct k_itimer {
struct list_head list;
struct hlist_node t_hash;
spinlock_t it_lock;
const struct k_clock *kclock;
clockid_t it_clock;
timer_t it_id;
int it_active;
s64 it_overrun;
s64 it_overrun_last;
int it_requeue_pending;
int it_sigev_notify;
ktime_t it_interval;
struct signal_struct *it_signal;
union {
struct pid *it_pid;
struct task_struct *it_process;
};
struct sigqueue *sigq;
union {
struct {
struct hrtimer timer;
} real;
struct cpu_timer cpu;
struct {
struct alarm alarmtimer;
} alarm;
} it;
struct rcu_head rcu;
};
void run_posix_cpu_timers(void);
void posix_cpu_timers_exit(struct task_struct *task);
void posix_cpu_timers_exit_group(struct task_struct *task);
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
u64 *newval, u64 *oldval);
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
void posixtimer_rearm(struct kernel_siginfo *info);
#endif