linux-stable/include/linux/hrtimer.h
Thomas Gleixner 5c0930ccaa hrtimers: Push pending hrtimers away from outgoing CPU earlier
2b8272ff4a ("cpu/hotplug: Prevent self deadlock on CPU hot-unplug")
solved the straight forward CPU hotplug deadlock vs. the scheduler
bandwidth timer. Yu discovered a more involved variant where a task which
has a bandwidth timer started on the outgoing CPU holds a lock and then
gets throttled. If the lock required by one of the CPU hotplug callbacks
the hotplug operation deadlocks because the unthrottling timer event is not
handled on the dying CPU and can only be recovered once the control CPU
reaches the hotplug state which pulls the pending hrtimers from the dead
CPU.

Solve this by pushing the hrtimers away from the dying CPU in the dying
callbacks. Nothing can queue a hrtimer on the dying CPU at that point because
all other CPUs spin in stop_machine() with interrupts disabled and once the
operation is finished the CPU is marked offline.

Reported-by: Yu Liao <liaoyu15@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Liu Tie <liutie4@huawei.com>
Link: https://lore.kernel.org/r/87a5rphara.ffs@tglx
2023-11-11 18:06:42 +01:00

539 lines
16 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* hrtimers - High-resolution kernel timers
*
* Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005, Red Hat, Inc., Ingo Molnar
*
* data type definitions, declarations, prototypes
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
#ifndef _LINUX_HRTIMER_H
#define _LINUX_HRTIMER_H
#include <linux/hrtimer_defs.h>
#include <linux/rbtree.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/seqlock.h>
#include <linux/timer.h>
#include <linux/timerqueue.h>
struct hrtimer_clock_base;
struct hrtimer_cpu_base;
/*
* Mode arguments of xxx_hrtimer functions:
*
* HRTIMER_MODE_ABS - Time value is absolute
* HRTIMER_MODE_REL - Time value is relative to now
* HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered
* when starting the timer)
* HRTIMER_MODE_SOFT - Timer callback function will be executed in
* soft irq context
* HRTIMER_MODE_HARD - Timer callback function will be executed in
* hard irq context even on PREEMPT_RT.
*/
enum hrtimer_mode {
HRTIMER_MODE_ABS = 0x00,
HRTIMER_MODE_REL = 0x01,
HRTIMER_MODE_PINNED = 0x02,
HRTIMER_MODE_SOFT = 0x04,
HRTIMER_MODE_HARD = 0x08,
HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT,
HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT,
HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,
HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD,
HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
};
/*
* Return values for the callback function
*/
enum hrtimer_restart {
HRTIMER_NORESTART, /* Timer is not restarted */
HRTIMER_RESTART, /* Timer must be restarted */
};
/*
* Values to track state of the timer
*
* Possible states:
*
* 0x00 inactive
* 0x01 enqueued into rbtree
*
* The callback state is not part of the timer->state because clearing it would
* mean touching the timer after the callback, this makes it impossible to free
* the timer from the callback function.
*
* Therefore we track the callback state in:
*
* timer->base->cpu_base->running == timer
*
* On SMP it is possible to have a "callback function running and enqueued"
* status. It happens for example when a posix timer expired and the callback
* queued a signal. Between dropping the lock which protects the posix timer
* and reacquiring the base lock of the hrtimer, another CPU can deliver the
* signal and rearm the timer.
*
* All state transitions are protected by cpu_base->lock.
*/
#define HRTIMER_STATE_INACTIVE 0x00
#define HRTIMER_STATE_ENQUEUED 0x01
/**
* struct hrtimer - the basic hrtimer structure
* @node: timerqueue node, which also manages node.expires,
* the absolute expiry time in the hrtimers internal
* representation. The time is related to the clock on
* which the timer is based. Is setup by adding
* slack to the _softexpires value. For non range timers
* identical to _softexpires.
* @_softexpires: the absolute earliest expiry time of the hrtimer.
* The time which was given as expiry time when the timer
* was armed.
* @function: timer expiry callback function
* @base: pointer to the timer base (per cpu and per clock)
* @state: state information (See bit values above)
* @is_rel: Set if the timer was armed relative
* @is_soft: Set if hrtimer will be expired in soft interrupt context.
* @is_hard: Set if hrtimer will be expired in hard interrupt context
* even on RT.
*
* The hrtimer structure must be initialized by hrtimer_init()
*/
struct hrtimer {
struct timerqueue_node node;
ktime_t _softexpires;
enum hrtimer_restart (*function)(struct hrtimer *);
struct hrtimer_clock_base *base;
u8 state;
u8 is_rel;
u8 is_soft;
u8 is_hard;
};
/**
* struct hrtimer_sleeper - simple sleeper structure
* @timer: embedded timer structure
* @task: task to wake up
*
* task is set to NULL, when the timer expires.
*/
struct hrtimer_sleeper {
struct hrtimer timer;
struct task_struct *task;
};
#ifdef CONFIG_64BIT
# define __hrtimer_clock_base_align ____cacheline_aligned
#else
# define __hrtimer_clock_base_align
#endif
/**
* struct hrtimer_clock_base - the timer base for a specific clock
* @cpu_base: per cpu clock base
* @index: clock type index for per_cpu support when moving a
* timer to a base on another cpu.
* @clockid: clock id for per_cpu support
* @seq: seqcount around __run_hrtimer
* @running: pointer to the currently running hrtimer
* @active: red black tree root node for the active timers
* @get_time: function to retrieve the current time of the clock
* @offset: offset of this clock to the monotonic base
*/
struct hrtimer_clock_base {
struct hrtimer_cpu_base *cpu_base;
unsigned int index;
clockid_t clockid;
seqcount_raw_spinlock_t seq;
struct hrtimer *running;
struct timerqueue_head active;
ktime_t (*get_time)(void);
ktime_t offset;
} __hrtimer_clock_base_align;
enum hrtimer_base_type {
HRTIMER_BASE_MONOTONIC,
HRTIMER_BASE_REALTIME,
HRTIMER_BASE_BOOTTIME,
HRTIMER_BASE_TAI,
HRTIMER_BASE_MONOTONIC_SOFT,
HRTIMER_BASE_REALTIME_SOFT,
HRTIMER_BASE_BOOTTIME_SOFT,
HRTIMER_BASE_TAI_SOFT,
HRTIMER_MAX_CLOCK_BASES,
};
/**
* struct hrtimer_cpu_base - the per cpu clock bases
* @lock: lock protecting the base and associated clock bases
* and timers
* @cpu: cpu number
* @active_bases: Bitfield to mark bases with active timers
* @clock_was_set_seq: Sequence counter of clock was set events
* @hres_active: State of high resolution mode
* @in_hrtirq: hrtimer_interrupt() is currently executing
* @hang_detected: The last hrtimer interrupt detected a hang
* @softirq_activated: displays, if the softirq is raised - update of softirq
* related settings is not required then.
* @nr_events: Total number of hrtimer interrupt events
* @nr_retries: Total number of hrtimer interrupt retries
* @nr_hangs: Total number of hrtimer interrupt hangs
* @max_hang_time: Maximum time spent in hrtimer_interrupt
* @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
* expired
* @timer_waiters: A hrtimer_cancel() invocation waits for the timer
* callback to finish.
* @expires_next: absolute time of the next event, is required for remote
* hrtimer enqueue; it is the total first expiry time (hard
* and soft hrtimer are taken into account)
* @next_timer: Pointer to the first expiring timer
* @softirq_expires_next: Time to check, if soft queues needs also to be expired
* @softirq_next_timer: Pointer to the first expiring softirq based timer
* @clock_base: array of clock bases for this cpu
*
* Note: next_timer is just an optimization for __remove_hrtimer().
* Do not dereference the pointer because it is not reliable on
* cross cpu removals.
*/
struct hrtimer_cpu_base {
raw_spinlock_t lock;
unsigned int cpu;
unsigned int active_bases;
unsigned int clock_was_set_seq;
unsigned int hres_active : 1,
in_hrtirq : 1,
hang_detected : 1,
softirq_activated : 1;
#ifdef CONFIG_HIGH_RES_TIMERS
unsigned int nr_events;
unsigned short nr_retries;
unsigned short nr_hangs;
unsigned int max_hang_time;
#endif
#ifdef CONFIG_PREEMPT_RT
spinlock_t softirq_expiry_lock;
atomic_t timer_waiters;
#endif
ktime_t expires_next;
struct hrtimer *next_timer;
ktime_t softirq_expires_next;
struct hrtimer *softirq_next_timer;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
} ____cacheline_aligned;
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
{
timer->node.expires = time;
timer->_softexpires = time;
}
static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta)
{
timer->_softexpires = time;
timer->node.expires = ktime_add_safe(time, delta);
}
static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, u64 delta)
{
timer->_softexpires = time;
timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta));
}
static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64)
{
timer->node.expires = tv64;
timer->_softexpires = tv64;
}
static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
{
timer->node.expires = ktime_add_safe(timer->node.expires, time);
timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
}
static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns)
{
timer->node.expires = ktime_add_ns(timer->node.expires, ns);
timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
}
static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
{
return timer->node.expires;
}
static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
{
return timer->_softexpires;
}
static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer)
{
return timer->node.expires;
}
static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
{
return timer->_softexpires;
}
static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
{
return ktime_to_ns(timer->node.expires);
}
static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
{
return ktime_sub(timer->node.expires, timer->base->get_time());
}
static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
{
return timer->base->get_time();
}
static inline int hrtimer_is_hres_active(struct hrtimer *timer)
{
return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
timer->base->cpu_base->hres_active : 0;
}
#ifdef CONFIG_HIGH_RES_TIMERS
struct clock_event_device;
extern void hrtimer_interrupt(struct clock_event_device *dev);
extern unsigned int hrtimer_resolution;
#else
#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
#endif
static inline ktime_t
__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
{
ktime_t rem = ktime_sub(timer->node.expires, now);
/*
* Adjust relative timers for the extra we added in
* hrtimer_start_range_ns() to prevent short timeouts.
*/
if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel)
rem -= hrtimer_resolution;
return rem;
}
static inline ktime_t
hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
{
return __hrtimer_expires_remaining_adjusted(timer,
timer->base->get_time());
}
#ifdef CONFIG_TIMERFD
extern void timerfd_clock_was_set(void);
extern void timerfd_resume(void);
#else
static inline void timerfd_clock_was_set(void) { }
static inline void timerfd_resume(void) { }
#endif
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
#ifdef CONFIG_PREEMPT_RT
void hrtimer_cancel_wait_running(const struct hrtimer *timer);
#else
static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
{
cpu_relax();
}
#endif
/* Exported timer functions: */
/* Initialize timers: */
extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
enum hrtimer_mode mode);
extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
enum hrtimer_mode mode);
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
enum hrtimer_mode mode);
extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
clockid_t clock_id,
enum hrtimer_mode mode);
extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
#else
static inline void hrtimer_init_on_stack(struct hrtimer *timer,
clockid_t which_clock,
enum hrtimer_mode mode)
{
hrtimer_init(timer, which_clock, mode);
}
static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
clockid_t clock_id,
enum hrtimer_mode mode)
{
hrtimer_init_sleeper(sl, clock_id, mode);
}
static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
#endif
/* Basic timer operations: */
extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
u64 range_ns, const enum hrtimer_mode mode);
/**
* hrtimer_start - (re)start an hrtimer
* @timer: the timer to be added
* @tim: expiry time
* @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
* relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
* softirq based mode is considered for debug purpose only!
*/
static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
const enum hrtimer_mode mode)
{
hrtimer_start_range_ns(timer, tim, 0, mode);
}
extern int hrtimer_cancel(struct hrtimer *timer);
extern int hrtimer_try_to_cancel(struct hrtimer *timer);
static inline void hrtimer_start_expires(struct hrtimer *timer,
enum hrtimer_mode mode)
{
u64 delta;
ktime_t soft, hard;
soft = hrtimer_get_softexpires(timer);
hard = hrtimer_get_expires(timer);
delta = ktime_to_ns(ktime_sub(hard, soft));
hrtimer_start_range_ns(timer, soft, delta, mode);
}
void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
enum hrtimer_mode mode);
static inline void hrtimer_restart(struct hrtimer *timer)
{
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
/* Query timers: */
extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
/**
* hrtimer_get_remaining - get remaining time for the timer
* @timer: the timer to read
*/
static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
{
return __hrtimer_get_remaining(timer, false);
}
extern u64 hrtimer_get_next_event(void);
extern u64 hrtimer_next_event_without(const struct hrtimer *exclude);
extern bool hrtimer_active(const struct hrtimer *timer);
/**
* hrtimer_is_queued - check, whether the timer is on one of the queues
* @timer: Timer to check
*
* Returns: True if the timer is queued, false otherwise
*
* The function can be used lockless, but it gives only a current snapshot.
*/
static inline bool hrtimer_is_queued(struct hrtimer *timer)
{
/* The READ_ONCE pairs with the update functions of timer->state */
return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED);
}
/*
* Helper function to check, whether the timer is running the callback
* function
*/
static inline int hrtimer_callback_running(struct hrtimer *timer)
{
return timer->base->running == timer;
}
/* Forward a hrtimer so it expires after now: */
extern u64
hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
/**
* hrtimer_forward_now - forward the timer expiry so it expires after now
* @timer: hrtimer to forward
* @interval: the interval to forward
*
* Forward the timer expiry so it will expire after the current time
* of the hrtimer clock base. Returns the number of overruns.
*
* Can be safely called from the callback function of @timer. If
* called from other contexts @timer must neither be enqueued nor
* running the callback and the caller needs to take care of
* serialization.
*
* Note: This only updates the timer expiry value and does not requeue
* the timer.
*/
static inline u64 hrtimer_forward_now(struct hrtimer *timer,
ktime_t interval)
{
return hrtimer_forward(timer, timer->base->get_time(), interval);
}
/* Precise sleep: */
extern int nanosleep_copyout(struct restart_block *, struct timespec64 *);
extern long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
const clockid_t clockid);
extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
const enum hrtimer_mode mode);
extern int schedule_hrtimeout_range_clock(ktime_t *expires,
u64 delta,
const enum hrtimer_mode mode,
clockid_t clock_id);
extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
/* Soft interrupt function to run the hrtimer queues: */
extern void hrtimer_run_queues(void);
/* Bootup initialization: */
extern void __init hrtimers_init(void);
/* Show pending timers: */
extern void sysrq_timer_list_show(void);
int hrtimers_prepare_cpu(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
int hrtimers_cpu_dying(unsigned int cpu);
#else
#define hrtimers_cpu_dying NULL
#endif
#endif