From 6b93bb41f6eaa1cc5c5f30ec3b687b380f116cd0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:00 +0206 Subject: [PATCH 01/12] printk: Add non-BKL (nbcon) console basic infrastructure The current console/printk subsystem is protected by a Big Kernel Lock, (aka console_lock) which has ill defined semantics and is more or less stateless. This puts severe limitations on the console subsystem and makes forced takeover and output in emergency and panic situations a fragile endeavour that is based on try and pray. The goal of non-BKL (nbcon) consoles is to break out of the console lock jail and to provide a new infrastructure that avoids the pitfalls and also allows console drivers to be gradually converted over. The proposed infrastructure aims for the following properties: - Per console locking instead of global locking - Per console state that allows to make informed decisions - Stateful handover and takeover As a first step, state is added to struct console. The per console state is an atomic_t using a 32bit bit field. Reserve state bits, which will be populated later in the series. Wire it up into the console register/unregister functionality. It was decided to use a bitfield because using a plain u32 with mask/shift operations resulted in uncomprehensible code. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-2-john.ogness@linutronix.de --- include/linux/console.h | 31 ++++++++++++++++++ kernel/printk/Makefile | 2 +- kernel/printk/internal.h | 8 +++++ kernel/printk/nbcon.c | 70 ++++++++++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 13 ++++++-- 5 files changed, 120 insertions(+), 4 deletions(-) create mode 100644 kernel/printk/nbcon.c diff --git a/include/linux/console.h b/include/linux/console.h index 7de11c763eb3..a2d37a7a98a8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -156,6 +156,8 @@ static inline int con_debug_leave(void) * /dev/kmesg which requires a larger output buffer. * @CON_SUSPENDED: Indicates if a console is suspended. If true, the * printing callbacks must not be called. + * @CON_NBCON: Console can operate outside of the legacy style console_lock + * constraints. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), @@ -166,8 +168,32 @@ enum cons_flags { CON_BRL = BIT(5), CON_EXTENDED = BIT(6), CON_SUSPENDED = BIT(7), + CON_NBCON = BIT(8), }; +/** + * struct nbcon_state - console state for nbcon consoles + * @atom: Compound of the state fields for atomic operations + * + * To be used for reading and preparing of the value stored in the nbcon + * state variable @console::nbcon_state. + */ +struct nbcon_state { + union { + unsigned int atom; + struct { + }; + }; +}; + +/* + * The nbcon_state struct is used to easily create and interpret values that + * are stored in the @console::nbcon_state variable. Ensure this struct stays + * within the size boundaries of the atomic variable's underlying type in + * order to avoid any accidental truncation. + */ +static_assert(sizeof(struct nbcon_state) <= sizeof(int)); + /** * struct console - The console descriptor structure * @name: The name of the console driver @@ -187,6 +213,8 @@ enum cons_flags { * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list + * + * @nbcon_state: State for nbcon consoles */ struct console { char name[16]; @@ -206,6 +234,9 @@ struct console { unsigned long dropped; void *data; struct hlist_node node; + + /* nbcon console specific members */ + atomic_t __private nbcon_state; }; #ifdef CONFIG_LOCKDEP diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index f5b388e810b9..39a2b61c7232 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y = printk.o -obj-$(CONFIG_PRINTK) += printk_safe.o +obj-$(CONFIG_PRINTK) += printk_safe.o nbcon.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o obj-$(CONFIG_PRINTK_INDEX) += index.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7d4979d5c3ce..2ca0ab78802c 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -3,6 +3,7 @@ * internal.h - printk internal definitions */ #include +#include #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) void __init printk_sysctl_init(void); @@ -61,6 +62,10 @@ void defer_console_output(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); + +void nbcon_init(struct console *con); +void nbcon_cleanup(struct console *con); + #else #define PRINTK_PREFIX_MAX 0 @@ -76,6 +81,9 @@ u16 printk_parse_prefix(const char *text, int *level, #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline void nbcon_init(struct console *con) { } +static inline void nbcon_cleanup(struct console *con) { } + #endif /* CONFIG_PRINTK */ /** diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c new file mode 100644 index 000000000000..63d24ca62ac5 --- /dev/null +++ b/kernel/printk/nbcon.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2022 Linutronix GmbH, John Ogness +// Copyright (C) 2022 Intel, Thomas Gleixner + +#include +#include +#include "internal.h" +/* + * Printk console printing implementation for consoles which does not depend + * on the legacy style console_lock mechanism. + */ + +/** + * nbcon_state_set - Helper function to set the console state + * @con: Console to update + * @new: The new state to write + * + * Only to be used when the console is not yet or no longer visible in the + * system. Otherwise use nbcon_state_try_cmpxchg(). + */ +static inline void nbcon_state_set(struct console *con, struct nbcon_state *new) +{ + atomic_set(&ACCESS_PRIVATE(con, nbcon_state), new->atom); +} + +/** + * nbcon_state_read - Helper function to read the console state + * @con: Console to read + * @state: The state to store the result + */ +static inline void nbcon_state_read(struct console *con, struct nbcon_state *state) +{ + state->atom = atomic_read(&ACCESS_PRIVATE(con, nbcon_state)); +} + +/** + * nbcon_state_try_cmpxchg() - Helper function for atomic_try_cmpxchg() on console state + * @con: Console to update + * @cur: Old/expected state + * @new: New state + * + * Return: True on success. False on fail and @cur is updated. + */ +static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_state *cur, + struct nbcon_state *new) +{ + return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); +} + +/** + * nbcon_init - Initialize the nbcon console specific data + * @con: Console to initialize + */ +void nbcon_init(struct console *con) +{ + struct nbcon_state state = { }; + + nbcon_state_set(con, &state); +} + +/** + * nbcon_cleanup - Cleanup the nbcon console specific data + * @con: Console to cleanup + */ +void nbcon_cleanup(struct console *con) +{ + struct nbcon_state state = { }; + + nbcon_state_set(con, &state); +} diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7e0b4dd02398..5f372eaceb29 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3326,9 +3326,10 @@ static void try_enable_default_console(struct console *newcon) newcon->flags |= CON_CONSDEV; } -#define con_printk(lvl, con, fmt, ...) \ - printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ - (con->flags & CON_BOOT) ? "boot" : "", \ +#define con_printk(lvl, con, fmt, ...) \ + printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ + (con->flags & CON_NBCON) ? "" : "legacy ", \ + (con->flags & CON_BOOT) ? "boot" : "", \ con->name, con->index, ##__VA_ARGS__) static void console_init_seq(struct console *newcon, bool bootcon_registered) @@ -3488,6 +3489,9 @@ void register_console(struct console *newcon) newcon->dropped = 0; console_init_seq(newcon, bootcon_registered); + if (newcon->flags & CON_NBCON) + nbcon_init(newcon); + /* * Put this console in the list - keep the * preferred driver at the head of the list. @@ -3579,6 +3583,9 @@ static int unregister_console_locked(struct console *console) */ synchronize_srcu(&console_srcu); + if (console->flags & CON_NBCON) + nbcon_cleanup(console); + console_sysfs_notify(); if (console->exit) From 3a5bb25162b880da749f7cdf281c78dbade4164b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:01 +0206 Subject: [PATCH 02/12] printk: nbcon: Add acquire/release logic Add per console acquire/release functionality. The state of the console is maintained in the "nbcon_state" atomic variable. The console is locked when: - The 'prio' field contains the priority of the context that owns the console. Only higher priority contexts are allowed to take over the lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. - The 'cpu' field denotes on which CPU the console is locked. It is used to prevent busy waiting on the same CPU. Also it informs the lock owner that it has lost the lock in a more complex scenario when the lock was taken over by a higher priority context, released, and taken on another CPU with the same priority as the interrupted owner. The acquire mechanism uses a few more fields: - The 'req_prio' field is used by the handover approach to make the current owner aware that there is a context with a higher priority waiting for the friendly handover. - The 'unsafe' field allows to take over the console in a safe way in the middle of emitting a message. The field is set only when accessing some shared resources or when the console device is manipulated. It can be cleared, for example, after emitting one character when the console device is in a consistent state. - The 'unsafe_takeover' field is set when a hostile takeover took the console in an unsafe state. The console will stay in the unsafe state until re-initialized. The acquire mechanism uses three approaches: 1) Direct acquire when the console is not owned or is owned by a lower priority context and is in a safe state. 2) Friendly handover mechanism uses a request/grant handshake. It is used when the current owner has lower priority and the console is in an unsafe state. The requesting context: a) Sets its priority into the 'req_prio' field. b) Waits (with a timeout) for the owning context to unlock the console. c) Takes the lock and clears the 'req_prio' field. The owning context: a) Observes the 'req_prio' field set on exit from the unsafe console state. b) Gives up console ownership by clearing the 'prio' field. 3) Unsafe hostile takeover allows to take over the lock even when the console is an unsafe state. It is used only in panic() by the final attempt to flush consoles in a try and hope mode. Note that separate record buffers are used in panic(). As a result, the messages can be read and formatted without any risk even after using the hostile takeover in unsafe state. The release function simply clears the 'prio' field. All operations on @console::nbcon_state are atomic cmpxchg based to handle concurrency. The acquire/release functions implement only minimal policies: - Preference for higher priority contexts. - Protection of the panic CPU. All other policy decisions must be made at the call sites: - What is marked as an unsafe section. - Whether to spin-wait if there is already an owner and the console is in an unsafe state. - Whether to attempt an unsafe hostile takeover. The design allows to implement the well known: acquire() output_one_printk_record() release() The output of one printk record might be interrupted with a higher priority context. The new owner is supposed to reprint the entire interrupted record from scratch. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-3-john.ogness@linutronix.de --- include/linux/console.h | 56 +++++ kernel/printk/nbcon.c | 497 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 553 insertions(+) diff --git a/include/linux/console.h b/include/linux/console.h index a2d37a7a98a8..98210fd01f18 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -175,13 +175,29 @@ enum cons_flags { * struct nbcon_state - console state for nbcon consoles * @atom: Compound of the state fields for atomic operations * + * @req_prio: The priority of a handover request + * @prio: The priority of the current owner + * @unsafe: Console is busy in a non takeover region + * @unsafe_takeover: A hostile takeover in an unsafe state happened in the + * past. The console cannot be safe until re-initialized. + * @cpu: The CPU on which the owner runs + * * To be used for reading and preparing of the value stored in the nbcon * state variable @console::nbcon_state. + * + * The @prio and @req_prio fields are particularly important to allow + * spin-waiting to timeout and give up without the risk of a waiter being + * assigned the lock after giving up. */ struct nbcon_state { union { unsigned int atom; struct { + unsigned int prio : 2; + unsigned int req_prio : 2; + unsigned int unsafe : 1; + unsigned int unsafe_takeover : 1; + unsigned int cpu : 24; }; }; }; @@ -194,6 +210,46 @@ struct nbcon_state { */ static_assert(sizeof(struct nbcon_state) <= sizeof(int)); +/** + * nbcon_prio - console owner priority for nbcon consoles + * @NBCON_PRIO_NONE: Unused + * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage + * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) + * @NBCON_PRIO_PANIC: Panic output + * @NBCON_PRIO_MAX: The number of priority levels + * + * A higher priority context can takeover the console when it is + * in the safe state. The final attempt to flush consoles in panic() + * can be allowed to do so even in an unsafe state (Hope and pray). + */ +enum nbcon_prio { + NBCON_PRIO_NONE = 0, + NBCON_PRIO_NORMAL, + NBCON_PRIO_EMERGENCY, + NBCON_PRIO_PANIC, + NBCON_PRIO_MAX, +}; + +struct console; + +/** + * struct nbcon_context - Context for console acquire/release + * @console: The associated console + * @spinwait_max_us: Limit for spin-wait acquire + * @prio: Priority of the context + * @allow_unsafe_takeover: Allow performing takeover even if unsafe. Can + * be used only with NBCON_PRIO_PANIC @prio. It + * might cause a system freeze when the console + * is used later. + */ +struct nbcon_context { + /* members set by caller */ + struct console *console; + unsigned int spinwait_max_us; + enum nbcon_prio prio; + unsigned int allow_unsafe_takeover : 1; +}; + /** * struct console - The console descriptor structure * @name: The name of the console driver diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 63d24ca62ac5..a2a354f859f9 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -4,10 +4,98 @@ #include #include +#include #include "internal.h" /* * Printk console printing implementation for consoles which does not depend * on the legacy style console_lock mechanism. + * + * The state of the console is maintained in the "nbcon_state" atomic + * variable. + * + * The console is locked when: + * + * - The 'prio' field contains the priority of the context that owns the + * console. Only higher priority contexts are allowed to take over the + * lock. A value of 0 (NBCON_PRIO_NONE) means the console is not locked. + * + * - The 'cpu' field denotes on which CPU the console is locked. It is used + * to prevent busy waiting on the same CPU. Also it informs the lock owner + * that it has lost the lock in a more complex scenario when the lock was + * taken over by a higher priority context, released, and taken on another + * CPU with the same priority as the interrupted owner. + * + * The acquire mechanism uses a few more fields: + * + * - The 'req_prio' field is used by the handover approach to make the + * current owner aware that there is a context with a higher priority + * waiting for the friendly handover. + * + * - The 'unsafe' field allows to take over the console in a safe way in the + * middle of emitting a message. The field is set only when accessing some + * shared resources or when the console device is manipulated. It can be + * cleared, for example, after emitting one character when the console + * device is in a consistent state. + * + * - The 'unsafe_takeover' field is set when a hostile takeover took the + * console in an unsafe state. The console will stay in the unsafe state + * until re-initialized. + * + * The acquire mechanism uses three approaches: + * + * 1) Direct acquire when the console is not owned or is owned by a lower + * priority context and is in a safe state. + * + * 2) Friendly handover mechanism uses a request/grant handshake. It is used + * when the current owner has lower priority and the console is in an + * unsafe state. + * + * The requesting context: + * + * a) Sets its priority into the 'req_prio' field. + * + * b) Waits (with a timeout) for the owning context to unlock the + * console. + * + * c) Takes the lock and clears the 'req_prio' field. + * + * The owning context: + * + * a) Observes the 'req_prio' field set on exit from the unsafe + * console state. + * + * b) Gives up console ownership by clearing the 'prio' field. + * + * 3) Unsafe hostile takeover allows to take over the lock even when the + * console is an unsafe state. It is used only in panic() by the final + * attempt to flush consoles in a try and hope mode. + * + * The release function simply clears the 'prio' field. + * + * All operations on @console::nbcon_state are atomic cmpxchg based to + * handle concurrency. + * + * The acquire/release functions implement only minimal policies: + * + * - Preference for higher priority contexts. + * - Protection of the panic CPU. + * + * All other policy decisions must be made at the call sites: + * + * - What is marked as an unsafe section. + * - Whether to spin-wait if there is already an owner and the console is + * in an unsafe state. + * - Whether to attempt an unsafe hostile takeover. + * + * The design allows to implement the well known: + * + * acquire() + * output_one_printk_record() + * release() + * + * The output of one printk record might be interrupted with a higher priority + * context. The new owner is supposed to reprint the entire interrupted record + * from scratch. */ /** @@ -47,6 +135,415 @@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); } +/** + * nbcon_context_try_acquire_direct - Try to acquire directly + * @ctxt: The context of the caller + * @cur: The current console state + * + * Acquire the console when it is released. Also acquire the console when + * the current owner has a lower priority and the console is in a safe state. + * + * Return: 0 on success. Otherwise, an error code on failure. Also @cur + * is updated to the latest state when failed to modify it. + * + * Errors: + * + * -EPERM: A panic is in progress and this is not the panic CPU. + * Or the current owner or waiter has the same or higher + * priority. No acquire method can be successful in + * this case. + * + * -EBUSY: The current owner has a lower priority but the console + * in an unsafe state. The caller should try using + * the handover acquire method. + */ +static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt, + struct nbcon_state *cur) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state new; + + do { + if (other_cpu_in_panic()) + return -EPERM; + + if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio) + return -EPERM; + + if (cur->unsafe) + return -EBUSY; + + /* + * The console should never be safe for a direct acquire + * if an unsafe hostile takeover has ever happened. + */ + WARN_ON_ONCE(cur->unsafe_takeover); + + new.atom = cur->atom; + new.prio = ctxt->prio; + new.req_prio = NBCON_PRIO_NONE; + new.unsafe = cur->unsafe_takeover; + new.cpu = cpu; + + } while (!nbcon_state_try_cmpxchg(con, cur, &new)); + + return 0; +} + +static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio) +{ + /* + * The request context is well defined by the @req_prio because: + * + * - Only a context with a higher priority can take over the request. + * - There are only three priorities. + * - Only one CPU is allowed to request PANIC priority. + * - Lower priorities are ignored during panic() until reboot. + * + * As a result, the following scenario is *not* possible: + * + * 1. Another context with a higher priority directly takes ownership. + * 2. The higher priority context releases the ownership. + * 3. A lower priority context takes the ownership. + * 4. Another context with the same priority as this context + * creates a request and starts waiting. + */ + + return (cur->req_prio == expected_prio); +} + +/** + * nbcon_context_try_acquire_requested - Try to acquire after having + * requested a handover + * @ctxt: The context of the caller + * @cur: The current console state + * + * This is a helper function for nbcon_context_try_acquire_handover(). + * It is called when the console is in an unsafe state. The current + * owner will release the console on exit from the unsafe region. + * + * Return: 0 on success and @cur is updated to the new console state. + * Otherwise an error code on failure. + * + * Errors: + * + * -EPERM: A panic is in progress and this is not the panic CPU + * or this context is no longer the waiter. + * + * -EBUSY: The console is still locked. The caller should + * continue waiting. + * + * Note: The caller must still remove the request when an error has occurred + * except when this context is no longer the waiter. + */ +static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt, + struct nbcon_state *cur) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state new; + + /* Note that the caller must still remove the request! */ + if (other_cpu_in_panic()) + return -EPERM; + + /* + * Note that the waiter will also change if there was an unsafe + * hostile takeover. + */ + if (!nbcon_waiter_matches(cur, ctxt->prio)) + return -EPERM; + + /* If still locked, caller should continue waiting. */ + if (cur->prio != NBCON_PRIO_NONE) + return -EBUSY; + + /* + * The previous owner should have never released ownership + * in an unsafe region. + */ + WARN_ON_ONCE(cur->unsafe); + + new.atom = cur->atom; + new.prio = ctxt->prio; + new.req_prio = NBCON_PRIO_NONE; + new.unsafe = cur->unsafe_takeover; + new.cpu = cpu; + + if (!nbcon_state_try_cmpxchg(con, cur, &new)) { + /* + * The acquire could fail only when it has been taken + * over by a higher priority context. + */ + WARN_ON_ONCE(nbcon_waiter_matches(cur, ctxt->prio)); + return -EPERM; + } + + /* Handover success. This context now owns the console. */ + return 0; +} + +/** + * nbcon_context_try_acquire_handover - Try to acquire via handover + * @ctxt: The context of the caller + * @cur: The current console state + * + * The function must be called only when the context has higher priority + * than the current owner and the console is in an unsafe state. + * It is the case when nbcon_context_try_acquire_direct() returns -EBUSY. + * + * The function sets "req_prio" field to make the current owner aware of + * the request. Then it waits until the current owner releases the console, + * or an even higher context takes over the request, or timeout expires. + * + * The current owner checks the "req_prio" field on exit from the unsafe + * region and releases the console. It does not touch the "req_prio" field + * so that the console stays reserved for the waiter. + * + * Return: 0 on success. Otherwise, an error code on failure. Also @cur + * is updated to the latest state when failed to modify it. + * + * Errors: + * + * -EPERM: A panic is in progress and this is not the panic CPU. + * Or a higher priority context has taken over the + * console or the handover request. + * + * -EBUSY: The current owner is on the same CPU so that the hand + * shake could not work. Or the current owner is not + * willing to wait (zero timeout). Or the console does + * not enter the safe state before timeout passed. The + * caller might still use the unsafe hostile takeover + * when allowed. + * + * -EAGAIN: @cur has changed when creating the handover request. + * The caller should retry with direct acquire. + */ +static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt, + struct nbcon_state *cur) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state new; + int timeout; + int request_err = -EBUSY; + + /* + * Check that the handover is called when the direct acquire failed + * with -EBUSY. + */ + WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); + WARN_ON_ONCE(!cur->unsafe); + + /* Handover is not possible on the same CPU. */ + if (cur->cpu == cpu) + return -EBUSY; + + /* + * Console stays unsafe after an unsafe takeover until re-initialized. + * Waiting is not going to help in this case. + */ + if (cur->unsafe_takeover) + return -EBUSY; + + /* Is the caller willing to wait? */ + if (ctxt->spinwait_max_us == 0) + return -EBUSY; + + /* + * Setup a request for the handover. The caller should try to acquire + * the console directly when the current state has been modified. + */ + new.atom = cur->atom; + new.req_prio = ctxt->prio; + if (!nbcon_state_try_cmpxchg(con, cur, &new)) + return -EAGAIN; + + cur->atom = new.atom; + + /* Wait until there is no owner and then acquire the console. */ + for (timeout = ctxt->spinwait_max_us; timeout >= 0; timeout--) { + /* On successful acquire, this request is cleared. */ + request_err = nbcon_context_try_acquire_requested(ctxt, cur); + if (!request_err) + return 0; + + /* + * If the acquire should be aborted, it must be ensured + * that the request is removed before returning to caller. + */ + if (request_err == -EPERM) + break; + + udelay(1); + + /* Re-read the state because some time has passed. */ + nbcon_state_read(con, cur); + } + + /* Timed out or aborted. Carefully remove handover request. */ + do { + /* + * No need to remove request if there is a new waiter. This + * can only happen if a higher priority context has taken over + * the console or the handover request. + */ + if (!nbcon_waiter_matches(cur, ctxt->prio)) + return -EPERM; + + /* Unset request for handover. */ + new.atom = cur->atom; + new.req_prio = NBCON_PRIO_NONE; + if (nbcon_state_try_cmpxchg(con, cur, &new)) { + /* + * Request successfully unset. Report failure of + * acquiring via handover. + */ + cur->atom = new.atom; + return request_err; + } + + /* + * Unable to remove request. Try to acquire in case + * the owner has released the lock. + */ + } while (nbcon_context_try_acquire_requested(ctxt, cur)); + + /* Lucky timing. The acquire succeeded while removing the request. */ + return 0; +} + +/** + * nbcon_context_try_acquire_hostile - Acquire via unsafe hostile takeover + * @ctxt: The context of the caller + * @cur: The current console state + * + * Acquire the console even in the unsafe state. + * + * It can be permitted by setting the 'allow_unsafe_takeover' field only + * by the final attempt to flush messages in panic(). + * + * Return: 0 on success. -EPERM when not allowed by the context. + */ +static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, + struct nbcon_state *cur) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state new; + + if (!ctxt->allow_unsafe_takeover) + return -EPERM; + + /* Ensure caller is allowed to perform unsafe hostile takeovers. */ + if (WARN_ON_ONCE(ctxt->prio != NBCON_PRIO_PANIC)) + return -EPERM; + + /* + * Check that try_acquire_direct() and try_acquire_handover() returned + * -EBUSY in the right situation. + */ + WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio); + WARN_ON_ONCE(cur->unsafe != true); + + do { + new.atom = cur->atom; + new.cpu = cpu; + new.prio = ctxt->prio; + new.unsafe |= cur->unsafe_takeover; + new.unsafe_takeover |= cur->unsafe; + + } while (!nbcon_state_try_cmpxchg(con, cur, &new)); + + return 0; +} + +/** + * nbcon_context_try_acquire - Try to acquire nbcon console + * @ctxt: The context of the caller + * + * Return: True if the console was acquired. False otherwise. + * + * If the caller allowed an unsafe hostile takeover, on success the + * caller should check the current console state to see if it is + * in an unsafe state. Otherwise, on success the caller may assume + * the console is not in an unsafe state. + */ +__maybe_unused +static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) +{ + struct console *con = ctxt->console; + struct nbcon_state cur; + int err; + + nbcon_state_read(con, &cur); +try_again: + err = nbcon_context_try_acquire_direct(ctxt, &cur); + if (err != -EBUSY) + goto out; + + err = nbcon_context_try_acquire_handover(ctxt, &cur); + if (err == -EAGAIN) + goto try_again; + if (err != -EBUSY) + goto out; + + err = nbcon_context_try_acquire_hostile(ctxt, &cur); +out: + return !err; +} + +static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, + int expected_prio) +{ + /* + * Since consoles can only be acquired by higher priorities, + * owning contexts are uniquely identified by @prio. However, + * since contexts can unexpectedly lose ownership, it is + * possible that later another owner appears with the same + * priority. For this reason @cpu is also needed. + */ + + if (cur->prio != expected_prio) + return false; + + if (cur->cpu != expected_cpu) + return false; + + return true; +} + +/** + * nbcon_context_release - Release the console + * @ctxt: The nbcon context from nbcon_context_try_acquire() + */ +__maybe_unused +static void nbcon_context_release(struct nbcon_context *ctxt) +{ + unsigned int cpu = smp_processor_id(); + struct console *con = ctxt->console; + struct nbcon_state cur; + struct nbcon_state new; + + nbcon_state_read(con, &cur); + + do { + if (!nbcon_owner_matches(&cur, cpu, ctxt->prio)) + return; + + new.atom = cur.atom; + new.prio = NBCON_PRIO_NONE; + + /* + * If @unsafe_takeover is set, it is kept set so that + * the state remains permanently unsafe. + */ + new.unsafe |= cur.unsafe_takeover; + + } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); +} + /** * nbcon_init - Initialize the nbcon console specific data * @con: Console to initialize From d818b56f77521ecc5e3eda71dc9b2beb3d6681e3 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Sat, 16 Sep 2023 21:26:02 +0206 Subject: [PATCH 03/12] printk: Make static printk buffers available to nbcon The nbcon boot consoles also need printk buffers that are available very early. Since the nbcon boot consoles will also be serialized by the console_lock, they can use the same static printk buffers that the legacy consoles are using. Make the legacy static printk buffers available outside of printk.c so they can be used by nbcon.c. Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-4-john.ogness@linutronix.de --- kernel/printk/internal.h | 2 ++ kernel/printk/printk.c | 13 +++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 2ca0ab78802c..7199d60bfc25 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -86,6 +86,8 @@ static inline void nbcon_cleanup(struct console *con) { } #endif /* CONFIG_PRINTK */ +extern struct printk_buffers printk_shared_pbufs; + /** * struct printk_buffers - Buffers to read/format/output printk messages. * @outbuf: After formatting, contains text to output. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5f372eaceb29..17def3791bc0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2846,6 +2846,13 @@ out: return true; } +/* + * Used as the printk buffers for non-panic, serialized console printing. + * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles. + * Its usage requires the console_lock held. + */ +struct printk_buffers printk_shared_pbufs; + /* * Print one record for the given console. The record printed is whatever * record is the next available record for the given console. @@ -2863,12 +2870,10 @@ out: */ static bool console_emit_next_record(struct console *con, bool *handover, int cookie) { - static struct printk_buffers pbufs; - bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; - char *outbuf = &pbufs.outbuf[0]; + char *outbuf = &printk_shared_pbufs.outbuf[0]; struct printk_message pmsg = { - .pbufs = &pbufs, + .pbufs = &printk_shared_pbufs, }; unsigned long flags; From 5634c90fd8553de7cbafecd048d0273690a2e84e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:03 +0206 Subject: [PATCH 04/12] printk: nbcon: Add buffer management In case of hostile takeovers it must be ensured that the previous owner cannot scribble over the output buffer of the emergency/panic context. This is achieved by: - Adding a global output buffer instance for the panic context. This is the only situation where hostile takeovers can occur and there is always at most 1 panic context. - Allocating an output buffer per non-boot console upon console registration. This buffer is used by the console owner when not in panic context. (For boot consoles, the existing shared global legacy output buffer is used instead. Boot console printing will be synchronized with legacy console printing.) - Choosing the appropriate buffer is handled in the acquire/release functions. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-5-john.ogness@linutronix.de --- include/linux/console.h | 7 ++++ kernel/printk/internal.h | 12 +++++-- kernel/printk/nbcon.c | 73 +++++++++++++++++++++++++++++++++++++--- kernel/printk/printk.c | 22 +++++++----- 4 files changed, 99 insertions(+), 15 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 98210fd01f18..ca1ef8700e55 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -231,6 +231,7 @@ enum nbcon_prio { }; struct console; +struct printk_buffers; /** * struct nbcon_context - Context for console acquire/release @@ -241,6 +242,7 @@ struct console; * be used only with NBCON_PRIO_PANIC @prio. It * might cause a system freeze when the console * is used later. + * @pbufs: Pointer to the text buffer for this context */ struct nbcon_context { /* members set by caller */ @@ -248,6 +250,9 @@ struct nbcon_context { unsigned int spinwait_max_us; enum nbcon_prio prio; unsigned int allow_unsafe_takeover : 1; + + /* members set by acquire */ + struct printk_buffers *pbufs; }; /** @@ -271,6 +276,7 @@ struct nbcon_context { * @node: hlist node for the console list * * @nbcon_state: State for nbcon consoles + * @pbufs: Pointer to nbcon private buffer */ struct console { char name[16]; @@ -293,6 +299,7 @@ struct console { /* nbcon console specific members */ atomic_t __private nbcon_state; + struct printk_buffers *pbufs; }; #ifdef CONFIG_LOCKDEP diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7199d60bfc25..f6161cd75d7d 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -13,6 +13,12 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, #define printk_sysctl_init() do { } while (0) #endif +#define con_printk(lvl, con, fmt, ...) \ + printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ + (con->flags & CON_NBCON) ? "" : "legacy ", \ + (con->flags & CON_BOOT) ? "boot" : "", \ + con->name, con->index, ##__VA_ARGS__) + #ifdef CONFIG_PRINTK #ifdef CONFIG_PRINTK_CALLER @@ -63,8 +69,9 @@ void defer_console_output(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); +bool nbcon_alloc(struct console *con); void nbcon_init(struct console *con); -void nbcon_cleanup(struct console *con); +void nbcon_free(struct console *con); #else @@ -81,8 +88,9 @@ void nbcon_cleanup(struct console *con); #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_init(struct console *con) { } -static inline void nbcon_cleanup(struct console *con) { } +static inline void nbcon_free(struct console *con) { } #endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index a2a354f859f9..ba1febf15db6 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "internal.h" /* * Printk console printing implementation for consoles which does not depend @@ -70,6 +71,10 @@ * console is an unsafe state. It is used only in panic() by the final * attempt to flush consoles in a try and hope mode. * + * Note that separate record buffers are used in panic(). As a result, + * the messages can be read and formatted without any risk even after + * using the hostile takeover in unsafe state. + * * The release function simply clears the 'prio' field. * * All operations on @console::nbcon_state are atomic cmpxchg based to @@ -459,6 +464,8 @@ static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, return 0; } +static struct printk_buffers panic_nbcon_pbufs; + /** * nbcon_context_try_acquire - Try to acquire nbcon console * @ctxt: The context of the caller @@ -473,6 +480,7 @@ static int nbcon_context_try_acquire_hostile(struct nbcon_context *ctxt, __maybe_unused static bool nbcon_context_try_acquire(struct nbcon_context *ctxt) { + unsigned int cpu = smp_processor_id(); struct console *con = ctxt->console; struct nbcon_state cur; int err; @@ -491,7 +499,18 @@ try_again: err = nbcon_context_try_acquire_hostile(ctxt, &cur); out: - return !err; + if (err) + return false; + + /* Acquire succeeded. */ + + /* Assign the appropriate buffer for this context. */ + if (atomic_read(&panic_cpu) == cpu) + ctxt->pbufs = &panic_nbcon_pbufs; + else + ctxt->pbufs = con->pbufs; + + return true; } static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, @@ -530,7 +549,7 @@ static void nbcon_context_release(struct nbcon_context *ctxt) do { if (!nbcon_owner_matches(&cur, cpu, ctxt->prio)) - return; + break; new.atom = cur.atom; new.prio = NBCON_PRIO_NONE; @@ -542,26 +561,70 @@ static void nbcon_context_release(struct nbcon_context *ctxt) new.unsafe |= cur.unsafe_takeover; } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); + + ctxt->pbufs = NULL; +} + +/** + * nbcon_alloc - Allocate buffers needed by the nbcon console + * @con: Console to allocate buffers for + * + * Return: True on success. False otherwise and the console cannot + * be used. + * + * This is not part of nbcon_init() because buffer allocation must + * be performed earlier in the console registration process. + */ +bool nbcon_alloc(struct console *con) +{ + if (con->flags & CON_BOOT) { + /* + * Boot console printing is synchronized with legacy console + * printing, so boot consoles can share the same global printk + * buffers. + */ + con->pbufs = &printk_shared_pbufs; + } else { + con->pbufs = kmalloc(sizeof(*con->pbufs), GFP_KERNEL); + if (!con->pbufs) { + con_printk(KERN_ERR, con, "failed to allocate printing buffer\n"); + return false; + } + } + + return true; } /** * nbcon_init - Initialize the nbcon console specific data * @con: Console to initialize + * + * nbcon_alloc() *must* be called and succeed before this function + * is called. */ void nbcon_init(struct console *con) { struct nbcon_state state = { }; + /* nbcon_alloc() must have been called and successful! */ + BUG_ON(!con->pbufs); + nbcon_state_set(con, &state); } /** - * nbcon_cleanup - Cleanup the nbcon console specific data - * @con: Console to cleanup + * nbcon_free - Free and cleanup the nbcon console specific data + * @con: Console to free/cleanup nbcon data */ -void nbcon_cleanup(struct console *con) +void nbcon_free(struct console *con) { struct nbcon_state state = { }; nbcon_state_set(con, &state); + + /* Boot consoles share global printk buffers. */ + if (!(con->flags & CON_BOOT)) + kfree(con->pbufs); + + con->pbufs = NULL; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 17def3791bc0..1c9720acd960 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3331,12 +3331,6 @@ static void try_enable_default_console(struct console *newcon) newcon->flags |= CON_CONSDEV; } -#define con_printk(lvl, con, fmt, ...) \ - printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ - (con->flags & CON_NBCON) ? "" : "legacy ", \ - (con->flags & CON_BOOT) ? "boot" : "", \ - con->name, con->index, ##__VA_ARGS__) - static void console_init_seq(struct console *newcon, bool bootcon_registered) { struct console *con; @@ -3450,6 +3444,15 @@ void register_console(struct console *newcon) goto unlock; } + if (newcon->flags & CON_NBCON) { + /* + * Ensure the nbcon console buffers can be allocated + * before modifying any global data. + */ + if (!nbcon_alloc(newcon)) + goto unlock; + } + /* * See if we want to enable this console driver by default. * @@ -3477,8 +3480,11 @@ void register_console(struct console *newcon) err = try_enable_preferred_console(newcon, false); /* printk() messages are not printed to the Braille console. */ - if (err || newcon->flags & CON_BRL) + if (err || newcon->flags & CON_BRL) { + if (newcon->flags & CON_NBCON) + nbcon_free(newcon); goto unlock; + } /* * If we have a bootconsole, and are switching to a real console, @@ -3589,7 +3595,7 @@ static int unregister_console_locked(struct console *console) synchronize_srcu(&console_srcu); if (console->flags & CON_NBCON) - nbcon_cleanup(console); + nbcon_free(console); console_sysfs_notify(); From 4b08d9e24f50163a8bda0a702b45b06bf841d792 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:04 +0206 Subject: [PATCH 05/12] printk: nbcon: Add ownership state functions Provide functions that are related to the safe handover mechanism and allow console drivers to dynamically specify unsafe regions: - nbcon_context_can_proceed() Invoked by a console owner to check whether a handover request is pending or whether the console has been taken over by another context. If a handover request is pending, this function will also perform the handover, thus cancelling its own ownership. - nbcon_context_enter_unsafe()/nbcon_context_exit_unsafe() Invoked by a console owner to denote that the driver is about to enter or leave a critical region where a take over is unsafe. The exit variant is the point where the current owner releases the lock for a higher priority context which asked for the friendly handover. The unsafe state is stored in the console state and allows a new context to make informed decisions whether to attempt a takeover of such a console. The unsafe state is also available to the driver so that it can make informed decisions about the required actions and possibly take a special emergency path. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-6-john.ogness@linutronix.de --- kernel/printk/nbcon.c | 123 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index ba1febf15db6..98e4be5429f0 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -537,7 +537,6 @@ static bool nbcon_owner_matches(struct nbcon_state *cur, int expected_cpu, * nbcon_context_release - Release the console * @ctxt: The nbcon context from nbcon_context_try_acquire() */ -__maybe_unused static void nbcon_context_release(struct nbcon_context *ctxt) { unsigned int cpu = smp_processor_id(); @@ -565,6 +564,128 @@ static void nbcon_context_release(struct nbcon_context *ctxt) ctxt->pbufs = NULL; } +/** + * nbcon_context_can_proceed - Check whether ownership can proceed + * @ctxt: The nbcon context from nbcon_context_try_acquire() + * @cur: The current console state + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. + * + * Must be invoked when entering the unsafe state to make sure that it still + * owns the lock. Also must be invoked when exiting the unsafe context + * to eventually free the lock for a higher priority context which asked + * for the friendly handover. + * + * It can be called inside an unsafe section when the console is just + * temporary in safe state instead of exiting and entering the unsafe + * state. + * + * Also it can be called in the safe context before doing an expensive + * safe operation. It does not make sense to do the operation when + * a higher priority context took the lock. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. + */ +static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_state *cur) +{ + unsigned int cpu = smp_processor_id(); + + /* Make sure this context still owns the console. */ + if (!nbcon_owner_matches(cur, cpu, ctxt->prio)) + return false; + + /* The console owner can proceed if there is no waiter. */ + if (cur->req_prio == NBCON_PRIO_NONE) + return true; + + /* + * A console owner within an unsafe region is always allowed to + * proceed, even if there are waiters. It can perform a handover + * when exiting the unsafe region. Otherwise the waiter will + * need to perform an unsafe hostile takeover. + */ + if (cur->unsafe) + return true; + + /* Waiters always have higher priorities than owners. */ + WARN_ON_ONCE(cur->req_prio <= cur->prio); + + /* + * Having a safe point for take over and eventually a few + * duplicated characters or a full line is way better than a + * hostile takeover. Post processing can take care of the garbage. + * Release and hand over. + */ + nbcon_context_release(ctxt); + + /* + * It is not clear whether the waiter really took over ownership. The + * outermost callsite must make the final decision whether console + * ownership is needed for it to proceed. If yes, it must reacquire + * ownership (possibly hostile) before carefully proceeding. + * + * The calling context no longer owns the console so go back all the + * way instead of trying to implement reacquire heuristics in tons of + * places. + */ + return false; +} + +#define nbcon_context_enter_unsafe(c) __nbcon_context_update_unsafe(c, true) +#define nbcon_context_exit_unsafe(c) __nbcon_context_update_unsafe(c, false) + +/** + * __nbcon_context_update_unsafe - Update the unsafe bit in @con->nbcon_state + * @ctxt: The nbcon context from nbcon_context_try_acquire() + * @unsafe: The new value for the unsafe bit + * + * Return: True if the unsafe state was updated and this context still + * owns the console. Otherwise false if ownership was handed + * over or taken. + * + * This function allows console owners to modify the unsafe status of the + * console. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. + * + * Internal helper to avoid duplicated code. + */ +__maybe_unused +static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe) +{ + struct console *con = ctxt->console; + struct nbcon_state cur; + struct nbcon_state new; + + nbcon_state_read(con, &cur); + + do { + /* + * The unsafe bit must not be cleared if an + * unsafe hostile takeover has occurred. + */ + if (!unsafe && cur.unsafe_takeover) + goto out; + + if (!nbcon_context_can_proceed(ctxt, &cur)) + return false; + + new.atom = cur.atom; + new.unsafe = unsafe; + } while (!nbcon_state_try_cmpxchg(con, &cur, &new)); + + cur.atom = new.atom; +out: + return nbcon_context_can_proceed(ctxt, &cur); +} + /** * nbcon_alloc - Allocate buffers needed by the nbcon console * @con: Console to allocate buffers for From ad56ebd1d79b216dc147474fac89a11daf6b10df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:05 +0206 Subject: [PATCH 06/12] printk: nbcon: Add sequence handling Add an atomic_long_t field @nbcon_seq to the console struct to store the sequence number for nbcon consoles. For nbcon consoles this will be used instead of the non-atomic @seq field. The new field allows for safe atomic sequence number updates without requiring any locking. On 64bit systems the new field stores the full sequence number. On 32bit systems the new field stores the lower 32 bits of the sequence number, which are expanded to 64bit as needed by folding the values based on the sequence numbers available in the ringbuffer. For 32bit systems, having a 32bit representation in the console is sufficient. If a console ever gets more than 2^31 records behind the ringbuffer then this is the least of the problems. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-7-john.ogness@linutronix.de --- include/linux/console.h | 4 ++ kernel/printk/internal.h | 7 +++ kernel/printk/nbcon.c | 101 +++++++++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 31 +++++++++--- 4 files changed, 136 insertions(+), 7 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index ca1ef8700e55..20cd486b76ad 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -243,6 +243,7 @@ struct printk_buffers; * might cause a system freeze when the console * is used later. * @pbufs: Pointer to the text buffer for this context + * @seq: The sequence number to print for this context */ struct nbcon_context { /* members set by caller */ @@ -253,6 +254,7 @@ struct nbcon_context { /* members set by acquire */ struct printk_buffers *pbufs; + u64 seq; }; /** @@ -276,6 +278,7 @@ struct nbcon_context { * @node: hlist node for the console list * * @nbcon_state: State for nbcon consoles + * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer */ struct console { @@ -299,6 +302,7 @@ struct console { /* nbcon console specific members */ atomic_t __private nbcon_state; + atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; }; diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index f6161cd75d7d..6473f5ae4a18 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -4,6 +4,7 @@ */ #include #include +#include "printk_ringbuffer.h" #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) void __init printk_sysctl_init(void); @@ -42,6 +43,8 @@ enum printk_info_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; +extern struct printk_ringbuffer *prb; + __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, @@ -69,6 +72,8 @@ void defer_console_output(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); +u64 nbcon_seq_read(struct console *con); +void nbcon_seq_force(struct console *con, u64 seq); bool nbcon_alloc(struct console *con); void nbcon_init(struct console *con); void nbcon_free(struct console *con); @@ -88,6 +93,8 @@ void nbcon_free(struct console *con); #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline u64 nbcon_seq_read(struct console *con) { return 0; } +static inline void nbcon_seq_force(struct console *con, u64 seq) { } static inline bool nbcon_alloc(struct console *con) { return false; } static inline void nbcon_init(struct console *con) { } static inline void nbcon_free(struct console *con) { } diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 98e4be5429f0..e076096b31c0 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -140,6 +140,101 @@ static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); } +#ifdef CONFIG_64BIT + +#define __seq_to_nbcon_seq(seq) (seq) +#define __nbcon_seq_to_seq(seq) (seq) + +#else /* CONFIG_64BIT */ + +#define __seq_to_nbcon_seq(seq) ((u32)seq) + +static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq) +{ + u64 seq; + u64 rb_next_seq; + + /* + * The provided sequence is only the lower 32 bits of the ringbuffer + * sequence. It needs to be expanded to 64bit. Get the next sequence + * number from the ringbuffer and fold it. + * + * Having a 32bit representation in the console is sufficient. + * If a console ever gets more than 2^31 records behind + * the ringbuffer then this is the least of the problems. + * + * Also the access to the ring buffer is always safe. + */ + rb_next_seq = prb_next_seq(prb); + seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq); + + return seq; +} + +#endif /* CONFIG_64BIT */ + +/** + * nbcon_seq_read - Read the current console sequence + * @con: Console to read the sequence of + * + * Return: Sequence number of the next record to print on @con. + */ +u64 nbcon_seq_read(struct console *con) +{ + unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); + + return __nbcon_seq_to_seq(nbcon_seq); +} + +/** + * nbcon_seq_force - Force console sequence to a specific value + * @con: Console to work on + * @seq: Sequence number value to set + * + * Only to be used during init (before registration) or in extreme situations + * (such as panic with CONSOLE_REPLAY_ALL). + */ +void nbcon_seq_force(struct console *con, u64 seq) +{ + /* + * If the specified record no longer exists, the oldest available record + * is chosen. This is especially important on 32bit systems because only + * the lower 32 bits of the sequence number are stored. The upper 32 bits + * are derived from the sequence numbers available in the ringbuffer. + */ + u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); + + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq)); + + /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */ + con->seq = 0; +} + +/** + * nbcon_seq_try_update - Try to update the console sequence number + * @ctxt: Pointer to an acquire context that contains + * all information about the acquire mode + * @new_seq: The new sequence number to set + * + * @ctxt->seq is updated to the new value of @con::nbcon_seq (expanded to + * the 64bit value). This could be a different value than @new_seq if + * nbcon_seq_force() was used or the current context no longer owns the + * console. In the later case, it will stop printing anyway. + */ +__maybe_unused +static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) +{ + unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); + struct console *con = ctxt->console; + + if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, + __seq_to_nbcon_seq(new_seq))) { + ctxt->seq = new_seq; + } else { + ctxt->seq = nbcon_seq_read(con); + } +} + /** * nbcon_context_try_acquire_direct - Try to acquire directly * @ctxt: The context of the caller @@ -510,6 +605,9 @@ out: else ctxt->pbufs = con->pbufs; + /* Set the record sequence for this context to print. */ + ctxt->seq = nbcon_seq_read(ctxt->console); + return true; } @@ -722,6 +820,8 @@ bool nbcon_alloc(struct console *con) * * nbcon_alloc() *must* be called and succeed before this function * is called. + * + * This function expects that the legacy @con->seq has been set. */ void nbcon_init(struct console *con) { @@ -730,6 +830,7 @@ void nbcon_init(struct console *con) /* nbcon_alloc() must have been called and successful! */ BUG_ON(!con->pbufs); + nbcon_seq_force(con, con->seq); nbcon_state_set(con, &state); } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1c9720acd960..77857d2118ca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -494,7 +494,7 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, static struct printk_ringbuffer printk_rb_dynamic; -static struct printk_ringbuffer *prb = &printk_rb_static; +struct printk_ringbuffer *prb = &printk_rb_static; /* * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before @@ -3168,6 +3168,7 @@ void console_flush_on_panic(enum con_flush_mode mode) if (mode == CONSOLE_REPLAY_ALL) { struct console *c; + short flags; int cookie; u64 seq; @@ -3175,11 +3176,17 @@ void console_flush_on_panic(enum con_flush_mode mode) cookie = console_srcu_read_lock(); for_each_console_srcu(c) { - /* - * This is an unsynchronized assignment, but the - * kernel is in "hope and pray" mode anyway. - */ - c->seq = seq; + flags = console_srcu_read_flags(c); + + if (flags & CON_NBCON) { + nbcon_seq_force(c, seq); + } else { + /* + * This is an unsynchronized assignment. On + * panic legacy consoles are only best effort. + */ + c->seq = seq; + } } console_srcu_read_unlock(cookie); } @@ -3750,6 +3757,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre struct console *c; u64 last_diff = 0; u64 printk_seq; + short flags; int cookie; u64 diff; u64 seq; @@ -3771,6 +3779,9 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre for_each_console_srcu(c) { if (con && con != c) continue; + + flags = console_srcu_read_flags(c); + /* * If consoles are not usable, it cannot be expected * that they make forward progress, so only increment @@ -3778,7 +3789,13 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre */ if (!console_is_usable(c)) continue; - printk_seq = c->seq; + + if (flags & CON_NBCON) { + printk_seq = nbcon_seq_read(c); + } else { + printk_seq = c->seq; + } + if (printk_seq < seq) diff += seq - printk_seq; } From 06653d57ff283be627a2c769139d73ecc487810f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:06 +0206 Subject: [PATCH 07/12] printk: nbcon: Add emit function and callback function for atomic printing Implement an emit function for nbcon consoles to output printk messages. It utilizes the lockless printk_get_next_message() and console_prepend_dropped() functions to retrieve/build the output message. The emit function includes the required safety points to check for handover/takeover and calls a new write_atomic callback of the console driver to output the message. It also includes proper handling for updating the nbcon console sequence number. A new nbcon_write_context struct is introduced. This is provided to the write_atomic callback and includes only the information necessary for performing atomic writes. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-8-john.ogness@linutronix.de --- include/linux/console.h | 21 ++++++++ kernel/printk/internal.h | 6 +++ kernel/printk/nbcon.c | 106 ++++++++++++++++++++++++++++++++++++++- kernel/printk/printk.c | 9 ++-- 4 files changed, 134 insertions(+), 8 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 20cd486b76ad..14563dcb34b1 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -242,6 +242,7 @@ struct printk_buffers; * be used only with NBCON_PRIO_PANIC @prio. It * might cause a system freeze when the console * is used later. + * @backlog: Ringbuffer has pending records * @pbufs: Pointer to the text buffer for this context * @seq: The sequence number to print for this context */ @@ -252,11 +253,28 @@ struct nbcon_context { enum nbcon_prio prio; unsigned int allow_unsafe_takeover : 1; + /* members set by emit */ + unsigned int backlog : 1; + /* members set by acquire */ struct printk_buffers *pbufs; u64 seq; }; +/** + * struct nbcon_write_context - Context handed to the nbcon write callbacks + * @ctxt: The core console context + * @outbuf: Pointer to the text buffer for output + * @len: Length to write + * @unsafe_takeover: If a hostile takeover in an unsafe state has occurred + */ +struct nbcon_write_context { + struct nbcon_context __private ctxt; + char *outbuf; + unsigned int len; + bool unsafe_takeover; +}; + /** * struct console - The console descriptor structure * @name: The name of the console driver @@ -277,6 +295,7 @@ struct nbcon_context { * @data: Driver private data * @node: hlist node for the console list * + * @write_atomic: Write callback for atomic context * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print * @pbufs: Pointer to nbcon private buffer @@ -301,6 +320,8 @@ struct console { struct hlist_node node; /* nbcon console specific members */ + bool (*write_atomic)(struct console *con, + struct nbcon_write_context *wctxt); atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; struct printk_buffers *pbufs; diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 6473f5ae4a18..6c2afee5ef62 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -130,3 +130,9 @@ struct printk_message { }; bool other_cpu_in_panic(void); +bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + bool is_extended, bool may_supress); + +#ifdef CONFIG_PRINTK +void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped); +#endif diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index e076096b31c0..6e05d263fd22 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -221,7 +221,6 @@ void nbcon_seq_force(struct console *con, u64 seq) * nbcon_seq_force() was used or the current context no longer owns the * console. In the later case, it will stop printing anyway. */ -__maybe_unused static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) { unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); @@ -755,7 +754,6 @@ static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_s * * Internal helper to avoid duplicated code. */ -__maybe_unused static bool __nbcon_context_update_unsafe(struct nbcon_context *ctxt, bool unsafe) { struct console *con = ctxt->console; @@ -784,6 +782,110 @@ out: return nbcon_context_can_proceed(ctxt, &cur); } +/** + * nbcon_emit_next_record - Emit a record in the acquired context + * @wctxt: The write context that will be handed to the write function + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. If the caller + * wants to do more it must reacquire the console first. + * + * When true is returned, @wctxt->ctxt.backlog indicates whether there are + * still records pending in the ringbuffer, + */ +__maybe_unused +static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED; + struct printk_message pmsg = { + .pbufs = ctxt->pbufs, + }; + unsigned long con_dropped; + struct nbcon_state cur; + unsigned long dropped; + bool done; + + /* + * The printk buffers are filled within an unsafe section. This + * prevents NBCON_PRIO_NORMAL and NBCON_PRIO_EMERGENCY from + * clobbering each other. + */ + + if (!nbcon_context_enter_unsafe(ctxt)) + return false; + + ctxt->backlog = printk_get_next_message(&pmsg, ctxt->seq, is_extended, true); + if (!ctxt->backlog) + return nbcon_context_exit_unsafe(ctxt); + + /* + * @con->dropped is not protected in case of an unsafe hostile + * takeover. In that situation the update can be racy so + * annotate it accordingly. + */ + con_dropped = data_race(READ_ONCE(con->dropped)); + + dropped = con_dropped + pmsg.dropped; + if (dropped && !is_extended) + console_prepend_dropped(&pmsg, dropped); + + if (!nbcon_context_exit_unsafe(ctxt)) + return false; + + /* For skipped records just update seq/dropped in @con. */ + if (pmsg.outbuf_len == 0) + goto update_con; + + /* Initialize the write context for driver callbacks. */ + wctxt->outbuf = &pmsg.pbufs->outbuf[0]; + wctxt->len = pmsg.outbuf_len; + nbcon_state_read(con, &cur); + wctxt->unsafe_takeover = cur.unsafe_takeover; + + if (con->write_atomic) { + done = con->write_atomic(con, wctxt); + } else { + nbcon_context_release(ctxt); + WARN_ON_ONCE(1); + done = false; + } + + /* If not done, the emit was aborted. */ + if (!done) + return false; + + /* + * Since any dropped message was successfully output, reset the + * dropped count for the console. + */ + dropped = 0; +update_con: + /* + * The dropped count and the sequence number are updated within an + * unsafe section. This limits update races to the panic context and + * allows the panic context to win. + */ + + if (!nbcon_context_enter_unsafe(ctxt)) + return false; + + if (dropped != con_dropped) { + /* Counterpart to the READ_ONCE() above. */ + WRITE_ONCE(con->dropped, dropped); + } + + nbcon_seq_try_update(ctxt, pmsg.seq + 1); + + return nbcon_context_exit_unsafe(ctxt); +} + /** * nbcon_alloc - Allocate buffers needed by the nbcon console * @con: Console to allocate buffers for diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 77857d2118ca..778359b21761 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -698,9 +698,6 @@ out: return len; } -static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, - bool is_extended, bool may_supress); - /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { atomic64_t seq; @@ -2733,7 +2730,7 @@ static void __console_unlock(void) * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. */ #ifdef CONFIG_PRINTK -static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) +void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) { struct printk_buffers *pbufs = pmsg->pbufs; const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); @@ -2787,8 +2784,8 @@ static void console_prepend_dropped(struct printk_message *pmsg, unsigned long d * of @pmsg are valid. (See the documentation of struct printk_message * for information about the @pmsg fields.) */ -static bool printk_get_next_message(struct printk_message *pmsg, u64 seq, - bool is_extended, bool may_suppress) +bool printk_get_next_message(struct printk_message *pmsg, u64 seq, + bool is_extended, bool may_suppress) { static int panic_console_dropped; From 9757acd0a700ba4a0d16dde4ba820eb052aba1a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Sep 2023 21:26:07 +0206 Subject: [PATCH 08/12] printk: nbcon: Allow drivers to mark unsafe regions and check state For the write_atomic callback, the console driver may have unsafe regions that need to be appropriately marked. Provide functions that accept the nbcon_write_context struct to allow for the driver to enter and exit unsafe regions. Also provide a function for drivers to check if they are still the owner of the console. Co-developed-by: John Ogness Signed-off-by: John Ogness Signed-off-by: Thomas Gleixner (Intel) Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230916192007.608398-9-john.ogness@linutronix.de --- include/linux/console.h | 10 ++++++ kernel/printk/nbcon.c | 75 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/include/linux/console.h b/include/linux/console.h index 14563dcb34b1..e4fc6f7c1496 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -451,6 +451,16 @@ static inline bool console_is_registered(const struct console *con) lockdep_assert_console_list_lock_held(); \ hlist_for_each_entry(con, &console_list, node) +#ifdef CONFIG_PRINTK +extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); +extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); +extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +#else +static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +#endif + extern int console_set_on_cmdline; extern struct console *early_console; diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c index 6e05d263fd22..b96077152f49 100644 --- a/kernel/printk/nbcon.c +++ b/kernel/printk/nbcon.c @@ -732,6 +732,41 @@ static bool nbcon_context_can_proceed(struct nbcon_context *ctxt, struct nbcon_s return false; } +/** + * nbcon_can_proceed - Check whether ownership can proceed + * @wctxt: The write context that was handed to the write function + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. + * + * It is used in nbcon_enter_unsafe() to make sure that it still owns the + * lock. Also it is used in nbcon_exit_unsafe() to eventually free the lock + * for a higher priority context which asked for the friendly handover. + * + * It can be called inside an unsafe section when the console is just + * temporary in safe state instead of exiting and entering the unsafe state. + * + * Also it can be called in the safe context before doing an expensive safe + * operation. It does not make sense to do the operation when a higher + * priority context took the lock. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. + */ +bool nbcon_can_proceed(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + struct console *con = ctxt->console; + struct nbcon_state cur; + + nbcon_state_read(con, &cur); + + return nbcon_context_can_proceed(ctxt, &cur); +} +EXPORT_SYMBOL_GPL(nbcon_can_proceed); + #define nbcon_context_enter_unsafe(c) __nbcon_context_update_unsafe(c, true) #define nbcon_context_exit_unsafe(c) __nbcon_context_update_unsafe(c, false) @@ -782,6 +817,46 @@ out: return nbcon_context_can_proceed(ctxt, &cur); } +/** + * nbcon_enter_unsafe - Enter an unsafe region in the driver + * @wctxt: The write context that was handed to the write function + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. + */ +bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + return nbcon_context_enter_unsafe(ctxt); +} +EXPORT_SYMBOL_GPL(nbcon_enter_unsafe); + +/** + * nbcon_exit_unsafe - Exit an unsafe region in the driver + * @wctxt: The write context that was handed to the write function + * + * Return: True if this context still owns the console. False if + * ownership was handed over or taken. + * + * When this function returns false then the calling context no longer owns + * the console and is no longer allowed to go forward. In this case it must + * back out immediately and carefully. The buffer content is also no longer + * trusted since it no longer belongs to the calling context. + */ +bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) +{ + struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt); + + return nbcon_context_exit_unsafe(ctxt); +} +EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); + /** * nbcon_emit_next_record - Emit a record in the acquired context * @wctxt: The write context that will be handed to the write function From 98a0465531a5982dd897fd81222a5d3465999951 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 20 Sep 2023 17:58:38 +0206 Subject: [PATCH 09/12] printk: fix illegal pbufs access for !CONFIG_PRINTK When CONFIG_PRINTK is not set, PRINTK_MESSAGE_MAX is 0. This leads to a zero-sized array @outbuf in @printk_shared_pbufs. In console_flush_all() a pointer to the first element of the array is assigned with: char *outbuf = &printk_shared_pbufs.outbuf[0]; For !CONFIG_PRINTK this leads to a compiler warning: warning: array subscript 0 is outside array bounds of 'char[0]' [-Warray-bounds] This is not really dangerous because printk_get_next_message() always returns false for !CONFIG_PRINTK, which leads to @outbuf never being used. However, it makes no sense to even compile these functions for !CONFIG_PRINTK. Extend the existing '#ifdef CONFIG_PRINTK' block to contain the formatting and emitting functions since these have no purpose in !CONFIG_PRINTK. This also allows removing several more !CONFIG_PRINTK dummies as well as moving @suppress_panic_printk into a CONFIG_PRINTK block. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309201724.M9BMAQIh-lkp@intel.com/ Signed-off-by: John Ogness Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230920155238.670439-1-john.ogness@linutronix.de --- kernel/printk/printk.c | 44 +++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 778359b21761..7f40d9122caa 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -102,12 +102,6 @@ DEFINE_STATIC_SRCU(console_srcu); */ int __read_mostly suppress_printk; -/* - * During panic, heavy printk by other CPUs can delay the - * panic and risk deadlock on console resources. - */ -static int __read_mostly suppress_panic_printk; - #ifdef CONFIG_LOCKDEP static struct lockdep_map console_lock_dep_map = { .name = "console_lock" @@ -445,6 +439,12 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; static DEFINE_MUTEX(syslog_lock); #ifdef CONFIG_PRINTK +/* + * During panic, heavy printk by other CPUs can delay the + * panic and risk deadlock on console resources. + */ +static int __read_mostly suppress_panic_printk; + DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ @@ -2346,22 +2346,6 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre static u64 syslog_seq; -static size_t record_print_text(const struct printk_record *r, - bool syslog, bool time) -{ - return 0; -} -static ssize_t info_print_ext_header(char *buf, size_t size, - struct printk_info *info) -{ - return 0; -} -static ssize_t msg_print_ext_body(char *buf, size_t size, - char *text, size_t text_len, - struct dev_printk_info *dev_info) { return 0; } -static void console_lock_spinning_enable(void) { } -static int console_lock_spinning_disable_and_check(int cookie) { return 0; } -static bool suppress_message_printing(int level) { return false; } static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } @@ -2715,6 +2699,8 @@ static void __console_unlock(void) up_console_sem(); } +#ifdef CONFIG_PRINTK + /* * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This * is achieved by shifting the existing message over and inserting the dropped @@ -2729,7 +2715,6 @@ static void __console_unlock(void) * * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated. */ -#ifdef CONFIG_PRINTK void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) { struct printk_buffers *pbufs = pmsg->pbufs; @@ -2761,9 +2746,6 @@ void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) memcpy(outbuf, scratchbuf, len); pmsg->outbuf_len += len; } -#else -#define console_prepend_dropped(pmsg, dropped) -#endif /* CONFIG_PRINTK */ /* * Read and format the specified record (or a later record if the specified @@ -2921,6 +2903,16 @@ skip: return true; } +#else + +static bool console_emit_next_record(struct console *con, bool *handover, int cookie) +{ + *handover = false; + return false; +} + +#endif /* CONFIG_PRINTK */ + /* * Print out all remaining records to all consoles. * From 29fda1ad2a64a62e1c51d61207396e2de1c67362 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 6 Oct 2023 10:21:51 +0200 Subject: [PATCH 10/12] printk: Reduce pr_flush() pooling time pr_flush() does not guarantee that all messages would really get flushed to the console. The best it could do is to wait with a given timeout.[*] The current interval 100ms for checking the progress might seem too long in some situations. For example, such delays are not appreciated during suspend and resume especially when the consoles have been flushed "long" time before the check. On the other hand, the sleeping wait might be useful in other situations. Especially, it would allow flushing the messages using printk kthreads on the same CPU[*]. Use msleep(1) as a compromise. Also measure the time using jiffies. msleep() does not guarantee precise wakeup after the given delay. It might be much longer, especially for times < 20s. See Documentation/timers/timers-howto.rst for more details. Note that msecs_to_jiffies() already translates a negative value into an infinite timeout. [*] console_unlock() does not guarantee flushing the consoles since the commit dbdda842fe96f893 ("printk: Add console owner and waiter logic to load balance console writes"). It would be possible to guarantee it another way. For example, the spinning might be enabled only when the console_lock has been taken via console_trylock(). But the load balancing is helpful. And more importantly, the flush with a timeout has been added as a preparation step for introducing printk kthreads. Signed-off-by: Petr Mladek Reviewed-by: John Ogness Link: https://lore.kernel.org/r/20231006082151.6969-3-pmladek@suse.com --- kernel/printk/printk.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1919899b6897..80eb79177bab 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3726,7 +3726,8 @@ late_initcall(printk_late_init); /* If @con is specified, only wait for that console. Otherwise wait for all. */ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { - int remaining = timeout_ms; + unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms); + unsigned long remaining_jiffies = timeout_jiffies; struct console *c; u64 last_diff = 0; u64 printk_seq; @@ -3743,6 +3744,9 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre console_unlock(); for (;;) { + unsigned long begin_jiffies; + unsigned long slept_jiffies; + diff = 0; /* @@ -3771,24 +3775,20 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre console_srcu_read_unlock(cookie); if (diff != last_diff && reset_on_progress) - remaining = timeout_ms; + remaining_jiffies = timeout_jiffies; console_unlock(); /* Note: @diff is 0 if there are no usable consoles. */ - if (diff == 0 || remaining == 0) + if (diff == 0 || remaining_jiffies == 0) break; - if (remaining < 0) { - /* no timeout limit */ - msleep(100); - } else if (remaining < 100) { - msleep(remaining); - remaining = 0; - } else { - msleep(100); - remaining -= 100; - } + /* msleep(1) might sleep much longer. Check time by jiffies. */ + begin_jiffies = jiffies; + msleep(1); + slept_jiffies = jiffies - begin_jiffies; + + remaining_jiffies -= min(slept_jiffies, remaining_jiffies); last_diff = diff; } From 3f6074cf467f6c2beb7db0b0d685fb53f25ae9aa Mon Sep 17 00:00:00 2001 From: Li kunyu Date: Mon, 23 Oct 2023 14:23:59 +0800 Subject: [PATCH 11/12] =?UTF-8?q?printk:=20printk:=20Remove=20unnecessary?= =?UTF-8?q?=20statements=EF=BC=87len=20=3D=200;=EF=BC=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the following two functions, len has already been assigned a value of 0 when defining the variable, so remove 'len=0;'. Signed-off-by: Li kunyu Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20231023062359.130633-1-kunyu@nfschina.com --- kernel/printk/printk.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 96fc38cb2e84..1c6ed598b480 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1671,7 +1671,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear) prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX); - len = 0; prb_for_each_record(seq, prb, seq, &r) { int textlen; @@ -4190,7 +4189,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, prb_rec_init_rd(&r, &info, buf, size); - len = 0; prb_for_each_record(seq, prb, seq, &r) { if (r.info->seq >= iter->next_seq) break; From 72fcce70faf06dd7442bf0b41b71f071d7fa29a4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 27 Oct 2023 17:13:58 +0300 Subject: [PATCH 12/12] vsprintf: uninline simple_strntoull(), reorder arguments * uninline simple_strntoull(), gcc overinlines and this function is not performance critical * reorder arguments, so that appending INT_MAX as 4th argument generates very efficient tail call Space savings: add/remove: 1/0 grow/shrink: 0/3 up/down: 27/-179 (-152) Function old new delta simple_strntoll - 27 +27 simple_strtoull 15 10 -5 simple_strtoll 41 7 -34 vsscanf 1930 1790 -140 Signed-off-by: Alexey Dobriyan Reviewed-by: Andy Shevchenko Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/all/82a2af6e-9b6c-4a09-89d7-ca90cc1cdad1@p183/ --- lib/vsprintf.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index afb88b24fa74..3e3733a7084f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -60,7 +60,8 @@ bool no_hash_pointers __ro_after_init; EXPORT_SYMBOL_GPL(no_hash_pointers); -static noinline unsigned long long simple_strntoull(const char *startp, size_t max_chars, char **endp, unsigned int base) +noinline +static unsigned long long simple_strntoull(const char *startp, char **endp, unsigned int base, size_t max_chars) { const char *cp; unsigned long long result = 0ULL; @@ -95,7 +96,7 @@ static noinline unsigned long long simple_strntoull(const char *startp, size_t m noinline unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) { - return simple_strntoull(cp, INT_MAX, endp, base); + return simple_strntoull(cp, endp, base, INT_MAX); } EXPORT_SYMBOL(simple_strtoull); @@ -130,8 +131,8 @@ long simple_strtol(const char *cp, char **endp, unsigned int base) } EXPORT_SYMBOL(simple_strtol); -static long long simple_strntoll(const char *cp, size_t max_chars, char **endp, - unsigned int base) +noinline +static long long simple_strntoll(const char *cp, char **endp, unsigned int base, size_t max_chars) { /* * simple_strntoull() safely handles receiving max_chars==0 in the @@ -140,9 +141,9 @@ static long long simple_strntoll(const char *cp, size_t max_chars, char **endp, * and the content of *cp is irrelevant. */ if (*cp == '-' && max_chars > 0) - return -simple_strntoull(cp + 1, max_chars - 1, endp, base); + return -simple_strntoull(cp + 1, endp, base, max_chars - 1); - return simple_strntoull(cp, max_chars, endp, base); + return simple_strntoull(cp, endp, base, max_chars); } /** @@ -155,7 +156,7 @@ static long long simple_strntoll(const char *cp, size_t max_chars, char **endp, */ long long simple_strtoll(const char *cp, char **endp, unsigned int base) { - return simple_strntoll(cp, INT_MAX, endp, base); + return simple_strntoll(cp, endp, base, INT_MAX); } EXPORT_SYMBOL(simple_strtoll); @@ -3648,13 +3649,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args) break; if (is_sign) - val.s = simple_strntoll(str, - field_width >= 0 ? field_width : INT_MAX, - &next, base); + val.s = simple_strntoll(str, &next, base, + field_width >= 0 ? field_width : INT_MAX); else - val.u = simple_strntoull(str, - field_width >= 0 ? field_width : INT_MAX, - &next, base); + val.u = simple_strntoull(str, &next, base, + field_width >= 0 ? field_width : INT_MAX); switch (qualifier) { case 'H': /* that's 'hh' in format */