time-namespace-v5.11

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCX9cwgAAKCRCRxhvAZXjc
 onViAP9CDMQct0RfdpdKOrh4NkxWiheBp7CzVSP1Xfy8KHBslgD/X7kilcthT8PC
 JTJmngrVWoehX+s49kl2PSuuLsGElAo=
 =llnx
 -----END PGP SIGNATURE-----

Merge tag 'time-namespace-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux

Pull time namespace updates from Christian Brauner:
 "When time namespaces were introduced, we failed to virtualize the
  'btime' field in /proc/stat. This confuses tasks that are in another
  time namespace with a virtualized boottime, which is common in some
  container workloads. This contains Michael's series to fix 'btime',
  which Thomas asked me to take through my tree.

  To fix 'btime' virtualization, we simply subtract the offset of the
  time namespace's boottime from btime before printing the stats. Note
  that since the start_boottime of processes is in seconds since boottime
  and the boottime stamp is now shifted according to the time namespace's
  offset, the offset of the time namespace also needs to be applied
  before the process stats are given to userspace. This prevents
  processes shown by tools such as 'ps' from appearing as time travelers
  in the corresponding time namespace.

  Selftests are included to verify that btime virtualization in
  /proc/stat works as expected"

* tag 'time-namespace-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  namespace: make timens_on_fork() return nothing
  selftests/timens: added selftest for /proc/stat btime
  fs/proc: apply the time namespace offset to /proc/stat btime
  timens: additional helper functions for boottime offset handling
This commit is contained in:
Linus Torvalds 2020-12-14 16:35:39 -08:00
commit 6d93a1971a
6 changed files with 92 additions and 16 deletions

View file

@ -56,6 +56,7 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/tty.h>
@ -533,8 +534,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
priority = task_prio(task);
nice = task_nice(task);
/* convert nsec -> ticks */
start_time = nsec_to_clock_t(task->start_boottime);
/* apply timens offset for boottime and convert nsec -> ticks */
start_time =
nsec_to_clock_t(timens_add_boottime_ns(task->start_boottime));
seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
seq_puts(m, " (");

View file

@ -10,6 +10,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/irqnr.h>
#include <linux/sched/cputime.h>
#include <linux/tick.h>
@ -118,6 +119,8 @@ static int show_stat(struct seq_file *p, void *v)
irq = softirq = steal = 0;
guest = guest_nice = 0;
getboottime64(&boottime);
/* shift boot timestamp according to the timens offset */
timens_sub_boottime(&boottime);
for_each_possible_cpu(i) {
struct kernel_cpustat kcpustat;

View file

@ -45,7 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags,
struct user_namespace *user_ns,
struct time_namespace *old_ns);
void free_time_ns(struct kref *kref);
int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
struct vdso_data *arch_get_vdso_data(void *vvar_page);
static inline void put_time_ns(struct time_namespace *ns)
@ -77,6 +77,20 @@ static inline void timens_add_boottime(struct timespec64 *ts)
*ts = timespec64_add(*ts, ns_offsets->boottime);
}
/*
 * Add the boottime offset of the current task's time namespace to a
 * nanosecond value and return the shifted value.
 */
static inline u64 timens_add_boottime_ns(u64 nsec)
{
	const struct timespec64 *offset =
		&current->nsproxy->time_ns->offsets.boottime;

	return nsec + timespec64_to_ns(offset);
}
static inline void timens_sub_boottime(struct timespec64 *ts)
{
struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;
*ts = timespec64_sub(*ts, ns_offsets->boottime);
}
ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
struct timens_offsets *offsets);
@ -122,14 +136,22 @@ struct time_namespace *copy_time_ns(unsigned long flags,
return old_ns;
}
static inline int timens_on_fork(struct nsproxy *nsproxy,
static inline void timens_on_fork(struct nsproxy *nsproxy,
struct task_struct *tsk)
{
return 0;
return;
}
/* No-op variants: timestamps passed in are left unmodified. */
static inline void timens_add_monotonic(struct timespec64 *ts) { }
static inline void timens_add_boottime(struct timespec64 *ts) { }
/* Returns the nanosecond value unchanged. */
static inline u64 timens_add_boottime_ns(u64 nsec) { return nsec; }
static inline void timens_sub_boottime(struct timespec64 *ts) { }
static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
{
return tim;

View file

@ -153,7 +153,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
int ret;
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
@ -180,11 +179,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
if (IS_ERR(new_ns))
return PTR_ERR(new_ns);
ret = timens_on_fork(new_ns, tsk);
if (ret) {
free_nsproxy(new_ns);
return ret;
}
timens_on_fork(new_ns, tsk);
tsk->nsproxy = new_ns;
return 0;

View file

@ -308,22 +308,20 @@ static int timens_install(struct nsset *nsset, struct ns_common *new)
return 0;
}
int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
{
struct ns_common *nsc = &nsproxy->time_ns_for_children->ns;
struct time_namespace *ns = to_time_ns(nsc);
/* create_new_namespaces() already incremented the ref counter */
if (nsproxy->time_ns == nsproxy->time_ns_for_children)
return 0;
return;
get_time_ns(ns);
put_time_ns(nsproxy->time_ns);
nsproxy->time_ns = ns;
timens_commit(tsk, ns);
return 0;
}
static struct user_namespace *timens_owner(struct ns_common *ns)

View file

@ -93,6 +93,33 @@ static int read_proc_uptime(struct timespec *uptime)
return 0;
}
/*
 * Read the "btime" value from /proc/stat.
 *
 * @boottime_sec: filled with the parsed value on success.
 *
 * Returns 0 on success, -1 if /proc/stat cannot be opened or contains no
 * parsable "btime" line, and a negative errno value if reading fails.
 */
static int read_proc_stat_btime(unsigned long long *boottime_sec)
{
	FILE *proc;
	char line_buf[2048];
	int err;

	proc = fopen("/proc/stat", "r");
	if (proc == NULL) {
		pr_perror("Unable to open /proc/stat");
		return -1;
	}

	/* Scan line by line until the "btime <value>" entry is found. */
	while (fgets(line_buf, sizeof(line_buf), proc)) {
		if (sscanf(line_buf, "btime %llu", boottime_sec) != 1)
			continue;
		fclose(proc);
		return 0;
	}

	/*
	 * fgets() returns NULL both at end-of-file and on a read error.
	 * Ask the stream which one it was instead of trusting errno, which
	 * may still hold a stale value from an unrelated earlier call.
	 */
	if (ferror(proc)) {
		/* Save errno first: the calls below may modify it. */
		err = errno;
		pr_perror("fgets");
		fclose(proc);
		return err ? -err : -1;
	}

	pr_err("failed to parse /proc/stat");
	fclose(proc);
	return -1;
}
static int check_uptime(void)
{
struct timespec uptime_new, uptime_old;
@ -123,18 +150,47 @@ static int check_uptime(void)
return 0;
}
/*
 * Compare the /proc/stat btime seen from parent_ns with the one seen from
 * child_ns: the child's value is expected to be exactly TEN_DAYS_IN_SEC
 * smaller than the parent's.
 *
 * Returns 0 when the values match the expectation, 1 on a mismatch or a
 * read failure, and the result of pr_err() if switching namespaces fails.
 */
static int check_stat_btime(void)
{
	unsigned long long parent_btime, child_btime;
	unsigned long long want;

	/* Sample btime as seen from the parent namespace. */
	if (switch_ns(parent_ns))
		return pr_err("switch_ns(%d)", parent_ns);
	if (read_proc_stat_btime(&parent_btime))
		return 1;

	/* Sample btime again from inside the child namespace. */
	if (switch_ns(child_ns))
		return pr_err("switch_ns(%d)", child_ns);
	if (read_proc_stat_btime(&child_btime))
		return 1;

	want = parent_btime - TEN_DAYS_IN_SEC;
	if (child_btime == want) {
		ksft_test_result_pass("Passed for /proc/stat btime\n");
		return 0;
	}

	pr_fail("btime in /proc/stat: old %llu, new %llu [%llu]",
		parent_btime, child_btime, want);
	return 1;
}
int main(int argc, char *argv[])
{
int ret = 0;
nscheck();
ksft_set_plan(1);
ksft_set_plan(2);
if (init_namespaces())
return 1;
ret |= check_uptime();
ret |= check_stat_btime();
if (ret)
ksft_exit_fail();