perf/core: Fix race between close() and fork()

Syzcaller reported the following Use-after-Free bug:

	close()						clone()

							  copy_process()
							    perf_event_init_task()
							      perf_event_init_context()
							        mutex_lock(parent_ctx->mutex)
								inherit_task_group()
								  inherit_group()
								    inherit_event()
								      mutex_lock(event->child_mutex)
								      // expose event on child list
								      list_add_tail()
								      mutex_unlock(event->child_mutex)
							        mutex_unlock(parent_ctx->mutex)

							    ...
							    goto bad_fork_*

							  bad_fork_cleanup_perf:
							    perf_event_free_task()

	  perf_release()
	    perf_event_release_kernel()
	      list_for_each_entry()
		mutex_lock(ctx->mutex)
		mutex_lock(event->child_mutex)
		// event is from the failing inherit
		// on the other CPU
		perf_remove_from_context()
		list_move()
		mutex_unlock(event->child_mutex)
		mutex_unlock(ctx->mutex)

							      mutex_lock(ctx->mutex)
							      list_for_each_entry_safe()
							        // event already stolen
							      mutex_unlock(ctx->mutex)

							    delayed_free_task()
							      free_task()

	     list_for_each_entry_safe()
	       list_del()
	       free_event()
	         _free_event()
		   // and so event->hw.target
		   // is the already freed failed clone()
		   if (event->hw.target)
		     put_task_struct(event->hw.target)
		       // WHOOPSIE, already quite dead

Which puts the lie to the the comment on perf_event_free_task():
'unexposed, unused context' not so much.

Which is a 'fun' confluence of fail; copy_process() doing an
unconditional free_task() and not respecting refcounts, and perf having
creative locking. In particular:

  82d94856fa ("perf/core: Fix lock inversion between perf,trace,cpuhp")

seems to have overlooked this 'fun' parade.

Solve it by using the fact that detached events still have a reference
count on their (previous) context. With this perf_event_free_task()
can detect when events have escaped and wait for their destruction.

Debugged-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Reported-by: syzbot+a24c397a29ad22d86c98@syzkaller.appspotmail.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: <stable@vger.kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: 82d94856fa ("perf/core: Fix lock inversion between perf,trace,cpuhp")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Peter Zijlstra 2019-07-13 11:21:25 +02:00 committed by Ingo Molnar
parent e5eb08ac81
commit 1cf8dfe8a6
1 changed files with 41 additions and 8 deletions

View File

@ -4465,12 +4465,20 @@ static void _free_event(struct perf_event *event)
if (event->destroy)
event->destroy(event);
if (event->ctx)
put_ctx(event->ctx);
/*
* Must be after ->destroy(), due to uprobe_perf_close() using
* hw.target.
*/
if (event->hw.target)
put_task_struct(event->hw.target);
/*
* perf_event_free_task() relies on put_ctx() being 'last', in particular
* all task references must be cleaned up.
*/
if (event->ctx)
put_ctx(event->ctx);
exclusive_event_destroy(event);
module_put(event->pmu->module);
@ -4650,8 +4658,17 @@ again:
mutex_unlock(&event->child_mutex);
list_for_each_entry_safe(child, tmp, &free_list, child_list) {
void *var = &child->ctx->refcount;
list_del(&child->child_list);
free_event(child);
/*
* Wake any perf_event_free_task() waiting for this event to be
* freed.
*/
smp_mb(); /* pairs with wait_var_event() */
wake_up_var(var);
}
no_ctx:
@ -11527,11 +11544,11 @@ static void perf_free_event(struct perf_event *event,
}
/*
* Free an unexposed, unused context as created by inheritance by
* perf_event_init_task below, used by fork() in case of fail.
* Free a context as created by inheritance by perf_event_init_task() below,
* used by fork() in case of fail.
*
* Not all locks are strictly required, but take them anyway to be nice and
* help out with the lockdep assertions.
* Even though the task has never lived, the context and events have been
* exposed through the child_list, so we must take care tearing it all down.
*/
void perf_event_free_task(struct task_struct *task)
{
@ -11561,7 +11578,23 @@ void perf_event_free_task(struct task_struct *task)
perf_free_event(event, ctx);
mutex_unlock(&ctx->mutex);
put_ctx(ctx);
/*
* perf_event_release_kernel() could've stolen some of our
* child events and still have them on its free_list. In that
* case we must wait for these events to have been freed (in
* particular all their references to this task must've been
* dropped).
*
* Without this copy_process() will unconditionally free this
* task (irrespective of its reference count) and
* _free_event()'s put_task_struct(event->hw.target) will be a
* use-after-free.
*
* Wait for all events to drop their context reference.
*/
wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
put_ctx(ctx); /* must be last */
}
}