[linux] 01/02: perf: Fix event->ctx locking (CVE-2016-6786)

debian-kernel at lists.debian.org
Sat Feb 18 19:55:07 UTC 2017


This is an automated email from the git hooks/post-receive script.

carnil pushed a commit to branch jessie-security
in repository linux.

commit 651fe4f4c4275bfc6273219dfa9692ba01613311
Author: Salvatore Bonaccorso <carnil at debian.org>
Date:   Sat Feb 18 18:26:02 2017 +0100

    perf: Fix event->ctx locking (CVE-2016-6786)
---
 debian/changelog                                   |   6 +
 .../bugfix/all/perf-Fix-event-ctx-locking.patch    | 501 +++++++++++++++++++++
 debian/patches/series                              |   1 +
 3 files changed, 508 insertions(+)

diff --git a/debian/changelog b/debian/changelog
index 3b18875..746341c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+linux (3.16.39-1+deb8u1) UNRELEASED; urgency=medium
+
+  * perf: Fix event->ctx locking (CVE-2016-6786)
+
+ -- Salvatore Bonaccorso <carnil at debian.org>  Sat, 18 Feb 2017 18:26:58 +0100
+
 linux (3.16.39-1) jessie; urgency=medium
 
   * New upstream stable update:
diff --git a/debian/patches/bugfix/all/perf-Fix-event-ctx-locking.patch b/debian/patches/bugfix/all/perf-Fix-event-ctx-locking.patch
new file mode 100644
index 0000000..d1e342f
--- /dev/null
+++ b/debian/patches/bugfix/all/perf-Fix-event-ctx-locking.patch
@@ -0,0 +1,501 @@
+From: Peter Zijlstra <peterz at infradead.org>
+Date: Fri, 23 Jan 2015 12:24:14 +0100
+Subject: perf: Fix event->ctx locking
+Origin: https://git.kernel.org/linus/f63a8daa5812afef4f06c962351687e1ff9ccb2b
+
+There have been a few reported issues wrt. the lack of locking around
+changing event->ctx. This patch tries to address those.
+
+It avoids the whole rwsem thing; and while it appears to work, please
+give it some thought in review.
+
+What I did fail at is sensible runtime checks on the use of
+event->ctx, the RCU use makes it very hard.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
+Cc: Paul E. McKenney <paulmck at linux.vnet.ibm.com>
+Cc: Jiri Olsa <jolsa at redhat.com>
+Cc: Arnaldo Carvalho de Melo <acme at kernel.org>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Link: http://lkml.kernel.org/r/20150123125834.209535886@infradead.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+[carnil: backport to 3.16, adjust context for 3.16]
+---
+ kernel/events/core.c | 244 +++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 207 insertions(+), 37 deletions(-)
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -908,6 +908,77 @@ static void put_ctx(struct perf_event_co
+ }
+ 
+ /*
++ * Because of perf_event::ctx migration in sys_perf_event_open::move_group and
++ * perf_pmu_migrate_context() we need some magic.
++ *
++ * Those places that change perf_event::ctx will hold both
++ * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
++ *
++ * Lock ordering is by mutex address. There is one other site where
++ * perf_event_context::mutex nests and that is put_event(). But remember that
++ * that is a parent<->child context relation, and migration does not affect
++ * children, therefore these two orderings should not interact.
++ *
++ * The change in perf_event::ctx does not affect children (as claimed above)
++ * because the sys_perf_event_open() case will install a new event and break
++ * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
++ * concerned with cpuctx and that doesn't have children.
++ *
++ * The places that change perf_event::ctx will issue:
++ *
++ *   perf_remove_from_context();
++ *   synchronize_rcu();
++ *   perf_install_in_context();
++ *
++ * to affect the change. The remove_from_context() + synchronize_rcu() should
++ * quiesce the event, after which we can install it in the new location. This
++ * means that only external vectors (perf_fops, prctl) can perturb the event
++ * while in transit. Therefore all such accessors should also acquire
++ * perf_event_context::mutex to serialize against this.
++ *
++ * However; because event->ctx can change while we're waiting to acquire
++ * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
++ * function.
++ *
++ * Lock order:
++ *	task_struct::perf_event_mutex
++ *	  perf_event_context::mutex
++ *	    perf_event_context::lock
++ *	    perf_event::child_mutex;
++ *	    perf_event::mmap_mutex
++ *	    mmap_sem
++ */
++static struct perf_event_context *perf_event_ctx_lock(struct perf_event *event)
++{
++	struct perf_event_context *ctx;
++
++again:
++	rcu_read_lock();
++	ctx = ACCESS_ONCE(event->ctx);
++	if (!atomic_inc_not_zero(&ctx->refcount)) {
++		rcu_read_unlock();
++		goto again;
++	}
++	rcu_read_unlock();
++
++	mutex_lock(&ctx->mutex);
++	if (event->ctx != ctx) {
++		mutex_unlock(&ctx->mutex);
++		put_ctx(ctx);
++		goto again;
++	}
++
++	return ctx;
++}
++
++static void perf_event_ctx_unlock(struct perf_event *event,
++				  struct perf_event_context *ctx)
++{
++	mutex_unlock(&ctx->mutex);
++	put_ctx(ctx);
++}
++
++/*
+  * This must be done under the ctx->lock, such as to serialize against
+  * context_equiv(), therefore we cannot call put_ctx() since that might end up
+  * calling scheduler related locks and ctx->lock nests inside those.
+@@ -1611,7 +1682,7 @@ int __perf_event_disable(void *info)
+  * is the current context on this CPU and preemption is disabled,
+  * hence we can't get into perf_event_task_sched_out for this context.
+  */
+-void perf_event_disable(struct perf_event *event)
++static void _perf_event_disable(struct perf_event *event)
+ {
+ 	struct perf_event_context *ctx = event->ctx;
+ 	struct task_struct *task = ctx->task;
+@@ -1652,6 +1723,19 @@ retry:
+ 	}
+ 	raw_spin_unlock_irq(&ctx->lock);
+ }
++
++/*
++ * Strictly speaking kernel users cannot create groups and therefore this
++ * interface does not need the perf_event_ctx_lock() magic.
++ */
++void perf_event_disable(struct perf_event *event)
++{
++	struct perf_event_context *ctx;
++
++	ctx = perf_event_ctx_lock(event);
++	_perf_event_disable(event);
++	perf_event_ctx_unlock(event, ctx);
++}
+ EXPORT_SYMBOL_GPL(perf_event_disable);
+ 
+ static void perf_set_shadow_time(struct perf_event *event,
+@@ -2112,7 +2196,7 @@ unlock:
+  * perf_event_for_each_child or perf_event_for_each as described
+  * for perf_event_disable.
+  */
+-void perf_event_enable(struct perf_event *event)
++static void _perf_event_enable(struct perf_event *event)
+ {
+ 	struct perf_event_context *ctx = event->ctx;
+ 	struct task_struct *task = ctx->task;
+@@ -2168,9 +2252,21 @@ retry:
+ out:
+ 	raw_spin_unlock_irq(&ctx->lock);
+ }
++
++/*
++ * See perf_event_disable();
++ */
++void perf_event_enable(struct perf_event *event)
++{
++	struct perf_event_context *ctx;
++
++	ctx = perf_event_ctx_lock(event);
++	_perf_event_enable(event);
++	perf_event_ctx_unlock(event, ctx);
++}
+ EXPORT_SYMBOL_GPL(perf_event_enable);
+ 
+-int perf_event_refresh(struct perf_event *event, int refresh)
++static int _perf_event_refresh(struct perf_event *event, int refresh)
+ {
+ 	/*
+ 	 * not supported on inherited events
+@@ -2179,10 +2275,25 @@ int perf_event_refresh(struct perf_event
+ 		return -EINVAL;
+ 
+ 	atomic_add(refresh, &event->event_limit);
+-	perf_event_enable(event);
++	_perf_event_enable(event);
+ 
+ 	return 0;
+ }
++
++/*
++ * See perf_event_disable()
++ */
++int perf_event_refresh(struct perf_event *event, int refresh)
++{
++	struct perf_event_context *ctx;
++	int ret;
++
++	ctx = perf_event_ctx_lock(event);
++	ret = _perf_event_refresh(event, refresh);
++	perf_event_ctx_unlock(event, ctx);
++
++	return ret;
++}
+ EXPORT_SYMBOL_GPL(perf_event_refresh);
+ 
+ static void ctx_sched_out(struct perf_event_context *ctx,
+@@ -3378,7 +3489,16 @@ static void put_event(struct perf_event
+ 	rcu_read_unlock();
+ 
+ 	if (owner) {
+-		mutex_lock(&owner->perf_event_mutex);
++		/*
++		 * If we're here through perf_event_exit_task() we're already
++		 * holding ctx->mutex which would be an inversion wrt. the
++		 * normal lock order.
++		 *
++		 * However we can safely take this lock because its the child
++		 * ctx->mutex.
++		 */
++		mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
++
+ 		/*
+ 		 * We have to re-check the event->owner field, if it is cleared
+ 		 * we raced with perf_event_exit_task(), acquiring the mutex
+@@ -3454,12 +3574,13 @@ static int perf_event_read_group(struct
+ 				   u64 read_format, char __user *buf)
+ {
+ 	struct perf_event *leader = event->group_leader, *sub;
+-	int n = 0, size = 0, ret = -EFAULT;
+ 	struct perf_event_context *ctx = leader->ctx;
+-	u64 values[5];
++	int n = 0, size = 0, ret;
+ 	u64 count, enabled, running;
++	u64 values[5];
++
++	lockdep_assert_held(&ctx->mutex);
+ 
+-	mutex_lock(&ctx->mutex);
+ 	count = perf_event_read_value(leader, &enabled, &running);
+ 
+ 	values[n++] = 1 + leader->nr_siblings;
+@@ -3474,7 +3595,7 @@ static int perf_event_read_group(struct
+ 	size = n * sizeof(u64);
+ 
+ 	if (copy_to_user(buf, values, size))
+-		goto unlock;
++		return -EFAULT;
+ 
+ 	ret = size;
+ 
+@@ -3488,14 +3609,11 @@ static int perf_event_read_group(struct
+ 		size = n * sizeof(u64);
+ 
+ 		if (copy_to_user(buf + ret, values, size)) {
+-			ret = -EFAULT;
+-			goto unlock;
++			return -EFAULT;
+ 		}
+ 
+ 		ret += size;
+ 	}
+-unlock:
+-	mutex_unlock(&ctx->mutex);
+ 
+ 	return ret;
+ }
+@@ -3554,8 +3672,14 @@ static ssize_t
+ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+ {
+ 	struct perf_event *event = file->private_data;
++	struct perf_event_context *ctx;
++	int ret;
+ 
+-	return perf_read_hw(event, buf, count);
++	ctx = perf_event_ctx_lock(event);
++	ret = perf_read_hw(event, buf, count);
++	perf_event_ctx_unlock(event, ctx);
++
++	return ret;
+ }
+ 
+ static unsigned int perf_poll(struct file *file, poll_table *wait)
+@@ -3579,7 +3703,7 @@ static unsigned int perf_poll(struct fil
+ 	return events;
+ }
+ 
+-static void perf_event_reset(struct perf_event *event)
++static void _perf_event_reset(struct perf_event *event)
+ {
+ 	(void)perf_event_read(event);
+ 	local64_set(&event->count, 0);
+@@ -3598,6 +3722,7 @@ static void perf_event_for_each_child(st
+ 	struct perf_event *child;
+ 
+ 	WARN_ON_ONCE(event->ctx->parent_ctx);
++
+ 	mutex_lock(&event->child_mutex);
+ 	func(event);
+ 	list_for_each_entry(child, &event->child_list, child_list)
+@@ -3611,14 +3736,13 @@ static void perf_event_for_each(struct p
+ 	struct perf_event_context *ctx = event->ctx;
+ 	struct perf_event *sibling;
+ 
+-	WARN_ON_ONCE(ctx->parent_ctx);
+-	mutex_lock(&ctx->mutex);
++	lockdep_assert_held(&ctx->mutex);
++
+ 	event = event->group_leader;
+ 
+ 	perf_event_for_each_child(event, func);
+ 	list_for_each_entry(sibling, &event->sibling_list, group_entry)
+ 		perf_event_for_each_child(sibling, func);
+-	mutex_unlock(&ctx->mutex);
+ }
+ 
+ struct period_event {
+@@ -3730,25 +3854,24 @@ static int perf_event_set_output(struct
+ 				 struct perf_event *output_event);
+ static int perf_event_set_filter(struct perf_event *event, void __user *arg);
+ 
+-static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
+ {
+-	struct perf_event *event = file->private_data;
+ 	void (*func)(struct perf_event *);
+ 	u32 flags = arg;
+ 
+ 	switch (cmd) {
+ 	case PERF_EVENT_IOC_ENABLE:
+-		func = perf_event_enable;
++		func = _perf_event_enable;
+ 		break;
+ 	case PERF_EVENT_IOC_DISABLE:
+-		func = perf_event_disable;
++		func = _perf_event_disable;
+ 		break;
+ 	case PERF_EVENT_IOC_RESET:
+-		func = perf_event_reset;
++		func = _perf_event_reset;
+ 		break;
+ 
+ 	case PERF_EVENT_IOC_REFRESH:
+-		return perf_event_refresh(event, arg);
++		return _perf_event_refresh(event, arg);
+ 
+ 	case PERF_EVENT_IOC_PERIOD:
+ 		return perf_event_period(event, (u64 __user *)arg);
+@@ -3795,6 +3918,19 @@ static long perf_ioctl(struct file *file
+ 	return 0;
+ }
+ 
++static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++	struct perf_event *event = file->private_data;
++	struct perf_event_context *ctx;
++	long ret;
++
++	ctx = perf_event_ctx_lock(event);
++	ret = _perf_ioctl(event, cmd, arg);
++	perf_event_ctx_unlock(event, ctx);
++
++	return ret;
++}
++
+ #ifdef CONFIG_COMPAT
+ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
+ 				unsigned long arg)
+@@ -3817,11 +3953,15 @@ static long perf_compat_ioctl(struct fil
+ 
+ int perf_event_task_enable(void)
+ {
++	struct perf_event_context *ctx;
+ 	struct perf_event *event;
+ 
+ 	mutex_lock(&current->perf_event_mutex);
+-	list_for_each_entry(event, &current->perf_event_list, owner_entry)
+-		perf_event_for_each_child(event, perf_event_enable);
++	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
++		ctx = perf_event_ctx_lock(event);
++		perf_event_for_each_child(event, _perf_event_enable);
++		perf_event_ctx_unlock(event, ctx);
++	}
+ 	mutex_unlock(&current->perf_event_mutex);
+ 
+ 	return 0;
+@@ -3829,11 +3969,15 @@ int perf_event_task_enable(void)
+ 
+ int perf_event_task_disable(void)
+ {
++	struct perf_event_context *ctx;
+ 	struct perf_event *event;
+ 
+ 	mutex_lock(&current->perf_event_mutex);
+-	list_for_each_entry(event, &current->perf_event_list, owner_entry)
+-		perf_event_for_each_child(event, perf_event_disable);
++	list_for_each_entry(event, &current->perf_event_list, owner_entry) {
++		ctx = perf_event_ctx_lock(event);
++		perf_event_for_each_child(event, _perf_event_disable);
++		perf_event_ctx_unlock(event, ctx);
++	}
+ 	mutex_unlock(&current->perf_event_mutex);
+ 
+ 	return 0;
+@@ -7163,6 +7307,15 @@ out:
+ 	return ret;
+ }
+ 
++static void mutex_lock_double(struct mutex *a, struct mutex *b)
++{
++	if (b < a)
++		swap(a, b);
++
++	mutex_lock(a);
++	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
++}
++
+ /**
+  * sys_perf_event_open - open a performance event, associate it to a task/cpu
+  *
+@@ -7178,7 +7331,7 @@ SYSCALL_DEFINE5(perf_event_open,
+ 	struct perf_event *group_leader = NULL, *output_event = NULL;
+ 	struct perf_event *event, *sibling;
+ 	struct perf_event_attr attr;
+-	struct perf_event_context *ctx;
++	struct perf_event_context *ctx, *uninitialized_var(gctx);
+ 	struct file *event_file = NULL;
+ 	struct fd group = {NULL, 0};
+ 	struct task_struct *task = NULL;
+@@ -7377,9 +7530,14 @@ SYSCALL_DEFINE5(perf_event_open,
+ 	}
+ 
+ 	if (move_group) {
+-		struct perf_event_context *gctx = group_leader->ctx;
++		gctx = group_leader->ctx;
++
++		/*
++		 * See perf_event_ctx_lock() for comments on the details
++		 * of swizzling perf_event::ctx.
++		 */
++		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+ 
+-		mutex_lock(&gctx->mutex);
+ 		perf_remove_from_context(group_leader, false);
+ 
+ 		/*
+@@ -7394,15 +7552,19 @@ SYSCALL_DEFINE5(perf_event_open,
+ 			perf_event__state_init(sibling);
+ 			put_ctx(gctx);
+ 		}
+-		mutex_unlock(&gctx->mutex);
+-		put_ctx(gctx);
++	} else {
++		mutex_lock(&ctx->mutex);
+ 	}
+ 
+ 	WARN_ON_ONCE(ctx->parent_ctx);
+-	mutex_lock(&ctx->mutex);
+ 
+ 	if (move_group) {
++		/*
++		 * Wait for everybody to stop referencing the events through
++		 * the old lists, before installing it on new lists.
++		 */
+ 		synchronize_rcu();
++
+ 		perf_install_in_context(ctx, group_leader, group_leader->cpu);
+ 		get_ctx(ctx);
+ 		list_for_each_entry(sibling, &group_leader->sibling_list,
+@@ -7414,6 +7576,11 @@ SYSCALL_DEFINE5(perf_event_open,
+ 
+ 	perf_install_in_context(ctx, event, event->cpu);
+ 	perf_unpin_context(ctx);
++
++	if (move_group) {
++		mutex_unlock(&gctx->mutex);
++		put_ctx(gctx);
++	}
+ 	mutex_unlock(&ctx->mutex);
+ 
+ 	put_online_cpus();
+@@ -7516,7 +7683,11 @@ void perf_pmu_migrate_context(struct pmu
+ 	src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
+ 	dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
+ 
+-	mutex_lock(&src_ctx->mutex);
++	/*
++	 * See perf_event_ctx_lock() for comments on the details
++	 * of swizzling perf_event::ctx.
++	 */
++	mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
+ 	list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
+ 				 event_entry) {
+ 		perf_remove_from_context(event, false);
+@@ -7524,11 +7695,9 @@ void perf_pmu_migrate_context(struct pmu
+ 		put_ctx(src_ctx);
+ 		list_add(&event->migrate_entry, &events);
+ 	}
+-	mutex_unlock(&src_ctx->mutex);
+ 
+ 	synchronize_rcu();
+ 
+-	mutex_lock(&dst_ctx->mutex);
+ 	list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+ 		list_del(&event->migrate_entry);
+ 		if (event->state >= PERF_EVENT_STATE_OFF)
+@@ -7538,6 +7707,7 @@ void perf_pmu_migrate_context(struct pmu
+ 		get_ctx(dst_ctx);
+ 	}
+ 	mutex_unlock(&dst_ctx->mutex);
++	mutex_unlock(&src_ctx->mutex);
+ }
+ EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
+ 
diff --git a/debian/patches/series b/debian/patches/series
index 558cb57..1c96fa3 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -679,6 +679,7 @@ bugfix/all/sg_write-bsg_write-is-not-fit-to-be-called-under-ker.patch
 bugfix/x86/kvm-x86-drop-error-recovery-in-em_jmp_far-and-em_ret.patch
 bugfix/all/net-avoid-signed-overflows-for-so_-snd-rcv-bufforce.patch
 bugfix/all/alsa-pcm-call-kill_fasync-in-stream-lock.patch
+bugfix/all/perf-Fix-event-ctx-locking.patch
 
 # Fix ABI changes
 debian/of-fix-abi-changes.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/kernel/linux.git