[linux] 01/01: nbd: Restore request timeout detection (Closes: #770479)

debian-kernel at lists.debian.org debian-kernel at lists.debian.org
Thu Oct 8 20:45:44 UTC 2015


This is an automated email from the git hooks/post-receive script.

benh pushed a commit to branch sid
in repository linux.

commit e3bec54b78019f0678901c1248b21a1b3a2d63fd
Author: Ben Hutchings <ben at decadent.org.uk>
Date:   Thu Oct 8 21:26:03 2015 +0100

    nbd: Restore request timeout detection (Closes: #770479)
---
 debian/changelog                                   |   6 +
 .../bugfix/all/nbd-add-locking-for-tasks.patch     | 134 ++++++++++++
 .../bugfix/all/nbd-fix-timeout-detection.patch     | 239 +++++++++++++++++++++
 .../bugfix/all/nbd-remove-variable-pid.patch       |  82 +++++++
 debian/patches/series                              |   3 +
 5 files changed, 464 insertions(+)

diff --git a/debian/changelog b/debian/changelog
index 6b76170..b881115 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+linux (4.2.3-2) UNRELEASED; urgency=medium
+
+  * nbd: Restore request timeout detection (Closes: #770479)
+
+ -- Ben Hutchings <ben at decadent.org.uk>  Thu, 08 Oct 2015 21:24:14 +0100
+
 linux (4.2.3-1) unstable; urgency=medium
 
   * New upstream stable update:
diff --git a/debian/patches/bugfix/all/nbd-add-locking-for-tasks.patch b/debian/patches/bugfix/all/nbd-add-locking-for-tasks.patch
new file mode 100644
index 0000000..0023618
--- /dev/null
+++ b/debian/patches/bugfix/all/nbd-add-locking-for-tasks.patch
@@ -0,0 +1,134 @@
+From: Markus Pargmann <mpa at pengutronix.de>
+Date: Tue,  6 Oct 2015 20:03:54 +0200
+Subject: nbd: Add locking for tasks
+Origin: http://mid.gmane.org/1444154634-24927-1-git-send-email-mpa@pengutronix.de
+Bug-Debian: https://bugs.debian.org/770479
+
+The timeout handling introduced in
+	7e2893a16d3e (nbd: Fix timeout detection)
+introduces a race condition which may lead to killing of tasks that are
+not in nbd context anymore. This was not observed or reproducible yet.
+
+This patch adds locking to critical use of task_recv and task_send to
+avoid killing tasks that already left the NBD thread functions. This
+lock is only acquired if a timeout occurs or the nbd device
+starts/stops.
+
+Reported-by: Ben Hutchings <ben at decadent.org.uk>
+Signed-off-by: Markus Pargmann <mpa at pengutronix.de>
+[bwh: Backported to 4.2: adjust context]
+---
+ drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++------
+ 1 file changed, 30 insertions(+), 6 deletions(-)
+
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -60,6 +60,7 @@ struct nbd_device {
+ 	int disconnect; /* a disconnect has been requested by user */
+ 
+ 	struct timer_list timeout_timer;
++	spinlock_t tasks_lock;
+ 	struct task_struct *task_recv;
+ 	struct task_struct *task_send;
+ };
+@@ -133,21 +134,23 @@ static void sock_shutdown(struct nbd_dev
+ static void nbd_xmit_timeout(unsigned long arg)
+ {
+ 	struct nbd_device *nbd = (struct nbd_device *)arg;
+-	struct task_struct *task;
++	unsigned long flags;
+ 
+ 	if (list_empty(&nbd->queue_head))
+ 		return;
+ 
+ 	nbd->disconnect = 1;
+ 
+-	task = READ_ONCE(nbd->task_recv);
+-	if (task)
+-		force_sig(SIGKILL, task);
++	spin_lock_irqsave(&nbd->tasks_lock, flags);
++
++	if (nbd->task_recv)
++		force_sig(SIGKILL, nbd->task_recv);
+ 
+-	task = READ_ONCE(nbd->task_send);
+-	if (task)
++	if (nbd->task_send)
+ 		force_sig(SIGKILL, nbd->task_send);
+ 
++	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
++
+ 	dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
+ }
+ 
+@@ -401,17 +404,24 @@ static int nbd_do_it(struct nbd_device *
+ {
+ 	struct request *req;
+ 	int ret;
++	unsigned long flags;
+ 
+ 	BUG_ON(nbd->magic != NBD_MAGIC);
+ 
+ 	sk_set_memalloc(nbd->sock->sk);
+ 
++	spin_lock_irqsave(&nbd->tasks_lock, flags);
+ 	nbd->task_recv = current;
++	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+ 
+ 	ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+ 	if (ret) {
+ 		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
++
++		spin_lock_irqsave(&nbd->tasks_lock, flags);
+ 		nbd->task_recv = NULL;
++		spin_unlock_irqrestore(&nbd->tasks_lock, flags);
++
+ 		return ret;
+ 	}
+ 
+@@ -420,7 +430,9 @@ static int nbd_do_it(struct nbd_device *
+ 
+ 	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+ 
++	spin_lock_irqsave(&nbd->tasks_lock, flags);
+ 	nbd->task_recv = NULL;
++	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+ 
+ 	if (signal_pending(current)) {
+ 		siginfo_t info;
+@@ -522,8 +534,11 @@ static int nbd_thread(void *data)
+ {
+ 	struct nbd_device *nbd = data;
+ 	struct request *req;
++	unsigned long flags;
+ 
++	spin_lock_irqsave(&nbd->tasks_lock, flags);
+ 	nbd->task_send = current;
++	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+ 
+ 	set_user_nice(current, MIN_NICE);
+ 	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
+@@ -558,7 +573,15 @@ static int nbd_thread(void *data)
+ 		nbd_handle_req(nbd, req);
+ 	}
+ 
++	spin_lock_irqsave(&nbd->tasks_lock, flags);
+ 	nbd->task_send = NULL;
++	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
++
++	/* Clear maybe pending signals */
++	if (signal_pending(current)) {
++		siginfo_t info;
++		dequeue_signal_lock(current, &current->blocked, &info);
++	}
+ 
+ 	return 0;
+ }
+@@ -878,6 +901,7 @@ static int __init nbd_init(void)
+ 		nbd_dev[i].magic = NBD_MAGIC;
+ 		INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
+ 		spin_lock_init(&nbd_dev[i].queue_lock);
++		spin_lock_init(&nbd_dev[i].tasks_lock);
+ 		INIT_LIST_HEAD(&nbd_dev[i].queue_head);
+ 		mutex_init(&nbd_dev[i].tx_lock);
+ 		init_timer(&nbd_dev[i].timeout_timer);
diff --git a/debian/patches/bugfix/all/nbd-fix-timeout-detection.patch b/debian/patches/bugfix/all/nbd-fix-timeout-detection.patch
new file mode 100644
index 0000000..e83dfa3
--- /dev/null
+++ b/debian/patches/bugfix/all/nbd-fix-timeout-detection.patch
@@ -0,0 +1,239 @@
+From: Markus Pargmann <mpa at pengutronix.de>
+Date: Mon, 17 Aug 2015 08:20:00 +0200
+Subject: nbd: Fix timeout detection
+Origin: https://git.kernel.org/linus/7e2893a16d3e71035a38122a77bc55848a29f0e4
+Bug-Debian: https://bugs.debian.org/770479
+
+At the moment the nbd timeout just detects hanging tcp operations. This
+is not enough to detect a hanging or bad connection as expected of a
+timeout.
+
+This patch redesigns the timeout detection to include some more cases.
+The timeout is now in relation to replies from the server. If the server
+does not send replies within the timeout the connection will be shut
+down.
+
+The patch adds a continuous timer 'timeout_timer' that is set up in one of
+two cases:
+ - The request list is empty and we are sending the first request out to
+   the server. We want to have a reply within the given timeout,
+   otherwise we consider the connection to be dead.
+ - A server response was received. This means the server is still
+   communicating with us. The timer is reset to the timeout value.
+
+The timer is not stopped if the list becomes empty. It will just trigger
+a timeout which will directly leave the handling routine again as the
+request list is empty.
+
+The whole patch does not use any additional explicit locking. The
+list_empty() calls are safe to be used concurrently. The timer is locked
+internally as we just use mod_timer and del_timer_sync().
+
+The patch is based on the idea of Michal Belczyk with a previous
+different implementation.
+
+Cc: Michal Belczyk <belczyk at bsd.krakow.pl>
+Cc: Hermann Lauer <Hermann.Lauer at iwr.uni-heidelberg.de>
+Signed-off-by: Markus Pargmann <mpa at pengutronix.de>
+Tested-by: Hermann Lauer <Hermann.Lauer at iwr.uni-heidelberg.de>
+Signed-off-by: Jens Axboe <axboe at fb.com>
+---
+ drivers/block/nbd.c | 98 ++++++++++++++++++++++++++++++++++++++---------------
+ 1 file changed, 70 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
+index f169faf..f3536e6 100644
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -59,6 +59,10 @@ struct nbd_device {
+ 	pid_t pid; /* pid of nbd-client, if attached */
+ 	int xmit_timeout;
+ 	int disconnect; /* a disconnect has been requested by user */
++
++	struct timer_list timeout_timer;
++	struct task_struct *task_recv;
++	struct task_struct *task_send;
+ };
+ 
+ #define NBD_MAGIC 0x68797548
+@@ -121,6 +125,7 @@ static void sock_shutdown(struct nbd_device *nbd, int lock)
+ 		dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
+ 		kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
+ 		nbd->sock = NULL;
++		del_timer_sync(&nbd->timeout_timer);
+ 	}
+ 	if (lock)
+ 		mutex_unlock(&nbd->tx_lock);
+@@ -128,11 +133,23 @@ static void sock_shutdown(struct nbd_device *nbd, int lock)
+ 
+ static void nbd_xmit_timeout(unsigned long arg)
+ {
+-	struct task_struct *task = (struct task_struct *)arg;
++	struct nbd_device *nbd = (struct nbd_device *)arg;
++	struct task_struct *task;
++
++	if (list_empty(&nbd->queue_head))
++		return;
++
++	nbd->disconnect = 1;
++
++	task = READ_ONCE(nbd->task_recv);
++	if (task)
++		force_sig(SIGKILL, task);
+ 
+-	printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
+-		task->comm, task->pid);
+-	force_sig(SIGKILL, task);
++	task = READ_ONCE(nbd->task_send);
++	if (task)
++		force_sig(SIGKILL, nbd->task_send);
++
++	dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
+ }
+ 
+ /*
+@@ -171,33 +188,12 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
+ 		msg.msg_controllen = 0;
+ 		msg.msg_flags = msg_flags | MSG_NOSIGNAL;
+ 
+-		if (send) {
+-			struct timer_list ti;
+-
+-			if (nbd->xmit_timeout) {
+-				init_timer(&ti);
+-				ti.function = nbd_xmit_timeout;
+-				ti.data = (unsigned long)current;
+-				ti.expires = jiffies + nbd->xmit_timeout;
+-				add_timer(&ti);
+-			}
++		if (send)
+ 			result = kernel_sendmsg(sock, &msg, &iov, 1, size);
+-			if (nbd->xmit_timeout)
+-				del_timer_sync(&ti);
+-		} else
++		else
+ 			result = kernel_recvmsg(sock, &msg, &iov, 1, size,
+ 						msg.msg_flags);
+ 
+-		if (signal_pending(current)) {
+-			siginfo_t info;
+-			printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
+-				task_pid_nr(current), current->comm,
+-				dequeue_signal_lock(current, &current->blocked, &info));
+-			result = -EINTR;
+-			sock_shutdown(nbd, !send);
+-			break;
+-		}
+-
+ 		if (result <= 0) {
+ 			if (result == 0)
+ 				result = -EPIPE; /* short read */
+@@ -210,6 +206,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
+ 	sigprocmask(SIG_SETMASK, &oldset, NULL);
+ 	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+ 
++	if (!send && nbd->xmit_timeout)
++		mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
++
+ 	return result;
+ }
+ 
+@@ -415,12 +414,26 @@ static int nbd_do_it(struct nbd_device *nbd)
+ 		return ret;
+ 	}
+ 
++	nbd->task_recv = current;
++
+ 	while ((req = nbd_read_stat(nbd)) != NULL)
+ 		nbd_end_request(nbd, req);
+ 
++	nbd->task_recv = NULL;
++
++	if (signal_pending(current)) {
++		siginfo_t info;
++
++		ret = dequeue_signal_lock(current, &current->blocked, &info);
++		dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
++			 task_pid_nr(current), current->comm, ret);
++		sock_shutdown(nbd, 1);
++		ret = -ETIMEDOUT;
++	}
++
+ 	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+ 	nbd->pid = 0;
+-	return 0;
++	return ret;
+ }
+ 
+ static void nbd_clear_que(struct nbd_device *nbd)
+@@ -482,6 +495,9 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
+ 
+ 	nbd->active_req = req;
+ 
++	if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
++		mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
++
+ 	if (nbd_send_req(nbd, req) != 0) {
+ 		dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
+ 		req->errors++;
+@@ -508,6 +524,8 @@ static int nbd_thread(void *data)
+ 	struct nbd_device *nbd = data;
+ 	struct request *req;
+ 
++	nbd->task_send = current;
++
+ 	set_user_nice(current, MIN_NICE);
+ 	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
+ 		/* wait for something to do */
+@@ -515,6 +533,18 @@ static int nbd_thread(void *data)
+ 					 kthread_should_stop() ||
+ 					 !list_empty(&nbd->waiting_queue));
+ 
++		if (signal_pending(current)) {
++			siginfo_t info;
++			int ret;
++
++			ret = dequeue_signal_lock(current, &current->blocked,
++						  &info);
++			dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
++				 task_pid_nr(current), current->comm, ret);
++			sock_shutdown(nbd, 1);
++			break;
++		}
++
+ 		/* extract request */
+ 		if (list_empty(&nbd->waiting_queue))
+ 			continue;
+@@ -528,6 +558,9 @@ static int nbd_thread(void *data)
+ 		/* handle request */
+ 		nbd_handle_req(nbd, req);
+ 	}
++
++	nbd->task_send = NULL;
++
+ 	return 0;
+ }
+ 
+@@ -648,6 +681,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
+ 
+ 	case NBD_SET_TIMEOUT:
+ 		nbd->xmit_timeout = arg * HZ;
++		if (arg)
++			mod_timer(&nbd->timeout_timer,
++				  jiffies + nbd->xmit_timeout);
++		else
++			del_timer_sync(&nbd->timeout_timer);
++
+ 		return 0;
+ 
+ 	case NBD_SET_FLAGS:
+@@ -842,6 +881,9 @@ static int __init nbd_init(void)
+ 		spin_lock_init(&nbd_dev[i].queue_lock);
+ 		INIT_LIST_HEAD(&nbd_dev[i].queue_head);
+ 		mutex_init(&nbd_dev[i].tx_lock);
++		init_timer(&nbd_dev[i].timeout_timer);
++		nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
++		nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
+ 		init_waitqueue_head(&nbd_dev[i].active_wq);
+ 		init_waitqueue_head(&nbd_dev[i].waiting_wq);
+ 		nbd_dev[i].blksize = 1024;
diff --git a/debian/patches/bugfix/all/nbd-remove-variable-pid.patch b/debian/patches/bugfix/all/nbd-remove-variable-pid.patch
new file mode 100644
index 0000000..ed94513
--- /dev/null
+++ b/debian/patches/bugfix/all/nbd-remove-variable-pid.patch
@@ -0,0 +1,82 @@
+From: Markus Pargmann <mpa at pengutronix.de>
+Date: Mon, 17 Aug 2015 08:20:05 +0200
+Subject: nbd: Remove variable 'pid'
+Origin: https://git.kernel.org/linus/6521d39a64b3f9c3acb0fd25a34cfaf9a40e548e
+Bug-Debian: https://bugs.debian.org/770479
+
+This patch uses nbd->task_recv to determine the value of the previously
+used variable 'pid' for sysfs.
+
+Signed-off-by: Markus Pargmann <mpa at pengutronix.de>
+Signed-off-by: Jens Axboe <axboe at fb.com>
+[bwh: Backported to 4.2: adjust context]
+---
+ drivers/block/nbd.c | 19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -56,7 +56,6 @@ struct nbd_device {
+ 	struct gendisk *disk;
+ 	int blksize;
+ 	loff_t bytesize;
+-	pid_t pid; /* pid of nbd-client, if attached */
+ 	int xmit_timeout;
+ 	int disconnect; /* a disconnect has been requested by user */
+ 
+@@ -388,9 +387,9 @@ static ssize_t pid_show(struct device *d
+ 			struct device_attribute *attr, char *buf)
+ {
+ 	struct gendisk *disk = dev_to_disk(dev);
++	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
+ 
+-	return sprintf(buf, "%ld\n",
+-		(long) ((struct nbd_device *)disk->private_data)->pid);
++	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
+ }
+ 
+ static struct device_attribute pid_attr = {
+@@ -406,19 +405,21 @@ static int nbd_do_it(struct nbd_device *
+ 	BUG_ON(nbd->magic != NBD_MAGIC);
+ 
+ 	sk_set_memalloc(nbd->sock->sk);
+-	nbd->pid = task_pid_nr(current);
++
++	nbd->task_recv = current;
++
+ 	ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+ 	if (ret) {
+ 		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
+-		nbd->pid = 0;
++		nbd->task_recv = NULL;
+ 		return ret;
+ 	}
+ 
+-	nbd->task_recv = current;
+-
+ 	while ((req = nbd_read_stat(nbd)) != NULL)
+ 		nbd_end_request(nbd, req);
+ 
++	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
++
+ 	nbd->task_recv = NULL;
+ 
+ 	if (signal_pending(current)) {
+@@ -431,8 +432,6 @@ static int nbd_do_it(struct nbd_device *
+ 		ret = -ETIMEDOUT;
+ 	}
+ 
+-	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+-	nbd->pid = 0;
+ 	return ret;
+ }
+ 
+@@ -705,7 +704,7 @@ static int __nbd_ioctl(struct block_devi
+ 		struct socket *sock;
+ 		int error;
+ 
+-		if (nbd->pid)
++		if (nbd->task_recv)
+ 			return -EBUSY;
+ 		if (!nbd->sock)
+ 			return -EINVAL;
diff --git a/debian/patches/series b/debian/patches/series
index adb15cc..c7350cf 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -104,3 +104,6 @@ bugfix/all/Initialize-msg-shm-IPC-objects-before-doing-ipc_addi.patch
 features/all/ath10k-add-qca6164-support.patch
 debian/block-fix-abi-change-in-4.2.2.patch
 bugfix/x86/crypto-x86-camellia_aesni_avx-fix-cpu-feature-checks.patch
+bugfix/all/nbd-fix-timeout-detection.patch
+bugfix/all/nbd-remove-variable-pid.patch
+bugfix/all/nbd-add-locking-for-tasks.patch

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/kernel/linux.git



More information about the Kernel-svn-changes mailing list