[linux] 01/01: nbd: Restore request timeout detection (Closes: #770479)
debian-kernel at lists.debian.org
debian-kernel at lists.debian.org
Thu Oct 8 20:45:15 UTC 2015
This is an automated email from the git hooks/post-receive script.
benh pushed a commit to branch jessie
in repository linux.
commit cdbfd56110ab489ffec3f11c51e9fcfdb1e0380f
Author: Ben Hutchings <ben at decadent.org.uk>
Date: Thu Oct 8 21:26:03 2015 +0100
nbd: Restore request timeout detection (Closes: #770479)
---
debian/changelog | 3 +
.../bugfix/all/nbd-add-locking-for-tasks.patch | 133 ++++++++++++
.../bugfix/all/nbd-fix-timeout-detection.patch | 239 +++++++++++++++++++++
.../bugfix/all/nbd-remove-variable-pid.patch | 81 +++++++
debian/patches/series | 3 +
5 files changed, 459 insertions(+)
diff --git a/debian/changelog b/debian/changelog
index ac35714..6f921ca 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,9 @@ linux (3.16.7-ckt17-2) UNRELEASED; urgency=medium
[ Aurelien Jarno ]
* [mips*/octeon] Enable CAVIUM_CN63XXP1 (Closes: #800595)
+ [ Ben Hutchings ]
+ * nbd: Restore request timeout detection (Closes: #770479)
+
-- Aurelien Jarno <aurel32 at debian.org> Sat, 03 Oct 2015 22:32:19 +0200
linux (3.16.7-ckt17-1) jessie; urgency=medium
diff --git a/debian/patches/bugfix/all/nbd-add-locking-for-tasks.patch b/debian/patches/bugfix/all/nbd-add-locking-for-tasks.patch
new file mode 100644
index 0000000..6c7d422
--- /dev/null
+++ b/debian/patches/bugfix/all/nbd-add-locking-for-tasks.patch
@@ -0,0 +1,133 @@
+From: Markus Pargmann <mpa at pengutronix.de>
+Date: Tue, 6 Oct 2015 20:03:54 +0200
+Subject: nbd: Add locking for tasks
+Origin: http://mid.gmane.org/1444154634-24927-1-git-send-email-mpa@pengutronix.de
+Bug-Debian: https://bugs.debian.org/770479
+
+The timeout handling introduced in
+ 7e2893a16d3e (nbd: Fix timeout detection)
+introduces a race condition which may lead to killing of tasks that are
+not in nbd context anymore. This was not observed or reproducible yet.
+
+This patch adds locking to critical use of task_recv and task_send to
+avoid killing tasks that already left the NBD thread functions. This
+lock is only acquired if a timeout occurs or the nbd device
+starts/stops.
+
+Reported-by: Ben Hutchings <ben at decadent.org.uk>
+Signed-off-by: Markus Pargmann <mpa at pengutronix.de>
+[bwh: Backported to 3.16: adjust filenames, context]
+---
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -142,21 +142,23 @@ static void sock_shutdown(struct nbd_dev
+ static void nbd_xmit_timeout(unsigned long arg)
+ {
+ struct nbd_device *nbd = (struct nbd_device *)arg;
+- struct task_struct *task;
++ unsigned long flags;
+
+ if (list_empty(&nbd->queue_head))
+ return;
+
+ nbd->disconnect = 1;
+
+- task = READ_ONCE(nbd->task_recv);
+- if (task)
+- force_sig(SIGKILL, task);
++ spin_lock_irqsave(&nbd->tasks_lock, flags);
+
+- task = READ_ONCE(nbd->task_send);
+- if (task)
++ if (nbd->task_recv)
++ force_sig(SIGKILL, nbd->task_recv);
++
++ if (nbd->task_send)
+ force_sig(SIGKILL, nbd->task_send);
+
++ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
++
+ dev_err(disk_to_dev(nbd->disk), "Connection timed out, killed receiver and sender, shutting down connection\n");
+ }
+
+@@ -408,17 +410,24 @@ static int nbd_do_it(struct nbd_device *
+ {
+ struct request *req;
+ int ret;
++ unsigned long flags;
+
+ BUG_ON(nbd->magic != NBD_MAGIC);
+
+ sk_set_memalloc(nbd->sock->sk);
+
++ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_recv = current;
++ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
+ ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+ if (ret) {
+ dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
++
++ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_recv = NULL;
++ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
++
+ return ret;
+ }
+
+@@ -427,7 +436,9 @@ static int nbd_do_it(struct nbd_device *
+
+ device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+
++ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_recv = NULL;
++ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
+ if (signal_pending(current)) {
+ siginfo_t info;
+@@ -541,8 +552,11 @@ static int nbd_thread(void *data)
+ {
+ struct nbd_device *nbd = data;
+ struct request *req;
++ unsigned long flags;
+
++ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_send = current;
++ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
+ set_user_nice(current, MIN_NICE);
+ while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
+@@ -577,7 +591,15 @@ static int nbd_thread(void *data)
+ nbd_handle_req(nbd, req);
+ }
+
++ spin_lock_irqsave(&nbd->tasks_lock, flags);
+ nbd->task_send = NULL;
++ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
++
++ /* Clear maybe pending signals */
++ if (signal_pending(current)) {
++ siginfo_t info;
++ dequeue_signal_lock(current, &current->blocked, &info);
++ }
+
+ return 0;
+ }
+@@ -902,6 +924,7 @@ static int __init nbd_init(void)
+ nbd_dev[i].magic = NBD_MAGIC;
+ INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
+ spin_lock_init(&nbd_dev[i].queue_lock);
++ spin_lock_init(&nbd_dev[i].tasks_lock);
+ INIT_LIST_HEAD(&nbd_dev[i].queue_head);
+ mutex_init(&nbd_dev[i].tx_lock);
+ init_timer(&nbd_dev[i].timeout_timer);
+--- a/include/linux/nbd.h
++++ b/include/linux/nbd.h
+@@ -42,6 +42,7 @@ struct nbd_device {
+ int disconnect; /* a disconnect has been requested by user */
+
+ struct timer_list timeout_timer;
++ spinlock_t tasks_lock;
+ struct task_struct *task_recv;
+ struct task_struct *task_send;
+ };
diff --git a/debian/patches/bugfix/all/nbd-fix-timeout-detection.patch b/debian/patches/bugfix/all/nbd-fix-timeout-detection.patch
new file mode 100644
index 0000000..9c8af9b
--- /dev/null
+++ b/debian/patches/bugfix/all/nbd-fix-timeout-detection.patch
@@ -0,0 +1,239 @@
+From: Markus Pargmann <mpa at pengutronix.de>
+Date: Mon, 17 Aug 2015 08:20:00 +0200
+Subject: nbd: Fix timeout detection
+Origin: https://git.kernel.org/linus/7e2893a16d3e71035a38122a77bc55848a29f0e4
+Bug-Debian: https://bugs.debian.org/770479
+
+At the moment the nbd timeout just detects hanging tcp operations. This
+is not enough to detect a hanging or bad connection as expected of a
+timeout.
+
+This patch redesigns the timeout detection to include some more cases.
+The timeout is now in relation to replies from the server. If the server
+does not send replies within the timeout the connection will be shut
+down.
+
+The patch adds a continuous timer 'timeout_timer' that is setup in one of
+two cases:
+ - The request list is empty and we are sending the first request out to
+ the server. We want to have a reply within the given timeout,
+ otherwise we consider the connection to be dead.
+ - A server response was received. This means the server is still
+ communicating with us. The timer is reset to the timeout value.
+
+The timer is not stopped if the list becomes empty. It will just trigger
+a timeout which will directly leave the handling routine again as the
+request list is empty.
+
+The whole patch does not use any additional explicit locking. The
+list_empty() calls are safe to be used concurrently. The timer is locked
+internally as we just use mod_timer and del_timer_sync().
+
+The patch is based on the idea of Michal Belczyk with a previous
+different implementation.
+
+Cc: Michal Belczyk <belczyk at bsd.krakow.pl>
+Cc: Hermann Lauer <Hermann.Lauer at iwr.uni-heidelberg.de>
+Signed-off-by: Markus Pargmann <mpa at pengutronix.de>
+Tested-by: Hermann Lauer <Hermann.Lauer at iwr.uni-heidelberg.de>
+Signed-off-by: Jens Axboe <axboe at fb.com>
+[bwh: Backported to 3.16:
+ - Open-code nbd_to_dev()
+ - Adjust filenames, context]
+---
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -133,6 +133,7 @@ static void sock_shutdown(struct nbd_dev
+ dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
+ kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
+ nbd->sock = NULL;
++ del_timer_sync(&nbd->timeout_timer);
+ }
+ if (lock)
+ mutex_unlock(&nbd->tx_lock);
+@@ -140,11 +141,23 @@ static void sock_shutdown(struct nbd_dev
+
+ static void nbd_xmit_timeout(unsigned long arg)
+ {
+- struct task_struct *task = (struct task_struct *)arg;
++ struct nbd_device *nbd = (struct nbd_device *)arg;
++ struct task_struct *task;
+
+- printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
+- task->comm, task->pid);
+- force_sig(SIGKILL, task);
++ if (list_empty(&nbd->queue_head))
++ return;
++
++ nbd->disconnect = 1;
++
++ task = READ_ONCE(nbd->task_recv);
++ if (task)
++ force_sig(SIGKILL, task);
++
++ task = READ_ONCE(nbd->task_send);
++ if (task)
++ force_sig(SIGKILL, nbd->task_send);
++
++ dev_err(disk_to_dev(nbd->disk), "Connection timed out, killed receiver and sender, shutting down connection\n");
+ }
+
+ /*
+@@ -183,33 +196,12 @@ static int sock_xmit(struct nbd_device *
+ msg.msg_controllen = 0;
+ msg.msg_flags = msg_flags | MSG_NOSIGNAL;
+
+- if (send) {
+- struct timer_list ti;
+-
+- if (nbd->xmit_timeout) {
+- init_timer(&ti);
+- ti.function = nbd_xmit_timeout;
+- ti.data = (unsigned long)current;
+- ti.expires = jiffies + nbd->xmit_timeout;
+- add_timer(&ti);
+- }
++ if (send)
+ result = kernel_sendmsg(sock, &msg, &iov, 1, size);
+- if (nbd->xmit_timeout)
+- del_timer_sync(&ti);
+- } else
++ else
+ result = kernel_recvmsg(sock, &msg, &iov, 1, size,
+ msg.msg_flags);
+
+- if (signal_pending(current)) {
+- siginfo_t info;
+- printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
+- task_pid_nr(current), current->comm,
+- dequeue_signal_lock(current, &current->blocked, &info));
+- result = -EINTR;
+- sock_shutdown(nbd, !send);
+- break;
+- }
+-
+ if (result <= 0) {
+ if (result == 0)
+ result = -EPIPE; /* short read */
+@@ -222,6 +214,9 @@ static int sock_xmit(struct nbd_device *
+ sigprocmask(SIG_SETMASK, &oldset, NULL);
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
+
++ if (!send && nbd->xmit_timeout)
++ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
++
+ return result;
+ }
+
+@@ -425,12 +420,26 @@ static int nbd_do_it(struct nbd_device *
+ return ret;
+ }
+
++ nbd->task_recv = current;
++
+ while ((req = nbd_read_stat(nbd)) != NULL)
+ nbd_end_request(req);
+
++ nbd->task_recv = NULL;
++
++ if (signal_pending(current)) {
++ siginfo_t info;
++
++ ret = dequeue_signal_lock(current, &current->blocked, &info);
++ dev_warn(disk_to_dev(nbd->disk), "pid %d, %s, got signal %d\n",
++ task_pid_nr(current), current->comm, ret);
++ sock_shutdown(nbd, 1);
++ ret = -ETIMEDOUT;
++ }
++
+ device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+ nbd->pid = 0;
+- return 0;
++ return ret;
+ }
+
+ static void nbd_clear_que(struct nbd_device *nbd)
+@@ -504,6 +513,9 @@ static void nbd_handle_req(struct nbd_de
+
+ nbd->active_req = req;
+
++ if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
++ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
++
+ if (nbd_send_req(nbd, req) != 0) {
+ dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
+ req->errors++;
+@@ -530,6 +542,8 @@ static int nbd_thread(void *data)
+ struct nbd_device *nbd = data;
+ struct request *req;
+
++ nbd->task_send = current;
++
+ set_user_nice(current, MIN_NICE);
+ while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
+ /* wait for something to do */
+@@ -537,6 +551,18 @@ static int nbd_thread(void *data)
+ kthread_should_stop() ||
+ !list_empty(&nbd->waiting_queue));
+
++ if (signal_pending(current)) {
++ siginfo_t info;
++ int ret;
++
++ ret = dequeue_signal_lock(current, &current->blocked,
++ &info);
++ dev_warn(disk_to_dev(nbd->disk), "pid %d, %s, got signal %d\n",
++ task_pid_nr(current), current->comm, ret);
++ sock_shutdown(nbd, 1);
++ break;
++ }
++
+ /* extract request */
+ if (list_empty(&nbd->waiting_queue))
+ continue;
+@@ -550,6 +576,9 @@ static int nbd_thread(void *data)
+ /* handle request */
+ nbd_handle_req(nbd, req);
+ }
++
++ nbd->task_send = NULL;
++
+ return 0;
+ }
+
+@@ -671,6 +700,12 @@ static int __nbd_ioctl(struct block_devi
+
+ case NBD_SET_TIMEOUT:
+ nbd->xmit_timeout = arg * HZ;
++ if (arg)
++ mod_timer(&nbd->timeout_timer,
++ jiffies + nbd->xmit_timeout);
++ else
++ del_timer_sync(&nbd->timeout_timer);
++
+ return 0;
+
+ case NBD_SET_FLAGS:
+@@ -869,6 +904,9 @@ static int __init nbd_init(void)
+ spin_lock_init(&nbd_dev[i].queue_lock);
+ INIT_LIST_HEAD(&nbd_dev[i].queue_head);
+ mutex_init(&nbd_dev[i].tx_lock);
++ init_timer(&nbd_dev[i].timeout_timer);
++ nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
++ nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
+ init_waitqueue_head(&nbd_dev[i].active_wq);
+ init_waitqueue_head(&nbd_dev[i].waiting_wq);
+ nbd_dev[i].blksize = 1024;
+--- a/include/linux/nbd.h
++++ b/include/linux/nbd.h
+@@ -41,6 +41,10 @@ struct nbd_device {
+ pid_t pid; /* pid of nbd-client, if attached */
+ int xmit_timeout;
+ int disconnect; /* a disconnect has been requested by user */
++
++ struct timer_list timeout_timer;
++ struct task_struct *task_recv;
++ struct task_struct *task_send;
+ };
+
+ #endif
diff --git a/debian/patches/bugfix/all/nbd-remove-variable-pid.patch b/debian/patches/bugfix/all/nbd-remove-variable-pid.patch
new file mode 100644
index 0000000..9409d19
--- /dev/null
+++ b/debian/patches/bugfix/all/nbd-remove-variable-pid.patch
@@ -0,0 +1,81 @@
+From: Markus Pargmann <mpa at pengutronix.de>
+Date: Mon, 17 Aug 2015 08:20:05 +0200
+Subject: nbd: Remove variable 'pid'
+Origin: https://git.kernel.org/linus/6521d39a64b3f9c3acb0fd25a34cfaf9a40e548e
+Bug-Debian: https://bugs.debian.org/770479
+
+This patch uses nbd->task_recv to determine the value of the previously
+used variable 'pid' for sysfs.
+
+Signed-off-by: Markus Pargmann <mpa at pengutronix.de>
+Signed-off-by: Jens Axboe <axboe at fb.com>
+[bwh: Backported to 3.16: adjust filenames, context]
+---
+--- a/drivers/block/nbd.c
++++ b/drivers/block/nbd.c
+@@ -394,9 +394,9 @@ static ssize_t pid_show(struct device *d
+ struct device_attribute *attr, char *buf)
+ {
+ struct gendisk *disk = dev_to_disk(dev);
++ struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
+
+- return sprintf(buf, "%ld\n",
+- (long) ((struct nbd_device *)disk->private_data)->pid);
++ return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
+ }
+
+ static struct device_attribute pid_attr = {
+@@ -412,19 +412,21 @@ static int nbd_do_it(struct nbd_device *
+ BUG_ON(nbd->magic != NBD_MAGIC);
+
+ sk_set_memalloc(nbd->sock->sk);
+- nbd->pid = task_pid_nr(current);
++
++ nbd->task_recv = current;
++
+ ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+ if (ret) {
+ dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
+- nbd->pid = 0;
++ nbd->task_recv = NULL;
+ return ret;
+ }
+
+- nbd->task_recv = current;
+-
+ while ((req = nbd_read_stat(nbd)) != NULL)
+ nbd_end_request(req);
+
++ device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
++
+ nbd->task_recv = NULL;
+
+ if (signal_pending(current)) {
+@@ -437,8 +439,6 @@ static int nbd_do_it(struct nbd_device *
+ ret = -ETIMEDOUT;
+ }
+
+- device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+- nbd->pid = 0;
+ return ret;
+ }
+
+@@ -724,7 +724,7 @@ static int __nbd_ioctl(struct block_devi
+ struct socket *sock;
+ int error;
+
+- if (nbd->pid)
++ if (nbd->task_recv)
+ return -EBUSY;
+ if (!nbd->sock)
+ return -EINVAL;
+--- a/include/linux/nbd.h
++++ b/include/linux/nbd.h
+@@ -38,7 +38,6 @@ struct nbd_device {
+ struct gendisk *disk;
+ int blksize;
+ u64 bytesize;
+- pid_t pid; /* pid of nbd-client, if attached */
+ int xmit_timeout;
+ int disconnect; /* a disconnect has been requested by user */
+
diff --git a/debian/patches/series b/debian/patches/series
index f18329a..1b8217f 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -638,3 +638,6 @@ bugfix/all/dcache-handle-escaped-paths-in-prepend_path.patch
bugfix/all/vfs-test-for-and-handle-paths-that-are-unreachable-f.patch
bugfix/x86/i2c-i801-Use-wait_event_timeout-to-wait-for-interrup.patch
bugfix/all/media-uvcvideo-disable-hardware-timestamps-by-defaul.patch
+bugfix/all/nbd-fix-timeout-detection.patch
+bugfix/all/nbd-remove-variable-pid.patch
+bugfix/all/nbd-add-locking-for-tasks.patch
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/kernel/linux.git
More information about the Kernel-svn-changes
mailing list