diff --git a/debian/changelog b/debian/changelog index 6b761701f..b88111523 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +linux (4.2.3-2) UNRELEASED; urgency=medium + + * nbd: Restore request timeout detection (Closes: #770479) + + -- Ben Hutchings Thu, 08 Oct 2015 21:24:14 +0100 + linux (4.2.3-1) unstable; urgency=medium * New upstream stable update: diff --git a/debian/patches/bugfix/all/nbd-add-locking-for-tasks.patch b/debian/patches/bugfix/all/nbd-add-locking-for-tasks.patch new file mode 100644 index 000000000..00236186f --- /dev/null +++ b/debian/patches/bugfix/all/nbd-add-locking-for-tasks.patch @@ -0,0 +1,134 @@ +From: Markus Pargmann +Date: Tue, 6 Oct 2015 20:03:54 +0200 +Subject: nbd: Add locking for tasks +Origin: http://mid.gmane.org/1444154634-24927-1-git-send-email-mpa@pengutronix.de +Bug-Debian: https://bugs.debian.org/770479 + +The timeout handling introduced in + 7e2893a16d3e (nbd: Fix timeout detection) +introduces a race condition which may lead to killing of tasks that are +not in nbd context anymore. This was not observed or reproducible yet. + +This patch adds locking to critical use of task_recv and task_send to +avoid killing tasks that already left the NBD thread functions. This +lock is only acquired if a timeout occurs or the nbd device +starts/stops. 
+ +Reported-by: Ben Hutchings +Signed-off-by: Markus Pargmann +[bwh: Backported to 4.2: adjust context] +--- + drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++------ + 1 file changed, 30 insertions(+), 6 deletions(-) + +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -60,6 +60,7 @@ struct nbd_device { + int disconnect; /* a disconnect has been requested by user */ + + struct timer_list timeout_timer; ++ spinlock_t tasks_lock; + struct task_struct *task_recv; + struct task_struct *task_send; + }; +@@ -133,21 +134,23 @@ static void sock_shutdown(struct nbd_dev + static void nbd_xmit_timeout(unsigned long arg) + { + struct nbd_device *nbd = (struct nbd_device *)arg; +- struct task_struct *task; ++ unsigned long flags; + + if (list_empty(&nbd->queue_head)) + return; + + nbd->disconnect = 1; + +- task = READ_ONCE(nbd->task_recv); +- if (task) +- force_sig(SIGKILL, task); ++ spin_lock_irqsave(&nbd->tasks_lock, flags); ++ ++ if (nbd->task_recv) ++ force_sig(SIGKILL, nbd->task_recv); + +- task = READ_ONCE(nbd->task_send); +- if (task) ++ if (nbd->task_send) + force_sig(SIGKILL, nbd->task_send); + ++ spin_unlock_irqrestore(&nbd->tasks_lock, flags); ++ + dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n"); + } + +@@ -401,17 +404,24 @@ static int nbd_do_it(struct nbd_device * + { + struct request *req; + int ret; ++ unsigned long flags; + + BUG_ON(nbd->magic != NBD_MAGIC); + + sk_set_memalloc(nbd->sock->sk); + ++ spin_lock_irqsave(&nbd->tasks_lock, flags); + nbd->task_recv = current; ++ spin_unlock_irqrestore(&nbd->tasks_lock, flags); + + ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); + if (ret) { + dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); ++ ++ spin_lock_irqsave(&nbd->tasks_lock, flags); + nbd->task_recv = NULL; ++ spin_unlock_irqrestore(&nbd->tasks_lock, flags); ++ + return ret; + } + +@@ -420,7 +430,9 @@ static int nbd_do_it(struct nbd_device * + + 
device_remove_file(disk_to_dev(nbd->disk), &pid_attr); + ++ spin_lock_irqsave(&nbd->tasks_lock, flags); + nbd->task_recv = NULL; ++ spin_unlock_irqrestore(&nbd->tasks_lock, flags); + + if (signal_pending(current)) { + siginfo_t info; +@@ -522,8 +534,11 @@ static int nbd_thread(void *data) + { + struct nbd_device *nbd = data; + struct request *req; ++ unsigned long flags; + ++ spin_lock_irqsave(&nbd->tasks_lock, flags); + nbd->task_send = current; ++ spin_unlock_irqrestore(&nbd->tasks_lock, flags); + + set_user_nice(current, MIN_NICE); + while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) { +@@ -558,7 +573,15 @@ static int nbd_thread(void *data) + nbd_handle_req(nbd, req); + } + ++ spin_lock_irqsave(&nbd->tasks_lock, flags); + nbd->task_send = NULL; ++ spin_unlock_irqrestore(&nbd->tasks_lock, flags); ++ ++ /* Clear maybe pending signals */ ++ if (signal_pending(current)) { ++ siginfo_t info; ++ dequeue_signal_lock(current, &current->blocked, &info); ++ } + + return 0; + } +@@ -878,6 +901,7 @@ static int __init nbd_init(void) + nbd_dev[i].magic = NBD_MAGIC; + INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); + spin_lock_init(&nbd_dev[i].queue_lock); ++ spin_lock_init(&nbd_dev[i].tasks_lock); + INIT_LIST_HEAD(&nbd_dev[i].queue_head); + mutex_init(&nbd_dev[i].tx_lock); + init_timer(&nbd_dev[i].timeout_timer); diff --git a/debian/patches/bugfix/all/nbd-fix-timeout-detection.patch b/debian/patches/bugfix/all/nbd-fix-timeout-detection.patch new file mode 100644 index 000000000..e83dfa31e --- /dev/null +++ b/debian/patches/bugfix/all/nbd-fix-timeout-detection.patch @@ -0,0 +1,239 @@ +From: Markus Pargmann +Date: Mon, 17 Aug 2015 08:20:00 +0200 +Subject: nbd: Fix timeout detection +Origin: https://git.kernel.org/linus/7e2893a16d3e71035a38122a77bc55848a29f0e4 +Bug-Debian: https://bugs.debian.org/770479 + +At the moment the nbd timeout just detects hanging tcp operations. This +is not enough to detect a hanging or bad connection as expected of a +timeout. 
+ +This patch redesigns the timeout detection to include some more cases. +The timeout is now in relation to replies from the server. If the server +does not send replies within the timeout the connection will be shut +down. + +The patch adds a continuous timer 'timeout_timer' that is set up in one of +two cases: + - The request list is empty and we are sending the first request out to + the server. We want to have a reply within the given timeout, + otherwise we consider the connection to be dead. + - A server response was received. This means the server is still + communicating with us. The timer is reset to the timeout value. + +The timer is not stopped if the list becomes empty. It will just trigger +a timeout which will directly leave the handling routine again as the +request list is empty. + +The whole patch does not use any additional explicit locking. The +list_empty() calls are safe to be used concurrently. The timer is locked +internally as we just use mod_timer and del_timer_sync(). + +The patch is based on the idea of Michal Belczyk with a previous +different implementation. 
+ +Cc: Michal Belczyk +Cc: Hermann Lauer +Signed-off-by: Markus Pargmann +Tested-by: Hermann Lauer +Signed-off-by: Jens Axboe +--- + drivers/block/nbd.c | 98 ++++++++++++++++++++++++++++++++++++++--------------- + 1 file changed, 70 insertions(+), 28 deletions(-) + +diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c +index f169faf..f3536e6 100644 +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -59,6 +59,10 @@ struct nbd_device { + pid_t pid; /* pid of nbd-client, if attached */ + int xmit_timeout; + int disconnect; /* a disconnect has been requested by user */ ++ ++ struct timer_list timeout_timer; ++ struct task_struct *task_recv; ++ struct task_struct *task_send; + }; + + #define NBD_MAGIC 0x68797548 +@@ -121,6 +125,7 @@ static void sock_shutdown(struct nbd_device *nbd, int lock) + dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n"); + kernel_sock_shutdown(nbd->sock, SHUT_RDWR); + nbd->sock = NULL; ++ del_timer_sync(&nbd->timeout_timer); + } + if (lock) + mutex_unlock(&nbd->tx_lock); +@@ -128,11 +133,23 @@ static void sock_shutdown(struct nbd_device *nbd, int lock) + + static void nbd_xmit_timeout(unsigned long arg) + { +- struct task_struct *task = (struct task_struct *)arg; ++ struct nbd_device *nbd = (struct nbd_device *)arg; ++ struct task_struct *task; ++ ++ if (list_empty(&nbd->queue_head)) ++ return; ++ ++ nbd->disconnect = 1; ++ ++ task = READ_ONCE(nbd->task_recv); ++ if (task) ++ force_sig(SIGKILL, task); + +- printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n", +- task->comm, task->pid); +- force_sig(SIGKILL, task); ++ task = READ_ONCE(nbd->task_send); ++ if (task) ++ force_sig(SIGKILL, nbd->task_send); ++ ++ dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n"); + } + + /* +@@ -171,33 +188,12 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, + msg.msg_controllen = 0; + msg.msg_flags = msg_flags | MSG_NOSIGNAL; + +- if (send) { +- struct 
timer_list ti; +- +- if (nbd->xmit_timeout) { +- init_timer(&ti); +- ti.function = nbd_xmit_timeout; +- ti.data = (unsigned long)current; +- ti.expires = jiffies + nbd->xmit_timeout; +- add_timer(&ti); +- } ++ if (send) + result = kernel_sendmsg(sock, &msg, &iov, 1, size); +- if (nbd->xmit_timeout) +- del_timer_sync(&ti); +- } else ++ else + result = kernel_recvmsg(sock, &msg, &iov, 1, size, + msg.msg_flags); + +- if (signal_pending(current)) { +- siginfo_t info; +- printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n", +- task_pid_nr(current), current->comm, +- dequeue_signal_lock(current, &current->blocked, &info)); +- result = -EINTR; +- sock_shutdown(nbd, !send); +- break; +- } +- + if (result <= 0) { + if (result == 0) + result = -EPIPE; /* short read */ +@@ -210,6 +206,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, + sigprocmask(SIG_SETMASK, &oldset, NULL); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + ++ if (!send && nbd->xmit_timeout) ++ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout); ++ + return result; + } + +@@ -415,12 +414,26 @@ static int nbd_do_it(struct nbd_device *nbd) + return ret; + } + ++ nbd->task_recv = current; ++ + while ((req = nbd_read_stat(nbd)) != NULL) + nbd_end_request(nbd, req); + ++ nbd->task_recv = NULL; ++ ++ if (signal_pending(current)) { ++ siginfo_t info; ++ ++ ret = dequeue_signal_lock(current, &current->blocked, &info); ++ dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", ++ task_pid_nr(current), current->comm, ret); ++ sock_shutdown(nbd, 1); ++ ret = -ETIMEDOUT; ++ } ++ + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); + nbd->pid = 0; +- return 0; ++ return ret; + } + + static void nbd_clear_que(struct nbd_device *nbd) +@@ -482,6 +495,9 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) + + nbd->active_req = req; + ++ if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head)) ++ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout); ++ + 
if (nbd_send_req(nbd, req) != 0) { + dev_err(disk_to_dev(nbd->disk), "Request send failed\n"); + req->errors++; +@@ -508,6 +524,8 @@ static int nbd_thread(void *data) + struct nbd_device *nbd = data; + struct request *req; + ++ nbd->task_send = current; ++ + set_user_nice(current, MIN_NICE); + while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) { + /* wait for something to do */ +@@ -515,6 +533,18 @@ static int nbd_thread(void *data) + kthread_should_stop() || + !list_empty(&nbd->waiting_queue)); + ++ if (signal_pending(current)) { ++ siginfo_t info; ++ int ret; ++ ++ ret = dequeue_signal_lock(current, &current->blocked, ++ &info); ++ dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n", ++ task_pid_nr(current), current->comm, ret); ++ sock_shutdown(nbd, 1); ++ break; ++ } ++ + /* extract request */ + if (list_empty(&nbd->waiting_queue)) + continue; +@@ -528,6 +558,9 @@ static int nbd_thread(void *data) + /* handle request */ + nbd_handle_req(nbd, req); + } ++ ++ nbd->task_send = NULL; ++ + return 0; + } + +@@ -648,6 +681,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, + + case NBD_SET_TIMEOUT: + nbd->xmit_timeout = arg * HZ; ++ if (arg) ++ mod_timer(&nbd->timeout_timer, ++ jiffies + nbd->xmit_timeout); ++ else ++ del_timer_sync(&nbd->timeout_timer); ++ + return 0; + + case NBD_SET_FLAGS: +@@ -842,6 +881,9 @@ static int __init nbd_init(void) + spin_lock_init(&nbd_dev[i].queue_lock); + INIT_LIST_HEAD(&nbd_dev[i].queue_head); + mutex_init(&nbd_dev[i].tx_lock); ++ init_timer(&nbd_dev[i].timeout_timer); ++ nbd_dev[i].timeout_timer.function = nbd_xmit_timeout; ++ nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i]; + init_waitqueue_head(&nbd_dev[i].active_wq); + init_waitqueue_head(&nbd_dev[i].waiting_wq); + nbd_dev[i].blksize = 1024; diff --git a/debian/patches/bugfix/all/nbd-remove-variable-pid.patch b/debian/patches/bugfix/all/nbd-remove-variable-pid.patch new file mode 100644 index 000000000..ed9451354 --- /dev/null 
+++ b/debian/patches/bugfix/all/nbd-remove-variable-pid.patch @@ -0,0 +1,82 @@ +From: Markus Pargmann +Date: Mon, 17 Aug 2015 08:20:05 +0200 +Subject: nbd: Remove variable 'pid' +Origin: https://git.kernel.org/linus/6521d39a64b3f9c3acb0fd25a34cfaf9a40e548e +Bug-Debian: https://bugs.debian.org/770479 + +This patch uses nbd->task_recv to determine the value of the previously +used variable 'pid' for sysfs. + +Signed-off-by: Markus Pargmann +Signed-off-by: Jens Axboe +[bwh: Backported to 4.2: adjust context] +--- + drivers/block/nbd.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -56,7 +56,6 @@ struct nbd_device { + struct gendisk *disk; + int blksize; + loff_t bytesize; +- pid_t pid; /* pid of nbd-client, if attached */ + int xmit_timeout; + int disconnect; /* a disconnect has been requested by user */ + +@@ -388,9 +387,9 @@ static ssize_t pid_show(struct device *d + struct device_attribute *attr, char *buf) + { + struct gendisk *disk = dev_to_disk(dev); ++ struct nbd_device *nbd = (struct nbd_device *)disk->private_data; + +- return sprintf(buf, "%ld\n", +- (long) ((struct nbd_device *)disk->private_data)->pid); ++ return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv)); + } + + static struct device_attribute pid_attr = { +@@ -406,19 +405,21 @@ static int nbd_do_it(struct nbd_device * + BUG_ON(nbd->magic != NBD_MAGIC); + + sk_set_memalloc(nbd->sock->sk); +- nbd->pid = task_pid_nr(current); ++ ++ nbd->task_recv = current; ++ + ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); + if (ret) { + dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); +- nbd->pid = 0; ++ nbd->task_recv = NULL; + return ret; + } + +- nbd->task_recv = current; +- + while ((req = nbd_read_stat(nbd)) != NULL) + nbd_end_request(nbd, req); + ++ device_remove_file(disk_to_dev(nbd->disk), &pid_attr); ++ + nbd->task_recv = NULL; + + if (signal_pending(current)) { +@@ -431,8 +432,6 @@ 
static int nbd_do_it(struct nbd_device * + ret = -ETIMEDOUT; + } + +- device_remove_file(disk_to_dev(nbd->disk), &pid_attr); +- nbd->pid = 0; + return ret; + } + +@@ -705,7 +704,7 @@ static int __nbd_ioctl(struct block_devi + struct socket *sock; + int error; + +- if (nbd->pid) ++ if (nbd->task_recv) + return -EBUSY; + if (!nbd->sock) + return -EINVAL; diff --git a/debian/patches/series b/debian/patches/series index adb15cc61..c7350cf35 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -104,3 +104,6 @@ bugfix/all/Initialize-msg-shm-IPC-objects-before-doing-ipc_addi.patch features/all/ath10k-add-qca6164-support.patch debian/block-fix-abi-change-in-4.2.2.patch bugfix/x86/crypto-x86-camellia_aesni_avx-fix-cpu-feature-checks.patch +bugfix/all/nbd-fix-timeout-detection.patch +bugfix/all/nbd-remove-variable-pid.patch +bugfix/all/nbd-add-locking-for-tasks.patch