nbd: Restore request timeout detection (Closes: #770479)

This commit is contained in:
Ben Hutchings 2015-10-08 21:26:03 +01:00
parent 6df52e8fed
commit e3bec54b78
5 changed files with 464 additions and 0 deletions

6
debian/changelog vendored
View File

@ -1,3 +1,9 @@
linux (4.2.3-2) UNRELEASED; urgency=medium
* nbd: Restore request timeout detection (Closes: #770479)
-- Ben Hutchings <ben@decadent.org.uk> Thu, 08 Oct 2015 21:24:14 +0100
linux (4.2.3-1) unstable; urgency=medium
* New upstream stable update:

View File

@ -0,0 +1,134 @@
From: Markus Pargmann <mpa@pengutronix.de>
Date: Tue, 6 Oct 2015 20:03:54 +0200
Subject: nbd: Add locking for tasks
Origin: http://mid.gmane.org/1444154634-24927-1-git-send-email-mpa@pengutronix.de
Bug-Debian: https://bugs.debian.org/770479
The timeout handling introduced in
7e2893a16d3e (nbd: Fix timeout detection)
introduces a race condition which may lead to killing of tasks that are
not in nbd context anymore. This was not observed or reproducable yet.
This patch adds locking to critical use of task_recv and task_send to
avoid killing tasks that already left the NBD thread functions. This
lock is only acquired if a timeout occures or the nbd device
starts/stops.
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
[bwh: Backported to 4.2: adjust context]
---
drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -60,6 +60,7 @@ struct nbd_device {
int disconnect; /* a disconnect has been requested by user */
struct timer_list timeout_timer;
+ spinlock_t tasks_lock;
struct task_struct *task_recv;
struct task_struct *task_send;
};
@@ -133,21 +134,23 @@ static void sock_shutdown(struct nbd_dev
static void nbd_xmit_timeout(unsigned long arg)
{
struct nbd_device *nbd = (struct nbd_device *)arg;
- struct task_struct *task;
+ unsigned long flags;
if (list_empty(&nbd->queue_head))
return;
nbd->disconnect = 1;
- task = READ_ONCE(nbd->task_recv);
- if (task)
- force_sig(SIGKILL, task);
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
+
+ if (nbd->task_recv)
+ force_sig(SIGKILL, nbd->task_recv);
- task = READ_ONCE(nbd->task_send);
- if (task)
+ if (nbd->task_send)
force_sig(SIGKILL, nbd->task_send);
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
}
@@ -401,17 +404,24 @@ static int nbd_do_it(struct nbd_device *
{
struct request *req;
int ret;
+ unsigned long flags;
BUG_ON(nbd->magic != NBD_MAGIC);
sk_set_memalloc(nbd->sock->sk);
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_recv = current;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (ret) {
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
+
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_recv = NULL;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
return ret;
}
@@ -420,7 +430,9 @@ static int nbd_do_it(struct nbd_device *
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_recv = NULL;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
if (signal_pending(current)) {
siginfo_t info;
@@ -522,8 +534,11 @@ static int nbd_thread(void *data)
{
struct nbd_device *nbd = data;
struct request *req;
+ unsigned long flags;
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_send = current;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
set_user_nice(current, MIN_NICE);
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
@@ -558,7 +573,15 @@ static int nbd_thread(void *data)
nbd_handle_req(nbd, req);
}
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
nbd->task_send = NULL;
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
+ /* Clear maybe pending signals */
+ if (signal_pending(current)) {
+ siginfo_t info;
+ dequeue_signal_lock(current, &current->blocked, &info);
+ }
return 0;
}
@@ -878,6 +901,7 @@ static int __init nbd_init(void)
nbd_dev[i].magic = NBD_MAGIC;
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
spin_lock_init(&nbd_dev[i].queue_lock);
+ spin_lock_init(&nbd_dev[i].tasks_lock);
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
mutex_init(&nbd_dev[i].tx_lock);
init_timer(&nbd_dev[i].timeout_timer);

View File

@ -0,0 +1,239 @@
From: Markus Pargmann <mpa@pengutronix.de>
Date: Mon, 17 Aug 2015 08:20:00 +0200
Subject: nbd: Fix timeout detection
Origin: https://git.kernel.org/linus/7e2893a16d3e71035a38122a77bc55848a29f0e4
Bug-Debian: https://bugs.debian.org/770479
At the moment the nbd timeout just detects hanging tcp operations. This
is not enough to detect a hanging or bad connection as expected of a
timeout.
This patch redesigns the timeout detection to include some more cases.
The timeout is now in relation to replies from the server. If the server
does not send replies within the timeout the connection will be shut
down.
The patch adds a continous timer 'timeout_timer' that is setup in one of
two cases:
- The request list is empty and we are sending the first request out to
the server. We want to have a reply within the given timeout,
otherwise we consider the connection to be dead.
- A server response was received. This means the server is still
communicating with us. The timer is reset to the timeout value.
The timer is not stopped if the list becomes empty. It will just trigger
a timeout which will directly leave the handling routine again as the
request list is empty.
The whole patch does not use any additional explicit locking. The
list_empty() calls are safe to be used concurrently. The timer is locked
internally as we just use mod_timer and del_timer_sync().
The patch is based on the idea of Michal Belczyk with a previous
different implementation.
Cc: Michal Belczyk <belczyk@bsd.krakow.pl>
Cc: Hermann Lauer <Hermann.Lauer@iwr.uni-heidelberg.de>
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Hermann Lauer <Hermann.Lauer@iwr.uni-heidelberg.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
drivers/block/nbd.c | 98 ++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 70 insertions(+), 28 deletions(-)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f169faf..f3536e6 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -59,6 +59,10 @@ struct nbd_device {
pid_t pid; /* pid of nbd-client, if attached */
int xmit_timeout;
int disconnect; /* a disconnect has been requested by user */
+
+ struct timer_list timeout_timer;
+ struct task_struct *task_recv;
+ struct task_struct *task_send;
};
#define NBD_MAGIC 0x68797548
@@ -121,6 +125,7 @@ static void sock_shutdown(struct nbd_device *nbd, int lock)
dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
nbd->sock = NULL;
+ del_timer_sync(&nbd->timeout_timer);
}
if (lock)
mutex_unlock(&nbd->tx_lock);
@@ -128,11 +133,23 @@ static void sock_shutdown(struct nbd_device *nbd, int lock)
static void nbd_xmit_timeout(unsigned long arg)
{
- struct task_struct *task = (struct task_struct *)arg;
+ struct nbd_device *nbd = (struct nbd_device *)arg;
+ struct task_struct *task;
+
+ if (list_empty(&nbd->queue_head))
+ return;
+
+ nbd->disconnect = 1;
+
+ task = READ_ONCE(nbd->task_recv);
+ if (task)
+ force_sig(SIGKILL, task);
- printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
- task->comm, task->pid);
- force_sig(SIGKILL, task);
+ task = READ_ONCE(nbd->task_send);
+ if (task)
+ force_sig(SIGKILL, nbd->task_send);
+
+ dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
}
/*
@@ -171,33 +188,12 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
msg.msg_controllen = 0;
msg.msg_flags = msg_flags | MSG_NOSIGNAL;
- if (send) {
- struct timer_list ti;
-
- if (nbd->xmit_timeout) {
- init_timer(&ti);
- ti.function = nbd_xmit_timeout;
- ti.data = (unsigned long)current;
- ti.expires = jiffies + nbd->xmit_timeout;
- add_timer(&ti);
- }
+ if (send)
result = kernel_sendmsg(sock, &msg, &iov, 1, size);
- if (nbd->xmit_timeout)
- del_timer_sync(&ti);
- } else
+ else
result = kernel_recvmsg(sock, &msg, &iov, 1, size,
msg.msg_flags);
- if (signal_pending(current)) {
- siginfo_t info;
- printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
- task_pid_nr(current), current->comm,
- dequeue_signal_lock(current, &current->blocked, &info));
- result = -EINTR;
- sock_shutdown(nbd, !send);
- break;
- }
-
if (result <= 0) {
if (result == 0)
result = -EPIPE; /* short read */
@@ -210,6 +206,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
sigprocmask(SIG_SETMASK, &oldset, NULL);
tsk_restore_flags(current, pflags, PF_MEMALLOC);
+ if (!send && nbd->xmit_timeout)
+ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
+
return result;
}
@@ -415,12 +414,26 @@ static int nbd_do_it(struct nbd_device *nbd)
return ret;
}
+ nbd->task_recv = current;
+
while ((req = nbd_read_stat(nbd)) != NULL)
nbd_end_request(nbd, req);
+ nbd->task_recv = NULL;
+
+ if (signal_pending(current)) {
+ siginfo_t info;
+
+ ret = dequeue_signal_lock(current, &current->blocked, &info);
+ dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
+ task_pid_nr(current), current->comm, ret);
+ sock_shutdown(nbd, 1);
+ ret = -ETIMEDOUT;
+ }
+
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
nbd->pid = 0;
- return 0;
+ return ret;
}
static void nbd_clear_que(struct nbd_device *nbd)
@@ -482,6 +495,9 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
nbd->active_req = req;
+ if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
+ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
+
if (nbd_send_req(nbd, req) != 0) {
dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
req->errors++;
@@ -508,6 +524,8 @@ static int nbd_thread(void *data)
struct nbd_device *nbd = data;
struct request *req;
+ nbd->task_send = current;
+
set_user_nice(current, MIN_NICE);
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
/* wait for something to do */
@@ -515,6 +533,18 @@ static int nbd_thread(void *data)
kthread_should_stop() ||
!list_empty(&nbd->waiting_queue));
+ if (signal_pending(current)) {
+ siginfo_t info;
+ int ret;
+
+ ret = dequeue_signal_lock(current, &current->blocked,
+ &info);
+ dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
+ task_pid_nr(current), current->comm, ret);
+ sock_shutdown(nbd, 1);
+ break;
+ }
+
/* extract request */
if (list_empty(&nbd->waiting_queue))
continue;
@@ -528,6 +558,9 @@ static int nbd_thread(void *data)
/* handle request */
nbd_handle_req(nbd, req);
}
+
+ nbd->task_send = NULL;
+
return 0;
}
@@ -648,6 +681,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_SET_TIMEOUT:
nbd->xmit_timeout = arg * HZ;
+ if (arg)
+ mod_timer(&nbd->timeout_timer,
+ jiffies + nbd->xmit_timeout);
+ else
+ del_timer_sync(&nbd->timeout_timer);
+
return 0;
case NBD_SET_FLAGS:
@@ -842,6 +881,9 @@ static int __init nbd_init(void)
spin_lock_init(&nbd_dev[i].queue_lock);
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
mutex_init(&nbd_dev[i].tx_lock);
+ init_timer(&nbd_dev[i].timeout_timer);
+ nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
+ nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
init_waitqueue_head(&nbd_dev[i].active_wq);
init_waitqueue_head(&nbd_dev[i].waiting_wq);
nbd_dev[i].blksize = 1024;

View File

@ -0,0 +1,82 @@
From: Markus Pargmann <mpa@pengutronix.de>
Date: Mon, 17 Aug 2015 08:20:05 +0200
Subject: nbd: Remove variable 'pid'
Origin: https://git.kernel.org/linus/6521d39a64b3f9c3acb0fd25a34cfaf9a40e548e
Bug-Debian: https://bugs.debian.org/770479
This patch uses nbd->task_recv to determine the value of the previously
used variable 'pid' for sysfs.
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
[bwh: Backported to 4.2: adjust context]
---
drivers/block/nbd.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -56,7 +56,6 @@ struct nbd_device {
struct gendisk *disk;
int blksize;
loff_t bytesize;
- pid_t pid; /* pid of nbd-client, if attached */
int xmit_timeout;
int disconnect; /* a disconnect has been requested by user */
@@ -388,9 +387,9 @@ static ssize_t pid_show(struct device *d
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
+ struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
- return sprintf(buf, "%ld\n",
- (long) ((struct nbd_device *)disk->private_data)->pid);
+ return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}
static struct device_attribute pid_attr = {
@@ -406,19 +405,21 @@ static int nbd_do_it(struct nbd_device *
BUG_ON(nbd->magic != NBD_MAGIC);
sk_set_memalloc(nbd->sock->sk);
- nbd->pid = task_pid_nr(current);
+
+ nbd->task_recv = current;
+
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
if (ret) {
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
- nbd->pid = 0;
+ nbd->task_recv = NULL;
return ret;
}
- nbd->task_recv = current;
-
while ((req = nbd_read_stat(nbd)) != NULL)
nbd_end_request(nbd, req);
+ device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+
nbd->task_recv = NULL;
if (signal_pending(current)) {
@@ -431,8 +432,6 @@ static int nbd_do_it(struct nbd_device *
ret = -ETIMEDOUT;
}
- device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
- nbd->pid = 0;
return ret;
}
@@ -705,7 +704,7 @@ static int __nbd_ioctl(struct block_devi
struct socket *sock;
int error;
- if (nbd->pid)
+ if (nbd->task_recv)
return -EBUSY;
if (!nbd->sock)
return -EINVAL;

View File

@ -104,3 +104,6 @@ bugfix/all/Initialize-msg-shm-IPC-objects-before-doing-ipc_addi.patch
features/all/ath10k-add-qca6164-support.patch
debian/block-fix-abi-change-in-4.2.2.patch
bugfix/x86/crypto-x86-camellia_aesni_avx-fix-cpu-feature-checks.patch
bugfix/all/nbd-fix-timeout-detection.patch
bugfix/all/nbd-remove-variable-pid.patch
bugfix/all/nbd-add-locking-for-tasks.patch