nbd: Restore request timeout detection (Closes: #770479)
This commit is contained in:
parent
6df52e8fed
commit
e3bec54b78
|
@ -1,3 +1,9 @@
|
|||
linux (4.2.3-2) UNRELEASED; urgency=medium
|
||||
|
||||
* nbd: Restore request timeout detection (Closes: #770479)
|
||||
|
||||
-- Ben Hutchings <ben@decadent.org.uk> Thu, 08 Oct 2015 21:24:14 +0100
|
||||
|
||||
linux (4.2.3-1) unstable; urgency=medium
|
||||
|
||||
* New upstream stable update:
|
||||
|
|
|
@ -0,0 +1,134 @@
|
|||
From: Markus Pargmann <mpa@pengutronix.de>
|
||||
Date: Tue, 6 Oct 2015 20:03:54 +0200
|
||||
Subject: nbd: Add locking for tasks
|
||||
Origin: http://mid.gmane.org/1444154634-24927-1-git-send-email-mpa@pengutronix.de
|
||||
Bug-Debian: https://bugs.debian.org/770479
|
||||
|
||||
The timeout handling introduced in
|
||||
7e2893a16d3e (nbd: Fix timeout detection)
|
||||
introduces a race condition which may lead to killing of tasks that are
|
||||
not in nbd context anymore. This was not observed or reproducible yet.
|
||||
|
||||
This patch adds locking to critical use of task_recv and task_send to
|
||||
avoid killing tasks that already left the NBD thread functions. This
|
||||
lock is only acquired if a timeout occurs or the nbd device
|
||||
starts/stops.
|
||||
|
||||
Reported-by: Ben Hutchings <ben@decadent.org.uk>
|
||||
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
|
||||
[bwh: Backported to 4.2: adjust context]
|
||||
---
|
||||
drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++------
|
||||
1 file changed, 30 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/block/nbd.c
|
||||
+++ b/drivers/block/nbd.c
|
||||
@@ -60,6 +60,7 @@ struct nbd_device {
|
||||
int disconnect; /* a disconnect has been requested by user */
|
||||
|
||||
struct timer_list timeout_timer;
|
||||
+ spinlock_t tasks_lock;
|
||||
struct task_struct *task_recv;
|
||||
struct task_struct *task_send;
|
||||
};
|
||||
@@ -133,21 +134,23 @@ static void sock_shutdown(struct nbd_dev
|
||||
static void nbd_xmit_timeout(unsigned long arg)
|
||||
{
|
||||
struct nbd_device *nbd = (struct nbd_device *)arg;
|
||||
- struct task_struct *task;
|
||||
+ unsigned long flags;
|
||||
|
||||
if (list_empty(&nbd->queue_head))
|
||||
return;
|
||||
|
||||
nbd->disconnect = 1;
|
||||
|
||||
- task = READ_ONCE(nbd->task_recv);
|
||||
- if (task)
|
||||
- force_sig(SIGKILL, task);
|
||||
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||
+
|
||||
+ if (nbd->task_recv)
|
||||
+ force_sig(SIGKILL, nbd->task_recv);
|
||||
|
||||
- task = READ_ONCE(nbd->task_send);
|
||||
- if (task)
|
||||
+ if (nbd->task_send)
|
||||
force_sig(SIGKILL, nbd->task_send);
|
||||
|
||||
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||
+
|
||||
dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
|
||||
}
|
||||
|
||||
@@ -401,17 +404,24 @@ static int nbd_do_it(struct nbd_device *
|
||||
{
|
||||
struct request *req;
|
||||
int ret;
|
||||
+ unsigned long flags;
|
||||
|
||||
BUG_ON(nbd->magic != NBD_MAGIC);
|
||||
|
||||
sk_set_memalloc(nbd->sock->sk);
|
||||
|
||||
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||
nbd->task_recv = current;
|
||||
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||
|
||||
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
|
||||
if (ret) {
|
||||
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
|
||||
+
|
||||
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||
nbd->task_recv = NULL;
|
||||
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -420,7 +430,9 @@ static int nbd_do_it(struct nbd_device *
|
||||
|
||||
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
|
||||
|
||||
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||
nbd->task_recv = NULL;
|
||||
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||
|
||||
if (signal_pending(current)) {
|
||||
siginfo_t info;
|
||||
@@ -522,8 +534,11 @@ static int nbd_thread(void *data)
|
||||
{
|
||||
struct nbd_device *nbd = data;
|
||||
struct request *req;
|
||||
+ unsigned long flags;
|
||||
|
||||
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||
nbd->task_send = current;
|
||||
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||
|
||||
set_user_nice(current, MIN_NICE);
|
||||
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
|
||||
@@ -558,7 +573,15 @@ static int nbd_thread(void *data)
|
||||
nbd_handle_req(nbd, req);
|
||||
}
|
||||
|
||||
+ spin_lock_irqsave(&nbd->tasks_lock, flags);
|
||||
nbd->task_send = NULL;
|
||||
+ spin_unlock_irqrestore(&nbd->tasks_lock, flags);
|
||||
+
|
||||
+ /* Clear maybe pending signals */
|
||||
+ if (signal_pending(current)) {
|
||||
+ siginfo_t info;
|
||||
+ dequeue_signal_lock(current, &current->blocked, &info);
|
||||
+ }
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -878,6 +901,7 @@ static int __init nbd_init(void)
|
||||
nbd_dev[i].magic = NBD_MAGIC;
|
||||
INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
|
||||
spin_lock_init(&nbd_dev[i].queue_lock);
|
||||
+ spin_lock_init(&nbd_dev[i].tasks_lock);
|
||||
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
|
||||
mutex_init(&nbd_dev[i].tx_lock);
|
||||
init_timer(&nbd_dev[i].timeout_timer);
|
|
@ -0,0 +1,239 @@
|
|||
From: Markus Pargmann <mpa@pengutronix.de>
|
||||
Date: Mon, 17 Aug 2015 08:20:00 +0200
|
||||
Subject: nbd: Fix timeout detection
|
||||
Origin: https://git.kernel.org/linus/7e2893a16d3e71035a38122a77bc55848a29f0e4
|
||||
Bug-Debian: https://bugs.debian.org/770479
|
||||
|
||||
At the moment the nbd timeout just detects hanging tcp operations. This
|
||||
is not enough to detect a hanging or bad connection as expected of a
|
||||
timeout.
|
||||
|
||||
This patch redesigns the timeout detection to include some more cases.
|
||||
The timeout is now in relation to replies from the server. If the server
|
||||
does not send replies within the timeout the connection will be shut
|
||||
down.
|
||||
|
||||
The patch adds a continuous timer 'timeout_timer' that is set up in one of
|
||||
two cases:
|
||||
- The request list is empty and we are sending the first request out to
|
||||
the server. We want to have a reply within the given timeout,
|
||||
otherwise we consider the connection to be dead.
|
||||
- A server response was received. This means the server is still
|
||||
communicating with us. The timer is reset to the timeout value.
|
||||
|
||||
The timer is not stopped if the list becomes empty. It will just trigger
|
||||
a timeout which will directly leave the handling routine again as the
|
||||
request list is empty.
|
||||
|
||||
The whole patch does not use any additional explicit locking. The
|
||||
list_empty() calls are safe to be used concurrently. The timer is locked
|
||||
internally as we just use mod_timer and del_timer_sync().
|
||||
|
||||
The patch is based on the idea of Michal Belczyk with a previous
|
||||
different implementation.
|
||||
|
||||
Cc: Michal Belczyk <belczyk@bsd.krakow.pl>
|
||||
Cc: Hermann Lauer <Hermann.Lauer@iwr.uni-heidelberg.de>
|
||||
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
|
||||
Tested-by: Hermann Lauer <Hermann.Lauer@iwr.uni-heidelberg.de>
|
||||
Signed-off-by: Jens Axboe <axboe@fb.com>
|
||||
---
|
||||
drivers/block/nbd.c | 98 ++++++++++++++++++++++++++++++++++++++---------------
|
||||
1 file changed, 70 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
|
||||
index f169faf..f3536e6 100644
|
||||
--- a/drivers/block/nbd.c
|
||||
+++ b/drivers/block/nbd.c
|
||||
@@ -59,6 +59,10 @@ struct nbd_device {
|
||||
pid_t pid; /* pid of nbd-client, if attached */
|
||||
int xmit_timeout;
|
||||
int disconnect; /* a disconnect has been requested by user */
|
||||
+
|
||||
+ struct timer_list timeout_timer;
|
||||
+ struct task_struct *task_recv;
|
||||
+ struct task_struct *task_send;
|
||||
};
|
||||
|
||||
#define NBD_MAGIC 0x68797548
|
||||
@@ -121,6 +125,7 @@ static void sock_shutdown(struct nbd_device *nbd, int lock)
|
||||
dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
|
||||
kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
|
||||
nbd->sock = NULL;
|
||||
+ del_timer_sync(&nbd->timeout_timer);
|
||||
}
|
||||
if (lock)
|
||||
mutex_unlock(&nbd->tx_lock);
|
||||
@@ -128,11 +133,23 @@ static void sock_shutdown(struct nbd_device *nbd, int lock)
|
||||
|
||||
static void nbd_xmit_timeout(unsigned long arg)
|
||||
{
|
||||
- struct task_struct *task = (struct task_struct *)arg;
|
||||
+ struct nbd_device *nbd = (struct nbd_device *)arg;
|
||||
+ struct task_struct *task;
|
||||
+
|
||||
+ if (list_empty(&nbd->queue_head))
|
||||
+ return;
|
||||
+
|
||||
+ nbd->disconnect = 1;
|
||||
+
|
||||
+ task = READ_ONCE(nbd->task_recv);
|
||||
+ if (task)
|
||||
+ force_sig(SIGKILL, task);
|
||||
|
||||
- printk(KERN_WARNING "nbd: killing hung xmit (%s, pid: %d)\n",
|
||||
- task->comm, task->pid);
|
||||
- force_sig(SIGKILL, task);
|
||||
+ task = READ_ONCE(nbd->task_send);
|
||||
+ if (task)
|
||||
+ force_sig(SIGKILL, nbd->task_send);
|
||||
+
|
||||
+ dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -171,33 +188,12 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
|
||||
msg.msg_controllen = 0;
|
||||
msg.msg_flags = msg_flags | MSG_NOSIGNAL;
|
||||
|
||||
- if (send) {
|
||||
- struct timer_list ti;
|
||||
-
|
||||
- if (nbd->xmit_timeout) {
|
||||
- init_timer(&ti);
|
||||
- ti.function = nbd_xmit_timeout;
|
||||
- ti.data = (unsigned long)current;
|
||||
- ti.expires = jiffies + nbd->xmit_timeout;
|
||||
- add_timer(&ti);
|
||||
- }
|
||||
+ if (send)
|
||||
result = kernel_sendmsg(sock, &msg, &iov, 1, size);
|
||||
- if (nbd->xmit_timeout)
|
||||
- del_timer_sync(&ti);
|
||||
- } else
|
||||
+ else
|
||||
result = kernel_recvmsg(sock, &msg, &iov, 1, size,
|
||||
msg.msg_flags);
|
||||
|
||||
- if (signal_pending(current)) {
|
||||
- siginfo_t info;
|
||||
- printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
|
||||
- task_pid_nr(current), current->comm,
|
||||
- dequeue_signal_lock(current, &current->blocked, &info));
|
||||
- result = -EINTR;
|
||||
- sock_shutdown(nbd, !send);
|
||||
- break;
|
||||
- }
|
||||
-
|
||||
if (result <= 0) {
|
||||
if (result == 0)
|
||||
result = -EPIPE; /* short read */
|
||||
@@ -210,6 +206,9 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
|
||||
sigprocmask(SIG_SETMASK, &oldset, NULL);
|
||||
tsk_restore_flags(current, pflags, PF_MEMALLOC);
|
||||
|
||||
+ if (!send && nbd->xmit_timeout)
|
||||
+ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
|
||||
+
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -415,12 +414,26 @@ static int nbd_do_it(struct nbd_device *nbd)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+ nbd->task_recv = current;
|
||||
+
|
||||
while ((req = nbd_read_stat(nbd)) != NULL)
|
||||
nbd_end_request(nbd, req);
|
||||
|
||||
+ nbd->task_recv = NULL;
|
||||
+
|
||||
+ if (signal_pending(current)) {
|
||||
+ siginfo_t info;
|
||||
+
|
||||
+ ret = dequeue_signal_lock(current, &current->blocked, &info);
|
||||
+ dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
|
||||
+ task_pid_nr(current), current->comm, ret);
|
||||
+ sock_shutdown(nbd, 1);
|
||||
+ ret = -ETIMEDOUT;
|
||||
+ }
|
||||
+
|
||||
device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
|
||||
nbd->pid = 0;
|
||||
- return 0;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static void nbd_clear_que(struct nbd_device *nbd)
|
||||
@@ -482,6 +495,9 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
|
||||
|
||||
nbd->active_req = req;
|
||||
|
||||
+ if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
|
||||
+ mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
|
||||
+
|
||||
if (nbd_send_req(nbd, req) != 0) {
|
||||
dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
|
||||
req->errors++;
|
||||
@@ -508,6 +524,8 @@ static int nbd_thread(void *data)
|
||||
struct nbd_device *nbd = data;
|
||||
struct request *req;
|
||||
|
||||
+ nbd->task_send = current;
|
||||
+
|
||||
set_user_nice(current, MIN_NICE);
|
||||
while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
|
||||
/* wait for something to do */
|
||||
@@ -515,6 +533,18 @@ static int nbd_thread(void *data)
|
||||
kthread_should_stop() ||
|
||||
!list_empty(&nbd->waiting_queue));
|
||||
|
||||
+ if (signal_pending(current)) {
|
||||
+ siginfo_t info;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = dequeue_signal_lock(current, &current->blocked,
|
||||
+ &info);
|
||||
+ dev_warn(nbd_to_dev(nbd), "pid %d, %s, got signal %d\n",
|
||||
+ task_pid_nr(current), current->comm, ret);
|
||||
+ sock_shutdown(nbd, 1);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
/* extract request */
|
||||
if (list_empty(&nbd->waiting_queue))
|
||||
continue;
|
||||
@@ -528,6 +558,9 @@ static int nbd_thread(void *data)
|
||||
/* handle request */
|
||||
nbd_handle_req(nbd, req);
|
||||
}
|
||||
+
|
||||
+ nbd->task_send = NULL;
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -648,6 +681,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
|
||||
case NBD_SET_TIMEOUT:
|
||||
nbd->xmit_timeout = arg * HZ;
|
||||
+ if (arg)
|
||||
+ mod_timer(&nbd->timeout_timer,
|
||||
+ jiffies + nbd->xmit_timeout);
|
||||
+ else
|
||||
+ del_timer_sync(&nbd->timeout_timer);
|
||||
+
|
||||
return 0;
|
||||
|
||||
case NBD_SET_FLAGS:
|
||||
@@ -842,6 +881,9 @@ static int __init nbd_init(void)
|
||||
spin_lock_init(&nbd_dev[i].queue_lock);
|
||||
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
|
||||
mutex_init(&nbd_dev[i].tx_lock);
|
||||
+ init_timer(&nbd_dev[i].timeout_timer);
|
||||
+ nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
|
||||
+ nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
|
||||
init_waitqueue_head(&nbd_dev[i].active_wq);
|
||||
init_waitqueue_head(&nbd_dev[i].waiting_wq);
|
||||
nbd_dev[i].blksize = 1024;
|
|
@ -0,0 +1,82 @@
|
|||
From: Markus Pargmann <mpa@pengutronix.de>
|
||||
Date: Mon, 17 Aug 2015 08:20:05 +0200
|
||||
Subject: nbd: Remove variable 'pid'
|
||||
Origin: https://git.kernel.org/linus/6521d39a64b3f9c3acb0fd25a34cfaf9a40e548e
|
||||
Bug-Debian: https://bugs.debian.org/770479
|
||||
|
||||
This patch uses nbd->task_recv to determine the value of the previously
|
||||
used variable 'pid' for sysfs.
|
||||
|
||||
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
|
||||
Signed-off-by: Jens Axboe <axboe@fb.com>
|
||||
[bwh: Backported to 4.2: adjust context]
|
||||
---
|
||||
drivers/block/nbd.c | 19 +++++++++----------
|
||||
1 file changed, 9 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/drivers/block/nbd.c
|
||||
+++ b/drivers/block/nbd.c
|
||||
@@ -56,7 +56,6 @@ struct nbd_device {
|
||||
struct gendisk *disk;
|
||||
int blksize;
|
||||
loff_t bytesize;
|
||||
- pid_t pid; /* pid of nbd-client, if attached */
|
||||
int xmit_timeout;
|
||||
int disconnect; /* a disconnect has been requested by user */
|
||||
|
||||
@@ -388,9 +387,9 @@ static ssize_t pid_show(struct device *d
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
+ struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
|
||||
|
||||
- return sprintf(buf, "%ld\n",
|
||||
- (long) ((struct nbd_device *)disk->private_data)->pid);
|
||||
+ return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
|
||||
}
|
||||
|
||||
static struct device_attribute pid_attr = {
|
||||
@@ -406,19 +405,21 @@ static int nbd_do_it(struct nbd_device *
|
||||
BUG_ON(nbd->magic != NBD_MAGIC);
|
||||
|
||||
sk_set_memalloc(nbd->sock->sk);
|
||||
- nbd->pid = task_pid_nr(current);
|
||||
+
|
||||
+ nbd->task_recv = current;
|
||||
+
|
||||
ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
|
||||
if (ret) {
|
||||
dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
|
||||
- nbd->pid = 0;
|
||||
+ nbd->task_recv = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
- nbd->task_recv = current;
|
||||
-
|
||||
while ((req = nbd_read_stat(nbd)) != NULL)
|
||||
nbd_end_request(nbd, req);
|
||||
|
||||
+ device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
|
||||
+
|
||||
nbd->task_recv = NULL;
|
||||
|
||||
if (signal_pending(current)) {
|
||||
@@ -431,8 +432,6 @@ static int nbd_do_it(struct nbd_device *
|
||||
ret = -ETIMEDOUT;
|
||||
}
|
||||
|
||||
- device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
|
||||
- nbd->pid = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -705,7 +704,7 @@ static int __nbd_ioctl(struct block_devi
|
||||
struct socket *sock;
|
||||
int error;
|
||||
|
||||
- if (nbd->pid)
|
||||
+ if (nbd->task_recv)
|
||||
return -EBUSY;
|
||||
if (!nbd->sock)
|
||||
return -EINVAL;
|
|
@ -104,3 +104,6 @@ bugfix/all/Initialize-msg-shm-IPC-objects-before-doing-ipc_addi.patch
|
|||
features/all/ath10k-add-qca6164-support.patch
|
||||
debian/block-fix-abi-change-in-4.2.2.patch
|
||||
bugfix/x86/crypto-x86-camellia_aesni_avx-fix-cpu-feature-checks.patch
|
||||
bugfix/all/nbd-fix-timeout-detection.patch
|
||||
bugfix/all/nbd-remove-variable-pid.patch
|
||||
bugfix/all/nbd-add-locking-for-tasks.patch
|
||||
|
|
Loading…
Reference in New Issue