aio: Apply fixes from 4.19.38 (CVE-2019-10125)

This commit is contained in:
Ben Hutchings 2019-05-05 15:30:46 +01:00
parent ee337c6a41
commit 4f3fa1e296
16 changed files with 1489 additions and 0 deletions

15
debian/changelog vendored
View File

@ -1095,6 +1095,21 @@ linux (4.19.37-1) UNRELEASED; urgency=medium
* [x86] platform: Enable INTEL_ATOMISP2_PM as module
* drivers/firmware/google: Adjust configuration for 4.19
* MODSIGN: Make shash allocation failure fatal
* aio: Apply fixes from 4.19.38:
- aio: clear IOCB_HIPRI
- aio: use assigned completion handler
- aio: separate out ring reservation from req allocation
- aio: don't zero entire aio_kiocb aio_get_req()
- aio: use iocb_put() instead of open coding it
- aio: split out iocb copy from io_submit_one()
- aio: abstract out io_event filler helper
- aio: initialize kiocb private in case any filesystems expect it.
- aio: simplify - and fix - fget/fput for io_submit() (CVE-2019-10125)
- pin iocb through aio.
- aio: fold lookup_kiocb() into its sole caller
- aio: keep io_event in aio_kiocb
- aio: store event at final iocb_put()
- Fix aio_poll() races
[ YunQiang Su ]
* [mips*r6] Re-enable CONFIG_JUMP_LABEL, which has been fixed in upstream.

View File

@ -0,0 +1,52 @@
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 22 Nov 2018 16:44:07 +0100
Subject: [01/14] aio: clear IOCB_HIPRI
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=9101cbe70ef64c7f35fb75552005a3a696cc288e
commit 154989e45fd8de9bfb52bbd6e5ea763e437e54c5 upstream.
No one is going to poll for aio (yet), so we must clear the HIPRI
flag, as we would otherwise send it down the poll queues, where no
one will be polling for completions.
Signed-off-by: Christoph Hellwig <hch@lst.de>
[The flag to clear is IOCB_HIPRI, not RWF_HIPRI.]
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 45d5ef8dd0a8..78aa249070b1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1438,8 +1438,7 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
ret = ioprio_check_cap(iocb->aio_reqprio);
if (ret) {
pr_debug("aio ioprio check cap error: %d\n", ret);
- fput(req->ki_filp);
- return ret;
+ goto out_fput;
}
req->ki_ioprio = iocb->aio_reqprio;
@@ -1448,7 +1447,13 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
- fput(req->ki_filp);
+ goto out_fput;
+
+ req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
+ return 0;
+
+out_fput:
+ fput(req->ki_filp);
return ret;
}

View File

@ -0,0 +1,32 @@
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 6 Nov 2018 14:27:13 -0700
Subject: [02/14] aio: use assigned completion handler
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=b3373253f0bab538a7521537dfcb73e731b3d732
commit bc9bff61624ac33b7c95861abea1af24ee7a94fc upstream.
We know this is a read/write request, but in preparation for
having different kinds of those, ensure that we call the assigned
handler instead of assuming it's aio_complete_rq().
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/aio.c b/fs/aio.c
index 78aa249070b1..3df3fb0678e5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1492,7 +1492,7 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
ret = -EINTR;
/*FALLTHRU*/
default:
- aio_complete_rw(req, ret, 0);
+ req->ki_complete(req, ret, 0);
}
}

View File

@ -0,0 +1,101 @@
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 19 Nov 2018 15:57:42 -0700
Subject: [03/14] aio: separate out ring reservation from req allocation
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=730198c889d85db78058cfb57c1b41c65f55c94e
commit 432c79978c33ecef91b1b04cea6936c20810da29 upstream.
This is in preparation for certain types of IO not needing a ring
reservation.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 3df3fb0678e5..b9e0df08277b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -902,7 +902,7 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
local_irq_restore(flags);
}
-static bool get_reqs_available(struct kioctx *ctx)
+static bool __get_reqs_available(struct kioctx *ctx)
{
struct kioctx_cpu *kcpu;
bool ret = false;
@@ -994,6 +994,14 @@ static void user_refill_reqs_available(struct kioctx *ctx)
spin_unlock_irq(&ctx->completion_lock);
}
+static bool get_reqs_available(struct kioctx *ctx)
+{
+ if (__get_reqs_available(ctx))
+ return true;
+ user_refill_reqs_available(ctx);
+ return __get_reqs_available(ctx);
+}
+
/* aio_get_req
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
@@ -1002,24 +1010,15 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
struct aio_kiocb *req;
- if (!get_reqs_available(ctx)) {
- user_refill_reqs_available(ctx);
- if (!get_reqs_available(ctx))
- return NULL;
- }
-
req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
if (unlikely(!req))
- goto out_put;
+ return NULL;
percpu_ref_get(&ctx->reqs);
INIT_LIST_HEAD(&req->ki_list);
refcount_set(&req->ki_refcnt, 0);
req->ki_ctx = ctx;
return req;
-out_put:
- put_reqs_available(ctx, 1);
- return NULL;
}
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
@@ -1813,9 +1812,13 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
return -EINVAL;
}
+ if (!get_reqs_available(ctx))
+ return -EAGAIN;
+
+ ret = -EAGAIN;
req = aio_get_req(ctx);
if (unlikely(!req))
- return -EAGAIN;
+ goto out_put_reqs_available;
if (iocb.aio_flags & IOCB_FLAG_RESFD) {
/*
@@ -1878,11 +1881,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
return 0;
out_put_req:
- put_reqs_available(ctx, 1);
percpu_ref_put(&ctx->reqs);
if (req->ki_eventfd)
eventfd_ctx_put(req->ki_eventfd);
kmem_cache_free(kiocb_cachep, req);
+out_put_reqs_available:
+ put_reqs_available(ctx, 1);
return ret;
}

View File

@ -0,0 +1,52 @@
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 4 Dec 2018 09:44:49 -0700
Subject: [04/14] aio: don't zero entire aio_kiocb aio_get_req()
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=ef529eead8cfc11c051b90d239e7137f7141ea94
commit 2bc4ca9bb600cbe36941da2b2a67189fc4302a04 upstream.
It's 192 bytes, fairly substantial. Most items don't need to be cleared,
especially not upfront. Clear the ones we do need to clear, and leave
the other ones for setup when the iocb is prepared and submitted.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index b9e0df08277b..2547f17b4fef 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1010,14 +1010,15 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
struct aio_kiocb *req;
- req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
+ req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
if (unlikely(!req))
return NULL;
percpu_ref_get(&ctx->reqs);
+ req->ki_ctx = ctx;
INIT_LIST_HEAD(&req->ki_list);
refcount_set(&req->ki_refcnt, 0);
- req->ki_ctx = ctx;
+ req->ki_eventfd = NULL;
return req;
}
@@ -1738,6 +1739,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
if (unlikely(!req->file))
return -EBADF;
+ req->head = NULL;
+ req->woken = false;
+ req->cancelled = false;
+
apt.pt._qproc = aio_poll_queue_proc;
apt.pt._key = req->events;
apt.iocb = aiocb;

View File

@ -0,0 +1,34 @@
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 24 Nov 2018 21:33:09 -0700
Subject: [05/14] aio: use iocb_put() instead of open coding it
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=4d677689742ab60d5be46e20708276368564427a
commit 71ebc6fef0f53459f37fb39e1466792232fa52ee upstream.
Replace the percpu_ref_put() + kmem_cache_free() with a call to
iocb_put() instead.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 2547f17b4fef..e2b63ab28ecc 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1886,10 +1886,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
return 0;
out_put_req:
- percpu_ref_put(&ctx->reqs);
if (req->ki_eventfd)
eventfd_ctx_put(req->ki_eventfd);
- kmem_cache_free(kiocb_cachep, req);
+ iocb_put(req);
out_put_reqs_available:
put_reqs_available(ctx, 1);
return ret;

View File

@ -0,0 +1,194 @@
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 24 Nov 2018 14:46:14 -0700
Subject: [06/14] aio: split out iocb copy from io_submit_one()
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=d384f8b855a573ea301fd7f5558cc64cb22107e6
commit 88a6f18b950e2e4dce57d31daa151105f4f3dcff upstream.
This is in preparation for handing in iocbs in a different fashion as well. Also
make it clear that the iocb being passed in isn't modified, by marking
it const throughout.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 68 +++++++++++++++++++++++++++++++-------------------------
1 file changed, 38 insertions(+), 30 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index e2b63ab28ecc..6e1da220f04b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1416,7 +1416,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
aio_complete(iocb, res, res2);
}
-static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
{
int ret;
@@ -1457,7 +1457,7 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
return ret;
}
-static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
+static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
bool vectored, bool compat, struct iov_iter *iter)
{
void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
@@ -1496,8 +1496,8 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
}
}
-static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
- bool compat)
+static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
+ bool vectored, bool compat)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1529,8 +1529,8 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
return ret;
}
-static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
- bool compat)
+static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
+ bool vectored, bool compat)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1585,7 +1585,8 @@ static void aio_fsync_work(struct work_struct *work)
aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
}
-static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
+ bool datasync)
{
if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
iocb->aio_rw_flags))
@@ -1719,7 +1720,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
add_wait_queue(head, &pt->iocb->poll.wait);
}
-static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
{
struct kioctx *ctx = aiocb->ki_ctx;
struct poll_iocb *req = &aiocb->poll;
@@ -1791,27 +1792,23 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
return 0;
}
-static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- bool compat)
+static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
+ struct iocb __user *user_iocb, bool compat)
{
struct aio_kiocb *req;
- struct iocb iocb;
ssize_t ret;
- if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
- return -EFAULT;
-
/* enforce forwards compatibility on users */
- if (unlikely(iocb.aio_reserved2)) {
+ if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
/* prevent overflows */
if (unlikely(
- (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
- (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
- ((ssize_t)iocb.aio_nbytes < 0)
+ (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
+ (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
+ ((ssize_t)iocb->aio_nbytes < 0)
)) {
pr_debug("EINVAL: overflow check\n");
return -EINVAL;
@@ -1825,14 +1822,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
goto out_put_reqs_available;
- if (iocb.aio_flags & IOCB_FLAG_RESFD) {
+ if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
* instance of the file* now. The file descriptor must be
* an eventfd() fd, and will be signaled for each completed
* event using the eventfd_signal() function.
*/
- req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
+ req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
req->ki_eventfd = NULL;
@@ -1847,32 +1844,32 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
}
req->ki_user_iocb = user_iocb;
- req->ki_user_data = iocb.aio_data;
+ req->ki_user_data = iocb->aio_data;
- switch (iocb.aio_lio_opcode) {
+ switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
- ret = aio_read(&req->rw, &iocb, false, compat);
+ ret = aio_read(&req->rw, iocb, false, compat);
break;
case IOCB_CMD_PWRITE:
- ret = aio_write(&req->rw, &iocb, false, compat);
+ ret = aio_write(&req->rw, iocb, false, compat);
break;
case IOCB_CMD_PREADV:
- ret = aio_read(&req->rw, &iocb, true, compat);
+ ret = aio_read(&req->rw, iocb, true, compat);
break;
case IOCB_CMD_PWRITEV:
- ret = aio_write(&req->rw, &iocb, true, compat);
+ ret = aio_write(&req->rw, iocb, true, compat);
break;
case IOCB_CMD_FSYNC:
- ret = aio_fsync(&req->fsync, &iocb, false);
+ ret = aio_fsync(&req->fsync, iocb, false);
break;
case IOCB_CMD_FDSYNC:
- ret = aio_fsync(&req->fsync, &iocb, true);
+ ret = aio_fsync(&req->fsync, iocb, true);
break;
case IOCB_CMD_POLL:
- ret = aio_poll(req, &iocb);
+ ret = aio_poll(req, iocb);
break;
default:
- pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
+ pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
ret = -EINVAL;
break;
}
@@ -1894,6 +1891,17 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
return ret;
}
+static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+ bool compat)
+{
+ struct iocb iocb;
+
+ if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+ return -EFAULT;
+
+ return __io_submit_one(ctx, &iocb, user_iocb, compat);
+}
+
/* sys_io_submit:
* Queue the nr iocbs pointed to by iocbpp for processing. Returns
* the number of iocbs queued. May return -EINVAL if the aio_context

View File

@ -0,0 +1,47 @@
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 20 Nov 2018 20:06:23 -0700
Subject: [07/14] aio: abstract out io_event filler helper
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=a812f7b68a3940e0369fd0fb24febec794a67623
commit 875736bb3f3ded168469f6a14df7a938416a99d5 upstream.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 6e1da220f04b..f6ce01ca6903 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1059,6 +1059,15 @@ static inline void iocb_put(struct aio_kiocb *iocb)
}
}
+static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
+ long res, long res2)
+{
+ ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
+ ev->data = iocb->ki_user_data;
+ ev->res = res;
+ ev->res2 = res2;
+}
+
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
@@ -1086,10 +1095,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
- event->data = iocb->ki_user_data;
- event->res = res;
- event->res2 = res2;
+ aio_fill_event(event, iocb, res, res2);
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);

View File

@ -0,0 +1,32 @@
From: Mike Marshall <hubcap@omnibond.com>
Date: Tue, 5 Feb 2019 14:13:35 -0500
Subject: [08/14] aio: initialize kiocb private in case any filesystems expect
it.
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=2afa01cd9186974051b38b7d1f31bb2407e41e3a
commit ec51f8ee1e63498e9f521ec0e5a6d04622bb2c67 upstream.
A recent optimization had left private uninitialized.
Fixes: 2bc4ca9bb600 ("aio: don't zero entire aio_kiocb aio_get_req()")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/aio.c b/fs/aio.c
index f6ce01ca6903..d74fc9e112ac 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1430,6 +1430,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
if (unlikely(!req->ki_filp))
return -EBADF;
req->ki_complete = aio_complete_rw;
+ req->private = NULL;
req->ki_pos = iocb->aio_offset;
req->ki_flags = iocb_flags(req->ki_filp);
if (iocb->aio_flags & IOCB_FLAG_RESFD)

View File

@ -0,0 +1,312 @@
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 3 Mar 2019 14:23:33 -0800
Subject: [09/14] aio: simplify - and fix - fget/fput for io_submit()
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=d6b2615f7d31d8e58b685d42dbafcc7dc1204bbd
Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-10125
commit 84c4e1f89fefe70554da0ab33be72c9be7994379 upstream.
Al Viro root-caused a race where the IOCB_CMD_POLL handling of
fget/fput() could cause us to access the file pointer after it had
already been freed:
"In more details - normally IOCB_CMD_POLL handling looks so:
1) io_submit(2) allocates aio_kiocb instance and passes it to
aio_poll()
2) aio_poll() resolves the descriptor to struct file by req->file =
fget(iocb->aio_fildes)
3) aio_poll() sets ->woken to false and raises ->ki_refcnt of that
aio_kiocb to 2 (bumps by 1, that is).
4) aio_poll() calls vfs_poll(). After sanity checks (basically,
"poll_wait() had been called and only once") it locks the queue.
That's what the extra reference to iocb had been for - we know we
can safely access it.
5) With queue locked, we check if ->woken has already been set to
true (by aio_poll_wake()) and, if it had been, we unlock the
queue, drop a reference to aio_kiocb and bugger off - at that
point it's a responsibility to aio_poll_wake() and the stuff
called/scheduled by it. That code will drop the reference to file
in req->file, along with the other reference to our aio_kiocb.
6) otherwise, we see whether we need to wait. If we do, we unlock the
queue, drop one reference to aio_kiocb and go away - eventual
wakeup (or cancel) will deal with the reference to file and with
the other reference to aio_kiocb
7) otherwise we remove ourselves from waitqueue (still under the
queue lock), so that wakeup won't get us. No async activity will
be happening, so we can safely drop req->file and iocb ourselves.
If wakeup happens while we are in vfs_poll(), we are fine - aio_kiocb
won't get freed under us, so we can do all the checks and locking
safely. And we don't touch ->file if we detect that case.
However, vfs_poll() most certainly *does* touch the file it had been
given. So wakeup coming while we are still in ->poll() might end up
doing fput() on that file. That case is not too rare, and usually we
are saved by the still present reference from descriptor table - that
fput() is not the final one.
But if another thread closes that descriptor right after our fget()
and wakeup does happen before ->poll() returns, we are in trouble -
final fput() done while we are in the middle of a method."
Al also wrote a patch to take an extra reference to the file descriptor
to fix this, but I instead suggested we just streamline the whole file
pointer handling by io_submit() so that the generic aio submission code
simply keeps the file pointer around until the aio has completed.
Fixes: bfe4037e722e ("aio: implement IOCB_CMD_POLL")
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Reported-by: syzbot+503d4cc169fcec1cb18c@syzkaller.appspotmail.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 72 +++++++++++++++++++---------------------------
include/linux/fs.h | 8 +++++-
2 files changed, 36 insertions(+), 44 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index d74fc9e112ac..46229e663b57 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -161,9 +161,13 @@ struct kioctx {
unsigned id;
};
+/*
+ * First field must be the file pointer in all the
+ * iocb unions! See also 'struct kiocb' in <linux/fs.h>
+ */
struct fsync_iocb {
- struct work_struct work;
struct file *file;
+ struct work_struct work;
bool datasync;
};
@@ -177,8 +181,15 @@ struct poll_iocb {
struct work_struct work;
};
+/*
+ * NOTE! Each of the iocb union members has the file pointer
+ * as the first entry in their struct definition. So you can
+ * access the file pointer through any of the sub-structs,
+ * or directly as just 'ki_filp' in this struct.
+ */
struct aio_kiocb {
union {
+ struct file *ki_filp;
struct kiocb rw;
struct fsync_iocb fsync;
struct poll_iocb poll;
@@ -1054,6 +1065,8 @@ static inline void iocb_put(struct aio_kiocb *iocb)
{
if (refcount_read(&iocb->ki_refcnt) == 0 ||
refcount_dec_and_test(&iocb->ki_refcnt)) {
+ if (iocb->ki_filp)
+ fput(iocb->ki_filp);
percpu_ref_put(&iocb->ki_ctx->reqs);
kmem_cache_free(kiocb_cachep, iocb);
}
@@ -1418,7 +1431,6 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
file_end_write(kiocb->ki_filp);
}
- fput(kiocb->ki_filp);
aio_complete(iocb, res, res2);
}
@@ -1426,9 +1438,6 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
{
int ret;
- req->ki_filp = fget(iocb->aio_fildes);
- if (unlikely(!req->ki_filp))
- return -EBADF;
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
@@ -1445,7 +1454,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
ret = ioprio_check_cap(iocb->aio_reqprio);
if (ret) {
pr_debug("aio ioprio check cap error: %d\n", ret);
- goto out_fput;
+ return ret;
}
req->ki_ioprio = iocb->aio_reqprio;
@@ -1454,14 +1463,10 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
if (unlikely(ret))
- goto out_fput;
+ return ret;
req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
return 0;
-
-out_fput:
- fput(req->ki_filp);
- return ret;
}
static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
@@ -1515,24 +1520,19 @@ static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
if (ret)
return ret;
file = req->ki_filp;
-
- ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
- goto out_fput;
+ return -EBADF;
ret = -EINVAL;
if (unlikely(!file->f_op->read_iter))
- goto out_fput;
+ return -EINVAL;
ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
if (ret)
- goto out_fput;
+ return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret)
aio_rw_done(req, call_read_iter(file, req, &iter));
kfree(iovec);
-out_fput:
- if (unlikely(ret))
- fput(file);
return ret;
}
@@ -1549,16 +1549,14 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
return ret;
file = req->ki_filp;
- ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
- goto out_fput;
- ret = -EINVAL;
+ return -EBADF;
if (unlikely(!file->f_op->write_iter))
- goto out_fput;
+ return -EINVAL;
ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
if (ret)
- goto out_fput;
+ return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
/*
@@ -1576,9 +1574,6 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
aio_rw_done(req, call_write_iter(file, req, &iter));
}
kfree(iovec);
-out_fput:
- if (unlikely(ret))
- fput(file);
return ret;
}
@@ -1588,7 +1583,6 @@ static void aio_fsync_work(struct work_struct *work)
int ret;
ret = vfs_fsync(req->file, req->datasync);
- fput(req->file);
aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
}
@@ -1599,13 +1593,8 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
iocb->aio_rw_flags))
return -EINVAL;
- req->file = fget(iocb->aio_fildes);
- if (unlikely(!req->file))
- return -EBADF;
- if (unlikely(!req->file->f_op->fsync)) {
- fput(req->file);
+ if (unlikely(!req->file->f_op->fsync))
return -EINVAL;
- }
req->datasync = datasync;
INIT_WORK(&req->work, aio_fsync_work);
@@ -1615,10 +1604,7 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
{
- struct file *file = iocb->poll.file;
-
aio_complete(iocb, mangle_poll(mask), 0);
- fput(file);
}
static void aio_poll_complete_work(struct work_struct *work)
@@ -1743,9 +1729,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
INIT_WORK(&req->work, aio_poll_complete_work);
req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
- req->file = fget(iocb->aio_fildes);
- if (unlikely(!req->file))
- return -EBADF;
req->head = NULL;
req->woken = false;
@@ -1788,10 +1771,8 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
spin_unlock_irq(&ctx->ctx_lock);
out:
- if (unlikely(apt.error)) {
- fput(req->file);
+ if (unlikely(apt.error))
return apt.error;
- }
if (mask)
aio_poll_complete(aiocb, mask);
@@ -1829,6 +1810,11 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
if (unlikely(!req))
goto out_put_reqs_available;
+ req->ki_filp = fget(iocb->aio_fildes);
+ ret = -EBADF;
+ if (unlikely(!req->ki_filp))
+ goto out_put_req;
+
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7b6084854bfe..111c94c4baa1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -304,13 +304,19 @@ enum rw_hint {
struct kiocb {
struct file *ki_filp;
+
+ /* The 'ki_filp' pointer is shared in a union for aio */
+ randomized_struct_fields_start
+
loff_t ki_pos;
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void *private;
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
-} __randomize_layout;
+
+ randomized_struct_fields_end
+};
static inline bool is_sync_kiocb(struct kiocb *kiocb)
{

View File

@ -0,0 +1,112 @@
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 6 Mar 2019 20:22:54 -0500
Subject: [10/14] pin iocb through aio.
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=c7f2525abfecf8a57a1417837b6a809df79b299e
commit b53119f13a04879c3bf502828d99d13726639ead upstream.
aio_poll() is not the only case that needs file pinned; worse, while
aio_read()/aio_write() can live without pinning iocb itself, the
proof is rather brittle and can easily break on later changes.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 37 +++++++++++++++++++++----------------
1 file changed, 21 insertions(+), 16 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 46229e663b57..10e5a8f52dce 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1016,6 +1016,9 @@ static bool get_reqs_available(struct kioctx *ctx)
/* aio_get_req
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
+ *
+ * The refcount is initialized to 2 - one for the async op completion,
+ * one for the synchronous code that does this.
*/
static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
@@ -1028,7 +1031,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
percpu_ref_get(&ctx->reqs);
req->ki_ctx = ctx;
INIT_LIST_HEAD(&req->ki_list);
- refcount_set(&req->ki_refcnt, 0);
+ refcount_set(&req->ki_refcnt, 2);
req->ki_eventfd = NULL;
return req;
}
@@ -1061,15 +1064,18 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ret;
}
+static inline void iocb_destroy(struct aio_kiocb *iocb)
+{
+ if (iocb->ki_filp)
+ fput(iocb->ki_filp);
+ percpu_ref_put(&iocb->ki_ctx->reqs);
+ kmem_cache_free(kiocb_cachep, iocb);
+}
+
static inline void iocb_put(struct aio_kiocb *iocb)
{
- if (refcount_read(&iocb->ki_refcnt) == 0 ||
- refcount_dec_and_test(&iocb->ki_refcnt)) {
- if (iocb->ki_filp)
- fput(iocb->ki_filp);
- percpu_ref_put(&iocb->ki_ctx->reqs);
- kmem_cache_free(kiocb_cachep, iocb);
- }
+ if (refcount_dec_and_test(&iocb->ki_refcnt))
+ iocb_destroy(iocb);
}
static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
@@ -1743,9 +1749,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
INIT_LIST_HEAD(&req->wait.entry);
init_waitqueue_func_entry(&req->wait, aio_poll_wake);
- /* one for removal from waitqueue, one for this function */
- refcount_set(&aiocb->ki_refcnt, 2);
-
mask = vfs_poll(req->file, &apt.pt) & req->events;
if (unlikely(!req->head)) {
/* we did not manage to set up a waitqueue, done */
@@ -1776,7 +1779,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
if (mask)
aio_poll_complete(aiocb, mask);
- iocb_put(aiocb);
return 0;
}
@@ -1867,18 +1869,21 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
break;
}
+ /* Done with the synchronous reference */
+ iocb_put(req);
+
/*
* If ret is 0, we'd either done aio_complete() ourselves or have
* arranged for that to be done asynchronously. Anything non-zero
* means that we need to destroy req ourselves.
*/
- if (ret)
- goto out_put_req;
- return 0;
+ if (!ret)
+ return 0;
+
out_put_req:
if (req->ki_eventfd)
eventfd_ctx_put(req->ki_eventfd);
- iocb_put(req);
+ iocb_destroy(req);
out_put_reqs_available:
put_reqs_available(ctx, 1);
return ret;

View File

@@ -0,0 +1,61 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 11 Mar 2019 19:00:36 -0400
Subject: [11/14] aio: fold lookup_kiocb() into its sole caller
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=592ea630b081a6c97ec56499b0e12f68fd2da2d8
commit 833f4154ed560232120bc475935ee1d6a20e159f upstream.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 29 +++++++----------------------
1 file changed, 7 insertions(+), 22 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 10e5a8f52dce..cda193f6de76 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1992,24 +1992,6 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
}
#endif
-/* lookup_kiocb
- * Finds a given iocb for cancellation.
- */
-static struct aio_kiocb *
-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
-{
- struct aio_kiocb *kiocb;
-
- assert_spin_locked(&ctx->ctx_lock);
-
- /* TODO: use a hash or array, this sucks. */
- list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
- if (kiocb->ki_user_iocb == iocb)
- return kiocb;
- }
- return NULL;
-}
-
/* sys_io_cancel:
* Attempts to cancel an iocb previously passed to io_submit. If
* the operation is successfully cancelled, the resulting event is
@@ -2038,10 +2020,13 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return -EINVAL;
spin_lock_irq(&ctx->ctx_lock);
- kiocb = lookup_kiocb(ctx, iocb);
- if (kiocb) {
- ret = kiocb->ki_cancel(&kiocb->rw);
- list_del_init(&kiocb->ki_list);
+ /* TODO: use a hash or array, this sucks. */
+ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+ if (kiocb->ki_user_iocb == iocb) {
+ ret = kiocb->ki_cancel(&kiocb->rw);
+ list_del_init(&kiocb->ki_list);
+ break;
+ }
}
spin_unlock_irq(&ctx->ctx_lock);

View File

@@ -0,0 +1,105 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 Mar 2019 19:43:45 -0500
Subject: [12/14] aio: keep io_event in aio_kiocb
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=c20202c51d2b6703a4e539235f892f34daabd791
commit a9339b7855094ba11a97e8822ae038135e879e79 upstream.
We want to separate forming the resulting io_event from putting it
into the ring buffer.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 31 +++++++++++++------------------
1 file changed, 13 insertions(+), 18 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index cda193f6de76..ec30f1bdac0c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -198,8 +198,7 @@ struct aio_kiocb {
struct kioctx *ki_ctx;
kiocb_cancel_fn *ki_cancel;
- struct iocb __user *ki_user_iocb; /* user's aiocb */
- __u64 ki_user_data; /* user's data for completion */
+ struct io_event ki_res;
struct list_head ki_list; /* the aio core uses this
* for cancellation */
@@ -1078,15 +1077,6 @@ static inline void iocb_put(struct aio_kiocb *iocb)
iocb_destroy(iocb);
}
-static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
- long res, long res2)
-{
- ev->obj = (u64)(unsigned long)iocb->ki_user_iocb;
- ev->data = iocb->ki_user_data;
- ev->res = res;
- ev->res2 = res2;
-}
-
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
@@ -1098,6 +1088,8 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
unsigned tail, pos, head;
unsigned long flags;
+ iocb->ki_res.res = res;
+ iocb->ki_res.res2 = res2;
/*
* Add a completion event to the ring buffer. Must be done holding
* ctx->completion_lock to prevent other code from messing with the tail
@@ -1114,14 +1106,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- aio_fill_event(event, iocb, res, res2);
+ *event = iocb->ki_res;
kunmap_atomic(ev_page);
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
- pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
- ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
- res, res2);
+ pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
+ (void __user *)(unsigned long)iocb->ki_res.obj,
+ iocb->ki_res.data, iocb->ki_res.res, iocb->ki_res.res2);
/* after flagging the request as done, we
* must never even look at it again
@@ -1838,8 +1830,10 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
goto out_put_req;
}
- req->ki_user_iocb = user_iocb;
- req->ki_user_data = iocb->aio_data;
+ req->ki_res.obj = (u64)(unsigned long)user_iocb;
+ req->ki_res.data = iocb->aio_data;
+ req->ki_res.res = 0;
+ req->ki_res.res2 = 0;
switch (iocb->aio_lio_opcode) {
case IOCB_CMD_PREAD:
@@ -2009,6 +2003,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct aio_kiocb *kiocb;
int ret = -EINVAL;
u32 key;
+ u64 obj = (u64)(unsigned long)iocb;
if (unlikely(get_user(key, &iocb->aio_key)))
return -EFAULT;
@@ -2022,7 +2017,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
spin_lock_irq(&ctx->ctx_lock);
/* TODO: use a hash or array, this sucks. */
list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
- if (kiocb->ki_user_iocb == iocb) {
+ if (kiocb->ki_res.obj == obj) {
ret = kiocb->ki_cancel(&kiocb->rw);
list_del_init(&kiocb->ki_list);
break;

View File

@@ -0,0 +1,101 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 Mar 2019 19:49:55 -0500
Subject: [13/14] aio: store event at final iocb_put()
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=aab66dfb757aa5b211ec6b0c322b42f4ef5ab34f
commit 2bb874c0d873d13bd9b9b9c6d7b7c4edab18c8b4 upstream.
Instead of having aio_complete() set ->ki_res.{res,res2}, do that
explicitly in its callers, drop the reference (as aio_complete()
used to do) and delay the rest until the final iocb_put().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index ec30f1bdac0c..556ee620038f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1071,16 +1071,10 @@ static inline void iocb_destroy(struct aio_kiocb *iocb)
kmem_cache_free(kiocb_cachep, iocb);
}
-static inline void iocb_put(struct aio_kiocb *iocb)
-{
- if (refcount_dec_and_test(&iocb->ki_refcnt))
- iocb_destroy(iocb);
-}
-
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
-static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
+static void aio_complete(struct aio_kiocb *iocb)
{
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
@@ -1088,8 +1082,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
unsigned tail, pos, head;
unsigned long flags;
- iocb->ki_res.res = res;
- iocb->ki_res.res2 = res2;
/*
* Add a completion event to the ring buffer. Must be done holding
* ctx->completion_lock to prevent other code from messing with the tail
@@ -1155,7 +1147,14 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
if (waitqueue_active(&ctx->wait))
wake_up(&ctx->wait);
- iocb_put(iocb);
+}
+
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+ if (refcount_dec_and_test(&iocb->ki_refcnt)) {
+ aio_complete(iocb);
+ iocb_destroy(iocb);
+ }
}
/* aio_read_events_ring
@@ -1429,7 +1428,9 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
file_end_write(kiocb->ki_filp);
}
- aio_complete(iocb, res, res2);
+ iocb->ki_res.res = res;
+ iocb->ki_res.res2 = res2;
+ iocb_put(iocb);
}
static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
@@ -1577,11 +1578,10 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
static void aio_fsync_work(struct work_struct *work)
{
- struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
- int ret;
+ struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work);
- ret = vfs_fsync(req->file, req->datasync);
- aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+ iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync);
+ iocb_put(iocb);
}
static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
@@ -1602,7 +1602,8 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
{
- aio_complete(iocb, mangle_poll(mask), 0);
+ iocb->ki_res.res = mangle_poll(mask);
+ iocb_put(iocb);
}
static void aio_poll_complete_work(struct work_struct *work)

View File

@@ -0,0 +1,225 @@
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 7 Mar 2019 21:45:41 -0500
Subject: [14/14] Fix aio_poll() races
Origin: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit?id=e9e47779aaa7212ccb75f8d8d4d16ab188efb313
commit af5c72b1fc7a00aa484e90b0c4e0eeb582545634 upstream.
aio_poll() has to cope with several unpleasant problems:
* requests that might stay around indefinitely need to
be made visible for io_cancel(2); that must not be done to
a request already completed, though.
* in cases when ->poll() has placed us on a waitqueue,
wakeup might have happened (and request completed) before ->poll()
returns.
* worse, in some early wakeup cases request might end
up re-added into the queue later - we can't treat "woken up and
currently not in the queue" as "it's not going to stick around
indefinitely"
* ... moreover, ->poll() might have decided not to
put it on any queues to start with, and that needs to be distinguished
from the previous case
* ->poll() might have tried to put us on more than one queue.
Only the first will succeed for aio poll, so we might end up missing
wakeups. OTOH, we might very well notice that only after the
wakeup hits and request gets completed (all before ->poll() gets
around to the second poll_wait()). In that case it's too late to
decide that we have an error.
req->woken was an attempt to deal with that. Unfortunately, it was
broken. What we need to keep track of is not that wakeup has happened -
the thing might come back after that. It's that async reference is
already gone and won't come back, so we can't (and needn't) put the
request on the list of cancellables.
The easiest case is "request hadn't been put on any waitqueues"; we
can tell by seeing NULL apt.head, and in that case there won't be
anything async. We should either complete the request ourselves
(if vfs_poll() reports anything of interest) or return an error.
In all other cases we get exclusion with wakeups by grabbing the
queue lock.
If request is currently on queue and we have something interesting
from vfs_poll(), we can steal it and complete the request ourselves.
If it's on queue and vfs_poll() has not reported anything interesting,
we either put it on the cancellable list, or, if we know that it
hadn't been put on all queues ->poll() wanted it on, we steal it and
return an error.
If it's _not_ on queue, it's either been already dealt with (in which
case we do nothing), or there's aio_poll_complete_work() about to be
executed. In that case we either put it on the cancellable list,
or, if we know it hadn't been put on all queues ->poll() wanted it on,
simulate what cancel would've done.
It's a lot more convoluted than I'd like it to be. Single-consumer APIs
suck, and unfortunately aio is not an exception...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/aio.c | 90 +++++++++++++++++++++++++-------------------------------
1 file changed, 40 insertions(+), 50 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 556ee620038f..911e23087dfb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -175,7 +175,7 @@ struct poll_iocb {
struct file *file;
struct wait_queue_head *head;
__poll_t events;
- bool woken;
+ bool done;
bool cancelled;
struct wait_queue_entry wait;
struct work_struct work;
@@ -1600,12 +1600,6 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
return 0;
}
-static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
-{
- iocb->ki_res.res = mangle_poll(mask);
- iocb_put(iocb);
-}
-
static void aio_poll_complete_work(struct work_struct *work)
{
struct poll_iocb *req = container_of(work, struct poll_iocb, work);
@@ -1631,9 +1625,11 @@ static void aio_poll_complete_work(struct work_struct *work)
return;
}
list_del_init(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ req->done = true;
spin_unlock_irq(&ctx->ctx_lock);
- aio_poll_complete(iocb, mask);
+ iocb_put(iocb);
}
/* assumes we are called with irqs disabled */
@@ -1661,31 +1657,27 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
__poll_t mask = key_to_poll(key);
unsigned long flags;
- req->woken = true;
-
/* for instances that support it check for an event match first: */
- if (mask) {
- if (!(mask & req->events))
- return 0;
+ if (mask && !(mask & req->events))
+ return 0;
+ list_del_init(&req->wait.entry);
+
+ if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
/*
* Try to complete the iocb inline if we can. Use
* irqsave/irqrestore because not all filesystems (e.g. fuse)
* call this function with IRQs disabled and because IRQs
* have to be disabled before ctx_lock is obtained.
*/
- if (spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
- list_del(&iocb->ki_list);
- spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
-
- list_del_init(&req->wait.entry);
- aio_poll_complete(iocb, mask);
- return 1;
- }
+ list_del(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ req->done = true;
+ spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags);
+ iocb_put(iocb);
+ } else {
+ schedule_work(&req->work);
}
-
- list_del_init(&req->wait.entry);
- schedule_work(&req->work);
return 1;
}
@@ -1717,6 +1709,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
struct kioctx *ctx = aiocb->ki_ctx;
struct poll_iocb *req = &aiocb->poll;
struct aio_poll_table apt;
+ bool cancel = false;
__poll_t mask;
/* reject any unknown events outside the normal event mask. */
@@ -1730,7 +1723,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
req->head = NULL;
- req->woken = false;
+ req->done = false;
req->cancelled = false;
apt.pt._qproc = aio_poll_queue_proc;
@@ -1743,36 +1736,33 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
init_waitqueue_func_entry(&req->wait, aio_poll_wake);
mask = vfs_poll(req->file, &apt.pt) & req->events;
- if (unlikely(!req->head)) {
- /* we did not manage to set up a waitqueue, done */
- goto out;
- }
-
spin_lock_irq(&ctx->ctx_lock);
- spin_lock(&req->head->lock);
- if (req->woken) {
- /* wake_up context handles the rest */
- mask = 0;
+ if (likely(req->head)) {
+ spin_lock(&req->head->lock);
+ if (unlikely(list_empty(&req->wait.entry))) {
+ if (apt.error)
+ cancel = true;
+ apt.error = 0;
+ mask = 0;
+ }
+ if (mask || apt.error) {
+ list_del_init(&req->wait.entry);
+ } else if (cancel) {
+ WRITE_ONCE(req->cancelled, true);
+ } else if (!req->done) { /* actually waiting for an event */
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ aiocb->ki_cancel = aio_poll_cancel;
+ }
+ spin_unlock(&req->head->lock);
+ }
+ if (mask) { /* no async, we'd stolen it */
+ aiocb->ki_res.res = mangle_poll(mask);
apt.error = 0;
- } else if (mask || apt.error) {
- /* if we get an error or a mask we are done */
- WARN_ON_ONCE(list_empty(&req->wait.entry));
- list_del_init(&req->wait.entry);
- } else {
- /* actually waiting for an event */
- list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
- aiocb->ki_cancel = aio_poll_cancel;
}
- spin_unlock(&req->head->lock);
spin_unlock_irq(&ctx->ctx_lock);
-
-out:
- if (unlikely(apt.error))
- return apt.error;
-
if (mask)
- aio_poll_complete(aiocb, mask);
- return 0;
+ iocb_put(aiocb);
+ return apt.error;
}
static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,

14
debian/patches/series vendored
View File

@@ -162,6 +162,20 @@ debian/i386-686-pae-pci-set-pci-nobios-by-default.patch
bugfix/all/xen-pciback-Don-t-disable-PCI_COMMAND-on-PCI-device-.patch
debian/ntfs-mark-it-as-broken.patch
bugfix/all/vfio-type1-Limit-DMA-mappings-per-container.patch
bugfix/all/0001-aio-clear-IOCB_HIPRI.patch
bugfix/all/0002-aio-use-assigned-completion-handler.patch
bugfix/all/0003-aio-separate-out-ring-reservation-from-req-allocatio.patch
bugfix/all/0004-aio-don-t-zero-entire-aio_kiocb-aio_get_req.patch
bugfix/all/0005-aio-use-iocb_put-instead-of-open-coding-it.patch
bugfix/all/0006-aio-split-out-iocb-copy-from-io_submit_one.patch
bugfix/all/0007-aio-abstract-out-io_event-filler-helper.patch
bugfix/all/0008-aio-initialize-kiocb-private-in-case-any-filesystems.patch
bugfix/all/0009-aio-simplify-and-fix-fget-fput-for-io_submit.patch
bugfix/all/0010-pin-iocb-through-aio.patch
bugfix/all/0011-aio-fold-lookup_kiocb-into-its-sole-caller.patch
bugfix/all/0012-aio-keep-io_event-in-aio_kiocb.patch
bugfix/all/0013-aio-store-event-at-final-iocb_put.patch
bugfix/all/0014-Fix-aio_poll-races.patch
# Fix exported symbol versions
bugfix/all/module-disable-matching-missing-version-crc.patch