Update to 3.2.24

Drop many bugfix patches that were merged upstream in this version.
Resolve conflicts between rt featureset and the leap second fixes.
Add patches to undo various ABI changes.

svn path=/dists/sid/linux/; revision=19293
Ben Hutchings 2012-07-26 01:04:02 +00:00
parent 14fd890c1d
commit aa7975939f
32 changed files with 301 additions and 1646 deletions

debian/changelog
View File

@@ -1,5 +1,34 @@
linux (3.2.23-2) UNRELEASED; urgency=low
linux (3.2.24-1) UNRELEASED; urgency=low
* New upstream stable update:
http://www.kernel.org/pub/linux/kernel/v3.x/ChangeLog-3.2.24
- sched/nohz: Rewrite and fix load-avg computation -- again
(Closes: #674153)
- libsas: fix taskfile corruption in sas_ata_qc_fill_rtf
- md/raid1: fix use-after-free bug in RAID1 data-check code.
- PCI: EHCI: fix crash during suspend on ASUS computers
- cpufreq / ACPI: Fix not loading acpi-cpufreq driver (regression in 3.2.2)
- block: fix infinite loop in __getblk_slow (regression in 3.2.19)
- PM / Hibernate: Hibernate/thaw fixes/improvements
- tcm_fc: Fix crash seen with aborts and large reads
- fifo: Do not restart open() if it already found a partner
- cifs: on CONFIG_HIGHMEM machines, limit the rsize/wsize to the kmap space
- UBIFS: fix a bug in empty space fix-up
- ore: Fix NFS crash by supporting any unaligned RAID IO
- ore: Remove support of partial IO request (NFS crash)
- pnfs-obj: don't leak objio_state if ore_write/read fails
- pnfs-obj: Fix __r4w_get_page when offset is beyond i_size
- dm raid1: fix crash with mirror recovery and discard
- dm raid1: set discard_zeroes_data_unsupported
- time: Fix bugs in leap-second handling (Closes: #679882)
+ ntp: Fix leap-second hrtimer livelock
+ timekeeping: Fix leapsecond triggered load spike issue
- bnx2x: fix checksum validation
- bnx2x: fix panic when TX ring is full
- eCryptfs: Gracefully refuse miscdev file ops on inherited/passed files
- ACPI / PM: Make acpi_pm_device_sleep_state() follow the specification
[ Ben Hutchings ]
* linux-image: Include package version in utsname version string
('uname -v' output) (Closes: #638878)
* linux-source: Drop support for version.$DISTRIBUTION

View File

@@ -1,33 +0,0 @@
From: Avi Kivity <avi@redhat.com>
Date: Sun, 22 Apr 2012 17:02:11 +0300
Subject: [PATCH] KVM: Fix buffer overflow in kvm_set_irq()
commit f2ebd422f71cda9c791f76f85d2ca102ae34a1ed upstream.
kvm_set_irq() has an internal buffer of three irq routing entries, allowing
connecting a GSI to three IRQ chips or one MSI. However setup_routing_entry()
does not properly enforce this, allowing three irqchip routes followed by
an MSI route to overflow the buffer.
Fix by ensuring that an MSI entry is added to an empty list.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
virt/kvm/irq_comm.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a6a0365..5afb431 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -332,6 +332,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
*/
hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link)
if (ei->type == KVM_IRQ_ROUTING_MSI ||
+ ue->type == KVM_IRQ_ROUTING_MSI ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return r;
--
1.7.10

View File

@@ -1,124 +0,0 @@
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 8 Feb 2012 13:39:15 -0500
Subject: [2/2] NFSv4: Further reduce the footprint of the idmapper
commit 685f50f9188ac1e8244d0340a9d6ea36b6136cec upstream.
Don't allocate the legacy idmapper tables until we actually need
them.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
[bwh: Backported to 3.2: adjust context in nfs_idmap_delete()]
---
fs/nfs/idmap.c | 42 ++++++++++++++++++++++++++++++++++++------
1 file changed, 36 insertions(+), 6 deletions(-)
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -323,7 +323,7 @@
struct idmap_hashtable {
__u8 h_type;
- struct idmap_hashent h_entries[IDMAP_HASH_SZ];
+ struct idmap_hashent *h_entries;
};
struct idmap {
@@ -378,20 +378,39 @@
return 0;
}
+static void
+idmap_alloc_hashtable(struct idmap_hashtable *h)
+{
+ if (h->h_entries != NULL)
+ return;
+ h->h_entries = kcalloc(IDMAP_HASH_SZ,
+ sizeof(*h->h_entries),
+ GFP_KERNEL);
+}
+
+static void
+idmap_free_hashtable(struct idmap_hashtable *h)
+{
+ int i;
+
+ if (h->h_entries == NULL)
+ return;
+ for (i = 0; i < IDMAP_HASH_SZ; i++)
+ kfree(h->h_entries[i].ih_name);
+ kfree(h->h_entries);
+}
+
void
nfs_idmap_delete(struct nfs_client *clp)
{
struct idmap *idmap = clp->cl_idmap;
- int i;
if (!idmap)
return;
rpc_unlink(idmap->idmap_dentry);
clp->cl_idmap = NULL;
- for (i = 0; i < ARRAY_SIZE(idmap->idmap_user_hash.h_entries); i++)
- kfree(idmap->idmap_user_hash.h_entries[i].ih_name);
- for (i = 0; i < ARRAY_SIZE(idmap->idmap_group_hash.h_entries); i++)
- kfree(idmap->idmap_group_hash.h_entries[i].ih_name);
+ idmap_free_hashtable(&idmap->idmap_user_hash);
+ idmap_free_hashtable(&idmap->idmap_group_hash);
kfree(idmap);
}
@@ -401,6 +420,8 @@
static inline struct idmap_hashent *
idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len)
{
+ if (h->h_entries == NULL)
+ return NULL;
return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ];
}
@@ -409,6 +430,8 @@
{
struct idmap_hashent *he = idmap_name_hash(h, name, len);
+ if (he == NULL)
+ return NULL;
if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
return NULL;
if (time_after(jiffies, he->ih_expires))
@@ -419,6 +442,8 @@
static inline struct idmap_hashent *
idmap_id_hash(struct idmap_hashtable* h, __u32 id)
{
+ if (h->h_entries == NULL)
+ return NULL;
return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ];
}
@@ -426,6 +451,9 @@
idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
{
struct idmap_hashent *he = idmap_id_hash(h, id);
+
+ if (he == NULL)
+ return NULL;
if (he->ih_id != id || he->ih_namelen == 0)
return NULL;
if (time_after(jiffies, he->ih_expires))
@@ -441,12 +469,14 @@
static inline struct idmap_hashent *
idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len)
{
+ idmap_alloc_hashtable(h);
return idmap_name_hash(h, name, len);
}
static inline struct idmap_hashent *
idmap_alloc_id(struct idmap_hashtable *h, __u32 id)
{
+ idmap_alloc_hashtable(h);
return idmap_id_hash(h, id);
}

View File

@@ -1,28 +0,0 @@
From: William Dauchy <wdauchy@gmail.com>
Date: Wed, 14 Mar 2012 12:32:04 +0100
Subject: [PATCH] NFSv4: Rate limit the state manager for lock reclaim warning
messages
commit 96dcadc2fdd111dca90d559f189a30c65394451a upstream.
Add a rate limit on `Lock reclaim failed` messages, since they could fill
up the system logs.
Signed-off-by: William Dauchy <wdauchy@gmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
[bwh: Backported to 3.2: add the 'NFS:' prefix at the same time]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1261,8 +1261,9 @@ restart:
spin_lock(&state->state_lock);
list_for_each_entry(lock, &state->lock_states, ls_locks) {
if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
- printk("%s: Lock reclaim failed!\n",
- __func__);
+ pr_warn_ratelimited("NFS: "
+ "%s: Lock reclaim "
+ "failed!\n", __func__);
}
spin_unlock(&state->state_lock);
nfs4_put_open_state(state);

View File

@@ -1,62 +0,0 @@
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 7 Feb 2012 14:59:05 -0500
Subject: [1/2] NFSv4: Reduce the footprint of the idmapper
commit d073e9b541e1ac3f52d72c3a153855d9a9ee3278 upstream.
Instead of pre-allocating the storage for all the strings, we can
significantly reduce the size of that table by doing the allocation
when we do the downcall.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
[bwh: Backported to 3.2: adjust context in nfs_idmap_delete()]
---
fs/nfs/idmap.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -318,7 +318,7 @@
unsigned long ih_expires;
__u32 ih_id;
size_t ih_namelen;
- char ih_name[IDMAP_NAMESZ];
+ const char *ih_name;
};
struct idmap_hashtable {
@@ -382,11 +382,16 @@
nfs_idmap_delete(struct nfs_client *clp)
{
struct idmap *idmap = clp->cl_idmap;
+ int i;
if (!idmap)
return;
rpc_unlink(idmap->idmap_dentry);
clp->cl_idmap = NULL;
+ for (i = 0; i < ARRAY_SIZE(idmap->idmap_user_hash.h_entries); i++)
+ kfree(idmap->idmap_user_hash.h_entries[i].ih_name);
+ for (i = 0; i < ARRAY_SIZE(idmap->idmap_group_hash.h_entries); i++)
+ kfree(idmap->idmap_group_hash.h_entries[i].ih_name);
kfree(idmap);
}
@@ -449,9 +454,14 @@
idmap_update_entry(struct idmap_hashent *he, const char *name,
size_t namelen, __u32 id)
{
+ char *str = kmalloc(namelen + 1, GFP_KERNEL);
+ if (str == NULL)
+ return;
+ kfree(he->ih_name);
he->ih_id = id;
- memcpy(he->ih_name, name, namelen);
- he->ih_name[namelen] = '\0';
+ memcpy(str, name, namelen);
+ str[namelen] = '\0';
+ he->ih_name = str;
he->ih_namelen = namelen;
he->ih_expires = jiffies + nfs_idmap_cache_timeout;
}

View File

@@ -1,34 +0,0 @@
From: Cloud Ren <cjren@qca.qualcomm.com>
Date: Tue, 3 Jul 2012 16:51:48 +0000
Subject: atl1c: fix issue of transmit queue 0 timed out
commit b94e52f62683dc0b00c6d1b58b80929a078c0fd5 upstream.
Some people report that atl1c could cause a system hang with the following
kernel trace info:
---------------------------------------
WARNING: at.../net/sched/sch_generic.c:258 dev_watchdog+0x1db/0x1d0()
...
NETDEV WATCHDOG: eth0 (atl1c): transmit queue 0 timed out
...
---------------------------------------
This is caused by calling netif_stop_queue when the cable link is down.
So remove netif_stop_queue, because link_watch will take it over.
Signed-off-by: xiong <xiong@qca.qualcomm.com>
Signed-off-by: Cloud Ren <cjren@qca.qualcomm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 1 -
1 file changed, 1 deletion(-)
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -267,7 +267,6 @@ static void atl1c_check_link_status(stru
dev_warn(&pdev->dev, "stop mac failed\n");
atl1c_set_aspm(hw, false);
netif_carrier_off(netdev);
- netif_stop_queue(netdev);
atl1c_phy_reset(hw);
atl1c_phy_init(&adapter->hw);
} else {

View File

@@ -1,42 +0,0 @@
From: Eldad Zack <eldad@fogrefinery.com>
Date: Sun, 22 Apr 2012 00:48:04 +0200
Subject: [PATCH] brcmsmac: "INTERMEDIATE but not AMPDU" only when tracing
commit 6ead629b27269c553c9092c47cd8f5ab0309ee3b upstream.
I keep getting the following messages on the log buffer:
[ 2167.097507] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
[ 2281.331305] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
[ 2281.332539] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
[ 2329.876605] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
[ 2329.877354] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
[ 2462.280756] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
[ 2615.651689] ieee80211 phy0: brcms_c_dotxstatus: INTERMEDIATE but not AMPDU
From the code comment I understand that this is something that can -
and does, quite frequently - happen.
Signed-off-by: Eldad Zack <eldad@fogrefinery.com>
Acked-by: Franky Lin<frankyl@broadcom.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
drivers/net/wireless/brcm80211/brcmsmac/main.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c
index 7083db7..b4d9279 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c
@@ -847,8 +847,7 @@ brcms_c_dotxstatus(struct brcms_c_info *wlc, struct tx_status *txs)
*/
if (!(txs->status & TX_STATUS_AMPDU)
&& (txs->status & TX_STATUS_INTERMEDIATE)) {
- wiphy_err(wlc->wiphy, "%s: INTERMEDIATE but not AMPDU\n",
- __func__);
+ BCMMSG(wlc->wiphy, "INTERMEDIATE but not AMPDU\n");
return false;
}
--
1.7.10

View File

@@ -1,39 +0,0 @@
From: Jason Baron <jbaron@redhat.com>
Date: Wed, 25 Apr 2012 16:01:47 -0700
Subject: epoll: clear the tfile_check_list on -ELOOP
commit 13d518074a952d33d47c428419693f63389547e9 upstream.
An epoll_ctl(,EPOLL_CTL_ADD,,) operation can return '-ELOOP' to prevent
circular epoll dependencies from being created. However, in that case we
do not properly clear the 'tfile_check_list'. Thus, add a call to
clear_tfile_check_list() for the -ELOOP case.
Signed-off-by: Jason Baron <jbaron@redhat.com>
Reported-by: Yurij M. Plotnikov <Yurij.Plotnikov@oktetlabs.ru>
Cc: Nelson Elhage <nelhage@nelhage.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Tested-by: Alexandra N. Kossovsky <Alexandra.Kossovsky@oktetlabs.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
fs/eventpoll.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 739b098..c0b3c70 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1663,8 +1663,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
if (op == EPOLL_CTL_ADD) {
if (is_file_epoll(tfile)) {
error = -ELOOP;
- if (ep_loop_check(ep, tfile) != 0)
+ if (ep_loop_check(ep, tfile) != 0) {
+ clear_tfile_check_list();
goto error_tgt_fput;
+ }
} else
list_add(&tfile->f_tfile_llink, &tfile_check_list);
}
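
The cycle check discussed above can be exercised from userspace. The following is a minimal illustrative sketch (an assumption for illustration, not part of the patch): it creates two epoll instances and tries to make each watch the other; the second EPOLL_CTL_ADD is expected to fail with ELOOP, which is the error path whose tfile_check_list cleanup this fix adds.

/* Illustrative sketch (not part of the patch): exercise the -ELOOP
 * path in epoll_ctl() whose cleanup the fix above adds. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
    int ep1 = epoll_create1(0);
    int ep2 = epoll_create1(0);
    struct epoll_event ev = { .events = EPOLLIN };

    if (ep1 < 0 || ep2 < 0) {
        perror("epoll_create1");
        return 1;
    }

    /* ep1 watches ep2: allowed. */
    ev.data.fd = ep2;
    if (epoll_ctl(ep1, EPOLL_CTL_ADD, ep2, &ev) < 0)
        perror("first EPOLL_CTL_ADD");

    /* ep2 watches ep1: would create a cycle, so the kernel rejects
     * it with -ELOOP and must clear tfile_check_list afterwards. */
    ev.data.fd = ep1;
    if (epoll_ctl(ep2, EPOLL_CTL_ADD, ep1, &ev) < 0)
        printf("second add failed as expected: %s\n", strerror(errno));

    close(ep1);
    close(ep2);
    return 0;
}

With the fix applied, the failed add leaves no stale entries behind, so later EPOLL_CTL_ADD calls keep working.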

View File

@@ -1,31 +0,0 @@
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 4 Jan 2012 21:22:51 -0500
Subject: [PATCH] ext4: Report max_batch_time option correctly
commit 1d526fc91bea04ee35b7599bf8b82f86c0aaf46c upstream.
Currently the value reported for max_batch_time is really the
value of min_batch_time.
Reported-by: Russell Coker <russell@coker.com.au>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
fs/ext4/super.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 35377d5..36570b7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1096,7 +1096,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
}
if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
seq_printf(seq, ",max_batch_time=%u",
- (unsigned) sbi->s_min_batch_time);
+ (unsigned) sbi->s_max_batch_time);
}
/*
--
1.7.10

View File

@@ -1,110 +0,0 @@
From: Anders Kaseorg <andersk@MIT.EDU>
Date: Sun, 15 Jul 2012 17:14:25 -0400
Subject: fifo: Do not restart open() if it already found a partner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
commit 05d290d66be6ef77a0b962ebecf01911bd984a78 upstream.
If a parent and child process open the two ends of a fifo, and the
child immediately exits, the parent may receive a SIGCHLD before its
open() returns. In that case, we need to make sure that open() will
return successfully after the SIGCHLD handler returns, instead of
throwing EINTR or being restarted. Otherwise, the restarted open()
would incorrectly wait for a second partner on the other end.
The following test demonstrates the EINTR that was wrongly thrown from
the parent's open(). Change .sa_flags = 0 to .sa_flags = SA_RESTART
to see a deadlock instead, in which the restarted open() waits for a
second reader that will never come. (On my systems, this happens
pretty reliably within about 5 to 500 iterations. Others report that
it manages to loop ~forever sometimes; YMMV.)
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#define CHECK(x) do if ((x) == -1) {perror(#x); abort();} while(0)
void handler(int signum) {}
int main()
{
struct sigaction act = {.sa_handler = handler, .sa_flags = 0};
CHECK(sigaction(SIGCHLD, &act, NULL));
CHECK(mknod("fifo", S_IFIFO | S_IRWXU, 0));
for (;;) {
int fd;
pid_t pid;
putc('.', stderr);
CHECK(pid = fork());
if (pid == 0) {
CHECK(fd = open("fifo", O_RDONLY));
_exit(0);
}
CHECK(fd = open("fifo", O_WRONLY));
CHECK(close(fd));
CHECK(waitpid(pid, NULL, 0));
}
}
This is what I suspect was causing the Git test suite to fail in
t9010-svn-fe.sh:
http://bugs.debian.org/678852
Signed-off-by: Anders Kaseorg <andersk@mit.edu>
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
fs/fifo.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/fs/fifo.c b/fs/fifo.c
index b1a524d..cf6f434 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -14,7 +14,7 @@
#include <linux/sched.h>
#include <linux/pipe_fs_i.h>
-static void wait_for_partner(struct inode* inode, unsigned int *cnt)
+static int wait_for_partner(struct inode* inode, unsigned int *cnt)
{
int cur = *cnt;
@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
if (signal_pending(current))
break;
}
+ return cur == *cnt ? -ERESTARTSYS : 0;
}
static void wake_up_partner(struct inode* inode)
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
* seen a writer */
filp->f_version = pipe->w_counter;
} else {
- wait_for_partner(inode, &pipe->w_counter);
- if(signal_pending(current))
+ if (wait_for_partner(inode, &pipe->w_counter))
goto err_rd;
}
}
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
wake_up_partner(inode);
if (!pipe->readers) {
- wait_for_partner(inode, &pipe->r_counter);
- if (signal_pending(current))
+ if (wait_for_partner(inode, &pipe->r_counter))
goto err_wr;
}
break;

View File

@@ -1,451 +0,0 @@
From: David Gibson <david@gibson.dropbear.id.au>
Date: Wed, 21 Mar 2012 16:34:12 -0700
Subject: [PATCH] hugepages: fix use after free bug in "quota" handling
commit 90481622d75715bfcb68501280a917dbfe516029 upstream.
hugetlbfs_{get,put}_quota() are badly named. They don't interact with the
general quota handling code, and they don't much resemble its behaviour.
Rather than being about maintaining limits on on-disk block usage by
particular users, they are instead about maintaining limits on in-memory
page usage (including anonymous MAP_PRIVATE copied-on-write pages)
associated with a particular hugetlbfs filesystem instance.
Worse, they work by having callbacks to the hugetlbfs filesystem code from
the low-level page handling code, in particular from free_huge_page().
This is a layering violation of itself, but more importantly, if the
kernel does a get_user_pages() on hugepages (which can happen from KVM
amongst others), then the free_huge_page() can be delayed until after the
associated inode has already been freed. If an unmount occurs at the
wrong time, even the hugetlbfs superblock where the "quota" limits are
stored may have been freed.
Andrew Barry proposed a patch to fix this by having hugepages store pointers
directly to the superblock, instead of storing a pointer to their address_space
and reaching the superblock from there, bumping the reference count as
appropriate to avoid it being freed.
Andrew Morton rejected that version, however, on the grounds that it made
the existing layering violation worse.
This is a reworked version of Andrew's patch, which removes the extra, and
some of the existing, layering violation. It works by introducing the
concept of a hugepage "subpool" at the lower hugepage mm layer - that is a
finite logical pool of hugepages to allocate from. hugetlbfs now creates
a subpool for each filesystem instance with a page limit set, and a
pointer to the subpool gets added to each allocated hugepage, instead of
the address_space pointer used now. The subpool has its own lifetime and
is only freed once all pages in it _and_ all other references to it (i.e.
superblocks) are gone.
subpools are optional - a NULL subpool pointer is taken by the code to
mean that no subpool limits are in effect.
Previous discussion of this bug found in: "Fix refcounting in hugetlbfs
quota handling.". See: https://lkml.org/lkml/2011/8/11/28 or
http://marc.info/?l=linux-mm&m=126928970510627&w=1
v2: Fixed a bug spotted by Hillf Danton, and removed the extra parameter to
alloc_huge_page() - since it already takes the vma, it is not necessary.
Signed-off-by: Andrew Barry <abarry@cray.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[bwh: Backported to 3.2: adjust context to apply after commit
c50ac050811d6485616a193eb0f37bfbd191cc89 'hugetlb: fix resv_map leak in
error path' which should be in 3.2.20]
---
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -626,9 +626,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
spin_lock(&sbinfo->stat_lock);
/* If no limits set, just report 0 for max/free/used
* blocks, like simple_statfs() */
- if (sbinfo->max_blocks >= 0) {
- buf->f_blocks = sbinfo->max_blocks;
- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+ if (sbinfo->spool) {
+ long free_pages;
+
+ spin_lock(&sbinfo->spool->lock);
+ buf->f_blocks = sbinfo->spool->max_hpages;
+ free_pages = sbinfo->spool->max_hpages
+ - sbinfo->spool->used_hpages;
+ buf->f_bavail = buf->f_bfree = free_pages;
+ spin_unlock(&sbinfo->spool->lock);
buf->f_files = sbinfo->max_inodes;
buf->f_ffree = sbinfo->free_inodes;
}
@@ -644,6 +650,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
if (sbi) {
sb->s_fs_info = NULL;
+
+ if (sbi->spool)
+ hugepage_put_subpool(sbi->spool);
+
kfree(sbi);
}
}
@@ -874,10 +884,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = sbinfo;
sbinfo->hstate = config.hstate;
spin_lock_init(&sbinfo->stat_lock);
- sbinfo->max_blocks = config.nr_blocks;
- sbinfo->free_blocks = config.nr_blocks;
sbinfo->max_inodes = config.nr_inodes;
sbinfo->free_inodes = config.nr_inodes;
+ sbinfo->spool = NULL;
+ if (config.nr_blocks != -1) {
+ sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
+ if (!sbinfo->spool)
+ goto out_free;
+ }
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = huge_page_size(config.hstate);
sb->s_blocksize_bits = huge_page_shift(config.hstate);
@@ -896,38 +910,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_root = root;
return 0;
out_free:
+ if (sbinfo->spool)
+ kfree(sbinfo->spool);
kfree(sbinfo);
return -ENOMEM;
}
-int hugetlb_get_quota(struct address_space *mapping, long delta)
-{
- int ret = 0;
- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
- if (sbinfo->free_blocks > -1) {
- spin_lock(&sbinfo->stat_lock);
- if (sbinfo->free_blocks - delta >= 0)
- sbinfo->free_blocks -= delta;
- else
- ret = -ENOMEM;
- spin_unlock(&sbinfo->stat_lock);
- }
-
- return ret;
-}
-
-void hugetlb_put_quota(struct address_space *mapping, long delta)
-{
- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
- if (sbinfo->free_blocks > -1) {
- spin_lock(&sbinfo->stat_lock);
- sbinfo->free_blocks += delta;
- spin_unlock(&sbinfo->stat_lock);
- }
-}
-
static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7adc492..cf01817 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -14,6 +14,15 @@ struct user_struct;
#include <linux/shm.h>
#include <asm/tlbflush.h>
+struct hugepage_subpool {
+ spinlock_t lock;
+ long count;
+ long max_hpages, used_hpages;
+};
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
+void hugepage_put_subpool(struct hugepage_subpool *spool);
+
int PageHuge(struct page *page);
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
@@ -129,12 +138,11 @@ enum {
};
struct hugetlbfs_sb_info {
- long max_blocks; /* blocks allowed */
- long free_blocks; /* blocks free */
long max_inodes; /* inodes allowed */
long free_inodes; /* inodes free */
spinlock_t stat_lock;
struct hstate *hstate;
+ struct hugepage_subpool *spool;
};
@@ -146,8 +154,6 @@ extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
struct user_struct **user, int creat_flags);
-int hugetlb_get_quota(struct address_space *mapping, long delta);
-void hugetlb_put_quota(struct address_space *mapping, long delta);
static inline int is_file_hugepages(struct file *file)
{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b1c3148..afa057a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -53,6 +53,84 @@ static unsigned long __initdata default_hstate_size;
*/
static DEFINE_SPINLOCK(hugetlb_lock);
+static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+{
+ bool free = (spool->count == 0) && (spool->used_hpages == 0);
+
+ spin_unlock(&spool->lock);
+
+ /* If no pages are used, and no other handles to the subpool
+ * remain, free the subpool the subpool remain */
+ if (free)
+ kfree(spool);
+}
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
+{
+ struct hugepage_subpool *spool;
+
+ spool = kmalloc(sizeof(*spool), GFP_KERNEL);
+ if (!spool)
+ return NULL;
+
+ spin_lock_init(&spool->lock);
+ spool->count = 1;
+ spool->max_hpages = nr_blocks;
+ spool->used_hpages = 0;
+
+ return spool;
+}
+
+void hugepage_put_subpool(struct hugepage_subpool *spool)
+{
+ spin_lock(&spool->lock);
+ BUG_ON(!spool->count);
+ spool->count--;
+ unlock_or_release_subpool(spool);
+}
+
+static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
+ long delta)
+{
+ int ret = 0;
+
+ if (!spool)
+ return 0;
+
+ spin_lock(&spool->lock);
+ if ((spool->used_hpages + delta) <= spool->max_hpages) {
+ spool->used_hpages += delta;
+ } else {
+ ret = -ENOMEM;
+ }
+ spin_unlock(&spool->lock);
+
+ return ret;
+}
+
+static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
+ long delta)
+{
+ if (!spool)
+ return;
+
+ spin_lock(&spool->lock);
+ spool->used_hpages -= delta;
+ /* If hugetlbfs_put_super couldn't free spool due to
+ * an outstanding quota reference, free it now. */
+ unlock_or_release_subpool(spool);
+}
+
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
+{
+ return HUGETLBFS_SB(inode->i_sb)->spool;
+}
+
+static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
+{
+ return subpool_inode(vma->vm_file->f_dentry->d_inode);
+}
+
/*
* Region tracking -- allows tracking of reservations and instantiated pages
* across the pages in a mapping.
@@ -540,9 +618,9 @@ static void free_huge_page(struct page *page)
*/
struct hstate *h = page_hstate(page);
int nid = page_to_nid(page);
- struct address_space *mapping;
+ struct hugepage_subpool *spool =
+ (struct hugepage_subpool *)page_private(page);
- mapping = (struct address_space *) page_private(page);
set_page_private(page, 0);
page->mapping = NULL;
BUG_ON(page_count(page));
@@ -558,8 +636,7 @@ static void free_huge_page(struct page *page)
enqueue_huge_page(h, page);
}
spin_unlock(&hugetlb_lock);
- if (mapping)
- hugetlb_put_quota(mapping, 1);
+ hugepage_subpool_put_pages(spool, 1);
}
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -977,11 +1054,12 @@ static void return_unused_surplus_pages(struct hstate *h,
/*
* Determine if the huge page at addr within the vma has an associated
* reservation. Where it does not we will need to logically increase
- * reservation and actually increase quota before an allocation can occur.
- * Where any new reservation would be required the reservation change is
- * prepared, but not committed. Once the page has been quota'd allocated
- * an instantiated the change should be committed via vma_commit_reservation.
- * No action is required on failure.
+ * reservation and actually increase subpool usage before an allocation
+ * can occur. Where any new reservation would be required the
+ * reservation change is prepared, but not committed. Once the page
+ * has been allocated from the subpool and instantiated the change should
+ * be committed via vma_commit_reservation. No action is required on
+ * failure.
*/
static long vma_needs_reservation(struct hstate *h,
struct vm_area_struct *vma, unsigned long addr)
@@ -1030,24 +1108,24 @@ static void vma_commit_reservation(struct hstate *h,
static struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve)
{
+ struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct page *page;
- struct address_space *mapping = vma->vm_file->f_mapping;
- struct inode *inode = mapping->host;
long chg;
/*
- * Processes that did not create the mapping will have no reserves and
- * will not have accounted against quota. Check that the quota can be
- * made before satisfying the allocation
- * MAP_NORESERVE mappings may also need pages and quota allocated
- * if no reserve mapping overlaps.
+ * Processes that did not create the mapping will have no
+ * reserves and will not have accounted against subpool
+ * limit. Check that the subpool limit can be made before
+ * satisfying the allocation MAP_NORESERVE mappings may also
+ * need pages and subpool limit allocated allocated if no reserve
+ * mapping overlaps.
*/
chg = vma_needs_reservation(h, vma, addr);
if (chg < 0)
return ERR_PTR(-VM_FAULT_OOM);
if (chg)
- if (hugetlb_get_quota(inode->i_mapping, chg))
+ if (hugepage_subpool_get_pages(spool, chg))
return ERR_PTR(-VM_FAULT_SIGBUS);
spin_lock(&hugetlb_lock);
@@ -1057,12 +1135,12 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
if (!page) {
page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
if (!page) {
- hugetlb_put_quota(inode->i_mapping, chg);
+ hugepage_subpool_put_pages(spool, chg);
return ERR_PTR(-VM_FAULT_SIGBUS);
}
}
- set_page_private(page, (unsigned long) mapping);
+ set_page_private(page, (unsigned long)spool);
vma_commit_reservation(h, vma, addr);
@@ -2083,6 +2161,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
{
struct hstate *h = hstate_vma(vma);
struct resv_map *reservations = vma_resv_map(vma);
+ struct hugepage_subpool *spool = subpool_vma(vma);
unsigned long reserve;
unsigned long start;
unsigned long end;
@@ -2098,7 +2177,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
if (reserve) {
hugetlb_acct_memory(h, -reserve);
- hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
+ hugepage_subpool_put_pages(spool, reserve);
}
}
}
@@ -2331,7 +2410,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
address = address & huge_page_mask(h);
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
+ (vma->vm_pgoff >> PAGE_SHIFT);
- mapping = (struct address_space *)page_private(page);
+ mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
/*
* Take the mapping lock for the duration of the table walk. As
@@ -2884,11 +2963,12 @@ int hugetlb_reserve_pages(struct inode *inode,
{
long ret, chg;
struct hstate *h = hstate_inode(inode);
+ struct hugepage_subpool *spool = subpool_inode(inode);
/*
* Only apply hugepage reservation if asked. At fault time, an
* attempt will be made for VM_NORESERVE to allocate a page
- * and filesystem quota without using reserves
+ * without using reserves
*/
if (vm_flags & VM_NORESERVE)
return 0;
@@ -2915,19 +2995,19 @@ int hugetlb_reserve_pages(struct inode *inode,
goto out_err;
}
- /* There must be enough filesystem quota for the mapping */
- if (hugetlb_get_quota(inode->i_mapping, chg)) {
+ /* There must be enough pages in the subpool for the mapping */
+ if (hugepage_subpool_get_pages(spool, chg)) {
ret = -ENOSPC;
goto out_err;
}
/*
* Check enough hugepages are available for the reservation.
- * Hand back the quota if there are not
+ * Hand the pages back to the subpool if there are not
*/
ret = hugetlb_acct_memory(h, chg);
if (ret < 0) {
- hugetlb_put_quota(inode->i_mapping, chg);
+ hugepage_subpool_put_pages(spool, chg);
goto out_err;
}
@@ -2949,12 +3029,13 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
{
struct hstate *h = hstate_inode(inode);
long chg = region_truncate(&inode->i_mapping->private_list, offset);
+ struct hugepage_subpool *spool = subpool_inode(inode);
spin_lock(&inode->i_lock);
inode->i_blocks -= (blocks_per_huge_page(h) * freed);
spin_unlock(&inode->i_lock);
- hugetlb_put_quota(inode->i_mapping, (chg - freed));
+ hugepage_subpool_put_pages(spool, (chg - freed));
hugetlb_acct_memory(h, -(chg - freed));
}
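
The subpool lifetime rule described in the commit message above ("only freed once all pages in it and all other references to it are gone") can be modelled in a few lines of plain C. This is an illustrative userspace sketch with assumed names, not the kernel code (which is in the mm/hugetlb.c hunks above); a pthread mutex stands in for the spinlock.

/* Userspace model (assumed names, not kernel code) of the subpool
 * lifetime rule: the pool is freed only once the last handle is gone
 * AND no pages remain accounted to it. */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct subpool {
    pthread_mutex_t lock;
    long count;                 /* handles, e.g. the superblock */
    long max_pages, used_pages;
};

static void unlock_or_release(struct subpool *sp)
{
    bool free_it = (sp->count == 0) && (sp->used_pages == 0);

    pthread_mutex_unlock(&sp->lock);
    if (free_it) {
        pthread_mutex_destroy(&sp->lock);
        free(sp);
    }
}

struct subpool *subpool_new(long max_pages)
{
    struct subpool *sp = calloc(1, sizeof(*sp));

    if (!sp)
        return NULL;
    pthread_mutex_init(&sp->lock, NULL);
    sp->count = 1;
    sp->max_pages = max_pages;
    return sp;
}

void subpool_put(struct subpool *sp)        /* drop a handle */
{
    pthread_mutex_lock(&sp->lock);
    sp->count--;
    unlock_or_release(sp);
}

int subpool_get_pages(struct subpool *sp, long delta)
{
    int ret = 0;

    pthread_mutex_lock(&sp->lock);
    if (sp->used_pages + delta <= sp->max_pages)
        sp->used_pages += delta;
    else
        ret = -1;               /* over the pool limit */
    pthread_mutex_unlock(&sp->lock);
    return ret;
}

void subpool_put_pages(struct subpool *sp, long delta)
{
    pthread_mutex_lock(&sp->lock);
    sp->used_pages -= delta;
    /* the last handle may already be gone; free the pool if so */
    unlock_or_release(sp);
}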

View File

@@ -1,45 +0,0 @@
From: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Date: Tue, 17 Jan 2012 14:50:51 -0200
Subject: [PATCH] kbuild: do not check for ancient modutils tools
commit 620c231c7a7f48745094727bb612f6321cfc8844 upstream.
scripts/depmod.sh checks for the output of '-V' expecting that it has
module-init-tools in it. It's a hack to prevent users from using
modutils instead of module-init-tools, that only works with 2.4.x
kernels. This however prints an annoying warning for kmod tool, that is
currently replacing module-init-tools.
Rather than putting another check for kmod's version, just remove it
since users of 2.4.x kernel are unlikely to upgrade to 3.x, and if they
do, let depmod fail in that case because they should know what they are
doing.
Signed-off-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Acked-by: WANG Cong <amwang@redhat.com>
Acked-By: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
scripts/depmod.sh | 6 ------
1 files changed, 0 insertions(+), 6 deletions(-)
diff --git a/scripts/depmod.sh b/scripts/depmod.sh
index a272356..2ae4817 100755
--- a/scripts/depmod.sh
+++ b/scripts/depmod.sh
@@ -9,12 +9,6 @@ fi
DEPMOD=$1
KERNELRELEASE=$2
-if ! "$DEPMOD" -V 2>/dev/null | grep -q module-init-tools; then
- echo "Warning: you may need to install module-init-tools" >&2
- echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt" >&2
- sleep 1
-fi
-
if ! test -r System.map -a -x "$DEPMOD"; then
exit 0
fi
--
1.7.9.1

View File

@@ -1,61 +0,0 @@
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 2 May 2012 11:41:30 +0800
Subject: [1/5] macvtap: zerocopy: fix offset calculation when building skb
commit 3afc9621f15701c557e60f61eba9242bac2771dd upstream.
This patch fixes the offset calculation when building skb:
- offset1 was used as the skb data offset, not the vector offset
- reset offset to zero only when we advance to next vector
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/net/macvtap.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 0427c65..bd4a70d 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -505,10 +505,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
if (copy > size) {
++from;
--count;
- }
+ offset = 0;
+ } else
+ offset += size;
copy -= size;
offset1 += size;
- offset = 0;
}
if (len == offset1)
@@ -519,13 +520,13 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
int num_pages;
unsigned long base;
- len = from->iov_len - offset1;
+ len = from->iov_len - offset;
if (!len) {
- offset1 = 0;
+ offset = 0;
++from;
continue;
}
- base = (unsigned long)from->iov_base + offset1;
+ base = (unsigned long)from->iov_base + offset;
size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if ((num_pages != size) ||
@@ -546,7 +547,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
len -= size;
i++;
}
- offset1 = 0;
+ offset = 0;
++from;
}
return 0;
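
The bookkeeping this fix restores is easier to see outside the kernel. Below is a hypothetical userspace analogue (names and structure are assumptions, not the driver code): off_in_vec plays the role of 'offset' (position inside the current iovec element, reset only when advancing to the next element), while off_in_skb plays the role of 'offset1' (position in the destination buffer, which only ever grows).

/* Hypothetical userspace analogue of the two offsets fixed above:
 * off_in_vec ~ 'offset'  - position inside the current iovec element,
 *                          reset to 0 only when moving to the next one;
 * off_in_skb ~ 'offset1' - position in the destination buffer. */
#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

static size_t copy_from_iovec(char *dst, size_t len,
                              const struct iovec *iov, int count)
{
    size_t off_in_vec = 0;      /* offset within the current element */
    size_t off_in_skb = 0;      /* offset within dst */

    while (len && count) {
        size_t avail = iov->iov_len - off_in_vec;
        size_t chunk = avail < len ? avail : len;

        memcpy(dst + off_in_skb,
               (const char *)iov->iov_base + off_in_vec, chunk);

        off_in_skb += chunk;    /* destination offset always advances */
        len -= chunk;

        if (chunk == avail) {
            /* consumed this element: advance and reset */
            iov++;
            count--;
            off_in_vec = 0;
        } else {
            off_in_vec += chunk;
        }
    }
    return off_in_skb;          /* bytes actually copied */
}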

View File

@@ -1,41 +0,0 @@
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 2 May 2012 11:41:44 +0800
Subject: [2/5] macvtap: zerocopy: fix truesize underestimation
commit 4ef67ebedffa44ed9939b34708ac2fee06d2f65f upstream.
As the skb fragments were pinned/built from user pages, we should
account for the pages instead of the length in truesize.
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/net/macvtap.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index bd4a70d..7cb2684 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -519,6 +519,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
struct page *page[MAX_SKB_FRAGS];
int num_pages;
unsigned long base;
+ unsigned long truesize;
len = from->iov_len - offset;
if (!len) {
@@ -533,10 +534,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
(num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
/* put_page is in skb free */
return -EFAULT;
+ truesize = size * PAGE_SIZE;
skb->data_len += len;
skb->len += len;
- skb->truesize += len;
- atomic_add(len, &skb->sk->sk_wmem_alloc);
+ skb->truesize += truesize;
+ atomic_add(truesize, &skb->sk->sk_wmem_alloc);
while (len) {
int off = base & ~PAGE_MASK;
int size = min_t(int, len, PAGE_SIZE - off);

View File

@@ -1,35 +0,0 @@
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 2 May 2012 11:41:58 +0800
Subject: [3/5] macvtap: zerocopy: put page when fail to get all requested
user pages
commit 02ce04bb3d28c3333231f43bca677228dbc686fe upstream.
When get_user_pages_fast() fails to get all requested pages, we cannot use
kfree_skb() to free them, as they have not been added to the skb fragments.
So we need to call put_page() instead.
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/net/macvtap.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 7cb2684..9ab182a 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -531,9 +531,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
if ((num_pages != size) ||
- (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
- /* put_page is in skb free */
+ (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) {
+ for (i = 0; i < num_pages; i++)
+ put_page(page[i]);
return -EFAULT;
+ }
truesize = size * PAGE_SIZE;
skb->data_len += len;
skb->len += len;

View File

@@ -1,48 +0,0 @@
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 2 May 2012 11:42:06 +0800
Subject: [4/5] macvtap: zerocopy: set SKBTX_DEV_ZEROCOPY only when skb is
built successfully
commit 01d6657b388438def19c8baaea28e742b6ed32ec upstream.
Currently SKBTX_DEV_ZEROCOPY is set unconditionally after
zerocopy_sg_from_iovec(); this leads to a NULL pointer dereference when
macvtap fails to build the zerocopy skb, because destructor_arg was not
initialized. Solve this by setting the flag only after the skb has been
built successfully.
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/net/macvtap.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 9ab182a..a4ff694 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -699,10 +699,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
if (!skb)
goto err;
- if (zerocopy) {
+ if (zerocopy)
err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
- skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
- } else
+ else
err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
len);
if (err)
@@ -721,8 +720,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
rcu_read_lock_bh();
vlan = rcu_dereference_bh(q->vlan);
/* copy skb_ubuf_info for callback when skb has no error */
- if (zerocopy)
+ if (zerocopy) {
skb_shinfo(skb)->destructor_arg = m->msg_control;
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ }
if (vlan)
macvlan_start_xmit(skb, vlan->dev);
else

View File

@@ -1,78 +0,0 @@
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 2 May 2012 11:42:15 +0800
Subject: macvtap: zerocopy: validate vectors before building skb
commit b92946e2919134ebe2a4083e4302236295ea2a73 upstream.
There are several reasons that the vectors need to be validated:
- Return an error when the caller provides vectors whose number is greater than UIO_MAXIOV.
- Linearize part of the skb when userspace provides vectors greater than MAX_SKB_FRAGS.
- Return an error when userspace provides vectors whose total length may exceed
  MAX_SKB_FRAGS * PAGE_SIZE.
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/net/macvtap.c | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index a4ff694..163559c 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -529,9 +529,10 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
}
base = (unsigned long)from->iov_base + offset;
size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+ if (i + size > MAX_SKB_FRAGS)
+ return -EMSGSIZE;
num_pages = get_user_pages_fast(base, size, 0, &page[i]);
- if ((num_pages != size) ||
- (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) {
+ if (num_pages != size) {
for (i = 0; i < num_pages; i++)
put_page(page[i]);
return -EFAULT;
@@ -651,7 +652,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
int err;
struct virtio_net_hdr vnet_hdr = { 0 };
int vnet_hdr_len = 0;
- int copylen;
+ int copylen = 0;
bool zerocopy = false;
if (q->flags & IFF_VNET_HDR) {
@@ -680,15 +681,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
if (unlikely(len < ETH_HLEN))
goto err;
+ err = -EMSGSIZE;
+ if (unlikely(count > UIO_MAXIOV))
+ goto err;
+
if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
zerocopy = true;
if (zerocopy) {
+ /* Userspace may produce vectors with count greater than
+ * MAX_SKB_FRAGS, so we need to linearize parts of the skb
+ * to let the rest of data to be fit in the frags.
+ */
+ if (count > MAX_SKB_FRAGS) {
+ copylen = iov_length(iv, count - MAX_SKB_FRAGS);
+ if (copylen < vnet_hdr_len)
+ copylen = 0;
+ else
+ copylen -= vnet_hdr_len;
+ }
/* There are 256 bytes to be copied in skb, so there is enough
* room for skb expand head in case it is used.
* The rest buffer is mapped from userspace.
*/
- copylen = vnet_hdr.hdr_len;
+ if (copylen < vnet_hdr.hdr_len)
+ copylen = vnet_hdr.hdr_len;
if (!copylen)
copylen = GOODCOPY_LEN;
} else

View File

@@ -1,39 +0,0 @@
From: Shaohua Li <shli@kernel.org>
Date: Tue, 3 Jul 2012 15:57:19 +1000
Subject: raid5: delayed stripe fix
commit fab363b5ff502d1b39ddcfec04271f5858d9f26e upstream.
There is no locking around setting the STRIPE_DELAYED and STRIPE_PREREAD_ACTIVE
bits, but the two bits are related. A delayed stripe can be moved to the hold
list only when the preread-active stripe count is below IO_THRESHOLD. If a
stripe has both bits set, it will sit on the delayed list with a non-zero
preread count, and so will never leave the delayed list.
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
drivers/md/raid5.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 51169ec..7245a9d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
BUG_ON(!list_empty(&sh->lru));
BUG_ON(atomic_read(&conf->active_stripes)==0);
if (test_bit(STRIPE_HANDLE, &sh->state)) {
- if (test_bit(STRIPE_DELAYED, &sh->state))
+ if (test_bit(STRIPE_DELAYED, &sh->state) &&
+ !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
list_add_tail(&sh->lru, &conf->delayed_list);
else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0)
list_add_tail(&sh->lru, &conf->bitmap_list);
else {
+ clear_bit(STRIPE_DELAYED, &sh->state);
clear_bit(STRIPE_BIT_DELAY, &sh->state);
list_add_tail(&sh->lru, &conf->handle_list);
}

View File

@@ -1,37 +0,0 @@
From: Dave Jones <davej@redhat.com>
Date: Fri, 13 Jul 2012 13:35:36 -0400
Subject: Remove easily user-triggerable BUG from generic_setlease
commit 8d657eb3b43861064d36241e88d9d61c709f33f0 upstream.
This can be trivially triggered from userspace by passing in something unexpected.
kernel BUG at fs/locks.c:1468!
invalid opcode: 0000 [#1] SMP
RIP: 0010:generic_setlease+0xc2/0x100
Call Trace:
__vfs_setlease+0x35/0x40
fcntl_setlease+0x76/0x150
sys_fcntl+0x1c6/0x810
system_call_fastpath+0x1a/0x1f
Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
fs/locks.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/locks.c b/fs/locks.c
index 814c51d..fce6238 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1465,7 +1465,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
case F_WRLCK:
return generic_add_lease(filp, arg, flp);
default:
- BUG();
+ return -EINVAL;
}
}
EXPORT_SYMBOL(generic_setlease);

View File

@@ -1,46 +0,0 @@
From: Jan Kara <jack@suse.cz>
Date: Fri, 15 Jun 2012 12:52:46 +0200
Subject: scsi: Silence unnecessary warnings about ioctl to partition
commit 6d9359280753d2955f86d6411047516a9431eb51 upstream.
Sometimes, warnings about ioctls to a partition happen often enough that they
form the majority of the warnings in the kernel log and users complain. In some
cases warnings are about ioctls such as SG_IO so it's not good to get rid of
the warnings completely as they can ease debugging of userspace problems
when ioctl is refused.
Since I have seen warnings from lots of commands, including some proprietary
userspace applications, I don't think disallowing the ioctls for processes
with CAP_SYS_RAWIO will happen in the near future if ever. So lets just
stop warning for processes with CAP_SYS_RAWIO for which ioctl is allowed.
CC: Paolo Bonzini <pbonzini@redhat.com>
CC: James Bottomley <JBottomley@parallels.com>
CC: linux-scsi@vger.kernel.org
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
[bwh: Backported to 3.2: use ENOTTY, not ENOIOCTLCMD]
---
block/scsi_ioctl.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -721,11 +721,14 @@ int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
break;
}
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
/* In particular, rule out all resets and host-specific ioctls. */
printk_ratelimited(KERN_WARNING
"%s: sending ioctl %x to a partition!\n", current->comm, cmd);
- return capable(CAP_SYS_RAWIO) ? 0 : -ENOTTY;
+ return -ENOTTY;
}
EXPORT_SYMBOL(scsi_verify_blk_ioctl);

View File

@@ -1,32 +0,0 @@
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 2 Dec 2011 23:41:42 +0000
Subject: tcp: drop SYN+FIN messages
commit fdf5af0daf8019cec2396cdef8fb042d80fe71fa upstream.
Denys Fedoryshchenko reported that SYN+FIN attacks were bringing his
Linux machines to their limits.
Don't call conn_request() if the segment carries the FIN flag in addition to SYN.
Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
net/ipv4/tcp_input.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 78dd38c..0cbb440 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5811,6 +5811,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
if (th->syn) {
+ if (th->fin)
+ goto discard;
if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
return 1;

View File

@@ -1,45 +0,0 @@
From: Eugeni Dodonov <eugeni.dodonov@intel.com>
Date: Tue, 14 Feb 2012 11:44:48 -0200
Subject: [PATCH] drm/i915: do not enable RC6p on Sandy Bridge
commit 1c8ecf80fdee4e7b23a9e7da7ff9bd59ba2dcf96 upstream.
Based on the latest findings, RC6p seems to be responsible for RC6-related
issues on the Sandy Bridge platform. To work around those issues, the previous
solution was to completely disable RC6 on Sandy Bridge for the past few
releases, even if plain RC6 was not giving any issues.
What this patch does is preventing RC6p from being enabled on Sandy Bridge
even if users enable RC6 via a kernel parameter. So it won't change the
defaults in any way, but will ensure that if users do enable RC6 manually
it won't break their machines by enabling this extra state.
A proper fix for this (enabling specific RC6 states according to the GPU
generation) was proposed for the -next kernel, but we are too late in the
release process now to pick such changes.
Acked-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/intel_display.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d9b042b..049804e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8182,8 +8182,8 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
if (intel_enable_rc6(dev_priv->dev))
- rc6_mask = GEN6_RC_CTL_RC6p_ENABLE |
- GEN6_RC_CTL_RC6_ENABLE;
+ rc6_mask = GEN6_RC_CTL_RC6_ENABLE |
+ (IS_GEN7(dev_priv->dev)) ? GEN6_RC_CTL_RC6p_ENABLE : 0;
I915_WRITE(GEN6_RC_CONTROL,
rc6_mask |
--
1.7.9.1

View File

@@ -1,32 +0,0 @@
From: Eugeni Dodonov <eugeni.dodonov@intel.com>
Date: Thu, 23 Feb 2012 23:57:06 -0200
Subject: [PATCH] drm/i915: fix operator precedence when enabling RC6p
commit c0e2ee1bc0cf82eec89e26b7afe7e4db0561b7d9 upstream.
As noticed by Torsten Kaiser, the operator precedence can play tricks with
us here.
CC: Dave Airlie <airlied@redhat.com>
Signed-off-by: Eugeni Dodonov <eugeni.dodonov@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/intel_display.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e654f32..4871ba0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8194,7 +8194,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
if (intel_enable_rc6(dev_priv->dev))
rc6_mask = GEN6_RC_CTL_RC6_ENABLE |
- (IS_GEN7(dev_priv->dev)) ? GEN6_RC_CTL_RC6p_ENABLE : 0;
+ ((IS_GEN7(dev_priv->dev)) ? GEN6_RC_CTL_RC6p_ENABLE : 0);
I915_WRITE(GEN6_RC_CONTROL,
rc6_mask |
--
1.7.10
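
The pitfall behind the two i915 patches above is a plain C precedence rule: bitwise '|' binds tighter than the conditional operator, so without parentheses the whole OR expression becomes the ternary's condition. A minimal standalone demonstration (generic flags for illustration, not the driver code):

/* Standalone demonstration of the precedence bug fixed above:
 * '|' binds tighter than '?:', so the unparenthesised form makes the
 * whole OR expression the condition of the ternary. */
#include <stdio.h>

#define FLAG_A 0x1
#define FLAG_B 0x2

int main(void)
{
    int cond = 1;

    /* Parses as (FLAG_A | cond) ? FLAG_B : 0  ->  loses FLAG_A. */
    int buggy = FLAG_A | cond ? FLAG_B : 0;

    /* Intended meaning: FLAG_A always set, FLAG_B only if cond. */
    int fixed = FLAG_A | (cond ? FLAG_B : 0);

    printf("buggy=0x%x fixed=0x%x\n", buggy, fixed); /* 0x2 vs 0x3 */
    return 0;
}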

View File

@@ -0,0 +1,36 @@
From e858f73d794205cfda77ddfb4cbf86a1b5aeb72f Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 25 Jul 2012 05:00:02 +0100
Subject: [PATCH 1/4] hrtimer: Avoid ABI change in 3.2.24
struct hrtimer_cpu_base should not be allocated by modules. Move
the new member clock_was_set to the end and hide it from genksyms.
Revert the type change of active_bases.
---
include/linux/hrtimer.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index cc07d27..c2012e3 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -178,8 +178,7 @@ enum hrtimer_base_type {
*/
struct hrtimer_cpu_base {
raw_spinlock_t lock;
- unsigned int active_bases;
- unsigned int clock_was_set;
+ unsigned long active_bases;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
int hres_active;
@@ -190,6 +189,9 @@ struct hrtimer_cpu_base {
ktime_t max_hang_time;
#endif
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
+#ifndef __GENKSYMS__
+ unsigned int clock_was_set;
+#endif
};
static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)

View File

@@ -0,0 +1,85 @@
From 331cf8308dcde22597583c4be4eabdc5d168c703 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 25 Jul 2012 05:26:58 +0100
Subject: [PATCH 3/4] libsas: Avoid ABI change in 3.2.24
Revert commit 6ef1b512f4e6f936d89aa20be3d97a7ec7c290ac ('libsas: fix
taskfile corruption in sas_ata_qc_fill_rtf') and fix the bug in a
different way.
SATA command results must be converted from FIS to ata_taskfile
format. This is done in two stages via a temporary buffer in
struct sata_device, as the source and target addresses aren't
known at the same time (?).
The old code does conversion and then memcpy(), which is wrong
because the conversion only assigns some members and the rest
of the target structure should be left unchanged.
The upstream fix switches this to memcpy() and conversion, but
that changes the temporary buffer format and makes it larger
(the ABI change).
This fix changes the memcpy() to write only the target members
that would be written by the conversion function.
---
drivers/scsi/libsas/sas_ata.c | 15 +++++++++------
include/scsi/libsas.h | 2 +-
2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 4868fc9..c707410 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -112,12 +112,12 @@ static void sas_ata_task_done(struct sas_task *task)
if (stat->stat == SAS_PROTO_RESPONSE || stat->stat == SAM_STAT_GOOD ||
((stat->stat == SAM_STAT_CHECK_CONDITION &&
dev->sata_dev.command_set == ATAPI_COMMAND_SET))) {
- memcpy(dev->sata_dev.fis, resp->ending_fis, ATA_RESP_FIS_SIZE);
+ ata_tf_from_fis(resp->ending_fis, &dev->sata_dev.tf);
if (!link->sactive) {
- qc->err_mask |= ac_err_mask(dev->sata_dev.fis[2]);
+ qc->err_mask |= ac_err_mask(dev->sata_dev.tf.command);
} else {
- link->eh_info.err_mask |= ac_err_mask(dev->sata_dev.fis[2]);
+ link->eh_info.err_mask |= ac_err_mask(dev->sata_dev.tf.command);
if (unlikely(link->eh_info.err_mask))
qc->flags |= ATA_QCFLAG_FAILED;
}
@@ -138,8 +138,8 @@ static void sas_ata_task_done(struct sas_task *task)
qc->flags |= ATA_QCFLAG_FAILED;
}
- dev->sata_dev.fis[3] = 0x04; /* status err */
- dev->sata_dev.fis[2] = ATA_ERR;
+ dev->sata_dev.tf.feature = 0x04; /* status err */
+ dev->sata_dev.tf.command = ATA_ERR;
}
}
@@ -252,7 +252,10 @@ static bool sas_ata_qc_fill_rtf(struct ata_queued_cmd *qc)
{
struct domain_device *dev = qc->ap->private_data;
- ata_tf_from_fis(dev->sata_dev.fis, &qc->result_tf);
+ /* Copy only those fields that ata_tf_from_fis() sets */
+ memcpy(&qc->result_tf.hob_nsect, &dev->sata_dev.tf.hob_nsect,
+ sizeof(dev->sata_dev.tf) -
+ offsetof(struct ata_taskfile, hob_nsect));
return true;
}
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 1e100c6..4cd529d 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -172,7 +172,7 @@ struct sata_device {
struct ata_port *ap;
struct ata_host ata_host;
- u8 fis[ATA_RESP_FIS_SIZE];
+ struct ata_taskfile tf;
u32 sstatus;
u32 serror;
u32 scontrol;
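
The tail copy in sas_ata_qc_fill_rtf() above relies on a general C idiom: copying a contiguous run of struct members starting at a chosen field, with the size derived from offsetof(). A small self-contained sketch of the idiom (generic struct for illustration, not the libsas types):

/* Self-contained sketch of the partial-copy idiom used above: copy
 * only the members from 'c' onwards, leaving 'a' and 'b' untouched. */
#include <assert.h>
#include <stddef.h>
#include <string.h>

struct tf {
    unsigned char a, b;         /* must not be overwritten */
    unsigned char c, d, e;      /* the fields we want to copy */
};

static void copy_tail(struct tf *dst, const struct tf *src)
{
    memcpy(&dst->c, &src->c, sizeof(*src) - offsetof(struct tf, c));
}

int main(void)
{
    struct tf src = { .a = 1, .b = 2, .c = 3, .d = 4, .e = 5 };
    struct tf dst = { .a = 9, .b = 9, .c = 0, .d = 0, .e = 0 };

    copy_tail(&dst, &src);
    assert(dst.a == 9 && dst.b == 9);               /* preserved */
    assert(dst.c == 3 && dst.d == 4 && dst.e == 5); /* copied */
    return 0;
}

This mirrors the shape of the memcpy in the hunk above, which starts its copy at the hob_nsect member of struct ata_taskfile.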

View File

@@ -0,0 +1,23 @@
From 222f965efd8d504e374758209dcc8f56d37f62fc Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 25 Jul 2012 05:00:55 +0100
Subject: [PATCH 2/4] net: Avoid ABI change in 3.2.24
Don't renumber skb_shared_info TX flags.
---
include/linux/skbuff.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 53dc7e7..a3615cc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -214,7 +214,7 @@ enum {
SKBTX_IN_PROGRESS = 1 << 2,
/* device driver supports TX zero-copy buffers */
- SKBTX_DEV_ZEROCOPY = 1 << 3,
+ SKBTX_DEV_ZEROCOPY = 1 << 4,
};
/*

View File

@@ -0,0 +1,32 @@
From 70c7bdf78972dd72ceffbf8b88c5d9fec22dae01 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 25 Jul 2012 05:47:16 +0100
Subject: [PATCH 4/4] powerpc: cputime: Avoid ABI change in 3.2.24
Restore __cputime_msec_factor.
---
arch/powerpc/kernel/time.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index ec8affe..010b325 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -175,6 +175,8 @@ u64 __cputime_jiffies_factor;
EXPORT_SYMBOL(__cputime_jiffies_factor);
u64 __cputime_usec_factor;
EXPORT_SYMBOL(__cputime_usec_factor);
+u64 __cputime_msec_factor;
+EXPORT_SYMBOL(__cputime_msec_factor);
u64 __cputime_sec_factor;
EXPORT_SYMBOL(__cputime_sec_factor);
u64 __cputime_clockt_factor;
@@ -194,6 +196,8 @@ static void calc_cputime_factors(void)
__cputime_jiffies_factor = res.result_low;
div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
__cputime_usec_factor = res.result_low;
+ div128_by_32(1000, 0, tb_ticks_per_sec, &res);
+ __cputime_msec_factor = res.result_low;
div128_by_32(1, 0, tb_ticks_per_sec, &res);
__cputime_sec_factor = res.result_low;
div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
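
__cputime_msec_factor is computed the same way as the factors around it: div128_by_32(1000, 0, tb_ticks_per_sec, &res) leaves roughly 1000 * 2^64 / tb_ticks_per_sec in res.result_low, and a 64x64 -> 128-bit multiply that keeps only the high half then turns timebase ticks into milliseconds with no runtime division (the powerpc conversion helpers do this with a mulhdu()-style high multiply). A hedged userspace sketch of that fixed-point trick, relying on the GCC/Clang unsigned __int128 extension and assuming ticks_per_sec > 1000 so the factor fits in 64 bits:

#include <stdint.h>

static uint64_t msec_factor;	/* ~ floor(1000 * 2^64 / ticks_per_sec) */

static void calc_msec_factor(uint64_t ticks_per_sec)
{
	msec_factor = (uint64_t)(((unsigned __int128)1000 << 64) / ticks_per_sec);
}

/* High 64 bits of ticks * msec_factor == ticks * 1000 / ticks_per_sec. */
static uint64_t ticks_to_msecs(uint64_t ticks)
{
	return (uint64_t)(((unsigned __int128)ticks * msec_factor) >> 64);
}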


@ -9,6 +9,10 @@ code paths. This is a straightforward split, so we can avoid the
whole mess with raw seqlocks for RT.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bwh: Forward-ported to 3.2.24: adjust context; drop changes to
ntp_leap_second() function removed by commit
6b43ae8a619d17c4935c3320d2ef9e92bdeed05d ('ntp: Fix leap-second hrtimer
livelock')]
---
kernel/time/jiffies.c | 4 +-
kernel/time/ntp.c | 24 ++++++++----
@ -18,8 +22,6 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
kernel/time/timekeeping.c | 90 +++++++++++++++++++++++++------------------
6 files changed, 88 insertions(+), 59 deletions(-)
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a470154..21940eb 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -74,9 +74,9 @@ u64 get_jiffies_64(void)
@ -34,31 +36,9 @@ index a470154..21940eb 100644
return ret;
}
EXPORT_SYMBOL(get_jiffies_64);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 4b85a7a..419cbaa 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -358,7 +358,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
{
enum hrtimer_restart res = HRTIMER_NORESTART;
- write_seqlock(&xtime_lock);
+ raw_spin_lock(&xtime_lock);
+ write_seqcount_begin(&xtime_seq);
switch (time_state) {
case TIME_OK:
@@ -388,7 +389,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
break;
}
- write_sequnlock(&xtime_lock);
+ write_seqcount_end(&xtime_seq);
+ raw_spin_unlock(&xtime_lock);
return res;
}
@@ -663,7 +665,8 @@ int do_adjtimex(struct timex *txc)
@@ -623,7 +623,8 @@ int do_adjtimex(struct timex *txc)
getnstimeofday(&ts);
@ -68,7 +48,7 @@ index 4b85a7a..419cbaa 100644
if (txc->modes & ADJ_ADJTIME) {
long save_adjust = time_adjust;
@@ -705,7 +708,8 @@ int do_adjtimex(struct timex *txc)
@@ -665,7 +666,8 @@ int do_adjtimex(struct timex *txc)
/* fill PPS status fields */
pps_fill_timex(txc);
@ -78,7 +58,7 @@ index 4b85a7a..419cbaa 100644
txc->time.tv_sec = ts.tv_sec;
txc->time.tv_usec = ts.tv_nsec;
@@ -903,7 +907,8 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
@@ -863,7 +865,8 @@ void hardpps(const struct timespec *phas
pts_norm = pps_normalize_ts(*phase_ts);
@ -88,7 +68,7 @@ index 4b85a7a..419cbaa 100644
/* clear the error bits, they will be set again if needed */
time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
@@ -916,7 +921,8 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
@@ -876,7 +879,8 @@ void hardpps(const struct timespec *phas
* just start the frequency interval */
if (unlikely(pps_fbase.tv_sec == 0)) {
pps_fbase = *raw_ts;
@ -98,7 +78,7 @@ index 4b85a7a..419cbaa 100644
return;
}
@@ -931,7 +937,8 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
@@ -891,7 +895,8 @@ void hardpps(const struct timespec *phas
time_status |= STA_PPSJITTER;
/* restart the frequency calibration interval */
pps_fbase = *raw_ts;
@ -108,7 +88,7 @@ index 4b85a7a..419cbaa 100644
pr_err("hardpps: PPSJITTER: bad pulse\n");
return;
}
@@ -948,7 +955,8 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
@@ -908,7 +913,8 @@ void hardpps(const struct timespec *phas
hardpps_update_phase(pts_norm.nsec);
@ -118,8 +98,6 @@ index 4b85a7a..419cbaa 100644
}
EXPORT_SYMBOL(hardpps);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index da6c9ec..39de540 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -63,13 +63,15 @@ int tick_is_oneshot_available(void)
@ -140,7 +118,7 @@ index da6c9ec..39de540 100644
}
update_process_times(user_mode(get_irq_regs()));
@@ -130,9 +132,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
@@ -130,9 +132,9 @@ void tick_setup_periodic(struct clock_ev
ktime_t next;
do {
@ -152,22 +130,18 @@ index da6c9ec..39de540 100644
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 4e265b9..c91100d 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -141,4 +141,5 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
@@ -141,4 +141,5 @@ static inline int tick_device_is_functio
#endif
extern void do_timer(unsigned long ticks);
-extern seqlock_t xtime_lock;
+extern raw_spinlock_t xtime_lock;
+extern seqcount_t xtime_seq;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c923640..d7abd2f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -56,7 +56,8 @@ static void tick_do_update_jiffies64(ktime_t now)
@@ -56,7 +56,8 @@ static void tick_do_update_jiffies64(kti
return;
/* Reevalute with xtime_lock held */
@ -177,7 +151,7 @@ index c923640..d7abd2f 100644
delta = ktime_sub(now, last_jiffies_update);
if (delta.tv64 >= tick_period.tv64) {
@@ -79,7 +80,8 @@ static void tick_do_update_jiffies64(ktime_t now)
@@ -79,7 +80,8 @@ static void tick_do_update_jiffies64(kti
/* Keep the tick_next_period variable up to date */
tick_next_period = ktime_add(last_jiffies_update, tick_period);
}
@ -187,7 +161,7 @@ index c923640..d7abd2f 100644
}
/*
@@ -89,12 +91,14 @@ static ktime_t tick_init_jiffy_update(void)
@@ -89,12 +91,14 @@ static ktime_t tick_init_jiffy_update(vo
{
ktime_t period;
@ -204,7 +178,7 @@ index c923640..d7abd2f 100644
return period;
}
@@ -345,11 +349,11 @@ void tick_nohz_stop_sched_tick(int inidle)
@@ -345,11 +349,11 @@ void tick_nohz_stop_sched_tick(int inidl
ts->idle_calls++;
/* Read jiffies and the time when jiffies were updated last */
do {
@ -218,11 +192,9 @@ index c923640..d7abd2f 100644
if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
arch_needs_cpu(cpu)) {
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2378413..da9e1f9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -139,8 +139,8 @@ static inline s64 timekeeping_get_ns_raw(void)
@@ -139,8 +139,8 @@ static inline s64 timekeeping_get_ns_raw
* This read-write spinlock protects us from races in SMP while
* playing with xtime.
*/
@ -233,7 +205,7 @@ index 2378413..da9e1f9 100644
/*
* The current time
@@ -222,7 +222,7 @@ void getnstimeofday(struct timespec *ts)
@@ -242,7 +242,7 @@ void getnstimeofday(struct timespec *ts)
WARN_ON(timekeeping_suspended);
do {
@ -242,7 +214,7 @@ index 2378413..da9e1f9 100644
*ts = xtime;
nsecs = timekeeping_get_ns();
@@ -230,7 +230,7 @@ void getnstimeofday(struct timespec *ts)
@@ -250,7 +250,7 @@ void getnstimeofday(struct timespec *ts)
/* If arch requires, add in gettimeoffset() */
nsecs += arch_gettimeoffset();
@ -251,7 +223,7 @@ index 2378413..da9e1f9 100644
timespec_add_ns(ts, nsecs);
}
@@ -245,14 +245,14 @@ ktime_t ktime_get(void)
@@ -265,14 +265,14 @@ ktime_t ktime_get(void)
WARN_ON(timekeeping_suspended);
do {
@ -268,7 +240,7 @@ index 2378413..da9e1f9 100644
/*
* Use ktime_set/ktime_add_ns to create a proper ktime on
* 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -278,14 +278,14 @@ void ktime_get_ts(struct timespec *ts)
@@ -298,14 +298,14 @@ void ktime_get_ts(struct timespec *ts)
WARN_ON(timekeeping_suspended);
do {
@ -285,7 +257,7 @@ index 2378413..da9e1f9 100644
set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
ts->tv_nsec + tomono.tv_nsec + nsecs);
@@ -313,7 +313,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
@@ -333,7 +333,7 @@ void getnstime_raw_and_real(struct times
do {
u32 arch_offset;
@ -294,7 +266,7 @@ index 2378413..da9e1f9 100644
*ts_raw = raw_time;
*ts_real = xtime;
@@ -326,7 +326,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
@@ -346,7 +346,7 @@ void getnstime_raw_and_real(struct times
nsecs_raw += arch_offset;
nsecs_real += arch_offset;
@ -303,7 +275,7 @@ index 2378413..da9e1f9 100644
timespec_add_ns(ts_raw, nsecs_raw);
timespec_add_ns(ts_real, nsecs_real);
@@ -365,7 +365,8 @@ int do_settimeofday(const struct timespec *tv)
@@ -385,7 +385,8 @@ int do_settimeofday(const struct timespe
if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
@ -313,9 +285,9 @@ index 2378413..da9e1f9 100644
timekeeping_forward_now();
@@ -381,7 +382,8 @@ int do_settimeofday(const struct timespec *tv)
update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
timekeeper.mult);
@@ -397,7 +398,8 @@ int do_settimeofday(const struct timespe
timekeeping_update(true);
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ write_seqcount_end(&xtime_seq);
@ -323,7 +295,7 @@ index 2378413..da9e1f9 100644
/* signal hrtimers about time change */
clock_was_set();
@@ -405,7 +407,8 @@ int timekeeping_inject_offset(struct timespec *ts)
@@ -421,7 +423,8 @@ int timekeeping_inject_offset(struct tim
if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
@ -333,9 +305,9 @@ index 2378413..da9e1f9 100644
timekeeping_forward_now();
@@ -418,7 +421,8 @@ int timekeeping_inject_offset(struct timespec *ts)
update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
timekeeper.mult);
@@ -430,7 +433,8 @@ int timekeeping_inject_offset(struct tim
timekeeping_update(true);
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ write_seqcount_end(&xtime_seq);
@ -343,7 +315,7 @@ index 2378413..da9e1f9 100644
/* signal hrtimers about time change */
clock_was_set();
@@ -490,11 +494,11 @@ void getrawmonotonic(struct timespec *ts)
@@ -502,11 +506,11 @@ void getrawmonotonic(struct timespec *ts
s64 nsecs;
do {
@ -357,7 +329,7 @@ index 2378413..da9e1f9 100644
timespec_add_ns(ts, nsecs);
}
@@ -510,11 +514,11 @@ int timekeeping_valid_for_hres(void)
@@ -522,11 +526,11 @@ int timekeeping_valid_for_hres(void)
int ret;
do {
@ -371,7 +343,7 @@ index 2378413..da9e1f9 100644
return ret;
}
@@ -572,7 +576,8 @@ void __init timekeeping_init(void)
@@ -584,7 +588,8 @@ void __init timekeeping_init(void)
read_persistent_clock(&now);
read_boot_clock(&boot);
@ -381,8 +353,8 @@ index 2378413..da9e1f9 100644
ntp_init();
@@ -593,7 +598,8 @@ void __init timekeeping_init(void)
-boot.tv_sec, -boot.tv_nsec);
@@ -606,7 +611,8 @@ void __init timekeeping_init(void)
update_rt_offset();
total_sleep_time.tv_sec = 0;
total_sleep_time.tv_nsec = 0;
- write_sequnlock_irqrestore(&xtime_lock, flags);
@ -391,7 +363,7 @@ index 2378413..da9e1f9 100644
}
/* time in seconds when suspend began */
@@ -640,7 +646,8 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
@@ -659,14 +665,16 @@ void timekeeping_inject_sleeptime(struct
if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
return;
@ -401,9 +373,8 @@ index 2378413..da9e1f9 100644
timekeeping_forward_now();
__timekeeping_inject_sleeptime(delta);
@@ -650,7 +657,8 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
timekeeper.mult);
timekeeping_update(true);
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ write_seqcount_end(&xtime_seq);
@ -411,7 +382,7 @@ index 2378413..da9e1f9 100644
/* signal hrtimers about time change */
clock_was_set();
@@ -673,7 +681,8 @@ static void timekeeping_resume(void)
@@ -689,7 +697,8 @@ static void timekeeping_resume(void)
clocksource_resume();
@ -421,17 +392,17 @@ index 2378413..da9e1f9 100644
if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
ts = timespec_sub(ts, timekeeping_suspend_time);
@@ -683,7 +692,8 @@ static void timekeeping_resume(void)
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
@@ -700,7 +709,8 @@ static void timekeeping_resume(void)
timekeeper.ntp_error = 0;
timekeeping_suspended = 0;
timekeeping_update(false);
- write_sequnlock_irqrestore(&xtime_lock, flags);
+ write_seqcount_end(&xtime_seq);
+ raw_spin_unlock_irqrestore(&xtime_lock, flags);
touch_softlockup_watchdog();
@@ -701,7 +711,8 @@ static int timekeeping_suspend(void)
@@ -718,7 +728,8 @@ static int timekeeping_suspend(void)
read_persistent_clock(&timekeeping_suspend_time);
@ -441,7 +412,7 @@ index 2378413..da9e1f9 100644
timekeeping_forward_now();
timekeeping_suspended = 1;
@@ -724,7 +735,8 @@ static int timekeeping_suspend(void)
@@ -741,7 +752,8 @@ static int timekeeping_suspend(void)
timekeeping_suspend_time =
timespec_add(timekeeping_suspend_time, delta_delta);
}
@ -451,7 +422,7 @@ index 2378413..da9e1f9 100644
clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
clocksource_suspend();
@@ -1101,13 +1113,13 @@ void get_monotonic_boottime(struct timespec *ts)
@@ -1126,13 +1138,13 @@ void get_monotonic_boottime(struct times
WARN_ON(timekeeping_suspended);
do {
@ -467,7 +438,7 @@ index 2378413..da9e1f9 100644
set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
@@ -1158,10 +1170,10 @@ struct timespec current_kernel_time(void)
@@ -1183,10 +1195,10 @@ struct timespec current_kernel_time(void
unsigned long seq;
do {
@ -480,7 +451,7 @@ index 2378413..da9e1f9 100644
return now;
}
@@ -1173,11 +1185,11 @@ struct timespec get_monotonic_coarse(void)
@@ -1198,11 +1210,11 @@ struct timespec get_monotonic_coarse(voi
unsigned long seq;
do {
@ -494,7 +465,7 @@ index 2378413..da9e1f9 100644
set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
now.tv_nsec + mono.tv_nsec);
@@ -1209,11 +1221,11 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
@@ -1234,11 +1246,11 @@ void get_xtime_and_monotonic_and_sleep_o
unsigned long seq;
do {
@ -507,8 +478,8 @@ index 2378413..da9e1f9 100644
+ } while (read_seqcount_retry(&xtime_seq, seq));
}
/**
@@ -1225,9 +1237,9 @@ ktime_t ktime_get_monotonic_offset(void)
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1284,9 +1296,9 @@ ktime_t ktime_get_monotonic_offset(void)
struct timespec wtom;
do {
@ -520,7 +491,7 @@ index 2378413..da9e1f9 100644
return timespec_to_ktime(wtom);
}
@@ -1239,7 +1251,9 @@ ktime_t ktime_get_monotonic_offset(void)
@@ -1298,7 +1310,9 @@ ktime_t ktime_get_monotonic_offset(void)
*/
void xtime_update(unsigned long ticks)
{
@ -532,6 +503,3 @@ index 2378413..da9e1f9 100644
+ write_seqcount_end(&xtime_seq);
+ raw_spin_unlock(&xtime_lock);
}
--
1.7.10
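
The pattern that this forward-ported patch applies throughout the timekeeping code: the old seqlock_t xtime_lock is split into a raw_spinlock_t for writer exclusion (a raw spinlock never sleeps, so it remains usable from hard interrupt context on RT) plus a bare seqcount_t that gives lockless readers their retry protocol. A condensed sketch of the resulting writer and reader sides; this is illustrative kernel-style code, not a literal excerpt from the patch:

#include <linux/seqlock.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(xtime_lock);	/* serializes writers, never sleeps */
static seqcount_t xtime_seq;			/* set up with seqcount_init() at boot */

static void time_writer(void)
{
	raw_spin_lock(&xtime_lock);
	write_seqcount_begin(&xtime_seq);
	/* ... update xtime, wall_to_monotonic, jiffies_64 ... */
	write_seqcount_end(&xtime_seq);
	raw_spin_unlock(&xtime_lock);
}

static void time_reader(void)
{
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&xtime_seq);
		/* ... snapshot the time values ... */
	} while (read_seqcount_retry(&xtime_seq, seq));
}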


@ -8,6 +8,7 @@ against preemption.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bwh: Adjust context for ABI bodge in struct hrtimer_cpu_base]
---
include/linux/hrtimer.h | 10 ++++++++++
kernel/hrtimer.c | 33 ++++++++++++++++++++++++++++++++-
@ -15,11 +16,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
kernel/posix-timers.c | 33 +++++++++++++++++++++++++++++++++
4 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index fd0dc30..e8b395d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -187,6 +187,9 @@ struct hrtimer_cpu_base {
@@ -188,6 +188,9 @@ struct hrtimer_cpu_base {
unsigned long nr_hangs;
ktime_t max_hang_time;
#endif
@ -27,9 +26,9 @@ index fd0dc30..e8b395d 100644
+ wait_queue_head_t wait;
+#endif
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
};
@@ -374,6 +377,13 @@ static inline int hrtimer_restart(struct hrtimer *timer)
#ifndef __GENKSYMS__
unsigned int clock_was_set;
@@ -384,6 +387,13 @@ static inline int hrtimer_restart(struct
return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
@ -43,11 +42,9 @@ index fd0dc30..e8b395d 100644
/* Query timers: */
extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 1a3695e..905e2cd2 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -847,6 +847,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
@@ -857,6 +857,32 @@ u64 hrtimer_forward(struct hrtimer *time
}
EXPORT_SYMBOL_GPL(hrtimer_forward);
@ -80,7 +77,7 @@ index 1a3695e..905e2cd2 100644
/*
* enqueue_hrtimer - internal function to (re)start a timer
*
@@ -1073,7 +1099,7 @@ int hrtimer_cancel(struct hrtimer *timer)
@@ -1083,7 +1109,7 @@ int hrtimer_cancel(struct hrtimer *timer
if (ret >= 0)
return ret;
@ -89,7 +86,7 @@ index 1a3695e..905e2cd2 100644
}
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1476,6 +1502,8 @@ void hrtimer_run_queues(void)
@@ -1497,6 +1523,8 @@ void hrtimer_run_queues(void)
}
raw_spin_unlock(&cpu_base->lock);
}
@ -98,7 +95,7 @@ index 1a3695e..905e2cd2 100644
}
/*
@@ -1638,6 +1666,9 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
@@ -1659,6 +1687,9 @@ static void __cpuinit init_hrtimers_cpu(
}
hrtimer_init_hres(cpu_base);
@ -108,8 +105,6 @@ index 1a3695e..905e2cd2 100644
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/itimer.c b/kernel/itimer.c
index d802883..2c582fc 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -214,6 +214,7 @@ again:
@ -120,11 +115,9 @@ index d802883..2c582fc 100644
goto again;
}
expires = timeval_to_ktime(value->it_value);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 7b73c34..6a74800 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -766,6 +766,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
@@ -766,6 +766,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_
return overrun;
}
@ -182,7 +175,7 @@ index 7b73c34..6a74800 100644
spin_lock(&current->sighand->siglock);
list_del(&timer->list);
@@ -920,8 +943,18 @@ static void itimer_delete(struct k_itimer *timer)
@@ -920,8 +943,18 @@ static void itimer_delete(struct k_itime
retry_delete:
spin_lock_irqsave(&timer->it_lock, flags);
@ -201,6 +194,3 @@ index 7b73c34..6a74800 100644
goto retry_delete;
}
list_del(&timer->list);
--
1.7.10
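
The preparation patch above gives each hrtimer_cpu_base a waitqueue so that, on RT, a task cancelling a timer whose callback is still running can sleep until the callback finishes rather than spin. Spinning can livelock there, because the canceller may have a higher priority than the softirq thread that is trying to finish the callback on the same CPU. A stripped-down illustration of that handshake; the struct and helpers are simplified stand-ins, and the real code synchronizes on the timer state bits under the base lock rather than a bare flag:

#include <linux/wait.h>

struct timer_base {
	wait_queue_head_t wait;		/* initialised with init_waitqueue_head() */
	int callback_running;		/* stands in for HRTIMER_STATE_CALLBACK */
};

/* Canceller side: sleep until the running callback has completed. */
static void wait_for_timer(struct timer_base *base)
{
	wait_event(base->wait, !base->callback_running);
}

/* Softirq side: run the callback, then wake any waiting cancellers. */
static void run_timer(struct timer_base *base)
{
	base->callback_running = 1;
	/* ... invoke the timer's callback function ... */
	base->callback_running = 0;
	wake_up(&base->wait);
}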


@ -12,6 +12,9 @@ delivery problem for real.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
[bwh: Forward-ported to 3.2.24: move the code added to run_hrtimer_softirq()
by commit f55a6faa384304c89cfef162768e88374d3312cb ('hrtimer: Provide
clock_was_set_delayed()')]
---
include/linux/hrtimer.h | 3 +
kernel/hrtimer.c | 190 +++++++++++++++++++++++++++++++++++++++++-----
@ -292,12 +295,19 @@ index 905e2cd2..1dd627b 100644
return;
}
@@ -1430,17 +1580,17 @@ void hrtimer_peek_ahead_timers(void)
@@ -1430,24 +1580,24 @@ void hrtimer_peek_ahead_timers(void)
local_irq_restore(flags);
}
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
- if (cpu_base->clock_was_set) {
- cpu_base->clock_was_set = 0;
- clock_was_set();
- }
-
- hrtimer_peek_ahead_timers();
-}
-
@ -309,6 +319,13 @@ index 905e2cd2..1dd627b 100644
+static void run_hrtimer_softirq(struct softirq_action *h)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+ if (cpu_base->clock_was_set) {
+ cpu_base->clock_was_set = 0;
+ clock_was_set();
+ }
+
+ hrtimer_rt_run_pending();
+}
+


@ -54,11 +54,9 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
kernel/sched.c | 82 ++++++++++++++++-
3 files changed, 285 insertions(+), 40 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0174e3a..9ca3172 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1933,6 +1933,10 @@ extern void do_set_cpus_allowed(struct task_struct *p,
@@ -1933,6 +1933,10 @@ extern void do_set_cpus_allowed(struct t
extern int set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask);
@ -69,7 +67,7 @@ index 0174e3a..9ca3172 100644
#else
static inline void do_set_cpus_allowed(struct task_struct *p,
const struct cpumask *new_mask)
@@ -1945,6 +1949,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
@@ -1945,6 +1949,9 @@ static inline int set_cpus_allowed_ptr(s
return -EINVAL;
return 0;
}
@ -78,9 +76,7 @@ index 0174e3a..9ca3172 100644
+static inline void tell_sched_cpu_down_done(int cpu) { }
#endif
#ifndef CONFIG_CPUMASK_OFFSTACK
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 66dfb74..0964e93 100644
#ifdef CONFIG_NO_HZ
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -46,12 +46,7 @@ static int cpu_hotplug_disabled;
@ -147,7 +143,7 @@ index 66dfb74..0964e93 100644
static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
/**
@@ -94,18 +111,40 @@ static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
@@ -94,18 +111,40 @@ static DEFINE_PER_CPU(struct hotplug_pcp
void pin_current_cpu(void)
{
struct hotplug_pcp *hp;
@ -284,7 +280,7 @@ index 66dfb74..0964e93 100644
/*
* Start the sync_unplug_thread on the target cpu and wait for it to
* complete.
@@ -154,23 +251,83 @@ static int sync_unplug_thread(void *data)
@@ -154,23 +251,83 @@ static int sync_unplug_thread(void *data
static int cpu_unplug_begin(unsigned int cpu)
{
struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
@ -423,7 +419,7 @@ index 66dfb74..0964e93 100644
}
#else /* #if CONFIG_HOTPLUG_CPU */
@@ -371,6 +528,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
@@ -371,6 +528,9 @@ static int __ref _cpu_down(unsigned int
goto out_release;
}
@ -433,11 +429,9 @@ index 66dfb74..0964e93 100644
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
diff --git a/kernel/sched.c b/kernel/sched.c
index 4dd1fff..bc2375e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4403,7 +4403,7 @@ void migrate_disable(void)
@@ -4533,7 +4533,7 @@ void migrate_disable(void)
{
struct task_struct *p = current;
@ -446,7 +440,7 @@ index 4dd1fff..bc2375e 100644
#ifdef CONFIG_SCHED_DEBUG
p->migrate_disable_atomic++;
#endif
@@ -4434,7 +4434,7 @@ void migrate_enable(void)
@@ -4564,7 +4564,7 @@ void migrate_enable(void)
unsigned long flags;
struct rq *rq;
@ -455,7 +449,7 @@ index 4dd1fff..bc2375e 100644
#ifdef CONFIG_SCHED_DEBUG
p->migrate_disable_atomic--;
#endif
@@ -6360,6 +6360,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
@@ -6490,6 +6490,84 @@ void do_set_cpus_allowed(struct task_str
cpumask_copy(&p->cpus_allowed, new_mask);
}
@ -540,6 +534,3 @@ index 4dd1fff..bc2375e 100644
/*
* This is how migration works:
*
--
1.7.10

debian/patches/series

@ -62,15 +62,11 @@ bugfix/x86/KVM-nVMX-Fix-warning-causing-idt-vectoring-info-beha.patch
features/all/hwmon-it87-Add-IT8728F-support.patch
bugfix/x86/drm-i915-do-not-enable-RC6p-on-Sandy-Bridge.patch
bugfix/x86/drm-i915-fix-operator-precedence-when-enabling-RC6p.patch
features/all/fs-symlink-restrictions-on-sticky-directories.patch
features/all/fs-symlink-restrictions-on-sticky-directories-fix-2.patch
features/all/fs-hardlink-creation-restrictions.patch
features/all/fs-hardlink-creation-restrictions-fix.patch
features/all/fs-hardlink-creation-restriction-cleanup.patch
bugfix/all/kbuild-do-not-check-for-ancient-modutils-tools.patch
# Update all Hyper-V drivers to 3.4-rc1 (no longer staging)
features/x86/hyperv/0001-NLS-improve-UTF8-UTF16-string-conversion-routine.patch
@ -171,11 +167,6 @@ features/x86/efi-stub/0015-x86-efi-Fix-endian-issues-and-unaligned-accesses.patc
features/x86/efi-stub/0016-x86-boot-Correct-CFLAGS-for-hostprogs.patch
features/x86/efi-stub/0017-x86-efi-Add-dedicated-EFI-stub-entry-point.patch
bugfix/all/brcmsmac-INTERMEDIATE-but-not-AMPDU-only-when-tracin.patch
bugfix/all/NFSv4-Rate-limit-the-state-manager-for-lock-reclaim-.patch
bugfix/all/ext4-Report-max_batch_time-option-correctly.patch
# Update wacom driver to 3.5ish
features/all/wacom/0001-Input-wacom-cleanup-feature-report-for-bamboos.patch
features/all/wacom/0002-Input-wacom-remove-unused-bamboo-HID-parsing.patch
@ -287,8 +278,6 @@ features/all/codel/0007-fq_codel-should-use-qdisc-backlog-as-threshold.patch
features/all/AppArmor-compatibility-patch-for-v5-interface.patch
bugfix/all/apparmor-remove-advertising-the-support-of-network-r.patch
bugfix/all/hugepages-fix-use-after-free-bug-in-quota-handling.patch
# netdev features, probably useful for other backports but not needed yet
#features/all/define-netdev_features_t.patch
#features/all/filter-Allow-to-create-sk-unattached-filters.patch
@ -299,17 +288,6 @@ features/all/hidepid/0002-procfs-add-hidepid-and-gid-mount-options.patch
features/all/hidepid/0003-proc-fix-null-pointer-deref-in-proc_pid_permission.patch
features/all/hidepid/0004-proc-fix-mount-t-proc-o-AAA.patch
bugfix/all/NFSv4-Reduce-the-footprint-of-the-idmapper.patch
bugfix/all/NFSv4-Further-reduce-the-footprint-of-the-idmapper.patch
bugfix/all/macvtap-zerocopy-fix-offset-calculation-when-buildin.patch
bugfix/all/macvtap-zerocopy-fix-truesize-underestimation.patch
bugfix/all/macvtap-zerocopy-put-page-when-fail-to-get-all-reque.patch
bugfix/all/macvtap-zerocopy-set-SKBTX_DEV_ZEROCOPY-only-when-sk.patch
bugfix/all/macvtap-zerocopy-validate-vectors-before-building-sk.patch
bugfix/all/KVM-Fix-buffer-overflow-in-kvm_set_irq.patch
# CPU sysdev removal from 3.3 and x86 CPU auto-loading from 3.4
features/all/cpu-devices/driver-core-implement-sysdev-functionality-for-regul.patch
features/all/cpu-devices/cpu-convert-cpu-and-machinecheck-sysdev_class-to-a-r.patch
@ -365,9 +343,7 @@ features/arm/net-drop-NET-dependency-from-HAVE_BPF_JIT.patch
# Until next ABI bump
debian/driver-core-avoid-ABI-change-for-removal-of-__must_check.patch
bugfix/all/scsi-Silence-unnecessary-warnings-about-ioctl-to-par.patch
bugfix/all/udf-Improve-table-length-check-to-avoid-possible-underflow.patch
bugfix/all/epoll-clear-the-tfile_check_list-on-eloop.patch
# nouveau update to support Fermi (NVC0+) acceleration
features/all/fermi-accel/drm-nouveau-ttm-always-do-buffer-moves-on-kernel-cha.patch
@ -383,8 +359,9 @@ features/all/fermi-accel/drm-nouveau-bump-version-to-1.0.0.patch
bugfix/all/net-e100-ucode-is-optional-in-some-cases.patch
bugfix/x86/drm-i915-prefer-wide-slow-to-fast-narrow-in-DP-confi.patch
bugfix/all/cipso-don-t-follow-a-NULL-pointer-when-setsockopt-is.patch
bugfix/all/atl1c-fix-issue-of-transmit-queue-0-timed-out.patch
bugfix/all/raid5-delayed-stripe-fix.patch
bugfix/all/remove-easily-user-triggerable-bug-from-generic_setlease.patch
bugfix/all/tcp-drop-syn-fin-messages.patch
bugfix/all/fifo-do-not-restart-open-if-it-already-found-a-partner.patch
# Until next ABI bump
debian/hrtimer-Avoid-ABI-change-in-3.2.24.patch
debian/net-Avoid-ABI-change-in-3.2.24.patch
debian/libsas-Avoid-ABI-change-in-3.2.24.patch
debian/powerpc-cputime-Avoid-ABI-change-in-3.2.24.patch