diff --git a/debian/changelog b/debian/changelog index a324e5384..3aca8bdff 100644 --- a/debian/changelog +++ b/debian/changelog @@ -95,6 +95,10 @@ linux (3.2.37-1) UNRELEASED; urgency=low (Closes: #697903) * [armel/versatile,armhf/vexpress] i2c: Enable I2C, I2C_VERSATILE as modules (Closes: #696182) + * ext4: Fix corruption by hole punch in large files (Closes: #685726) + - rewrite punch hole to use ext4_ext_remove_space() + - fix hole punch failure when depth is greater than 0 + - fix kernel BUG on large-scale rm -rf commands [ Aurelien Jarno ] * [armhf/vexpress] Add kernel udebs. diff --git a/debian/patches/bugfix/all/ext4-fix-hole-punch-failure-when-depth-is-greater-th.patch b/debian/patches/bugfix/all/ext4-fix-hole-punch-failure-when-depth-is-greater-th.patch new file mode 100644 index 000000000..2004badd0 --- /dev/null +++ b/debian/patches/bugfix/all/ext4-fix-hole-punch-failure-when-depth-is-greater-th.patch @@ -0,0 +1,120 @@ +From: Ashish Sangwan +Date: Sun, 22 Jul 2012 22:49:08 -0400 +Subject: [PATCH 2/3] ext4: fix hole punch failure when depth is greater than 0 + +commit 968dee77220768a5f52cf8b21d0bdb73486febef upstream. + +Whether to continue removing extents or not is decided by the return +value of function ext4_ext_more_to_rm() which checks 2 conditions: +a) if there are no more indexes to process. +b) if the number of entries are decreased in the header of "depth -1". + +In case of hole punch, if the last block to be removed is not part of +the last extent index than this index will not be deleted, hence the +number of valid entries in the extent header of "depth - 1" will +remain as it is and ext4_ext_more_to_rm will return 0 although the +required blocks are not yet removed. + +This patch fixes the above mentioned problem as instead of removing +the extents from the end of file, it starts removing the blocks from +the particular extent from which removing blocks is actually required +and continue backward until done. 
+ +Signed-off-by: Ashish Sangwan +Signed-off-by: Namjae Jeon +Reviewed-by: Lukas Czerner +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents.c | 46 +++++++++++++++++++++++++++++----------------- + 1 file changed, 29 insertions(+), 17 deletions(-) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -2518,10 +2518,10 @@ static int ext4_ext_remove_space(struct + { + struct super_block *sb = inode->i_sb; + int depth = ext_depth(inode); +- struct ext4_ext_path *path; ++ struct ext4_ext_path *path = NULL; + ext4_fsblk_t partial_cluster = 0; + handle_t *handle; +- int i, err; ++ int i = 0, err; + + ext_debug("truncate since %u to %u\n", start, end); + +@@ -2554,8 +2554,12 @@ again: + } + depth = ext_depth(inode); + ex = path[depth].p_ext; +- if (!ex) ++ if (!ex) { ++ ext4_ext_drop_refs(path); ++ kfree(path); ++ path = NULL; + goto cont; ++ } + + ee_block = le32_to_cpu(ex->ee_block); + +@@ -2585,8 +2589,6 @@ again: + if (err < 0) + goto out; + } +- ext4_ext_drop_refs(path); +- kfree(path); + } + cont: + +@@ -2595,19 +2597,27 @@ cont: + * after i_size and walking into the tree depth-wise. 
+ */ + depth = ext_depth(inode); +- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); +- if (path == NULL) { +- ext4_journal_stop(handle); +- return -ENOMEM; +- } +- path[0].p_depth = depth; +- path[0].p_hdr = ext_inode_hdr(inode); +- +- if (ext4_ext_check(inode, path[0].p_hdr, depth)) { +- err = -EIO; +- goto out; ++ if (path) { ++ int k = i = depth; ++ while (--k > 0) ++ path[k].p_block = ++ le16_to_cpu(path[k].p_hdr->eh_entries)+1; ++ } else { ++ path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), ++ GFP_NOFS); ++ if (path == NULL) { ++ ext4_journal_stop(handle); ++ return -ENOMEM; ++ } ++ path[0].p_depth = depth; ++ path[0].p_hdr = ext_inode_hdr(inode); ++ ++ if (ext4_ext_check(inode, path[0].p_hdr, depth)) { ++ err = -EIO; ++ goto out; ++ } + } +- i = err = 0; ++ err = 0; + + while (i >= 0 && err == 0) { + if (i == depth) { +@@ -2721,8 +2731,10 @@ cont: + out: + ext4_ext_drop_refs(path); + kfree(path); +- if (err == -EAGAIN) ++ if (err == -EAGAIN) { ++ path = NULL; + goto again; ++ } + ext4_journal_stop(handle); + + return err; diff --git a/debian/patches/bugfix/all/ext4-fix-kernel-BUG-on-large-scale-rm-rf-commands.patch b/debian/patches/bugfix/all/ext4-fix-kernel-BUG-on-large-scale-rm-rf-commands.patch new file mode 100644 index 000000000..a841544eb --- /dev/null +++ b/debian/patches/bugfix/all/ext4-fix-kernel-BUG-on-large-scale-rm-rf-commands.patch @@ -0,0 +1,63 @@ +From: Theodore Ts'o +Date: Fri, 17 Aug 2012 08:54:52 -0400 +Subject: [PATCH 3/3] ext4: fix kernel BUG on large-scale rm -rf commands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 89a4e48f8479f8145eca9698f39fe188c982212f upstream. 
+ +Commit 968dee7722: "ext4: fix hole punch failure when depth is greater +than 0" introduced a regression in v3.5.1/v3.6-rc1 which caused kernel +crashes when users ran "rm -rf" on a large directory hierarchy on +ext4 filesystems on RAID devices: + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 + + Process rm (pid: 18229, threadinfo ffff8801276bc000, task ffff880123631710) + Call Trace: + [] ? __ext4_handle_dirty_metadata+0x83/0x110 + [] ext4_ext_truncate+0x193/0x1d0 + [] ? ext4_mark_inode_dirty+0x7f/0x1f0 + [] ext4_truncate+0xf5/0x100 + [] ext4_evict_inode+0x461/0x490 + [] evict+0xa2/0x1a0 + [] iput+0x103/0x1f0 + [] do_unlinkat+0x154/0x1c0 + [] ? sys_newfstatat+0x2a/0x40 + [] sys_unlinkat+0x1b/0x50 + [] system_call_fastpath+0x16/0x1b + Code: 8b 4d 20 0f b7 41 02 48 8d 04 40 48 8d 04 81 49 89 45 18 0f b7 49 02 48 83 c1 01 49 89 4d 00 e9 ae f8 ff ff 0f 1f 00 49 8b 45 28 <48> 8b 40 28 49 89 45 20 e9 85 f8 ff ff 0f 1f 80 00 00 00 + + RIP [] ext4_ext_remove_space+0xa34/0xdf0 + +This could be reproduced as follows: + +The problem in commit 968dee7722 was that it caused the variable 'i' to +be left uninitialized if the truncate required more space than was +available in the journal. This resulted in the function +ext4_ext_truncate_extend_restart() returning -EAGAIN, which caused +ext4_ext_remove_space() to restart the truncate operation after +starting a new jbd2 handle. 
+ +Reported-by: Maciej Żenczykowski +Reported-by: Marti Raudsepp +Tested-by: Fengguang Wu +Signed-off-by: "Theodore Ts'o" +Cc: stable@vger.kernel.org +--- + fs/ext4/extents.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 2b8b3c9..9169e11 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -2594,6 +2594,7 @@ cont: + } + path[0].p_depth = depth; + path[0].p_hdr = ext_inode_hdr(inode); ++ i = 0; + + if (ext4_ext_check(inode, path[0].p_hdr, depth)) { + err = -EIO; diff --git a/debian/patches/bugfix/all/ext4-rewrite-punch-hole-to-use-ext4_ext_remove_space.patch b/debian/patches/bugfix/all/ext4-rewrite-punch-hole-to-use-ext4_ext_remove_space.patch new file mode 100644 index 000000000..7c47655f4 --- /dev/null +++ b/debian/patches/bugfix/all/ext4-rewrite-punch-hole-to-use-ext4_ext_remove_space.patch @@ -0,0 +1,349 @@ +From: Lukas Czerner +Date: Mon, 19 Mar 2012 23:03:19 -0400 +Subject: [PATCH 1/3] ext4: rewrite punch hole to use ext4_ext_remove_space() + +commit 5f95d21fb6f2aaa52830e5b7fb405f6c71d3ab85 upstream. + +This commit rewrites ext4 punch hole implementation to use +ext4_ext_remove_space() instead of its home-grown way of doing this via +ext4_ext_map_blocks(). There are several reasons for changing this. + +Firstly it is quite non obvious that punching hole needs to call +ext4_ext_map_blocks() to punch a hole, especially given that this +function should map blocks, not unmap them. It also required a lot of new +code in ext4_ext_map_blocks(). + +Secondly the design of it is not very effective. The reason is that we +are trying to punch out blocks in ext4_ext_punch_hole() in opposite +direction than in ext4_ext_rm_leaf() which causes the ext4_ext_rm_leaf() +to iterate through the whole tree from the end to the start to find the +requested extent for every extent we are going to punch out. 
+ +And finally the current implementation does not use the existing code, +but brings a lot of new code, which is IMO unnecessary since there +already is some infrastructure we can use. Specifically +ext4_ext_remove_space(). + +This commit changes ext4_ext_remove_space() to accept an 'end' parameter so +we can not only truncate to the end of file, but also remove the space +in the middle of the file (punch a hole). Moreover, because the last +block to punch out might be in the middle of the extent, we have to +split the extent at 'end + 1' so ext4_ext_rm_leaf() can easily either +remove the whole first part of the split extent, or change its size. + +ext4_ext_remove_space() is then used to actually remove the space +(extents) from within the hole, instead of ext4_ext_map_blocks(). + +Note that this also fixes the issue with punch hole, where we would forget +to remove empty index blocks from the extent tree, resulting in double +free block error and file system corruption. This is simply because we +now use a different code path, where this problem does not exist. + +This has been tested with fsx running for several days and xfstests, +plus xfstest #251 with '-o discard' run on the loop image (which +converts discard requests into punch hole to the backing file). All of +it on 1K and 4K file system block size. + +Signed-off-by: Lukas Czerner +Signed-off-by: "Theodore Ts'o" +[bwh: Backported to 3.2.y: move EXT4_EXT_DATA_VALID{1,2} along with the + other extent splitting flags] +--- + fs/ext4/extents.c | 170 ++++++++++++++++++++++++++++-------------------------- + 1 file changed, 88 insertions(+), 82 deletions(-) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -45,6 +45,17 @@ + + #include + ++/* ++ * used by extent splitting. 
++ */ ++#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ ++ due to ENOSPC */ ++#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ ++#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ ++ ++#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ ++#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ ++ + static int ext4_split_extent(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, +@@ -52,6 +63,13 @@ static int ext4_split_extent(handle_t *h + int split_flag, + int flags); + ++static int ext4_split_extent_at(handle_t *handle, ++ struct inode *inode, ++ struct ext4_ext_path *path, ++ ext4_lblk_t split, ++ int split_flag, ++ int flags); ++ + static int ext4_ext_truncate_extend_restart(handle_t *handle, + struct inode *inode, + int needed) +@@ -2321,7 +2339,7 @@ ext4_ext_rm_leaf(handle_t *handle, struc + struct ext4_extent *ex; + + /* the header must be checked already in ext4_ext_remove_space() */ +- ext_debug("truncate since %u in leaf\n", start); ++ ext_debug("truncate since %u in leaf to %u\n", start, end); + if (!path[depth].p_hdr) + path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); + eh = path[depth].p_hdr; +@@ -2356,7 +2374,7 @@ ext4_ext_rm_leaf(handle_t *handle, struc + ext_debug(" border %u:%u\n", a, b); + + /* If this extent is beyond the end of the hole, skip it */ +- if (end <= ex_ee_block) { ++ if (end < ex_ee_block) { + ex--; + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = ext4_ext_get_actual_len(ex); +@@ -2495,7 +2513,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path + return 1; + } + +-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) ++static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, ++ ext4_lblk_t end) + { + struct super_block *sb = inode->i_sb; + int depth = ext_depth(inode); +@@ -2504,7 +2523,7 @@ static int ext4_ext_remove_space(struct + handle_t *handle; + int i, err; + +- 
ext_debug("truncate since %u\n", start); ++ ext_debug("truncate since %u to %u\n", start, end); + + /* probably first extent we're gonna free will be last in block */ + handle = ext4_journal_start(inode, depth + 1); +@@ -2517,6 +2536,61 @@ again: + trace_ext4_ext_remove_space(inode, start, depth); + + /* ++ * Check if we are removing extents inside the extent tree. If that ++ * is the case, we are going to punch a hole inside the extent tree ++ * so we have to check whether we need to split the extent covering ++ * the last block to remove so we can easily remove the part of it ++ * in ext4_ext_rm_leaf(). ++ */ ++ if (end < EXT_MAX_BLOCKS - 1) { ++ struct ext4_extent *ex; ++ ext4_lblk_t ee_block; ++ ++ /* find extent for this block */ ++ path = ext4_ext_find_extent(inode, end, NULL); ++ if (IS_ERR(path)) { ++ ext4_journal_stop(handle); ++ return PTR_ERR(path); ++ } ++ depth = ext_depth(inode); ++ ex = path[depth].p_ext; ++ if (!ex) ++ goto cont; ++ ++ ee_block = le32_to_cpu(ex->ee_block); ++ ++ /* ++ * See if the last block is inside the extent, if so split ++ * the extent at 'end' block so we can easily remove the ++ * tail of the first part of the split extent in ++ * ext4_ext_rm_leaf(). ++ */ ++ if (end >= ee_block && ++ end < ee_block + ext4_ext_get_actual_len(ex) - 1) { ++ int split_flag = 0; ++ ++ if (ext4_ext_is_uninitialized(ex)) ++ split_flag = EXT4_EXT_MARK_UNINIT1 | ++ EXT4_EXT_MARK_UNINIT2; ++ ++ /* ++ * Split the extent in two so that 'end' is the last ++ * block in the first new extent ++ */ ++ err = ext4_split_extent_at(handle, inode, path, ++ end + 1, split_flag, ++ EXT4_GET_BLOCKS_PRE_IO | ++ EXT4_GET_BLOCKS_PUNCH_OUT_EXT); ++ ++ if (err < 0) ++ goto out; ++ } ++ ext4_ext_drop_refs(path); ++ kfree(path); ++ } ++cont: ++ ++ /* + * We start scanning from right side, freeing all the blocks + * after i_size and walking into the tree depth-wise. 
+ */ +@@ -2528,6 +2602,7 @@ again: + } + path[0].p_depth = depth; + path[0].p_hdr = ext_inode_hdr(inode); ++ + if (ext4_ext_check(inode, path[0].p_hdr, depth)) { + err = -EIO; + goto out; +@@ -2539,7 +2614,7 @@ again: + /* this is leaf block */ + err = ext4_ext_rm_leaf(handle, inode, path, + &partial_cluster, start, +- EXT_MAX_BLOCKS - 1); ++ end); + /* root level has p_bh == NULL, brelse() eats this */ + brelse(path[i].p_bh); + path[i].p_bh = NULL; +@@ -2722,17 +2797,6 @@ static int ext4_ext_zeroout(struct inode + } + + /* +- * used by extent splitting. +- */ +-#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ +- due to ENOSPC */ +-#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ +-#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ +- +-#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ +-#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ +- +-/* + * ext4_split_extent_at() splits an extent at given block. + * + * @handle: the journal handle +@@ -4274,7 +4338,7 @@ void ext4_ext_truncate(struct inode *ino + + last_block = (inode->i_size + sb->s_blocksize - 1) + >> EXT4_BLOCK_SIZE_BITS(sb); +- err = ext4_ext_remove_space(inode, last_block); ++ err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + + /* In a multi-transaction truncate, we only make the final + * transaction synchronous. 
+@@ -4751,14 +4815,12 @@ int ext4_ext_punch_hole(struct file *fil + { + struct inode *inode = file->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; +- struct ext4_ext_cache cache_ex; +- ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; ++ ext4_lblk_t first_block, stop_block; + struct address_space *mapping = inode->i_mapping; +- struct ext4_map_blocks map; + handle_t *handle; + loff_t first_page, last_page, page_len; + loff_t first_page_offset, last_page_offset; +- int ret, credits, blocks_released, err = 0; ++ int credits, err = 0; + + /* No need to punch hole beyond i_size */ + if (offset >= inode->i_size) +@@ -4774,10 +4836,6 @@ int ext4_ext_punch_hole(struct file *fil + offset; + } + +- first_block = (offset + sb->s_blocksize - 1) >> +- EXT4_BLOCK_SIZE_BITS(sb); +- last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); +- + first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + last_page = (offset + length) >> PAGE_CACHE_SHIFT; + +@@ -4856,7 +4914,6 @@ int ext4_ext_punch_hole(struct file *fil + } + } + +- + /* + * If i_size is contained in the last page, we need to + * unmap and zero the partial page after i_size +@@ -4876,73 +4933,22 @@ int ext4_ext_punch_hole(struct file *fil + } + } + ++ first_block = (offset + sb->s_blocksize - 1) >> ++ EXT4_BLOCK_SIZE_BITS(sb); ++ stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); ++ + /* If there are no blocks to remove, return now */ +- if (first_block >= last_block) ++ if (first_block >= stop_block) + goto out; + + down_write(&EXT4_I(inode)->i_data_sem); + ext4_ext_invalidate_cache(inode); + ext4_discard_preallocations(inode); + +- /* +- * Loop over all the blocks and identify blocks +- * that need to be punched out +- */ +- iblock = first_block; +- blocks_released = 0; +- while (iblock < last_block) { +- max_blocks = last_block - iblock; +- num_blocks = 1; +- memset(&map, 0, sizeof(map)); +- map.m_lblk = iblock; +- map.m_len = max_blocks; +- ret = 
ext4_ext_map_blocks(handle, inode, &map, +- EXT4_GET_BLOCKS_PUNCH_OUT_EXT); +- +- if (ret > 0) { +- blocks_released += ret; +- num_blocks = ret; +- } else if (ret == 0) { +- /* +- * If map blocks could not find the block, +- * then it is in a hole. If the hole was +- * not already cached, then map blocks should +- * put it in the cache. So we can get the hole +- * out of the cache +- */ +- memset(&cache_ex, 0, sizeof(cache_ex)); +- if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && +- !cache_ex.ec_start) { +- +- /* The hole is cached */ +- num_blocks = cache_ex.ec_block + +- cache_ex.ec_len - iblock; +- +- } else { +- /* The block could not be identified */ +- err = -EIO; +- break; +- } +- } else { +- /* Map blocks error */ +- err = ret; +- break; +- } ++ err = ext4_ext_remove_space(inode, first_block, stop_block - 1); + +- if (num_blocks == 0) { +- /* This condition should never happen */ +- ext_debug("Block lookup failed"); +- err = -EIO; +- break; +- } +- +- iblock += num_blocks; +- } +- +- if (blocks_released > 0) { +- ext4_ext_invalidate_cache(inode); +- ext4_discard_preallocations(inode); +- } ++ ext4_ext_invalidate_cache(inode); ++ ext4_discard_preallocations(inode); + + if (IS_SYNC(inode)) + ext4_handle_sync(handle); diff --git a/debian/patches/series b/debian/patches/series index afd9d1470..8cc1fc18d 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -468,3 +468,6 @@ bugfix/all/fs-cachefiles-add-support-for-large-files-in-filesys.patch bugfix/x86/xen-Fix-stack-corruption-in-xen_failsafe_callback-fo.patch bugfix/s390/s390-time-fix-sched_clock-overflow.patch bugfix/all/bridge-Pull-ip-header-into-skb-data-before-looking-i.patch +bugfix/all/ext4-rewrite-punch-hole-to-use-ext4_ext_remove_space.patch +bugfix/all/ext4-fix-hole-punch-failure-when-depth-is-greater-th.patch +bugfix/all/ext4-fix-kernel-BUG-on-large-scale-rm-rf-commands.patch