diff --git a/debian/changelog b/debian/changelog index 0e811e7fc..11bde0e67 100644 --- a/debian/changelog +++ b/debian/changelog @@ -23,6 +23,10 @@ linux-2.6 (2.6.32-2) UNRELEASED; urgency=low [ Aurelien Jarno ] * Add support for the sparc64 architecture. + [ dann frazier ] + * Add stable release 2.6.32.1: + - ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT (CVE-2009-4131) + -- Aurelien Jarno Sun, 13 Dec 2009 13:25:45 +0100 linux-2.6 (2.6.32-1) unstable; urgency=low diff --git a/debian/patches/bugfix/all/stable/2.6.32.1.patch b/debian/patches/bugfix/all/stable/2.6.32.1.patch new file mode 100644 index 000000000..ccaaa6199 --- /dev/null +++ b/debian/patches/bugfix/all/stable/2.6.32.1.patch @@ -0,0 +1,1834 @@ +diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt +index 6d94e06..af6885c 100644 +--- a/Documentation/filesystems/ext4.txt ++++ b/Documentation/filesystems/ext4.txt +@@ -153,8 +153,8 @@ journal_dev=devnum When the external journal device's major/minor numbers + identified through its new major/minor numbers encoded + in devnum. + +-noload Don't load the journal on mounting. Note that +- if the filesystem was not unmounted cleanly, ++norecovery Don't load the journal on mounting. Note that ++noload if the filesystem was not unmounted cleanly, + skipping the journal replay will lead to the + filesystem containing inconsistencies that can + lead to any number of problems. +@@ -353,6 +353,12 @@ noauto_da_alloc replacing existing files via patterns such as + system crashes before the delayed allocation + blocks are forced to disk. + ++discard Controls whether ext4 should issue discard/TRIM ++nodiscard(*) commands to the underlying block device when ++ blocks are freed. This is useful for SSD devices ++ and sparse/thinly-provisioned LUNs, but it is off ++ by default until sufficient testing has been done. ++ + Data Mode + ========= + There are 3 different data modes: +diff --git a/Makefile b/Makefile +index f5cdb72..d0d7e9c 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 32 +-EXTRAVERSION = ++EXTRAVERSION = .1 + NAME = Man-Eating Seals of Antiquity + + # *DOCUMENTATION* +diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c +index c968cc3..554626e 100644 +--- a/drivers/scsi/hosts.c ++++ b/drivers/scsi/hosts.c +@@ -180,14 +180,20 @@ void scsi_remove_host(struct Scsi_Host *shost) + EXPORT_SYMBOL(scsi_remove_host); + + /** +- * scsi_add_host - add a scsi host ++ * scsi_add_host_with_dma - add a scsi host with dma device + * @shost: scsi host pointer to add + * @dev: a struct device of type scsi class ++ * @dma_dev: dma device for the host ++ * ++ * Note: You rarely need to worry about this unless you're in a ++ * virtualised host environments, so use the simpler scsi_add_host() ++ * function instead. + * + * Return value: + * 0 on success / != 0 for error + **/ +-int scsi_add_host(struct Scsi_Host *shost, struct device *dev) ++int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, ++ struct device *dma_dev) + { + struct scsi_host_template *sht = shost->hostt; + int error = -EINVAL; +@@ -207,6 +213,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev) + + if (!shost->shost_gendev.parent) + shost->shost_gendev.parent = dev ? dev : &platform_bus; ++ shost->dma_dev = dma_dev; + + error = device_add(&shost->shost_gendev); + if (error) +@@ -262,7 +269,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev) + fail: + return error; + } +-EXPORT_SYMBOL(scsi_add_host); ++EXPORT_SYMBOL(scsi_add_host_with_dma); + + static void scsi_host_dev_release(struct device *dev) + { +diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c +index 562d8ce..f913f1e 100644 +--- a/drivers/scsi/lpfc/lpfc_init.c ++++ b/drivers/scsi/lpfc/lpfc_init.c +@@ -2408,7 +2408,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) + vport->els_tmofunc.function = lpfc_els_timeout; + vport->els_tmofunc.data = (unsigned long)vport; + +- error = scsi_add_host(shost, dev); ++ error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev); + if (error) + goto out_put_shost; + +diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c +index a39addc..507ccc6 100644 +--- a/drivers/scsi/megaraid/megaraid_sas.c ++++ b/drivers/scsi/megaraid/megaraid_sas.c +@@ -3032,7 +3032,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, + int error = 0, i; + void *sense = NULL; + dma_addr_t sense_handle; +- u32 *sense_ptr; ++ unsigned long *sense_ptr; + + memset(kbuff_arr, 0, sizeof(kbuff_arr)); + +@@ -3109,7 +3109,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, + } + + sense_ptr = +- (u32 *) ((unsigned long)cmd->frame + ioc->sense_off); ++ (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off); + *sense_ptr = sense_handle; + } + +@@ -3140,8 +3140,8 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, + * sense_ptr points to the location that has the user + * sense buffer address + */ +- sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw + +- ioc->sense_off); ++ sense_ptr = (unsigned long *) ((unsigned long)ioc->frame.raw + ++ ioc->sense_off); + + if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)), + sense, ioc->sense_len)) { +diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c +index fbcb82a..21e2bc4 100644 +--- a/drivers/scsi/qla2xxx/qla_attr.c ++++ b/drivers/scsi/qla2xxx/qla_attr.c +@@ -1654,7 +1654,8 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable) + fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN); + } + +- if (scsi_add_host(vha->host, &fc_vport->dev)) { ++ if (scsi_add_host_with_dma(vha->host, &fc_vport->dev, ++ &ha->pdev->dev)) { + DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n", + vha->host_no, vha->vp_idx)); + goto vport_create_failed_2; +diff --git a/drivers/scsi/scsi_lib_dma.c b/drivers/scsi/scsi_lib_dma.c +index ac6855c..dcd1285 100644 +--- a/drivers/scsi/scsi_lib_dma.c ++++ b/drivers/scsi/scsi_lib_dma.c +@@ -23,7 +23,7 @@ int scsi_dma_map(struct scsi_cmnd *cmd) + int nseg = 0; + + if (scsi_sg_count(cmd)) { +- struct device *dev = cmd->device->host->shost_gendev.parent; ++ struct device *dev = cmd->device->host->dma_dev; + + nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), + cmd->sc_data_direction); +@@ -41,7 +41,7 @@ EXPORT_SYMBOL(scsi_dma_map); + void scsi_dma_unmap(struct scsi_cmnd *cmd) + { + if (scsi_sg_count(cmd)) { +- struct device *dev = cmd->device->host->shost_gendev.parent; ++ struct device *dev = cmd->device->host->dma_dev; + + dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), + cmd->sc_data_direction); +diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c +index 1d04189..f3032c9 100644 +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -761,7 +761,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, + static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, + ext4_group_t group) + { +- return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; ++ if (!ext4_bg_has_super(sb, group)) ++ return 0; ++ ++ if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG)) ++ return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); ++ else ++ return EXT4_SB(sb)->s_gdb_count; + } + + /** +diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c +index 50784ef..dc79b75 100644 +--- a/fs/ext4/block_validity.c ++++ b/fs/ext4/block_validity.c +@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb) + if (ext4_bg_has_super(sb, i) && + ((i < 5) || ((i % flex_size) == 0))) + add_system_zone(sbi, ext4_group_first_block_no(sb, i), +- sbi->s_gdb_count + 1); ++ ext4_bg_num_gdb(sb, i) + 1); + gdp = ext4_get_group_desc(sb, i, NULL); + ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); + if (ret) +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 8825515..bd2a9dd 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -703,6 +703,13 @@ struct ext4_inode_info { + struct list_head i_aio_dio_complete_list; + /* current io_end structure for async DIO write*/ + ext4_io_end_t *cur_aio_dio; ++ ++ /* ++ * Transactions that contain inode's metadata needed to complete ++ * fsync and fdatasync, respectively. ++ */ ++ tid_t i_sync_tid; ++ tid_t i_datasync_tid; + }; + + /* +@@ -750,6 +757,7 @@ struct ext4_inode_info { + #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ + #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ + #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ ++#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ + + #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt + #define set_opt(o, opt) o |= EXT4_MOUNT_##opt +diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h +index a286598..1892a77 100644 +--- a/fs/ext4/ext4_jbd2.h ++++ b/fs/ext4/ext4_jbd2.h +@@ -49,7 +49,7 @@ + + #define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ + EXT4_XATTR_TRANS_BLOCKS - 2 + \ +- 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) ++ EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) + + /* + * Define the number of metadata blocks we need to account to modify data. +@@ -57,7 +57,7 @@ + * This include super block, inode block, quota blocks and xattr blocks + */ + #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ +- 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) ++ EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) + + /* Delete operations potentially hit one directory's namespace plus an + * entire inode, plus arbitrary amounts of bitmap/indirection data. Be +@@ -92,6 +92,7 @@ + * but inode, sb and group updates are done only once */ + #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) ++ + #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) + #else +@@ -99,6 +100,9 @@ + #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 + #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 + #endif ++#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) ++#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) ++#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) + + int + ext4_mark_iloc_dirty(handle_t *handle, +@@ -254,6 +258,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) + return 0; + } + ++static inline void ext4_update_inode_fsync_trans(handle_t *handle, ++ struct inode *inode, ++ int datasync) ++{ ++ struct ext4_inode_info *ei = EXT4_I(inode); ++ ++ if (ext4_handle_valid(handle)) { ++ ei->i_sync_tid = handle->h_transaction->t_tid; ++ if (datasync) ++ ei->i_datasync_tid = handle->h_transaction->t_tid; ++ } ++} ++ + /* super.c */ + int ext4_force_commit(struct super_block *sb); + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 715264b..8b8bae4 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -1761,7 +1761,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, + while (block < last && block != EXT_MAX_BLOCK) { + num = last - block; + /* find extent for this block */ ++ down_read(&EXT4_I(inode)->i_data_sem); + path = ext4_ext_find_extent(inode, block, path); ++ up_read(&EXT4_I(inode)->i_data_sem); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; +@@ -2074,7 +2076,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, + ext_debug("free last %u blocks starting %llu\n", num, start); + for (i = 0; i < num; i++) { + bh = sb_find_get_block(inode->i_sb, start + i); +- ext4_forget(handle, 0, inode, bh, start + i); ++ ext4_forget(handle, metadata, inode, bh, start + i); + } + ext4_free_blocks(handle, inode, start, num, metadata); + } else if (from == le32_to_cpu(ex->ee_block) +@@ -2167,7 +2169,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, + correct_index = 1; + credits += (ext_depth(inode)) + 1; + } +- credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); ++ credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); + + err = ext4_ext_truncate_extend_restart(handle, inode, credits); + if (err) +@@ -3064,6 +3066,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, + if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { + ret = ext4_convert_unwritten_extents_dio(handle, inode, + path); ++ if (ret >= 0) ++ ext4_update_inode_fsync_trans(handle, inode, 1); + goto out2; + } + /* buffered IO case */ +@@ -3091,6 +3095,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, + ret = ext4_ext_convert_to_initialized(handle, inode, + path, iblock, + max_blocks); ++ if (ret >= 0) ++ ext4_update_inode_fsync_trans(handle, inode, 1); + out: + if (ret <= 0) { + err = ret; +@@ -3329,10 +3335,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, + allocated = ext4_ext_get_actual_len(&newex); + set_buffer_new(bh_result); + +- /* Cache only when it is _not_ an uninitialized extent */ +- if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) ++ /* ++ * Cache the extent and update transaction to commit on fdatasync only ++ * when it is _not_ an uninitialized extent. ++ */ ++ if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { + ext4_ext_put_in_cache(inode, iblock, allocated, newblock, + EXT4_EXT_CACHE_EXTENT); ++ ext4_update_inode_fsync_trans(handle, inode, 1); ++ } else ++ ext4_update_inode_fsync_trans(handle, inode, 0); + out: + if (allocated > max_blocks) + allocated = max_blocks; +@@ -3720,10 +3732,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + * Walk the extent tree gathering extent information. + * ext4_ext_fiemap_cb will push extents back to user. + */ +- down_read(&EXT4_I(inode)->i_data_sem); + error = ext4_ext_walk_space(inode, start_blk, len_blks, + ext4_ext_fiemap_cb, fieinfo); +- up_read(&EXT4_I(inode)->i_data_sem); + } + + return error; +diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c +index 2b15312..d6049e4 100644 +--- a/fs/ext4/fsync.c ++++ b/fs/ext4/fsync.c +@@ -51,25 +51,30 @@ + int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) + { + struct inode *inode = dentry->d_inode; ++ struct ext4_inode_info *ei = EXT4_I(inode); + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; +- int err, ret = 0; ++ int ret; ++ tid_t commit_tid; + + J_ASSERT(ext4_journal_current_handle() == NULL); + + trace_ext4_sync_file(file, dentry, datasync); + ++ if (inode->i_sb->s_flags & MS_RDONLY) ++ return 0; ++ + ret = flush_aio_dio_completed_IO(inode); + if (ret < 0) +- goto out; ++ return ret; ++ ++ if (!journal) ++ return simple_fsync(file, dentry, datasync); ++ + /* +- * data=writeback: ++ * data=writeback,ordered: + * The caller's filemap_fdatawrite()/wait will sync the data. +- * sync_inode() will sync the metadata +- * +- * data=ordered: +- * The caller's filemap_fdatawrite() will write the data and +- * sync_inode() will write the inode if it is dirty. Then the caller's +- * filemap_fdatawait() will wait on the pages. ++ * Metadata is in the journal, we wait for proper transaction to ++ * commit here. + * + * data=journal: + * filemap_fdatawrite won't do anything (the buffers are clean). +@@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) + * (they were dirtied by commit). But that's OK - the blocks are + * safe in-journal, which is all fsync() needs to ensure. + */ +- if (ext4_should_journal_data(inode)) { +- ret = ext4_force_commit(inode->i_sb); +- goto out; +- } ++ if (ext4_should_journal_data(inode)) ++ return ext4_force_commit(inode->i_sb); + +- if (!journal) +- ret = sync_mapping_buffers(inode->i_mapping); +- +- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) +- goto out; +- +- /* +- * The VFS has written the file data. If the inode is unaltered +- * then we need not start a commit. +- */ +- if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { +- struct writeback_control wbc = { +- .sync_mode = WB_SYNC_ALL, +- .nr_to_write = 0, /* sys_fsync did this */ +- }; +- err = sync_inode(inode, &wbc); +- if (ret == 0) +- ret = err; +- } +-out: +- if (journal && (journal->j_flags & JBD2_BARRIER)) ++ commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; ++ if (jbd2_log_start_commit(journal, commit_tid)) ++ jbd2_log_wait_commit(journal, commit_tid); ++ else if (journal->j_flags & JBD2_BARRIER) + blkdev_issue_flush(inode->i_sb->s_bdev, NULL); + return ret; + } +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 2c8caa5..1dae9a4 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1021,10 +1021,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, + if (!err) + err = ext4_splice_branch(handle, inode, iblock, + partial, indirect_blks, count); +- else ++ if (err) + goto cleanup; + + set_buffer_new(bh_result); ++ ++ ext4_update_inode_fsync_trans(handle, inode, 1); + got_it: + map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + if (count > blocks_to_boundary) +@@ -1052,7 +1054,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode) + EXT4_I(inode)->i_reserved_meta_blocks; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + +- return total; ++ return (total << inode->i_blkbits); + } + /* + * Calculate the number of metadata blocks need to reserve +@@ -1534,6 +1536,16 @@ static int do_journal_get_write_access(handle_t *handle, + return ext4_journal_get_write_access(handle, bh); + } + ++/* ++ * Truncate blocks that were not used by write. We have to truncate the ++ * pagecache as well so that corresponding buffers get properly unmapped. ++ */ ++static void ext4_truncate_failed_write(struct inode *inode) ++{ ++ truncate_inode_pages(inode->i_mapping, inode->i_size); ++ ext4_truncate(inode); ++} ++ + static int ext4_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +@@ -1599,7 +1611,7 @@ retry: + + ext4_journal_stop(handle); + if (pos + len > inode->i_size) { +- ext4_truncate(inode); ++ ext4_truncate_failed_write(inode); + /* + * If truncate failed early the inode might + * still be on the orphan list; we need to +@@ -1709,7 +1721,7 @@ static int ext4_ordered_write_end(struct file *file, + ret = ret2; + + if (pos + len > inode->i_size) { +- ext4_truncate(inode); ++ ext4_truncate_failed_write(inode); + /* + * If truncate failed early the inode might still be + * on the orphan list; we need to make sure the inode +@@ -1751,7 +1763,7 @@ static int ext4_writeback_write_end(struct file *file, + ret = ret2; + + if (pos + len > inode->i_size) { +- ext4_truncate(inode); ++ ext4_truncate_failed_write(inode); + /* + * If truncate failed early the inode might still be + * on the orphan list; we need to make sure the inode +@@ -1814,7 +1826,7 @@ static int ext4_journalled_write_end(struct file *file, + if (!ret) + ret = ret2; + if (pos + len > inode->i_size) { +- ext4_truncate(inode); ++ ext4_truncate_failed_write(inode); + /* + * If truncate failed early the inode might still be + * on the orphan list; we need to make sure the inode +@@ -2788,7 +2800,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) + * number of contiguous block. So we will limit + * number of contiguous block to a sane value + */ +- if (!(inode->i_flags & EXT4_EXTENTS_FL) && ++ if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && + (max_blocks > EXT4_MAX_TRANS_DATA)) + max_blocks = EXT4_MAX_TRANS_DATA; + +@@ -3091,7 +3103,7 @@ retry: + * i_size_read because we hold i_mutex. + */ + if (pos + len > inode->i_size) +- ext4_truncate(inode); ++ ext4_truncate_failed_write(inode); + } + + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) +@@ -4120,6 +4132,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, + __le32 *last) + { + __le32 *p; ++ int is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode); ++ + if (try_to_extend_transaction(handle, inode)) { + if (bh) { + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); +@@ -4150,11 +4164,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, + + *p = 0; + tbh = sb_find_get_block(inode->i_sb, nr); +- ext4_forget(handle, 0, inode, tbh, nr); ++ ext4_forget(handle, is_metadata, inode, tbh, nr); + } + } + +- ext4_free_blocks(handle, inode, block_to_free, count, 0); ++ ext4_free_blocks(handle, inode, block_to_free, count, is_metadata); + } + + /** +@@ -4781,8 +4795,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) + struct ext4_iloc iloc; + struct ext4_inode *raw_inode; + struct ext4_inode_info *ei; +- struct buffer_head *bh; + struct inode *inode; ++ journal_t *journal = EXT4_SB(sb)->s_journal; + long ret; + int block; + +@@ -4793,11 +4807,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) + return inode; + + ei = EXT4_I(inode); ++ iloc.bh = 0; + + ret = __ext4_get_inode_loc(inode, &iloc, 0); + if (ret < 0) + goto bad_inode; +- bh = iloc.bh; + raw_inode = ext4_raw_inode(&iloc); + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); +@@ -4820,7 +4834,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) + if (inode->i_mode == 0 || + !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { + /* this inode is deleted */ +- brelse(bh); + ret = -ESTALE; + goto bad_inode; + } +@@ -4848,11 +4861,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) + ei->i_data[block] = raw_inode->i_block[block]; + INIT_LIST_HEAD(&ei->i_orphan); + ++ /* ++ * Set transaction id's of transactions that have to be committed ++ * to finish f[data]sync. We set them to currently running transaction ++ * as we cannot be sure that the inode or some of its metadata isn't ++ * part of the transaction - the inode could have been reclaimed and ++ * now it is reread from disk. ++ */ ++ if (journal) { ++ transaction_t *transaction; ++ tid_t tid; ++ ++ spin_lock(&journal->j_state_lock); ++ if (journal->j_running_transaction) ++ transaction = journal->j_running_transaction; ++ else ++ transaction = journal->j_committing_transaction; ++ if (transaction) ++ tid = transaction->t_tid; ++ else ++ tid = journal->j_commit_sequence; ++ spin_unlock(&journal->j_state_lock); ++ ei->i_sync_tid = tid; ++ ei->i_datasync_tid = tid; ++ } ++ + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { + ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); + if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > + EXT4_INODE_SIZE(inode->i_sb)) { +- brelse(bh); + ret = -EIO; + goto bad_inode; + } +@@ -4884,10 +4921,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) + + ret = 0; + if (ei->i_file_acl && +- ((ei->i_file_acl < +- (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + +- EXT4_SB(sb)->s_gdb_count)) || +- (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { ++ !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { + ext4_error(sb, __func__, + "bad extended attribute block %llu in inode #%lu", + ei->i_file_acl, inode->i_ino); +@@ -4905,10 +4939,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) + /* Validate block references which are part of inode */ + ret = ext4_check_inode_blockref(inode); + } +- if (ret) { +- brelse(bh); ++ if (ret) + goto bad_inode; +- } + + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext4_file_inode_operations; +@@ -4936,7 +4968,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) + init_special_inode(inode, inode->i_mode, + new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); + } else { +- brelse(bh); + ret = -EIO; + ext4_error(inode->i_sb, __func__, + "bogus i_mode (%o) for inode=%lu", +@@ -4949,6 +4980,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) + return inode; + + bad_inode: ++ brelse(iloc.bh); + iget_failed(inode); + return ERR_PTR(ret); + } +@@ -5108,6 +5140,7 @@ static int ext4_do_update_inode(handle_t *handle, + err = rc; + ei->i_state &= ~EXT4_STATE_NEW; + ++ ext4_update_inode_fsync_trans(handle, inode, 0); + out_brelse: + brelse(bh); + ext4_std_error(inode->i_sb, err); +@@ -5227,8 +5260,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) + + /* (user+group)*(old+new) structure, inode write (sb, + * inode block, ? - but truncate inode update has it) */ +- handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ +- EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); ++ handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ ++ EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; +diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c +index c1cdf61..b63d193 100644 +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -221,31 +221,38 @@ setversion_out: + struct file *donor_filp; + int err; + ++ if (!(filp->f_mode & FMODE_READ) || ++ !(filp->f_mode & FMODE_WRITE)) ++ return -EBADF; ++ + if (copy_from_user(&me, + (struct move_extent __user *)arg, sizeof(me))) + return -EFAULT; ++ me.moved_len = 0; + + donor_filp = fget(me.donor_fd); + if (!donor_filp) + return -EBADF; + +- if (!capable(CAP_DAC_OVERRIDE)) { +- if ((current->real_cred->fsuid != inode->i_uid) || +- !(inode->i_mode & S_IRUSR) || +- !(donor_filp->f_dentry->d_inode->i_mode & +- S_IRUSR)) { +- fput(donor_filp); +- return -EACCES; +- } ++ if (!(donor_filp->f_mode & FMODE_WRITE)) { ++ err = -EBADF; ++ goto mext_out; + } + ++ err = mnt_want_write(filp->f_path.mnt); ++ if (err) ++ goto mext_out; ++ + err = ext4_move_extents(filp, donor_filp, me.orig_start, + me.donor_start, me.len, &me.moved_len); +- fput(donor_filp); ++ mnt_drop_write(filp->f_path.mnt); ++ if (me.moved_len > 0) ++ file_remove_suid(donor_filp); + + if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) +- return -EFAULT; +- ++ err = -EFAULT; ++mext_out: ++ fput(donor_filp); + return err; + } + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index bba1282..7d71148 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) + struct ext4_group_info *db; + int err, count = 0, count2 = 0; + struct ext4_free_data *entry; +- ext4_fsblk_t discard_block; + struct list_head *l, *ltmp; + + list_for_each_safe(l, ltmp, &txn->t_private_list) { +@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) + page_cache_release(e4b.bd_bitmap_page); + } + ext4_unlock_group(sb, entry->group); +- discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) +- + entry->start_blk +- + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); +- trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, +- entry->count); +- sb_issue_discard(sb, discard_block, entry->count); +- ++ if (test_opt(sb, DISCARD)) { ++ ext4_fsblk_t discard_block; ++ struct ext4_super_block *es = EXT4_SB(sb)->s_es; ++ ++ discard_block = (ext4_fsblk_t)entry->group * ++ EXT4_BLOCKS_PER_GROUP(sb) ++ + entry->start_blk ++ + le32_to_cpu(es->s_first_data_block); ++ trace_ext4_discard_blocks(sb, ++ (unsigned long long)discard_block, ++ entry->count); ++ sb_issue_discard(sb, discard_block, entry->count); ++ } + kmem_cache_free(ext4_free_ext_cachep, entry); + ext4_mb_release_desc(&e4b); + } +@@ -3006,6 +3011,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) + } + + /* ++ * Called on failure; free up any blocks from the inode PA for this ++ * context. We don't need this for MB_GROUP_PA because we only change ++ * pa_free in ext4_mb_release_context(), but on failure, we've already ++ * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed. ++ */ ++static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) ++{ ++ struct ext4_prealloc_space *pa = ac->ac_pa; ++ int len; ++ ++ if (pa && pa->pa_type == MB_INODE_PA) { ++ len = ac->ac_b_ex.fe_len; ++ pa->pa_free += len; ++ } ++ ++} ++ ++/* + * use blocks preallocated to inode + */ + static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, +@@ -4290,6 +4313,7 @@ repeat: + ac->ac_status = AC_STATUS_CONTINUE; + goto repeat; + } else if (*errp) { ++ ext4_discard_allocated_blocks(ac); + ac->ac_b_ex.fe_len = 0; + ar->len = 0; + ext4_mb_show_ac(ac); +diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c +index a93d5b8..8646149 100644 +--- a/fs/ext4/migrate.c ++++ b/fs/ext4/migrate.c +@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) + * So allocate a credit of 3. We may update + * quota (user and group). + */ +- needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); ++ needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); + + if (ext4_journal_extend(handle, needed) != 0) + retval = ext4_journal_restart(handle, needed); +@@ -477,7 +477,7 @@ int ext4_ext_migrate(struct inode *inode) + handle = ext4_journal_start(inode, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + +- 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) ++ EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) + + 1); + if (IS_ERR(handle)) { + retval = PTR_ERR(handle); +diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c +index 25b6b14..f5b03a1 100644 +--- a/fs/ext4/move_extent.c ++++ b/fs/ext4/move_extent.c +@@ -77,12 +77,14 @@ static int + mext_next_extent(struct inode *inode, struct ext4_ext_path *path, + struct ext4_extent **extent) + { ++ struct ext4_extent_header *eh; + int ppos, leaf_ppos = path->p_depth; + + ppos = leaf_ppos; + if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { + /* leaf block */ + *extent = ++path[ppos].p_ext; ++ path[ppos].p_block = ext_pblock(path[ppos].p_ext); + return 0; + } + +@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, + ext_block_hdr(path[cur_ppos+1].p_bh); + } + ++ path[leaf_ppos].p_ext = *extent = NULL; ++ ++ eh = path[leaf_ppos].p_hdr; ++ if (le16_to_cpu(eh->eh_entries) == 0) ++ /* empty leaf is found */ ++ return -ENODATA; ++ + /* leaf block */ + path[leaf_ppos].p_ext = *extent = + EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); ++ path[leaf_ppos].p_block = ++ ext_pblock(path[leaf_ppos].p_ext); + return 0; + } + } +@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, + } + + /** +- * mext_double_down_read - Acquire two inodes' read semaphore +- * +- * @orig_inode: original inode structure +- * @donor_inode: donor inode structure +- * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. +- */ +-static void +-mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) +-{ +- struct inode *first = orig_inode, *second = donor_inode; +- +- /* +- * Use the inode number to provide the stable locking order instead +- * of its address, because the C language doesn't guarantee you can +- * compare pointers that don't come from the same array. +- */ +- if (donor_inode->i_ino < orig_inode->i_ino) { +- first = donor_inode; +- second = orig_inode; +- } +- +- down_read(&EXT4_I(first)->i_data_sem); +- down_read(&EXT4_I(second)->i_data_sem); +-} +- +-/** +- * mext_double_down_write - Acquire two inodes' write semaphore ++ * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem + * + * @orig_inode: original inode structure + * @donor_inode: donor inode structure +- * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. ++ * Acquire write lock of i_data_sem of the two inodes (orig and donor) by ++ * i_ino order. + */ + static void +-mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) ++double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) + { + struct inode *first = orig_inode, *second = donor_inode; + +@@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) + } + + down_write(&EXT4_I(first)->i_data_sem); +- down_write(&EXT4_I(second)->i_data_sem); ++ down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); + } + + /** +- * mext_double_up_read - Release two inodes' read semaphore ++ * double_up_write_data_sem - Release two inodes' write lock of i_data_sem + * + * @orig_inode: original inode structure to be released its lock first + * @donor_inode: donor inode structure to be released its lock second +- * Release read semaphore of two inodes (orig and donor). ++ * Release write lock of i_data_sem of two inodes (orig and donor). + */ + static void +-mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) +-{ +- up_read(&EXT4_I(orig_inode)->i_data_sem); +- up_read(&EXT4_I(donor_inode)->i_data_sem); +-} +- +-/** +- * mext_double_up_write - Release two inodes' write semaphore +- * +- * @orig_inode: original inode structure to be released its lock first +- * @donor_inode: donor inode structure to be released its lock second +- * Release write semaphore of two inodes (orig and donor). +- */ +-static void +-mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) ++double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) + { + up_write(&EXT4_I(orig_inode)->i_data_sem); + up_write(&EXT4_I(donor_inode)->i_data_sem); +@@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, + * @donor_inode: donor inode + * @from: block offset of orig_inode + * @count: block count to be replaced ++ * @err: pointer to save return value + * + * Replace original inode extents and donor inode extents page by page. + * We implement this replacement in the following three steps: +@@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, + * 3. Change the block information of donor inode to point at the saved + * original inode blocks in the dummy extents. + * +- * Return 0 on success, or a negative error value on failure. ++ * Return replaced block count. + */ + static int + mext_replace_branches(handle_t *handle, struct inode *orig_inode, + struct inode *donor_inode, ext4_lblk_t from, +- ext4_lblk_t count) ++ ext4_lblk_t count, int *err) + { + struct ext4_ext_path *orig_path = NULL; + struct ext4_ext_path *donor_path = NULL; + struct ext4_extent *oext, *dext; + struct ext4_extent tmp_dext, tmp_oext; + ext4_lblk_t orig_off = from, donor_off = from; +- int err = 0; + int depth; + int replaced_count = 0; + int dext_alen; + +- mext_double_down_write(orig_inode, donor_inode); ++ /* Protect extent trees against block allocations via delalloc */ ++ double_down_write_data_sem(orig_inode, donor_inode); + + /* Get the original extent for the block "orig_off" */ +- err = get_ext_path(orig_inode, orig_off, &orig_path); +- if (err) ++ *err = get_ext_path(orig_inode, orig_off, &orig_path); ++ if (*err) + goto out; + + /* Get the donor extent for the head */ +- err = get_ext_path(donor_inode, donor_off, &donor_path); +- if (err) ++ *err = get_ext_path(donor_inode, donor_off, &donor_path); ++ if (*err) + goto out; + depth = ext_depth(orig_inode); + oext = orig_path[depth].p_ext; +@@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, + dext = donor_path[depth].p_ext; + tmp_dext = *dext; + +- err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, ++ *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + donor_off, count); +- if (err) ++ if (*err) + goto out; + + /* Loop for the donor extents */ +@@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, + if (!dext) { + ext4_error(donor_inode->i_sb, __func__, + "The extent for donor must be found"); +- err = -EIO; ++ *err = -EIO; + goto out; + } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { + ext4_error(donor_inode->i_sb, __func__, +@@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, + "extent(%u) should be equal", + donor_off, + le32_to_cpu(tmp_dext.ee_block)); +- err = -EIO; ++ *err = -EIO; + goto out; + } + + /* Set donor extent to orig extent */ +- err = mext_leaf_block(handle, orig_inode, ++ *err = mext_leaf_block(handle, orig_inode, + orig_path, &tmp_dext, &orig_off); +- if (err < 0) ++ if (*err) + goto out; + + /* Set orig extent to donor extent */ +- err = mext_leaf_block(handle, donor_inode, ++ *err = mext_leaf_block(handle, donor_inode, + donor_path, &tmp_oext, &donor_off); +- if (err < 0) ++ if (*err) + goto out; + + dext_alen = ext4_ext_get_actual_len(&tmp_dext); +@@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, + + if (orig_path) + ext4_ext_drop_refs(orig_path); +- err = get_ext_path(orig_inode, orig_off, &orig_path); +- if (err) ++ *err = get_ext_path(orig_inode, orig_off, &orig_path); ++ if (*err) + goto out; + depth = ext_depth(orig_inode); + oext = orig_path[depth].p_ext; +- if (le32_to_cpu(oext->ee_block) + +- ext4_ext_get_actual_len(oext) <= orig_off) { +- err = 0; +- goto out; +- } + tmp_oext = *oext; + + if (donor_path) + ext4_ext_drop_refs(donor_path); +- err = get_ext_path(donor_inode, donor_off, &donor_path); +- if (err) ++ *err = get_ext_path(donor_inode, donor_off, &donor_path); ++ if (*err) + goto out; + depth = ext_depth(donor_inode); + dext = donor_path[depth].p_ext; +- if (le32_to_cpu(dext->ee_block) + +- ext4_ext_get_actual_len(dext) <= donor_off) { +- err = 0; +- goto out; +- } + tmp_dext = *dext; + +- err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, ++ *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + donor_off, count - replaced_count); +- if (err) ++ if (*err) + goto out; + } + +@@ -795,8 +758,12 @@ out: + kfree(donor_path); + } + +- mext_double_up_write(orig_inode, donor_inode); +- return err; ++ ext4_ext_invalidate_cache(orig_inode); ++ ext4_ext_invalidate_cache(donor_inode); ++ ++ double_up_write_data_sem(orig_inode, donor_inode); ++ ++ return replaced_count; + } + + /** +@@ -808,16 +775,17 @@ out: + * @data_offset_in_page: block index where data swapping starts + * @block_len_in_page: the number of blocks to be swapped + * @uninit: orig extent is uninitialized or not ++ * @err: pointer to save return value + * + * Save the data in original inode blocks and replace original inode extents + * with donor inode extents by calling mext_replace_branches(). +- * Finally, write out the saved data in new original inode blocks. Return 0 +- * on success, or a negative error value on failure. ++ * Finally, write out the saved data in new original inode blocks. Return ++ * replaced block count. + */ + static int + move_extent_per_page(struct file *o_filp, struct inode *donor_inode, + pgoff_t orig_page_offset, int data_offset_in_page, +- int block_len_in_page, int uninit) ++ int block_len_in_page, int uninit, int *err) + { + struct inode *orig_inode = o_filp->f_dentry->d_inode; + struct address_space *mapping = orig_inode->i_mapping; +@@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, + long long offs = orig_page_offset << PAGE_CACHE_SHIFT; + unsigned long blocksize = orig_inode->i_sb->s_blocksize; + unsigned int w_flags = 0; +- unsigned int tmp_data_len, data_len; ++ unsigned int tmp_data_size, data_size, replaced_size; + void *fsdata; +- int ret, i, jblocks; ++ int i, jblocks; ++ int err2 = 0; ++ int replaced_count = 0; + int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; + + /* +@@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, + jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; + handle = ext4_journal_start(orig_inode, jblocks); + if (IS_ERR(handle)) { +- ret = PTR_ERR(handle); +- return ret; ++ *err = PTR_ERR(handle); ++ return 0; + } + + if (segment_eq(get_fs(), KERNEL_DS)) +@@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, + * Just swap data blocks between orig and donor. + */ + if (uninit) { +- ret = mext_replace_branches(handle, orig_inode, +- donor_inode, orig_blk_offset, +- block_len_in_page); +- +- /* Clear the inode cache not to refer to the old data */ +- ext4_ext_invalidate_cache(orig_inode); +- ext4_ext_invalidate_cache(donor_inode); ++ replaced_count = mext_replace_branches(handle, orig_inode, ++ donor_inode, orig_blk_offset, ++ block_len_in_page, err); + goto out2; + } + + offs = (long long)orig_blk_offset << orig_inode->i_blkbits; + +- /* Calculate data_len */ ++ /* Calculate data_size */ + if ((orig_blk_offset + block_len_in_page - 1) == + ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { + /* Replace the last block */ +- tmp_data_len = orig_inode->i_size & (blocksize - 1); ++ tmp_data_size = orig_inode->i_size & (blocksize - 1); + /* +- * If data_len equal zero, it shows data_len is multiples of ++ * If data_size equal zero, it shows data_size is multiples of + * blocksize. So we set appropriate value. + */ +- if (tmp_data_len == 0) +- tmp_data_len = blocksize; ++ if (tmp_data_size == 0) ++ tmp_data_size = blocksize; + +- data_len = tmp_data_len + ++ data_size = tmp_data_size + + ((block_len_in_page - 1) << orig_inode->i_blkbits); +- } else { +- data_len = block_len_in_page << orig_inode->i_blkbits; +- } ++ } else ++ data_size = block_len_in_page << orig_inode->i_blkbits; ++ ++ replaced_size = data_size; + +- ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, ++ *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, + &page, &fsdata); +- if (unlikely(ret < 0)) ++ if (unlikely(*err < 0)) + goto out; + + if (!PageUptodate(page)) { +@@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, + /* Release old bh and drop refs */ + try_to_release_page(page, 0); + +- ret = mext_replace_branches(handle, orig_inode, donor_inode, +- orig_blk_offset, block_len_in_page); +- if (ret < 0) +- goto out; +- +- /* Clear the inode cache not to refer to the old data */ +- ext4_ext_invalidate_cache(orig_inode); +- ext4_ext_invalidate_cache(donor_inode); ++ replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, ++ orig_blk_offset, block_len_in_page, ++ &err2); ++ if (err2) { ++ if (replaced_count) { ++ block_len_in_page = replaced_count; ++ replaced_size = ++ block_len_in_page << orig_inode->i_blkbits; ++ } else ++ goto out; ++ } + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); +@@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, + bh = bh->b_this_page; + + for (i = 0; i < block_len_in_page; i++) { +- ret = ext4_get_block(orig_inode, ++ *err = ext4_get_block(orig_inode, + (sector_t)(orig_blk_offset + i), bh, 0); +- if (ret < 0) ++ if (*err < 0) + goto out; + + if (bh->b_this_page != NULL) + bh = bh->b_this_page; + } + +- ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, ++ *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, + page, fsdata); + page = NULL; + +@@ -951,7 +921,10 @@ out: + out2: + ext4_journal_stop(handle); + +- return ret < 0 ? ret : 0; ++ if (err2) ++ *err = err2; ++ ++ return replaced_count; + } + + /** +@@ -962,7 +935,6 @@ out2: + * @orig_start: logical start offset in block for orig + * @donor_start: logical start offset in block for donor + * @len: the number of blocks to be moved +- * @moved_len: moved block length + * + * Check the arguments of ext4_move_extents() whether the files can be + * exchanged with each other. +@@ -970,8 +942,8 @@ out2: + */ + static int + mext_check_arguments(struct inode *orig_inode, +- struct inode *donor_inode, __u64 orig_start, +- __u64 donor_start, __u64 *len, __u64 moved_len) ++ struct inode *donor_inode, __u64 orig_start, ++ __u64 donor_start, __u64 *len) + { + ext4_lblk_t orig_blocks, donor_blocks; + unsigned int blkbits = orig_inode->i_blkbits; +@@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode, + return -EINVAL; + } + ++ if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { ++ ext4_debug("ext4 move extent: suid or sgid is set" ++ " to donor file [ino:orig %lu, donor %lu]\n", ++ orig_inode->i_ino, donor_inode->i_ino); ++ return -EINVAL; ++ } ++ + /* Ext4 move extent does not support swapfile */ + if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { + ext4_debug("ext4 move extent: The argument files should " +@@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode, + return -EINVAL; + } + +- if (moved_len) { +- ext4_debug("ext4 move extent: moved_len should be 0 " +- "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, +- donor_inode->i_ino); +- return -EINVAL; +- } +- + if ((orig_start > EXT_MAX_BLOCK) || + (donor_start > EXT_MAX_BLOCK) || + (*len > EXT_MAX_BLOCK) || +@@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + return -EINVAL; + } + +- /* protect orig and donor against a truncate */ ++ /* Protect orig and donor inodes against a truncate */ + ret1 = mext_inode_double_lock(orig_inode, donor_inode); + if (ret1 < 0) + return ret1; + +- mext_double_down_read(orig_inode, donor_inode); ++ /* Protect extent tree against block allocations via delalloc */ ++ double_down_write_data_sem(orig_inode, donor_inode); + /* Check the filesystem environment whether move_extent can be done */ + ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, +- donor_start, &len, *moved_len); +- mext_double_up_read(orig_inode, donor_inode); ++ donor_start, &len); + if (ret1) + goto out; + +@@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + seq_start = le32_to_cpu(ext_cur->ee_block); + rest_blocks = seq_blocks; + +- /* Discard preallocations of two inodes */ +- down_write(&EXT4_I(orig_inode)->i_data_sem); +- ext4_discard_preallocations(orig_inode); +- up_write(&EXT4_I(orig_inode)->i_data_sem); +- +- down_write(&EXT4_I(donor_inode)->i_data_sem); +- ext4_discard_preallocations(donor_inode); +- up_write(&EXT4_I(donor_inode)->i_data_sem); ++ /* ++ * Up semaphore to avoid following problems: ++ * a. transaction deadlock among ext4_journal_start, ++ * ->write_begin via pagefault, and jbd2_journal_commit ++ * b. racing with ->readpage, ->write_begin, and ext4_get_block ++ * in move_extent_per_page ++ */ ++ double_up_write_data_sem(orig_inode, donor_inode); + + while (orig_page_offset <= seq_end_page) { + + /* Swap original branches with new branches */ +- ret1 = move_extent_per_page(o_filp, donor_inode, ++ block_len_in_page = move_extent_per_page( ++ o_filp, donor_inode, + orig_page_offset, + data_offset_in_page, +- block_len_in_page, uninit); +- if (ret1 < 0) +- goto out; +- orig_page_offset++; ++ block_len_in_page, uninit, ++ &ret1); ++ + /* Count how many blocks we have exchanged */ + *moved_len += block_len_in_page; ++ if (ret1 < 0) ++ break; + if (*moved_len > len) { + ext4_error(orig_inode->i_sb, __func__, + "We replaced blocks too much! " + "sum of replaced: %llu requested: %llu", + *moved_len, len); + ret1 = -EIO; +- goto out; ++ break; + } + ++ orig_page_offset++; + data_offset_in_page = 0; + rest_blocks -= block_len_in_page; + if (rest_blocks > blocks_per_page) +@@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + block_len_in_page = rest_blocks; + } + ++ double_down_write_data_sem(orig_inode, donor_inode); ++ if (ret1 < 0) ++ break; ++ + /* Decrease buffer counter */ + if (holecheck_path) + ext4_ext_drop_refs(holecheck_path); +@@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, + + } + out: ++ if (*moved_len) { ++ ext4_discard_preallocations(orig_inode); ++ ext4_discard_preallocations(donor_inode); ++ } ++ + if (orig_path) { + ext4_ext_drop_refs(orig_path); + kfree(orig_path); +@@ -1422,7 +1406,7 @@ out: + ext4_ext_drop_refs(holecheck_path); + kfree(holecheck_path); + } +- ++ double_up_write_data_sem(orig_inode, donor_inode); + ret2 = mext_inode_double_unlock(orig_inode, donor_inode); + + if (ret1) +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 6d2c1b8..17a17e1 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -1292,9 +1292,6 @@ errout: + * add_dirent_to_buf will attempt search the directory block for + * space. It will return -ENOSPC if no space is available, and -EIO + * and -EEXIST if directory entry already exists. +- * +- * NOTE! bh is NOT released in the case where ENOSPC is returned. In +- * all other cases bh is released. + */ + static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, + struct inode *inode, struct ext4_dir_entry_2 *de, +@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, + top = bh->b_data + blocksize - reclen; + while ((char *) de <= top) { + if (!ext4_check_dir_entry("ext4_add_entry", dir, de, +- bh, offset)) { +- brelse(bh); ++ bh, offset)) + return -EIO; +- } +- if (ext4_match(namelen, name, de)) { +- brelse(bh); ++ if (ext4_match(namelen, name, de)) + return -EEXIST; +- } + nlen = EXT4_DIR_REC_LEN(de->name_len); + rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); + if ((de->inode? rlen - nlen: rlen) >= reclen) +@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, + err = ext4_journal_get_write_access(handle, bh); + if (err) { + ext4_std_error(dir->i_sb, err); +- brelse(bh); + return err; + } + +@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, + err = ext4_handle_dirty_metadata(handle, dir, bh); + if (err) + ext4_std_error(dir->i_sb, err); +- brelse(bh); + return 0; + } + +@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, + if (!(de)) + return retval; + +- return add_dirent_to_buf(handle, dentry, inode, de, bh); ++ retval = add_dirent_to_buf(handle, dentry, inode, de, bh); ++ brelse(bh); ++ return retval; + } + + /* +@@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, + if(!bh) + return retval; + retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); +- if (retval != -ENOSPC) ++ if (retval != -ENOSPC) { ++ brelse(bh); + return retval; ++ } + + if (blocks == 1 && !dx_fallback && + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) +@@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, + de = (struct ext4_dir_entry_2 *) bh->b_data; + de->inode = 0; + de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); +- return add_dirent_to_buf(handle, dentry, inode, de, bh); ++ retval = add_dirent_to_buf(handle, dentry, inode, de, bh); ++ brelse(bh); ++ return retval; + } + + /* +@@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, + goto journal_error; + + err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); +- if (err != -ENOSPC) { +- bh = NULL; ++ if (err != -ENOSPC) + goto cleanup; +- } + + /* Block full, should compress but for now just split */ + dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", +@@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, + if (!de) + goto cleanup; + err = add_dirent_to_buf(handle, dentry, inode, de, bh); +- bh = NULL; + goto cleanup; + + journal_error: +@@ -1775,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, + retry: + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + +- 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); ++ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + +@@ -1809,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, + retry: + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + +- 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); ++ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + +@@ -1846,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) + retry: + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + +- 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); ++ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + +@@ -2259,7 +2253,7 @@ static int ext4_symlink(struct inode *dir, + retry: + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + +- 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); ++ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + +diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c +index 3cfc343..3b2c554 100644 +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb, + goto exit_bh; + + if (IS_ERR(gdb = bclean(handle, sb, block))) { +- err = PTR_ERR(bh); ++ err = PTR_ERR(gdb); + goto exit_bh; + } + ext4_handle_dirty_metadata(handle, NULL, gdb); +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index d4ca92a..9ae5217 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb) + if (sb->s_dirt) + ext4_commit_super(sb, 1); + +- ext4_release_system_zone(sb); +- ext4_mb_release(sb); +- ext4_ext_release(sb); +- ext4_xattr_put_super(sb); + if (sbi->s_journal) { + err = jbd2_journal_destroy(sbi->s_journal); + sbi->s_journal = NULL; +@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb) + ext4_abort(sb, __func__, + "Couldn't clean up the journal"); + } ++ ++ ext4_release_system_zone(sb); ++ ext4_mb_release(sb); ++ ext4_ext_release(sb); ++ ext4_xattr_put_super(sb); ++ + if (!(sb->s_flags & MS_RDONLY)) { + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + es->s_state = cpu_to_le16(sbi->s_mount_state); +@@ -704,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) + spin_lock_init(&(ei->i_block_reservation_lock)); + INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); + ei->cur_aio_dio = NULL; ++ ei->i_sync_tid = 0; ++ ei->i_datasync_tid = 0; + + return &ei->vfs_inode; + } +@@ -899,6 +903,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) + if (test_opt(sb, NO_AUTO_DA_ALLOC)) + seq_puts(seq, ",noauto_da_alloc"); + ++ if (test_opt(sb, DISCARD)) ++ seq_puts(seq, ",discard"); ++ ++ if (test_opt(sb, NOLOAD)) ++ seq_puts(seq, ",norecovery"); ++ + ext4_show_quota_options(seq, sb); + + return 0; +@@ -1079,7 +1089,8 @@ enum { + Opt_usrquota, Opt_grpquota, Opt_i_version, + Opt_stripe, Opt_delalloc, Opt_nodelalloc, + Opt_block_validity, Opt_noblock_validity, +- Opt_inode_readahead_blks, Opt_journal_ioprio ++ Opt_inode_readahead_blks, Opt_journal_ioprio, ++ Opt_discard, Opt_nodiscard, + }; + + static const match_table_t tokens = { +@@ -1104,6 +1115,7 @@ static const match_table_t tokens = { + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_noload, "noload"}, ++ {Opt_noload, "norecovery"}, + {Opt_nobh, "nobh"}, + {Opt_bh, "bh"}, + {Opt_commit, "commit=%u"}, +@@ -1144,6 +1156,8 @@ static const match_table_t tokens = { + {Opt_auto_da_alloc, "auto_da_alloc=%u"}, + {Opt_auto_da_alloc, "auto_da_alloc"}, + {Opt_noauto_da_alloc, "noauto_da_alloc"}, ++ {Opt_discard, "discard"}, ++ {Opt_nodiscard, "nodiscard"}, + {Opt_err, NULL}, + }; + +@@ -1565,6 +1579,12 @@ set_qf_format: + else + set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); + break; ++ case Opt_discard: ++ set_opt(sbi->s_mount_opt, DISCARD); ++ break; ++ case Opt_nodiscard: ++ clear_opt(sbi->s_mount_opt, DISCARD); ++ break; + default: + ext4_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" " +@@ -1673,14 +1693,14 @@ static int ext4_fill_flex_info(struct super_block *sb) + size_t size; + int i; + +- if (!sbi->s_es->s_log_groups_per_flex) { ++ sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; ++ groups_per_flex = 1 << sbi->s_log_groups_per_flex; ++ ++ if (groups_per_flex < 2) { + sbi->s_log_groups_per_flex = 0; + return 1; + } + +- sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; +- groups_per_flex = 1 << sbi->s_log_groups_per_flex; +- + /* We allocate both existing and potentially added groups */ + flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + + ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << +@@ -3668,13 +3688,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) + buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; + buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - + percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); +- ext4_free_blocks_count_set(es, buf->f_bfree); + buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); + if (buf->f_bfree < ext4_r_blocks_count(es)) + buf->f_bavail = 0; + buf->f_files = le32_to_cpu(es->s_inodes_count); + buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); +- es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); + buf->f_namelen = EXT4_NAME_LEN; + fsid = le64_to_cpup((void *)es->s_uuid) ^ + le64_to_cpup((void *)es->s_uuid + sizeof(u64)); +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index fed5b01..0257019 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -988,6 +988,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, + if (error) + goto cleanup; + ++ error = ext4_journal_get_write_access(handle, is.iloc.bh); ++ if (error) ++ goto cleanup; ++ + if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { + struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); + memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); +@@ -1013,9 +1017,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, + if (flags & XATTR_CREATE) + goto cleanup; + } +- error = ext4_journal_get_write_access(handle, is.iloc.bh); +- if (error) +- goto cleanup; + if (!value) { + if (!is.s.not_found) + error = ext4_xattr_ibody_set(handle, inode, &i, &is); +diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c +index d4cfd6d..8896c1d 100644 +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) + JBUFFER_TRACE(jh, "ph3: write metadata"); + flags = jbd2_journal_write_metadata_buffer(commit_transaction, + jh, &new_jh, blocknr); ++ if (flags < 0) { ++ jbd2_journal_abort(journal, flags); ++ continue; ++ } + set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); + wbuf[bufs++] = jh2bh(new_jh); + +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index fed8538..82c295d 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno); + EXPORT_SYMBOL(jbd2_journal_ack_err); + EXPORT_SYMBOL(jbd2_journal_clear_err); + EXPORT_SYMBOL(jbd2_log_wait_commit); ++EXPORT_SYMBOL(jbd2_log_start_commit); + EXPORT_SYMBOL(jbd2_journal_start_commit); + EXPORT_SYMBOL(jbd2_journal_force_commit_nested); + EXPORT_SYMBOL(jbd2_journal_wipe); +@@ -358,6 +359,10 @@ repeat: + + jbd_unlock_bh_state(bh_in); + tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); ++ if (!tmp) { ++ jbd2_journal_put_journal_head(new_jh); ++ return -ENOMEM; ++ } + jbd_lock_bh_state(bh_in); + if (jh_in->b_frozen_data) { + jbd2_free(tmp, bh_in->b_size); +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 75e6e60..0f67914 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -2086,11 +2086,18 @@ static inline int is_si_special(const struct siginfo *info) + return info <= SEND_SIG_FORCED; + } + +-/* True if we are on the alternate signal stack. */ +- ++/* ++ * True if we are on the alternate signal stack. ++ */ + static inline int on_sig_stack(unsigned long sp) + { +- return (sp - current->sas_ss_sp < current->sas_ss_size); ++#ifdef CONFIG_STACK_GROWSUP ++ return sp >= current->sas_ss_sp && ++ sp - current->sas_ss_sp < current->sas_ss_size; ++#else ++ return sp > current->sas_ss_sp && ++ sp - current->sas_ss_sp <= current->sas_ss_size; ++#endif + } + + static inline int sas_ss_flags(unsigned long sp) +diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h +index 47941fc..0b4baba 100644 +--- a/include/scsi/scsi_host.h ++++ b/include/scsi/scsi_host.h +@@ -677,6 +677,12 @@ struct Scsi_Host { + void *shost_data; + + /* ++ * Points to the physical bus device we'd use to do DMA ++ * Needed just in case we have virtual hosts. ++ */ ++ struct device *dma_dev; ++ ++ /* + * We should ensure that this is aligned, both for better performance + * and also because some compilers (m68k) don't automatically force + * alignment to a long boundary. +@@ -720,7 +726,9 @@ extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); + extern void scsi_flush_work(struct Scsi_Host *); + + extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int); +-extern int __must_check scsi_add_host(struct Scsi_Host *, struct device *); ++extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *, ++ struct device *, ++ struct device *); + extern void scsi_scan_host(struct Scsi_Host *); + extern void scsi_rescan_device(struct device *); + extern void scsi_remove_host(struct Scsi_Host *); +@@ -731,6 +739,12 @@ extern const char *scsi_host_state_name(enum scsi_host_state); + + extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *); + ++static inline int __must_check scsi_add_host(struct Scsi_Host *host, ++ struct device *dev) ++{ ++ return scsi_add_host_with_dma(host, dev, dev); ++} ++ + static inline struct device *scsi_get_device(struct Scsi_Host *shost) + { + return shost->shost_gendev.parent; diff --git a/debian/patches/series/2 b/debian/patches/series/2 index 205fb762c..8048ea250 100644 --- a/debian/patches/series/2 +++ b/debian/patches/series/2 @@ -1,3 +1,4 @@ + features/all/aufs2/aufs2-20091205.patch + bugfix/all/atl1c-use-common_task-instead-of-reset_task-and-link.patch + bugfix/all/netfilter-xtables-fix-conntrack-match-v1-ipt-save-output.patch ++ bugfix/all/stable/2.6.32.1.patch