[kernel] r14786 - in dists/trunk/linux-2.6/debian: . patches/bugfix/all/stable patches/series

Mon Dec 14 22:27:59 UTC 2009

Author: dannf
Date: Mon Dec 14 22:27:57 2009
New Revision: 14786

Log:
* Add stable release 2.6.32.1:
  - ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT (CVE-2009-4131)

Added:
   dists/trunk/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.1.patch
Modified:
   dists/trunk/linux-2.6/debian/changelog
   dists/trunk/linux-2.6/debian/patches/series/2

Modified: dists/trunk/linux-2.6/debian/changelog
==============================================================================

--- dists/trunk/linux-2.6/debian/changelog	Mon Dec 14 02:03:57 2009	(r14785)
+++ dists/trunk/linux-2.6/debian/changelog	Mon Dec 14 22:27:57 2009	(r14786)
@@ -23,6 +23,10 @@
   [ Aurelien Jarno ]
   * Add support for the sparc64 architecture.
 
+  [ dann frazier ]
+  * Add stable release 2.6.32.1:
+    - ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT (CVE-2009-4131)
+
  -- Aurelien Jarno <aurel32 at debian.org>  Sun, 13 Dec 2009 13:25:45 +0100
 
 linux-2.6 (2.6.32-1) unstable; urgency=low

Added: dists/trunk/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.1.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/trunk/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.1.patch	Mon Dec 14 22:27:57 2009	(r14786)
@@ -0,0 +1,1834 @@
+diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
+index 6d94e06..af6885c 100644
+--- a/Documentation/filesystems/ext4.txt
++++ b/Documentation/filesystems/ext4.txt
+@@ -153,8 +153,8 @@ journal_dev=devnum	When the external journal device's major/minor numbers
+ 			identified through its new major/minor numbers encoded
+ 			in devnum.
+ 
+-noload			Don't load the journal on mounting.  Note that
+-                     	if the filesystem was not unmounted cleanly,
++norecovery		Don't load the journal on mounting.  Note that
++noload			if the filesystem was not unmounted cleanly,
+                      	skipping the journal replay will lead to the
+                      	filesystem containing inconsistencies that can
+                      	lead to any number of problems.
+@@ -353,6 +353,12 @@ noauto_da_alloc		replacing existing files via patterns such as
+ 			system crashes before the delayed allocation
+ 			blocks are forced to disk.
+ 
++discard		Controls whether ext4 should issue discard/TRIM
++nodiscard(*)		commands to the underlying block device when
++			blocks are freed.  This is useful for SSD devices
++			and sparse/thinly-provisioned LUNs, but it is off
++			by default until sufficient testing has been done.
++
+ Data Mode
+ =========
+ There are 3 different data modes:
+diff --git a/Makefile b/Makefile
+index f5cdb72..d0d7e9c 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 32
+-EXTRAVERSION =
++EXTRAVERSION = .1
+ NAME = Man-Eating Seals of Antiquity
+ 
+ # *DOCUMENTATION*
+diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
+index c968cc3..554626e 100644
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -180,14 +180,20 @@ void scsi_remove_host(struct Scsi_Host *shost)
+ EXPORT_SYMBOL(scsi_remove_host);
+ 
+ /**
+- * scsi_add_host - add a scsi host
++ * scsi_add_host_with_dma - add a scsi host with dma device
+  * @shost:	scsi host pointer to add
+  * @dev:	a struct device of type scsi class
++ * @dma_dev:	dma device for the host
++ *
++ * Note: You rarely need to worry about this unless you're in a
++ * virtualised host environment, so use the simpler scsi_add_host()
++ * function instead.
+  *
+  * Return value: 
+  * 	0 on success / != 0 for error
+  **/
+-int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
++int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
++			   struct device *dma_dev)
+ {
+ 	struct scsi_host_template *sht = shost->hostt;
+ 	int error = -EINVAL;
+@@ -207,6 +213,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
+ 
+ 	if (!shost->shost_gendev.parent)
+ 		shost->shost_gendev.parent = dev ? dev : &platform_bus;
++	shost->dma_dev = dma_dev;
+ 
+ 	error = device_add(&shost->shost_gendev);
+ 	if (error)
+@@ -262,7 +269,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
+  fail:
+ 	return error;
+ }
+-EXPORT_SYMBOL(scsi_add_host);
++EXPORT_SYMBOL(scsi_add_host_with_dma);
+ 
+ static void scsi_host_dev_release(struct device *dev)
+ {
+diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
+index 562d8ce..f913f1e 100644
+--- a/drivers/scsi/lpfc/lpfc_init.c
++++ b/drivers/scsi/lpfc/lpfc_init.c
+@@ -2408,7 +2408,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
+ 	vport->els_tmofunc.function = lpfc_els_timeout;
+ 	vport->els_tmofunc.data = (unsigned long)vport;
+ 
+-	error = scsi_add_host(shost, dev);
++	error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
+ 	if (error)
+ 		goto out_put_shost;
+ 
+diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
+index a39addc..507ccc6 100644
+--- a/drivers/scsi/megaraid/megaraid_sas.c
++++ b/drivers/scsi/megaraid/megaraid_sas.c
+@@ -3032,7 +3032,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
+ 	int error = 0, i;
+ 	void *sense = NULL;
+ 	dma_addr_t sense_handle;
+-	u32 *sense_ptr;
++	unsigned long *sense_ptr;
+ 
+ 	memset(kbuff_arr, 0, sizeof(kbuff_arr));
+ 
+@@ -3109,7 +3109,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
+ 		}
+ 
+ 		sense_ptr =
+-		    (u32 *) ((unsigned long)cmd->frame + ioc->sense_off);
++		(unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off);
+ 		*sense_ptr = sense_handle;
+ 	}
+ 
+@@ -3140,8 +3140,8 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
+ 		 * sense_ptr points to the location that has the user
+ 		 * sense buffer address
+ 		 */
+-		sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw +
+-				     ioc->sense_off);
++		sense_ptr = (unsigned long *) ((unsigned long)ioc->frame.raw +
++				ioc->sense_off);
+ 
+ 		if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)),
+ 				 sense, ioc->sense_len)) {
+diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
+index fbcb82a..21e2bc4 100644
+--- a/drivers/scsi/qla2xxx/qla_attr.c
++++ b/drivers/scsi/qla2xxx/qla_attr.c
+@@ -1654,7 +1654,8 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
+ 			fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
+ 	}
+ 
+-	if (scsi_add_host(vha->host, &fc_vport->dev)) {
++	if (scsi_add_host_with_dma(vha->host, &fc_vport->dev,
++				   &ha->pdev->dev)) {
+ 		DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n",
+ 			vha->host_no, vha->vp_idx));
+ 		goto vport_create_failed_2;
+diff --git a/drivers/scsi/scsi_lib_dma.c b/drivers/scsi/scsi_lib_dma.c
+index ac6855c..dcd1285 100644
+--- a/drivers/scsi/scsi_lib_dma.c
++++ b/drivers/scsi/scsi_lib_dma.c
+@@ -23,7 +23,7 @@ int scsi_dma_map(struct scsi_cmnd *cmd)
+ 	int nseg = 0;
+ 
+ 	if (scsi_sg_count(cmd)) {
+-		struct device *dev = cmd->device->host->shost_gendev.parent;
++		struct device *dev = cmd->device->host->dma_dev;
+ 
+ 		nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
+ 				  cmd->sc_data_direction);
+@@ -41,7 +41,7 @@ EXPORT_SYMBOL(scsi_dma_map);
+ void scsi_dma_unmap(struct scsi_cmnd *cmd)
+ {
+ 	if (scsi_sg_count(cmd)) {
+-		struct device *dev = cmd->device->host->shost_gendev.parent;
++		struct device *dev = cmd->device->host->dma_dev;
+ 
+ 		dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
+ 			     cmd->sc_data_direction);
+diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
+index 1d04189..f3032c9 100644
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -761,7 +761,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
+ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
+ 					ext4_group_t group)
+ {
+-	return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
++	if (!ext4_bg_has_super(sb, group))
++		return 0;
++
++	if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
++		return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
++	else
++		return EXT4_SB(sb)->s_gdb_count;
+ }
+ 
+ /**
+diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
+index 50784ef..dc79b75 100644
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb)
+ 		if (ext4_bg_has_super(sb, i) &&
+ 		    ((i < 5) || ((i % flex_size) == 0)))
+ 			add_system_zone(sbi, ext4_group_first_block_no(sb, i),
+-					sbi->s_gdb_count + 1);
++					ext4_bg_num_gdb(sb, i) + 1);
+ 		gdp = ext4_get_group_desc(sb, i, NULL);
+ 		ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+ 		if (ret)
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 8825515..bd2a9dd 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -703,6 +703,13 @@ struct ext4_inode_info {
+ 	struct list_head i_aio_dio_complete_list;
+ 	/* current io_end structure for async DIO write*/
+ 	ext4_io_end_t *cur_aio_dio;
++
++	/*
++	 * Transactions that contain inode's metadata needed to complete
++	 * fsync and fdatasync, respectively.
++	 */
++	tid_t i_sync_tid;
++	tid_t i_datasync_tid;
+ };
+ 
+ /*
+@@ -750,6 +757,7 @@ struct ext4_inode_info {
+ #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
+ #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
+ #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
++#define EXT4_MOUNT_DISCARD		0x40000000 /* Issue DISCARD requests */
+ 
+ #define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt
+ #define set_opt(o, opt)			o |= EXT4_MOUNT_##opt
+diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
+index a286598..1892a77 100644
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -49,7 +49,7 @@
+ 
+ #define EXT4_DATA_TRANS_BLOCKS(sb)	(EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
+ 					 EXT4_XATTR_TRANS_BLOCKS - 2 + \
+-					 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
++					 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
+ 
+ /*
+  * Define the number of metadata blocks we need to account to modify data.
+@@ -57,7 +57,7 @@
+  * This include super block, inode block, quota blocks and xattr blocks
+  */
+ #define EXT4_META_TRANS_BLOCKS(sb)	(EXT4_XATTR_TRANS_BLOCKS + \
+-					2*EXT4_QUOTA_TRANS_BLOCKS(sb))
++					EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
+ 
+ /* Delete operations potentially hit one directory's namespace plus an
+  * entire inode, plus arbitrary amounts of bitmap/indirection data.  Be
+@@ -92,6 +92,7 @@
+  * but inode, sb and group updates are done only once */
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+ 		(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
++
+ #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+ 		(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
+ #else
+@@ -99,6 +100,9 @@
+ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
+ #define EXT4_QUOTA_DEL_BLOCKS(sb) 0
+ #endif
++#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
++#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
++#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+ 
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+@@ -254,6 +258,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
+ 	return 0;
+ }
+ 
++static inline void ext4_update_inode_fsync_trans(handle_t *handle,
++						 struct inode *inode,
++						 int datasync)
++{
++	struct ext4_inode_info *ei = EXT4_I(inode);
++
++	if (ext4_handle_valid(handle)) {
++		ei->i_sync_tid = handle->h_transaction->t_tid;
++		if (datasync)
++			ei->i_datasync_tid = handle->h_transaction->t_tid;
++	}
++}
++
+ /* super.c */
+ int ext4_force_commit(struct super_block *sb);
+ 
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 715264b..8b8bae4 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -1761,7 +1761,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
+ 	while (block < last && block != EXT_MAX_BLOCK) {
+ 		num = last - block;
+ 		/* find extent for this block */
++		down_read(&EXT4_I(inode)->i_data_sem);
+ 		path = ext4_ext_find_extent(inode, block, path);
++		up_read(&EXT4_I(inode)->i_data_sem);
+ 		if (IS_ERR(path)) {
+ 			err = PTR_ERR(path);
+ 			path = NULL;
+@@ -2074,7 +2076,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
+ 		ext_debug("free last %u blocks starting %llu\n", num, start);
+ 		for (i = 0; i < num; i++) {
+ 			bh = sb_find_get_block(inode->i_sb, start + i);
+-			ext4_forget(handle, 0, inode, bh, start + i);
++			ext4_forget(handle, metadata, inode, bh, start + i);
+ 		}
+ 		ext4_free_blocks(handle, inode, start, num, metadata);
+ 	} else if (from == le32_to_cpu(ex->ee_block)
+@@ -2167,7 +2169,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
+ 			correct_index = 1;
+ 			credits += (ext_depth(inode)) + 1;
+ 		}
+-		credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
++		credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+ 
+ 		err = ext4_ext_truncate_extend_restart(handle, inode, credits);
+ 		if (err)
+@@ -3064,6 +3066,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
+ 	if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
+ 		ret = ext4_convert_unwritten_extents_dio(handle, inode,
+ 							path);
++		if (ret >= 0)
++			ext4_update_inode_fsync_trans(handle, inode, 1);
+ 		goto out2;
+ 	}
+ 	/* buffered IO case */
+@@ -3091,6 +3095,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
+ 	ret = ext4_ext_convert_to_initialized(handle, inode,
+ 						path, iblock,
+ 						max_blocks);
++	if (ret >= 0)
++		ext4_update_inode_fsync_trans(handle, inode, 1);
+ out:
+ 	if (ret <= 0) {
+ 		err = ret;
+@@ -3329,10 +3335,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
+ 	allocated = ext4_ext_get_actual_len(&newex);
+ 	set_buffer_new(bh_result);
+ 
+-	/* Cache only when it is _not_ an uninitialized extent */
+-	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
++	/*
++	 * Cache the extent and update transaction to commit on fdatasync only
++	 * when it is _not_ an uninitialized extent.
++	 */
++	if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
+ 		ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
+ 						EXT4_EXT_CACHE_EXTENT);
++		ext4_update_inode_fsync_trans(handle, inode, 1);
++	} else
++		ext4_update_inode_fsync_trans(handle, inode, 0);
+ out:
+ 	if (allocated > max_blocks)
+ 		allocated = max_blocks;
+@@ -3720,10 +3732,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ 		 * Walk the extent tree gathering extent information.
+ 		 * ext4_ext_fiemap_cb will push extents back to user.
+ 		 */
+-		down_read(&EXT4_I(inode)->i_data_sem);
+ 		error = ext4_ext_walk_space(inode, start_blk, len_blks,
+ 					  ext4_ext_fiemap_cb, fieinfo);
+-		up_read(&EXT4_I(inode)->i_data_sem);
+ 	}
+ 
+ 	return error;
+diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
+index 2b15312..d6049e4 100644
+--- a/fs/ext4/fsync.c
++++ b/fs/ext4/fsync.c
+@@ -51,25 +51,30 @@
+ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
+ {
+ 	struct inode *inode = dentry->d_inode;
++	struct ext4_inode_info *ei = EXT4_I(inode);
+ 	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+-	int err, ret = 0;
++	int ret;
++	tid_t commit_tid;
+ 
+ 	J_ASSERT(ext4_journal_current_handle() == NULL);
+ 
+ 	trace_ext4_sync_file(file, dentry, datasync);
+ 
++	if (inode->i_sb->s_flags & MS_RDONLY)
++		return 0;
++
+ 	ret = flush_aio_dio_completed_IO(inode);
+ 	if (ret < 0)
+-		goto out;
++		return ret;
++
++	if (!journal)
++		return simple_fsync(file, dentry, datasync);
++
+ 	/*
+-	 * data=writeback:
++	 * data=writeback,ordered:
+ 	 *  The caller's filemap_fdatawrite()/wait will sync the data.
+-	 *  sync_inode() will sync the metadata
+-	 *
+-	 * data=ordered:
+-	 *  The caller's filemap_fdatawrite() will write the data and
+-	 *  sync_inode() will write the inode if it is dirty.  Then the caller's
+-	 *  filemap_fdatawait() will wait on the pages.
++	 *  Metadata is in the journal, we wait for proper transaction to
++	 *  commit here.
+ 	 *
+ 	 * data=journal:
+ 	 *  filemap_fdatawrite won't do anything (the buffers are clean).
+@@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
+ 	 *  (they were dirtied by commit).  But that's OK - the blocks are
+ 	 *  safe in-journal, which is all fsync() needs to ensure.
+ 	 */
+-	if (ext4_should_journal_data(inode)) {
+-		ret = ext4_force_commit(inode->i_sb);
+-		goto out;
+-	}
++	if (ext4_should_journal_data(inode))
++		return ext4_force_commit(inode->i_sb);
+ 
+-	if (!journal)
+-		ret = sync_mapping_buffers(inode->i_mapping);
+-
+-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+-		goto out;
+-
+-	/*
+-	 * The VFS has written the file data.  If the inode is unaltered
+-	 * then we need not start a commit.
+-	 */
+-	if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
+-		struct writeback_control wbc = {
+-			.sync_mode = WB_SYNC_ALL,
+-			.nr_to_write = 0, /* sys_fsync did this */
+-		};
+-		err = sync_inode(inode, &wbc);
+-		if (ret == 0)
+-			ret = err;
+-	}
+-out:
+-	if (journal && (journal->j_flags & JBD2_BARRIER))
++	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
++	if (jbd2_log_start_commit(journal, commit_tid))
++		jbd2_log_wait_commit(journal, commit_tid);
++	else if (journal->j_flags & JBD2_BARRIER)
+ 		blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+ 	return ret;
+ }
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 2c8caa5..1dae9a4 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1021,10 +1021,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
+ 	if (!err)
+ 		err = ext4_splice_branch(handle, inode, iblock,
+ 					 partial, indirect_blks, count);
+-	else
++	if (err)
+ 		goto cleanup;
+ 
+ 	set_buffer_new(bh_result);
++
++	ext4_update_inode_fsync_trans(handle, inode, 1);
+ got_it:
+ 	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+ 	if (count > blocks_to_boundary)
+@@ -1052,7 +1054,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode)
+ 		EXT4_I(inode)->i_reserved_meta_blocks;
+ 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ 
+-	return total;
++	return (total << inode->i_blkbits);
+ }
+ /*
+  * Calculate the number of metadata blocks need to reserve
+@@ -1534,6 +1536,16 @@ static int do_journal_get_write_access(handle_t *handle,
+ 	return ext4_journal_get_write_access(handle, bh);
+ }
+ 
++/*
++ * Truncate blocks that were not used by write. We have to truncate the
++ * pagecache as well so that corresponding buffers get properly unmapped.
++ */
++static void ext4_truncate_failed_write(struct inode *inode)
++{
++	truncate_inode_pages(inode->i_mapping, inode->i_size);
++	ext4_truncate(inode);
++}
++
+ static int ext4_write_begin(struct file *file, struct address_space *mapping,
+ 			    loff_t pos, unsigned len, unsigned flags,
+ 			    struct page **pagep, void **fsdata)
+@@ -1599,7 +1611,7 @@ retry:
+ 
+ 		ext4_journal_stop(handle);
+ 		if (pos + len > inode->i_size) {
+-			ext4_truncate(inode);
++			ext4_truncate_failed_write(inode);
+ 			/*
+ 			 * If truncate failed early the inode might
+ 			 * still be on the orphan list; we need to
+@@ -1709,7 +1721,7 @@ static int ext4_ordered_write_end(struct file *file,
+ 		ret = ret2;
+ 
+ 	if (pos + len > inode->i_size) {
+-		ext4_truncate(inode);
++		ext4_truncate_failed_write(inode);
+ 		/*
+ 		 * If truncate failed early the inode might still be
+ 		 * on the orphan list; we need to make sure the inode
+@@ -1751,7 +1763,7 @@ static int ext4_writeback_write_end(struct file *file,
+ 		ret = ret2;
+ 
+ 	if (pos + len > inode->i_size) {
+-		ext4_truncate(inode);
++		ext4_truncate_failed_write(inode);
+ 		/*
+ 		 * If truncate failed early the inode might still be
+ 		 * on the orphan list; we need to make sure the inode
+@@ -1814,7 +1826,7 @@ static int ext4_journalled_write_end(struct file *file,
+ 	if (!ret)
+ 		ret = ret2;
+ 	if (pos + len > inode->i_size) {
+-		ext4_truncate(inode);
++		ext4_truncate_failed_write(inode);
+ 		/*
+ 		 * If truncate failed early the inode might still be
+ 		 * on the orphan list; we need to make sure the inode
+@@ -2788,7 +2800,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
+ 	 * number of contiguous block. So we will limit
+ 	 * number of contiguous block to a sane value
+ 	 */
+-	if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
++	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+ 	    (max_blocks > EXT4_MAX_TRANS_DATA))
+ 		max_blocks = EXT4_MAX_TRANS_DATA;
+ 
+@@ -3091,7 +3103,7 @@ retry:
+ 		 * i_size_read because we hold i_mutex.
+ 		 */
+ 		if (pos + len > inode->i_size)
+-			ext4_truncate(inode);
++			ext4_truncate_failed_write(inode);
+ 	}
+ 
+ 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+@@ -4120,6 +4132,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
+ 			      __le32 *last)
+ {
+ 	__le32 *p;
++	int	is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
++
+ 	if (try_to_extend_transaction(handle, inode)) {
+ 		if (bh) {
+ 			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+@@ -4150,11 +4164,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
+ 
+ 			*p = 0;
+ 			tbh = sb_find_get_block(inode->i_sb, nr);
+-			ext4_forget(handle, 0, inode, tbh, nr);
++			ext4_forget(handle, is_metadata, inode, tbh, nr);
+ 		}
+ 	}
+ 
+-	ext4_free_blocks(handle, inode, block_to_free, count, 0);
++	ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
+ }
+ 
+ /**
+@@ -4781,8 +4795,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+ 	struct ext4_iloc iloc;
+ 	struct ext4_inode *raw_inode;
+ 	struct ext4_inode_info *ei;
+-	struct buffer_head *bh;
+ 	struct inode *inode;
++	journal_t *journal = EXT4_SB(sb)->s_journal;
+ 	long ret;
+ 	int block;
+ 
+@@ -4793,11 +4807,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+ 		return inode;
+ 
+ 	ei = EXT4_I(inode);
++	iloc.bh = 0;
+ 
+ 	ret = __ext4_get_inode_loc(inode, &iloc, 0);
+ 	if (ret < 0)
+ 		goto bad_inode;
+-	bh = iloc.bh;
+ 	raw_inode = ext4_raw_inode(&iloc);
+ 	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+ 	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+@@ -4820,7 +4834,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+ 		if (inode->i_mode == 0 ||
+ 		    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+ 			/* this inode is deleted */
+-			brelse(bh);
+ 			ret = -ESTALE;
+ 			goto bad_inode;
+ 		}
+@@ -4848,11 +4861,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+ 		ei->i_data[block] = raw_inode->i_block[block];
+ 	INIT_LIST_HEAD(&ei->i_orphan);
+ 
++	/*
++	 * Set transaction id's of transactions that have to be committed
++	 * to finish f[data]sync. We set them to currently running transaction
++	 * as we cannot be sure that the inode or some of its metadata isn't
++	 * part of the transaction - the inode could have been reclaimed and
++	 * now it is reread from disk.
++	 */
++	if (journal) {
++		transaction_t *transaction;
++		tid_t tid;
++
++		spin_lock(&journal->j_state_lock);
++		if (journal->j_running_transaction)
++			transaction = journal->j_running_transaction;
++		else
++			transaction = journal->j_committing_transaction;
++		if (transaction)
++			tid = transaction->t_tid;
++		else
++			tid = journal->j_commit_sequence;
++		spin_unlock(&journal->j_state_lock);
++		ei->i_sync_tid = tid;
++		ei->i_datasync_tid = tid;
++	}
++
+ 	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+ 		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
+ 		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
+ 		    EXT4_INODE_SIZE(inode->i_sb)) {
+-			brelse(bh);
+ 			ret = -EIO;
+ 			goto bad_inode;
+ 		}
+@@ -4884,10 +4921,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+ 
+ 	ret = 0;
+ 	if (ei->i_file_acl &&
+-	    ((ei->i_file_acl <
+-	      (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
+-	       EXT4_SB(sb)->s_gdb_count)) ||
+-	     (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
++	    !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
+ 		ext4_error(sb, __func__,
+ 			   "bad extended attribute block %llu in inode #%lu",
+ 			   ei->i_file_acl, inode->i_ino);
+@@ -4905,10 +4939,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+ 		/* Validate block references which are part of inode */
+ 		ret = ext4_check_inode_blockref(inode);
+ 	}
+-	if (ret) {
+-		brelse(bh);
++	if (ret)
+ 		goto bad_inode;
+-	}
+ 
+ 	if (S_ISREG(inode->i_mode)) {
+ 		inode->i_op = &ext4_file_inode_operations;
+@@ -4936,7 +4968,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+ 			init_special_inode(inode, inode->i_mode,
+ 			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
+ 	} else {
+-		brelse(bh);
+ 		ret = -EIO;
+ 		ext4_error(inode->i_sb, __func__,
+ 			   "bogus i_mode (%o) for inode=%lu",
+@@ -4949,6 +4980,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
+ 	return inode;
+ 
+ bad_inode:
++	brelse(iloc.bh);
+ 	iget_failed(inode);
+ 	return ERR_PTR(ret);
+ }
+@@ -5108,6 +5140,7 @@ static int ext4_do_update_inode(handle_t *handle,
+ 		err = rc;
+ 	ei->i_state &= ~EXT4_STATE_NEW;
+ 
++	ext4_update_inode_fsync_trans(handle, inode, 0);
+ out_brelse:
+ 	brelse(bh);
+ 	ext4_std_error(inode->i_sb, err);
+@@ -5227,8 +5260,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
+ 
+ 		/* (user+group)*(old+new) structure, inode write (sb,
+ 		 * inode block, ? - but truncate inode update has it) */
+-		handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
+-					EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
++		handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
++					EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
+ 		if (IS_ERR(handle)) {
+ 			error = PTR_ERR(handle);
+ 			goto err_out;
+diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
+index c1cdf61..b63d193 100644
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -221,31 +221,38 @@ setversion_out:
+ 		struct file *donor_filp;
+ 		int err;
+ 
++		if (!(filp->f_mode & FMODE_READ) ||
++		    !(filp->f_mode & FMODE_WRITE))
++			return -EBADF;
++
+ 		if (copy_from_user(&me,
+ 			(struct move_extent __user *)arg, sizeof(me)))
+ 			return -EFAULT;
++		me.moved_len = 0;
+ 
+ 		donor_filp = fget(me.donor_fd);
+ 		if (!donor_filp)
+ 			return -EBADF;
+ 
+-		if (!capable(CAP_DAC_OVERRIDE)) {
+-			if ((current->real_cred->fsuid != inode->i_uid) ||
+-				!(inode->i_mode & S_IRUSR) ||
+-				!(donor_filp->f_dentry->d_inode->i_mode &
+-				S_IRUSR)) {
+-				fput(donor_filp);
+-				return -EACCES;
+-			}
++		if (!(donor_filp->f_mode & FMODE_WRITE)) {
++			err = -EBADF;
++			goto mext_out;
+ 		}
+ 
++		err = mnt_want_write(filp->f_path.mnt);
++		if (err)
++			goto mext_out;
++
+ 		err = ext4_move_extents(filp, donor_filp, me.orig_start,
+ 					me.donor_start, me.len, &me.moved_len);
+-		fput(donor_filp);
++		mnt_drop_write(filp->f_path.mnt);
++		if (me.moved_len > 0)
++			file_remove_suid(donor_filp);
+ 
+ 		if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
+-			return -EFAULT;
+-
++			err = -EFAULT;
++mext_out:
++		fput(donor_filp);
+ 		return err;
+ 	}
+ 
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index bba1282..7d71148 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
+ 	struct ext4_group_info *db;
+ 	int err, count = 0, count2 = 0;
+ 	struct ext4_free_data *entry;
+-	ext4_fsblk_t discard_block;
+ 	struct list_head *l, *ltmp;
+ 
+ 	list_for_each_safe(l, ltmp, &txn->t_private_list) {
+@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
+ 			page_cache_release(e4b.bd_bitmap_page);
+ 		}
+ 		ext4_unlock_group(sb, entry->group);
+-		discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+-			+ entry->start_blk
+-			+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+-		trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
+-					  entry->count);
+-		sb_issue_discard(sb, discard_block, entry->count);
+-
++		if (test_opt(sb, DISCARD)) {
++			ext4_fsblk_t discard_block;
++			struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++
++			discard_block = (ext4_fsblk_t)entry->group *
++						EXT4_BLOCKS_PER_GROUP(sb)
++					+ entry->start_blk
++					+ le32_to_cpu(es->s_first_data_block);
++			trace_ext4_discard_blocks(sb,
++					(unsigned long long)discard_block,
++					entry->count);
++			sb_issue_discard(sb, discard_block, entry->count);
++		}
+ 		kmem_cache_free(ext4_free_ext_cachep, entry);
+ 		ext4_mb_release_desc(&e4b);
+ 	}
+@@ -3006,6 +3011,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
+ }
+ 
+ /*
++ * Called on failure; free up any blocks from the inode PA for this
++ * context.  We don't need this for MB_GROUP_PA because we only change
++ * pa_free in ext4_mb_release_context(), but on failure, we've already
++ * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
++ */
++static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
++{
++	struct ext4_prealloc_space *pa = ac->ac_pa;
++	int len;
++
++	if (pa && pa->pa_type == MB_INODE_PA) {
++		len = ac->ac_b_ex.fe_len;
++		pa->pa_free += len;
++	}
++
++}
++
++/*
+  * use blocks preallocated to inode
+  */
+ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
+@@ -4290,6 +4313,7 @@ repeat:
+ 			ac->ac_status = AC_STATUS_CONTINUE;
+ 			goto repeat;
+ 		} else if (*errp) {
++			ext4_discard_allocated_blocks(ac);
+ 			ac->ac_b_ex.fe_len = 0;
+ 			ar->len = 0;
+ 			ext4_mb_show_ac(ac);
+diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
+index a93d5b8..8646149 100644
+--- a/fs/ext4/migrate.c
++++ b/fs/ext4/migrate.c
+@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
+ 	 * So allocate a credit of 3. We may update
+ 	 * quota (user and group).
+ 	 */
+-	needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
++	needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
+ 
+ 	if (ext4_journal_extend(handle, needed) != 0)
+ 		retval = ext4_journal_restart(handle, needed);
+@@ -477,7 +477,7 @@ int ext4_ext_migrate(struct inode *inode)
+ 	handle = ext4_journal_start(inode,
+ 					EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
+ 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-					2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
++					EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
+ 					+ 1);
+ 	if (IS_ERR(handle)) {
+ 		retval = PTR_ERR(handle);
+diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
+index 25b6b14..f5b03a1 100644
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -77,12 +77,14 @@ static int
+ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
+ 		      struct ext4_extent **extent)
+ {
++	struct ext4_extent_header *eh;
+ 	int ppos, leaf_ppos = path->p_depth;
+ 
+ 	ppos = leaf_ppos;
+ 	if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
+ 		/* leaf block */
+ 		*extent = ++path[ppos].p_ext;
++		path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+ 		return 0;
+ 	}
+ 
+@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
+ 					ext_block_hdr(path[cur_ppos+1].p_bh);
+ 			}
+ 
++			path[leaf_ppos].p_ext = *extent = NULL;
++
++			eh = path[leaf_ppos].p_hdr;
++			if (le16_to_cpu(eh->eh_entries) == 0)
++				/* empty leaf is found */
++				return -ENODATA;
++
+ 			/* leaf block */
+ 			path[leaf_ppos].p_ext = *extent =
+ 				EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
++			path[leaf_ppos].p_block =
++					ext_pblock(path[leaf_ppos].p_ext);
+ 			return 0;
+ 		}
+ 	}
+@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
+ }
+ 
+ /**
+- * mext_double_down_read - Acquire two inodes' read semaphore
+- *
+- * @orig_inode:		original inode structure
+- * @donor_inode:	donor inode structure
+- * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
+- */
+-static void
+-mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
+-{
+-	struct inode *first = orig_inode, *second = donor_inode;
+-
+-	/*
+-	 * Use the inode number to provide the stable locking order instead
+-	 * of its address, because the C language doesn't guarantee you can
+-	 * compare pointers that don't come from the same array.
+-	 */
+-	if (donor_inode->i_ino < orig_inode->i_ino) {
+-		first = donor_inode;
+-		second = orig_inode;
+-	}
+-
+-	down_read(&EXT4_I(first)->i_data_sem);
+-	down_read(&EXT4_I(second)->i_data_sem);
+-}
+-
+-/**
+- * mext_double_down_write - Acquire two inodes' write semaphore
++ * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
+  *
+  * @orig_inode:		original inode structure
+  * @donor_inode:	donor inode structure
+- * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
++ * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
++ * i_ino order.
+  */
+ static void
+-mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
++double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+ {
+ 	struct inode *first = orig_inode, *second = donor_inode;
+ 
+@@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
+ 	}
+ 
+ 	down_write(&EXT4_I(first)->i_data_sem);
+-	down_write(&EXT4_I(second)->i_data_sem);
++	down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
+ }
+ 
+ /**
+- * mext_double_up_read - Release two inodes' read semaphore
++ * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
+  *
+  * @orig_inode:		original inode structure to be released its lock first
+  * @donor_inode:	donor inode structure to be released its lock second
+- * Release read semaphore of two inodes (orig and donor).
++ * Release write lock of i_data_sem of two inodes (orig and donor).
+  */
+ static void
+-mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
+-{
+-	up_read(&EXT4_I(orig_inode)->i_data_sem);
+-	up_read(&EXT4_I(donor_inode)->i_data_sem);
+-}
+-
+-/**
+- * mext_double_up_write - Release two inodes' write semaphore
+- *
+- * @orig_inode:		original inode structure to be released its lock first
+- * @donor_inode:	donor inode structure to be released its lock second
+- * Release write semaphore of two inodes (orig and donor).
+- */
+-static void
+-mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
++double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+ {
+ 	up_write(&EXT4_I(orig_inode)->i_data_sem);
+ 	up_write(&EXT4_I(donor_inode)->i_data_sem);
+@@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
+  * @donor_inode:	donor inode
+  * @from:		block offset of orig_inode
+  * @count:		block count to be replaced
++ * @err:		pointer to save return value
+  *
+  * Replace original inode extents and donor inode extents page by page.
+  * We implement this replacement in the following three steps:
+@@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
+  * 3. Change the block information of donor inode to point at the saved
+  *    original inode blocks in the dummy extents.
+  *
+- * Return 0 on success, or a negative error value on failure.
++ * Return replaced block count.
+  */
+ static int
+ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
+ 			   struct inode *donor_inode, ext4_lblk_t from,
+-			   ext4_lblk_t count)
++			   ext4_lblk_t count, int *err)
+ {
+ 	struct ext4_ext_path *orig_path = NULL;
+ 	struct ext4_ext_path *donor_path = NULL;
+ 	struct ext4_extent *oext, *dext;
+ 	struct ext4_extent tmp_dext, tmp_oext;
+ 	ext4_lblk_t orig_off = from, donor_off = from;
+-	int err = 0;
+ 	int depth;
+ 	int replaced_count = 0;
+ 	int dext_alen;
+ 
+-	mext_double_down_write(orig_inode, donor_inode);
++	/* Protect extent trees against block allocations via delalloc */
++	double_down_write_data_sem(orig_inode, donor_inode);
+ 
+ 	/* Get the original extent for the block "orig_off" */
+-	err = get_ext_path(orig_inode, orig_off, &orig_path);
+-	if (err)
++	*err = get_ext_path(orig_inode, orig_off, &orig_path);
++	if (*err)
+ 		goto out;
+ 
+ 	/* Get the donor extent for the head */
+-	err = get_ext_path(donor_inode, donor_off, &donor_path);
+-	if (err)
++	*err = get_ext_path(donor_inode, donor_off, &donor_path);
++	if (*err)
+ 		goto out;
+ 	depth = ext_depth(orig_inode);
+ 	oext = orig_path[depth].p_ext;
+@@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
+ 	dext = donor_path[depth].p_ext;
+ 	tmp_dext = *dext;
+ 
+-	err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
++	*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+ 				      donor_off, count);
+-	if (err)
++	if (*err)
+ 		goto out;
+ 
+ 	/* Loop for the donor extents */
+@@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
+ 		if (!dext) {
+ 			ext4_error(donor_inode->i_sb, __func__,
+ 				   "The extent for donor must be found");
+-			err = -EIO;
++			*err = -EIO;
+ 			goto out;
+ 		} else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
+ 			ext4_error(donor_inode->i_sb, __func__,
+@@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
+ 				"extent(%u) should be equal",
+ 				donor_off,
+ 				le32_to_cpu(tmp_dext.ee_block));
+-			err = -EIO;
++			*err = -EIO;
+ 			goto out;
+ 		}
+ 
+ 		/* Set donor extent to orig extent */
+-		err = mext_leaf_block(handle, orig_inode,
++		*err = mext_leaf_block(handle, orig_inode,
+ 					   orig_path, &tmp_dext, &orig_off);
+-		if (err < 0)
++		if (*err)
+ 			goto out;
+ 
+ 		/* Set orig extent to donor extent */
+-		err = mext_leaf_block(handle, donor_inode,
++		*err = mext_leaf_block(handle, donor_inode,
+ 					   donor_path, &tmp_oext, &donor_off);
+-		if (err < 0)
++		if (*err)
+ 			goto out;
+ 
+ 		dext_alen = ext4_ext_get_actual_len(&tmp_dext);
+@@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
+ 
+ 		if (orig_path)
+ 			ext4_ext_drop_refs(orig_path);
+-		err = get_ext_path(orig_inode, orig_off, &orig_path);
+-		if (err)
++		*err = get_ext_path(orig_inode, orig_off, &orig_path);
++		if (*err)
+ 			goto out;
+ 		depth = ext_depth(orig_inode);
+ 		oext = orig_path[depth].p_ext;
+-		if (le32_to_cpu(oext->ee_block) +
+-				ext4_ext_get_actual_len(oext) <= orig_off) {
+-			err = 0;
+-			goto out;
+-		}
+ 		tmp_oext = *oext;
+ 
+ 		if (donor_path)
+ 			ext4_ext_drop_refs(donor_path);
+-		err = get_ext_path(donor_inode, donor_off, &donor_path);
+-		if (err)
++		*err = get_ext_path(donor_inode, donor_off, &donor_path);
++		if (*err)
+ 			goto out;
+ 		depth = ext_depth(donor_inode);
+ 		dext = donor_path[depth].p_ext;
+-		if (le32_to_cpu(dext->ee_block) +
+-				ext4_ext_get_actual_len(dext) <= donor_off) {
+-			err = 0;
+-			goto out;
+-		}
+ 		tmp_dext = *dext;
+ 
+-		err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
++		*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+ 					   donor_off, count - replaced_count);
+-		if (err)
++		if (*err)
+ 			goto out;
+ 	}
+ 
+@@ -795,8 +758,12 @@ out:
+ 		kfree(donor_path);
+ 	}
+ 
+-	mext_double_up_write(orig_inode, donor_inode);
+-	return err;
++	ext4_ext_invalidate_cache(orig_inode);
++	ext4_ext_invalidate_cache(donor_inode);
++
++	double_up_write_data_sem(orig_inode, donor_inode);
++
++	return replaced_count;
+ }
+ 
+ /**
+@@ -808,16 +775,17 @@ out:
+  * @data_offset_in_page:	block index where data swapping starts
+  * @block_len_in_page:		the number of blocks to be swapped
+  * @uninit:			orig extent is uninitialized or not
++ * @err:			pointer to save return value
+  *
+  * Save the data in original inode blocks and replace original inode extents
+  * with donor inode extents by calling mext_replace_branches().
+- * Finally, write out the saved data in new original inode blocks. Return 0
+- * on success, or a negative error value on failure.
++ * Finally, write out the saved data in new original inode blocks. Return
++ * replaced block count.
+  */
+ static int
+ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+ 		  pgoff_t orig_page_offset, int data_offset_in_page,
+-		  int block_len_in_page, int uninit)
++		  int block_len_in_page, int uninit, int *err)
+ {
+ 	struct inode *orig_inode = o_filp->f_dentry->d_inode;
+ 	struct address_space *mapping = orig_inode->i_mapping;
+@@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+ 	long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
+ 	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
+ 	unsigned int w_flags = 0;
+-	unsigned int tmp_data_len, data_len;
++	unsigned int tmp_data_size, data_size, replaced_size;
+ 	void *fsdata;
+-	int ret, i, jblocks;
++	int i, jblocks;
++	int err2 = 0;
++	int replaced_count = 0;
+ 	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+ 
+ 	/*
+@@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+ 	jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
+ 	handle = ext4_journal_start(orig_inode, jblocks);
+ 	if (IS_ERR(handle)) {
+-		ret = PTR_ERR(handle);
+-		return ret;
++		*err = PTR_ERR(handle);
++		return 0;
+ 	}
+ 
+ 	if (segment_eq(get_fs(), KERNEL_DS))
+@@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+ 	 * Just swap data blocks between orig and donor.
+ 	 */
+ 	if (uninit) {
+-		ret = mext_replace_branches(handle, orig_inode,
+-						 donor_inode, orig_blk_offset,
+-						 block_len_in_page);
+-
+-		/* Clear the inode cache not to refer to the old data */
+-		ext4_ext_invalidate_cache(orig_inode);
+-		ext4_ext_invalidate_cache(donor_inode);
++		replaced_count = mext_replace_branches(handle, orig_inode,
++						donor_inode, orig_blk_offset,
++						block_len_in_page, err);
+ 		goto out2;
+ 	}
+ 
+ 	offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
+ 
+-	/* Calculate data_len */
++	/* Calculate data_size */
+ 	if ((orig_blk_offset + block_len_in_page - 1) ==
+ 	    ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
+ 		/* Replace the last block */
+-		tmp_data_len = orig_inode->i_size & (blocksize - 1);
++		tmp_data_size = orig_inode->i_size & (blocksize - 1);
+ 		/*
+-		 * If data_len equal zero, it shows data_len is multiples of
++		 * If data_size equal zero, it shows data_size is multiples of
+ 		 * blocksize. So we set appropriate value.
+ 		 */
+-		if (tmp_data_len == 0)
+-			tmp_data_len = blocksize;
++		if (tmp_data_size == 0)
++			tmp_data_size = blocksize;
+ 
+-		data_len = tmp_data_len +
++		data_size = tmp_data_size +
+ 			((block_len_in_page - 1) << orig_inode->i_blkbits);
+-	} else {
+-		data_len = block_len_in_page << orig_inode->i_blkbits;
+-	}
++	} else
++		data_size = block_len_in_page << orig_inode->i_blkbits;
++
++	replaced_size = data_size;
+ 
+-	ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
++	*err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
+ 				 &page, &fsdata);
+-	if (unlikely(ret < 0))
++	if (unlikely(*err < 0))
+ 		goto out;
+ 
+ 	if (!PageUptodate(page)) {
+@@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+ 	/* Release old bh and drop refs */
+ 	try_to_release_page(page, 0);
+ 
+-	ret = mext_replace_branches(handle, orig_inode, donor_inode,
+-					 orig_blk_offset, block_len_in_page);
+-	if (ret < 0)
+-		goto out;
+-
+-	/* Clear the inode cache not to refer to the old data */
+-	ext4_ext_invalidate_cache(orig_inode);
+-	ext4_ext_invalidate_cache(donor_inode);
++	replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
++					orig_blk_offset, block_len_in_page,
++					&err2);
++	if (err2) {
++		if (replaced_count) {
++			block_len_in_page = replaced_count;
++			replaced_size =
++				block_len_in_page << orig_inode->i_blkbits;
++		} else
++			goto out;
++	}
+ 
+ 	if (!page_has_buffers(page))
+ 		create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
+@@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
+ 		bh = bh->b_this_page;
+ 
+ 	for (i = 0; i < block_len_in_page; i++) {
+-		ret = ext4_get_block(orig_inode,
++		*err = ext4_get_block(orig_inode,
+ 				(sector_t)(orig_blk_offset + i), bh, 0);
+-		if (ret < 0)
++		if (*err < 0)
+ 			goto out;
+ 
+ 		if (bh->b_this_page != NULL)
+ 			bh = bh->b_this_page;
+ 	}
+ 
+-	ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
++	*err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
+ 			       page, fsdata);
+ 	page = NULL;
+ 
+@@ -951,7 +921,10 @@ out:
+ out2:
+ 	ext4_journal_stop(handle);
+ 
+-	return ret < 0 ? ret : 0;
++	if (err2)
++		*err = err2;
++
++	return replaced_count;
+ }
+ 
+ /**
+@@ -962,7 +935,6 @@ out2:
+  * @orig_start:		logical start offset in block for orig
+  * @donor_start:	logical start offset in block for donor
+  * @len:		the number of blocks to be moved
+- * @moved_len:		moved block length
+  *
+  * Check the arguments of ext4_move_extents() whether the files can be
+  * exchanged with each other.
+@@ -970,8 +942,8 @@ out2:
+  */
+ static int
+ mext_check_arguments(struct inode *orig_inode,
+-			  struct inode *donor_inode, __u64 orig_start,
+-			  __u64 donor_start, __u64 *len, __u64 moved_len)
++		     struct inode *donor_inode, __u64 orig_start,
++		     __u64 donor_start, __u64 *len)
+ {
+ 	ext4_lblk_t orig_blocks, donor_blocks;
+ 	unsigned int blkbits = orig_inode->i_blkbits;
+@@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode,
+ 		return -EINVAL;
+ 	}
+ 
++	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
++		ext4_debug("ext4 move extent: suid or sgid is set"
++			   " to donor file [ino:orig %lu, donor %lu]\n",
++			   orig_inode->i_ino, donor_inode->i_ino);
++		return -EINVAL;
++	}
++
+ 	/* Ext4 move extent does not support swapfile */
+ 	if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
+ 		ext4_debug("ext4 move extent: The argument files should "
+@@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode,
+ 		return -EINVAL;
+ 	}
+ 
+-	if (moved_len) {
+-		ext4_debug("ext4 move extent: moved_len should be 0 "
+-			"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
+-			donor_inode->i_ino);
+-		return -EINVAL;
+-	}
+-
+ 	if ((orig_start > EXT_MAX_BLOCK) ||
+ 	    (donor_start > EXT_MAX_BLOCK) ||
+ 	    (*len > EXT_MAX_BLOCK) ||
+@@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
+ 		return -EINVAL;
+ 	}
+ 
+-	/* protect orig and donor against a truncate */
++	/* Protect orig and donor inodes against a truncate */
+ 	ret1 = mext_inode_double_lock(orig_inode, donor_inode);
+ 	if (ret1 < 0)
+ 		return ret1;
+ 
+-	mext_double_down_read(orig_inode, donor_inode);
++	/* Protect extent tree against block allocations via delalloc */
++	double_down_write_data_sem(orig_inode, donor_inode);
+ 	/* Check the filesystem environment whether move_extent can be done */
+ 	ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
+-					donor_start, &len, *moved_len);
+-	mext_double_up_read(orig_inode, donor_inode);
++				    donor_start, &len);
+ 	if (ret1)
+ 		goto out;
+ 
+@@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
+ 		seq_start = le32_to_cpu(ext_cur->ee_block);
+ 		rest_blocks = seq_blocks;
+ 
+-		/* Discard preallocations of two inodes */
+-		down_write(&EXT4_I(orig_inode)->i_data_sem);
+-		ext4_discard_preallocations(orig_inode);
+-		up_write(&EXT4_I(orig_inode)->i_data_sem);
+-
+-		down_write(&EXT4_I(donor_inode)->i_data_sem);
+-		ext4_discard_preallocations(donor_inode);
+-		up_write(&EXT4_I(donor_inode)->i_data_sem);
++		/*
++		 * Up semaphore to avoid following problems:
++		 * a. transaction deadlock among ext4_journal_start,
++		 *    ->write_begin via pagefault, and jbd2_journal_commit
++		 * b. racing with ->readpage, ->write_begin, and ext4_get_block
++		 *    in move_extent_per_page
++		 */
++		double_up_write_data_sem(orig_inode, donor_inode);
+ 
+ 		while (orig_page_offset <= seq_end_page) {
+ 
+ 			/* Swap original branches with new branches */
+-			ret1 = move_extent_per_page(o_filp, donor_inode,
++			block_len_in_page = move_extent_per_page(
++						o_filp, donor_inode,
+ 						orig_page_offset,
+ 						data_offset_in_page,
+-						block_len_in_page, uninit);
+-			if (ret1 < 0)
+-				goto out;
+-			orig_page_offset++;
++						block_len_in_page, uninit,
++						&ret1);
++
+ 			/* Count how many blocks we have exchanged */
+ 			*moved_len += block_len_in_page;
++			if (ret1 < 0)
++				break;
+ 			if (*moved_len > len) {
+ 				ext4_error(orig_inode->i_sb, __func__,
+ 					"We replaced blocks too much! "
+ 					"sum of replaced: %llu requested: %llu",
+ 					*moved_len, len);
+ 				ret1 = -EIO;
+-				goto out;
++				break;
+ 			}
+ 
++			orig_page_offset++;
+ 			data_offset_in_page = 0;
+ 			rest_blocks -= block_len_in_page;
+ 			if (rest_blocks > blocks_per_page)
+@@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
+ 				block_len_in_page = rest_blocks;
+ 		}
+ 
++		double_down_write_data_sem(orig_inode, donor_inode);
++		if (ret1 < 0)
++			break;
++
+ 		/* Decrease buffer counter */
+ 		if (holecheck_path)
+ 			ext4_ext_drop_refs(holecheck_path);
+@@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
+ 
+ 	}
+ out:
++	if (*moved_len) {
++		ext4_discard_preallocations(orig_inode);
++		ext4_discard_preallocations(donor_inode);
++	}
++
+ 	if (orig_path) {
+ 		ext4_ext_drop_refs(orig_path);
+ 		kfree(orig_path);
+@@ -1422,7 +1406,7 @@ out:
+ 		ext4_ext_drop_refs(holecheck_path);
+ 		kfree(holecheck_path);
+ 	}
+-
++	double_up_write_data_sem(orig_inode, donor_inode);
+ 	ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
+ 
+ 	if (ret1)
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 6d2c1b8..17a17e1 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1292,9 +1292,6 @@ errout:
+  * add_dirent_to_buf will attempt search the directory block for
+  * space.  It will return -ENOSPC if no space is available, and -EIO
+  * and -EEXIST if directory entry already exists.
+- *
+- * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
+- * all other cases bh is released.
+  */
+ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
+ 			     struct inode *inode, struct ext4_dir_entry_2 *de,
+@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
+ 		top = bh->b_data + blocksize - reclen;
+ 		while ((char *) de <= top) {
+ 			if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
+-						  bh, offset)) {
+-				brelse(bh);
++						  bh, offset))
+ 				return -EIO;
+-			}
+-			if (ext4_match(namelen, name, de)) {
+-				brelse(bh);
++			if (ext4_match(namelen, name, de))
+ 				return -EEXIST;
+-			}
+ 			nlen = EXT4_DIR_REC_LEN(de->name_len);
+ 			rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
+ 			if ((de->inode? rlen - nlen: rlen) >= reclen)
+@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
+ 	err = ext4_journal_get_write_access(handle, bh);
+ 	if (err) {
+ 		ext4_std_error(dir->i_sb, err);
+-		brelse(bh);
+ 		return err;
+ 	}
+ 
+@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
+ 	err = ext4_handle_dirty_metadata(handle, dir, bh);
+ 	if (err)
+ 		ext4_std_error(dir->i_sb, err);
+-	brelse(bh);
+ 	return 0;
+ }
+ 
+@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
+ 	if (!(de))
+ 		return retval;
+ 
+-	return add_dirent_to_buf(handle, dentry, inode, de, bh);
++	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++	brelse(bh);
++	return retval;
+ }
+ 
+ /*
+@@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
+ 		if(!bh)
+ 			return retval;
+ 		retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+-		if (retval != -ENOSPC)
++		if (retval != -ENOSPC) {
++			brelse(bh);
+ 			return retval;
++		}
+ 
+ 		if (blocks == 1 && !dx_fallback &&
+ 		    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
+@@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
+ 	de = (struct ext4_dir_entry_2 *) bh->b_data;
+ 	de->inode = 0;
+ 	de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
+-	return add_dirent_to_buf(handle, dentry, inode, de, bh);
++	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++	brelse(bh);
++	return retval;
+ }
+ 
+ /*
+@@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ 		goto journal_error;
+ 
+ 	err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+-	if (err != -ENOSPC) {
+-		bh = NULL;
++	if (err != -ENOSPC)
+ 		goto cleanup;
+-	}
+ 
+ 	/* Block full, should compress but for now just split */
+ 	dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
+@@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ 	if (!de)
+ 		goto cleanup;
+ 	err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+-	bh = NULL;
+ 	goto cleanup;
+ 
+ journal_error:
+@@ -1775,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
+ retry:
+ 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+ 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-					2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+ 	if (IS_ERR(handle))
+ 		return PTR_ERR(handle);
+ 
+@@ -1809,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
+ retry:
+ 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+ 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-					2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+ 	if (IS_ERR(handle))
+ 		return PTR_ERR(handle);
+ 
+@@ -1846,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+ retry:
+ 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+ 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-					2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+ 	if (IS_ERR(handle))
+ 		return PTR_ERR(handle);
+ 
+@@ -2259,7 +2253,7 @@ static int ext4_symlink(struct inode *dir,
+ retry:
+ 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+ 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
+-					2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+ 	if (IS_ERR(handle))
+ 		return PTR_ERR(handle);
+ 
+diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
+index 3cfc343..3b2c554 100644
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb,
+ 			goto exit_bh;
+ 
+ 		if (IS_ERR(gdb = bclean(handle, sb, block))) {
+-			err = PTR_ERR(bh);
++			err = PTR_ERR(gdb);
+ 			goto exit_bh;
+ 		}
+ 		ext4_handle_dirty_metadata(handle, NULL, gdb);
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index d4ca92a..9ae5217 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb)
+ 	if (sb->s_dirt)
+ 		ext4_commit_super(sb, 1);
+ 
+-	ext4_release_system_zone(sb);
+-	ext4_mb_release(sb);
+-	ext4_ext_release(sb);
+-	ext4_xattr_put_super(sb);
+ 	if (sbi->s_journal) {
+ 		err = jbd2_journal_destroy(sbi->s_journal);
+ 		sbi->s_journal = NULL;
+@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb)
+ 			ext4_abort(sb, __func__,
+ 				   "Couldn't clean up the journal");
+ 	}
++
++	ext4_release_system_zone(sb);
++	ext4_mb_release(sb);
++	ext4_ext_release(sb);
++	ext4_xattr_put_super(sb);
++
+ 	if (!(sb->s_flags & MS_RDONLY)) {
+ 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+ 		es->s_state = cpu_to_le16(sbi->s_mount_state);
+@@ -704,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
+ 	spin_lock_init(&(ei->i_block_reservation_lock));
+ 	INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
+ 	ei->cur_aio_dio = NULL;
++	ei->i_sync_tid = 0;
++	ei->i_datasync_tid = 0;
+ 
+ 	return &ei->vfs_inode;
+ }
+@@ -899,6 +903,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
+ 	if (test_opt(sb, NO_AUTO_DA_ALLOC))
+ 		seq_puts(seq, ",noauto_da_alloc");
+ 
++	if (test_opt(sb, DISCARD))
++		seq_puts(seq, ",discard");
++
++	if (test_opt(sb, NOLOAD))
++		seq_puts(seq, ",norecovery");
++
+ 	ext4_show_quota_options(seq, sb);
+ 
+ 	return 0;
+@@ -1079,7 +1089,8 @@ enum {
+ 	Opt_usrquota, Opt_grpquota, Opt_i_version,
+ 	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+ 	Opt_block_validity, Opt_noblock_validity,
+-	Opt_inode_readahead_blks, Opt_journal_ioprio
++	Opt_inode_readahead_blks, Opt_journal_ioprio,
++	Opt_discard, Opt_nodiscard,
+ };
+ 
+ static const match_table_t tokens = {
+@@ -1104,6 +1115,7 @@ static const match_table_t tokens = {
+ 	{Opt_acl, "acl"},
+ 	{Opt_noacl, "noacl"},
+ 	{Opt_noload, "noload"},
++	{Opt_noload, "norecovery"},
+ 	{Opt_nobh, "nobh"},
+ 	{Opt_bh, "bh"},
+ 	{Opt_commit, "commit=%u"},
+@@ -1144,6 +1156,8 @@ static const match_table_t tokens = {
+ 	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
+ 	{Opt_auto_da_alloc, "auto_da_alloc"},
+ 	{Opt_noauto_da_alloc, "noauto_da_alloc"},
++	{Opt_discard, "discard"},
++	{Opt_nodiscard, "nodiscard"},
+ 	{Opt_err, NULL},
+ };
+ 
+@@ -1565,6 +1579,12 @@ set_qf_format:
+ 			else
+ 				set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ 			break;
++		case Opt_discard:
++			set_opt(sbi->s_mount_opt, DISCARD);
++			break;
++		case Opt_nodiscard:
++			clear_opt(sbi->s_mount_opt, DISCARD);
++			break;
+ 		default:
+ 			ext4_msg(sb, KERN_ERR,
+ 			       "Unrecognized mount option \"%s\" "
+@@ -1673,14 +1693,14 @@ static int ext4_fill_flex_info(struct super_block *sb)
+ 	size_t size;
+ 	int i;
+ 
+-	if (!sbi->s_es->s_log_groups_per_flex) {
++	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
++	groups_per_flex = 1 << sbi->s_log_groups_per_flex;
++
++	if (groups_per_flex < 2) {
+ 		sbi->s_log_groups_per_flex = 0;
+ 		return 1;
+ 	}
+ 
+-	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
+-	groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+-
+ 	/* We allocate both existing and potentially added groups */
+ 	flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
+ 			((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
+@@ -3668,13 +3688,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
+ 	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
+ 	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
+ 		       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
+-	ext4_free_blocks_count_set(es, buf->f_bfree);
+ 	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
+ 	if (buf->f_bfree < ext4_r_blocks_count(es))
+ 		buf->f_bavail = 0;
+ 	buf->f_files = le32_to_cpu(es->s_inodes_count);
+ 	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
+-	es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
+ 	buf->f_namelen = EXT4_NAME_LEN;
+ 	fsid = le64_to_cpup((void *)es->s_uuid) ^
+ 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index fed5b01..0257019 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -988,6 +988,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+ 	if (error)
+ 		goto cleanup;
+ 
++	error = ext4_journal_get_write_access(handle, is.iloc.bh);
++	if (error)
++		goto cleanup;
++
+ 	if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
+ 		struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
+ 		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
+@@ -1013,9 +1017,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+ 		if (flags & XATTR_CREATE)
+ 			goto cleanup;
+ 	}
+-	error = ext4_journal_get_write_access(handle, is.iloc.bh);
+-	if (error)
+-		goto cleanup;
+ 	if (!value) {
+ 		if (!is.s.not_found)
+ 			error = ext4_xattr_ibody_set(handle, inode, &i, &is);
+diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
+index d4cfd6d..8896c1d 100644
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
+ 		JBUFFER_TRACE(jh, "ph3: write metadata");
+ 		flags = jbd2_journal_write_metadata_buffer(commit_transaction,
+ 						      jh, &new_jh, blocknr);
++		if (flags < 0) {
++			jbd2_journal_abort(journal, flags);
++			continue;
++		}
+ 		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
+ 		wbuf[bufs++] = jh2bh(new_jh);
+ 
+diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
+index fed8538..82c295d 100644
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
+ EXPORT_SYMBOL(jbd2_journal_ack_err);
+ EXPORT_SYMBOL(jbd2_journal_clear_err);
+ EXPORT_SYMBOL(jbd2_log_wait_commit);
++EXPORT_SYMBOL(jbd2_log_start_commit);
+ EXPORT_SYMBOL(jbd2_journal_start_commit);
+ EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
+ EXPORT_SYMBOL(jbd2_journal_wipe);
+@@ -358,6 +359,10 @@ repeat:
+ 
+ 		jbd_unlock_bh_state(bh_in);
+ 		tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
++		if (!tmp) {
++			jbd2_journal_put_journal_head(new_jh);
++			return -ENOMEM;
++		}
+ 		jbd_lock_bh_state(bh_in);
+ 		if (jh_in->b_frozen_data) {
+ 			jbd2_free(tmp, bh_in->b_size);
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 75e6e60..0f67914 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2086,11 +2086,18 @@ static inline int is_si_special(const struct siginfo *info)
+ 	return info <= SEND_SIG_FORCED;
+ }
+ 
+-/* True if we are on the alternate signal stack.  */
+-
++/*
++ * True if we are on the alternate signal stack.
++ */
+ static inline int on_sig_stack(unsigned long sp)
+ {
+-	return (sp - current->sas_ss_sp < current->sas_ss_size);
++#ifdef CONFIG_STACK_GROWSUP
++	return sp >= current->sas_ss_sp &&
++		sp - current->sas_ss_sp < current->sas_ss_size;
++#else
++	return sp > current->sas_ss_sp &&
++		sp - current->sas_ss_sp <= current->sas_ss_size;
++#endif
+ }
+ 
+ static inline int sas_ss_flags(unsigned long sp)
+diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
+index 47941fc..0b4baba 100644
+--- a/include/scsi/scsi_host.h
++++ b/include/scsi/scsi_host.h
+@@ -677,6 +677,12 @@ struct Scsi_Host {
+ 	void *shost_data;
+ 
+ 	/*
++	 * Points to the physical bus device we'd use to do DMA
++	 * Needed just in case we have virtual hosts.
++	 */
++	struct device *dma_dev;
++
++	/*
+ 	 * We should ensure that this is aligned, both for better performance
+ 	 * and also because some compilers (m68k) don't automatically force
+ 	 * alignment to a long boundary.
+@@ -720,7 +726,9 @@ extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
+ extern void scsi_flush_work(struct Scsi_Host *);
+ 
+ extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int);
+-extern int __must_check scsi_add_host(struct Scsi_Host *, struct device *);
++extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
++					       struct device *,
++					       struct device *);
+ extern void scsi_scan_host(struct Scsi_Host *);
+ extern void scsi_rescan_device(struct device *);
+ extern void scsi_remove_host(struct Scsi_Host *);
+@@ -731,6 +739,12 @@ extern const char *scsi_host_state_name(enum scsi_host_state);
+ 
+ extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *);
+ 
++static inline int __must_check scsi_add_host(struct Scsi_Host *host,
++					     struct device *dev)
++{
++	return scsi_add_host_with_dma(host, dev, dev);
++}
++
+ static inline struct device *scsi_get_device(struct Scsi_Host *shost)
+ {
+         return shost->shost_gendev.parent;

Modified: dists/trunk/linux-2.6/debian/patches/series/2
==============================================================================
--- dists/trunk/linux-2.6/debian/patches/series/2	Mon Dec 14 02:03:57 2009	(r14785)
+++ dists/trunk/linux-2.6/debian/patches/series/2	Mon Dec 14 22:27:57 2009	(r14786)
@@ -1,3 +1,4 @@
 + features/all/aufs2/aufs2-20091205.patch
 + bugfix/all/atl1c-use-common_task-instead-of-reset_task-and-link.patch
 + bugfix/all/netfilter-xtables-fix-conntrack-match-v1-ipt-save-output.patch
++ bugfix/all/stable/2.6.32.1.patch