[kernel] r17588 - in dists/sid/linux-2.6/debian: . patches/features/all/aufs2 patches/series
Ben Hutchings
benh at alioth.debian.org
Fri Jun 3 05:30:23 UTC 2011
Author: benh
Date: Fri Jun 3 05:30:22 2011
New Revision: 17588
Log:
aufs: Update for 2.6.39 (Closes: #627837)
Deleted:
dists/sid/linux-2.6/debian/patches/features/all/aufs2/Fix-aufs-calling-of-security_path_mknod.patch
Modified:
dists/sid/linux-2.6/debian/changelog
dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-add.patch
dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-base.patch
dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-kbuild.patch
dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-standalone.patch
dists/sid/linux-2.6/debian/patches/series/2
dists/sid/linux-2.6/debian/patches/series/base
Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog Fri Jun 3 04:21:35 2011 (r17587)
+++ dists/sid/linux-2.6/debian/changelog Fri Jun 3 05:30:22 2011 (r17588)
@@ -12,6 +12,7 @@
'Celeron M' models do not.
* Update Swedish debconf template translation (Martin Bagge)
(Closes: #628932)
+ * aufs: Update for 2.6.39 (Closes: #627837)
[ maximilian attems ]
* Add stable 2.6.39.1-rc1, including:
Modified: dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-add.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-add.patch Fri Jun 3 04:21:35 2011 (r17587)
+++ dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-add.patch Fri Jun 3 05:30:22 2011 (r17588)
@@ -1,6 +1,5 @@
-diff -urN a/fs/aufs/Kconfig b/fs/aufs/Kconfig
--- a/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/Kconfig 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/Kconfig 2011-06-03 06:08:42.677958173 +0100
@@ -0,0 +1,180 @@
+config AUFS_FS
+ tristate "Aufs (Advanced multi layered unification filesystem) support"
@@ -76,7 +75,7 @@
+
+config AUFS_EXPORT
+ bool "NFS-exportable aufs"
-+ depends on (AUFS_FS = y && EXPORTFS = y) || (AUFS_FS = m && EXPORTFS)
++ depends on EXPORTFS
+ help
+ If you want to export your mounted aufs via NFS, then enable this
+ option. There are several requirements for this configuration.
@@ -182,9 +181,8 @@
+ Automatic configuration for internal use.
+ When aufs supports Magic SysRq, enabled automatically.
+endif
-diff -urN a/fs/aufs/Makefile b/fs/aufs/Makefile
--- a/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/Makefile 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/Makefile 2011-02-12 16:30:08.940114159 +0000
@@ -0,0 +1,38 @@
+
+include ${src}/magic.mk
@@ -224,9 +222,8 @@
+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
-diff -urN a/fs/aufs/aufs.h b/fs/aufs/aufs.h
--- a/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/aufs.h 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/aufs.h 2011-02-12 16:30:08.940114159 +0000
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -289,9 +286,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_H__ */
-diff -urN a/fs/aufs/branch.c b/fs/aufs/branch.c
--- a/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/branch.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/branch.c 2011-03-06 23:22:01.408413001 +0000
@@ -0,0 +1,1160 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -1453,9 +1449,8 @@
+ AuTraceErr(err);
+ return err;
+}
-diff -urN a/fs/aufs/branch.h b/fs/aufs/branch.h
--- a/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/branch.h 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/branch.h 2011-03-06 23:22:01.408413001 +0000
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -1690,9 +1685,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_BRANCH_H__ */
-diff -urN a/fs/aufs/conf.mk b/fs/aufs/conf.mk
--- a/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/conf.mk 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/conf.mk 2011-02-12 16:30:08.940114159 +0000
@@ -0,0 +1,37 @@
+
+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
@@ -1731,9 +1725,8 @@
+${obj}/sysfs.o: ${AuConfName}
+
+-include ${srctree}/${src}/conf_priv.mk
-diff -urN a/fs/aufs/cpup.c b/fs/aufs/cpup.c
--- a/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/cpup.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/cpup.c 2011-02-12 16:30:08.940114159 +0000
@@ -0,0 +1,1063 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -2798,9 +2791,8 @@
+ dput(parent);
+ return err;
+}
-diff -urN a/fs/aufs/cpup.h b/fs/aufs/cpup.h
--- a/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/cpup.h 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/cpup.h 2011-02-12 16:30:08.940114159 +0000
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -2885,9 +2877,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_CPUP_H__ */
-diff -urN a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c
--- a/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dbgaufs.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/dbgaufs.c 2011-02-12 16:30:08.940114159 +0000
@@ -0,0 +1,334 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -3223,9 +3214,8 @@
+ err = 0;
+ return err;
+}
-diff -urN a/fs/aufs/dbgaufs.h b/fs/aufs/dbgaufs.h
--- a/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dbgaufs.h 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/dbgaufs.h 2011-02-12 16:30:08.940114159 +0000
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -3279,9 +3269,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __DBGAUFS_H__ */
-diff -urN a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c
--- a/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dcsub.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/dcsub.c 2011-03-06 23:22:01.408413001 +0000
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -3526,9 +3515,8 @@
+
+ return path_is_under(path + 0, path + 1);
+}
-diff -urN a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h
--- a/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dcsub.h 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/dcsub.h 2011-02-12 16:30:08.940114159 +0000
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -3625,10 +3613,9 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DCSUB_H__ */
-diff -urN a/fs/aufs/debug.c b/fs/aufs/debug.c
--- a/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/debug.c 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,469 @@
++++ b/fs/aufs/debug.c 2011-06-03 06:08:42.677958173 +0100
+@@ -0,0 +1,479 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -3764,6 +3751,16 @@
+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
+}
+
++void au_dpri_dalias(struct inode *inode)
++{
++ struct dentry *d;
++
++ spin_lock(&inode->i_lock);
++ list_for_each_entry(d, &inode->i_dentry, d_alias)
++ au_dpri_dentry(d);
++ spin_unlock(&inode->i_lock);
++}
++
+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
+{
+ struct dentry *wh = NULL;
@@ -4098,10 +4095,9 @@
+
+ return 0;
+}
-diff -urN a/fs/aufs/debug.h b/fs/aufs/debug.h
--- a/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/debug.h 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,245 @@
++++ b/fs/aufs/debug.h 2011-06-03 06:08:42.677958173 +0100
+@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -4226,6 +4222,7 @@
+void au_dpri_vdir(struct au_vdir *vdir);
+struct inode;
+void au_dpri_inode(struct inode *inode);
++void au_dpri_dalias(struct inode *inode);
+void au_dpri_dentry(struct dentry *dentry);
+struct file;
+void au_dpri_file(struct file *filp);
@@ -4260,6 +4257,11 @@
+ au_dpri_inode(i); \
+} while (0)
+
++#define AuDbgDAlias(i) do { \
++ AuDbg(#i "\n"); \
++ au_dpri_dalias(i); \
++} while (0)
++
+#define AuDbgDentry(d) do { \
+ AuDbg(#d "\n"); \
+ au_dpri_dentry(d); \
@@ -4314,6 +4316,7 @@
+#define AuDbgWhlist(w) do {} while (0)
+#define AuDbgVdir(v) do {} while (0)
+#define AuDbgInode(i) do {} while (0)
++#define AuDbgDAlias(i) do {} while (0)
+#define AuDbgDentry(d) do {} while (0)
+#define AuDbgFile(f) do {} while (0)
+#define AuDbgSb(sb) do {} while (0)
@@ -4347,9 +4350,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DEBUG_H__ */
-diff -urN a/fs/aufs/dentry.c b/fs/aufs/dentry.c
--- a/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dentry.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/dentry.c 2011-06-03 06:08:42.681958206 +0100
@@ -0,0 +1,1140 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -4415,7 +4417,7 @@
+ h_nd.path.dentry = h_parent;
+ h_nd.path.mnt = br->br_mnt;
+
-+ err = __lookup_one_len(name->name, &h_nd.last, NULL, name->len);
++ err = vfsub_name_hash(name->name, &h_nd.last, name->len);
+ h_dentry = ERR_PTR(err);
+ if (!err) {
+ path_get(&h_nd.path);
@@ -5491,10 +5493,9 @@
+ .d_revalidate = aufs_d_revalidate,
+ .d_release = aufs_d_release
+};
-diff -urN a/fs/aufs/dentry.h b/fs/aufs/dentry.h
--- a/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dentry.h 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,237 @@
++++ b/fs/aufs/dentry.h 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -5577,6 +5578,7 @@
+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
+
+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
++struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
+aufs_bindex_t au_dbtail(struct dentry *dentry);
+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
+
@@ -5732,10 +5734,9 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DENTRY_H__ */
-diff -urN a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c
--- a/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dinfo.c 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,493 @@
++++ b/fs/aufs/dinfo.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,543 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -6060,6 +6061,56 @@
+ return d;
+}
+
++/*
++ * extended version of au_h_dptr().
++ * returns a hashed and positive h_dentry in bindex, NULL, or error.
++ */
++struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
++{
++ struct dentry *h_dentry;
++ struct inode *inode, *h_inode;
++
++ inode = dentry->d_inode;
++ AuDebugOn(!inode);
++
++ h_dentry = NULL;
++ if (au_dbstart(dentry) <= bindex
++ && bindex <= au_dbend(dentry))
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (h_dentry && !au_d_hashed_positive(h_dentry)) {
++ dget(h_dentry);
++ goto out; /* success */
++ }
++
++ AuDebugOn(bindex < au_ibstart(inode));
++ AuDebugOn(au_ibend(inode) < bindex);
++ h_inode = au_h_iptr(inode, bindex);
++ h_dentry = d_find_alias(h_inode);
++ if (h_dentry) {
++ if (!IS_ERR(h_dentry)) {
++ if (!au_d_hashed_positive(h_dentry))
++ goto out; /* success */
++ dput(h_dentry);
++ } else
++ goto out;
++ }
++
++ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
++ h_dentry = au_plink_lkup(inode, bindex);
++ AuDebugOn(!h_dentry);
++ if (!IS_ERR(h_dentry)) {
++ if (!au_d_hashed_positive(h_dentry))
++ goto out; /* success */
++ dput(h_dentry);
++ h_dentry = NULL;
++ }
++ }
++
++out:
++ AuDbgDentry(h_dentry);
++ return h_dentry;
++}
++
+aufs_bindex_t au_dbtail(struct dentry *dentry)
+{
+ aufs_bindex_t bend, bwh;
@@ -6229,9 +6280,8 @@
+ return bindex;
+ return -1;
+}
-diff -urN a/fs/aufs/dir.c b/fs/aufs/dir.c
--- a/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dir.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/dir.c 2011-02-12 16:30:08.944127798 +0000
@@ -0,0 +1,647 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -6880,9 +6930,8 @@
+ .flush = aufs_flush_dir,
+ .fsync = aufs_fsync_dir
+};
-diff -urN a/fs/aufs/dir.h b/fs/aufs/dir.h
--- a/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dir.h 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/dir.h 2011-02-12 16:30:08.944127798 +0000
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -7022,10 +7071,9 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DIR_H__ */
-diff -urN a/fs/aufs/dynop.c b/fs/aufs/dynop.c
--- a/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dynop.c 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,426 @@
++++ b/fs/aufs/dynop.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,425 @@
+/*
+ * Copyright (C) 2010-2011 Junjiro R. Okajima
+ *
@@ -7206,7 +7254,6 @@
+
+ DySetAop(writepage);
+ DySetAopForce(readpage); /* force */
-+ DySetAop(sync_page);
+ DySetAop(writepages);
+ DySetAop(set_page_dirty);
+ DySetAop(readpages);
@@ -7452,9 +7499,8 @@
+ for (i = 0; i < AuDyLast; i++)
+ WARN_ON(!list_empty(&dynop[i].head));
+}
-diff -urN a/fs/aufs/dynop.h b/fs/aufs/dynop.h
--- a/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/dynop.h 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/dynop.h 2011-02-12 16:30:08.944127798 +0000
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2010-2011 Junjiro R. Okajima
@@ -7545,10 +7591,9 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_DYNOP_H__ */
-diff -urN a/fs/aufs/export.c b/fs/aufs/export.c
--- a/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/export.c 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,803 @@
++++ b/fs/aufs/export.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,805 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -7793,13 +7838,15 @@
+ spin_unlock(&inode->i_lock);
+ }
+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
++ /* need to refresh */
+ dput(dentry);
-+ dentry = ERR_PTR(-ESTALE);
++ dentry = NULL;
+ }
+
+out_iput:
+ iput(inode);
+out:
++ AuTraceErrPtr(dentry);
+ return dentry;
+}
+
@@ -8352,10 +8399,9 @@
+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
+ atomic_set(&sbinfo->si_xigen_next, u);
+}
-diff -urN a/fs/aufs/file.c b/fs/aufs/file.c
---- a/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/file.c 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,679 @@
+--- a/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/f_op.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,939 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -8375,2001 +8421,1910 @@
+ */
+
+/*
-+ * handling file/dir, and address_space operation
++ * file and vm operations
+ */
+
+#include <linux/file.h>
-+#include <linux/fsnotify.h>
-+#include <linux/namei.h>
-+#include <linux/pagemap.h>
++#include <linux/fs_stack.h>
++#include <linux/mman.h>
++#include <linux/mm.h>
++#include <linux/security.h>
+#include "aufs.h"
+
-+/* drop flags for writing */
-+unsigned int au_file_roflags(unsigned int flags)
-+{
-+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
-+ flags |= O_RDONLY | O_NOATIME;
-+ return flags;
-+}
-+
-+/* common functions to regular file and dir */
-+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
-+ struct file *file)
++int au_do_open_nondir(struct file *file, int flags)
+{
++ int err;
++ aufs_bindex_t bindex;
+ struct file *h_file;
-+ struct dentry *h_dentry;
-+ struct inode *h_inode;
-+ struct super_block *sb;
-+ struct au_branch *br;
-+ struct path h_path;
-+ int err, exec_flag;
++ struct dentry *dentry;
++ struct au_finfo *finfo;
+
-+ /* a race condition can happen between open and unlink/rmdir */
-+ h_file = ERR_PTR(-ENOENT);
-+ h_dentry = au_h_dptr(dentry, bindex);
-+ if (au_test_nfsd() && !h_dentry)
-+ goto out;
-+ h_inode = h_dentry->d_inode;
-+ if (au_test_nfsd() && !h_inode)
-+ goto out;
-+ spin_lock(&h_dentry->d_lock);
-+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
-+ || !h_inode
-+ /* || !dentry->d_inode->i_nlink */
-+ ;
-+ spin_unlock(&h_dentry->d_lock);
-+ if (unlikely(err))
-+ goto out;
++ FiMustWriteLock(file);
+
-+ sb = dentry->d_sb;
-+ br = au_sbr(sb, bindex);
-+ h_file = ERR_PTR(-EACCES);
-+ exec_flag = flags & vfsub_fmode_to_uint(FMODE_EXEC);
-+ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC))
++ dentry = file->f_dentry;
++ err = au_d_alive(dentry);
++ if (unlikely(err))
+ goto out;
+
-+ /* drop flags for writing */
-+ if (au_test_ro(sb, bindex, dentry->d_inode))
-+ flags = au_file_roflags(flags);
-+ flags &= ~O_CREAT;
-+ atomic_inc(&br->br_count);
-+ h_path.dentry = h_dentry;
-+ h_path.mnt = br->br_mnt;
-+ if (!au_special_file(h_inode->i_mode))
-+ h_file = vfsub_dentry_open(&h_path, flags);
-+ else {
-+ /* this block depends upon the configuration */
-+ di_read_unlock(dentry, AuLock_IR);
-+ fi_write_unlock(file);
-+ si_read_unlock(sb);
-+ h_file = vfsub_dentry_open(&h_path, flags);
-+ si_noflush_read_lock(sb);
-+ fi_write_lock(file);
-+ di_read_lock_child(dentry, AuLock_IR);
-+ }
++ finfo = au_fi(file);
++ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
++ finfo->fi_hvmop = NULL;
++ bindex = au_dbstart(dentry);
++ h_file = au_h_open(dentry, bindex, flags, file);
+ if (IS_ERR(h_file))
-+ goto out_br;
-+
-+ if (exec_flag) {
-+ err = deny_write_access(h_file);
-+ if (unlikely(err)) {
-+ fput(h_file);
-+ h_file = ERR_PTR(err);
-+ goto out_br;
-+ }
++ err = PTR_ERR(h_file);
++ else {
++ au_set_fbstart(file, bindex);
++ au_set_h_fptr(file, bindex, h_file);
++ au_update_figen(file);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
+ }
-+ fsnotify_open(h_file);
-+ goto out; /* success */
+
-+out_br:
-+ atomic_dec(&br->br_count);
+out:
-+ return h_file;
++ return err;
+}
+
-+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
-+ struct au_fidir *fidir)
++static int aufs_open_nondir(struct inode *inode __maybe_unused,
++ struct file *file)
+{
+ int err;
-+ struct dentry *dentry;
-+
-+ err = au_finfo_init(file, fidir);
-+ if (unlikely(err))
-+ goto out;
-+
-+ dentry = file->f_dentry;
-+ di_read_lock_child(dentry, AuLock_IR);
-+ err = open(file, vfsub_file_flags(file));
-+ di_read_unlock(dentry, AuLock_IR);
++ struct super_block *sb;
+
-+ fi_write_unlock(file);
-+ if (unlikely(err)) {
-+ au_fi(file)->fi_hdir = NULL;
-+ au_finfo_fin(file);
-+ }
++ AuDbg("%.*s, f_flags 0x%x, f_mode 0x%x\n",
++ AuDLNPair(file->f_dentry), vfsub_file_flags(file),
++ file->f_mode);
+
-+out:
++ sb = file->f_dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL);
++ si_read_unlock(sb);
+ return err;
+}
+
-+int au_reopen_nondir(struct file *file)
++int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
+{
-+ int err;
-+ aufs_bindex_t bstart;
-+ struct dentry *dentry;
-+ struct file *h_file, *h_file_tmp;
++ struct au_finfo *finfo;
++ aufs_bindex_t bindex;
+
-+ dentry = file->f_dentry;
-+ AuDebugOn(au_special_file(dentry->d_inode->i_mode));
-+ bstart = au_dbstart(dentry);
-+ h_file_tmp = NULL;
-+ if (au_fbstart(file) == bstart) {
-+ h_file = au_hf_top(file);
-+ if (file->f_mode == h_file->f_mode)
-+ return 0; /* success */
-+ h_file_tmp = h_file;
-+ get_file(h_file_tmp);
-+ au_set_h_fptr(file, bstart, NULL);
++ finfo = au_fi(file);
++ bindex = finfo->fi_btop;
++ if (bindex >= 0) {
++ /* remove me from sb->s_files */
++ file_sb_list_del(file);
++ au_set_h_fptr(file, bindex, NULL);
+ }
-+ AuDebugOn(au_fi(file)->fi_hdir);
-+ AuDebugOn(au_fbstart(file) < bstart);
-+
-+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
-+ file);
-+ err = PTR_ERR(h_file);
-+ if (IS_ERR(h_file))
-+ goto out; /* todo: close all? */
-+
-+ err = 0;
-+ au_set_fbstart(file, bstart);
-+ au_set_h_fptr(file, bstart, h_file);
-+ au_update_figen(file);
-+ /* todo: necessary? */
-+ /* file->f_ra = h_file->f_ra; */
+
-+out:
-+ if (h_file_tmp)
-+ fput(h_file_tmp);
-+ return err;
++ au_finfo_fin(file);
++ return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
-+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
-+ struct dentry *hi_wh)
++static int au_do_flush_nondir(struct file *file, fl_owner_t id)
+{
+ int err;
-+ aufs_bindex_t bstart;
-+ struct au_dinfo *dinfo;
-+ struct dentry *h_dentry;
-+ struct au_hdentry *hdp;
-+
-+ dinfo = au_di(file->f_dentry);
-+ AuRwMustWriteLock(&dinfo->di_rwsem);
-+
-+ bstart = dinfo->di_bstart;
-+ dinfo->di_bstart = btgt;
-+ hdp = dinfo->di_hdentry;
-+ h_dentry = hdp[0 + btgt].hd_dentry;
-+ hdp[0 + btgt].hd_dentry = hi_wh;
-+ err = au_reopen_nondir(file);
-+ hdp[0 + btgt].hd_dentry = h_dentry;
-+ dinfo->di_bstart = bstart;
++ struct file *h_file;
+
++ err = 0;
++ h_file = au_hf_top(file);
++ if (h_file)
++ err = vfsub_flush(h_file, id);
+ return err;
+}
+
-+static int au_ready_to_write_wh(struct file *file, loff_t len,
-+ aufs_bindex_t bcpup)
++static int aufs_flush_nondir(struct file *file, fl_owner_t id)
+{
-+ int err;
-+ struct inode *inode, *h_inode;
-+ struct dentry *dentry, *h_dentry, *hi_wh;
++ return au_do_flush(file, id, au_do_flush_nondir);
++}
+
-+ dentry = file->f_dentry;
-+ au_update_dbstart(dentry);
-+ inode = dentry->d_inode;
-+ h_inode = NULL;
-+ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) {
-+ h_dentry = au_h_dptr(dentry, bcpup);
-+ if (h_dentry)
-+ h_inode = h_dentry->d_inode;
-+ }
-+ hi_wh = au_hi_wh(inode, bcpup);
-+ if (!hi_wh && !h_inode)
-+ err = au_sio_cpup_wh(dentry, bcpup, len, file);
-+ else
-+ /* already copied-up after unlink */
-+ err = au_reopen_wh(file, bcpup, hi_wh);
++/* ---------------------------------------------------------------------- */
+
-+ if (!err
-+ && inode->i_nlink > 1
-+ && au_opt_test(au_mntflags(dentry->d_sb), PLINK))
-+ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup));
++static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
++ loff_t *ppos)
++{
++ ssize_t err;
++ struct dentry *dentry;
++ struct file *h_file;
++ struct super_block *sb;
++
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
++ if (unlikely(err))
++ goto out;
++
++ h_file = au_hf_top(file);
++ err = vfsub_read_u(h_file, buf, count, ppos);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
+
++ di_read_unlock(dentry, AuLock_IR);
++ fi_read_unlock(file);
++out:
++ si_read_unlock(sb);
+ return err;
+}
+
+/*
-+ * prepare the @file for writing.
++ * todo: very ugly
++ * it locks both of i_mutex and si_rwsem for read in safe.
++ * if the plink maintenance mode continues forever (that is the problem),
++ * may loop forever.
+ */
-+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
++static void au_mtx_and_read_lock(struct inode *inode)
+{
+ int err;
-+ aufs_bindex_t bstart, bcpup, dbstart;
-+ struct dentry *dentry, *parent, *h_dentry;
-+ struct inode *h_inode, *inode;
-+ struct super_block *sb;
++ struct super_block *sb = inode->i_sb;
++
++ while (1) {
++ mutex_lock(&inode->i_mutex);
++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
++ if (!err)
++ break;
++ mutex_unlock(&inode->i_mutex);
++ si_read_lock(sb, AuLock_NOPLMW);
++ si_read_unlock(sb);
++ }
++}
++
++static ssize_t aufs_write(struct file *file, const char __user *ubuf,
++ size_t count, loff_t *ppos)
++{
++ ssize_t err;
++ struct au_pin pin;
++ struct dentry *dentry;
++ struct inode *inode;
+ struct file *h_file;
++ char __user *buf = (char __user *)ubuf;
+
+ dentry = file->f_dentry;
-+ sb = dentry->d_sb;
+ inode = dentry->d_inode;
-+ AuDebugOn(au_special_file(inode->i_mode));
-+ bstart = au_fbstart(file);
-+ err = au_test_ro(sb, bstart, inode);
-+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
-+ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0);
++ au_mtx_and_read_lock(inode);
++
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
+ goto out;
-+ }
+
-+ /* need to cpup or reopen */
-+ parent = dget_parent(dentry);
-+ di_write_lock_parent(parent);
-+ err = AuWbrCopyup(au_sbi(sb), dentry);
-+ bcpup = err;
-+ if (unlikely(err < 0))
-+ goto out_dgrade;
-+ err = 0;
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
+
-+ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) {
-+ err = au_cpup_dirs(dentry, bcpup);
-+ if (unlikely(err))
-+ goto out_dgrade;
-+ }
++ h_file = au_hf_top(file);
++ au_unpin(&pin);
++ err = vfsub_write_u(h_file, buf, count, ppos);
++ au_cpup_attr_timesizes(inode);
++ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
+
-+ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE,
-+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
-+ if (unlikely(err))
-+ goto out_dgrade;
++out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++out:
++ si_read_unlock(inode->i_sb);
++ mutex_unlock(&inode->i_mutex);
++ return err;
++}
+
-+ h_dentry = au_hf_top(file)->f_dentry;
-+ h_inode = h_dentry->d_inode;
-+ dbstart = au_dbstart(dentry);
-+ if (dbstart <= bcpup) {
-+ h_dentry = au_h_dptr(dentry, bcpup);
-+ AuDebugOn(!h_dentry);
-+ h_inode = h_dentry->d_inode;
-+ AuDebugOn(!h_inode);
-+ bstart = bcpup;
-+ }
++static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio,
++ const struct iovec *iov, unsigned long nv, loff_t pos)
++{
++ ssize_t err;
++ struct file *file;
++ ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long,
++ loff_t);
+
-+ if (dbstart <= bcpup /* just reopen */
-+ || !d_unhashed(dentry) /* copyup and reopen */
-+ ) {
-+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
-+ h_file = au_h_open_pre(dentry, bstart);
-+ if (IS_ERR(h_file)) {
-+ err = PTR_ERR(h_file);
-+ h_file = NULL;
-+ } else {
-+ di_downgrade_lock(parent, AuLock_IR);
-+ if (dbstart > bcpup)
-+ err = au_sio_cpup_simple(dentry, bcpup, len,
-+ AuCpup_DTIME);
-+ if (!err)
-+ err = au_reopen_nondir(file);
-+ }
-+ mutex_unlock(&h_inode->i_mutex);
-+ au_h_open_post(dentry, bstart, h_file);
-+ } else { /* copyup as wh and reopen */
-+ /*
-+ * since writable hfsplus branch is not supported,
-+ * h_open_pre/post() are unnecessary.
-+ */
-+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
-+ err = au_ready_to_write_wh(file, len, bcpup);
-+ di_downgrade_lock(parent, AuLock_IR);
-+ mutex_unlock(&h_inode->i_mutex);
-+ }
++ err = security_file_permission(h_file, rw);
++ if (unlikely(err))
++ goto out;
+
-+ if (!err) {
-+ au_pin_set_parent_lflag(pin, /*lflag*/0);
-+ goto out_dput; /* success */
-+ }
-+ au_unpin(pin);
-+ goto out_unlock;
++ err = -ENOSYS;
++ func = NULL;
++ if (rw == MAY_READ)
++ func = h_file->f_op->aio_read;
++ else if (rw == MAY_WRITE)
++ func = h_file->f_op->aio_write;
++ if (func) {
++ file = kio->ki_filp;
++ kio->ki_filp = h_file;
++ lockdep_off();
++ err = func(kio, iov, nv, pos);
++ lockdep_on();
++ kio->ki_filp = file;
++ } else
++ /* currently there is no such fs */
++ WARN_ON_ONCE(1);
+
-+out_dgrade:
-+ di_downgrade_lock(parent, AuLock_IR);
-+out_unlock:
-+ di_read_unlock(parent, AuLock_IR);
-+out_dput:
-+ dput(parent);
+out:
+ return err;
+}
+
-+/* ---------------------------------------------------------------------- */
-+
-+int au_do_flush(struct file *file, fl_owner_t id,
-+ int (*flush)(struct file *file, fl_owner_t id))
++static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
++ unsigned long nv, loff_t pos)
+{
-+ int err;
++ ssize_t err;
++ struct file *file, *h_file;
+ struct dentry *dentry;
+ struct super_block *sb;
-+ struct inode *inode;
+
++ file = kio->ki_filp;
+ dentry = file->f_dentry;
+ sb = dentry->d_sb;
-+ inode = dentry->d_inode;
-+ si_noflush_read_lock(sb);
-+ fi_read_lock(file);
-+ ii_read_lock_child(inode);
-+
-+ err = flush(file, id);
-+ au_cpup_attr_timesizes(inode);
++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
++ if (unlikely(err))
++ goto out;
+
-+ ii_read_unlock(inode);
++ h_file = au_hf_top(file);
++ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
++ di_read_unlock(dentry, AuLock_IR);
+ fi_read_unlock(file);
++
++out:
+ si_read_unlock(sb);
+ return err;
+}
+
-+/* ---------------------------------------------------------------------- */
-+
-+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
++static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
++ unsigned long nv, loff_t pos)
+{
-+ int err;
-+ aufs_bindex_t bstart;
++ ssize_t err;
+ struct au_pin pin;
-+ struct au_finfo *finfo;
-+ struct dentry *dentry, *parent, *hi_wh;
++ struct dentry *dentry;
+ struct inode *inode;
-+ struct super_block *sb;
-+
-+ FiMustWriteLock(file);
++ struct file *file, *h_file;
+
-+ err = 0;
-+ finfo = au_fi(file);
++ file = kio->ki_filp;
+ dentry = file->f_dentry;
-+ sb = dentry->d_sb;
+ inode = dentry->d_inode;
-+ bstart = au_ibstart(inode);
-+ if (bstart == finfo->fi_btop || IS_ROOT(dentry))
++ au_mtx_and_read_lock(inode);
++
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
+ goto out;
+
-+ parent = dget_parent(dentry);
-+ if (au_test_ro(sb, bstart, inode)) {
-+ di_read_lock_parent(parent, !AuLock_IR);
-+ err = AuWbrCopyup(au_sbi(sb), dentry);
-+ bstart = err;
-+ di_read_unlock(parent, !AuLock_IR);
-+ if (unlikely(err < 0))
-+ goto out_parent;
-+ err = 0;
-+ }
-+
-+ di_read_lock_parent(parent, AuLock_IR);
-+ hi_wh = au_hi_wh(inode, bstart);
-+ if (!S_ISDIR(inode->i_mode)
-+ && au_opt_test(au_mntflags(sb), PLINK)
-+ && au_plink_test(inode)
-+ && !d_unhashed(dentry)) {
-+ err = au_test_and_cpup_dirs(dentry, bstart);
-+ if (unlikely(err))
-+ goto out_unlock;
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
+
-+ /* always superio. */
-+ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE,
-+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
-+ if (!err)
-+ err = au_sio_cpup_simple(dentry, bstart, -1,
-+ AuCpup_DTIME);
-+ au_unpin(&pin);
-+ } else if (hi_wh) {
-+ /* already copied-up after unlink */
-+ err = au_reopen_wh(file, bstart, hi_wh);
-+ *need_reopen = 0;
-+ }
++ au_unpin(&pin);
++ h_file = au_hf_top(file);
++ err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos);
++ au_cpup_attr_timesizes(inode);
++ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
+
+out_unlock:
-+ di_read_unlock(parent, AuLock_IR);
-+out_parent:
-+ dput(parent);
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
+out:
++ si_read_unlock(inode->i_sb);
++ mutex_unlock(&inode->i_mutex);
+ return err;
+}
+
-+static void au_do_refresh_dir(struct file *file)
-+{
-+ aufs_bindex_t bindex, bend, new_bindex, brid;
-+ struct au_hfile *p, tmp, *q;
-+ struct au_finfo *finfo;
-+ struct super_block *sb;
-+ struct au_fidir *fidir;
-+
-+ FiMustWriteLock(file);
-+
-+ sb = file->f_dentry->d_sb;
-+ finfo = au_fi(file);
-+ fidir = finfo->fi_hdir;
-+ AuDebugOn(!fidir);
-+ p = fidir->fd_hfile + finfo->fi_btop;
-+ brid = p->hf_br->br_id;
-+ bend = fidir->fd_bbot;
-+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
-+ if (!p->hf_file)
-+ continue;
-+
-+ new_bindex = au_br_index(sb, p->hf_br->br_id);
-+ if (new_bindex == bindex)
-+ continue;
-+ if (new_bindex < 0) {
-+ au_set_h_fptr(file, bindex, NULL);
-+ continue;
-+ }
-+
-+ /* swap two lower inode, and loop again */
-+ q = fidir->fd_hfile + new_bindex;
-+ tmp = *q;
-+ *q = *p;
-+ *p = tmp;
-+ if (tmp.hf_file) {
-+ bindex--;
-+ p--;
-+ }
-+ }
-+
-+ p = fidir->fd_hfile;
-+ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) {
-+ bend = au_sbend(sb);
-+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
-+ finfo->fi_btop++, p++)
-+ if (p->hf_file) {
-+ if (p->hf_file->f_dentry
-+ && p->hf_file->f_dentry->d_inode)
-+ break;
-+ else
-+ au_hfput(p, file);
-+ }
-+ } else {
-+ bend = au_br_index(sb, brid);
-+ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
-+ finfo->fi_btop++, p++)
-+ if (p->hf_file)
-+ au_hfput(p, file);
-+ bend = au_sbend(sb);
-+ }
-+
-+ p = fidir->fd_hfile + bend;
-+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
-+ fidir->fd_bbot--, p--)
-+ if (p->hf_file) {
-+ if (p->hf_file->f_dentry
-+ && p->hf_file->f_dentry->d_inode)
-+ break;
-+ else
-+ au_hfput(p, file);
-+ }
-+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
-+}
-+
-+/*
-+ * after branch manipulating, refresh the file.
-+ */
-+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
++static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
++ struct pipe_inode_info *pipe, size_t len,
++ unsigned int flags)
+{
-+ int err, need_reopen;
-+ aufs_bindex_t bend, bindex;
++ ssize_t err;
++ struct file *h_file;
+ struct dentry *dentry;
-+ struct au_finfo *finfo;
-+ struct au_hfile *hfile;
++ struct super_block *sb;
+
+ dentry = file->f_dentry;
-+ finfo = au_fi(file);
-+ if (!finfo->fi_hdir) {
-+ hfile = &finfo->fi_htop;
-+ AuDebugOn(!hfile->hf_file);
-+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
-+ AuDebugOn(bindex < 0);
-+ if (bindex != finfo->fi_btop)
-+ au_set_fbstart(file, bindex);
-+ } else {
-+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
-+ if (unlikely(err))
-+ goto out;
-+ au_do_refresh_dir(file);
-+ }
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
++ if (unlikely(err))
++ goto out;
+
-+ err = 0;
-+ need_reopen = 1;
-+ if (!au_test_mmapped(file))
-+ err = au_file_refresh_by_inode(file, &need_reopen);
-+ if (!err && need_reopen && !d_unlinked(dentry))
-+ err = reopen(file);
-+ if (!err) {
-+ au_update_figen(file);
-+ goto out; /* success */
++ err = -EINVAL;
++ h_file = au_hf_top(file);
++ if (au_test_loopback_kthread()) {
++ file->f_mapping = h_file->f_mapping;
++ smp_mb(); /* unnecessary? */
+ }
++ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
++ /* todo: necessasry? */
++ /* file->f_ra = h_file->f_ra; */
++ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
+
-+ /* error, close all lower files */
-+ if (finfo->fi_hdir) {
-+ bend = au_fbend_dir(file);
-+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
-+ au_set_h_fptr(file, bindex, NULL);
-+ }
++ di_read_unlock(dentry, AuLock_IR);
++ fi_read_unlock(file);
+
+out:
++ si_read_unlock(sb);
+ return err;
+}
+
-+/* common function to regular file and dir */
-+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
-+ int wlock)
++static ssize_t
++aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
++ size_t len, unsigned int flags)
+{
-+ int err;
-+ unsigned int sigen, figen;
-+ aufs_bindex_t bstart;
-+ unsigned char pseudo_link;
++ ssize_t err;
++ struct au_pin pin;
+ struct dentry *dentry;
+ struct inode *inode;
++ struct file *h_file;
+
-+ err = 0;
+ dentry = file->f_dentry;
+ inode = dentry->d_inode;
-+ AuDebugOn(au_special_file(inode->i_mode));
-+ sigen = au_sigen(dentry->d_sb);
-+ fi_write_lock(file);
-+ figen = au_figen(file);
-+ di_write_lock_child(dentry);
-+ bstart = au_dbstart(dentry);
-+ pseudo_link = (bstart != au_ibstart(inode));
-+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
-+ if (!wlock) {
-+ di_downgrade_lock(dentry, AuLock_IR);
-+ fi_downgrade_lock(file);
-+ }
-+ goto out; /* success */
-+ }
++ au_mtx_and_read_lock(inode);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
+
-+ AuDbg("sigen %d, figen %d\n", sigen, figen);
-+ if (au_digen_test(dentry, sigen)) {
-+ err = au_reval_dpath(dentry, sigen);
-+ AuDebugOn(!err && au_digen_test(dentry, sigen));
-+ }
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
+
-+ if (!err)
-+ err = refresh_file(file, reopen);
-+ if (!err) {
-+ if (!wlock) {
-+ di_downgrade_lock(dentry, AuLock_IR);
-+ fi_downgrade_lock(file);
-+ }
-+ } else {
-+ di_write_unlock(dentry);
-+ fi_write_unlock(file);
-+ }
++ h_file = au_hf_top(file);
++ au_unpin(&pin);
++ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
++ au_cpup_attr_timesizes(inode);
++ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
+
++out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
+out:
++ si_read_unlock(inode->i_sb);
++ mutex_unlock(&inode->i_mutex);
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
-+/* cf. aufs_nopage() */
-+/* for madvise(2) */
-+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
++static struct file *au_safe_file(struct vm_area_struct *vma)
+{
-+ unlock_page(page);
-+ return 0;
++ struct file *file;
++
++ file = vma->vm_file;
++ if (au_fi(file) && au_test_aufs(file->f_dentry->d_sb))
++ return file;
++ return NULL;
+}
+
-+/* it will never be called, but necessary to support O_DIRECT */
-+static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
-+ const struct iovec *iov, loff_t offset,
-+ unsigned long nr_segs)
-+{ BUG(); return 0; }
++static void au_reset_file(struct vm_area_struct *vma, struct file *file)
++{
++ vma->vm_file = file;
++ /* smp_mb(); */ /* flush vm_file */
++}
+
-+/*
-+ * it will never be called, but madvise and fadvise behaves differently
-+ * when get_xip_mem is defined
-+ */
-+static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff,
-+ int create, void **kmem, unsigned long *pfn)
-+{ BUG(); return 0; }
++static int aufs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
++{
++ int err;
++ static DECLARE_WAIT_QUEUE_HEAD(wq);
++ struct file *file, *h_file;
++ struct au_finfo *finfo;
+
-+/* they will never be called. */
-+#ifdef CONFIG_AUFS_DEBUG
-+static int aufs_write_begin(struct file *file, struct address_space *mapping,
-+ loff_t pos, unsigned len, unsigned flags,
-+ struct page **pagep, void **fsdata)
-+{ AuUnsupport(); return 0; }
-+static int aufs_write_end(struct file *file, struct address_space *mapping,
-+ loff_t pos, unsigned len, unsigned copied,
-+ struct page *page, void *fsdata)
-+{ AuUnsupport(); return 0; }
-+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
-+{ AuUnsupport(); return 0; }
-+static void aufs_sync_page(struct page *page)
-+{ AuUnsupport(); }
++ /* todo: non-robr mode, user vm_file as it is? */
++ wait_event(wq, (file = au_safe_file(vma)));
+
-+static int aufs_set_page_dirty(struct page *page)
-+{ AuUnsupport(); return 0; }
-+static void aufs_invalidatepage(struct page *page, unsigned long offset)
-+{ AuUnsupport(); }
-+static int aufs_releasepage(struct page *page, gfp_t gfp)
-+{ AuUnsupport(); return 0; }
-+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
-+ struct page *page)
-+{ AuUnsupport(); return 0; }
-+static int aufs_launder_page(struct page *page)
-+{ AuUnsupport(); return 0; }
-+static int aufs_is_partially_uptodate(struct page *page,
-+ read_descriptor_t *desc,
-+ unsigned long from)
-+{ AuUnsupport(); return 0; }
-+static int aufs_error_remove_page(struct address_space *mapping,
-+ struct page *page)
-+{ AuUnsupport(); return 0; }
-+#endif /* CONFIG_AUFS_DEBUG */
++ /* do not revalidate, no si lock */
++ finfo = au_fi(file);
++ AuDebugOn(finfo->fi_hdir);
++ h_file = finfo->fi_htop.hf_file;
++ AuDebugOn(!h_file || !finfo->fi_hvmop);
+
-+const struct address_space_operations aufs_aop = {
-+ .readpage = aufs_readpage,
-+ .direct_IO = aufs_direct_IO,
-+ .get_xip_mem = aufs_get_xip_mem,
-+#ifdef CONFIG_AUFS_DEBUG
-+ .writepage = aufs_writepage,
-+ .sync_page = aufs_sync_page,
-+ /* no writepages, because of writepage */
-+ .set_page_dirty = aufs_set_page_dirty,
-+ /* no readpages, because of readpage */
-+ .write_begin = aufs_write_begin,
-+ .write_end = aufs_write_end,
-+ /* no bmap, no block device */
-+ .invalidatepage = aufs_invalidatepage,
-+ .releasepage = aufs_releasepage,
-+ .migratepage = aufs_migratepage,
-+ .launder_page = aufs_launder_page,
-+ .is_partially_uptodate = aufs_is_partially_uptodate,
-+ .error_remove_page = aufs_error_remove_page
-+#endif /* CONFIG_AUFS_DEBUG */
-+};
-diff -urN a/fs/aufs/file.h b/fs/aufs/file.h
---- a/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/file.h 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,238 @@
-+/*
-+ * Copyright (C) 2005-2011 Junjiro R. Okajima
-+ *
-+ * This program, aufs is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
++ mutex_lock(&finfo->fi_vm_mtx);
++ vma->vm_file = h_file;
++ err = finfo->fi_hvmop->fault(vma, vmf);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
++ au_reset_file(vma, file);
++ mutex_unlock(&finfo->fi_vm_mtx);
++#if 0 /* def CONFIG_SMP */
++ /* wake_up_nr(&wq, online_cpu - 1); */
++ wake_up_all(&wq);
++#else
++ wake_up(&wq);
++#endif
+
-+/*
-+ * file operations
-+ */
++ return err;
++}
+
-+#ifndef __AUFS_FILE_H__
-+#define __AUFS_FILE_H__
++static int aufs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
++{
++ int err;
++ static DECLARE_WAIT_QUEUE_HEAD(wq);
++ struct file *file, *h_file;
++ struct au_finfo *finfo;
+
-+#ifdef __KERNEL__
++ wait_event(wq, (file = au_safe_file(vma)));
+
-+#include <linux/fs.h>
-+#include <linux/poll.h>
-+#include <linux/aufs_type.h>
-+#include "rwsem.h"
++ finfo = au_fi(file);
++ AuDebugOn(finfo->fi_hdir);
++ h_file = finfo->fi_htop.hf_file;
++ AuDebugOn(!h_file || !finfo->fi_hvmop);
+
-+struct au_branch;
-+struct au_hfile {
-+ struct file *hf_file;
-+ struct au_branch *hf_br;
-+};
++ mutex_lock(&finfo->fi_vm_mtx);
++ vma->vm_file = h_file;
++ err = finfo->fi_hvmop->page_mkwrite(vma, vmf);
++ au_reset_file(vma, file);
++ mutex_unlock(&finfo->fi_vm_mtx);
++ wake_up(&wq);
+
-+struct au_vdir;
-+struct au_fidir {
-+ aufs_bindex_t fd_bbot;
-+ aufs_bindex_t fd_nent;
-+ struct au_vdir *fd_vdir_cache;
-+ struct au_hfile fd_hfile[];
-+};
++ return err;
++}
+
-+static inline int au_fidir_sz(int nent)
++static void aufs_vm_close(struct vm_area_struct *vma)
+{
-+ AuDebugOn(nent < 0);
-+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
-+}
++ static DECLARE_WAIT_QUEUE_HEAD(wq);
++ struct file *file, *h_file;
++ struct au_finfo *finfo;
+
-+struct au_finfo {
-+ atomic_t fi_generation;
++ wait_event(wq, (file = au_safe_file(vma)));
+
-+ struct au_rwsem fi_rwsem;
-+ aufs_bindex_t fi_btop;
++ finfo = au_fi(file);
++ AuDebugOn(finfo->fi_hdir);
++ h_file = finfo->fi_htop.hf_file;
++ AuDebugOn(!h_file || !finfo->fi_hvmop);
+
-+ /* do not union them */
-+ struct { /* for non-dir */
-+ struct au_hfile fi_htop;
-+ struct vm_operations_struct *fi_hvmop;
-+ struct mutex fi_vm_mtx;
-+ struct mutex fi_mmap;
-+ };
-+ struct au_fidir *fi_hdir; /* for dir only */
-+} ____cacheline_aligned_in_smp;
++ mutex_lock(&finfo->fi_vm_mtx);
++ vma->vm_file = h_file;
++ finfo->fi_hvmop->close(vma);
++ au_reset_file(vma, file);
++ mutex_unlock(&finfo->fi_vm_mtx);
++ wake_up(&wq);
++}
++
++const struct vm_operations_struct aufs_vm_ops = {
++ .close = aufs_vm_close,
++ .fault = aufs_fault,
++ .page_mkwrite = aufs_page_mkwrite
++};
+
+/* ---------------------------------------------------------------------- */
+
-+/* file.c */
-+extern const struct address_space_operations aufs_aop;
-+unsigned int au_file_roflags(unsigned int flags);
-+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
-+ struct file *file);
-+int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
-+ struct au_fidir *fidir);
-+int au_reopen_nondir(struct file *file);
-+struct au_pin;
-+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
-+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
-+ int wlock);
-+int au_do_flush(struct file *file, fl_owner_t id,
-+ int (*flush)(struct file *file, fl_owner_t id));
++/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
++#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
+
-+/* poll.c */
-+#ifdef CONFIG_AUFS_POLL
-+unsigned int aufs_poll(struct file *file, poll_table *wait);
++static unsigned long au_arch_prot_conv(unsigned long flags)
++{
++ /* currently ppc64 only */
++#ifdef CONFIG_PPC64
++ /* cf. linux/arch/powerpc/include/asm/mman.h */
++ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
++ return AuConv_VM_PROT(flags, SAO);
++#else
++ AuDebugOn(arch_calc_vm_prot_bits(-1));
++ return 0;
+#endif
++}
+
-+#ifdef CONFIG_AUFS_BR_HFSPLUS
-+/* hfsplus.c */
-+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex);
-+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
-+ struct file *h_file);
-+#else
-+static inline
-+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
++static unsigned long au_prot_conv(unsigned long flags)
+{
-+ return NULL;
++ return AuConv_VM_PROT(flags, READ)
++ | AuConv_VM_PROT(flags, WRITE)
++ | AuConv_VM_PROT(flags, EXEC)
++ | au_arch_prot_conv(flags);
+}
+
-+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
-+ struct file *h_file);
-+#endif
-+
-+/* f_op.c */
-+extern const struct file_operations aufs_file_fop;
-+extern const struct vm_operations_struct aufs_vm_ops;
-+int au_do_open_nondir(struct file *file, int flags);
-+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
++/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
++#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
+
-+#ifdef CONFIG_AUFS_SP_IATTR
-+/* f_op_sp.c */
-+int au_special_file(umode_t mode);
-+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev);
-+#else
-+AuStubInt0(au_special_file, umode_t mode)
-+static inline void au_init_special_fop(struct inode *inode, umode_t mode,
-+ dev_t rdev)
++static unsigned long au_flag_conv(unsigned long flags)
+{
-+ init_special_inode(inode, mode, rdev);
++ return AuConv_VM_MAP(flags, GROWSDOWN)
++ | AuConv_VM_MAP(flags, DENYWRITE)
++ | AuConv_VM_MAP(flags, EXECUTABLE)
++ | AuConv_VM_MAP(flags, LOCKED);
+}
-+#endif
+
-+/* finfo.c */
-+void au_hfput(struct au_hfile *hf, struct file *file);
-+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
-+ struct file *h_file);
++static struct vm_operations_struct *
++au_hvmop(struct file *h_file, struct vm_area_struct *vma, unsigned long *flags)
++{
++ struct vm_operations_struct *h_vmop;
++ unsigned long prot;
++ int err;
+
-+void au_update_figen(struct file *file);
-+void au_fi_mmap_lock(struct file *file);
-+void au_fi_mmap_unlock(struct file *file);
-+struct au_fidir *au_fidir_alloc(struct super_block *sb);
-+int au_fidir_realloc(struct au_finfo *finfo, int nbr);
++ h_vmop = ERR_PTR(-ENODEV);
++ if (!h_file->f_op || !h_file->f_op->mmap)
++ goto out;
+
-+void au_fi_init_once(void *_fi);
-+void au_finfo_fin(struct file *file);
-+int au_finfo_init(struct file *file, struct au_fidir *fidir);
++ prot = au_prot_conv(vma->vm_flags);
++ err = security_file_mmap(h_file, /*reqprot*/prot, prot,
++ au_flag_conv(vma->vm_flags), vma->vm_start, 0);
++ h_vmop = ERR_PTR(err);
++ if (unlikely(err))
++ goto out;
+
-+/* ioctl.c */
-+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
-+#ifdef CONFIG_COMPAT
-+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
-+ unsigned long arg);
-+#endif
++ err = h_file->f_op->mmap(h_file, vma);
++ h_vmop = ERR_PTR(err);
++ if (unlikely(err))
++ goto out;
+
-+/* ---------------------------------------------------------------------- */
++ /* oops, it became 'const' */
++ h_vmop = (struct vm_operations_struct *)vma->vm_ops;
++ *flags = vma->vm_flags;
++ err = do_munmap(current->mm, vma->vm_start,
++ vma->vm_end - vma->vm_start);
++ if (unlikely(err)) {
++ AuIOErr("failed internal unmapping %.*s, %d\n",
++ AuDLNPair(h_file->f_dentry), err);
++ h_vmop = ERR_PTR(-EIO);
++ }
+
-+static inline struct au_finfo *au_fi(struct file *file)
-+{
-+ return file->private_data;
++out:
++ return h_vmop;
+}
+
-+/* ---------------------------------------------------------------------- */
-+
+/*
-+ * fi_read_lock, fi_write_lock,
-+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
++ * This is another ugly approach to keep the lock order, particularly
++ * mm->mmap_sem and aufs rwsem. The previous approach was reverted and you can
++ * find it in git-log, if you want.
++ *
++ * native readdir: i_mutex, copy_to_user, mmap_sem
++ * aufs readdir: i_mutex, rwsem, nested-i_mutex, copy_to_user, mmap_sem
++ *
++ * Before aufs_mmap() mmap_sem is acquired already, but aufs_mmap() has to
++ * acquire aufs rwsem. It introduces a circular locking dependency.
++ * To address this problem, aufs_mmap() delegates the part which requires aufs
++ * rwsem to its internal workqueue.
+ */
-+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
-+
-+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
-+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
-+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
-+
-+/* ---------------------------------------------------------------------- */
-+
-+/* todo: hard/soft set? */
-+static inline aufs_bindex_t au_fbstart(struct file *file)
-+{
-+ FiMustAnyLock(file);
-+ return au_fi(file)->fi_btop;
-+}
+
-+static inline aufs_bindex_t au_fbend_dir(struct file *file)
-+{
-+ FiMustAnyLock(file);
-+ AuDebugOn(!au_fi(file)->fi_hdir);
-+ return au_fi(file)->fi_hdir->fd_bbot;
-+}
-+
-+static inline struct au_vdir *au_fvdir_cache(struct file *file)
-+{
-+ FiMustAnyLock(file);
-+ AuDebugOn(!au_fi(file)->fi_hdir);
-+ return au_fi(file)->fi_hdir->fd_vdir_cache;
-+}
++/* very ugly approach */
++#include "mtx.h"
+
-+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
++static void au_fi_mmap_lock_and_sell(struct file *file)
+{
-+ FiMustWriteLock(file);
-+ au_fi(file)->fi_btop = bindex;
-+}
++ struct mutex *mtx;
+
-+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
-+{
+ FiMustWriteLock(file);
-+ AuDebugOn(!au_fi(file)->fi_hdir);
-+ au_fi(file)->fi_hdir->fd_bbot = bindex;
-+}
+
-+static inline void au_set_fvdir_cache(struct file *file,
-+ struct au_vdir *vdir_cache)
-+{
-+ FiMustWriteLock(file);
-+ AuDebugOn(!au_fi(file)->fi_hdir);
-+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
++ mtx = &au_fi(file)->fi_mmap;
++ mutex_lock(mtx);
++ mutex_release(&mtx->dep_map, /*nested*/0, _RET_IP_);
+}
+
-+static inline struct file *au_hf_top(struct file *file)
++static void au_fi_mmap_buy(struct file *file)
+{
-+ FiMustAnyLock(file);
-+ AuDebugOn(au_fi(file)->fi_hdir);
-+ return au_fi(file)->fi_htop.hf_file;
-+}
++ struct mutex *mtx;
+
-+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
-+{
-+ FiMustAnyLock(file);
-+ AuDebugOn(!au_fi(file)->fi_hdir);
-+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
-+}
++ mtx = &au_fi(file)->fi_mmap;
++ MtxMustLock(mtx);
+
-+/* todo: memory barrier? */
-+static inline unsigned int au_figen(struct file *f)
-+{
-+ return atomic_read(&au_fi(f)->fi_generation);
++ mutex_set_owner(mtx);
++ mutex_acquire(&mtx->dep_map, /*subclass*/0, /*trylock*/0, _RET_IP_);
+}
+
-+static inline int au_test_mmapped(struct file *f)
++static void au_fi_mmap_unlock(struct file *file)
+{
-+ FiMustAnyLock(f);
-+ return !!(au_fi(f)->fi_hvmop);
++ mutex_unlock(&au_fi(file)->fi_mmap);
+}
+
-+#endif /* __KERNEL__ */
-+#endif /* __AUFS_FILE_H__ */
-diff -urN a/fs/aufs/finfo.c b/fs/aufs/finfo.c
---- a/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/finfo.c 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,174 @@
-+/*
-+ * Copyright (C) 2005-2011 Junjiro R. Okajima
-+ *
-+ * This program, aufs is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+/*
-+ * file private data
-+ */
++struct au_mmap_pre_args {
++ /* input */
++ struct file *file;
++ struct vm_area_struct *vma;
+
-+#include <linux/file.h>
-+#include "aufs.h"
++ /* output */
++ int *errp;
++ struct file *h_file;
++ struct au_branch *br;
++ int mmapped;
++};
+
-+void au_hfput(struct au_hfile *hf, struct file *file)
++static int au_mmap_pre(struct file *file, struct vm_area_struct *vma,
++ struct file **h_file, struct au_branch **br,
++ int *mmapped)
+{
-+ /* todo: direct access f_flags */
-+ if (vfsub_file_flags(file) & vfsub_fmode_to_uint(FMODE_EXEC))
-+ allow_write_access(hf->hf_file);
-+ fput(hf->hf_file);
-+ hf->hf_file = NULL;
-+ atomic_dec(&hf->hf_br->br_count);
-+ hf->hf_br = NULL;
-+}
++ int err;
++ aufs_bindex_t bstart;
++ const unsigned char wlock
++ = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
++ struct dentry *dentry;
++ struct super_block *sb;
+
-+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
-+{
-+ struct au_finfo *finfo = au_fi(file);
-+ struct au_hfile *hf;
-+ struct au_fidir *fidir;
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_NOPLMW);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out;
+
-+ fidir = finfo->fi_hdir;
-+ if (!fidir) {
-+ AuDebugOn(finfo->fi_btop != bindex);
-+ hf = &finfo->fi_htop;
-+ } else
-+ hf = fidir->fd_hfile + bindex;
++ *mmapped = !!au_test_mmapped(file);
++ if (wlock) {
++ struct au_pin pin;
+
-+ if (hf && hf->hf_file)
-+ au_hfput(hf, file);
-+ if (val) {
-+ FiMustWriteLock(file);
-+ hf->hf_file = val;
-+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
-+ }
-+}
++ err = au_ready_to_write(file, -1, &pin);
++ di_write_unlock(dentry);
++ if (unlikely(err))
++ goto out_unlock;
++ au_unpin(&pin);
++ } else
++ di_write_unlock(dentry);
++ bstart = au_fbstart(file);
++ *br = au_sbr(sb, bstart);
++ *h_file = au_hf_top(file);
++ get_file(*h_file);
++ if (!*mmapped)
++ au_fi_mmap_lock_and_sell(file);
+
-+void au_update_figen(struct file *file)
-+{
-+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
-+ /* smp_mb(); */ /* atomic_set */
++out_unlock:
++ fi_write_unlock(file);
++out:
++ si_read_unlock(sb);
++ return err;
+}
+
-+/* ---------------------------------------------------------------------- */
-+
-+void au_fi_mmap_lock(struct file *file)
++static void au_call_mmap_pre(void *args)
+{
-+ FiMustWriteLock(file);
-+ lockdep_off();
-+ mutex_lock(&au_fi(file)->fi_mmap);
-+ lockdep_on();
++ struct au_mmap_pre_args *a = args;
++ *a->errp = au_mmap_pre(a->file, a->vma, &a->h_file, &a->br,
++ &a->mmapped);
+}
+
-+void au_fi_mmap_unlock(struct file *file)
++static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
+{
-+ lockdep_off();
-+ mutex_unlock(&au_fi(file)->fi_mmap);
-+ lockdep_on();
-+}
++ int err, wkq_err;
++ unsigned long h_vmflags;
++ struct au_finfo *finfo;
++ struct dentry *h_dentry;
++ struct vm_operations_struct *h_vmop, *vmop;
++ struct au_mmap_pre_args args = {
++ .file = file,
++ .vma = vma,
++ .errp = &err
++ };
+
-+/* ---------------------------------------------------------------------- */
-+
-+struct au_fidir *au_fidir_alloc(struct super_block *sb)
-+{
-+ struct au_fidir *fidir;
-+ int nbr;
++ wkq_err = au_wkq_wait_pre(au_call_mmap_pre, &args);
++ if (unlikely(wkq_err))
++ err = wkq_err;
++ if (unlikely(err))
++ goto out;
++ if (!args.mmapped)
++ au_fi_mmap_buy(file);
+
-+ nbr = au_sbend(sb) + 1;
-+ if (nbr < 2)
-+ nbr = 2; /* initial allocate for 2 branches */
-+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
-+ if (fidir) {
-+ fidir->fd_bbot = -1;
-+ fidir->fd_nent = nbr;
-+ fidir->fd_vdir_cache = NULL;
++ h_dentry = args.h_file->f_dentry;
++ if (!args.mmapped && au_test_fs_bad_mapping(h_dentry->d_sb)) {
++ /*
++ * by this assignment, f_mapping will differs from aufs inode
++ * i_mapping.
++ * if someone else mixes the use of f_dentry->d_inode and
++ * f_mapping->host, then a problem may arise.
++ */
++ file->f_mapping = args.h_file->f_mapping;
+ }
+
-+ return fidir;
-+}
++ /* always try this internal mmap to get vma flags */
++ h_vmflags = 0; /* gcc warning */
++ h_vmop = au_hvmop(args.h_file, vma, &h_vmflags);
++ err = PTR_ERR(h_vmop);
++ if (IS_ERR(h_vmop))
++ goto out_unlock;
++ finfo = au_fi(file);
++ AuDebugOn(args.mmapped && h_vmop != finfo->fi_hvmop);
+
-+int au_fidir_realloc(struct au_finfo *finfo, int nbr)
-+{
-+ int err;
-+ struct au_fidir *fidir, *p;
++ vmop = (void *)au_dy_vmop(file, args.br, h_vmop);
++ err = PTR_ERR(vmop);
++ if (IS_ERR(vmop))
++ goto out_unlock;
+
-+ AuRwMustWriteLock(&finfo->fi_rwsem);
-+ fidir = finfo->fi_hdir;
-+ AuDebugOn(!fidir);
++ /*
++ * unnecessary to handle MAP_DENYWRITE and deny_write_access()?
++ * currently MAP_DENYWRITE from userspace is ignored, but elf loader
++ * sets it. when FMODE_EXEC is set (by open_exec() or sys_uselib()),
++ * both of the aufs file and the lower file is deny_write_access()-ed.
++ * finally I hope we can skip handlling MAP_DENYWRITE here.
++ */
++ err = generic_file_mmap(file, vma);
++ if (unlikely(err))
++ goto out_unlock;
+
-+ err = -ENOMEM;
-+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
-+ GFP_NOFS);
-+ if (p) {
-+ p->fd_nent = nbr;
-+ finfo->fi_hdir = p;
-+ err = 0;
-+ }
++ vma->vm_ops = vmop;
++ vma->vm_flags = h_vmflags;
++ if (!args.mmapped)
++ finfo->fi_hvmop = h_vmop;
+
++ vfsub_file_accessed(args.h_file);
++ /* update without lock, I don't think it a problem */
++ fsstack_copy_attr_atime(file->f_dentry->d_inode, h_dentry->d_inode);
++
++out_unlock:
++ if (!args.mmapped)
++ au_fi_mmap_unlock(file);
++ fput(args.h_file);
++out:
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
-+void au_finfo_fin(struct file *file)
++static int aufs_fsync_nondir(struct file *file, int datasync)
+{
-+ struct au_finfo *finfo;
++ int err;
++ struct au_pin pin;
++ struct dentry *dentry;
++ struct inode *inode;
++ struct file *h_file;
++ struct super_block *sb;
+
-+ au_nfiles_dec(file->f_dentry->d_sb);
++ dentry = file->f_dentry;
++ inode = dentry->d_inode;
++ IMustLock(file->f_mapping->host);
++ if (inode != file->f_mapping->host) {
++ mutex_unlock(&file->f_mapping->host->i_mutex);
++ mutex_lock(&inode->i_mutex);
++ }
++ IMustLock(inode);
+
-+ finfo = au_fi(file);
-+ AuDebugOn(finfo->fi_hdir);
-+ AuRwDestroy(&finfo->fi_rwsem);
-+ au_cache_free_finfo(finfo);
-+}
++ sb = dentry->d_sb;
++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
++ if (unlikely(err))
++ goto out;
+
-+void au_fi_init_once(void *_finfo)
-+{
-+ struct au_finfo *finfo = _finfo;
-+ static struct lock_class_key aufs_fi, aufs_fi_vm, aufs_fi_mmap;
++ err = 0; /* -EBADF; */ /* posix? */
++ if (unlikely(!(file->f_mode & FMODE_WRITE)))
++ goto out_si;
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out_si;
+
-+ au_rw_init(&finfo->fi_rwsem);
-+ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
-+ mutex_init(&finfo->fi_vm_mtx);
-+ lockdep_set_class(&finfo->fi_vm_mtx, &aufs_fi_vm);
-+ mutex_init(&finfo->fi_mmap);
-+ lockdep_set_class(&finfo->fi_mmap, &aufs_fi_mmap);
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
++ au_unpin(&pin);
++
++ err = -EINVAL;
++ h_file = au_hf_top(file);
++ if (h_file->f_op && h_file->f_op->fsync) {
++ struct mutex *h_mtx;
++
++ /*
++ * no filemap_fdatawrite() since aufs file has no its own
++ * mapping, but dir.
++ */
++ h_mtx = &h_file->f_dentry->d_inode->i_mutex;
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ err = h_file->f_op->fsync(h_file, datasync);
++ if (!err)
++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
++ /*ignore*/
++ au_cpup_attr_timesizes(inode);
++ mutex_unlock(h_mtx);
++ }
++
++out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++out_si:
++ si_read_unlock(sb);
++out:
++ if (inode != file->f_mapping->host) {
++ mutex_unlock(&inode->i_mutex);
++ mutex_lock(&file->f_mapping->host->i_mutex);
++ }
++ return err;
+}
+
-+int au_finfo_init(struct file *file, struct au_fidir *fidir)
++/* no one supports this operation, currently */
++#if 0
++static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
+{
+ int err;
-+ struct au_finfo *finfo;
++ struct au_pin pin;
+ struct dentry *dentry;
++ struct inode *inode;
++ struct file *file, *h_file;
+
-+ err = -ENOMEM;
++ file = kio->ki_filp;
+ dentry = file->f_dentry;
-+ finfo = au_cache_alloc_finfo();
-+ if (unlikely(!finfo))
++ inode = dentry->d_inode;
++ au_mtx_and_read_lock(inode);
++
++ err = 0; /* -EBADF; */ /* posix? */
++ if (unlikely(!(file->f_mode & FMODE_WRITE)))
++ goto out;
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
+ goto out;
+
-+ err = 0;
-+ au_nfiles_inc(dentry->d_sb);
-+ au_rw_write_lock(&finfo->fi_rwsem);
-+ finfo->fi_btop = -1;
-+ finfo->fi_hdir = fidir;
-+ atomic_set(&finfo->fi_generation, au_digen(dentry));
-+ /* smp_mb(); */ /* atomic_set */
++ err = au_ready_to_write(file, -1, &pin);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
++ au_unpin(&pin);
+
-+ file->private_data = finfo;
++ err = -ENOSYS;
++ h_file = au_hf_top(file);
++ if (h_file->f_op && h_file->f_op->aio_fsync) {
++ struct dentry *h_d;
++ struct mutex *h_mtx;
++
++ h_d = h_file->f_dentry;
++ h_mtx = &h_d->d_inode->i_mutex;
++ if (!is_sync_kiocb(kio)) {
++ get_file(h_file);
++ fput(file);
++ }
++ kio->ki_filp = h_file;
++ err = h_file->f_op->aio_fsync(kio, datasync);
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ if (!err)
++ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
++ /*ignore*/
++ au_cpup_attr_timesizes(inode);
++ mutex_unlock(h_mtx);
++ }
+
++out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
+out:
++ si_read_unlock(inode->sb);
++ mutex_unlock(&inode->i_mutex);
+ return err;
+}
-diff -urN a/fs/aufs/f_op.c b/fs/aufs/f_op.c
---- a/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/f_op.c 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,907 @@
-+/*
-+ * Copyright (C) 2005-2011 Junjiro R. Okajima
-+ *
-+ * This program, aufs is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+/*
-+ * file and vm operations
-+ */
-+
-+#include <linux/file.h>
-+#include <linux/fs_stack.h>
-+#include <linux/mman.h>
-+#include <linux/mm.h>
-+#include <linux/security.h>
-+#include "aufs.h"
++#endif
+
-+int au_do_open_nondir(struct file *file, int flags)
++static int aufs_fasync(int fd, struct file *file, int flag)
+{
+ int err;
-+ aufs_bindex_t bindex;
+ struct file *h_file;
+ struct dentry *dentry;
-+ struct au_finfo *finfo;
-+
-+ FiMustWriteLock(file);
++ struct super_block *sb;
+
+ dentry = file->f_dentry;
-+ err = au_d_alive(dentry);
++ sb = dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
+ if (unlikely(err))
+ goto out;
+
-+ finfo = au_fi(file);
-+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
-+ finfo->fi_hvmop = NULL;
-+ bindex = au_dbstart(dentry);
-+ h_file = au_h_open(dentry, bindex, flags, file);
-+ if (IS_ERR(h_file))
-+ err = PTR_ERR(h_file);
-+ else {
-+ au_set_fbstart(file, bindex);
-+ au_set_h_fptr(file, bindex, h_file);
-+ au_update_figen(file);
-+ /* todo: necessary? */
-+ /* file->f_ra = h_file->f_ra; */
-+ }
++ h_file = au_hf_top(file);
++ if (h_file->f_op && h_file->f_op->fasync)
++ err = h_file->f_op->fasync(fd, h_file, flag);
++
++ di_read_unlock(dentry, AuLock_IR);
++ fi_read_unlock(file);
+
+out:
++ si_read_unlock(sb);
+ return err;
+}
+
-+static int aufs_open_nondir(struct inode *inode __maybe_unused,
-+ struct file *file)
-+{
-+ int err;
-+ struct super_block *sb;
-+
-+ AuDbg("%.*s, f_ flags 0x%x, f_mode 0x%x\n",
-+ AuDLNPair(file->f_dentry), vfsub_file_flags(file),
-+ file->f_mode);
++/* ---------------------------------------------------------------------- */
+
-+ sb = file->f_dentry->d_sb;
-+ si_read_lock(sb, AuLock_FLUSH);
-+ err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL);
-+ si_read_unlock(sb);
-+ return err;
++/* no one supports this operation, currently */
++#if 0
++static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
++ size_t len, loff_t *pos , int more)
++{
+}
++#endif
+
-+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
-+{
-+ struct au_finfo *finfo;
-+ aufs_bindex_t bindex;
++/* ---------------------------------------------------------------------- */
+
-+ finfo = au_fi(file);
-+ bindex = finfo->fi_btop;
-+ if (bindex >= 0) {
-+ /* remove me from sb->s_files */
-+ file_sb_list_del(file);
-+ au_set_h_fptr(file, bindex, NULL);
-+ }
++const struct file_operations aufs_file_fop = {
++ .owner = THIS_MODULE,
++ /*
++ * while generic_file_llseek/_unlocked() don't use BKL,
++ * don't use it since it operates file->f_mapping->host.
++ * in aufs, it may be a real file and may confuse users by UDBA.
++ */
++ /* .llseek = generic_file_llseek, */
++ .llseek = default_llseek,
+
-+ au_finfo_fin(file);
-+ return 0;
-+}
++ .read = aufs_read,
++ .write = aufs_write,
++ .aio_read = aufs_aio_read,
++ .aio_write = aufs_aio_write,
++#ifdef CONFIG_AUFS_POLL
++ .poll = aufs_poll,
++#endif
++ .unlocked_ioctl = aufs_ioctl_nondir,
++#ifdef CONFIG_COMPAT
++ .compat_ioctl = aufs_ioctl_nondir, /* same */
++#endif
++ .mmap = aufs_mmap,
++ .open = aufs_open_nondir,
++ .flush = aufs_flush_nondir,
++ .release = aufs_release_nondir,
++ .fsync = aufs_fsync_nondir,
++ /* .aio_fsync = aufs_aio_fsync_nondir, */
++ .fasync = aufs_fasync,
++ /* .sendpage = aufs_sendpage, */
++ .splice_write = aufs_splice_write,
++ .splice_read = aufs_splice_read,
++#if 0
++ .aio_splice_write = aufs_aio_splice_write,
++ .aio_splice_read = aufs_aio_splice_read
++#endif
++};
+--- a/fs/aufs/f_op_sp.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/f_op_sp.c 2011-02-12 16:30:08.944127798 +0000
+@@ -0,0 +1,299 @@
++/*
++ * Copyright (C) 2005-2011 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
+
-+/* ---------------------------------------------------------------------- */
++/*
++ * file operations for special files.
++ * while they exist in aufs virtually,
++ * their file I/O is handled out of aufs.
++ */
+
-+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
++#include <linux/fs_stack.h>
++#include "aufs.h"
++
++static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov,
++ unsigned long nv, loff_t pos)
+{
-+ int err;
-+ struct file *h_file;
++ ssize_t err;
++ aufs_bindex_t bstart;
++ unsigned char wbr;
++ struct file *file, *h_file;
++ struct super_block *sb;
+
-+ err = 0;
++ file = kio->ki_filp;
++ sb = file->f_dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ fi_read_lock(file);
++ bstart = au_fbstart(file);
+ h_file = au_hf_top(file);
-+ if (h_file)
-+ err = vfsub_flush(h_file, id);
++ fi_read_unlock(file);
++ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
++ si_read_unlock(sb);
++
++ /* do not change the file in kio */
++ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read);
++ err = h_file->f_op->aio_read(kio, iov, nv, pos);
++ if (err > 0 && wbr)
++ file_accessed(h_file);
++
+ return err;
+}
+
-+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
++static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov,
++ unsigned long nv, loff_t pos)
+{
-+ return au_do_flush(file, id, au_do_flush_nondir);
++ ssize_t err;
++ aufs_bindex_t bstart;
++ unsigned char wbr;
++ struct super_block *sb;
++ struct file *file, *h_file;
++
++ file = kio->ki_filp;
++ sb = file->f_dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ fi_read_lock(file);
++ bstart = au_fbstart(file);
++ h_file = au_hf_top(file);
++ fi_read_unlock(file);
++ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
++ si_read_unlock(sb);
++
++ /* do not change the file in kio */
++ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write);
++ err = h_file->f_op->aio_write(kio, iov, nv, pos);
++ if (err > 0 && wbr)
++ file_update_time(h_file);
++
++ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
-+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
-+ loff_t *ppos)
++static int aufs_release_sp(struct inode *inode, struct file *file)
+{
-+ ssize_t err;
-+ struct dentry *dentry;
++ int err;
+ struct file *h_file;
-+ struct super_block *sb;
-+
-+ dentry = file->f_dentry;
-+ sb = dentry->d_sb;
-+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
-+ if (unlikely(err))
-+ goto out;
+
++ fi_read_lock(file);
+ h_file = au_hf_top(file);
-+ err = vfsub_read_u(h_file, buf, count, ppos);
-+ /* todo: necessary? */
-+ /* file->f_ra = h_file->f_ra; */
-+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
-+
-+ di_read_unlock(dentry, AuLock_IR);
+ fi_read_unlock(file);
-+out:
-+ si_read_unlock(sb);
++ /* close this fifo in aufs */
++ err = h_file->f_op->release(inode, file); /* ignore */
++ aufs_release_nondir(inode, file); /* ignore */
+ return err;
+}
+
-+/*
-+ * todo: very ugly
-+ * it locks both of i_mutex and si_rwsem for read in safe.
-+ * if the plink maintenance mode continues forever (that is the problem),
-+ * may loop forever.
-+ */
-+static void au_mtx_and_read_lock(struct inode *inode)
-+{
-+ int err;
-+ struct super_block *sb = inode->i_sb;
++/* ---------------------------------------------------------------------- */
+
-+ while (1) {
-+ mutex_lock(&inode->i_mutex);
-+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
-+ if (!err)
-+ break;
-+ mutex_unlock(&inode->i_mutex);
-+ si_read_lock(sb, AuLock_NOPLMW);
-+ si_read_unlock(sb);
++/* currently, support only FIFO */
++enum {
++ AuSp_FIFO, AuSp_FIFO_R, AuSp_FIFO_W, AuSp_FIFO_RW,
++ /* AuSp_SOCK, AuSp_CHR, AuSp_BLK, */
++ AuSp_Last
++};
++static int aufs_open_sp(struct inode *inode, struct file *file);
++static struct au_sp_fop {
++ int done;
++ struct file_operations fop; /* not 'const' */
++ spinlock_t spin;
++} au_sp_fop[AuSp_Last] = {
++ [AuSp_FIFO] = {
++ .fop = {
++ .owner = THIS_MODULE,
++ .open = aufs_open_sp
++ }
+ }
-+}
++};
+
-+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
-+ size_t count, loff_t *ppos)
++static void au_init_fop_sp(struct file *file)
+{
-+ ssize_t err;
-+ struct au_pin pin;
-+ struct dentry *dentry;
-+ struct inode *inode;
++ struct au_sp_fop *p;
++ int i;
+ struct file *h_file;
-+ char __user *buf = (char __user *)ubuf;
-+
-+ dentry = file->f_dentry;
-+ inode = dentry->d_inode;
-+ au_mtx_and_read_lock(inode);
+
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
-+ if (unlikely(err))
-+ goto out;
++ p = au_sp_fop;
++ if (unlikely(!p->done)) {
++ /* initialize first time only */
++ static DEFINE_SPINLOCK(spin);
+
-+ err = au_ready_to_write(file, -1, &pin);
-+ di_downgrade_lock(dentry, AuLock_IR);
-+ if (unlikely(err))
-+ goto out_unlock;
++ spin_lock(&spin);
++ if (!p->done) {
++ BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop)
++ != AuSp_Last);
++ for (i = 0; i < AuSp_Last; i++)
++ spin_lock_init(&p[i].spin);
++ p->done = 1;
++ }
++ spin_unlock(&spin);
++ }
+
-+ h_file = au_hf_top(file);
-+ au_unpin(&pin);
-+ err = vfsub_write_u(h_file, buf, count, ppos);
-+ au_cpup_attr_timesizes(inode);
-+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
++ switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) {
++ case FMODE_READ:
++ i = AuSp_FIFO_R;
++ break;
++ case FMODE_WRITE:
++ i = AuSp_FIFO_W;
++ break;
++ case FMODE_READ | FMODE_WRITE:
++ i = AuSp_FIFO_RW;
++ break;
++ default:
++ BUG();
++ }
+
-+out_unlock:
-+ di_read_unlock(dentry, AuLock_IR);
-+ fi_write_unlock(file);
-+out:
-+ si_read_unlock(inode->i_sb);
-+ mutex_unlock(&inode->i_mutex);
-+ return err;
++ p += i;
++ if (unlikely(!p->done)) {
++ /* initialize first time only */
++ h_file = au_hf_top(file);
++ spin_lock(&p->spin);
++ if (!p->done) {
++ p->fop = *h_file->f_op;
++ p->fop.owner = THIS_MODULE;
++ if (p->fop.aio_read)
++ p->fop.aio_read = aufs_aio_read_sp;
++ if (p->fop.aio_write)
++ p->fop.aio_write = aufs_aio_write_sp;
++ p->fop.release = aufs_release_sp;
++ p->done = 1;
++ }
++ spin_unlock(&p->spin);
++ }
++ file->f_op = &p->fop;
+}
+
-+static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio,
-+ const struct iovec *iov, unsigned long nv, loff_t pos)
++static int au_cpup_sp(struct dentry *dentry)
+{
-+ ssize_t err;
-+ struct file *file;
-+ ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long,
-+ loff_t);
++ int err;
++ aufs_bindex_t bcpup;
++ struct au_pin pin;
++ struct au_wr_dir_args wr_dir_args = {
++ .force_btgt = -1,
++ .flags = 0
++ };
+
-+ err = security_file_permission(h_file, rw);
-+ if (unlikely(err))
++ AuDbg("%.*s\n", AuDLNPair(dentry));
++
++ di_read_unlock(dentry, AuLock_IR);
++ di_write_lock_child(dentry);
++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
++ if (unlikely(err < 0))
+ goto out;
++ bcpup = err;
++ err = 0;
++ if (bcpup == au_dbstart(dentry))
++ goto out; /* success */
+
-+ err = -ENOSYS;
-+ func = NULL;
-+ if (rw == MAY_READ)
-+ func = h_file->f_op->aio_read;
-+ else if (rw == MAY_WRITE)
-+ func = h_file->f_op->aio_write;
-+ if (func) {
-+ file = kio->ki_filp;
-+ kio->ki_filp = h_file;
-+ err = func(kio, iov, nv, pos);
-+ kio->ki_filp = file;
-+ } else
-+ /* currently there is no such fs */
-+ WARN_ON_ONCE(1);
++ err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb),
++ AuPin_MNT_WRITE);
++ if (!err) {
++ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME);
++ au_unpin(&pin);
++ }
+
+out:
++ di_downgrade_lock(dentry, AuLock_IR);
+ return err;
+}
+
-+static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov,
-+ unsigned long nv, loff_t pos)
++static int au_do_open_sp(struct file *file, int flags)
+{
-+ ssize_t err;
-+ struct file *file, *h_file;
++ int err;
+ struct dentry *dentry;
+ struct super_block *sb;
++ struct file *h_file;
++ struct inode *h_inode;
+
-+ file = kio->ki_filp;
+ dentry = file->f_dentry;
-+ sb = dentry->d_sb;
-+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
-+ if (unlikely(err))
-+ goto out;
-+
-+ h_file = au_hf_top(file);
-+ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos);
-+ /* todo: necessary? */
-+ /* file->f_ra = h_file->f_ra; */
-+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
-+ di_read_unlock(dentry, AuLock_IR);
-+ fi_read_unlock(file);
-+
-+out:
-+ si_read_unlock(sb);
-+ return err;
-+}
-+
-+static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov,
-+ unsigned long nv, loff_t pos)
-+{
-+ ssize_t err;
-+ struct au_pin pin;
-+ struct dentry *dentry;
-+ struct inode *inode;
-+ struct file *file, *h_file;
++ AuDbg("%.*s\n", AuDLNPair(dentry));
+
-+ file = kio->ki_filp;
-+ dentry = file->f_dentry;
-+ inode = dentry->d_inode;
-+ au_mtx_and_read_lock(inode);
++ /*
++ * try copying-up.
++ * operate on the ro branch is not an error.
++ */
++ au_cpup_sp(dentry); /* ignore */
+
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ /* prepare h_file */
++ err = au_do_open_nondir(file, vfsub_file_flags(file));
+ if (unlikely(err))
+ goto out;
+
-+ err = au_ready_to_write(file, -1, &pin);
-+ di_downgrade_lock(dentry, AuLock_IR);
-+ if (unlikely(err))
-+ goto out_unlock;
-+
-+ au_unpin(&pin);
++ sb = dentry->d_sb;
+ h_file = au_hf_top(file);
-+ err = au_do_aio(h_file, MAY_WRITE, kio, iov, nv, pos);
-+ au_cpup_attr_timesizes(inode);
-+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
-+
-+out_unlock:
++ h_inode = h_file->f_dentry->d_inode;
+ di_read_unlock(dentry, AuLock_IR);
+ fi_write_unlock(file);
++ si_read_unlock(sb);
++ /* open this fifo in aufs */
++ err = h_inode->i_fop->open(file->f_dentry->d_inode, file);
++ si_noflush_read_lock(sb);
++ fi_write_lock(file);
++ di_read_lock_child(dentry, AuLock_IR);
++ if (!err)
++ au_init_fop_sp(file);
++
+out:
-+ si_read_unlock(inode->i_sb);
-+ mutex_unlock(&inode->i_mutex);
+ return err;
+}
+
-+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
-+ struct pipe_inode_info *pipe, size_t len,
-+ unsigned int flags)
++static int aufs_open_sp(struct inode *inode, struct file *file)
+{
-+ ssize_t err;
-+ struct file *h_file;
-+ struct dentry *dentry;
++ int err;
+ struct super_block *sb;
+
-+ dentry = file->f_dentry;
-+ sb = dentry->d_sb;
-+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
-+ if (unlikely(err))
-+ goto out;
-+
-+ err = -EINVAL;
-+ h_file = au_hf_top(file);
-+ if (au_test_loopback_kthread()) {
-+ file->f_mapping = h_file->f_mapping;
-+ smp_mb(); /* unnecessary? */
-+ }
-+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
-+ /* todo: necessasry? */
-+ /* file->f_ra = h_file->f_ra; */
-+ fsstack_copy_attr_atime(dentry->d_inode, h_file->f_dentry->d_inode);
-+
-+ di_read_unlock(dentry, AuLock_IR);
-+ fi_read_unlock(file);
-+
-+out:
++ sb = file->f_dentry->d_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ err = au_do_open(file, au_do_open_sp, /*fidir*/NULL);
+ si_read_unlock(sb);
+ return err;
+}
+
-+static ssize_t
-+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
-+ size_t len, unsigned int flags)
++/* ---------------------------------------------------------------------- */
++
++void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev)
+{
-+ ssize_t err;
-+ struct au_pin pin;
-+ struct dentry *dentry;
-+ struct inode *inode;
-+ struct file *h_file;
++ init_special_inode(inode, mode, rdev);
+
-+ dentry = file->f_dentry;
-+ inode = dentry->d_inode;
-+ au_mtx_and_read_lock(inode);
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
-+ if (unlikely(err))
-+ goto out;
++ switch (mode & S_IFMT) {
++ case S_IFIFO:
++ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop;
++ /*FALLTHROUGH*/
++ case S_IFCHR:
++ case S_IFBLK:
++ case S_IFSOCK:
++ break;
++ default:
++ AuDebugOn(1);
++ }
++}
+
-+ err = au_ready_to_write(file, -1, &pin);
-+ di_downgrade_lock(dentry, AuLock_IR);
-+ if (unlikely(err))
-+ goto out_unlock;
++int au_special_file(umode_t mode)
++{
++ int ret;
+
-+ h_file = au_hf_top(file);
-+ au_unpin(&pin);
-+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
-+ au_cpup_attr_timesizes(inode);
-+ inode->i_mode = h_file->f_dentry->d_inode->i_mode;
++ ret = 0;
++ switch (mode & S_IFMT) {
++ case S_IFIFO:
++#if 0
++ case S_IFCHR:
++ case S_IFBLK:
++ case S_IFSOCK:
++#endif
++ ret = 1;
++ }
+
-+out_unlock:
-+ di_read_unlock(dentry, AuLock_IR);
-+ fi_write_unlock(file);
-+out:
-+ si_read_unlock(inode->i_sb);
-+ mutex_unlock(&inode->i_mutex);
-+ return err;
++ return ret;
+}
+--- a/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/file.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,676 @@
++/*
++ * Copyright (C) 2005-2011 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
+
-+/* ---------------------------------------------------------------------- */
++/*
++ * handling file/dir, and address_space operation
++ */
+
-+static struct file *au_safe_file(struct vm_area_struct *vma)
-+{
-+ struct file *file;
++#include <linux/file.h>
++#include <linux/fsnotify.h>
++#include <linux/namei.h>
++#include <linux/pagemap.h>
++#include "aufs.h"
+
-+ file = vma->vm_file;
-+ if (au_fi(file) && au_test_aufs(file->f_dentry->d_sb))
-+ return file;
-+ return NULL;
++/* drop flags for writing */
++unsigned int au_file_roflags(unsigned int flags)
++{
++ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
++ flags |= O_RDONLY | O_NOATIME;
++ return flags;
+}
+
-+static void au_reset_file(struct vm_area_struct *vma, struct file *file)
++/* common functions to regular file and dir */
++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
++ struct file *file)
+{
-+ vma->vm_file = file;
-+ /* smp_mb(); */ /* flush vm_file */
++ struct file *h_file;
++ struct dentry *h_dentry;
++ struct inode *h_inode;
++ struct super_block *sb;
++ struct au_branch *br;
++ struct path h_path;
++ int err, exec_flag;
++
++ /* a race condition can happen between open and unlink/rmdir */
++ h_file = ERR_PTR(-ENOENT);
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (au_test_nfsd() && !h_dentry)
++ goto out;
++ h_inode = h_dentry->d_inode;
++ if (au_test_nfsd() && !h_inode)
++ goto out;
++ spin_lock(&h_dentry->d_lock);
++ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
++ || !h_inode
++ /* || !dentry->d_inode->i_nlink */
++ ;
++ spin_unlock(&h_dentry->d_lock);
++ if (unlikely(err))
++ goto out;
++
++ sb = dentry->d_sb;
++ br = au_sbr(sb, bindex);
++ h_file = ERR_PTR(-EACCES);
++ exec_flag = flags & __FMODE_EXEC;
++ if (exec_flag && (br->br_mnt->mnt_flags & MNT_NOEXEC))
++ goto out;
++
++ /* drop flags for writing */
++ if (au_test_ro(sb, bindex, dentry->d_inode))
++ flags = au_file_roflags(flags);
++ flags &= ~O_CREAT;
++ atomic_inc(&br->br_count);
++ h_path.dentry = h_dentry;
++ h_path.mnt = br->br_mnt;
++ if (!au_special_file(h_inode->i_mode))
++ h_file = vfsub_dentry_open(&h_path, flags);
++ else {
++ /* this block depends upon the configuration */
++ di_read_unlock(dentry, AuLock_IR);
++ fi_write_unlock(file);
++ si_read_unlock(sb);
++ h_file = vfsub_dentry_open(&h_path, flags);
++ si_noflush_read_lock(sb);
++ fi_write_lock(file);
++ di_read_lock_child(dentry, AuLock_IR);
++ }
++ if (IS_ERR(h_file))
++ goto out_br;
++
++ if (exec_flag) {
++ err = deny_write_access(h_file);
++ if (unlikely(err)) {
++ fput(h_file);
++ h_file = ERR_PTR(err);
++ goto out_br;
++ }
++ }
++ fsnotify_open(h_file);
++ goto out; /* success */
++
++out_br:
++ atomic_dec(&br->br_count);
++out:
++ return h_file;
+}
+
-+static int aufs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
++int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
++ struct au_fidir *fidir)
+{
+ int err;
-+ static DECLARE_WAIT_QUEUE_HEAD(wq);
-+ struct file *file, *h_file;
-+ struct au_finfo *finfo;
++ struct dentry *dentry;
+
-+ /* todo: non-robr mode, user vm_file as it is? */
-+ wait_event(wq, (file = au_safe_file(vma)));
++ err = au_finfo_init(file, fidir);
++ if (unlikely(err))
++ goto out;
+
-+ /* do not revalidate, no si lock */
-+ finfo = au_fi(file);
-+ AuDebugOn(finfo->fi_hdir);
-+ h_file = finfo->fi_htop.hf_file;
-+ AuDebugOn(!h_file || !finfo->fi_hvmop);
++ dentry = file->f_dentry;
++ di_read_lock_child(dentry, AuLock_IR);
++ err = open(file, vfsub_file_flags(file));
++ di_read_unlock(dentry, AuLock_IR);
+
-+ mutex_lock(&finfo->fi_vm_mtx);
-+ vma->vm_file = h_file;
-+ err = finfo->fi_hvmop->fault(vma, vmf);
-+ /* todo: necessary? */
-+ /* file->f_ra = h_file->f_ra; */
-+ au_reset_file(vma, file);
-+ mutex_unlock(&finfo->fi_vm_mtx);
-+#if 0 /* def CONFIG_SMP */
-+ /* wake_up_nr(&wq, online_cpu - 1); */
-+ wake_up_all(&wq);
-+#else
-+ wake_up(&wq);
-+#endif
++ fi_write_unlock(file);
++ if (unlikely(err)) {
++ au_fi(file)->fi_hdir = NULL;
++ au_finfo_fin(file);
++ }
+
++out:
+ return err;
+}
+
-+static int aufs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
++int au_reopen_nondir(struct file *file)
+{
+ int err;
-+ static DECLARE_WAIT_QUEUE_HEAD(wq);
-+ struct file *file, *h_file;
-+ struct au_finfo *finfo;
++ aufs_bindex_t bstart;
++ struct dentry *dentry;
++ struct file *h_file, *h_file_tmp;
+
-+ wait_event(wq, (file = au_safe_file(vma)));
++ dentry = file->f_dentry;
++ AuDebugOn(au_special_file(dentry->d_inode->i_mode));
++ bstart = au_dbstart(dentry);
++ h_file_tmp = NULL;
++ if (au_fbstart(file) == bstart) {
++ h_file = au_hf_top(file);
++ if (file->f_mode == h_file->f_mode)
++ return 0; /* success */
++ h_file_tmp = h_file;
++ get_file(h_file_tmp);
++ au_set_h_fptr(file, bstart, NULL);
++ }
++ AuDebugOn(au_fi(file)->fi_hdir);
++ AuDebugOn(au_fbstart(file) < bstart);
+
-+ finfo = au_fi(file);
-+ AuDebugOn(finfo->fi_hdir);
-+ h_file = finfo->fi_htop.hf_file;
-+ AuDebugOn(!h_file || !finfo->fi_hvmop);
++ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
++ file);
++ err = PTR_ERR(h_file);
++ if (IS_ERR(h_file))
++ goto out; /* todo: close all? */
+
-+ mutex_lock(&finfo->fi_vm_mtx);
-+ vma->vm_file = h_file;
-+ err = finfo->fi_hvmop->page_mkwrite(vma, vmf);
-+ au_reset_file(vma, file);
-+ mutex_unlock(&finfo->fi_vm_mtx);
-+ wake_up(&wq);
++ err = 0;
++ au_set_fbstart(file, bstart);
++ au_set_h_fptr(file, bstart, h_file);
++ au_update_figen(file);
++ /* todo: necessary? */
++ /* file->f_ra = h_file->f_ra; */
+
++out:
++ if (h_file_tmp)
++ fput(h_file_tmp);
+ return err;
+}
+
-+static void aufs_vm_close(struct vm_area_struct *vma)
++/* ---------------------------------------------------------------------- */
++
++static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
++ struct dentry *hi_wh)
+{
-+ static DECLARE_WAIT_QUEUE_HEAD(wq);
-+ struct file *file, *h_file;
-+ struct au_finfo *finfo;
++ int err;
++ aufs_bindex_t bstart;
++ struct au_dinfo *dinfo;
++ struct dentry *h_dentry;
++ struct au_hdentry *hdp;
+
-+ wait_event(wq, (file = au_safe_file(vma)));
++ dinfo = au_di(file->f_dentry);
++ AuRwMustWriteLock(&dinfo->di_rwsem);
+
-+ finfo = au_fi(file);
-+ AuDebugOn(finfo->fi_hdir);
-+ h_file = finfo->fi_htop.hf_file;
-+ AuDebugOn(!h_file || !finfo->fi_hvmop);
++ bstart = dinfo->di_bstart;
++ dinfo->di_bstart = btgt;
++ hdp = dinfo->di_hdentry;
++ h_dentry = hdp[0 + btgt].hd_dentry;
++ hdp[0 + btgt].hd_dentry = hi_wh;
++ err = au_reopen_nondir(file);
++ hdp[0 + btgt].hd_dentry = h_dentry;
++ dinfo->di_bstart = bstart;
+
-+ mutex_lock(&finfo->fi_vm_mtx);
-+ vma->vm_file = h_file;
-+ finfo->fi_hvmop->close(vma);
-+ au_reset_file(vma, file);
-+ mutex_unlock(&finfo->fi_vm_mtx);
-+ wake_up(&wq);
++ return err;
+}
+
-+const struct vm_operations_struct aufs_vm_ops = {
-+ .close = aufs_vm_close,
-+ .fault = aufs_fault,
-+ .page_mkwrite = aufs_page_mkwrite
-+};
-+
-+/* ---------------------------------------------------------------------- */
-+
-+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
-+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
-+
-+static unsigned long au_arch_prot_conv(unsigned long flags)
++static int au_ready_to_write_wh(struct file *file, loff_t len,
++ aufs_bindex_t bcpup)
+{
-+ /* currently ppc64 only */
-+#ifdef CONFIG_PPC64
-+ /* cf. linux/arch/powerpc/include/asm/mman.h */
-+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
-+ return AuConv_VM_PROT(flags, SAO);
-+#else
-+ AuDebugOn(arch_calc_vm_prot_bits(-1));
-+ return 0;
-+#endif
-+}
++ int err;
++ struct inode *inode, *h_inode;
++ struct dentry *dentry, *h_dentry, *hi_wh;
+
-+static unsigned long au_prot_conv(unsigned long flags)
-+{
-+ return AuConv_VM_PROT(flags, READ)
-+ | AuConv_VM_PROT(flags, WRITE)
-+ | AuConv_VM_PROT(flags, EXEC)
-+ | au_arch_prot_conv(flags);
-+}
++ dentry = file->f_dentry;
++ au_update_dbstart(dentry);
++ inode = dentry->d_inode;
++ h_inode = NULL;
++ if (au_dbstart(dentry) <= bcpup && au_dbend(dentry) >= bcpup) {
++ h_dentry = au_h_dptr(dentry, bcpup);
++ if (h_dentry)
++ h_inode = h_dentry->d_inode;
++ }
++ hi_wh = au_hi_wh(inode, bcpup);
++ if (!hi_wh && !h_inode)
++ err = au_sio_cpup_wh(dentry, bcpup, len, file);
++ else
++ /* already copied-up after unlink */
++ err = au_reopen_wh(file, bcpup, hi_wh);
+
-+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
-+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
++ if (!err
++ && inode->i_nlink > 1
++ && au_opt_test(au_mntflags(dentry->d_sb), PLINK))
++ au_plink_append(inode, bcpup, au_h_dptr(dentry, bcpup));
+
-+static unsigned long au_flag_conv(unsigned long flags)
-+{
-+ return AuConv_VM_MAP(flags, GROWSDOWN)
-+ | AuConv_VM_MAP(flags, DENYWRITE)
-+ | AuConv_VM_MAP(flags, EXECUTABLE)
-+ | AuConv_VM_MAP(flags, LOCKED);
++ return err;
+}
+
-+static struct vm_operations_struct *
-+au_hvmop(struct file *h_file, struct vm_area_struct *vma, unsigned long *flags)
++/*
++ * prepare the @file for writing.
++ */
++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
+{
-+ struct vm_operations_struct *h_vmop;
-+ unsigned long prot;
+ int err;
++ aufs_bindex_t bstart, bcpup, dbstart;
++ struct dentry *dentry, *parent, *h_dentry;
++ struct inode *h_inode, *inode;
++ struct super_block *sb;
++ struct file *h_file;
+
-+ h_vmop = ERR_PTR(-ENODEV);
-+ if (!h_file->f_op || !h_file->f_op->mmap)
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ inode = dentry->d_inode;
++ AuDebugOn(au_special_file(inode->i_mode));
++ bstart = au_fbstart(file);
++ err = au_test_ro(sb, bstart, inode);
++ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
++ err = au_pin(pin, dentry, bstart, AuOpt_UDBA_NONE, /*flags*/0);
+ goto out;
++ }
+
-+ prot = au_prot_conv(vma->vm_flags);
-+ err = security_file_mmap(h_file, /*reqprot*/prot, prot,
-+ au_flag_conv(vma->vm_flags), vma->vm_start, 0);
-+ h_vmop = ERR_PTR(err);
-+ if (unlikely(err))
-+ goto out;
++ /* need to cpup or reopen */
++ parent = dget_parent(dentry);
++ di_write_lock_parent(parent);
++ err = AuWbrCopyup(au_sbi(sb), dentry);
++ bcpup = err;
++ if (unlikely(err < 0))
++ goto out_dgrade;
++ err = 0;
+
-+ err = h_file->f_op->mmap(h_file, vma);
-+ h_vmop = ERR_PTR(err);
++ if (!d_unhashed(dentry) && !au_h_dptr(parent, bcpup)) {
++ err = au_cpup_dirs(dentry, bcpup);
++ if (unlikely(err))
++ goto out_dgrade;
++ }
++
++ err = au_pin(pin, dentry, bcpup, AuOpt_UDBA_NONE,
++ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+ if (unlikely(err))
-+ goto out;
++ goto out_dgrade;
+
-+ /* oops, it became 'const' */
-+ h_vmop = (struct vm_operations_struct *)vma->vm_ops;
-+ *flags = vma->vm_flags;
-+ err = do_munmap(current->mm, vma->vm_start,
-+ vma->vm_end - vma->vm_start);
-+ if (unlikely(err)) {
-+ AuIOErr("failed internal unmapping %.*s, %d\n",
-+ AuDLNPair(h_file->f_dentry), err);
-+ h_vmop = ERR_PTR(-EIO);
++ h_dentry = au_hf_top(file)->f_dentry;
++ h_inode = h_dentry->d_inode;
++ dbstart = au_dbstart(dentry);
++ if (dbstart <= bcpup) {
++ h_dentry = au_h_dptr(dentry, bcpup);
++ AuDebugOn(!h_dentry);
++ h_inode = h_dentry->d_inode;
++ AuDebugOn(!h_inode);
++ bstart = bcpup;
+ }
+
-+out:
-+ return h_vmop;
-+}
-+
-+/*
-+ * This is another ugly approach to keep the lock order, particularly
-+ * mm->mmap_sem and aufs rwsem. The previous approach was reverted and you can
-+ * find it in git-log, if you want.
-+ *
-+ * native readdir: i_mutex, copy_to_user, mmap_sem
-+ * aufs readdir: i_mutex, rwsem, nested-i_mutex, copy_to_user, mmap_sem
-+ *
-+ * Before aufs_mmap() mmap_sem is acquired already, but aufs_mmap() has to
-+ * acquire aufs rwsem. It introduces a circular locking dependency.
-+ * To address this problem, aufs_mmap() delegates the part which requires aufs
-+ * rwsem to its internal workqueue.
-+ */
++ if (dbstart <= bcpup /* just reopen */
++ || !d_unhashed(dentry) /* copyup and reopen */
++ ) {
++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
++ h_file = au_h_open_pre(dentry, bstart);
++ if (IS_ERR(h_file)) {
++ err = PTR_ERR(h_file);
++ h_file = NULL;
++ } else {
++ di_downgrade_lock(parent, AuLock_IR);
++ if (dbstart > bcpup)
++ err = au_sio_cpup_simple(dentry, bcpup, len,
++ AuCpup_DTIME);
++ if (!err)
++ err = au_reopen_nondir(file);
++ }
++ mutex_unlock(&h_inode->i_mutex);
++ au_h_open_post(dentry, bstart, h_file);
++ } else { /* copyup as wh and reopen */
++ /*
++ * since writable hfsplus branch is not supported,
++ * h_open_pre/post() are unnecessary.
++ */
++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
++ err = au_ready_to_write_wh(file, len, bcpup);
++ di_downgrade_lock(parent, AuLock_IR);
++ mutex_unlock(&h_inode->i_mutex);
++ }
+
-+/* very ugly approach */
-+#include "mtx.h"
++ if (!err) {
++ au_pin_set_parent_lflag(pin, /*lflag*/0);
++ goto out_dput; /* success */
++ }
++ au_unpin(pin);
++ goto out_unlock;
+
-+struct au_mmap_pre_args {
-+ /* input */
-+ struct file *file;
-+ struct vm_area_struct *vma;
++out_dgrade:
++ di_downgrade_lock(parent, AuLock_IR);
++out_unlock:
++ di_read_unlock(parent, AuLock_IR);
++out_dput:
++ dput(parent);
++out:
++ return err;
++}
+
-+ /* output */
-+ int *errp;
-+ struct file *h_file;
-+ struct au_branch *br;
-+ int mmapped;
-+};
++/* ---------------------------------------------------------------------- */
+
-+static int au_mmap_pre(struct file *file, struct vm_area_struct *vma,
-+ struct file **h_file, struct au_branch **br,
-+ int *mmapped)
++int au_do_flush(struct file *file, fl_owner_t id,
++ int (*flush)(struct file *file, fl_owner_t id))
+{
+ int err;
-+ aufs_bindex_t bstart;
-+ const unsigned char wlock
-+ = !!(file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
+ struct dentry *dentry;
+ struct super_block *sb;
++ struct inode *inode;
+
+ dentry = file->f_dentry;
+ sb = dentry->d_sb;
-+ si_read_lock(sb, AuLock_NOPLMW);
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
-+ if (unlikely(err))
-+ goto out;
-+
-+ *mmapped = !!au_test_mmapped(file);
-+ if (wlock) {
-+ struct au_pin pin;
++ inode = dentry->d_inode;
++ si_noflush_read_lock(sb);
++ fi_read_lock(file);
++ ii_read_lock_child(inode);
+
-+ err = au_ready_to_write(file, -1, &pin);
-+ di_write_unlock(dentry);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ au_unpin(&pin);
-+ } else
-+ di_write_unlock(dentry);
-+ bstart = au_fbstart(file);
-+ *br = au_sbr(sb, bstart);
-+ *h_file = au_hf_top(file);
-+ get_file(*h_file);
-+ au_fi_mmap_lock(file);
++ err = flush(file, id);
++ au_cpup_attr_timesizes(inode);
+
-+out_unlock:
-+ fi_write_unlock(file);
-+out:
++ ii_read_unlock(inode);
++ fi_read_unlock(file);
+ si_read_unlock(sb);
+ return err;
+}
+
-+static void au_call_mmap_pre(void *args)
-+{
-+ struct au_mmap_pre_args *a = args;
-+ *a->errp = au_mmap_pre(a->file, a->vma, &a->h_file, &a->br,
-+ &a->mmapped);
-+}
++/* ---------------------------------------------------------------------- */
+
-+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
++static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
+{
-+ int err, wkq_err;
-+ unsigned long h_vmflags;
++ int err;
++ aufs_bindex_t bstart;
++ struct au_pin pin;
+ struct au_finfo *finfo;
-+ struct dentry *h_dentry;
-+ struct vm_operations_struct *h_vmop, *vmop;
-+ struct au_mmap_pre_args args = {
-+ .file = file,
-+ .vma = vma,
-+ .errp = &err
-+ };
++ struct dentry *dentry, *parent, *hi_wh;
++ struct inode *inode;
++ struct super_block *sb;
+
-+ wkq_err = au_wkq_wait_pre(au_call_mmap_pre, &args);
-+ if (unlikely(wkq_err))
-+ err = wkq_err;
-+ if (unlikely(err))
-+ goto out;
++ FiMustWriteLock(file);
++
++ err = 0;
+ finfo = au_fi(file);
-+ mutex_set_owner(&finfo->fi_mmap);
++ dentry = file->f_dentry;
++ sb = dentry->d_sb;
++ inode = dentry->d_inode;
++ bstart = au_ibstart(inode);
++ if (bstart == finfo->fi_btop || IS_ROOT(dentry))
++ goto out;
+
-+ h_dentry = args.h_file->f_dentry;
-+ if (!args.mmapped && au_test_fs_bad_mapping(h_dentry->d_sb)) {
-+ /*
-+ * by this assignment, f_mapping will differs from aufs inode
-+ * i_mapping.
-+ * if someone else mixes the use of f_dentry->d_inode and
-+ * f_mapping->host, then a problem may arise.
-+ */
-+ file->f_mapping = args.h_file->f_mapping;
++ parent = dget_parent(dentry);
++ if (au_test_ro(sb, bstart, inode)) {
++ di_read_lock_parent(parent, !AuLock_IR);
++ err = AuWbrCopyup(au_sbi(sb), dentry);
++ bstart = err;
++ di_read_unlock(parent, !AuLock_IR);
++ if (unlikely(err < 0))
++ goto out_parent;
++ err = 0;
+ }
+
-+ /* always try this internal mmap to get vma flags */
-+ h_vmflags = 0; /* gcc warning */
-+ h_vmop = au_hvmop(args.h_file, vma, &h_vmflags);
-+ err = PTR_ERR(h_vmop);
-+ if (IS_ERR(h_vmop))
-+ goto out_unlock;
-+ AuDebugOn(args.mmapped && h_vmop != finfo->fi_hvmop);
-+
-+ vmop = (void *)au_dy_vmop(file, args.br, h_vmop);
-+ err = PTR_ERR(vmop);
-+ if (IS_ERR(vmop))
-+ goto out_unlock;
-+
-+ /*
-+ * unnecessary to handle MAP_DENYWRITE and deny_write_access()?
-+ * currently MAP_DENYWRITE from userspace is ignored, but elf loader
-+ * sets it. when FMODE_EXEC is set (by open_exec() or sys_uselib()),
-+ * both of the aufs file and the lower file is deny_write_access()-ed.
-+ * finally I hope we can skip handlling MAP_DENYWRITE here.
-+ */
-+ err = generic_file_mmap(file, vma);
-+ if (unlikely(err))
-+ goto out_unlock;
-+
-+ vma->vm_ops = vmop;
-+ vma->vm_flags = h_vmflags;
-+ if (!args.mmapped)
-+ finfo->fi_hvmop = h_vmop;
++ di_read_lock_parent(parent, AuLock_IR);
++ hi_wh = au_hi_wh(inode, bstart);
++ if (!S_ISDIR(inode->i_mode)
++ && au_opt_test(au_mntflags(sb), PLINK)
++ && au_plink_test(inode)
++ && !d_unhashed(dentry)) {
++ err = au_test_and_cpup_dirs(dentry, bstart);
++ if (unlikely(err))
++ goto out_unlock;
+
-+ vfsub_file_accessed(args.h_file);
-+ /* update without lock, I don't think it a problem */
-+ fsstack_copy_attr_atime(file->f_dentry->d_inode, h_dentry->d_inode);
++ /* always superio. */
++ err = au_pin(&pin, dentry, bstart, AuOpt_UDBA_NONE,
++ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
++ if (!err)
++ err = au_sio_cpup_simple(dentry, bstart, -1,
++ AuCpup_DTIME);
++ au_unpin(&pin);
++ } else if (hi_wh) {
++ /* already copied-up after unlink */
++ err = au_reopen_wh(file, bstart, hi_wh);
++ *need_reopen = 0;
++ }
+
+out_unlock:
-+ au_fi_mmap_unlock(file);
-+ fput(args.h_file);
++ di_read_unlock(parent, AuLock_IR);
++out_parent:
++ dput(parent);
+out:
+ return err;
+}
+
-+/* ---------------------------------------------------------------------- */
-+
-+static int aufs_fsync_nondir(struct file *file, int datasync)
++static void au_do_refresh_dir(struct file *file)
+{
-+ int err;
-+ struct au_pin pin;
-+ struct dentry *dentry;
-+ struct inode *inode;
-+ struct file *h_file;
++ aufs_bindex_t bindex, bend, new_bindex, brid;
++ struct au_hfile *p, tmp, *q;
++ struct au_finfo *finfo;
+ struct super_block *sb;
++ struct au_fidir *fidir;
+
-+ dentry = file->f_dentry;
-+ inode = dentry->d_inode;
-+ IMustLock(file->f_mapping->host);
-+ if (inode != file->f_mapping->host) {
-+ mutex_unlock(&file->f_mapping->host->i_mutex);
-+ mutex_lock(&inode->i_mutex);
-+ }
-+ IMustLock(inode);
-+
-+ sb = dentry->d_sb;
-+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
-+ if (unlikely(err))
-+ goto out;
-+
-+ err = 0; /* -EBADF; */ /* posix? */
-+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
-+ goto out_si;
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
-+ if (unlikely(err))
-+ goto out_si;
++ FiMustWriteLock(file);
+
-+ err = au_ready_to_write(file, -1, &pin);
-+ di_downgrade_lock(dentry, AuLock_IR);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ au_unpin(&pin);
++ sb = file->f_dentry->d_sb;
++ finfo = au_fi(file);
++ fidir = finfo->fi_hdir;
++ AuDebugOn(!fidir);
++ p = fidir->fd_hfile + finfo->fi_btop;
++ brid = p->hf_br->br_id;
++ bend = fidir->fd_bbot;
++ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
++ if (!p->hf_file)
++ continue;
+
-+ err = -EINVAL;
-+ h_file = au_hf_top(file);
-+ if (h_file->f_op && h_file->f_op->fsync) {
-+ struct mutex *h_mtx;
++ new_bindex = au_br_index(sb, p->hf_br->br_id);
++ if (new_bindex == bindex)
++ continue;
++ if (new_bindex < 0) {
++ au_set_h_fptr(file, bindex, NULL);
++ continue;
++ }
+
-+ /*
-+ * no filemap_fdatawrite() since aufs file has no its own
-+ * mapping, but dir.
-+ */
-+ h_mtx = &h_file->f_dentry->d_inode->i_mutex;
-+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
-+ err = h_file->f_op->fsync(h_file, datasync);
-+ if (!err)
-+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
-+ /*ignore*/
-+ au_cpup_attr_timesizes(inode);
-+ mutex_unlock(h_mtx);
++ /* swap two lower inode, and loop again */
++ q = fidir->fd_hfile + new_bindex;
++ tmp = *q;
++ *q = *p;
++ *p = tmp;
++ if (tmp.hf_file) {
++ bindex--;
++ p--;
++ }
+ }
+
-+out_unlock:
-+ di_read_unlock(dentry, AuLock_IR);
-+ fi_write_unlock(file);
-+out_si:
-+ si_read_unlock(sb);
-+out:
-+ if (inode != file->f_mapping->host) {
-+ mutex_unlock(&inode->i_mutex);
-+ mutex_lock(&file->f_mapping->host->i_mutex);
++ p = fidir->fd_hfile;
++ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) {
++ bend = au_sbend(sb);
++ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
++ finfo->fi_btop++, p++)
++ if (p->hf_file) {
++ if (p->hf_file->f_dentry
++ && p->hf_file->f_dentry->d_inode)
++ break;
++ else
++ au_hfput(p, file);
++ }
++ } else {
++ bend = au_br_index(sb, brid);
++ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
++ finfo->fi_btop++, p++)
++ if (p->hf_file)
++ au_hfput(p, file);
++ bend = au_sbend(sb);
+ }
-+ return err;
++
++ p = fidir->fd_hfile + bend;
++ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
++ fidir->fd_bbot--, p--)
++ if (p->hf_file) {
++ if (p->hf_file->f_dentry
++ && p->hf_file->f_dentry->d_inode)
++ break;
++ else
++ au_hfput(p, file);
++ }
++ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
+}
+
-+/* no one supports this operation, currently */
-+#if 0
-+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
++/*
++ * after branch manipulating, refresh the file.
++ */
++static int refresh_file(struct file *file, int (*reopen)(struct file *file))
+{
-+ int err;
-+ struct au_pin pin;
++ int err, need_reopen;
++ aufs_bindex_t bend, bindex;
+ struct dentry *dentry;
-+ struct inode *inode;
-+ struct file *file, *h_file;
++ struct au_finfo *finfo;
++ struct au_hfile *hfile;
+
-+ file = kio->ki_filp;
+ dentry = file->f_dentry;
-+ inode = dentry->d_inode;
-+ au_mtx_and_read_lock(inode);
++ finfo = au_fi(file);
++ if (!finfo->fi_hdir) {
++ hfile = &finfo->fi_htop;
++ AuDebugOn(!hfile->hf_file);
++ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
++ AuDebugOn(bindex < 0);
++ if (bindex != finfo->fi_btop)
++ au_set_fbstart(file, bindex);
++ } else {
++ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
++ if (unlikely(err))
++ goto out;
++ au_do_refresh_dir(file);
++ }
+
-+ err = 0; /* -EBADF; */ /* posix? */
-+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
-+ goto out;
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
-+ if (unlikely(err))
-+ goto out;
-+
-+ err = au_ready_to_write(file, -1, &pin);
-+ di_downgrade_lock(dentry, AuLock_IR);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ au_unpin(&pin);
-+
-+ err = -ENOSYS;
-+ h_file = au_hf_top(file);
-+ if (h_file->f_op && h_file->f_op->aio_fsync) {
-+ struct dentry *h_d;
-+ struct mutex *h_mtx;
++ err = 0;
++ need_reopen = 1;
++ if (!au_test_mmapped(file))
++ err = au_file_refresh_by_inode(file, &need_reopen);
++ if (!err && need_reopen && !d_unlinked(dentry))
++ err = reopen(file);
++ if (!err) {
++ au_update_figen(file);
++ goto out; /* success */
++ }
+
-+ h_d = h_file->f_dentry;
-+ h_mtx = &h_d->d_inode->i_mutex;
-+ if (!is_sync_kiocb(kio)) {
-+ get_file(h_file);
-+ fput(file);
-+ }
-+ kio->ki_filp = h_file;
-+ err = h_file->f_op->aio_fsync(kio, datasync);
-+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
-+ if (!err)
-+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
-+ /*ignore*/
-+ au_cpup_attr_timesizes(inode);
-+ mutex_unlock(h_mtx);
++ /* error, close all lower files */
++ if (finfo->fi_hdir) {
++ bend = au_fbend_dir(file);
++ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
++ au_set_h_fptr(file, bindex, NULL);
+ }
+
-+out_unlock:
-+ di_read_unlock(dentry, AuLock_IR);
-+ fi_write_unlock(file);
+out:
-+ si_read_unlock(inode->sb);
-+ mutex_unlock(&inode->i_mutex);
+ return err;
+}
-+#endif
+
-+static int aufs_fasync(int fd, struct file *file, int flag)
++/* common function to regular file and dir */
++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
++ int wlock)
+{
+ int err;
-+ struct file *h_file;
++ unsigned int sigen, figen;
++ aufs_bindex_t bstart;
++ unsigned char pseudo_link;
+ struct dentry *dentry;
-+ struct super_block *sb;
++ struct inode *inode;
+
++ err = 0;
+ dentry = file->f_dentry;
-+ sb = dentry->d_sb;
-+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
-+ if (unlikely(err))
-+ goto out;
++ inode = dentry->d_inode;
++ AuDebugOn(au_special_file(inode->i_mode));
++ sigen = au_sigen(dentry->d_sb);
++ fi_write_lock(file);
++ figen = au_figen(file);
++ di_write_lock_child(dentry);
++ bstart = au_dbstart(dentry);
++ pseudo_link = (bstart != au_ibstart(inode));
++ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
++ if (!wlock) {
++ di_downgrade_lock(dentry, AuLock_IR);
++ fi_downgrade_lock(file);
++ }
++ goto out; /* success */
++ }
+
-+ h_file = au_hf_top(file);
-+ if (h_file->f_op && h_file->f_op->fasync)
-+ err = h_file->f_op->fasync(fd, h_file, flag);
++ AuDbg("sigen %d, figen %d\n", sigen, figen);
++ if (au_digen_test(dentry, sigen)) {
++ err = au_reval_dpath(dentry, sigen);
++ AuDebugOn(!err && au_digen_test(dentry, sigen));
++ }
+
-+ di_read_unlock(dentry, AuLock_IR);
-+ fi_read_unlock(file);
++ if (!err)
++ err = refresh_file(file, reopen);
++ if (!err) {
++ if (!wlock) {
++ di_downgrade_lock(dentry, AuLock_IR);
++ fi_downgrade_lock(file);
++ }
++ } else {
++ di_write_unlock(dentry);
++ fi_write_unlock(file);
++ }
+
+out:
-+ si_read_unlock(sb);
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
-+/* no one supports this operation, currently */
-+#if 0
-+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
-+ size_t len, loff_t *pos , int more)
++/* cf. aufs_nopage() */
++/* for madvise(2) */
++static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
+{
++ unlock_page(page);
++ return 0;
+}
-+#endif
+
-+/* ---------------------------------------------------------------------- */
++/* it will never be called, but necessary to support O_DIRECT */
++static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
++ const struct iovec *iov, loff_t offset,
++ unsigned long nr_segs)
++{ BUG(); return 0; }
+
-+const struct file_operations aufs_file_fop = {
-+ .owner = THIS_MODULE,
-+ /*
-+ * while generic_file_llseek/_unlocked() don't use BKL,
-+ * don't use it since it operates file->f_mapping->host.
-+ * in aufs, it may be a real file and may confuse users by UDBA.
-+ */
-+ /* .llseek = generic_file_llseek, */
-+ .llseek = default_llseek,
++/*
++ * it will never be called, but madvise and fadvise behaves differently
++ * when get_xip_mem is defined
++ */
++static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff,
++ int create, void **kmem, unsigned long *pfn)
++{ BUG(); return 0; }
+
-+ .read = aufs_read,
-+ .write = aufs_write,
-+ .aio_read = aufs_aio_read,
-+ .aio_write = aufs_aio_write,
-+#ifdef CONFIG_AUFS_POLL
-+ .poll = aufs_poll,
-+#endif
-+ .unlocked_ioctl = aufs_ioctl_nondir,
-+#ifdef CONFIG_COMPAT
-+ .compat_ioctl = aufs_ioctl_nondir, /* same */
-+#endif
-+ .mmap = aufs_mmap,
-+ .open = aufs_open_nondir,
-+ .flush = aufs_flush_nondir,
-+ .release = aufs_release_nondir,
-+ .fsync = aufs_fsync_nondir,
-+ /* .aio_fsync = aufs_aio_fsync_nondir, */
-+ .fasync = aufs_fasync,
-+ /* .sendpage = aufs_sendpage, */
-+ .splice_write = aufs_splice_write,
-+ .splice_read = aufs_splice_read,
-+#if 0
-+ .aio_splice_write = aufs_aio_splice_write,
-+ .aio_splice_read = aufs_aio_splice_read
-+#endif
++/* they will never be called. */
++#ifdef CONFIG_AUFS_DEBUG
++static int aufs_write_begin(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned flags,
++ struct page **pagep, void **fsdata)
++{ AuUnsupport(); return 0; }
++static int aufs_write_end(struct file *file, struct address_space *mapping,
++ loff_t pos, unsigned len, unsigned copied,
++ struct page *page, void *fsdata)
++{ AuUnsupport(); return 0; }
++static int aufs_writepage(struct page *page, struct writeback_control *wbc)
++{ AuUnsupport(); return 0; }
++
++static int aufs_set_page_dirty(struct page *page)
++{ AuUnsupport(); return 0; }
++static void aufs_invalidatepage(struct page *page, unsigned long offset)
++{ AuUnsupport(); }
++static int aufs_releasepage(struct page *page, gfp_t gfp)
++{ AuUnsupport(); return 0; }
++static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
++ struct page *page)
++{ AuUnsupport(); return 0; }
++static int aufs_launder_page(struct page *page)
++{ AuUnsupport(); return 0; }
++static int aufs_is_partially_uptodate(struct page *page,
++ read_descriptor_t *desc,
++ unsigned long from)
++{ AuUnsupport(); return 0; }
++static int aufs_error_remove_page(struct address_space *mapping,
++ struct page *page)
++{ AuUnsupport(); return 0; }
++#endif /* CONFIG_AUFS_DEBUG */
++
++const struct address_space_operations aufs_aop = {
++ .readpage = aufs_readpage,
++ .direct_IO = aufs_direct_IO,
++ .get_xip_mem = aufs_get_xip_mem,
++#ifdef CONFIG_AUFS_DEBUG
++ .writepage = aufs_writepage,
++ /* no writepages, because of writepage */
++ .set_page_dirty = aufs_set_page_dirty,
++ /* no readpages, because of readpage */
++ .write_begin = aufs_write_begin,
++ .write_end = aufs_write_end,
++ /* no bmap, no block device */
++ .invalidatepage = aufs_invalidatepage,
++ .releasepage = aufs_releasepage,
++ .migratepage = aufs_migratepage,
++ .launder_page = aufs_launder_page,
++ .is_partially_uptodate = aufs_is_partially_uptodate,
++ .error_remove_page = aufs_error_remove_page
++#endif /* CONFIG_AUFS_DEBUG */
+};
-diff -urN a/fs/aufs/f_op_sp.c b/fs/aufs/f_op_sp.c
---- a/fs/aufs/f_op_sp.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/f_op_sp.c 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,299 @@
+--- a/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/file.h 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -10389,289 +10344,385 @@
+ */
+
+/*
-+ * file operations for special files.
-+ * while they exist in aufs virtually,
-+ * their file I/O is handled out of aufs.
++ * file operations
+ */
+
-+#include <linux/fs_stack.h>
-+#include "aufs.h"
++#ifndef __AUFS_FILE_H__
++#define __AUFS_FILE_H__
+
-+static ssize_t aufs_aio_read_sp(struct kiocb *kio, const struct iovec *iov,
-+ unsigned long nv, loff_t pos)
-+{
-+ ssize_t err;
-+ aufs_bindex_t bstart;
-+ unsigned char wbr;
-+ struct file *file, *h_file;
-+ struct super_block *sb;
++#ifdef __KERNEL__
+
-+ file = kio->ki_filp;
-+ sb = file->f_dentry->d_sb;
-+ si_read_lock(sb, AuLock_FLUSH);
-+ fi_read_lock(file);
-+ bstart = au_fbstart(file);
-+ h_file = au_hf_top(file);
-+ fi_read_unlock(file);
-+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
-+ si_read_unlock(sb);
++#include <linux/fs.h>
++#include <linux/poll.h>
++#include <linux/aufs_type.h>
++#include "rwsem.h"
+
-+ /* do not change the file in kio */
-+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_read);
-+ err = h_file->f_op->aio_read(kio, iov, nv, pos);
-+ if (err > 0 && wbr)
-+ file_accessed(h_file);
++struct au_branch;
++struct au_hfile {
++ struct file *hf_file;
++ struct au_branch *hf_br;
++};
+
-+ return err;
-+}
++struct au_vdir;
++struct au_fidir {
++ aufs_bindex_t fd_bbot;
++ aufs_bindex_t fd_nent;
++ struct au_vdir *fd_vdir_cache;
++ struct au_hfile fd_hfile[];
++};
+
-+static ssize_t aufs_aio_write_sp(struct kiocb *kio, const struct iovec *iov,
-+ unsigned long nv, loff_t pos)
++static inline int au_fidir_sz(int nent)
+{
-+ ssize_t err;
-+ aufs_bindex_t bstart;
-+ unsigned char wbr;
-+ struct super_block *sb;
-+ struct file *file, *h_file;
++ AuDebugOn(nent < 0);
++ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
++}
+
-+ file = kio->ki_filp;
-+ sb = file->f_dentry->d_sb;
-+ si_read_lock(sb, AuLock_FLUSH);
-+ fi_read_lock(file);
-+ bstart = au_fbstart(file);
-+ h_file = au_hf_top(file);
-+ fi_read_unlock(file);
-+ wbr = !!au_br_writable(au_sbr(sb, bstart)->br_perm);
-+ si_read_unlock(sb);
++struct au_finfo {
++ atomic_t fi_generation;
+
-+ /* do not change the file in kio */
-+ AuDebugOn(!h_file->f_op || !h_file->f_op->aio_write);
-+ err = h_file->f_op->aio_write(kio, iov, nv, pos);
-+ if (err > 0 && wbr)
-+ file_update_time(h_file);
++ struct au_rwsem fi_rwsem;
++ aufs_bindex_t fi_btop;
+
-+ return err;
-+}
++ /* do not union them */
++ struct { /* for non-dir */
++ struct au_hfile fi_htop;
++ struct vm_operations_struct *fi_hvmop;
++ struct mutex fi_vm_mtx;
++ struct mutex fi_mmap;
++ };
++ struct au_fidir *fi_hdir; /* for dir only */
++} ____cacheline_aligned_in_smp;
+
+/* ---------------------------------------------------------------------- */
+
-+static int aufs_release_sp(struct inode *inode, struct file *file)
-+{
-+ int err;
-+ struct file *h_file;
++/* file.c */
++extern const struct address_space_operations aufs_aop;
++unsigned int au_file_roflags(unsigned int flags);
++struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
++ struct file *file);
++int au_do_open(struct file *file, int (*open)(struct file *file, int flags),
++ struct au_fidir *fidir);
++int au_reopen_nondir(struct file *file);
++struct au_pin;
++int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
++int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
++ int wlock);
++int au_do_flush(struct file *file, fl_owner_t id,
++ int (*flush)(struct file *file, fl_owner_t id));
+
-+ fi_read_lock(file);
-+ h_file = au_hf_top(file);
-+ fi_read_unlock(file);
-+ /* close this fifo in aufs */
-+ err = h_file->f_op->release(inode, file); /* ignore */
-+ aufs_release_nondir(inode, file); /* ignore */
-+ return err;
++/* poll.c */
++#ifdef CONFIG_AUFS_POLL
++unsigned int aufs_poll(struct file *file, poll_table *wait);
++#endif
++
++#ifdef CONFIG_AUFS_BR_HFSPLUS
++/* hfsplus.c */
++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex);
++void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
++ struct file *h_file);
++#else
++static inline
++struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex)
++{
++ return NULL;
+}
+
-+/* ---------------------------------------------------------------------- */
++AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
++ struct file *h_file);
++#endif
+
-+/* currently, support only FIFO */
-+enum {
-+ AuSp_FIFO, AuSp_FIFO_R, AuSp_FIFO_W, AuSp_FIFO_RW,
-+ /* AuSp_SOCK, AuSp_CHR, AuSp_BLK, */
-+ AuSp_Last
-+};
-+static int aufs_open_sp(struct inode *inode, struct file *file);
-+static struct au_sp_fop {
-+ int done;
-+ struct file_operations fop; /* not 'const' */
-+ spinlock_t spin;
-+} au_sp_fop[AuSp_Last] = {
-+ [AuSp_FIFO] = {
-+ .fop = {
-+ .owner = THIS_MODULE,
-+ .open = aufs_open_sp
-+ }
-+ }
-+};
++/* f_op.c */
++extern const struct file_operations aufs_file_fop;
++extern const struct vm_operations_struct aufs_vm_ops;
++int au_do_open_nondir(struct file *file, int flags);
++int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
+
-+static void au_init_fop_sp(struct file *file)
++#ifdef CONFIG_AUFS_SP_IATTR
++/* f_op_sp.c */
++int au_special_file(umode_t mode);
++void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev);
++#else
++AuStubInt0(au_special_file, umode_t mode)
++static inline void au_init_special_fop(struct inode *inode, umode_t mode,
++ dev_t rdev)
+{
-+ struct au_sp_fop *p;
-+ int i;
-+ struct file *h_file;
++ init_special_inode(inode, mode, rdev);
++}
++#endif
+
-+ p = au_sp_fop;
-+ if (unlikely(!p->done)) {
-+ /* initialize first time only */
-+ static DEFINE_SPINLOCK(spin);
++/* finfo.c */
++void au_hfput(struct au_hfile *hf, struct file *file);
++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
++ struct file *h_file);
+
-+ spin_lock(&spin);
-+ if (!p->done) {
-+ BUILD_BUG_ON(sizeof(au_sp_fop)/sizeof(*au_sp_fop)
-+ != AuSp_Last);
-+ for (i = 0; i < AuSp_Last; i++)
-+ spin_lock_init(&p[i].spin);
-+ p->done = 1;
-+ }
-+ spin_unlock(&spin);
-+ }
++void au_update_figen(struct file *file);
++struct au_fidir *au_fidir_alloc(struct super_block *sb);
++int au_fidir_realloc(struct au_finfo *finfo, int nbr);
+
-+ switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) {
-+ case FMODE_READ:
-+ i = AuSp_FIFO_R;
-+ break;
-+ case FMODE_WRITE:
-+ i = AuSp_FIFO_W;
-+ break;
-+ case FMODE_READ | FMODE_WRITE:
-+ i = AuSp_FIFO_RW;
-+ break;
-+ default:
-+ BUG();
-+ }
++void au_fi_init_once(void *_fi);
++void au_finfo_fin(struct file *file);
++int au_finfo_init(struct file *file, struct au_fidir *fidir);
+
-+ p += i;
-+ if (unlikely(!p->done)) {
-+ /* initialize first time only */
-+ h_file = au_hf_top(file);
-+ spin_lock(&p->spin);
-+ if (!p->done) {
-+ p->fop = *h_file->f_op;
-+ p->fop.owner = THIS_MODULE;
-+ if (p->fop.aio_read)
-+ p->fop.aio_read = aufs_aio_read_sp;
-+ if (p->fop.aio_write)
-+ p->fop.aio_write = aufs_aio_write_sp;
-+ p->fop.release = aufs_release_sp;
-+ p->done = 1;
-+ }
-+ spin_unlock(&p->spin);
-+ }
-+ file->f_op = &p->fop;
-+}
++/* ioctl.c */
++long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
++#ifdef CONFIG_COMPAT
++long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
++ unsigned long arg);
++#endif
+
-+static int au_cpup_sp(struct dentry *dentry)
++/* ---------------------------------------------------------------------- */
++
++static inline struct au_finfo *au_fi(struct file *file)
+{
-+ int err;
-+ aufs_bindex_t bcpup;
-+ struct au_pin pin;
-+ struct au_wr_dir_args wr_dir_args = {
-+ .force_btgt = -1,
-+ .flags = 0
-+ };
++ return file->private_data;
++}
+
-+ AuDbg("%.*s\n", AuDLNPair(dentry));
++/* ---------------------------------------------------------------------- */
+
-+ di_read_unlock(dentry, AuLock_IR);
-+ di_write_lock_child(dentry);
-+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
-+ if (unlikely(err < 0))
-+ goto out;
-+ bcpup = err;
-+ err = 0;
-+ if (bcpup == au_dbstart(dentry))
-+ goto out; /* success */
++/*
++ * fi_read_lock, fi_write_lock,
++ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
++ */
++AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
+
-+ err = au_pin(&pin, dentry, bcpup, au_opt_udba(dentry->d_sb),
-+ AuPin_MNT_WRITE);
-+ if (!err) {
-+ err = au_sio_cpup_simple(dentry, bcpup, -1, AuCpup_DTIME);
-+ au_unpin(&pin);
-+ }
++#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
++#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
++#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
+
-+out:
-+ di_downgrade_lock(dentry, AuLock_IR);
-+ return err;
-+}
++/* ---------------------------------------------------------------------- */
+
-+static int au_do_open_sp(struct file *file, int flags)
++/* todo: hard/soft set? */
++static inline aufs_bindex_t au_fbstart(struct file *file)
+{
-+ int err;
-+ struct dentry *dentry;
-+ struct super_block *sb;
-+ struct file *h_file;
-+ struct inode *h_inode;
++ FiMustAnyLock(file);
++ return au_fi(file)->fi_btop;
++}
+
-+ dentry = file->f_dentry;
-+ AuDbg("%.*s\n", AuDLNPair(dentry));
++static inline aufs_bindex_t au_fbend_dir(struct file *file)
++{
++ FiMustAnyLock(file);
++ AuDebugOn(!au_fi(file)->fi_hdir);
++ return au_fi(file)->fi_hdir->fd_bbot;
++}
+
-+ /*
-+ * try copying-up.
-+ * operate on the ro branch is not an error.
-+ */
-+ au_cpup_sp(dentry); /* ignore */
++static inline struct au_vdir *au_fvdir_cache(struct file *file)
++{
++ FiMustAnyLock(file);
++ AuDebugOn(!au_fi(file)->fi_hdir);
++ return au_fi(file)->fi_hdir->fd_vdir_cache;
++}
+
-+ /* prepare h_file */
-+ err = au_do_open_nondir(file, vfsub_file_flags(file));
-+ if (unlikely(err))
-+ goto out;
++static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
++{
++ FiMustWriteLock(file);
++ au_fi(file)->fi_btop = bindex;
++}
+
-+ sb = dentry->d_sb;
-+ h_file = au_hf_top(file);
-+ h_inode = h_file->f_dentry->d_inode;
-+ di_read_unlock(dentry, AuLock_IR);
-+ fi_write_unlock(file);
-+ si_read_unlock(sb);
-+ /* open this fifo in aufs */
-+ err = h_inode->i_fop->open(file->f_dentry->d_inode, file);
-+ si_noflush_read_lock(sb);
-+ fi_write_lock(file);
-+ di_read_lock_child(dentry, AuLock_IR);
-+ if (!err)
-+ au_init_fop_sp(file);
++static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
++{
++ FiMustWriteLock(file);
++ AuDebugOn(!au_fi(file)->fi_hdir);
++ au_fi(file)->fi_hdir->fd_bbot = bindex;
++}
+
-+out:
-+ return err;
++static inline void au_set_fvdir_cache(struct file *file,
++ struct au_vdir *vdir_cache)
++{
++ FiMustWriteLock(file);
++ AuDebugOn(!au_fi(file)->fi_hdir);
++ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
+}
+
-+static int aufs_open_sp(struct inode *inode, struct file *file)
++static inline struct file *au_hf_top(struct file *file)
+{
-+ int err;
-+ struct super_block *sb;
++ FiMustAnyLock(file);
++ AuDebugOn(au_fi(file)->fi_hdir);
++ return au_fi(file)->fi_htop.hf_file;
++}
+
-+ sb = file->f_dentry->d_sb;
-+ si_read_lock(sb, AuLock_FLUSH);
-+ err = au_do_open(file, au_do_open_sp, /*fidir*/NULL);
-+ si_read_unlock(sb);
-+ return err;
++static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
++{
++ FiMustAnyLock(file);
++ AuDebugOn(!au_fi(file)->fi_hdir);
++ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
++}
++
++/* todo: memory barrier? */
++static inline unsigned int au_figen(struct file *f)
++{
++ return atomic_read(&au_fi(f)->fi_generation);
++}
++
++static inline int au_test_mmapped(struct file *f)
++{
++ FiMustAnyLock(f);
++ return !!(au_fi(f)->fi_hvmop);
++}
++
++#endif /* __KERNEL__ */
++#endif /* __AUFS_FILE_H__ */
+--- a/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/finfo.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,157 @@
++/*
++ * Copyright (C) 2005-2011 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/*
++ * file private data
++ */
++
++#include <linux/file.h>
++#include "aufs.h"
++
++void au_hfput(struct au_hfile *hf, struct file *file)
++{
++ /* todo: direct access f_flags */
++ if (vfsub_file_flags(file) & __FMODE_EXEC)
++ allow_write_access(hf->hf_file);
++ fput(hf->hf_file);
++ hf->hf_file = NULL;
++ atomic_dec(&hf->hf_br->br_count);
++ hf->hf_br = NULL;
++}
++
++void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
++{
++ struct au_finfo *finfo = au_fi(file);
++ struct au_hfile *hf;
++ struct au_fidir *fidir;
++
++ fidir = finfo->fi_hdir;
++ if (!fidir) {
++ AuDebugOn(finfo->fi_btop != bindex);
++ hf = &finfo->fi_htop;
++ } else
++ hf = fidir->fd_hfile + bindex;
++
++ if (hf && hf->hf_file)
++ au_hfput(hf, file);
++ if (val) {
++ FiMustWriteLock(file);
++ hf->hf_file = val;
++ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
++ }
++}
++
++void au_update_figen(struct file *file)
++{
++ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
++ /* smp_mb(); */ /* atomic_set */
+}
+
+/* ---------------------------------------------------------------------- */
+
-+void au_init_special_fop(struct inode *inode, umode_t mode, dev_t rdev)
++struct au_fidir *au_fidir_alloc(struct super_block *sb)
+{
-+ init_special_inode(inode, mode, rdev);
++ struct au_fidir *fidir;
++ int nbr;
+
-+ switch (mode & S_IFMT) {
-+ case S_IFIFO:
-+ inode->i_fop = &au_sp_fop[AuSp_FIFO].fop;
-+ /*FALLTHROUGH*/
-+ case S_IFCHR:
-+ case S_IFBLK:
-+ case S_IFSOCK:
-+ break;
-+ default:
-+ AuDebugOn(1);
++ nbr = au_sbend(sb) + 1;
++ if (nbr < 2)
++ nbr = 2; /* initial allocate for 2 branches */
++ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
++ if (fidir) {
++ fidir->fd_bbot = -1;
++ fidir->fd_nent = nbr;
++ fidir->fd_vdir_cache = NULL;
+ }
++
++ return fidir;
+}
+
-+int au_special_file(umode_t mode)
++int au_fidir_realloc(struct au_finfo *finfo, int nbr)
+{
-+ int ret;
++ int err;
++ struct au_fidir *fidir, *p;
+
-+ ret = 0;
-+ switch (mode & S_IFMT) {
-+ case S_IFIFO:
-+#if 0
-+ case S_IFCHR:
-+ case S_IFBLK:
-+ case S_IFSOCK:
-+#endif
-+ ret = 1;
++ AuRwMustWriteLock(&finfo->fi_rwsem);
++ fidir = finfo->fi_hdir;
++ AuDebugOn(!fidir);
++
++ err = -ENOMEM;
++ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
++ GFP_NOFS);
++ if (p) {
++ p->fd_nent = nbr;
++ finfo->fi_hdir = p;
++ err = 0;
+ }
+
-+ return ret;
++ return err;
++}
++
++/* ---------------------------------------------------------------------- */
++
++void au_finfo_fin(struct file *file)
++{
++ struct au_finfo *finfo;
++
++ au_nfiles_dec(file->f_dentry->d_sb);
++
++ finfo = au_fi(file);
++ AuDebugOn(finfo->fi_hdir);
++ AuRwDestroy(&finfo->fi_rwsem);
++ au_cache_free_finfo(finfo);
++}
++
++void au_fi_init_once(void *_finfo)
++{
++ struct au_finfo *finfo = _finfo;
++ static struct lock_class_key aufs_fi, aufs_fi_vm, aufs_fi_mmap;
++
++ au_rw_init(&finfo->fi_rwsem);
++ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
++ mutex_init(&finfo->fi_vm_mtx);
++ lockdep_set_class(&finfo->fi_vm_mtx, &aufs_fi_vm);
++ mutex_init(&finfo->fi_mmap);
++ lockdep_set_class(&finfo->fi_mmap, &aufs_fi_mmap);
++}
++
++int au_finfo_init(struct file *file, struct au_fidir *fidir)
++{
++ int err;
++ struct au_finfo *finfo;
++ struct dentry *dentry;
++
++ err = -ENOMEM;
++ dentry = file->f_dentry;
++ finfo = au_cache_alloc_finfo();
++ if (unlikely(!finfo))
++ goto out;
++
++ err = 0;
++ au_nfiles_inc(dentry->d_sb);
++ au_rw_write_lock(&finfo->fi_rwsem);
++ finfo->fi_btop = -1;
++ finfo->fi_hdir = fidir;
++ atomic_set(&finfo->fi_generation, au_digen(dentry));
++ /* smp_mb(); */ /* atomic_set */
++
++ file->private_data = finfo;
++
++out:
++ return err;
+}
-diff -urN a/fs/aufs/fstype.h b/fs/aufs/fstype.h
--- a/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/fstype.h 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/fstype.h 2011-02-12 16:30:08.944127798 +0000
@@ -0,0 +1,497 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -11170,9 +11221,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_FSTYPE_H__ */
-diff -urN a/fs/aufs/hfsnotify.c b/fs/aufs/hfsnotify.c
--- a/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/hfsnotify.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/hfsnotify.c 2011-02-12 16:30:08.944127798 +0000
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -11421,9 +11471,8 @@
+ .fin_br = au_hfsn_fin_br,
+ .init_br = au_hfsn_init_br
+};
-diff -urN a/fs/aufs/hfsplus.c b/fs/aufs/hfsplus.c
--- a/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/hfsplus.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/hfsplus.c 2011-02-12 16:30:08.944127798 +0000
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2010-2011 Junjiro R. Okajima
@@ -11483,9 +11532,8 @@
+ au_sbr_put(dentry->d_sb, bindex);
+ }
+}
-diff -urN a/fs/aufs/hnotify.c b/fs/aufs/hnotify.c
--- a/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/hnotify.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/hnotify.c 2011-03-06 23:22:01.412413001 +0000
@@ -0,0 +1,709 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -12196,10 +12244,9 @@
+ if (au_cachep[AuCache_HNOTIFY])
+ au_hn_destroy_cache();
+}
-diff -urN a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c
---- a/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/iinfo.c 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,264 @@
+--- a/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/i_op.c 2011-03-06 23:22:01.412413001 +0000
+@@ -0,0 +1,976 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -12219,730 +12266,966 @@
+ */
+
+/*
-+ * inode private data
++ * inode operations (except add/del/rename)
+ */
+
-+#include "aufs.h"
-+
-+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
++#include <linux/device_cgroup.h>
++#include <linux/fs_stack.h>
++#include <linux/mm.h>
++#include <linux/namei.h>
++#include <linux/security.h>
++#include <linux/uaccess.h>
++#include "aufs.h"
++
++static int h_permission(struct inode *h_inode, int mask, unsigned int flags,
++ struct vfsmount *h_mnt, int brperm)
+{
-+ struct inode *h_inode;
++ int err;
++ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
+
-+ IiMustAnyLock(inode);
++ err = -EACCES;
++ if ((write_mask && IS_IMMUTABLE(h_inode))
++ || ((mask & MAY_EXEC)
++ && S_ISREG(h_inode->i_mode)
++ && ((h_mnt->mnt_flags & MNT_NOEXEC)
++ || !(h_inode->i_mode & S_IXUGO))))
++ goto out;
+
-+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
-+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
-+ return h_inode;
-+}
++ /*
++ * - skip the lower fs test in the case of write to ro branch.
++ * - nfs dir permission write check is optimized, but a policy for
++ * link/rename requires a real check.
++ */
++ if ((write_mask && !au_br_writable(brperm))
++ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
++ && write_mask && !(mask & MAY_READ))
++ || !h_inode->i_op->permission) {
++ /* AuLabel(generic_permission); */
++ err = generic_permission(h_inode, mask, flags,
++ h_inode->i_op->check_acl);
++ } else {
++ /* AuLabel(h_inode->permission); */
++ err = h_inode->i_op->permission(h_inode, mask, flags);
++ AuTraceErr(err);
++ }
+
-+/* todo: hard/soft set? */
-+void au_hiput(struct au_hinode *hinode)
-+{
-+ au_hn_free(hinode);
-+ dput(hinode->hi_whdentry);
-+ iput(hinode->hi_inode);
-+}
++ if (!err)
++ err = devcgroup_inode_permission(h_inode, mask);
++ if (!err)
++ err = security_inode_permission(h_inode, mask);
+
-+unsigned int au_hi_flags(struct inode *inode, int isdir)
-+{
-+ unsigned int flags;
-+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
++#if 0
++ if (!err) {
++ /* todo: do we need to call ima_path_check()? */
++ struct path h_path = {
++ .dentry =
++ .mnt = h_mnt
++ };
++ err = ima_path_check(&h_path,
++ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
++ IMA_COUNT_LEAVE);
++ }
++#endif
+
-+ flags = 0;
-+ if (au_opt_test(mnt_flags, XINO))
-+ au_fset_hi(flags, XINO);
-+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
-+ au_fset_hi(flags, HNOTIFY);
-+ return flags;
++out:
++ return err;
+}
+
-+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
-+ struct inode *h_inode, unsigned int flags)
++static int aufs_permission(struct inode *inode, int mask, unsigned int flags)
+{
-+ struct au_hinode *hinode;
-+ struct inode *hi;
-+ struct au_iinfo *iinfo = au_ii(inode);
++ int err;
++ aufs_bindex_t bindex, bend;
++ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
++ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
++ struct inode *h_inode;
++ struct super_block *sb;
++ struct au_branch *br;
+
-+ IiMustWriteLock(inode);
++ /* todo: support rcu-walk? */
++ if (flags & IPERM_FLAG_RCU)
++ return -ECHILD;
+
-+ hinode = iinfo->ii_hinode + bindex;
-+ hi = hinode->hi_inode;
-+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
++ sb = inode->i_sb;
++ si_read_lock(sb, AuLock_FLUSH);
++ ii_read_lock_child(inode);
++#if 0
++ err = au_iigen_test(inode, au_sigen(sb));
++ if (unlikely(err))
++ goto out;
++#endif
+
-+ if (hi)
-+ au_hiput(hinode);
-+ hinode->hi_inode = h_inode;
-+ if (h_inode) {
-+ int err;
-+ struct super_block *sb = inode->i_sb;
-+ struct au_branch *br;
++ if (!isdir || write_mask) {
++ err = au_busy_or_stale();
++ h_inode = au_h_iptr(inode, au_ibstart(inode));
++ if (unlikely(!h_inode
++ || (h_inode->i_mode & S_IFMT)
++ != (inode->i_mode & S_IFMT)))
++ goto out;
+
-+ AuDebugOn(inode->i_mode
-+ && (h_inode->i_mode & S_IFMT)
-+ != (inode->i_mode & S_IFMT));
-+ if (bindex == iinfo->ii_bstart)
-+ au_cpup_igen(inode, h_inode);
++ err = 0;
++ bindex = au_ibstart(inode);
+ br = au_sbr(sb, bindex);
-+ hinode->hi_id = br->br_id;
-+ if (au_ftest_hi(flags, XINO)) {
-+ err = au_xino_write(sb, bindex, h_inode->i_ino,
-+ inode->i_ino);
-+ if (unlikely(err))
-+ AuIOErr1("failed au_xino_write() %d\n", err);
++ err = h_permission(h_inode, mask, flags, br->br_mnt,
++ br->br_perm);
++ if (write_mask
++ && !err
++ && !special_file(h_inode->i_mode)) {
++ /* test whether the upper writable branch exists */
++ err = -EROFS;
++ for (; bindex >= 0; bindex--)
++ if (!au_br_rdonly(au_sbr(sb, bindex))) {
++ err = 0;
++ break;
++ }
+ }
++ goto out;
++ }
+
-+ if (au_ftest_hi(flags, HNOTIFY)
-+ && au_br_hnotifyable(br->br_perm)) {
-+ err = au_hn_alloc(hinode, inode);
-+ if (unlikely(err))
-+ AuIOErr1("au_hn_alloc() %d\n", err);
++ /* non-write to dir */
++ err = 0;
++ bend = au_ibend(inode);
++ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
++ h_inode = au_h_iptr(inode, bindex);
++ if (h_inode) {
++ err = au_busy_or_stale();
++ if (unlikely(!S_ISDIR(h_inode->i_mode)))
++ break;
++
++ br = au_sbr(sb, bindex);
++ err = h_permission(h_inode, mask, flags, br->br_mnt,
++ br->br_perm);
+ }
+ }
++
++out:
++ ii_read_unlock(inode);
++ si_read_unlock(sb);
++ return err;
+}
+
-+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
-+ struct dentry *h_wh)
++/* ---------------------------------------------------------------------- */
++
++static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
++ struct nameidata *nd)
+{
-+ struct au_hinode *hinode;
++ struct dentry *ret, *parent;
++ struct inode *inode;
++ struct super_block *sb;
++ int err, npositive;
+
-+ IiMustWriteLock(inode);
++ IMustLock(dir);
+
-+ hinode = au_ii(inode)->ii_hinode + bindex;
-+ AuDebugOn(hinode->hi_whdentry);
-+ hinode->hi_whdentry = h_wh;
-+}
++ sb = dir->i_sb;
++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
++ ret = ERR_PTR(err);
++ if (unlikely(err))
++ goto out;
+
-+void au_update_iigen(struct inode *inode)
-+{
-+ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb));
-+ /* smp_mb(); */ /* atomic_set */
-+}
++ ret = ERR_PTR(-ENAMETOOLONG);
++ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
++ goto out_si;
++ err = au_di_init(dentry);
++ ret = ERR_PTR(err);
++ if (unlikely(err))
++ goto out_si;
+
-+/* it may be called at remount time, too */
-+void au_update_ibrange(struct inode *inode, int do_put_zero)
-+{
-+ struct au_iinfo *iinfo;
-+ aufs_bindex_t bindex, bend;
++ npositive = 0; /* suppress a warning */
++ parent = dentry->d_parent; /* dir inode is locked */
++ di_read_lock_parent(parent, AuLock_IR);
++ err = au_alive_dir(parent);
++ if (!err)
++ err = au_digen_test(parent, au_sigen(sb));
++ if (!err) {
++ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
++ /*type*/0, nd);
++ err = npositive;
++ }
++ di_read_unlock(parent, AuLock_IR);
++ ret = ERR_PTR(err);
++ if (unlikely(err < 0))
++ goto out_unlock;
+
-+ iinfo = au_ii(inode);
-+ if (!iinfo)
-+ return;
++ inode = NULL;
++ if (npositive) {
++ inode = au_new_inode(dentry, /*must_new*/0);
++ ret = (void *)inode;
++ }
++ if (IS_ERR(inode))
++ goto out_unlock;
+
-+ IiMustWriteLock(inode);
++ ret = d_splice_alias(inode, dentry);
++ if (unlikely(IS_ERR(ret) && inode)) {
++ ii_write_unlock(inode);
++ iput(inode);
++ }
+
-+ if (do_put_zero && iinfo->ii_bstart >= 0) {
-+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
-+ bindex++) {
-+ struct inode *h_i;
++out_unlock:
++ di_write_unlock(dentry);
++out_si:
++ si_read_unlock(sb);
++out:
++ return ret;
++}
+
-+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
-+ if (h_i && !h_i->i_nlink)
-+ au_set_h_iptr(inode, bindex, NULL, 0);
-+ }
-+ }
++/* ---------------------------------------------------------------------- */
+
-+ iinfo->ii_bstart = -1;
-+ iinfo->ii_bend = -1;
-+ bend = au_sbend(inode->i_sb);
-+ for (bindex = 0; bindex <= bend; bindex++)
-+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
-+ iinfo->ii_bstart = bindex;
-+ break;
-+ }
-+ if (iinfo->ii_bstart >= 0)
-+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
-+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
-+ iinfo->ii_bend = bindex;
-+ break;
-+ }
-+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
-+}
-+
-+/* ---------------------------------------------------------------------- */
-+
-+void au_icntnr_init_once(void *_c)
-+{
-+ struct au_icntnr *c = _c;
-+ struct au_iinfo *iinfo = &c->iinfo;
-+ static struct lock_class_key aufs_ii;
-+
-+ au_rw_init(&iinfo->ii_rwsem);
-+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
-+ inode_init_once(&c->vfs_inode);
-+}
-+
-+int au_iinfo_init(struct inode *inode)
++static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
++ const unsigned char add_entry, aufs_bindex_t bcpup,
++ aufs_bindex_t bstart)
+{
-+ struct au_iinfo *iinfo;
-+ struct super_block *sb;
-+ int nbr, i;
++ int err;
++ struct dentry *h_parent;
++ struct inode *h_dir;
+
-+ sb = inode->i_sb;
-+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
-+ nbr = au_sbend(sb) + 1;
-+ if (unlikely(nbr <= 0))
-+ nbr = 1;
-+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
-+ if (iinfo->ii_hinode) {
-+ au_ninodes_inc(sb);
-+ for (i = 0; i < nbr; i++)
-+ iinfo->ii_hinode[i].hi_id = -1;
++ if (add_entry)
++ IMustLock(parent->d_inode);
++ else
++ di_write_lock_parent(parent);
+
-+ atomic_set(&iinfo->ii_generation, au_sigen(sb));
-+ /* smp_mb(); */ /* atomic_set */
-+ iinfo->ii_bstart = -1;
-+ iinfo->ii_bend = -1;
-+ iinfo->ii_vdir = NULL;
-+ return 0;
++ err = 0;
++ if (!au_h_dptr(parent, bcpup)) {
++ if (bstart < bcpup)
++ err = au_cpdown_dirs(dentry, bcpup);
++ else
++ err = au_cpup_dirs(dentry, bcpup);
+ }
-+ return -ENOMEM;
-+}
-+
-+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
-+{
-+ int err, sz;
-+ struct au_hinode *hip;
-+
-+ AuRwMustWriteLock(&iinfo->ii_rwsem);
++ if (!err && add_entry) {
++ h_parent = au_h_dptr(parent, bcpup);
++ h_dir = h_parent->d_inode;
++ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
++ err = au_lkup_neg(dentry, bcpup);
++ /* todo: no unlock here */
++ mutex_unlock(&h_dir->i_mutex);
+
-+ err = -ENOMEM;
-+ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
-+ if (!sz)
-+ sz = sizeof(*hip);
-+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
-+ if (hip) {
-+ iinfo->ii_hinode = hip;
-+ err = 0;
++ AuDbg("bcpup %d\n", bcpup);
++ if (!err) {
++ if (!dentry->d_inode)
++ au_set_h_dptr(dentry, bstart, NULL);
++ au_update_dbrange(dentry, /*do_put_zero*/0);
++ }
+ }
+
++ if (!add_entry)
++ di_write_unlock(parent);
++ if (!err)
++ err = bcpup; /* success */
++
++ AuTraceErr(err);
+ return err;
+}
+
-+void au_iinfo_fin(struct inode *inode)
++/*
++ * decide the branch and the parent dir where we will create a new entry.
++ * returns new bindex or an error.
++ * copyup the parent dir if needed.
++ */
++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
++ struct au_wr_dir_args *args)
+{
-+ struct au_iinfo *iinfo;
-+ struct au_hinode *hi;
++ int err;
++ aufs_bindex_t bcpup, bstart, src_bstart;
++ const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
++ ADD_ENTRY);
+ struct super_block *sb;
-+ aufs_bindex_t bindex, bend;
-+ const unsigned char unlinked = !inode->i_nlink;
++ struct dentry *parent;
++ struct au_sbinfo *sbinfo;
+
-+ iinfo = au_ii(inode);
-+ /* bad_inode case */
-+ if (!iinfo)
-+ return;
++ sb = dentry->d_sb;
++ sbinfo = au_sbi(sb);
++ parent = dget_parent(dentry);
++ bstart = au_dbstart(dentry);
++ bcpup = bstart;
++ if (args->force_btgt < 0) {
++ if (src_dentry) {
++ src_bstart = au_dbstart(src_dentry);
++ if (src_bstart < bstart)
++ bcpup = src_bstart;
++ } else if (add_entry) {
++ err = AuWbrCreate(sbinfo, dentry,
++ au_ftest_wrdir(args->flags, ISDIR));
++ bcpup = err;
++ }
+
-+ sb = inode->i_sb;
-+ au_ninodes_dec(sb);
-+ if (si_pid_test(sb))
-+ au_xino_delete_inode(inode, unlinked);
-+ else {
-+ /*
-+ * it is safe to hide the dependency between sbinfo and
-+ * sb->s_umount.
-+ */
-+ lockdep_off();
-+ si_noflush_read_lock(sb);
-+ au_xino_delete_inode(inode, unlinked);
-+ si_read_unlock(sb);
-+ lockdep_on();
++ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
++ if (add_entry)
++ err = AuWbrCopyup(sbinfo, dentry);
++ else {
++ if (!IS_ROOT(dentry)) {
++ di_read_lock_parent(parent, !AuLock_IR);
++ err = AuWbrCopyup(sbinfo, dentry);
++ di_read_unlock(parent, !AuLock_IR);
++ } else
++ err = AuWbrCopyup(sbinfo, dentry);
++ }
++ bcpup = err;
++ if (unlikely(err < 0))
++ goto out;
++ }
++ } else {
++ bcpup = args->force_btgt;
++ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
+ }
+
-+ if (iinfo->ii_vdir)
-+ au_vdir_free(iinfo->ii_vdir);
++ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
++ err = bcpup;
++ if (bcpup == bstart)
++ goto out; /* success */
+
-+ bindex = iinfo->ii_bstart;
-+ if (bindex >= 0) {
-+ hi = iinfo->ii_hinode + bindex;
-+ bend = iinfo->ii_bend;
-+ while (bindex++ <= bend) {
-+ if (hi->hi_inode)
-+ au_hiput(hi);
-+ hi++;
++ /* copyup the new parent into the branch we process */
++ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
++ if (err >= 0) {
++ if (!dentry->d_inode) {
++ au_set_h_dptr(dentry, bstart, NULL);
++ au_set_dbstart(dentry, bcpup);
++ au_set_dbend(dentry, bcpup);
+ }
++ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup));
+ }
-+ kfree(iinfo->ii_hinode);
-+ iinfo->ii_hinode = NULL;
-+ AuRwDestroy(&iinfo->ii_rwsem);
-+}
-diff -urN a/fs/aufs/inode.c b/fs/aufs/inode.c
---- a/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/inode.c 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,471 @@
-+/*
-+ * Copyright (C) 2005-2011 Junjiro R. Okajima
-+ *
-+ * This program, aufs is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
+
-+/*
-+ * inode functions
-+ */
++out:
++ dput(parent);
++ return err;
++}
+
-+#include "aufs.h"
++/* ---------------------------------------------------------------------- */
+
-+struct inode *au_igrab(struct inode *inode)
++struct dentry *au_pinned_h_parent(struct au_pin *pin)
+{
-+ if (inode) {
-+ AuDebugOn(!atomic_read(&inode->i_count));
-+ ihold(inode);
-+ }
-+ return inode;
++ if (pin && pin->parent)
++ return au_h_dptr(pin->parent, pin->bindex);
++ return NULL;
+}
+
-+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
++void au_unpin(struct au_pin *p)
+{
-+ au_cpup_attr_all(inode, /*force*/0);
-+ au_update_iigen(inode);
-+ if (do_version)
-+ inode->i_version++;
++ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
++ mnt_drop_write(p->h_mnt);
++ if (!p->hdir)
++ return;
++
++ au_hn_imtx_unlock(p->hdir);
++ if (!au_ftest_pin(p->flags, DI_LOCKED))
++ di_read_unlock(p->parent, AuLock_IR);
++ iput(p->hdir->hi_inode);
++ dput(p->parent);
++ p->parent = NULL;
++ p->hdir = NULL;
++ p->h_mnt = NULL;
+}
+
-+static int au_ii_refresh(struct inode *inode, int *update)
++int au_do_pin(struct au_pin *p)
+{
-+ int err, e;
-+ umode_t type;
-+ aufs_bindex_t bindex, new_bindex;
++ int err;
+ struct super_block *sb;
-+ struct au_iinfo *iinfo;
-+ struct au_hinode *p, *q, tmp;
-+
-+ IiMustWriteLock(inode);
++ struct dentry *h_dentry, *h_parent;
++ struct au_branch *br;
++ struct inode *h_dir;
+
-+ *update = 0;
-+ sb = inode->i_sb;
-+ type = inode->i_mode & S_IFMT;
-+ iinfo = au_ii(inode);
-+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
-+ if (unlikely(err))
++ err = 0;
++ sb = p->dentry->d_sb;
++ br = au_sbr(sb, p->bindex);
++ if (IS_ROOT(p->dentry)) {
++ if (au_ftest_pin(p->flags, MNT_WRITE)) {
++ p->h_mnt = br->br_mnt;
++ err = mnt_want_write(p->h_mnt);
++ if (unlikely(err)) {
++ au_fclr_pin(p->flags, MNT_WRITE);
++ goto out_err;
++ }
++ }
+ goto out;
++ }
+
-+ AuDebugOn(iinfo->ii_bstart < 0);
-+ p = iinfo->ii_hinode + iinfo->ii_bstart;
-+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
-+ bindex++, p++) {
-+ if (!p->hi_inode)
-+ continue;
++ h_dentry = NULL;
++ if (p->bindex <= au_dbend(p->dentry))
++ h_dentry = au_h_dptr(p->dentry, p->bindex);
+
-+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
-+ new_bindex = au_br_index(sb, p->hi_id);
-+ if (new_bindex == bindex)
-+ continue;
++ p->parent = dget_parent(p->dentry);
++ if (!au_ftest_pin(p->flags, DI_LOCKED))
++ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
+
-+ if (new_bindex < 0) {
-+ *update = 1;
-+ au_hiput(p);
-+ p->hi_inode = NULL;
-+ continue;
++ h_dir = NULL;
++ h_parent = au_h_dptr(p->parent, p->bindex);
++ p->hdir = au_hi(p->parent->d_inode, p->bindex);
++ if (p->hdir)
++ h_dir = p->hdir->hi_inode;
++
++ /*
++ * udba case, or
++ * if DI_LOCKED is not set, then p->parent may be different
++ * and h_parent can be NULL.
++ */
++ if (unlikely(!p->hdir || !h_dir || !h_parent)) {
++ err = -EBUSY;
++ if (!au_ftest_pin(p->flags, DI_LOCKED))
++ di_read_unlock(p->parent, AuLock_IR);
++ dput(p->parent);
++ p->parent = NULL;
++ goto out_err;
++ }
++
++ au_igrab(h_dir);
++ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
++
++ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) {
++ err = -EBUSY;
++ goto out_unpin;
++ }
++ if (h_dentry) {
++ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
++ if (unlikely(err)) {
++ au_fclr_pin(p->flags, MNT_WRITE);
++ goto out_unpin;
+ }
++ }
+
-+ if (new_bindex < iinfo->ii_bstart)
-+ iinfo->ii_bstart = new_bindex;
-+ if (iinfo->ii_bend < new_bindex)
-+ iinfo->ii_bend = new_bindex;
-+ /* swap two lower inode, and loop again */
-+ q = iinfo->ii_hinode + new_bindex;
-+ tmp = *q;
-+ *q = *p;
-+ *p = tmp;
-+ if (tmp.hi_inode) {
-+ bindex--;
-+ p--;
++ if (au_ftest_pin(p->flags, MNT_WRITE)) {
++ p->h_mnt = br->br_mnt;
++ err = mnt_want_write(p->h_mnt);
++ if (unlikely(err)) {
++ au_fclr_pin(p->flags, MNT_WRITE);
++ goto out_unpin;
+ }
+ }
-+ au_update_ibrange(inode, /*do_put_zero*/0);
-+ e = au_dy_irefresh(inode);
-+ if (unlikely(e && !err))
-+ err = e;
++ goto out; /* success */
+
++out_unpin:
++ au_unpin(p);
++out_err:
++ pr_err("err %d\n", err);
++ err = au_busy_or_stale();
+out:
-+ AuTraceErr(err);
+ return err;
+}
+
-+int au_refresh_hinode_self(struct inode *inode)
++void au_pin_init(struct au_pin *p, struct dentry *dentry,
++ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
++ unsigned int udba, unsigned char flags)
+{
-+ int err, update;
-+
-+ err = au_ii_refresh(inode, &update);
-+ if (!err)
-+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
++ p->dentry = dentry;
++ p->udba = udba;
++ p->lsc_di = lsc_di;
++ p->lsc_hi = lsc_hi;
++ p->flags = flags;
++ p->bindex = bindex;
+
-+ AuTraceErr(err);
-+ return err;
++ p->parent = NULL;
++ p->hdir = NULL;
++ p->h_mnt = NULL;
+}
+
-+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
++ unsigned int udba, unsigned char flags)
+{
-+ int err, e, update;
-+ unsigned int flags;
-+ umode_t mode;
-+ aufs_bindex_t bindex, bend;
-+ unsigned char isdir;
-+ struct au_hinode *p;
-+ struct au_iinfo *iinfo;
-+
-+ err = au_ii_refresh(inode, &update);
-+ if (unlikely(err))
-+ goto out;
++ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
++ udba, flags);
++ return au_do_pin(pin);
++}
+
-+ update = 0;
-+ iinfo = au_ii(inode);
-+ p = iinfo->ii_hinode + iinfo->ii_bstart;
-+ mode = (inode->i_mode & S_IFMT);
-+ isdir = S_ISDIR(mode);
-+ flags = au_hi_flags(inode, isdir);
-+ bend = au_dbend(dentry);
-+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
-+ struct inode *h_i;
-+ struct dentry *h_d;
++/* ---------------------------------------------------------------------- */
+
-+ h_d = au_h_dptr(dentry, bindex);
-+ if (!h_d || !h_d->d_inode)
-+ continue;
++/*
++ * ->setattr() and ->getattr() are called in various cases.
++ * chmod, stat: dentry is revalidated.
++ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
++ * unhashed.
++ * for ->setattr(), ia->ia_file is passed from ftruncate only.
++ */
++/* todo: consolidate with do_refresh() and simple_reval_dpath() */
++static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
++{
++ int err;
++ struct inode *inode;
++ struct dentry *parent;
+
-+ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT));
-+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
-+ h_i = au_h_iptr(inode, bindex);
-+ if (h_i) {
-+ if (h_i == h_d->d_inode)
-+ continue;
-+ err = -EIO;
-+ break;
-+ }
-+ }
-+ if (bindex < iinfo->ii_bstart)
-+ iinfo->ii_bstart = bindex;
-+ if (iinfo->ii_bend < bindex)
-+ iinfo->ii_bend = bindex;
-+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
-+ update = 1;
++ err = 0;
++ inode = dentry->d_inode;
++ if (au_digen_test(dentry, sigen)) {
++ parent = dget_parent(dentry);
++ di_read_lock_parent(parent, AuLock_IR);
++ err = au_refresh_dentry(dentry, parent);
++ di_read_unlock(parent, AuLock_IR);
++ dput(parent);
+ }
-+ au_update_ibrange(inode, /*do_put_zero*/0);
-+ e = au_dy_irefresh(inode);
-+ if (unlikely(e && !err))
-+ err = e;
-+ if (!err)
-+ au_refresh_hinode_attr(inode, update && isdir);
+
-+out:
+ AuTraceErr(err);
+ return err;
+}
+
-+static int set_inode(struct inode *inode, struct dentry *dentry)
-+{
-+ int err;
-+ unsigned int flags;
-+ umode_t mode;
-+ aufs_bindex_t bindex, bstart, btail;
-+ unsigned char isdir;
-+ struct dentry *h_dentry;
++#define AuIcpup_DID_CPUP 1
++#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
++#define au_fset_icpup(flags, name) \
++ do { (flags) |= AuIcpup_##name; } while (0)
++#define au_fclr_icpup(flags, name) \
++ do { (flags) &= ~AuIcpup_##name; } while (0)
++
++struct au_icpup_args {
++ unsigned char flags;
++ unsigned char pin_flags;
++ aufs_bindex_t btgt;
++ unsigned int udba;
++ struct au_pin pin;
++ struct path h_path;
+ struct inode *h_inode;
-+ struct au_iinfo *iinfo;
++};
+
-+ IiMustWriteLock(inode);
++static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
++ struct au_icpup_args *a)
++{
++ int err;
++ loff_t sz;
++ aufs_bindex_t bstart, ibstart;
++ struct dentry *hi_wh, *parent;
++ struct inode *inode;
++ struct file *h_file;
++ struct au_wr_dir_args wr_dir_args = {
++ .force_btgt = -1,
++ .flags = 0
++ };
+
-+ err = 0;
-+ isdir = 0;
+ bstart = au_dbstart(dentry);
-+ h_inode = au_h_dptr(dentry, bstart)->d_inode;
-+ mode = h_inode->i_mode;
-+ switch (mode & S_IFMT) {
-+ case S_IFREG:
-+ btail = au_dbtail(dentry);
-+ inode->i_op = &aufs_iop;
-+ inode->i_fop = &aufs_file_fop;
-+ err = au_dy_iaop(inode, bstart, h_inode);
-+ if (unlikely(err))
-+ goto out;
-+ break;
-+ case S_IFDIR:
-+ isdir = 1;
-+ btail = au_dbtaildir(dentry);
-+ inode->i_op = &aufs_dir_iop;
-+ inode->i_fop = &aufs_dir_fop;
-+ break;
-+ case S_IFLNK:
-+ btail = au_dbtail(dentry);
-+ inode->i_op = &aufs_symlink_iop;
-+ break;
-+ case S_IFBLK:
-+ case S_IFCHR:
-+ case S_IFIFO:
-+ case S_IFSOCK:
-+ btail = au_dbtail(dentry);
-+ inode->i_op = &aufs_iop;
-+ au_init_special_fop(inode, mode, h_inode->i_rdev);
-+ break;
-+ default:
-+ AuIOErr("Unknown file type 0%o\n", mode);
-+ err = -EIO;
++ inode = dentry->d_inode;
++ if (S_ISDIR(inode->i_mode))
++ au_fset_wrdir(wr_dir_args.flags, ISDIR);
++ /* plink or hi_wh() case */
++ ibstart = au_ibstart(inode);
++ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
++ wr_dir_args.force_btgt = ibstart;
++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
++ if (unlikely(err < 0))
+ goto out;
-+ }
++ a->btgt = err;
++ if (err != bstart)
++ au_fset_icpup(a->flags, DID_CPUP);
+
-+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
-+ flags = au_hi_flags(inode, isdir);
-+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
-+ && au_ftest_hi(flags, HNOTIFY)
-+ && dentry->d_name.len > AUFS_WH_PFX_LEN
-+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
-+ au_fclr_hi(flags, HNOTIFY);
-+ iinfo = au_ii(inode);
-+ iinfo->ii_bstart = bstart;
-+ iinfo->ii_bend = btail;
-+ for (bindex = bstart; bindex <= btail; bindex++) {
-+ h_dentry = au_h_dptr(dentry, bindex);
-+ if (h_dentry)
-+ au_set_h_iptr(inode, bindex,
-+ au_igrab(h_dentry->d_inode), flags);
++ err = 0;
++ a->pin_flags = AuPin_MNT_WRITE;
++ parent = NULL;
++ if (!IS_ROOT(dentry)) {
++ au_fset_pin(a->pin_flags, DI_LOCKED);
++ parent = dget_parent(dentry);
++ di_write_lock_parent(parent);
+ }
-+ au_cpup_attr_all(inode, /*force*/1);
-+
-+out:
-+ return err;
-+}
+
-+/*
-+ * successful returns with iinfo write_locked
-+ * minus: errno
-+ * zero: success, matched
-+ * plus: no error, but unmatched
-+ */
-+static int reval_inode(struct inode *inode, struct dentry *dentry)
-+{
-+ int err;
-+ aufs_bindex_t bindex, bend;
-+ struct inode *h_inode, *h_dinode;
++ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
++ if (unlikely(err))
++ goto out_parent;
+
-+ /*
-+ * before this function, if aufs got any iinfo lock, it must be only
-+ * one, the parent dir.
-+ * it can happen by UDBA and the obsoleted inode number.
-+ */
-+ err = -EIO;
-+ if (unlikely(inode->i_ino == parent_ino(dentry)))
-+ goto out;
++ a->h_path.dentry = au_h_dptr(dentry, bstart);
++ a->h_inode = a->h_path.dentry->d_inode;
++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
++ sz = -1;
++ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
++ sz = ia->ia_size;
+
-+ err = 1;
-+ ii_write_lock_new_child(inode);
-+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
-+ bend = au_ibend(inode);
-+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
-+ h_inode = au_h_iptr(inode, bindex);
-+ if (h_inode && h_inode == h_dinode) {
-+ err = 0;
-+ if (au_iigen_test(inode, au_digen(dentry)))
-+ err = au_refresh_hinode(inode, dentry);
-+ break;
++ h_file = NULL;
++ hi_wh = NULL;
++ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
++ hi_wh = au_hi_wh(inode, a->btgt);
++ if (!hi_wh) {
++ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
++ if (unlikely(err))
++ goto out_unlock;
++ hi_wh = au_hi_wh(inode, a->btgt);
++ /* todo: revalidate hi_wh? */
+ }
+ }
+
-+ if (unlikely(err))
-+ ii_write_unlock(inode);
-+out:
-+ return err;
-+}
++ if (parent) {
++ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
++ di_downgrade_lock(parent, AuLock_IR);
++ dput(parent);
++ parent = NULL;
++ }
++ if (!au_ftest_icpup(a->flags, DID_CPUP))
++ goto out; /* success */
+
-+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
-+ unsigned int d_type, ino_t *ino)
-+{
-+ int err;
-+ struct mutex *mtx;
++ if (!d_unhashed(dentry)) {
++ h_file = au_h_open_pre(dentry, bstart);
++ if (IS_ERR(h_file)) {
++ err = PTR_ERR(h_file);
++ h_file = NULL;
++ } else
++ err = au_sio_cpup_simple(dentry, a->btgt, sz,
++ AuCpup_DTIME);
++ if (!err)
++ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
++ } else if (!hi_wh)
++ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
++ else
++ a->h_path.dentry = hi_wh; /* do not dget here */
+
-+ /* prevent hardlinked inode number from race condition */
-+ mtx = NULL;
-+ if (d_type != DT_DIR) {
-+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
-+ mutex_lock(mtx);
++out_unlock:
++ mutex_unlock(&a->h_inode->i_mutex);
++ au_h_open_post(dentry, bstart, h_file);
++ a->h_inode = a->h_path.dentry->d_inode;
++ if (!err) {
++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
++ goto out; /* success */
+ }
-+ err = au_xino_read(sb, bindex, h_ino, ino);
-+ if (unlikely(err))
-+ goto out;
+
-+ if (!*ino) {
-+ err = -EIO;
-+ *ino = au_xino_new_ino(sb);
-+ if (unlikely(!*ino))
-+ goto out;
-+ err = au_xino_write(sb, bindex, h_ino, *ino);
-+ if (unlikely(err))
-+ goto out;
++ au_unpin(&a->pin);
++out_parent:
++ if (parent) {
++ di_write_unlock(parent);
++ dput(parent);
+ }
-+
+out:
-+ if (mtx)
-+ mutex_unlock(mtx);
+ return err;
+}
+
-+/* successful returns with iinfo write_locked */
-+/* todo: return with unlocked? */
-+struct inode *au_new_inode(struct dentry *dentry, int must_new)
++static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
+{
-+ struct inode *inode, *h_inode;
-+ struct dentry *h_dentry;
-+ struct super_block *sb;
-+ struct mutex *mtx;
-+ ino_t h_ino, ino;
+ int err;
-+ aufs_bindex_t bstart;
++ struct inode *inode;
++ struct super_block *sb;
++ struct file *file;
++ struct au_icpup_args *a;
+
-+ sb = dentry->d_sb;
-+ bstart = au_dbstart(dentry);
-+ h_dentry = au_h_dptr(dentry, bstart);
-+ h_inode = h_dentry->d_inode;
-+ h_ino = h_inode->i_ino;
++ inode = dentry->d_inode;
++ IMustLock(inode);
+
-+ /*
-+ * stop 'race'-ing between hardlinks under different
-+ * parents.
-+ */
-+ mtx = NULL;
-+ if (!S_ISDIR(h_inode->i_mode))
-+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
++ err = -ENOMEM;
++ a = kzalloc(sizeof(*a), GFP_NOFS);
++ if (unlikely(!a))
++ goto out;
+
-+new_ino:
-+ if (mtx)
-+ mutex_lock(mtx);
-+ err = au_xino_read(sb, bstart, h_ino, &ino);
-+ inode = ERR_PTR(err);
++ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
++ ia->ia_valid &= ~ATTR_MODE;
++
++ file = NULL;
++ sb = dentry->d_sb;
++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
+ if (unlikely(err))
-+ goto out;
++ goto out_kfree;
+
-+ if (!ino) {
-+ ino = au_xino_new_ino(sb);
-+ if (unlikely(!ino)) {
-+ inode = ERR_PTR(-EIO);
-+ goto out;
++ if (ia->ia_valid & ATTR_FILE) {
++ /* currently ftruncate(2) only */
++ AuDebugOn(!S_ISREG(inode->i_mode));
++ file = ia->ia_file;
++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
++ if (unlikely(err))
++ goto out_si;
++ ia->ia_file = au_hf_top(file);
++ a->udba = AuOpt_UDBA_NONE;
++ } else {
++ /* fchmod() doesn't pass ia_file */
++ a->udba = au_opt_udba(sb);
++ di_write_lock_child(dentry);
++ /* no d_unlinked(), to set UDBA_NONE for root */
++ if (d_unhashed(dentry))
++ a->udba = AuOpt_UDBA_NONE;
++ if (a->udba != AuOpt_UDBA_NONE) {
++ AuDebugOn(IS_ROOT(dentry));
++ err = au_reval_for_attr(dentry, au_sigen(sb));
++ if (unlikely(err))
++ goto out_dentry;
+ }
+ }
+
-+ AuDbg("i%lu\n", (unsigned long)ino);
-+ inode = au_iget_locked(sb, ino);
-+ err = PTR_ERR(inode);
-+ if (IS_ERR(inode))
-+ goto out;
-+
-+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
-+ if (inode->i_state & I_NEW) {
-+ ii_write_lock_new_child(inode);
-+ err = set_inode(inode, dentry);
++ err = au_pin_and_icpup(dentry, ia, a);
++ if (unlikely(err < 0))
++ goto out_dentry;
++ if (au_ftest_icpup(a->flags, DID_CPUP)) {
++ ia->ia_file = NULL;
++ ia->ia_valid &= ~ATTR_FILE;
++ }
++
++ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
++ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
++ == (ATTR_MODE | ATTR_CTIME)) {
++ err = security_path_chmod(a->h_path.dentry, a->h_path.mnt,
++ ia->ia_mode);
++ if (unlikely(err))
++ goto out_unlock;
++ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
++ && (ia->ia_valid & ATTR_CTIME)) {
++ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
++ if (unlikely(err))
++ goto out_unlock;
++ }
++
++ if (ia->ia_valid & ATTR_SIZE) {
++ struct file *f;
++
++ if (ia->ia_size < i_size_read(inode))
++ /* unmap only */
++ truncate_setsize(inode, ia->ia_size);
++
++ f = NULL;
++ if (ia->ia_valid & ATTR_FILE)
++ f = ia->ia_file;
++ mutex_unlock(&a->h_inode->i_mutex);
++ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
++ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
++ } else
++ err = vfsub_notify_change(&a->h_path, ia);
++ if (!err)
++ au_cpup_attr_changeable(inode);
++
++out_unlock:
++ mutex_unlock(&a->h_inode->i_mutex);
++ au_unpin(&a->pin);
++ if (unlikely(err))
++ au_update_dbstart(dentry);
++out_dentry:
++ di_write_unlock(dentry);
++ if (file) {
++ fi_write_unlock(file);
++ ia->ia_file = file;
++ ia->ia_valid |= ATTR_FILE;
++ }
++out_si:
++ si_read_unlock(sb);
++out_kfree:
++ kfree(a);
++out:
++ AuTraceErr(err);
++ return err;
++}
++
++static void au_refresh_iattr(struct inode *inode, struct kstat *st,
++ unsigned int nlink)
++{
++ inode->i_mode = st->mode;
++ inode->i_uid = st->uid;
++ inode->i_gid = st->gid;
++ inode->i_atime = st->atime;
++ inode->i_mtime = st->mtime;
++ inode->i_ctime = st->ctime;
++
++ au_cpup_attr_nlink(inode, /*force*/0);
++ if (S_ISDIR(inode->i_mode)) {
++ inode->i_nlink -= nlink;
++ inode->i_nlink += st->nlink;
++ }
++
++ spin_lock(&inode->i_lock);
++ inode->i_blocks = st->blocks;
++ i_size_write(inode, st->size);
++ spin_unlock(&inode->i_lock);
++}
++
++static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
++ struct dentry *dentry, struct kstat *st)
++{
++ int err;
++ unsigned int mnt_flags;
++ aufs_bindex_t bindex;
++ unsigned char udba_none, positive;
++ struct super_block *sb, *h_sb;
++ struct inode *inode;
++ struct vfsmount *h_mnt;
++ struct dentry *h_dentry;
++
++ sb = dentry->d_sb;
++ inode = dentry->d_inode;
++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
++ if (unlikely(err))
++ goto out;
++ mnt_flags = au_mntflags(sb);
++ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
++
++ /* support fstat(2) */
++ if (!d_unlinked(dentry) && !udba_none) {
++ unsigned int sigen = au_sigen(sb);
++ err = au_digen_test(dentry, sigen);
+ if (!err) {
-+ unlock_new_inode(inode);
-+ goto out; /* success */
++ di_read_lock_child(dentry, AuLock_IR);
++ err = au_dbrange_test(dentry);
++ if (unlikely(err))
++ goto out_unlock;
++ } else {
++ AuDebugOn(IS_ROOT(dentry));
++ di_write_lock_child(dentry);
++ err = au_dbrange_test(dentry);
++ if (!err)
++ err = au_reval_for_attr(dentry, sigen);
++ di_downgrade_lock(dentry, AuLock_IR);
++ if (unlikely(err))
++ goto out_unlock;
+ }
++ } else
++ di_read_lock_child(dentry, AuLock_IR);
+
-+ /*
-+ * iget_failed() calls iput(), but we need to call
-+ * ii_write_unlock() after iget_failed(). so dirty hack for
-+ * i_count.
-+ */
-+ atomic_inc(&inode->i_count);
-+ iget_failed(inode);
-+ ii_write_unlock(inode);
-+ au_xino_write(sb, bstart, h_ino, /*ino*/0);
-+ /* ignore this error */
-+ goto out_iput;
-+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
-+ /*
-+ * horrible race condition between lookup, readdir and copyup
-+ * (or something).
-+ */
-+ if (mtx)
-+ mutex_unlock(mtx);
-+ err = reval_inode(inode, dentry);
-+ if (unlikely(err < 0)) {
-+ mtx = NULL;
-+ goto out_iput;
-+ }
++ bindex = au_ibstart(inode);
++ h_mnt = au_sbr_mnt(sb, bindex);
++ h_sb = h_mnt->mnt_sb;
++ if (!au_test_fs_bad_iattr(h_sb) && udba_none)
++ goto out_fill; /* success */
+
-+ if (!err) {
-+ mtx = NULL;
-+ goto out; /* success */
-+ } else if (mtx)
-+ mutex_lock(mtx);
++ h_dentry = NULL;
++ if (au_dbstart(dentry) == bindex)
++ h_dentry = dget(au_h_dptr(dentry, bindex));
++ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
++ h_dentry = au_plink_lkup(inode, bindex);
++ if (IS_ERR(h_dentry))
++ goto out_fill; /* pretending success */
+ }
++ /* illegally overlapped or something */
++ if (unlikely(!h_dentry))
++ goto out_fill; /* pretending success */
+
-+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
-+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
-+ " b%d, %s, %.*s, hi%lu, i%lu.\n",
-+ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
-+ (unsigned long)h_ino, (unsigned long)ino);
-+ ino = 0;
-+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
++ positive = !!h_dentry->d_inode;
++ if (positive)
++ err = vfs_getattr(h_mnt, h_dentry, st);
++ dput(h_dentry);
+ if (!err) {
-+ iput(inode);
-+ if (mtx)
-+ mutex_unlock(mtx);
-+ goto new_ino;
++ if (positive)
++ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
++ goto out_fill; /* success */
+ }
++ AuTraceErr(err);
++ goto out_unlock;
+
-+out_iput:
-+ iput(inode);
-+ inode = ERR_PTR(err);
++out_fill:
++ generic_fillattr(inode, st);
++out_unlock:
++ di_read_unlock(dentry, AuLock_IR);
++ si_read_unlock(sb);
+out:
-+ if (mtx)
-+ mutex_unlock(mtx);
-+ return inode;
++ AuTraceErr(err);
++ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
-+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
-+ struct inode *inode)
++static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
++ int bufsiz)
+{
+ int err;
++ struct super_block *sb;
++ struct dentry *h_dentry;
+
-+ err = au_br_rdonly(au_sbr(sb, bindex));
++ err = -EINVAL;
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (unlikely(!h_dentry->d_inode->i_op->readlink))
++ goto out;
+
-+ /* pseudo-link after flushed may happen out of bounds */
-+ if (!err
-+ && inode
-+ && au_ibstart(inode) <= bindex
-+ && bindex <= au_ibend(inode)) {
-+ /*
-+ * permission check is unnecessary since vfsub routine
-+ * will be called later
-+ */
-+ struct inode *hi = au_h_iptr(inode, bindex);
-+ if (hi)
-+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
++ err = security_inode_readlink(h_dentry);
++ if (unlikely(err))
++ goto out;
++
++ sb = dentry->d_sb;
++ if (!au_test_ro(sb, bindex, dentry->d_inode)) {
++ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
++ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
+ }
++ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
+
++out:
+ return err;
+}
+
-+int au_test_h_perm(struct inode *h_inode, int mask)
++static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
-+ if (!current_fsuid())
-+ return 0;
-+ return inode_permission(h_inode, mask);
++ int err;
++
++ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
++ if (unlikely(err))
++ goto out;
++ err = au_d_hashed_positive(dentry);
++ if (!err)
++ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
++ aufs_read_unlock(dentry, AuLock_IR);
++
++out:
++ return err;
+}
+
-+int au_test_h_perm_sio(struct inode *h_inode, int mask)
++static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
-+ if (au_test_nfs(h_inode->i_sb)
-+ && (mask & MAY_WRITE)
-+ && S_ISDIR(h_inode->i_mode))
-+ mask |= MAY_READ; /* force permission check */
-+ return au_test_h_perm(h_inode, mask);
++ int err;
++ mm_segment_t old_fs;
++ union {
++ char *k;
++ char __user *u;
++ } buf;
++
++ err = -ENOMEM;
++ buf.k = __getname_gfp(GFP_NOFS);
++ if (unlikely(!buf.k))
++ goto out;
++
++ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
++ if (unlikely(err))
++ goto out_name;
++
++ err = au_d_hashed_positive(dentry);
++ if (!err) {
++ old_fs = get_fs();
++ set_fs(KERNEL_DS);
++ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX);
++ set_fs(old_fs);
++ }
++ aufs_read_unlock(dentry, AuLock_IR);
++
++ if (err >= 0) {
++ buf.k[err] = 0;
++ /* will be freed by put_link */
++ nd_set_link(nd, buf.k);
++ return NULL; /* success */
++ }
++
++out_name:
++ __putname(buf.k);
++out:
++ path_put(&nd->path);
++ AuTraceErr(err);
++ return ERR_PTR(err);
+}
-diff -urN a/fs/aufs/inode.h b/fs/aufs/inode.h
---- a/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/inode.h 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,546 @@
++
++static void aufs_put_link(struct dentry *dentry __maybe_unused,
++ struct nameidata *nd, void *cookie __maybe_unused)
++{
++ __putname(nd_get_link(nd));
++}
++
++/* ---------------------------------------------------------------------- */
++
++static void aufs_truncate_range(struct inode *inode __maybe_unused,
++ loff_t start __maybe_unused,
++ loff_t end __maybe_unused)
++{
++ AuUnsupport();
++}
++
++/* ---------------------------------------------------------------------- */
++
++struct inode_operations aufs_symlink_iop = {
++ .permission = aufs_permission,
++ .setattr = aufs_setattr,
++ .getattr = aufs_getattr,
++ .readlink = aufs_readlink,
++ .follow_link = aufs_follow_link,
++ .put_link = aufs_put_link
++};
++
++struct inode_operations aufs_dir_iop = {
++ .create = aufs_create,
++ .lookup = aufs_lookup,
++ .link = aufs_link,
++ .unlink = aufs_unlink,
++ .symlink = aufs_symlink,
++ .mkdir = aufs_mkdir,
++ .rmdir = aufs_rmdir,
++ .mknod = aufs_mknod,
++ .rename = aufs_rename,
++
++ .permission = aufs_permission,
++ .setattr = aufs_setattr,
++ .getattr = aufs_getattr
++};
++
++struct inode_operations aufs_iop = {
++ .permission = aufs_permission,
++ .setattr = aufs_setattr,
++ .getattr = aufs_getattr,
++ .truncate_range = aufs_truncate_range
++};
+--- a/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/i_op_add.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -12962,699 +13245,701 @@
+ */
+
+/*
-+ * inode operations
++ * inode operations (add entry)
+ */
+
-+#ifndef __AUFS_INODE_H__
-+#define __AUFS_INODE_H__
-+
-+#ifdef __KERNEL__
++#include "aufs.h"
+
-+#include <linux/fs.h>
-+#include <linux/fsnotify.h>
-+#include <linux/aufs_type.h>
-+#include "rwsem.h"
++/*
++ * final procedure of adding a new entry, except link(2).
++ * remove whiteout, instantiate, copyup the parent dir's times and size
++ * and update version.
++ * if it failed, re-create the removed whiteout.
++ */
++static int epilog(struct inode *dir, aufs_bindex_t bindex,
++ struct dentry *wh_dentry, struct dentry *dentry)
++{
++ int err, rerr;
++ aufs_bindex_t bwh;
++ struct path h_path;
++ struct inode *inode, *h_dir;
++ struct dentry *wh;
+
-+struct vfsmount;
++ bwh = -1;
++ if (wh_dentry) {
++ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
++ IMustLock(h_dir);
++ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
++ bwh = au_dbwh(dentry);
++ h_path.dentry = wh_dentry;
++ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
++ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
++ dentry);
++ if (unlikely(err))
++ goto out;
++ }
+
-+struct au_hnotify {
-+#ifdef CONFIG_AUFS_HNOTIFY
-+#ifdef CONFIG_AUFS_HFSNOTIFY
-+ /* never use fsnotify_add_vfsmount_mark() */
-+ struct fsnotify_mark hn_mark;
-+ int hn_mark_dead;
-+#endif
-+ struct inode *hn_aufs_inode; /* no get/put */
-+#endif
-+} ____cacheline_aligned_in_smp;
++ inode = au_new_inode(dentry, /*must_new*/1);
++ if (!IS_ERR(inode)) {
++ d_instantiate(dentry, inode);
++ dir = dentry->d_parent->d_inode; /* dir inode is locked */
++ IMustLock(dir);
++ if (au_ibstart(dir) == au_dbstart(dentry))
++ au_cpup_attr_timesizes(dir);
++ dir->i_version++;
++ return 0; /* success */
++ }
+
-+struct au_hinode {
-+ struct inode *hi_inode;
-+ aufs_bindex_t hi_id;
-+#ifdef CONFIG_AUFS_HNOTIFY
-+ struct au_hnotify *hi_notify;
-+#endif
++ err = PTR_ERR(inode);
++ if (!wh_dentry)
++ goto out;
+
-+ /* reference to the copied-up whiteout with get/put */
-+ struct dentry *hi_whdentry;
-+};
++ /* revert */
++ /* dir inode is locked */
++ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
++ rerr = PTR_ERR(wh);
++ if (IS_ERR(wh)) {
++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
++ AuDLNPair(dentry), err, rerr);
++ err = -EIO;
++ } else
++ dput(wh);
+
-+struct au_vdir;
-+struct au_iinfo {
-+ atomic_t ii_generation;
-+ struct super_block *ii_hsb1; /* no get/put */
++out:
++ return err;
++}
+
-+ struct au_rwsem ii_rwsem;
-+ aufs_bindex_t ii_bstart, ii_bend;
-+ __u32 ii_higen;
-+ struct au_hinode *ii_hinode;
-+ struct au_vdir *ii_vdir;
-+};
++static int au_d_may_add(struct dentry *dentry)
++{
++ int err;
+
-+struct au_icntnr {
-+ struct au_iinfo iinfo;
-+ struct inode vfs_inode;
-+} ____cacheline_aligned_in_smp;
++ err = 0;
++ if (unlikely(d_unhashed(dentry)))
++ err = -ENOENT;
++ if (unlikely(dentry->d_inode))
++ err = -EEXIST;
++ return err;
++}
+
-+/* au_pin flags */
-+#define AuPin_DI_LOCKED 1
-+#define AuPin_MNT_WRITE (1 << 1)
-+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
-+#define au_fset_pin(flags, name) \
-+ do { (flags) |= AuPin_##name; } while (0)
-+#define au_fclr_pin(flags, name) \
-+ do { (flags) &= ~AuPin_##name; } while (0)
++/*
++ * simple tests for the adding inode operations.
++ * following the checks in vfs, plus the parent-child relationship.
++ */
++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
++ struct dentry *h_parent, int isdir)
++{
++ int err;
++ umode_t h_mode;
++ struct dentry *h_dentry;
++ struct inode *h_inode;
+
-+struct au_pin {
-+ /* input */
-+ struct dentry *dentry;
-+ unsigned int udba;
-+ unsigned char lsc_di, lsc_hi, flags;
-+ aufs_bindex_t bindex;
++ err = -ENAMETOOLONG;
++ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
++ goto out;
+
-+ /* output */
-+ struct dentry *parent;
-+ struct au_hinode *hdir;
-+ struct vfsmount *h_mnt;
-+};
++ h_dentry = au_h_dptr(dentry, bindex);
++ h_inode = h_dentry->d_inode;
++ if (!dentry->d_inode) {
++ err = -EEXIST;
++ if (unlikely(h_inode))
++ goto out;
++ } else {
++ /* rename(2) case */
++ err = -EIO;
++ if (unlikely(!h_inode || !h_inode->i_nlink))
++ goto out;
+
-+/* ---------------------------------------------------------------------- */
-+
-+static inline struct au_iinfo *au_ii(struct inode *inode)
-+{
-+ struct au_iinfo *iinfo;
-+
-+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
-+ if (iinfo->ii_hinode)
-+ return iinfo;
-+ return NULL; /* debugging bad_inode case */
-+}
-+
-+/* ---------------------------------------------------------------------- */
-+
-+/* inode.c */
-+struct inode *au_igrab(struct inode *inode);
-+int au_refresh_hinode_self(struct inode *inode);
-+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
-+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
-+ unsigned int d_type, ino_t *ino);
-+struct inode *au_new_inode(struct dentry *dentry, int must_new);
-+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
-+ struct inode *inode);
-+int au_test_h_perm(struct inode *h_inode, int mask);
-+int au_test_h_perm_sio(struct inode *h_inode, int mask);
-+
-+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
-+ ino_t h_ino, unsigned int d_type, ino_t *ino)
-+{
-+#ifdef CONFIG_AUFS_SHWH
-+ return au_ino(sb, bindex, h_ino, d_type, ino);
-+#else
-+ return 0;
-+#endif
-+}
-+
-+/* i_op.c */
-+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
-+
-+/* au_wr_dir flags */
-+#define AuWrDir_ADD_ENTRY 1
-+#define AuWrDir_ISDIR (1 << 1)
-+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
-+#define au_fset_wrdir(flags, name) \
-+ do { (flags) |= AuWrDir_##name; } while (0)
-+#define au_fclr_wrdir(flags, name) \
-+ do { (flags) &= ~AuWrDir_##name; } while (0)
-+
-+struct au_wr_dir_args {
-+ aufs_bindex_t force_btgt;
-+ unsigned char flags;
-+};
-+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
-+ struct au_wr_dir_args *args);
-+
-+struct dentry *au_pinned_h_parent(struct au_pin *pin);
-+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
-+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
-+ unsigned int udba, unsigned char flags);
-+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
-+ unsigned int udba, unsigned char flags) __must_check;
-+int au_do_pin(struct au_pin *pin) __must_check;
-+void au_unpin(struct au_pin *pin);
-+
-+/* i_op_add.c */
-+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
-+ struct dentry *h_parent, int isdir);
-+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
-+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
-+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
-+ struct nameidata *nd);
-+int aufs_link(struct dentry *src_dentry, struct inode *dir,
-+ struct dentry *dentry);
-+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
-+
-+/* i_op_del.c */
-+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
-+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
-+ struct dentry *h_parent, int isdir);
-+int aufs_unlink(struct inode *dir, struct dentry *dentry);
-+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
-+
-+/* i_op_ren.c */
-+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
-+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
-+ struct inode *dir, struct dentry *dentry);
-+
-+/* iinfo.c */
-+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
-+void au_hiput(struct au_hinode *hinode);
-+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
-+ struct dentry *h_wh);
-+unsigned int au_hi_flags(struct inode *inode, int isdir);
-+
-+/* hinode flags */
-+#define AuHi_XINO 1
-+#define AuHi_HNOTIFY (1 << 1)
-+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
-+#define au_fset_hi(flags, name) \
-+ do { (flags) |= AuHi_##name; } while (0)
-+#define au_fclr_hi(flags, name) \
-+ do { (flags) &= ~AuHi_##name; } while (0)
-+
-+#ifndef CONFIG_AUFS_HNOTIFY
-+#undef AuHi_HNOTIFY
-+#define AuHi_HNOTIFY 0
-+#endif
-+
-+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
-+ struct inode *h_inode, unsigned int flags);
-+
-+void au_update_iigen(struct inode *inode);
-+void au_update_ibrange(struct inode *inode, int do_put_zero);
-+
-+void au_icntnr_init_once(void *_c);
-+int au_iinfo_init(struct inode *inode);
-+void au_iinfo_fin(struct inode *inode);
-+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
-+
-+#ifdef CONFIG_PROC_FS
-+/* plink.c */
-+int au_plink_maint(struct super_block *sb, int flags);
-+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
-+int au_plink_maint_enter(struct super_block *sb);
-+#ifdef CONFIG_AUFS_DEBUG
-+void au_plink_list(struct super_block *sb);
-+#else
-+AuStubVoid(au_plink_list, struct super_block *sb)
-+#endif
-+int au_plink_test(struct inode *inode);
-+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
-+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
-+ struct dentry *h_dentry);
-+void au_plink_put(struct super_block *sb, int verbose);
-+void au_plink_clean(struct super_block *sb, int verbose);
-+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
-+#else
-+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
-+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
-+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
-+AuStubVoid(au_plink_list, struct super_block *sb);
-+AuStubInt0(au_plink_test, struct inode *inode);
-+AuStub(struct dentry *, au_plink_lkup, return NULL,
-+ struct inode *inode, aufs_bindex_t bindex);
-+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
-+ struct dentry *h_dentry);
-+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
-+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
-+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
-+#endif /* CONFIG_PROC_FS */
-+
-+/* ---------------------------------------------------------------------- */
-+
-+/* lock subclass for iinfo */
-+enum {
-+ AuLsc_II_CHILD, /* child first */
-+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
-+ AuLsc_II_CHILD3, /* copyup dirs */
-+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
-+ AuLsc_II_PARENT2,
-+ AuLsc_II_PARENT3, /* copyup dirs */
-+ AuLsc_II_NEW_CHILD
-+};
-+
-+/*
-+ * ii_read_lock_child, ii_write_lock_child,
-+ * ii_read_lock_child2, ii_write_lock_child2,
-+ * ii_read_lock_child3, ii_write_lock_child3,
-+ * ii_read_lock_parent, ii_write_lock_parent,
-+ * ii_read_lock_parent2, ii_write_lock_parent2,
-+ * ii_read_lock_parent3, ii_write_lock_parent3,
-+ * ii_read_lock_new_child, ii_write_lock_new_child,
-+ */
-+#define AuReadLockFunc(name, lsc) \
-+static inline void ii_read_lock_##name(struct inode *i) \
-+{ \
-+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
-+}
-+
-+#define AuWriteLockFunc(name, lsc) \
-+static inline void ii_write_lock_##name(struct inode *i) \
-+{ \
-+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
-+}
-+
-+#define AuRWLockFuncs(name, lsc) \
-+ AuReadLockFunc(name, lsc) \
-+ AuWriteLockFunc(name, lsc)
-+
-+AuRWLockFuncs(child, CHILD);
-+AuRWLockFuncs(child2, CHILD2);
-+AuRWLockFuncs(child3, CHILD3);
-+AuRWLockFuncs(parent, PARENT);
-+AuRWLockFuncs(parent2, PARENT2);
-+AuRWLockFuncs(parent3, PARENT3);
-+AuRWLockFuncs(new_child, NEW_CHILD);
-+
-+#undef AuReadLockFunc
-+#undef AuWriteLockFunc
-+#undef AuRWLockFuncs
-+
-+/*
-+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
-+ */
-+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
-+
-+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
-+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
-+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
-+
-+/* ---------------------------------------------------------------------- */
-+
-+static inline void au_icntnr_init(struct au_icntnr *c)
-+{
-+#ifdef CONFIG_AUFS_DEBUG
-+ c->vfs_inode.i_mode = 0;
-+#endif
-+}
-+
-+static inline unsigned int au_iigen(struct inode *inode)
-+{
-+ return atomic_read(&au_ii(inode)->ii_generation);
-+}
-+
-+/* tiny test for inode number */
-+/* tmpfs generation is too rough */
-+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
-+{
-+ struct au_iinfo *iinfo;
-+
-+ iinfo = au_ii(inode);
-+ AuRwMustAnyLock(&iinfo->ii_rwsem);
-+ return !(iinfo->ii_hsb1 == h_inode->i_sb
-+ && iinfo->ii_higen == h_inode->i_generation);
-+}
-+
-+static inline void au_iigen_dec(struct inode *inode)
-+{
-+ atomic_dec(&au_ii(inode)->ii_generation);
-+}
-+
-+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
-+{
-+ int err;
++ h_mode = h_inode->i_mode;
++ if (!isdir) {
++ err = -EISDIR;
++ if (unlikely(S_ISDIR(h_mode)))
++ goto out;
++ } else if (unlikely(!S_ISDIR(h_mode))) {
++ err = -ENOTDIR;
++ goto out;
++ }
++ }
+
+ err = 0;
-+ if (unlikely(inode && au_iigen(inode) != sigen))
++ /* expected parent dir is locked */
++ if (unlikely(h_parent != h_dentry->d_parent))
+ err = -EIO;
+
++out:
++ AuTraceErr(err);
+ return err;
+}
+
-+/* ---------------------------------------------------------------------- */
-+
-+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
-+ aufs_bindex_t bindex)
-+{
-+ IiMustAnyLock(inode);
-+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
-+}
-+
-+static inline aufs_bindex_t au_ibstart(struct inode *inode)
-+{
-+ IiMustAnyLock(inode);
-+ return au_ii(inode)->ii_bstart;
-+}
-+
-+static inline aufs_bindex_t au_ibend(struct inode *inode)
-+{
-+ IiMustAnyLock(inode);
-+ return au_ii(inode)->ii_bend;
-+}
-+
-+static inline struct au_vdir *au_ivdir(struct inode *inode)
-+{
-+ IiMustAnyLock(inode);
-+ return au_ii(inode)->ii_vdir;
-+}
-+
-+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
-+{
-+ IiMustAnyLock(inode);
-+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
-+}
-+
-+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
-+{
-+ IiMustWriteLock(inode);
-+ au_ii(inode)->ii_bstart = bindex;
-+}
-+
-+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
-+{
-+ IiMustWriteLock(inode);
-+ au_ii(inode)->ii_bend = bindex;
-+}
-+
-+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
-+{
-+ IiMustWriteLock(inode);
-+ au_ii(inode)->ii_vdir = vdir;
-+}
-+
-+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
-+{
-+ IiMustAnyLock(inode);
-+ return au_ii(inode)->ii_hinode + bindex;
-+}
-+
-+/* ---------------------------------------------------------------------- */
-+
-+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
++/*
++ * initial procedure of adding a new entry.
++ * prepare writable branch and the parent dir, lock it,
++ * and lookup whiteout for the new entry.
++ */
++static struct dentry*
++lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
++ struct dentry *src_dentry, struct au_pin *pin,
++ struct au_wr_dir_args *wr_dir_args)
+{
-+ if (pin)
-+ return pin->parent;
-+ return NULL;
-+}
++ struct dentry *wh_dentry, *h_parent;
++ struct super_block *sb;
++ struct au_branch *br;
++ int err;
++ unsigned int udba;
++ aufs_bindex_t bcpup;
+
-+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
-+{
-+ if (pin && pin->hdir)
-+ return pin->hdir->hi_inode;
-+ return NULL;
-+}
++ AuDbg("%.*s\n", AuDLNPair(dentry));
+
-+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
-+{
-+ if (pin)
-+ return pin->hdir;
-+ return NULL;
-+}
++ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
++ bcpup = err;
++ wh_dentry = ERR_PTR(err);
++ if (unlikely(err < 0))
++ goto out;
+
-+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
-+{
-+ if (pin)
-+ pin->dentry = dentry;
-+}
++ sb = dentry->d_sb;
++ udba = au_opt_udba(sb);
++ err = au_pin(pin, dentry, bcpup, udba,
++ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
++ wh_dentry = ERR_PTR(err);
++ if (unlikely(err))
++ goto out;
+
-+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
-+ unsigned char lflag)
-+{
-+ if (pin) {
-+ if (lflag)
-+ au_fset_pin(pin->flags, DI_LOCKED);
-+ else
-+ au_fclr_pin(pin->flags, DI_LOCKED);
-+ }
-+}
++ h_parent = au_pinned_h_parent(pin);
++ if (udba != AuOpt_UDBA_NONE
++ && au_dbstart(dentry) == bcpup)
++ err = au_may_add(dentry, bcpup, h_parent,
++ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
++ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
++ err = -ENAMETOOLONG;
++ wh_dentry = ERR_PTR(err);
++ if (unlikely(err))
++ goto out_unpin;
+
-+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
-+{
-+ if (pin) {
-+ dput(pin->parent);
-+ pin->parent = dget(parent);
++ br = au_sbr(sb, bcpup);
++ if (dt) {
++ struct path tmp = {
++ .dentry = h_parent,
++ .mnt = br->br_mnt
++ };
++ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
+ }
-+}
+
-+/* ---------------------------------------------------------------------- */
++ wh_dentry = NULL;
++ if (bcpup != au_dbwh(dentry))
++ goto out; /* success */
+
-+struct au_branch;
-+#ifdef CONFIG_AUFS_HNOTIFY
-+struct au_hnotify_op {
-+ void (*ctl)(struct au_hinode *hinode, int do_set);
-+ int (*alloc)(struct au_hinode *hinode);
-+ void (*free)(struct au_hinode *hinode);
++ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
+
-+ void (*fin)(void);
-+ int (*init)(void);
++out_unpin:
++ if (IS_ERR(wh_dentry))
++ au_unpin(pin);
++out:
++ return wh_dentry;
++}
+
-+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
-+ void (*fin_br)(struct au_branch *br);
-+ int (*init_br)(struct au_branch *br, int perm);
++/* ---------------------------------------------------------------------- */
++
++enum { Mknod, Symlink, Creat };
++struct simple_arg {
++ int type;
++ union {
++ struct {
++ int mode;
++ struct nameidata *nd;
++ } c;
++ struct {
++ const char *symname;
++ } s;
++ struct {
++ int mode;
++ dev_t dev;
++ } m;
++ } u;
+};
+
-+/* hnotify.c */
-+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
-+void au_hn_free(struct au_hinode *hinode);
-+void au_hn_ctl(struct au_hinode *hinode, int do_set);
-+void au_hn_reset(struct inode *inode, unsigned int flags);
-+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
-+ struct qstr *h_child_qstr, struct inode *h_child_inode);
-+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
-+int au_hnotify_init_br(struct au_branch *br, int perm);
-+void au_hnotify_fin_br(struct au_branch *br);
-+int __init au_hnotify_init(void);
-+void au_hnotify_fin(void);
++static int add_simple(struct inode *dir, struct dentry *dentry,
++ struct simple_arg *arg)
++{
++ int err;
++ aufs_bindex_t bstart;
++ unsigned char created;
++ struct au_dtime dt;
++ struct au_pin pin;
++ struct path h_path;
++ struct dentry *wh_dentry, *parent;
++ struct inode *h_dir;
++ struct au_wr_dir_args wr_dir_args = {
++ .force_btgt = -1,
++ .flags = AuWrDir_ADD_ENTRY
++ };
+
-+/* hfsnotify.c */
-+extern const struct au_hnotify_op au_hnotify_op;
++ AuDbg("%.*s\n", AuDLNPair(dentry));
++ IMustLock(dir);
+
-+static inline
-+void au_hn_init(struct au_hinode *hinode)
-+{
-+ hinode->hi_notify = NULL;
-+}
++ parent = dentry->d_parent; /* dir inode is locked */
++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
++ if (unlikely(err))
++ goto out;
++ err = au_d_may_add(dentry);
++ if (unlikely(err))
++ goto out_unlock;
++ di_write_lock_parent(parent);
++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
++ &wr_dir_args);
++ err = PTR_ERR(wh_dentry);
++ if (IS_ERR(wh_dentry))
++ goto out_parent;
+
-+#else
-+static inline
-+int au_hn_alloc(struct au_hinode *hinode __maybe_unused,
-+ struct inode *inode __maybe_unused)
-+{
-+ return -EOPNOTSUPP;
-+}
++ bstart = au_dbstart(dentry);
++ h_path.dentry = au_h_dptr(dentry, bstart);
++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
++ h_dir = au_pinned_h_dir(&pin);
++ switch (arg->type) {
++ case Creat:
++ err = vfsub_create(h_dir, &h_path, arg->u.c.mode);
++ break;
++ case Symlink:
++ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
++ break;
++ case Mknod:
++ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
++ break;
++ default:
++ BUG();
++ }
++ created = !err;
++ if (!err)
++ err = epilog(dir, bstart, wh_dentry, dentry);
+
-+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
-+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
-+ int do_set __maybe_unused)
-+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
-+ unsigned int flags __maybe_unused)
-+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
-+ struct au_branch *br __maybe_unused,
-+ int perm __maybe_unused)
-+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
-+ int perm __maybe_unused)
-+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
-+AuStubInt0(__init au_hnotify_init, void)
-+AuStubVoid(au_hnotify_fin, void)
-+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
-+#endif /* CONFIG_AUFS_HNOTIFY */
++ /* revert */
++ if (unlikely(created && err && h_path.dentry->d_inode)) {
++ int rerr;
++ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
++ if (rerr) {
++ AuIOErr("%.*s revert failure(%d, %d)\n",
++ AuDLNPair(dentry), err, rerr);
++ err = -EIO;
++ }
++ au_dtime_revert(&dt);
++ }
+
-+static inline void au_hn_suspend(struct au_hinode *hdir)
-+{
-+ au_hn_ctl(hdir, /*do_set*/0);
++ au_unpin(&pin);
++ dput(wh_dentry);
++
++out_parent:
++ di_write_unlock(parent);
++out_unlock:
++ if (unlikely(err)) {
++ au_update_dbstart(dentry);
++ d_drop(dentry);
++ }
++ aufs_read_unlock(dentry, AuLock_DW);
++out:
++ return err;
+}
+
-+static inline void au_hn_resume(struct au_hinode *hdir)
++int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
-+ au_hn_ctl(hdir, /*do_set*/1);
++ struct simple_arg arg = {
++ .type = Mknod,
++ .u.m = {
++ .mode = mode,
++ .dev = dev
++ }
++ };
++ return add_simple(dir, dentry, &arg);
+}
+
-+static inline void au_hn_imtx_lock(struct au_hinode *hdir)
++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
-+ mutex_lock(&hdir->hi_inode->i_mutex);
-+ au_hn_suspend(hdir);
++ struct simple_arg arg = {
++ .type = Symlink,
++ .u.s.symname = symname
++ };
++ return add_simple(dir, dentry, &arg);
+}
+
-+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
-+ unsigned int sc __maybe_unused)
++int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
++ struct nameidata *nd)
+{
-+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
-+ au_hn_suspend(hdir);
++ struct simple_arg arg = {
++ .type = Creat,
++ .u.c = {
++ .mode = mode,
++ .nd = nd
++ }
++ };
++ return add_simple(dir, dentry, &arg);
+}
+
-+static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
++/* ---------------------------------------------------------------------- */
++
++struct au_link_args {
++ aufs_bindex_t bdst, bsrc;
++ struct au_pin pin;
++ struct path h_path;
++ struct dentry *src_parent, *parent;
++};
++
++static int au_cpup_before_link(struct dentry *src_dentry,
++ struct au_link_args *a)
+{
-+ au_hn_resume(hdir);
-+ mutex_unlock(&hdir->hi_inode->i_mutex);
-+}
++ int err;
++ struct dentry *h_src_dentry;
++ struct mutex *h_mtx;
++ struct file *h_file;
+
-+#endif /* __KERNEL__ */
-+#endif /* __AUFS_INODE_H__ */
-diff -urN a/fs/aufs/ioctl.c b/fs/aufs/ioctl.c
---- a/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/ioctl.c 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,158 @@
-+/*
-+ * Copyright (C) 2005-2011 Junjiro R. Okajima
-+ *
-+ * This program, aufs is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
++ di_read_lock_parent(a->src_parent, AuLock_IR);
++ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
++ if (unlikely(err))
++ goto out;
+
-+/*
-+ * ioctl
-+ * plink-management and readdir in userspace.
-+ * assist the pathconf(3) wrapper library.
-+ */
++ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
++ h_mtx = &h_src_dentry->d_inode->i_mutex;
++ err = au_pin(&a->pin, src_dentry, a->bdst,
++ au_opt_udba(src_dentry->d_sb),
++ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
++ if (unlikely(err))
++ goto out;
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ h_file = au_h_open_pre(src_dentry, a->bsrc);
++ if (IS_ERR(h_file)) {
++ err = PTR_ERR(h_file);
++ h_file = NULL;
++ } else
++ err = au_sio_cpup_simple(src_dentry, a->bdst, a->bsrc,
++ AuCpup_DTIME /* | AuCpup_KEEPLINO */);
++ mutex_unlock(h_mtx);
++ au_h_open_post(src_dentry, a->bsrc, h_file);
++ au_unpin(&a->pin);
+
-+#include <linux/file.h>
-+#include "aufs.h"
++out:
++ di_read_unlock(a->src_parent, AuLock_IR);
++ return err;
++}
+
-+static int au_wbr_fd(struct path *path)
++static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
+{
-+ int err, fd;
-+ aufs_bindex_t wbi, bindex, bend;
-+ struct file *h_file;
++ int err;
++ unsigned char plink;
++ struct inode *h_inode, *inode;
++ struct dentry *h_src_dentry;
+ struct super_block *sb;
-+ struct dentry *root;
-+ struct au_branch *wbr;
++ struct file *h_file;
+
-+ err = get_unused_fd();
-+ if (unlikely(err < 0))
-+ goto out;
-+ fd = err;
++ plink = 0;
++ h_inode = NULL;
++ sb = src_dentry->d_sb;
++ inode = src_dentry->d_inode;
++ if (au_ibstart(inode) <= a->bdst)
++ h_inode = au_h_iptr(inode, a->bdst);
++ if (!h_inode || !h_inode->i_nlink) {
++ /* copyup src_dentry as the name of dentry. */
++ au_set_dbstart(src_dentry, a->bdst);
++ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
++ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
++ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
++ h_file = au_h_open_pre(src_dentry, a->bsrc);
++ if (IS_ERR(h_file)) {
++ err = PTR_ERR(h_file);
++ h_file = NULL;
++ } else
++ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc,
++ -1, AuCpup_KEEPLINO,
++ a->parent);
++ mutex_unlock(&h_inode->i_mutex);
++ au_h_open_post(src_dentry, a->bsrc, h_file);
++ au_set_h_dptr(src_dentry, a->bdst, NULL);
++ au_set_dbstart(src_dentry, a->bsrc);
++ } else {
++ /* the inode of src_dentry already exists on a.bdst branch */
++ h_src_dentry = d_find_alias(h_inode);
++ if (!h_src_dentry && au_plink_test(inode)) {
++ plink = 1;
++ h_src_dentry = au_plink_lkup(inode, a->bdst);
++ err = PTR_ERR(h_src_dentry);
++ if (IS_ERR(h_src_dentry))
++ goto out;
+
-+ wbi = 0;
-+ sb = path->dentry->d_sb;
-+ root = sb->s_root;
-+ aufs_read_lock(root, AuLock_IR);
-+ wbr = au_sbr(sb, wbi);
-+ if (!(path->mnt->mnt_flags & MNT_READONLY)
-+ && !au_br_writable(wbr->br_perm)) {
-+ bend = au_sbend(sb);
-+ for (bindex = 1; bindex <= bend; bindex++) {
-+ wbr = au_sbr(sb, bindex);
-+ if (au_br_writable(wbr->br_perm)) {
-+ wbi = bindex;
-+ break;
++ if (unlikely(!h_src_dentry->d_inode)) {
++ dput(h_src_dentry);
++ h_src_dentry = NULL;
+ }
++
++ }
++ if (h_src_dentry) {
++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
++ &a->h_path);
++ dput(h_src_dentry);
++ } else {
++ AuIOErr("no dentry found for hi%lu on b%d\n",
++ h_inode->i_ino, a->bdst);
++ err = -EIO;
+ }
-+ wbr = au_sbr(sb, wbi);
+ }
-+ AuDbg("wbi %d\n", wbi);
-+ h_file = au_h_open(root, wbi, O_RDONLY | O_DIRECTORY | O_LARGEFILE,
-+ NULL);
-+ aufs_read_unlock(root, AuLock_IR);
-+ err = PTR_ERR(h_file);
-+ if (IS_ERR(h_file))
-+ goto out_fd;
+
-+ atomic_dec(&wbr->br_count); /* cf. au_h_open() */
-+ fd_install(fd, h_file);
-+ err = fd;
-+ goto out; /* success */
++ if (!err && !plink)
++ au_plink_append(inode, a->bdst, a->h_path.dentry);
+
-+out_fd:
-+ put_unused_fd(fd);
+out:
++ AuTraceErr(err);
+ return err;
+}
+
-+/* ---------------------------------------------------------------------- */
-+
-+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
++int aufs_link(struct dentry *src_dentry, struct inode *dir,
++ struct dentry *dentry)
+{
-+ long err;
++ int err, rerr;
++ struct au_dtime dt;
++ struct au_link_args *a;
++ struct dentry *wh_dentry, *h_src_dentry;
++ struct inode *inode;
++ struct super_block *sb;
++ struct au_wr_dir_args wr_dir_args = {
++ /* .force_btgt = -1, */
++ .flags = AuWrDir_ADD_ENTRY
++ };
+
-+ switch (cmd) {
-+ case AUFS_CTL_RDU:
-+ case AUFS_CTL_RDU_INO:
-+ err = au_rdu_ioctl(file, cmd, arg);
-+ break;
++ IMustLock(dir);
++ inode = src_dentry->d_inode;
++ IMustLock(inode);
+
-+ case AUFS_CTL_WBR_FD:
-+ err = au_wbr_fd(&file->f_path);
-+ break;
++ err = -ENOMEM;
++ a = kzalloc(sizeof(*a), GFP_NOFS);
++ if (unlikely(!a))
++ goto out;
+
-+ case AUFS_CTL_IBUSY:
-+ err = au_ibusy_ioctl(file, arg);
-+ break;
++ a->parent = dentry->d_parent; /* dir inode is locked */
++ err = aufs_read_and_write_lock2(dentry, src_dentry,
++ AuLock_NOPLM | AuLock_GEN);
++ if (unlikely(err))
++ goto out_kfree;
++ err = au_d_hashed_positive(src_dentry);
++ if (unlikely(err))
++ goto out_unlock;
++ err = au_d_may_add(dentry);
++ if (unlikely(err))
++ goto out_unlock;
+
-+ default:
-+ /* do not call the lower */
-+ AuDbg("0x%x\n", cmd);
-+ err = -ENOTTY;
++ a->src_parent = dget_parent(src_dentry);
++ wr_dir_args.force_btgt = au_ibstart(inode);
++
++ di_write_lock_parent(a->parent);
++ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
++ &wr_dir_args);
++ err = PTR_ERR(wh_dentry);
++ if (IS_ERR(wh_dentry))
++ goto out_parent;
++
++ err = 0;
++ sb = dentry->d_sb;
++ a->bdst = au_dbstart(dentry);
++ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
++ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
++ a->bsrc = au_ibstart(inode);
++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
++ if (!h_src_dentry) {
++ a->bsrc = au_dbstart(src_dentry);
++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
++ AuDebugOn(!h_src_dentry);
++ } else if (IS_ERR(h_src_dentry))
++ goto out_parent;
++
++ if (au_opt_test(au_mntflags(sb), PLINK)) {
++ if (a->bdst < a->bsrc
++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
++ err = au_cpup_or_link(src_dentry, a);
++ else
++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
++ &a->h_path);
++ dput(h_src_dentry);
++ } else {
++ /*
++ * copyup src_dentry to the branch we process,
++ * and then link(2) to it.
++ */
++ dput(h_src_dentry);
++ if (a->bdst < a->bsrc
++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
++ au_unpin(&a->pin);
++ di_write_unlock(a->parent);
++ err = au_cpup_before_link(src_dentry, a);
++ di_write_lock_parent(a->parent);
++ if (!err)
++ err = au_pin(&a->pin, dentry, a->bdst,
++ au_opt_udba(sb),
++ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
++ if (unlikely(err))
++ goto out_wh;
++ }
++ if (!err) {
++ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
++ err = -ENOENT;
++ if (h_src_dentry && h_src_dentry->d_inode)
++ err = vfsub_link(h_src_dentry,
++ au_pinned_h_dir(&a->pin),
++ &a->h_path);
++ }
++ }
++ if (unlikely(err))
++ goto out_unpin;
++
++ if (wh_dentry) {
++ a->h_path.dentry = wh_dentry;
++ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
++ dentry);
++ if (unlikely(err))
++ goto out_revert;
++ }
++
++ dir->i_version++;
++ if (au_ibstart(dir) == au_dbstart(dentry))
++ au_cpup_attr_timesizes(dir);
++ inc_nlink(inode);
++ inode->i_ctime = dir->i_ctime;
++ d_instantiate(dentry, au_igrab(inode));
++ if (d_unhashed(a->h_path.dentry))
++ /* some filesystem calls d_drop() */
++ d_drop(dentry);
++ goto out_unpin; /* success */
++
++out_revert:
++ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
++ if (unlikely(rerr)) {
++ AuIOErr("%.*s reverting failed(%d, %d)\n",
++ AuDLNPair(dentry), err, rerr);
++ err = -EIO;
++ }
++ au_dtime_revert(&dt);
++out_unpin:
++ au_unpin(&a->pin);
++out_wh:
++ dput(wh_dentry);
++out_parent:
++ di_write_unlock(a->parent);
++ dput(a->src_parent);
++out_unlock:
++ if (unlikely(err)) {
++ au_update_dbstart(dentry);
++ d_drop(dentry);
+ }
-+
-+ AuTraceErr(err);
++ aufs_read_and_write_unlock2(dentry, src_dentry);
++out_kfree:
++ kfree(a);
++out:
+ return err;
+}
+
-+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
++int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
-+ long err;
++ int err, rerr;
++ aufs_bindex_t bindex;
++ unsigned char diropq;
++ struct path h_path;
++ struct dentry *wh_dentry, *parent, *opq_dentry;
++ struct mutex *h_mtx;
++ struct super_block *sb;
++ struct {
++ struct au_pin pin;
++ struct au_dtime dt;
++ } *a; /* reduce the stack usage */
++ struct au_wr_dir_args wr_dir_args = {
++ .force_btgt = -1,
++ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
++ };
+
-+ switch (cmd) {
-+ case AUFS_CTL_WBR_FD:
-+ err = au_wbr_fd(&file->f_path);
-+ break;
++ IMustLock(dir);
+
-+ default:
-+ /* do not call the lower */
-+ AuDbg("0x%x\n", cmd);
-+ err = -ENOTTY;
-+ }
++ err = -ENOMEM;
++ a = kmalloc(sizeof(*a), GFP_NOFS);
++ if (unlikely(!a))
++ goto out;
+
-+ AuTraceErr(err);
-+ return err;
-+}
++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
++ if (unlikely(err))
++ goto out_free;
++ err = au_d_may_add(dentry);
++ if (unlikely(err))
++ goto out_unlock;
+
-+#ifdef CONFIG_COMPAT
-+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
-+ unsigned long arg)
-+{
-+ long err;
++ parent = dentry->d_parent; /* dir inode is locked */
++ di_write_lock_parent(parent);
++ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
++ &a->pin, &wr_dir_args);
++ err = PTR_ERR(wh_dentry);
++ if (IS_ERR(wh_dentry))
++ goto out_parent;
+
-+ switch (cmd) {
-+ case AUFS_CTL_RDU:
-+ case AUFS_CTL_RDU_INO:
-+ err = au_rdu_compat_ioctl(file, cmd, arg);
-+ break;
++ sb = dentry->d_sb;
++ bindex = au_dbstart(dentry);
++ h_path.dentry = au_h_dptr(dentry, bindex);
++ h_path.mnt = au_sbr_mnt(sb, bindex);
++ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
++ if (unlikely(err))
++ goto out_unpin;
+
-+ case AUFS_CTL_IBUSY:
-+ err = au_ibusy_compat_ioctl(file, arg);
-+ break;
++ /* make the dir opaque */
++ diropq = 0;
++ h_mtx = &h_path.dentry->d_inode->i_mutex;
++ if (wh_dentry
++ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ opq_dentry = au_diropq_create(dentry, bindex);
++ mutex_unlock(h_mtx);
++ err = PTR_ERR(opq_dentry);
++ if (IS_ERR(opq_dentry))
++ goto out_dir;
++ dput(opq_dentry);
++ diropq = 1;
++ }
+
-+ default:
-+ err = aufs_ioctl_dir(file, cmd, arg);
++ err = epilog(dir, bindex, wh_dentry, dentry);
++ if (!err) {
++ inc_nlink(dir);
++ goto out_unpin; /* success */
+ }
+
-+ AuTraceErr(err);
-+ return err;
-+}
++ /* revert */
++ if (diropq) {
++ AuLabel(revert opq);
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ rerr = au_diropq_remove(dentry, bindex);
++ mutex_unlock(h_mtx);
++ if (rerr) {
++ AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
++ AuDLNPair(dentry), err, rerr);
++ err = -EIO;
++ }
++ }
+
-+#if 0 /* unused yet */
-+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
-+ unsigned long arg)
-+{
-+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
++out_dir:
++ AuLabel(revert dir);
++ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
++ if (rerr) {
++ AuIOErr("%.*s reverting dir failed(%d, %d)\n",
++ AuDLNPair(dentry), err, rerr);
++ err = -EIO;
++ }
++ au_dtime_revert(&a->dt);
++out_unpin:
++ au_unpin(&a->pin);
++ dput(wh_dentry);
++out_parent:
++ di_write_unlock(parent);
++out_unlock:
++ if (unlikely(err)) {
++ au_update_dbstart(dentry);
++ d_drop(dentry);
++ }
++ aufs_read_unlock(dentry, AuLock_DW);
++out_free:
++ kfree(a);
++out:
++ return err;
+}
-+#endif
-+#endif
-diff -urN a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c
---- a/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/i_op_add.c 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,702 @@
+--- a/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/i_op_del.c 2011-02-12 16:30:08.944127798 +0000
+@@ -0,0 +1,481 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -13674,107 +13959,87 @@
+ */
+
+/*
-+ * inode operations (add entry)
++ * inode operations (del entry)
+ */
+
+#include "aufs.h"
+
+/*
-+ * final procedure of adding a new entry, except link(2).
-+ * remove whiteout, instantiate, copyup the parent dir's times and size
-+ * and update version.
-+ * if it failed, re-create the removed whiteout.
++ * decide if a new whiteout for @dentry is necessary or not.
++ * when it is necessary, prepare the parent dir for the upper branch whose
++ * branch index is @bcpup for creation. the actual creation of the whiteout will
++ * be done by the caller.
++ * return value:
++ * 0: wh is unnecessary
++ * plus: wh is necessary
++ * minus: error
+ */
-+static int epilog(struct inode *dir, aufs_bindex_t bindex,
-+ struct dentry *wh_dentry, struct dentry *dentry)
++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
+{
-+ int err, rerr;
-+ aufs_bindex_t bwh;
-+ struct path h_path;
-+ struct inode *inode, *h_dir;
-+ struct dentry *wh;
++ int need_wh, err;
++ aufs_bindex_t bstart;
++ struct super_block *sb;
+
-+ bwh = -1;
-+ if (wh_dentry) {
-+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
-+ IMustLock(h_dir);
-+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
-+ bwh = au_dbwh(dentry);
-+ h_path.dentry = wh_dentry;
-+ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex);
-+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
-+ dentry);
++ sb = dentry->d_sb;
++ bstart = au_dbstart(dentry);
++ if (*bcpup < 0) {
++ *bcpup = bstart;
++ if (au_test_ro(sb, bstart, dentry->d_inode)) {
++ err = AuWbrCopyup(au_sbi(sb), dentry);
++ *bcpup = err;
++ if (unlikely(err < 0))
++ goto out;
++ }
++ } else
++ AuDebugOn(bstart < *bcpup
++ || au_test_ro(sb, *bcpup, dentry->d_inode));
++ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
++
++ if (*bcpup != bstart) {
++ err = au_cpup_dirs(dentry, *bcpup);
+ if (unlikely(err))
+ goto out;
-+ }
++ need_wh = 1;
++ } else {
++ struct au_dinfo *dinfo, *tmp;
+
-+ inode = au_new_inode(dentry, /*must_new*/1);
-+ if (!IS_ERR(inode)) {
-+ d_instantiate(dentry, inode);
-+ dir = dentry->d_parent->d_inode; /* dir inode is locked */
-+ IMustLock(dir);
-+ if (au_ibstart(dir) == au_dbstart(dentry))
-+ au_cpup_attr_timesizes(dir);
-+ dir->i_version++;
-+ return 0; /* success */
++ need_wh = -ENOMEM;
++ dinfo = au_di(dentry);
++ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
++ if (tmp) {
++ au_di_cp(tmp, dinfo);
++ au_di_swap(tmp, dinfo);
++ /* returns the number of positive dentries */
++ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
++ /*nd*/NULL);
++ au_di_swap(tmp, dinfo);
++ au_rw_write_unlock(&tmp->di_rwsem);
++ au_di_free(tmp);
++ }
+ }
-+
-+ err = PTR_ERR(inode);
-+ if (!wh_dentry)
-+ goto out;
-+
-+ /* revert */
-+ /* dir inode is locked */
-+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
-+ rerr = PTR_ERR(wh);
-+ if (IS_ERR(wh)) {
-+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
-+ AuDLNPair(dentry), err, rerr);
-+ err = -EIO;
-+ } else
-+ dput(wh);
++ AuDbg("need_wh %d\n", need_wh);
++ err = need_wh;
+
+out:
+ return err;
+}
+
-+static int au_d_may_add(struct dentry *dentry)
-+{
-+ int err;
-+
-+ err = 0;
-+ if (unlikely(d_unhashed(dentry)))
-+ err = -ENOENT;
-+ if (unlikely(dentry->d_inode))
-+ err = -EEXIST;
-+ return err;
-+}
-+
+/*
-+ * simple tests for the adding inode operations.
++ * simple tests for the del-entry operations.
+ * following the checks in vfs, plus the parent-child relationship.
+ */
-+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
+ struct dentry *h_parent, int isdir)
+{
+ int err;
+ umode_t h_mode;
-+ struct dentry *h_dentry;
++ struct dentry *h_dentry, *h_latest;
+ struct inode *h_inode;
+
-+ err = -ENAMETOOLONG;
-+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
-+ goto out;
-+
+ h_dentry = au_h_dptr(dentry, bindex);
+ h_inode = h_dentry->d_inode;
-+ if (!dentry->d_inode) {
-+ err = -EEXIST;
-+ if (unlikely(h_inode))
-+ goto out;
-+ } else {
-+ /* rename(2) case */
-+ err = -EIO;
++ if (dentry->d_inode) {
++ err = -ENOENT;
+ if (unlikely(!h_inode || !h_inode->i_nlink))
+ goto out;
+
@@ -13787,1560 +14052,1665 @@
+ err = -ENOTDIR;
+ goto out;
+ }
++ } else {
++ /* rename(2) case */
++ err = -EIO;
++ if (unlikely(h_inode))
++ goto out;
+ }
+
-+ err = 0;
++ err = -ENOENT;
+ /* expected parent dir is locked */
+ if (unlikely(h_parent != h_dentry->d_parent))
-+ err = -EIO;
++ goto out;
++ err = 0;
++
++ /*
++ * rmdir a dir may break the consistency on some filesystem.
++ * let's try heavy test.
++ */
++ err = -EACCES;
++ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
++ goto out;
++
++ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
++ au_sbr(dentry->d_sb, bindex));
++ err = -EIO;
++ if (IS_ERR(h_latest))
++ goto out;
++ if (h_latest == h_dentry)
++ err = 0;
++ dput(h_latest);
+
+out:
-+ AuTraceErr(err);
+ return err;
+}
+
+/*
-+ * initial procedure of adding a new entry.
-+ * prepare writable branch and the parent dir, lock it,
-+ * and lookup whiteout for the new entry.
++ * decide the branch where we operate for @dentry. the branch index will be
++ * set to @rbcpup. after deciding it, 'pin' it and store the timestamps of the
++ * parent dir for reverting.
++ * when a new whiteout is necessary, create it.
+ */
+static struct dentry*
-+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
-+ struct dentry *src_dentry, struct au_pin *pin,
-+ struct au_wr_dir_args *wr_dir_args)
++lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
++ struct au_dtime *dt, struct au_pin *pin)
+{
-+ struct dentry *wh_dentry, *h_parent;
++ struct dentry *wh_dentry;
+ struct super_block *sb;
-+ struct au_branch *br;
-+ int err;
++ struct path h_path;
++ int err, need_wh;
+ unsigned int udba;
+ aufs_bindex_t bcpup;
+
-+ AuDbg("%.*s\n", AuDLNPair(dentry));
-+
-+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
-+ bcpup = err;
-+ wh_dentry = ERR_PTR(err);
-+ if (unlikely(err < 0))
++ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
++ wh_dentry = ERR_PTR(need_wh);
++ if (unlikely(need_wh < 0))
+ goto out;
+
+ sb = dentry->d_sb;
+ udba = au_opt_udba(sb);
++ bcpup = *rbcpup;
+ err = au_pin(pin, dentry, bcpup, udba,
+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
+ wh_dentry = ERR_PTR(err);
+ if (unlikely(err))
+ goto out;
+
-+ h_parent = au_pinned_h_parent(pin);
++ h_path.dentry = au_pinned_h_parent(pin);
+ if (udba != AuOpt_UDBA_NONE
-+ && au_dbstart(dentry) == bcpup)
-+ err = au_may_add(dentry, bcpup, h_parent,
-+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
-+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
-+ err = -ENAMETOOLONG;
-+ wh_dentry = ERR_PTR(err);
-+ if (unlikely(err))
++ && au_dbstart(dentry) == bcpup) {
++ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
++ wh_dentry = ERR_PTR(err);
++ if (unlikely(err))
++ goto out_unpin;
++ }
++
++ h_path.mnt = au_sbr_mnt(sb, bcpup);
++ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
++ wh_dentry = NULL;
++ if (!need_wh)
++ goto out; /* success, no need to create whiteout */
++
++ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
++ if (IS_ERR(wh_dentry))
+ goto out_unpin;
+
-+ br = au_sbr(sb, bcpup);
-+ if (dt) {
-+ struct path tmp = {
-+ .dentry = h_parent,
-+ .mnt = br->br_mnt
-+ };
-+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
++ /* returns with the parent is locked and wh_dentry is dget-ed */
++ goto out; /* success */
++
++out_unpin:
++ au_unpin(pin);
++out:
++ return wh_dentry;
++}
++
++/*
++ * when removing a dir, rename it to a unique temporary whiteout-ed name first
++ * in order to be revertible and save time for removing many child whiteouts
++ * under the dir.
++ * returns 1 when there are too many child whiteouts and the caller should
++ * remove them asynchronously. returns 0 when the number of children is small
++ * enough to remove now or the branch fs is a remote fs.
++ * otherwise return an error.
++ */
++static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
++ struct au_nhash *whlist, struct inode *dir)
++{
++ int rmdir_later, err, dirwh;
++ struct dentry *h_dentry;
++ struct super_block *sb;
++
++ sb = dentry->d_sb;
++ SiMustAnyLock(sb);
++ h_dentry = au_h_dptr(dentry, bindex);
++ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
++ if (unlikely(err))
++ goto out;
++
++ /* stop monitoring */
++ au_hn_free(au_hi(dentry->d_inode, bindex));
++
++ if (!au_test_fs_remote(h_dentry->d_sb)) {
++ dirwh = au_sbi(sb)->si_dirwh;
++ rmdir_later = (dirwh <= 1);
++ if (!rmdir_later)
++ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
++ dirwh);
++ if (rmdir_later)
++ return rmdir_later;
++ }
++
++ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
++ if (unlikely(err)) {
++ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
++ AuDLNPair(h_dentry), bindex, err);
++ err = 0;
++ }
++
++out:
++ AuTraceErr(err);
++ return err;
++}
++
++/*
++ * final procedure for deleting an entry.
++ * maintain dentry and iattr.
++ */
++static void epilog(struct inode *dir, struct dentry *dentry,
++ aufs_bindex_t bindex)
++{
++ struct inode *inode;
++
++ inode = dentry->d_inode;
++ d_drop(dentry);
++ inode->i_ctime = dir->i_ctime;
++
++ if (au_ibstart(dir) == bindex)
++ au_cpup_attr_timesizes(dir);
++ dir->i_version++;
++}
++
++/*
++ * when an error happened, remove the created whiteout and revert everything.
++ */
++static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
++ aufs_bindex_t bwh, struct dentry *wh_dentry,
++ struct dentry *dentry, struct au_dtime *dt)
++{
++ int rerr;
++ struct path h_path = {
++ .dentry = wh_dentry,
++ .mnt = au_sbr_mnt(dir->i_sb, bindex)
++ };
++
++ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
++ if (!rerr) {
++ au_set_dbwh(dentry, bwh);
++ au_dtime_revert(dt);
++ return 0;
++ }
++
++ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
++ AuDLNPair(dentry), err, rerr);
++ return -EIO;
++}
++
++/* ---------------------------------------------------------------------- */
++
++int aufs_unlink(struct inode *dir, struct dentry *dentry)
++{
++ int err;
++ aufs_bindex_t bwh, bindex, bstart;
++ struct au_dtime dt;
++ struct au_pin pin;
++ struct path h_path;
++ struct inode *inode, *h_dir;
++ struct dentry *parent, *wh_dentry;
++
++ IMustLock(dir);
++
++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
++ if (unlikely(err))
++ goto out;
++ err = au_d_hashed_positive(dentry);
++ if (unlikely(err))
++ goto out_unlock;
++ inode = dentry->d_inode;
++ IMustLock(inode);
++ err = -EISDIR;
++ if (unlikely(S_ISDIR(inode->i_mode)))
++ goto out_unlock; /* possible? */
++
++ bstart = au_dbstart(dentry);
++ bwh = au_dbwh(dentry);
++ bindex = -1;
++ parent = dentry->d_parent; /* dir inode is locked */
++ di_write_lock_parent(parent);
++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
++ err = PTR_ERR(wh_dentry);
++ if (IS_ERR(wh_dentry))
++ goto out_parent;
++
++ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
++ h_path.dentry = au_h_dptr(dentry, bstart);
++ dget(h_path.dentry);
++ if (bindex == bstart) {
++ h_dir = au_pinned_h_dir(&pin);
++ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
++ } else {
++ /* dir inode is locked */
++ h_dir = wh_dentry->d_parent->d_inode;
++ IMustLock(h_dir);
++ err = 0;
++ }
++
++ if (!err) {
++ vfsub_drop_nlink(inode);
++ epilog(dir, dentry, bindex);
++
++ /* update target timestamps */
++ if (bindex == bstart) {
++ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
++ inode->i_ctime = h_path.dentry->d_inode->i_ctime;
++ } else
++ /* todo: this timestamp may be reverted later */
++ inode->i_ctime = h_dir->i_ctime;
++ goto out_unpin; /* success */
+ }
+
-+ wh_dentry = NULL;
-+ if (bcpup != au_dbwh(dentry))
-+ goto out; /* success */
++ /* revert */
++ if (wh_dentry) {
++ int rerr;
+
-+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
++ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
++ if (rerr)
++ err = rerr;
++ }
+
+out_unpin:
-+ if (IS_ERR(wh_dentry))
-+ au_unpin(pin);
++ au_unpin(&pin);
++ dput(wh_dentry);
++ dput(h_path.dentry);
++out_parent:
++ di_write_unlock(parent);
++out_unlock:
++ aufs_read_unlock(dentry, AuLock_DW);
+out:
-+ return wh_dentry;
++ return err;
+}
+
-+/* ---------------------------------------------------------------------- */
-+
-+enum { Mknod, Symlink, Creat };
-+struct simple_arg {
-+ int type;
-+ union {
-+ struct {
-+ int mode;
-+ struct nameidata *nd;
-+ } c;
-+ struct {
-+ const char *symname;
-+ } s;
-+ struct {
-+ int mode;
-+ dev_t dev;
-+ } m;
-+ } u;
-+};
-+
-+static int add_simple(struct inode *dir, struct dentry *dentry,
-+ struct simple_arg *arg)
++int aufs_rmdir(struct inode *dir, struct dentry *dentry)
+{
-+ int err;
-+ aufs_bindex_t bstart;
-+ unsigned char created;
++ int err, rmdir_later;
++ aufs_bindex_t bwh, bindex, bstart;
+ struct au_dtime dt;
+ struct au_pin pin;
-+ struct path h_path;
-+ struct dentry *wh_dentry, *parent;
-+ struct inode *h_dir;
-+ struct au_wr_dir_args wr_dir_args = {
-+ .force_btgt = -1,
-+ .flags = AuWrDir_ADD_ENTRY
-+ };
++ struct inode *inode;
++ struct dentry *parent, *wh_dentry, *h_dentry;
++ struct au_whtmp_rmdir *args;
+
-+ AuDbg("%.*s\n", AuDLNPair(dentry));
+ IMustLock(dir);
+
-+ parent = dentry->d_parent; /* dir inode is locked */
-+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
+ if (unlikely(err))
+ goto out;
-+ err = au_d_may_add(dentry);
-+ if (unlikely(err))
++
++ /* VFS already unhashes it */
++ inode = dentry->d_inode;
++ err = -ENOENT;
++ if (unlikely(!inode || !inode->i_nlink
++ || IS_DEADDIR(inode)))
++ goto out_unlock;
++ IMustLock(inode);
++ err = -ENOTDIR;
++ if (unlikely(!S_ISDIR(inode->i_mode)))
++ goto out_unlock; /* possible? */
++
++ err = -ENOMEM;
++ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
++ if (unlikely(!args))
+ goto out_unlock;
++
++ parent = dentry->d_parent; /* dir inode is locked */
+ di_write_lock_parent(parent);
-+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, &pin,
-+ &wr_dir_args);
++ err = au_test_empty(dentry, &args->whlist);
++ if (unlikely(err))
++ goto out_parent;
++
++ bstart = au_dbstart(dentry);
++ bwh = au_dbwh(dentry);
++ bindex = -1;
++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
+ err = PTR_ERR(wh_dentry);
+ if (IS_ERR(wh_dentry))
+ goto out_parent;
+
-+ bstart = au_dbstart(dentry);
-+ h_path.dentry = au_h_dptr(dentry, bstart);
-+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
-+ h_dir = au_pinned_h_dir(&pin);
-+ switch (arg->type) {
-+ case Creat:
-+ err = vfsub_create(h_dir, &h_path, arg->u.c.mode);
-+ break;
-+ case Symlink:
-+ err = vfsub_symlink(h_dir, &h_path, arg->u.s.symname);
-+ break;
-+ case Mknod:
-+ err = vfsub_mknod(h_dir, &h_path, arg->u.m.mode, arg->u.m.dev);
-+ break;
-+ default:
-+ BUG();
++ h_dentry = au_h_dptr(dentry, bstart);
++ dget(h_dentry);
++ rmdir_later = 0;
++ if (bindex == bstart) {
++ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
++ if (err > 0) {
++ rmdir_later = err;
++ err = 0;
++ }
++ } else {
++ /* stop monitoring */
++ au_hn_free(au_hi(inode, bstart));
++
++ /* dir inode is locked */
++ IMustLock(wh_dentry->d_parent->d_inode);
++ err = 0;
++ }
++
++ if (!err) {
++ vfsub_dead_dir(inode);
++ au_set_dbdiropq(dentry, -1);
++ epilog(dir, dentry, bindex);
++
++ if (rmdir_later) {
++ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
++ args = NULL;
++ }
++
++ goto out_unpin; /* success */
+ }
-+ created = !err;
-+ if (!err)
-+ err = epilog(dir, bstart, wh_dentry, dentry);
+
+ /* revert */
-+ if (unlikely(created && err && h_path.dentry->d_inode)) {
++ AuLabel(revert);
++ if (wh_dentry) {
+ int rerr;
-+ rerr = vfsub_unlink(h_dir, &h_path, /*force*/0);
-+ if (rerr) {
-+ AuIOErr("%.*s revert failure(%d, %d)\n",
-+ AuDLNPair(dentry), err, rerr);
-+ err = -EIO;
-+ }
-+ au_dtime_revert(&dt);
++
++ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
++ if (rerr)
++ err = rerr;
+ }
+
++out_unpin:
+ au_unpin(&pin);
+ dput(wh_dentry);
-+
++ dput(h_dentry);
+out_parent:
+ di_write_unlock(parent);
++ if (args)
++ au_whtmp_rmdir_free(args);
+out_unlock:
-+ if (unlikely(err)) {
-+ au_update_dbstart(dentry);
-+ d_drop(dentry);
-+ }
+ aufs_read_unlock(dentry, AuLock_DW);
+out:
++ AuTraceErr(err);
+ return err;
+}
+--- a/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/i_op_ren.c 2011-02-12 16:30:08.944127798 +0000
+@@ -0,0 +1,1017 @@
++/*
++ * Copyright (C) 2005-2011 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
+
-+int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
-+{
-+ struct simple_arg arg = {
-+ .type = Mknod,
-+ .u.m = {
-+ .mode = mode,
-+ .dev = dev
-+ }
-+ };
-+ return add_simple(dir, dentry, &arg);
-+}
++/*
++ * inode operation (rename entry)
++ * todo: this is a crazy monster
++ */
++
++#include "aufs.h"
++
++enum { AuSRC, AuDST, AuSrcDst };
++enum { AuPARENT, AuCHILD, AuParentChild };
++
++#define AuRen_ISDIR 1
++#define AuRen_ISSAMEDIR (1 << 1)
++#define AuRen_WHSRC (1 << 2)
++#define AuRen_WHDST (1 << 3)
++#define AuRen_MNT_WRITE (1 << 4)
++#define AuRen_DT_DSTDIR (1 << 5)
++#define AuRen_DIROPQ (1 << 6)
++#define AuRen_CPUP (1 << 7)
++#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
++#define au_fset_ren(flags, name) \
++ do { (flags) |= AuRen_##name; } while (0)
++#define au_fclr_ren(flags, name) \
++ do { (flags) &= ~AuRen_##name; } while (0)
++
++struct au_ren_args {
++ struct {
++ struct dentry *dentry, *h_dentry, *parent, *h_parent,
++ *wh_dentry;
++ struct inode *dir, *inode;
++ struct au_hinode *hdir;
++ struct au_dtime dt[AuParentChild];
++ aufs_bindex_t bstart;
++ } sd[AuSrcDst];
++
++#define src_dentry sd[AuSRC].dentry
++#define src_dir sd[AuSRC].dir
++#define src_inode sd[AuSRC].inode
++#define src_h_dentry sd[AuSRC].h_dentry
++#define src_parent sd[AuSRC].parent
++#define src_h_parent sd[AuSRC].h_parent
++#define src_wh_dentry sd[AuSRC].wh_dentry
++#define src_hdir sd[AuSRC].hdir
++#define src_h_dir sd[AuSRC].hdir->hi_inode
++#define src_dt sd[AuSRC].dt
++#define src_bstart sd[AuSRC].bstart
++
++#define dst_dentry sd[AuDST].dentry
++#define dst_dir sd[AuDST].dir
++#define dst_inode sd[AuDST].inode
++#define dst_h_dentry sd[AuDST].h_dentry
++#define dst_parent sd[AuDST].parent
++#define dst_h_parent sd[AuDST].h_parent
++#define dst_wh_dentry sd[AuDST].wh_dentry
++#define dst_hdir sd[AuDST].hdir
++#define dst_h_dir sd[AuDST].hdir->hi_inode
++#define dst_dt sd[AuDST].dt
++#define dst_bstart sd[AuDST].bstart
+
-+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
-+{
-+ struct simple_arg arg = {
-+ .type = Symlink,
-+ .u.s.symname = symname
-+ };
-+ return add_simple(dir, dentry, &arg);
-+}
++ struct dentry *h_trap;
++ struct au_branch *br;
++ struct au_hinode *src_hinode;
++ struct path h_path;
++ struct au_nhash whlist;
++ aufs_bindex_t btgt, src_bwh, src_bdiropq;
+
-+int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
-+ struct nameidata *nd)
-+{
-+ struct simple_arg arg = {
-+ .type = Creat,
-+ .u.c = {
-+ .mode = mode,
-+ .nd = nd
-+ }
-+ };
-+ return add_simple(dir, dentry, &arg);
-+}
++ unsigned int flags;
++
++ struct au_whtmp_rmdir *thargs;
++ struct dentry *h_dst;
++};
+
+/* ---------------------------------------------------------------------- */
+
-+struct au_link_args {
-+ aufs_bindex_t bdst, bsrc;
-+ struct au_pin pin;
-+ struct path h_path;
-+ struct dentry *src_parent, *parent;
-+};
++/*
++ * functions for reverting.
++ * when an error happens in a single rename system call, we should revert
++ * everything as if nothing happened.
++ * we don't need to revert the copy-up/down of the parent dir since it is
++ * harmless.
++ */
+
-+static int au_cpup_before_link(struct dentry *src_dentry,
-+ struct au_link_args *a)
++#define RevertFailure(fmt, ...) do { \
++ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
++ ##__VA_ARGS__, err, rerr); \
++ err = -EIO; \
++} while (0)
++
++static void au_ren_rev_diropq(int err, struct au_ren_args *a)
+{
-+ int err;
-+ struct dentry *h_src_dentry;
-+ struct mutex *h_mtx;
-+ struct file *h_file;
++ int rerr;
+
-+ di_read_lock_parent(a->src_parent, AuLock_IR);
-+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
-+ if (unlikely(err))
-+ goto out;
++ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
++ rerr = au_diropq_remove(a->src_dentry, a->btgt);
++ au_hn_imtx_unlock(a->src_hinode);
++ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
++ if (rerr)
++ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
++}
+
-+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
-+ h_mtx = &h_src_dentry->d_inode->i_mutex;
-+ err = au_pin(&a->pin, src_dentry, a->bdst,
-+ au_opt_udba(src_dentry->d_sb),
-+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
-+ if (unlikely(err))
-+ goto out;
-+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
-+ h_file = au_h_open_pre(src_dentry, a->bsrc);
-+ if (IS_ERR(h_file)) {
-+ err = PTR_ERR(h_file);
-+ h_file = NULL;
-+ } else
-+ err = au_sio_cpup_simple(src_dentry, a->bdst, a->bsrc,
-+ AuCpup_DTIME /* | AuCpup_KEEPLINO */);
-+ mutex_unlock(h_mtx);
-+ au_h_open_post(src_dentry, a->bsrc, h_file);
-+ au_unpin(&a->pin);
++static void au_ren_rev_rename(int err, struct au_ren_args *a)
++{
++ int rerr;
+
-+out:
-+ di_read_unlock(a->src_parent, AuLock_IR);
-+ return err;
++ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
++ a->br, /*nd*/NULL);
++ rerr = PTR_ERR(a->h_path.dentry);
++ if (IS_ERR(a->h_path.dentry)) {
++ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
++ return;
++ }
++
++ rerr = vfsub_rename(a->dst_h_dir,
++ au_h_dptr(a->src_dentry, a->btgt),
++ a->src_h_dir, &a->h_path);
++ d_drop(a->h_path.dentry);
++ dput(a->h_path.dentry);
++ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
++ if (rerr)
++ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
+}
+
-+static int au_cpup_or_link(struct dentry *src_dentry, struct au_link_args *a)
++static void au_ren_rev_cpup(int err, struct au_ren_args *a)
+{
-+ int err;
-+ unsigned char plink;
-+ struct inode *h_inode, *inode;
-+ struct dentry *h_src_dentry;
-+ struct super_block *sb;
-+ struct file *h_file;
++ int rerr;
+
-+ plink = 0;
-+ h_inode = NULL;
-+ sb = src_dentry->d_sb;
-+ inode = src_dentry->d_inode;
-+ if (au_ibstart(inode) <= a->bdst)
-+ h_inode = au_h_iptr(inode, a->bdst);
-+ if (!h_inode || !h_inode->i_nlink) {
-+ /* copyup src_dentry as the name of dentry. */
-+ au_set_dbstart(src_dentry, a->bdst);
-+ au_set_h_dptr(src_dentry, a->bdst, dget(a->h_path.dentry));
-+ h_inode = au_h_dptr(src_dentry, a->bsrc)->d_inode;
-+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
-+ h_file = au_h_open_pre(src_dentry, a->bsrc);
-+ if (IS_ERR(h_file)) {
-+ err = PTR_ERR(h_file);
-+ h_file = NULL;
-+ } else
-+ err = au_sio_cpup_single(src_dentry, a->bdst, a->bsrc,
-+ -1, AuCpup_KEEPLINO,
-+ a->parent);
-+ mutex_unlock(&h_inode->i_mutex);
-+ au_h_open_post(src_dentry, a->bsrc, h_file);
-+ au_set_h_dptr(src_dentry, a->bdst, NULL);
-+ au_set_dbstart(src_dentry, a->bsrc);
-+ } else {
-+ /* the inode of src_dentry already exists on a.bdst branch */
-+ h_src_dentry = d_find_alias(h_inode);
-+ if (!h_src_dentry && au_plink_test(inode)) {
-+ plink = 1;
-+ h_src_dentry = au_plink_lkup(inode, a->bdst);
-+ err = PTR_ERR(h_src_dentry);
-+ if (IS_ERR(h_src_dentry))
-+ goto out;
++ a->h_path.dentry = a->dst_h_dentry;
++ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
++ au_set_h_dptr(a->src_dentry, a->btgt, NULL);
++ au_set_dbstart(a->src_dentry, a->src_bstart);
++ if (rerr)
++ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
++}
+
-+ if (unlikely(!h_src_dentry->d_inode)) {
-+ dput(h_src_dentry);
-+ h_src_dentry = NULL;
-+ }
++static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
++{
++ int rerr;
+
-+ }
-+ if (h_src_dentry) {
-+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
-+ &a->h_path);
-+ dput(h_src_dentry);
-+ } else {
-+ AuIOErr("no dentry found for hi%lu on b%d\n",
-+ h_inode->i_ino, a->bdst);
-+ err = -EIO;
-+ }
++ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
++ a->br, /*nd*/NULL);
++ rerr = PTR_ERR(a->h_path.dentry);
++ if (IS_ERR(a->h_path.dentry)) {
++ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
++ return;
++ }
++ if (a->h_path.dentry->d_inode) {
++ d_drop(a->h_path.dentry);
++ dput(a->h_path.dentry);
++ return;
+ }
+
-+ if (!err && !plink)
-+ au_plink_append(inode, a->bdst, a->h_path.dentry);
-+
-+out:
-+ return err;
++ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
++ d_drop(a->h_path.dentry);
++ dput(a->h_path.dentry);
++ if (!rerr)
++ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
++ else
++ RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
+}
+
-+int aufs_link(struct dentry *src_dentry, struct inode *dir,
-+ struct dentry *dentry)
++static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
+{
-+ int err, rerr;
-+ struct au_dtime dt;
-+ struct au_link_args *a;
-+ struct dentry *wh_dentry, *h_src_dentry;
-+ struct inode *inode;
-+ struct super_block *sb;
-+ struct au_wr_dir_args wr_dir_args = {
-+ /* .force_btgt = -1, */
-+ .flags = AuWrDir_ADD_ENTRY
-+ };
-+
-+ IMustLock(dir);
-+ inode = src_dentry->d_inode;
-+ IMustLock(inode);
-+
-+ err = -ENOMEM;
-+ a = kzalloc(sizeof(*a), GFP_NOFS);
-+ if (unlikely(!a))
-+ goto out;
++ int rerr;
+
-+ a->parent = dentry->d_parent; /* dir inode is locked */
-+ err = aufs_read_and_write_lock2(dentry, src_dentry,
-+ AuLock_NOPLM | AuLock_GEN);
-+ if (unlikely(err))
-+ goto out_kfree;
-+ err = au_d_hashed_positive(src_dentry);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ err = au_d_may_add(dentry);
-+ if (unlikely(err))
-+ goto out_unlock;
++ a->h_path.dentry = a->src_wh_dentry;
++ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
++ au_set_dbwh(a->src_dentry, a->src_bwh);
++ if (rerr)
++ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
++}
++#undef RevertFailure
+
-+ a->src_parent = dget_parent(src_dentry);
-+ wr_dir_args.force_btgt = au_dbstart(src_dentry);
++/* ---------------------------------------------------------------------- */
+
-+ di_write_lock_parent(a->parent);
-+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
-+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
-+ &wr_dir_args);
-+ err = PTR_ERR(wh_dentry);
-+ if (IS_ERR(wh_dentry))
-+ goto out_parent;
++/*
++ * when we have to copyup the renaming entry, do it with the rename-target name
++ * in order to minimize the cost (the later actual rename is unnecessary).
++ * otherwise rename it on the target branch.
++ */
++static int au_ren_or_cpup(struct au_ren_args *a)
++{
++ int err;
++ struct dentry *d;
+
-+ err = 0;
-+ sb = dentry->d_sb;
-+ a->bdst = au_dbstart(dentry);
-+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
-+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
-+ a->bsrc = au_dbstart(src_dentry);
-+ if (au_opt_test(au_mntflags(sb), PLINK)) {
-+ if (a->bdst < a->bsrc
-+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
-+ err = au_cpup_or_link(src_dentry, a);
-+ else {
-+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
-+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
-+ &a->h_path);
-+ }
++ d = a->src_dentry;
++ if (au_dbstart(d) == a->btgt) {
++ a->h_path.dentry = a->dst_h_dentry;
++ if (au_ftest_ren(a->flags, DIROPQ)
++ && au_dbdiropq(d) == a->btgt)
++ au_fclr_ren(a->flags, DIROPQ);
++ AuDebugOn(au_dbstart(d) != a->btgt);
++ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
++ a->dst_h_dir, &a->h_path);
+ } else {
-+ /*
-+ * copyup src_dentry to the branch we process,
-+ * and then link(2) to it.
-+ */
-+ if (a->bdst < a->bsrc
-+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
-+ au_unpin(&a->pin);
-+ di_write_unlock(a->parent);
-+ err = au_cpup_before_link(src_dentry, a);
-+ di_write_lock_parent(a->parent);
-+ if (!err)
-+ err = au_pin(&a->pin, dentry, a->bdst,
-+ au_opt_udba(sb),
-+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
-+ if (unlikely(err))
-+ goto out_wh;
-+ }
++ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
++ struct file *h_file;
++
++ au_fset_ren(a->flags, CPUP);
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ au_set_dbstart(d, a->btgt);
++ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
++ h_file = au_h_open_pre(d, a->src_bstart);
++ if (IS_ERR(h_file)) {
++ err = PTR_ERR(h_file);
++ h_file = NULL;
++ } else
++ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
++ !AuCpup_DTIME, a->dst_parent);
++ mutex_unlock(h_mtx);
++ au_h_open_post(d, a->src_bstart, h_file);
+ if (!err) {
-+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
-+ err = -ENOENT;
-+ if (h_src_dentry && h_src_dentry->d_inode)
-+ err = vfsub_link(h_src_dentry,
-+ au_pinned_h_dir(&a->pin),
-+ &a->h_path);
++ d = a->dst_dentry;
++ au_set_h_dptr(d, a->btgt, NULL);
++ au_update_dbstart(d);
++ } else {
++ au_set_h_dptr(d, a->btgt, NULL);
++ au_set_dbstart(d, a->src_bstart);
+ }
+ }
-+ if (unlikely(err))
-+ goto out_unpin;
++ if (!err && a->h_dst)
++ /* it will be set to dinfo later */
++ dget(a->h_dst);
+
-+ if (wh_dentry) {
-+ a->h_path.dentry = wh_dentry;
-+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
-+ dentry);
++ return err;
++}
++
++/* cf. aufs_rmdir() */
++static int au_ren_del_whtmp(struct au_ren_args *a)
++{
++ int err;
++ struct inode *dir;
++
++ dir = a->dst_dir;
++ SiMustAnyLock(dir->i_sb);
++ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
++ au_sbi(dir->i_sb)->si_dirwh)
++ || au_test_fs_remote(a->h_dst->d_sb)) {
++ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
+ if (unlikely(err))
-+ goto out_revert;
++ pr_warning("failed removing whtmp dir %.*s (%d), "
++ "ignored.\n", AuDLNPair(a->h_dst), err);
++ } else {
++ au_nhash_wh_free(&a->thargs->whlist);
++ a->thargs->whlist = a->whlist;
++ a->whlist.nh_num = 0;
++ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
++ dput(a->h_dst);
++ a->thargs = NULL;
+ }
+
-+ dir->i_version++;
-+ if (au_ibstart(dir) == au_dbstart(dentry))
-+ au_cpup_attr_timesizes(dir);
-+ inc_nlink(inode);
-+ inode->i_ctime = dir->i_ctime;
-+ d_instantiate(dentry, au_igrab(inode));
-+ if (d_unhashed(a->h_path.dentry))
-+ /* some filesystem calls d_drop() */
-+ d_drop(dentry);
-+ goto out_unpin; /* success */
++ return 0;
++}
++
++/* make it 'opaque' dir. */
++static int au_ren_diropq(struct au_ren_args *a)
++{
++ int err;
++ struct dentry *diropq;
++
++ err = 0;
++ a->src_bdiropq = au_dbdiropq(a->src_dentry);
++ a->src_hinode = au_hi(a->src_inode, a->btgt);
++ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
++ diropq = au_diropq_create(a->src_dentry, a->btgt);
++ au_hn_imtx_unlock(a->src_hinode);
++ if (IS_ERR(diropq))
++ err = PTR_ERR(diropq);
++ dput(diropq);
+
-+out_revert:
-+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, /*force*/0);
-+ if (unlikely(rerr)) {
-+ AuIOErr("%.*s reverting failed(%d, %d)\n",
-+ AuDLNPair(dentry), err, rerr);
-+ err = -EIO;
-+ }
-+ au_dtime_revert(&dt);
-+out_unpin:
-+ au_unpin(&a->pin);
-+out_wh:
-+ dput(wh_dentry);
-+out_parent:
-+ di_write_unlock(a->parent);
-+ dput(a->src_parent);
-+out_unlock:
-+ if (unlikely(err)) {
-+ au_update_dbstart(dentry);
-+ d_drop(dentry);
-+ }
-+ aufs_read_and_write_unlock2(dentry, src_dentry);
-+out_kfree:
-+ kfree(a);
-+out:
+ return err;
+}
+
-+int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
++static int do_rename(struct au_ren_args *a)
+{
-+ int err, rerr;
-+ aufs_bindex_t bindex;
-+ unsigned char diropq;
-+ struct path h_path;
-+ struct dentry *wh_dentry, *parent, *opq_dentry;
-+ struct mutex *h_mtx;
-+ struct super_block *sb;
-+ struct {
-+ struct au_pin pin;
-+ struct au_dtime dt;
-+ } *a; /* reduce the stack usage */
-+ struct au_wr_dir_args wr_dir_args = {
-+ .force_btgt = -1,
-+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
-+ };
-+
-+ IMustLock(dir);
++ int err;
++ struct dentry *d, *h_d;
+
-+ err = -ENOMEM;
-+ a = kmalloc(sizeof(*a), GFP_NOFS);
-+ if (unlikely(!a))
-+ goto out;
++ /* prepare workqueue args for asynchronous rmdir */
++ h_d = a->dst_h_dentry;
++ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
++ err = -ENOMEM;
++ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
++ if (unlikely(!a->thargs))
++ goto out;
++ a->h_dst = dget(h_d);
++ }
+
-+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
-+ if (unlikely(err))
-+ goto out_free;
-+ err = au_d_may_add(dentry);
-+ if (unlikely(err))
-+ goto out_unlock;
++ /* create whiteout for src_dentry */
++ if (au_ftest_ren(a->flags, WHSRC)) {
++ a->src_bwh = au_dbwh(a->src_dentry);
++ AuDebugOn(a->src_bwh >= 0);
++ a->src_wh_dentry
++ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
++ err = PTR_ERR(a->src_wh_dentry);
++ if (IS_ERR(a->src_wh_dentry))
++ goto out_thargs;
++ }
+
-+ parent = dentry->d_parent; /* dir inode is locked */
-+ di_write_lock_parent(parent);
-+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
-+ &a->pin, &wr_dir_args);
-+ err = PTR_ERR(wh_dentry);
-+ if (IS_ERR(wh_dentry))
-+ goto out_parent;
++ /* lookup whiteout for dentry */
++ if (au_ftest_ren(a->flags, WHDST)) {
++ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
++ a->br);
++ err = PTR_ERR(h_d);
++ if (IS_ERR(h_d))
++ goto out_whsrc;
++ if (!h_d->d_inode)
++ dput(h_d);
++ else
++ a->dst_wh_dentry = h_d;
++ }
+
-+ sb = dentry->d_sb;
-+ bindex = au_dbstart(dentry);
-+ h_path.dentry = au_h_dptr(dentry, bindex);
-+ h_path.mnt = au_sbr_mnt(sb, bindex);
-+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
-+ if (unlikely(err))
-+ goto out_unpin;
++ /* rename dentry to tmpwh */
++ if (a->thargs) {
++ err = au_whtmp_ren(a->dst_h_dentry, a->br);
++ if (unlikely(err))
++ goto out_whdst;
+
-+ /* make the dir opaque */
-+ diropq = 0;
-+ h_mtx = &h_path.dentry->d_inode->i_mutex;
-+ if (wh_dentry
-+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
-+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
-+ opq_dentry = au_diropq_create(dentry, bindex);
-+ mutex_unlock(h_mtx);
-+ err = PTR_ERR(opq_dentry);
-+ if (IS_ERR(opq_dentry))
-+ goto out_dir;
-+ dput(opq_dentry);
-+ diropq = 1;
++ d = a->dst_dentry;
++ au_set_h_dptr(d, a->btgt, NULL);
++ err = au_lkup_neg(d, a->btgt);
++ if (unlikely(err))
++ goto out_whtmp;
++ a->dst_h_dentry = au_h_dptr(d, a->btgt);
+ }
+
-+ err = epilog(dir, bindex, wh_dentry, dentry);
-+ if (!err) {
-+ inc_nlink(dir);
-+ goto out_unpin; /* success */
-+ }
++ /* cpup src */
++ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
++ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
++ struct file *h_file;
+
-+ /* revert */
-+ if (diropq) {
-+ AuLabel(revert opq);
+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
-+ rerr = au_diropq_remove(dentry, bindex);
++ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
++ h_file = au_h_open_pre(a->src_dentry, a->src_bstart);
++ if (IS_ERR(h_file)) {
++ err = PTR_ERR(h_file);
++ h_file = NULL;
++ } else
++ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
++ !AuCpup_DTIME);
+ mutex_unlock(h_mtx);
-+ if (rerr) {
-+ AuIOErr("%.*s reverting diropq failed(%d, %d)\n",
-+ AuDLNPair(dentry), err, rerr);
-+ err = -EIO;
-+ }
-+ }
-+
-+out_dir:
-+ AuLabel(revert dir);
-+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
-+ if (rerr) {
-+ AuIOErr("%.*s reverting dir failed(%d, %d)\n",
-+ AuDLNPair(dentry), err, rerr);
-+ err = -EIO;
-+ }
-+ au_dtime_revert(&a->dt);
-+out_unpin:
-+ au_unpin(&a->pin);
-+ dput(wh_dentry);
-+out_parent:
-+ di_write_unlock(parent);
-+out_unlock:
-+ if (unlikely(err)) {
-+ au_update_dbstart(dentry);
-+ d_drop(dentry);
++ au_h_open_post(a->src_dentry, a->src_bstart, h_file);
++ if (unlikely(err))
++ goto out_whtmp;
+ }
-+ aufs_read_unlock(dentry, AuLock_DW);
-+out_free:
-+ kfree(a);
-+out:
-+ return err;
-+}
-diff -urN a/fs/aufs/i_op.c b/fs/aufs/i_op.c
---- a/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/i_op.c 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,976 @@
-+/*
-+ * Copyright (C) 2005-2011 Junjiro R. Okajima
-+ *
-+ * This program, aufs is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+/*
-+ * inode operations (except add/del/rename)
-+ */
+
-+#include <linux/device_cgroup.h>
-+#include <linux/fs_stack.h>
-+#include <linux/mm.h>
-+#include <linux/namei.h>
-+#include <linux/security.h>
-+#include <linux/uaccess.h>
-+#include "aufs.h"
++ /* rename by vfs_rename or cpup */
++ d = a->dst_dentry;
++ if (au_ftest_ren(a->flags, ISDIR)
++ && (a->dst_wh_dentry
++ || au_dbdiropq(d) == a->btgt
++ /* hide the lower to keep xino */
++ || a->btgt < au_dbend(d)
++ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
++ au_fset_ren(a->flags, DIROPQ);
++ err = au_ren_or_cpup(a);
++ if (unlikely(err))
++ /* leave the copied-up one */
++ goto out_whtmp;
+
-+static int h_permission(struct inode *h_inode, int mask, unsigned int flags,
-+ struct vfsmount *h_mnt, int brperm)
-+{
-+ int err;
-+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
++ /* make dir opaque */
++ if (au_ftest_ren(a->flags, DIROPQ)) {
++ err = au_ren_diropq(a);
++ if (unlikely(err))
++ goto out_rename;
++ }
+
-+ err = -EACCES;
-+ if ((write_mask && IS_IMMUTABLE(h_inode))
-+ || ((mask & MAY_EXEC)
-+ && S_ISREG(h_inode->i_mode)
-+ && ((h_mnt->mnt_flags & MNT_NOEXEC)
-+ || !(h_inode->i_mode & S_IXUGO))))
-+ goto out;
++ /* update target timestamps */
++ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
++ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
++ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
++ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
+
-+ /*
-+ * - skip the lower fs test in the case of write to ro branch.
-+ * - nfs dir permission write check is optimized, but a policy for
-+ * link/rename requires a real check.
-+ */
-+ if ((write_mask && !au_br_writable(brperm))
-+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
-+ && write_mask && !(mask & MAY_READ))
-+ || !h_inode->i_op->permission) {
-+ /* AuLabel(generic_permission); */
-+ err = generic_permission(h_inode, mask, flags,
-+ h_inode->i_op->check_acl);
-+ } else {
-+ /* AuLabel(h_inode->permission); */
-+ err = h_inode->i_op->permission(h_inode, mask, flags);
-+ AuTraceErr(err);
++ /* remove whiteout for dentry */
++ if (a->dst_wh_dentry) {
++ a->h_path.dentry = a->dst_wh_dentry;
++ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
++ a->dst_dentry);
++ if (unlikely(err))
++ goto out_diropq;
+ }
+
-+ if (!err)
-+ err = devcgroup_inode_permission(h_inode, mask);
-+ if (!err)
-+ err = security_inode_permission(h_inode, mask);
++ /* remove whtmp */
++ if (a->thargs)
++ au_ren_del_whtmp(a); /* ignore this error */
+
-+#if 0
-+ if (!err) {
-+ /* todo: do we need to call ima_path_check()? */
-+ struct path h_path = {
-+ .dentry =
-+ .mnt = h_mnt
-+ };
-+ err = ima_path_check(&h_path,
-+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
-+ IMA_COUNT_LEAVE);
-+ }
-+#endif
++ err = 0;
++ goto out_success;
+
++out_diropq:
++ if (au_ftest_ren(a->flags, DIROPQ))
++ au_ren_rev_diropq(err, a);
++out_rename:
++ if (!au_ftest_ren(a->flags, CPUP))
++ au_ren_rev_rename(err, a);
++ else
++ au_ren_rev_cpup(err, a);
++ dput(a->h_dst);
++out_whtmp:
++ if (a->thargs)
++ au_ren_rev_whtmp(err, a);
++out_whdst:
++ dput(a->dst_wh_dentry);
++ a->dst_wh_dentry = NULL;
++out_whsrc:
++ if (a->src_wh_dentry)
++ au_ren_rev_whsrc(err, a);
++out_success:
++ dput(a->src_wh_dentry);
++ dput(a->dst_wh_dentry);
++out_thargs:
++ if (a->thargs) {
++ dput(a->h_dst);
++ au_whtmp_rmdir_free(a->thargs);
++ a->thargs = NULL;
++ }
+out:
+ return err;
+}
+
-+static int aufs_permission(struct inode *inode, int mask, unsigned int flags)
-+{
-+ int err;
-+ aufs_bindex_t bindex, bend;
-+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
-+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
-+ struct inode *h_inode;
-+ struct super_block *sb;
-+ struct au_branch *br;
-+
-+ /* todo: support rcu-walk? */
-+ if (flags & IPERM_FLAG_RCU)
-+ return -ECHILD;
-+
-+ sb = inode->i_sb;
-+ si_read_lock(sb, AuLock_FLUSH);
-+ ii_read_lock_child(inode);
-+#if 0
-+ err = au_iigen_test(inode, au_sigen(sb));
-+ if (unlikely(err))
-+ goto out;
-+#endif
++/* ---------------------------------------------------------------------- */
+
-+ if (!isdir || write_mask) {
-+ err = au_busy_or_stale();
-+ h_inode = au_h_iptr(inode, au_ibstart(inode));
-+ if (unlikely(!h_inode
-+ || (h_inode->i_mode & S_IFMT)
-+ != (inode->i_mode & S_IFMT)))
-+ goto out;
++/*
++ * test if @dentry dir can be rename destination or not.
++ * success means, it is a logically empty dir.
++ */
++static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
++{
++ return au_test_empty(dentry, whlist);
++}
+
-+ err = 0;
-+ bindex = au_ibstart(inode);
-+ br = au_sbr(sb, bindex);
-+ err = h_permission(h_inode, mask, flags, br->br_mnt,
-+ br->br_perm);
-+ if (write_mask
-+ && !err
-+ && !special_file(h_inode->i_mode)) {
-+ /* test whether the upper writable branch exists */
-+ err = -EROFS;
-+ for (; bindex >= 0; bindex--)
-+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
-+ err = 0;
-+ break;
-+ }
-+ }
++/*
++ * test if @dentry dir can be rename source or not.
++ * if it can, return 0 and @children is filled.
++ * success means,
++ * - it is a logically empty dir.
++ * - or, it exists on writable branch and has no children including whiteouts
++ * on the lower branch.
++ */
++static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
++{
++ int err;
++ unsigned int rdhash;
++ aufs_bindex_t bstart;
++
++ bstart = au_dbstart(dentry);
++ if (bstart != btgt) {
++ struct au_nhash whlist;
++
++ SiMustAnyLock(dentry->d_sb);
++ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
++ if (!rdhash)
++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
++ dentry));
++ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
++ if (unlikely(err))
++ goto out;
++ err = au_test_empty(dentry, &whlist);
++ au_nhash_wh_free(&whlist);
+ goto out;
+ }
+
-+ /* non-write to dir */
-+ err = 0;
-+ bend = au_ibend(inode);
-+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
-+ h_inode = au_h_iptr(inode, bindex);
-+ if (h_inode) {
-+ err = au_busy_or_stale();
-+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
-+ break;
++ if (bstart == au_dbtaildir(dentry))
++ return 0; /* success */
+
-+ br = au_sbr(sb, bindex);
-+ err = h_permission(h_inode, mask, flags, br->br_mnt,
-+ br->br_perm);
-+ }
-+ }
++ err = au_test_empty_lower(dentry);
+
+out:
-+ ii_read_unlock(inode);
-+ si_read_unlock(sb);
++ if (err == -ENOTEMPTY) {
++ AuWarn1("renaming dir who has child(ren) on multiple branches,"
++ " is not supported\n");
++ err = -EXDEV;
++ }
+ return err;
+}
+
-+/* ---------------------------------------------------------------------- */
-+
-+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
-+ struct nameidata *nd)
++/* side effect: sets whlist and h_dentry */
++static int au_ren_may_dir(struct au_ren_args *a)
+{
-+ struct dentry *ret, *parent;
-+ struct inode *inode;
-+ struct super_block *sb;
-+ int err, npositive;
-+
-+ IMustLock(dir);
-+
-+ sb = dir->i_sb;
-+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
-+ ret = ERR_PTR(err);
-+ if (unlikely(err))
-+ goto out;
++ int err;
++ unsigned int rdhash;
++ struct dentry *d;
+
-+ ret = ERR_PTR(-ENAMETOOLONG);
-+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
-+ goto out_si;
-+ err = au_di_init(dentry);
-+ ret = ERR_PTR(err);
-+ if (unlikely(err))
-+ goto out_si;
++ d = a->dst_dentry;
++ SiMustAnyLock(d->d_sb);
+
-+ npositive = 0; /* suppress a warning */
-+ parent = dentry->d_parent; /* dir inode is locked */
-+ di_read_lock_parent(parent, AuLock_IR);
-+ err = au_alive_dir(parent);
-+ if (!err)
-+ err = au_digen_test(parent, au_sigen(sb));
-+ if (!err) {
-+ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
-+ /*type*/0, nd);
-+ err = npositive;
-+ }
-+ di_read_unlock(parent, AuLock_IR);
-+ ret = ERR_PTR(err);
-+ if (unlikely(err < 0))
-+ goto out_unlock;
++ err = 0;
++ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
++ rdhash = au_sbi(d->d_sb)->si_rdhash;
++ if (!rdhash)
++ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
++ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
++ if (unlikely(err))
++ goto out;
+
-+ inode = NULL;
-+ if (npositive) {
-+ inode = au_new_inode(dentry, /*must_new*/0);
-+ ret = (void *)inode;
++ au_set_dbstart(d, a->dst_bstart);
++ err = may_rename_dstdir(d, &a->whlist);
++ au_set_dbstart(d, a->btgt);
+ }
-+ if (IS_ERR(inode))
-+ goto out_unlock;
++ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
++ if (unlikely(err))
++ goto out;
+
-+ ret = d_splice_alias(inode, dentry);
-+ if (unlikely(IS_ERR(ret) && inode)) {
-+ ii_write_unlock(inode);
-+ iput(inode);
++ d = a->src_dentry;
++ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
++ if (au_ftest_ren(a->flags, ISDIR)) {
++ err = may_rename_srcdir(d, a->btgt);
++ if (unlikely(err)) {
++ au_nhash_wh_free(&a->whlist);
++ a->whlist.nh_num = 0;
++ }
+ }
-+
-+out_unlock:
-+ di_write_unlock(dentry);
-+out_si:
-+ si_read_unlock(sb);
+out:
-+ return ret;
++ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
-+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
-+ const unsigned char add_entry, aufs_bindex_t bcpup,
-+ aufs_bindex_t bstart)
++/*
++ * simple tests for rename.
++ * following the checks in vfs, plus the parent-child relationship.
++ */
++static int au_may_ren(struct au_ren_args *a)
+{
-+ int err;
-+ struct dentry *h_parent;
-+ struct inode *h_dir;
++ int err, isdir;
++ struct inode *h_inode;
+
-+ if (add_entry)
-+ IMustLock(parent->d_inode);
-+ else
-+ di_write_lock_parent(parent);
++ if (a->src_bstart == a->btgt) {
++ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
++ au_ftest_ren(a->flags, ISDIR));
++ if (unlikely(err))
++ goto out;
++ err = -EINVAL;
++ if (unlikely(a->src_h_dentry == a->h_trap))
++ goto out;
++ }
+
+ err = 0;
-+ if (!au_h_dptr(parent, bcpup)) {
-+ if (bstart < bcpup)
-+ err = au_cpdown_dirs(dentry, bcpup);
-+ else
-+ err = au_cpup_dirs(dentry, bcpup);
-+ }
-+ if (!err && add_entry) {
-+ h_parent = au_h_dptr(parent, bcpup);
-+ h_dir = h_parent->d_inode;
-+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
-+ err = au_lkup_neg(dentry, bcpup);
-+ /* todo: no unlock here */
-+ mutex_unlock(&h_dir->i_mutex);
++ if (a->dst_bstart != a->btgt)
++ goto out;
+
-+ AuDbg("bcpup %d\n", bcpup);
-+ if (!err) {
-+ if (!dentry->d_inode)
-+ au_set_h_dptr(dentry, bstart, NULL);
-+ au_update_dbrange(dentry, /*do_put_zero*/0);
-+ }
-+ }
++ err = -ENOTEMPTY;
++ if (unlikely(a->dst_h_dentry == a->h_trap))
++ goto out;
+
-+ if (!add_entry)
-+ di_write_unlock(parent);
-+ if (!err)
-+ err = bcpup; /* success */
++ err = -EIO;
++ h_inode = a->dst_h_dentry->d_inode;
++ isdir = !!au_ftest_ren(a->flags, ISDIR);
++ if (!a->dst_dentry->d_inode) {
++ if (unlikely(h_inode))
++ goto out;
++ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
++ isdir);
++ } else {
++ if (unlikely(!h_inode || !h_inode->i_nlink))
++ goto out;
++ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
++ isdir);
++ if (unlikely(err))
++ goto out;
++ }
+
++out:
++ if (unlikely(err == -ENOENT || err == -EEXIST))
++ err = -EIO;
+ AuTraceErr(err);
+ return err;
+}
+
++/* ---------------------------------------------------------------------- */
++
+/*
-+ * decide the branch and the parent dir where we will create a new entry.
-+ * returns new bindex or an error.
-+ * copyup the parent dir if needed.
++ * locking order
++ * (VFS)
++ * - src_dir and dir by lock_rename()
++ * - inode if exists
++ * (aufs)
++ * - lock all
++ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
++ * + si_read_lock
++ * + di_write_lock2_child()
++ * + di_write_lock_child()
++ * + ii_write_lock_child()
++ * + di_write_lock_child2()
++ * + ii_write_lock_child2()
++ * + src_parent and parent
++ * + di_write_lock_parent()
++ * + ii_write_lock_parent()
++ * + di_write_lock_parent2()
++ * + ii_write_lock_parent2()
++ * + lower src_dir and dir by vfsub_lock_rename()
++ * + verify every relationship between child and parent. if any
++ * of them fails, unlock all and return -EBUSY.
+ */
-+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
-+ struct au_wr_dir_args *args)
++static void au_ren_unlock(struct au_ren_args *a)
+{
-+ int err;
-+ aufs_bindex_t bcpup, bstart, src_bstart;
-+ const unsigned char add_entry = !!au_ftest_wrdir(args->flags,
-+ ADD_ENTRY);
+ struct super_block *sb;
-+ struct dentry *parent;
-+ struct au_sbinfo *sbinfo;
+
-+ sb = dentry->d_sb;
-+ sbinfo = au_sbi(sb);
-+ parent = dget_parent(dentry);
-+ bstart = au_dbstart(dentry);
-+ bcpup = bstart;
-+ if (args->force_btgt < 0) {
-+ if (src_dentry) {
-+ src_bstart = au_dbstart(src_dentry);
-+ if (src_bstart < bstart)
-+ bcpup = src_bstart;
-+ } else if (add_entry) {
-+ err = AuWbrCreate(sbinfo, dentry,
-+ au_ftest_wrdir(args->flags, ISDIR));
-+ bcpup = err;
-+ }
++ sb = a->dst_dentry->d_sb;
++ if (au_ftest_ren(a->flags, MNT_WRITE))
++ mnt_drop_write(a->br->br_mnt);
++ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
++ a->dst_h_parent, a->dst_hdir);
++}
+
-+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
-+ if (add_entry)
-+ err = AuWbrCopyup(sbinfo, dentry);
-+ else {
-+ if (!IS_ROOT(dentry)) {
-+ di_read_lock_parent(parent, !AuLock_IR);
-+ err = AuWbrCopyup(sbinfo, dentry);
-+ di_read_unlock(parent, !AuLock_IR);
-+ } else
-+ err = AuWbrCopyup(sbinfo, dentry);
-+ }
-+ bcpup = err;
-+ if (unlikely(err < 0))
-+ goto out;
-+ }
-+ } else {
-+ bcpup = args->force_btgt;
-+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
-+ }
++static int au_ren_lock(struct au_ren_args *a)
++{
++ int err;
++ unsigned int udba;
+
-+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
-+ err = bcpup;
-+ if (bcpup == bstart)
++ err = 0;
++ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
++ a->src_hdir = au_hi(a->src_dir, a->btgt);
++ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
++ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
++ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
++ a->dst_h_parent, a->dst_hdir);
++ udba = au_opt_udba(a->src_dentry->d_sb);
++ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
++ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
++ err = au_busy_or_stale();
++ if (!err && au_dbstart(a->src_dentry) == a->btgt)
++ err = au_h_verify(a->src_h_dentry, udba,
++ a->src_h_parent->d_inode, a->src_h_parent,
++ a->br);
++ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
++ err = au_h_verify(a->dst_h_dentry, udba,
++ a->dst_h_parent->d_inode, a->dst_h_parent,
++ a->br);
++ if (!err) {
++ err = mnt_want_write(a->br->br_mnt);
++ if (unlikely(err))
++ goto out_unlock;
++ au_fset_ren(a->flags, MNT_WRITE);
+ goto out; /* success */
-+
-+ /* copyup the new parent into the branch we process */
-+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
-+ if (err >= 0) {
-+ if (!dentry->d_inode) {
-+ au_set_h_dptr(dentry, bstart, NULL);
-+ au_set_dbstart(dentry, bcpup);
-+ au_set_dbend(dentry, bcpup);
-+ }
-+ AuDebugOn(add_entry && !au_h_dptr(dentry, bcpup));
+ }
+
++ err = au_busy_or_stale();
++
++out_unlock:
++ au_ren_unlock(a);
+out:
-+ dput(parent);
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
-+struct dentry *au_pinned_h_parent(struct au_pin *pin)
++static void au_ren_refresh_dir(struct au_ren_args *a)
+{
-+ if (pin && pin->parent)
-+ return au_h_dptr(pin->parent, pin->bindex);
-+ return NULL;
-+}
++ struct inode *dir;
+
-+void au_unpin(struct au_pin *p)
-+{
-+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
-+ mnt_drop_write(p->h_mnt);
-+ if (!p->hdir)
++ dir = a->dst_dir;
++ dir->i_version++;
++ if (au_ftest_ren(a->flags, ISDIR)) {
++ /* is this updating defined in POSIX? */
++ au_cpup_attr_timesizes(a->src_inode);
++ au_cpup_attr_nlink(dir, /*force*/1);
++ }
++
++ if (au_ibstart(dir) == a->btgt)
++ au_cpup_attr_timesizes(dir);
++
++ if (au_ftest_ren(a->flags, ISSAMEDIR))
+ return;
+
-+ au_hn_imtx_unlock(p->hdir);
-+ if (!au_ftest_pin(p->flags, DI_LOCKED))
-+ di_read_unlock(p->parent, AuLock_IR);
-+ iput(p->hdir->hi_inode);
-+ dput(p->parent);
-+ p->parent = NULL;
-+ p->hdir = NULL;
-+ p->h_mnt = NULL;
++ dir = a->src_dir;
++ dir->i_version++;
++ if (au_ftest_ren(a->flags, ISDIR))
++ au_cpup_attr_nlink(dir, /*force*/1);
++ if (au_ibstart(dir) == a->btgt)
++ au_cpup_attr_timesizes(dir);
+}
+
-+int au_do_pin(struct au_pin *p)
++static void au_ren_refresh(struct au_ren_args *a)
+{
-+ int err;
++ aufs_bindex_t bend, bindex;
++ struct dentry *d, *h_d;
++ struct inode *i, *h_i;
+ struct super_block *sb;
-+ struct dentry *h_dentry, *h_parent;
-+ struct au_branch *br;
-+ struct inode *h_dir;
+
-+ err = 0;
-+ sb = p->dentry->d_sb;
-+ br = au_sbr(sb, p->bindex);
-+ if (IS_ROOT(p->dentry)) {
-+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
-+ p->h_mnt = br->br_mnt;
-+ err = mnt_want_write(p->h_mnt);
-+ if (unlikely(err)) {
-+ au_fclr_pin(p->flags, MNT_WRITE);
-+ goto out_err;
-+ }
++ d = a->dst_dentry;
++ d_drop(d);
++ if (a->h_dst)
++ /* already dget-ed by au_ren_or_cpup() */
++ au_set_h_dptr(d, a->btgt, a->h_dst);
++
++ i = a->dst_inode;
++ if (i) {
++ if (!au_ftest_ren(a->flags, ISDIR))
++ vfsub_drop_nlink(i);
++ else {
++ vfsub_dead_dir(i);
++ au_cpup_attr_timesizes(i);
+ }
-+ goto out;
++ au_update_dbrange(d, /*do_put_zero*/1);
++ } else {
++ bend = a->btgt;
++ for (bindex = au_dbstart(d); bindex < bend; bindex++)
++ au_set_h_dptr(d, bindex, NULL);
++ bend = au_dbend(d);
++ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
++ au_set_h_dptr(d, bindex, NULL);
++ au_update_dbrange(d, /*do_put_zero*/0);
+ }
+
-+ h_dentry = NULL;
-+ if (p->bindex <= au_dbend(p->dentry))
-+ h_dentry = au_h_dptr(p->dentry, p->bindex);
-+
-+ p->parent = dget_parent(p->dentry);
-+ if (!au_ftest_pin(p->flags, DI_LOCKED))
-+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
-+
-+ h_dir = NULL;
-+ h_parent = au_h_dptr(p->parent, p->bindex);
-+ p->hdir = au_hi(p->parent->d_inode, p->bindex);
-+ if (p->hdir)
-+ h_dir = p->hdir->hi_inode;
-+
-+ /*
-+ * udba case, or
-+ * if DI_LOCKED is not set, then p->parent may be different
-+ * and h_parent can be NULL.
-+ */
-+ if (unlikely(!p->hdir || !h_dir || !h_parent)) {
-+ err = -EBUSY;
-+ if (!au_ftest_pin(p->flags, DI_LOCKED))
-+ di_read_unlock(p->parent, AuLock_IR);
-+ dput(p->parent);
-+ p->parent = NULL;
-+ goto out_err;
++ d = a->src_dentry;
++ au_set_dbwh(d, -1);
++ bend = au_dbend(d);
++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
++ h_d = au_h_dptr(d, bindex);
++ if (h_d)
++ au_set_h_dptr(d, bindex, NULL);
+ }
++ au_set_dbend(d, a->btgt);
+
-+ au_igrab(h_dir);
-+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
-+
-+ if (unlikely(p->hdir->hi_inode != h_parent->d_inode)) {
-+ err = -EBUSY;
-+ goto out_unpin;
-+ }
-+ if (h_dentry) {
-+ err = au_h_verify(h_dentry, p->udba, h_dir, h_parent, br);
-+ if (unlikely(err)) {
-+ au_fclr_pin(p->flags, MNT_WRITE);
-+ goto out_unpin;
-+ }
-+ }
++ sb = d->d_sb;
++ i = a->src_inode;
++ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
++ return; /* success */
+
-+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
-+ p->h_mnt = br->br_mnt;
-+ err = mnt_want_write(p->h_mnt);
-+ if (unlikely(err)) {
-+ au_fclr_pin(p->flags, MNT_WRITE);
-+ goto out_unpin;
++ bend = au_ibend(i);
++ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
++ h_i = au_h_iptr(i, bindex);
++ if (h_i) {
++ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
++ /* ignore this error */
++ au_set_h_iptr(i, bindex, NULL, 0);
+ }
+ }
-+ goto out; /* success */
-+
-+out_unpin:
-+ au_unpin(p);
-+out_err:
-+ pr_err("err %d\n", err);
-+ err = au_busy_or_stale();
-+out:
-+ return err;
-+}
-+
-+void au_pin_init(struct au_pin *p, struct dentry *dentry,
-+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
-+ unsigned int udba, unsigned char flags)
-+{
-+ p->dentry = dentry;
-+ p->udba = udba;
-+ p->lsc_di = lsc_di;
-+ p->lsc_hi = lsc_hi;
-+ p->flags = flags;
-+ p->bindex = bindex;
-+
-+ p->parent = NULL;
-+ p->hdir = NULL;
-+ p->h_mnt = NULL;
-+}
-+
-+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
-+ unsigned int udba, unsigned char flags)
-+{
-+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
-+ udba, flags);
-+ return au_do_pin(pin);
++ au_set_ibend(i, a->btgt);
+}
+
-+/* ---------------------------------------------------------------------- */
-+
-+/*
-+ * ->setattr() and ->getattr() are called in various cases.
-+ * chmod, stat: dentry is revalidated.
-+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
-+ * unhashed.
-+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
-+ */
-+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
-+static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
++/* ---------------------------------------------------------------------- */
++
++/* mainly for link(2) and rename(2) */
++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
+{
-+ int err;
-+ struct inode *inode;
++ aufs_bindex_t bdiropq, bwh;
+ struct dentry *parent;
++ struct au_branch *br;
+
-+ err = 0;
-+ inode = dentry->d_inode;
-+ if (au_digen_test(dentry, sigen)) {
-+ parent = dget_parent(dentry);
-+ di_read_lock_parent(parent, AuLock_IR);
-+ err = au_refresh_dentry(dentry, parent);
-+ di_read_unlock(parent, AuLock_IR);
-+ dput(parent);
-+ }
-+
-+ AuTraceErr(err);
-+ return err;
-+}
++ parent = dentry->d_parent;
++ IMustLock(parent->d_inode); /* dir is locked */
+
-+#define AuIcpup_DID_CPUP 1
-+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
-+#define au_fset_icpup(flags, name) \
-+ do { (flags) |= AuIcpup_##name; } while (0)
-+#define au_fclr_icpup(flags, name) \
-+ do { (flags) &= ~AuIcpup_##name; } while (0)
++ bdiropq = au_dbdiropq(parent);
++ bwh = au_dbwh(dentry);
++ br = au_sbr(dentry->d_sb, btgt);
++ if (au_br_rdonly(br)
++ || (0 <= bdiropq && bdiropq < btgt)
++ || (0 <= bwh && bwh < btgt))
++ btgt = -1;
+
-+struct au_icpup_args {
-+ unsigned char flags;
-+ unsigned char pin_flags;
-+ aufs_bindex_t btgt;
-+ unsigned int udba;
-+ struct au_pin pin;
-+ struct path h_path;
-+ struct inode *h_inode;
-+};
++ AuDbg("btgt %d\n", btgt);
++ return btgt;
++}
+
-+static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
-+ struct au_icpup_args *a)
++/* sets src_bstart, dst_bstart and btgt */
++static int au_ren_wbr(struct au_ren_args *a)
+{
+ int err;
-+ loff_t sz;
-+ aufs_bindex_t bstart, ibstart;
-+ struct dentry *hi_wh, *parent;
-+ struct inode *inode;
-+ struct file *h_file;
+ struct au_wr_dir_args wr_dir_args = {
-+ .force_btgt = -1,
-+ .flags = 0
++ /* .force_btgt = -1, */
++ .flags = AuWrDir_ADD_ENTRY
+ };
+
-+ bstart = au_dbstart(dentry);
-+ inode = dentry->d_inode;
-+ if (S_ISDIR(inode->i_mode))
++ a->src_bstart = au_dbstart(a->src_dentry);
++ a->dst_bstart = au_dbstart(a->dst_dentry);
++ if (au_ftest_ren(a->flags, ISDIR))
+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
-+ /* plink or hi_wh() case */
-+ ibstart = au_ibstart(inode);
-+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
-+ wr_dir_args.force_btgt = ibstart;
-+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
-+ if (unlikely(err < 0))
-+ goto out;
++ wr_dir_args.force_btgt = a->src_bstart;
++ if (a->dst_inode && a->dst_bstart < a->src_bstart)
++ wr_dir_args.force_btgt = a->dst_bstart;
++ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
++ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
+ a->btgt = err;
-+ if (err != bstart)
-+ au_fset_icpup(a->flags, DID_CPUP);
+
-+ err = 0;
-+ a->pin_flags = AuPin_MNT_WRITE;
-+ parent = NULL;
-+ if (!IS_ROOT(dentry)) {
-+ au_fset_pin(a->pin_flags, DI_LOCKED);
-+ parent = dget_parent(dentry);
-+ di_write_lock_parent(parent);
-+ }
++ return err;
++}
+
-+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
-+ if (unlikely(err))
-+ goto out_parent;
++static void au_ren_dt(struct au_ren_args *a)
++{
++ a->h_path.dentry = a->src_h_parent;
++ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
++ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
++ a->h_path.dentry = a->dst_h_parent;
++ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
++ }
+
-+ a->h_path.dentry = au_h_dptr(dentry, bstart);
-+ a->h_inode = a->h_path.dentry->d_inode;
-+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
-+ sz = -1;
-+ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode))
-+ sz = ia->ia_size;
++ au_fclr_ren(a->flags, DT_DSTDIR);
++ if (!au_ftest_ren(a->flags, ISDIR))
++ return;
+
-+ h_file = NULL;
-+ hi_wh = NULL;
-+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
-+ hi_wh = au_hi_wh(inode, a->btgt);
-+ if (!hi_wh) {
-+ err = au_sio_cpup_wh(dentry, a->btgt, sz, /*file*/NULL);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ hi_wh = au_hi_wh(inode, a->btgt);
-+ /* todo: revalidate hi_wh? */
-+ }
++ a->h_path.dentry = a->src_h_dentry;
++ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
++ if (a->dst_h_dentry->d_inode) {
++ au_fset_ren(a->flags, DT_DSTDIR);
++ a->h_path.dentry = a->dst_h_dentry;
++ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
+ }
++}
+
-+ if (parent) {
-+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
-+ di_downgrade_lock(parent, AuLock_IR);
-+ dput(parent);
-+ parent = NULL;
-+ }
-+ if (!au_ftest_icpup(a->flags, DID_CPUP))
-+ goto out; /* success */
++static void au_ren_rev_dt(int err, struct au_ren_args *a)
++{
++ struct dentry *h_d;
++ struct mutex *h_mtx;
+
-+ if (!d_unhashed(dentry)) {
-+ h_file = au_h_open_pre(dentry, bstart);
-+ if (IS_ERR(h_file)) {
-+ err = PTR_ERR(h_file);
-+ h_file = NULL;
-+ } else
-+ err = au_sio_cpup_simple(dentry, a->btgt, sz,
-+ AuCpup_DTIME);
-+ if (!err)
-+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
-+ } else if (!hi_wh)
-+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
-+ else
-+ a->h_path.dentry = hi_wh; /* do not dget here */
++ au_dtime_revert(a->src_dt + AuPARENT);
++ if (!au_ftest_ren(a->flags, ISSAMEDIR))
++ au_dtime_revert(a->dst_dt + AuPARENT);
+
-+out_unlock:
-+ mutex_unlock(&a->h_inode->i_mutex);
-+ au_h_open_post(dentry, bstart, h_file);
-+ a->h_inode = a->h_path.dentry->d_inode;
-+ if (!err) {
-+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
-+ goto out; /* success */
-+ }
++ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
++ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
++ h_mtx = &h_d->d_inode->i_mutex;
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ au_dtime_revert(a->src_dt + AuCHILD);
++ mutex_unlock(h_mtx);
+
-+ au_unpin(&a->pin);
-+out_parent:
-+ if (parent) {
-+ di_write_unlock(parent);
-+ dput(parent);
++ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
++ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
++ h_mtx = &h_d->d_inode->i_mutex;
++ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
++ au_dtime_revert(a->dst_dt + AuCHILD);
++ mutex_unlock(h_mtx);
++ }
+ }
-+out:
-+ return err;
+}
+
-+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
++/* ---------------------------------------------------------------------- */
++
++int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
++ struct inode *_dst_dir, struct dentry *_dst_dentry)
+{
-+ int err;
-+ struct inode *inode;
-+ struct super_block *sb;
-+ struct file *file;
-+ struct au_icpup_args *a;
++ int err, flags;
++ /* reduce stack space */
++ struct au_ren_args *a;
+
-+ inode = dentry->d_inode;
-+ IMustLock(inode);
++ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry));
++ IMustLock(_src_dir);
++ IMustLock(_dst_dir);
++
++ err = -ENOMEM;
++ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
++ a = kzalloc(sizeof(*a), GFP_NOFS);
++ if (unlikely(!a))
++ goto out;
++
++ a->src_dir = _src_dir;
++ a->src_dentry = _src_dentry;
++ a->src_inode = a->src_dentry->d_inode;
++ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
++ a->dst_dir = _dst_dir;
++ a->dst_dentry = _dst_dentry;
++ a->dst_inode = a->dst_dentry->d_inode;
++ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
++ if (a->dst_inode) {
++ IMustLock(a->dst_inode);
++ au_igrab(a->dst_inode);
++ }
++
++ err = -ENOTDIR;
++ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
++ if (S_ISDIR(a->src_inode->i_mode)) {
++ au_fset_ren(a->flags, ISDIR);
++ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode)))
++ goto out_free;
++ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
++ AuLock_DIR | flags);
++ } else
++ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
++ flags);
++ if (unlikely(err))
++ goto out_free;
++
++ err = au_d_hashed_positive(a->src_dentry);
++ if (unlikely(err))
++ goto out_unlock;
++ err = -ENOENT;
++ if (a->dst_inode) {
++ /*
++ * If it is a dir, VFS unhash dst_dentry before this
++ * function. It means we cannot rely upon d_unhashed().
++ */
++ if (unlikely(!a->dst_inode->i_nlink))
++ goto out_unlock;
++ if (!S_ISDIR(a->dst_inode->i_mode)) {
++ err = au_d_hashed_positive(a->dst_dentry);
++ if (unlikely(err))
++ goto out_unlock;
++ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
++ goto out_unlock;
++ } else if (unlikely(d_unhashed(a->dst_dentry)))
++ goto out_unlock;
+
-+ err = -ENOMEM;
-+ a = kzalloc(sizeof(*a), GFP_NOFS);
-+ if (unlikely(!a))
-+ goto out;
++ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
++ di_write_lock_parent(a->dst_parent);
+
-+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
-+ ia->ia_valid &= ~ATTR_MODE;
++ /* which branch we process */
++ err = au_ren_wbr(a);
++ if (unlikely(err < 0))
++ goto out_parent;
++ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
++ a->h_path.mnt = a->br->br_mnt;
+
-+ file = NULL;
-+ sb = dentry->d_sb;
-+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
++ /* are they available to be renamed */
++ err = au_ren_may_dir(a);
+ if (unlikely(err))
-+ goto out_kfree;
++ goto out_children;
+
-+ if (ia->ia_valid & ATTR_FILE) {
-+ /* currently ftruncate(2) only */
-+ AuDebugOn(!S_ISREG(inode->i_mode));
-+ file = ia->ia_file;
-+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
-+ if (unlikely(err))
-+ goto out_si;
-+ ia->ia_file = au_hf_top(file);
-+ a->udba = AuOpt_UDBA_NONE;
++ /* prepare the writable parent dir on the same branch */
++ if (a->dst_bstart == a->btgt) {
++ au_fset_ren(a->flags, WHDST);
+ } else {
-+ /* fchmod() doesn't pass ia_file */
-+ a->udba = au_opt_udba(sb);
-+ di_write_lock_child(dentry);
-+ /* no d_unlinked(), to set UDBA_NONE for root */
-+ if (d_unhashed(dentry))
-+ a->udba = AuOpt_UDBA_NONE;
-+ if (a->udba != AuOpt_UDBA_NONE) {
-+ AuDebugOn(IS_ROOT(dentry));
-+ err = au_reval_for_attr(dentry, au_sigen(sb));
-+ if (unlikely(err))
-+ goto out_dentry;
-+ }
++ err = au_cpup_dirs(a->dst_dentry, a->btgt);
++ if (unlikely(err))
++ goto out_children;
+ }
+
-+ err = au_pin_and_icpup(dentry, ia, a);
++ if (a->src_dir != a->dst_dir) {
++ /*
++ * this temporary unlock is safe,
++ * because both dir->i_mutex are locked.
++ */
++ di_write_unlock(a->dst_parent);
++ di_write_lock_parent(a->src_parent);
++ err = au_wr_dir_need_wh(a->src_dentry,
++ au_ftest_ren(a->flags, ISDIR),
++ &a->btgt);
++ di_write_unlock(a->src_parent);
++ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
++ au_fclr_ren(a->flags, ISSAMEDIR);
++ } else
++ err = au_wr_dir_need_wh(a->src_dentry,
++ au_ftest_ren(a->flags, ISDIR),
++ &a->btgt);
+ if (unlikely(err < 0))
-+ goto out_dentry;
-+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
-+ ia->ia_file = NULL;
-+ ia->ia_valid &= ~ATTR_FILE;
-+ }
-+
-+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
-+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
-+ == (ATTR_MODE | ATTR_CTIME)) {
-+ err = security_path_chmod(a->h_path.dentry, a->h_path.mnt,
-+ ia->ia_mode);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
-+ && (ia->ia_valid & ATTR_CTIME)) {
-+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ }
++ goto out_children;
++ if (err)
++ au_fset_ren(a->flags, WHSRC);
+
-+ if (ia->ia_valid & ATTR_SIZE) {
-+ struct file *f;
++ /* lock them all */
++ err = au_ren_lock(a);
++ if (unlikely(err))
++ goto out_children;
+
-+ if (ia->ia_size < i_size_read(inode))
-+ /* unmap only */
-+ truncate_setsize(inode, ia->ia_size);
++ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
++ err = au_may_ren(a);
++ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
++ err = -ENAMETOOLONG;
++ if (unlikely(err))
++ goto out_hdir;
+
-+ f = NULL;
-+ if (ia->ia_valid & ATTR_FILE)
-+ f = ia->ia_file;
-+ mutex_unlock(&a->h_inode->i_mutex);
-+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
-+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
-+ } else
-+ err = vfsub_notify_change(&a->h_path, ia);
-+ if (!err)
-+ au_cpup_attr_changeable(inode);
++ /* store timestamps to be revertible */
++ au_ren_dt(a);
+
-+out_unlock:
-+ mutex_unlock(&a->h_inode->i_mutex);
-+ au_unpin(&a->pin);
++ /* here we go */
++ err = do_rename(a);
+ if (unlikely(err))
-+ au_update_dbstart(dentry);
-+out_dentry:
-+ di_write_unlock(dentry);
-+ if (file) {
-+ fi_write_unlock(file);
-+ ia->ia_file = file;
-+ ia->ia_valid |= ATTR_FILE;
++ goto out_dt;
++
++ /* update dir attributes */
++ au_ren_refresh_dir(a);
++
++ /* dput/iput all lower dentries */
++ au_ren_refresh(a);
++
++ goto out_hdir; /* success */
++
++out_dt:
++ au_ren_rev_dt(err, a);
++out_hdir:
++ au_ren_unlock(a);
++out_children:
++ au_nhash_wh_free(&a->whlist);
++ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
++ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
++ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
++ au_set_dbstart(a->dst_dentry, a->dst_bstart);
+ }
-+out_si:
-+ si_read_unlock(sb);
-+out_kfree:
++out_parent:
++ if (!err)
++ d_move(a->src_dentry, a->dst_dentry);
++ else {
++ au_update_dbstart(a->dst_dentry);
++ if (!a->dst_inode)
++ d_drop(a->dst_dentry);
++ }
++ if (au_ftest_ren(a->flags, ISSAMEDIR))
++ di_write_unlock(a->dst_parent);
++ else
++ di_write_unlock2(a->src_parent, a->dst_parent);
++out_unlock:
++ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
++out_free:
++ iput(a->dst_inode);
++ if (a->thargs)
++ au_whtmp_rmdir_free(a->thargs);
+ kfree(a);
+out:
+ AuTraceErr(err);
+ return err;
+}
+--- a/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/iinfo.c 2011-03-06 23:22:01.412413001 +0000
+@@ -0,0 +1,264 @@
++/*
++ * Copyright (C) 2005-2011 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
+
-+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
-+ unsigned int nlink)
++/*
++ * inode private data
++ */
++
++#include "aufs.h"
++
++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
+{
-+ inode->i_mode = st->mode;
-+ inode->i_uid = st->uid;
-+ inode->i_gid = st->gid;
-+ inode->i_atime = st->atime;
-+ inode->i_mtime = st->mtime;
-+ inode->i_ctime = st->ctime;
++ struct inode *h_inode;
+
-+ au_cpup_attr_nlink(inode, /*force*/0);
-+ if (S_ISDIR(inode->i_mode)) {
-+ inode->i_nlink -= nlink;
-+ inode->i_nlink += st->nlink;
-+ }
++ IiMustAnyLock(inode);
+
-+ spin_lock(&inode->i_lock);
-+ inode->i_blocks = st->blocks;
-+ i_size_write(inode, st->size);
-+ spin_unlock(&inode->i_lock);
++ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
++ return h_inode;
+}
+
-+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
-+ struct dentry *dentry, struct kstat *st)
++/* todo: hard/soft set? */
++void au_hiput(struct au_hinode *hinode)
+{
-+ int err;
-+ unsigned int mnt_flags;
-+ aufs_bindex_t bindex;
-+ unsigned char udba_none, positive;
-+ struct super_block *sb, *h_sb;
-+ struct inode *inode;
-+ struct vfsmount *h_mnt;
-+ struct dentry *h_dentry;
++ au_hn_free(hinode);
++ dput(hinode->hi_whdentry);
++ iput(hinode->hi_inode);
++}
++
++unsigned int au_hi_flags(struct inode *inode, int isdir)
++{
++ unsigned int flags;
++ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
++
++ flags = 0;
++ if (au_opt_test(mnt_flags, XINO))
++ au_fset_hi(flags, XINO);
++ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
++ au_fset_hi(flags, HNOTIFY);
++ return flags;
++}
++
++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
++ struct inode *h_inode, unsigned int flags)
++{
++ struct au_hinode *hinode;
++ struct inode *hi;
++ struct au_iinfo *iinfo = au_ii(inode);
++
++ IiMustWriteLock(inode);
++
++ hinode = iinfo->ii_hinode + bindex;
++ hi = hinode->hi_inode;
++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
+
-+ sb = dentry->d_sb;
-+ inode = dentry->d_inode;
-+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
-+ if (unlikely(err))
-+ goto out;
-+ mnt_flags = au_mntflags(sb);
-+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
++ if (hi)
++ au_hiput(hinode);
++ hinode->hi_inode = h_inode;
++ if (h_inode) {
++ int err;
++ struct super_block *sb = inode->i_sb;
++ struct au_branch *br;
+
-+ /* support fstat(2) */
-+ if (!d_unlinked(dentry) && !udba_none) {
-+ unsigned int sigen = au_sigen(sb);
-+ err = au_digen_test(dentry, sigen);
-+ if (!err) {
-+ di_read_lock_child(dentry, AuLock_IR);
-+ err = au_dbrange_test(dentry);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ } else {
-+ AuDebugOn(IS_ROOT(dentry));
-+ di_write_lock_child(dentry);
-+ err = au_dbrange_test(dentry);
-+ if (!err)
-+ err = au_reval_for_attr(dentry, sigen);
-+ di_downgrade_lock(dentry, AuLock_IR);
++ AuDebugOn(inode->i_mode
++ && (h_inode->i_mode & S_IFMT)
++ != (inode->i_mode & S_IFMT));
++ if (bindex == iinfo->ii_bstart)
++ au_cpup_igen(inode, h_inode);
++ br = au_sbr(sb, bindex);
++ hinode->hi_id = br->br_id;
++ if (au_ftest_hi(flags, XINO)) {
++ err = au_xino_write(sb, bindex, h_inode->i_ino,
++ inode->i_ino);
+ if (unlikely(err))
-+ goto out_unlock;
++ AuIOErr1("failed au_xino_write() %d\n", err);
+ }
-+ } else
-+ di_read_lock_child(dentry, AuLock_IR);
-+
-+ bindex = au_ibstart(inode);
-+ h_mnt = au_sbr_mnt(sb, bindex);
-+ h_sb = h_mnt->mnt_sb;
-+ if (!au_test_fs_bad_iattr(h_sb) && udba_none)
-+ goto out_fill; /* success */
+
-+ h_dentry = NULL;
-+ if (au_dbstart(dentry) == bindex)
-+ h_dentry = dget(au_h_dptr(dentry, bindex));
-+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
-+ h_dentry = au_plink_lkup(inode, bindex);
-+ if (IS_ERR(h_dentry))
-+ goto out_fill; /* pretending success */
++ if (au_ftest_hi(flags, HNOTIFY)
++ && au_br_hnotifyable(br->br_perm)) {
++ err = au_hn_alloc(hinode, inode);
++ if (unlikely(err))
++ AuIOErr1("au_hn_alloc() %d\n", err);
++ }
+ }
-+ /* illegally overlapped or something */
-+ if (unlikely(!h_dentry))
-+ goto out_fill; /* pretending success */
++}
+
-+ positive = !!h_dentry->d_inode;
-+ if (positive)
-+ err = vfs_getattr(h_mnt, h_dentry, st);
-+ dput(h_dentry);
-+ if (!err) {
-+ if (positive)
-+ au_refresh_iattr(inode, st, h_dentry->d_inode->i_nlink);
-+ goto out_fill; /* success */
-+ }
-+ AuTraceErr(err);
-+ goto out_unlock;
++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
++ struct dentry *h_wh)
++{
++ struct au_hinode *hinode;
+
-+out_fill:
-+ generic_fillattr(inode, st);
-+out_unlock:
-+ di_read_unlock(dentry, AuLock_IR);
-+ si_read_unlock(sb);
-+out:
-+ AuTraceErr(err);
-+ return err;
++ IiMustWriteLock(inode);
++
++ hinode = au_ii(inode)->ii_hinode + bindex;
++ AuDebugOn(hinode->hi_whdentry);
++ hinode->hi_whdentry = h_wh;
+}
+
-+/* ---------------------------------------------------------------------- */
++void au_update_iigen(struct inode *inode)
++{
++ atomic_set(&au_ii(inode)->ii_generation, au_sigen(inode->i_sb));
++ /* smp_mb(); */ /* atomic_set */
++}
+
-+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
-+ int bufsiz)
++/* it may be called at remount time, too */
++void au_update_ibrange(struct inode *inode, int do_put_zero)
+{
-+ int err;
-+ struct super_block *sb;
-+ struct dentry *h_dentry;
++ struct au_iinfo *iinfo;
++ aufs_bindex_t bindex, bend;
+
-+ err = -EINVAL;
-+ h_dentry = au_h_dptr(dentry, bindex);
-+ if (unlikely(!h_dentry->d_inode->i_op->readlink))
-+ goto out;
++ iinfo = au_ii(inode);
++ if (!iinfo)
++ return;
+
-+ err = security_inode_readlink(h_dentry);
-+ if (unlikely(err))
-+ goto out;
++ IiMustWriteLock(inode);
+
-+ sb = dentry->d_sb;
-+ if (!au_test_ro(sb, bindex, dentry->d_inode)) {
-+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
-+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
++ if (do_put_zero && iinfo->ii_bstart >= 0) {
++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
++ bindex++) {
++ struct inode *h_i;
++
++ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
++ if (h_i && !h_i->i_nlink)
++ au_set_h_iptr(inode, bindex, NULL, 0);
++ }
+ }
-+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
+
-+out:
-+ return err;
++ iinfo->ii_bstart = -1;
++ iinfo->ii_bend = -1;
++ bend = au_sbend(inode->i_sb);
++ for (bindex = 0; bindex <= bend; bindex++)
++ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
++ iinfo->ii_bstart = bindex;
++ break;
++ }
++ if (iinfo->ii_bstart >= 0)
++ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
++ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
++ iinfo->ii_bend = bindex;
++ break;
++ }
++ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
+}
+
-+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
-+{
-+ int err;
++/* ---------------------------------------------------------------------- */
+
-+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
-+ if (unlikely(err))
-+ goto out;
-+ err = au_d_hashed_positive(dentry);
-+ if (!err)
-+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
-+ aufs_read_unlock(dentry, AuLock_IR);
++void au_icntnr_init_once(void *_c)
++{
++ struct au_icntnr *c = _c;
++ struct au_iinfo *iinfo = &c->iinfo;
++ static struct lock_class_key aufs_ii;
+
-+out:
-+ return err;
++ au_rw_init(&iinfo->ii_rwsem);
++ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
++ inode_init_once(&c->vfs_inode);
+}
+
-+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
++int au_iinfo_init(struct inode *inode)
+{
-+ int err;
-+ mm_segment_t old_fs;
-+ union {
-+ char *k;
-+ char __user *u;
-+ } buf;
-+
-+ err = -ENOMEM;
-+ buf.k = __getname_gfp(GFP_NOFS);
-+ if (unlikely(!buf.k))
-+ goto out;
-+
-+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
-+ if (unlikely(err))
-+ goto out_name;
++ struct au_iinfo *iinfo;
++ struct super_block *sb;
++ int nbr, i;
+
-+ err = au_d_hashed_positive(dentry);
-+ if (!err) {
-+ old_fs = get_fs();
-+ set_fs(KERNEL_DS);
-+ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX);
-+ set_fs(old_fs);
-+ }
-+ aufs_read_unlock(dentry, AuLock_IR);
++ sb = inode->i_sb;
++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
++ nbr = au_sbend(sb) + 1;
++ if (unlikely(nbr <= 0))
++ nbr = 1;
++ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
++ if (iinfo->ii_hinode) {
++ au_ninodes_inc(sb);
++ for (i = 0; i < nbr; i++)
++ iinfo->ii_hinode[i].hi_id = -1;
+
-+ if (err >= 0) {
-+ buf.k[err] = 0;
-+ /* will be freed by put_link */
-+ nd_set_link(nd, buf.k);
-+ return NULL; /* success */
++ atomic_set(&iinfo->ii_generation, au_sigen(sb));
++ /* smp_mb(); */ /* atomic_set */
++ iinfo->ii_bstart = -1;
++ iinfo->ii_bend = -1;
++ iinfo->ii_vdir = NULL;
++ return 0;
+ }
-+
-+out_name:
-+ __putname(buf.k);
-+out:
-+ path_put(&nd->path);
-+ AuTraceErr(err);
-+ return ERR_PTR(err);
++ return -ENOMEM;
+}
+
-+static void aufs_put_link(struct dentry *dentry __maybe_unused,
-+ struct nameidata *nd, void *cookie __maybe_unused)
++int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
+{
-+ __putname(nd_get_link(nd));
-+}
++ int err, sz;
++ struct au_hinode *hip;
+
-+/* ---------------------------------------------------------------------- */
++ AuRwMustWriteLock(&iinfo->ii_rwsem);
+
-+static void aufs_truncate_range(struct inode *inode __maybe_unused,
-+ loff_t start __maybe_unused,
-+ loff_t end __maybe_unused)
-+{
-+ AuUnsupport();
++ err = -ENOMEM;
++ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
++ if (!sz)
++ sz = sizeof(*hip);
++ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
++ if (hip) {
++ iinfo->ii_hinode = hip;
++ err = 0;
++ }
++
++ return err;
+}
+
-+/* ---------------------------------------------------------------------- */
++void au_iinfo_fin(struct inode *inode)
++{
++ struct au_iinfo *iinfo;
++ struct au_hinode *hi;
++ struct super_block *sb;
++ aufs_bindex_t bindex, bend;
++ const unsigned char unlinked = !inode->i_nlink;
+
-+struct inode_operations aufs_symlink_iop = {
-+ .permission = aufs_permission,
-+ .setattr = aufs_setattr,
-+ .getattr = aufs_getattr,
-+ .readlink = aufs_readlink,
-+ .follow_link = aufs_follow_link,
-+ .put_link = aufs_put_link
-+};
++ iinfo = au_ii(inode);
++ /* bad_inode case */
++ if (!iinfo)
++ return;
+
-+struct inode_operations aufs_dir_iop = {
-+ .create = aufs_create,
-+ .lookup = aufs_lookup,
-+ .link = aufs_link,
-+ .unlink = aufs_unlink,
-+ .symlink = aufs_symlink,
-+ .mkdir = aufs_mkdir,
-+ .rmdir = aufs_rmdir,
-+ .mknod = aufs_mknod,
-+ .rename = aufs_rename,
++ sb = inode->i_sb;
++ au_ninodes_dec(sb);
++ if (si_pid_test(sb))
++ au_xino_delete_inode(inode, unlinked);
++ else {
++ /*
++ * it is safe to hide the dependency between sbinfo and
++ * sb->s_umount.
++ */
++ lockdep_off();
++ si_noflush_read_lock(sb);
++ au_xino_delete_inode(inode, unlinked);
++ si_read_unlock(sb);
++ lockdep_on();
++ }
+
-+ .permission = aufs_permission,
-+ .setattr = aufs_setattr,
-+ .getattr = aufs_getattr
-+};
++ if (iinfo->ii_vdir)
++ au_vdir_free(iinfo->ii_vdir);
+
-+struct inode_operations aufs_iop = {
-+ .permission = aufs_permission,
-+ .setattr = aufs_setattr,
-+ .getattr = aufs_getattr,
-+ .truncate_range = aufs_truncate_range
-+};
-diff -urN a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c
---- a/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/i_op_del.c 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,481 @@
++ bindex = iinfo->ii_bstart;
++ if (bindex >= 0) {
++ hi = iinfo->ii_hinode + bindex;
++ bend = iinfo->ii_bend;
++ while (bindex++ <= bend) {
++ if (hi->hi_inode)
++ au_hiput(hi);
++ hi++;
++ }
++ }
++ kfree(iinfo->ii_hinode);
++ iinfo->ii_hinode = NULL;
++ AuRwDestroy(&iinfo->ii_rwsem);
++}
+--- a/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/inode.c 2011-02-12 16:30:08.944127798 +0000
+@@ -0,0 +1,471 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -15360,472 +15730,461 @@
+ */
+
+/*
-+ * inode operations (del entry)
++ * inode functions
+ */
+
+#include "aufs.h"
+
-+/*
-+ * decide if a new whiteout for @dentry is necessary or not.
-+ * when it is necessary, prepare the parent dir for the upper branch whose
-+ * branch index is @bcpup for creation. the actual creation of the whiteout will
-+ * be done by caller.
-+ * return value:
-+ * 0: wh is unnecessary
-+ * plus: wh is necessary
-+ * minus: error
-+ */
-+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
++struct inode *au_igrab(struct inode *inode)
+{
-+ int need_wh, err;
-+ aufs_bindex_t bstart;
++ if (inode) {
++ AuDebugOn(!atomic_read(&inode->i_count));
++ ihold(inode);
++ }
++ return inode;
++}
++
++static void au_refresh_hinode_attr(struct inode *inode, int do_version)
++{
++ au_cpup_attr_all(inode, /*force*/0);
++ au_update_iigen(inode);
++ if (do_version)
++ inode->i_version++;
++}
++
++static int au_ii_refresh(struct inode *inode, int *update)
++{
++ int err, e;
++ umode_t type;
++ aufs_bindex_t bindex, new_bindex;
+ struct super_block *sb;
++ struct au_iinfo *iinfo;
++ struct au_hinode *p, *q, tmp;
+
-+ sb = dentry->d_sb;
-+ bstart = au_dbstart(dentry);
-+ if (*bcpup < 0) {
-+ *bcpup = bstart;
-+ if (au_test_ro(sb, bstart, dentry->d_inode)) {
-+ err = AuWbrCopyup(au_sbi(sb), dentry);
-+ *bcpup = err;
-+ if (unlikely(err < 0))
-+ goto out;
-+ }
-+ } else
-+ AuDebugOn(bstart < *bcpup
-+ || au_test_ro(sb, *bcpup, dentry->d_inode));
-+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
++ IiMustWriteLock(inode);
+
-+ if (*bcpup != bstart) {
-+ err = au_cpup_dirs(dentry, *bcpup);
-+ if (unlikely(err))
-+ goto out;
-+ need_wh = 1;
-+ } else {
-+ struct au_dinfo *dinfo, *tmp;
++ *update = 0;
++ sb = inode->i_sb;
++ type = inode->i_mode & S_IFMT;
++ iinfo = au_ii(inode);
++ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
++ if (unlikely(err))
++ goto out;
+
-+ need_wh = -ENOMEM;
-+ dinfo = au_di(dentry);
-+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
-+ if (tmp) {
-+ au_di_cp(tmp, dinfo);
-+ au_di_swap(tmp, dinfo);
-+ /* returns the number of positive dentries */
-+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0,
-+ /*nd*/NULL);
-+ au_di_swap(tmp, dinfo);
-+ au_rw_write_unlock(&tmp->di_rwsem);
-+ au_di_free(tmp);
++ AuDebugOn(iinfo->ii_bstart < 0);
++ p = iinfo->ii_hinode + iinfo->ii_bstart;
++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
++ bindex++, p++) {
++ if (!p->hi_inode)
++ continue;
++
++ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
++ new_bindex = au_br_index(sb, p->hi_id);
++ if (new_bindex == bindex)
++ continue;
++
++ if (new_bindex < 0) {
++ *update = 1;
++ au_hiput(p);
++ p->hi_inode = NULL;
++ continue;
++ }
++
++ if (new_bindex < iinfo->ii_bstart)
++ iinfo->ii_bstart = new_bindex;
++ if (iinfo->ii_bend < new_bindex)
++ iinfo->ii_bend = new_bindex;
++ /* swap two lower inode, and loop again */
++ q = iinfo->ii_hinode + new_bindex;
++ tmp = *q;
++ *q = *p;
++ *p = tmp;
++ if (tmp.hi_inode) {
++ bindex--;
++ p--;
+ }
+ }
-+ AuDbg("need_wh %d\n", need_wh);
-+ err = need_wh;
++ au_update_ibrange(inode, /*do_put_zero*/0);
++ e = au_dy_irefresh(inode);
++ if (unlikely(e && !err))
++ err = e;
+
+out:
++ AuTraceErr(err);
+ return err;
+}
+
-+/*
-+ * simple tests for the del-entry operations.
-+ * following the checks in vfs, plus the parent-child relationship.
-+ */
-+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
-+ struct dentry *h_parent, int isdir)
++int au_refresh_hinode_self(struct inode *inode)
+{
-+ int err;
-+ umode_t h_mode;
-+ struct dentry *h_dentry, *h_latest;
-+ struct inode *h_inode;
-+
-+ h_dentry = au_h_dptr(dentry, bindex);
-+ h_inode = h_dentry->d_inode;
-+ if (dentry->d_inode) {
-+ err = -ENOENT;
-+ if (unlikely(!h_inode || !h_inode->i_nlink))
-+ goto out;
-+
-+ h_mode = h_inode->i_mode;
-+ if (!isdir) {
-+ err = -EISDIR;
-+ if (unlikely(S_ISDIR(h_mode)))
-+ goto out;
-+ } else if (unlikely(!S_ISDIR(h_mode))) {
-+ err = -ENOTDIR;
-+ goto out;
-+ }
-+ } else {
-+ /* rename(2) case */
-+ err = -EIO;
-+ if (unlikely(h_inode))
-+ goto out;
-+ }
-+
-+ err = -ENOENT;
-+ /* expected parent dir is locked */
-+ if (unlikely(h_parent != h_dentry->d_parent))
-+ goto out;
-+ err = 0;
-+
-+ /*
-+ * rmdir a dir may break the consistency on some filesystem.
-+ * let's try heavy test.
-+ */
-+ err = -EACCES;
-+ if (unlikely(au_test_h_perm(h_parent->d_inode, MAY_EXEC | MAY_WRITE)))
-+ goto out;
++ int err, update;
+
-+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent,
-+ au_sbr(dentry->d_sb, bindex));
-+ err = -EIO;
-+ if (IS_ERR(h_latest))
-+ goto out;
-+ if (h_latest == h_dentry)
-+ err = 0;
-+ dput(h_latest);
++ err = au_ii_refresh(inode, &update);
++ if (!err)
++ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
+
-+out:
++ AuTraceErr(err);
+ return err;
+}
+
-+/*
-+ * decide the branch where we operate for @dentry. the branch index will be set
-+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
-+ * dir for reverting.
-+ * when a new whiteout is necessary, create it.
-+ */
-+static struct dentry*
-+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
-+ struct au_dtime *dt, struct au_pin *pin)
++int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
+{
-+ struct dentry *wh_dentry;
-+ struct super_block *sb;
-+ struct path h_path;
-+ int err, need_wh;
-+ unsigned int udba;
-+ aufs_bindex_t bcpup;
-+
-+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
-+ wh_dentry = ERR_PTR(need_wh);
-+ if (unlikely(need_wh < 0))
-+ goto out;
++ int err, e, update;
++ unsigned int flags;
++ umode_t mode;
++ aufs_bindex_t bindex, bend;
++ unsigned char isdir;
++ struct au_hinode *p;
++ struct au_iinfo *iinfo;
+
-+ sb = dentry->d_sb;
-+ udba = au_opt_udba(sb);
-+ bcpup = *rbcpup;
-+ err = au_pin(pin, dentry, bcpup, udba,
-+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
-+ wh_dentry = ERR_PTR(err);
++ err = au_ii_refresh(inode, &update);
+ if (unlikely(err))
+ goto out;
+
-+ h_path.dentry = au_pinned_h_parent(pin);
-+ if (udba != AuOpt_UDBA_NONE
-+ && au_dbstart(dentry) == bcpup) {
-+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
-+ wh_dentry = ERR_PTR(err);
-+ if (unlikely(err))
-+ goto out_unpin;
-+ }
-+
-+ h_path.mnt = au_sbr_mnt(sb, bcpup);
-+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
-+ wh_dentry = NULL;
-+ if (!need_wh)
-+ goto out; /* success, no need to create whiteout */
++ update = 0;
++ iinfo = au_ii(inode);
++ p = iinfo->ii_hinode + iinfo->ii_bstart;
++ mode = (inode->i_mode & S_IFMT);
++ isdir = S_ISDIR(mode);
++ flags = au_hi_flags(inode, isdir);
++ bend = au_dbend(dentry);
++ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
++ struct inode *h_i;
++ struct dentry *h_d;
+
-+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
-+ if (IS_ERR(wh_dentry))
-+ goto out_unpin;
++ h_d = au_h_dptr(dentry, bindex);
++ if (!h_d || !h_d->d_inode)
++ continue;
+
-+ /* returns with the parent is locked and wh_dentry is dget-ed */
-+ goto out; /* success */
++ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT));
++ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
++ h_i = au_h_iptr(inode, bindex);
++ if (h_i) {
++ if (h_i == h_d->d_inode)
++ continue;
++ err = -EIO;
++ break;
++ }
++ }
++ if (bindex < iinfo->ii_bstart)
++ iinfo->ii_bstart = bindex;
++ if (iinfo->ii_bend < bindex)
++ iinfo->ii_bend = bindex;
++ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
++ update = 1;
++ }
++ au_update_ibrange(inode, /*do_put_zero*/0);
++ e = au_dy_irefresh(inode);
++ if (unlikely(e && !err))
++ err = e;
++ if (!err)
++ au_refresh_hinode_attr(inode, update && isdir);
+
-+out_unpin:
-+ au_unpin(pin);
+out:
-+ return wh_dentry;
++ AuTraceErr(err);
++ return err;
+}
+
-+/*
-+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
-+ * in order to be revertible and save time for removing many child whiteouts
-+ * under the dir.
-+ * returns 1 when there are too many child whiteout and caller should remove
-+ * them asynchronously. returns 0 when the number of children is enough small to
-+ * remove now or the branch fs is a remote fs.
-+ * otherwise return an error.
-+ */
-+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
-+ struct au_nhash *whlist, struct inode *dir)
++static int set_inode(struct inode *inode, struct dentry *dentry)
+{
-+ int rmdir_later, err, dirwh;
++ int err;
++ unsigned int flags;
++ umode_t mode;
++ aufs_bindex_t bindex, bstart, btail;
++ unsigned char isdir;
+ struct dentry *h_dentry;
-+ struct super_block *sb;
-+
-+ sb = dentry->d_sb;
-+ SiMustAnyLock(sb);
-+ h_dentry = au_h_dptr(dentry, bindex);
-+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
-+ if (unlikely(err))
-+ goto out;
++ struct inode *h_inode;
++ struct au_iinfo *iinfo;
+
-+ /* stop monitoring */
-+ au_hn_free(au_hi(dentry->d_inode, bindex));
++ IiMustWriteLock(inode);
+
-+ if (!au_test_fs_remote(h_dentry->d_sb)) {
-+ dirwh = au_sbi(sb)->si_dirwh;
-+ rmdir_later = (dirwh <= 1);
-+ if (!rmdir_later)
-+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
-+ dirwh);
-+ if (rmdir_later)
-+ return rmdir_later;
++ err = 0;
++ isdir = 0;
++ bstart = au_dbstart(dentry);
++ h_inode = au_h_dptr(dentry, bstart)->d_inode;
++ mode = h_inode->i_mode;
++ switch (mode & S_IFMT) {
++ case S_IFREG:
++ btail = au_dbtail(dentry);
++ inode->i_op = &aufs_iop;
++ inode->i_fop = &aufs_file_fop;
++ err = au_dy_iaop(inode, bstart, h_inode);
++ if (unlikely(err))
++ goto out;
++ break;
++ case S_IFDIR:
++ isdir = 1;
++ btail = au_dbtaildir(dentry);
++ inode->i_op = &aufs_dir_iop;
++ inode->i_fop = &aufs_dir_fop;
++ break;
++ case S_IFLNK:
++ btail = au_dbtail(dentry);
++ inode->i_op = &aufs_symlink_iop;
++ break;
++ case S_IFBLK:
++ case S_IFCHR:
++ case S_IFIFO:
++ case S_IFSOCK:
++ btail = au_dbtail(dentry);
++ inode->i_op = &aufs_iop;
++ au_init_special_fop(inode, mode, h_inode->i_rdev);
++ break;
++ default:
++ AuIOErr("Unknown file type 0%o\n", mode);
++ err = -EIO;
++ goto out;
+ }
+
-+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
-+ if (unlikely(err)) {
-+ AuIOErr("rmdir %.*s, b%d failed, %d. ignored\n",
-+ AuDLNPair(h_dentry), bindex, err);
-+ err = 0;
++ /* do not set hnotify for whiteouted dirs (SHWH mode) */
++ flags = au_hi_flags(inode, isdir);
++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
++ && au_ftest_hi(flags, HNOTIFY)
++ && dentry->d_name.len > AUFS_WH_PFX_LEN
++ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
++ au_fclr_hi(flags, HNOTIFY);
++ iinfo = au_ii(inode);
++ iinfo->ii_bstart = bstart;
++ iinfo->ii_bend = btail;
++ for (bindex = bstart; bindex <= btail; bindex++) {
++ h_dentry = au_h_dptr(dentry, bindex);
++ if (h_dentry)
++ au_set_h_iptr(inode, bindex,
++ au_igrab(h_dentry->d_inode), flags);
+ }
++ au_cpup_attr_all(inode, /*force*/1);
+
+out:
-+ AuTraceErr(err);
+ return err;
+}
+
+/*
-+ * final procedure for deleting a entry.
-+ * maintain dentry and iattr.
++ * successful returns with iinfo write_locked
++ * minus: errno
++ * zero: success, matched
++ * plus: no error, but unmatched
+ */
-+static void epilog(struct inode *dir, struct dentry *dentry,
-+ aufs_bindex_t bindex)
++static int reval_inode(struct inode *inode, struct dentry *dentry)
+{
-+ struct inode *inode;
-+
-+ inode = dentry->d_inode;
-+ d_drop(dentry);
-+ inode->i_ctime = dir->i_ctime;
-+
-+ if (au_ibstart(dir) == bindex)
-+ au_cpup_attr_timesizes(dir);
-+ dir->i_version++;
-+}
++ int err;
++ aufs_bindex_t bindex, bend;
++ struct inode *h_inode, *h_dinode;
+
-+/*
-+ * when an error happened, remove the created whiteout and revert everything.
-+ */
-+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
-+ aufs_bindex_t bwh, struct dentry *wh_dentry,
-+ struct dentry *dentry, struct au_dtime *dt)
-+{
-+ int rerr;
-+ struct path h_path = {
-+ .dentry = wh_dentry,
-+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
-+ };
++ /*
++ * before this function, if aufs got any iinfo lock, it must be only
++ * one, the parent dir.
++ * it can happen by UDBA and the obsoleted inode number.
++ */
++ err = -EIO;
++ if (unlikely(inode->i_ino == parent_ino(dentry)))
++ goto out;
+
-+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
-+ if (!rerr) {
-+ au_set_dbwh(dentry, bwh);
-+ au_dtime_revert(dt);
-+ return 0;
++ err = 1;
++ ii_write_lock_new_child(inode);
++ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
++ bend = au_ibend(inode);
++ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
++ h_inode = au_h_iptr(inode, bindex);
++ if (h_inode && h_inode == h_dinode) {
++ err = 0;
++ if (au_iigen_test(inode, au_digen(dentry)))
++ err = au_refresh_hinode(inode, dentry);
++ break;
++ }
+ }
+
-+ AuIOErr("%.*s reverting whiteout failed(%d, %d)\n",
-+ AuDLNPair(dentry), err, rerr);
-+ return -EIO;
-+}
-+
-+/* ---------------------------------------------------------------------- */
++ if (unlikely(err))
++ ii_write_unlock(inode);
++out:
++ return err;
++}
+
-+int aufs_unlink(struct inode *dir, struct dentry *dentry)
++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
++ unsigned int d_type, ino_t *ino)
+{
+ int err;
-+ aufs_bindex_t bwh, bindex, bstart;
-+ struct au_dtime dt;
-+ struct au_pin pin;
-+ struct path h_path;
-+ struct inode *inode, *h_dir;
-+ struct dentry *parent, *wh_dentry;
-+
-+ IMustLock(dir);
++ struct mutex *mtx;
+
-+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
++ /* prevent hardlinked inode number from race condition */
++ mtx = NULL;
++ if (d_type != DT_DIR) {
++ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
++ mutex_lock(mtx);
++ }
++ err = au_xino_read(sb, bindex, h_ino, ino);
+ if (unlikely(err))
+ goto out;
-+ err = au_d_hashed_positive(dentry);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ inode = dentry->d_inode;
-+ IMustLock(inode);
-+ err = -EISDIR;
-+ if (unlikely(S_ISDIR(inode->i_mode)))
-+ goto out_unlock; /* possible? */
-+
-+ bstart = au_dbstart(dentry);
-+ bwh = au_dbwh(dentry);
-+ bindex = -1;
-+ parent = dentry->d_parent; /* dir inode is locked */
-+ di_write_lock_parent(parent);
-+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt, &pin);
-+ err = PTR_ERR(wh_dentry);
-+ if (IS_ERR(wh_dentry))
-+ goto out_parent;
-+
-+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
-+ h_path.dentry = au_h_dptr(dentry, bstart);
-+ dget(h_path.dentry);
-+ if (bindex == bstart) {
-+ h_dir = au_pinned_h_dir(&pin);
-+ err = vfsub_unlink(h_dir, &h_path, /*force*/0);
-+ } else {
-+ /* dir inode is locked */
-+ h_dir = wh_dentry->d_parent->d_inode;
-+ IMustLock(h_dir);
-+ err = 0;
-+ }
-+
-+ if (!err) {
-+ vfsub_drop_nlink(inode);
-+ epilog(dir, dentry, bindex);
-+
-+ /* update target timestamps */
-+ if (bindex == bstart) {
-+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
-+ inode->i_ctime = h_path.dentry->d_inode->i_ctime;
-+ } else
-+ /* todo: this timestamp may be reverted later */
-+ inode->i_ctime = h_dir->i_ctime;
-+ goto out_unpin; /* success */
-+ }
-+
-+ /* revert */
-+ if (wh_dentry) {
-+ int rerr;
+
-+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
-+ if (rerr)
-+ err = rerr;
++ if (!*ino) {
++ err = -EIO;
++ *ino = au_xino_new_ino(sb);
++ if (unlikely(!*ino))
++ goto out;
++ err = au_xino_write(sb, bindex, h_ino, *ino);
++ if (unlikely(err))
++ goto out;
+ }
+
-+out_unpin:
-+ au_unpin(&pin);
-+ dput(wh_dentry);
-+ dput(h_path.dentry);
-+out_parent:
-+ di_write_unlock(parent);
-+out_unlock:
-+ aufs_read_unlock(dentry, AuLock_DW);
+out:
++ if (mtx)
++ mutex_unlock(mtx);
+ return err;
+}
+
-+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
++/* successful returns with iinfo write_locked */
++/* todo: return with unlocked? */
++struct inode *au_new_inode(struct dentry *dentry, int must_new)
+{
-+ int err, rmdir_later;
-+ aufs_bindex_t bwh, bindex, bstart;
-+ struct au_dtime dt;
-+ struct au_pin pin;
-+ struct inode *inode;
-+ struct dentry *parent, *wh_dentry, *h_dentry;
-+ struct au_whtmp_rmdir *args;
++ struct inode *inode, *h_inode;
++ struct dentry *h_dentry;
++ struct super_block *sb;
++ struct mutex *mtx;
++ ino_t h_ino, ino;
++ int err;
++ aufs_bindex_t bstart;
+
-+ IMustLock(dir);
++ sb = dentry->d_sb;
++ bstart = au_dbstart(dentry);
++ h_dentry = au_h_dptr(dentry, bstart);
++ h_inode = h_dentry->d_inode;
++ h_ino = h_inode->i_ino;
+
-+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
++ /*
++ * stop 'race'-ing between hardlinks under different
++ * parents.
++ */
++ mtx = NULL;
++ if (!S_ISDIR(h_inode->i_mode))
++ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
++
++new_ino:
++ if (mtx)
++ mutex_lock(mtx);
++ err = au_xino_read(sb, bstart, h_ino, &ino);
++ inode = ERR_PTR(err);
+ if (unlikely(err))
+ goto out;
+
-+ /* VFS already unhashes it */
-+ inode = dentry->d_inode;
-+ err = -ENOENT;
-+ if (unlikely(!inode || !inode->i_nlink
-+ || IS_DEADDIR(inode)))
-+ goto out_unlock;
-+ IMustLock(inode);
-+ err = -ENOTDIR;
-+ if (unlikely(!S_ISDIR(inode->i_mode)))
-+ goto out_unlock; /* possible? */
-+
-+ err = -ENOMEM;
-+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
-+ if (unlikely(!args))
-+ goto out_unlock;
++ if (!ino) {
++ ino = au_xino_new_ino(sb);
++ if (unlikely(!ino)) {
++ inode = ERR_PTR(-EIO);
++ goto out;
++ }
++ }
+
-+ parent = dentry->d_parent; /* dir inode is locked */
-+ di_write_lock_parent(parent);
-+ err = au_test_empty(dentry, &args->whlist);
-+ if (unlikely(err))
-+ goto out_parent;
++ AuDbg("i%lu\n", (unsigned long)ino);
++ inode = au_iget_locked(sb, ino);
++ err = PTR_ERR(inode);
++ if (IS_ERR(inode))
++ goto out;
+
-+ bstart = au_dbstart(dentry);
-+ bwh = au_dbwh(dentry);
-+ bindex = -1;
-+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &dt, &pin);
-+ err = PTR_ERR(wh_dentry);
-+ if (IS_ERR(wh_dentry))
-+ goto out_parent;
++ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
++ if (inode->i_state & I_NEW) {
++ ii_write_lock_new_child(inode);
++ err = set_inode(inode, dentry);
++ if (!err) {
++ unlock_new_inode(inode);
++ goto out; /* success */
++ }
+
-+ h_dentry = au_h_dptr(dentry, bstart);
-+ dget(h_dentry);
-+ rmdir_later = 0;
-+ if (bindex == bstart) {
-+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
-+ if (err > 0) {
-+ rmdir_later = err;
-+ err = 0;
++ /*
++ * iget_failed() calls iput(), but we need to call
++ * ii_write_unlock() after iget_failed(). so dirty hack for
++ * i_count.
++ */
++ atomic_inc(&inode->i_count);
++ iget_failed(inode);
++ ii_write_unlock(inode);
++ au_xino_write(sb, bstart, h_ino, /*ino*/0);
++ /* ignore this error */
++ goto out_iput;
++ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
++ /*
++ * horrible race condition between lookup, readdir and copyup
++ * (or something).
++ */
++ if (mtx)
++ mutex_unlock(mtx);
++ err = reval_inode(inode, dentry);
++ if (unlikely(err < 0)) {
++ mtx = NULL;
++ goto out_iput;
+ }
-+ } else {
-+ /* stop monitoring */
-+ au_hn_free(au_hi(inode, bstart));
+
-+ /* dir inode is locked */
-+ IMustLock(wh_dentry->d_parent->d_inode);
-+ err = 0;
++ if (!err) {
++ mtx = NULL;
++ goto out; /* success */
++ } else if (mtx)
++ mutex_lock(mtx);
+ }
+
++ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
++ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
++ " b%d, %s, %.*s, hi%lu, i%lu.\n",
++ bstart, au_sbtype(h_dentry->d_sb), AuDLNPair(dentry),
++ (unsigned long)h_ino, (unsigned long)ino);
++ ino = 0;
++ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
+ if (!err) {
-+ vfsub_dead_dir(inode);
-+ au_set_dbdiropq(dentry, -1);
-+ epilog(dir, dentry, bindex);
++ iput(inode);
++ if (mtx)
++ mutex_unlock(mtx);
++ goto new_ino;
++ }
+
-+ if (rmdir_later) {
-+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
-+ args = NULL;
-+ }
++out_iput:
++ iput(inode);
++ inode = ERR_PTR(err);
++out:
++ if (mtx)
++ mutex_unlock(mtx);
++ return inode;
++}
+
-+ goto out_unpin; /* success */
-+ }
++/* ---------------------------------------------------------------------- */
++
++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
++ struct inode *inode)
++{
++ int err;
+
-+ /* revert */
-+ AuLabel(revert);
-+ if (wh_dentry) {
-+ int rerr;
++ err = au_br_rdonly(au_sbr(sb, bindex));
+
-+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, &dt);
-+ if (rerr)
-+ err = rerr;
++ /* pseudo-link after flushed may happen out of bounds */
++ if (!err
++ && inode
++ && au_ibstart(inode) <= bindex
++ && bindex <= au_ibend(inode)) {
++ /*
++ * permission check is unnecessary since vfsub routine
++ * will be called later
++ */
++ struct inode *hi = au_h_iptr(inode, bindex);
++ if (hi)
++ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
+ }
+
-+out_unpin:
-+ au_unpin(&pin);
-+ dput(wh_dentry);
-+ dput(h_dentry);
-+out_parent:
-+ di_write_unlock(parent);
-+ if (args)
-+ au_whtmp_rmdir_free(args);
-+out_unlock:
-+ aufs_read_unlock(dentry, AuLock_DW);
-+out:
-+ AuTraceErr(err);
+ return err;
+}
-diff -urN a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c
---- a/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/i_op_ren.c 2011-03-06 23:28:02.616413258 +0000
-@@ -0,0 +1,1017 @@
++
++int au_test_h_perm(struct inode *h_inode, int mask)
++{
++ if (!current_fsuid())
++ return 0;
++ return inode_permission(h_inode, mask);
++}
++
++int au_test_h_perm_sio(struct inode *h_inode, int mask)
++{
++ if (au_test_nfs(h_inode->i_sb)
++ && (mask & MAY_WRITE)
++ && S_ISDIR(h_inode->i_mode))
++ mask |= MAY_READ; /* force permission check */
++ return au_test_h_perm(h_inode, mask);
++}
+--- a/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/inode.h 2011-02-12 16:30:08.944127798 +0000
+@@ -0,0 +1,546 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -15845,1007 +16204,696 @@
+ */
+
+/*
-+ * inode operation (rename entry)
-+ * todo: this is crazy monster
-+ */
-+
-+#include "aufs.h"
-+
-+enum { AuSRC, AuDST, AuSrcDst };
-+enum { AuPARENT, AuCHILD, AuParentChild };
-+
-+#define AuRen_ISDIR 1
-+#define AuRen_ISSAMEDIR (1 << 1)
-+#define AuRen_WHSRC (1 << 2)
-+#define AuRen_WHDST (1 << 3)
-+#define AuRen_MNT_WRITE (1 << 4)
-+#define AuRen_DT_DSTDIR (1 << 5)
-+#define AuRen_DIROPQ (1 << 6)
-+#define AuRen_CPUP (1 << 7)
-+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
-+#define au_fset_ren(flags, name) \
-+ do { (flags) |= AuRen_##name; } while (0)
-+#define au_fclr_ren(flags, name) \
-+ do { (flags) &= ~AuRen_##name; } while (0)
-+
-+struct au_ren_args {
-+ struct {
-+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
-+ *wh_dentry;
-+ struct inode *dir, *inode;
-+ struct au_hinode *hdir;
-+ struct au_dtime dt[AuParentChild];
-+ aufs_bindex_t bstart;
-+ } sd[AuSrcDst];
-+
-+#define src_dentry sd[AuSRC].dentry
-+#define src_dir sd[AuSRC].dir
-+#define src_inode sd[AuSRC].inode
-+#define src_h_dentry sd[AuSRC].h_dentry
-+#define src_parent sd[AuSRC].parent
-+#define src_h_parent sd[AuSRC].h_parent
-+#define src_wh_dentry sd[AuSRC].wh_dentry
-+#define src_hdir sd[AuSRC].hdir
-+#define src_h_dir sd[AuSRC].hdir->hi_inode
-+#define src_dt sd[AuSRC].dt
-+#define src_bstart sd[AuSRC].bstart
-+
-+#define dst_dentry sd[AuDST].dentry
-+#define dst_dir sd[AuDST].dir
-+#define dst_inode sd[AuDST].inode
-+#define dst_h_dentry sd[AuDST].h_dentry
-+#define dst_parent sd[AuDST].parent
-+#define dst_h_parent sd[AuDST].h_parent
-+#define dst_wh_dentry sd[AuDST].wh_dentry
-+#define dst_hdir sd[AuDST].hdir
-+#define dst_h_dir sd[AuDST].hdir->hi_inode
-+#define dst_dt sd[AuDST].dt
-+#define dst_bstart sd[AuDST].bstart
-+
-+ struct dentry *h_trap;
-+ struct au_branch *br;
-+ struct au_hinode *src_hinode;
-+ struct path h_path;
-+ struct au_nhash whlist;
-+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
-+
-+ unsigned int flags;
-+
-+ struct au_whtmp_rmdir *thargs;
-+ struct dentry *h_dst;
-+};
-+
-+/* ---------------------------------------------------------------------- */
-+
-+/*
-+ * functions for reverting.
-+ * when an error happened in a single rename systemcall, we should revert
-+ * everything as if nothing happend.
-+ * we don't need to revert the copied-up/down the parent dir since they are
-+ * harmless.
++ * inode operations
+ */
+
-+#define RevertFailure(fmt, ...) do { \
-+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
-+ ##__VA_ARGS__, err, rerr); \
-+ err = -EIO; \
-+} while (0)
-+
-+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
-+{
-+ int rerr;
-+
-+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
-+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
-+ au_hn_imtx_unlock(a->src_hinode);
-+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
-+ if (rerr)
-+ RevertFailure("remove diropq %.*s", AuDLNPair(a->src_dentry));
-+}
-+
-+static void au_ren_rev_rename(int err, struct au_ren_args *a)
-+{
-+ int rerr;
-+
-+ a->h_path.dentry = au_lkup_one(&a->src_dentry->d_name, a->src_h_parent,
-+ a->br, /*nd*/NULL);
-+ rerr = PTR_ERR(a->h_path.dentry);
-+ if (IS_ERR(a->h_path.dentry)) {
-+ RevertFailure("au_lkup_one %.*s", AuDLNPair(a->src_dentry));
-+ return;
-+ }
-+
-+ rerr = vfsub_rename(a->dst_h_dir,
-+ au_h_dptr(a->src_dentry, a->btgt),
-+ a->src_h_dir, &a->h_path);
-+ d_drop(a->h_path.dentry);
-+ dput(a->h_path.dentry);
-+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
-+ if (rerr)
-+ RevertFailure("rename %.*s", AuDLNPair(a->src_dentry));
-+}
-+
-+static void au_ren_rev_cpup(int err, struct au_ren_args *a)
-+{
-+ int rerr;
-+
-+ a->h_path.dentry = a->dst_h_dentry;
-+ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*force*/0);
-+ au_set_h_dptr(a->src_dentry, a->btgt, NULL);
-+ au_set_dbstart(a->src_dentry, a->src_bstart);
-+ if (rerr)
-+ RevertFailure("unlink %.*s", AuDLNPair(a->dst_h_dentry));
-+}
-+
-+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
-+{
-+ int rerr;
-+
-+ a->h_path.dentry = au_lkup_one(&a->dst_dentry->d_name, a->dst_h_parent,
-+ a->br, /*nd*/NULL);
-+ rerr = PTR_ERR(a->h_path.dentry);
-+ if (IS_ERR(a->h_path.dentry)) {
-+ RevertFailure("lookup %.*s", AuDLNPair(a->dst_dentry));
-+ return;
-+ }
-+ if (a->h_path.dentry->d_inode) {
-+ d_drop(a->h_path.dentry);
-+ dput(a->h_path.dentry);
-+ return;
-+ }
-+
-+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path);
-+ d_drop(a->h_path.dentry);
-+ dput(a->h_path.dentry);
-+ if (!rerr)
-+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
-+ else
-+ RevertFailure("rename %.*s", AuDLNPair(a->h_dst));
-+}
-+
-+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
-+{
-+ int rerr;
++#ifndef __AUFS_INODE_H__
++#define __AUFS_INODE_H__
+
-+ a->h_path.dentry = a->src_wh_dentry;
-+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
-+ au_set_dbwh(a->src_dentry, a->src_bwh);
-+ if (rerr)
-+ RevertFailure("unlink %.*s", AuDLNPair(a->src_wh_dentry));
-+}
-+#undef RevertFailure
++#ifdef __KERNEL__
+
-+/* ---------------------------------------------------------------------- */
++#include <linux/fs.h>
++#include <linux/fsnotify.h>
++#include <linux/aufs_type.h>
++#include "rwsem.h"
+
-+/*
-+ * when we have to copyup the renaming entry, do it with the rename-target name
-+ * in order to minimize the cost (the later actual rename is unnecessary).
-+ * otherwise rename it on the target branch.
-+ */
-+static int au_ren_or_cpup(struct au_ren_args *a)
-+{
-+ int err;
-+ struct dentry *d;
++struct vfsmount;
+
-+ d = a->src_dentry;
-+ if (au_dbstart(d) == a->btgt) {
-+ a->h_path.dentry = a->dst_h_dentry;
-+ if (au_ftest_ren(a->flags, DIROPQ)
-+ && au_dbdiropq(d) == a->btgt)
-+ au_fclr_ren(a->flags, DIROPQ);
-+ AuDebugOn(au_dbstart(d) != a->btgt);
-+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
-+ a->dst_h_dir, &a->h_path);
-+ } else {
-+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
-+ struct file *h_file;
++struct au_hnotify {
++#ifdef CONFIG_AUFS_HNOTIFY
++#ifdef CONFIG_AUFS_HFSNOTIFY
++ /* never use fsnotify_add_vfsmount_mark() */
++ struct fsnotify_mark hn_mark;
++ int hn_mark_dead;
++#endif
++ struct inode *hn_aufs_inode; /* no get/put */
++#endif
++} ____cacheline_aligned_in_smp;
+
-+ au_fset_ren(a->flags, CPUP);
-+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
-+ au_set_dbstart(d, a->btgt);
-+ au_set_h_dptr(d, a->btgt, dget(a->dst_h_dentry));
-+ h_file = au_h_open_pre(d, a->src_bstart);
-+ if (IS_ERR(h_file)) {
-+ err = PTR_ERR(h_file);
-+ h_file = NULL;
-+ } else
-+ err = au_sio_cpup_single(d, a->btgt, a->src_bstart, -1,
-+ !AuCpup_DTIME, a->dst_parent);
-+ mutex_unlock(h_mtx);
-+ au_h_open_post(d, a->src_bstart, h_file);
-+ if (!err) {
-+ d = a->dst_dentry;
-+ au_set_h_dptr(d, a->btgt, NULL);
-+ au_update_dbstart(d);
-+ } else {
-+ au_set_h_dptr(d, a->btgt, NULL);
-+ au_set_dbstart(d, a->src_bstart);
-+ }
-+ }
-+ if (!err && a->h_dst)
-+ /* it will be set to dinfo later */
-+ dget(a->h_dst);
++struct au_hinode {
++ struct inode *hi_inode;
++ aufs_bindex_t hi_id;
++#ifdef CONFIG_AUFS_HNOTIFY
++ struct au_hnotify *hi_notify;
++#endif
+
-+ return err;
-+}
++ /* reference to the copied-up whiteout with get/put */
++ struct dentry *hi_whdentry;
++};
+
-+/* cf. aufs_rmdir() */
-+static int au_ren_del_whtmp(struct au_ren_args *a)
-+{
-+ int err;
-+ struct inode *dir;
++struct au_vdir;
++struct au_iinfo {
++ atomic_t ii_generation;
++ struct super_block *ii_hsb1; /* no get/put */
+
-+ dir = a->dst_dir;
-+ SiMustAnyLock(dir->i_sb);
-+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
-+ au_sbi(dir->i_sb)->si_dirwh)
-+ || au_test_fs_remote(a->h_dst->d_sb)) {
-+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
-+ if (unlikely(err))
-+ pr_warning("failed removing whtmp dir %.*s (%d), "
-+ "ignored.\n", AuDLNPair(a->h_dst), err);
-+ } else {
-+ au_nhash_wh_free(&a->thargs->whlist);
-+ a->thargs->whlist = a->whlist;
-+ a->whlist.nh_num = 0;
-+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
-+ dput(a->h_dst);
-+ a->thargs = NULL;
-+ }
++ struct au_rwsem ii_rwsem;
++ aufs_bindex_t ii_bstart, ii_bend;
++ __u32 ii_higen;
++ struct au_hinode *ii_hinode;
++ struct au_vdir *ii_vdir;
++};
+
-+ return 0;
-+}
++struct au_icntnr {
++ struct au_iinfo iinfo;
++ struct inode vfs_inode;
++} ____cacheline_aligned_in_smp;
+
-+/* make it 'opaque' dir. */
-+static int au_ren_diropq(struct au_ren_args *a)
-+{
-+ int err;
-+ struct dentry *diropq;
++/* au_pin flags */
++#define AuPin_DI_LOCKED 1
++#define AuPin_MNT_WRITE (1 << 1)
++#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
++#define au_fset_pin(flags, name) \
++ do { (flags) |= AuPin_##name; } while (0)
++#define au_fclr_pin(flags, name) \
++ do { (flags) &= ~AuPin_##name; } while (0)
+
-+ err = 0;
-+ a->src_bdiropq = au_dbdiropq(a->src_dentry);
-+ a->src_hinode = au_hi(a->src_inode, a->btgt);
-+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
-+ diropq = au_diropq_create(a->src_dentry, a->btgt);
-+ au_hn_imtx_unlock(a->src_hinode);
-+ if (IS_ERR(diropq))
-+ err = PTR_ERR(diropq);
-+ dput(diropq);
++struct au_pin {
++ /* input */
++ struct dentry *dentry;
++ unsigned int udba;
++ unsigned char lsc_di, lsc_hi, flags;
++ aufs_bindex_t bindex;
+
-+ return err;
-+}
++ /* output */
++ struct dentry *parent;
++ struct au_hinode *hdir;
++ struct vfsmount *h_mnt;
++};
+
-+static int do_rename(struct au_ren_args *a)
++/* ---------------------------------------------------------------------- */
++
++static inline struct au_iinfo *au_ii(struct inode *inode)
+{
-+ int err;
-+ struct dentry *d, *h_d;
++ struct au_iinfo *iinfo;
+
-+ /* prepare workqueue args for asynchronous rmdir */
-+ h_d = a->dst_h_dentry;
-+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
-+ err = -ENOMEM;
-+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
-+ if (unlikely(!a->thargs))
-+ goto out;
-+ a->h_dst = dget(h_d);
-+ }
++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
++ if (iinfo->ii_hinode)
++ return iinfo;
++ return NULL; /* debugging bad_inode case */
++}
+
-+ /* create whiteout for src_dentry */
-+ if (au_ftest_ren(a->flags, WHSRC)) {
-+ a->src_bwh = au_dbwh(a->src_dentry);
-+ AuDebugOn(a->src_bwh >= 0);
-+ a->src_wh_dentry
-+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
-+ err = PTR_ERR(a->src_wh_dentry);
-+ if (IS_ERR(a->src_wh_dentry))
-+ goto out_thargs;
-+ }
++/* ---------------------------------------------------------------------- */
+
-+ /* lookup whiteout for dentry */
-+ if (au_ftest_ren(a->flags, WHDST)) {
-+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
-+ a->br);
-+ err = PTR_ERR(h_d);
-+ if (IS_ERR(h_d))
-+ goto out_whsrc;
-+ if (!h_d->d_inode)
-+ dput(h_d);
-+ else
-+ a->dst_wh_dentry = h_d;
-+ }
++/* inode.c */
++struct inode *au_igrab(struct inode *inode);
++int au_refresh_hinode_self(struct inode *inode);
++int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
++ unsigned int d_type, ino_t *ino);
++struct inode *au_new_inode(struct dentry *dentry, int must_new);
++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
++ struct inode *inode);
++int au_test_h_perm(struct inode *h_inode, int mask);
++int au_test_h_perm_sio(struct inode *h_inode, int mask);
+
-+ /* rename dentry to tmpwh */
-+ if (a->thargs) {
-+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
-+ if (unlikely(err))
-+ goto out_whdst;
++static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
++ ino_t h_ino, unsigned int d_type, ino_t *ino)
++{
++#ifdef CONFIG_AUFS_SHWH
++ return au_ino(sb, bindex, h_ino, d_type, ino);
++#else
++ return 0;
++#endif
++}
+
-+ d = a->dst_dentry;
-+ au_set_h_dptr(d, a->btgt, NULL);
-+ err = au_lkup_neg(d, a->btgt);
-+ if (unlikely(err))
-+ goto out_whtmp;
-+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
-+ }
++/* i_op.c */
++extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop;
+
-+ /* cpup src */
-+ if (a->dst_h_dentry->d_inode && a->src_bstart != a->btgt) {
-+ struct mutex *h_mtx = &a->src_h_dentry->d_inode->i_mutex;
-+ struct file *h_file;
++/* au_wr_dir flags */
++#define AuWrDir_ADD_ENTRY 1
++#define AuWrDir_ISDIR (1 << 1)
++#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
++#define au_fset_wrdir(flags, name) \
++ do { (flags) |= AuWrDir_##name; } while (0)
++#define au_fclr_wrdir(flags, name) \
++ do { (flags) &= ~AuWrDir_##name; } while (0)
+
-+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
-+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
-+ h_file = au_h_open_pre(a->src_dentry, a->src_bstart);
-+ if (IS_ERR(h_file)) {
-+ err = PTR_ERR(h_file);
-+ h_file = NULL;
-+ } else
-+ err = au_sio_cpup_simple(a->src_dentry, a->btgt, -1,
-+ !AuCpup_DTIME);
-+ mutex_unlock(h_mtx);
-+ au_h_open_post(a->src_dentry, a->src_bstart, h_file);
-+ if (unlikely(err))
-+ goto out_whtmp;
-+ }
++struct au_wr_dir_args {
++ aufs_bindex_t force_btgt;
++ unsigned char flags;
++};
++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
++ struct au_wr_dir_args *args);
+
-+ /* rename by vfs_rename or cpup */
-+ d = a->dst_dentry;
-+ if (au_ftest_ren(a->flags, ISDIR)
-+ && (a->dst_wh_dentry
-+ || au_dbdiropq(d) == a->btgt
-+ /* hide the lower to keep xino */
-+ || a->btgt < au_dbend(d)
-+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
-+ au_fset_ren(a->flags, DIROPQ);
-+ err = au_ren_or_cpup(a);
-+ if (unlikely(err))
-+ /* leave the copied-up one */
-+ goto out_whtmp;
++struct dentry *au_pinned_h_parent(struct au_pin *pin);
++void au_pin_init(struct au_pin *pin, struct dentry *dentry,
++ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
++ unsigned int udba, unsigned char flags);
++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
++ unsigned int udba, unsigned char flags) __must_check;
++int au_do_pin(struct au_pin *pin) __must_check;
++void au_unpin(struct au_pin *pin);
+
-+ /* make dir opaque */
-+ if (au_ftest_ren(a->flags, DIROPQ)) {
-+ err = au_ren_diropq(a);
-+ if (unlikely(err))
-+ goto out_rename;
-+ }
++/* i_op_add.c */
++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
++ struct dentry *h_parent, int isdir);
++int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev);
++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
++int aufs_create(struct inode *dir, struct dentry *dentry, int mode,
++ struct nameidata *nd);
++int aufs_link(struct dentry *src_dentry, struct inode *dir,
++ struct dentry *dentry);
++int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+
-+ /* update target timestamps */
-+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
-+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
-+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
-+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
++/* i_op_del.c */
++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
++ struct dentry *h_parent, int isdir);
++int aufs_unlink(struct inode *dir, struct dentry *dentry);
++int aufs_rmdir(struct inode *dir, struct dentry *dentry);
+
-+ /* remove whiteout for dentry */
-+ if (a->dst_wh_dentry) {
-+ a->h_path.dentry = a->dst_wh_dentry;
-+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
-+ a->dst_dentry);
-+ if (unlikely(err))
-+ goto out_diropq;
-+ }
++/* i_op_ren.c */
++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
++int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
++ struct inode *dir, struct dentry *dentry);
+
-+ /* remove whtmp */
-+ if (a->thargs)
-+ au_ren_del_whtmp(a); /* ignore this error */
++/* iinfo.c */
++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
++void au_hiput(struct au_hinode *hinode);
++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
++ struct dentry *h_wh);
++unsigned int au_hi_flags(struct inode *inode, int isdir);
+
-+ err = 0;
-+ goto out_success;
++/* hinode flags */
++#define AuHi_XINO 1
++#define AuHi_HNOTIFY (1 << 1)
++#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
++#define au_fset_hi(flags, name) \
++ do { (flags) |= AuHi_##name; } while (0)
++#define au_fclr_hi(flags, name) \
++ do { (flags) &= ~AuHi_##name; } while (0)
+
-+out_diropq:
-+ if (au_ftest_ren(a->flags, DIROPQ))
-+ au_ren_rev_diropq(err, a);
-+out_rename:
-+ if (!au_ftest_ren(a->flags, CPUP))
-+ au_ren_rev_rename(err, a);
-+ else
-+ au_ren_rev_cpup(err, a);
-+ dput(a->h_dst);
-+out_whtmp:
-+ if (a->thargs)
-+ au_ren_rev_whtmp(err, a);
-+out_whdst:
-+ dput(a->dst_wh_dentry);
-+ a->dst_wh_dentry = NULL;
-+out_whsrc:
-+ if (a->src_wh_dentry)
-+ au_ren_rev_whsrc(err, a);
-+out_success:
-+ dput(a->src_wh_dentry);
-+ dput(a->dst_wh_dentry);
-+out_thargs:
-+ if (a->thargs) {
-+ dput(a->h_dst);
-+ au_whtmp_rmdir_free(a->thargs);
-+ a->thargs = NULL;
-+ }
-+out:
-+ return err;
-+}
++#ifndef CONFIG_AUFS_HNOTIFY
++#undef AuHi_HNOTIFY
++#define AuHi_HNOTIFY 0
++#endif
+
-+/* ---------------------------------------------------------------------- */
++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
++ struct inode *h_inode, unsigned int flags);
+
-+/*
-+ * test if @dentry dir can be rename destination or not.
-+ * success means, it is a logically empty dir.
-+ */
-+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
-+{
-+ return au_test_empty(dentry, whlist);
-+}
++void au_update_iigen(struct inode *inode);
++void au_update_ibrange(struct inode *inode, int do_put_zero);
+
-+/*
-+ * test if @dentry dir can be rename source or not.
-+ * if it can, return 0 and @children is filled.
-+ * success means,
-+ * - it is a logically empty dir.
-+ * - or, it exists on writable branch and has no children including whiteouts
-+ * on the lower branch.
-+ */
-+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
-+{
-+ int err;
-+ unsigned int rdhash;
-+ aufs_bindex_t bstart;
++void au_icntnr_init_once(void *_c);
++int au_iinfo_init(struct inode *inode);
++void au_iinfo_fin(struct inode *inode);
++int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
+
-+ bstart = au_dbstart(dentry);
-+ if (bstart != btgt) {
-+ struct au_nhash whlist;
++#ifdef CONFIG_PROC_FS
++/* plink.c */
++int au_plink_maint(struct super_block *sb, int flags);
++void au_plink_maint_leave(struct au_sbinfo *sbinfo);
++int au_plink_maint_enter(struct super_block *sb);
++#ifdef CONFIG_AUFS_DEBUG
++void au_plink_list(struct super_block *sb);
++#else
++AuStubVoid(au_plink_list, struct super_block *sb)
++#endif
++int au_plink_test(struct inode *inode);
++struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
++void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
++ struct dentry *h_dentry);
++void au_plink_put(struct super_block *sb, int verbose);
++void au_plink_clean(struct super_block *sb, int verbose);
++void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
++#else
++AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
++AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
++AuStubInt0(au_plink_maint_enter, struct super_block *sb);
++AuStubVoid(au_plink_list, struct super_block *sb);
++AuStubInt0(au_plink_test, struct inode *inode);
++AuStub(struct dentry *, au_plink_lkup, return NULL,
++ struct inode *inode, aufs_bindex_t bindex);
++AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
++ struct dentry *h_dentry);
++AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
++AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
++AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
++#endif /* CONFIG_PROC_FS */
+
-+ SiMustAnyLock(dentry->d_sb);
-+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
-+ if (!rdhash)
-+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
-+ dentry));
-+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
-+ if (unlikely(err))
-+ goto out;
-+ err = au_test_empty(dentry, &whlist);
-+ au_nhash_wh_free(&whlist);
-+ goto out;
-+ }
++/* ---------------------------------------------------------------------- */
+
-+ if (bstart == au_dbtaildir(dentry))
-+ return 0; /* success */
++/* lock subclass for iinfo */
++enum {
++ AuLsc_II_CHILD, /* child first */
++ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
++ AuLsc_II_CHILD3, /* copyup dirs */
++ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
++ AuLsc_II_PARENT2,
++ AuLsc_II_PARENT3, /* copyup dirs */
++ AuLsc_II_NEW_CHILD
++};
+
-+ err = au_test_empty_lower(dentry);
++/*
++ * ii_read_lock_child, ii_write_lock_child,
++ * ii_read_lock_child2, ii_write_lock_child2,
++ * ii_read_lock_child3, ii_write_lock_child3,
++ * ii_read_lock_parent, ii_write_lock_parent,
++ * ii_read_lock_parent2, ii_write_lock_parent2,
++ * ii_read_lock_parent3, ii_write_lock_parent3,
++ * ii_read_lock_new_child, ii_write_lock_new_child,
++ */
++#define AuReadLockFunc(name, lsc) \
++static inline void ii_read_lock_##name(struct inode *i) \
++{ \
++ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
++}
+
-+out:
-+ if (err == -ENOTEMPTY) {
-+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
-+ " is not supported\n");
-+ err = -EXDEV;
-+ }
-+ return err;
++#define AuWriteLockFunc(name, lsc) \
++static inline void ii_write_lock_##name(struct inode *i) \
++{ \
++ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
+}
+
-+/* side effect: sets whlist and h_dentry */
-+static int au_ren_may_dir(struct au_ren_args *a)
-+{
-+ int err;
-+ unsigned int rdhash;
-+ struct dentry *d;
++#define AuRWLockFuncs(name, lsc) \
++ AuReadLockFunc(name, lsc) \
++ AuWriteLockFunc(name, lsc)
+
-+ d = a->dst_dentry;
-+ SiMustAnyLock(d->d_sb);
++AuRWLockFuncs(child, CHILD);
++AuRWLockFuncs(child2, CHILD2);
++AuRWLockFuncs(child3, CHILD3);
++AuRWLockFuncs(parent, PARENT);
++AuRWLockFuncs(parent2, PARENT2);
++AuRWLockFuncs(parent3, PARENT3);
++AuRWLockFuncs(new_child, NEW_CHILD);
+
-+ err = 0;
-+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
-+ rdhash = au_sbi(d->d_sb)->si_rdhash;
-+ if (!rdhash)
-+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
-+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
-+ if (unlikely(err))
-+ goto out;
++#undef AuReadLockFunc
++#undef AuWriteLockFunc
++#undef AuRWLockFuncs
+
-+ au_set_dbstart(d, a->dst_bstart);
-+ err = may_rename_dstdir(d, &a->whlist);
-+ au_set_dbstart(d, a->btgt);
-+ }
-+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
-+ if (unlikely(err))
-+ goto out;
++/*
++ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
++ */
++AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
+
-+ d = a->src_dentry;
-+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
-+ if (au_ftest_ren(a->flags, ISDIR)) {
-+ err = may_rename_srcdir(d, a->btgt);
-+ if (unlikely(err)) {
-+ au_nhash_wh_free(&a->whlist);
-+ a->whlist.nh_num = 0;
-+ }
-+ }
-+out:
-+ return err;
-+}
++#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
++#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
++#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
+
+/* ---------------------------------------------------------------------- */
+
-+/*
-+ * simple tests for rename.
-+ * following the checks in vfs, plus the parent-child relationship.
-+ */
-+static int au_may_ren(struct au_ren_args *a)
++static inline void au_icntnr_init(struct au_icntnr *c)
++{
++#ifdef CONFIG_AUFS_DEBUG
++ c->vfs_inode.i_mode = 0;
++#endif
++}
++
++static inline unsigned int au_iigen(struct inode *inode)
+{
-+ int err, isdir;
-+ struct inode *h_inode;
++ return atomic_read(&au_ii(inode)->ii_generation);
++}
+
-+ if (a->src_bstart == a->btgt) {
-+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
-+ au_ftest_ren(a->flags, ISDIR));
-+ if (unlikely(err))
-+ goto out;
-+ err = -EINVAL;
-+ if (unlikely(a->src_h_dentry == a->h_trap))
-+ goto out;
-+ }
++/* tiny test for inode number */
++/* tmpfs generation is too rough */
++static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
++{
++ struct au_iinfo *iinfo;
+
-+ err = 0;
-+ if (a->dst_bstart != a->btgt)
-+ goto out;
++ iinfo = au_ii(inode);
++ AuRwMustAnyLock(&iinfo->ii_rwsem);
++ return !(iinfo->ii_hsb1 == h_inode->i_sb
++ && iinfo->ii_higen == h_inode->i_generation);
++}
+
-+ err = -ENOTEMPTY;
-+ if (unlikely(a->dst_h_dentry == a->h_trap))
-+ goto out;
++static inline void au_iigen_dec(struct inode *inode)
++{
++ atomic_dec(&au_ii(inode)->ii_generation);
++}
+
-+ err = -EIO;
-+ h_inode = a->dst_h_dentry->d_inode;
-+ isdir = !!au_ftest_ren(a->flags, ISDIR);
-+ if (!a->dst_dentry->d_inode) {
-+ if (unlikely(h_inode))
-+ goto out;
-+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
-+ isdir);
-+ } else {
-+ if (unlikely(!h_inode || !h_inode->i_nlink))
-+ goto out;
-+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
-+ isdir);
-+ if (unlikely(err))
-+ goto out;
-+ }
++static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
++{
++ int err;
+
-+out:
-+ if (unlikely(err == -ENOENT || err == -EEXIST))
++ err = 0;
++ if (unlikely(inode && au_iigen(inode) != sigen))
+ err = -EIO;
-+ AuTraceErr(err);
++
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
-+/*
-+ * locking order
-+ * (VFS)
-+ * - src_dir and dir by lock_rename()
-+ * - inode if exitsts
-+ * (aufs)
-+ * - lock all
-+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
-+ * + si_read_lock
-+ * + di_write_lock2_child()
-+ * + di_write_lock_child()
-+ * + ii_write_lock_child()
-+ * + di_write_lock_child2()
-+ * + ii_write_lock_child2()
-+ * + src_parent and parent
-+ * + di_write_lock_parent()
-+ * + ii_write_lock_parent()
-+ * + di_write_lock_parent2()
-+ * + ii_write_lock_parent2()
-+ * + lower src_dir and dir by vfsub_lock_rename()
-+ * + verify the every relationships between child and parent. if any
-+ * of them failed, unlock all and return -EBUSY.
-+ */
-+static void au_ren_unlock(struct au_ren_args *a)
++static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
++ aufs_bindex_t bindex)
+{
-+ struct super_block *sb;
-+
-+ sb = a->dst_dentry->d_sb;
-+ if (au_ftest_ren(a->flags, MNT_WRITE))
-+ mnt_drop_write(a->br->br_mnt);
-+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
-+ a->dst_h_parent, a->dst_hdir);
++ IiMustAnyLock(inode);
++ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
+}
+
-+static int au_ren_lock(struct au_ren_args *a)
++static inline aufs_bindex_t au_ibstart(struct inode *inode)
+{
-+ int err;
-+ unsigned int udba;
++ IiMustAnyLock(inode);
++ return au_ii(inode)->ii_bstart;
++}
+
-+ err = 0;
-+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
-+ a->src_hdir = au_hi(a->src_dir, a->btgt);
-+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
-+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
-+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
-+ a->dst_h_parent, a->dst_hdir);
-+ udba = au_opt_udba(a->src_dentry->d_sb);
-+ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
-+ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
-+ err = au_busy_or_stale();
-+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
-+ err = au_h_verify(a->src_h_dentry, udba,
-+ a->src_h_parent->d_inode, a->src_h_parent,
-+ a->br);
-+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
-+ err = au_h_verify(a->dst_h_dentry, udba,
-+ a->dst_h_parent->d_inode, a->dst_h_parent,
-+ a->br);
-+ if (!err) {
-+ err = mnt_want_write(a->br->br_mnt);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ au_fset_ren(a->flags, MNT_WRITE);
-+ goto out; /* success */
-+ }
++static inline aufs_bindex_t au_ibend(struct inode *inode)
++{
++ IiMustAnyLock(inode);
++ return au_ii(inode)->ii_bend;
++}
+
-+ err = au_busy_or_stale();
++static inline struct au_vdir *au_ivdir(struct inode *inode)
++{
++ IiMustAnyLock(inode);
++ return au_ii(inode)->ii_vdir;
++}
+
-+out_unlock:
-+ au_ren_unlock(a);
-+out:
-+ return err;
++static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
++{
++ IiMustAnyLock(inode);
++ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
+}
+
-+/* ---------------------------------------------------------------------- */
++static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
++{
++ IiMustWriteLock(inode);
++ au_ii(inode)->ii_bstart = bindex;
++}
+
-+static void au_ren_refresh_dir(struct au_ren_args *a)
++static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
+{
-+ struct inode *dir;
++ IiMustWriteLock(inode);
++ au_ii(inode)->ii_bend = bindex;
++}
+
-+ dir = a->dst_dir;
-+ dir->i_version++;
-+ if (au_ftest_ren(a->flags, ISDIR)) {
-+ /* is this updating defined in POSIX? */
-+ au_cpup_attr_timesizes(a->src_inode);
-+ au_cpup_attr_nlink(dir, /*force*/1);
-+ }
++static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
++{
++ IiMustWriteLock(inode);
++ au_ii(inode)->ii_vdir = vdir;
++}
+
-+ if (au_ibstart(dir) == a->btgt)
-+ au_cpup_attr_timesizes(dir);
++static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
++{
++ IiMustAnyLock(inode);
++ return au_ii(inode)->ii_hinode + bindex;
++}
+
-+ if (au_ftest_ren(a->flags, ISSAMEDIR))
-+ return;
++/* ---------------------------------------------------------------------- */
+
-+ dir = a->src_dir;
-+ dir->i_version++;
-+ if (au_ftest_ren(a->flags, ISDIR))
-+ au_cpup_attr_nlink(dir, /*force*/1);
-+ if (au_ibstart(dir) == a->btgt)
-+ au_cpup_attr_timesizes(dir);
++static inline struct dentry *au_pinned_parent(struct au_pin *pin)
++{
++ if (pin)
++ return pin->parent;
++ return NULL;
+}
+
-+static void au_ren_refresh(struct au_ren_args *a)
++static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
+{
-+ aufs_bindex_t bend, bindex;
-+ struct dentry *d, *h_d;
-+ struct inode *i, *h_i;
-+ struct super_block *sb;
++ if (pin && pin->hdir)
++ return pin->hdir->hi_inode;
++ return NULL;
++}
+
-+ d = a->dst_dentry;
-+ d_drop(d);
-+ if (a->h_dst)
-+ /* already dget-ed by au_ren_or_cpup() */
-+ au_set_h_dptr(d, a->btgt, a->h_dst);
++static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
++{
++ if (pin)
++ return pin->hdir;
++ return NULL;
++}
+
-+ i = a->dst_inode;
-+ if (i) {
-+ if (!au_ftest_ren(a->flags, ISDIR))
-+ vfsub_drop_nlink(i);
-+ else {
-+ vfsub_dead_dir(i);
-+ au_cpup_attr_timesizes(i);
-+ }
-+ au_update_dbrange(d, /*do_put_zero*/1);
-+ } else {
-+ bend = a->btgt;
-+ for (bindex = au_dbstart(d); bindex < bend; bindex++)
-+ au_set_h_dptr(d, bindex, NULL);
-+ bend = au_dbend(d);
-+ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
-+ au_set_h_dptr(d, bindex, NULL);
-+ au_update_dbrange(d, /*do_put_zero*/0);
-+ }
++static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
++{
++ if (pin)
++ pin->dentry = dentry;
++}
+
-+ d = a->src_dentry;
-+ au_set_dbwh(d, -1);
-+ bend = au_dbend(d);
-+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
-+ h_d = au_h_dptr(d, bindex);
-+ if (h_d)
-+ au_set_h_dptr(d, bindex, NULL);
++static inline void au_pin_set_parent_lflag(struct au_pin *pin,
++ unsigned char lflag)
++{
++ if (pin) {
++ if (lflag)
++ au_fset_pin(pin->flags, DI_LOCKED);
++ else
++ au_fclr_pin(pin->flags, DI_LOCKED);
+ }
-+ au_set_dbend(d, a->btgt);
-+
-+ sb = d->d_sb;
-+ i = a->src_inode;
-+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
-+ return; /* success */
++}
+
-+ bend = au_ibend(i);
-+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
-+ h_i = au_h_iptr(i, bindex);
-+ if (h_i) {
-+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
-+ /* ignore this error */
-+ au_set_h_iptr(i, bindex, NULL, 0);
-+ }
++static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
++{
++ if (pin) {
++ dput(pin->parent);
++ pin->parent = dget(parent);
+ }
-+ au_set_ibend(i, a->btgt);
+}
+
+/* ---------------------------------------------------------------------- */
+
-+/* mainly for link(2) and rename(2) */
-+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
-+{
-+ aufs_bindex_t bdiropq, bwh;
-+ struct dentry *parent;
-+ struct au_branch *br;
++struct au_branch;
++#ifdef CONFIG_AUFS_HNOTIFY
++struct au_hnotify_op {
++ void (*ctl)(struct au_hinode *hinode, int do_set);
++ int (*alloc)(struct au_hinode *hinode);
++ void (*free)(struct au_hinode *hinode);
+
-+ parent = dentry->d_parent;
-+ IMustLock(parent->d_inode); /* dir is locked */
++ void (*fin)(void);
++ int (*init)(void);
+
-+ bdiropq = au_dbdiropq(parent);
-+ bwh = au_dbwh(dentry);
-+ br = au_sbr(dentry->d_sb, btgt);
-+ if (au_br_rdonly(br)
-+ || (0 <= bdiropq && bdiropq < btgt)
-+ || (0 <= bwh && bwh < btgt))
-+ btgt = -1;
++ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
++ void (*fin_br)(struct au_branch *br);
++ int (*init_br)(struct au_branch *br, int perm);
++};
+
-+ AuDbg("btgt %d\n", btgt);
-+ return btgt;
++/* hnotify.c */
++int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
++void au_hn_free(struct au_hinode *hinode);
++void au_hn_ctl(struct au_hinode *hinode, int do_set);
++void au_hn_reset(struct inode *inode, unsigned int flags);
++int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
++ struct qstr *h_child_qstr, struct inode *h_child_inode);
++int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
++int au_hnotify_init_br(struct au_branch *br, int perm);
++void au_hnotify_fin_br(struct au_branch *br);
++int __init au_hnotify_init(void);
++void au_hnotify_fin(void);
++
++/* hfsnotify.c */
++extern const struct au_hnotify_op au_hnotify_op;
++
++static inline
++void au_hn_init(struct au_hinode *hinode)
++{
++ hinode->hi_notify = NULL;
+}
+
-+/* sets src_bstart, dst_bstart and btgt */
-+static int au_ren_wbr(struct au_ren_args *a)
++#else
++static inline
++int au_hn_alloc(struct au_hinode *hinode __maybe_unused,
++ struct inode *inode __maybe_unused)
+{
-+ int err;
-+ struct au_wr_dir_args wr_dir_args = {
-+ /* .force_btgt = -1, */
-+ .flags = AuWrDir_ADD_ENTRY
-+ };
++ return -EOPNOTSUPP;
++}
+
-+ a->src_bstart = au_dbstart(a->src_dentry);
-+ a->dst_bstart = au_dbstart(a->dst_dentry);
-+ if (au_ftest_ren(a->flags, ISDIR))
-+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
-+ wr_dir_args.force_btgt = a->src_bstart;
-+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
-+ wr_dir_args.force_btgt = a->dst_bstart;
-+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
-+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
-+ a->btgt = err;
++AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
++AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
++ int do_set __maybe_unused)
++AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
++ unsigned int flags __maybe_unused)
++AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
++ struct au_branch *br __maybe_unused,
++ int perm __maybe_unused)
++AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
++ int perm __maybe_unused)
++AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
++AuStubInt0(__init au_hnotify_init, void)
++AuStubVoid(au_hnotify_fin, void)
++AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
++#endif /* CONFIG_AUFS_HNOTIFY */
+
-+ return err;
++static inline void au_hn_suspend(struct au_hinode *hdir)
++{
++ au_hn_ctl(hdir, /*do_set*/0);
+}
+
-+static void au_ren_dt(struct au_ren_args *a)
++static inline void au_hn_resume(struct au_hinode *hdir)
+{
-+ a->h_path.dentry = a->src_h_parent;
-+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
-+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
-+ a->h_path.dentry = a->dst_h_parent;
-+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
-+ }
-+
-+ au_fclr_ren(a->flags, DT_DSTDIR);
-+ if (!au_ftest_ren(a->flags, ISDIR))
-+ return;
++ au_hn_ctl(hdir, /*do_set*/1);
++}
+
-+ a->h_path.dentry = a->src_h_dentry;
-+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
-+ if (a->dst_h_dentry->d_inode) {
-+ au_fset_ren(a->flags, DT_DSTDIR);
-+ a->h_path.dentry = a->dst_h_dentry;
-+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
-+ }
++static inline void au_hn_imtx_lock(struct au_hinode *hdir)
++{
++ mutex_lock(&hdir->hi_inode->i_mutex);
++ au_hn_suspend(hdir);
+}
+
-+static void au_ren_rev_dt(int err, struct au_ren_args *a)
++static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
++ unsigned int sc __maybe_unused)
+{
-+ struct dentry *h_d;
-+ struct mutex *h_mtx;
++ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
++ au_hn_suspend(hdir);
++}
+
-+ au_dtime_revert(a->src_dt + AuPARENT);
-+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
-+ au_dtime_revert(a->dst_dt + AuPARENT);
++static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
++{
++ au_hn_resume(hdir);
++ mutex_unlock(&hdir->hi_inode->i_mutex);
++}
+
-+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
-+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
-+ h_mtx = &h_d->d_inode->i_mutex;
-+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
-+ au_dtime_revert(a->src_dt + AuCHILD);
-+ mutex_unlock(h_mtx);
++#endif /* __KERNEL__ */
++#endif /* __AUFS_INODE_H__ */
+--- a/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
++++ b/fs/aufs/ioctl.c 2011-03-06 23:22:01.412413001 +0000
+@@ -0,0 +1,158 @@
++/*
++ * Copyright (C) 2005-2011 Junjiro R. Okajima
++ *
++ * This program, aufs is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++ */
+
-+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
-+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
-+ h_mtx = &h_d->d_inode->i_mutex;
-+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
-+ au_dtime_revert(a->dst_dt + AuCHILD);
-+ mutex_unlock(h_mtx);
-+ }
-+ }
-+}
++/*
++ * ioctl
++ * plink-management and readdir in userspace.
++ * assist the pathconf(3) wrapper library.
++ */
+
-+/* ---------------------------------------------------------------------- */
++#include <linux/file.h>
++#include "aufs.h"
+
-+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
-+ struct inode *_dst_dir, struct dentry *_dst_dentry)
++static int au_wbr_fd(struct path *path)
+{
-+ int err, flags;
-+ /* reduce stack space */
-+ struct au_ren_args *a;
-+
-+ AuDbg("%.*s, %.*s\n", AuDLNPair(_src_dentry), AuDLNPair(_dst_dentry));
-+ IMustLock(_src_dir);
-+ IMustLock(_dst_dir);
++ int err, fd;
++ aufs_bindex_t wbi, bindex, bend;
++ struct file *h_file;
++ struct super_block *sb;
++ struct dentry *root;
++ struct au_branch *wbr;
+
-+ err = -ENOMEM;
-+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
-+ a = kzalloc(sizeof(*a), GFP_NOFS);
-+ if (unlikely(!a))
++ err = get_unused_fd();
++ if (unlikely(err < 0))
+ goto out;
++ fd = err;
+
-+ a->src_dir = _src_dir;
-+ a->src_dentry = _src_dentry;
-+ a->src_inode = a->src_dentry->d_inode;
-+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
-+ a->dst_dir = _dst_dir;
-+ a->dst_dentry = _dst_dentry;
-+ a->dst_inode = a->dst_dentry->d_inode;
-+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
-+ if (a->dst_inode) {
-+ IMustLock(a->dst_inode);
-+ au_igrab(a->dst_inode);
++ wbi = 0;
++ sb = path->dentry->d_sb;
++ root = sb->s_root;
++ aufs_read_lock(root, AuLock_IR);
++ wbr = au_sbr(sb, wbi);
++ if (!(path->mnt->mnt_flags & MNT_READONLY)
++ && !au_br_writable(wbr->br_perm)) {
++ bend = au_sbend(sb);
++ for (bindex = 1; bindex <= bend; bindex++) {
++ wbr = au_sbr(sb, bindex);
++ if (au_br_writable(wbr->br_perm)) {
++ wbi = bindex;
++ break;
++ }
++ }
++ wbr = au_sbr(sb, wbi);
+ }
++ AuDbg("wbi %d\n", wbi);
++ h_file = au_h_open(root, wbi, O_RDONLY | O_DIRECTORY | O_LARGEFILE,
++ NULL);
++ aufs_read_unlock(root, AuLock_IR);
++ err = PTR_ERR(h_file);
++ if (IS_ERR(h_file))
++ goto out_fd;
++
++ atomic_dec(&wbr->br_count); /* cf. au_h_open() */
++ fd_install(fd, h_file);
++ err = fd;
++ goto out; /* success */
+
-+ err = -ENOTDIR;
-+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
-+ if (S_ISDIR(a->src_inode->i_mode)) {
-+ au_fset_ren(a->flags, ISDIR);
-+ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode)))
-+ goto out_free;
-+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
-+ AuLock_DIR | flags);
-+ } else
-+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
-+ flags);
-+ if (unlikely(err))
-+ goto out_free;
++out_fd:
++ put_unused_fd(fd);
++out:
++ return err;
++}
+
-+ err = au_d_hashed_positive(a->src_dentry);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ err = -ENOENT;
-+ if (a->dst_inode) {
-+ /*
-+ * If it is a dir, VFS unhash dst_dentry before this
-+ * function. It means we cannot rely upon d_unhashed().
-+ */
-+ if (unlikely(!a->dst_inode->i_nlink))
-+ goto out_unlock;
-+ if (!S_ISDIR(a->dst_inode->i_mode)) {
-+ err = au_d_hashed_positive(a->dst_dentry);
-+ if (unlikely(err))
-+ goto out_unlock;
-+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
-+ goto out_unlock;
-+ } else if (unlikely(d_unhashed(a->dst_dentry)))
-+ goto out_unlock;
++/* ---------------------------------------------------------------------- */
+
-+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
-+ di_write_lock_parent(a->dst_parent);
++long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
++{
++ long err;
+
-+ /* which branch we process */
-+ err = au_ren_wbr(a);
-+ if (unlikely(err < 0))
-+ goto out_parent;
-+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
-+ a->h_path.mnt = a->br->br_mnt;
++ switch (cmd) {
++ case AUFS_CTL_RDU:
++ case AUFS_CTL_RDU_INO:
++ err = au_rdu_ioctl(file, cmd, arg);
++ break;
+
-+ /* are they available to be renamed */
-+ err = au_ren_may_dir(a);
-+ if (unlikely(err))
-+ goto out_children;
++ case AUFS_CTL_WBR_FD:
++ err = au_wbr_fd(&file->f_path);
++ break;
+
-+ /* prepare the writable parent dir on the same branch */
-+ if (a->dst_bstart == a->btgt) {
-+ au_fset_ren(a->flags, WHDST);
-+ } else {
-+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
-+ if (unlikely(err))
-+ goto out_children;
++ case AUFS_CTL_IBUSY:
++ err = au_ibusy_ioctl(file, arg);
++ break;
++
++ default:
++ /* do not call the lower */
++ AuDbg("0x%x\n", cmd);
++ err = -ENOTTY;
+ }
+
-+ if (a->src_dir != a->dst_dir) {
-+ /*
-+ * this temporary unlock is safe,
-+ * because both dir->i_mutex are locked.
-+ */
-+ di_write_unlock(a->dst_parent);
-+ di_write_lock_parent(a->src_parent);
-+ err = au_wr_dir_need_wh(a->src_dentry,
-+ au_ftest_ren(a->flags, ISDIR),
-+ &a->btgt);
-+ di_write_unlock(a->src_parent);
-+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
-+ au_fclr_ren(a->flags, ISSAMEDIR);
-+ } else
-+ err = au_wr_dir_need_wh(a->src_dentry,
-+ au_ftest_ren(a->flags, ISDIR),
-+ &a->btgt);
-+ if (unlikely(err < 0))
-+ goto out_children;
-+ if (err)
-+ au_fset_ren(a->flags, WHSRC);
++ AuTraceErr(err);
++ return err;
++}
+
-+ /* lock them all */
-+ err = au_ren_lock(a);
-+ if (unlikely(err))
-+ goto out_children;
++long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
++{
++ long err;
+
-+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
-+ err = au_may_ren(a);
-+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
-+ err = -ENAMETOOLONG;
-+ if (unlikely(err))
-+ goto out_hdir;
++ switch (cmd) {
++ case AUFS_CTL_WBR_FD:
++ err = au_wbr_fd(&file->f_path);
++ break;
+
-+ /* store timestamps to be revertible */
-+ au_ren_dt(a);
++ default:
++ /* do not call the lower */
++ AuDbg("0x%x\n", cmd);
++ err = -ENOTTY;
++ }
+
-+ /* here we go */
-+ err = do_rename(a);
-+ if (unlikely(err))
-+ goto out_dt;
++ AuTraceErr(err);
++ return err;
++}
+
-+ /* update dir attributes */
-+ au_ren_refresh_dir(a);
++#ifdef CONFIG_COMPAT
++long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ long err;
+
-+ /* dput/iput all lower dentries */
-+ au_ren_refresh(a);
++ switch (cmd) {
++ case AUFS_CTL_RDU:
++ case AUFS_CTL_RDU_INO:
++ err = au_rdu_compat_ioctl(file, cmd, arg);
++ break;
+
-+ goto out_hdir; /* success */
++ case AUFS_CTL_IBUSY:
++ err = au_ibusy_compat_ioctl(file, arg);
++ break;
+
-+out_dt:
-+ au_ren_rev_dt(err, a);
-+out_hdir:
-+ au_ren_unlock(a);
-+out_children:
-+ au_nhash_wh_free(&a->whlist);
-+ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
-+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
-+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
-+ au_set_dbstart(a->dst_dentry, a->dst_bstart);
-+ }
-+out_parent:
-+ if (!err)
-+ d_move(a->src_dentry, a->dst_dentry);
-+ else {
-+ au_update_dbstart(a->dst_dentry);
-+ if (!a->dst_inode)
-+ d_drop(a->dst_dentry);
++ default:
++ err = aufs_ioctl_dir(file, cmd, arg);
+ }
-+ if (au_ftest_ren(a->flags, ISSAMEDIR))
-+ di_write_unlock(a->dst_parent);
-+ else
-+ di_write_unlock2(a->src_parent, a->dst_parent);
-+out_unlock:
-+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
-+out_free:
-+ iput(a->dst_inode);
-+ if (a->thargs)
-+ au_whtmp_rmdir_free(a->thargs);
-+ kfree(a);
-+out:
++
+ AuTraceErr(err);
+ return err;
+}
-diff -urN a/fs/aufs/loop.c b/fs/aufs/loop.c
++
++#if 0 /* unused yet */
++long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
++ unsigned long arg)
++{
++ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
++}
++#endif
++#endif
--- a/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/loop.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/loop.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -16910,9 +16958,8 @@
+
+ return ret;
+}
-diff -urN a/fs/aufs/loop.h b/fs/aufs/loop.h
--- a/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/loop.h 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/loop.h 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -16956,9 +17003,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_LOOP_H__ */
-diff -urN a/fs/aufs/magic.mk b/fs/aufs/magic.mk
--- a/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/magic.mk 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/magic.mk 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,54 @@
+
+# defined in ${srctree}/fs/fuse/inode.c
@@ -17014,9 +17060,8 @@
+ifdef CONFIG_HFSPLUS_FS
+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
+endif
-diff -urN a/fs/aufs/module.c b/fs/aufs/module.c
--- a/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/module.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/module.c 2011-03-06 23:22:01.412413001 +0000
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -17201,9 +17246,8 @@
+
+module_init(aufs_init);
+module_exit(aufs_exit);
-diff -urN a/fs/aufs/module.h b/fs/aufs/module.h
--- a/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/module.h 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/module.h 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -17296,9 +17340,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_MODULE_H__ */
-diff -urN a/fs/aufs/mtx.h b/fs/aufs/mtx.h
--- a/fs/aufs/mtx.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/mtx.h 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/mtx.h 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2010-2011 Junjiro R. Okajima
@@ -17348,9 +17391,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_MTX_H__ */
-diff -urN a/fs/aufs/opts.c b/fs/aufs/opts.c
--- a/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/opts.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/opts.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,1595 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -18947,9 +18989,8 @@
+{
+ return au_mntflags(sb) & AuOptMask_UDBA;
+}
-diff -urN a/fs/aufs/opts.h b/fs/aufs/opts.h
--- a/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/opts.h 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/opts.h 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -19161,9 +19202,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_OPTS_H__ */
-diff -urN a/fs/aufs/plink.c b/fs/aufs/plink.c
--- a/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/plink.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/plink.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,515 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -19680,9 +19720,8 @@
+ iput(inode);
+ }
+}
-diff -urN a/fs/aufs/poll.c b/fs/aufs/poll.c
--- a/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/poll.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/poll.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -19740,9 +19779,8 @@
+ AuTraceErr((int)mask);
+ return mask;
+}
-diff -urN a/fs/aufs/procfs.c b/fs/aufs/procfs.c
--- a/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/procfs.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/procfs.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2010-2011 Junjiro R. Okajima
@@ -19913,9 +19951,8 @@
+out:
+ return err;
+}
-diff -urN a/fs/aufs/rdu.c b/fs/aufs/rdu.c
--- a/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/rdu.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/rdu.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -20300,9 +20337,8 @@
+ return err;
+}
+#endif
-diff -urN a/fs/aufs/rwsem.h b/fs/aufs/rwsem.h
--- a/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/rwsem.h 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/rwsem.h 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -20493,9 +20529,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_RWSEM_H__ */
-diff -urN a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c
--- a/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/sbinfo.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/sbinfo.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,344 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -20841,9 +20876,8 @@
+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
+}
-diff -urN a/fs/aufs/spl.h b/fs/aufs/spl.h
--- a/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/spl.h 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/spl.h 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -20911,10 +20945,9 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_SPL_H__ */
-diff -urN a/fs/aufs/super.c b/fs/aufs/super.c
--- a/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/super.c 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,925 @@
++++ b/fs/aufs/super.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,929 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -21367,17 +21400,21 @@
+ n = 0;
+ p = a;
+ head = arg;
-+ spin_lock(&inode_lock);
++ spin_lock(&inode_sb_list_lock);
+ list_for_each_entry(inode, head, i_sb_list) {
+ if (!is_bad_inode(inode)
+ && au_ii(inode)->ii_bstart >= 0) {
-+ au_igrab(inode);
-+ *p++ = inode;
-+ n++;
-+ AuDebugOn(n > max);
++ spin_lock(&inode->i_lock);
++ if (atomic_read(&inode->i_count)) {
++ au_igrab(inode);
++ *p++ = inode;
++ n++;
++ AuDebugOn(n > max);
++ }
++ spin_unlock(&inode->i_lock);
+ }
+ }
-+ spin_unlock(&inode_lock);
++ spin_unlock(&inode_sb_list_lock);
+
+ return n;
+}
@@ -21840,9 +21877,8 @@
+ /* no need to __module_get() and module_put(). */
+ .owner = THIS_MODULE,
+};
-diff -urN a/fs/aufs/super.h b/fs/aufs/super.h
--- a/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/super.h 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/super.h 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,527 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -22371,9 +22407,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_SUPER_H__ */
-diff -urN a/fs/aufs/sysaufs.c b/fs/aufs/sysaufs.c
--- a/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/sysaufs.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/sysaufs.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -22482,9 +22517,8 @@
+out:
+ return err;
+}
-diff -urN a/fs/aufs/sysaufs.h b/fs/aufs/sysaufs.h
--- a/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/sysaufs.h 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/sysaufs.h 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -22591,9 +22625,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __SYSAUFS_H__ */
-diff -urN a/fs/aufs/sysfs.c b/fs/aufs/sysfs.c
--- a/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/sysfs.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/sysfs.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,250 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -22845,10 +22878,9 @@
+ br->br_name, err);
+ }
+}
-diff -urN a/fs/aufs/sysrq.c b/fs/aufs/sysrq.c
--- a/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/sysrq.c 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,148 @@
++++ b/fs/aufs/sysrq.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -22927,11 +22959,14 @@
+ {
+ struct inode *i;
+ printk(KERN_WARNING AUFS_NAME ": isolated inode\n");
-+ spin_lock(&inode_lock);
-+ list_for_each_entry(i, &sb->s_inodes, i_sb_list)
++ spin_lock(&inode_sb_list_lock);
++ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
++ spin_lock(&i->i_lock);
+ if (1 || list_empty(&i->i_dentry))
+ au_dpri_inode(i);
-+ spin_unlock(&inode_lock);
++ spin_unlock(&i->i_lock);
++ }
++ spin_unlock(&inode_sb_list_lock);
+ }
+#endif
+ printk(KERN_WARNING AUFS_NAME ": files\n");
@@ -22997,9 +23032,8 @@
+ if (unlikely(err))
+ pr_err("err %d (ignored)\n", err);
+}
-diff -urN a/fs/aufs/vdir.c b/fs/aufs/vdir.c
--- a/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/vdir.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/vdir.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,886 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -23887,10 +23921,9 @@
+ /* smp_mb(); */
+ return 0;
+}
-diff -urN a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c
--- a/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/vfsub.c 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,790 @@
++++ b/fs/aufs/vfsub.c 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,818 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -23943,51 +23976,18 @@
+
+/* ---------------------------------------------------------------------- */
+
-+static int au_conv_oflags(int flags)
-+{
-+ int mask = 0;
-+
-+#ifdef CONFIG_IMA
-+ fmode_t fmode;
-+
-+ /* mask = MAY_OPEN; */
-+ fmode = OPEN_FMODE(flags);
-+ if (fmode & FMODE_READ)
-+ mask |= MAY_READ;
-+ if ((fmode & FMODE_WRITE)
-+ || (flags & O_TRUNC))
-+ mask |= MAY_WRITE;
-+ /*
-+ * if (flags & O_APPEND)
-+ * mask |= MAY_APPEND;
-+ */
-+ if (flags & vfsub_fmode_to_uint(FMODE_EXEC))
-+ mask |= MAY_EXEC;
-+
-+ AuDbg("flags 0x%x, mask 0x%x\n", flags, mask);
-+#endif
-+
-+ return mask;
-+}
-+
+struct file *vfsub_dentry_open(struct path *path, int flags)
+{
+ struct file *file;
-+ int err;
+
+ path_get(path);
+ file = dentry_open(path->dentry, path->mnt,
-+ flags /* | vfsub_fmode_to_uint(FMODE_NONOTIFY) */,
++ flags /* | __FMODE_NONOTIFY */,
+ current_cred());
-+ if (IS_ERR(file))
-+ goto out;
++ if (!IS_ERR_OR_NULL(file)
++ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
++ i_readcount_inc(path->dentry->d_inode);
+
-+ err = ima_file_check(file, au_conv_oflags(flags));
-+ if (unlikely(err)) {
-+ fput(file);
-+ file = ERR_PTR(err);
-+ }
-+out:
+ return file;
+}
+
@@ -23995,9 +23995,11 @@
+{
+ struct file *file;
+
++ lockdep_off();
+ file = filp_open(path,
-+ oflags /* | vfsub_fmode_to_uint(FMODE_NONOTIFY) */,
++ oflags /* | __FMODE_NONOTIFY */,
+ mode);
++ lockdep_on();
+ if (IS_ERR(file))
+ goto out;
+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
@@ -24056,6 +24058,34 @@
+ return path.dentry;
+}
+
++/*
++ * this is "VFS:__lookup_one_len()" which was removed and merged into
++ * VFS:lookup_one_len() by the commit.
++ * 6a96ba5 2011-03-14 kill __lookup_one_len()
++ * this function should always be equivalent to the corresponding part in
++ * VFS:lookup_one_len().
++ */
++int vfsub_name_hash(const char *name, struct qstr *this, int len)
++{
++ unsigned long hash;
++ unsigned int c;
++
++ this->name = name;
++ this->len = len;
++ if (!len)
++ return -EACCES;
++
++ hash = init_name_hash();
++ while (len--) {
++ c = *(const unsigned char *)name++;
++ if (c == '/' || c == '\0')
++ return -EACCES;
++ hash = partial_name_hash(c, hash);
++ }
++ this->hash = end_name_hash(hash);
++ return 0;
++}
++
+/* ---------------------------------------------------------------------- */
+
+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
@@ -24063,7 +24093,9 @@
+{
+ struct dentry *d;
+
++ lockdep_off();
+ d = lock_rename(d1, d2);
++ lockdep_on();
+ au_hn_suspend(hdir1);
+ if (hdir1 != hdir2)
+ au_hn_suspend(hdir2);
@@ -24077,7 +24109,9 @@
+ au_hn_resume(hdir1);
+ if (hdir1 != hdir2)
+ au_hn_resume(hdir2);
++ lockdep_off();
+ unlock_rename(d1, d2);
++ lockdep_on();
+}
+
+/* ---------------------------------------------------------------------- */
@@ -24169,7 +24203,7 @@
+
+ d = path->dentry;
+ path->dentry = d->d_parent;
-+ err = security_path_mknod(path, d, mode, dev);
++ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
+ path->dentry = d;
+ if (unlikely(err))
+ goto out;
@@ -24219,7 +24253,9 @@
+ if (unlikely(err))
+ goto out;
+
++ lockdep_off();
+ err = vfs_link(src_dentry, dir, path->dentry);
++ lockdep_on();
+ if (!err) {
+ struct path tmp = *path;
+ int did;
@@ -24259,7 +24295,9 @@
+ if (unlikely(err))
+ goto out;
+
++ lockdep_off();
+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry);
++ lockdep_on();
+ if (!err) {
+ int did;
+
@@ -24323,7 +24361,9 @@
+ if (unlikely(err))
+ goto out;
+
++ lockdep_off();
+ err = vfs_rmdir(dir, path->dentry);
++ lockdep_on();
+ if (!err) {
+ struct path tmp = {
+ .dentry = path->dentry->d_parent,
@@ -24344,7 +24384,9 @@
+{
+ ssize_t err;
+
++ lockdep_off();
+ err = vfs_read(file, ubuf, count, ppos);
++ lockdep_on();
+ if (err >= 0)
+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+ return err;
@@ -24374,7 +24416,9 @@
+{
+ ssize_t err;
+
++ lockdep_off();
+ err = vfs_write(file, ubuf, count, ppos);
++ lockdep_on();
+ if (err >= 0)
+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+ return err;
@@ -24403,7 +24447,13 @@
+
+ err = 0;
+ if (file->f_op && file->f_op->flush) {
-+ err = file->f_op->flush(file, id);
++ if (!au_test_nfs(file->f_dentry->d_sb))
++ err = file->f_op->flush(file, id);
++ else {
++ lockdep_off();
++ err = file->f_op->flush(file, id);
++ lockdep_on();
++ }
+ if (!err)
+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
+ /*ignore*/
@@ -24415,7 +24465,9 @@
+{
+ int err;
+
++ lockdep_off();
+ err = vfs_readdir(file, filldir, arg);
++ lockdep_on();
+ if (err >= 0)
+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
+ return err;
@@ -24427,7 +24479,9 @@
+{
+ long err;
+
++ lockdep_off();
+ err = do_splice_to(in, ppos, pipe, len, flags);
++ lockdep_on();
+ file_accessed(in);
+ if (err >= 0)
+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
@@ -24439,7 +24493,9 @@
+{
+ long err;
+
++ lockdep_off();
+ err = do_splice_from(pipe, out, ppos, len, flags);
++ lockdep_on();
+ if (err >= 0)
+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
+ return err;
@@ -24471,8 +24527,11 @@
+ err = locks_verify_truncate(h_inode, h_file, length);
+ if (!err)
+ err = security_path_truncate(h_path);
-+ if (!err)
++ if (!err) {
++ lockdep_off();
+ err = do_truncate(h_path->dentry, length, attr, h_file);
++ lockdep_on();
++ }
+
+out_inode:
+ if (!h_file)
@@ -24639,7 +24698,9 @@
+ if (h_inode)
+ ihold(h_inode);
+
++ lockdep_off();
+ *a->errp = vfs_unlink(a->dir, d);
++ lockdep_on();
+ if (!*a->errp) {
+ struct path tmp = {
+ .dentry = d->d_parent,
@@ -24681,10 +24742,9 @@
+
+ return err;
+}
-diff -urN a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h
--- a/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/vfsub.h 2011-03-06 23:28:02.620413138 +0000
-@@ -0,0 +1,226 @@
++++ b/fs/aufs/vfsub.h 2011-06-03 06:08:42.681958206 +0100
+@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
+ *
@@ -24717,8 +24777,10 @@
+#include "debug.h"
+
+/* copied from linux/fs/internal.h */
++/* todo: BAD approach!! */
+DECLARE_BRLOCK(vfsmount_lock);
+extern void file_sb_list_del(struct file *f);
++extern spinlock_t inode_sb_list_lock;
+
+/* copied from linux/fs/file_table.c */
+DECLARE_LGLOCK(files_lglock);
@@ -24794,6 +24856,7 @@
+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
+ int len);
+struct dentry *vfsub_lookup_hash(struct nameidata *nd);
++int vfsub_name_hash(const char *name, struct qstr *this, int len);
+
+/* ---------------------------------------------------------------------- */
+
@@ -24869,7 +24932,9 @@
+{
+ loff_t err;
+
++ lockdep_off();
+ err = vfs_llseek(file, offset, origin);
++ lockdep_on();
+ return err;
+}
+
@@ -24911,9 +24976,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_VFSUB_H__ */
-diff -urN a/fs/aufs/wbr_policy.c b/fs/aufs/wbr_policy.c
--- a/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/wbr_policy.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/wbr_policy.c 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,700 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -25615,9 +25679,8 @@
+ .fin = au_wbr_create_fin_mfs
+ }
+};
-diff -urN a/fs/aufs/whout.c b/fs/aufs/whout.c
--- a/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/whout.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/whout.c 2011-03-06 23:22:01.416413001 +0000
@@ -0,0 +1,1062 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -26681,9 +26744,8 @@
+ au_whtmp_rmdir_free(args);
+ }
+}
-diff -urN a/fs/aufs/whout.h b/fs/aufs/whout.h
--- a/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/whout.h 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/whout.h 2011-02-12 16:30:08.948122160 +0000
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -26774,9 +26836,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_WHOUT_H__ */
-diff -urN a/fs/aufs/wkq.c b/fs/aufs/wkq.c
--- a/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/wkq.c 2011-03-06 23:28:02.620413138 +0000
++++ b/fs/aufs/wkq.c 2011-02-12 16:30:08.952122041 +0000
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -27014,9 +27075,8 @@
+
+ return err;
+}
-diff -urN a/fs/aufs/wkq.h b/fs/aufs/wkq.h
--- a/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/wkq.h 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/wkq.h 2011-02-12 16:30:08.952122041 +0000
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -27108,9 +27168,8 @@
+
+#endif /* __KERNEL__ */
+#endif /* __AUFS_WKQ_H__ */
-diff -urN a/fs/aufs/xino.c b/fs/aufs/xino.c
--- a/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
-+++ b/fs/aufs/xino.c 2011-03-06 23:28:02.616413258 +0000
++++ b/fs/aufs/xino.c 2011-06-03 06:08:42.681958206 +0100
@@ -0,0 +1,1265 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -27283,7 +27342,7 @@
+ path.mnt = base_file->f_vfsmnt;
+ file = vfsub_dentry_open(&path,
+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
-+ /* | FMODE_NONOTIFY */);
++ /* | __FMODE_NONOTIFY */);
+ if (IS_ERR(file)) {
+ pr_err("%.*s open err %ld\n", AuLNPair(name), PTR_ERR(file));
+ goto out_dput;
@@ -27800,7 +27859,7 @@
+ * when a user specified the xino, we cannot get au_hdir to be ignored.
+ */
+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
-+ /* | FMODE_NONOTIFY */,
++ /* | __FMODE_NONOTIFY */,
+ S_IRUGO | S_IWUGO);
+ if (IS_ERR(file)) {
+ if (!silent)
@@ -28377,9 +28436,8 @@
+out:
+ return err;
+}
-diff -urN a/include/linux/aufs_type.h b/include/linux/aufs_type.h
--- a/include/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
-+++ b/include/linux/aufs_type.h 2011-03-06 23:28:02.624413046 +0000
++++ b/include/linux/aufs_type.h 2011-06-03 06:08:42.697958282 +0100
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2005-2011 Junjiro R. Okajima
@@ -28407,7 +28465,7 @@
+#include <linux/limits.h>
+#include <linux/types.h>
+
-+#define AUFS_VERSION "2.1-standalone.tree-38-rcN-20110228"
++#define AUFS_VERSION "2.1-standalone.tree-39-rcN-20110418"
+
+/* todo? move this to linux-2.6.19/include/magic.h */
+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
Modified: dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-base.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-base.patch Fri Jun 3 04:21:35 2011 (r17587)
+++ dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-base.patch Fri Jun 3 05:30:22 2011 (r17588)
@@ -1,10 +1,10 @@
aufs2.1 base patch for linux-2.6.
diff --git a/fs/namei.c b/fs/namei.c
-index 0087cf9..cd39cdf 100644
+index e6cd611..27d7dc7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
-@@ -1841,12 +1841,12 @@ out:
+@@ -1768,7 +1768,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
* needs parent already locked. Doesn't follow mounts.
* SMP-safe.
*/
@@ -13,17 +13,11 @@
{
return __lookup_hash(&nd->last, nd->path.dentry, nd);
}
-
--static int __lookup_one_len(const char *name, struct qstr *this,
-+int __lookup_one_len(const char *name, struct qstr *this,
- struct dentry *base, int len)
- {
- unsigned long hash;
diff --git a/fs/splice.c b/fs/splice.c
-index 50a5d97..886e942 100644
+index 50a5d978..886e942 100644
--- a/fs/splice.c
+++ b/fs/splice.c
-@@ -1081,8 +1081,8 @@ EXPORT_SYMBOL_GPL(generic_splice_sendpage);
+@@ -1081,8 +1081,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
/*
* Attempt to initiate a splice from pipe to file.
*/
@@ -48,16 +42,14 @@
ssize_t (*splice_read)(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
diff --git a/include/linux/namei.h b/include/linux/namei.h
-index f276d4f..4eb5fcb 100644
+index eba45ea..21ed6c9 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
-@@ -79,6 +79,9 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
+@@ -82,6 +82,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
int (*open)(struct inode *, struct file *));
+extern struct dentry *lookup_hash(struct nameidata *nd);
-+extern int __lookup_one_len(const char *name, struct qstr *this,
-+ struct dentry *base, int len);
extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
extern int follow_down_one(struct path *);
Modified: dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-kbuild.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-kbuild.patch Fri Jun 3 04:21:35 2011 (r17587)
+++ dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-kbuild.patch Fri Jun 3 05:30:22 2011 (r17588)
@@ -1,10 +1,10 @@
aufs2.1 kbuild patch for linux-2.6.
diff --git a/fs/Kconfig b/fs/Kconfig
-index 3db9caa..c9e1f11 100644
+index f3aa9b0..d0eaf6e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
-@@ -190,6 +190,7 @@ source "fs/romfs/Kconfig"
+@@ -191,6 +191,7 @@ source "fs/pstore/Kconfig"
source "fs/sysv/Kconfig"
source "fs/ufs/Kconfig"
source "fs/exofs/Kconfig"
@@ -13,16 +13,16 @@
endif # MISC_FILESYSTEMS
diff --git a/fs/Makefile b/fs/Makefile
-index a7f7cef..95dd4d3 100644
+index fb68c2b..c031a85 100644
--- a/fs/Makefile
+++ b/fs/Makefile
-@@ -121,3 +121,4 @@ obj-$(CONFIG_BTRFS_FS) += btrfs/
- obj-$(CONFIG_GFS2_FS) += gfs2/
+@@ -124,3 +124,4 @@ obj-$(CONFIG_GFS2_FS) += gfs2/
obj-$(CONFIG_EXOFS_FS) += exofs/
obj-$(CONFIG_CEPH_FS) += ceph/
+ obj-$(CONFIG_PSTORE) += pstore/
+obj-$(CONFIG_AUFS_FS) += aufs/
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
-index b0ada6f..5cb5837 100644
+index 75cf611..f9f42b5 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -64,6 +64,7 @@ header-y += atmppp.h
Modified: dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-standalone.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-standalone.patch Fri Jun 3 04:21:35 2011 (r17587)
+++ dists/sid/linux-2.6/debian/patches/features/all/aufs2/aufs2-standalone.patch Fri Jun 3 05:30:22 2011 (r17588)
@@ -1,10 +1,10 @@
aufs2.1 standalone patch for linux-2.6.
diff --git a/fs/file_table.c b/fs/file_table.c
-index eb36b6b..12f2809 100644
+index 01e4c1e..0e800e2 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
-@@ -393,6 +393,8 @@ void file_sb_list_del(struct file *file)
+@@ -443,6 +443,8 @@ void file_sb_list_del(struct file *file)
}
}
@@ -14,22 +14,22 @@
/*
diff --git a/fs/inode.c b/fs/inode.c
-index da85e56..b3dc5d8 100644
+index 33c963d..e31e8d5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
-@@ -82,6 +82,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
- * the i_state of an inode while it is in use..
- */
- DEFINE_SPINLOCK(inode_lock);
-+EXPORT_SYMBOL_GPL(inode_lock);
+@@ -112,6 +112,7 @@ static DEFINE_SPINLOCK(inode_lru_lock);
+
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
++EXPORT_SYMBOL_GPL(inode_sb_list_lock);
/*
- * iprune_sem provides exclusion between the kswapd or try_to_free_pages
+ * iprune_sem provides exclusion between the icache shrinking and the
diff --git a/fs/namei.c b/fs/namei.c
-index cd39cdf..db4290c 100644
+index 27d7dc7..3c72fd5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
-@@ -353,6 +353,7 @@ int deny_write_access(struct file * file)
+@@ -364,6 +364,7 @@ int deny_write_access(struct file * file)
return 0;
}
@@ -37,27 +37,19 @@
/**
* path_get - get a reference to a path
-@@ -1845,6 +1846,7 @@ struct dentry *lookup_hash(struct nameidata *nd)
+@@ -1772,6 +1773,7 @@ struct dentry *lookup_hash(struct nameidata *nd)
{
return __lookup_hash(&nd->last, nd->path.dentry, nd);
}
+EXPORT_SYMBOL_GPL(lookup_hash);
- int __lookup_one_len(const char *name, struct qstr *this,
- struct dentry *base, int len)
-@@ -1867,6 +1869,7 @@ int __lookup_one_len(const char *name, struct qstr *this,
- this->hash = end_name_hash(hash);
- return 0;
- }
-+EXPORT_SYMBOL_GPL(__lookup_one_len);
-
/**
* lookup_one_len - filesystem helper to lookup single pathname component
diff --git a/fs/namespace.c b/fs/namespace.c
-index 7b0b953..b304f68 100644
+index 7dba2ed..d7570d1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
-@@ -1465,6 +1465,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
+@@ -1524,6 +1524,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
}
return 0;
}
@@ -91,10 +83,10 @@
}
+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
-index 325185e..adede09 100644
+index 252ab1f..2199b9b 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
-@@ -113,6 +113,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
+@@ -112,6 +112,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
if (atomic_dec_and_test(&mark->refcnt))
mark->free_mark(mark);
}
@@ -102,7 +94,7 @@
/*
* Any time a mark is getting freed we end up here.
-@@ -190,6 +191,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
+@@ -189,6 +190,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
if (unlikely(atomic_dec_and_test(&group->num_marks)))
fsnotify_final_destroy_group(group);
}
@@ -110,7 +102,7 @@
void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
{
-@@ -277,6 +279,7 @@ err:
+@@ -276,6 +278,7 @@ err:
return ret;
}
@@ -118,7 +110,7 @@
/*
* clear any marks in a group in which mark->flags & flags is true
-@@ -332,6 +335,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
+@@ -331,6 +334,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
atomic_set(&mark->refcnt, 1);
mark->free_mark = free_mark;
}
@@ -127,7 +119,7 @@
static int fsnotify_mark_destroy(void *ignored)
{
diff --git a/fs/open.c b/fs/open.c
-index 5a2c6eb..f0fa5b2 100644
+index b52cf01..c1b341c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -60,6 +60,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
@@ -159,10 +151,10 @@
/**
* splice_direct_to_actor - splices data directly between two non-pipes
diff --git a/security/commoncap.c b/security/commoncap.c
-index 64c2ed9..e58b5d8 100644
+index f20e984..d39acd9 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
-@@ -929,3 +929,4 @@ int cap_file_mmap(struct file *file, unsigned long reqprot,
+@@ -976,3 +976,4 @@ int cap_file_mmap(struct file *file, unsigned long reqprot,
}
return ret;
}
@@ -180,18 +172,10 @@
int devcgroup_inode_mknod(int mode, dev_t dev)
{
diff --git a/security/security.c b/security/security.c
-index 7b7308a..140afc7 100644
+index 1011423..3bb850b 100644
--- a/security/security.c
+++ b/security/security.c
-@@ -359,6 +359,7 @@ int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode)
- return 0;
- return security_ops->path_mkdir(dir, dentry, mode);
- }
-+EXPORT_SYMBOL_GPL(security_path_mkdir);
-
- int security_path_rmdir(struct path *dir, struct dentry *dentry)
- {
-@@ -366,6 +367,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
+@@ -373,6 +373,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
return 0;
return security_ops->path_rmdir(dir, dentry);
}
@@ -199,15 +183,7 @@
int security_path_unlink(struct path *dir, struct dentry *dentry)
{
-@@ -373,6 +375,7 @@ int security_path_unlink(struct path *dir, struct dentry *dentry)
- return 0;
- return security_ops->path_unlink(dir, dentry);
- }
-+EXPORT_SYMBOL_GPL(security_path_unlink);
-
- int security_path_symlink(struct path *dir, struct dentry *dentry,
- const char *old_name)
-@@ -381,6 +384,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
+@@ -389,6 +390,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
return 0;
return security_ops->path_symlink(dir, dentry, old_name);
}
@@ -215,7 +191,7 @@
int security_path_link(struct dentry *old_dentry, struct path *new_dir,
struct dentry *new_dentry)
-@@ -389,6 +393,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
+@@ -397,6 +399,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
return 0;
return security_ops->path_link(old_dentry, new_dir, new_dentry);
}
@@ -223,15 +199,7 @@
int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
struct path *new_dir, struct dentry *new_dentry)
-@@ -399,6 +404,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
- return security_ops->path_rename(old_dir, old_dentry, new_dir,
- new_dentry);
- }
-+EXPORT_SYMBOL_GPL(security_path_rename);
-
- int security_path_truncate(struct path *path)
- {
-@@ -406,6 +412,7 @@ int security_path_truncate(struct path *path)
+@@ -415,6 +418,7 @@ int security_path_truncate(struct path *path)
return 0;
return security_ops->path_truncate(path);
}
@@ -239,7 +207,7 @@
int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
mode_t mode)
-@@ -414,6 +421,7 @@ int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
+@@ -423,6 +427,7 @@ int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt,
return 0;
return security_ops->path_chmod(dentry, mnt, mode);
}
@@ -247,7 +215,7 @@
int security_path_chown(struct path *path, uid_t uid, gid_t gid)
{
-@@ -421,6 +429,7 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid)
+@@ -430,6 +435,7 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid)
return 0;
return security_ops->path_chown(path, uid, gid);
}
@@ -255,7 +223,7 @@
int security_path_chroot(struct path *path)
{
-@@ -497,6 +506,7 @@ int security_inode_readlink(struct dentry *dentry)
+@@ -506,6 +512,7 @@ int security_inode_readlink(struct dentry *dentry)
return 0;
return security_ops->inode_readlink(dentry);
}
@@ -263,15 +231,15 @@
int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
{
-@@ -511,6 +521,7 @@ int security_inode_permission(struct inode *inode, int mask)
+@@ -520,6 +527,7 @@ int security_inode_permission(struct inode *inode, int mask)
return 0;
- return security_ops->inode_permission(inode, mask);
+ return security_ops->inode_permission(inode, mask, 0);
}
+EXPORT_SYMBOL_GPL(security_inode_permission);
int security_inode_exec_permission(struct inode *inode, unsigned int flags)
{
-@@ -619,6 +630,7 @@ int security_file_permission(struct file *file, int mask)
+@@ -628,6 +636,7 @@ int security_file_permission(struct file *file, int mask)
return fsnotify_perm(file, mask);
}
@@ -279,7 +247,7 @@
int security_file_alloc(struct file *file)
{
-@@ -646,6 +658,7 @@ int security_file_mmap(struct file *file, unsigned long reqprot,
+@@ -655,6 +664,7 @@ int security_file_mmap(struct file *file, unsigned long reqprot,
return ret;
return ima_file_mmap(file, prot);
}
Modified: dists/sid/linux-2.6/debian/patches/series/2
==============================================================================
--- dists/sid/linux-2.6/debian/patches/series/2 Fri Jun 3 04:21:35 2011 (r17587)
+++ dists/sid/linux-2.6/debian/patches/series/2 Fri Jun 3 05:30:22 2011 (r17588)
@@ -4,3 +4,9 @@
+ features/all/rt2800usb-Add-seven-new-USB-IDs.patch
+ features/all/rt2x00-Linksys-WUSB600N-rev2-is-a-RT3572-device.patch
+ bugfix/all/stable/2.6.39.1-rc1
+
++ features/all/aufs2/aufs2-base.patch
++ features/all/aufs2/aufs2-standalone.patch
++ features/all/aufs2/aufs2-kbuild.patch
++ features/all/aufs2/aufs2-add.patch
++ features/all/aufs2/mark-as-staging.patch
Modified: dists/sid/linux-2.6/debian/patches/series/base
==============================================================================
--- dists/sid/linux-2.6/debian/patches/series/base Fri Jun 3 04:21:35 2011 (r17587)
+++ dists/sid/linux-2.6/debian/patches/series/base Fri Jun 3 05:30:22 2011 (r17588)
@@ -14,8 +14,6 @@
#+ features/all/aufs2/aufs2-add.patch
# mark as staging/crap
#+ features/all/aufs2/mark-as-staging.patch
-# fix not upstream yet
-#+ features/all/aufs2/Fix-aufs-calling-of-security_path_mknod.patch
+ bugfix/ia64/hardcode-arch-script-output.patch
+ bugfix/mips/disable-advansys.patch
More information about the Kernel-svn-changes
mailing list