[kernel] r20208 - in dists/wheezy/linux/debian: . patches patches/bugfix/all

Ben Hutchings benh at alioth.debian.org
Sat Jun 8 17:12:22 UTC 2013


Author: benh
Date: Sat Jun  8 17:12:21 2013
New Revision: 20208

Log:
ext3,ext4,nfsd: dir_index: Return 64-bit readdir cookies for NFSv3 and 4 (Closes: #685407)

Added:
   dists/wheezy/linux/debian/patches/bugfix/all/ext3-return-32-64-bit-dir-name-hash-according-to-usa.patch
   dists/wheezy/linux/debian/patches/bugfix/all/ext4-return-32-64-bit-dir-name-hash-according-to-usa.patch
   dists/wheezy/linux/debian/patches/bugfix/all/fs-add-new-FMODE-flags-FMODE_32bithash-and-FMODE_64b.patch
   dists/wheezy/linux/debian/patches/bugfix/all/nfsd-rename-int-access-to-int-may_flags-in-nfsd_open.patch
   dists/wheezy/linux/debian/patches/bugfix/all/nfsd-vfs_llseek-with-32-or-64-bit-offsets-hashes.patch
Modified:
   dists/wheezy/linux/debian/changelog
   dists/wheezy/linux/debian/patches/series

Modified: dists/wheezy/linux/debian/changelog
==============================================================================
--- dists/wheezy/linux/debian/changelog	Sat Jun  8 16:40:34 2013	(r20207)
+++ dists/wheezy/linux/debian/changelog	Sat Jun  8 17:12:21 2013	(r20208)
@@ -194,6 +194,10 @@
     - maintain one seqid stream per (lockowner, file)
   * ipw2100,ipw2200: Fix order of device registration (Closes: #656813)
 
+  [ Jonathan Nieder ]
+  * ext3,ext4,nfsd: dir_index: Return 64-bit readdir cookies for NFSv3 and 4
+    (Closes: #685407)
+
  -- Ben Hutchings <ben at decadent.org.uk>  Wed, 27 Mar 2013 14:10:40 +0000
 
 linux (3.2.41-2+deb7u2) wheezy-security; urgency=high

Added: dists/wheezy/linux/debian/patches/bugfix/all/ext3-return-32-64-bit-dir-name-hash-according-to-usa.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/wheezy/linux/debian/patches/bugfix/all/ext3-return-32-64-bit-dir-name-hash-according-to-usa.patch	Sat Jun  8 17:12:21 2013	(r20208)
@@ -0,0 +1,348 @@
+From: Eric Sandeen <sandeen at redhat.com>
+Date: Thu, 26 Apr 2012 13:10:39 -0500
+Subject: ext3: return 32/64-bit dir name hash according to usage type
+
+commit d7dab39b6e16d5eea78ed3c705d2a2d0772b4f06 upstream.
+
+This is based on commit d1f5273e9adb40724a85272f248f210dc4ce919a
+ext4: return 32/64-bit dir name hash according to usage type
+by Fan Yong <yong.fan at whamcloud.com>
+
+Traditionally ext2/3/4 has returned a 32-bit hash value from llseek()
+to appease NFSv2, which can only handle a 32-bit cookie for seekdir()
+and telldir().  However, this causes problems if there are 32-bit hash
+collisions, since the NFSv2 server can get stuck resending the same
+entries from the directory repeatedly.
+
+Allow ext3 to return a full 64-bit hash (both major and minor) for
+telldir to decrease the chance of hash collisions.
+
+This patch does implement a new ext3_dir_llseek op, because with 64-bit
+hashes, nfs will attempt to seek to a hash "offset" which is much
+larger than ext3's s_maxbytes.  So for dx dirs, we call
+generic_file_llseek_size() with the appropriate max hash value as the
+maximum seekable size.  Otherwise we just pass through to
+generic_file_llseek().
+
+Patch-updated-by: Bernd Schubert <bernd.schubert at itwm.fraunhofer.de>
+Patch-updated-by: Eric Sandeen <sandeen at redhat.com>
+(blame us if something is not correct)
+
+Signed-off-by: Eric Sandeen <sandeen at redhat.com>
+Signed-off-by: Jan Kara <jack at suse.cz>
+Signed-off-by: Jonathan Nieder <jrnieder at gmail.com>
+---
+ fs/ext3/dir.c           |  167 ++++++++++++++++++++++++++++++++++-------------
+ fs/ext3/hash.c          |    4 +-
+ include/linux/ext3_fs.h |    6 +-
+ 3 files changed, 129 insertions(+), 48 deletions(-)
+
+diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
+index 34f0a07..3268697 100644
+--- a/fs/ext3/dir.c
++++ b/fs/ext3/dir.c
+@@ -25,6 +25,7 @@
+ #include <linux/jbd.h>
+ #include <linux/ext3_fs.h>
+ #include <linux/buffer_head.h>
++#include <linux/compat.h>
+ #include <linux/slab.h>
+ #include <linux/rbtree.h>
+ 
+@@ -32,24 +33,8 @@ static unsigned char ext3_filetype_table[] = {
+ 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
+ };
+ 
+-static int ext3_readdir(struct file *, void *, filldir_t);
+ static int ext3_dx_readdir(struct file * filp,
+ 			   void * dirent, filldir_t filldir);
+-static int ext3_release_dir (struct inode * inode,
+-				struct file * filp);
+-
+-const struct file_operations ext3_dir_operations = {
+-	.llseek		= generic_file_llseek,
+-	.read		= generic_read_dir,
+-	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
+-	.unlocked_ioctl	= ext3_ioctl,
+-#ifdef CONFIG_COMPAT
+-	.compat_ioctl	= ext3_compat_ioctl,
+-#endif
+-	.fsync		= ext3_sync_file,	/* BKL held */
+-	.release	= ext3_release_dir,
+-};
+-
+ 
+ static unsigned char get_dtype(struct super_block *sb, int filetype)
+ {
+@@ -60,6 +45,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
+ 	return (ext3_filetype_table[filetype]);
+ }
+ 
++/**
++ * Check if the given dir-inode refers to an htree-indexed directory
++ * (or a directory which could potentially get converted to use htree
++ * indexing).
++ *
++ * Return 1 if it is a dx dir, 0 if not
++ */
++static int is_dx_dir(struct inode *inode)
++{
++	struct super_block *sb = inode->i_sb;
++
++	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
++		     EXT3_FEATURE_COMPAT_DIR_INDEX) &&
++	    ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
++	     ((inode->i_size >> sb->s_blocksize_bits) == 1)))
++		return 1;
++
++	return 0;
++}
+ 
+ int ext3_check_dir_entry (const char * function, struct inode * dir,
+ 			  struct ext3_dir_entry_2 * de,
+@@ -99,18 +103,13 @@ static int ext3_readdir(struct file * filp,
+ 	unsigned long offset;
+ 	int i, stored;
+ 	struct ext3_dir_entry_2 *de;
+-	struct super_block *sb;
+ 	int err;
+ 	struct inode *inode = filp->f_path.dentry->d_inode;
++	struct super_block *sb = inode->i_sb;
+ 	int ret = 0;
+ 	int dir_has_error = 0;
+ 
+-	sb = inode->i_sb;
+-
+-	if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
+-				    EXT3_FEATURE_COMPAT_DIR_INDEX) &&
+-	    ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
+-	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
++	if (is_dx_dir(inode)) {
+ 		err = ext3_dx_readdir(filp, dirent, filldir);
+ 		if (err != ERR_BAD_DX_DIR) {
+ 			ret = err;
+@@ -232,22 +231,87 @@ out:
+ 	return ret;
+ }
+ 
++static inline int is_32bit_api(void)
++{
++#ifdef CONFIG_COMPAT
++	return is_compat_task();
++#else
++	return (BITS_PER_LONG == 32);
++#endif
++}
++
+ /*
+  * These functions convert from the major/minor hash to an f_pos
+- * value.
++ * value for dx directories
+  *
+- * Currently we only use major hash numer.  This is unfortunate, but
+- * on 32-bit machines, the same VFS interface is used for lseek and
+- * llseek, so if we use the 64 bit offset, then the 32-bit versions of
+- * lseek/telldir/seekdir will blow out spectacularly, and from within
+- * the ext2 low-level routine, we don't know if we're being called by
+- * a 64-bit version of the system call or the 32-bit version of the
+- * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
+- * cookie.  Sigh.
++ * Upper layer (for example NFS) should specify FMODE_32BITHASH or
++ * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted
++ * directly on both 32-bit and 64-bit nodes, under such case, neither
++ * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
+  */
+-#define hash2pos(major, minor)	(major >> 1)
+-#define pos2maj_hash(pos)	((pos << 1) & 0xffffffff)
+-#define pos2min_hash(pos)	(0)
++static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
++{
++	if ((filp->f_mode & FMODE_32BITHASH) ||
++	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
++		return major >> 1;
++	else
++		return ((__u64)(major >> 1) << 32) | (__u64)minor;
++}
++
++static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
++{
++	if ((filp->f_mode & FMODE_32BITHASH) ||
++	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
++		return (pos << 1) & 0xffffffff;
++	else
++		return ((pos >> 32) << 1) & 0xffffffff;
++}
++
++static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
++{
++	if ((filp->f_mode & FMODE_32BITHASH) ||
++	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
++		return 0;
++	else
++		return pos & 0xffffffff;
++}
++
++/*
++ * Return 32- or 64-bit end-of-file for dx directories
++ */
++static inline loff_t ext3_get_htree_eof(struct file *filp)
++{
++	if ((filp->f_mode & FMODE_32BITHASH) ||
++	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
++		return EXT3_HTREE_EOF_32BIT;
++	else
++		return EXT3_HTREE_EOF_64BIT;
++}
++
++
++/*
++ * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both
++ * non-htree and htree directories, where the "offset" is in terms
++ * of the filename hash value instead of the byte offset.
++ *
++ * Because we may return a 64-bit hash that is well beyond s_maxbytes,
++ * we need to pass the max hash as the maximum allowable offset in
++ * the htree directory case.
++ *
++ * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
++ *       will be invalid once the directory was converted into a dx directory
++ */
++loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
++{
++	struct inode *inode = file->f_mapping->host;
++	int dx_dir = is_dx_dir(inode);
++
++	if (likely(dx_dir))
++		return generic_file_llseek_size(file, offset, origin,
++					        ext3_get_htree_eof(file));
++	else
++		return generic_file_llseek(file, offset, origin);
++}
+ 
+ /*
+  * This structure holds the nodes of the red-black tree used to store
+@@ -308,15 +372,16 @@ static void free_rb_tree_fname(struct rb_root *root)
+ }
+ 
+ 
+-static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
++static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
++							   loff_t pos)
+ {
+ 	struct dir_private_info *p;
+ 
+ 	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+ 	if (!p)
+ 		return NULL;
+-	p->curr_hash = pos2maj_hash(pos);
+-	p->curr_minor_hash = pos2min_hash(pos);
++	p->curr_hash = pos2maj_hash(filp, pos);
++	p->curr_minor_hash = pos2min_hash(filp, pos);
+ 	return p;
+ }
+ 
+@@ -406,7 +471,7 @@ static int call_filldir(struct file * filp, void * dirent,
+ 		printk("call_filldir: called with null fname?!?\n");
+ 		return 0;
+ 	}
+-	curr_pos = hash2pos(fname->hash, fname->minor_hash);
++	curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
+ 	while (fname) {
+ 		error = filldir(dirent, fname->name,
+ 				fname->name_len, curr_pos,
+@@ -431,13 +496,13 @@ static int ext3_dx_readdir(struct file * filp,
+ 	int	ret;
+ 
+ 	if (!info) {
+-		info = ext3_htree_create_dir_info(filp->f_pos);
++		info = ext3_htree_create_dir_info(filp, filp->f_pos);
+ 		if (!info)
+ 			return -ENOMEM;
+ 		filp->private_data = info;
+ 	}
+ 
+-	if (filp->f_pos == EXT3_HTREE_EOF)
++	if (filp->f_pos == ext3_get_htree_eof(filp))
+ 		return 0;	/* EOF */
+ 
+ 	/* Some one has messed with f_pos; reset the world */
+@@ -445,8 +510,8 @@ static int ext3_dx_readdir(struct file * filp,
+ 		free_rb_tree_fname(&info->root);
+ 		info->curr_node = NULL;
+ 		info->extra_fname = NULL;
+-		info->curr_hash = pos2maj_hash(filp->f_pos);
+-		info->curr_minor_hash = pos2min_hash(filp->f_pos);
++		info->curr_hash = pos2maj_hash(filp, filp->f_pos);
++		info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
+ 	}
+ 
+ 	/*
+@@ -478,7 +543,7 @@ static int ext3_dx_readdir(struct file * filp,
+ 			if (ret < 0)
+ 				return ret;
+ 			if (ret == 0) {
+-				filp->f_pos = EXT3_HTREE_EOF;
++				filp->f_pos = ext3_get_htree_eof(filp);
+ 				break;
+ 			}
+ 			info->curr_node = rb_first(&info->root);
+@@ -498,7 +563,7 @@ static int ext3_dx_readdir(struct file * filp,
+ 			info->curr_minor_hash = fname->minor_hash;
+ 		} else {
+ 			if (info->next_hash == ~0) {
+-				filp->f_pos = EXT3_HTREE_EOF;
++				filp->f_pos = ext3_get_htree_eof(filp);
+ 				break;
+ 			}
+ 			info->curr_hash = info->next_hash;
+@@ -517,3 +582,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)
+ 
+ 	return 0;
+ }
++
++const struct file_operations ext3_dir_operations = {
++	.llseek		= ext3_dir_llseek,
++	.read		= generic_read_dir,
++	.readdir	= ext3_readdir,
++	.unlocked_ioctl = ext3_ioctl,
++#ifdef CONFIG_COMPAT
++	.compat_ioctl	= ext3_compat_ioctl,
++#endif
++	.fsync		= ext3_sync_file,
++	.release	= ext3_release_dir,
++};
+diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
+index 7d215b4..d4d3ade 100644
+--- a/fs/ext3/hash.c
++++ b/fs/ext3/hash.c
+@@ -200,8 +200,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
+ 		return -1;
+ 	}
+ 	hash = hash & ~1;
+-	if (hash == (EXT3_HTREE_EOF << 1))
+-		hash = (EXT3_HTREE_EOF-1) << 1;
++	if (hash == (EXT3_HTREE_EOF_32BIT << 1))
++		hash = (EXT3_HTREE_EOF_32BIT - 1) << 1;
+ 	hinfo->hash = hash;
+ 	hinfo->minor_hash = minor_hash;
+ 	return 0;
+diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
+index dec9911..d59ab12 100644
+--- a/include/linux/ext3_fs.h
++++ b/include/linux/ext3_fs.h
+@@ -781,7 +781,11 @@ struct dx_hash_info
+ 	u32		*seed;
+ };
+ 
+-#define EXT3_HTREE_EOF	0x7fffffff
++
++/* 32 and 64 bit signed EOF for dx directories */
++#define EXT3_HTREE_EOF_32BIT   ((1UL  << (32 - 1)) - 1)
++#define EXT3_HTREE_EOF_64BIT   ((1ULL << (64 - 1)) - 1)
++
+ 
+ /*
+  * Control parameters used by ext3_htree_next_block
+-- 
+1.7.10.4
+

Added: dists/wheezy/linux/debian/patches/bugfix/all/ext4-return-32-64-bit-dir-name-hash-according-to-usa.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/wheezy/linux/debian/patches/bugfix/all/ext4-return-32-64-bit-dir-name-hash-according-to-usa.patch	Sat Jun  8 17:12:21 2013	(r20208)
@@ -0,0 +1,379 @@
+From: Fan Yong <yong.fan at whamcloud.com>
+Date: Sun, 18 Mar 2012 22:44:40 -0400
+Subject: ext4: return 32/64-bit dir name hash according to usage type
+
+commit d1f5273e9adb40724a85272f248f210dc4ce919a upstream.
+
+Traditionally ext2/3/4 has returned a 32-bit hash value from llseek()
+to appease NFSv2, which can only handle a 32-bit cookie for seekdir()
+and telldir().  However, this causes problems if there are 32-bit hash
+collisions, since the NFSv2 server can get stuck resending the same
+entries from the directory repeatedly.
+
+Allow ext4 to return a full 64-bit hash (both major and minor) for
+telldir to decrease the chance of hash collisions.  This still needs
+integration on the NFS side.
+
+Patch-updated-by: Bernd Schubert <bernd.schubert at itwm.fraunhofer.de>
+(blame me if something is not correct)
+
+Signed-off-by: Fan Yong <yong.fan at whamcloud.com>
+Signed-off-by: Andreas Dilger <adilger at whamcloud.com>
+Signed-off-by: Bernd Schubert <bernd.schubert at itwm.fraunhofer.de>
+Signed-off-by: "Theodore Ts'o" <tytso at mit.edu>
+Signed-off-by: Jonathan Nieder <jrnieder at gmail.com>
+---
+ fs/ext4/dir.c  |  214 ++++++++++++++++++++++++++++++++++++++++++++------------
+ fs/ext4/ext4.h |    6 +-
+ fs/ext4/hash.c |    4 +-
+ 3 files changed, 176 insertions(+), 48 deletions(-)
+
+diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
+index 164c560..689d1b1 100644
+--- a/fs/ext4/dir.c
++++ b/fs/ext4/dir.c
+@@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = {
+ 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
+ };
+ 
+-static int ext4_readdir(struct file *, void *, filldir_t);
+ static int ext4_dx_readdir(struct file *filp,
+ 			   void *dirent, filldir_t filldir);
+-static int ext4_release_dir(struct inode *inode,
+-				struct file *filp);
+-
+-const struct file_operations ext4_dir_operations = {
+-	.llseek		= ext4_llseek,
+-	.read		= generic_read_dir,
+-	.readdir	= ext4_readdir,		/* we take BKL. needed?*/
+-	.unlocked_ioctl = ext4_ioctl,
+-#ifdef CONFIG_COMPAT
+-	.compat_ioctl	= ext4_compat_ioctl,
+-#endif
+-	.fsync		= ext4_sync_file,
+-	.release	= ext4_release_dir,
+-};
+-
+ 
+ static unsigned char get_dtype(struct super_block *sb, int filetype)
+ {
+@@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
+ 	return (ext4_filetype_table[filetype]);
+ }
+ 
++/**
++ * Check if the given dir-inode refers to an htree-indexed directory
++ * (or a directory which could potentially get converted to use htree
++ * indexing).
++ *
++ * Return 1 if it is a dx dir, 0 if not
++ */
++static int is_dx_dir(struct inode *inode)
++{
++	struct super_block *sb = inode->i_sb;
++
++	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
++		     EXT4_FEATURE_COMPAT_DIR_INDEX) &&
++	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
++	     ((inode->i_size >> sb->s_blocksize_bits) == 1)))
++		return 1;
++
++	return 0;
++}
++
+ /*
+  * Return 0 if the directory entry is OK, and 1 if there is a problem
+  *
+@@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp,
+ 	unsigned int offset;
+ 	int i, stored;
+ 	struct ext4_dir_entry_2 *de;
+-	struct super_block *sb;
+ 	int err;
+ 	struct inode *inode = filp->f_path.dentry->d_inode;
++	struct super_block *sb = inode->i_sb;
+ 	int ret = 0;
+ 	int dir_has_error = 0;
+ 
+-	sb = inode->i_sb;
+-
+-	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
+-				    EXT4_FEATURE_COMPAT_DIR_INDEX) &&
+-	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
+-	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
++	if (is_dx_dir(inode)) {
+ 		err = ext4_dx_readdir(filp, dirent, filldir);
+ 		if (err != ERR_BAD_DX_DIR) {
+ 			ret = err;
+@@ -254,22 +253,134 @@ out:
+ 	return ret;
+ }
+ 
++static inline int is_32bit_api(void)
++{
++#ifdef CONFIG_COMPAT
++	return is_compat_task();
++#else
++	return (BITS_PER_LONG == 32);
++#endif
++}
++
+ /*
+  * These functions convert from the major/minor hash to an f_pos
+- * value.
++ * value for dx directories
+  *
+- * Currently we only use major hash numer.  This is unfortunate, but
+- * on 32-bit machines, the same VFS interface is used for lseek and
+- * llseek, so if we use the 64 bit offset, then the 32-bit versions of
+- * lseek/telldir/seekdir will blow out spectacularly, and from within
+- * the ext2 low-level routine, we don't know if we're being called by
+- * a 64-bit version of the system call or the 32-bit version of the
+- * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
+- * cookie.  Sigh.
++ * Upper layer (for example NFS) should specify FMODE_32BITHASH or
++ * FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted
++ * directly on both 32-bit and 64-bit nodes, under such case, neither
++ * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
+  */
+-#define hash2pos(major, minor)	(major >> 1)
+-#define pos2maj_hash(pos)	((pos << 1) & 0xffffffff)
+-#define pos2min_hash(pos)	(0)
++static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
++{
++	if ((filp->f_mode & FMODE_32BITHASH) ||
++	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
++		return major >> 1;
++	else
++		return ((__u64)(major >> 1) << 32) | (__u64)minor;
++}
++
++static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
++{
++	if ((filp->f_mode & FMODE_32BITHASH) ||
++	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
++		return (pos << 1) & 0xffffffff;
++	else
++		return ((pos >> 32) << 1) & 0xffffffff;
++}
++
++static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
++{
++	if ((filp->f_mode & FMODE_32BITHASH) ||
++	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
++		return 0;
++	else
++		return pos & 0xffffffff;
++}
++
++/*
++ * Return 32- or 64-bit end-of-file for dx directories
++ */
++static inline loff_t ext4_get_htree_eof(struct file *filp)
++{
++	if ((filp->f_mode & FMODE_32BITHASH) ||
++	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
++		return EXT4_HTREE_EOF_32BIT;
++	else
++		return EXT4_HTREE_EOF_64BIT;
++}
++
++
++/*
++ * ext4_dir_llseek() based on generic_file_llseek() to handle both
++ * non-htree and htree directories, where the "offset" is in terms
++ * of the filename hash value instead of the byte offset.
++ *
++ * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
++ *       will be invalid once the directory was converted into a dx directory
++ */
++loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
++{
++	struct inode *inode = file->f_mapping->host;
++	loff_t ret = -EINVAL;
++	int dx_dir = is_dx_dir(inode);
++
++	mutex_lock(&inode->i_mutex);
++
++	/* NOTE: relative offsets with dx directories might not work
++	 *       as expected, as it is difficult to figure out the
++	 *       correct offset between dx hashes */
++
++	switch (origin) {
++	case SEEK_END:
++		if (unlikely(offset > 0))
++			goto out_err; /* not supported for directories */
++
++		/* so only negative offsets are left, does that have a
++		 * meaning for directories at all? */
++		if (dx_dir)
++			offset += ext4_get_htree_eof(file);
++		else
++			offset += inode->i_size;
++		break;
++	case SEEK_CUR:
++		/*
++		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
++		 * position-querying operation.  Avoid rewriting the "same"
++		 * f_pos value back to the file because a concurrent read(),
++		 * write() or lseek() might have altered it
++		 */
++		if (offset == 0) {
++			offset = file->f_pos;
++			goto out_ok;
++		}
++
++		offset += file->f_pos;
++		break;
++	}
++
++	if (unlikely(offset < 0))
++		goto out_err;
++
++	if (!dx_dir) {
++		if (offset > inode->i_sb->s_maxbytes)
++			goto out_err;
++	} else if (offset > ext4_get_htree_eof(file))
++		goto out_err;
++
++	/* Special lock needed here? */
++	if (offset != file->f_pos) {
++		file->f_pos = offset;
++		file->f_version = 0;
++	}
++
++out_ok:
++	ret = offset;
++out_err:
++	mutex_unlock(&inode->i_mutex);
++
++	return ret;
++}
+ 
+ /*
+  * This structure holds the nodes of the red-black tree used to store
+@@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root)
+ }
+ 
+ 
+-static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
++static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
++							   loff_t pos)
+ {
+ 	struct dir_private_info *p;
+ 
+ 	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+ 	if (!p)
+ 		return NULL;
+-	p->curr_hash = pos2maj_hash(pos);
+-	p->curr_minor_hash = pos2min_hash(pos);
++	p->curr_hash = pos2maj_hash(filp, pos);
++	p->curr_minor_hash = pos2min_hash(filp, pos);
+ 	return p;
+ }
+ 
+@@ -429,7 +541,7 @@ static int call_filldir(struct file *filp, void *dirent,
+ 		       "null fname?!?\n");
+ 		return 0;
+ 	}
+-	curr_pos = hash2pos(fname->hash, fname->minor_hash);
++	curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
+ 	while (fname) {
+ 		error = filldir(dirent, fname->name,
+ 				fname->name_len, curr_pos,
+@@ -454,13 +566,13 @@ static int ext4_dx_readdir(struct file *filp,
+ 	int	ret;
+ 
+ 	if (!info) {
+-		info = ext4_htree_create_dir_info(filp->f_pos);
++		info = ext4_htree_create_dir_info(filp, filp->f_pos);
+ 		if (!info)
+ 			return -ENOMEM;
+ 		filp->private_data = info;
+ 	}
+ 
+-	if (filp->f_pos == EXT4_HTREE_EOF)
++	if (filp->f_pos == ext4_get_htree_eof(filp))
+ 		return 0;	/* EOF */
+ 
+ 	/* Some one has messed with f_pos; reset the world */
+@@ -468,8 +580,8 @@ static int ext4_dx_readdir(struct file *filp,
+ 		free_rb_tree_fname(&info->root);
+ 		info->curr_node = NULL;
+ 		info->extra_fname = NULL;
+-		info->curr_hash = pos2maj_hash(filp->f_pos);
+-		info->curr_minor_hash = pos2min_hash(filp->f_pos);
++		info->curr_hash = pos2maj_hash(filp, filp->f_pos);
++		info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
+ 	}
+ 
+ 	/*
+@@ -501,7 +613,7 @@ static int ext4_dx_readdir(struct file *filp,
+ 			if (ret < 0)
+ 				return ret;
+ 			if (ret == 0) {
+-				filp->f_pos = EXT4_HTREE_EOF;
++				filp->f_pos = ext4_get_htree_eof(filp);
+ 				break;
+ 			}
+ 			info->curr_node = rb_first(&info->root);
+@@ -521,7 +633,7 @@ static int ext4_dx_readdir(struct file *filp,
+ 			info->curr_minor_hash = fname->minor_hash;
+ 		} else {
+ 			if (info->next_hash == ~0) {
+-				filp->f_pos = EXT4_HTREE_EOF;
++				filp->f_pos = ext4_get_htree_eof(filp);
+ 				break;
+ 			}
+ 			info->curr_hash = info->next_hash;
+@@ -540,3 +652,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
+ 
+ 	return 0;
+ }
++
++const struct file_operations ext4_dir_operations = {
++	.llseek		= ext4_dir_llseek,
++	.read		= generic_read_dir,
++	.readdir	= ext4_readdir,
++	.unlocked_ioctl = ext4_ioctl,
++#ifdef CONFIG_COMPAT
++	.compat_ioctl	= ext4_compat_ioctl,
++#endif
++	.fsync		= ext4_sync_file,
++	.release	= ext4_release_dir,
++};
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 8cb184c..2ac1eef 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1597,7 +1597,11 @@ struct dx_hash_info
+ 	u32		*seed;
+ };
+ 
+-#define EXT4_HTREE_EOF	0x7fffffff
++
++/* 32 and 64 bit signed EOF for dx directories */
++#define EXT4_HTREE_EOF_32BIT   ((1UL  << (32 - 1)) - 1)
++#define EXT4_HTREE_EOF_64BIT   ((1ULL << (64 - 1)) - 1)
++
+ 
+ /*
+  * Control parameters used by ext4_htree_next_block
+diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
+index ac8f168..fa8e491 100644
+--- a/fs/ext4/hash.c
++++ b/fs/ext4/hash.c
+@@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
+ 		return -1;
+ 	}
+ 	hash = hash & ~1;
+-	if (hash == (EXT4_HTREE_EOF << 1))
+-		hash = (EXT4_HTREE_EOF-1) << 1;
++	if (hash == (EXT4_HTREE_EOF_32BIT << 1))
++		hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
+ 	hinfo->hash = hash;
+ 	hinfo->minor_hash = minor_hash;
+ 	return 0;
+-- 
+1.7.10.4
+

Added: dists/wheezy/linux/debian/patches/bugfix/all/fs-add-new-FMODE-flags-FMODE_32bithash-and-FMODE_64b.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/wheezy/linux/debian/patches/bugfix/all/fs-add-new-FMODE-flags-FMODE_32bithash-and-FMODE_64b.patch	Sat Jun  8 17:12:21 2013	(r20208)
@@ -0,0 +1,34 @@
+From: Bernd Schubert <bernd.schubert at itwm.fraunhofer.de>
+Date: Tue, 13 Mar 2012 22:51:38 -0400
+Subject: fs: add new FMODE flags: FMODE_32bithash and FMODE_64bithash
+
+commit 6a8a13e03861c0ab83ab07d573ca793cff0e5d00 upstream.
+
+Those flags are supposed to be set by NFS readdir() to tell ext3/ext4
+to use 32bit (NFSv2) or 64bit hash values (offsets) in seekdir().
+
+Signed-off-by: Bernd Schubert <bernd.schubert at itwm.fraunhofer.de>
+Signed-off-by: "Theodore Ts'o" <tytso at mit.edu>
+Signed-off-by: Jonathan Nieder <jrnieder at gmail.com>
+---
+ include/linux/fs.h |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 29b6353..fb7ce74 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -92,6 +92,10 @@ struct inodes_stat_t {
+ /* File is opened using open(.., 3, ..) and is writeable only for ioctls
+    (specialy hack for floppy.c) */
+ #define FMODE_WRITE_IOCTL	((__force fmode_t)0x100)
++/* 32bit hashes as llseek() offset (for directories) */
++#define FMODE_32BITHASH         ((__force fmode_t)0x200)
++/* 64bit hashes as llseek() offset (for directories) */
++#define FMODE_64BITHASH         ((__force fmode_t)0x400)
+ 
+ /*
+  * Don't update ctime and mtime.
+-- 
+1.7.10.4
+

Added: dists/wheezy/linux/debian/patches/bugfix/all/nfsd-rename-int-access-to-int-may_flags-in-nfsd_open.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/wheezy/linux/debian/patches/bugfix/all/nfsd-rename-int-access-to-int-may_flags-in-nfsd_open.patch	Sat Jun  8 17:12:21 2013	(r20208)
@@ -0,0 +1,84 @@
+From: Bernd Schubert <bernd.schubert at itwm.fraunhofer.de>
+Date: Sun, 18 Mar 2012 22:44:49 -0400
+Subject: nfsd: rename 'int access' to 'int may_flags' in nfsd_open()
+
+commit 999448a8c0202d8c41711c92385323520644527b upstream.
+
+Just rename this variable, as the next patch will add a flag and
+'access' as variable name would not be correct any more.
+
+Signed-off-by: Bernd Schubert <bernd.schubert at itwm.fraunhofer.de>
+Signed-off-by: "Theodore Ts'o" <tytso at mit.edu>
+Acked-by: J. Bruce Fields <bfields at redhat.com>
+Signed-off-by: Jonathan Nieder <jrnieder at gmail.com>
+---
+ fs/nfsd/vfs.c |   18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 5c3cd82..b395c61 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -726,12 +726,13 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
+ 
+ /*
+  * Open an existing file or directory.
+- * The access argument indicates the type of open (read/write/lock)
++ * The may_flags argument indicates the type of open (read/write/lock)
++ * and additional flags.
+  * N.B. After this call fhp needs an fh_put
+  */
+ __be32
+ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+-			int access, struct file **filp)
++			int may_flags, struct file **filp)
+ {
+ 	struct dentry	*dentry;
+ 	struct inode	*inode;
+@@ -746,7 +747,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ 	 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+ 	 * in case a chmod has now revoked permission.
+ 	 */
+-	err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
++	err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE);
+ 	if (err)
+ 		goto out;
+ 
+@@ -757,7 +758,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ 	 * or any access when mandatory locking enabled
+ 	 */
+ 	err = nfserr_perm;
+-	if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
++	if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
+ 		goto out;
+ 	/*
+ 	 * We must ignore files (but only files) which might have mandatory
+@@ -770,12 +771,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ 	if (!inode->i_fop)
+ 		goto out;
+ 
+-	host_err = nfsd_open_break_lease(inode, access);
++	host_err = nfsd_open_break_lease(inode, may_flags);
+ 	if (host_err) /* NOMEM or WOULDBLOCK */
+ 		goto out_nfserr;
+ 
+-	if (access & NFSD_MAY_WRITE) {
+-		if (access & NFSD_MAY_READ)
++	if (may_flags & NFSD_MAY_WRITE) {
++		if (may_flags & NFSD_MAY_READ)
+ 			flags = O_RDWR|O_LARGEFILE;
+ 		else
+ 			flags = O_WRONLY|O_LARGEFILE;
+@@ -785,7 +786,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ 	if (IS_ERR(*filp))
+ 		host_err = PTR_ERR(*filp);
+ 	else
+-		host_err = ima_file_check(*filp, access);
++		host_err = ima_file_check(*filp, may_flags);
++
+ out_nfserr:
+ 	err = nfserrno(host_err);
+ out:
+-- 
+1.7.10.4
+

Added: dists/wheezy/linux/debian/patches/bugfix/all/nfsd-vfs_llseek-with-32-or-64-bit-offsets-hashes.patch
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ dists/wheezy/linux/debian/patches/bugfix/all/nfsd-vfs_llseek-with-32-or-64-bit-offsets-hashes.patch	Sat Jun  8 17:12:21 2013	(r20208)
@@ -0,0 +1,77 @@
+From: Bernd Schubert <bernd.schubert at itwm.fraunhofer.de>
+Date: Sun, 18 Mar 2012 22:44:50 -0400
+Subject: nfsd: vfs_llseek() with 32 or 64 bit offsets (hashes)
+
+commit 06effdbb49af5f6c7d20affaec74603914acc768 upstream.
+
+Use 32-bit or 64-bit llseek() hashes for directory offsets depending on
+the NFS version. NFSv2 gets 32-bit hashes only.
+
+NOTE: This patch got rather complex as Christoph asked to set the
+filp->f_mode flag in the open call or immediately after dentry_open()
+in nfsd_open() to avoid races.
+Personally I still do not see a reason for that and in my opinion
+FMODE_32BITHASH/FMODE_64BITHASH flags could be set in nfsd_readdir(), as it
+follows directly after nfsd_open() without a chance of races.
+
+Signed-off-by: Bernd Schubert <bernd.schubert at itwm.fraunhofer.de>
+Signed-off-by: "Theodore Ts'o" <tytso at mit.edu>
+Acked-by: J. Bruce Fields <bfields at redhat.com>
+Signed-off-by: Jonathan Nieder <jrnieder at gmail.com>
+---
+ fs/nfsd/vfs.c |   15 +++++++++++++--
+ fs/nfsd/vfs.h |    2 ++
+ 2 files changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index b395c61..959039e 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -785,9 +785,15 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ 			    flags, current_cred());
+ 	if (IS_ERR(*filp))
+ 		host_err = PTR_ERR(*filp);
+-	else
++	else {
+ 		host_err = ima_file_check(*filp, may_flags);
+ 
++		if (may_flags & NFSD_MAY_64BIT_COOKIE)
++			(*filp)->f_mode |= FMODE_64BITHASH;
++		else
++			(*filp)->f_mode |= FMODE_32BITHASH;
++	}
++
+ out_nfserr:
+ 	err = nfserrno(host_err);
+ out:
+@@ -2011,8 +2017,13 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
+ 	__be32		err;
+ 	struct file	*file;
+ 	loff_t		offset = *offsetp;
++	int             may_flags = NFSD_MAY_READ;
+ 
+-	err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
++	/* NFSv2 only supports 32 bit cookies */
++	if (rqstp->rq_vers > 2)
++		may_flags |= NFSD_MAY_64BIT_COOKIE;
++
++	err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
+ 	if (err)
+ 		goto out;
+ 
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index 3f54ad0..85d4d42 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -27,6 +27,8 @@
+ #define NFSD_MAY_BYPASS_GSS		0x400
+ #define NFSD_MAY_READ_IF_EXEC		0x800
+ 
++#define NFSD_MAY_64BIT_COOKIE		0x1000 /* 64 bit readdir cookies for >= NFSv3 */
++
+ #define NFSD_MAY_CREATE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE)
+ #define NFSD_MAY_REMOVE		(NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
+ 
+-- 
+1.7.10.4
+

Modified: dists/wheezy/linux/debian/patches/series
==============================================================================
--- dists/wheezy/linux/debian/patches/series	Sat Jun  8 16:40:34 2013	(r20207)
+++ dists/wheezy/linux/debian/patches/series	Sat Jun  8 17:12:21 2013	(r20208)
@@ -643,3 +643,10 @@
 bugfix/x86/ipw2200-Fix-order-of-device-registration.patch
 bugfix/x86/ipw2100-Fix-order-of-device-registration.patch
 bugfix/x86/net-wireless-ipw2100-Fix-WARN_ON-occurring-in-wiphy_.patch
+
+# 64-bit NFS readdir cookies on ext3/ext4 with dir_index
+bugfix/all/fs-add-new-FMODE-flags-FMODE_32bithash-and-FMODE_64b.patch
+bugfix/all/ext4-return-32-64-bit-dir-name-hash-according-to-usa.patch
+bugfix/all/nfsd-rename-int-access-to-int-may_flags-in-nfsd_open.patch
+bugfix/all/nfsd-vfs_llseek-with-32-or-64-bit-offsets-hashes.patch
+bugfix/all/ext3-return-32-64-bit-dir-name-hash-according-to-usa.patch



More information about the Kernel-svn-changes mailing list